You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
204 lines
6.3 KiB
204 lines
6.3 KiB
From 88f5739d3f0d34c51f318fc460b843253b4242e0 Mon Sep 17 00:00:00 2001
|
|
From: Nicolas Bock <nicolasbock@gmail.com>
|
|
Date: Fri, 8 Nov 2013 09:58:55 -0700
|
|
Subject: [PATCH 2/2] Make markupSanitizer.py support python 3.1 and 3.2
|
|
|
|
The script only supports python < 3 because of how unicode literals are
|
|
treated in python-3.{1,2}. In python-2, a unicode string had to be prefixed
|
|
with 'u', while this notation was dropped in python-3.{1,2}. I have added a
|
|
check to the script so that it now runs with python-2.7 and python-3.{1,2,3}.
|
|
---
|
|
doc/markupSanitizer.py | 179 ++++++++++++++++++++++++++-----------------------
|
|
1 file changed, 95 insertions(+), 84 deletions(-)
|
|
|
|
diff --git a/doc/markupSanitizer.py b/doc/markupSanitizer.py
|
|
index f206cab..6fe247d 100755
|
|
--- a/doc/markupSanitizer.py
|
|
+++ b/doc/markupSanitizer.py
|
|
@@ -4,87 +4,98 @@ from bs4 import BeautifulSoup
|
|
import sys
|
|
import os
|
|
|
|
-# Accept filename as user input
|
|
-argc = len( sys.argv )
|
|
-if (argc < 2): raise Exception
|
|
-fileName = sys.argv[1];
|
|
-
|
|
-# Construct a DOM object
|
|
-soup = BeautifulSoup(open(fileName), "lxml")
|
|
-
|
|
-# Assuming, tt tags are not spewed recklessly by latex2html,
|
|
-# replace them with code tags
|
|
-for t in soup('tt'):
|
|
- t.wrap( soup.new_tag('code') )
|
|
- t.unwrap()
|
|
-
|
|
-# Rewrap all div class=alltt blocks in pre tags
|
|
-for d in soup('div','alltt'):
|
|
- d.wrap( soup.new_tag('pre') )
|
|
- d.unwrap()
|
|
-
|
|
-# Remove br and span tags from within pre sections
|
|
-for p in soup('pre'):
|
|
- for b in p('br'):
|
|
- b.extract()
|
|
- for s in p('span'):
|
|
- s.unwrap()
|
|
-
|
|
-# Remove all useless class 'arabic' spans
|
|
-for s in soup('span','arabic'):
|
|
- s.unwrap()
|
|
-
|
|
-# Extract the navigation bar
|
|
-navmenu = soup.find('div', 'navigation')
|
|
-if navmenu:
|
|
- navmenu.extract()
|
|
-
|
|
-# Wrap the remaining contents within a div
|
|
-if not soup.find('div', id='maincontainer'):
|
|
- soup.body['id'] = 'maincontainer'
|
|
- soup.body.name = 'div'
|
|
- soup.find('div', id='maincontainer').wrap( soup.new_tag('body') )
|
|
-
|
|
-if navmenu:
|
|
- # If this navmenu doesn't already have a TOC, insert one
|
|
- if not navmenu.find('ul','manual-toc'):
|
|
- # Add a toc within the navmenu
|
|
- navmenuTOC = BeautifulSoup(open("tmp-navmenu.html"), "lxml")
|
|
- navmenuTOC = navmenuTOC.find('ul','manual-toc').extract()
|
|
- navmenuTOC.append( BeautifulSoup("".join([
|
|
- '<li><a href="http://charm.cs.illinois.edu">PPL Homepage</a></li>',
|
|
- '<li><a href="http://charm.cs.illinois.edu/help">Other Manuals</a></li>'])
|
|
- ) )
|
|
- navmenu.append(navmenuTOC)
|
|
-
|
|
- # Insert navigation symbols to prev and next links
|
|
- prevsymbol = soup.new_tag('span')
|
|
- prevsymbol['class'] = 'navsymbol'
|
|
- prevsymbol.string = u'\xab'
|
|
- prv = navmenu.find('li',id='nav-prev')
|
|
- if prv:
|
|
- prv.find('a').insert(0, prevsymbol)
|
|
-
|
|
- nextsymbol = soup.new_tag('span')
|
|
- nextsymbol['class'] = 'navsymbol'
|
|
- nextsymbol.string = u'\xbb'
|
|
- nxt = navmenu.find('li',id='nav-next')
|
|
- if nxt:
|
|
- nxt.find('a').append(nextsymbol)
|
|
-
|
|
- # Reinsert the navigation bar at the end
|
|
- soup.body.append(navmenu)
|
|
-
|
|
-# Extract the title
|
|
-titl = soup.find('title')
|
|
-
|
|
-# Replace the head section with the user-supplied head markup
|
|
-soup.find('head').extract()
|
|
-newhead = BeautifulSoup(open("../assets/head.html"), "lxml")
|
|
-newhead = newhead.find('head').extract()
|
|
-newhead.append(titl)
|
|
-soup.html.body.insert_before(newhead)
|
|
-
|
|
-# Print cleaned up markup to stdout
|
|
-print( soup.prettify(formatter="html") )
|
|
-
|
|
+def main ():
|
|
+ # Accept filename as user input
|
|
+ argc = len( sys.argv )
|
|
+ if (argc < 2): raise Exception
|
|
+ fileName = sys.argv[1];
|
|
+
|
|
+ # Construct a DOM object
|
|
+ soup = BeautifulSoup(open(fileName), "lxml")
|
|
+
|
|
+ # Assuming, tt tags are not spewed recklessly by latex2html,
|
|
+ # replace them with code tags
|
|
+ for t in soup('tt'):
|
|
+ t.wrap( soup.new_tag('code') )
|
|
+ t.unwrap()
|
|
+
|
|
+ # Rewrap all div class=alltt blocks in pre tags
|
|
+ for d in soup('div','alltt'):
|
|
+ d.wrap( soup.new_tag('pre') )
|
|
+ d.unwrap()
|
|
+
|
|
+ # Remove br and span tags from within pre sections
|
|
+ for p in soup('pre'):
|
|
+ for b in p('br'):
|
|
+ b.extract()
|
|
+ for s in p('span'):
|
|
+ s.unwrap()
|
|
+
|
|
+ # Remove all useless class 'arabic' spans
|
|
+ for s in soup('span','arabic'):
|
|
+ s.unwrap()
|
|
+
|
|
+ # Extract the navigation bar
|
|
+ navmenu = soup.find('div', 'navigation')
|
|
+ if navmenu:
|
|
+ navmenu.extract()
|
|
+
|
|
+ # Wrap the remaining contents within a div
|
|
+ if not soup.find('div', id='maincontainer'):
|
|
+ soup.body['id'] = 'maincontainer'
|
|
+ soup.body.name = 'div'
|
|
+ soup.find('div', id='maincontainer').wrap( soup.new_tag('body') )
|
|
+
|
|
+ if navmenu:
|
|
+ # If this navmenu doesn't already have a TOC, insert one
|
|
+ if not navmenu.find('ul','manual-toc'):
|
|
+ # Add a toc within the navmenu
|
|
+ navmenuTOC = BeautifulSoup(open("tmp-navmenu.html"), "lxml")
|
|
+ navmenuTOC = navmenuTOC.find('ul','manual-toc').extract()
|
|
+ navmenuTOC.append( BeautifulSoup("".join([
|
|
+ '<li><a href="http://charm.cs.illinois.edu">PPL Homepage</a></li>',
|
|
+ '<li><a href="http://charm.cs.illinois.edu/help">Other Manuals</a></li>'])
|
|
+ ) )
|
|
+ navmenu.append(navmenuTOC)
|
|
+
|
|
+ # Insert navigation symbols to prev and next links
|
|
+ prevsymbol = soup.new_tag('span')
|
|
+ prevsymbol['class'] = 'navsymbol'
|
|
+ prevsymbol.string = u('\xab')
|
|
+ prv = navmenu.find('li',id='nav-prev')
|
|
+ if prv:
|
|
+ prv.find('a').insert(0, prevsymbol)
|
|
+
|
|
+ nextsymbol = soup.new_tag('span')
|
|
+ nextsymbol['class'] = 'navsymbol'
|
|
+ nextsymbol.string = u('\xbb')
|
|
+ nxt = navmenu.find('li',id='nav-next')
|
|
+ if nxt:
|
|
+ nxt.find('a').append(nextsymbol)
|
|
+
|
|
+ # Reinsert the navigation bar at the end
|
|
+ soup.body.append(navmenu)
|
|
+
|
|
+ # Extract the title
|
|
+ titl = soup.find('title')
|
|
+
|
|
+ # Replace the head section with the user-supplied head markup
|
|
+ soup.find('head').extract()
|
|
+ newhead = BeautifulSoup(open("../assets/head.html"), "lxml")
|
|
+ newhead = newhead.find('head').extract()
|
|
+ newhead.append(titl)
|
|
+ soup.html.body.insert_before(newhead)
|
|
+
|
|
+ # Print cleaned up markup to stdout
|
|
+ print( soup.prettify(formatter="html") )
|
|
+
|
|
+if sys.version < '3':
|
|
+ import codecs
|
|
+ def u (x):
|
|
+ return codecs.unicode_escape_decode(x)[0]
|
|
+else:
|
|
+ def u (x):
|
|
+ return x
|
|
+
|
|
+if __name__ == "__main__":
|
|
+ main()
|
|
--
|
|
1.8.1.5
|
|
|