You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
gentoo-overlay/sys-cluster/charm/files/charm-6.5.1-fix-markupSanit...

204 lines
6.3 KiB

From 88f5739d3f0d34c51f318fc460b843253b4242e0 Mon Sep 17 00:00:00 2001
From: Nicolas Bock <nicolasbock@gmail.com>
Date: Fri, 8 Nov 2013 09:58:55 -0700
Subject: [PATCH 2/2] Make markupSanitizer.py support python 3.1 and 3.2
The script only supports python < 3 because of how unicode literals are
treated in python-3.{1,2}. In python-2, a unicode string literal had to be
prefixed with 'u', while this notation was dropped in python-3.{1,2} (it was
only reinstated in python-3.3). I have added a version check to the script so
that it now runs with python-2.7 and python-3.{1,2,3}.
---
doc/markupSanitizer.py | 179 ++++++++++++++++++++++++++-----------------------
1 file changed, 95 insertions(+), 84 deletions(-)
diff --git a/doc/markupSanitizer.py b/doc/markupSanitizer.py
index f206cab..6fe247d 100755
--- a/doc/markupSanitizer.py
+++ b/doc/markupSanitizer.py
@@ -4,87 +4,98 @@ from bs4 import BeautifulSoup
import sys
import os
-# Accept filename as user input
-argc = len( sys.argv )
-if (argc < 2): raise Exception
-fileName = sys.argv[1];
-
-# Construct a DOM object
-soup = BeautifulSoup(open(fileName), "lxml")
-
-# Assuming, tt tags are not spewed recklessly by latex2html,
-# replace them with code tags
-for t in soup('tt'):
- t.wrap( soup.new_tag('code') )
- t.unwrap()
-
-# Rewrap all div class=alltt blocks in pre tags
-for d in soup('div','alltt'):
- d.wrap( soup.new_tag('pre') )
- d.unwrap()
-
-# Remove br and span tags from within pre sections
-for p in soup('pre'):
- for b in p('br'):
- b.extract()
- for s in p('span'):
- s.unwrap()
-
-# Remove all useless class 'arabic' spans
-for s in soup('span','arabic'):
- s.unwrap()
-
-# Extract the navigation bar
-navmenu = soup.find('div', 'navigation')
-if navmenu:
- navmenu.extract()
-
-# Wrap the remaining contents within a div
-if not soup.find('div', id='maincontainer'):
- soup.body['id'] = 'maincontainer'
- soup.body.name = 'div'
- soup.find('div', id='maincontainer').wrap( soup.new_tag('body') )
-
-if navmenu:
- # If this navmenu doesn't already have a TOC, insert one
- if not navmenu.find('ul','manual-toc'):
- # Add a toc within the navmenu
- navmenuTOC = BeautifulSoup(open("tmp-navmenu.html"), "lxml")
- navmenuTOC = navmenuTOC.find('ul','manual-toc').extract()
- navmenuTOC.append( BeautifulSoup("".join([
- '<li><a href="http://charm.cs.illinois.edu">PPL Homepage</a></li>',
- '<li><a href="http://charm.cs.illinois.edu/help">Other Manuals</a></li>'])
- ) )
- navmenu.append(navmenuTOC)
-
- # Insert navigation symbols to prev and next links
- prevsymbol = soup.new_tag('span')
- prevsymbol['class'] = 'navsymbol'
- prevsymbol.string = u'\xab'
- prv = navmenu.find('li',id='nav-prev')
- if prv:
- prv.find('a').insert(0, prevsymbol)
-
- nextsymbol = soup.new_tag('span')
- nextsymbol['class'] = 'navsymbol'
- nextsymbol.string = u'\xbb'
- nxt = navmenu.find('li',id='nav-next')
- if nxt:
- nxt.find('a').append(nextsymbol)
-
- # Reinsert the navigation bar at the end
- soup.body.append(navmenu)
-
-# Extract the title
-titl = soup.find('title')
-
-# Replace the head section with the user-supplied head markup
-soup.find('head').extract()
-newhead = BeautifulSoup(open("../assets/head.html"), "lxml")
-newhead = newhead.find('head').extract()
-newhead.append(titl)
-soup.html.body.insert_before(newhead)
-
-# Print cleaned up markup to stdout
-print( soup.prettify(formatter="html") )
-
+def main ():
+ # Accept filename as user input
+ argc = len( sys.argv )
+ if (argc < 2): raise Exception
+ fileName = sys.argv[1];
+
+ # Construct a DOM object
+ soup = BeautifulSoup(open(fileName), "lxml")
+
+ # Assuming, tt tags are not spewed recklessly by latex2html,
+ # replace them with code tags
+ for t in soup('tt'):
+ t.wrap( soup.new_tag('code') )
+ t.unwrap()
+
+ # Rewrap all div class=alltt blocks in pre tags
+ for d in soup('div','alltt'):
+ d.wrap( soup.new_tag('pre') )
+ d.unwrap()
+
+ # Remove br and span tags from within pre sections
+ for p in soup('pre'):
+ for b in p('br'):
+ b.extract()
+ for s in p('span'):
+ s.unwrap()
+
+ # Remove all useless class 'arabic' spans
+ for s in soup('span','arabic'):
+ s.unwrap()
+
+ # Extract the navigation bar
+ navmenu = soup.find('div', 'navigation')
+ if navmenu:
+ navmenu.extract()
+
+ # Wrap the remaining contents within a div
+ if not soup.find('div', id='maincontainer'):
+ soup.body['id'] = 'maincontainer'
+ soup.body.name = 'div'
+ soup.find('div', id='maincontainer').wrap( soup.new_tag('body') )
+
+ if navmenu:
+ # If this navmenu doesn't already have a TOC, insert one
+ if not navmenu.find('ul','manual-toc'):
+ # Add a toc within the navmenu
+ navmenuTOC = BeautifulSoup(open("tmp-navmenu.html"), "lxml")
+ navmenuTOC = navmenuTOC.find('ul','manual-toc').extract()
+ navmenuTOC.append( BeautifulSoup("".join([
+ '<li><a href="http://charm.cs.illinois.edu">PPL Homepage</a></li>',
+ '<li><a href="http://charm.cs.illinois.edu/help">Other Manuals</a></li>'])
+ ) )
+ navmenu.append(navmenuTOC)
+
+ # Insert navigation symbols to prev and next links
+ prevsymbol = soup.new_tag('span')
+ prevsymbol['class'] = 'navsymbol'
+ prevsymbol.string = u('\xab')
+ prv = navmenu.find('li',id='nav-prev')
+ if prv:
+ prv.find('a').insert(0, prevsymbol)
+
+ nextsymbol = soup.new_tag('span')
+ nextsymbol['class'] = 'navsymbol'
+ nextsymbol.string = u('\xbb')
+ nxt = navmenu.find('li',id='nav-next')
+ if nxt:
+ nxt.find('a').append(nextsymbol)
+
+ # Reinsert the navigation bar at the end
+ soup.body.append(navmenu)
+
+ # Extract the title
+ titl = soup.find('title')
+
+ # Replace the head section with the user-supplied head markup
+ soup.find('head').extract()
+ newhead = BeautifulSoup(open("../assets/head.html"), "lxml")
+ newhead = newhead.find('head').extract()
+ newhead.append(titl)
+ soup.html.body.insert_before(newhead)
+
+ # Print cleaned up markup to stdout
+ print( soup.prettify(formatter="html") )
+
+if sys.version < '3':
+ import codecs
+ def u (x):
+ return codecs.unicode_escape_decode(x)[0]
+else:
+ def u (x):
+ return x
+
+if __name__ == "__main__":
+ main()
--
1.8.1.5