78 lines
2.7 KiB
Diff
78 lines
2.7 KiB
Diff
From 812793c07d3202d3f5bc39091aec2e7071d000c8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Sun, 1 Jan 2012 19:30:57 +0100
|
|
Subject: [PATCH] Use shipped sgmllib for Python 3.x
|
|
|
|
---
|
|
feedparser/feedparser.py | 19 +++----------------
|
|
setup.py | 2 +-
|
|
2 files changed, 4 insertions(+), 17 deletions(-)
|
|
|
|
diff --git a/feedparser/feedparser.py b/feedparser/feedparser.py
|
|
index 8275c29..9a8a053 100644
|
|
--- a/feedparser/feedparser.py
|
|
+++ b/feedparser/feedparser.py
|
|
@@ -204,17 +204,9 @@ else:
|
|
try:
|
|
import sgmllib
|
|
except ImportError:
|
|
- # This is probably Python 3, which doesn't include sgmllib anymore
|
|
- _SGML_AVAILABLE = 0
|
|
+ import _feedparser_sgmllib as sgmllib
|
|
|
|
- # Mock sgmllib enough to allow subclassing later on
|
|
- class sgmllib(object):
|
|
- class SGMLParser(object):
|
|
- def goahead(self, i):
|
|
- pass
|
|
- def parse_starttag(self, i):
|
|
- pass
|
|
-else:
|
|
+if True:
|
|
_SGML_AVAILABLE = 1
|
|
|
|
# sgmllib defines a number of module-level regular expressions that are
|
|
@@ -2520,9 +2512,6 @@ class _RelativeURIResolver(_BaseHTMLProcessor):
|
|
_BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
|
|
|
|
def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type):
|
|
- if not _SGML_AVAILABLE:
|
|
- return htmlSource
|
|
-
|
|
p = _RelativeURIResolver(baseURI, encoding, _type)
|
|
p.feed(htmlSource)
|
|
return p.output()
|
|
@@ -2803,8 +2792,6 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
|
|
|
|
|
|
def _sanitizeHTML(htmlSource, encoding, _type):
|
|
- if not _SGML_AVAILABLE:
|
|
- return htmlSource
|
|
p = _HTMLSanitizer(encoding, _type)
|
|
htmlSource = htmlSource.replace('<![CDATA[', '&lt;![CDATA[')
|
|
p.feed(htmlSource)
|
|
@@ -3890,7 +3877,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
|
|
result['bozo'] = 1
|
|
result['bozo_exception'] = feedparser.exc or e
|
|
use_strict_parser = 0
|
|
- if not use_strict_parser and _SGML_AVAILABLE:
|
|
+ if not use_strict_parser:
|
|
feedparser = _LooseFeedParser(baseuri, baselang, 'utf-8', entities)
|
|
feedparser.feed(data.decode('utf-8', 'replace'))
|
|
result['feed'] = feedparser.feeddata
|
|
diff --git a/setup.py b/setup.py
|
|
index a4a60fe..8c15451 100644
|
|
--- a/setup.py
|
|
+++ b/setup.py
|
|
@@ -16,7 +16,7 @@ setup(
|
|
download_url = 'http://code.google.com/p/feedparser/',
|
|
platforms = ['POSIX', 'Windows'],
|
|
package_dir = {'': 'feedparser'},
|
|
- py_modules = ['feedparser'],
|
|
+ py_modules = ['feedparser', '_feedparser_sgmllib'],
|
|
keywords = ['atom', 'cdf', 'feed', 'parser', 'rdf', 'rss'],
|
|
classifiers = [
|
|
'Development Status :: 5 - Production/Stable',
|
|
--
|
|
1.7.8.1
|
|
|