summaryrefslogtreecommitdiff
path: root/isso/utils/html.py
diff options
context:
space:
mode:
Diffstat (limited to 'isso/utils/html.py')
-rw-r--r--isso/utils/html.py60
1 files changed, 20 insertions, 40 deletions
diff --git a/isso/utils/html.py b/isso/utils/html.py
index 294b8d4..c0a20e4 100644
--- a/isso/utils/html.py
+++ b/isso/utils/html.py
@@ -7,56 +7,36 @@ import pkg_resources
from distutils.version import LooseVersion as Version
-HTML5LIB_VERSION = Version(pkg_resources.get_distribution("html5lib").version)
-HTML5LIB_SIMPLETREE = Version("0.95")
-
from isso.compat import reduce
-import html5lib
-from html5lib.sanitizer import HTMLSanitizer
-from html5lib.serializer import HTMLSerializer
+import bleach
import misaka
-def Sanitizer(elements, attributes):
-
- class Inner(HTMLSanitizer):
-
- # attributes found in Sundown's HTML serializer [1] except for <img> tag,
- # because images are not generated anyways.
- #
- # [1] https://github.com/vmg/sundown/blob/master/html/html.c
- allowed_elements = ["a", "p", "hr", "br", "ol", "ul", "li",
- "pre", "code", "blockquote",
- "del", "ins", "strong", "em",
- "h1", "h2", "h3", "h4", "h5", "h6",
- "table", "thead", "tbody", "th", "td"] + elements
-
- # href for <a> and align for <table>
- allowed_attributes = ["align", "href"] + attributes
-
- # remove disallowed tokens from the output
- def disallowed_token(self, token, token_type):
- return None
-
- return Inner
-
+# elements found in Sundown's HTML serializer [1] except for the <img> tag,
+# because images are not generated anyway.
+#
+# [1] https://github.com/vmg/sundown/blob/master/html/html.c
+ALLOWED_ELEMENTS = ["a", "p", "hr", "br", "ol", "ul", "li",
+ "pre", "code", "blockquote",
+ "del", "ins", "strong", "em",
+ "h1", "h2", "h3", "h4", "h5", "h6",
+ "table", "thead", "tbody", "th", "td"]
-def sanitize(tokenizer, document):
+# href for <a> and align for <table>
+ALLOWED_ATTRIBUTES = ["align", "href"]
- parser = html5lib.HTMLParser(tokenizer=tokenizer)
- domtree = parser.parseFragment(document)
- if HTML5LIB_VERSION > HTML5LIB_SIMPLETREE:
- builder = "etree"
- else:
- builder = "simpletree"
+class Sanitizer(object):
- stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
- serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)
+ def __init__(self, elements, attributes):
+ self.elements = ALLOWED_ELEMENTS + elements
+ self.attributes = ALLOWED_ATTRIBUTES + attributes
- return serializer.render(stream)
+ def sanitize(self, text):
+ return bleach.clean(text, tags=self.elements,
+ attributes=self.attributes, strip=True)
def Markdown(extensions=("strikethrough", "superscript", "autolink")):
@@ -96,7 +76,7 @@ class Markup(object):
conf.getlist("allowed-elements"),
conf.getlist("allowed-attributes"))
- self._render = lambda text: sanitize(sanitizer, parser(text))
+ self._render = lambda text: sanitizer.sanitize(parser(text))
def render(self, text):
return self._render(text)