1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# -*- coding: utf-8 -*-
import re
SUPPORTED_FORMATS = {
'markdown': ['.mdown', '.markdown', '.markdn', '.md', '.mdn', '.mdwn'],
'restructuredtext': ['.rst', '.rest'],
'textile': ['.textile'],
}
class Parser(object):
"""This class generates the HTML code depending on which syntax is used in
the souce document.
The Parser currently supports both Markdown and restructuredText
syntaxes.
"""
RST_REPLACEMENTS = [
(r'<div.*?>', r'', re.UNICODE),
(r'</div>', r'', re.UNICODE),
(r'<p class="system-message-\w+">.*?</p>', r'', re.UNICODE),
(r'Document or section may not begin with a transition\.',
r'', re.UNICODE),
(r'<h(\d+?).*?>', r'<h\1>', re.DOTALL | re.UNICODE),
(r'<hr.*?>\n', r'<hr />\n', re.DOTALL | re.UNICODE),
]
md_extensions = ''
def __init__(self, extension, encoding='utf8', md_extensions=''):
"""Configures this parser.
"""
self.encoding = encoding
self.format = None
for supp_format, supp_extensions in SUPPORTED_FORMATS.items():
for supp_extension in supp_extensions:
if supp_extension == extension:
self.format = supp_format
if not self.format:
raise NotImplementedError(u"Unsupported format %s" % extension)
if md_extensions:
exts = (value.strip() for value in md_extensions.split(','))
self.md_extensions = filter(None, exts)
def parse(self, text):
"""Parses and renders a text as HTML regarding current format.
"""
if self.format == 'markdown':
try:
import markdown
except ImportError:
raise RuntimeError(u"Looks like markdown is not installed")
if text.startswith(u'\ufeff'): # check for unicode BOM
text = text[1:]
return markdown.markdown(text, self.md_extensions)
elif self.format == 'restructuredtext':
try:
from landslide.rst import html_body
except ImportError:
raise RuntimeError(u"Looks like docutils are not installed")
html = html_body(text, input_encoding=self.encoding)
# RST generates pretty much markup to be removed in our case
for (pattern, replacement, mode) in self.RST_REPLACEMENTS:
html = re.sub(re.compile(pattern, mode), replacement, html, 0)
return html.strip()
elif self.format == 'textile':
try:
import textile
except ImportError:
raise RuntimeError(u"Looks like textile is not installed")
text = text.replace('\n---\n', '\n<hr />\n')
return textile.textile(text, encoding=self.encoding)
else:
raise NotImplementedError(u"Unsupported format %s, cannot parse"
% self.format)
|