diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-09-17 02:49:28 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-09-17 02:49:28 +0000 |
commit | 6f16d52c1185fea757047374100a9f10be7af3fc (patch) | |
tree | d23a2504c5badd85ee0e351b1cdd94dd1f581741 /src/Text/Pandoc/Readers | |
parent | 2094534b3cd88c7b5addb27ef52900c737cd1e35 (diff) |
Changed parsing of code blocks in HTML reader:
+ `<code>` tag is no longer needed. `<pre>` suffices.
+ all HTML tags in the code block (e.g. for syntax highlighting)
  are skipped, because they are not portable to other output formats.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1022 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 15 |
1 files changed, 8 insertions, 7 deletions
```diff
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b6aac2b48..69ce97eff 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -100,7 +100,7 @@ extractTagType ('<':rest) =
   map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
 extractTagType _ = ""
 
--- | Parse any HTML tag (closing or opening) and return text of tag
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
 anyHtmlTag = try $ do
   char '<'
   spaces
@@ -313,15 +313,16 @@ hrule = try $ do
 -- code blocks
 --
 
+-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
+-- skipped, because they are not portable to output formats other than HTML.
 codeBlock = try $ do
   htmlTag "pre"
-  spaces
-  htmlTag "code"
-  result <- manyTill anyChar (htmlEndTag "code")
-  spaces
-  htmlEndTag "pre"
+  result <- manyTill
+            (many1 (satisfy (/= '<')) <|>
+             ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+            (htmlEndTag "pre")
   return $ CodeBlock $ stripTrailingNewlines $
-    decodeCharacterReferences result
+    decodeCharacterReferences $ concat result
 
 --
 -- block quotes
```