From 4bfe140ed1cc7a07d762e4920e7365427d0b9618 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 22 Dec 2010 17:05:17 -0800
Subject: Made --smart work with HTML reader.

It did not work before, because - and quotes were gobbled up by the str parser.
---
 src/Text/Pandoc/Readers/HTML.hs | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index ade15349e..1bbb11e62 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -587,8 +587,7 @@ plain = many1 inline >>= return . Plain . normalizeSpaces
 --
 
 inline :: GenParser Char ParserState Inline
-inline = choice [ smartPunctuation inline
-                , str
+inline = choice [ str
                 , strong
                 , emph
                 , superscript
@@ -600,9 +599,10 @@ inline = choice [ smartPunctuation inline
                 , whitespace
                 , link
                 , image
+                , smartPunctuation inline
                 , charRef
                 , rawHtmlInline
-                , char '&' >> return (Str "&") -- common HTML error
+                , symbol
                 ] <?> "inline"
 
 code :: GenParser Char ParserState Inline
@@ -620,6 +620,12 @@ rawHtmlInline = do
   state <- getState
   if stateParseRaw state then return (HtmlInline result) else return (Str "")
 
+symbol :: GenParser Char ParserState Inline
+symbol = do
+  notFollowedBy (char '<')
+  c <- oneOf specialChars
+  return $ Str [c]
+
 betweenTags :: [Char] -> GenParser Char ParserState [Inline]
 betweenTags tag = try $ htmlOpenTag tag >> inlinesTilEnd tag >>=
                   return . normalizeSpaces
@@ -657,7 +663,10 @@ linebreak :: GenParser Char ParserState Inline
 linebreak = htmlSelfClosingTag "br" >> optional newline >> return LineBreak
 
 str :: GenParser Char st Inline
-str = many1 (noneOf "< \t\n&") >>= return . Str
+str = many1 (noneOf $ specialChars ++ " \t\n") >>= return . Str
+
+specialChars :: [Char]
+specialChars = "<&-\"'.\8216\8217\8220\8221"
 
 --
 -- links and images
-- 
cgit v1.2.3