summary | refs | log | tree | commit | diff
path: root/src/Text
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2010-12-22 17:05:17 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2010-12-22 17:05:17 -0800
commit: 4bfe140ed1cc7a07d762e4920e7365427d0b9618 (patch)
tree: 02cd044515f5bac5dce17d3a6344e3377ef4c107 /src/Text
parent: 63bf227e0414495a7966edd07d5faf6ca4a2f077 (diff)
Made --smart work with HTML reader.
It did not work before, because - and quotes were gobbled up by the str parser.
Diffstat (limited to 'src/Text')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 17
1 files changed, 13 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index ade15349e..1bbb11e62 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -587,8 +587,7 @@ plain = many1 inline >>= return . Plain . normalizeSpaces
--
inline :: GenParser Char ParserState Inline
-inline = choice [ smartPunctuation inline
- , str
+inline = choice [ str
, strong
, emph
, superscript
@@ -600,9 +599,10 @@ inline = choice [ smartPunctuation inline
, whitespace
, link
, image
+ , smartPunctuation inline
, charRef
, rawHtmlInline
- , char '&' >> return (Str "&") -- common HTML error
+ , symbol
] <?> "inline"
code :: GenParser Char ParserState Inline
@@ -620,6 +620,12 @@ rawHtmlInline = do
state <- getState
if stateParseRaw state then return (HtmlInline result) else return (Str "")
+symbol :: GenParser Char ParserState Inline
+symbol = do
+ notFollowedBy (char '<')
+ c <- oneOf specialChars
+ return $ Str [c]
+
betweenTags :: [Char] -> GenParser Char ParserState [Inline]
betweenTags tag = try $ htmlOpenTag tag >> inlinesTilEnd tag >>=
return . normalizeSpaces
@@ -657,7 +663,10 @@ linebreak :: GenParser Char ParserState Inline
linebreak = htmlSelfClosingTag "br" >> optional newline >> return LineBreak
str :: GenParser Char st Inline
-str = many1 (noneOf "< \t\n&") >>= return . Str
+str = many1 (noneOf $ specialChars ++ " \t\n") >>= return . Str
+
+specialChars :: [Char]
+specialChars = "<&-\"'.\8216\8217\8220\8221"
--
-- links and images