summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/Parsing.hs
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2011-07-23 12:35:01 -0700
committer John MacFarlane <jgm@berkeley.edu> 2011-07-23 12:43:01 -0700
commit 6424e7d02c8800a1964d0ae26a523597b8a365fa (patch)
tree 9a616458b5a912cb625ce4514351a8f7c47eef75 /src/Text/Pandoc/Parsing.hs
parent 26418b7d14ce04a7386392388d2a3cbded205705 (diff)
Properly handle characters in the 128..159 range.
These aren't valid in HTML, but many HTML files produced by Windows tools contain them. We substitute the correct Unicode characters.
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
-rw-r--r-- src/Text/Pandoc/Parsing.hs 14
1 file changed, 7 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 187343f9c..c16d99bdf 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -758,7 +758,7 @@ charOrRef cs =
singleQuoteStart :: GenParser Char ParserState ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- try $ do charOrRef "'\8216"
+ try $ do charOrRef "'\8216\145"
notFollowedBy (oneOf ")!],.;:-? \t\n")
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
satisfy (not . isAlphaNum)))
@@ -767,23 +767,23 @@ singleQuoteStart = do
singleQuoteEnd :: GenParser Char st ()
singleQuoteEnd = try $ do
- charOrRef "'\8217"
+ charOrRef "'\8217\146"
notFollowedBy alphaNum
doubleQuoteStart :: GenParser Char ParserState ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
- try $ do charOrRef "\"\8220"
+ try $ do charOrRef "\"\8220\147"
notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n'))
doubleQuoteEnd :: GenParser Char st ()
doubleQuoteEnd = do
- charOrRef "\"\8221"
+ charOrRef "\"\8221\148"
return ()
ellipses :: GenParser Char st Inline
ellipses = do
- try (charOrRef "…") <|> try (string "..." >> return '…')
+ try (charOrRef "…\133") <|> try (string "..." >> return '…')
return Ellipses
dash :: GenParser Char st Inline
@@ -791,13 +791,13 @@ dash = enDash <|> emDash
enDash :: GenParser Char st Inline
enDash = do
- try (charOrRef "–") <|>
+ try (charOrRef "–\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
return EnDash
emDash :: GenParser Char st Inline
emDash = do
- try (charOrRef "—") <|> (try $ string "--" >> optional (char '-') >> return '—')
+ try (charOrRef "—\151") <|> (try $ string "--" >> optional (char '-') >> return '—')
return EmDash
--