diff options
author | John MacFarlane <jgm@berkeley.edu> | 2011-07-23 12:35:01 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2011-07-23 12:43:01 -0700 |
commit | 6424e7d02c8800a1964d0ae26a523597b8a365fa (patch) | |
tree | 9a616458b5a912cb625ce4514351a8f7c47eef75 /src/Text/Pandoc/Parsing.hs | |
parent | 26418b7d14ce04a7386392388d2a3cbded205705 (diff) |
Properly handle characters in the 128..159 range.
These aren't valid in HTML, but many HTML files produced by
Windows tools contain them. We substitute the correct Unicode
characters.
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 187343f9c..c16d99bdf 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -758,7 +758,7 @@ charOrRef cs =
 singleQuoteStart :: GenParser Char ParserState ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
-  try $ do charOrRef "'\8216"
+  try $ do charOrRef "'\8216\145"
            notFollowedBy (oneOf ")!],.;:-? \t\n")
            notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
                                satisfy (not . isAlphaNum)))
@@ -767,23 +767,23 @@ singleQuoteStart = do

 singleQuoteEnd :: GenParser Char st ()
 singleQuoteEnd = try $ do
-  charOrRef "'\8217"
+  charOrRef "'\8217\146"
   notFollowedBy alphaNum

 doubleQuoteStart :: GenParser Char ParserState ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
-  try $ do charOrRef "\"\8220"
+  try $ do charOrRef "\"\8220\147"
            notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n'))

 doubleQuoteEnd :: GenParser Char st ()
 doubleQuoteEnd = do
-  charOrRef "\"\8221"
+  charOrRef "\"\8221\148"
   return ()

 ellipses :: GenParser Char st Inline
 ellipses = do
-  try (charOrRef "…") <|> try (string "..." >> return '…')
+  try (charOrRef "…\133") <|> try (string "..." >> return '…')
   return Ellipses

 dash :: GenParser Char st Inline
@@ -791,13 +791,13 @@ dash = enDash <|> emDash

 enDash :: GenParser Char st Inline
 enDash = do
-  try (charOrRef "–") <|>
+  try (charOrRef "–\150") <|>
   try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
   return EnDash

 emDash :: GenParser Char st Inline
 emDash = do
-  try (charOrRef "—") <|> (try $ string "--" >> optional (char '-') >> return '—')
+  try (charOrRef "—\151") <|> (try $ string "--" >> optional (char '-') >> return '—')
   return EmDash

 -- |