summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
author: John MacFarlane <fiddlosopher@gmail.com> 2011-12-29 23:44:12 -0800
committer: John MacFarlane <fiddlosopher@gmail.com> 2011-12-29 23:44:12 -0800
commit: 925a4c5164026bfda25bf50b552bacec074fdf3f (patch)
tree: 2ddf5a2cd3530789434697d19c0bf8fd892d0f9f /src/Text/Pandoc/Readers
parent: 600c22e7bf57b219467794cd8e37c19571988462 (diff)
Better smart quote parsing.
* Added stateLastStrPos to ParserState. This lets us keep track of whether we're parsing the position immediately after a 'str'. If we encounter a ' in such a location, it must be an apostrophe, and can't be a single quote start.
* Set this in the markdown, textile, html, and rst str parsers.
* Closes #360.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs | 8
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs | 2
-rw-r--r-- src/Text/Pandoc/Readers/RST.hs | 6
-rw-r--r-- src/Text/Pandoc/Readers/Textile.hs | 2
4 files changed, 15 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 70ea24680..179065413 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -421,8 +421,12 @@ pTagContents =
pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
pStr :: GenParser Char ParserState Inline
-pStr = liftM Str $ many1 $ satisfy $ \c ->
- not (isSpace c) && not (isSpecial c) && not (isBad c)
+pStr = do
+ result <- many1 $ satisfy $ \c ->
+ not (isSpace c) && not (isSpecial c) && not (isBad c)
+ pos <- getPosition
+ updateState $ \s -> s{ stateLastStrPos = Just pos }
+ return $ Str result
isSpecial :: Char -> Bool
isSpecial '"' = True
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index db68df629..d854bd3c7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1096,6 +1096,8 @@ str = do
lookAhead alphaNum >> return '\x2019')
-- for things like l'aide
else mzero
+ pos <- getPosition
+ updateState $ \s -> s{ stateLastStrPos = Just pos }
let result = a:as
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
if smart
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 3dcfe47d0..d8704d8c9 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -791,7 +791,11 @@ whitespace :: GenParser Char ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
str :: GenParser Char ParserState Inline
-str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+str = do
+ result <- many1 (noneOf (specialChars ++ "\t\n "))
+ pos <- getPosition
+ updateState $ \s -> s{ stateLastStrPos = Just pos }
+ return $ Str result
-- an endline character that can be treated as a space, not a structural break
endline :: GenParser Char ParserState Inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 12d299aa4..4693bd06d 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -436,6 +436,8 @@ str = do
next <- lookAhead letter
guard $ isLetter (last xs) || isLetter next
return $ xs ++ "-"
+ pos <- getPosition
+ updateState $ \s -> s{ stateLastStrPos = Just pos }
return $ Str result
-- | Textile allows HTML span infos, we discard them