summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2017-12-14 12:47:15 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2017-12-14 12:48:43 -0800
commit: b94f1e2045d8113f57adabf6c4e475c744a8ce80 (patch)
tree: 6a14bee332fb881684ca62cf2efc1aa6c2337bbe
parent: 3361f85f8ea2d153d6f5457cbae511e33a09e994 (diff)
RST reader: more accurate parsing of references.
Previously we erroneously included the enclosing backticks in a reference ID (closes #4156). This change also disables interpretation of syntax inside references, as in docutils, so there is no longer any emphasis in `my *link*`_.
-rw-r--r-- src/Text/Pandoc/Readers/RST.hs 60
-rw-r--r-- test/command/4156.md 10
-rw-r--r-- test/command/rst-links.md 18
3 files changed, 52 insertions, 36 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 41b3c8b82..6b5d0a331 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1030,7 +1030,7 @@ noteBlock' marker = try $ do
citationMarker :: Monad m => RSTParser m [Char]
citationMarker = do
char '['
- res <- simpleReferenceName'
+ res <- simpleReferenceName
char ']'
return res
@@ -1039,7 +1039,7 @@ noteMarker = do
char '['
res <- many1 digit
<|>
- try (char '#' >> liftM ('#':) simpleReferenceName')
+ try (char '#' >> liftM ('#':) simpleReferenceName)
<|> count 1 (oneOf "#*")
char ']'
return res
@@ -1048,34 +1048,24 @@ noteMarker = do
-- reference key
--
-quotedReferenceName :: PandocMonad m => RSTParser m Inlines
+quotedReferenceName :: PandocMonad m => RSTParser m String
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
- trimInlines . mconcat <$> many1Till inline (char '`')
-
-unquotedReferenceName :: PandocMonad m => RSTParser m Inlines
-unquotedReferenceName = try $ do -- `` means inline code!
- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
+ manyTill anyChar (char '`')
-- Simple reference names are single words consisting of alphanumerics
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName' :: Monad m => ParserT [Char] st m String
-simpleReferenceName' = do
+simpleReferenceName :: Monad m => ParserT [Char] st m String
+simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
- <|>
- try (oneOf "-_:+." <* lookAhead alphaNum)
+ <|> try (oneOf "-_:+." <* lookAhead alphaNum)
return (x:xs)
-simpleReferenceName :: Monad m => ParserT [Char] st m Inlines
-simpleReferenceName = B.str <$> simpleReferenceName'
-
-referenceName :: PandocMonad m => RSTParser m Inlines
-referenceName = quotedReferenceName <|>
- try (simpleReferenceName <* lookAhead (char ':')) <|>
- unquotedReferenceName
+referenceName :: PandocMonad m => RSTParser m String
+referenceName = quotedReferenceName <|> simpleReferenceName
referenceKey :: PandocMonad m => RSTParser m [Char]
referenceKey = do
@@ -1123,16 +1113,17 @@ anonymousKey = try $ do
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
-stripTicks :: String -> String
-stripTicks = reverse . stripTick . reverse . stripTick
- where stripTick ('`':xs) = xs
- stripTick xs = xs
-
referenceNames :: PandocMonad m => RSTParser m [String]
referenceNames = do
let rn = try $ do
string ".. _"
- (_, ref) <- withRaw referenceName
+ ref <- quotedReferenceName
+ <|> many ( noneOf ":\n"
+ <|> try (char '\n' <*
+ string " " <*
+ notFollowedBy blankline)
+ <|> try (char ':' <* lookAhead alphaNum)
+ )
char ':'
return ref
first <- rn
@@ -1147,16 +1138,15 @@ regularKey = try $ do
refs <- referenceNames
src <- targetURI
guard $ not (null src)
- let keys = map (toKey . stripTicks) refs
+ let keys = map toKey refs
forM_ keys $ \key ->
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
anchorDef :: PandocMonad m => RSTParser m [Char]
anchorDef = try $ do
- (refs, raw) <- withRaw (try (referenceNames <* blanklines))
- let keys = map stripTicks refs
- forM_ keys $ \rawkey ->
+ (refs, raw) <- withRaw $ try (referenceNames <* blanklines)
+ forM_ refs $ \rawkey ->
updateState $ \s -> s { stateKeys =
M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
-- keep this for 2nd round of parsing, where we'll add the divs (anchor)
@@ -1479,22 +1469,20 @@ explicitLink = try $ do
_ -> return ((src, ""), nullAttr)
return $ B.linkWith attr (escapeURI src') tit label''
-citationName :: PandocMonad m => RSTParser m Inlines
+citationName :: PandocMonad m => RSTParser m String
citationName = do
raw <- citationMarker
- return $ B.str $ "[" ++ raw ++ "]"
+ return $ "[" ++ raw ++ "]"
referenceLink :: PandocMonad m => RSTParser m Inlines
referenceLink = try $ do
- (label',ref) <- withRaw (quotedReferenceName
- <|> simpleReferenceName
- <|> citationName) <*
- char '_'
+ ref <- (referenceName <|> citationName) <* char '_'
+ let label' = B.text ref
let isAnonKey (Key ('_':_)) = True
isAnonKey _ = False
state <- getState
let keyTable = stateKeys state
- key <- option (toKey $ stripTicks ref) $
+ key <- option (toKey ref) $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
case anonKeys of
diff --git a/test/command/4156.md b/test/command/4156.md
new file mode 100644
index 000000000..073537d1f
--- /dev/null
+++ b/test/command/4156.md
@@ -0,0 +1,10 @@
+```
+% pandoc -f rst
+.. _`SOMEID`:
+
+foo
+^D
+<div id="SOMEID">
+<p>foo</p>
+</div>
+```
diff --git a/test/command/rst-links.md b/test/command/rst-links.md
new file mode 100644
index 000000000..496bebc54
--- /dev/null
+++ b/test/command/rst-links.md
@@ -0,0 +1,18 @@
+```
+% pandoc -f rst
+`*ab*`_
+
+.. _`*ab*`: foo
+^D
+<p><a href="foo">*ab*</a></p>
+```
+
+```
+% pandoc -f rst
+`A B
+c`_
+
+.. _A B C: foo
+^D
+<p><a href="foo">A B c</a></p>
+```