diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 63 |
1 files changed, 47 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 01cc5e2e8..53531dc1a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -471,8 +472,10 @@ lhsCodeBlockBirdWith c = try $ do return $ intercalate "\n" lns' birdTrackLine :: Char -> GenParser Char st [Char] -birdTrackLine c = do +birdTrackLine c = try $ do char c + -- allow html tags on left margin: + when (c == '<') $ notFollowedBy letter manyTill anyChar newline @@ -905,7 +908,7 @@ inlineParsers = [ whitespace , str , endline , code - , (fourOrMore '*' <|> fourOrMore '_') + , fours , strong , emph , note @@ -1016,24 +1019,45 @@ mathInline = try $ do -- to avoid performance problems, treat 4 or more _ or * in a row as a literal -- rather than attempting to parse for emph/strong -fourOrMore :: Char -> GenParser Char st Inline -fourOrMore c = try $ count 4 (char c) >> many (char c) >>= \s -> - return (Str $ replicate 4 c ++ s) +fours :: GenParser Char st Inline +fours = try $ do + x <- char '*' <|> char '_' + count 2 $ satisfy (==x) + rest <- many1 (satisfy (==x)) + return $ Str (x:x:x:rest) + +-- | Parses a list of inlines between start and end delimiters. +inlinesBetween :: (Show b) + => GenParser Char ParserState a + -> GenParser Char ParserState b + -> GenParser Char ParserState [Inline] +inlinesBetween start end = + normalizeSpaces `liftM` try (start >> many1Till inner end) + where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) + innerSpace = try $ whitespace >>~ notFollowedBy' end emph :: GenParser Char ParserState Inline -emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|> - (enclosed (char '_') (notFollowedBy' strong >> char '_' >> - notFollowedBy alphaNum) inline)) >>= - return . Emph . normalizeSpaces +emph = Emph `liftM` + (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) + where starStart = char '*' >> lookAhead nonspaceChar + starEnd = notFollowedBy' strong >> char '*' + ulStart = char '_' >> lookAhead nonspaceChar + ulEnd = notFollowedBy' strong >> char '_' strong :: GenParser Char ParserState Inline -strong = ((enclosed (string "**") (try $ string "**") inline) <|> - (enclosed (string "__") (try $ string "__") inline)) >>= - return . Strong . normalizeSpaces +strong = Strong `liftM` + (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) + where starStart = string "**" >> lookAhead nonspaceChar + starEnd = try $ string "**" + ulStart = string "__" >> lookAhead nonspaceChar + ulEnd = try $ string "__" strikeout :: GenParser Char ParserState Inline -strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= - return . Strikeout . normalizeSpaces +strikeout = Strikeout `liftM` + (failIfStrict >> inlinesBetween strikeStart strikeEnd) + where strikeStart = string "~~" >> lookAhead nonspaceChar + >> notFollowedBy (char '~') + strikeEnd = try $ string "~~" superscript :: GenParser Char ParserState Inline superscript = failIfStrict >> enclosed (char '^') (char '^') @@ -1185,7 +1209,14 @@ note = try $ do let notes = stateNotes state case lookup ref notes of Nothing -> fail "note not found" - Just raw -> liftM Note $ parseFromString parseBlocks raw + Just raw -> do + -- We temporarily empty the note list while parsing the note, + -- so that we don't get infinite loops with notes inside notes... + -- Note references inside other notes do not work. + updateState $ \st -> st{ stateNotes = [] } + contents <- parseFromString parseBlocks raw + updateState $ \st -> st{ stateNotes = notes } + return $ Note contents inlineNote :: GenParser Char ParserState Inline inlineNote = try $ do @@ -1281,7 +1312,7 @@ citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' first <- letter - rest <- many $ (noneOf ",;]@ \t\n") + rest <- many $ (noneOf ",;!?[]()@ \t\n") let key = first:rest st <- getState guard $ key `elem` stateCitations st |