diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 1449 |
1 files changed, 750 insertions, 699 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index cd35a8738..14cf73de4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE RelaxedPolyRec #-} {-# LANGUAGE ScopedTypeVariables #-} + {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,63 +30,55 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( readMarkdown, - readMarkdownWithWarnings ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( transpose, sortBy, findIndex, intercalate ) +import Control.Monad +import Control.Monad.Except (throwError) +import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) +import qualified Data.HashMap.Strict as H +import Data.List (intercalate, sortBy, transpose, elemIndex) import qualified Data.Map as M -import Data.Scientific (coefficient, base10Exponent) -import Data.Ord ( comparing ) -import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation ) import Data.Maybe -import Text.Pandoc.Definition -import Text.Pandoc.Emoji (emojis) -import Text.Pandoc.Generic (bottomUp) -import qualified Data.Text as T +import Data.Monoid ((<>)) +import Data.Ord (comparing) +import Data.Scientific (base10Exponent, coefficient) +import qualified Data.Set as Set import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Vector as V +import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..)) import qualified Data.Yaml as Yaml -import Data.Yaml (ParseException(..), YamlException(..), YamlMark(..)) -import qualified Data.HashMap.Strict as H +import System.FilePath (addExtension, takeExtension) +import Text.HTML.TagSoup +import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B -import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Data.Vector as V -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) +import Text.Pandoc.Class (PandocMonad (..), report) +import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Error +import Text.Pandoc.Logging import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (tableWith) +import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, + isCommentTag, isInlineTag, isTextTag) +import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared -import Text.Pandoc.Pretty (charWidth) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) -import Text.Pandoc.Parsing hiding (tableWith) -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, - isTextTag, isCommentTag ) -import Control.Monad -import System.FilePath (takeExtension, addExtension) -import Text.HTML.TagSoup -import qualified Data.Set as Set -import Text.Printf (printf) -import Debug.Trace (trace) -import Data.Monoid ((<>)) -import Text.Pandoc.Error -type MarkdownParser = Parser [Char] ParserState +type MarkdownParser m = ParserT [Char] ParserState m -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readMarkdown opts s = - (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") - --- | Read markdown from an input string and return a pair of a Pandoc document --- and a list of warnings. -readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError (Pandoc, [String]) -readMarkdownWithWarnings opts s = - (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines +readMarkdown :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readMarkdown opts s = do + parsed <- readWithM parseMarkdown def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case parsed of + Right result -> return result + Left e -> throwError e -- -- Constants and data structure definitions @@ -117,44 +110,43 @@ isBlank _ = False -- -- | Succeeds when we're in list context. -inList :: MarkdownParser () +inList :: PandocMonad m => MarkdownParser m () inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: Parser [Char] st () +spnl :: PandocMonad m => ParserT [Char] st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -indentSpaces :: MarkdownParser String +spnl' :: PandocMonad m => ParserT [Char] st m String +spnl' = try $ do + xs <- many spaceChar + ys <- option "" $ try $ (:) <$> newline + <*> (many spaceChar <* notFollowedBy (char '\n')) + return (xs ++ ys) + +indentSpaces :: PandocMonad m => MarkdownParser m String indentSpaces = try $ do tabStop <- getOption readerTabStop count tabStop (char ' ') <|> string "\t" <?> "indentation" -nonindentSpaces :: MarkdownParser String +nonindentSpaces :: PandocMonad m => MarkdownParser m String nonindentSpaces = do - tabStop <- getOption readerTabStop - sps <- many (char ' ') - if length sps < tabStop - then return sps - else unexpected "indented line" + n <- skipNonindentSpaces + return $ replicate n ' ' -- returns number of spaces parsed -skipNonindentSpaces :: MarkdownParser Int +skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int skipNonindentSpaces = do tabStop <- getOption readerTabStop - atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ') + gobbleAtMostSpaces (tabStop - 1) <* notFollowedBy spaceChar -atMostSpaces :: Int -> MarkdownParser Int -atMostSpaces n - | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0 - | otherwise = return 0 - -litChar :: MarkdownParser Char +litChar :: PandocMonad m => MarkdownParser m Char litChar = escapedChar' <|> characterReference <|> noneOf "\n" @@ -162,30 +154,32 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: MarkdownParser (F Inlines) -inlinesInBalancedBrackets = do - char '[' - (_, raw) <- withRaw $ charsInBalancedBrackets 1 - guard $ not $ null raw - parseFromString (trimInlinesF . mconcat <$> many inline) (init raw) - -charsInBalancedBrackets :: Int -> MarkdownParser () -charsInBalancedBrackets 0 = return () -charsInBalancedBrackets openBrackets = - (char '[' >> charsInBalancedBrackets (openBrackets + 1)) - <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1)) - <|> (( (() <$ code) - <|> (() <$ (escapedChar')) - <|> (newline >> notFollowedBy blankline) - <|> skipMany1 (noneOf "[]`\n\\") - <|> (() <$ count 1 (oneOf "`\\")) - ) >> charsInBalancedBrackets openBrackets) +inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) +inlinesInBalancedBrackets = + try $ char '[' >> withRaw (go 1) >>= + parseFromString inlines . stripBracket . snd + where stripBracket [] = [] + stripBracket xs = if last xs == ']' then init xs else xs + go :: PandocMonad m => Int -> MarkdownParser m () + go 0 = return () + go openBrackets = + (() <$ (escapedChar <|> + code <|> + rawHtmlInline <|> + rawLaTeXInline') >> go openBrackets) + <|> + (do char ']' + Control.Monad.when (openBrackets > 1) $ go (openBrackets - 1)) + <|> + (char '[' >> go (openBrackets + 1)) + <|> + (anyChar >> go openBrackets) -- -- document structure -- -rawTitleBlockLine :: MarkdownParser String +rawTitleBlockLine :: PandocMonad m => MarkdownParser m String rawTitleBlockLine = do char '%' skipSpaces @@ -196,13 +190,13 @@ rawTitleBlockLine = do anyLine return $ trim $ unlines (first:rest) -titleLine :: MarkdownParser (F Inlines) +titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw - return $ trimInlinesF $ mconcat res + res <- parseFromString' inlines raw + return $ trimInlinesF res -authorsLine :: MarkdownParser (F [Inlines]) +authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines]) authorsLine = try $ do raw <- rawTitleBlockLine let sep = (char ';' <* spaces) <|> newline @@ -210,18 +204,18 @@ authorsLine = try $ do (trimInlinesF . mconcat <$> many (try $ notFollowedBy sep >> inline)) sep - sequence <$> parseFromString pAuthors raw + sequence <$> parseFromString' pAuthors raw -dateLine :: MarkdownParser (F Inlines) +dateLine :: PandocMonad m => MarkdownParser m (F Inlines) dateLine = try $ do raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw - return $ trimInlinesF $ mconcat res + res <- parseFromString' inlines raw + return $ trimInlinesF res -titleBlock :: MarkdownParser () +titleBlock :: PandocMonad m => MarkdownParser m () titleBlock = pandocTitleBlock <|> mmdTitleBlock -pandocTitleBlock :: MarkdownParser () +pandocTitleBlock :: PandocMonad m => MarkdownParser m () pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') @@ -239,7 +233,8 @@ pandocTitleBlock = try $ do $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } -yamlMetaBlock :: MarkdownParser (F Blocks) + +yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition @@ -250,87 +245,105 @@ yamlMetaBlock = try $ do -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - opts <- stateOptions <$> getState - meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ return $ - H.foldrWithKey (\k v m -> - if ignorable k - then m - else case yamlToMeta opts v of - Left _ -> m - Right v' -> B.setMeta (T.unpack k) v' m) - nullMeta hashmap - Right Yaml.Null -> return $ return nullMeta - Right _ -> do - addWarning (Just pos) "YAML header is not an object" - return $ return nullMeta - Left err' -> do - case err' of - InvalidYaml (Just YamlParseException{ - yamlProblem = problem - , yamlContext = _ctxt - , yamlProblemMark = Yaml.YamlMark { - yamlLine = yline - , yamlColumn = ycol - }}) -> - addWarning (Just $ setSourceLine - (setSourceColumn pos - (sourceColumn pos + ycol)) - (sourceLine pos + 1 + yline)) - $ "Could not parse YAML header: " ++ - problem - _ -> addWarning (Just pos) - $ "Could not parse YAML header: " ++ - show err' - return $ return nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + case Yaml.decodeEither' $ UTF8.fromString rawYaml of + Right (Yaml.Object hashmap) -> do + let alist = H.toList hashmap + mapM_ (\(k, v) -> + if ignorable k + then return () + else do + v' <- yamlToMeta v + let k' = T.unpack k + updateState $ \st -> st{ stateMeta' = + do m <- stateMeta' st + -- if there's already a value, leave it unchanged + case lookupMeta k' m of + Just _ -> return m + Nothing -> do + v'' <- v' + return $ B.setMeta (T.unpack k) v'' m} + ) alist + Right Yaml.Null -> return () + Right _ -> do + logMessage $ + CouldNotParseYamlMetadata "not an object" + pos + return () + Left err' -> do + case err' of + InvalidYaml (Just YamlParseException{ + yamlProblem = problem + , yamlContext = _ctxt + , yamlProblemMark = Yaml.YamlMark { + yamlLine = yline + , yamlColumn = ycol + }}) -> + logMessage $ CouldNotParseYamlMetadata + problem (setSourceLine + (setSourceColumn pos + (sourceColumn pos + ycol)) + (sourceLine pos + 1 + yline)) + _ -> logMessage $ CouldNotParseYamlMetadata + (show err') pos + return () return mempty -- ignore fields ending with _ ignorable :: Text -> Bool ignorable t = (T.pack "_") `T.isSuffixOf` t -toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue -toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) - where - toMeta p = - case p of - Pandoc _ [Plain xs] -> MetaInlines xs - Pandoc _ [Para xs] - | endsWithNewline x -> MetaBlocks [Para xs] - | otherwise -> MetaInlines xs - Pandoc _ bs -> MetaBlocks bs - endsWithNewline t = T.pack "\n" `T.isSuffixOf` t - opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} - meta_exts = Set.fromList [ Ext_pandoc_title_block - , Ext_mmd_title_block - , Ext_yaml_metadata_block - ] - -yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue -yamlToMeta opts (Yaml.String t) = toMetaValue opts t -yamlToMeta _ (Yaml.Number n) +toMetaValue :: PandocMonad m + => Text -> MarkdownParser m (F MetaValue) +toMetaValue x = + parseFromString' parser' (T.unpack x) + where parser' = (asInlines <$> ((trimInlinesF . mconcat) + <$> try (guard (not endsWithNewline) + *> manyTill inline eof))) + <|> (asBlocks <$> parseBlocks) + asBlocks p = do + p' <- p + return $ MetaBlocks (B.toList p') + asInlines p = do + p' <- p + return $ MetaInlines (B.toList p') + endsWithNewline = T.pack "\n" `T.isSuffixOf` x + -- Note: a standard quoted or unquoted YAML value will + -- not end in a newline, but a "block" set off with + -- `|` or `>` will. + +yamlToMeta :: PandocMonad m + => Yaml.Value -> MarkdownParser m (F MetaValue) +yamlToMeta (Yaml.String t) = toMetaValue t +yamlToMeta (Yaml.Number n) -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = return $ MetaString $ show + | base10Exponent n >= 0 = return $ return $ MetaString $ show $ coefficient n * (10 ^ base10Exponent n) - | otherwise = return $ MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b -yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts) - (V.toList xs) -yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> - if ignorable k - then m - else (do - v' <- yamlToMeta opts v - m' <- m - return (M.insert (T.unpack k) v' m'))) - (return M.empty) o -yamlToMeta _ _ = return $ MetaString "" - -stopLine :: MarkdownParser () + | otherwise = return $ return $ MetaString $ show n +yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b +yamlToMeta (Yaml.Array xs) = do + xs' <- mapM yamlToMeta (V.toList xs) + return $ do + xs'' <- sequence xs' + return $ B.toMetaValue xs'' +yamlToMeta (Yaml.Object o) = do + let alist = H.toList o + foldM (\m (k,v) -> + if ignorable k + then return m + else do + v' <- yamlToMeta v + return $ do + MetaMap m' <- m + v'' <- v' + return (MetaMap $ M.insert (T.unpack k) v'' m')) + (return $ MetaMap M.empty) + alist +yamlToMeta _ = return $ return $ MetaString "" + +stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () -mmdTitleBlock :: MarkdownParser () +mmdTitleBlock :: PandocMonad m => MarkdownParser m () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block firstPair <- kvPair False @@ -340,49 +353,44 @@ mmdTitleBlock = try $ do updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue) kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') val <- trim <$> manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) guard $ allowEmpty || not (null val) let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val + let val' = MetaBlocks $ B.toList $ B.plain $B.text val return (key',val') -parseMarkdown :: MarkdownParser Pandoc +parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc parseMarkdown = do - -- markdown allows raw HTML - updateState $ \state -> state { stateOptions = - let oldOpts = stateOptions state in - oldOpts{ readerParseRaw = True } } optional titleBlock blocks <- parseBlocks st <- getState - let meta = runF (stateMeta' st) st - let Pandoc _ bs = B.doc $ runF blocks st - eastAsianLineBreaks <- option False $ - True <$ guardEnabled Ext_east_asian_line_breaks - return $ (if eastAsianLineBreaks - then bottomUp softBreakFilter - else id) $ Pandoc meta bs - -softBreakFilter :: [Inline] -> [Inline] -softBreakFilter (x:SoftBreak:y:zs) = - case (stringify x, stringify y) of - (xs@(_:_), (c:_)) - | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs - _ -> x:SoftBreak:y:zs -softBreakFilter xs = xs - -referenceKey :: MarkdownParser (F Blocks) + -- check for notes with no corresponding note references + let notesUsed = stateNoteRefs st + let notesDefined = M.keys (stateNotes' st) + mapM_ (\n -> unless (n `Set.member` notesUsed) $ + case M.lookup n (stateNotes' st) of + Just (pos, _) -> report (NoteDefinedButNotUsed n pos) + Nothing -> throwError $ + PandocShouldNeverHappenError "note not found") + notesDefined + let doc = runF (do Pandoc _ bs <- B.doc <$> blocks + meta <- stateMeta' st + return $ Pandoc meta bs) st + reportLogMessages + return doc + +referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do pos <- getPosition skipNonindentSpaces (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL = liftM unwords $ many $ try $ do + let sourceURL = fmap unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes @@ -392,7 +400,9 @@ referenceKey = try $ do src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle attr <- option nullAttr $ try $ - guardEnabled Ext_link_attributes >> skipSpaces >> attributes + do guardEnabled Ext_link_attributes + skipSpaces >> optional newline >> skipSpaces + attributes addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes >> many (try $ spnl >> keyValAttr) blanklines @@ -402,18 +412,22 @@ referenceKey = try $ do let oldkeys = stateKeys st let key = toKey raw case M.lookup key oldkeys of - Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" - Nothing -> return () + Just (t,a) | not (t == target && a == attr') -> + -- We don't warn on two duplicate keys if the targets are also + -- the same. This can happen naturally with --reference-location=block + -- or section. See #3701. + logMessage $ DuplicateLinkReference raw pos + _ -> return () updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty -referenceTitle :: MarkdownParser String +referenceTitle :: PandocMonad m => MarkdownParser m String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar -- A link title in quotes -quotedTitle :: Char -> MarkdownParser String +quotedTitle :: PandocMonad m => Char -> MarkdownParser m String quotedTitle c = try $ do char c notFollowedBy spaces @@ -425,7 +439,7 @@ quotedTitle c = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: MarkdownParser (F Blocks) +abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks) abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -436,23 +450,23 @@ abbrevKey = do blanklines return $ return mempty -noteMarker :: MarkdownParser String +noteMarker :: PandocMonad m => MarkdownParser m String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') -rawLine :: MarkdownParser String +rawLine :: PandocMonad m => MarkdownParser m String rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: MarkdownParser String +rawLines :: PandocMonad m => MarkdownParser m String rawLines = do first <- anyLine rest <- many rawLine return $ unlines (first:rest) -noteBlock :: MarkdownParser (F Blocks) +noteBlock :: PandocMonad m => MarkdownParser m (F Blocks) noteBlock = try $ do pos <- getPosition skipNonindentSpaces @@ -464,26 +478,23 @@ noteBlock = try $ do rest <- many $ try $ blanklines >> indentSpaces >> rawLines let raw = unlines (first:rest) ++ "\n" optional blanklines - parsed <- parseFromString parseBlocks raw - let newnote = (ref, parsed) + parsed <- parseFromString' parseBlocks raw oldnotes <- stateNotes' <$> getState - case lookup ref oldnotes of - Just _ -> addWarning (Just pos) $ "Duplicate note reference `" ++ ref ++ "'" + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos Nothing -> return () - updateState $ \s -> s { stateNotes' = newnote : oldnotes } + updateState $ \s -> s { stateNotes' = M.insert ref (pos, parsed) oldnotes } return mempty -- -- parsing blocks -- -parseBlocks :: MarkdownParser (F Blocks) +parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: MarkdownParser (F Blocks) +block :: PandocMonad m => MarkdownParser m (F Blocks) block = do - tr <- getOption readerTrace - pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock @@ -493,10 +504,10 @@ block = do , header , lhsCodeBlock , divHtml + , divFenced , htmlBlock , table , codeBlockIndented - , guardEnabled Ext_latex_macros *> (macro >>= return . return) , rawTeXBlock , lineBlock , blockQuote @@ -509,30 +520,29 @@ block = do , para , plain ] <?> "block" - when tr $ do - st <- getState - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList $ runF res st)) (return ()) + trace (take 60 $ show $ B.toList $ runF res defaultParserState) return res -- -- header blocks -- -header :: MarkdownParser (F Blocks) +header :: PandocMonad m => MarkdownParser m (F Blocks) header = setextHeader <|> atxHeader <?> "header" -atxChar :: MarkdownParser Char +atxChar :: PandocMonad m => MarkdownParser m Char atxChar = do exts <- getOption readerExtensions - return $ if Set.member Ext_literate_haskell exts - then '=' else '#' + return $ if extensionEnabled Ext_literate_haskell exts + then '=' + else '#' -atxHeader :: MarkdownParser (F Blocks) +atxHeader :: PandocMonad m => MarkdownParser m (F Blocks) atxHeader = try $ do - level <- atxChar >>= many1 . char >>= return . length + level <- fmap length (atxChar >>= many1 . char) notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list + guardDisabled Ext_space_in_atx_header <|> notFollowedBy nonspaceChar skipSpaces (text, raw) <- withRaw $ trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) @@ -542,7 +552,7 @@ atxHeader = try $ do <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -atxClosing :: MarkdownParser Attr +atxClosing :: PandocMonad m => MarkdownParser m Attr atxClosing = try $ do attr' <- option nullAttr (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) @@ -553,7 +563,7 @@ atxClosing = try $ do blanklines return attr -setextHeaderEnd :: MarkdownParser Attr +setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr setextHeaderEnd = try $ do attr <- option nullAttr $ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) @@ -561,13 +571,18 @@ setextHeaderEnd = try $ do blanklines return attr -mmdHeaderIdentifier :: MarkdownParser Attr +mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr mmdHeaderIdentifier = do - ident <- stripFirstAndLast . snd <$> reference + (_, raw) <- reference + let raw' = trim $ stripFirstAndLast raw + let ident = concat $ words $ map toLower raw' + let attr = (ident, [], []) + guardDisabled Ext_implicit_header_references + <|> registerImplicitHeader raw' attr skipSpaces - return (ident,[],[]) + return attr -setextHeader :: MarkdownParser (F Blocks) +setextHeader :: PandocMonad m => MarkdownParser m (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. @@ -579,13 +594,13 @@ setextHeader = try $ do underlineChar <- oneOf setextHChars many (char underlineChar) blanklines - let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 + let level = fromMaybe 0 (elemIndex underlineChar setextHChars) + 1 attr' <- registerHeader attr (runF text defaultParserState) guardDisabled Ext_implicit_header_references <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: String -> Attr -> MarkdownParser () +registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m () registerImplicitHeader raw attr@(ident, _, _) = do let key = toKey $ "[" ++ raw ++ "]" updateState (\s -> s { stateHeaderKeys = @@ -595,7 +610,7 @@ registerImplicitHeader raw attr@(ident, _, _) = do -- hrule block -- -hrule :: Parser [Char] st (F Blocks) +hrule :: PandocMonad m => ParserT [Char] st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -609,20 +624,21 @@ hrule = try $ do -- code blocks -- -indentedLine :: MarkdownParser String -indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") +indentedLine :: PandocMonad m => MarkdownParser m String +indentedLine = indentSpaces >> anyLineNewline -blockDelimiter :: (Char -> Bool) +blockDelimiter :: PandocMonad m + => (Char -> Bool) -> Maybe Int - -> Parser [Char] st Int + -> ParserT [Char] ParserState m Int blockDelimiter f len = try $ do + skipNonindentSpaces c <- lookAhead (satisfy f) case len of Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length + Nothing -> fmap ((+ 3) . length) (count 3 (char c) >> many (char c)) -attributes :: MarkdownParser Attr +attributes :: PandocMonad m => MarkdownParser m Attr attributes = try $ do char '{' spnl @@ -630,28 +646,28 @@ attributes = try $ do char '}' return $ foldl (\x f -> f x) nullAttr attrs -attribute :: MarkdownParser (Attr -> Attr) +attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr -identifier :: MarkdownParser String +identifier :: PandocMonad m => MarkdownParser m String identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." return (first:rest) -identifierAttr :: MarkdownParser (Attr -> Attr) +identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) identifierAttr = try $ do char '#' result <- identifier return $ \(_,cs,kvs) -> (result,cs,kvs) -classAttr :: MarkdownParser (Attr -> Attr) +classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) classAttr = try $ do char '.' result <- identifier return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs) -keyValAttr :: MarkdownParser (Attr -> Attr) +keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) keyValAttr = try $ do key <- identifier char '=' @@ -664,33 +680,53 @@ keyValAttr = try $ do "class" -> (id',cs ++ words val,kvs) _ -> (id',cs,kvs ++ [(key,val)]) -specialAttr :: MarkdownParser (Attr -> Attr) +specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -codeBlockFenced :: MarkdownParser (F Blocks) +rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute = do + char '{' + skipMany spaceChar + char '=' + format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + skipMany spaceChar + char '}' + return format + +codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do + indentchars <- nonindentSpaces + let indentLevel = length indentchars c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing skipMany spaceChar - attr <- option ([],[],[]) $ - try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_fenced_code_attributes >> attributes) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) blankline - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) + contents <- intercalate "\n" <$> + manyTill (gobbleAtMostSpaces indentLevel >> anyLine) + (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ return $ + case rawattr of + Left syn -> B.rawBlock syn contents + Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers toLanguageId :: String -> String toLanguageId = map toLower . go - where go "c++" = "cpp" + where go "c++" = "cpp" go "objective-c" = "objectivec" - go x = x + go x = x -codeBlockIndented :: MarkdownParser (F Blocks) +codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -701,7 +737,7 @@ codeBlockIndented = do return $ return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: MarkdownParser (F Blocks) +lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks) lhsCodeBlock = do guardEnabled Ext_literate_haskell (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> @@ -709,7 +745,7 @@ lhsCodeBlock = do <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: MarkdownParser String +lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline @@ -717,13 +753,13 @@ lhsCodeBlockLaTeX = try $ do blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: MarkdownParser String +lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: MarkdownParser String +lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: Char -> MarkdownParser String +lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -735,7 +771,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> Parser [Char] st String +birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -746,10 +782,10 @@ birdTrackLine c = try $ do -- block quotes -- -emailBlockQuoteStart :: MarkdownParser Char +emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') -emailBlockQuote :: MarkdownParser [String] +emailBlockQuote :: PandocMonad m => MarkdownParser m [String] emailBlockQuote = try $ do emailBlockQuoteStart let emailLine = many $ nonEndline <|> try @@ -763,113 +799,136 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: MarkdownParser (F Blocks) +blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" + contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n" return $ B.blockQuote <$> contents -- -- list blocks -- -bulletListStart :: MarkdownParser () +bulletListStart :: PandocMonad m => MarkdownParser m () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context - startpos <- sourceColumn <$> getPosition skipNonindentSpaces notFollowedBy' (() <$ hrule) -- because hrules start out just like lists satisfy isBulletListMarker - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - () <$ atMostSpaces (tabStop - (endpos - startpos)) + gobbleSpaces 1 <|> () <$ lookAhead newline + try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) <|> return () -anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim) -anyOrderedListStart = try $ do +orderedListStart :: PandocMonad m + => Maybe (ListNumberStyle, ListNumberDelim) + -> MarkdownParser m (Int, ListNumberStyle, ListNumberDelim) +orderedListStart mbstydelim = try $ do optional newline -- if preceded by a Plain block in a list context - startpos <- sourceColumn <$> getPosition skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number - res <- do guardDisabled Ext_fancy_lists - start <- many1 digit >>= safeRead - char '.' - return (start, DefaultStyle, DefaultDelim) - <|> do (num, style, delim) <- anyOrderedListMarker - -- if it could be an abbreviated first name, - -- insist on more than one space - when (delim == Period && (style == UpperAlpha || - (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ - () <$ spaceChar - return (num, style, delim) - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - atMostSpaces (tabStop - (endpos - startpos)) - return res - -listStart :: MarkdownParser () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) - -listLine :: MarkdownParser String -listLine = try $ do - notFollowedBy' (do indentSpaces - many spaceChar + (do guardDisabled Ext_fancy_lists + start <- many1 digit >>= safeRead + char '.' + gobbleSpaces 1 <|> () <$ lookAhead newline + optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) + return (start, DefaultStyle, DefaultDelim)) + <|> + (do (num, style, delim) <- maybe + anyOrderedListMarker + (\(sty,delim) -> (\start -> (start,sty,delim)) <$> + orderedListMarker sty delim) + mbstydelim + gobbleSpaces 1 <|> () <$ lookAhead newline + -- if it could be an abbreviated first name, + -- insist on more than one space + when (delim == Period && (style == UpperAlpha || + (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ + () <$ lookAhead (newline <|> spaceChar) + optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) + return (num, style, delim)) + +listStart :: PandocMonad m => MarkdownParser m () +listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing) + +listLine :: PandocMonad m => Int -> MarkdownParser m String +listLine continuationIndent = try $ do + notFollowedBy' (do gobbleSpaces continuationIndent + skipMany spaceChar listStart) notFollowedByHtmlCloser - optional (() <$ indentSpaces) + notFollowedByDivCloser + optional (() <$ gobbleSpaces continuationIndent) listLineCommon -listLineCommon :: MarkdownParser String +listLineCommon :: PandocMonad m => MarkdownParser m String listLineCommon = concat <$> manyTill ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') - <|> liftM snd (htmlTag isCommentTag) + <|> fmap snd (htmlTag isCommentTag) <|> count 1 anyChar ) newline -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: MarkdownParser a - -> MarkdownParser String -rawListItem start = try $ do +rawListItem :: PandocMonad m + => Bool -- four space rule + -> MarkdownParser m a + -> MarkdownParser m (String, Int) +rawListItem fourSpaceRule start = try $ do + pos1 <- getPosition start + pos2 <- getPosition + let continuationIndent = if fourSpaceRule + then 4 + else sourceColumn pos2 - sourceColumn pos1 first <- listLineCommon - rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine) + rest <- many (do notFollowedBy listStart + notFollowedBy (() <$ codeBlockFenced) + notFollowedBy blankline + listLine continuationIndent) blanks <- many blankline - return $ unlines (first:rest) ++ blanks + let result = unlines (first:rest) ++ blanks + return (result, continuationIndent) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: MarkdownParser String -listContinuation = try $ do - lookAhead indentSpaces - result <- many1 listContinuationLine +listContinuation :: PandocMonad m => Int -> MarkdownParser m String +listContinuation continuationIndent = try $ do + x <- try $ do + notFollowedBy blankline + notFollowedByHtmlCloser + notFollowedByDivCloser + gobbleSpaces continuationIndent + anyLineNewline + xs <- many $ try $ do + notFollowedBy blankline + notFollowedByHtmlCloser + notFollowedByDivCloser + gobbleSpaces continuationIndent <|> notFollowedBy' listStart + anyLineNewline blanks <- many blankline - return $ concat result ++ blanks + return $ concat (x:xs) ++ blanks + +notFollowedByDivCloser :: PandocMonad m => MarkdownParser m () +notFollowedByDivCloser = + guardDisabled Ext_fenced_divs <|> + do divLevel <- stateFencedDivLevel <$> getState + guard (divLevel < 1) <|> notFollowedBy divFenceEnd -notFollowedByHtmlCloser :: MarkdownParser () +notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m () notFollowedByHtmlCloser = do inHtmlBlock <- stateInHtmlBlock <$> getState case inHtmlBlock of Just t -> notFollowedBy' $ htmlTag (~== TagClose t) Nothing -> return () -listContinuationLine :: MarkdownParser String -listContinuationLine = try $ do - notFollowedBy blankline - notFollowedBy' listStart - notFollowedByHtmlCloser - optional indentSpaces - result <- anyLine - return $ result ++ "\n" - -listItem :: MarkdownParser a - -> MarkdownParser (F Blocks) -listItem start = try $ do - first <- rawListItem start - continuations <- many listContinuation +listItem :: PandocMonad m + => Bool -- four-space rule + -> MarkdownParser m a + -> MarkdownParser m (F Blocks) +listItem fourSpaceRule start = try $ do + (first, continuationIndent) <- rawListItem fourSpaceRule start + continuations <- many (listContinuation continuationIndent) -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. -- see definition of "endline" @@ -878,39 +937,34 @@ listItem start = try $ do setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may contain various block elements: let raw = concat (first:continuations) - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: MarkdownParser (F Blocks) +orderedList :: PandocMonad m => MarkdownParser m (F Blocks) orderedList = try $ do - (start, style, delim) <- lookAhead anyOrderedListStart + (start, style, delim) <- lookAhead (orderedListStart Nothing) unless (style `elem` [DefaultStyle, Decimal, Example] && delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists - items <- fmap sequence $ many1 $ listItem - ( try $ do - optional newline -- if preceded by Plain block in a list - startpos <- sourceColumn <$> getPosition - skipNonindentSpaces - res <- orderedListMarker style delim - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - atMostSpaces (tabStop - (endpos - startpos)) - return res ) - start' <- option 1 $ guardEnabled Ext_startnum >> return start - return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items - -bulletList :: MarkdownParser (F Blocks) + fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule) + <|> return (style == Example) + items <- fmap sequence $ many1 $ listItem fourSpaceRule + (orderedListStart (Just (style, delim))) + start' <- (start <$ guardEnabled Ext_startnum) <|> return 1 + return $ B.orderedListWith (start', style, delim) <$> fmap compactify items + +bulletList :: PandocMonad m => MarkdownParser m (F Blocks) bulletList = do - items <- fmap sequence $ many1 $ listItem bulletListStart - return $ B.bulletList <$> fmap compactify' items + fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule) + <|> return False + items <- fmap sequence $ many1 $ listItem fourSpaceRule bulletListStart + return $ B.bulletList <$> fmap compactify items -- definition lists -defListMarker :: MarkdownParser () +defListMarker :: PandocMonad m => MarkdownParser m () defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' @@ -921,52 +975,53 @@ defListMarker = do else mzero return () -definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks])) +definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks])) definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' - contents <- mapM (parseFromString parseBlocks . (++"\n")) raw + term <- parseFromString' (trimInlinesF <$> inlines) rawLine' + contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) -defRawBlock :: Bool -> MarkdownParser String +defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker - firstline <- anyLine + firstline <- anyLineNewline let dline = try ( do notFollowedBy blankline notFollowedByHtmlCloser + notFollowedByDivCloser if compact -- laziness not compatible with compact then () <$ indentSpaces else (() <$ indentSpaces) <|> notFollowedBy defListMarker anyLine ) rawlines <- many dline - cont <- liftM concat $ many $ try $ do + cont <- fmap concat $ many $ try $ do trailing <- option "" blanklines ln <- indentSpaces >> notFollowedBy blankline >> anyLine lns <- many dline return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ + return $ trimr (firstline ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" -definitionList :: MarkdownParser (F Blocks) +definitionList :: PandocMonad m => MarkdownParser m (F Blocks) definitionList = try $ do lookAhead (anyLine >> - optional (blankline >> notFollowedBy (table >> return ())) >> + optional (blankline >> notFollowedBy (Control.Monad.void table)) >> -- don't capture table caption as def list! defListMarker) compactDefinitionList <|> normalDefinitionList -compactDefinitionList :: MarkdownParser (F Blocks) +compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks) compactDefinitionList = do guardEnabled Ext_compact_definition_lists items <- fmap sequence $ many1 $ definitionListItem True - return $ B.definitionList <$> fmap compactify'DL items + return $ B.definitionList <$> fmap compactifyDL items -normalDefinitionList :: MarkdownParser (F Blocks) +normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks) normalDefinitionList = do guardEnabled Ext_definition_lists items <- fmap sequence $ many1 $ definitionListItem False @@ -976,10 +1031,10 @@ normalDefinitionList = do -- paragraph block -- -para :: MarkdownParser (F Blocks) +para :: PandocMonad m => MarkdownParser m (F Blocks) para = try $ do exts <- getOption readerExtensions - result <- trimInlinesF . mconcat <$> many1 inline + result <- trimInlinesF <$> inlines1 option (B.plain <$> result) $ try $ do newline @@ -997,29 +1052,35 @@ para = try $ do Just "div" -> () <$ lookAhead (htmlTag (~== TagClose "div")) _ -> mzero + <|> do guardEnabled Ext_fenced_divs + divLevel <- stateFencedDivLevel <$> getState + if divLevel > 0 + then lookAhead divFenceEnd + else mzero return $ do result' <- result case B.toList result' of [Image attr alt (src,tit)] - | Ext_implicit_figures `Set.member` exts -> + | not (null alt) && + Ext_implicit_figures `extensionEnabled` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton $ Image attr alt (src,'f':'i':'g':':':tit) _ -> return $ B.para result' -plain :: MarkdownParser (F Blocks) -plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline +plain :: PandocMonad m => MarkdownParser m (F Blocks) +plain = fmap B.plain . trimInlinesF <$> inlines1 -- -- raw html -- -htmlElement :: MarkdownParser String +htmlElement :: PandocMonad m => MarkdownParser m String htmlElement = rawVerbatimBlock <|> strictHtmlBlock - <|> liftM snd (htmlTag isBlockTag) + <|> fmap snd (htmlTag isBlockTag) -htmlBlock :: MarkdownParser (F Blocks) +htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks) htmlBlock = do guardEnabled Ext_raw_html try (do @@ -1044,43 +1105,59 @@ htmlBlock = do <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) <|> htmlBlock' -htmlBlock' :: MarkdownParser (F Blocks) +htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks) htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ return $ B.rawBlock "html" first + return $ if null first + then mempty + else return $ B.rawBlock "html" first -strictHtmlBlock :: MarkdownParser String +strictHtmlBlock :: PandocMonad m => MarkdownParser m String strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: MarkdownParser String +rawVerbatimBlock :: PandocMonad m => MarkdownParser m String rawVerbatimBlock = htmlInBalanced isVerbTag where isVerbTag (TagOpen "pre" _) = True isVerbTag (TagOpen "style" _) = True isVerbTag (TagOpen "script" _) = True isVerbTag _ = False -rawTeXBlock :: MarkdownParser (F Blocks) +rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) - <|> (B.rawBlock "context" . concat <$> - rawConTeXtEnvironment `sepEndBy1` blankline) - spaces - return $ return result - -rawHtmlBlocks :: MarkdownParser (F Blocks) + lookAhead $ try $ char '\\' >> letter + result <- (B.rawBlock "context" . trim . concat <$> + many1 ((++) <$> (rawConTeXtEnvironment <|> conTeXtCommand) + <*> spnl')) + <|> (B.rawBlock "latex" . trim . concat <$> + many1 ((++) <$> rawLaTeXBlock <*> spnl')) + return $ case B.toList result of + [RawBlock _ cs] + | all (`elem` [' ','\t','\n']) cs -> return mempty + -- don't create a raw block for suppressed macro defs + _ -> return result + +conTeXtCommand :: PandocMonad m => MarkdownParser m String +conTeXtCommand = oneOfStrings ["\\placeformula"] + +rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag + -- we don't want '<td> text' to be a code block: + skipMany spaceChar + indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0 -- try to find closing tag -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just tagtype } let closer = htmlTag (\x -> x ~== TagClose tagtype) - contents <- mconcat <$> many (notFollowedBy' closer >> block) + let block' = do notFollowedBy' closer + gobbleAtMostSpaces indentlevel + block + contents <- mconcat <$> many block' result <- (closer >>= \(_, rawcloser) -> return ( return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> @@ -1101,11 +1178,11 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- line block -- -lineBlock :: MarkdownParser (F Blocks) +lineBlock :: PandocMonad m => MarkdownParser m (F Blocks) lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) + mapM (parseFromString' (trimInlinesF <$> inlines)) return $ B.lineBlock <$> sequence lines' -- @@ -1114,8 +1191,9 @@ lineBlock = try $ do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> Parser [Char] st (Int, Int) +dashedLine :: PandocMonad m + => Char + -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1125,8 +1203,9 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. -simpleTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) +simpleTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m (F [Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -1137,17 +1216,17 @@ simpleTableHeader headless = try $ do let (lengths, lines') = unzip dashes let indices = scanl (+) (length initSp) lines' -- If no header, calculate alignment on basis of first row of text - rawHeads <- liftM (tail . splitStringByIndices (init indices)) $ + rawHeads <- fmap (tail . splitStringByIndices (init indices)) $ if headless then lookAhead anyLine else return rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths + let aligns = zipWith alignType (map (: []) rawHeads) lengths let rawHeads' = if headless then replicate (length dashes) "" else rawHeads heads <- fmap sequence - $ mapM (parseFromString (mconcat <$> many plain)) - $ map trim rawHeads' + $ + mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads' return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings @@ -1161,25 +1240,26 @@ alignType strLst len = let nonempties = filter (not . null) $ map trimr strLst (leftSpace, rightSpace) = case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) - [] -> (False, False) + (x:_) -> (head x `elem` " \t", length x < len) + [] -> (False, False) in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault + (True, False) -> AlignRight + (False, True) -> AlignLeft + (True, True) -> AlignCenter + (False, False) -> AlignDefault -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: MarkdownParser String +tableFooter :: PandocMonad m => MarkdownParser m String tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep :: MarkdownParser Char +tableSep :: PandocMonad m => MarkdownParser m Char tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. -rawTableLine :: [Int] - -> MarkdownParser [String] +rawTableLine :: PandocMonad m + => [Int] + -> MarkdownParser m [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline @@ -1187,31 +1267,34 @@ rawTableLine indices = do splitStringByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). -tableLine :: [Int] - -> MarkdownParser (F [Blocks]) +tableLine :: PandocMonad m + => [Int] + -> MarkdownParser m (F [Blocks]) tableLine indices = rawTableLine indices >>= - fmap sequence . mapM (parseFromString (mconcat <$> many plain)) + fmap sequence . mapM (parseFromString' (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). -multilineRow :: [Int] - -> MarkdownParser (F [Blocks]) +multilineRow :: PandocMonad m + => [Int] + -> MarkdownParser m (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols + fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: MarkdownParser (F Inlines) +tableCaption :: PandocMonad m => MarkdownParser m (F Inlines) tableCaption = try $ do guardEnabled Ext_table_captions skipNonindentSpaces - string ":" <|> string "Table:" - trimInlinesF . mconcat <$> many1 inline <* blanklines + (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:" + trimInlinesF <$> inlines1 <* blanklines -- Parse a simple table with '---' header and one line per row. -simpleTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +simpleTable :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine @@ -1224,13 +1307,15 @@ simpleTable headless = do -- (which may be multiline), then the rows, -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -multilineTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +multilineTable :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter -multilineTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) +multilineTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do unless headless $ tableSep >> notFollowedBy blankline @@ -1243,7 +1328,7 @@ multilineTableHeader headless = try $ do let (lengths, lines') = unzip dashes let indices = scanl (+) (length initSp) lines' rawHeadsList <- if headless - then liftM (map (:[]) . tail . + then fmap (map (:[]) . tail . splitStringByIndices (init indices)) $ lookAhead anyLine else return $ transpose $ map (tail . splitStringByIndices (init indices)) @@ -1253,101 +1338,18 @@ multilineTableHeader headless = try $ do then replicate (length dashes) "" else map (unlines . map trim) rawHeadsList heads <- fmap sequence $ - mapM (parseFromString (mconcat <$> many plain)) $ - map trim rawHeads + mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads return (heads, aligns, indices) -- Parse a grid table: starts with row of '-' on top, then header -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow - (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line - -gridPart :: Char -> Parser [Char] st ((Int, Int), Alignment) -gridPart ch = do - leftColon <- option False (True <$ char ':') - dashes <- many1 (char ch) - rightColon <- option False (True <$ char ':') - char '+' - let lengthDashes = length dashes + (if leftColon then 1 else 0) + - (if rightColon then 1 else 0) - let alignment = case (leftColon, rightColon) of - (True, True) -> AlignCenter - (True, False) -> AlignLeft - (False, True) -> AlignRight - (False, False) -> AlignDefault - return ((lengthDashes, lengthDashes + 1), alignment) - -gridDashedLines :: Char -> Parser [Char] st [((Int, Int), Alignment)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse - --- | Separator between rows of grid table. -gridTableSep :: Char -> MarkdownParser Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return [] - else many1 (try (char '|' >> anyLine)) - underDashes <- if headless - then return dashes - else gridDashedLines '=' - guard $ length dashes == length underDashes - let lines' = map (snd . fst) underDashes - let indices = scanl (+) 0 lines' - let aligns = map snd underDashes - let rawHeads = if headless - then replicate (length underDashes) "" - else map (unlines . map trim) $ transpose - $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads - return (heads, aligns, indices) +gridTable :: PandocMonad m => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) +gridTable headless = gridTableWith' parseBlocks headless -gridTableRawLine :: [Int] -> MarkdownParser [String] -gridTableRawLine indices = do - char '|' - line <- anyLine - return (gridTableSplitLine indices line) - --- | Parse row of grid table. -gridTableRow :: [Int] - -> MarkdownParser (F [Blocks]) -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols) - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- | Parse footer for a grid table. -gridTableFooter :: MarkdownParser [Char] -gridTableFooter = blanklines - -pipeBreak :: MarkdownParser ([Alignment], [Int]) +pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1359,7 +1361,7 @@ pipeBreak = try $ do blankline return $ unzip (first:rest) -pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar @@ -1372,41 +1374,40 @@ pipeTable = try $ do numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> - fromIntegral (len + 1) / fromIntegral numColumns) - seplengths + fromIntegral len / fromIntegral (sum seplengths)) + seplengths else replicate (length aligns) 0.0 - return $ (aligns, widths, heads', sequence lines'') + return (aligns, widths, heads', sequence lines'') -sepPipe :: MarkdownParser () +sepPipe :: PandocMonad m => MarkdownParser m () sepPipe = try $ do char '|' <|> char '+' notFollowedBy blankline -- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: MarkdownParser (F [Blocks]) +pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks]) pipeTableRow = try $ do scanForPipe skipMany spaceChar openPipe <- (True <$ char '|') <|> return False -- split into cells - let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') + let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') <|> void (noneOf "|\n\r") let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>= - parseFromString pipeTableCell - cells <- cellContents `sepEndBy1` (char '|') + parseFromString' pipeTableCell + cells <- cellContents `sepEndBy1` char '|' -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) blankline return $ sequence cells -pipeTableCell :: MarkdownParser (F Blocks) -pipeTableCell = do - result <- many inline - if null result - then return mempty - else return $ B.plain . mconcat <$> sequence result +pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks) +pipeTableCell = + (do result <- inlines1 + return $ B.plain <$> result) + <|> return mempty -pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1414,15 +1415,15 @@ pipeTableHeaderPart = try $ do right <- optionMaybe (char ':') skipMany spaceChar let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right - return $ - ((case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter), len) + return + (case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: Parser [Char] st () +scanForPipe :: PandocMonad m => ParserT [Char] st m () scanForPipe = do inp <- getInput case break (\c -> c == '\n' || c == '|') inp of @@ -1432,22 +1433,23 @@ scanForPipe = do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in -- Text.Pandoc.Parsing. -tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser (F [Blocks])) - -> MarkdownParser sep - -> MarkdownParser end - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +tableWith :: PandocMonad m + => MarkdownParser m (F [Blocks], [Alignment], [Int]) + -> ([Int] -> MarkdownParser m (F [Blocks])) + -> MarkdownParser m sep + -> MarkdownParser m end + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns - let widths = if (indices == []) + let widths = if null indices then replicate (length aligns) 0.0 else widthsFromIndices numColumns indices - return $ (aligns, widths, heads, lines') + return (aligns, widths, heads, lines') -table :: MarkdownParser (F Blocks) +table :: PandocMonad m => MarkdownParser m (F Blocks) table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- @@ -1462,8 +1464,8 @@ table = try $ do (gridTable False <|> gridTable True)) <?> "table" optional blanklines caption <- case frontCaption of - Nothing -> option (return mempty) tableCaption - Just c -> return c + Nothing -> option (return mempty) tableCaption + Just c -> return c -- renormalize widths if greater than 100%: let totalWidth = sum widths let widths' = if totalWidth < 1 @@ -1479,7 +1481,13 @@ table = try $ do -- inline -- -inline :: MarkdownParser (F Inlines) +inlines :: PandocMonad m => MarkdownParser m (F Inlines) +inlines = mconcat <$> many inline + +inlines1 :: PandocMonad m => MarkdownParser m (F Inlines) +inlines1 = mconcat <$> many1 inline + +inline :: PandocMonad m => MarkdownParser m (F Inlines) inline = choice [ whitespace , bareURL , str @@ -1499,6 +1507,7 @@ inline = choice [ whitespace , autoLink , spanHtml , rawHtmlInline + , escapedNewline , escapedChar , rawLaTeXInline' , exampleRef @@ -1509,32 +1518,36 @@ inline = choice [ whitespace , ltSign ] <?> "inline" -escapedChar' :: MarkdownParser Char +escapedChar' :: PandocMonad m => MarkdownParser m Char escapedChar' = try $ do char '\\' (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> (guardEnabled Ext_angle_brackets_escapable >> oneOf "\\`*_{}[]()>#+-.!~\"<>") - <|> (guardEnabled Ext_escaped_line_breaks >> char '\n') <|> oneOf "\\`*_{}[]()>#+-.!~\"" -escapedChar :: MarkdownParser (F Inlines) +escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines) +escapedNewline = try $ do + guardEnabled Ext_escaped_line_breaks + char '\\' + lookAhead (char '\n') -- don't consume the newline (see #3730) + return $ return B.linebreak + +escapedChar :: PandocMonad m => MarkdownParser m (F Inlines) escapedChar = do result <- escapedChar' case result of - ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - '\n' -> guardEnabled Ext_escaped_line_breaks >> - return (return B.linebreak) -- "\[newline]" is a linebreak - _ -> return $ return $ B.str [result] + ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space + _ -> return $ return $ B.str [result] -ltSign :: MarkdownParser (F Inlines) +ltSign :: PandocMonad m => MarkdownParser m (F Inlines) ltSign = do guardDisabled Ext_raw_html <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag)) char '<' return $ return $ B.str "<" -exampleRef :: MarkdownParser (F Inlines) +exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' @@ -1542,10 +1555,10 @@ exampleRef = try $ do return $ do st <- askF return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) -symbol :: MarkdownParser (F Inlines) +symbol :: PandocMonad m => MarkdownParser m (F Inlines) symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' @@ -1554,28 +1567,36 @@ symbol = do return $ return $ B.str [result] -- parses inline code, between n `s and n `s -code :: MarkdownParser (F Inlines) +code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + result <- (trim . concat) <$> + many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes - >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result - -math :: MarkdownParser (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> - ((getOption readerSmart >>= guard) *> (return <$> apostrophe) + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_inline_code_attributes >> attributes))) + return $ return $ + case rawattr of + Left syn -> B.rawInline syn result + Right attr -> B.codeWith attr result + +math :: PandocMonad m => MarkdownParser m (F Inlines) +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) + <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?> + (guardEnabled Ext_smart *> (return <$> apostrophe) <* notFollowedBy (space <|> satisfy isPunctuation)) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. -enclosure :: Char - -> MarkdownParser (F Inlines) +enclosure :: PandocMonad m + => Char + -> MarkdownParser m (F Inlines) enclosure c = do -- we can't start an enclosure with _ if after a string and -- the intraword_underscores extension is enabled: @@ -1584,14 +1605,14 @@ enclosure c = do <|> (guard =<< notAfterString) cs <- many1 (char c) (return (B.str cs) <>) <$> whitespace - <|> do + <|> case length cs of - 3 -> three c - 2 -> two c mempty - 1 -> one c mempty - _ -> return (return $ B.str cs) + 3 -> three c + 2 -> two c mempty + 1 -> one c mempty + _ -> return (return $ B.str cs) -ender :: Char -> Int -> MarkdownParser () +ender :: PandocMonad m => Char -> Int -> MarkdownParser m () ender c n = try $ do count n (char c) guard (c == '*') @@ -1602,104 +1623,96 @@ ender c n = try $ do -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. -- Otherwise, emit ccc then the results. -three :: Char -> MarkdownParser (F Inlines) +three :: PandocMonad m => Char -> MarkdownParser m (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) <$> contents)) - <|> (ender c 2 >> one c (B.strong <$> contents)) - <|> (ender c 1 >> two c (B.emph <$> contents)) + (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents)) + <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents)) + <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents)) <|> return (return (B.str [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. -two :: Char -> F Inlines -> MarkdownParser (F Inlines) +two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong <$> (prefix' <> contents))) + (ender c 2 >> updateLastStrPos >> + return (B.strong <$> (prefix' <> contents))) <|> return (return (B.str [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. -one :: Char -> F Inlines -> MarkdownParser (F Inlines) +one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) <|> try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty) ) - (ender c 1 >> return (B.emph <$> (prefix' <> contents))) + (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) -strongOrEmph :: MarkdownParser (F Inlines) +strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) strongOrEmph = enclosure '*' <|> enclosure '_' -- | Parses a list of inlines between start and end delimiters. -inlinesBetween :: (Show b) - => MarkdownParser a - -> MarkdownParser b - -> MarkdownParser (F Inlines) +inlinesBetween :: PandocMonad m + => (Show b) + => MarkdownParser m a + -> MarkdownParser m b + -> MarkdownParser m (F Inlines) inlinesBetween start end = (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -strikeout :: MarkdownParser (F Inlines) +strikeout :: PandocMonad m => MarkdownParser m (F Inlines) strikeout = fmap B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: MarkdownParser (F Inlines) +superscript :: PandocMonad m => MarkdownParser m (F Inlines) superscript = fmap B.superscript <$> try (do guardEnabled Ext_superscript char '^' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: MarkdownParser (F Inlines) +subscript :: PandocMonad m => MarkdownParser m (F Inlines) subscript = fmap B.subscript <$> try (do guardEnabled Ext_subscript char '~' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: MarkdownParser (F Inlines) +whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: Parser [Char] st Char +nonEndline :: PandocMonad m => ParserT [Char] st m Char nonEndline = satisfy (/='\n') -str :: MarkdownParser (F Inlines) +str :: PandocMonad m => MarkdownParser m (F Inlines) str = do - result <- many1 alphaNum + result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) updateLastStrPos - let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - isSmart <- getOption readerSmart - if isSmart - then case likelyAbbrev result of - [] -> return $ return $ B.str result - xs -> choice (map (\x -> - try (string x >> oneOf " \n" >> - lookAhead alphaNum >> - return (return $ B.str - $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ return $ B.str result) - else return $ return $ B.str result - --- | if the string matches the beginning of an abbreviation (before --- the first period, return strings that would finish the abbreviation. -likelyAbbrev :: String -> [String] -likelyAbbrev x = - let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", - "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", - "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.", - "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.", - "ch.", "sec.", "cf.", "cp."] - abbrPairs = map (break (=='.')) abbrevs - in map snd $ filter (\(y,_) -> y == x) abbrPairs + (do guardEnabled Ext_smart + abbrevs <- getOption readerAbbreviations + if not (null result) && last result == '.' && result `Set.member` abbrevs + then try (do ils <- whitespace <|> endline + lookAhead alphaNum + return $ do + ils' <- ils + if ils' == B.space + then return (B.str result <> B.str "\160") + else -- linebreak or softbreak + return (ils' <> B.str result <> B.str "\160")) + <|> return (return (B.str result)) + else return (return (B.str result))) + <|> return (return (B.str result)) -- an endline character that can be treated as a space, not a structural break -endline :: MarkdownParser (F Inlines) +endline :: PandocMonad m => MarkdownParser m (F Inlines) endline = try $ do newline notFollowedBy blankline @@ -1711,6 +1724,7 @@ endline = try $ do guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser + notFollowedByDivCloser (eof >> return mempty) <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) @@ -1721,23 +1735,25 @@ endline = try $ do -- -- a reference label for a link -reference :: MarkdownParser (F Inlines, String) -reference = do notFollowedBy' (string "[^") -- footnote reference - withRaw $ trimInlinesF <$> inlinesInBalancedBrackets +reference :: PandocMonad m => MarkdownParser m (F Inlines, String) +reference = do + guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^") + guardDisabled Ext_citations <|> notFollowedBy' (string "[@") + withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -parenthesizedChars :: MarkdownParser [Char] +parenthesizedChars :: PandocMonad m => MarkdownParser m [Char] parenthesizedChars = do result <- charsInBalanced '(' ')' litChar return $ '(' : result ++ ")" -- source for a link, with optional title -source :: MarkdownParser (String, String) +source :: PandocMonad m => MarkdownParser m (String, String) source = do char '(' skipSpaces let urlChunk = try parenthesizedChars - <|> (notFollowedBy (oneOf " )") >> (count 1 litChar)) + <|> (notFollowedBy (oneOf " )") >> count 1 litChar) <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')")) let sourceURL = (unwords . words . concat) <$> many urlChunk let betweenAngles = try $ @@ -1748,10 +1764,10 @@ source = do char ')' return (escapeURI $ trimr src, tit) -linkTitle :: MarkdownParser String +linkTitle :: PandocMonad m => MarkdownParser m String linkTitle = quotedTitle '"' <|> quotedTitle '\'' -link :: MarkdownParser (F Inlines) +link :: PandocMonad m => MarkdownParser m (F Inlines) link = try $ do st <- getState guard $ stateAllowLinks st @@ -1760,21 +1776,31 @@ link = try $ do setState $ st{ stateAllowLinks = True } regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -bracketedSpan :: MarkdownParser (F Inlines) +bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines) bracketedSpan = try $ do guardEnabled Ext_bracketed_spans (lab,_) <- reference attr <- attributes - let (ident,classes,keyvals) = attr - case lookup "style" keyvals of - Just s | null ident && null classes && - map toLower (filter (`notElem` " \t;") s) == - "font-variant:small-caps" - -> return $ B.smallcaps <$> lab - _ -> return $ B.spanWith attr <$> lab - -regLink :: (Attr -> String -> String -> Inlines -> Inlines) - -> F Inlines -> MarkdownParser (F Inlines) + return $ if isSmallCaps attr + then B.smallcaps <$> lab + else B.spanWith attr <$> lab + +-- | We treat a span as SmallCaps if class is "smallcaps" (and +-- no other attributes are set or if style is "font-variant:small-caps" +-- (and no other attributes are set). +isSmallCaps :: Attr -> Bool +isSmallCaps ("",["smallcaps"],[]) = True +isSmallCaps ("",[],kvs) = + case lookup "style" kvs of + Just s -> map toLower (filter (`notElem` " \t;") s) == + "font-variant:small-caps" + Nothing -> False +isSmallCaps _ = False + +regLink :: PandocMonad m + => (Attr -> String -> String -> Inlines -> Inlines) + -> F Inlines + -> MarkdownParser m (F Inlines) regLink constructor lab = try $ do (src, tit) <- source attr <- option nullAttr $ @@ -1782,20 +1808,24 @@ regLink constructor lab = try $ do return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) - -> (F Inlines, String) -> MarkdownParser (F Inlines) +referenceLink :: PandocMonad m + => (Attr -> String -> String -> Inlines -> Inlines) + -> (F Inlines, String) + -> MarkdownParser m (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False (_,raw') <- option (mempty, "") $ - lookAhead (try (guardEnabled Ext_citations >> - spnl >> normalCite >> return (mempty, ""))) + lookAhead (try (do guardEnabled Ext_citations + guardDisabled Ext_spaced_reference_links <|> spnl + normalCite + return (mempty, ""))) <|> - try (spnl >> reference) + try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference) when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' - parsedRaw <- parseFromString (mconcat <$> many inline) raw' - fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + parsedRaw <- parseFromString' inlines raw' + fallback <- parseFromString' inlines $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references let makeFallback = do @@ -1820,11 +1850,11 @@ referenceLink constructor (lab, raw) = do dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB where dropRB (']':xs) = xs - dropRB xs = xs + dropRB xs = xs dropLB ('[':xs) = xs - dropLB xs = xs + dropLB xs = xs -bareURL :: MarkdownParser (F Inlines) +bareURL :: PandocMonad m => MarkdownParser m (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks @@ -1832,7 +1862,7 @@ bareURL = try $ do notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") return $ return $ B.link src "" (B.str orig) -autoLink :: MarkdownParser (F Inlines) +autoLink :: PandocMonad m => MarkdownParser m (F Inlines) autoLink = try $ do getState >>= guard . stateAllowLinks char '<' @@ -1846,7 +1876,7 @@ autoLink = try $ do guardEnabled Ext_link_attributes >> attributes return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra) -image :: MarkdownParser (F Inlines) +image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do char '!' (lab,raw) <- reference @@ -1856,54 +1886,55 @@ image = try $ do _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) -note :: MarkdownParser (F Inlines) +note :: PandocMonad m => MarkdownParser m (F Inlines) note = try $ do guardEnabled Ext_footnotes ref <- noteMarker + updateState $ \st -> st{ stateNoteRefs = Set.insert ref (stateNoteRefs st) } return $ do notes <- asksF stateNotes' - case lookup ref notes of + case M.lookup ref notes of Nothing -> return $ B.str $ "[^" ++ ref ++ "]" - Just contents -> do + Just (_pos, contents) -> do st <- askF -- process the note in a context that doesn't resolve -- notes, to avoid infinite looping with notes inside -- notes: - let contents' = runF contents st{ stateNotes' = [] } + let contents' = runF contents st{ stateNotes' = M.empty } return $ B.note contents' -inlineNote :: MarkdownParser (F Inlines) +inlineNote :: PandocMonad m => MarkdownParser m (F Inlines) inlineNote = try $ do guardEnabled Ext_inline_notes char '^' contents <- inlinesInBalancedBrackets return $ B.note . B.para <$> contents -rawLaTeXInline' :: MarkdownParser (F Inlines) +rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines) rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex - lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s - -- "tex" because it might be context or latex + lookAhead $ try $ char '\\' >> letter + notFollowedBy' rawConTeXtEnvironment + s <- rawLaTeXInline + return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: Parser [Char] st String +rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) - <|> (many1 letter) - contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar)) + <|> many1 letter + contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar) (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String +inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -spanHtml :: MarkdownParser (F Inlines) +spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) spanHtml = try $ do guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) @@ -1911,14 +1942,11 @@ spanHtml = try $ do let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - case lookup "style" keyvals of - Just s | null ident && null classes && - map toLower (filter (`notElem` " \t;") s) == - "font-variant:small-caps" - -> return $ B.smallcaps <$> contents - _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents - -divHtml :: MarkdownParser (F Blocks) + return $ if isSmallCaps (ident, classes, keyvals) + then B.smallcaps <$> contents + else B.spanWith (ident, classes, keyvals) <$> contents + +divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = try $ do guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) @@ -1940,7 +1968,29 @@ divHtml = try $ do else -- avoid backtracing return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents -rawHtmlInline :: MarkdownParser (F Inlines) +divFenced :: PandocMonad m => MarkdownParser m (F Blocks) +divFenced = try $ do + guardEnabled Ext_fenced_divs + string ":::" + skipMany (char ':') + skipMany spaceChar + attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar) + skipMany spaceChar + skipMany (char ':') + blankline + updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 } + bs <- mconcat <$> manyTill block divFenceEnd + updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 } + return $ B.divWith attribs <$> bs + +divFenceEnd :: PandocMonad m => MarkdownParser m () +divFenceEnd = try $ do + string ":::" + skipMany (char ':') + blanklines + return () + +rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines) rawHtmlInline = do guardEnabled Ext_raw_html inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1962,7 +2012,7 @@ rawHtmlInline = do emojiChars :: [Char] emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] -emoji :: MarkdownParser (F Inlines) +emoji :: PandocMonad m => MarkdownParser m (F Inlines) emoji = try $ do guardEnabled Ext_emoji char ':' @@ -1974,21 +2024,22 @@ emoji = try $ do -- Citations -cite :: MarkdownParser (F Inlines) +cite :: PandocMonad m => MarkdownParser m (F Inlines) cite = do guardEnabled Ext_citations - citations <- textualCite + textualCite <|> do (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs - return citations + return $ flip B.cite (B.text raw) <$> cs -textualCite :: MarkdownParser (F Inlines) +textualCite :: PandocMonad m => MarkdownParser m (F Inlines) textualCite = try $ do - (_, key) <- citeKey + (suppressAuthor, key) <- citeKey let first = Citation{ citationId = key , citationPrefix = [] , citationSuffix = [] - , citationMode = AuthorInText + , citationMode = if suppressAuthor + then SuppressAuthor + else AuthorInText , citationNoteNum = 0 , citationHash = 0 } @@ -2003,7 +2054,7 @@ textualCite = try $ do let (spaces',raw') = span isSpace raw spc | null spaces' = mempty | otherwise = B.space - lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' + lab <- parseFromString' inlines $ dropBrackets raw' fallback <- referenceLink B.linkWith (lab,raw') return $ do fallback' <- fallback @@ -2017,7 +2068,7 @@ textualCite = try $ do Just n -> B.str (show n) _ -> B.cite [first] $ B.str $ '@':key) -bareloc :: Citation -> MarkdownParser (F [Citation]) +bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation]) bareloc c = try $ do spnl char '[' @@ -2032,7 +2083,7 @@ bareloc c = try $ do rest' <- rest return $ c{ citationSuffix = B.toList suff' } : rest' -normalCite :: MarkdownParser (F [Citation]) +normalCite :: PandocMonad m => MarkdownParser m (F [Citation]) normalCite = try $ do char '[' spnl @@ -2041,7 +2092,7 @@ normalCite = try $ do char ']' return citations -suffix :: MarkdownParser (F Inlines) +suffix :: PandocMonad m => MarkdownParser m (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl @@ -2050,14 +2101,14 @@ suffix = try $ do then (B.space <>) <$> rest else rest -prefix :: MarkdownParser (F Inlines) +prefix :: PandocMonad m => MarkdownParser m (F Inlines) prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) + manyTill inline (char ']' <|> fmap (const ']') (lookAhead citeKey)) -citeList :: MarkdownParser (F [Citation]) +citeList :: PandocMonad m => MarkdownParser m (F [Citation]) citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) -citation :: MarkdownParser (F Citation) +citation :: PandocMonad m => MarkdownParser m (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey @@ -2065,23 +2116,23 @@ citation = try $ do return $ do x <- pref y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - -smart :: MarkdownParser (F Inlines) + return Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +smart :: PandocMonad m => MarkdownParser m (F Inlines) smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice (map (return <$>) [apostrophe, dash, ellipses]) -singleQuoted :: MarkdownParser (F Inlines) +singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ @@ -2091,10 +2142,10 @@ singleQuoted = try $ do -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: MarkdownParser (F Inlines) +doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return + withQuoteContext InDoubleQuote (doubleQuoteEnd >> return (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + <|> return (return (B.str "\8220") <> contents) |