summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs1449
1 files changed, 750 insertions, 699 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index cd35a8738..14cf73de4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
+
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Markdown
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,63 +30,55 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.Markdown ( readMarkdown,
- readMarkdownWithWarnings ) where
+module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
-import Data.List ( transpose, sortBy, findIndex, intercalate )
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
+import qualified Data.HashMap.Strict as H
+import Data.List (intercalate, sortBy, transpose, elemIndex)
import qualified Data.Map as M
-import Data.Scientific (coefficient, base10Exponent)
-import Data.Ord ( comparing )
-import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation )
import Data.Maybe
-import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
-import Text.Pandoc.Generic (bottomUp)
-import qualified Data.Text as T
+import Data.Monoid ((<>))
+import Data.Ord (comparing)
+import Data.Scientific (base10Exponent, coefficient)
+import qualified Data.Set as Set
import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Vector as V
+import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..))
import qualified Data.Yaml as Yaml
-import Data.Yaml (ParseException(..), YamlException(..), YamlMark(..))
-import qualified Data.HashMap.Strict as H
+import System.FilePath (addExtension, takeExtension)
+import Text.HTML.TagSoup
+import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import qualified Data.Vector as V
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
+import Text.Pandoc.Class (PandocMonad (..), report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (tableWith)
+import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
+ isCommentTag, isInlineTag, isTextTag)
+import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
-import Text.Pandoc.Pretty (charWidth)
+import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Parsing hiding (tableWith)
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
- isTextTag, isCommentTag )
-import Control.Monad
-import System.FilePath (takeExtension, addExtension)
-import Text.HTML.TagSoup
-import qualified Data.Set as Set
-import Text.Printf (printf)
-import Debug.Trace (trace)
-import Data.Monoid ((<>))
-import Text.Pandoc.Error
-type MarkdownParser = Parser [Char] ParserState
+type MarkdownParser m = ParserT [Char] ParserState m
-- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readMarkdown opts s =
- (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
-
--- | Read markdown from an input string and return a pair of a Pandoc document
--- and a list of warnings.
-readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError (Pandoc, [String])
-readMarkdownWithWarnings opts s =
- (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
+readMarkdown :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readMarkdown opts s = do
+ parsed <- readWithM parseMarkdown def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
--
-- Constants and data structure definitions
@@ -117,44 +110,43 @@ isBlank _ = False
--
-- | Succeeds when we're in list context.
-inList :: MarkdownParser ()
+inList :: PandocMonad m => MarkdownParser m ()
inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
-spnl :: Parser [Char] st ()
+spnl :: PandocMonad m => ParserT [Char] st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
-indentSpaces :: MarkdownParser String
+spnl' :: PandocMonad m => ParserT [Char] st m String
+spnl' = try $ do
+ xs <- many spaceChar
+ ys <- option "" $ try $ (:) <$> newline
+ <*> (many spaceChar <* notFollowedBy (char '\n'))
+ return (xs ++ ys)
+
+indentSpaces :: PandocMonad m => MarkdownParser m String
indentSpaces = try $ do
tabStop <- getOption readerTabStop
count tabStop (char ' ') <|>
string "\t" <?> "indentation"
-nonindentSpaces :: MarkdownParser String
+nonindentSpaces :: PandocMonad m => MarkdownParser m String
nonindentSpaces = do
- tabStop <- getOption readerTabStop
- sps <- many (char ' ')
- if length sps < tabStop
- then return sps
- else unexpected "indented line"
+ n <- skipNonindentSpaces
+ return $ replicate n ' '
-- returns number of spaces parsed
-skipNonindentSpaces :: MarkdownParser Int
+skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
skipNonindentSpaces = do
tabStop <- getOption readerTabStop
- atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ')
+ gobbleAtMostSpaces (tabStop - 1) <* notFollowedBy spaceChar
-atMostSpaces :: Int -> MarkdownParser Int
-atMostSpaces n
- | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0
- | otherwise = return 0
-
-litChar :: MarkdownParser Char
+litChar :: PandocMonad m => MarkdownParser m Char
litChar = escapedChar'
<|> characterReference
<|> noneOf "\n"
@@ -162,30 +154,32 @@ litChar = escapedChar'
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: MarkdownParser (F Inlines)
-inlinesInBalancedBrackets = do
- char '['
- (_, raw) <- withRaw $ charsInBalancedBrackets 1
- guard $ not $ null raw
- parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
-
-charsInBalancedBrackets :: Int -> MarkdownParser ()
-charsInBalancedBrackets 0 = return ()
-charsInBalancedBrackets openBrackets =
- (char '[' >> charsInBalancedBrackets (openBrackets + 1))
- <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1))
- <|> (( (() <$ code)
- <|> (() <$ (escapedChar'))
- <|> (newline >> notFollowedBy blankline)
- <|> skipMany1 (noneOf "[]`\n\\")
- <|> (() <$ count 1 (oneOf "`\\"))
- ) >> charsInBalancedBrackets openBrackets)
+inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
+inlinesInBalancedBrackets =
+ try $ char '[' >> withRaw (go 1) >>=
+ parseFromString inlines . stripBracket . snd
+ where stripBracket [] = []
+ stripBracket xs = if last xs == ']' then init xs else xs
+ go :: PandocMonad m => Int -> MarkdownParser m ()
+ go 0 = return ()
+ go openBrackets =
+ (() <$ (escapedChar <|>
+ code <|>
+ rawHtmlInline <|>
+ rawLaTeXInline') >> go openBrackets)
+ <|>
+ (do char ']'
+ Control.Monad.when (openBrackets > 1) $ go (openBrackets - 1))
+ <|>
+ (char '[' >> go (openBrackets + 1))
+ <|>
+ (anyChar >> go openBrackets)
--
-- document structure
--
-rawTitleBlockLine :: MarkdownParser String
+rawTitleBlockLine :: PandocMonad m => MarkdownParser m String
rawTitleBlockLine = do
char '%'
skipSpaces
@@ -196,13 +190,13 @@ rawTitleBlockLine = do
anyLine
return $ trim $ unlines (first:rest)
-titleLine :: MarkdownParser (F Inlines)
+titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
titleLine = try $ do
raw <- rawTitleBlockLine
- res <- parseFromString (many inline) raw
- return $ trimInlinesF $ mconcat res
+ res <- parseFromString' inlines raw
+ return $ trimInlinesF res
-authorsLine :: MarkdownParser (F [Inlines])
+authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines])
authorsLine = try $ do
raw <- rawTitleBlockLine
let sep = (char ';' <* spaces) <|> newline
@@ -210,18 +204,18 @@ authorsLine = try $ do
(trimInlinesF . mconcat <$> many
(try $ notFollowedBy sep >> inline))
sep
- sequence <$> parseFromString pAuthors raw
+ sequence <$> parseFromString' pAuthors raw
-dateLine :: MarkdownParser (F Inlines)
+dateLine :: PandocMonad m => MarkdownParser m (F Inlines)
dateLine = try $ do
raw <- rawTitleBlockLine
- res <- parseFromString (many inline) raw
- return $ trimInlinesF $ mconcat res
+ res <- parseFromString' inlines raw
+ return $ trimInlinesF res
-titleBlock :: MarkdownParser ()
+titleBlock :: PandocMonad m => MarkdownParser m ()
titleBlock = pandocTitleBlock <|> mmdTitleBlock
-pandocTitleBlock :: MarkdownParser ()
+pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
pandocTitleBlock = try $ do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
@@ -239,7 +233,8 @@ pandocTitleBlock = try $ do
$ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-yamlMetaBlock :: MarkdownParser (F Blocks)
+
+yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
yamlMetaBlock = try $ do
guardEnabled Ext_yaml_metadata_block
pos <- getPosition
@@ -250,87 +245,105 @@ yamlMetaBlock = try $ do
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- opts <- stateOptions <$> getState
- meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
- Right (Yaml.Object hashmap) -> return $ return $
- H.foldrWithKey (\k v m ->
- if ignorable k
- then m
- else case yamlToMeta opts v of
- Left _ -> m
- Right v' -> B.setMeta (T.unpack k) v' m)
- nullMeta hashmap
- Right Yaml.Null -> return $ return nullMeta
- Right _ -> do
- addWarning (Just pos) "YAML header is not an object"
- return $ return nullMeta
- Left err' -> do
- case err' of
- InvalidYaml (Just YamlParseException{
- yamlProblem = problem
- , yamlContext = _ctxt
- , yamlProblemMark = Yaml.YamlMark {
- yamlLine = yline
- , yamlColumn = ycol
- }}) ->
- addWarning (Just $ setSourceLine
- (setSourceColumn pos
- (sourceColumn pos + ycol))
- (sourceLine pos + 1 + yline))
- $ "Could not parse YAML header: " ++
- problem
- _ -> addWarning (Just pos)
- $ "Could not parse YAML header: " ++
- show err'
- return $ return nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+ case Yaml.decodeEither' $ UTF8.fromString rawYaml of
+ Right (Yaml.Object hashmap) -> do
+ let alist = H.toList hashmap
+ mapM_ (\(k, v) ->
+ if ignorable k
+ then return ()
+ else do
+ v' <- yamlToMeta v
+ let k' = T.unpack k
+ updateState $ \st -> st{ stateMeta' =
+ do m <- stateMeta' st
+ -- if there's already a value, leave it unchanged
+ case lookupMeta k' m of
+ Just _ -> return m
+ Nothing -> do
+ v'' <- v'
+ return $ B.setMeta (T.unpack k) v'' m}
+ ) alist
+ Right Yaml.Null -> return ()
+ Right _ -> do
+ logMessage $
+ CouldNotParseYamlMetadata "not an object"
+ pos
+ return ()
+ Left err' -> do
+ case err' of
+ InvalidYaml (Just YamlParseException{
+ yamlProblem = problem
+ , yamlContext = _ctxt
+ , yamlProblemMark = Yaml.YamlMark {
+ yamlLine = yline
+ , yamlColumn = ycol
+ }}) ->
+ logMessage $ CouldNotParseYamlMetadata
+ problem (setSourceLine
+ (setSourceColumn pos
+ (sourceColumn pos + ycol))
+ (sourceLine pos + 1 + yline))
+ _ -> logMessage $ CouldNotParseYamlMetadata
+ (show err') pos
+ return ()
return mempty
-- ignore fields ending with _
ignorable :: Text -> Bool
ignorable t = (T.pack "_") `T.isSuffixOf` t
-toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue
-toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
- where
- toMeta p =
- case p of
- Pandoc _ [Plain xs] -> MetaInlines xs
- Pandoc _ [Para xs]
- | endsWithNewline x -> MetaBlocks [Para xs]
- | otherwise -> MetaInlines xs
- Pandoc _ bs -> MetaBlocks bs
- endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
- opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts}
- meta_exts = Set.fromList [ Ext_pandoc_title_block
- , Ext_mmd_title_block
- , Ext_yaml_metadata_block
- ]
-
-yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue
-yamlToMeta opts (Yaml.String t) = toMetaValue opts t
-yamlToMeta _ (Yaml.Number n)
+toMetaValue :: PandocMonad m
+ => Text -> MarkdownParser m (F MetaValue)
+toMetaValue x =
+ parseFromString' parser' (T.unpack x)
+ where parser' = (asInlines <$> ((trimInlinesF . mconcat)
+ <$> try (guard (not endsWithNewline)
+ *> manyTill inline eof)))
+ <|> (asBlocks <$> parseBlocks)
+ asBlocks p = do
+ p' <- p
+ return $ MetaBlocks (B.toList p')
+ asInlines p = do
+ p' <- p
+ return $ MetaInlines (B.toList p')
+ endsWithNewline = T.pack "\n" `T.isSuffixOf` x
+ -- Note: a standard quoted or unquoted YAML value will
+ -- not end in a newline, but a "block" set off with
+ -- `|` or `>` will.
+
+yamlToMeta :: PandocMonad m
+ => Yaml.Value -> MarkdownParser m (F MetaValue)
+yamlToMeta (Yaml.String t) = toMetaValue t
+yamlToMeta (Yaml.Number n)
-- avoid decimal points for numbers that don't need them:
- | base10Exponent n >= 0 = return $ MetaString $ show
+ | base10Exponent n >= 0 = return $ return $ MetaString $ show
$ coefficient n * (10 ^ base10Exponent n)
- | otherwise = return $ MetaString $ show n
-yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b
-yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts)
- (V.toList xs)
-yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m ->
- if ignorable k
- then m
- else (do
- v' <- yamlToMeta opts v
- m' <- m
- return (M.insert (T.unpack k) v' m')))
- (return M.empty) o
-yamlToMeta _ _ = return $ MetaString ""
-
-stopLine :: MarkdownParser ()
+ | otherwise = return $ return $ MetaString $ show n
+yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b
+yamlToMeta (Yaml.Array xs) = do
+ xs' <- mapM yamlToMeta (V.toList xs)
+ return $ do
+ xs'' <- sequence xs'
+ return $ B.toMetaValue xs''
+yamlToMeta (Yaml.Object o) = do
+ let alist = H.toList o
+ foldM (\m (k,v) ->
+ if ignorable k
+ then return m
+ else do
+ v' <- yamlToMeta v
+ return $ do
+ MetaMap m' <- m
+ v'' <- v'
+ return (MetaMap $ M.insert (T.unpack k) v'' m'))
+ (return $ MetaMap M.empty)
+ alist
+yamlToMeta _ = return $ return $ MetaString ""
+
+stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-mmdTitleBlock :: MarkdownParser ()
+mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
firstPair <- kvPair False
@@ -340,49 +353,44 @@ mmdTitleBlock = try $ do
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
return (Meta $ M.fromList kvPairs) }
-kvPair :: Bool -> MarkdownParser (String, MetaValue)
+kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue)
kvPair allowEmpty = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
val <- trim <$> manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
guard $ allowEmpty || not (null val)
let key' = concat $ words $ map toLower key
- let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
+ let val' = MetaBlocks $ B.toList $ B.plain $B.text val
return (key',val')
-parseMarkdown :: MarkdownParser Pandoc
+parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
parseMarkdown = do
- -- markdown allows raw HTML
- updateState $ \state -> state { stateOptions =
- let oldOpts = stateOptions state in
- oldOpts{ readerParseRaw = True } }
optional titleBlock
blocks <- parseBlocks
st <- getState
- let meta = runF (stateMeta' st) st
- let Pandoc _ bs = B.doc $ runF blocks st
- eastAsianLineBreaks <- option False $
- True <$ guardEnabled Ext_east_asian_line_breaks
- return $ (if eastAsianLineBreaks
- then bottomUp softBreakFilter
- else id) $ Pandoc meta bs
-
-softBreakFilter :: [Inline] -> [Inline]
-softBreakFilter (x:SoftBreak:y:zs) =
- case (stringify x, stringify y) of
- (xs@(_:_), (c:_))
- | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
- _ -> x:SoftBreak:y:zs
-softBreakFilter xs = xs
-
-referenceKey :: MarkdownParser (F Blocks)
+ -- check for notes with no corresponding note references
+ let notesUsed = stateNoteRefs st
+ let notesDefined = M.keys (stateNotes' st)
+ mapM_ (\n -> unless (n `Set.member` notesUsed) $
+ case M.lookup n (stateNotes' st) of
+ Just (pos, _) -> report (NoteDefinedButNotUsed n pos)
+ Nothing -> throwError $
+ PandocShouldNeverHappenError "note not found")
+ notesDefined
+ let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
+ meta <- stateMeta' st
+ return $ Pandoc meta bs) st
+ reportLogMessages
+ return doc
+
+referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
pos <- getPosition
skipNonindentSpaces
(_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let sourceURL = liftM unwords $ many $ try $ do
+ let sourceURL = fmap unwords $ many $ try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
@@ -392,7 +400,9 @@ referenceKey = try $ do
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
attr <- option nullAttr $ try $
- guardEnabled Ext_link_attributes >> skipSpaces >> attributes
+ do guardEnabled Ext_link_attributes
+ skipSpaces >> optional newline >> skipSpaces
+ attributes
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
>> many (try $ spnl >> keyValAttr)
blanklines
@@ -402,18 +412,22 @@ referenceKey = try $ do
let oldkeys = stateKeys st
let key = toKey raw
case M.lookup key oldkeys of
- Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'"
- Nothing -> return ()
+ Just (t,a) | not (t == target && a == attr') ->
+ -- We don't warn on two duplicate keys if the targets are also
+ -- the same. This can happen naturally with --reference-location=block
+ -- or section. See #3701.
+ logMessage $ DuplicateLinkReference raw pos
+ _ -> return ()
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
-referenceTitle :: MarkdownParser String
+referenceTitle :: PandocMonad m => MarkdownParser m String
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
-- A link title in quotes
-quotedTitle :: Char -> MarkdownParser String
+quotedTitle :: PandocMonad m => Char -> MarkdownParser m String
quotedTitle c = try $ do
char c
notFollowedBy spaces
@@ -425,7 +439,7 @@ quotedTitle c = try $ do
-- | PHP Markdown Extra style abbreviation key. Currently
-- we just skip them, since Pandoc doesn't have an element for
-- an abbreviation.
-abbrevKey :: MarkdownParser (F Blocks)
+abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks)
abbrevKey = do
guardEnabled Ext_abbreviations
try $ do
@@ -436,23 +450,23 @@ abbrevKey = do
blanklines
return $ return mempty
-noteMarker :: MarkdownParser String
+noteMarker :: PandocMonad m => MarkdownParser m String
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
-rawLine :: MarkdownParser String
+rawLine :: PandocMonad m => MarkdownParser m String
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
-rawLines :: MarkdownParser String
+rawLines :: PandocMonad m => MarkdownParser m String
rawLines = do
first <- anyLine
rest <- many rawLine
return $ unlines (first:rest)
-noteBlock :: MarkdownParser (F Blocks)
+noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
noteBlock = try $ do
pos <- getPosition
skipNonindentSpaces
@@ -464,26 +478,23 @@ noteBlock = try $ do
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
let raw = unlines (first:rest) ++ "\n"
optional blanklines
- parsed <- parseFromString parseBlocks raw
- let newnote = (ref, parsed)
+ parsed <- parseFromString' parseBlocks raw
oldnotes <- stateNotes' <$> getState
- case lookup ref oldnotes of
- Just _ -> addWarning (Just pos) $ "Duplicate note reference `" ++ ref ++ "'"
+ case M.lookup ref oldnotes of
+ Just _ -> logMessage $ DuplicateNoteReference ref pos
Nothing -> return ()
- updateState $ \s -> s { stateNotes' = newnote : oldnotes }
+ updateState $ \s -> s { stateNotes' = M.insert ref (pos, parsed) oldnotes }
return mempty
--
-- parsing blocks
--
-parseBlocks :: MarkdownParser (F Blocks)
+parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
parseBlocks = mconcat <$> manyTill block eof
-block :: MarkdownParser (F Blocks)
+block :: PandocMonad m => MarkdownParser m (F Blocks)
block = do
- tr <- getOption readerTrace
- pos <- getPosition
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
@@ -493,10 +504,10 @@ block = do
, header
, lhsCodeBlock
, divHtml
+ , divFenced
, htmlBlock
, table
, codeBlockIndented
- , guardEnabled Ext_latex_macros *> (macro >>= return . return)
, rawTeXBlock
, lineBlock
, blockQuote
@@ -509,30 +520,29 @@ block = do
, para
, plain
] <?> "block"
- when tr $ do
- st <- getState
- trace (printf "line %d: %s" (sourceLine pos)
- (take 60 $ show $ B.toList $ runF res st)) (return ())
+ trace (take 60 $ show $ B.toList $ runF res defaultParserState)
return res
--
-- header blocks
--
-header :: MarkdownParser (F Blocks)
+header :: PandocMonad m => MarkdownParser m (F Blocks)
header = setextHeader <|> atxHeader <?> "header"
-atxChar :: MarkdownParser Char
+atxChar :: PandocMonad m => MarkdownParser m Char
atxChar = do
exts <- getOption readerExtensions
- return $ if Set.member Ext_literate_haskell exts
- then '=' else '#'
+ return $ if extensionEnabled Ext_literate_haskell exts
+ then '='
+ else '#'
-atxHeader :: MarkdownParser (F Blocks)
+atxHeader :: PandocMonad m => MarkdownParser m (F Blocks)
atxHeader = try $ do
- level <- atxChar >>= many1 . char >>= return . length
+ level <- fmap length (atxChar >>= many1 . char)
notFollowedBy $ guardEnabled Ext_fancy_lists >>
(char '.' <|> char ')') -- this would be a list
+ guardDisabled Ext_space_in_atx_header <|> notFollowedBy nonspaceChar
skipSpaces
(text, raw) <- withRaw $
trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
@@ -542,7 +552,7 @@ atxHeader = try $ do
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
-atxClosing :: MarkdownParser Attr
+atxClosing :: PandocMonad m => MarkdownParser m Attr
atxClosing = try $ do
attr' <- option nullAttr
(guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
@@ -553,7 +563,7 @@ atxClosing = try $ do
blanklines
return attr
-setextHeaderEnd :: MarkdownParser Attr
+setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr
setextHeaderEnd = try $ do
attr <- option nullAttr
$ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
@@ -561,13 +571,18 @@ setextHeaderEnd = try $ do
blanklines
return attr
-mmdHeaderIdentifier :: MarkdownParser Attr
+mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
mmdHeaderIdentifier = do
- ident <- stripFirstAndLast . snd <$> reference
+ (_, raw) <- reference
+ let raw' = trim $ stripFirstAndLast raw
+ let ident = concat $ words $ map toLower raw'
+ let attr = (ident, [], [])
+ guardDisabled Ext_implicit_header_references
+ <|> registerImplicitHeader raw' attr
skipSpaces
- return (ident,[],[])
+ return attr
-setextHeader :: MarkdownParser (F Blocks)
+setextHeader :: PandocMonad m => MarkdownParser m (F Blocks)
setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
@@ -579,13 +594,13 @@ setextHeader = try $ do
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
- let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
+ let level = fromMaybe 0 (elemIndex underlineChar setextHChars) + 1
attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
-registerImplicitHeader :: String -> Attr -> MarkdownParser ()
+registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m ()
registerImplicitHeader raw attr@(ident, _, _) = do
let key = toKey $ "[" ++ raw ++ "]"
updateState (\s -> s { stateHeaderKeys =
@@ -595,7 +610,7 @@ registerImplicitHeader raw attr@(ident, _, _) = do
-- hrule block
--
-hrule :: Parser [Char] st (F Blocks)
+hrule :: PandocMonad m => ParserT [Char] st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -609,20 +624,21 @@ hrule = try $ do
-- code blocks
--
-indentedLine :: MarkdownParser String
-indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
+indentedLine :: PandocMonad m => MarkdownParser m String
+indentedLine = indentSpaces >> anyLineNewline
-blockDelimiter :: (Char -> Bool)
+blockDelimiter :: PandocMonad m
+ => (Char -> Bool)
-> Maybe Int
- -> Parser [Char] st Int
+ -> ParserT [Char] ParserState m Int
blockDelimiter f len = try $ do
+ skipNonindentSpaces
c <- lookAhead (satisfy f)
case len of
Just l -> count l (char c) >> many (char c) >> return l
- Nothing -> count 3 (char c) >> many (char c) >>=
- return . (+ 3) . length
+ Nothing -> fmap ((+ 3) . length) (count 3 (char c) >> many (char c))
-attributes :: MarkdownParser Attr
+attributes :: PandocMonad m => MarkdownParser m Attr
attributes = try $ do
char '{'
spnl
@@ -630,28 +646,28 @@ attributes = try $ do
char '}'
return $ foldl (\x f -> f x) nullAttr attrs
-attribute :: MarkdownParser (Attr -> Attr)
+attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
-identifier :: MarkdownParser String
+identifier :: PandocMonad m => MarkdownParser m String
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
return (first:rest)
-identifierAttr :: MarkdownParser (Attr -> Attr)
+identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
identifierAttr = try $ do
char '#'
result <- identifier
return $ \(_,cs,kvs) -> (result,cs,kvs)
-classAttr :: MarkdownParser (Attr -> Attr)
+classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
classAttr = try $ do
char '.'
result <- identifier
return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs)
-keyValAttr :: MarkdownParser (Attr -> Attr)
+keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
keyValAttr = try $ do
key <- identifier
char '='
@@ -664,33 +680,53 @@ keyValAttr = try $ do
"class" -> (id',cs ++ words val,kvs)
_ -> (id',cs,kvs ++ [(key,val)])
-specialAttr :: MarkdownParser (Attr -> Attr)
+specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
-codeBlockFenced :: MarkdownParser (F Blocks)
+rawAttribute :: PandocMonad m => MarkdownParser m String
+rawAttribute = do
+ char '{'
+ skipMany spaceChar
+ char '='
+ format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_")
+ skipMany spaceChar
+ char '}'
+ return format
+
+codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
+ indentchars <- nonindentSpaces
+ let indentLevel = length indentchars
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
skipMany spaceChar
- attr <- option ([],[],[]) $
- try (guardEnabled Ext_fenced_code_attributes >> attributes)
- <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
+ rawattr <-
+ (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ <|>
+ (Right <$> option ("",[],[])
+ (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+ <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)))
blankline
- contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
+ contents <- intercalate "\n" <$>
+ manyTill (gobbleAtMostSpaces indentLevel >> anyLine)
+ (blockDelimiter (== c) (Just size))
blanklines
- return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+ return $ return $
+ case rawattr of
+ Left syn -> B.rawBlock syn contents
+ Right attr -> B.codeBlockWith attr contents
-- correctly handle github language identifiers
toLanguageId :: String -> String
toLanguageId = map toLower . go
- where go "c++" = "cpp"
+ where go "c++" = "cpp"
go "objective-c" = "objectivec"
- go x = x
+ go x = x
-codeBlockIndented :: MarkdownParser (F Blocks)
+codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
@@ -701,7 +737,7 @@ codeBlockIndented = do
return $ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ concat contents
-lhsCodeBlock :: MarkdownParser (F Blocks)
+lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lhsCodeBlock = do
guardEnabled Ext_literate_haskell
(return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
@@ -709,7 +745,7 @@ lhsCodeBlock = do
<|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
lhsCodeBlockInverseBird)
-lhsCodeBlockLaTeX :: MarkdownParser String
+lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String
lhsCodeBlockLaTeX = try $ do
string "\\begin{code}"
manyTill spaceChar newline
@@ -717,13 +753,13 @@ lhsCodeBlockLaTeX = try $ do
blanklines
return $ stripTrailingNewlines contents
-lhsCodeBlockBird :: MarkdownParser String
+lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
-lhsCodeBlockInverseBird :: MarkdownParser String
+lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
-lhsCodeBlockBirdWith :: Char -> MarkdownParser String
+lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String
lhsCodeBlockBirdWith c = try $ do
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
@@ -735,7 +771,7 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ intercalate "\n" lns'
-birdTrackLine :: Char -> Parser [Char] st String
+birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -746,10 +782,10 @@ birdTrackLine c = try $ do
-- block quotes
--
-emailBlockQuoteStart :: MarkdownParser Char
+emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
-emailBlockQuote :: MarkdownParser [String]
+emailBlockQuote :: PandocMonad m => MarkdownParser m [String]
emailBlockQuote = try $ do
emailBlockQuoteStart
let emailLine = many $ nonEndline <|> try
@@ -763,113 +799,136 @@ emailBlockQuote = try $ do
optional blanklines
return raw
-blockQuote :: MarkdownParser (F Blocks)
+blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n"
return $ B.blockQuote <$> contents
--
-- list blocks
--
-bulletListStart :: MarkdownParser ()
+bulletListStart :: PandocMonad m => MarkdownParser m ()
bulletListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
- startpos <- sourceColumn <$> getPosition
skipNonindentSpaces
notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
satisfy isBulletListMarker
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- () <$ atMostSpaces (tabStop - (endpos - startpos))
+ gobbleSpaces 1 <|> () <$ lookAhead newline
+ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) <|> return ()
-anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim)
-anyOrderedListStart = try $ do
+orderedListStart :: PandocMonad m
+ => Maybe (ListNumberStyle, ListNumberDelim)
+ -> MarkdownParser m (Int, ListNumberStyle, ListNumberDelim)
+orderedListStart mbstydelim = try $ do
optional newline -- if preceded by a Plain block in a list context
- startpos <- sourceColumn <$> getPosition
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
- res <- do guardDisabled Ext_fancy_lists
- start <- many1 digit >>= safeRead
- char '.'
- return (start, DefaultStyle, DefaultDelim)
- <|> do (num, style, delim) <- anyOrderedListMarker
- -- if it could be an abbreviated first name,
- -- insist on more than one space
- when (delim == Period && (style == UpperAlpha ||
- (style == UpperRoman &&
- num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
- () <$ spaceChar
- return (num, style, delim)
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- atMostSpaces (tabStop - (endpos - startpos))
- return res
-
-listStart :: MarkdownParser ()
-listStart = bulletListStart <|> (anyOrderedListStart >> return ())
-
-listLine :: MarkdownParser String
-listLine = try $ do
- notFollowedBy' (do indentSpaces
- many spaceChar
+ (do guardDisabled Ext_fancy_lists
+ start <- many1 digit >>= safeRead
+ char '.'
+ gobbleSpaces 1 <|> () <$ lookAhead newline
+ optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar)
+ return (start, DefaultStyle, DefaultDelim))
+ <|>
+ (do (num, style, delim) <- maybe
+ anyOrderedListMarker
+ (\(sty,delim) -> (\start -> (start,sty,delim)) <$>
+ orderedListMarker sty delim)
+ mbstydelim
+ gobbleSpaces 1 <|> () <$ lookAhead newline
+ -- if it could be an abbreviated first name,
+ -- insist on more than one space
+ when (delim == Period && (style == UpperAlpha ||
+ (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
+ () <$ lookAhead (newline <|> spaceChar)
+ optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar)
+ return (num, style, delim))
+
+listStart :: PandocMonad m => MarkdownParser m ()
+listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing)
+
+listLine :: PandocMonad m => Int -> MarkdownParser m String
+listLine continuationIndent = try $ do
+ notFollowedBy' (do gobbleSpaces continuationIndent
+ skipMany spaceChar
listStart)
notFollowedByHtmlCloser
- optional (() <$ indentSpaces)
+ notFollowedByDivCloser
+ optional (() <$ gobbleSpaces continuationIndent)
listLineCommon
-listLineCommon :: MarkdownParser String
+listLineCommon :: PandocMonad m => MarkdownParser m String
listLineCommon = concat <$> manyTill
( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
- <|> liftM snd (htmlTag isCommentTag)
+ <|> fmap snd (htmlTag isCommentTag)
<|> count 1 anyChar
) newline
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: MarkdownParser a
- -> MarkdownParser String
-rawListItem start = try $ do
+rawListItem :: PandocMonad m
+ => Bool -- four space rule
+ -> MarkdownParser m a
+ -> MarkdownParser m (String, Int)
+rawListItem fourSpaceRule start = try $ do
+ pos1 <- getPosition
start
+ pos2 <- getPosition
+ let continuationIndent = if fourSpaceRule
+ then 4
+ else sourceColumn pos2 - sourceColumn pos1
first <- listLineCommon
- rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
+ rest <- many (do notFollowedBy listStart
+ notFollowedBy (() <$ codeBlockFenced)
+ notFollowedBy blankline
+ listLine continuationIndent)
blanks <- many blankline
- return $ unlines (first:rest) ++ blanks
+ let result = unlines (first:rest) ++ blanks
+ return (result, continuationIndent)
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation :: MarkdownParser String
-listContinuation = try $ do
- lookAhead indentSpaces
- result <- many1 listContinuationLine
+listContinuation :: PandocMonad m => Int -> MarkdownParser m String
+listContinuation continuationIndent = try $ do
+ x <- try $ do
+ notFollowedBy blankline
+ notFollowedByHtmlCloser
+ notFollowedByDivCloser
+ gobbleSpaces continuationIndent
+ anyLineNewline
+ xs <- many $ try $ do
+ notFollowedBy blankline
+ notFollowedByHtmlCloser
+ notFollowedByDivCloser
+ gobbleSpaces continuationIndent <|> notFollowedBy' listStart
+ anyLineNewline
blanks <- many blankline
- return $ concat result ++ blanks
+ return $ concat (x:xs) ++ blanks
+
+notFollowedByDivCloser :: PandocMonad m => MarkdownParser m ()
+notFollowedByDivCloser =
+ guardDisabled Ext_fenced_divs <|>
+ do divLevel <- stateFencedDivLevel <$> getState
+ guard (divLevel < 1) <|> notFollowedBy divFenceEnd
-notFollowedByHtmlCloser :: MarkdownParser ()
+notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m ()
notFollowedByHtmlCloser = do
inHtmlBlock <- stateInHtmlBlock <$> getState
case inHtmlBlock of
Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
Nothing -> return ()
-listContinuationLine :: MarkdownParser String
-listContinuationLine = try $ do
- notFollowedBy blankline
- notFollowedBy' listStart
- notFollowedByHtmlCloser
- optional indentSpaces
- result <- anyLine
- return $ result ++ "\n"
-
-listItem :: MarkdownParser a
- -> MarkdownParser (F Blocks)
-listItem start = try $ do
- first <- rawListItem start
- continuations <- many listContinuation
+listItem :: PandocMonad m
+ => Bool -- four-space rule
+ -> MarkdownParser m a
+ -> MarkdownParser m (F Blocks)
+listItem fourSpaceRule start = try $ do
+ (first, continuationIndent) <- rawListItem fourSpaceRule start
+ continuations <- many (listContinuation continuationIndent)
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
@@ -878,39 +937,34 @@ listItem start = try $ do
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may contain various block elements:
let raw = concat (first:continuations)
- contents <- parseFromString parseBlocks raw
+ contents <- parseFromString' parseBlocks raw
updateState (\st -> st {stateParserContext = oldContext})
return contents
-orderedList :: MarkdownParser (F Blocks)
+orderedList :: PandocMonad m => MarkdownParser m (F Blocks)
orderedList = try $ do
- (start, style, delim) <- lookAhead anyOrderedListStart
+ (start, style, delim) <- lookAhead (orderedListStart Nothing)
unless (style `elem` [DefaultStyle, Decimal, Example] &&
delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
- items <- fmap sequence $ many1 $ listItem
- ( try $ do
- optional newline -- if preceded by Plain block in a list
- startpos <- sourceColumn <$> getPosition
- skipNonindentSpaces
- res <- orderedListMarker style delim
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- atMostSpaces (tabStop - (endpos - startpos))
- return res )
- start' <- option 1 $ guardEnabled Ext_startnum >> return start
- return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items
-
-bulletList :: MarkdownParser (F Blocks)
+ fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule)
+ <|> return (style == Example)
+ items <- fmap sequence $ many1 $ listItem fourSpaceRule
+ (orderedListStart (Just (style, delim)))
+ start' <- (start <$ guardEnabled Ext_startnum) <|> return 1
+ return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
+
+bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
bulletList = do
- items <- fmap sequence $ many1 $ listItem bulletListStart
- return $ B.bulletList <$> fmap compactify' items
+ fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule)
+ <|> return False
+ items <- fmap sequence $ many1 $ listItem fourSpaceRule bulletListStart
+ return $ B.bulletList <$> fmap compactify items
-- definition lists
-defListMarker :: MarkdownParser ()
+defListMarker :: PandocMonad m => MarkdownParser m ()
defListMarker = do
sps <- nonindentSpaces
char ':' <|> char '~'
@@ -921,52 +975,53 @@ defListMarker = do
else mzero
return ()
-definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks]))
+definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks]))
definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
- term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
- contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
+ term <- parseFromString' (trimInlinesF <$> inlines) rawLine'
+ contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw
optional blanklines
return $ liftM2 (,) term (sequence contents)
-defRawBlock :: Bool -> MarkdownParser String
+defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
defRawBlock compact = try $ do
hasBlank <- option False $ blankline >> return True
defListMarker
- firstline <- anyLine
+ firstline <- anyLineNewline
let dline = try
( do notFollowedBy blankline
notFollowedByHtmlCloser
+ notFollowedByDivCloser
if compact -- laziness not compatible with compact
then () <$ indentSpaces
else (() <$ indentSpaces)
<|> notFollowedBy defListMarker
anyLine )
rawlines <- many dline
- cont <- liftM concat $ many $ try $ do
+ cont <- fmap concat $ many $ try $ do
trailing <- option "" blanklines
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
lns <- many dline
return $ trailing ++ unlines (ln:lns)
- return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
+ return $ trimr (firstline ++ unlines rawlines ++ cont) ++
if hasBlank || not (null cont) then "\n\n" else ""
-definitionList :: MarkdownParser (F Blocks)
+definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
definitionList = try $ do
lookAhead (anyLine >>
- optional (blankline >> notFollowedBy (table >> return ())) >>
+ optional (blankline >> notFollowedBy (Control.Monad.void table)) >>
-- don't capture table caption as def list!
defListMarker)
compactDefinitionList <|> normalDefinitionList
-compactDefinitionList :: MarkdownParser (F Blocks)
+compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
compactDefinitionList = do
guardEnabled Ext_compact_definition_lists
items <- fmap sequence $ many1 $ definitionListItem True
- return $ B.definitionList <$> fmap compactify'DL items
+ return $ B.definitionList <$> fmap compactifyDL items
-normalDefinitionList :: MarkdownParser (F Blocks)
+normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
normalDefinitionList = do
guardEnabled Ext_definition_lists
items <- fmap sequence $ many1 $ definitionListItem False
@@ -976,10 +1031,10 @@ normalDefinitionList = do
-- paragraph block
--
-para :: MarkdownParser (F Blocks)
+para :: PandocMonad m => MarkdownParser m (F Blocks)
para = try $ do
exts <- getOption readerExtensions
- result <- trimInlinesF . mconcat <$> many1 inline
+ result <- trimInlinesF <$> inlines1
option (B.plain <$> result)
$ try $ do
newline
@@ -997,29 +1052,35 @@ para = try $ do
Just "div" -> () <$
lookAhead (htmlTag (~== TagClose "div"))
_ -> mzero
+ <|> do guardEnabled Ext_fenced_divs
+ divLevel <- stateFencedDivLevel <$> getState
+ if divLevel > 0
+ then lookAhead divFenceEnd
+ else mzero
return $ do
result' <- result
case B.toList result' of
[Image attr alt (src,tit)]
- | Ext_implicit_figures `Set.member` exts ->
+ | not (null alt) &&
+ Ext_implicit_figures `extensionEnabled` exts ->
-- the fig: at beginning of title indicates a figure
return $ B.para $ B.singleton
$ Image attr alt (src,'f':'i':'g':':':tit)
_ -> return $ B.para result'
-plain :: MarkdownParser (F Blocks)
-plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
+plain :: PandocMonad m => MarkdownParser m (F Blocks)
+plain = fmap B.plain . trimInlinesF <$> inlines1
--
-- raw html
--
-htmlElement :: MarkdownParser String
+htmlElement :: PandocMonad m => MarkdownParser m String
htmlElement = rawVerbatimBlock
<|> strictHtmlBlock
- <|> liftM snd (htmlTag isBlockTag)
+ <|> fmap snd (htmlTag isBlockTag)
-htmlBlock :: MarkdownParser (F Blocks)
+htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks)
htmlBlock = do
guardEnabled Ext_raw_html
try (do
@@ -1044,43 +1105,59 @@ htmlBlock = do
<|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
<|> htmlBlock'
-htmlBlock' :: MarkdownParser (F Blocks)
+htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
- return $ return $ B.rawBlock "html" first
+ return $ if null first
+ then mempty
+ else return $ B.rawBlock "html" first
-strictHtmlBlock :: MarkdownParser String
+strictHtmlBlock :: PandocMonad m => MarkdownParser m String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
-rawVerbatimBlock :: MarkdownParser String
+rawVerbatimBlock :: PandocMonad m => MarkdownParser m String
rawVerbatimBlock = htmlInBalanced isVerbTag
where isVerbTag (TagOpen "pre" _) = True
isVerbTag (TagOpen "style" _) = True
isVerbTag (TagOpen "script" _) = True
isVerbTag _ = False
-rawTeXBlock :: MarkdownParser (F Blocks)
+rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- (B.rawBlock "latex" . concat <$>
- rawLaTeXBlock `sepEndBy1` blankline)
- <|> (B.rawBlock "context" . concat <$>
- rawConTeXtEnvironment `sepEndBy1` blankline)
- spaces
- return $ return result
-
-rawHtmlBlocks :: MarkdownParser (F Blocks)
+ lookAhead $ try $ char '\\' >> letter
+ result <- (B.rawBlock "context" . trim . concat <$>
+ many1 ((++) <$> (rawConTeXtEnvironment <|> conTeXtCommand)
+ <*> spnl'))
+ <|> (B.rawBlock "latex" . trim . concat <$>
+ many1 ((++) <$> rawLaTeXBlock <*> spnl'))
+ return $ case B.toList result of
+ [RawBlock _ cs]
+ | all (`elem` [' ','\t','\n']) cs -> return mempty
+ -- don't create a raw block for suppressed macro defs
+ _ -> return result
+
+conTeXtCommand :: PandocMonad m => MarkdownParser m String
+conTeXtCommand = oneOfStrings ["\\placeformula"]
+
+rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
+ -- we don't want '<td> text' to be a code block:
+ skipMany spaceChar
+ indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
-- try to find closing tag
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
let closer = htmlTag (\x -> x ~== TagClose tagtype)
- contents <- mconcat <$> many (notFollowedBy' closer >> block)
+ let block' = do notFollowedBy' closer
+ gobbleAtMostSpaces indentlevel
+ block
+ contents <- mconcat <$> many block'
result <-
(closer >>= \(_, rawcloser) -> return (
return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
@@ -1101,11 +1178,11 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
-- line block
--
-lineBlock :: MarkdownParser (F Blocks)
+lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lineBlock = try $ do
guardEnabled Ext_line_blocks
lines' <- lineBlockLines >>=
- mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
+ mapM (parseFromString' (trimInlinesF <$> inlines))
return $ B.lineBlock <$> sequence lines'
--
@@ -1114,8 +1191,9 @@ lineBlock = try $ do
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
-dashedLine :: Char
- -> Parser [Char] st (Int, Int)
+dashedLine :: PandocMonad m
+ => Char
+ -> ParserT [Char] st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
@@ -1125,8 +1203,9 @@ dashedLine ch = do
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
-simpleTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+simpleTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m (F [Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -1137,17 +1216,17 @@ simpleTableHeader headless = try $ do
let (lengths, lines') = unzip dashes
let indices = scanl (+) (length initSp) lines'
-- If no header, calculate alignment on basis of first row of text
- rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
+ rawHeads <- fmap (tail . splitStringByIndices (init indices)) $
if headless
then lookAhead anyLine
else return rawContent
- let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ let aligns = zipWith alignType (map (: []) rawHeads) lengths
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
heads <- fmap sequence
- $ mapM (parseFromString (mconcat <$> many plain))
- $ map trim rawHeads'
+ $
+ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads'
return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
@@ -1161,25 +1240,26 @@ alignType strLst len =
let nonempties = filter (not . null) $ map trimr strLst
(leftSpace, rightSpace) =
case sortBy (comparing length) nonempties of
- (x:_) -> (head x `elem` " \t", length x < len)
- [] -> (False, False)
+ (x:_) -> (head x `elem` " \t", length x < len)
+ [] -> (False, False)
in case (leftSpace, rightSpace) of
- (True, False) -> AlignRight
- (False, True) -> AlignLeft
- (True, True) -> AlignCenter
- (False, False) -> AlignDefault
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: MarkdownParser String
+tableFooter :: PandocMonad m => MarkdownParser m String
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
-- Parse a table separator - dashed line.
-tableSep :: MarkdownParser Char
+tableSep :: PandocMonad m => MarkdownParser m Char
tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
-- Parse a raw line and split it into chunks by indices.
-rawTableLine :: [Int]
- -> MarkdownParser [String]
+rawTableLine :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m [String]
rawTableLine indices = do
notFollowedBy' (blanklines <|> tableFooter)
line <- many1Till anyChar newline
@@ -1187,31 +1267,34 @@ rawTableLine indices = do
splitStringByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
-tableLine :: [Int]
- -> MarkdownParser (F [Blocks])
+tableLine :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m (F [Blocks])
tableLine indices = rawTableLine indices >>=
- fmap sequence . mapM (parseFromString (mconcat <$> many plain))
+ fmap sequence . mapM (parseFromString' (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
-multilineRow :: [Int]
- -> MarkdownParser (F [Blocks])
+multilineRow :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
+ fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
-tableCaption :: MarkdownParser (F Inlines)
+tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
tableCaption = try $ do
guardEnabled Ext_table_captions
skipNonindentSpaces
- string ":" <|> string "Table:"
- trimInlinesF . mconcat <$> many1 inline <* blanklines
+ (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
+ trimInlinesF <$> inlines1 <* blanklines
-- Parse a simple table with '---' header and one line per row.
-simpleTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+simpleTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
simpleTable headless = do
(aligns, _widths, heads', lines') <-
tableWith (simpleTableHeader headless) tableLine
@@ -1224,13 +1307,15 @@ simpleTable headless = do
-- (which may be multiline), then the rows,
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-multilineTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+multilineTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
-multilineTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+multilineTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m (F [Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
unless headless $
tableSep >> notFollowedBy blankline
@@ -1243,7 +1328,7 @@ multilineTableHeader headless = try $ do
let (lengths, lines') = unzip dashes
let indices = scanl (+) (length initSp) lines'
rawHeadsList <- if headless
- then liftM (map (:[]) . tail .
+ then fmap (map (:[]) . tail .
splitStringByIndices (init indices)) $ lookAhead anyLine
else return $ transpose $ map
(tail . splitStringByIndices (init indices))
@@ -1253,101 +1338,18 @@ multilineTableHeader headless = try $ do
then replicate (length dashes) ""
else map (unlines . map trim) rawHeadsList
heads <- fmap sequence $
- mapM (parseFromString (mconcat <$> many plain)) $
- map trim rawHeads
+ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads
return (heads, aligns, indices)
-- Parse a grid table: starts with row of '-' on top, then header
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
-gridTable headless =
- tableWith (gridTableHeader headless) gridTableRow
- (gridTableSep '-') gridTableFooter
-
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line = map removeFinalBar $ tail $
- splitStringByIndices (init indices) $ trimr line
-
-gridPart :: Char -> Parser [Char] st ((Int, Int), Alignment)
-gridPart ch = do
- leftColon <- option False (True <$ char ':')
- dashes <- many1 (char ch)
- rightColon <- option False (True <$ char ':')
- char '+'
- let lengthDashes = length dashes + (if leftColon then 1 else 0) +
- (if rightColon then 1 else 0)
- let alignment = case (leftColon, rightColon) of
- (True, True) -> AlignCenter
- (True, False) -> AlignLeft
- (False, True) -> AlignRight
- (False, False) -> AlignDefault
- return ((lengthDashes, lengthDashes + 1), alignment)
-
-gridDashedLines :: Char -> Parser [Char] st [((Int, Int), Alignment)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
-
-removeFinalBar :: String -> String
-removeFinalBar =
- reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-
--- | Separator between rows of grid table.
-gridTableSep :: Char -> MarkdownParser Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
--- | Parse header for a grid table.
-gridTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
-gridTableHeader headless = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return []
- else many1 (try (char '|' >> anyLine))
- underDashes <- if headless
- then return dashes
- else gridDashedLines '='
- guard $ length dashes == length underDashes
- let lines' = map (snd . fst) underDashes
- let indices = scanl (+) 0 lines'
- let aligns = map snd underDashes
- let rawHeads = if headless
- then replicate (length underDashes) ""
- else map (unlines . map trim) $ transpose
- $ map (gridTableSplitLine indices) rawContent
- heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
- return (heads, aligns, indices)
+gridTable :: PandocMonad m => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
+gridTable headless = gridTableWith' parseBlocks headless
-gridTableRawLine :: [Int] -> MarkdownParser [String]
-gridTableRawLine indices = do
- char '|'
- line <- anyLine
- return (gridTableSplitLine indices line)
-
--- | Parse row of grid table.
-gridTableRow :: [Int]
- -> MarkdownParser (F [Blocks])
-gridTableRow indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
--- | Parse footer for a grid table.
-gridTableFooter :: MarkdownParser [Char]
-gridTableFooter = blanklines
-
-pipeBreak :: MarkdownParser ([Alignment], [Int])
+pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
pipeBreak = try $ do
nonindentSpaces
openPipe <- (True <$ char '|') <|> return False
@@ -1359,7 +1361,7 @@ pipeBreak = try $ do
blankline
return $ unzip (first:rest)
-pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
pipeTable = try $ do
nonindentSpaces
lookAhead nonspaceChar
@@ -1372,41 +1374,40 @@ pipeTable = try $ do
numColumns <- getOption readerColumns
let widths = if maxlength > numColumns
then map (\len ->
- fromIntegral (len + 1) / fromIntegral numColumns)
- seplengths
+ fromIntegral len / fromIntegral (sum seplengths))
+ seplengths
else replicate (length aligns) 0.0
- return $ (aligns, widths, heads', sequence lines'')
+ return (aligns, widths, heads', sequence lines'')
-sepPipe :: MarkdownParser ()
+sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do
char '|' <|> char '+'
notFollowedBy blankline
-- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: MarkdownParser (F [Blocks])
+pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
pipeTableRow = try $ do
scanForPipe
skipMany spaceChar
openPipe <- (True <$ char '|') <|> return False
-- split into cells
- let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
+ let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
<|> void (noneOf "|\n\r")
let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
- parseFromString pipeTableCell
- cells <- cellContents `sepEndBy1` (char '|')
+ parseFromString' pipeTableCell
+ cells <- cellContents `sepEndBy1` char '|'
-- surrounding pipes needed for a one-column table:
guard $ not (length cells == 1 && not openPipe)
blankline
return $ sequence cells
-pipeTableCell :: MarkdownParser (F Blocks)
-pipeTableCell = do
- result <- many inline
- if null result
- then return mempty
- else return $ B.plain . mconcat <$> sequence result
+pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
+pipeTableCell =
+ (do result <- inlines1
+ return $ B.plain <$> result)
+ <|> return mempty
-pipeTableHeaderPart :: Parser [Char] st (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1414,15 +1415,15 @@ pipeTableHeaderPart = try $ do
right <- optionMaybe (char ':')
skipMany spaceChar
let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
- return $
- ((case (left,right) of
- (Nothing,Nothing) -> AlignDefault
- (Just _,Nothing) -> AlignLeft
- (Nothing,Just _) -> AlignRight
- (Just _,Just _) -> AlignCenter), len)
+ return
+ (case (left,right) of
+ (Nothing,Nothing) -> AlignDefault
+ (Just _,Nothing) -> AlignLeft
+ (Nothing,Just _) -> AlignRight
+ (Just _,Just _) -> AlignCenter, len)
-- Succeed only if current line contains a pipe.
-scanForPipe :: Parser [Char] st ()
+scanForPipe :: PandocMonad m => ParserT [Char] st m ()
scanForPipe = do
inp <- getInput
case break (\c -> c == '\n' || c == '|') inp of
@@ -1432,22 +1433,23 @@ scanForPipe = do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
-- Text.Pandoc.Parsing.
-tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int])
- -> ([Int] -> MarkdownParser (F [Blocks]))
- -> MarkdownParser sep
- -> MarkdownParser end
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+tableWith :: PandocMonad m
+ => MarkdownParser m (F [Blocks], [Alignment], [Int])
+ -> ([Int] -> MarkdownParser m (F [Blocks]))
+ -> MarkdownParser m sep
+ -> MarkdownParser m end
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
- let widths = if (indices == [])
+ let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
- return $ (aligns, widths, heads, lines')
+ return (aligns, widths, heads, lines')
-table :: MarkdownParser (F Blocks)
+table :: PandocMonad m => MarkdownParser m (F Blocks)
table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <-
@@ -1462,8 +1464,8 @@ table = try $ do
(gridTable False <|> gridTable True)) <?> "table"
optional blanklines
caption <- case frontCaption of
- Nothing -> option (return mempty) tableCaption
- Just c -> return c
+ Nothing -> option (return mempty) tableCaption
+ Just c -> return c
-- renormalize widths if greater than 100%:
let totalWidth = sum widths
let widths' = if totalWidth < 1
@@ -1479,7 +1481,13 @@ table = try $ do
-- inline
--
-inline :: MarkdownParser (F Inlines)
+inlines :: PandocMonad m => MarkdownParser m (F Inlines)
+inlines = mconcat <$> many inline
+
+inlines1 :: PandocMonad m => MarkdownParser m (F Inlines)
+inlines1 = mconcat <$> many1 inline
+
+inline :: PandocMonad m => MarkdownParser m (F Inlines)
inline = choice [ whitespace
, bareURL
, str
@@ -1499,6 +1507,7 @@ inline = choice [ whitespace
, autoLink
, spanHtml
, rawHtmlInline
+ , escapedNewline
, escapedChar
, rawLaTeXInline'
, exampleRef
@@ -1509,32 +1518,36 @@ inline = choice [ whitespace
, ltSign
] <?> "inline"
-escapedChar' :: MarkdownParser Char
+escapedChar' :: PandocMonad m => MarkdownParser m Char
escapedChar' = try $ do
char '\\'
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
<|> (guardEnabled Ext_angle_brackets_escapable >>
oneOf "\\`*_{}[]()>#+-.!~\"<>")
- <|> (guardEnabled Ext_escaped_line_breaks >> char '\n')
<|> oneOf "\\`*_{}[]()>#+-.!~\""
-escapedChar :: MarkdownParser (F Inlines)
+escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
+escapedNewline = try $ do
+ guardEnabled Ext_escaped_line_breaks
+ char '\\'
+ lookAhead (char '\n') -- don't consume the newline (see #3730)
+ return $ return B.linebreak
+
+escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
escapedChar = do
result <- escapedChar'
case result of
- ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
- '\n' -> guardEnabled Ext_escaped_line_breaks >>
- return (return B.linebreak) -- "\[newline]" is a linebreak
- _ -> return $ return $ B.str [result]
+ ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
+ _ -> return $ return $ B.str [result]
-ltSign :: MarkdownParser (F Inlines)
+ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
ltSign = do
guardDisabled Ext_raw_html
<|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
char '<'
return $ return $ B.str "<"
-exampleRef :: MarkdownParser (F Inlines)
+exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
@@ -1542,10 +1555,10 @@ exampleRef = try $ do
return $ do
st <- askF
return $ case M.lookup lab (stateExamples st) of
- Just n -> B.str (show n)
- Nothing -> B.str ('@':lab)
+ Just n -> B.str (show n)
+ Nothing -> B.str ('@':lab)
-symbol :: MarkdownParser (F Inlines)
+symbol :: PandocMonad m => MarkdownParser m (F Inlines)
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
@@ -1554,28 +1567,36 @@ symbol = do
return $ return $ B.str [result]
-- parses inline code, between n `s and n `s
-code :: MarkdownParser (F Inlines)
+code :: PandocMonad m => MarkdownParser m (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
- result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ result <- (trim . concat) <$>
+ many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
- attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
- >> attributes)
- return $ return $ B.codeWith attr $ trim $ concat result
-
-math :: MarkdownParser (F Inlines)
-math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
- ((getOption readerSmart >>= guard) *> (return <$> apostrophe)
+ rawattr <-
+ (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ <|>
+ (Right <$> option ("",[],[])
+ (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+ return $ return $
+ case rawattr of
+ Left syn -> B.rawInline syn result
+ Right attr -> B.codeWith attr result
+
+math :: PandocMonad m => MarkdownParser m (F Inlines)
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros))
+ <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?>
+ (guardEnabled Ext_smart *> (return <$> apostrophe)
<* notFollowedBy (space <|> satisfy isPunctuation))
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
-enclosure :: Char
- -> MarkdownParser (F Inlines)
+enclosure :: PandocMonad m
+ => Char
+ -> MarkdownParser m (F Inlines)
enclosure c = do
-- we can't start an enclosure with _ if after a string and
-- the intraword_underscores extension is enabled:
@@ -1584,14 +1605,14 @@ enclosure c = do
<|> (guard =<< notAfterString)
cs <- many1 (char c)
(return (B.str cs) <>) <$> whitespace
- <|> do
+ <|>
case length cs of
- 3 -> three c
- 2 -> two c mempty
- 1 -> one c mempty
- _ -> return (return $ B.str cs)
+ 3 -> three c
+ 2 -> two c mempty
+ 1 -> one c mempty
+ _ -> return (return $ B.str cs)
-ender :: Char -> Int -> MarkdownParser ()
+ender :: PandocMonad m => Char -> Int -> MarkdownParser m ()
ender c n = try $ do
count n (char c)
guard (c == '*')
@@ -1602,104 +1623,96 @@ ender c n = try $ do
-- If one c, emit emph and then parse two.
-- If two cs, emit strong and then parse one.
-- Otherwise, emit ccc then the results.
-three :: Char -> MarkdownParser (F Inlines)
+three :: PandocMonad m => Char -> MarkdownParser m (F Inlines)
three c = do
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
- (ender c 3 >> return ((B.strong . B.emph) <$> contents))
- <|> (ender c 2 >> one c (B.strong <$> contents))
- <|> (ender c 1 >> two c (B.emph <$> contents))
+ (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents))
+ <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents))
+ <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents))
<|> return (return (B.str [c,c,c]) <> contents)
-- Parse inlines til you hit two c's, and emit strong.
-- If you never do hit two cs, emit ** plus inlines parsed.
-two :: Char -> F Inlines -> MarkdownParser (F Inlines)
+two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
- (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
+ (ender c 2 >> updateLastStrPos >>
+ return (B.strong <$> (prefix' <> contents)))
<|> return (return (B.str [c,c]) <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
-- If you never hit a c, emit * plus inlines parsed.
-one :: Char -> F Inlines -> MarkdownParser (F Inlines)
+one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
one c prefix' = do
contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
<|> try (string [c,c] >>
notFollowedBy (ender c 1) >>
two c mempty) )
- (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
+ (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents)))
<|> return (return (B.str [c]) <> (prefix' <> contents))
-strongOrEmph :: MarkdownParser (F Inlines)
+strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
strongOrEmph = enclosure '*' <|> enclosure '_'
-- | Parses a list of inlines between start and end delimiters.
-inlinesBetween :: (Show b)
- => MarkdownParser a
- -> MarkdownParser b
- -> MarkdownParser (F Inlines)
+inlinesBetween :: PandocMonad m
+ => (Show b)
+ => MarkdownParser m a
+ -> MarkdownParser m b
+ -> MarkdownParser m (F Inlines)
inlinesBetween start end =
(trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
-strikeout :: MarkdownParser (F Inlines)
+strikeout :: PandocMonad m => MarkdownParser m (F Inlines)
strikeout = fmap B.strikeout <$>
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
-superscript :: MarkdownParser (F Inlines)
+superscript :: PandocMonad m => MarkdownParser m (F Inlines)
superscript = fmap B.superscript <$> try (do
guardEnabled Ext_superscript
char '^'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
-subscript :: MarkdownParser (F Inlines)
+subscript :: PandocMonad m => MarkdownParser m (F Inlines)
subscript = fmap B.subscript <$> try (do
guardEnabled Ext_subscript
char '~'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
-whitespace :: MarkdownParser (F Inlines)
+whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: Parser [Char] st Char
+nonEndline :: PandocMonad m => ParserT [Char] st m Char
nonEndline = satisfy (/='\n')
-str :: MarkdownParser (F Inlines)
+str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
- result <- many1 alphaNum
+ result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
updateLastStrPos
- let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
- isSmart <- getOption readerSmart
- if isSmart
- then case likelyAbbrev result of
- [] -> return $ return $ B.str result
- xs -> choice (map (\x ->
- try (string x >> oneOf " \n" >>
- lookAhead alphaNum >>
- return (return $ B.str
- $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ return $ B.str result)
- else return $ return $ B.str result
-
--- | if the string matches the beginning of an abbreviation (before
--- the first period, return strings that would finish the abbreviation.
-likelyAbbrev :: String -> [String]
-likelyAbbrev x =
- let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.",
- "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.",
- "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.",
- "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.",
- "ch.", "sec.", "cf.", "cp."]
- abbrPairs = map (break (=='.')) abbrevs
- in map snd $ filter (\(y,_) -> y == x) abbrPairs
+ (do guardEnabled Ext_smart
+ abbrevs <- getOption readerAbbreviations
+ if not (null result) && last result == '.' && result `Set.member` abbrevs
+ then try (do ils <- whitespace <|> endline
+ lookAhead alphaNum
+ return $ do
+ ils' <- ils
+ if ils' == B.space
+ then return (B.str result <> B.str "\160")
+ else -- linebreak or softbreak
+ return (ils' <> B.str result <> B.str "\160"))
+ <|> return (return (B.str result))
+ else return (return (B.str result)))
+ <|> return (return (B.str result))
-- an endline character that can be treated as a space, not a structural break
-endline :: MarkdownParser (F Inlines)
+endline :: PandocMonad m => MarkdownParser m (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
@@ -1711,6 +1724,7 @@ endline = try $ do
guardDisabled Ext_backtick_code_blocks <|>
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
notFollowedByHtmlCloser
+ notFollowedByDivCloser
(eof >> return mempty)
<|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
@@ -1721,23 +1735,25 @@ endline = try $ do
--
-- a reference label for a link
-reference :: MarkdownParser (F Inlines, String)
-reference = do notFollowedBy' (string "[^") -- footnote reference
- withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
+reference :: PandocMonad m => MarkdownParser m (F Inlines, String)
+reference = do
+ guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
+ guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
+ withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
-parenthesizedChars :: MarkdownParser [Char]
+parenthesizedChars :: PandocMonad m => MarkdownParser m [Char]
parenthesizedChars = do
result <- charsInBalanced '(' ')' litChar
return $ '(' : result ++ ")"
-- source for a link, with optional title
-source :: MarkdownParser (String, String)
+source :: PandocMonad m => MarkdownParser m (String, String)
source = do
char '('
skipSpaces
let urlChunk =
try parenthesizedChars
- <|> (notFollowedBy (oneOf " )") >> (count 1 litChar))
+ <|> (notFollowedBy (oneOf " )") >> count 1 litChar)
<|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
let sourceURL = (unwords . words . concat) <$> many urlChunk
let betweenAngles = try $
@@ -1748,10 +1764,10 @@ source = do
char ')'
return (escapeURI $ trimr src, tit)
-linkTitle :: MarkdownParser String
+linkTitle :: PandocMonad m => MarkdownParser m String
linkTitle = quotedTitle '"' <|> quotedTitle '\''
-link :: MarkdownParser (F Inlines)
+link :: PandocMonad m => MarkdownParser m (F Inlines)
link = try $ do
st <- getState
guard $ stateAllowLinks st
@@ -1760,21 +1776,31 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
-bracketedSpan :: MarkdownParser (F Inlines)
+bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
bracketedSpan = try $ do
guardEnabled Ext_bracketed_spans
(lab,_) <- reference
attr <- attributes
- let (ident,classes,keyvals) = attr
- case lookup "style" keyvals of
- Just s | null ident && null classes &&
- map toLower (filter (`notElem` " \t;") s) ==
- "font-variant:small-caps"
- -> return $ B.smallcaps <$> lab
- _ -> return $ B.spanWith attr <$> lab
-
-regLink :: (Attr -> String -> String -> Inlines -> Inlines)
- -> F Inlines -> MarkdownParser (F Inlines)
+ return $ if isSmallCaps attr
+ then B.smallcaps <$> lab
+ else B.spanWith attr <$> lab
+
+-- | We treat a span as SmallCaps if class is "smallcaps" (and
+-- no other attributes are set or if style is "font-variant:small-caps"
+-- (and no other attributes are set).
+isSmallCaps :: Attr -> Bool
+isSmallCaps ("",["smallcaps"],[]) = True
+isSmallCaps ("",[],kvs) =
+ case lookup "style" kvs of
+ Just s -> map toLower (filter (`notElem` " \t;") s) ==
+ "font-variant:small-caps"
+ Nothing -> False
+isSmallCaps _ = False
+
+regLink :: PandocMonad m
+ => (Attr -> String -> String -> Inlines -> Inlines)
+ -> F Inlines
+ -> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
attr <- option nullAttr $
@@ -1782,20 +1808,24 @@ regLink constructor lab = try $ do
return $ constructor attr src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
-referenceLink :: (Attr -> String -> String -> Inlines -> Inlines)
- -> (F Inlines, String) -> MarkdownParser (F Inlines)
+referenceLink :: PandocMonad m
+ => (Attr -> String -> String -> Inlines -> Inlines)
+ -> (F Inlines, String)
+ -> MarkdownParser m (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
(_,raw') <- option (mempty, "") $
- lookAhead (try (guardEnabled Ext_citations >>
- spnl >> normalCite >> return (mempty, "")))
+ lookAhead (try (do guardEnabled Ext_citations
+ guardDisabled Ext_spaced_reference_links <|> spnl
+ normalCite
+ return (mempty, "")))
<|>
- try (spnl >> reference)
+ try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
- parsedRaw <- parseFromString (mconcat <$> many inline) raw'
- fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ parsedRaw <- parseFromString' inlines raw'
+ fallback <- parseFromString' inlines $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
let makeFallback = do
@@ -1820,11 +1850,11 @@ referenceLink constructor (lab, raw) = do
dropBrackets :: String -> String
dropBrackets = reverse . dropRB . reverse . dropLB
where dropRB (']':xs) = xs
- dropRB xs = xs
+ dropRB xs = xs
dropLB ('[':xs) = xs
- dropLB xs = xs
+ dropLB xs = xs
-bareURL :: MarkdownParser (F Inlines)
+bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
@@ -1832,7 +1862,7 @@ bareURL = try $ do
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
return $ return $ B.link src "" (B.str orig)
-autoLink :: MarkdownParser (F Inlines)
+autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do
getState >>= guard . stateAllowLinks
char '<'
@@ -1846,7 +1876,7 @@ autoLink = try $ do
guardEnabled Ext_link_attributes >> attributes
return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
-image :: MarkdownParser (F Inlines)
+image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
@@ -1856,54 +1886,55 @@ image = try $ do
_ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
-note :: MarkdownParser (F Inlines)
+note :: PandocMonad m => MarkdownParser m (F Inlines)
note = try $ do
guardEnabled Ext_footnotes
ref <- noteMarker
+ updateState $ \st -> st{ stateNoteRefs = Set.insert ref (stateNoteRefs st) }
return $ do
notes <- asksF stateNotes'
- case lookup ref notes of
+ case M.lookup ref notes of
Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
- Just contents -> do
+ Just (_pos, contents) -> do
st <- askF
-- process the note in a context that doesn't resolve
-- notes, to avoid infinite looping with notes inside
-- notes:
- let contents' = runF contents st{ stateNotes' = [] }
+ let contents' = runF contents st{ stateNotes' = M.empty }
return $ B.note contents'
-inlineNote :: MarkdownParser (F Inlines)
+inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
inlineNote = try $ do
guardEnabled Ext_inline_notes
char '^'
contents <- inlinesInBalancedBrackets
return $ B.note . B.para <$> contents
-rawLaTeXInline' :: MarkdownParser (F Inlines)
+rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
- lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
- RawInline _ s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s
- -- "tex" because it might be context or latex
+ lookAhead $ try $ char '\\' >> letter
+ notFollowedBy' rawConTeXtEnvironment
+ s <- rawLaTeXInline
+ return $ return $ B.rawInline "tex" s -- "tex" because it might be context
-rawConTeXtEnvironment :: Parser [Char] st String
+rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
- <|> (many1 letter)
- contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
+ <|> many1 letter
+ contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar)
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
-inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String
+inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String
inBrackets parser = do
char '['
contents <- many parser
char ']'
return $ "[" ++ contents ++ "]"
-spanHtml :: MarkdownParser (F Inlines)
+spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
spanHtml = try $ do
guardEnabled Ext_native_spans
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
@@ -1911,14 +1942,11 @@ spanHtml = try $ do
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- case lookup "style" keyvals of
- Just s | null ident && null classes &&
- map toLower (filter (`notElem` " \t;") s) ==
- "font-variant:small-caps"
- -> return $ B.smallcaps <$> contents
- _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
-
-divHtml :: MarkdownParser (F Blocks)
+ return $ if isSmallCaps (ident, classes, keyvals)
+ then B.smallcaps <$> contents
+ else B.spanWith (ident, classes, keyvals) <$> contents
+
+divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
divHtml = try $ do
guardEnabled Ext_native_divs
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
@@ -1940,7 +1968,29 @@ divHtml = try $ do
else -- avoid backtracing
return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
-rawHtmlInline :: MarkdownParser (F Inlines)
+divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
+divFenced = try $ do
+ guardEnabled Ext_fenced_divs
+ string ":::"
+ skipMany (char ':')
+ skipMany spaceChar
+ attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar)
+ skipMany spaceChar
+ skipMany (char ':')
+ blankline
+ updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
+ bs <- mconcat <$> manyTill block divFenceEnd
+ updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
+ return $ B.divWith attribs <$> bs
+
+divFenceEnd :: PandocMonad m => MarkdownParser m ()
+divFenceEnd = try $ do
+ string ":::"
+ skipMany (char ':')
+ blanklines
+ return ()
+
+rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines)
rawHtmlInline = do
guardEnabled Ext_raw_html
inHtmlBlock <- stateInHtmlBlock <$> getState
@@ -1962,7 +2012,7 @@ rawHtmlInline = do
emojiChars :: [Char]
emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
-emoji :: MarkdownParser (F Inlines)
+emoji :: PandocMonad m => MarkdownParser m (F Inlines)
emoji = try $ do
guardEnabled Ext_emoji
char ':'
@@ -1974,21 +2024,22 @@ emoji = try $ do
-- Citations
-cite :: MarkdownParser (F Inlines)
+cite :: PandocMonad m => MarkdownParser m (F Inlines)
cite = do
guardEnabled Ext_citations
- citations <- textualCite
+ textualCite
<|> do (cs, raw) <- withRaw normalCite
- return $ (flip B.cite (B.text raw)) <$> cs
- return citations
+ return $ flip B.cite (B.text raw) <$> cs
-textualCite :: MarkdownParser (F Inlines)
+textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
textualCite = try $ do
- (_, key) <- citeKey
+ (suppressAuthor, key) <- citeKey
let first = Citation{ citationId = key
, citationPrefix = []
, citationSuffix = []
- , citationMode = AuthorInText
+ , citationMode = if suppressAuthor
+ then SuppressAuthor
+ else AuthorInText
, citationNoteNum = 0
, citationHash = 0
}
@@ -2003,7 +2054,7 @@ textualCite = try $ do
let (spaces',raw') = span isSpace raw
spc | null spaces' = mempty
| otherwise = B.space
- lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
+ lab <- parseFromString' inlines $ dropBrackets raw'
fallback <- referenceLink B.linkWith (lab,raw')
return $ do
fallback' <- fallback
@@ -2017,7 +2068,7 @@ textualCite = try $ do
Just n -> B.str (show n)
_ -> B.cite [first] $ B.str $ '@':key)
-bareloc :: Citation -> MarkdownParser (F [Citation])
+bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
bareloc c = try $ do
spnl
char '['
@@ -2032,7 +2083,7 @@ bareloc c = try $ do
rest' <- rest
return $ c{ citationSuffix = B.toList suff' } : rest'
-normalCite :: MarkdownParser (F [Citation])
+normalCite :: PandocMonad m => MarkdownParser m (F [Citation])
normalCite = try $ do
char '['
spnl
@@ -2041,7 +2092,7 @@ normalCite = try $ do
char ']'
return citations
-suffix :: MarkdownParser (F Inlines)
+suffix :: PandocMonad m => MarkdownParser m (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
@@ -2050,14 +2101,14 @@ suffix = try $ do
then (B.space <>) <$> rest
else rest
-prefix :: MarkdownParser (F Inlines)
+prefix :: PandocMonad m => MarkdownParser m (F Inlines)
prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
+ manyTill inline (char ']' <|> fmap (const ']') (lookAhead citeKey))
-citeList :: MarkdownParser (F [Citation])
+citeList :: PandocMonad m => MarkdownParser m (F [Citation])
citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
-citation :: MarkdownParser (F Citation)
+citation :: PandocMonad m => MarkdownParser m (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
@@ -2065,23 +2116,23 @@ citation = try $ do
return $ do
x <- pref
y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
-
-smart :: MarkdownParser (F Inlines)
+ return Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+smart :: PandocMonad m => MarkdownParser m (F Inlines)
smart = do
- getOption readerSmart >>= guard
+ guardEnabled Ext_smart
doubleQuoted <|> singleQuoted <|>
choice (map (return <$>) [apostrophe, dash, ellipses])
-singleQuoted :: MarkdownParser (F Inlines)
+singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
@@ -2091,10 +2142,10 @@ singleQuoted = try $ do
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
-doubleQuoted :: MarkdownParser (F Inlines)
+doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
+ withQuoteContext InDoubleQuote (doubleQuoteEnd >> return
(fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
+ <|> return (return (B.str "\8220") <> contents)