summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/RST.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/RST.hs')
-rw-r--r--src/Text/Pandoc/Readers/RST.hs1041
1 files changed, 701 insertions, 340 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index e05b6cba2..e88d997f0 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,38 +30,50 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.RST (
- readRST,
- readRSTWithWarnings
- ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder (setMeta, fromList)
-import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
-import Text.Pandoc.Options
-import Control.Monad ( when, liftM, guard, mzero )
-import Data.List ( findIndex, intercalate,
- transpose, sort, deleteFirstsBy, isSuffixOf , nub, union)
-import Data.Maybe (fromMaybe)
+module Text.Pandoc.Readers.RST ( readRST ) where
+import Control.Arrow (second)
+import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
+import Control.Monad.Except (throwError)
+import Control.Monad.Identity (Identity (..))
+import Data.Char (isHexDigit, isSpace, toLower, toUpper)
+import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
+ nub, sort, transpose, union)
import qualified Data.Map as M
-import Text.Printf ( printf )
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
-import qualified Text.Pandoc.Builder as B
-import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower, isHexDigit, isSpace)
+import Data.Maybe (fromMaybe, isJust)
import Data.Monoid ((<>))
+import Data.Sequence (ViewR (..), viewr)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs)
+import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
+import Text.Pandoc.Definition
import Text.Pandoc.Error
+import Text.Pandoc.ImageSize (lengthToDim, scaleDimension)
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Printf (printf)
--- | Parse reStructuredText string and return Pandoc document.
-readRST :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
-
-readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String])
-readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+-- TODO:
+-- [ ] .. parsed-literal
-type RSTParser = Parser [Char] ParserState
+-- | Parse reStructuredText string and return Pandoc document.
+readRST :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readRST opts s = do
+ parsed <- readWithM parseRST def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+type RSTParser m = ParserT [Char] ParserState m
--
-- Constants and data structure definitions
@@ -87,9 +100,9 @@ isHeader _ _ = False
-- | Promote all headers in a list of blocks. (Part of
-- title transformation for RST.)
promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level attr text):rest) =
- (Header (level - num) attr text):(promoteHeaders num rest)
-promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
+promoteHeaders num (Header level attr text:rest) =
+ Header (level - num) attr text:promoteHeaders num rest
+promoteHeaders num (other:rest) = other:promoteHeaders num rest
promoteHeaders _ [] = []
-- | If list of blocks starts with a header (or a header and subheader)
@@ -101,11 +114,11 @@ titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata
titleTransform (bs, meta) =
let (bs', meta') =
case bs of
- ((Header 1 _ head1):(Header 2 _ head2):rest)
+ (Header 1 _ head1:Header 2 _ head2:rest)
| not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub
(promoteHeaders 2 rest, setMeta "title" (fromList head1) $
setMeta "subtitle" (fromList head2) meta)
- ((Header 1 _ head1):rest)
+ (Header 1 _ head1:rest)
| not (any (isHeader 1) rest) -> -- title only
(promoteHeaders 1 rest,
setMeta "title" (fromList head1) meta)
@@ -121,8 +134,10 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author"
$ M.adjust toPlain "date"
$ M.adjust toPlain "title"
- $ M.mapKeys (\k -> if k == "authors" then "author" else k)
- $ metamap
+ $ M.mapKeys (\k ->
+ if k == "authors"
+ then "author"
+ else k) metamap
toPlain (MetaBlocks [Para xs]) = MetaInlines xs
toPlain x = x
splitAuthors (MetaBlocks [Para xs])
@@ -131,6 +146,12 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
splitAuthors x = x
splitAuthors' = map normalizeSpaces .
splitOnSemi . concatMap factorSemi
+ normalizeSpaces = reverse . dropWhile isSp . reverse .
+ dropWhile isSp
+ isSp Space = True
+ isSp SoftBreak = True
+ isSp LineBreak = True
+ isSp _ = False
splitOnSemi = splitBy (==Str ";")
factorSemi (Str []) = []
factorSemi (Str s) = case break (==';') s of
@@ -141,41 +162,61 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
factorSemi (Str ys)
factorSemi x = [x]
-parseRST :: RSTParser Pandoc
+parseRST :: PandocMonad m => RSTParser m Pandoc
parseRST = do
optional blanklines -- skip blank lines at beginning of file
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
docMinusKeys <- concat <$>
- manyTill (referenceKey <|> noteBlock <|> lineClump) eof
+ manyTill (referenceKey <|> anchorDef <|>
+ noteBlock <|> citationBlock <|>
+ headerBlock <|> lineClump) eof
setInput docMinusKeys
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
+ updateState $ \s -> s { stateNotes = reverse reversedNotes
+ , stateHeaders = mempty
+ , stateIdentifiers = mempty }
-- now parse it for real...
blocks <- B.toList <$> parseBlocks
+ citations <- (sort . M.toList . stateCitations) <$> getState
+ citationItems <- mapM parseCitation citations
+ let refBlock = if null citationItems
+ then []
+ else [Div ("citations",[],[]) $
+ B.toList $ B.definitionList citationItems]
standalone <- getOption readerStandalone
state <- getState
let meta = stateMeta state
let (blocks', meta') = if standalone
then titleTransform (blocks, meta)
else (blocks, meta)
- return $ Pandoc meta' blocks'
+ reportLogMessages
+ return $ Pandoc meta' (blocks' ++ refBlock)
+
+parseCitation :: PandocMonad m
+ => (String, String) -> RSTParser m (Inlines, [Blocks])
+parseCitation (ref, raw) = do
+ contents <- parseFromString' parseBlocks raw
+ return (B.spanWith (ref, ["citation-label"], []) (B.str ref),
+ [contents])
+
--
-- parsing blocks
--
-parseBlocks :: RSTParser Blocks
+parseBlocks :: PandocMonad m => RSTParser m Blocks
parseBlocks = mconcat <$> manyTill block eof
-block :: RSTParser Blocks
+block :: PandocMonad m => RSTParser m Blocks
block = choice [ codeBlock
, blockQuote
, fieldList
, directive
+ , anchor
, comment
, header
, hrule
@@ -191,7 +232,7 @@ block = choice [ codeBlock
-- field list
--
-rawFieldListItem :: Int -> RSTParser (String, String)
+rawFieldListItem :: Monad m => Int -> RSTParser m (String, String)
rawFieldListItem minIndent = try $ do
indent <- length <$> many (char ' ')
guard $ indent >= minIndent
@@ -204,15 +245,15 @@ rawFieldListItem minIndent = try $ do
let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
return (name, raw)
-fieldListItem :: Int -> RSTParser (Inlines, [Blocks])
+fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
fieldListItem minIndent = try $ do
(name, raw) <- rawFieldListItem minIndent
term <- parseInlineFromString name
- contents <- parseFromString parseBlocks raw
+ contents <- parseFromString' parseBlocks raw
optional blanklines
return (term, [contents])
-fieldList :: RSTParser Blocks
+fieldList :: PandocMonad m => RSTParser m Blocks
fieldList = try $ do
indent <- length <$> lookAhead (many spaceChar)
items <- many1 $ fieldListItem indent
@@ -224,43 +265,62 @@ fieldList = try $ do
-- line block
--
-lineBlock :: RSTParser Blocks
+lineBlock :: PandocMonad m => RSTParser m Blocks
lineBlock = try $ do
lines' <- lineBlockLines
lines'' <- mapM parseInlineFromString lines'
return $ B.lineBlock lines''
+lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks
+lineBlockDirective body = do
+ lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body
+ return $ B.lineBlock lines'
+
--
-- paragraph block
--
-- note: paragraph can end in a :: starting a code block
-para :: RSTParser Blocks
+para :: PandocMonad m => RSTParser m Blocks
para = try $ do
result <- trimInlines . mconcat <$> many1 inline
option (B.plain result) $ try $ do
newline
blanklines
case viewr (B.unMany result) of
- ys :> (Str xs) | "::" `isSuffixOf` xs -> do
+ ys :> Str xs | "::" `isSuffixOf` xs -> do
raw <- option mempty codeBlockBody
return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs))
<> raw
_ -> return (B.para result)
-plain :: RSTParser Blocks
+plain :: PandocMonad m => RSTParser m Blocks
plain = B.plain . trimInlines . mconcat <$> many1 inline
--
-- header blocks
--
-header :: RSTParser Blocks
+header :: PandocMonad m => RSTParser m Blocks
header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
-doubleHeader :: RSTParser Blocks
-doubleHeader = try $ do
+doubleHeader :: PandocMonad m => RSTParser m Blocks
+doubleHeader = do
+ (txt, c) <- doubleHeader'
+ -- check to see if we've had this kind of header before.
+ -- if so, get appropriate level. if not, add to list.
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case elemIndex (DoubleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [DoubleHeader c], length headerTable + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ attr <- registerHeader nullAttr txt
+ return $ B.headerWith attr level txt
+
+doubleHeader' :: PandocMonad m => RSTParser m (Inlines, Char)
+doubleHeader' = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
let lenTop = length (c:rest)
@@ -268,48 +328,45 @@ doubleHeader = try $ do
newline
txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline)
pos <- getPosition
- let len = (sourceColumn pos) - 1
- if (len > lenTop) then fail "title longer than border" else return ()
+ let len = sourceColumn pos - 1
+ when (len > lenTop) $ fail "title longer than border"
blankline -- spaces and newline
count lenTop (char c) -- the bottom line
blanklines
- -- check to see if we've had this kind of header before.
- -- if so, get appropriate level. if not, add to list.
+ return (txt, c)
+
+-- a header with line on the bottom only
+singleHeader :: PandocMonad m => RSTParser m Blocks
+singleHeader = do
+ (txt, c) <- singleHeader'
state <- getState
let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
+ let (headerTable',level) = case elemIndex (SingleHeader c) headerTable of
Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+ Nothing -> (headerTable ++ [SingleHeader c], length headerTable + 1)
setState (state { stateHeaderTable = headerTable' })
attr <- registerHeader nullAttr txt
return $ B.headerWith attr level txt
--- a header with line on the bottom only
-singleHeader :: RSTParser Blocks
-singleHeader = try $ do
+singleHeader' :: PandocMonad m => RSTParser m (Inlines, Char)
+singleHeader' = try $ do
notFollowedBy' whitespace
- txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline})
+ lookAhead $ anyLine >> oneOf underlineChars
+ txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline)
pos <- getPosition
- let len = (sourceColumn pos) - 1
+ let len = sourceColumn pos - 1
blankline
c <- oneOf underlineChars
count (len - 1) (char c)
many (char c)
blanklines
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- attr <- registerHeader nullAttr txt
- return $ B.headerWith attr level txt
+ return (txt, c)
--
-- hrule block
--
-hrule :: Parser [Char] st Blocks
+hrule :: Monad m => ParserT [Char] st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -323,14 +380,14 @@ hrule = try $ do
--
-- read a line indented by a given string
-indentedLine :: String -> Parser [Char] st [Char]
+indentedLine :: Monad m => String -> ParserT [Char] st m [Char]
indentedLine indents = try $ do
string indents
anyLine
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
-indentedBlock :: Parser [Char] st [Char]
+indentedBlock :: Monad m => ParserT [Char] st m [Char]
indentedBlock = try $ do
indents <- lookAhead $ many1 spaceChar
lns <- many1 $ try $ do b <- option "" blanklines
@@ -339,24 +396,24 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
-quotedBlock :: Parser [Char] st [Char]
+quotedBlock :: Monad m => ParserT [Char] st m [Char]
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
return $ unlines lns
-codeBlockStart :: Parser [Char] st Char
+codeBlockStart :: Monad m => ParserT [Char] st m Char
codeBlockStart = string "::" >> blankline >> blankline
-codeBlock :: Parser [Char] st Blocks
+codeBlock :: Monad m => ParserT [Char] st m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
-codeBlockBody :: Parser [Char] st Blocks
+codeBlockBody :: Monad m => ParserT [Char] st m Blocks
codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
(indentedBlock <|> quotedBlock)
-lhsCodeBlock :: RSTParser Blocks
+lhsCodeBlock :: Monad m => RSTParser m Blocks
lhsCodeBlock = try $ do
getPosition >>= guard . (==1) . sourceColumn
guardEnabled Ext_literate_haskell
@@ -366,14 +423,14 @@ lhsCodeBlock = try $ do
return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], [])
$ intercalate "\n" lns
-latexCodeBlock :: Parser [Char] st [[Char]]
+latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
-birdCodeBlock :: Parser [Char] st [[Char]]
+birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
@@ -381,99 +438,159 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
then map (drop 1) lns
else lns
-birdTrackLine :: Parser [Char] st [Char]
+birdTrackLine :: Monad m => ParserT [Char] st m [Char]
birdTrackLine = char '>' >> anyLine
--
-- block quotes
--
-blockQuote :: RSTParser Blocks
+blockQuote :: PandocMonad m => RSTParser m Blocks
blockQuote = do
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ raw ++ "\n\n"
return $ B.blockQuote contents
+{-
+Unsupported options for include:
+tab-width
+encoding
+-}
+
+includeDirective :: PandocMonad m
+ => String -> [(String, String)] -> String
+ -> RSTParser m Blocks
+includeDirective top fields body = do
+ let f = trim top
+ guard $ not (null f)
+ guard $ null (trim body)
+ -- options
+ let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
+ let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
+ oldPos <- getPosition
+ oldInput <- getInput
+ containers <- stateContainers <$> getState
+ when (f `elem` containers) $
+ throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
+ updateState $ \s -> s{ stateContainers = f : stateContainers s }
+ mbContents <- readFileFromDirs ["."] f
+ contentLines <- case mbContents of
+ Just s -> return $ lines s
+ Nothing -> do
+ logMessage $ CouldNotLoadIncludeFile f oldPos
+ return []
+ let numLines = length contentLines
+ let startLine' = case startLine of
+ Nothing -> 1
+ Just x | x >= 0 -> x
+ | otherwise -> numLines + x -- negative from end
+ let endLine' = case endLine of
+ Nothing -> numLines + 1
+ Just x | x >= 0 -> x
+ | otherwise -> numLines + x -- negative from end
+ let contentLines' = drop (startLine' - 1)
+ $ take (endLine' - 1) contentLines
+ let contentLines'' = (case trim <$> lookup "end-before" fields of
+ Just patt -> takeWhile (not . (patt `isInfixOf`))
+ Nothing -> id) .
+ (case trim <$> lookup "start-after" fields of
+ Just patt -> drop 1 .
+ dropWhile (not . (patt `isInfixOf`))
+ Nothing -> id) $ contentLines'
+ let contents' = unlines contentLines'' ++ "\n"
+ case lookup "code" fields of
+ Just lang -> do
+ let numberLines = lookup "number-lines" fields
+ let classes = trimr lang : ["numberLines" | isJust numberLines] ++
+ maybe [] words (lookup "class" fields)
+ let kvs = maybe [] (\n -> [("startFrom", trimr n)]) numberLines
+ let ident = maybe "" trimr $ lookup "name" fields
+ let attribs = (ident, classes, kvs)
+ return $ B.codeBlockWith attribs contents'
+ Nothing -> case lookup "literal" fields of
+ Just _ -> return $ B.rawBlock "rst" contents'
+ Nothing -> do
+ setPosition $ newPos f 1 1
+ setInput contents'
+ bs <- optional blanklines >>
+ (mconcat <$> many block)
+ setInput oldInput
+ setPosition oldPos
+ updateState $ \s -> s{ stateContainers =
+ tail $ stateContainers s }
+ return bs
+
+
--
-- list blocks
--
-list :: RSTParser Blocks
+list :: PandocMonad m => RSTParser m Blocks
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-definitionListItem :: RSTParser (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks])
definitionListItem = try $ do
-- avoid capturing a directive or comment
notFollowedBy (try $ char '.' >> char '.')
term <- trimInlines . mconcat <$> many1Till inline endline
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n"
+ contents <- parseFromString' parseBlocks $ raw ++ "\n"
return (term, [contents])
-definitionList :: RSTParser Blocks
+definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Parser [Char] st Int
+bulletListStart :: Monad m => ParserT [Char] st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
- white <- many1 spaceChar
+ white <- many1 spaceChar <|> "" <$ lookAhead (char '\n')
return $ length (marker:white)
-- parses ordered list start and returns its length (inc following whitespace)
-orderedListStart :: ListNumberStyle
+orderedListStart :: Monad m => ListNumberStyle
-> ListNumberDelim
- -> RSTParser Int
+ -> RSTParser m Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
- white <- many1 spaceChar
+ white <- many1 spaceChar <|> "" <$ lookAhead (char '\n')
return $ markerLen + length white
-- parse a line of a list item
-listLine :: Int -> RSTParser [Char]
+listLine :: Monad m => Int -> RSTParser m [Char]
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
- line <- anyLine
- return $ line ++ "\n"
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Int -> RSTParser [Char]
-indentWith num = do
- tabStop <- getOption readerTabStop
- if (num < tabStop)
- then count num (char ' ')
- else choice [ try (count num (char ' ')),
- (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+ anyLineNewline
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: RSTParser Int
- -> RSTParser (Int, [Char])
+rawListItem :: Monad m => RSTParser m Int
+ -> RSTParser m (Int, [Char])
rawListItem start = try $ do
markerLength <- start
- firstLine <- anyLine
+ firstLine <- anyLineNewline
restLines <- many (listLine markerLength)
- return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+ return (markerLength, firstLine ++ concat restLines)
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: Int -> RSTParser [Char]
+listContinuation :: Monad m => Int -> RSTParser m [Char]
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return $ blanks ++ concat result
-listItem :: RSTParser Int
- -> RSTParser Blocks
+listItem :: PandocMonad m
+ => RSTParser m Int
+ -> RSTParser m Blocks
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
- blanks <- choice [ try (many blankline <* lookAhead start),
- many1 blankline ] -- whole list must end with blank.
+ skipMany1 blankline <|> () <$ lookAhead start
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
@@ -481,52 +598,52 @@ listItem start = try $ do
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may itself contain block elements
- parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
+ parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n"
updateState (\st -> st {stateParserContext = oldContext})
return $ case B.toList parsed of
- [Para xs] -> B.singleton $ Plain xs
- [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys]
- [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys]
- [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys]
+ [Para xs] ->
+ B.singleton $ Plain xs
+ [Para xs, BulletList ys] ->
+ B.fromList [Plain xs, BulletList ys]
+ [Para xs, OrderedList s ys] ->
+ B.fromList [Plain xs, OrderedList s ys]
+ [Para xs, DefinitionList ys] ->
+ B.fromList [Plain xs, DefinitionList ys]
_ -> parsed
-orderedList :: RSTParser Blocks
+orderedList :: PandocMonad m => RSTParser m Blocks
orderedList = try $ do
(start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar)
items <- many1 (listItem (orderedListStart style delim))
- let items' = compactify' items
+ let items' = compactify items
return $ B.orderedListWith (start, style, delim) items'
-bulletList :: RSTParser Blocks
-bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
+bulletList :: PandocMonad m => RSTParser m Blocks
+bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart)
--
-- directive (e.g. comment, container, compound-paragraph)
--
-comment :: RSTParser Blocks
+comment :: Monad m => RSTParser m Blocks
comment = try $ do
string ".."
skipMany1 spaceChar <|> (() <$ lookAhead newline)
- notFollowedBy' directiveLabel
+ -- notFollowedBy' directiveLabel -- comment comes after directive so unnec.
manyTill anyChar blanklines
optional indentedBlock
return mempty
-directiveLabel :: RSTParser String
+directiveLabel :: Monad m => RSTParser m String
directiveLabel = map toLower
<$> many1Till (letter <|> char '-') (try $ string "::")
-directive :: RSTParser Blocks
+directive :: PandocMonad m => RSTParser m Blocks
directive = try $ do
string ".."
directive'
--- TODO: line-block, parsed-literal, table, csv-table, list-table
--- date
--- include
--- title
-directive' :: RSTParser Blocks
+directive' :: PandocMonad m => RSTParser m Blocks
directive' = do
skipMany1 spaceChar
label <- directiveLabel
@@ -541,43 +658,71 @@ directive' = do
body <- option "" $ try $ blanklines >> indentedBlock
optional blanklines
let body' = body ++ "\n\n"
- imgAttr cl = ("", classes, getAtt "width" ++ getAtt "height")
+ name = trim $ fromMaybe "" (lookup "name" fields)
+ imgAttr cl = ("", classes, widthAttr ++ heightAttr)
where
- classes = words $ maybe "" trim $ lookup cl fields
- getAtt k = case lookup k fields of
- Just v -> [(k, filter (not . isSpace) v)]
- Nothing -> []
+ classes = words $ maybe "" trim (lookup cl fields) ++
+ maybe "" (\x -> "align-" ++ trim x)
+ (lookup "align" fields)
+ scale = case trim <$> lookup "scale" fields of
+ Just v -> case reverse v of
+ '%':vv ->
+ case safeRead (reverse vv) of
+ Just (percent :: Double)
+ -> percent / 100.0
+ Nothing -> 1.0
+ _ ->
+ case safeRead v of
+ Just (s :: Double) -> s
+ Nothing -> 1.0
+ Nothing -> 1.0
+ widthAttr = maybe [] (\x -> [("width",
+ show $ scaleDimension scale x)])
+ $ lookup "width" fields >>=
+ (lengthToDim . filter (not . isSpace))
+ heightAttr = maybe [] (\x -> [("height",
+ show $ scaleDimension scale x)])
+ $ lookup "height" fields >>=
+ (lengthToDim . filter (not . isSpace))
case label of
+ "include" -> includeDirective top fields body'
+ "table" -> tableDirective top fields body'
+ "list-table" -> listTableDirective top fields body'
+ "csv-table" -> csvTableDirective top fields body'
+ "line-block" -> lineBlockDirective body'
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
- "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
- "container" -> parseFromString parseBlocks body'
+ "role" -> addNewRole top $ map (second trim) fields
+ "container" -> B.divWith (name, "container" : words top, []) <$>
+ parseFromString' parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
parseInlineFromString (trim top)
"unicode" -> B.para <$> -- consumed by substKey
parseInlineFromString (trim $ unicodeTransform top)
- "compound" -> parseFromString parseBlocks body'
- "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body'
- "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body'
- "highlights" -> B.blockQuote <$> parseFromString parseBlocks body'
+ "compound" -> parseFromString' parseBlocks body'
+ "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body'
+ "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body'
+ "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body'
"rubric" -> B.para . B.strong <$> parseInlineFromString top
_ | label `elem` ["attention","caution","danger","error","hint",
- "important","note","tip","warning"] ->
- do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
- return $ B.divWith ("",["admonition", label],[]) bod
- "admonition" ->
- do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
- return $ B.divWith ("",["admonition"],[]) bod
+ "important","note","tip","warning","admonition"] ->
+ do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
+ let lab = case label of
+ "admonition" -> mempty
+ (l:ls) -> B.divWith ("",["admonition-title"],[])
+ (B.para (B.str (toUpper l : ls)))
+ [] -> mempty
+ return $ B.divWith ("",[label],[]) (lab <> bod)
"sidebar" ->
do let subtit = maybe "" trim $ lookup "subtitle" fields
tit <- B.para . B.strong <$> parseInlineFromString
(trim top ++ if null subtit
then ""
else (": " ++ subtit))
- bod <- parseFromString parseBlocks body'
+ bod <- parseFromString' parseBlocks body'
return $ B.divWith ("",["sidebar"],[]) $ tit <> bod
"topic" ->
do tit <- B.para . B.strong <$> parseInlineFromString top
- bod <- parseFromString parseBlocks body'
+ bod <- parseFromString' parseBlocks body'
return $ B.divWith ("",["topic"],[]) $ tit <> bod
"default-role" -> mempty <$ updateState (\s ->
s { stateRstDefaultRole =
@@ -588,14 +733,15 @@ directive' = do
codeblock (words $ fromMaybe [] $ lookup "class" fields)
(lookup "number-lines" fields) (trim top) body
"aafig" -> do
- let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields)
+ let attribs = ("", ["aafig"], map (second trimr) fields)
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
"math" -> return $ B.para $ mconcat $ map B.displayMath
$ toChunks $ top ++ "\n\n" ++ body
"figure" -> do
- (caption, legend) <- parseFromString extractCaption body'
+ (caption, legend) <- parseFromString' extractCaption body'
let src = escapeURI $ trim top
- return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend
+ return $ B.para (B.imageWith (imgAttr "figclass") src "fig:"
+ caption) <> legend
"image" -> do
let src = escapeURI $ trim top
let alt = B.str $ maybe "image" trim $ lookup "alt" fields
@@ -606,28 +752,150 @@ directive' = do
$ B.imageWith attr src "" alt
Nothing -> B.imageWith attr src "" alt
"class" -> do
- let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
+ let attrs = ("", splitBy isSpace $ trim top,
+ map (second trimr) fields)
-- directive content or the first immediately following element
children <- case body of
"" -> block
- _ -> parseFromString parseBlocks body'
+ _ -> parseFromString' parseBlocks body'
return $ B.divWith attrs children
other -> do
pos <- getPosition
- addWarning (Just pos) $ "ignoring unknown directive: " ++ other
- return mempty
+ logMessage $ SkippedContent (".. " ++ other) pos
+ bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
+ return $ B.divWith ("",[other],[]) bod
+
+tableDirective :: PandocMonad m
+ => String -> [(String, String)] -> String -> RSTParser m Blocks
+tableDirective top fields body = do
+ bs <- parseFromString' parseBlocks body
+ case B.toList bs of
+ [Table _ aligns' widths' header' rows'] -> do
+ title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+ columns <- getOption readerColumns
+ let numOfCols = length header'
+ let normWidths ws =
+ map (/ max 1.0 (fromIntegral (columns - numOfCols))) ws
+ let widths = case trim <$> lookup "widths" fields of
+ Just "auto" -> replicate numOfCols 0.0
+ Just "grid" -> widths'
+ Just specs -> normWidths
+ $ map (fromMaybe (0 :: Double) . safeRead)
+ $ splitBy (`elem` (" ," :: String)) specs
+ Nothing -> widths'
+ -- align is not applicable since we can't represent whole table align
+ return $ B.singleton $ Table (B.toList title)
+ aligns' widths header' rows'
+ _ -> return mempty
+
+
+-- TODO: :stub-columns:.
+-- Only the first row becomes the header even if header-rows: > 1,
+-- since Pandoc doesn't support a table with multiple header rows.
+-- We don't need to parse :align: as it represents the whole table align.
+listTableDirective :: PandocMonad m
+ => String -> [(String, String)] -> String
+ -> RSTParser m Blocks
+listTableDirective top fields body = do
+ bs <- parseFromString' parseBlocks body
+ title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+ let rows = takeRows $ B.toList bs
+ headerRowsNum = fromMaybe (0 :: Int) $
+ lookup "header-rows" fields >>= safeRead
+ (headerRow,bodyRows,numOfCols) = case rows of
+ x:xs -> if headerRowsNum > 0
+ then (x, xs, length x)
+ else ([], rows, length x)
+ _ -> ([],[],0)
+ widths = case trim <$> lookup "widths" fields of
+ Just "auto" -> replicate numOfCols 0
+ Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
+ splitBy (`elem` (" ," :: String)) specs
+ _ -> replicate numOfCols 0
+ return $ B.table title
+ (zip (replicate numOfCols AlignDefault) widths)
+ headerRow
+ bodyRows
+ where takeRows [BulletList rows] = map takeCells rows
+ takeRows _ = []
+ takeCells [BulletList cells] = map B.fromList cells
+ takeCells _ = []
+ normWidths ws = map (/ max 1 (sum ws)) ws
+
+csvTableDirective :: PandocMonad m
+ => String -> [(String, String)] -> String
+ -> RSTParser m Blocks
+csvTableDirective top fields rawcsv = do
+ let explicitHeader = trim <$> lookup "header" fields
+ let opts = defaultCSVOptions{
+ csvDelim = case trim <$> lookup "delim" fields of
+ Just "tab" -> '\t'
+ Just "space" -> ' '
+ Just [c] -> c
+ _ -> ','
+ , csvQuote = case trim <$> lookup "quote" fields of
+ Just [c] -> c
+ _ -> '"'
+ , csvEscape = case trim <$> lookup "escape" fields of
+ Just [c] -> Just c
+ _ -> Nothing
+ , csvKeepSpace = case trim <$> lookup "keepspace" fields of
+ Just "true" -> True
+ _ -> False
+ }
+ let headerRowsNum = fromMaybe (case explicitHeader of
+ Just _ -> 1 :: Int
+ Nothing -> 0 :: Int) $
+ lookup "header-rows" fields >>= safeRead
+ rawcsv' <- case trim <$>
+ lookup "file" fields `mplus` lookup "url" fields of
+ Just u -> do
+ (bs, _) <- fetchItem u
+ return $ UTF8.toString bs
+ Nothing -> return rawcsv
+ let res = parseCSV opts (T.pack $ case explicitHeader of
+ Just h -> h ++ "\n" ++ rawcsv'
+ Nothing -> rawcsv')
+ case res of
+ Left e ->
+ throwError $ PandocParsecError "csv table" e
+ Right rawrows -> do
+ let parseCell = parseFromString' (plain <|> return mempty) . T.unpack
+ let parseRow = mapM parseCell
+ rows <- mapM parseRow rawrows
+ let (headerRow,bodyRows,numOfCols) =
+ case rows of
+ x:xs -> if headerRowsNum > 0
+ then (x, xs, length x)
+ else ([], rows, length x)
+ _ -> ([],[],0)
+ title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+ let normWidths ws = map (/ max 1 (sum ws)) ws
+ let widths =
+ case trim <$> lookup "widths" fields of
+ Just "auto" -> replicate numOfCols 0
+ Just specs -> normWidths
+ $ map (fromMaybe (0 :: Double) . safeRead)
+ $ splitBy (`elem` (" ," :: String)) specs
+ _ -> replicate numOfCols 0
+ return $ B.table title
+ (zip (replicate numOfCols AlignDefault) widths)
+ headerRow
+ bodyRows
-- TODO:
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
-addNewRole :: String -> [(String, String)] -> RSTParser Blocks
+addNewRole :: PandocMonad m
+ => String -> [(String, String)] -> RSTParser m Blocks
addNewRole roleString fields = do
- (role, parentRole) <- parseFromString inheritedRole roleString
+ pos <- getPosition
+ (role, parentRole) <- parseFromString' inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
let getBaseRole (r, f, a) roles =
case M.lookup r roles of
Just (r', f', a') -> getBaseRole (r', f', a') roles
- Nothing -> (r, f, a)
+ Nothing -> (r, f, a)
(baseRole, baseFmt, baseAttr) =
getBaseRole (parentRole, Nothing, nullAttr) customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
@@ -641,34 +909,32 @@ addNewRole roleString fields = do
in (ident, nub . (role :) . annotate $ classes, keyValues)
-- warn about syntax we ignore
- flip mapM_ fields $ \(key, _) -> case key of
- "language" -> when (baseRole /= "code") $ addWarning Nothing $
- "ignoring :language: field because the parent of role :" ++
- role ++ ": is :" ++ baseRole ++ ": not :code:"
- "format" -> when (baseRole /= "raw") $ addWarning Nothing $
- "ignoring :format: field because the parent of role :" ++
- role ++ ": is :" ++ baseRole ++ ": not :raw:"
- _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
- ": in definition of role :" ++ role ++ ": in"
+ forM_ fields $ \(key, _) -> case key of
+ "language" -> when (baseRole /= "code") $ logMessage $
+ SkippedContent ":language: [because parent of role is not :code:]"
+ pos
+ "format" -> when (baseRole /= "raw") $ logMessage $
+ SkippedContent ":format: [because parent of role is not :raw:]" pos
+ _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos
when (parentRole == "raw" && countKeys "format" > 1) $
- addWarning Nothing $
- "ignoring :format: fields after the first in the definition of role :"
- ++ role ++": in"
+ logMessage $ SkippedContent
+ ":format: [after first in definition of role]"
+ pos
when (parentRole == "code" && countKeys "language" > 1) $
- addWarning Nothing $
- "ignoring :language: fields after the first in the definition of role :"
- ++ role ++": in"
+ logMessage $ SkippedContent
+ ":language: [after first in definition of role]" pos
updateState $ \s -> s {
stateRstCustomRoles =
M.insert role (baseRole, fmt, attr) customRoles
}
- return $ B.singleton Null
+ return mempty
where
countKeys k = length . filter (== k) . map fst $ fields
inheritedRole =
- (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
+ (,) <$> roleName <*> ((char '(' *> roleName <* char ')')
+ <|> pure "span")
-- Can contain character codes as decimal numbers or
@@ -700,24 +966,29 @@ extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
where (ds,rest) = span isHexDigit s
mbc = safeRead ('\'':'\\':'x':ds ++ "'")
-extractCaption :: RSTParser (Inlines, Blocks)
+extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks)
extractCaption = do
capt <- trimInlines . mconcat <$> many inline
legend <- optional blanklines >> (mconcat <$> many block)
return (capt,legend)
--- divide string by blanklines
+-- divide string by blanklines, and surround with
+-- \begin{aligned}...\end{aligned} if needed.
toChunks :: String -> [String]
toChunks = dropWhile null
- . map (trim . unlines)
+ . map (addAligned . trim . unlines)
. splitBy (all (`elem` (" \t" :: String))) . lines
+ -- we put this in an aligned environment if it contains \\, see #4254
+ where addAligned s = if "\\\\" `isInfixOf` s
+ then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}"
+ else s
-codeblock :: [String] -> Maybe String -> String -> String -> RSTParser Blocks
+codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks
codeblock classes numberLines lang body =
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
where attribs = ("", classes', kvs)
classes' = "sourceCode" : lang
- : maybe [] (\_ -> ["numberLines"]) numberLines
+ : maybe [] (const ["numberLines"]) numberLines
++ classes
kvs = case numberLines of
Just "" -> []
@@ -728,30 +999,52 @@ codeblock classes numberLines lang body =
--- note block
---
-noteBlock :: RSTParser [Char]
+noteBlock :: Monad m => RSTParser m [Char]
noteBlock = try $ do
+ (ref, raw, replacement) <- noteBlock' noteMarker
+ updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s }
+ -- return blanks so line count isn't affected
+ return replacement
+
+citationBlock :: Monad m => RSTParser m [Char]
+citationBlock = try $ do
+ (ref, raw, replacement) <- noteBlock' citationMarker
+ updateState $ \s ->
+ s { stateCitations = M.insert ref raw (stateCitations s),
+ stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[]))
+ (stateKeys s) }
+ -- return blanks so line count isn't affected
+ return replacement
+
+noteBlock' :: Monad m
+ => RSTParser m String -> RSTParser m (String, String, String)
+noteBlock' marker = try $ do
startPos <- getPosition
string ".."
spaceChar >> skipMany spaceChar
- ref <- noteMarker
+ ref <- marker
first <- (spaceChar >> skipMany spaceChar >> anyLine)
<|> (newline >> return "")
blanks <- option "" blanklines
rest <- option "" indentedBlock
endPos <- getPosition
let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
- let newnote = (ref, raw)
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \s -> s { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n'
+ return (ref, raw, replacement)
+
+citationMarker :: Monad m => RSTParser m [Char]
+citationMarker = do
+ char '['
+ res <- simpleReferenceName
+ char ']'
+ return res
-noteMarker :: RSTParser [Char]
+noteMarker :: Monad m => RSTParser m [Char]
noteMarker = do
char '['
res <- many1 digit
- <|> (try $ char '#' >> liftM ('#':) simpleReferenceName')
+ <|>
+ try (char '#' >> liftM ('#':) simpleReferenceName)
<|> count 1 (oneOf "#*")
char ']'
return res
@@ -760,39 +1053,26 @@ noteMarker = do
-- reference key
--
-quotedReferenceName :: RSTParser Inlines
+quotedReferenceName :: PandocMonad m => RSTParser m String
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
- label' <- trimInlines . mconcat <$> many1Till inline (char '`')
- return label'
-
-unquotedReferenceName :: RSTParser Inlines
-unquotedReferenceName = try $ do
- label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
- return label'
+ manyTill anyChar (char '`')
-- Simple reference names are single words consisting of alphanumerics
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName' :: Parser [Char] st String
-simpleReferenceName' = do
+simpleReferenceName :: Monad m => ParserT [Char] st m String
+simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
- <|> (try $ oneOf "-_:+." <* lookAhead alphaNum)
+ <|> try (oneOf "-_:+." <* lookAhead alphaNum)
return (x:xs)
-simpleReferenceName :: Parser [Char] st Inlines
-simpleReferenceName = do
- raw <- simpleReferenceName'
- return $ B.str raw
+referenceName :: PandocMonad m => RSTParser m String
+referenceName = quotedReferenceName <|> simpleReferenceName
-referenceName :: RSTParser Inlines
-referenceName = quotedReferenceName <|>
- (try $ simpleReferenceName <* lookAhead (char ':')) <|>
- unquotedReferenceName
-
-referenceKey :: RSTParser [Char]
+referenceKey :: PandocMonad m => RSTParser m [Char]
referenceKey = do
startPos <- getPosition
choice [substKey, anonymousKey, regularKey]
@@ -801,16 +1081,16 @@ referenceKey = do
-- return enough blanks to replace key
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-targetURI :: Parser [Char] st [Char]
+targetURI :: Monad m => ParserT [Char] st m [Char]
targetURI = do
skipSpaces
optional newline
contents <- many1 (try (many spaceChar >> newline >>
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
- return $ escapeURI $ trim $ contents
+ return $ escapeURI $ trim contents
-substKey :: RSTParser ()
+substKey :: PandocMonad m => RSTParser m ()
substKey = try $ do
string ".."
skipMany1 spaceChar
@@ -826,31 +1106,85 @@ substKey = try $ do
[Para ils] -> return $ B.fromList ils
_ -> mzero
let key = toKey $ stripFirstAndLast ref
- updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s }
+ updateState $ \s -> s{ stateSubstitutions =
+ M.insert key il $ stateSubstitutions s }
-anonymousKey :: RSTParser ()
+anonymousKey :: Monad m => RSTParser m ()
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
let key = toKey $ "_" ++ printf "%09d" (sourceLine pos)
- --TODO: parse width, height, class and name attributes
- updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
-
-stripTicks :: String -> String
-stripTicks = reverse . stripTick . reverse . stripTick
- where stripTick ('`':xs) = xs
- stripTick xs = xs
-
-regularKey :: RSTParser ()
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
+ stateKeys s }
+
+referenceNames :: PandocMonad m => RSTParser m [String]
+referenceNames = do
+ let rn = try $ do
+ string ".. _"
+ ref <- quotedReferenceName
+ <|> many ( noneOf ":\n"
+ <|> try (char '\n' <*
+ string " " <*
+ notFollowedBy blankline)
+ <|> try (char ':' <* lookAhead alphaNum)
+ )
+ char ':'
+ return ref
+ first <- rn
+ rest <- many (try (blanklines *> rn))
+ return (first:rest)
+
+regularKey :: PandocMonad m => RSTParser m ()
regularKey = try $ do
- string ".. _"
- (_,ref) <- withRaw referenceName
- char ':'
+ -- we allow several references to the same URL, e.g.
+ -- .. _hello:
+ -- .. _goodbye: url.com
+ refs <- referenceNames
src <- targetURI
- let key = toKey $ stripTicks ref
- --TODO: parse width, height, class and name attributes
- updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
+ guard $ not (null src)
+ let keys = map toKey refs
+ forM_ keys $ \key ->
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
+ stateKeys s }
+
+anchorDef :: PandocMonad m => RSTParser m [Char]
+anchorDef = try $ do
+ (refs, raw) <- withRaw $ try (referenceNames <* blanklines)
+ forM_ refs $ \rawkey ->
+ updateState $ \s -> s { stateKeys =
+ M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
+ -- keep this for 2nd round of parsing, where we'll add the divs (anchor)
+ return raw
+
+anchor :: PandocMonad m => RSTParser m Blocks
+anchor = try $ do
+ refs <- referenceNames
+ blanklines
+ b <- block
+ let addDiv ref = B.divWith (ref, [], [])
+ let emptySpanWithId id' = Span (id',[],[]) []
+ -- put identifier on next block:
+ case B.toList b of
+ [Header lev (_,classes,kvs) txt] ->
+ case reverse refs of
+ [] -> return b
+ (r:rs) -> return $ B.singleton $
+ Header lev (r,classes,kvs)
+ (txt ++ map emptySpanWithId rs)
+ -- we avoid generating divs for headers,
+ -- because it hides them from promoteHeader, see #4240
+ _ -> return $ foldr addDiv b refs
+
+headerBlock :: PandocMonad m => RSTParser m [Char]
+headerBlock = do
+ ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader')
+ (ident,_,_) <- registerHeader nullAttr txt
+ let key = toKey (stringify txt)
+ updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr)
+ $ stateKeys s }
+ return raw
+
--
-- tables
@@ -869,45 +1203,53 @@ regularKey = try $ do
-- Grid tables TODO:
-- - column spans
-dashedLine :: Char -> Parser [Char] st (Int, Int)
+dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
-simpleTableSep :: Char -> RSTParser Char
+simpleTableSep :: Monad m => Char -> RSTParser m Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-- Parse a table footer
-simpleTableFooter :: RSTParser [Char]
+simpleTableFooter :: Monad m => RSTParser m [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: [Int] -> RSTParser [String]
-simpleTableRawLine indices = do
- line <- many1Till anyChar newline
- return (simpleTableSplitLine indices line)
+simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine
+
+simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLineWithEmptyCell indices = try $ do
+ cs <- simpleTableRawLine indices
+ let isEmptyCell = all (\c -> c == ' ' || c == '\t')
+ guard $ any isEmptyCell cs
+ return cs
-- Parse a table row and return a list of blocks (columns).
-simpleTableRow :: [Int] -> RSTParser [[Block]]
+simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks]
simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
- colLines <- return [] -- TODO
- let cols = map unlines . transpose $ firstLine : colLines
- mapM (parseFromString (B.toList . mconcat <$> many plain)) cols
+ conLines <- many $ simpleTableRawLineWithEmptyCell indices
+ let cols = map unlines . transpose $ firstLine : conLines ++
+ [replicate (length indices) ""
+ | not (null conLines)]
+ mapM (parseFromString' parseBlocks) cols
simpleTableSplitLine :: [Int] -> String -> [String]
simpleTableSplitLine indices line =
- map trim
+ map trimr
$ tail $ splitByIndices (init indices) line
-simpleTableHeader :: Bool -- ^ Headerless table
- -> RSTParser ([[Block]], [Alignment], [Int])
+simpleTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m ([Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@@ -921,26 +1263,37 @@ simpleTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else simpleTableSplitLine indices rawContent
- heads <- mapM (parseFromString (B.toList . mconcat <$> many plain)) $
- map trim rawHeads
+ heads <- mapM ( parseFromString' (mconcat <$> many plain) . trim) rawHeads
return (heads, aligns, indices)
-- Parse a simple table.
-simpleTable :: Bool -- ^ Headerless table
- -> RSTParser Blocks
+simpleTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m Blocks
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
+ let wrapIdFst (a, b, c) = (Identity a, b, c)
+ wrapId = fmap Identity
+ tbl <- runIdentity <$> tableWith
+ (wrapIdFst <$> simpleTableHeader headless)
+ (wrapId <$> simpleTableRow)
+ sep simpleTableFooter
-- Simple tables get 0s for relative column widths (i.e., use default)
- return $ B.singleton $ Table c a (replicate (length a) 0) h l
+ case B.toList tbl of
+ [Table c a _w h l] -> return $ B.singleton $
+ Table c a (replicate (length a) 0) h l
+ _ ->
+ throwError $ PandocShouldNeverHappenError
+ "tableWith returned something unexpected"
where
sep = return () -- optional (simpleTableSep '-')
-gridTable :: Bool -- ^ Headerless table
- -> RSTParser Blocks
-gridTable headerless = B.singleton
- <$> gridTableWith (B.toList <$> parseBlocks) headerless
+gridTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m Blocks
+gridTable headerless = runIdentity <$>
+ gridTableWith (Identity <$> parseBlocks) headerless
-table :: RSTParser Blocks
+table :: PandocMonad m => RSTParser m Blocks
table = gridTable False <|> simpleTable False <|>
gridTable True <|> simpleTable True <?> "table"
@@ -948,7 +1301,7 @@ table = gridTable False <|> simpleTable False <|>
-- inline
--
-inline :: RSTParser Inlines
+inline :: PandocMonad m => RSTParser m Inlines
inline = choice [ note -- can start with whitespace, so try before ws
, whitespace
, link
@@ -964,29 +1317,30 @@ inline = choice [ note -- can start with whitespace, so try before ws
, escapedChar
, symbol ] <?> "inline"
-parseInlineFromString :: String -> RSTParser Inlines
-parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline)
+parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
+parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
-hyphens :: RSTParser Inlines
+hyphens :: Monad m => RSTParser m Inlines
hyphens = do
result <- many1 (char '-')
optional endline
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
-escapedChar :: Parser [Char] st Inlines
+escapedChar :: Monad m => ParserT [Char] st m Inlines
escapedChar = do c <- escaped anyChar
- return $ if c == ' ' -- '\ ' is null in RST
+ return $ if c == ' ' || c == '\n' || c == '\r'
+ -- '\ ' is null in RST
then mempty
else B.str [c]
-symbol :: RSTParser Inlines
+symbol :: Monad m => RSTParser m Inlines
symbol = do
result <- oneOf specialChars
return $ B.str [result]
-- parses inline code, between codeStart and codeEnd
-code :: RSTParser Inlines
+code :: Monad m => RSTParser m Inlines
code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
@@ -994,7 +1348,7 @@ code = try $ do
$ trim $ unwords $ lines result
-- succeeds only if we're not right after a str (ie. in middle of word)
-atStart :: RSTParser a -> RSTParser a
+atStart :: Monad m => RSTParser m a -> RSTParser m a
atStart p = do
pos <- getPosition
st <- getState
@@ -1002,11 +1356,11 @@ atStart p = do
guard $ stateLastStrPos st /= Just pos
p
-emph :: RSTParser Inlines
+emph :: PandocMonad m => RSTParser m Inlines
emph = B.emph . trimInlines . mconcat <$>
enclosed (atStart $ char '*') (char '*') inline
-strong :: RSTParser Inlines
+strong :: PandocMonad m => RSTParser m Inlines
strong = B.strong . trimInlines . mconcat <$>
enclosed (atStart $ string "**") (try $ string "**") inline
@@ -1018,12 +1372,13 @@ strong = B.strong . trimInlines . mconcat <$>
-- - Classes are silently discarded in addNewRole
-- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
-interpretedRole :: RSTParser Inlines
+interpretedRole :: PandocMonad m => RSTParser m Inlines
interpretedRole = try $ do
(role, contents) <- roleBefore <|> roleAfter
renderRole contents Nothing role nullAttr
-renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines
+renderRole :: PandocMonad m
+ => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
"sup" -> return $ B.superscript $ B.str contents
"superscript" -> return $ B.superscript $ B.str contents
@@ -1048,10 +1403,8 @@ renderRole contents fmt role attr = case role of
case M.lookup custom customRoles of
Just (newRole, newFmt, newAttr) ->
renderRole contents newFmt newRole newAttr
- Nothing -> do
- pos <- getPosition
- addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in"
- return $ B.str contents -- Undefined role
+ Nothing -> -- undefined role
+ return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
@@ -1061,33 +1414,33 @@ renderRole contents fmt role attr = case role of
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
addClass :: String -> Attr -> Attr
-addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
+addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues)
-roleName :: RSTParser String
+roleName :: PandocMonad m => RSTParser m String
roleName = many1 (letter <|> char '-')
-roleMarker :: RSTParser String
+roleMarker :: PandocMonad m => RSTParser m String
roleMarker = char ':' *> roleName <* char ':'
-roleBefore :: RSTParser (String,String)
+roleBefore :: PandocMonad m => RSTParser m (String,String)
roleBefore = try $ do
role <- roleMarker
contents <- unmarkedInterpretedText
return (role,contents)
-roleAfter :: RSTParser (String,String)
+roleAfter :: PandocMonad m => RSTParser m (String,String)
roleAfter = try $ do
contents <- unmarkedInterpretedText
role <- roleMarker <|> (stateRstDefaultRole <$> getState)
return (role,contents)
-unmarkedInterpretedText :: RSTParser [Char]
+unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
-whitespace :: RSTParser Inlines
+whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
-str :: RSTParser Inlines
+str :: Monad m => RSTParser m Inlines
str = do
let strChar = noneOf ("\t\n " ++ specialChars)
result <- many1 strChar
@@ -1095,26 +1448,24 @@ str = do
return $ B.str result
-- an endline character that can be treated as a space, not a structural break
-endline :: RSTParser Inlines
+endline :: Monad m => RSTParser m Inlines
endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts at beginning of line differently in a list:
st <- getState
- if (stateParserContext st) == ListItemState
- then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
+ when (stateParserContext st == ListItemState) $ notFollowedBy (anyOrderedListMarker >> spaceChar) >>
notFollowedBy' bulletListStart
- else return ()
return B.softbreak
--
-- links
--
-link :: RSTParser Inlines
+link :: PandocMonad m => RSTParser m Inlines
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-explicitLink :: RSTParser Inlines
+explicitLink :: PandocMonad m => RSTParser m Inlines
explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` marks start of inline code
@@ -1128,61 +1479,73 @@ explicitLink = try $ do
then B.str src
else label'
-- `link <google_>` is a reference link to _google!
- (src',tit,attr) <- case reverse src of
- '_':xs -> do
- keyTable <- stateKeys <$> getState
- let key = toKey $ reverse xs
- case M.lookup key keyTable of
- Nothing -> do
- pos <- getPosition
- addWarning (Just pos) $
- "Could not find reference for " ++
- show key
- return ("","",nullAttr)
- Just ((s,t),a) -> return (s,t,a)
- _ -> return (src, "", nullAttr)
+ ((src',tit),attr) <- case reverse src of
+ '_':xs -> lookupKey [] (toKey (reverse xs))
+ _ -> return ((src, ""), nullAttr)
return $ B.linkWith attr (escapeURI src') tit label''
-referenceLink :: RSTParser Inlines
+citationName :: PandocMonad m => RSTParser m String
+citationName = do
+ raw <- citationMarker
+ return $ "[" ++ raw ++ "]"
+
+referenceLink :: PandocMonad m => RSTParser m Inlines
referenceLink = try $ do
- (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <*
- char '_'
- state <- getState
- let keyTable = stateKeys state
+ ref <- (referenceName <|> citationName) <* char '_'
+ let label' = B.text ref
let isAnonKey (Key ('_':_)) = True
isAnonKey _ = False
- key <- option (toKey $ stripTicks ref) $
+ state <- getState
+ let keyTable = stateKeys state
+ key <- option (toKey ref) $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
- if null anonKeys
- then mzero
- else return (head anonKeys)
- ((src,tit), attr) <- case M.lookup key keyTable of
- Nothing -> do
- pos <- getPosition
- addWarning (Just pos) $
- "Could not find reference for " ++
- show key
- return (("",""),nullAttr)
- Just val -> return val
+ case anonKeys of
+ [] -> mzero
+ (k:_) -> return k
+ ((src,tit), attr) <- lookupKey [] key
-- if anonymous link, remove key so it won't be used again
- when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
+ when (isAnonKey key) $ updateState $ \s ->
+ s{ stateKeys = M.delete key keyTable }
return $ B.linkWith attr src tit label'
-autoURI :: RSTParser Inlines
+-- We keep a list of oldkeys so we can detect lookup loops.
+lookupKey :: PandocMonad m
+ => [Key] -> Key -> RSTParser m ((String, String), Attr)
+lookupKey oldkeys key = do
+ pos <- getPosition
+ state <- getState
+ let keyTable = stateKeys state
+ case M.lookup key keyTable of
+ Nothing -> do
+ let Key key' = key
+ logMessage $ ReferenceNotFound key' pos
+ return (("",""),nullAttr)
+ -- check for keys of the form link_, which need to be resolved:
+ Just ((u@(_:_),""),_) | last u == '_' -> do
+ let rawkey = init u
+ let newkey = toKey rawkey
+ if newkey `elem` oldkeys
+ then do
+ logMessage $ CircularReference rawkey pos
+ return (("",""),nullAttr)
+ else lookupKey (key:oldkeys) newkey
+ Just val -> return val
+
+autoURI :: Monad m => RSTParser m Inlines
autoURI = do
(orig, src) <- uri
return $ B.link src "" $ B.str orig
-autoEmail :: RSTParser Inlines
+autoEmail :: Monad m => RSTParser m Inlines
autoEmail = do
(orig, src) <- emailAddress
return $ B.link src "" $ B.str orig
-autoLink :: RSTParser Inlines
+autoLink :: PandocMonad m => RSTParser m Inlines
autoLink = autoURI <|> autoEmail
-subst :: RSTParser Inlines
+subst :: PandocMonad m => RSTParser m Inlines
subst = try $ do
(_,ref) <- withRaw $ enclosed (char '|') (char '|') inline
state <- getState
@@ -1191,12 +1554,11 @@ subst = try $ do
case M.lookup key substTable of
Nothing -> do
pos <- getPosition
- addWarning (Just pos) $
- "Could not find reference for " ++ show key
+ logMessage $ ReferenceNotFound (show key) pos
return mempty
Just target -> return target
-note :: RSTParser Inlines
+note :: PandocMonad m => RSTParser m Inlines
note = try $ do
optional whitespace
ref <- noteMarker
@@ -1206,8 +1568,7 @@ note = try $ do
case lookup ref notes of
Nothing -> do
pos <- getPosition
- addWarning (Just pos) $
- "Could not find note for " ++ show ref
+ logMessage $ ReferenceNotFound ref pos
return mempty
Just raw -> do
-- We temporarily empty the note list while parsing the note,
@@ -1215,8 +1576,8 @@ note = try $ do
-- Note references inside other notes are allowed in reST, but
-- not yet in this implementation.
updateState $ \st -> st{ stateNotes = [] }
- contents <- parseFromString parseBlocks raw
- let newnotes = if (ref == "*" || ref == "#") -- auto-numbered
+ contents <- parseFromString' parseBlocks raw
+ let newnotes = if ref == "*" || ref == "#" -- auto-numbered
-- delete the note so the next auto-numbered note
-- doesn't get the same contents:
then deleteFirstsBy (==) notes [(ref,raw)]
@@ -1224,20 +1585,20 @@ note = try $ do
updateState $ \st -> st{ stateNotes = newnotes }
return $ B.note contents
-smart :: RSTParser Inlines
+smart :: PandocMonad m => RSTParser m Inlines
smart = do
- getOption readerSmart >>= guard
+ guardEnabled Ext_smart
doubleQuoted <|> singleQuoted <|>
choice [apostrophe, dash, ellipses]
-singleQuoted :: RSTParser Inlines
+singleQuoted :: PandocMonad m => RSTParser m Inlines
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
B.singleQuoted . trimInlines . mconcat <$>
many1Till inline singleQuoteEnd
-doubleQuoted :: RSTParser Inlines
+doubleQuoted :: PandocMonad m => RSTParser m Inlines
doubleQuoted = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $