diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/RST.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 1041 |
1 files changed, 701 insertions, 340 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e05b6cba2..e88d997f0 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,38 +30,50 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} -module Text.Pandoc.Readers.RST ( - readRST, - readRSTWithWarnings - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Builder (setMeta, fromList) -import Text.Pandoc.Shared -import Text.Pandoc.Parsing -import Text.Pandoc.Options -import Control.Monad ( when, liftM, guard, mzero ) -import Data.List ( findIndex, intercalate, - transpose, sort, deleteFirstsBy, isSuffixOf , nub, union) -import Data.Maybe (fromMaybe) +module Text.Pandoc.Readers.RST ( readRST ) where +import Control.Arrow (second) +import Control.Monad (forM_, guard, liftM, mplus, mzero, when) +import Control.Monad.Except (throwError) +import Control.Monad.Identity (Identity (..)) +import Data.Char (isHexDigit, isSpace, toLower, toUpper) +import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, + nub, sort, transpose, union) import qualified Data.Map as M -import Text.Printf ( printf ) -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) -import qualified Text.Pandoc.Builder as B -import Data.Sequence (viewr, ViewR(..)) -import Data.Char (toLower, isHexDigit, isSpace) +import Data.Maybe (fromMaybe, isJust) import Data.Monoid ((<>)) +import Data.Sequence (ViewR (..), viewr) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs) +import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV) +import Text.Pandoc.Definition import Text.Pandoc.Error +import Text.Pandoc.ImageSize (lengthToDim, scaleDimension) +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing +import Text.Pandoc.Shared +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Printf (printf) --- | Parse reStructuredText string and return Pandoc document. -readRST :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") - -readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) -readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n") +-- TODO: +-- [ ] .. parsed-literal -type RSTParser = Parser [Char] ParserState +-- | Parse reStructuredText string and return Pandoc document. +readRST :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readRST opts s = do + parsed <- readWithM parseRST def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case parsed of + Right result -> return result + Left e -> throwError e + +type RSTParser m = ParserT [Char] ParserState m -- -- Constants and data structure definitions @@ -87,9 +100,9 @@ isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level attr text):rest) = - (Header (level - num) attr text):(promoteHeaders num rest) -promoteHeaders num (other:rest) = other:(promoteHeaders num rest) +promoteHeaders num (Header level attr text:rest) = + Header (level - num) attr text:promoteHeaders num rest +promoteHeaders num (other:rest) = other:promoteHeaders num rest promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) @@ -101,11 +114,11 @@ titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata titleTransform (bs, meta) = let (bs', meta') = case bs of - ((Header 1 _ head1):(Header 2 _ head2):rest) + (Header 1 _ head1:Header 2 _ head2:rest) | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub (promoteHeaders 2 rest, setMeta "title" (fromList head1) $ setMeta "subtitle" (fromList head2) meta) - ((Header 1 _ head1):rest) + (Header 1 _ head1:rest) | not (any (isHeader 1) rest) -> -- title only (promoteHeaders 1 rest, setMeta "title" (fromList head1) meta) @@ -121,8 +134,10 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" - $ M.mapKeys (\k -> if k == "authors" then "author" else k) - $ metamap + $ M.mapKeys (\k -> + if k == "authors" + then "author" + else k) metamap toPlain (MetaBlocks [Para xs]) = MetaInlines xs toPlain x = x splitAuthors (MetaBlocks [Para xs]) @@ -131,6 +146,12 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds splitAuthors x = x splitAuthors' = map normalizeSpaces . splitOnSemi . concatMap factorSemi + normalizeSpaces = reverse . dropWhile isSp . reverse . + dropWhile isSp + isSp Space = True + isSp SoftBreak = True + isSp LineBreak = True + isSp _ = False splitOnSemi = splitBy (==Str ";") factorSemi (Str []) = [] factorSemi (Str s) = case break (==';') s of @@ -141,41 +162,61 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds factorSemi (Str ys) factorSemi x = [x] -parseRST :: RSTParser Pandoc +parseRST :: PandocMonad m => RSTParser m Pandoc parseRST = do optional blanklines -- skip blank lines at beginning of file startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... docMinusKeys <- concat <$> - manyTill (referenceKey <|> noteBlock <|> lineClump) eof + manyTill (referenceKey <|> anchorDef <|> + noteBlock <|> citationBlock <|> + headerBlock <|> lineClump) eof setInput docMinusKeys setPosition startPos st' <- getState let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } + updateState $ \s -> s { stateNotes = reverse reversedNotes + , stateHeaders = mempty + , stateIdentifiers = mempty } -- now parse it for real... blocks <- B.toList <$> parseBlocks + citations <- (sort . M.toList . stateCitations) <$> getState + citationItems <- mapM parseCitation citations + let refBlock = if null citationItems + then [] + else [Div ("citations",[],[]) $ + B.toList $ B.definitionList citationItems] standalone <- getOption readerStandalone state <- getState let meta = stateMeta state let (blocks', meta') = if standalone then titleTransform (blocks, meta) else (blocks, meta) - return $ Pandoc meta' blocks' + reportLogMessages + return $ Pandoc meta' (blocks' ++ refBlock) + +parseCitation :: PandocMonad m + => (String, String) -> RSTParser m (Inlines, [Blocks]) +parseCitation (ref, raw) = do + contents <- parseFromString' parseBlocks raw + return (B.spanWith (ref, ["citation-label"], []) (B.str ref), + [contents]) + -- -- parsing blocks -- -parseBlocks :: RSTParser Blocks +parseBlocks :: PandocMonad m => RSTParser m Blocks parseBlocks = mconcat <$> manyTill block eof -block :: RSTParser Blocks +block :: PandocMonad m => RSTParser m Blocks block = choice [ codeBlock , blockQuote , fieldList , directive + , anchor , comment , header , hrule @@ -191,7 +232,7 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: Int -> RSTParser (String, String) +rawFieldListItem :: Monad m => Int -> RSTParser m (String, String) rawFieldListItem minIndent = try $ do indent <- length <$> many (char ' ') guard $ indent >= minIndent @@ -204,15 +245,15 @@ rawFieldListItem minIndent = try $ do let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" return (name, raw) -fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) +fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent term <- parseInlineFromString name - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw optional blanklines return (term, [contents]) -fieldList :: RSTParser Blocks +fieldList :: PandocMonad m => RSTParser m Blocks fieldList = try $ do indent <- length <$> lookAhead (many spaceChar) items <- many1 $ fieldListItem indent @@ -224,43 +265,62 @@ fieldList = try $ do -- line block -- -lineBlock :: RSTParser Blocks +lineBlock :: PandocMonad m => RSTParser m Blocks lineBlock = try $ do lines' <- lineBlockLines lines'' <- mapM parseInlineFromString lines' return $ B.lineBlock lines'' +lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks +lineBlockDirective body = do + lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body + return $ B.lineBlock lines' + -- -- paragraph block -- -- note: paragraph can end in a :: starting a code block -para :: RSTParser Blocks +para :: PandocMonad m => RSTParser m Blocks para = try $ do result <- trimInlines . mconcat <$> many1 inline option (B.plain result) $ try $ do newline blanklines case viewr (B.unMany result) of - ys :> (Str xs) | "::" `isSuffixOf` xs -> do + ys :> Str xs | "::" `isSuffixOf` xs -> do raw <- option mempty codeBlockBody return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs)) <> raw _ -> return (B.para result) -plain :: RSTParser Blocks +plain :: PandocMonad m => RSTParser m Blocks plain = B.plain . trimInlines . mconcat <$> many1 inline -- -- header blocks -- -header :: RSTParser Blocks +header :: PandocMonad m => RSTParser m Blocks header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom -doubleHeader :: RSTParser Blocks -doubleHeader = try $ do +doubleHeader :: PandocMonad m => RSTParser m Blocks +doubleHeader = do + (txt, c) <- doubleHeader' + -- check to see if we've had this kind of header before. + -- if so, get appropriate level. if not, add to list. + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case elemIndex (DoubleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [DoubleHeader c], length headerTable + 1) + setState (state { stateHeaderTable = headerTable' }) + attr <- registerHeader nullAttr txt + return $ B.headerWith attr level txt + +doubleHeader' :: PandocMonad m => RSTParser m (Inlines, Char) +doubleHeader' = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line let lenTop = length (c:rest) @@ -268,48 +328,45 @@ doubleHeader = try $ do newline txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline) pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else return () + let len = sourceColumn pos - 1 + when (len > lenTop) $ fail "title longer than border" blankline -- spaces and newline count lenTop (char c) -- the bottom line blanklines - -- check to see if we've had this kind of header before. - -- if so, get appropriate level. if not, add to list. + return (txt, c) + +-- a header with line on the bottom only +singleHeader :: PandocMonad m => RSTParser m Blocks +singleHeader = do + (txt, c) <- singleHeader' state <- getState let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of + let (headerTable',level) = case elemIndex (SingleHeader c) headerTable of Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) + Nothing -> (headerTable ++ [SingleHeader c], length headerTable + 1) setState (state { stateHeaderTable = headerTable' }) attr <- registerHeader nullAttr txt return $ B.headerWith attr level txt --- a header with line on the bottom only -singleHeader :: RSTParser Blocks -singleHeader = try $ do +singleHeader' :: PandocMonad m => RSTParser m (Inlines, Char) +singleHeader' = try $ do notFollowedBy' whitespace - txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline}) + lookAhead $ anyLine >> oneOf underlineChars + txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline) pos <- getPosition - let len = (sourceColumn pos) - 1 + let len = sourceColumn pos - 1 blankline c <- oneOf underlineChars count (len - 1) (char c) many (char c) blanklines - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - attr <- registerHeader nullAttr txt - return $ B.headerWith attr level txt + return (txt, c) -- -- hrule block -- -hrule :: Parser [Char] st Blocks +hrule :: Monad m => ParserT [Char] st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -323,14 +380,14 @@ hrule = try $ do -- -- read a line indented by a given string -indentedLine :: String -> Parser [Char] st [Char] +indentedLine :: Monad m => String -> ParserT [Char] st m [Char] indentedLine indents = try $ do string indents anyLine -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. -indentedBlock :: Parser [Char] st [Char] +indentedBlock :: Monad m => ParserT [Char] st m [Char] indentedBlock = try $ do indents <- lookAhead $ many1 spaceChar lns <- many1 $ try $ do b <- option "" blanklines @@ -339,24 +396,24 @@ indentedBlock = try $ do optional blanklines return $ unlines lns -quotedBlock :: Parser [Char] st [Char] +quotedBlock :: Monad m => ParserT [Char] st m [Char] quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines return $ unlines lns -codeBlockStart :: Parser [Char] st Char +codeBlockStart :: Monad m => ParserT [Char] st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: Parser [Char] st Blocks +codeBlock :: Monad m => ParserT [Char] st m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: Parser [Char] st Blocks +codeBlockBody :: Monad m => ParserT [Char] st m Blocks codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> (indentedBlock <|> quotedBlock) -lhsCodeBlock :: RSTParser Blocks +lhsCodeBlock :: Monad m => RSTParser m Blocks lhsCodeBlock = try $ do getPosition >>= guard . (==1) . sourceColumn guardEnabled Ext_literate_haskell @@ -366,14 +423,14 @@ lhsCodeBlock = try $ do return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns -latexCodeBlock :: Parser [Char] st [[Char]] +latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Parser [Char] st [[Char]] +birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it @@ -381,99 +438,159 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine then map (drop 1) lns else lns -birdTrackLine :: Parser [Char] st [Char] +birdTrackLine :: Monad m => ParserT [Char] st m [Char] birdTrackLine = char '>' >> anyLine -- -- block quotes -- -blockQuote :: RSTParser Blocks +blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ raw ++ "\n\n" return $ B.blockQuote contents +{- +Unsupported options for include: +tab-width +encoding +-} + +includeDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +includeDirective top fields body = do + let f = trim top + guard $ not (null f) + guard $ null (trim body) + -- options + let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead + let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead + oldPos <- getPosition + oldInput <- getInput + containers <- stateContainers <$> getState + when (f `elem` containers) $ + throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + updateState $ \s -> s{ stateContainers = f : stateContainers s } + mbContents <- readFileFromDirs ["."] f + contentLines <- case mbContents of + Just s -> return $ lines s + Nothing -> do + logMessage $ CouldNotLoadIncludeFile f oldPos + return [] + let numLines = length contentLines + let startLine' = case startLine of + Nothing -> 1 + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end + let endLine' = case endLine of + Nothing -> numLines + 1 + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end + let contentLines' = drop (startLine' - 1) + $ take (endLine' - 1) contentLines + let contentLines'' = (case trim <$> lookup "end-before" fields of + Just patt -> takeWhile (not . (patt `isInfixOf`)) + Nothing -> id) . + (case trim <$> lookup "start-after" fields of + Just patt -> drop 1 . + dropWhile (not . (patt `isInfixOf`)) + Nothing -> id) $ contentLines' + let contents' = unlines contentLines'' ++ "\n" + case lookup "code" fields of + Just lang -> do + let numberLines = lookup "number-lines" fields + let classes = trimr lang : ["numberLines" | isJust numberLines] ++ + maybe [] words (lookup "class" fields) + let kvs = maybe [] (\n -> [("startFrom", trimr n)]) numberLines + let ident = maybe "" trimr $ lookup "name" fields + let attribs = (ident, classes, kvs) + return $ B.codeBlockWith attribs contents' + Nothing -> case lookup "literal" fields of + Just _ -> return $ B.rawBlock "rst" contents' + Nothing -> do + setPosition $ newPos f 1 1 + setInput contents' + bs <- optional blanklines >> + (mconcat <$> many block) + setInput oldInput + setPosition oldPos + updateState $ \s -> s{ stateContainers = + tail $ stateContainers s } + return bs + + -- -- list blocks -- -list :: RSTParser Blocks +list :: PandocMonad m => RSTParser m Blocks list = choice [ bulletList, orderedList, definitionList ] <?> "list" -definitionListItem :: RSTParser (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks]) definitionListItem = try $ do -- avoid capturing a directive or comment notFollowedBy (try $ char '.' >> char '.') term <- trimInlines . mconcat <$> many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n" + contents <- parseFromString' parseBlocks $ raw ++ "\n" return (term, [contents]) -definitionList :: RSTParser Blocks +definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Parser [Char] st Int +bulletListStart :: Monad m => ParserT [Char] st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers - white <- many1 spaceChar + white <- many1 spaceChar <|> "" <$ lookAhead (char '\n') return $ length (marker:white) -- parses ordered list start and returns its length (inc following whitespace) -orderedListStart :: ListNumberStyle +orderedListStart :: Monad m => ListNumberStyle -> ListNumberDelim - -> RSTParser Int + -> RSTParser m Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) - white <- many1 spaceChar + white <- many1 spaceChar <|> "" <$ lookAhead (char '\n') return $ markerLen + length white -- parse a line of a list item -listLine :: Int -> RSTParser [Char] +listLine :: Monad m => Int -> RSTParser m [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength - line <- anyLine - return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> RSTParser [Char] -indentWith num = do - tabStop <- getOption readerTabStop - if (num < tabStop) - then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] + anyLineNewline -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: RSTParser Int - -> RSTParser (Int, [Char]) +rawListItem :: Monad m => RSTParser m Int + -> RSTParser m (Int, [Char]) rawListItem start = try $ do markerLength <- start - firstLine <- anyLine + firstLine <- anyLineNewline restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + return (markerLength, firstLine ++ concat restLines) -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int -> RSTParser [Char] +listContinuation :: Monad m => Int -> RSTParser m [Char] listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) return $ blanks ++ concat result -listItem :: RSTParser Int - -> RSTParser Blocks +listItem :: PandocMonad m + => RSTParser m Int + -> RSTParser m Blocks listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) - blanks <- choice [ try (many blankline <* lookAhead start), - many1 blankline ] -- whole list must end with blank. + skipMany1 blankline <|> () <$ lookAhead start -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. -- see definition of "endline" @@ -481,52 +598,52 @@ listItem start = try $ do let oldContext = stateParserContext state setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks + parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n" updateState (\st -> st {stateParserContext = oldContext}) return $ case B.toList parsed of - [Para xs] -> B.singleton $ Plain xs - [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys] - [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys] - [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys] + [Para xs] -> + B.singleton $ Plain xs + [Para xs, BulletList ys] -> + B.fromList [Plain xs, BulletList ys] + [Para xs, OrderedList s ys] -> + B.fromList [Plain xs, OrderedList s ys] + [Para xs, DefinitionList ys] -> + B.fromList [Plain xs, DefinitionList ys] _ -> parsed -orderedList :: RSTParser Blocks +orderedList :: PandocMonad m => RSTParser m Blocks orderedList = try $ do (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar) items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify' items + let items' = compactify items return $ B.orderedListWith (start, style, delim) items' -bulletList :: RSTParser Blocks -bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) +bulletList :: PandocMonad m => RSTParser m Blocks +bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart) -- -- directive (e.g. comment, container, compound-paragraph) -- -comment :: RSTParser Blocks +comment :: Monad m => RSTParser m Blocks comment = try $ do string ".." skipMany1 spaceChar <|> (() <$ lookAhead newline) - notFollowedBy' directiveLabel + -- notFollowedBy' directiveLabel -- comment comes after directive so unnec. manyTill anyChar blanklines optional indentedBlock return mempty -directiveLabel :: RSTParser String +directiveLabel :: Monad m => RSTParser m String directiveLabel = map toLower <$> many1Till (letter <|> char '-') (try $ string "::") -directive :: RSTParser Blocks +directive :: PandocMonad m => RSTParser m Blocks directive = try $ do string ".." directive' --- TODO: line-block, parsed-literal, table, csv-table, list-table --- date --- include --- title -directive' :: RSTParser Blocks +directive' :: PandocMonad m => RSTParser m Blocks directive' = do skipMany1 spaceChar label <- directiveLabel @@ -541,43 +658,71 @@ directive' = do body <- option "" $ try $ blanklines >> indentedBlock optional blanklines let body' = body ++ "\n\n" - imgAttr cl = ("", classes, getAtt "width" ++ getAtt "height") + name = trim $ fromMaybe "" (lookup "name" fields) + imgAttr cl = ("", classes, widthAttr ++ heightAttr) where - classes = words $ maybe "" trim $ lookup cl fields - getAtt k = case lookup k fields of - Just v -> [(k, filter (not . isSpace) v)] - Nothing -> [] + classes = words $ maybe "" trim (lookup cl fields) ++ + maybe "" (\x -> "align-" ++ trim x) + (lookup "align" fields) + scale = case trim <$> lookup "scale" fields of + Just v -> case reverse v of + '%':vv -> + case safeRead (reverse vv) of + Just (percent :: Double) + -> percent / 100.0 + Nothing -> 1.0 + _ -> + case safeRead v of + Just (s :: Double) -> s + Nothing -> 1.0 + Nothing -> 1.0 + widthAttr = maybe [] (\x -> [("width", + show $ scaleDimension scale x)]) + $ lookup "width" fields >>= + (lengthToDim . filter (not . isSpace)) + heightAttr = maybe [] (\x -> [("height", + show $ scaleDimension scale x)]) + $ lookup "height" fields >>= + (lengthToDim . filter (not . isSpace)) case label of + "include" -> includeDirective top fields body' + "table" -> tableDirective top fields body' + "list-table" -> listTableDirective top fields body' + "csv-table" -> csvTableDirective top fields body' + "line-block" -> lineBlockDirective body' "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) - "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields - "container" -> parseFromString parseBlocks body' + "role" -> addNewRole top $ map (second trim) fields + "container" -> B.divWith (name, "container" : words top, []) <$> + parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseInlineFromString (trim top) "unicode" -> B.para <$> -- consumed by substKey parseInlineFromString (trim $ unicodeTransform top) - "compound" -> parseFromString parseBlocks body' - "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body' - "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body' - "highlights" -> B.blockQuote <$> parseFromString parseBlocks body' + "compound" -> parseFromString' parseBlocks body' + "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body' + "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body' + "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body' "rubric" -> B.para . B.strong <$> parseInlineFromString top _ | label `elem` ["attention","caution","danger","error","hint", - "important","note","tip","warning"] -> - do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' - return $ B.divWith ("",["admonition", label],[]) bod - "admonition" -> - do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' - return $ B.divWith ("",["admonition"],[]) bod + "important","note","tip","warning","admonition"] -> + do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + let lab = case label of + "admonition" -> mempty + (l:ls) -> B.divWith ("",["admonition-title"],[]) + (B.para (B.str (toUpper l : ls))) + [] -> mempty + return $ B.divWith ("",[label],[]) (lab <> bod) "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields tit <- B.para . B.strong <$> parseInlineFromString (trim top ++ if null subtit then "" else (": " ++ subtit)) - bod <- parseFromString parseBlocks body' + bod <- parseFromString' parseBlocks body' return $ B.divWith ("",["sidebar"],[]) $ tit <> bod "topic" -> do tit <- B.para . B.strong <$> parseInlineFromString top - bod <- parseFromString parseBlocks body' + bod <- parseFromString' parseBlocks body' return $ B.divWith ("",["topic"],[]) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> s { stateRstDefaultRole = @@ -588,14 +733,15 @@ directive' = do codeblock (words $ fromMaybe [] $ lookup "class" fields) (lookup "number-lines" fields) (trim top) body "aafig" -> do - let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields) + let attribs = ("", ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body "figure" -> do - (caption, legend) <- parseFromString extractCaption body' + (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top - return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend + return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" + caption) <> legend "image" -> do let src = escapeURI $ trim top let alt = B.str $ maybe "image" trim $ lookup "alt" fields @@ -606,28 +752,150 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + let attrs = ("", splitBy isSpace $ trim top, + map (second trimr) fields) -- directive content or the first immediately following element children <- case body of "" -> block - _ -> parseFromString parseBlocks body' + _ -> parseFromString' parseBlocks body' return $ B.divWith attrs children other -> do pos <- getPosition - addWarning (Just pos) $ "ignoring unknown directive: " ++ other - return mempty + logMessage $ SkippedContent (".. " ++ other) pos + bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + return $ B.divWith ("",[other],[]) bod + +tableDirective :: PandocMonad m + => String -> [(String, String)] -> String -> RSTParser m Blocks +tableDirective top fields body = do + bs <- parseFromString' parseBlocks body + case B.toList bs of + [Table _ aligns' widths' header' rows'] -> do + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + columns <- getOption readerColumns + let numOfCols = length header' + let normWidths ws = + map (/ max 1.0 (fromIntegral (columns - numOfCols))) ws + let widths = case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0.0 + Just "grid" -> widths' + Just specs -> normWidths + $ map (fromMaybe (0 :: Double) . safeRead) + $ splitBy (`elem` (" ," :: String)) specs + Nothing -> widths' + -- align is not applicable since we can't represent whole table align + return $ B.singleton $ Table (B.toList title) + aligns' widths header' rows' + _ -> return mempty + + +-- TODO: :stub-columns:. +-- Only the first row becomes the header even if header-rows: > 1, +-- since Pandoc doesn't support a table with multiple header rows. +-- We don't need to parse :align: as it represents the whole table align. +listTableDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +listTableDirective top fields body = do + bs <- parseFromString' parseBlocks body + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + let rows = takeRows $ B.toList bs + headerRowsNum = fromMaybe (0 :: Int) $ + lookup "header-rows" fields >>= safeRead + (headerRow,bodyRows,numOfCols) = case rows of + x:xs -> if headerRowsNum > 0 + then (x, xs, length x) + else ([], rows, length x) + _ -> ([],[],0) + widths = case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0 + Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ + splitBy (`elem` (" ," :: String)) specs + _ -> replicate numOfCols 0 + return $ B.table title + (zip (replicate numOfCols AlignDefault) widths) + headerRow + bodyRows + where takeRows [BulletList rows] = map takeCells rows + takeRows _ = [] + takeCells [BulletList cells] = map B.fromList cells + takeCells _ = [] + normWidths ws = map (/ max 1 (sum ws)) ws + +csvTableDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +csvTableDirective top fields rawcsv = do + let explicitHeader = trim <$> lookup "header" fields + let opts = defaultCSVOptions{ + csvDelim = case trim <$> lookup "delim" fields of + Just "tab" -> '\t' + Just "space" -> ' ' + Just [c] -> c + _ -> ',' + , csvQuote = case trim <$> lookup "quote" fields of + Just [c] -> c + _ -> '"' + , csvEscape = case trim <$> lookup "escape" fields of + Just [c] -> Just c + _ -> Nothing + , csvKeepSpace = case trim <$> lookup "keepspace" fields of + Just "true" -> True + _ -> False + } + let headerRowsNum = fromMaybe (case explicitHeader of + Just _ -> 1 :: Int + Nothing -> 0 :: Int) $ + lookup "header-rows" fields >>= safeRead + rawcsv' <- case trim <$> + lookup "file" fields `mplus` lookup "url" fields of + Just u -> do + (bs, _) <- fetchItem u + return $ UTF8.toString bs + Nothing -> return rawcsv + let res = parseCSV opts (T.pack $ case explicitHeader of + Just h -> h ++ "\n" ++ rawcsv' + Nothing -> rawcsv') + case res of + Left e -> + throwError $ PandocParsecError "csv table" e + Right rawrows -> do + let parseCell = parseFromString' (plain <|> return mempty) . T.unpack + let parseRow = mapM parseCell + rows <- mapM parseRow rawrows + let (headerRow,bodyRows,numOfCols) = + case rows of + x:xs -> if headerRowsNum > 0 + then (x, xs, length x) + else ([], rows, length x) + _ -> ([],[],0) + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + let normWidths ws = map (/ max 1 (sum ws)) ws + let widths = + case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0 + Just specs -> normWidths + $ map (fromMaybe (0 :: Double) . safeRead) + $ splitBy (`elem` (" ," :: String)) specs + _ -> replicate numOfCols 0 + return $ B.table title + (zip (replicate numOfCols AlignDefault) widths) + headerRow + bodyRows -- TODO: -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix -addNewRole :: String -> [(String, String)] -> RSTParser Blocks +addNewRole :: PandocMonad m + => String -> [(String, String)] -> RSTParser m Blocks addNewRole roleString fields = do - (role, parentRole) <- parseFromString inheritedRole roleString + pos <- getPosition + (role, parentRole) <- parseFromString' inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState let getBaseRole (r, f, a) roles = case M.lookup r roles of Just (r', f', a') -> getBaseRole (r', f', a') roles - Nothing -> (r, f, a) + Nothing -> (r, f, a) (baseRole, baseFmt, baseAttr) = getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt @@ -641,34 +909,32 @@ addNewRole roleString fields = do in (ident, nub . (role :) . annotate $ classes, keyValues) -- warn about syntax we ignore - flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (baseRole /= "code") $ addWarning Nothing $ - "ignoring :language: field because the parent of role :" ++ - role ++ ": is :" ++ baseRole ++ ": not :code:" - "format" -> when (baseRole /= "raw") $ addWarning Nothing $ - "ignoring :format: field because the parent of role :" ++ - role ++ ": is :" ++ baseRole ++ ": not :raw:" - _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ - ": in definition of role :" ++ role ++ ": in" + forM_ fields $ \(key, _) -> case key of + "language" -> when (baseRole /= "code") $ logMessage $ + SkippedContent ":language: [because parent of role is not :code:]" + pos + "format" -> when (baseRole /= "raw") $ logMessage $ + SkippedContent ":format: [because parent of role is not :raw:]" pos + _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ - addWarning Nothing $ - "ignoring :format: fields after the first in the definition of role :" - ++ role ++": in" + logMessage $ SkippedContent + ":format: [after first in definition of role]" + pos when (parentRole == "code" && countKeys "language" > 1) $ - addWarning Nothing $ - "ignoring :language: fields after the first in the definition of role :" - ++ role ++": in" + logMessage $ SkippedContent + ":language: [after first in definition of role]" pos updateState $ \s -> s { stateRstCustomRoles = M.insert role (baseRole, fmt, attr) customRoles } - return $ B.singleton Null + return mempty where countKeys k = length . filter (== k) . map fst $ fields inheritedRole = - (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span") + (,) <$> roleName <*> ((char '(' *> roleName <* char ')') + <|> pure "span") -- Can contain character codes as decimal numbers or @@ -700,24 +966,29 @@ extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc where (ds,rest) = span isHexDigit s mbc = safeRead ('\'':'\\':'x':ds ++ "'") -extractCaption :: RSTParser (Inlines, Blocks) +extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks) extractCaption = do capt <- trimInlines . mconcat <$> many inline legend <- optional blanklines >> (mconcat <$> many block) return (capt,legend) --- divide string by blanklines +-- divide string by blanklines, and surround with +-- \begin{aligned}...\end{aligned} if needed. toChunks :: String -> [String] toChunks = dropWhile null - . map (trim . unlines) + . map (addAligned . trim . unlines) . splitBy (all (`elem` (" \t" :: String))) . lines + -- we put this in an aligned environment if it contains \\, see #4254 + where addAligned s = if "\\\\" `isInfixOf` s + then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}" + else s -codeblock :: [String] -> Maybe String -> String -> String -> RSTParser Blocks +codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks codeblock classes numberLines lang body = return $ B.codeBlockWith attribs $ stripTrailingNewlines body where attribs = ("", classes', kvs) classes' = "sourceCode" : lang - : maybe [] (\_ -> ["numberLines"]) numberLines + : maybe [] (const ["numberLines"]) numberLines ++ classes kvs = case numberLines of Just "" -> [] @@ -728,30 +999,52 @@ codeblock classes numberLines lang body = --- note block --- -noteBlock :: RSTParser [Char] +noteBlock :: Monad m => RSTParser m [Char] noteBlock = try $ do + (ref, raw, replacement) <- noteBlock' noteMarker + updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s } + -- return blanks so line count isn't affected + return replacement + +citationBlock :: Monad m => RSTParser m [Char] +citationBlock = try $ do + (ref, raw, replacement) <- noteBlock' citationMarker + updateState $ \s -> + s { stateCitations = M.insert ref raw (stateCitations s), + stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[])) + (stateKeys s) } + -- return blanks so line count isn't affected + return replacement + +noteBlock' :: Monad m + => RSTParser m String -> RSTParser m (String, String, String) +noteBlock' marker = try $ do startPos <- getPosition string ".." spaceChar >> skipMany spaceChar - ref <- noteMarker + ref <- marker first <- (spaceChar >> skipMany spaceChar >> anyLine) <|> (newline >> return "") blanks <- option "" blanklines rest <- option "" indentedBlock endPos <- getPosition let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" - let newnote = (ref, raw) - st <- getState - let oldnotes = stateNotes st - updateState $ \s -> s { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n' + return (ref, raw, replacement) + +citationMarker :: Monad m => RSTParser m [Char] +citationMarker = do + char '[' + res <- simpleReferenceName + char ']' + return res -noteMarker :: RSTParser [Char] +noteMarker :: Monad m => RSTParser m [Char] noteMarker = do char '[' res <- many1 digit - <|> (try $ char '#' >> liftM ('#':) simpleReferenceName') + <|> + try (char '#' >> liftM ('#':) simpleReferenceName) <|> count 1 (oneOf "#*") char ']' return res @@ -760,39 +1053,26 @@ noteMarker = do -- reference key -- -quotedReferenceName :: RSTParser Inlines +quotedReferenceName :: PandocMonad m => RSTParser m String quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - label' <- trimInlines . mconcat <$> many1Till inline (char '`') - return label' - -unquotedReferenceName :: RSTParser Inlines -unquotedReferenceName = try $ do - label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':') - return label' + manyTill anyChar (char '`') -- Simple reference names are single words consisting of alphanumerics -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName' :: Parser [Char] st String -simpleReferenceName' = do +simpleReferenceName :: Monad m => ParserT [Char] st m String +simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum - <|> (try $ oneOf "-_:+." <* lookAhead alphaNum) + <|> try (oneOf "-_:+." <* lookAhead alphaNum) return (x:xs) -simpleReferenceName :: Parser [Char] st Inlines -simpleReferenceName = do - raw <- simpleReferenceName' - return $ B.str raw +referenceName :: PandocMonad m => RSTParser m String +referenceName = quotedReferenceName <|> simpleReferenceName -referenceName :: RSTParser Inlines -referenceName = quotedReferenceName <|> - (try $ simpleReferenceName <* lookAhead (char ':')) <|> - unquotedReferenceName - -referenceKey :: RSTParser [Char] +referenceKey :: PandocMonad m => RSTParser m [Char] referenceKey = do startPos <- getPosition choice [substKey, anonymousKey, regularKey] @@ -801,16 +1081,16 @@ referenceKey = do -- return enough blanks to replace key return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -targetURI :: Parser [Char] st [Char] +targetURI :: Monad m => ParserT [Char] st m [Char] targetURI = do skipSpaces optional newline contents <- many1 (try (many spaceChar >> newline >> many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines - return $ escapeURI $ trim $ contents + return $ escapeURI $ trim contents -substKey :: RSTParser () +substKey :: PandocMonad m => RSTParser m () substKey = try $ do string ".." skipMany1 spaceChar @@ -826,31 +1106,85 @@ substKey = try $ do [Para ils] -> return $ B.fromList ils _ -> mzero let key = toKey $ stripFirstAndLast ref - updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s } + updateState $ \s -> s{ stateSubstitutions = + M.insert key il $ stateSubstitutions s } -anonymousKey :: RSTParser () +anonymousKey :: Monad m => RSTParser m () anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) - --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } - -stripTicks :: String -> String -stripTicks = reverse . stripTick . reverse . stripTick - where stripTick ('`':xs) = xs - stripTick xs = xs - -regularKey :: RSTParser () + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } + +referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames = do + let rn = try $ do + string ".. _" + ref <- quotedReferenceName + <|> many ( noneOf ":\n" + <|> try (char '\n' <* + string " " <* + notFollowedBy blankline) + <|> try (char ':' <* lookAhead alphaNum) + ) + char ':' + return ref + first <- rn + rest <- many (try (blanklines *> rn)) + return (first:rest) + +regularKey :: PandocMonad m => RSTParser m () regularKey = try $ do - string ".. _" - (_,ref) <- withRaw referenceName - char ':' + -- we allow several references to the same URL, e.g. + -- .. _hello: + -- .. _goodbye: url.com + refs <- referenceNames src <- targetURI - let key = toKey $ stripTicks ref - --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } + guard $ not (null src) + let keys = map toKey refs + forM_ keys $ \key -> + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } + +anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef = try $ do + (refs, raw) <- withRaw $ try (referenceNames <* blanklines) + forM_ refs $ \rawkey -> + updateState $ \s -> s { stateKeys = + M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + -- keep this for 2nd round of parsing, where we'll add the divs (anchor) + return raw + +anchor :: PandocMonad m => RSTParser m Blocks +anchor = try $ do + refs <- referenceNames + blanklines + b <- block + let addDiv ref = B.divWith (ref, [], []) + let emptySpanWithId id' = Span (id',[],[]) [] + -- put identifier on next block: + case B.toList b of + [Header lev (_,classes,kvs) txt] -> + case reverse refs of + [] -> return b + (r:rs) -> return $ B.singleton $ + Header lev (r,classes,kvs) + (txt ++ map emptySpanWithId rs) + -- we avoid generating divs for headers, + -- because it hides them from promoteHeader, see #4240 + _ -> return $ foldr addDiv b refs + +headerBlock :: PandocMonad m => RSTParser m [Char] +headerBlock = do + ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader') + (ident,_,_) <- registerHeader nullAttr txt + let key = toKey (stringify txt) + updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr) + $ stateKeys s } + return raw + -- -- tables @@ -869,45 +1203,53 @@ regularKey = try $ do -- Grid tables TODO: -- - column spans -dashedLine :: Char -> Parser [Char] st (Int, Int) +dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator -simpleTableSep :: Char -> RSTParser Char +simpleTableSep :: Monad m => Char -> RSTParser m Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: RSTParser [Char] +simpleTableFooter :: Monad m => RSTParser m [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: [Int] -> RSTParser [String] -simpleTableRawLine indices = do - line <- many1Till anyChar newline - return (simpleTableSplitLine indices line) +simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine + +simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLineWithEmptyCell indices = try $ do + cs <- simpleTableRawLine indices + let isEmptyCell = all (\c -> c == ' ' || c == '\t') + guard $ any isEmptyCell cs + return cs -- Parse a table row and return a list of blocks (columns). -simpleTableRow :: [Int] -> RSTParser [[Block]] +simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks] simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices - colLines <- return [] -- TODO - let cols = map unlines . transpose $ firstLine : colLines - mapM (parseFromString (B.toList . mconcat <$> many plain)) cols + conLines <- many $ simpleTableRawLineWithEmptyCell indices + let cols = map unlines . transpose $ firstLine : conLines ++ + [replicate (length indices) "" + | not (null conLines)] + mapM (parseFromString' parseBlocks) cols simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = - map trim + map trimr $ tail $ splitByIndices (init indices) line -simpleTableHeader :: Bool -- ^ Headerless table - -> RSTParser ([[Block]], [Alignment], [Int]) +simpleTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m ([Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -921,26 +1263,37 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - heads <- mapM (parseFromString (B.toList . mconcat <$> many plain)) $ - map trim rawHeads + heads <- mapM ( parseFromString' (mconcat <$> many plain) . trim) rawHeads return (heads, aligns, indices) -- Parse a simple table. -simpleTable :: Bool -- ^ Headerless table - -> RSTParser Blocks +simpleTable :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m Blocks simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + let wrapIdFst (a, b, c) = (Identity a, b, c) + wrapId = fmap Identity + tbl <- runIdentity <$> tableWith + (wrapIdFst <$> simpleTableHeader headless) + (wrapId <$> simpleTableRow) + sep simpleTableFooter -- Simple tables get 0s for relative column widths (i.e., use default) - return $ B.singleton $ Table c a (replicate (length a) 0) h l + case B.toList tbl of + [Table c a _w h l] -> return $ B.singleton $ + Table c a (replicate (length a) 0) h l + _ -> + throwError $ PandocShouldNeverHappenError + "tableWith returned something unexpected" where sep = return () -- optional (simpleTableSep '-') -gridTable :: Bool -- ^ Headerless table - -> RSTParser Blocks -gridTable headerless = B.singleton - <$> gridTableWith (B.toList <$> parseBlocks) headerless +gridTable :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m Blocks +gridTable headerless = runIdentity <$> + gridTableWith (Identity <$> parseBlocks) headerless -table :: RSTParser Blocks +table :: PandocMonad m => RSTParser m Blocks table = gridTable False <|> simpleTable False <|> gridTable True <|> simpleTable True <?> "table" @@ -948,7 +1301,7 @@ table = gridTable False <|> simpleTable False <|> -- inline -- -inline :: RSTParser Inlines +inline :: PandocMonad m => RSTParser m Inlines inline = choice [ note -- can start with whitespace, so try before ws , whitespace , link @@ -964,29 +1317,30 @@ inline = choice [ note -- can start with whitespace, so try before ws , escapedChar , symbol ] <?> "inline" -parseInlineFromString :: String -> RSTParser Inlines -parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) +parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines +parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) -hyphens :: RSTParser Inlines +hyphens :: Monad m => RSTParser m Inlines hyphens = do result <- many1 (char '-') optional endline -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Parser [Char] st Inlines +escapedChar :: Monad m => ParserT [Char] st m Inlines escapedChar = do c <- escaped anyChar - return $ if c == ' ' -- '\ ' is null in RST + return $ if c == ' ' || c == '\n' || c == '\r' + -- '\ ' is null in RST then mempty else B.str [c] -symbol :: RSTParser Inlines +symbol :: Monad m => RSTParser m Inlines symbol = do result <- oneOf specialChars return $ B.str [result] -- parses inline code, between codeStart and codeEnd -code :: RSTParser Inlines +code :: Monad m => RSTParser m Inlines code = try $ do string "``" result <- manyTill anyChar (try (string "``")) @@ -994,7 +1348,7 @@ code = try $ do $ trim $ unwords $ lines result -- succeeds only if we're not right after a str (ie. in middle of word) -atStart :: RSTParser a -> RSTParser a +atStart :: Monad m => RSTParser m a -> RSTParser m a atStart p = do pos <- getPosition st <- getState @@ -1002,11 +1356,11 @@ atStart p = do guard $ stateLastStrPos st /= Just pos p -emph :: RSTParser Inlines +emph :: PandocMonad m => RSTParser m Inlines emph = B.emph . trimInlines . mconcat <$> enclosed (atStart $ char '*') (char '*') inline -strong :: RSTParser Inlines +strong :: PandocMonad m => RSTParser m Inlines strong = B.strong . trimInlines . mconcat <$> enclosed (atStart $ string "**") (try $ string "**") inline @@ -1018,12 +1372,13 @@ strong = B.strong . trimInlines . mconcat <$> -- - Classes are silently discarded in addNewRole -- - Lacks sensible implementation for title-reference (which is the default) -- - Allows direct use of the :raw: role, rST only allows inherited use. -interpretedRole :: RSTParser Inlines +interpretedRole :: PandocMonad m => RSTParser m Inlines interpretedRole = try $ do (role, contents) <- roleBefore <|> roleAfter renderRole contents Nothing role nullAttr -renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines +renderRole :: PandocMonad m + => String -> Maybe String -> String -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ B.str contents "superscript" -> return $ B.superscript $ B.str contents @@ -1048,10 +1403,8 @@ renderRole contents fmt role attr = case role of case M.lookup custom customRoles of Just (newRole, newFmt, newAttr) -> renderRole contents newFmt newRole newAttr - Nothing -> do - pos <- getPosition - addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in" - return $ B.str contents -- Undefined role + Nothing -> -- undefined role + return $ B.spanWith ("",[],[("role",role)]) (B.str contents) where titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) @@ -1061,33 +1414,33 @@ renderRole contents fmt role attr = case role of pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" addClass :: String -> Attr -> Attr -addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues) +addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues) -roleName :: RSTParser String +roleName :: PandocMonad m => RSTParser m String roleName = many1 (letter <|> char '-') -roleMarker :: RSTParser String +roleMarker :: PandocMonad m => RSTParser m String roleMarker = char ':' *> roleName <* char ':' -roleBefore :: RSTParser (String,String) +roleBefore :: PandocMonad m => RSTParser m (String,String) roleBefore = try $ do role <- roleMarker contents <- unmarkedInterpretedText return (role,contents) -roleAfter :: RSTParser (String,String) +roleAfter :: PandocMonad m => RSTParser m (String,String) roleAfter = try $ do contents <- unmarkedInterpretedText role <- roleMarker <|> (stateRstDefaultRole <$> getState) return (role,contents) -unmarkedInterpretedText :: RSTParser [Char] +unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar -whitespace :: RSTParser Inlines +whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" -str :: RSTParser Inlines +str :: Monad m => RSTParser m Inlines str = do let strChar = noneOf ("\t\n " ++ specialChars) result <- many1 strChar @@ -1095,26 +1448,24 @@ str = do return $ B.str result -- an endline character that can be treated as a space, not a structural break -endline :: RSTParser Inlines +endline :: Monad m => RSTParser m Inlines endline = try $ do newline notFollowedBy blankline -- parse potential list-starts at beginning of line differently in a list: st <- getState - if (stateParserContext st) == ListItemState - then notFollowedBy (anyOrderedListMarker >> spaceChar) >> + when (stateParserContext st == ListItemState) $ notFollowedBy (anyOrderedListMarker >> spaceChar) >> notFollowedBy' bulletListStart - else return () return B.softbreak -- -- links -- -link :: RSTParser Inlines +link :: PandocMonad m => RSTParser m Inlines link = choice [explicitLink, referenceLink, autoLink] <?> "link" -explicitLink :: RSTParser Inlines +explicitLink :: PandocMonad m => RSTParser m Inlines explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code @@ -1128,61 +1479,73 @@ explicitLink = try $ do then B.str src else label' -- `link <google_>` is a reference link to _google! - (src',tit,attr) <- case reverse src of - '_':xs -> do - keyTable <- stateKeys <$> getState - let key = toKey $ reverse xs - case M.lookup key keyTable of - Nothing -> do - pos <- getPosition - addWarning (Just pos) $ - "Could not find reference for " ++ - show key - return ("","",nullAttr) - Just ((s,t),a) -> return (s,t,a) - _ -> return (src, "", nullAttr) + ((src',tit),attr) <- case reverse src of + '_':xs -> lookupKey [] (toKey (reverse xs)) + _ -> return ((src, ""), nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -referenceLink :: RSTParser Inlines +citationName :: PandocMonad m => RSTParser m String +citationName = do + raw <- citationMarker + return $ "[" ++ raw ++ "]" + +referenceLink :: PandocMonad m => RSTParser m Inlines referenceLink = try $ do - (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <* - char '_' - state <- getState - let keyTable = stateKeys state + ref <- (referenceName <|> citationName) <* char '_' + let label' = B.text ref let isAnonKey (Key ('_':_)) = True isAnonKey _ = False - key <- option (toKey $ stripTicks ref) $ + state <- getState + let keyTable = stateKeys state + key <- option (toKey ref) $ do char '_' let anonKeys = sort $ filter isAnonKey $ M.keys keyTable - if null anonKeys - then mzero - else return (head anonKeys) - ((src,tit), attr) <- case M.lookup key keyTable of - Nothing -> do - pos <- getPosition - addWarning (Just pos) $ - "Could not find reference for " ++ - show key - return (("",""),nullAttr) - Just val -> return val + case anonKeys of + [] -> mzero + (k:_) -> return k + ((src,tit), attr) <- lookupKey [] key -- if anonymous link, remove key so it won't be used again - when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } + when (isAnonKey key) $ updateState $ \s -> + s{ stateKeys = M.delete key keyTable } return $ B.linkWith attr src tit label' -autoURI :: RSTParser Inlines +-- We keep a list of oldkeys so we can detect lookup loops. +lookupKey :: PandocMonad m + => [Key] -> Key -> RSTParser m ((String, String), Attr) +lookupKey oldkeys key = do + pos <- getPosition + state <- getState + let keyTable = stateKeys state + case M.lookup key keyTable of + Nothing -> do + let Key key' = key + logMessage $ ReferenceNotFound key' pos + return (("",""),nullAttr) + -- check for keys of the form link_, which need to be resolved: + Just ((u@(_:_),""),_) | last u == '_' -> do + let rawkey = init u + let newkey = toKey rawkey + if newkey `elem` oldkeys + then do + logMessage $ CircularReference rawkey pos + return (("",""),nullAttr) + else lookupKey (key:oldkeys) newkey + Just val -> return val + +autoURI :: Monad m => RSTParser m Inlines autoURI = do (orig, src) <- uri return $ B.link src "" $ B.str orig -autoEmail :: RSTParser Inlines +autoEmail :: Monad m => RSTParser m Inlines autoEmail = do (orig, src) <- emailAddress return $ B.link src "" $ B.str orig -autoLink :: RSTParser Inlines +autoLink :: PandocMonad m => RSTParser m Inlines autoLink = autoURI <|> autoEmail -subst :: RSTParser Inlines +subst :: PandocMonad m => RSTParser m Inlines subst = try $ do (_,ref) <- withRaw $ enclosed (char '|') (char '|') inline state <- getState @@ -1191,12 +1554,11 @@ subst = try $ do case M.lookup key substTable of Nothing -> do pos <- getPosition - addWarning (Just pos) $ - "Could not find reference for " ++ show key + logMessage $ ReferenceNotFound (show key) pos return mempty Just target -> return target -note :: RSTParser Inlines +note :: PandocMonad m => RSTParser m Inlines note = try $ do optional whitespace ref <- noteMarker @@ -1206,8 +1568,7 @@ note = try $ do case lookup ref notes of Nothing -> do pos <- getPosition - addWarning (Just pos) $ - "Could not find note for " ++ show ref + logMessage $ ReferenceNotFound ref pos return mempty Just raw -> do -- We temporarily empty the note list while parsing the note, @@ -1215,8 +1576,8 @@ note = try $ do -- Note references inside other notes are allowed in reST, but -- not yet in this implementation. updateState $ \st -> st{ stateNotes = [] } - contents <- parseFromString parseBlocks raw - let newnotes = if (ref == "*" || ref == "#") -- auto-numbered + contents <- parseFromString' parseBlocks raw + let newnotes = if ref == "*" || ref == "#" -- auto-numbered -- delete the note so the next auto-numbered note -- doesn't get the same contents: then deleteFirstsBy (==) notes [(ref,raw)] @@ -1224,20 +1585,20 @@ note = try $ do updateState $ \st -> st{ stateNotes = newnotes } return $ B.note contents -smart :: RSTParser Inlines +smart :: PandocMonad m => RSTParser m Inlines smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice [apostrophe, dash, ellipses] -singleQuoted :: RSTParser Inlines +singleQuoted :: PandocMonad m => RSTParser m Inlines singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ B.singleQuoted . trimInlines . mconcat <$> many1Till inline singleQuoteEnd -doubleQuoted :: RSTParser Inlines +doubleQuoted :: PandocMonad m => RSTParser m Inlines doubleQuoted = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ |