diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/RST.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 178 |
1 files changed, 35 insertions, 143 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 5e7ea512e..13afe5053 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,10 +31,13 @@ module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.ParserCombinators.Parsec -import Control.Monad ( when, unless, liftM ) -import Data.List ( findIndex, delete, intercalate, transpose ) +import Control.Monad ( when, unless ) +import Data.List ( findIndex, intercalate, transpose, sort ) +import qualified Data.Map as M +import Text.Printf ( printf ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -- ^ Parser state, including options for parser @@ -93,9 +96,6 @@ parseRST = do docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat setInput docMinusKeys setPosition startPos - st <- getState - let reversedKeys = stateKeys st - updateState $ \s -> s { stateKeys = reverse reversedKeys } -- now parse it for real... blocks <- parseBlocks let blocks' = filter (/= Null) blocks @@ -425,7 +425,7 @@ bulletListStart = try $ do -- parses ordered list start and returns its length (inc following whitespace) orderedListStart :: ListNumberStyle -> ListNumberDelim - -> GenParser Char st Int + -> GenParser Char ParserState Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar @@ -540,10 +540,10 @@ referenceName = quotedReferenceName <|> referenceKey :: GenParser Char ParserState [Char] referenceKey = do startPos <- getPosition - key <- choice [imageKey, anonymousKey, regularKey] + (key, target) <- choice [imageKey, anonymousKey, regularKey] st <- getState let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = key : oldkeys } + updateState $ \s -> s { stateKeys = M.insert key target oldkeys } optional blanklines endPos <- getPosition -- return enough blanks to replace key @@ -558,28 +558,29 @@ targetURI = do blanklines return $ escapeURI $ removeLeadingTrailingSpace $ contents -imageKey :: GenParser Char ParserState ([Inline], (String, [Char])) +imageKey :: GenParser Char ParserState (Key, Target) imageKey = try $ do string ".. |" ref <- manyTill inline (char '|') skipSpaces string "image::" src <- targetURI - return (normalizeSpaces ref, (src, "")) + return (Key (normalizeSpaces ref), (src, "")) -anonymousKey :: GenParser Char st ([Inline], (String, [Char])) +anonymousKey :: GenParser Char st (Key, Target) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI - return ([Str "_"], (src, "")) + pos <- getPosition + return (Key [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, "")) -regularKey :: GenParser Char ParserState ([Inline], (String, [Char])) +regularKey :: GenParser Char ParserState (Key, Target) regularKey = try $ do string ".. _" ref <- referenceName char ':' src <- targetURI - return (normalizeSpaces ref, (src, "")) + return (Key (normalizeSpaces ref), (src, "")) -- -- tables @@ -607,41 +608,20 @@ dashedLine ch = do simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -gridPart :: Char -> GenParser Char st (Int, Int) -gridPart ch = do - dashes <- many1 (char ch) - char '+' - return (length dashes, length dashes + 1) - -gridDashedLines :: Char -> GenParser Char st [(Int,Int)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline - -- Parse a table row separator simpleTableSep :: Char -> GenParser Char ParserState Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -gridTableSep :: Char -> GenParser Char ParserState Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - -- Parse a table footer simpleTableFooter :: GenParser Char ParserState [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -gridTableFooter :: GenParser Char ParserState [Char] -gridTableFooter = blanklines - -- Parse a raw line and split it into chunks by indices. simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -gridTableRawLine :: [Int] -> GenParser Char ParserState [String] -gridTableRawLine indices = do - char '|' - line <- many1Till anyChar newline - return (gridTableSplitLine indices $ removeTrailingSpace line) - -- Parse a table row and return a list of blocks (columns). simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] simpleTableRow indices = do @@ -651,64 +631,13 @@ simpleTableRow indices = do let cols = map unlines . transpose $ firstLine : colLines mapM (parseFromString (many plain)) cols -gridTableRow :: [Int] - -> GenParser Char ParserState [[Block]] -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - mapM (liftM compactifyCell . parseFromString (many block)) cols - -compactifyCell :: [Block] -> [Block] -compactifyCell bs = head $ compactify [bs] - simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = map removeLeadingTrailingSpace $ tail $ splitByIndices (init indices) line -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = - map removeFinalBar $ tail $ splitByIndices (init indices) line - -removeFinalBar :: String -> String -removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") . - reverse - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths' = zipWith (-) indices (0:indices) - lengths = reverse $ - case reverse lengths' of - [] -> [] - [x] -> [x] - -- compensate for the fact that intercolumn - -- spaces are counted in widths of all columns - -- but the last... - (x:y:zs) -> if x < y && y - x <= 2 - then y:y:zs - else x:y:zs - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -722,64 +651,23 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - return (rawHeads, aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) -gridTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([String], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return $ repeat "" - else many1 - (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () - let lines' = map snd dashes - let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments - let rawHeads = if headless - then replicate (length dashes) "" - else map (intercalate " ") $ transpose - $ map (gridTableSplitLine indices) rawContent - return (rawHeads, aligns, indices) - --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState sep - -> GenParser Char ParserState end - -> GenParser Char ParserState Block -tableWith headerParser rowParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy` lineParser - footerParser - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let captions = [] -- no notion of captions in RST - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table captions aligns widths heads lines' - --- Parse a simple table with '---' header and one line per row. +-- Parse a simple table. simpleTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return []) -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l where sep = return () -- optional (simpleTableSep '-') --- Parse a grid table: starts with row of '-' on top, then header --- (which may be grid), then the rows, --- which may be grid, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter + -> GenParser Char ParserState Block +gridTable = gridTableWith block (return []) table :: GenParser Char ParserState Block table = gridTable False <|> simpleTable False <|> @@ -889,17 +777,21 @@ explicitLink = try $ do referenceLink :: GenParser Char ParserState Inline referenceLink = try $ do label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' - key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link state <- getState let keyTable = stateKeys state + let isAnonKey (Key [Str ('_':_)]) = True + isAnonKey _ = False + key <- option (Key label') $ + do char '_' + let anonKeys = sort $ filter isAnonKey $ M.keys keyTable + if null anonKeys + then pzero + else return (head anonKeys) (src,tit) <- case lookupKeySrc keyTable key of Nothing -> fail "no corresponding key" Just target -> return target - -- if anonymous link, remove first anon key so it won't be used again - let keyTable' = if (key == [Str "_"]) -- anonymous link? - then delete ([Str "_"], (src,tit)) keyTable -- remove first anon key - else keyTable - setState $ state { stateKeys = keyTable' } + -- if anonymous link, remove key so it won't be used again + when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } return $ Link (normalizeSpaces label') (src, tit) autoURI :: GenParser Char ParserState Inline @@ -922,7 +814,7 @@ image = try $ do ref <- manyTill inline (char '|') state <- getState let keyTable = stateKeys state - (src,tit) <- case lookupKeySrc keyTable ref of + (src,tit) <- case lookupKeySrc keyTable (Key ref) of Nothing -> fail "no corresponding key" Just target -> return target return $ Image (normalizeSpaces ref) (src, tit) |