From 9ead748cc9cd79a8f490776d4d5dfe6cd0beb6f1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 8 Dec 2010 08:17:44 -0800 Subject: RST reader: Added footnote suppport. Resolves issue #258. Note that there are some differences in how docutils and pandoc treat footnotes. Currently pandoc ignores the numeral or symbol used in the note; footnotes are put in an auto-numbered ordered list. --- README | 6 ++--- src/Text/Pandoc/Readers/RST.hs | 54 +++++++++++++++++++++++++++++++++++++++--- tests/rst-reader.native | 8 ++++++- tests/rst-reader.rst | 28 ++++++++++++++++++++++ 4 files changed, 88 insertions(+), 8 deletions(-) diff --git a/README b/README index 9e8422724..91ec5e70b 100644 --- a/README +++ b/README @@ -66,10 +66,8 @@ To convert `hello.html` from html to markdown: Supported output formats are listed below under the `-t/--to` option. Supported input formats are listed below under the `-f/--from` option. Note -that the `rst` reader only parses a subset of reStructuredText syntax. For -example, it doesn't handle tables, option lists, or footnotes. But for simple -documents it should be adequate. The `textile`, `latex`, and `html` readers -are also limited in what they can do. +that the `rst`, `textile`, `latex`, and `html` readers are not complete; +there are some constructs that they do not parse. If the input or output format is not specified explicitly, `pandoc` will attempt to guess it from the extensions of diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index f9a907f75..e929e2b91 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.ParserCombinators.Parsec import Control.Monad ( when, unless ) -import Data.List ( findIndex, intercalate, transpose, sort ) +import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy ) import qualified Data.Map as M import Text.Printf ( printf ) @@ -91,11 +91,15 @@ titleTransform blocks = (blocks, []) parseRST :: GenParser Char ParserState Pandoc parseRST = do startPos <- getPosition - -- go through once just to get list of reference keys + -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat + docMinusKeys <- manyTill (referenceKey <|> noteBlock <|> lineClump) eof >>= + return . concat setInput docMinusKeys setPosition startPos + st' <- getState + let reversedNotes = stateNotes st' + updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... blocks <- parseBlocks let blocks' = filter (/= Null) blocks @@ -508,6 +512,32 @@ unknownDirective = try $ do many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline) return Null +--- +--- note block +--- + +noteBlock :: GenParser Char ParserState [Char] +noteBlock = try $ do + startPos <- getPosition + string ".." + spaceChar >> skipMany spaceChar + ref <- noteMarker + spaceChar >> skipMany spaceChar + first <- anyLine + blanks <- option "" blanklines + rest <- option "" indentedBlock + endPos <- getPosition + let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" + let newnote = (ref, raw) + st <- getState + let oldnotes = stateNotes st + updateState $ \s -> s { stateNotes = newnote : oldnotes } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +noteMarker :: GenParser Char ParserState [Char] +noteMarker = char '[' >> (many1 digit <|> count 1 (oneOf "#*")) >>~ char ']' + -- -- reference key -- @@ -692,6 +722,7 @@ inline = choice [ smartPunctuation inline , superscript , subscript , escapedChar + , note , symbol ] "inline" hyphens :: GenParser Char ParserState Inline @@ -820,3 +851,20 @@ image = try $ do Nothing -> fail "no corresponding key" Just target -> return target return $ Image (normalizeSpaces ref) (src, tit) + +note :: GenParser Char ParserState Inline +note = try $ do + ref <- noteMarker + char '_' + state <- getState + let notes = stateNotes state + case lookup ref notes of + Nothing -> fail "note not found" + Just raw -> do + contents <- parseFromString parseBlocks raw + when (ref == "*" || ref == "#") $ do -- auto-numbered + -- delete the note so the next auto-numbered note + -- doesn't get the same contents: + let newnotes = deleteFirstsBy (==) notes [(ref,raw)] + updateState $ \st -> st{ stateNotes = newnotes } + return $ Note contents diff --git a/tests/rst-reader.native b/tests/rst-reader.native index 2cb0ef8ae..5ece59eb3 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -308,4 +308,10 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ": , Para [Str "r1",Space,Str "bis"] ], [ BulletList [ [ Plain [Str "b"] ] , [ Plain [Str "b",Space,Str "2"] ] - , [ Plain [Str "b",Space,Str "2"] ] ] ], [ Plain [Str "c",Space,Str "c",Space,Str "2",Space,Str "c",Space,Str "2"] ] ] ] ] + , [ Plain [Str "b",Space,Str "2"] ] ] ], [ Plain [Str "c",Space,Str "c",Space,Str "2",Space,Str "c",Space,Str "2"] ] ] ] +, Header 1 [Str "Footnotes"] +, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "one",Space,Str "line",Str "."]]] +, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line",Str "."]]] +, Para [Note [Para [Str "Note",Space,Str "with"],Para [Str "continuation",Space,Str "block",Str "."]]] +, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line"],Para [Str "and",Space,Str "a",Space,Str "second",Space,Str "para",Str "."]]] +, Para [Str "Not",Space,Str "in",Space,Str "note",Str "."] ] diff --git a/tests/rst-reader.rst b/tests/rst-reader.rst index 8c4b7d726..519f0080c 100644 --- a/tests/rst-reader.rst +++ b/tests/rst-reader.rst @@ -508,3 +508,31 @@ Multiple blocks in a cell | | - b 2 | c 2 | | r1 bis | - b 2 | c 2 | +------------------+-----------+------------+ + +Footnotes +========= + +[1]_ + +[#]_ + +[#]_ + +[*]_ + +.. [1] Note with one line. + +.. [#] Note with + continuation line. + +.. [#] Note with + + continuation block. + +.. [*] Note with + continuation line + + and a second para. + +Not in note. + -- cgit v1.2.3