summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2010-12-08 08:17:44 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2010-12-08 08:39:50 -0800
commit9ead748cc9cd79a8f490776d4d5dfe6cd0beb6f1 (patch)
tree3ed850ba16fa19a5ee5bb871801d1fb209321422
parent91978d2201585110469dc8c1a9149d76a1736763 (diff)
RST reader: Added footnote suppport.
Resolves issue #258. Note that there are some differences in how docutils and pandoc treat footnotes. Currently pandoc ignores the numeral or symbol used in the note; footnotes are put in an auto-numbered ordered list.
-rw-r--r--README6
-rw-r--r--src/Text/Pandoc/Readers/RST.hs54
-rw-r--r--tests/rst-reader.native8
-rw-r--r--tests/rst-reader.rst28
4 files changed, 88 insertions, 8 deletions
diff --git a/README b/README
index 9e8422724..91ec5e70b 100644
--- a/README
+++ b/README
@@ -66,10 +66,8 @@ To convert `hello.html` from html to markdown:
Supported output formats are listed below under the `-t/--to` option.
Supported input formats are listed below under the `-f/--from` option. Note
-that the `rst` reader only parses a subset of reStructuredText syntax. For
-example, it doesn't handle tables, option lists, or footnotes. But for simple
-documents it should be adequate. The `textile`, `latex`, and `html` readers
-are also limited in what they can do.
+that the `rst`, `textile`, `latex`, and `html` readers are not complete;
+there are some constructs that they do not parse.
If the input or output format is not specified explicitly, `pandoc`
will attempt to guess it from the extensions of
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index f9a907f75..e929e2b91 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
import Control.Monad ( when, unless )
-import Data.List ( findIndex, intercalate, transpose, sort )
+import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
@@ -91,11 +91,15 @@ titleTransform blocks = (blocks, [])
parseRST :: GenParser Char ParserState Pandoc
parseRST = do
startPos <- getPosition
- -- go through once just to get list of reference keys
+ -- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
+ docMinusKeys <- manyTill (referenceKey <|> noteBlock <|> lineClump) eof >>=
+ return . concat
setInput docMinusKeys
setPosition startPos
+ st' <- getState
+ let reversedNotes = stateNotes st'
+ updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@@ -508,6 +512,32 @@ unknownDirective = try $ do
many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline)
return Null
+---
+--- note block
+---
+
+noteBlock :: GenParser Char ParserState [Char]
+noteBlock = try $ do
+ startPos <- getPosition
+ string ".."
+ spaceChar >> skipMany spaceChar
+ ref <- noteMarker
+ spaceChar >> skipMany spaceChar
+ first <- anyLine
+ blanks <- option "" blanklines
+ rest <- option "" indentedBlock
+ endPos <- getPosition
+ let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
+ let newnote = (ref, raw)
+ st <- getState
+ let oldnotes = stateNotes st
+ updateState $ \s -> s { stateNotes = newnote : oldnotes }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+noteMarker :: GenParser Char ParserState [Char]
+noteMarker = char '[' >> (many1 digit <|> count 1 (oneOf "#*")) >>~ char ']'
+
--
-- reference key
--
@@ -692,6 +722,7 @@ inline = choice [ smartPunctuation inline
, superscript
, subscript
, escapedChar
+ , note
, symbol ] <?> "inline"
hyphens :: GenParser Char ParserState Inline
@@ -820,3 +851,20 @@ image = try $ do
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) (src, tit)
+
+note :: GenParser Char ParserState Inline
+note = try $ do
+ ref <- noteMarker
+ char '_'
+ state <- getState
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just raw -> do
+ contents <- parseFromString parseBlocks raw
+ when (ref == "*" || ref == "#") $ do -- auto-numbered
+ -- delete the note so the next auto-numbered note
+ -- doesn't get the same contents:
+ let newnotes = deleteFirstsBy (==) notes [(ref,raw)]
+ updateState $ \st -> st{ stateNotes = newnotes }
+ return $ Note contents
diff --git a/tests/rst-reader.native b/tests/rst-reader.native
index 2cb0ef8ae..5ece59eb3 100644
--- a/tests/rst-reader.native
+++ b/tests/rst-reader.native
@@ -308,4 +308,10 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite",Str ":
, Para [Str "r1",Space,Str "bis"] ], [ BulletList
[ [ Plain [Str "b"] ]
, [ Plain [Str "b",Space,Str "2"] ]
- , [ Plain [Str "b",Space,Str "2"] ] ] ], [ Plain [Str "c",Space,Str "c",Space,Str "2",Space,Str "c",Space,Str "2"] ] ] ] ]
+ , [ Plain [Str "b",Space,Str "2"] ] ] ], [ Plain [Str "c",Space,Str "c",Space,Str "2",Space,Str "c",Space,Str "2"] ] ] ]
+, Header 1 [Str "Footnotes"]
+, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "one",Space,Str "line",Str "."]]]
+, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line",Str "."]]]
+, Para [Note [Para [Str "Note",Space,Str "with"],Para [Str "continuation",Space,Str "block",Str "."]]]
+, Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line"],Para [Str "and",Space,Str "a",Space,Str "second",Space,Str "para",Str "."]]]
+, Para [Str "Not",Space,Str "in",Space,Str "note",Str "."] ]
diff --git a/tests/rst-reader.rst b/tests/rst-reader.rst
index 8c4b7d726..519f0080c 100644
--- a/tests/rst-reader.rst
+++ b/tests/rst-reader.rst
@@ -508,3 +508,31 @@ Multiple blocks in a cell
| | - b 2 | c 2 |
| r1 bis | - b 2 | c 2 |
+------------------+-----------+------------+
+
+Footnotes
+=========
+
+[1]_
+
+[#]_
+
+[#]_
+
+[*]_
+
+.. [1] Note with one line.
+
+.. [#] Note with
+ continuation line.
+
+.. [#] Note with
+
+ continuation block.
+
+.. [*] Note with
+ continuation line
+
+ and a second para.
+
+Not in note.
+