summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2009-11-28 03:22:18 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2009-11-28 03:22:18 +0000
commitd1b80f8f350c7588ba2c95f2c4a646f7af5a5cb3 (patch)
tree2d27c04cfccdd5f6606219c1277428ed20cbefc7 /src/Text/Pandoc
parent1d440130c431405f7edbd7d8beae584101debbb6 (diff)
Markdown reader: parse refs and notes in the same pass.
Previously the markdown reader made one pass for references, a second pass for notes (which it parsed and stored in the parser state), and a third pass for the rest. This patch achieves a 10% speed improvement by storing the raw notes on the first (reference) pass, then parsing them when the notes are inserted into the AST. This eliminates the need for a second pass to parse notes. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1629 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs33
-rw-r--r--src/Text/Pandoc/Shared.hs2
2 files changed, 14 insertions, 21 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 226252381..7a16f1578 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -164,23 +164,18 @@ parseMarkdown = do
-- markdown allows raw HTML
updateState (\state -> state { stateParseRaw = True })
startPos <- getPosition
- -- go through once just to get list of reference keys
- -- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
- return . concat
+ -- go through once just to get list of reference keys and notes
+ -- docMinusKeys is the raw document with blanks where the keys/notes were...
+ st <- getState
+ let firstPassParser = referenceKey
+ <|> (if stateStrict st then pzero else noteBlock)
+ <|> lineClump
+ docMinusKeys <- liftM concat $ manyTill firstPassParser eof
setInput docMinusKeys
setPosition startPos
- st <- getState
- -- go through again for notes unless strict...
- if stateStrict st
- then return ()
- else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
- return . concat
- st' <- getState
- let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
- setInput docMinusNotes
- setPosition startPos
+ st' <- getState
+ let reversedNotes = stateNotes st'
+ updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
(title, author, date) <- option ([],[],"") titleBlock
blocks <- parseBlocks
@@ -243,9 +238,7 @@ noteBlock = try $ do
raw <- sepBy rawLines (try (blankline >> indentSpaces))
optional blanklines
endPos <- getPosition
- -- parse the extracted text, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- let newnote = (ref, contents)
+ let newnote = (ref, (intercalate "\n" raw) ++ "\n\n")
st <- getState
let oldnotes = stateNotes st
updateState $ \s -> s { stateNotes = newnote : oldnotes }
@@ -1174,8 +1167,8 @@ note = try $ do
state <- getState
let notes = stateNotes state
case lookup ref notes of
- Nothing -> fail "note not found"
- Just contents -> return $ Note contents
+ Nothing -> fail "note not found"
+ Just raw -> liftM Note $ parseFromString parseBlocks raw
inlineNote :: GenParser Char ParserState Inline
inlineNote = try $ do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index f920c79aa..810911165 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -713,7 +713,7 @@ data QuoteContext
| NoQuote -- ^ Used when not parsing inside quotes
deriving (Eq, Show)
-type NoteTable = [(String, [Block])]
+type NoteTable = [(String, String)]
type KeyTable = [([Inline], Target)]