Markdown reader: parse refs and notes in the same pass.

Previously the Markdown reader made three passes over the input: one for references, a second for notes (which it parsed and stored in the parser state), and a third for the rest of the document. This patch achieves a 10% speed improvement by storing the raw notes during the first (reference) pass, then parsing them when the notes are inserted into the AST. This eliminates the need for a separate second pass to parse notes.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1629 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2009-11-28 03:22:18 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2009-11-28 03:22:18 +0000
commit: d1b80f8f350c7588ba2c95f2c4a646f7af5a5cb3 (patch)
tree: 2d27c04cfccdd5f6606219c1277428ed20cbefc7 /src/Text/Pandoc
parent: 1d440130c431405f7edbd7d8beae584101debbb6 (diff)
2 files changed, 14 insertions, 21 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 226252381..7a16f1578 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -164,23 +164,18 @@ parseMarkdown = do
   -- markdown allows raw HTML
   updateState (\state -> state { stateParseRaw = True })
   startPos <- getPosition
-  -- go through once just to get list of reference keys
-  -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= 
-                  return . concat
+  -- go through once just to get list of reference keys and notes
+  -- docMinusKeys is the raw document with blanks where the keys/notes were...
+  st <- getState
+  let firstPassParser = referenceKey
+                     <|> (if stateStrict st then pzero else noteBlock)
+                     <|> lineClump
+  docMinusKeys <- liftM concat $ manyTill firstPassParser eof
   setInput docMinusKeys
   setPosition startPos
-  st <- getState
-  -- go through again for notes unless strict...
-  if stateStrict st
-     then return ()
-     else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= 
-                              return . concat
-             st' <- getState
-             let reversedNotes = stateNotes st'
-             updateState $ \s -> s { stateNotes = reverse reversedNotes }
-             setInput docMinusNotes
-             setPosition startPos
+  st' <- getState
+  let reversedNotes = stateNotes st'
+  updateState $ \s -> s { stateNotes = reverse reversedNotes }
   -- now parse it for real...
   (title, author, date) <- option ([],[],"") titleBlock
   blocks <- parseBlocks
@@ -243,9 +238,7 @@ noteBlock = try $ do
   raw <- sepBy rawLines (try (blankline >> indentSpaces))
   optional blanklines
   endPos <- getPosition
-  -- parse the extracted text, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
-  let newnote = (ref, contents)
+  let newnote = (ref, (intercalate "\n" raw) ++ "\n\n")
   st <- getState
   let oldnotes = stateNotes st
   updateState $ \s -> s { stateNotes = newnote : oldnotes }
@@ -1174,8 +1167,8 @@ note = try $ do
   state <- getState
   let notes = stateNotes state
   case lookup ref notes of
-    Nothing       -> fail "note not found"
-    Just contents -> return $ Note contents
+    Nothing   -> fail "note not found"
+    Just raw  -> liftM Note $ parseFromString parseBlocks raw
 
 inlineNote :: GenParser Char ParserState Inline
 inlineNote = try $ do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index f920c79aa..810911165 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -713,7 +713,7 @@ data QuoteContext
     | NoQuote         -- ^ Used when not parsing inside quotes
     deriving (Eq, Show)
 
-type NoteTable = [(String, [Block])]
+type NoteTable = [(String, String)]
 
 type KeyTable = [([Inline], Target)]
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2009-11-28 03:22:18 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2009-11-28 03:22:18 +0000
commit	d1b80f8f350c7588ba2c95f2c4a646f7af5a5cb3 (patch)
tree	2d27c04cfccdd5f6606219c1277428ed20cbefc7 /src/Text/Pandoc
parent	1d440130c431405f7edbd7d8beae584101debbb6 (diff)