Major code cleanup in all modules. (Removed unneeded imports,

reformatted, etc.) More major changes are documented below: + Removed Text.Pandoc.ParserCombinators and moved all its definitions to Text.Pandoc.Shared. + In Text.Pandoc.Shared: - Removed unneeded 'try' in blanklines. - Removed endsWith function and rewrote functions to use isSuffixOf instead. - Added >>~ combinator. - Rewrote stripTrailingNewlines, removeLeadingSpaces. + Moved Text.Pandoc.Entities -> Text.Pandoc.CharacterReferences. - Removed unneeded functions charToEntity, charToNumericalEntity. - Renamed functions using proper terminology (character references, not entities). decodeEntities -> decodeCharacterReferences, characterEntity -> characterReference. - Moved escapeStringToXML to Docbook writer, which is the only thing that uses it. - Removed old entity parser in HTML and Markdown readers; replaced with new charRef parser in Text.Pandoc.Shared. + Fixed accent bug in Text.Pandoc.Readers.LaTeX: \^{} now correctly parses as a '^' character. + Text.Pandoc.ASCIIMathML is no longer an exported module. git-svn-id: https://pandoc.googlecode.com/svn/trunk@835 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-08-15 06:00:58 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-08-15 06:00:58 +0000
commit: a8e2199034679c07411c76c42ab1ffb52b170029 (patch)
tree: 2ce7c3be138e30936210196fa828816298139ec6 /src/Text/Pandoc/Readers/Markdown.hs
parent: e814a3f6d23f640b1aed5b7cb949459d514a3e33 (diff)
1 files changed, 273 insertions, 389 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 3ccb74ba7..80a8507b4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,28 +31,24 @@ module Text.Pandoc.Readers.Markdown (
                                      readMarkdown 
                                     ) where
 
-import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup )
+import Data.List ( transpose, isSuffixOf, lookup, sortBy )
+import Data.Ord ( comparing )
 import Data.Char ( isAlphaNum )
-import Text.Pandoc.ParserCombinators
 import Text.Pandoc.Definition
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
 import Text.Pandoc.Shared 
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
 import Text.Pandoc.Readers.HTML ( rawHtmlBlock, 
                                   anyHtmlBlockTag, anyHtmlInlineTag,
                                   anyHtmlTag, anyHtmlEndTag,
                                   htmlEndTag, extractTagType,
                                   htmlBlockElement )
-import Text.Pandoc.Entities ( characterEntity, decodeEntities )
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
 import Text.ParserCombinators.Parsec
 
 -- | Read markdown from an input string and return a Pandoc document.
 readMarkdown :: ParserState -> String -> Pandoc
 readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
 
--- | Parse markdown string with default options and print result (for testing).
-testString :: String -> IO ()
-testString = testStringWith parseMarkdown 
-
 --
 -- Constants and data structure definitions
 --
@@ -70,19 +66,16 @@ specialChars = "\\[]*_~`<>$!^-.&'\""
 -- auxiliary functions
 --
 
--- | Skip a single endline if there is one.
-skipEndline = option Space endline
-
 indentSpaces = try $ do
   state <- getState
   let tabStop = stateTabStop state
   try (count tabStop (char ' ')) <|> 
-    (do{many (char ' '); string "\t"}) <?> "indentation"
+    (many (char ' ') >> string "\t") <?> "indentation"
 
 nonindentSpaces = do
   state <- getState
   let tabStop = stateTabStop state
-  choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)]))
+  choice $ map (\n -> (try (count n (char ' ')))) $ reverse [0..(tabStop - 1)]
 
 -- | Fail unless we're at beginning of a line.
 failUnlessBeginningOfLine = do
@@ -94,20 +87,21 @@ failUnlessSmart = do
   state <- getState
   if stateSmart state then return () else fail "Smart typography feature"
 
+-- | Parse an inline Str element with a given content.
+inlineString str = try $ do 
+  (Str res) <- inline 
+  if res == str then return res else fail $ "unexpected Str content"
+
 -- | Parse a sequence of inline elements between a string
 --  @opener@ and a string @closer@, including inlines
 --  between balanced pairs of @opener@ and a @closer@.
 inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
 inlinesInBalanced opener closer = try $ do
-  let openerSymbol = try $ do 
-         res <- inline 
-         if res == Str opener
-            then return res 
-            else pzero
-  try (string opener)
-  result <- manyTill (     (do lookAhead openerSymbol
-                               bal <- inlinesInBalanced opener closer 
-                               return $ [Str opener] ++ bal ++ [Str closer])
+  string opener
+  result <- manyTill ( (do lookAhead (inlineString opener)
+                           -- because it might be a link...
+                           bal <- inlinesInBalanced opener closer 
+                           return $ [Str opener] ++ bal ++ [Str closer])
                        <|> (count 1 inline)) 
                      (try (string closer))
   return $ concat result
@@ -116,59 +110,55 @@ inlinesInBalanced opener closer = try $ do
 -- document structure
 --
 
-titleLine = try (do
-  char '%'
-  skipSpaces
-  line <- manyTill inline newline
-  return line)
+titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
 
-authorsLine = try (do
+authorsLine = try $ do 
   char '%'
   skipSpaces
   authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
   newline
-  return (map (decodeEntities . removeLeadingTrailingSpace) authors))
+  return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
 
-dateLine = try (do
+dateLine = try $ do
   char '%'
   skipSpaces
   date <- many (noneOf "\n")
   newline
-  return (decodeEntities $ removeTrailingSpace date))
+  return $ decodeCharacterReferences $ removeTrailingSpace date
 
-titleBlock = try (do
+titleBlock = try $ do
   failIfStrict
   title <- option [] titleLine
   author <- option [] authorsLine
   date <- option "" dateLine
-  option "" blanklines
-  return (title, author, date))
+  optional blanklines
+  return (title, author, date)
 
 parseMarkdown = do
-  updateState (\state -> state { stateParseRaw = True }) -- markdown allows raw HTML
+  -- markdown allows raw HTML
+  updateState (\state -> state { stateParseRaw = True })
   (title, author, date) <- option ([],[],"") titleBlock
   -- go through once just to get list of reference keys
-  refs <- manyTill (referenceKey <|> (do l <- lineClump
-                                         return (LineClump l))) eof 
+  refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof
   let keys = map (\(KeyBlock label target) -> (label, target)) $ 
                  filter isKeyBlock refs
   let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
   setInput $ concat rawlines -- with keys stripped out 
   updateState (\state -> state { stateKeys = keys })
-  -- now go through for notes
-  refs <- manyTill (noteBlock <|> (do l <- lineClump
-                                      return (LineClump l))) eof 
+  -- now go through for notes (which may contain references - hence 2nd pass)
+  refs <- manyTill (noteBlock <|> (lineClump >>= return . LineClump)) eof 
   let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $ 
                    filter isNoteBlock refs
   let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
-  setInput $ concat rawlines -- with note blocks and keys stripped out 
+  -- go through a 3rd time, with note blocks and keys stripped out
+  setInput $ concat rawlines 
   updateState (\state -> state { stateNotes = notes })
-  blocks <- parseBlocks  -- go through again, for real
+  blocks <- parseBlocks 
   let blocks' = filter (/= Null) blocks
-  return (Pandoc (Meta title author date) blocks')
+  return $ Pandoc (Meta title author date) blocks'
 
 -- 
--- initial pass for references
+-- initial pass for references and notes
 --
 
 referenceKey = try $ do
@@ -176,9 +166,9 @@ referenceKey = try $ do
   label <- reference
   char ':'
   skipSpaces
-  option ' ' (char '<')
+  optional (char '<')
   src <- many (noneOf "> \n\t")
-  option ' ' (char '>')
+  optional (char '>')
   tit <- option "" title 
   blanklines 
   return $ KeyBlock label (removeTrailingSpace src,  tit)
@@ -189,33 +179,28 @@ noteMarker = try $ do
   manyTill (noneOf " \t\n") (char ']')
 
 rawLine = try $ do
-  notFollowedBy' blankline
+  notFollowedBy blankline
   notFollowedBy' noteMarker
   contents <- many1 nonEndline
-  end <- option "" (do
-                      newline
-                      option "" indentSpaces
-                      return "\n")
-  return (contents ++ end)
+  end <- option "" (newline >> optional indentSpaces >> return "\n") 
+  return $ contents ++ end
 
-rawLines = do
-    lines <- many1 rawLine
-    return (concat lines)
+rawLines = many1 rawLine >>= return . concat
 
 noteBlock = try $ do
   failIfStrict
   ref <- noteMarker
   char ':'
-  option ' ' blankline
-  option "" indentSpaces
-  raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
-  option "" blanklines
+  optional blankline
+  optional indentSpaces
+  raw <- sepBy rawLines (try (blankline >> indentSpaces))
+  optional blanklines
   -- parse the extracted text, which may contain various block elements:
   rest <- getInput
   setInput $ (joinWithSep "\n" raw) ++ "\n\n"
   contents <- parseBlocks
   setInput rest
-  return (NoteBlock ref contents)
+  return $ NoteBlock ref contents
 
 --
 -- parsing blocks
@@ -239,48 +224,39 @@ block = choice [ header
 -- header blocks
 --
 
-header = choice [ setextHeader, atxHeader ] <?> "header"
+header = setextHeader <|> atxHeader <?> "header"
 
-atxHeader = try (do
+atxHeader = try $ do
   lead <- many1 (char '#')
-  notFollowedBy (char '.') -- this would be a list
-  notFollowedBy (char ')')
+  notFollowedBy (char '.' <|> char ')') -- this would be a list
   skipSpaces
   txt <- manyTill inline atxClosing
-  return (Header (length lead) (normalizeSpaces txt)))
+  return $ Header (length lead) (normalizeSpaces txt)
 
-atxClosing = try (do
-  skipMany (char '#')
-  skipSpaces
-  newline
-  option "" blanklines)
+atxClosing = try $ skipMany (char '#') >> skipSpaces >> newline >> 
+                   option "" blanklines
 
 setextHeader = choice $ 
-               map (\x -> setextH x) (enumFromTo 1 (length setextHChars))
+               map (\x -> setextH x) $ enumFromTo 1 (length setextHChars)
 
-setextH n = try (do
+setextH n = try $ do
   txt <- many1Till inline newline
   many1 (char (setextHChars !! (n-1)))
   skipSpaces
   newline
-  option "" blanklines
-  return (Header n (normalizeSpaces txt)))
+  optional blanklines
+  return $ Header n (normalizeSpaces txt)
 
 --
 -- hrule block
 --
 
-hruleWith chr = try (do
-  skipSpaces
-  char chr
-  skipSpaces
-  char chr
-  skipSpaces
-  char chr
-  skipMany (oneOf (chr:spaceChars))
+hruleWith chr = try $ do
+  count 3 (skipSpaces >> char chr)
+  skipMany (skipSpaces >> char chr)
   newline
-  option "" blanklines
-  return HorizontalRule)
+  optional blanklines
+  return HorizontalRule
 
 hrule = choice (map hruleWith hruleChars) <?> "hrule"
 
@@ -288,67 +264,55 @@ hrule = choice (map hruleWith hruleChars) <?> "hrule"
 -- code blocks
 --
 
-indentedLine = try (do
+indentedLine = try $ do
   indentSpaces
   result <- manyTill anyChar newline
-  return (result ++ "\n"))
+  return $ result ++ "\n"
 
 -- two or more indented lines, possibly separated by blank lines
-indentedBlock = try (do 
+indentedBlock = try $ do 
   res1 <- indentedLine
   blanks <- many blankline 
-  res2 <- choice [indentedBlock, indentedLine]
-  return (res1 ++ blanks ++ res2))
+  res2 <- indentedBlock <|> indentedLine
+  return $ res1 ++ blanks ++ res2
 
-codeBlock = do
-  result <- choice [indentedBlock, indentedLine]
-  option "" blanklines
-  return (CodeBlock (stripTrailingNewlines result))
+codeBlock = (indentedBlock <|> indentedLine) >>~ optional blanklines >>=
+            return . CodeBlock . stripTrailingNewlines
 
 --
 -- block quotes
 --
 
-emacsBoxQuote = try (do
+emacsBoxQuote = try $ do
   failIfStrict
   string ",----"
   manyTill anyChar newline
-  raw <- manyTill (try (do 
-                          char '|'
-                          option ' ' (char ' ')
-                          result <- manyTill anyChar newline
-                          return result))
-                   (string "`----")
-  manyTill anyChar newline
-  option "" blanklines
-  return raw)
+  raw <- manyTill 
+         (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
+         (try (string "`----"))
+  blanklines
+  return raw
 
-emailBlockQuoteStart = try (do
-  nonindentSpaces
-  char '>'
-  option ' ' (char ' ')
-  return "> ")
+emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
 
-emailBlockQuote = try (do
+emailBlockQuote = try $ do
   emailBlockQuoteStart
-  raw <- sepBy (many (choice [nonEndline, 
-                              (try (do 
-                                      endline
-                                      notFollowedBy' emailBlockQuoteStart
-                                      return '\n'))]))
-               (try (do {newline; emailBlockQuoteStart}))
-  newline <|> (do{ eof; return '\n' })
-  option "" blanklines
-  return raw)
+  raw <- sepBy (many (nonEndline <|> 
+                      (try (endline >> notFollowedBy emailBlockQuoteStart >>
+                       return '\n'))))
+               (try (newline >> emailBlockQuoteStart))
+  newline <|> (eof >> return '\n')
+  optional blanklines
+  return raw
 
 blockQuote = do 
-  raw <- choice [ emailBlockQuote, emacsBoxQuote ]
+  raw <- emailBlockQuote <|> emacsBoxQuote
   -- parse the extracted block, which may contain various block elements:
   rest <- getInput
   setInput $ (joinWithSep "\n" raw) ++ "\n\n"
   contents <- parseBlocks
   setInput rest
-  return (BlockQuote contents)
+  return $ BlockQuote contents
  
 --
 -- list blocks
@@ -357,7 +321,7 @@ blockQuote = do
 list = choice [ bulletList, orderedList, definitionList ] <?> "list"
 
 bulletListStart = try $ do
-  option ' ' newline -- if preceded by a Plain block in a list context
+  optional newline -- if preceded by a Plain block in a list context
   nonindentSpaces
   notFollowedBy' hrule  -- because hrules start out just like lists
   oneOf bulletListMarkers
@@ -365,7 +329,7 @@ bulletListStart = try $ do
   skipSpaces
 
 anyOrderedListStart = try $ do
-  option ' ' newline -- if preceded by a Plain block in a list context
+  optional  newline -- if preceded by a Plain block in a list context
   nonindentSpaces
   state <- getState
   if stateStrict state
@@ -375,7 +339,7 @@ anyOrderedListStart = try $ do
      else anyOrderedListMarker
 
 orderedListStart style delim = try $ do
-  option ' ' newline -- if preceded by a Plain block in a list context
+  optional newline -- if preceded by a Plain block in a list context
   nonindentSpaces
   state <- getState
   if stateStrict state
@@ -387,40 +351,39 @@ orderedListStart style delim = try $ do
   skipSpaces
 
 -- parse a line of a list item (start = parser for beginning of list item)
-listLine start = try (do
+listLine start = try $ do
   notFollowedBy' start
   notFollowedBy blankline
-  notFollowedBy' (do 
-                    indentSpaces
-                    many (spaceChar)
-                    choice [bulletListStart, anyOrderedListStart >> return ()])
+  notFollowedBy' (do indentSpaces
+                     many (spaceChar)
+                     bulletListStart <|> (anyOrderedListStart >> return ()))
   line <- manyTill anyChar newline
-  return (line ++ "\n"))
+  return $ line ++ "\n"
 
 -- parse raw text for one list item, excluding start marker and continuations
-rawListItem start = try (do
+rawListItem start = try $ do
   start
   result <- many1 (listLine start)
   blanks <- many blankline
-  return ((concat result) ++ blanks))
+  return $ concat result ++ blanks
 
 -- continuation of a list item - indented and separated by blankline 
 -- or (in compact lists) endline.
 -- note: nested lists are parsed as continuations
-listContinuation start = try (do
+listContinuation start = try $ do
   lookAhead indentSpaces
   result <- many1 (listContinuationLine start)
   blanks <- many blankline
-  return ((concat result) ++ blanks))
+  return $ concat result ++ blanks
 
-listContinuationLine start = try (do
-  notFollowedBy' blankline
+listContinuationLine start = try $ do
+  notFollowedBy blankline
   notFollowedBy' start
-  option "" indentSpaces
+  optional indentSpaces
   result <- manyTill anyChar newline
-  return (result ++ "\n"))
+  return $ result ++ "\n"
 
-listItem start = try (do 
+listItem start = try $ do 
   first <- rawListItem start
   continuations <- many (listContinuation start)
   -- parsing with ListItemState forces markers at beginning of lines to
@@ -436,18 +399,15 @@ listItem start = try (do
   contents <- parseBlocks
   setInput rest
   updateState (\st -> st {stateParserContext = oldContext})
-  return contents)
+  return contents
 
-orderedList = try (do
+orderedList = do
   (start, style, delim) <- lookAhead anyOrderedListStart
   items <- many1 (listItem (orderedListStart style delim))
-  let items' = compactify items
-  return (OrderedList (start, style, delim) items'))
+  return $ OrderedList (start, style, delim) $ compactify items
 
-bulletList = try (do
-  items <- many1 (listItem bulletListStart)
-  let items' = compactify items
-  return (BulletList items'))
+bulletList = many1 (listItem bulletListStart) >>= 
+             return . BulletList . compactify
 
 -- definition lists
 
@@ -470,9 +430,9 @@ defRawBlock = try $ do
   char ':'
   state <- getState
   let tabStop = stateTabStop state
-  try (count (tabStop - 1) (char ' ')) <|> (do{many (char ' '); string "\t"})
+  try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
   firstline <- anyLine
-  rawlines <- many (do {notFollowedBy' blankline; indentSpaces; anyLine})
+  rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
   trailing <- option "" blanklines
   return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
 
@@ -488,71 +448,62 @@ definitionList = do
 -- paragraph block
 --
 
-para = try (do 
+para = try $ do 
   result <- many1 inline
   newline
   st <- getState
   if stateStrict st
-     then choice [lookAhead blockQuote, lookAhead header, 
-                  (do{blanklines; return Null})]
-     else choice [(do{lookAhead emacsBoxQuote; return Null}), 
-                  (do{blanklines; return Null})]
-  let result' = normalizeSpaces result
-  return (Para result'))
-
-plain = do
-  result <- many1 inline
-  let result' = normalizeSpaces result
-  return (Plain result')
+     then choice [ lookAhead blockQuote, lookAhead header, 
+                   (blanklines >> return Null) ]
+     else choice [ lookAhead emacsBoxQuote >> return Null, 
+                   (blanklines >> return Null) ]
+  return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces 
 
 -- 
 -- raw html
 --
 
-htmlElement = choice [strictHtmlBlock,
-                      htmlBlockElement] <?> "html element"
+htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
 
 htmlBlock = do
   st <- getState
   if stateStrict st
-    then do
-           failUnlessBeginningOfLine
-           first <- htmlElement
-           finalSpace <- many (oneOf spaceChars)
-           finalNewlines <- many newline
-           return (RawHtml (first ++ finalSpace ++ finalNewlines))
+    then try $ do failUnlessBeginningOfLine
+                  first <- htmlElement
+                  finalSpace <- many (oneOf spaceChars)
+                  finalNewlines <- many newline
+                  return $ RawHtml $ first ++ finalSpace ++ finalNewlines
     else rawHtmlBlocks
 
 -- True if tag is self-closing
 isSelfClosing tag = 
   isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag
 
-strictHtmlBlock = try (do
+strictHtmlBlock = try $ do
   tag <- anyHtmlBlockTag 
   let tag' = extractTagType tag
   if isSelfClosing tag || tag' == "hr" 
      then return tag
-     else do
-            contents <- many (do{notFollowedBy' (htmlEndTag tag'); 
-                                 htmlElement <|> (count 1 anyChar)})
-            end <- htmlEndTag tag'
-            return $ tag ++ (concat contents) ++ end)
+     else do contents <- many (notFollowedBy' (htmlEndTag tag') >> 
+                               (htmlElement <|> (count 1 anyChar)))
+             end <- htmlEndTag tag'
+             return $ tag ++ concat contents ++ end
 
-rawHtmlBlocks = try (do
+rawHtmlBlocks = try $ do
   htmlBlocks <- many1 rawHtmlBlock    
   let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
-  let combined' = if (last combined == '\n')
+  let combined' = if not (null combined) && last combined == '\n'
                      then init combined  -- strip extra newline 
                      else combined 
-  return (RawHtml combined'))
+  return $ RawHtml combined'
 
 --
 -- LaTeX
 --
 
-rawLaTeXEnvironment' = do
-  failIfStrict
-  rawLaTeXEnvironment
+rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
 
 --
 -- Tables
@@ -560,54 +511,46 @@ rawLaTeXEnvironment' = do
 
 -- Parse a dashed line with optional trailing spaces; return its length
 -- and the length including trailing space.
-dashedLine ch = do
-    dashes <- many1 (char ch)
-    sp     <- many spaceChar
-    return $ (length dashes, length $ dashes ++ sp)
+dashedLine ch = try $ do
+  dashes <- many1 (char ch)
+  sp     <- many spaceChar
+  return $ (length dashes, length $ dashes ++ sp)
 
 -- Parse a table header with dashed lines of '-' preceded by 
 -- one line of text.
-simpleTableHeader = do
-    rawContent  <- anyLine
-    initSp      <- nonindentSpaces
-    dashes      <- many1 (dashedLine '-')
-    newline
-    let (lengths, lines) = unzip dashes
-    let indices  = scanl (+) (length initSp) lines
-    let rawHeads = tail $ splitByIndices (init indices) rawContent
-    let aligns   = zipWith alignType (map (\a -> [a]) rawHeads) lengths
-    return $ (rawHeads, aligns, indices)
+simpleTableHeader = try $ do
+  rawContent  <- anyLine
+  initSp      <- nonindentSpaces
+  dashes      <- many1 (dashedLine '-')
+  newline
+  let (lengths, lines) = unzip dashes
+  let indices  = scanl (+) (length initSp) lines
+  let rawHeads = tail $ splitByIndices (init indices) rawContent
+  let aligns   = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+  return (rawHeads, aligns, indices)
 
 -- Parse a table footer - dashed lines followed by blank line.
-tableFooter = try $ do
-    nonindentSpaces
-    many1 (dashedLine '-')
-    blanklines
+tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
 
 -- Parse a table separator - dashed line.
-tableSep = try $ do
-    nonindentSpaces
-    many1 (dashedLine '-')
-    string "\n"
+tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
 
 -- Parse a raw line and split it into chunks by indices.
 rawTableLine indices = do
-    notFollowedBy' (blanklines <|> tableFooter)
-    line <- many1Till anyChar newline
-    return $ map removeLeadingTrailingSpace $ tail $ 
-             splitByIndices (init indices) line
+  notFollowedBy' (blanklines <|> tableFooter)
+  line <- many1Till anyChar newline
+  return $ map removeLeadingTrailingSpace $ tail $ 
+           splitByIndices (init indices) line
 
 -- Parse a table line and return a list of lists of blocks (columns).
-tableLine indices = try $ do
-    rawline <- rawTableLine indices
-    mapM (parseFromString (many plain)) rawline
+tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
 
 -- Parse a multiline table row and return a list of blocks (columns).
 multilineRow indices = try $ do
-    colLines <- many1 (rawTableLine indices)
-    option "" blanklines
-    let cols = map unlines $ transpose colLines
-    mapM (parseFromString (many plain)) cols
+  colLines <- many1 (rawTableLine indices)
+  optional blanklines
+  let cols = map unlines $ transpose colLines
+  mapM (parseFromString (many plain)) cols
 
 -- Calculate relative widths of table columns, based on indices
 widthsFromIndices :: Int     -- Number of columns on terminal
@@ -615,22 +558,22 @@ widthsFromIndices :: Int     -- Number of columns on terminal
                   -> [Float] -- Fractional relative sizes of columns
 widthsFromIndices _ [] = []  
 widthsFromIndices numColumns indices = 
-    let lengths = zipWith (-) indices (0:indices)
-        totLength = sum lengths
-        quotient = if totLength > numColumns
-                     then fromIntegral totLength
-                     else fromIntegral numColumns
-        fracs = map (\l -> (fromIntegral l) / quotient) lengths in
-    tail fracs
+  let lengths = zipWith (-) indices (0:indices)
+      totLength = sum lengths
+      quotient = if totLength > numColumns
+                   then fromIntegral totLength
+                   else fromIntegral numColumns
+      fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+  tail fracs
 
 -- Parses a table caption:  inlines beginning with 'Table:'
 -- and followed by blank lines.
 tableCaption = try $ do
-    nonindentSpaces
-    string "Table:"
-    result <- many1 inline
-    blanklines
-    return $ normalizeSpaces result
+  nonindentSpaces
+  string "Table:"
+  result <- many1 inline
+  blanklines
+  return $ normalizeSpaces result
 
 -- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
 tableWith headerParser lineParser footerParser = try $ do
@@ -653,30 +596,19 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines
 multilineTable = tableWith multilineTableHeader multilineRow tableFooter
 
 multilineTableHeader = try $ do
-    tableSep 
-    rawContent  <- many1 (do{notFollowedBy' tableSep; 
-                             many1Till anyChar newline})
-    initSp      <- nonindentSpaces
-    dashes      <- many1 (dashedLine '-')
-    newline
-    let (lengths, lines) = unzip dashes
-    let indices  = scanl (+) (length initSp) lines
-    let rawHeadsList = transpose $ map 
-                       (\ln -> tail $ splitByIndices (init indices) ln)
-                       rawContent
-    let rawHeads = map (joinWithSep " ") rawHeadsList
-    let aligns   = zipWith alignType rawHeadsList lengths
-    return $ ((map removeLeadingTrailingSpace rawHeads),
-             aligns, indices)
-
--- Returns the longest of a list of strings.
-longest :: [String] -> String
-longest [] = ""
-longest [x] = x
-longest (x:xs) =
-    if (length x) >= (maximum $ map length xs)
-      then x
-      else longest xs
+  tableSep 
+  rawContent  <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+  initSp      <- nonindentSpaces
+  dashes      <- many1 (dashedLine '-')
+  newline
+  let (lengths, lines) = unzip dashes
+  let indices  = scanl (+) (length initSp) lines
+  let rawHeadsList = transpose $ map 
+                     (\ln -> tail $ splitByIndices (init indices) ln)
+                     rawContent
+  let rawHeads = map (joinWithSep " ") rawHeadsList
+  let aligns   = zipWith alignType rawHeadsList lengths
+  return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
 
 -- Returns an alignment type for a table, based on a list of strings
 -- (the rows of the column header) and a number (the length of the
@@ -684,19 +616,17 @@ longest (x:xs) =
 alignType :: [String] -> Int -> Alignment
 alignType []  len = AlignDefault
 alignType strLst len =
-    let str        = longest $ map removeTrailingSpace strLst
-        leftSpace  = if null str then False else ((str !! 0) `elem` " \t")
-        rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in
-    case (leftSpace, rightSpace) of
+  let str        = head $ sortBy (comparing length) $ 
+                          map removeTrailingSpace strLst
+      leftSpace  = if null str then False else (str !! 0) `elem` " \t"
+      rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
+  in  case (leftSpace, rightSpace) of
         (True,  False)   -> AlignRight
         (False, True)    -> AlignLeft
-        (True, True)     -> AlignCenter
+        (True,  True)    -> AlignCenter
         (False, False)   -> AlignDefault
 
-table = do
-    failIfStrict
-    result <- simpleTable <|> multilineTable <?> "table"
-    return result
+table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
 
 -- 
 -- inline
@@ -704,7 +634,7 @@ table = do
 
 inline = choice [ rawLaTeXInline'
                 , escapedChar
-                , entity
+                , charRef
                 , note
                 , inlineNote
                 , link
@@ -734,80 +664,64 @@ escapedChar = try $ do
   result <- if stateStrict state 
               then oneOf "\\`*_{}[]()>#+-.!~"
               else satisfy (not . isAlphaNum)
-  return (Str [result])
+  return $ Str [result]
 
-ltSign = try (do
+ltSign = try $ do
   notFollowedBy (noneOf "<")   -- continue only if it's a <
   notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
   char '<'
-  return (Str ['<']))
+  return $ Str ['<']
 
 specialCharsMinusLt = filter (/= '<') specialChars
 
 symbol = do 
   result <- oneOf specialCharsMinusLt
-  return (Str [result])
+  return $ Str [result]
 
 -- parses inline code, between n `s and n `s
-code = try (do 
+code = try $ do 
   starts <- many1 (char '`')
   let num = length starts
   result <- many1Till anyChar (try (count num (char '`')))
   -- get rid of any internal newlines
-  let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-  return (Code result'))
+  return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
 
-mathWord = many1 (choice [ (noneOf " \t\n\\$"), 
-                           (try (do
-                                   c <- char '\\'
-                                   notFollowedBy (char '$')
-                                   return c))])
+mathWord = many1 ((noneOf " \t\n\\$") <|>
+                  (try (char '\\') >>~ notFollowedBy (char '$')))
 
-math = try (do
+math = try $ do
   failIfStrict
   char '$'
   notFollowedBy space
   words <- sepBy1 mathWord (many1 space)
   char '$'
-  return (TeX ("$" ++ (joinWithSep " " words) ++ "$")))
+  return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
 
-emph = do
-  result <- choice [ (enclosed (char '*') (char '*') inline), 
-                     (enclosed (char '_') (char '_') inline) ]
-  return $ Emph (normalizeSpaces result)
+emph = ((enclosed (char '*') (char '*') inline) <|>
+        (enclosed (char '_') (char '_') inline)) >>= 
+        return . Emph . normalizeSpaces
 
-strong = do
-  result <- (enclosed (string "**") (string "**") inline) <|> 
-            (enclosed (string "__") (string "__") inline)
-  return $ Strong (normalizeSpaces result)
+strong = ((enclosed (string "**") (string "**") inline) <|> 
+          (enclosed (string "__") (string "__") inline)) >>=
+         return . Strong . normalizeSpaces
 
-strikeout = do
-  failIfStrict
-  result <- enclosed (string "~~") (string "~~") inline
-  return $ Strikeout (normalizeSpaces result)
+strikeout = failIfStrict >> enclosed (string "~~") (string "~~") inline >>=
+            return . Strikeout . normalizeSpaces
 
-superscript = do
-  failIfStrict
-  result <- enclosed (char '^') (char '^') 
-            (notFollowedBy' whitespace >> inline)  -- may not contain Space
-  return $ Superscript result
+superscript = failIfStrict >> enclosed (char '^') (char '^') 
+              (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+              return . Superscript
 
-subscript = do
-  failIfStrict
-  result <- enclosed (char '~') (char '~')
-           (notFollowedBy' whitespace >> inline)   -- may not contain Space
-  return $ Subscript result
+subscript = failIfStrict >> enclosed (char '~') (char '~')
+            (notFollowedBy' whitespace >> inline) >>=  -- may not contain Space
+            return . Subscript 
 
-smartPunctuation = do
-  failUnlessSmart
-  choice [ quoted, apostrophe, dash, ellipses ]
+smartPunctuation = failUnlessSmart >> 
+                   choice [ quoted, apostrophe, dash, ellipses ]
 
-apostrophe = do
-  char '\'' <|> char '\8217'
-  return Apostrophe
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
 
-quoted = do
-  doubleQuoted <|> singleQuoted 
+quoted = doubleQuoted <|> singleQuoted 
 
 withQuoteContext context parser = do
   oldState <- getState
@@ -820,15 +734,13 @@ withQuoteContext context parser = do
 
 singleQuoted = try $ do
   singleQuoteStart
-  withQuoteContext InSingleQuote $ do
-    result <- many1Till inline singleQuoteEnd
-    return $ Quoted SingleQuote $ normalizeSpaces result
+  withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
+    return . Quoted SingleQuote . normalizeSpaces
 
 doubleQuoted = try $ do 
   doubleQuoteStart
-  withQuoteContext InDoubleQuote $ do
-    result <- many1Till inline doubleQuoteEnd
-    return $ Quoted DoubleQuote $ normalizeSpaces result
+  withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
+    return . Quoted DoubleQuote . normalizeSpaces
 
 failIfInQuoteContext context = do
   st <- getState
@@ -836,88 +748,65 @@ failIfInQuoteContext context = do
     then fail "already inside quotes"
     else return ()
 
-singleQuoteStart = try $ do 
+singleQuoteStart = do 
   failIfInQuoteContext InSingleQuote
-  char '\8216' <|> do 
-                     char '\''  
-                     notFollowedBy (oneOf ")!],.;:-? \t\n")
-                     notFollowedBy (try (do  -- possessive or contraction
-                                           oneOfStrings ["s","t","m","ve","ll","re"]
-                                           satisfy (not . isAlphaNum)))
-                     return '\''
-
-singleQuoteEnd = try $ do
-  char '\'' <|> char '\8217'
-  notFollowedBy alphaNum
-
-doubleQuoteStart = try $ do
-  failIfInQuoteContext InDoubleQuote
-  char '"' <|> char '\8220'
-  notFollowedBy (oneOf " \t\n")
+  char '\8216' <|> 
+     do char '\''  
+        notFollowedBy (oneOf ")!],.;:-? \t\n")
+        notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+                            satisfy (not . isAlphaNum))) -- possess/contraction
+        return '\''
+
+singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
+
+doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
+                   (char '"' <|> char '\8220') >>
+                   notFollowedBy (oneOf " \t\n")
 
 doubleQuoteEnd = char '"' <|> char '\8221'
 
-ellipses = try (do
-  oneOfStrings ["...", " . . . ", ". . .", " . . ."]
-  return Ellipses)
+ellipses = try $ oneOfStrings ["...", " . . . ", ". . .", " . . ."] >>
+                 return Ellipses
 
 dash = enDash <|> emDash
 
-enDash = try (do
-  char '-'
-  notFollowedBy (noneOf "0123456789")
-  return EnDash) 
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
 
-emDash = try (do
-  skipSpaces
-  oneOfStrings ["---", "--"]
-  skipSpaces
-  return EmDash)
+emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
+               skipSpaces >> return EmDash
 
-whitespace = do
-  many1 (oneOf spaceChars) <?> "whitespace"
-  return Space
+whitespace = (many1 (oneOf spaceChars) >> return Space) <?> "whitespace"
 
-tabchar = do
-  tab
-  return (Str "\t")
+tabchar = tab >> return (Str "\t")
 
 -- hard line break
-linebreak = try (do
-  oneOf spaceChars
-  many1 (oneOf spaceChars) 
-  endline
-  return LineBreak )
+linebreak = try $ oneOf spaceChars >> many1 (oneOf spaceChars) >>
+                  endline >> return LineBreak
 
 nonEndline = satisfy (/='\n')
 
-entity = do
-  ent <- characterEntity
-  return $ Str [ent]
-
 strChar = noneOf (specialChars ++ spaceChars ++ "\n")
 
-str = do 
-  result <- many1 strChar
-  return (Str result)
+str = many1 strChar >>= return . Str
 
 -- an endline character that can be treated as a space, not a structural break
-endline = try (do
+endline = try $ do
   newline
   notFollowedBy blankline
   st <- getState
   if stateStrict st 
     then do
-           notFollowedBy' emailBlockQuoteStart
+           notFollowedBy emailBlockQuoteStart
            notFollowedBy (char '#')  -- atx header
-           notFollowedBy (try (do{manyTill anyChar newline; 
-                                  oneOf setextHChars}))  -- setext header
+           notFollowedBy (manyTill anyChar newline >> oneOf setextHChars) 
+           -- setext header
     else return () 
   -- parse potential list-starts differently if in a list:
-  if (stateParserContext st) == ListItemState
-     then notFollowedBy' $ choice [bulletListStart, anyOrderedListStart >> return ()]
+  if stateParserContext st == ListItemState
+     then notFollowedBy' (bulletListStart <|> 
+                          (anyOrderedListStart >> return ()))
      else return ()
-  return Space)
+  return Space
 
 --
 -- links
@@ -930,24 +819,23 @@ reference = notFollowedBy' (string "[^") >>  -- footnote reference
 -- source for a link, with optional title
 source = try $ do 
   char '('
-  option ' ' (char '<')
+  optional (char '<')
   src <- many (noneOf ")> \t\n")
-  option ' ' (char '>')
+  optional (char '>')
   tit <- option "" title
   skipSpaces
   char ')'
   return (removeTrailingSpace src, tit)
 
-titleWith startChar endChar = try (do
+titleWith startChar endChar = try $ do
   leadingSpace <- many1 (oneOf " \t\n")
   if length (filter (=='\n') leadingSpace) > 1
     then fail "title must be separated by space and on same or next line"
     else return ()
   char startChar
-  tit <- manyTill anyChar (try (do char endChar
-                                   skipSpaces
-                                   notFollowedBy (noneOf ")\n")))
-  return $ decodeEntities tit)
+  tit <- manyTill anyChar (try (char endChar >> skipSpaces >>
+                                notFollowedBy (noneOf ")\n")))
+  return $ decodeCharacterReferences tit
 
 title = choice [ titleWith '(' ')', 
                  titleWith '"' '"', 
@@ -955,22 +843,20 @@ title = choice [ titleWith '(' ')',
 
 link = choice [explicitLink, referenceLink] <?> "link"
 
-explicitLink = try (do
+explicitLink = try $ do
   label <- reference
   src <- source 
-  return (Link label src)) 
+  return $ Link label src
 
 -- a link like [this][ref] or [this][] or [this]
 referenceLink = try $ do
   label <- reference
-  ref <- option [] (try (do skipSpaces
-                            option ' ' newline
-                            skipSpaces
-                            reference))
+  ref <- option [] (try (skipSpaces >> optional newline >>
+                         skipSpaces >> reference))
   let ref' = if null ref then label else ref
   state <- getState
   case lookupKeySrc (stateKeys state) ref' of
-     Nothing -> fail "no corresponding key" 
+     Nothing     -> fail "no corresponding key" 
      Just target -> return (Link label target)
 
 autoLink = autoLinkEmail <|> autoLinkRegular
@@ -992,10 +878,10 @@ autoLinkRegular = try $ do
   let src = prot ++ rest
   return $ Link [Code src] (src, "")
 
-image = try (do
+image = try $ do
   char '!'
   (Link label src) <- link
-  return (Image label src)) 
+  return $ Image label src
 
 note = try $ do
   failIfStrict
@@ -1003,23 +889,21 @@ note = try $ do
   state <- getState
   let notes = stateNotes state
   case lookup ref notes of
-    Nothing -> fail "note not found"
-    Just contents -> return (Note contents)
+    Nothing       -> fail "note not found"
+    Just contents -> return $ Note contents
 
 inlineNote = try $ do
   failIfStrict
   char '^'
   contents <- inlinesInBalanced "[" "]"
-  return (Note [Para contents])
+  return $ Note [Para contents]
 
-rawLaTeXInline' = do
-  failIfStrict
-  rawLaTeXInline
+rawLaTeXInline' = failIfStrict >> rawLaTeXInline
 
 rawHtmlInline' = do
   st <- getState
-  result <- if stateStrict st
-              then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] 
-              else choice [htmlBlockElement, anyHtmlInlineTag]
-  return (HtmlInline result)
+  result <- choice $ if stateStrict st
+                        then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] 
+                        else [htmlBlockElement, anyHtmlInlineTag]
+  return $ HtmlInline result
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-08-15 06:00:58 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-08-15 06:00:58 +0000
commit	a8e2199034679c07411c76c42ab1ffb52b170029 (patch)
tree	2ce7c3be138e30936210196fa828816298139ec6 /src/Text/Pandoc/Readers/Markdown.hs
parent	e814a3f6d23f640b1aed5b7cb949459d514a3e33 (diff)