Major code cleanup in all modules. (Removed unneeded imports,

reformatted, etc.) More major changes are documented below: + Removed Text.Pandoc.ParserCombinators and moved all its definitions to Text.Pandoc.Shared. + In Text.Pandoc.Shared: - Removed unneeded 'try' in blanklines. - Removed endsWith function and rewrote functions to use isSuffixOf instead. - Added >>~ combinator. - Rewrote stripTrailingNewlines, removeLeadingSpaces. + Moved Text.Pandoc.Entities -> Text.Pandoc.CharacterReferences. - Removed unneeded functions charToEntity, charToNumericalEntity. - Renamed functions using proper terminology (character references, not entities). decodeEntities -> decodeCharacterReferences, characterEntity -> characterReference. - Moved escapeStringToXML to Docbook writer, which is the only thing that uses it. - Removed old entity parser in HTML and Markdown readers; replaced with new charRef parser in Text.Pandoc.Shared. + Fixed accent bug in Text.Pandoc.Readers.LaTeX: \^{} now correctly parses as a '^' character. + Text.Pandoc.ASCIIMathML is no longer an exported module. git-svn-id: https://pandoc.googlecode.com/svn/trunk@835 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-08-15 06:00:58 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-08-15 06:00:58 +0000
commit: a8e2199034679c07411c76c42ab1ffb52b170029 (patch)
tree: 2ce7c3be138e30936210196fa828816298139ec6 /src/Text/Pandoc/Readers/RST.hs
parent: e814a3f6d23f640b1aed5b7cb949459d514a3e33 (diff)
1 files changed, 147 insertions, 174 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a36c33d92..ce8fedf02 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,23 +31,14 @@ module Text.Pandoc.Readers.RST (
                                 readRST
                                ) where
 import Text.Pandoc.Definition
-import Text.Pandoc.ParserCombinators
 import Text.Pandoc.Shared 
-import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
-import Text.Regex ( matchRegex, mkRegex )
 import Text.ParserCombinators.Parsec
-import Data.Maybe ( fromMaybe )
 import Data.List ( findIndex, delete )
-import Data.Char ( toUpper )
 
 -- | Parse reStructuredText string and return Pandoc document.
 readRST :: ParserState -> String -> Pandoc
 readRST state str = (readWith parseRST) state (str ++ "\n\n")
 
--- | Parse a string and print result (for testing).
-testString :: String -> IO ()
-testString = testStringWith parseRST
-
 --
 -- Constants and data structure definitions
 ---
@@ -62,15 +53,11 @@ specialChars = "\\`|*_<>$:[-"
 -- parsing documents
 --
 
-isAnonKey (ref, src) = (ref == [Str "_"])
-
-isHeader1 :: Block -> Bool
-isHeader1 (Header 1 _) = True
-isHeader1 _ = False
+isAnonKey (ref, src) = ref == [Str "_"]
 
-isHeader2 :: Block -> Bool
-isHeader2 (Header 2 _) = True
-isHeader2 _ = False
+isHeader :: Int -> Block -> Bool
+isHeader n (Header x _) = x == n
+isHeader _ _ = False
 
 -- | Promote all headers in a list of blocks.  (Part of
 -- title transformation for RST.)
@@ -86,23 +73,23 @@ promoteHeaders num [] = []
 titleTransform :: [Block]              -- ^ list of blocks
                -> ([Block], [Inline])  -- ^ modified list of blocks, title
 titleTransform ((Header 1 head1):(Header 2 head2):rest) =  -- title subtitle
-    if (any isHeader1 rest) || (any isHeader2 rest)
+    if (any (isHeader 1) rest) || (any (isHeader 2) rest)
        then ((Header 1 head1):(Header 2 head2):rest, [])
        else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
 titleTransform ((Header 1 head1):rest) =       -- title, no subtitle
-    if (any isHeader1 rest)
+    if (any (isHeader 1) rest)
        then ((Header 1 head1):rest, [])
        else ((promoteHeaders 1 rest), head1)
 titleTransform blocks = (blocks, [])
 
 parseRST = do
-  -- first pass: get anonymous keys
-  refs <- manyTill (referenceKey <|> (do l <- lineClump
-                                         return (LineClump l))) eof
+  -- first pass: get keys
+  refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof
   let keys = map (\(KeyBlock label target) -> (label, target)) $
                  filter isKeyBlock refs
+  -- second pass, with keys stripped out
   let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
-  setInput $ concat rawlines -- with keys stripped out
+  setInput $ concat rawlines 
   updateState (\state -> state { stateKeys = keys })
   blocks <- parseBlocks
   let blocks' = filter (/= Null) blocks
@@ -113,7 +100,7 @@ parseRST = do
   let authors = stateAuthors state
   let date = stateDate state
   let title' = if (null title) then (stateTitle state) else title
-  return (Pandoc (Meta title' authors date) blocks'')
+  return $ Pandoc (Meta title' authors date) blocks''
 
 --
 -- parsing blocks
@@ -121,32 +108,39 @@ parseRST = do
 
 parseBlocks = manyTill block eof
 
-block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, 
-                 imageBlock, unknownDirective, header,
-                 hrule, list, fieldList, lineBlock, para, plain,
-                 nullBlock ] <?> "block"
+block = choice [ codeBlock
+               , rawHtmlBlock
+               , rawLaTeXBlock
+               , blockQuote
+               , imageBlock
+               , unknownDirective
+               , header
+               , hrule
+               , list
+               , fieldList
+               , lineBlock
+               , para
+               , plain
+               , nullBlock ] <?> "block"
 
 --
 -- field list
 --
 
-fieldListItem = try (do
+fieldListItem = try $ do
   char ':'
   name <- many1 alphaNum
   string ": "
   skipSpaces
   first <- manyTill anyChar newline
-  rest <- many (do
-                  notFollowedBy (char ':')
-                  notFollowedBy blankline 
-                  skipSpaces
-                  manyTill anyChar newline )
-  return (name, (joinWithSep " " (first:rest)))) 
-
-fieldList = try (do
+  rest <- many (notFollowedBy ((char ':') <|> blankline)  >>
+                skipSpaces >> manyTill anyChar newline)
+  return $ (name, (joinWithSep " " (first:rest)))
+
+fieldList = try $ do
   items <- many1 fieldListItem
   blanklines
-  let authors = case (lookup "Authors" items) of
+  let authors = case lookup "Authors" items of
                   Just auth -> [auth]
                   Nothing  -> map snd (filter (\(x,y) -> x == "Author") items)
   let date = case (lookup "Date" items) of
@@ -162,82 +156,74 @@ fieldList = try (do
   updateState (\st -> st { stateAuthors = authors, 
                            stateDate = date, 
                            stateTitle = title })
-  return (BlockQuote result))
+  return $ BlockQuote result
 
 --
 -- line block
 --
 
-lineBlockLine = try (do
+lineBlockLine = try $ do
   string "| "
   white <- many (oneOf " \t")
   line <- manyTill inline newline
-  let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak]
-  return line')
+  return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
 
-lineBlock = try (do
+lineBlock = try $ do
   lines <- many1 lineBlockLine
   blanklines
-  return $ Para (concat lines))
+  return $ Para (concat lines)
 
 --
 -- paragraph block
 --
 
-para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"
+para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
 
-codeBlockStart = try (do
-  string "::"
-  blankline
-  blankline)
+codeBlockStart = try $ string "::" >> blankline >> blankline
 
 -- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock = try (do
-  result <- many1 (do {notFollowedBy' codeBlockStart; inline})
+paraBeforeCodeBlock = try $ do
+  result <- many1 (notFollowedBy' codeBlockStart >> inline)
   lookAhead (string "::")
-  return (Para (if (last result == Space)
-                   then normalizeSpaces result
-                   else (normalizeSpaces result) ++ [Str ":"])))
+  return $ Para $ if last result == Space
+                     then normalizeSpaces result
+                     else (normalizeSpaces result) ++ [Str ":"]
 
 -- regular paragraph
-paraNormal = try (do 
+paraNormal = try $ do 
   result <- many1 inline
   newline
   blanklines
-  let result' = normalizeSpaces result
-  return (Para result'))
+  return $ Para $ normalizeSpaces result
 
-plain = do
-  result <- many1 inline
-  let result' = normalizeSpaces result
-  return (Plain result')
+plain = many1 inline >>= return . Plain . normalizeSpaces 
 
 --
 -- image block
 --
 
-imageBlock = try (do
+imageBlock = try $ do
   string ".. image:: "
   src <- manyTill anyChar newline
-  return (Plain [Image [Str "image"] (src, "")]))
+  return $ Plain [Image [Str "image"] (src, "")]
 
 --
 -- header blocks
 --
 
-header = choice [ doubleHeader, singleHeader ] <?> "header"
+header = doubleHeader <|> singleHeader <?> "header"
 
 -- a header with lines on top and bottom
-doubleHeader = try (do
+doubleHeader = try $ do
   c <- oneOf underlineChars
   rest <- many (char c)  -- the top line
   let lenTop = length (c:rest)
   skipSpaces
   newline
-  txt <- many1 (do {notFollowedBy blankline; inline})
-  pos <- getPosition  
+  txt <- many1 (notFollowedBy blankline >> inline)
+  pos <- getPosition
   let len = (sourceColumn pos) - 1
-  if (len > lenTop) then fail "title longer than border" else (do {return ()})
+  if (len > lenTop) then fail "title longer than border" else return ()
   blankline              -- spaces and newline
   count lenTop (char c)  -- the bottom line
   blanklines
@@ -249,10 +235,10 @@ doubleHeader = try (do
         Just ind -> (headerTable, ind + 1)
         Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
   setState (state { stateHeaderTable = headerTable' })
-  return (Header level (normalizeSpaces txt)))
+  return $ Header level (normalizeSpaces txt)
 
 -- a header with line on the bottom only
-singleHeader = try (do 
+singleHeader = try $ do 
   notFollowedBy' whitespace
   txt <- many1 (do {notFollowedBy blankline; inline})
   pos <- getPosition
@@ -268,19 +254,19 @@ singleHeader = try (do
         Just ind -> (headerTable, ind + 1)
         Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
   setState (state { stateHeaderTable = headerTable' })
-  return (Header level (normalizeSpaces txt)))
+  return $ Header level (normalizeSpaces txt)
 
 --
 -- hrule block
 --
 
-hruleWith chr = try (do
+hruleWith chr = try $ do
   count 4 (char chr)
   skipMany (char chr)
   skipSpaces
   newline
   blanklines
-  return HorizontalRule)
+  return HorizontalRule
 
 hrule = choice (map hruleWith underlineChars) <?> "hrule"
 
@@ -289,15 +275,16 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule"
 --
 
 -- read a line indented by a given string
-indentedLine indents = try (do
+indentedLine indents = try $ do
   string indents
   result <- manyTill anyChar newline
-  return (result ++ "\n"))
+  return $ result ++ "\n"
 
--- two or more indented lines, possibly separated by blank lines
--- if variable = True, then any indent will work, but it must be consistent through the block
--- if variable = False, indent should be one tab or equivalent in spaces
-indentedBlock variable = try (do 
+-- two or more indented lines, possibly separated by blank lines.
+-- if variable = True, then any indent will work, but it must be
+-- consistent through the block.
+-- if variable = False, indent should be one tab or equivalent in spaces.
+indentedBlock variable = try $ do 
   state <- getState
   let tabStop = stateTabStop state
   indents <- if variable
@@ -305,51 +292,47 @@ indentedBlock variable = try (do
                 else oneOfStrings ["\t", (replicate tabStop ' ')]
   firstline <- manyTill anyChar newline
   rest <- many (choice [ indentedLine indents, 
-                         try (do 
-                                b <- blanklines
-                                l <- indentedLine indents
-                                return (b ++ l))])
-  option "" blanklines
-  return (firstline ++ "\n" ++ (concat rest)))
-
-codeBlock = try (do
+                         try (do b <- blanklines
+                                 l <- indentedLine indents
+                                 return (b ++ l))])
+  optional blanklines
+  return $ firstline ++ "\n" ++ concat rest
+
+codeBlock = try $ do
   codeBlockStart
   result <- indentedBlock False
   -- the False means we want one tab stop indent on each line
-  return (CodeBlock (stripTrailingNewlines result)))
+  return $ CodeBlock $ stripTrailingNewlines result
 
 --
 -- raw html
 --
 
-rawHtmlBlock = try (do
-  string ".. raw:: html"
-  blanklines
-  result <- indentedBlock True
-  return (RawHtml result))
+rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
+                     indentedBlock True >>= return . RawHtml
 
 --
 -- raw latex
 --
 
-rawLaTeXBlock = try (do
+rawLaTeXBlock = try $ do
   string ".. raw:: latex"
   blanklines
   result <- indentedBlock True
-  return (Para [(TeX result)]))
+  return $ Para [(TeX result)]
 
 --
 -- block quotes
 --
 
-blockQuote = try (do
+blockQuote = try $ do
   raw <- indentedBlock True
   -- parse the extracted block, which may contain various block elements:
   rest <- getInput
   setInput $ raw ++ "\n\n"
   contents <- parseBlocks
   setInput rest
-  return (BlockQuote contents))
+  return $ BlockQuote contents
 
 --
 -- list blocks
@@ -369,15 +352,14 @@ definitionListItem = try $ do
 
 definitionList = try $ do
   items <- many1 definitionListItem 
-  return (DefinitionList items)
+  return $ DefinitionList items
 
 -- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart = try (do
+bulletListStart = try $ do
   notFollowedBy' hrule  -- because hrules start out just like lists
   marker <- oneOf bulletListMarkers
   white <- many1 spaceChar
-  let len = length (marker:white)
-  return len) 
+  return $ length (marker:white)
 
 -- parses ordered list start and returns its length (inc following whitespace)
 orderedListStart style delim = try $ do
@@ -386,11 +368,11 @@ orderedListStart style delim = try $ do
   return $ markerLen + length white
 
 -- parse a line of a list item
-listLine markerLength = try (do
+listLine markerLength = try $ do
   notFollowedBy blankline
   indentWith markerLength
   line <- manyTill anyChar newline
-  return (line ++ "\n"))
+  return $ line ++ "\n"
 
 -- indent by specified number of spaces (or equiv. tabs)
 indentWith num = do
@@ -399,7 +381,7 @@ indentWith num = do
   if (num < tabStop)
      then count num  (char ' ')
      else choice [ try (count num (char ' ')), 
-                   (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] 
+                   (try (char '\t' >> count (num - tabStop) (char ' '))) ] 
 
 -- parse raw text for one list item, excluding start marker and continuations
 rawListItem start = try $ do
@@ -411,19 +393,16 @@ rawListItem start = try $ do
 -- continuation of a list item - indented and separated by blankline or 
 -- (in compact lists) endline.  
 -- Note: nested lists are parsed as continuations.
-listContinuation markerLength = try (do
+listContinuation markerLength = try $ do
   blanks <- many1 blankline
   result <- many1 (listLine markerLength)
-  return (blanks ++ (concat result)))
+  return $ blanks ++ concat result
 
-listItem start = try (do 
+listItem start = try $ do 
   (markerLength, first) <- rawListItem start
   rest <- many (listContinuation markerLength)
-  blanks <- choice [ try (do 
-                            b <- many blankline
-                            lookAhead start
-                            return b), 
-                     many1 blankline ]  -- whole list must end with blank
+  blanks <- choice [ try (many blankline >>~ lookAhead start),
+                     many1 blankline ]  -- whole list must end with blank.
   -- parsing with ListItemState forces markers at beginning of lines to
   -- count as list item markers, even if not separated by blank space.
   -- see definition of "endline"
@@ -436,52 +415,44 @@ listItem start = try (do
   parsed <- parseBlocks
   setInput remaining 
   updateState (\st -> st {stateParserContext = oldContext})
-  return parsed)
+  return parsed
 
 orderedList = try $ do
   (start, style, delim) <- lookAhead anyOrderedListMarker 
   items <- many1 (listItem (orderedListStart style delim))
   let items' = compactify items
-  return (OrderedList (start, style, delim) items')
+  return $ OrderedList (start, style, delim) items'
 
-bulletList = try (do
+bulletList = try $ do
   items <- many1 (listItem bulletListStart)
   let items' = compactify items
-  return (BulletList items'))
+  return $ BulletList items'
 
 --
 -- unknown directive (e.g. comment)
 --
 
-unknownDirective = try (do
+unknownDirective = try $ do
   string ".. "
   manyTill anyChar newline
-  many (do
-          string "   "
-          char ':'
-          many1 (noneOf "\n:") 
-          char ':'
-          many1 (noneOf "\n")
-          newline)
-  option "" blanklines
-  return Null)
+  many (string "   :" >> many1 (noneOf "\n:") >> char ':' >>
+        many1 (noneOf "\n") >> newline)
+  optional blanklines
+  return Null
 
 -- 
 -- reference key
 --
 
-referenceKey = do
-  result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
-  option "" blanklines
-  return result
+referenceKey = 
+  choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] >>~ 
+  optional blanklines
 
 targetURI = try $ do
   skipSpaces
-  option ' ' newline
-  contents <- many1 (try (do many spaceChar
-                             newline
-                             many1 spaceChar
-                             noneOf " \t\n") <|> noneOf "\n")
+  optional newline
+  contents <- many1 (try (many spaceChar >> newline >> 
+                          many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
   blanklines
   return contents
 
@@ -516,71 +487,73 @@ regularKey = try $ do
  -- inline
  --
 
-inline = choice [ superscript, subscript,
-                  escapedChar, link, image, hyphens, strong, emph, code,
-                  str, tabchar, whitespace, endline, symbol ] <?> "inline"
-
-hyphens = try (do
+inline = choice [ superscript
+                , subscript
+                , escapedChar
+                , link
+                , image
+                , hyphens
+                , strong
+                , emph
+                , code
+                , str
+                , tabchar
+                , whitespace
+                , endline
+                , symbol ] <?> "inline"
+
+hyphens = try $ do
   result <- many1 (char '-')
   option Space endline 
   -- don't want to treat endline after hyphen or dash as a space
-  return (Str result))
+  return $ Str result
 
 escapedChar = escaped anyChar
 
 symbol = do 
   result <- oneOf specialChars
-  return (Str [result])
+  return $ Str [result]
 
 -- parses inline code, between codeStart and codeEnd
-code = try (do 
+code = try $ do 
   string "``"
   result <- manyTill anyChar (try (string "``"))
-  let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-  return (Code result'))
+  return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
 
-emph = do
-  result <- enclosed (char '*') (char '*') inline
-  return (Emph (normalizeSpaces result))
+emph = enclosed (char '*') (char '*') inline >>= 
+       return . Emph . normalizeSpaces
 
-strong = do
-  result <- enclosed (string "**") (string "**") inline
-  return (Strong (normalizeSpaces result))
+strong = enclosed (string "**") (string "**") inline >>= 
+         return . Strong . normalizeSpaces
 
 interpreted role = try $ do
-  option "" (try $ string "\\ ")
+  optional $ try $ string "\\ "
   result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
   nextChar <- lookAhead anyChar
   try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
   return [Str result]
 
 superscript = interpreted "sup" >>= (return . Superscript)
+
 subscript = interpreted "sub" >>= (return . Subscript)
 
-whitespace = do
-  many1 spaceChar <?> "whitespace"
-  return Space
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
 
-tabchar = do
-  tab
-  return (Str "\t")
+tabchar = tab >> return (Str "\t")
 
-str = do 
-  notFollowedBy' oneWordReference
-  result <- many1 (noneOf (specialChars ++ "\t\n "))
-  return (Str result)
+str = notFollowedBy' oneWordReference >> 
+      many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
 
 -- an endline character that can be treated as a space, not a structural break
-endline = try (do
+endline = try $ do
   newline
   notFollowedBy blankline
   -- parse potential list-starts at beginning of line differently in a list:
   st <- getState
   if ((stateParserContext st) == ListItemState)
-     then do notFollowedBy' anyOrderedListMarker
-             notFollowedBy' bulletListStart
-     else option () pzero
-  return Space)
+     then notFollowedBy' anyOrderedListMarker >> notFollowedBy' bulletListStart
+     else return ()
+  return Space
 
 --
 -- links
@@ -628,10 +601,10 @@ referenceLink = try $ do
 uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", 
                            "mailto:", "news:", "telnet:" ]
 
-uri = try (do
+uri = try $ do
   scheme <- uriScheme
   identifier <- many1 (noneOf " \t\n")
-  return (scheme ++ identifier))
+  return $ scheme ++ identifier
 
 autoURI = try $ do
   src <- uri
@@ -639,20 +612,20 @@ autoURI = try $ do
 
 emailChar = alphaNum <|> oneOf "-+_."
 
-emailAddress = try (do
+emailAddress = try $ do
   firstLetter <- alphaNum
   restAddr <- many emailChar
   let addr = firstLetter:restAddr
   char '@'
   dom <- domain
-  return (addr ++ '@':dom))
+  return $ addr ++ '@':dom
 
 domainChar = alphaNum <|> char '-'
 
-domain = try (do
+domain = try $ do
   first <- many1 domainChar
   dom <- many1 (try (do{ char '.'; many1 domainChar }))
-  return (joinWithSep "." (first:dom)))
+  return $ joinWithSep "." (first:dom)
 
 autoEmail = try $ do
   src <- emailAddress
@@ -669,5 +642,5 @@ image = try $ do
   src <- case lookupKeySrc keyTable ref of
            Nothing     -> fail "no corresponding key"
            Just target -> return target
-  return (Image (normalizeSpaces ref) src)
+  return $ Image (normalizeSpaces ref) src
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-08-15 06:00:58 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-08-15 06:00:58 +0000
commit	a8e2199034679c07411c76c42ab1ffb52b170029 (patch)
tree	2ce7c3be138e30936210196fa828816298139ec6 /src/Text/Pandoc/Readers/RST.hs
parent	e814a3f6d23f640b1aed5b7cb949459d514a3e33 (diff)