summaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-08-15 06:00:58 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-08-15 06:00:58 +0000
commita8e2199034679c07411c76c42ab1ffb52b170029 (patch)
tree2ce7c3be138e30936210196fa828816298139ec6 /src/Text
parente814a3f6d23f640b1aed5b7cb949459d514a3e33 (diff)
Major code cleanup in all modules. (Removed unneeded imports,
reformatted, etc.) More major changes are documented below: + Removed Text.Pandoc.ParserCombinators and moved all its definitions to Text.Pandoc.Shared. + In Text.Pandoc.Shared: - Removed unneeded 'try' in blanklines. - Removed endsWith function and rewrote functions to use isSuffixOf instead. - Added >>~ combinator. - Rewrote stripTrailingNewlines, removeLeadingSpaces. + Moved Text.Pandoc.Entities -> Text.Pandoc.CharacterReferences. - Removed unneeded functions charToEntity, charToNumericalEntity. - Renamed functions using proper terminology (character references, not entities). decodeEntities -> decodeCharacterReferences, characterEntity -> characterReference. - Moved escapeStringToXML to Docbook writer, which is the only thing that uses it. - Removed old entity parser in HTML and Markdown readers; replaced with new charRef parser in Text.Pandoc.Shared. + Fixed accent bug in Text.Pandoc.Readers.LaTeX: \^{} now correctly parses as a '^' character. + Text.Pandoc.ASCIIMathML is no longer an exported module. git-svn-id: https://pandoc.googlecode.com/svn/trunk@835 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs4
-rw-r--r--src/Text/Pandoc/Blocks.hs26
-rw-r--r--src/Text/Pandoc/CharacterReferences.hs (renamed from src/Text/Pandoc/Entities.hs)77
-rw-r--r--src/Text/Pandoc/Definition.hs15
-rw-r--r--src/Text/Pandoc/ParserCombinators.hs198
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs362
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs536
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs662
-rw-r--r--src/Text/Pandoc/Readers/RST.hs321
-rw-r--r--src/Text/Pandoc/Shared.hs980
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs23
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs199
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs475
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs45
-rw-r--r--src/Text/Pandoc/Writers/Man.hs26
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs75
-rw-r--r--src/Text/Pandoc/Writers/RST.hs46
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs94
18 files changed, 1917 insertions, 2247 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index fe724987c..df73ed325 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -39,8 +39,8 @@ inline links:
>
> markdownToRST :: String -> String
> markdownToRST = toUTF8 .
-> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
-> (readMarkdown defaultParserState) . fromUTF8
+> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
+> (readMarkdown defaultParserState) . fromUTF8
>
> main = interact markdownToRST
diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs
index ffcd5bfe0..cfc22cb3e 100644
--- a/src/Text/Pandoc/Blocks.hs
+++ b/src/Text/Pandoc/Blocks.hs
@@ -43,9 +43,8 @@ module Text.Pandoc.Blocks
rightAlignBlock
)
where
-
import Text.PrettyPrint
-import Data.List (transpose, intersperse)
+import Data.List ( intersperse )
-- | A fixed-width block of text. Parameters are width of block,
-- height of block, and list of lines.
@@ -53,6 +52,17 @@ data TextBlock = TextBlock Int Int [String]
instance Show TextBlock where
show x = show $ blockToDoc x
+-- | Break lines in a list of lines so that none are greater than
+-- a given width.
+breakLines :: Int -- ^ Maximum length of lines.
+ -> [String] -- ^ List of lines.
+ -> [String]
+breakLines width [] = []
+breakLines width (l:ls) =
+ if length l > width
+ then (take width l):(breakLines width ((drop width l):ls))
+ else l:(breakLines width ls)
+
-- | Convert a @Doc@ element into a @TextBlock@ with a specified width.
docToBlock :: Int -- ^ Width of text block.
-> Doc -- ^ @Doc@ to convert.
@@ -60,13 +70,8 @@ docToBlock :: Int -- ^ Width of text block.
docToBlock width doc =
let rendered = renderStyle (style {lineLength = width,
ribbonsPerLine = 1}) doc
- lns = lines rendered
- chop [] = []
- chop (l:ls) = if length l > width
- then (take width l):(chop ((drop width l):ls))
- else l:(chop ls)
- lns' = chop lns
- in TextBlock width (length lns') lns'
+ lns = breakLines width $ lines rendered
+ in TextBlock width (length lns) lns
-- | Convert a @TextBlock@ to a @Doc@ element.
blockToDoc :: TextBlock -> Doc
@@ -116,8 +121,7 @@ isWhitespace x = x `elem` " \t"
-- | Left-aligns the contents of a @TextBlock@ within the block.
leftAlignBlock :: TextBlock -> TextBlock
leftAlignBlock (TextBlock width height lns) =
- TextBlock width height $
- map (dropWhile isWhitespace) lns
+ TextBlock width height $ map (dropWhile isWhitespace) lns
-- | Right-aligns the contents of a @TextBlock@ within the block.
rightAlignBlock :: TextBlock -> TextBlock
diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/CharacterReferences.hs
index 125774d4d..deb2c3f1a 100644
--- a/src/Text/Pandoc/Entities.hs
+++ b/src/Text/Pandoc/CharacterReferences.hs
@@ -17,7 +17,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Entities
+ Module : Text.Pandoc.CharacterReferences
Copyright : Copyright (C) 2006-7 John MacFarlane
License : GNU GPL, version 2 or above
@@ -25,37 +25,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
-Functions for encoding unicode characters as entity references,
-and vice versa.
+Functions for parsing character references.
-}
-module Text.Pandoc.Entities (
- charToEntity,
- charToNumericalEntity,
- decodeEntities,
- escapeCharForXML,
- escapeStringForXML,
- characterEntity
+module Text.Pandoc.CharacterReferences (
+ characterReference,
+ decodeCharacterReferences,
) where
-import Data.Char ( chr, ord )
+import Data.Char ( chr )
import Text.ParserCombinators.Parsec
-import Data.Maybe ( fromMaybe )
import qualified Data.Map as Map
--- | Returns a string containing an entity reference for the character.
-charToEntity :: Char -> String
-charToEntity char = Map.findWithDefault (charToNumericalEntity char) char reverseEntityTable
-
--- | Returns a string containing a numerical entity reference for the char.
-charToNumericalEntity :: Char -> String
-charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";"
-
-- | Parse character entity.
-characterEntity :: GenParser Char st Char
-characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "character entity"
+characterReference :: GenParser Char st Char
+characterReference = characterEntity <|>
+ hexadecimalCharacterReference <|>
+ decimalCharacterReference <?>
+ "character entity"
-- | Parse character entity.
-namedEntity :: GenParser Char st Char
-namedEntity = try $ do
+characterEntity :: GenParser Char st Char
+characterEntity = try $ do
st <- char '&'
body <- many1 alphaNum
end <- char ';'
@@ -63,8 +52,8 @@ namedEntity = try $ do
return $ Map.findWithDefault '?' entity entityTable
-- | Parse hexadecimal entity.
-hexEntity :: GenParser Char st Char
-hexEntity = try $ do
+hexadecimalCharacterReference :: GenParser Char st Char
+hexadecimalCharacterReference = try $ do
st <- string "&#"
hex <- oneOf "Xx"
body <- many1 (oneOf "0123456789ABCDEFabcdef")
@@ -72,49 +61,23 @@ hexEntity = try $ do
return $ chr $ read ('0':'x':body)
-- | Parse decimal entity.
-decimalEntity :: GenParser Char st Char
-decimalEntity = try $ do
+decimalCharacterReference :: GenParser Char st Char
+decimalCharacterReference = try $ do
st <- string "&#"
body <- many1 digit
end <- char ';'
return $ chr $ read body
--- | Escape one character as needed for XML.
-escapeCharForXML :: Char -> String
-escapeCharForXML x =
- case x of
- '&' -> "&amp;"
- '<' -> "&lt;"
- '>' -> "&gt;"
- '"' -> "&quot;"
- '\160' -> "&nbsp;"
- c -> [c]
-
--- | True if the character needs to be escaped.
-needsEscaping :: Char -> Bool
-needsEscaping c = c `elem` "&<>\"\160"
-
--- | Escape string as needed for XML. Entity references are not preserved.
-escapeStringForXML :: String -> String
-escapeStringForXML "" = ""
-escapeStringForXML str =
- case break needsEscaping str of
- (okay, "") -> okay
- (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
-
-- | Convert entities in a string to characters.
-decodeEntities :: String -> String
-decodeEntities str =
- case parse (many (characterEntity <|> anyChar)) str str of
+decodeCharacterReferences :: String -> String
+decodeCharacterReferences str =
+ case parse (many (characterReference <|> anyChar)) str str of
Left err -> error $ "\nError: " ++ show err
Right result -> result
entityTable :: Map.Map String Char
entityTable = Map.fromList entityTableList
-reverseEntityTable :: Map.Map Char String
-reverseEntityTable = Map.fromList $ map (\(a,b) -> (b,a)) entityTableList
-
entityTableList :: [(String, Char)]
entityTableList = [
("&quot;", chr 34),
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
index 3d3858b7e..7d1125c5a 100644
--- a/src/Text/Pandoc/Definition.hs
+++ b/src/Text/Pandoc/Definition.hs
@@ -34,10 +34,10 @@ data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show)
-- | Bibliographic information for the document: title (list of 'Inline'),
-- authors (list of strings), date (string).
-data Meta = Meta [Inline] -- title
- [String] -- authors
- String -- date
- deriving (Eq, Show, Read)
+data Meta = Meta [Inline] -- title
+ [String] -- authors
+ String -- date
+ deriving (Eq, Show, Read)
-- | Alignment of a table column.
data Alignment = AlignLeft
@@ -65,12 +65,11 @@ data ListNumberDelim = DefaultDelim
-- | Block element.
data Block
= Plain [Inline] -- ^ Plain text, not a paragraph
- | Null -- ^ Nothing
| Para [Inline] -- ^ Paragraph
| CodeBlock String -- ^ Code block (literal)
| RawHtml String -- ^ Raw HTML block (literal)
| BlockQuote [Block] -- ^ Block quote (list of blocks)
- | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes,
+ | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes
-- and a list of items, each a list of blocks)
| BulletList [[Block]] -- ^ Bullet list (list of items, each
-- a list of blocks)
@@ -84,6 +83,7 @@ data Block
-- relative column widths, column headers
-- (each a list of blocks), and rows
-- (each a list of lists of blocks)
+ | Null -- ^ Nothing
deriving (Eq, Read, Show)
-- | Type of quotation marks to use in Quoted inline.
@@ -112,6 +112,5 @@ data Inline
| Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
| Image [Inline] Target -- ^ Image: alt text (list of inlines), target
-- and target
- | Note [Block] -- ^ Footnote or endnote - reference (string),
- -- text (list of blocks)
+ | Note [Block] -- ^ Footnote or endnote
deriving (Show, Eq, Read)
diff --git a/src/Text/Pandoc/ParserCombinators.hs b/src/Text/Pandoc/ParserCombinators.hs
deleted file mode 100644
index 559a654cc..000000000
--- a/src/Text/Pandoc/ParserCombinators.hs
+++ /dev/null
@@ -1,198 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.ParserCombinators
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Parser combinators used in Pandoc readers.
--}
-module Text.Pandoc.ParserCombinators (
- anyLine,
- many1Till,
- notFollowedBy',
- oneOfStrings,
- spaceChar,
- skipSpaces,
- blankline,
- blanklines,
- enclosed,
- stringAnyCase,
- parseFromString,
- lineClump,
- charsInBalanced,
- charsInBalanced',
- romanNumeral,
- withHorizDisplacement
- ) where
-import Text.ParserCombinators.Parsec
-import Data.Char ( toUpper, toLower )
-
---- | Parse any line of text
-anyLine :: GenParser Char st [Char]
-anyLine = try (manyTill anyChar newline) <|> many1 anyChar
- -- second alternative is for a line ending with eof
-
--- | Parses a space or tab.
-spaceChar :: CharParser st Char
-spaceChar = oneOf " \t"
-
--- | Skips zero or more spaces or tabs.
-skipSpaces :: GenParser Char st ()
-skipSpaces = skipMany spaceChar
-
--- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: GenParser Char st Char
-blankline = try (do
- skipSpaces
- newline)
-
--- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: GenParser Char st [Char]
-blanklines = try (many1 blankline)
-
--- | Parses material enclosed between start and end parsers.
-enclosed :: GenParser Char st t -- ^ start parser
- -> GenParser Char st end -- ^ end parser
- -> GenParser Char st a -- ^ content parser (to be used repeatedly)
- -> GenParser Char st [a]
-enclosed start end parser = try (do
- start
- notFollowedBy space
- result <- many1Till parser (try end)
- return result)
-
--- | Like @manyTill@, but reads at least one item.
-many1Till :: GenParser tok st a
- -> GenParser tok st end
- -> GenParser tok st [a]
-many1Till p end = try (do
- first <- p
- rest <- manyTill p end
- return (first:rest))
-
--- | A more general form of @notFollowedBy@. This one allows any
--- type of parser to be specified, and succeeds only if that parser fails.
--- It does not consume any input.
-notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
-notFollowedBy' parser = try (do { c <- try parser; unexpected (show c) }
- <|> return ())
-
--- | Parses one of a list of strings (tried in order).
-oneOfStrings :: [String] -> GenParser Char st String
-oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
-
--- | Parse string, case insensitive.
-stringAnyCase :: [Char] -> CharParser st String
-stringAnyCase [] = string ""
-stringAnyCase (x:xs) = try (do
- firstChar <- choice [ char (toUpper x), char (toLower x) ]
- rest <- stringAnyCase xs
- return (firstChar:rest))
-
--- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
-parseFromString parser str = try $ do
- oldInput <- getInput
- setInput str
- result <- parser
- setInput oldInput
- return result
-
--- | Parse raw line block up to and including blank lines.
-lineClump :: GenParser Char st String
-lineClump = do
- lines <- many1 (do{notFollowedBy blankline; anyLine})
- blanks <- blanklines <|> (do{eof; return "\n"})
- return ((unlines lines) ++ blanks)
-
--- | Parse a string of characters between an open character
--- and a close character, including text between balanced
--- pairs of open and close. For example,
--- @charsInBalanced '(' ')'@ will parse "(hello (there))"
--- and return "hello (there)". Stop if a blank line is
--- encountered.
-charsInBalanced :: Char -> Char -> GenParser Char st String
-charsInBalanced open close = try $ do
- char open
- raw <- manyTill ( (do res <- charsInBalanced open close
- return $ [open] ++ res ++ [close])
- <|> (do notFollowedBy' (blankline >> blanklines)
- count 1 anyChar))
- (char close)
- return $ concat raw
-
--- | Like charsInBalanced, but allow blank lines in the content.
-charsInBalanced' :: Char -> Char -> GenParser Char st String
-charsInBalanced' open close = try $ do
- char open
- raw <- manyTill ( (do res <- charsInBalanced open close
- return $ [open] ++ res ++ [close])
- <|> count 1 anyChar)
- (char close)
- return $ concat raw
-
--- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Bool -> -- ^ Uppercase if true
- GenParser Char st Int
-romanNumeral upper = try $ do
- let char' c = char (if upper then toUpper c else c)
- let one = char' 'i'
- let five = char' 'v'
- let ten = char' 'x'
- let fifty = char' 'l'
- let hundred = char' 'c'
- let fivehundred = char' 'd'
- let thousand = char' 'm'
- thousands <- many thousand >>= (return . (1000 *) . length)
- ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
- fivehundreds <- many fivehundred >>= (return . (500 *) . length)
- fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
- hundreds <- many hundred >>= (return . (100 *) . length)
- nineties <- option 0 $ try $ ten >> hundred >> return 90
- fifties <- many fifty >>= (return . (50 *) . length)
- forties <- option 0 $ try $ ten >> fifty >> return 40
- tens <- many ten >>= (return . (10 *) . length)
- nines <- option 0 $ try $ one >> ten >> return 9
- fives <- many five >>= (return . (5*) . length)
- fours <- option 0 $ try $ one >> five >> return 4
- ones <- many one >>= (return . length)
- let total = thousands + ninehundreds + fivehundreds + fourhundreds +
- hundreds + nineties + fifties + forties + tens + nines +
- fives + fours + ones
- if total == 0
- then fail "not a roman numeral"
- else return total
-
--- | Applies a parser, returns tuple of its results and its horizontal
--- displacement (the difference between the source column at the end
--- and the source column at the beginning). Vertical displacement
--- (source row) is ignored.
-withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
- -> GenParser Char st (a, Int) -- ^ (result, displacement)
-withHorizDisplacement parser = do
- pos1 <- getPosition
- result <- parser
- pos2 <- getPosition
- return (result, sourceColumn pos2 - sourceColumn pos1)
-
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 1742667b8..1eb5d7b4a 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -41,12 +41,12 @@ module Text.Pandoc.Readers.HTML (
) where
import Text.ParserCombinators.Parsec
-import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( characterEntity, decodeEntities )
+import Text.Pandoc.CharacterReferences ( characterReference,
+ decodeCharacterReferences )
import Data.Maybe ( fromMaybe )
-import Data.List ( intersect, takeWhile, dropWhile )
+import Data.List ( takeWhile, dropWhile )
import Data.Char ( toUpper, toLower, isAlphaNum )
-- | Convert HTML-formatted string to 'Pandoc' document.
@@ -55,10 +55,6 @@ readHtml :: ParserState -- ^ Parser state
-> Pandoc
readHtml = readWith parseHtml
--- for testing
-testString :: String -> IO ()
-testString = testStringWith parseHtml
-
--
-- Constants
--
@@ -74,26 +70,18 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
--
-- | Read blocks until end tag.
-blocksTilEnd tag = try (do
- blocks <- manyTill (do {b <- block; spaces; return b}) (htmlEndTag tag)
- return $ filter (/= Null) blocks)
+blocksTilEnd tag = do
+ blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
+ return $ filter (/= Null) blocks
-- | Read inlines until end tag.
-inlinesTilEnd tag = try (do
- inlines <- manyTill inline (htmlEndTag tag)
- return inlines)
+inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-- | Parse blocks between open and close tag.
-blocksIn tag = try $ do
- htmlTag tag
- spaces
- blocksTilEnd tag
+blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
-- | Parse inlines between open and close tag.
-inlinesIn tag = try $ do
- htmlTag tag
- spaces
- inlinesTilEnd tag
+inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
-- | Extract type from a tag: e.g. @br@ from @\<br\>@
extractTagType :: String -> String
@@ -103,19 +91,19 @@ extractTagType ('<':rest) =
extractTagType _ = ""
-- | Parse any HTML tag (closing or opening) and return text of tag
-anyHtmlTag = try (do
+anyHtmlTag = try $ do
char '<'
spaces
tag <- many1 alphaNum
attribs <- htmlAttributes
spaces
ender <- option "" (string "/")
- let ender' = if (null ender) then "" else " /"
+ let ender' = if null ender then "" else " /"
spaces
char '>'
- return ("<" ++ tag ++ attribs ++ ender' ++ ">"))
+ return $ "<" ++ tag ++ attribs ++ ender' ++ ">"
-anyHtmlEndTag = try (do
+anyHtmlEndTag = try $ do
char '<'
spaces
char '/'
@@ -123,19 +111,19 @@ anyHtmlEndTag = try (do
tagType <- many1 alphaNum
spaces
char '>'
- return ("</" ++ tagType ++ ">"))
+ return $ "</" ++ tagType ++ ">"
htmlTag :: String -> GenParser Char st (String, [(String, String)])
-htmlTag tag = try (do
+htmlTag tag = try $ do
char '<'
spaces
stringAnyCase tag
attribs <- many htmlAttribute
spaces
- option "" (string "/")
+ optional (string "/")
spaces
char '>'
- return (tag, (map (\(name, content, raw) -> (name, content)) attribs)))
+ return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
-- parses a quoted html attribute value
quoted quoteChar = do
@@ -145,20 +133,20 @@ quoted quoteChar = do
htmlAttributes = do
attrList <- many htmlAttribute
- return (concatMap (\(name, content, raw) -> raw) attrList)
+ return $ concatMap (\(name, content, raw) -> raw) attrList
htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
--- minimized boolean attribute (no = and value)
-htmlMinimizedAttribute = try (do
+-- minimized boolean attribute
+htmlMinimizedAttribute = try $ do
many1 space
name <- many1 (choice [letter, oneOf ".-_:"])
spaces
notFollowedBy (char '=')
let content = name
- return (name, content, (" " ++ name)))
+ return (name, content, (" " ++ name))
-htmlRegularAttribute = try (do
+htmlRegularAttribute = try $ do
many1 space
name <- many1 (choice [letter, oneOf ".-_:"])
spaces
@@ -170,10 +158,10 @@ htmlRegularAttribute = try (do
a <- many (alphaNum <|> (oneOf "-._:"))
return (a,"")) ]
return (name, content,
- (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)))
+ (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))
-- | Parse an end tag of type 'tag'
-htmlEndTag tag = try (do
+htmlEndTag tag = try $ do
char '<'
spaces
char '/'
@@ -181,87 +169,83 @@ htmlEndTag tag = try (do
stringAnyCase tag
spaces
char '>'
- return ("</" ++ tag ++ ">"))
+ return $ "</" ++ tag ++ ">"
-- | Returns @True@ if the tag is an inline tag.
isInline tag = (extractTagType tag) `elem` inlineHtmlTags
-anyHtmlBlockTag = try (do
- tag <- choice [anyHtmlTag, anyHtmlEndTag]
- if isInline tag then fail "inline tag" else return tag)
+anyHtmlBlockTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isInline tag then fail "inline tag" else return tag
-anyHtmlInlineTag = try (do
- tag <- choice [ anyHtmlTag, anyHtmlEndTag ]
- if isInline tag then return tag else fail "not an inline tag")
+anyHtmlInlineTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isInline tag then return tag else fail "not an inline tag"
-- | Parses material between script tags.
-- Scripts must be treated differently, because they can contain '<>' etc.
-htmlScript = try (do
+htmlScript = try $ do
open <- string "<script"
rest <- manyTill anyChar (htmlEndTag "script")
- return (open ++ rest ++ "</script>"))
+ return $ open ++ rest ++ "</script>"
htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
-rawHtmlBlock = try (do
- notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"])
+rawHtmlBlock = try $ do
+ notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
body <- htmlBlockElement <|> anyHtmlBlockTag
- sp <- (many space)
+ sp <- many space
state <- getState
- if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null)
+ if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
-- | Parses an HTML comment.
-htmlComment = try (do
+htmlComment = try $ do
string "<!--"
comment <- manyTill anyChar (try (string "-->"))
- return ("<!--" ++ comment ++ "-->"))
+ return $ "<!--" ++ comment ++ "-->"
--
-- parsing documents
--
-xmlDec = try (do
+xmlDec = try $ do
string "<?"
rest <- manyTill anyChar (char '>')
- return ("<?" ++ rest ++ ">"))
+ return $ "<?" ++ rest ++ ">"
-definition = try (do
+definition = try $ do
string "<!"
rest <- manyTill anyChar (char '>')
- return ("<!" ++ rest ++ ">"))
+ return $ "<!" ++ rest ++ ">"
-nonTitleNonHead = try (do
- notFollowedBy' (htmlTag "title")
- notFollowedBy' (htmlTag "/head")
- result <- choice [do {rawHtmlBlock; return ' '}, anyChar]
- return result)
+nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
+ ((rawHtmlBlock >> return ' ') <|> anyChar)
-parseTitle = try (do
- (tag, attribs) <- htmlTag "title"
+parseTitle = try $ do
+ (tag, _) <- htmlTag "title"
contents <- inlinesTilEnd tag
spaces
- return contents)
+ return contents
-- parse header and return meta-information (for now, just title)
-parseHead = try (do
+parseHead = try $ do
htmlTag "head"
spaces
skipMany nonTitleNonHead
contents <- option [] parseTitle
skipMany nonTitleNonHead
htmlTag "/head"
- return (contents, [], ""))
+ return (contents, [], "")
-skipHtmlTag tag = option ("",[]) (htmlTag tag)
+skipHtmlTag tag = optional (htmlTag tag)
-- h1 class="title" representation of title in body
-bodyTitle = try (do
+bodyTitle = try $ do
(tag, attribs) <- htmlTag "h1"
cl <- case (extractAttribute "class" attribs) of
- Just "title" -> do {return ""}
+ Just "title" -> return ""
otherwise -> fail "not title"
inlinesTilEnd "h1"
- return "")
parseHtml = do
sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
@@ -271,27 +255,30 @@ parseHtml = do
spaces
skipHtmlTag "body"
spaces
- option "" bodyTitle -- skip title in body, because it's represented in meta
+ optional bodyTitle -- skip title in body, because it's represented in meta
blocks <- parseBlocks
spaces
- option "" (htmlEndTag "body")
+ optional (htmlEndTag "body")
spaces
- option "" (htmlEndTag "html")
+ optional (htmlEndTag "html")
many anyChar -- ignore anything after </html>
eof
- return (Pandoc (Meta title authors date) blocks)
+ return $ Pandoc (Meta title authors date) blocks
--
-- parsing blocks
--
-parseBlocks = do
- spaces
- result <- sepEndBy block spaces
- return $ filter (/= Null) result
+parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
-block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
- rawHtmlBlock ] <?> "block"
+block = choice [ codeBlock
+ , header
+ , hrule
+ , list
+ , blockQuote
+ , para
+ , plain
+ , rawHtmlBlock ] <?> "block"
--
-- header blocks
@@ -299,53 +286,49 @@ block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-headerLevel n = try (do
+headerLevel n = try $ do
let level = "h" ++ show n
(tag, attribs) <- htmlTag level
contents <- inlinesTilEnd level
- return (Header n (normalizeSpaces contents)))
+ return $ Header n (normalizeSpaces contents)
--
-- hrule block
--
-hrule = try (do
+hrule = try $ do
(tag, attribs) <- htmlTag "hr"
state <- getState
- if (not (null attribs)) && (stateParseRaw state)
- then -- in this case we want to parse it as raw html
- unexpected "attributes in hr"
- else return HorizontalRule)
+ if not (null attribs) && stateParseRaw state
+ then unexpected "attributes in hr" -- parse as raw in this case
+ else return HorizontalRule
--
-- code blocks
--
-codeBlock = choice [ preCodeBlock, bareCodeBlock ] <?> "code block"
+codeBlock = preCodeBlock <|> bareCodeBlock <?> "code block"
-preCodeBlock = try (do
+preCodeBlock = try $ do
htmlTag "pre"
spaces
- htmlTag "code"
- result <- manyTill anyChar (htmlEndTag "code")
+ result <- bareCodeBlock
spaces
htmlEndTag "pre"
- return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
+ return result
-bareCodeBlock = try (do
+bareCodeBlock = try $ do
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
- return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
+ return $ CodeBlock $ stripTrailingNewlines $
+ decodeCharacterReferences result
--
-- block quotes
--
-blockQuote = try (do
- tag <- htmlTag "blockquote"
- spaces
- blocks <- blocksTilEnd "blockquote"
- return (BlockQuote blocks))
+blockQuote = try $ htmlTag "blockquote" >> spaces >>
+ blocksTilEnd "blockquote" >>= (return . BlockQuote)
--
-- list blocks
@@ -354,119 +337,105 @@ blockQuote = try (do
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
orderedList = try $ do
- (_, attribs) <- htmlTag "ol"
- (start, style) <- option (1, DefaultStyle) $
- do failIfStrict
- let sta = fromMaybe "1" $
- lookup "start" attribs
- let sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- let sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ -> DefaultStyle
- return (read sta, sty')
- spaces
- items <- sepEndBy1 (blocksIn "li") spaces
- htmlEndTag "ol"
- return (OrderedList (start, style, DefaultDelim) items)
+ (_, attribs) <- htmlTag "ol"
+ (start, style) <- option (1, DefaultStyle) $
+ do failIfStrict
+ let sta = fromMaybe "1" $
+ lookup "start" attribs
+ let sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ let sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
+ return (read sta, sty')
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ol"
+ return $ OrderedList (start, style, DefaultDelim) items
bulletList = try $ do
- htmlTag "ul"
- spaces
- items <- sepEndBy1 (blocksIn "li") spaces
- htmlEndTag "ul"
- return (BulletList items)
+ htmlTag "ul"
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ul"
+ return $ BulletList items
definitionList = try $ do
- failIfStrict -- def lists not part of standard markdown
- tag <- htmlTag "dl"
- spaces
- items <- sepEndBy1 definitionListItem spaces
- htmlEndTag "dl"
- return (DefinitionList items)
+ failIfStrict -- def lists not part of standard markdown
+ tag <- htmlTag "dl"
+ spaces
+ items <- sepEndBy1 definitionListItem spaces
+ htmlEndTag "dl"
+ return $ DefinitionList items
definitionListItem = try $ do
- terms <- sepEndBy1 (inlinesIn "dt") spaces
- defs <- sepEndBy1 (blocksIn "dd") spaces
- let term = joinWithSep [LineBreak] terms
- return (term, concat defs)
+ terms <- sepEndBy1 (inlinesIn "dt") spaces
+ defs <- sepEndBy1 (blocksIn "dd") spaces
+ let term = joinWithSep [LineBreak] terms
+ return (term, concat defs)
--
-- paragraph block
--
-para = try (do
- tag <- htmlTag "p"
- result <- inlinesTilEnd "p"
- return (Para (normalizeSpaces result)))
+para = htmlTag "p" >> inlinesTilEnd "p" >>= return . Para . normalizeSpaces
--
-- plain block
--
-plain = do
- result <- many1 inline
- return (Plain (normalizeSpaces result))
+plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- inline
--
-inline = choice [ text, special ] <?> "inline"
-
-text = choice [ entity, strong, emph, superscript, subscript,
- strikeout, spanStrikeout, code, str,
- linebreak, whitespace ] <?> "text"
-
-special = choice [ link, image, rawHtmlInline ] <?>
- "link, inline html, or image"
-
-entity = do
- ent <- characterEntity
- return $ Str [ent]
-
-code = try (do
+inline = choice [ charRef
+ , strong
+ , emph
+ , superscript
+ , subscript
+ , strikeout
+ , spanStrikeout
+ , code
+ , str
+ , linebreak
+ , whitespace
+ , link
+ , image
+ , rawHtmlInline ] <?> "inline"
+
+code = try $ do
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
-- remove internal line breaks, leading and trailing space,
- -- and decode entities
- let result' = decodeEntities $ removeLeadingTrailingSpace $
- joinWithSep " " $ lines result
- return (Code result'))
+ -- and decode character references
+ return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
+ joinWithSep " " $ lines result
rawHtmlInline = do
- result <- choice [htmlScript, anyHtmlInlineTag]
+ result <- htmlScript <|> anyHtmlInlineTag
state <- getState
if stateParseRaw state then return (HtmlInline result) else return (Str "")
-betweenTags tag = try (do
- htmlTag tag
- result <- inlinesTilEnd tag
- return (normalizeSpaces result))
+betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
+ return . normalizeSpaces
-emph = try (do
- result <- choice [betweenTags "em", betweenTags "it"]
- return (Emph result))
+emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
-superscript = try $ do
- failIfStrict -- strict markdown has no superscript, so treat as raw HTML
- result <- betweenTags "sup"
- return (Superscript result)
+strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
-subscript = try $ do
- failIfStrict -- strict markdown has no subscript, so treat as raw HTML
- result <- betweenTags "sub"
- return (Subscript result)
+superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
-strikeout = try $ do
- failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
- result <- choice [betweenTags "s", betweenTags "strike"]
- return (Strikeout result)
+subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
+
+strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
+ return . Strikeout
spanStrikeout = try $ do
failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
@@ -474,25 +443,14 @@ spanStrikeout = try $ do
result <- case (extractAttribute "class" attributes) of
Just "strikeout" -> inlinesTilEnd "span"
_ -> fail "not a strikeout"
- return (Strikeout result)
+ return $ Strikeout result
-strong = try (do
- result <- choice [betweenTags "b", betweenTags "strong"]
- return (Strong result))
-
-whitespace = do
- many1 space
- return Space
+whitespace = many1 space >> return Space
-- hard line break
-linebreak = do
- htmlTag "br"
- option ' ' newline
- return LineBreak
+linebreak = htmlTag "br" >> optional newline >> return LineBreak
-str = do
- result <- many1 (noneOf "<& \t\n")
- return (Str result)
+str = many1 (noneOf "<& \t\n") >>= return . Str
--
-- links and images
@@ -501,27 +459,27 @@ str = do
-- extract contents of attribute (attribute names are case-insensitive)
extractAttribute name [] = Nothing
extractAttribute name ((attrName, contents):rest) =
- let name' = map toLower name
- attrName' = map toLower attrName in
- if (attrName' == name')
- then Just (decodeEntities contents)
- else extractAttribute name rest
+ let name' = map toLower name
+ attrName' = map toLower attrName
+ in if attrName' == name'
+ then Just (decodeCharacterReferences contents)
+ else extractAttribute name rest
link = try $ do
(tag, attributes) <- htmlTag "a"
url <- case (extractAttribute "href" attributes) of
- Just url -> do {return url}
+ Just url -> return url
Nothing -> fail "no href"
- let title = fromMaybe "" (extractAttribute "title" attributes)
+ let title = fromMaybe "" $ extractAttribute "title" attributes
label <- inlinesTilEnd "a"
return $ Link (normalizeSpaces label) (url, title)
image = try $ do
(tag, attributes) <- htmlTag "img"
url <- case (extractAttribute "src" attributes) of
- Just url -> do {return url}
+ Just url -> return url
Nothing -> fail "no src"
- let title = fromMaybe "" (extractAttribute "title" attributes)
+ let title = fromMaybe "" $ extractAttribute "title" attributes
let alt = fromMaybe "" (extractAttribute "alt" attributes)
return $ Image [Str alt] (url, title)
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 73a3e4a8f..4b91b528c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -34,7 +34,6 @@ module Text.Pandoc.Readers.LaTeX (
) where
import Text.ParserCombinators.Parsec
-import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Data.Maybe ( fromMaybe )
@@ -47,9 +46,6 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser
-> Pandoc
readLaTeX = readWith parseLaTeX
--- for testing
-testString = testStringWith parseLaTeX
-
-- characters with special meaning
specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
@@ -58,12 +54,12 @@ specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
--
-- | Returns text between brackets and its matching pair.
-bracketedText openB closeB = try (do
+bracketedText openB closeB = do
result <- charsInBalanced' openB closeB
- return ([openB] ++ result ++ [closeB]))
+ return $ [openB] ++ result ++ [closeB]
-- | Returns an option or argument of a LaTeX command.
-optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ]
+optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
-- | True if the string begins with '{'.
isArg ('{':rest) = True
@@ -73,62 +69,55 @@ isArg other = False
commandArgs = many optOrArg
-- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command = try (do
+command = try $ do
char '\\'
name <- many1 alphaNum
star <- option "" (string "*") -- some commands have starred versions
args <- commandArgs
- return (name, star, args))
+ return (name, star, args)
-begin name = try (do
- string "\\begin{"
- string name
- char '}'
- option [] commandArgs
+begin name = try $ do
+ string $ "\\begin{" ++ name ++ "}"
+ optional commandArgs
spaces
- return name)
+ return name
-end name = try (do
- string "\\end{"
- string name
- char '}'
+end name = try $ do
+ string $ "\\end{" ++ name ++ "}"
spaces
- return name)
+ return name
-- | Returns a list of block elements containing the contents of an
-- environment.
-environment name = try (do
- begin name
- spaces
- contents <- manyTill block (end name)
- return contents)
+environment name = try $ begin name >> spaces >> manyTill block (end name)
-anyEnvironment = try (do
+anyEnvironment = try $ do
string "\\begin{"
name <- many alphaNum
star <- option "" (string "*") -- some environments have starred variants
char '}'
- option [] commandArgs
+ optional commandArgs
spaces
contents <- manyTill block (end (name ++ star))
- return (BlockQuote contents))
+ return $ BlockQuote contents
--
-- parsing documents
--
-- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble = try (do
- manyTill (choice [bibliographic, comment, unknownCommand, nullBlock])
- (try (string "\\begin{document}"))
- spaces)
+processLaTeXPreamble = try $ manyTill
+ (choice [bibliographic, comment, unknownCommand, nullBlock])
+ (try (string "\\begin{document}")) >>
+ spaces
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX = do
- option () processLaTeXPreamble -- preamble might not be present (fragment)
+ optional processLaTeXPreamble -- preamble might not be present (fragment)
+ spaces
blocks <- parseBlocks
spaces
- option "" (try (string "\\end{document}")) -- might not be present (in fragment)
+ optional $ try (string "\\end{document}") -- might not be present (fragment)
spaces
eof
state <- getState
@@ -136,21 +125,27 @@ parseLaTeX = do
let title' = stateTitle state
let authors' = stateAuthors state
let date' = stateDate state
- return (Pandoc (Meta title' authors' date') blocks')
+ return $ Pandoc (Meta title' authors' date') blocks'
--
-- parsing blocks
--
-parseBlocks = do
- spaces
- result <- many block
- return result
-
-block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
- comment, bibliographic, para, specialEnvironment,
- itemBlock, unknownEnvironment, unknownCommand ] <?>
- "block"
+parseBlocks = spaces >> many block
+
+block = choice [ hrule
+ , codeBlock
+ , header
+ , list
+ , blockQuote
+ , mathBlock
+ , comment
+ , bibliographic
+ , para
+ , specialEnvironment
+ , itemBlock
+ , unknownEnvironment
+ , unknownCommand ] <?> "block"
--
-- header blocks
@@ -158,24 +153,21 @@ block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-headerLevel n = try (do
+headerLevel n = try $ do
let subs = concat $ replicate (n - 1) "sub"
string ("\\" ++ subs ++ "section")
- option ' ' (char '*')
+ optional (char '*')
char '{'
title <- manyTill inline (char '}')
spaces
- return (Header n (normalizeSpaces title)))
+ return $ Header n (normalizeSpaces title)
--
-- hrule block
--
-hrule = try (do
- oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
- "\\newpage" ]
- spaces
- return HorizontalRule)
+hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
+ "\\newpage" ] >> spaces >> return HorizontalRule
--
-- code blocks
@@ -183,37 +175,28 @@ hrule = try (do
codeBlock = codeBlock1 <|> codeBlock2
-codeBlock1 = try (do
+codeBlock1 = try $ do
string "\\begin{verbatim}" -- don't use begin function because it
-- gobbles whitespace
- option "" blanklines -- we want to gobble blank lines, but not
+ optional blanklines -- we want to gobble blank lines, but not
-- leading space
contents <- manyTill anyChar (try (string "\\end{verbatim}"))
spaces
- return (CodeBlock (stripTrailingNewlines contents)))
+ return $ CodeBlock (stripTrailingNewlines contents)
-codeBlock2 = try (do
- string "\\begin{Verbatim}" -- used by fancyverb package
+codeBlock2 = try $ do
+ string "\\begin{Verbatim}" -- used by fancyvrb package
option "" blanklines
contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
spaces
- return (CodeBlock (stripTrailingNewlines contents)))
+ return $ CodeBlock (stripTrailingNewlines contents)
--
-- block quotes
--
-blockQuote = choice [ blockQuote1, blockQuote2 ] <?> "blockquote"
-
-blockQuote1 = try (do
- blocks <- environment "quote"
- spaces
- return (BlockQuote blocks))
-
-blockQuote2 = try (do
- blocks <- environment "quotation"
- spaces
- return (BlockQuote blocks))
+blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
+ return . BlockQuote
--
-- math block
@@ -223,12 +206,12 @@ mathBlock = mathBlockWith (begin "equation") (end "equation") <|>
mathBlockWith (begin "displaymath") (end "displaymath") <|>
mathBlockWith (string "\\[") (string "\\]") <?> "math block"
-mathBlockWith start end = try (do
+mathBlockWith start end = try $ do
start
spaces
result <- manyTill anyChar end
spaces
- return (BlockQuote [Para [TeX ("$" ++ result ++ "$")]]))
+ return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
--
-- list blocks
@@ -237,69 +220,66 @@ mathBlockWith start end = try (do
list = bulletList <|> orderedList <|> definitionList <?> "list"
listItem = try $ do
- ("item", _, args) <- command
- spaces
- state <- getState
- let oldParserContext = stateParserContext state
- updateState (\state -> state {stateParserContext = ListItemState})
- blocks <- many block
- updateState (\state -> state {stateParserContext = oldParserContext})
- opt <- case args of
- ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
- parseFromString (many inline) $ tail $ init x
- _ -> return []
- return (opt, blocks)
+ ("item", _, args) <- command
+ spaces
+ state <- getState
+ let oldParserContext = stateParserContext state
+ updateState (\state -> state {stateParserContext = ListItemState})
+ blocks <- many block
+ updateState (\state -> state {stateParserContext = oldParserContext})
+ opt <- case args of
+ ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
+ parseFromString (many inline) $ tail $ init x
+ _ -> return []
+ return (opt, blocks)
orderedList = try $ do
- string "\\begin{enumerate}"
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ do failIfStrict
- char '['
- res <- anyOrderedListMarker
- char ']'
- return res
- spaces
- option "" $ try $ do string "\\setlength{\\itemindent}"
- char '{'
- manyTill anyChar (char '}')
- spaces
- start <- option 1 $ try $ do failIfStrict
- string "\\setcounter{enum"
- many1 (char 'i')
- string "}{"
- num <- many1 digit
- char '}'
- spaces
- return $ (read num) + 1
- items <- many listItem
- end "enumerate"
- spaces
- return $ OrderedList (start, style, delim) $ map snd items
+ string "\\begin{enumerate}"
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+ try $ do failIfStrict
+ char '['
+ res <- anyOrderedListMarker
+ char ']'
+ return res
+ spaces
+ option "" $ try $ do string "\\setlength{\\itemindent}"
+ char '{'
+ manyTill anyChar (char '}')
+ spaces
+ start <- option 1 $ try $ do failIfStrict
+ string "\\setcounter{enum"
+ many1 (char 'i')
+ string "}{"
+ num <- many1 digit
+ char '}'
+ spaces
+ return $ (read num) + 1
+ items <- many listItem
+ end "enumerate"
+ spaces
+ return $ OrderedList (start, style, delim) $ map snd items
bulletList = try $ do
- begin "itemize"
- spaces
- items <- many listItem
- end "itemize"
- spaces
- return (BulletList $ map snd items)
+ begin "itemize"
+ spaces
+ items <- many listItem
+ end "itemize"
+ spaces
+ return (BulletList $ map snd items)
definitionList = try $ do
- begin "description"
- spaces
- items <- many listItem
- end "description"
- spaces
- return (DefinitionList items)
+ begin "description"
+ spaces
+ items <- many listItem
+ end "description"
+ spaces
+ return (DefinitionList items)
--
-- paragraph block
--
-para = try (do
- result <- many1 inline
- spaces
- return (Para (normalizeSpaces result)))
+para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
--
-- title authors date
@@ -307,33 +287,30 @@ para = try (do
bibliographic = choice [ maketitle, title, authors, date ]
-maketitle = try (do
- string "\\maketitle"
- spaces
- return Null)
+maketitle = try (string "\\maketitle") >> spaces >> return Null
-title = try (do
+title = try $ do
string "\\title{"
tit <- manyTill inline (char '}')
spaces
updateState (\state -> state { stateTitle = tit })
- return Null)
+ return Null
-authors = try (do
+authors = try $ do
string "\\author{"
authors <- manyTill anyChar (char '}')
spaces
let authors' = map removeLeadingTrailingSpace $ lines $
substitute "\\\\" "\n" authors
updateState (\state -> state { stateAuthors = authors' })
- return Null)
+ return Null
-date = try (do
+date = try $ do
string "\\date{"
date' <- manyTill anyChar (char '}')
spaces
updateState (\state -> state { stateDate = date' })
- return Null)
+ return Null
--
-- item block
@@ -341,14 +318,14 @@ date = try (do
--
-- this forces items to be parsed in different blocks
-itemBlock = try (do
+itemBlock = try $ do
("item", _, args) <- command
state <- getState
if (stateParserContext state == ListItemState)
then fail "item should be handled by list block"
else if null args
then return Null
- else return (Plain [Str (stripFirstAndLast (head args))]))
+ else return $ Plain [Str (stripFirstAndLast (head args))]
--
-- raw LaTeX
@@ -362,77 +339,93 @@ specialEnvironment = do -- these are always parsed as raw
-- | Parse any LaTeX environment and return a Para block containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = try (do
- string "\\begin"
- char '{'
+rawLaTeXEnvironment = try $ do
+ string "\\begin{"
name <- many1 alphaNum
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
char '}'
args <- option [] commandArgs
let argStr = concat args
- contents <- manyTill (choice [(many1 (noneOf "\\")),
+ contents <- manyTill (choice [ (many1 (noneOf "\\")),
(do
(Para [TeX str]) <- rawLaTeXEnvironment
return str),
string "\\" ])
(end name')
spaces
- return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++
- (concat contents) ++ "\\end{" ++ name' ++ "}")]))
+ return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++
+ concat contents ++ "\\end{" ++ name' ++ "}"]
-unknownEnvironment = try (do
+unknownEnvironment = try $ do
state <- getState
result <- if stateParseRaw state -- check whether we should include raw TeX
then rawLaTeXEnvironment -- if so, get whole raw environment
else anyEnvironment -- otherwise just the contents
- return result)
+ return result
-unknownCommand = try (do
- notFollowedBy' $ choice $ map end
- ["itemize", "enumerate", "description", "document"]
+unknownCommand = try $ do
+ notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
+ "document"]
(name, star, args) <- command
spaces
let argStr = concat args
state <- getState
- if (name == "item") && ((stateParserContext state) == ListItemState)
+ if name == "item" && (stateParserContext state) == ListItemState
then fail "should not be parsed as raw"
else string ""
if stateParseRaw state
- then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)])
- else return (Plain [Str (joinWithSep " " args)]))
+ then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
+ else return $ Plain [Str (joinWithSep " " args)]
-- latex comment
-comment = try (do
- char '%'
- result <- manyTill anyChar newline
- spaces
- return Null)
+comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
--
-- inline
--
-inline = choice [ strong, emph, strikeout, superscript, subscript,
- ref, lab, code, linebreak, spacer,
- math, ellipses, emDash, enDash, hyphen, quoted, apostrophe,
- accentedChar, specialChar, specialInline, escapedChar,
- unescapedChar, str, endline, whitespace ] <?> "inline"
-
-specialInline = choice [ url, link, image, footnote, rawLaTeXInline ]
- <?> "link, raw TeX, note, or image"
+inline = choice [ strong
+ , emph
+ , strikeout
+ , superscript
+ , subscript
+ , ref
+ , lab
+ , code
+ , linebreak
+ , spacer
+ , math
+ , ellipses
+ , emDash
+ , enDash
+ , hyphen
+ , quoted
+ , apostrophe
+ , accentedChar
+ , specialChar
+ , url
+ , link
+ , image
+ , footnote
+ , rawLaTeXInline
+ , escapedChar
+ , unescapedChar
+ , str
+ , endline
+ , whitespace ] <?> "inline"
accentedChar = normalAccentedChar <|> specialAccentedChar
-normalAccentedChar = try (do
+normalAccentedChar = try $ do
char '\\'
accent <- oneOf "'`^\"~"
- character <- choice [ between (char '{') (char '}') anyChar, anyChar ]
+ character <- (try $ char '{' >> alphaNum >>~ char '}') <|> alphaNum
let table = fromMaybe [] $ lookup character accentTable
let result = case lookup accent table of
Just num -> chr num
Nothing -> '?'
- return (Str [result]))
+ return $ Str [result]
-- an association list of letters and association list of accents
-- and decimal character numbers.
@@ -451,245 +444,179 @@ accentTable =
('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
- oslash, pound, euro, copyright, sect ]
+ oslash, pound, euro, copyright, sect ]
-ccedil = try (do
+ccedil = try $ do
char '\\'
letter <- oneOfStrings ["cc", "cC"]
let num = if letter == "cc" then 231 else 199
- return (Str [chr num]))
+ return $ Str [chr num]
-aring = try (do
+aring = try $ do
char '\\'
letter <- oneOfStrings ["aa", "AA"]
let num = if letter == "aa" then 229 else 197
- return (Str [chr num]))
+ return $ Str [chr num]
-iuml = try (do
- string "\\\""
- oneOfStrings ["\\i", "{\\i}"]
- return (Str [chr 239]))
+iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 239])
-icirc = try (do
- string "\\^"
- oneOfStrings ["\\i", "{\\i}"]
- return (Str [chr 238]))
+icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 238])
-szlig = try (do
- string "\\ss"
- return (Str [chr 223]))
+szlig = try (string "\\ss") >> return (Str [chr 223])
-oslash = try (do
+oslash = try $ do
char '\\'
letter <- choice [char 'o', char 'O']
let num = if letter == 'o' then 248 else 216
- return (Str [chr num]))
+ return $ Str [chr num]
-aelig = try (do
+aelig = try $ do
char '\\'
letter <- oneOfStrings ["ae", "AE"]
let num = if letter == "ae" then 230 else 198
- return (Str [chr num]))
+ return $ Str [chr num]
-pound = try (do
- string "\\pounds"
- return (Str [chr 163]))
+pound = try (string "\\pounds") >> return (Str [chr 163])
-euro = try (do
- string "\\euro"
- return (Str [chr 8364]))
+euro = try (string "\\euro") >> return (Str [chr 8364])
-copyright = try (do
- string "\\copyright"
- return (Str [chr 169]))
+copyright = try (string "\\copyright") >> return (Str [chr 169])
-sect = try (do
- string "\\S"
- return (Str [chr 167]))
+sect = try (string "\\S") >> return (Str [chr 167])
escapedChar = do
result <- escaped (oneOf " $%&_#{}\n")
- return (if result == Str "\n" then Str " " else result)
+ return $ if result == Str "\n" then Str " " else result
-unescapedChar = do -- ignore standalone, nonescaped special characters
- oneOf "$^&_#{}|<>"
- return (Str "")
+-- ignore standalone, nonescaped special characters
+unescapedChar = oneOf "$^&_#{}|<>" >> return (Str "")
specialChar = choice [ backslash, tilde, caret, bar, lt, gt ]
-backslash = try (do
- string "\\textbackslash"
- return (Str "\\"))
+backslash = try (string "\\textbackslash") >> return (Str "\\")
-tilde = try (do
- string "\\ensuremath{\\sim}"
- return (Str "~"))
+tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-caret = try (do
- string "\\^{}"
- return (Str "^"))
+caret = try (string "\\^{}") >> return (Str "^")
-bar = try (do
- string "\\textbar"
- return (Str "\\"))
+bar = try (string "\\textbar") >> return (Str "\\")
-lt = try (do
- string "\\textless"
- return (Str "<"))
+lt = try (string "\\textless") >> return (Str "<")
-gt = try (do
- string "\\textgreater"
- return (Str ">"))
+gt = try (string "\\textgreater") >> return (Str ">")
-code = try (do
+code = try $ do
string "\\verb"
marker <- anyChar
result <- manyTill anyChar (char marker)
- let result' = removeLeadingTrailingSpace result
- return (Code result'))
+ return $ Code $ removeLeadingTrailingSpace result
-emph = try (do
- oneOfStrings [ "\\emph{", "\\textit{" ]
- result <- manyTill inline (char '}')
- return (Emph result))
+emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
+ manyTill inline (char '}') >>= return . Emph
-strikeout = try $ do
- string "\\sout{"
- result <- manyTill inline (char '}')
- return (Strikeout result)
+strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
+ return . Strikeout
-superscript = try $ do
- string "\\textsuperscript{"
- result <- manyTill inline (char '}')
- return (Superscript result)
+superscript = try $ string "\\textsuperscript{" >>
+ manyTill inline (char '}') >>= return . Superscript
-- note: \textsubscript isn't a standard latex command, but we use
-- a defined version in pandoc.
-subscript = try $ do
- string "\\textsubscript{"
- result <- manyTill inline (char '}')
- return (Subscript result)
+subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
+ return . Subscript
-apostrophe = do
- char '\''
- return Apostrophe
+apostrophe = char '\'' >> return Apostrophe
-quoted = do
- doubleQuoted <|> singleQuoted
+quoted = doubleQuoted <|> singleQuoted
-singleQuoted = try (do
- result <- enclosed singleQuoteStart singleQuoteEnd inline
- return $ Quoted SingleQuote $ normalizeSpaces result)
+singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
+ return . Quoted SingleQuote . normalizeSpaces
-doubleQuoted = try (do
- result <- enclosed doubleQuoteStart doubleQuoteEnd inline
- return $ Quoted DoubleQuote $ normalizeSpaces result)
+doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
+ return . Quoted DoubleQuote . normalizeSpaces
singleQuoteStart = char '`'
-singleQuoteEnd = char '\'' >> notFollowedBy alphaNum
+singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
doubleQuoteStart = string "``"
doubleQuoteEnd = string "''"
-ellipses = try (do
- string "\\ldots"
- option "" (try (string "{}"))
- return Ellipses)
+ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
+ return Ellipses
-enDash = try (do
- string "--"
- notFollowedBy (char '-')
- return EnDash)
+enDash = try (string "--") >> return EnDash
-emDash = try (do
- string "---"
- return EmDash)
+emDash = try (string "---") >> return EmDash
-hyphen = do
- char '-'
- return (Str "-")
+hyphen = char '-' >> return (Str "-")
-lab = try (do
+lab = try $ do
string "\\label{"
result <- manyTill anyChar (char '}')
- return (Str ("(" ++ result ++ ")")))
+ return $ Str $ "(" ++ result ++ ")"
-ref = try (do
- string "\\ref{"
- result <- manyTill anyChar (char '}')
- return (Str (result)))
+ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
-strong = try (do
- string "\\textbf{"
- result <- manyTill inline (char '}')
- return (Strong result))
+strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
+ return . Strong
-whitespace = do
- many1 (oneOf "~ \t")
- return Space
+whitespace = many1 (oneOf "~ \t") >> return Space
-- hard line break
-linebreak = try (do
- string "\\\\"
- return LineBreak)
+linebreak = try (string "\\\\") >> return LineBreak
-spacer = try $ do
- string "\\,"
- return (Str "")
+spacer = try (string "\\,") >> return (Str "")
-str = do
- result <- many1 (noneOf specialChars)
- return (Str result)
+str = many1 (noneOf specialChars) >>= return . Str
-- endline internal to paragraph
-endline = try (do
- newline
- notFollowedBy blankline
- return Space)
+endline = try $ newline >> notFollowedBy blankline >> return Space
-- math
math = math1 <|> math2 <?> "math"
-math1 = try (do
+math1 = try $ do
char '$'
result <- many (noneOf "$")
char '$'
- return (TeX ("$" ++ result ++ "$")))
+ return $ TeX ("$" ++ result ++ "$")
-math2 = try (do
+math2 = try $ do
string "\\("
result <- many (noneOf "$")
string "\\)"
- return (TeX ("$" ++ result ++ "$")))
+ return $ TeX ("$" ++ result ++ "$")
--
-- links and images
--
-url = try (do
+url = try $ do
string "\\url"
url <- charsInBalanced '{' '}'
- return (Link [Code url] (url, "")))
+ return $ Link [Code url] (url, "")
-link = try (do
+link = try $ do
string "\\href{"
url <- manyTill anyChar (char '}')
char '{'
label <- manyTill inline (char '}')
- return (Link (normalizeSpaces label) (url, "")))
+ return $ Link (normalizeSpaces label) (url, "")
-image = try (do
+image = try $ do
("includegraphics", _, args) <- command
let args' = filter isArg args -- filter out options
let src = if null args' then
("", "")
else
(stripFirstAndLast (head args'), "")
- return (Image [Str "image"] src))
+ return $ Image [Str "image"] src
-footnote = try (do
+footnote = try $ do
(name, _, (contents:[])) <- command
if ((name == "footnote") || (name == "thanks"))
then string ""
@@ -700,16 +627,15 @@ footnote = try (do
setInput $ contents'
blocks <- parseBlocks
setInput rest
- return (Note blocks))
+ return $ Note blocks
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try (do
+rawLaTeXInline = try $ do
(name, star, args) <- command
- let argStr = concat args
state <- getState
if ((name == "begin") || (name == "end") || (name == "item"))
then fail "not an inline command"
else string ""
- return (TeX ("\\" ++ name ++ star ++ argStr)))
+ return $ TeX ("\\" ++ name ++ star ++ concat args)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 3ccb74ba7..80a8507b4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,28 +31,24 @@ module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
-import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup )
+import Data.List ( transpose, isSuffixOf, lookup, sortBy )
+import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
-import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
import Text.Pandoc.Shared
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag,
htmlEndTag, extractTagType,
htmlBlockElement )
-import Text.Pandoc.Entities ( characterEntity, decodeEntities )
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.ParserCombinators.Parsec
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -> String -> Pandoc
readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
--- | Parse markdown string with default options and print result (for testing).
-testString :: String -> IO ()
-testString = testStringWith parseMarkdown
-
--
-- Constants and data structure definitions
--
@@ -70,19 +66,16 @@ specialChars = "\\[]*_~`<>$!^-.&'\""
-- auxiliary functions
--
--- | Skip a single endline if there is one.
-skipEndline = option Space endline
-
indentSpaces = try $ do
state <- getState
let tabStop = stateTabStop state
try (count tabStop (char ' ')) <|>
- (do{many (char ' '); string "\t"}) <?> "indentation"
+ (many (char ' ') >> string "\t") <?> "indentation"
nonindentSpaces = do
state <- getState
let tabStop = stateTabStop state
- choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)]))
+ choice $ map (\n -> (try (count n (char ' ')))) $ reverse [0..(tabStop - 1)]
-- | Fail unless we're at beginning of a line.
failUnlessBeginningOfLine = do
@@ -94,20 +87,21 @@ failUnlessSmart = do
state <- getState
if stateSmart state then return () else fail "Smart typography feature"
+-- | Parse an inline Str element with a given content.
+inlineString str = try $ do
+ (Str res) <- inline
+ if res == str then return res else fail $ "unexpected Str content"
+
-- | Parse a sequence of inline elements between a string
-- @opener@ and a string @closer@, including inlines
-- between balanced pairs of @opener@ and a @closer@.
inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
inlinesInBalanced opener closer = try $ do
- let openerSymbol = try $ do
- res <- inline
- if res == Str opener
- then return res
- else pzero
- try (string opener)
- result <- manyTill ( (do lookAhead openerSymbol
- bal <- inlinesInBalanced opener closer
- return $ [Str opener] ++ bal ++ [Str closer])
+ string opener
+ result <- manyTill ( (do lookAhead (inlineString opener)
+ -- because it might be a link...
+ bal <- inlinesInBalanced opener closer
+ return $ [Str opener] ++ bal ++ [Str closer])
<|> (count 1 inline))
(try (string closer))
return $ concat result
@@ -116,59 +110,55 @@ inlinesInBalanced opener closer = try $ do
-- document structure
--
-titleLine = try (do
- char '%'
- skipSpaces
- line <- manyTill inline newline
- return line)
+titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
-authorsLine = try (do
+authorsLine = try $ do
char '%'
skipSpaces
authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
newline
- return (map (decodeEntities . removeLeadingTrailingSpace) authors))
+ return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
-dateLine = try (do
+dateLine = try $ do
char '%'
skipSpaces
date <- many (noneOf "\n")
newline
- return (decodeEntities $ removeTrailingSpace date))
+ return $ decodeCharacterReferences $ removeTrailingSpace date
-titleBlock = try (do
+titleBlock = try $ do
failIfStrict
title <- option [] titleLine
author <- option [] authorsLine
date <- option "" dateLine
- option "" blanklines
- return (title, author, date))
+ optional blanklines
+ return (title, author, date)
parseMarkdown = do
- updateState (\state -> state { stateParseRaw = True }) -- markdown allows raw HTML
+ -- markdown allows raw HTML
+ updateState (\state -> state { stateParseRaw = True })
(title, author, date) <- option ([],[],"") titleBlock
-- go through once just to get list of reference keys
- refs <- manyTill (referenceKey <|> (do l <- lineClump
- return (LineClump l))) eof
+ refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof
let keys = map (\(KeyBlock label target) -> (label, target)) $
filter isKeyBlock refs
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
setInput $ concat rawlines -- with keys stripped out
updateState (\state -> state { stateKeys = keys })
- -- now go through for notes
- refs <- manyTill (noteBlock <|> (do l <- lineClump
- return (LineClump l))) eof
+ -- now go through for notes (which may contain references - hence 2nd pass)
+ refs <- manyTill (noteBlock <|> (lineClump >>= return . LineClump)) eof
let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $
filter isNoteBlock refs
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
- setInput $ concat rawlines -- with note blocks and keys stripped out
+ -- go through a 3rd time, with note blocks and keys stripped out
+ setInput $ concat rawlines
updateState (\state -> state { stateNotes = notes })
- blocks <- parseBlocks -- go through again, for real
+ blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
- return (Pandoc (Meta title author date) blocks')
+ return $ Pandoc (Meta title author date) blocks'
--
--- initial pass for references
+-- initial pass for references and notes
--
referenceKey = try $ do
@@ -176,9 +166,9 @@ referenceKey = try $ do
label <- reference
char ':'
skipSpaces
- option ' ' (char '<')
+ optional (char '<')
src <- many (noneOf "> \n\t")
- option ' ' (char '>')
+ optional (char '>')
tit <- option "" title
blanklines
return $ KeyBlock label (removeTrailingSpace src, tit)
@@ -189,33 +179,28 @@ noteMarker = try $ do
manyTill (noneOf " \t\n") (char ']')
rawLine = try $ do
- notFollowedBy' blankline
+ notFollowedBy blankline
notFollowedBy' noteMarker
contents <- many1 nonEndline
- end <- option "" (do
- newline
- option "" indentSpaces
- return "\n")
- return (contents ++ end)
+ end <- option "" (newline >> optional indentSpaces >> return "\n")
+ return $ contents ++ end
-rawLines = do
- lines <- many1 rawLine
- return (concat lines)
+rawLines = many1 rawLine >>= return . concat
noteBlock = try $ do
failIfStrict
ref <- noteMarker
char ':'
- option ' ' blankline
- option "" indentSpaces
- raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
- option "" blanklines
+ optional blankline
+ optional indentSpaces
+ raw <- sepBy rawLines (try (blankline >> indentSpaces))
+ optional blanklines
-- parse the extracted text, which may contain various block elements:
rest <- getInput
setInput $ (joinWithSep "\n" raw) ++ "\n\n"
contents <- parseBlocks
setInput rest
- return (NoteBlock ref contents)
+ return $ NoteBlock ref contents
--
-- parsing blocks
@@ -239,48 +224,39 @@ block = choice [ header
-- header blocks
--
-header = choice [ setextHeader, atxHeader ] <?> "header"
+header = setextHeader <|> atxHeader <?> "header"
-atxHeader = try (do
+atxHeader = try $ do
lead <- many1 (char '#')
- notFollowedBy (char '.') -- this would be a list
- notFollowedBy (char ')')
+ notFollowedBy (char '.' <|> char ')') -- this would be a list
skipSpaces
txt <- manyTill inline atxClosing
- return (Header (length lead) (normalizeSpaces txt)))
+ return $ Header (length lead) (normalizeSpaces txt)
-atxClosing = try (do
- skipMany (char '#')
- skipSpaces
- newline
- option "" blanklines)
+atxClosing = try $ skipMany (char '#') >> skipSpaces >> newline >>
+ option "" blanklines
setextHeader = choice $
- map (\x -> setextH x) (enumFromTo 1 (length setextHChars))
+ map (\x -> setextH x) $ enumFromTo 1 (length setextHChars)
-setextH n = try (do
+setextH n = try $ do
txt <- many1Till inline newline
many1 (char (setextHChars !! (n-1)))
skipSpaces
newline
- option "" blanklines
- return (Header n (normalizeSpaces txt)))
+ optional blanklines
+ return $ Header n (normalizeSpaces txt)
--
-- hrule block
--
-hruleWith chr = try (do
- skipSpaces
- char chr
- skipSpaces
- char chr
- skipSpaces
- char chr
- skipMany (oneOf (chr:spaceChars))
+hruleWith chr = try $ do
+ count 3 (skipSpaces >> char chr)
+ skipMany (skipSpaces >> char chr)
newline
- option "" blanklines
- return HorizontalRule)
+ optional blanklines
+ return HorizontalRule
hrule = choice (map hruleWith hruleChars) <?> "hrule"
@@ -288,67 +264,55 @@ hrule = choice (map hruleWith hruleChars) <?> "hrule"
-- code blocks
--
-indentedLine = try (do
+indentedLine = try $ do
indentSpaces
result <- manyTill anyChar newline
- return (result ++ "\n"))
+ return $ result ++ "\n"
-- two or more indented lines, possibly separated by blank lines
-indentedBlock = try (do
+indentedBlock = try $ do
res1 <- indentedLine
blanks <- many blankline
- res2 <- choice [indentedBlock, indentedLine]
- return (res1 ++ blanks ++ res2))
+ res2 <- indentedBlock <|> indentedLine
+ return $ res1 ++ blanks ++ res2
-codeBlock = do
- result <- choice [indentedBlock, indentedLine]
- option "" blanklines
- return (CodeBlock (stripTrailingNewlines result))
+codeBlock = (indentedBlock <|> indentedLine) >>~ optional blanklines >>=
+ return . CodeBlock . stripTrailingNewlines
--
-- block quotes
--
-emacsBoxQuote = try (do
+emacsBoxQuote = try $ do
failIfStrict
string ",----"
manyTill anyChar newline
- raw <- manyTill (try (do
- char '|'
- option ' ' (char ' ')
- result <- manyTill anyChar newline
- return result))
- (string "`----")
- manyTill anyChar newline
- option "" blanklines
- return raw)
+ raw <- manyTill
+ (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
+ (try (string "`----"))
+ blanklines
+ return raw
-emailBlockQuoteStart = try (do
- nonindentSpaces
- char '>'
- option ' ' (char ' ')
- return "> ")
+emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
-emailBlockQuote = try (do
+emailBlockQuote = try $ do
emailBlockQuoteStart
- raw <- sepBy (many (choice [nonEndline,
- (try (do
- endline
- notFollowedBy' emailBlockQuoteStart
- return '\n'))]))
- (try (do {newline; emailBlockQuoteStart}))
- newline <|> (do{ eof; return '\n' })
- option "" blanklines
- return raw)
+ raw <- sepBy (many (nonEndline <|>
+ (try (endline >> notFollowedBy emailBlockQuoteStart >>
+ return '\n'))))
+ (try (newline >> emailBlockQuoteStart))
+ newline <|> (eof >> return '\n')
+ optional blanklines
+ return raw
blockQuote = do
- raw <- choice [ emailBlockQuote, emacsBoxQuote ]
+ raw <- emailBlockQuote <|> emacsBoxQuote
-- parse the extracted block, which may contain various block elements:
rest <- getInput
setInput $ (joinWithSep "\n" raw) ++ "\n\n"
contents <- parseBlocks
setInput rest
- return (BlockQuote contents)
+ return $ BlockQuote contents
--
-- list blocks
@@ -357,7 +321,7 @@ blockQuote = do
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
bulletListStart = try $ do
- option ' ' newline -- if preceded by a Plain block in a list context
+ optional newline -- if preceded by a Plain block in a list context
nonindentSpaces
notFollowedBy' hrule -- because hrules start out just like lists
oneOf bulletListMarkers
@@ -365,7 +329,7 @@ bulletListStart = try $ do
skipSpaces
anyOrderedListStart = try $ do
- option ' ' newline -- if preceded by a Plain block in a list context
+ optional newline -- if preceded by a Plain block in a list context
nonindentSpaces
state <- getState
if stateStrict state
@@ -375,7 +339,7 @@ anyOrderedListStart = try $ do
else anyOrderedListMarker
orderedListStart style delim = try $ do
- option ' ' newline -- if preceded by a Plain block in a list context
+ optional newline -- if preceded by a Plain block in a list context
nonindentSpaces
state <- getState
if stateStrict state
@@ -387,40 +351,39 @@ orderedListStart style delim = try $ do
skipSpaces
-- parse a line of a list item (start = parser for beginning of list item)
-listLine start = try (do
+listLine start = try $ do
notFollowedBy' start
notFollowedBy blankline
- notFollowedBy' (do
- indentSpaces
- many (spaceChar)
- choice [bulletListStart, anyOrderedListStart >> return ()])
+ notFollowedBy' (do indentSpaces
+ many (spaceChar)
+ bulletListStart <|> (anyOrderedListStart >> return ()))
line <- manyTill anyChar newline
- return (line ++ "\n"))
+ return $ line ++ "\n"
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem start = try (do
+rawListItem start = try $ do
start
result <- many1 (listLine start)
blanks <- many blankline
- return ((concat result) ++ blanks))
+ return $ concat result ++ blanks
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation start = try (do
+listContinuation start = try $ do
lookAhead indentSpaces
result <- many1 (listContinuationLine start)
blanks <- many blankline
- return ((concat result) ++ blanks))
+ return $ concat result ++ blanks
-listContinuationLine start = try (do
- notFollowedBy' blankline
+listContinuationLine start = try $ do
+ notFollowedBy blankline
notFollowedBy' start
- option "" indentSpaces
+ optional indentSpaces
result <- manyTill anyChar newline
- return (result ++ "\n"))
+ return $ result ++ "\n"
-listItem start = try (do
+listItem start = try $ do
first <- rawListItem start
continuations <- many (listContinuation start)
-- parsing with ListItemState forces markers at beginning of lines to
@@ -436,18 +399,15 @@ listItem start = try (do
contents <- parseBlocks
setInput rest
updateState (\st -> st {stateParserContext = oldContext})
- return contents)
+ return contents
-orderedList = try (do
+orderedList = do
(start, style, delim) <- lookAhead anyOrderedListStart
items <- many1 (listItem (orderedListStart style delim))
- let items' = compactify items
- return (OrderedList (start, style, delim) items'))
+ return $ OrderedList (start, style, delim) $ compactify items
-bulletList = try (do
- items <- many1 (listItem bulletListStart)
- let items' = compactify items
- return (BulletList items'))
+bulletList = many1 (listItem bulletListStart) >>=
+ return . BulletList . compactify
-- definition lists
@@ -470,9 +430,9 @@ defRawBlock = try $ do
char ':'
state <- getState
let tabStop = stateTabStop state
- try (count (tabStop - 1) (char ' ')) <|> (do{many (char ' '); string "\t"})
+ try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
firstline <- anyLine
- rawlines <- many (do {notFollowedBy' blankline; indentSpaces; anyLine})
+ rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
trailing <- option "" blanklines
return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
@@ -488,71 +448,62 @@ definitionList = do
-- paragraph block
--
-para = try (do
+para = try $ do
result <- many1 inline
newline
st <- getState
if stateStrict st
- then choice [lookAhead blockQuote, lookAhead header,
- (do{blanklines; return Null})]
- else choice [(do{lookAhead emacsBoxQuote; return Null}),
- (do{blanklines; return Null})]
- let result' = normalizeSpaces result
- return (Para result'))
-
-plain = do
- result <- many1 inline
- let result' = normalizeSpaces result
- return (Plain result')
+ then choice [ lookAhead blockQuote, lookAhead header,
+ (blanklines >> return Null) ]
+ else choice [ lookAhead emacsBoxQuote >> return Null,
+ (blanklines >> return Null) ]
+ return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- raw html
--
-htmlElement = choice [strictHtmlBlock,
- htmlBlockElement] <?> "html element"
+htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
htmlBlock = do
st <- getState
if stateStrict st
- then do
- failUnlessBeginningOfLine
- first <- htmlElement
- finalSpace <- many (oneOf spaceChars)
- finalNewlines <- many newline
- return (RawHtml (first ++ finalSpace ++ finalNewlines))
+ then try $ do failUnlessBeginningOfLine
+ first <- htmlElement
+ finalSpace <- many (oneOf spaceChars)
+ finalNewlines <- many newline
+ return $ RawHtml $ first ++ finalSpace ++ finalNewlines
else rawHtmlBlocks
-- True if tag is self-closing
isSelfClosing tag =
isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag
-strictHtmlBlock = try (do
+strictHtmlBlock = try $ do
tag <- anyHtmlBlockTag
let tag' = extractTagType tag
if isSelfClosing tag || tag' == "hr"
then return tag
- else do
- contents <- many (do{notFollowedBy' (htmlEndTag tag');
- htmlElement <|> (count 1 anyChar)})
- end <- htmlEndTag tag'
- return $ tag ++ (concat contents) ++ end)
+ else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
+ (htmlElement <|> (count 1 anyChar)))
+ end <- htmlEndTag tag'
+ return $ tag ++ concat contents ++ end
-rawHtmlBlocks = try (do
+rawHtmlBlocks = try $ do
htmlBlocks <- many1 rawHtmlBlock
let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
- let combined' = if (last combined == '\n')
+ let combined' = if not (null combined) && last combined == '\n'
then init combined -- strip extra newline
else combined
- return (RawHtml combined'))
+ return $ RawHtml combined'
--
-- LaTeX
--
-rawLaTeXEnvironment' = do
- failIfStrict
- rawLaTeXEnvironment
+rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
--
-- Tables
@@ -560,54 +511,46 @@ rawLaTeXEnvironment' = do
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
-dashedLine ch = do
- dashes <- many1 (char ch)
- sp <- many spaceChar
- return $ (length dashes, length $ dashes ++ sp)
+dashedLine ch = try $ do
+ dashes <- many1 (char ch)
+ sp <- many spaceChar
+ return $ (length dashes, length $ dashes ++ sp)
-- Parse a table header with dashed lines of '-' preceded by
-- one line of text.
-simpleTableHeader = do
- rawContent <- anyLine
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines) = unzip dashes
- let indices = scanl (+) (length initSp) lines
- let rawHeads = tail $ splitByIndices (init indices) rawContent
- let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
- return $ (rawHeads, aligns, indices)
+simpleTableHeader = try $ do
+ rawContent <- anyLine
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeads = tail $ splitByIndices (init indices) rawContent
+ let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ return (rawHeads, aligns, indices)
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter = try $ do
- nonindentSpaces
- many1 (dashedLine '-')
- blanklines
+tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
-- Parse a table separator - dashed line.
-tableSep = try $ do
- nonindentSpaces
- many1 (dashedLine '-')
- string "\n"
+tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
-- Parse a raw line and split it into chunks by indices.
rawTableLine indices = do
- notFollowedBy' (blanklines <|> tableFooter)
- line <- many1Till anyChar newline
- return $ map removeLeadingTrailingSpace $ tail $
- splitByIndices (init indices) line
+ notFollowedBy' (blanklines <|> tableFooter)
+ line <- many1Till anyChar newline
+ return $ map removeLeadingTrailingSpace $ tail $
+ splitByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
-tableLine indices = try $ do
- rawline <- rawTableLine indices
- mapM (parseFromString (many plain)) rawline
+tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow indices = try $ do
- colLines <- many1 (rawTableLine indices)
- option "" blanklines
- let cols = map unlines $ transpose colLines
- mapM (parseFromString (many plain)) cols
+ colLines <- many1 (rawTableLine indices)
+ optional blanklines
+ let cols = map unlines $ transpose colLines
+ mapM (parseFromString (many plain)) cols
-- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal
@@ -615,22 +558,22 @@ widthsFromIndices :: Int -- Number of columns on terminal
-> [Float] -- Fractional relative sizes of columns
widthsFromIndices _ [] = []
widthsFromIndices numColumns indices =
- let lengths = zipWith (-) indices (0:indices)
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
+ let lengths = zipWith (-) indices (0:indices)
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption = try $ do
- nonindentSpaces
- string "Table:"
- result <- many1 inline
- blanklines
- return $ normalizeSpaces result
+ nonindentSpaces
+ string "Table:"
+ result <- many1 inline
+ blanklines
+ return $ normalizeSpaces result
-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
tableWith headerParser lineParser footerParser = try $ do
@@ -653,30 +596,19 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines
multilineTable = tableWith multilineTableHeader multilineRow tableFooter
multilineTableHeader = try $ do
- tableSep
- rawContent <- many1 (do{notFollowedBy' tableSep;
- many1Till anyChar newline})
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines) = unzip dashes
- let indices = scanl (+) (length initSp) lines
- let rawHeadsList = transpose $ map
- (\ln -> tail $ splitByIndices (init indices) ln)
- rawContent
- let rawHeads = map (joinWithSep " ") rawHeadsList
- let aligns = zipWith alignType rawHeadsList lengths
- return $ ((map removeLeadingTrailingSpace rawHeads),
- aligns, indices)
-
--- Returns the longest of a list of strings.
-longest :: [String] -> String
-longest [] = ""
-longest [x] = x
-longest (x:xs) =
- if (length x) >= (maximum $ map length xs)
- then x
- else longest xs
+ tableSep
+ rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeadsList = transpose $ map
+ (\ln -> tail $ splitByIndices (init indices) ln)
+ rawContent
+ let rawHeads = map (joinWithSep " ") rawHeadsList
+ let aligns = zipWith alignType rawHeadsList lengths
+ return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
@@ -684,19 +616,17 @@ longest (x:xs) =
alignType :: [String] -> Int -> Alignment
alignType [] len = AlignDefault
alignType strLst len =
- let str = longest $ map removeTrailingSpace strLst
- leftSpace = if null str then False else ((str !! 0) `elem` " \t")
- rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in
- case (leftSpace, rightSpace) of
+ let str = head $ sortBy (comparing length) $
+ map removeTrailingSpace strLst
+ leftSpace = if null str then False else (str !! 0) `elem` " \t"
+ rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
+ in case (leftSpace, rightSpace) of
(True, False) -> AlignRight
(False, True) -> AlignLeft
- (True, True) -> AlignCenter
+ (True, True) -> AlignCenter
(False, False) -> AlignDefault
-table = do
- failIfStrict
- result <- simpleTable <|> multilineTable <?> "table"
- return result
+table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
--
-- inline
@@ -704,7 +634,7 @@ table = do
inline = choice [ rawLaTeXInline'
, escapedChar
- , entity
+ , charRef
, note
, inlineNote
, link
@@ -734,80 +664,64 @@ escapedChar = try $ do
result <- if stateStrict state
then oneOf "\\`*_{}[]()>#+-.!~"
else satisfy (not . isAlphaNum)
- return (Str [result])
+ return $ Str [result]
-ltSign = try (do
+ltSign = try $ do
notFollowedBy (noneOf "<") -- continue only if it's a <
notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
char '<'
- return (Str ['<']))
+ return $ Str ['<']
specialCharsMinusLt = filter (/= '<') specialChars
symbol = do
result <- oneOf specialCharsMinusLt
- return (Str [result])
+ return $ Str [result]
-- parses inline code, between n `s and n `s
-code = try (do
+code = try $ do
starts <- many1 (char '`')
let num = length starts
result <- many1Till anyChar (try (count num (char '`')))
-- get rid of any internal newlines
- let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
- return (Code result'))
+ return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-mathWord = many1 (choice [ (noneOf " \t\n\\$"),
- (try (do
- c <- char '\\'
- notFollowedBy (char '$')
- return c))])
+mathWord = many1 ((noneOf " \t\n\\$") <|>
+ (try (char '\\') >>~ notFollowedBy (char '$')))
-math = try (do
+math = try $ do
failIfStrict
char '$'
notFollowedBy space
words <- sepBy1 mathWord (many1 space)
char '$'
- return (TeX ("$" ++ (joinWithSep " " words) ++ "$")))
+ return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
-emph = do
- result <- choice [ (enclosed (char '*') (char '*') inline),
- (enclosed (char '_') (char '_') inline) ]
- return $ Emph (normalizeSpaces result)
+emph = ((enclosed (char '*') (char '*') inline) <|>
+ (enclosed (char '_') (char '_') inline)) >>=
+ return . Emph . normalizeSpaces
-strong = do
- result <- (enclosed (string "**") (string "**") inline) <|>
- (enclosed (string "__") (string "__") inline)
- return $ Strong (normalizeSpaces result)
+strong = ((enclosed (string "**") (string "**") inline) <|>
+ (enclosed (string "__") (string "__") inline)) >>=
+ return . Strong . normalizeSpaces
-strikeout = do
- failIfStrict
- result <- enclosed (string "~~") (string "~~") inline
- return $ Strikeout (normalizeSpaces result)
+strikeout = failIfStrict >> enclosed (string "~~") (string "~~") inline >>=
+ return . Strikeout . normalizeSpaces
-superscript = do
- failIfStrict
- result <- enclosed (char '^') (char '^')
- (notFollowedBy' whitespace >> inline) -- may not contain Space
- return $ Superscript result
+superscript = failIfStrict >> enclosed (char '^') (char '^')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Superscript
-subscript = do
- failIfStrict
- result <- enclosed (char '~') (char '~')
- (notFollowedBy' whitespace >> inline) -- may not contain Space
- return $ Subscript result
+subscript = failIfStrict >> enclosed (char '~') (char '~')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Subscript
-smartPunctuation = do
- failUnlessSmart
- choice [ quoted, apostrophe, dash, ellipses ]
+smartPunctuation = failUnlessSmart >>
+ choice [ quoted, apostrophe, dash, ellipses ]
-apostrophe = do
- char '\'' <|> char '\8217'
- return Apostrophe
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-quoted = do
- doubleQuoted <|> singleQuoted
+quoted = doubleQuoted <|> singleQuoted
withQuoteContext context parser = do
oldState <- getState
@@ -820,15 +734,13 @@ withQuoteContext context parser = do
singleQuoted = try $ do
singleQuoteStart
- withQuoteContext InSingleQuote $ do
- result <- many1Till inline singleQuoteEnd
- return $ Quoted SingleQuote $ normalizeSpaces result
+ withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
+ return . Quoted SingleQuote . normalizeSpaces
doubleQuoted = try $ do
doubleQuoteStart
- withQuoteContext InDoubleQuote $ do
- result <- many1Till inline doubleQuoteEnd
- return $ Quoted DoubleQuote $ normalizeSpaces result
+ withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
+ return . Quoted DoubleQuote . normalizeSpaces
failIfInQuoteContext context = do
st <- getState
@@ -836,88 +748,65 @@ failIfInQuoteContext context = do
then fail "already inside quotes"
else return ()
-singleQuoteStart = try $ do
+singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- char '\8216' <|> do
- char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (do -- possessive or contraction
- oneOfStrings ["s","t","m","ve","ll","re"]
- satisfy (not . isAlphaNum)))
- return '\''
-
-singleQuoteEnd = try $ do
- char '\'' <|> char '\8217'
- notFollowedBy alphaNum
-
-doubleQuoteStart = try $ do
- failIfInQuoteContext InDoubleQuote
- char '"' <|> char '\8220'
- notFollowedBy (oneOf " \t\n")
+ char '\8216' <|>
+ do char '\''
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum))) -- possess/contraction
+ return '\''
+
+singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
+
+doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
+ (char '"' <|> char '\8220') >>
+ notFollowedBy (oneOf " \t\n")
doubleQuoteEnd = char '"' <|> char '\8221'
-ellipses = try (do
- oneOfStrings ["...", " . . . ", ". . .", " . . ."]
- return Ellipses)
+ellipses = try $ oneOfStrings ["...", " . . . ", ". . .", " . . ."] >>
+ return Ellipses
dash = enDash <|> emDash
-enDash = try (do
- char '-'
- notFollowedBy (noneOf "0123456789")
- return EnDash)
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
-emDash = try (do
- skipSpaces
- oneOfStrings ["---", "--"]
- skipSpaces
- return EmDash)
+emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
+ skipSpaces >> return EmDash
-whitespace = do
- many1 (oneOf spaceChars) <?> "whitespace"
- return Space
+whitespace = (many1 (oneOf spaceChars) >> return Space) <?> "whitespace"
-tabchar = do
- tab
- return (Str "\t")
+tabchar = tab >> return (Str "\t")
-- hard line break
-linebreak = try (do
- oneOf spaceChars
- many1 (oneOf spaceChars)
- endline
- return LineBreak )
+linebreak = try $ oneOf spaceChars >> many1 (oneOf spaceChars) >>
+ endline >> return LineBreak
nonEndline = satisfy (/='\n')
-entity = do
- ent <- characterEntity
- return $ Str [ent]
-
strChar = noneOf (specialChars ++ spaceChars ++ "\n")
-str = do
- result <- many1 strChar
- return (Str result)
+str = many1 strChar >>= return . Str
-- an endline character that can be treated as a space, not a structural break
-endline = try (do
+endline = try $ do
newline
notFollowedBy blankline
st <- getState
if stateStrict st
then do
- notFollowedBy' emailBlockQuoteStart
+ notFollowedBy emailBlockQuoteStart
notFollowedBy (char '#') -- atx header
- notFollowedBy (try (do{manyTill anyChar newline;
- oneOf setextHChars})) -- setext header
+ notFollowedBy (manyTill anyChar newline >> oneOf setextHChars)
+ -- setext header
else return ()
-- parse potential list-starts differently if in a list:
- if (stateParserContext st) == ListItemState
- then notFollowedBy' $ choice [bulletListStart, anyOrderedListStart >> return ()]
+ if stateParserContext st == ListItemState
+ then notFollowedBy' (bulletListStart <|>
+ (anyOrderedListStart >> return ()))
else return ()
- return Space)
+ return Space
--
-- links
@@ -930,24 +819,23 @@ reference = notFollowedBy' (string "[^") >> -- footnote reference
-- source for a link, with optional title
source = try $ do
char '('
- option ' ' (char '<')
+ optional (char '<')
src <- many (noneOf ")> \t\n")
- option ' ' (char '>')
+ optional (char '>')
tit <- option "" title
skipSpaces
char ')'
return (removeTrailingSpace src, tit)
-titleWith startChar endChar = try (do
+titleWith startChar endChar = try $ do
leadingSpace <- many1 (oneOf " \t\n")
if length (filter (=='\n') leadingSpace) > 1
then fail "title must be separated by space and on same or next line"
else return ()
char startChar
- tit <- manyTill anyChar (try (do char endChar
- skipSpaces
- notFollowedBy (noneOf ")\n")))
- return $ decodeEntities tit)
+ tit <- manyTill anyChar (try (char endChar >> skipSpaces >>
+ notFollowedBy (noneOf ")\n")))
+ return $ decodeCharacterReferences tit
title = choice [ titleWith '(' ')',
titleWith '"' '"',
@@ -955,22 +843,20 @@ title = choice [ titleWith '(' ')',
link = choice [explicitLink, referenceLink] <?> "link"
-explicitLink = try (do
+explicitLink = try $ do
label <- reference
src <- source
- return (Link label src))
+ return $ Link label src
-- a link like [this][ref] or [this][] or [this]
referenceLink = try $ do
label <- reference
- ref <- option [] (try (do skipSpaces
- option ' ' newline
- skipSpaces
- reference))
+ ref <- option [] (try (skipSpaces >> optional newline >>
+ skipSpaces >> reference))
let ref' = if null ref then label else ref
state <- getState
case lookupKeySrc (stateKeys state) ref' of
- Nothing -> fail "no corresponding key"
+ Nothing -> fail "no corresponding key"
Just target -> return (Link label target)
autoLink = autoLinkEmail <|> autoLinkRegular
@@ -992,10 +878,10 @@ autoLinkRegular = try $ do
let src = prot ++ rest
return $ Link [Code src] (src, "")
-image = try (do
+image = try $ do
char '!'
(Link label src) <- link
- return (Image label src))
+ return $ Image label src
note = try $ do
failIfStrict
@@ -1003,23 +889,21 @@ note = try $ do
state <- getState
let notes = stateNotes state
case lookup ref notes of
- Nothing -> fail "note not found"
- Just contents -> return (Note contents)
+ Nothing -> fail "note not found"
+ Just contents -> return $ Note contents
inlineNote = try $ do
failIfStrict
char '^'
contents <- inlinesInBalanced "[" "]"
- return (Note [Para contents])
+ return $ Note [Para contents]
-rawLaTeXInline' = do
- failIfStrict
- rawLaTeXInline
+rawLaTeXInline' = failIfStrict >> rawLaTeXInline
rawHtmlInline' = do
st <- getState
- result <- if stateStrict st
- then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
- else choice [htmlBlockElement, anyHtmlInlineTag]
- return (HtmlInline result)
+ result <- choice $ if stateStrict st
+ then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
+ else [htmlBlockElement, anyHtmlInlineTag]
+ return $ HtmlInline result
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a36c33d92..ce8fedf02 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,23 +31,14 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
-import Text.Pandoc.ParserCombinators
import Text.Pandoc.Shared
-import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
-import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
-import Data.Maybe ( fromMaybe )
import Data.List ( findIndex, delete )
-import Data.Char ( toUpper )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -> String -> Pandoc
readRST state str = (readWith parseRST) state (str ++ "\n\n")
--- | Parse a string and print result (for testing).
-testString :: String -> IO ()
-testString = testStringWith parseRST
-
--
-- Constants and data structure definitions
---
@@ -62,15 +53,11 @@ specialChars = "\\`|*_<>$:[-"
-- parsing documents
--
-isAnonKey (ref, src) = (ref == [Str "_"])
-
-isHeader1 :: Block -> Bool
-isHeader1 (Header 1 _) = True
-isHeader1 _ = False
+isAnonKey (ref, src) = ref == [Str "_"]
-isHeader2 :: Block -> Bool
-isHeader2 (Header 2 _) = True
-isHeader2 _ = False
+isHeader :: Int -> Block -> Bool
+isHeader n (Header x _) = x == n
+isHeader _ _ = False
-- | Promote all headers in a list of blocks. (Part of
-- title transformation for RST.)
@@ -86,23 +73,23 @@ promoteHeaders num [] = []
titleTransform :: [Block] -- ^ list of blocks
-> ([Block], [Inline]) -- ^ modified list of blocks, title
titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
- if (any isHeader1 rest) || (any isHeader2 rest)
+ if (any (isHeader 1) rest) || (any (isHeader 2) rest)
then ((Header 1 head1):(Header 2 head2):rest, [])
else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
titleTransform ((Header 1 head1):rest) = -- title, no subtitle
- if (any isHeader1 rest)
+ if (any (isHeader 1) rest)
then ((Header 1 head1):rest, [])
else ((promoteHeaders 1 rest), head1)
titleTransform blocks = (blocks, [])
parseRST = do
- -- first pass: get anonymous keys
- refs <- manyTill (referenceKey <|> (do l <- lineClump
- return (LineClump l))) eof
+ -- first pass: get keys
+ refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof
let keys = map (\(KeyBlock label target) -> (label, target)) $
filter isKeyBlock refs
+ -- second pass, with keys stripped out
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
- setInput $ concat rawlines -- with keys stripped out
+ setInput $ concat rawlines
updateState (\state -> state { stateKeys = keys })
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@@ -113,7 +100,7 @@ parseRST = do
let authors = stateAuthors state
let date = stateDate state
let title' = if (null title) then (stateTitle state) else title
- return (Pandoc (Meta title' authors date) blocks'')
+ return $ Pandoc (Meta title' authors date) blocks''
--
-- parsing blocks
@@ -121,32 +108,39 @@ parseRST = do
parseBlocks = manyTill block eof
-block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote,
- imageBlock, unknownDirective, header,
- hrule, list, fieldList, lineBlock, para, plain,
- nullBlock ] <?> "block"
+block = choice [ codeBlock
+ , rawHtmlBlock
+ , rawLaTeXBlock
+ , blockQuote
+ , imageBlock
+ , unknownDirective
+ , header
+ , hrule
+ , list
+ , fieldList
+ , lineBlock
+ , para
+ , plain
+ , nullBlock ] <?> "block"
--
-- field list
--
-fieldListItem = try (do
+fieldListItem = try $ do
char ':'
name <- many1 alphaNum
string ": "
skipSpaces
first <- manyTill anyChar newline
- rest <- many (do
- notFollowedBy (char ':')
- notFollowedBy blankline
- skipSpaces
- manyTill anyChar newline )
- return (name, (joinWithSep " " (first:rest))))
-
-fieldList = try (do
+ rest <- many (notFollowedBy ((char ':') <|> blankline) >>
+ skipSpaces >> manyTill anyChar newline)
+ return $ (name, (joinWithSep " " (first:rest)))
+
+fieldList = try $ do
items <- many1 fieldListItem
blanklines
- let authors = case (lookup "Authors" items) of
+ let authors = case lookup "Authors" items of
Just auth -> [auth]
Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
let date = case (lookup "Date" items) of
@@ -162,82 +156,74 @@ fieldList = try (do
updateState (\st -> st { stateAuthors = authors,
stateDate = date,
stateTitle = title })
- return (BlockQuote result))
+ return $ BlockQuote result
--
-- line block
--
-lineBlockLine = try (do
+lineBlockLine = try $ do
string "| "
white <- many (oneOf " \t")
line <- manyTill inline newline
- let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak]
- return line')
+ return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
-lineBlock = try (do
+lineBlock = try $ do
lines <- many1 lineBlockLine
blanklines
- return $ Para (concat lines))
+ return $ Para (concat lines)
--
-- paragraph block
--
-para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"
+para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-codeBlockStart = try (do
- string "::"
- blankline
- blankline)
+codeBlockStart = try $ string "::" >> blankline >> blankline
-- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock = try (do
- result <- many1 (do {notFollowedBy' codeBlockStart; inline})
+paraBeforeCodeBlock = try $ do
+ result <- many1 (notFollowedBy' codeBlockStart >> inline)
lookAhead (string "::")
- return (Para (if (last result == Space)
- then normalizeSpaces result
- else (normalizeSpaces result) ++ [Str ":"])))
+ return $ Para $ if last result == Space
+ then normalizeSpaces result
+ else (normalizeSpaces result) ++ [Str ":"]
-- regular paragraph
-paraNormal = try (do
+paraNormal = try $ do
result <- many1 inline
newline
blanklines
- let result' = normalizeSpaces result
- return (Para result'))
+ return $ Para $ normalizeSpaces result
-plain = do
- result <- many1 inline
- let result' = normalizeSpaces result
- return (Plain result')
+plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- image block
--
-imageBlock = try (do
+imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
- return (Plain [Image [Str "image"] (src, "")]))
+ return $ Plain [Image [Str "image"] (src, "")]
--
-- header blocks
--
-header = choice [ doubleHeader, singleHeader ] <?> "header"
+header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
-doubleHeader = try (do
+doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
let lenTop = length (c:rest)
skipSpaces
newline
- txt <- many1 (do {notFollowedBy blankline; inline})
- pos <- getPosition
+ txt <- many1 (notFollowedBy blankline >> inline)
+ pos <- getPosition
let len = (sourceColumn pos) - 1
- if (len > lenTop) then fail "title longer than border" else (do {return ()})
+ if (len > lenTop) then fail "title longer than border" else return ()
blankline -- spaces and newline
count lenTop (char c) -- the bottom line
blanklines
@@ -249,10 +235,10 @@ doubleHeader = try (do
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
- return (Header level (normalizeSpaces txt)))
+ return $ Header level (normalizeSpaces txt)
-- a header with line on the bottom only
-singleHeader = try (do
+singleHeader = try $ do
notFollowedBy' whitespace
txt <- many1 (do {notFollowedBy blankline; inline})
pos <- getPosition
@@ -268,19 +254,19 @@ singleHeader = try (do
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
- return (Header level (normalizeSpaces txt)))
+ return $ Header level (normalizeSpaces txt)
--
-- hrule block
--
-hruleWith chr = try (do
+hruleWith chr = try $ do
count 4 (char chr)
skipMany (char chr)
skipSpaces
newline
blanklines
- return HorizontalRule)
+ return HorizontalRule
hrule = choice (map hruleWith underlineChars) <?> "hrule"
@@ -289,15 +275,16 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule"
--
-- read a line indented by a given string
-indentedLine indents = try (do
+indentedLine indents = try $ do
string indents
result <- manyTill anyChar newline
- return (result ++ "\n"))
+ return $ result ++ "\n"
--- two or more indented lines, possibly separated by blank lines
--- if variable = True, then any indent will work, but it must be consistent through the block
--- if variable = False, indent should be one tab or equivalent in spaces
-indentedBlock variable = try (do
+-- two or more indented lines, possibly separated by blank lines.
+-- if variable = True, then any indent will work, but it must be
+-- consistent through the block.
+-- if variable = False, indent should be one tab or equivalent in spaces.
+indentedBlock variable = try $ do
state <- getState
let tabStop = stateTabStop state
indents <- if variable
@@ -305,51 +292,47 @@ indentedBlock variable = try (do
else oneOfStrings ["\t", (replicate tabStop ' ')]
firstline <- manyTill anyChar newline
rest <- many (choice [ indentedLine indents,
- try (do
- b <- blanklines
- l <- indentedLine indents
- return (b ++ l))])
- option "" blanklines
- return (firstline ++ "\n" ++ (concat rest)))
-
-codeBlock = try (do
+ try (do b <- blanklines
+ l <- indentedLine indents
+ return (b ++ l))])
+ optional blanklines
+ return $ firstline ++ "\n" ++ concat rest
+
+codeBlock = try $ do
codeBlockStart
result <- indentedBlock False
-- the False means we want one tab stop indent on each line
- return (CodeBlock (stripTrailingNewlines result)))
+ return $ CodeBlock $ stripTrailingNewlines result
--
-- raw html
--
-rawHtmlBlock = try (do
- string ".. raw:: html"
- blanklines
- result <- indentedBlock True
- return (RawHtml result))
+rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
+ indentedBlock True >>= return . RawHtml
--
-- raw latex
--
-rawLaTeXBlock = try (do
+rawLaTeXBlock = try $ do
string ".. raw:: latex"
blanklines
result <- indentedBlock True
- return (Para [(TeX result)]))
+ return $ Para [(TeX result)]
--
-- block quotes
--
-blockQuote = try (do
+blockQuote = try $ do
raw <- indentedBlock True
-- parse the extracted block, which may contain various block elements:
rest <- getInput
setInput $ raw ++ "\n\n"
contents <- parseBlocks
setInput rest
- return (BlockQuote contents))
+ return $ BlockQuote contents
--
-- list blocks
@@ -369,15 +352,14 @@ definitionListItem = try $ do
definitionList = try $ do
items <- many1 definitionListItem
- return (DefinitionList items)
+ return $ DefinitionList items
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart = try (do
+bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
white <- many1 spaceChar
- let len = length (marker:white)
- return len)
+ return $ length (marker:white)
-- parses ordered list start and returns its length (inc following whitespace)
orderedListStart style delim = try $ do
@@ -386,11 +368,11 @@ orderedListStart style delim = try $ do
return $ markerLen + length white
-- parse a line of a list item
-listLine markerLength = try (do
+listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
line <- manyTill anyChar newline
- return (line ++ "\n"))
+ return $ line ++ "\n"
-- indent by specified number of spaces (or equiv. tabs)
indentWith num = do
@@ -399,7 +381,7 @@ indentWith num = do
if (num < tabStop)
then count num (char ' ')
else choice [ try (count num (char ' ')),
- (try (do {char '\t'; count (num - tabStop) (char ' ')})) ]
+ (try (char '\t' >> count (num - tabStop) (char ' '))) ]
-- parse raw text for one list item, excluding start marker and continuations
rawListItem start = try $ do
@@ -411,19 +393,16 @@ rawListItem start = try $ do
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
-listContinuation markerLength = try (do
+listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
- return (blanks ++ (concat result)))
+ return $ blanks ++ concat result
-listItem start = try (do
+listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
- blanks <- choice [ try (do
- b <- many blankline
- lookAhead start
- return b),
- many1 blankline ] -- whole list must end with blank
+ blanks <- choice [ try (many blankline >>~ lookAhead start),
+ many1 blankline ] -- whole list must end with blank.
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
@@ -436,52 +415,44 @@ listItem start = try (do
parsed <- parseBlocks
setInput remaining
updateState (\st -> st {stateParserContext = oldContext})
- return parsed)
+ return parsed
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListMarker
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify items
- return (OrderedList (start, style, delim) items')
+ return $ OrderedList (start, style, delim) items'
-bulletList = try (do
+bulletList = try $ do
items <- many1 (listItem bulletListStart)
let items' = compactify items
- return (BulletList items'))
+ return $ BulletList items'
--
-- unknown directive (e.g. comment)
--
-unknownDirective = try (do
+unknownDirective = try $ do
string ".. "
manyTill anyChar newline
- many (do
- string " "
- char ':'
- many1 (noneOf "\n:")
- char ':'
- many1 (noneOf "\n")
- newline)
- option "" blanklines
- return Null)
+ many (string " :" >> many1 (noneOf "\n:") >> char ':' >>
+ many1 (noneOf "\n") >> newline)
+ optional blanklines
+ return Null
--
-- reference key
--
-referenceKey = do
- result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
- option "" blanklines
- return result
+referenceKey =
+ choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] >>~
+ optional blanklines
targetURI = try $ do
skipSpaces
- option ' ' newline
- contents <- many1 (try (do many spaceChar
- newline
- many1 spaceChar
- noneOf " \t\n") <|> noneOf "\n")
+ optional newline
+ contents <- many1 (try (many spaceChar >> newline >>
+ many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
return contents
@@ -516,71 +487,73 @@ regularKey = try $ do
-- inline
--
-inline = choice [ superscript, subscript,
- escapedChar, link, image, hyphens, strong, emph, code,
- str, tabchar, whitespace, endline, symbol ] <?> "inline"
-
-hyphens = try (do
+inline = choice [ superscript
+ , subscript
+ , escapedChar
+ , link
+ , image
+ , hyphens
+ , strong
+ , emph
+ , code
+ , str
+ , tabchar
+ , whitespace
+ , endline
+ , symbol ] <?> "inline"
+
+hyphens = try $ do
result <- many1 (char '-')
option Space endline
-- don't want to treat endline after hyphen or dash as a space
- return (Str result))
+ return $ Str result
escapedChar = escaped anyChar
symbol = do
result <- oneOf specialChars
- return (Str [result])
+ return $ Str [result]
-- parses inline code, between codeStart and codeEnd
-code = try (do
+code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
- let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
- return (Code result'))
+ return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-emph = do
- result <- enclosed (char '*') (char '*') inline
- return (Emph (normalizeSpaces result))
+emph = enclosed (char '*') (char '*') inline >>=
+ return . Emph . normalizeSpaces
-strong = do
- result <- enclosed (string "**") (string "**") inline
- return (Strong (normalizeSpaces result))
+strong = enclosed (string "**") (string "**") inline >>=
+ return . Strong . normalizeSpaces
interpreted role = try $ do
- option "" (try $ string "\\ ")
+ optional $ try $ string "\\ "
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
nextChar <- lookAhead anyChar
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
return [Str result]
superscript = interpreted "sup" >>= (return . Superscript)
+
subscript = interpreted "sub" >>= (return . Subscript)
-whitespace = do
- many1 spaceChar <?> "whitespace"
- return Space
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
-tabchar = do
- tab
- return (Str "\t")
+tabchar = tab >> return (Str "\t")
-str = do
- notFollowedBy' oneWordReference
- result <- many1 (noneOf (specialChars ++ "\t\n "))
- return (Str result)
+str = notFollowedBy' oneWordReference >>
+ many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
-- an endline character that can be treated as a space, not a structural break
-endline = try (do
+endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts at beginning of line differently in a list:
st <- getState
if ((stateParserContext st) == ListItemState)
- then do notFollowedBy' anyOrderedListMarker
- notFollowedBy' bulletListStart
- else option () pzero
- return Space)
+ then notFollowedBy' anyOrderedListMarker >> notFollowedBy' bulletListStart
+ else return ()
+ return Space
--
-- links
@@ -628,10 +601,10 @@ referenceLink = try $ do
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
"mailto:", "news:", "telnet:" ]
-uri = try (do
+uri = try $ do
scheme <- uriScheme
identifier <- many1 (noneOf " \t\n")
- return (scheme ++ identifier))
+ return $ scheme ++ identifier
autoURI = try $ do
src <- uri
@@ -639,20 +612,20 @@ autoURI = try $ do
emailChar = alphaNum <|> oneOf "-+_."
-emailAddress = try (do
+emailAddress = try $ do
firstLetter <- alphaNum
restAddr <- many emailChar
let addr = firstLetter:restAddr
char '@'
dom <- domain
- return (addr ++ '@':dom))
+ return $ addr ++ '@':dom
domainChar = alphaNum <|> char '-'
-domain = try (do
+domain = try $ do
first <- many1 domainChar
dom <- many1 (try (do{ char '.'; many1 domainChar }))
- return (joinWithSep "." (first:dom)))
+ return $ joinWithSep "." (first:dom)
autoEmail = try $ do
src <- emailAddress
@@ -669,5 +642,5 @@ image = try $ do
src <- case lookupKeySrc keyTable ref of
Nothing -> fail "no corresponding key"
Just target -> return target
- return (Image (normalizeSpaces ref) src)
+ return $ Image (normalizeSpaces ref) src
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 587e3891a..31ce1c348 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -32,12 +32,11 @@ module Text.Pandoc.Shared (
splitBy,
splitByIndices,
substitute,
- -- * Text processing
joinWithSep,
+ -- * Text processing
tabsToSpaces,
backslashEscapes,
escapeStringUsing,
- endsWith,
stripTrailingNewlines,
removeLeadingTrailingSpace,
removeLeadingSpace,
@@ -46,8 +45,33 @@ module Text.Pandoc.Shared (
camelCaseToHyphenated,
toRomanNumeral,
-- * Parsing
+ (>>~),
+ anyLine,
+ many1Till,
+ notFollowedBy',
+ oneOfStrings,
+ spaceChar,
+ skipSpaces,
+ blankline,
+ blanklines,
+ enclosed,
+ stringAnyCase,
+ parseFromString,
+ lineClump,
+ charsInBalanced,
+ charsInBalanced',
+ romanNumeral,
+ withHorizDisplacement,
+ nullBlock,
+ failIfStrict,
+ escaped,
+ anyOrderedListMarker,
+ orderedListMarker,
+ charRef,
readWith,
testStringWith,
+ ParserState (..),
+ defaultParserState,
Reference (..),
isNoteBlock,
isKeyBlock,
@@ -55,14 +79,10 @@ module Text.Pandoc.Shared (
HeaderType (..),
ParserContext (..),
QuoteContext (..),
- ParserState (..),
NoteTable,
- defaultParserState,
- nullBlock,
- failIfStrict,
- escaped,
- anyOrderedListMarker,
- orderedListMarker,
+ KeyTable,
+ lookupKeySrc,
+ refsMatch,
-- * Native format prettyprinting
prettyPandoc,
-- * Pandoc block and inline list processing
@@ -74,214 +94,387 @@ module Text.Pandoc.Shared (
isHeaderBlock,
-- * Writer options
WriterOptions (..),
- defaultWriterOptions,
- -- * Reference key lookup functions
- KeyTable,
- lookupKeySrc,
- refsMatch,
+ defaultWriterOptions
) where
+
import Text.Pandoc.Definition
import Text.ParserCombinators.Parsec
-import Text.Pandoc.ParserCombinators
-import Text.Pandoc.Entities ( decodeEntities, escapeStringForXML )
+import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, chr, isLower, isUpper )
-import Data.List ( find, groupBy, isPrefixOf )
+import Data.List ( find, groupBy, isPrefixOf, isSuffixOf )
--- | Parse a string with a given parser and state.
-readWith :: GenParser Char ParserState a -- ^ parser
- -> ParserState -- ^ initial state
- -> String -- ^ input string
- -> a
-readWith parser state input =
- case runParser parser state "source" input of
- Left err -> error $ "\nError:\n" ++ show err
- Right result -> result
+--
+-- List processing
+--
--- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a) =>
- GenParser Char ParserState a
- -> String
- -> IO ()
-testStringWith parser str = putStrLn $ show $
- readWith parser defaultParserState str
+-- | Split list by groups of one or more sep.
+splitBy :: (Eq a) => a -> [a] -> [[a]]
+splitBy _ [] = []
+splitBy sep lst =
+ let (first, rest) = break (== sep) lst
+ rest' = dropWhile (== sep) rest
+ in first:(splitBy sep rest')
-data HeaderType
- = SingleHeader Char -- ^ Single line of characters underneath
- | DoubleHeader Char -- ^ Lines of characters above and below
- deriving (Eq, Show)
+-- | Split list into chunks divided at specified indices.
+splitByIndices :: [Int] -> [a] -> [[a]]
+splitByIndices [] lst = [lst]
+splitByIndices (x:xs) lst =
+ let (first, rest) = splitAt x lst in
+ first:(splitByIndices (map (\y -> y - x) xs) rest)
-data ParserContext
- = ListItemState -- ^ Used when running parser on list item contents
- | NullState -- ^ Default state
- deriving (Eq, Show)
+-- | Replace each occurrence of one sublist in a list with another.
+substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
+substitute _ _ [] = []
+substitute [] _ lst = lst
+substitute target replacement lst =
+ if target `isPrefixOf` lst
+ then replacement ++ (substitute target replacement $ drop (length target) lst)
+ else (head lst):(substitute target replacement $ tail lst)
-data QuoteContext
- = InSingleQuote -- ^ Used when we're parsing inside single quotes
- | InDoubleQuote -- ^ Used when we're parsing inside double quotes
- | NoQuote -- ^ Used when we're not parsing inside quotes
- deriving (Eq, Show)
+-- | Joins a list of lists, separated by another list.
+joinWithSep :: [a] -- ^ List to use as separator
+ -> [[a]] -- ^ Lists to join
+ -> [a]
+joinWithSep sep [] = []
+joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
-type KeyTable = [([Inline], Target)]
+--
+-- Text processing
+--
-type NoteTable = [(String, [Block])]
+-- | Convert tabs to spaces (with adjustable tab stop).
+tabsToSpaces :: Int -- ^ Tabstop
+ -> String -- ^ String to convert
+ -> String
+tabsToSpaces tabstop str =
+ unlines $ map (tabsInLine tabstop tabstop) (lines str)
--- | References from preliminary parsing
-data Reference
- = KeyBlock [Inline] Target -- ^ Key for reference-style link (label URL title)
- | NoteBlock String [Block] -- ^ Footnote reference and contents
- | LineClump String -- ^ Raw clump of lines with blanks at end
- deriving (Eq, Read, Show)
+-- | Convert tabs to spaces in one line.
+tabsInLine :: Int -- ^ Number of spaces to next tab stop
+ -> Int -- ^ Tabstop
+ -> String -- ^ Line to convert
+ -> String
+tabsInLine num tabstop [] = ""
+tabsInLine num tabstop (c:cs) =
+ let (replacement, nextnum) = if c == '\t'
+ then (replicate num ' ', tabstop)
+ else if num > 1
+ then ([c], num - 1)
+ else ([c], tabstop)
+ in replacement ++ tabsInLine nextnum tabstop cs
--- | Auxiliary functions used in preliminary parsing
-isNoteBlock :: Reference -> Bool
-isNoteBlock (NoteBlock _ _) = True
-isNoteBlock _ = False
+-- | Returns an association list of backslash escapes for the
+-- designated characters.
+backslashEscapes :: [Char] -- ^ list of special characters to escape
+ -> [(Char, String)]
+backslashEscapes = map (\ch -> (ch, ['\\',ch]))
-isKeyBlock :: Reference -> Bool
-isKeyBlock (KeyBlock _ _) = True
-isKeyBlock _ = False
+-- | Escape a string of characters, using an association list of
+-- characters and strings.
+escapeStringUsing :: [(Char, String)] -> String -> String
+escapeStringUsing escapeTable [] = ""
+escapeStringUsing escapeTable (x:xs) =
+ case (lookup x escapeTable) of
+ Just str -> str ++ rest
+ Nothing -> x:rest
+ where rest = escapeStringUsing escapeTable xs
-isLineClump :: Reference -> Bool
-isLineClump (LineClump _) = True
-isLineClump _ = False
+-- | Strip trailing newlines from string.
+stripTrailingNewlines :: String -> String
+stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
-data ParserState = ParserState
- { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML
- -- and LaTeX?
- stateParserContext :: ParserContext, -- ^ What are we parsing?
- stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateKeys :: KeyTable, -- ^ List of reference keys
- stateNotes :: NoteTable, -- ^ List of notes
- stateTabStop :: Int, -- ^ Tab stop
- stateStandalone :: Bool, -- ^ If @True@, parse
- -- bibliographic info
- stateTitle :: [Inline], -- ^ Title of document
- stateAuthors :: [String], -- ^ Authors of document
- stateDate :: String, -- ^ Date of document
- stateStrict :: Bool, -- ^ Use strict markdown syntax
- stateSmart :: Bool, -- ^ Use smart typography
- stateColumns :: Int, -- ^ Number of columns in
- -- terminal (used for tables)
- stateHeaderTable :: [HeaderType] -- ^ List of header types used,
- -- in what order (rst only)
- }
- deriving Show
+-- | Remove leading and trailing space (including newlines) from string.
+removeLeadingTrailingSpace :: String -> String
+removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
-defaultParserState :: ParserState
-defaultParserState =
- ParserState { stateParseRaw = False,
- stateParserContext = NullState,
- stateQuoteContext = NoQuote,
- stateKeys = [],
- stateNotes = [],
- stateTabStop = 4,
- stateStandalone = False,
- stateTitle = [],
- stateAuthors = [],
- stateDate = [],
- stateStrict = False,
- stateSmart = False,
- stateColumns = 80,
- stateHeaderTable = [] }
+-- | Remove leading space (including newlines) from string.
+removeLeadingSpace :: String -> String
+removeLeadingSpace = dropWhile (`elem` " \n\t")
+
+-- | Remove trailing space (including newlines) from string.
+removeTrailingSpace :: String -> String
+removeTrailingSpace = reverse . removeLeadingSpace . reverse
+
+-- | Strip leading and trailing characters from string
+stripFirstAndLast :: String -> String
+stripFirstAndLast str =
+ drop 1 $ take ((length str) - 1) str
+
+-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
+camelCaseToHyphenated :: String -> String
+camelCaseToHyphenated [] = ""
+camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
+ a:'-':(toLower b):(camelCaseToHyphenated rest)
+camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
+
+-- | Convert number < 4000 to uppercase roman numeral.
+toRomanNumeral :: Int -> String
+toRomanNumeral x =
+ if x >= 4000 || x < 0
+ then "?"
+ else case x of
+ x | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
+ x | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
+ x | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
+ x | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
+ x | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
+ x | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
+ x | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
+ x | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
+ x | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
+ x | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
+ x | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
+ x | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
+ x | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
+ 0 -> ""
+
+--
+-- Parsing
+--
+
+-- | Like >>, but returns the operation on the left.
+-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
+(>>~) :: (Monad m) => m a -> m b -> m a
+a >>~ b = a >>= \x -> b >> return x
+
+-- | Parse any line of text
+anyLine :: GenParser Char st [Char]
+anyLine = try (manyTill anyChar newline) <|> many1 anyChar
+ -- second alternative is for a line ending with eof
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: GenParser tok st a
+ -> GenParser tok st end
+ -> GenParser tok st [a]
+many1Till p end = do
+ first <- p
+ rest <- manyTill p end
+ return (first:rest)
+
+-- | A more general form of @notFollowedBy@. This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' parser = try $ (do result <- try parser
+ unexpected (show result))
+ <|> return ()
+
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> GenParser Char st String
+oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+
+-- | Parses a space or tab.
+spaceChar :: CharParser st Char
+spaceChar = oneOf " \t"
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: GenParser Char st ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: GenParser Char st Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: GenParser Char st [Char]
+blanklines = many1 blankline
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: GenParser Char st t -- ^ start parser
+ -> GenParser Char st end -- ^ end parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+enclosed start end parser = try $
+ start >> notFollowedBy space >> many1Till parser (try end)
+
+-- | Parse string, case insensitive.
+stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase [] = string ""
+stringAnyCase (x:xs) = try $ do
+ firstChar <- choice [ char (toUpper x), char (toLower x) ]
+ rest <- stringAnyCase xs
+ return (firstChar:rest)
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString parser str = try $ do
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ setInput oldInput
+ return result
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: GenParser Char st String
+lineClump = do
+ lines <- many1 (notFollowedBy blankline >> anyLine)
+ blanks <- blanklines <|> (eof >> return "\n")
+ return $ (unlines lines) ++ blanks
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close. For example,
+-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
+-- and return "hello (there)". Stop if a blank line is
+-- encountered.
+charsInBalanced :: Char -> Char -> GenParser Char st String
+charsInBalanced open close = try $ do
+ char open
+ raw <- manyTill ( (do res <- charsInBalanced open close
+ return $ [open] ++ res ++ [close])
+ <|> (do notFollowedBy' (blankline >> blanklines)
+ count 1 anyChar))
+ (char close)
+ return $ concat raw
+
+-- | Like @charsInBalanced@, but allow blank lines in the content.
+charsInBalanced' :: Char -> Char -> GenParser Char st String
+charsInBalanced' open close = try $ do
+ char open
+ raw <- manyTill ( (do res <- charsInBalanced open close
+ return $ [open] ++ res ++ [close])
+ <|> count 1 anyChar)
+ (char close)
+ return $ concat raw
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: Bool -- ^ Uppercase if true
+ -> GenParser Char st Int
+romanNumeral upper = try $ do
+ let charAnyCase c = char (if upper then toUpper c else c)
+ let one = charAnyCase 'i'
+ let five = charAnyCase 'v'
+ let ten = charAnyCase 'x'
+ let fifty = charAnyCase 'l'
+ let hundred = charAnyCase 'c'
+ let fivehundred = charAnyCase 'd'
+ let thousand = charAnyCase 'm'
+ thousands <- many thousand >>= (return . (1000 *) . length)
+ ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+ fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+ hundreds <- many hundred >>= (return . (100 *) . length)
+ nineties <- option 0 $ try $ ten >> hundred >> return 90
+ fifties <- many fifty >>= (return . (50 *) . length)
+ forties <- option 0 $ try $ ten >> fifty >> return 40
+ tens <- many ten >>= (return . (10 *) . length)
+ nines <- option 0 $ try $ one >> ten >> return 9
+ fives <- many five >>= (return . (5 *) . length)
+ fours <- option 0 $ try $ one >> five >> return 4
+ ones <- many one >>= (return . length)
+ let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+ hundreds + nineties + fifties + forties + tens + nines +
+ fives + fours + ones
+ if total == 0
+ then fail "not a roman numeral"
+ else return total
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
+ -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+ pos1 <- getPosition
+ result <- parser
+ pos2 <- getPosition
+ return (result, sourceColumn pos2 - sourceColumn pos1)
-- | Parses a character and returns 'Null' (so that the parser can move on
-- if it gets stuck).
nullBlock :: GenParser Char st Block
-nullBlock = do
- anyChar
- return Null
+nullBlock = anyChar >> return Null
-- | Fail if reader is in strict markdown syntax mode.
failIfStrict :: GenParser Char ParserState ()
failIfStrict = do
state <- getState
- if stateStrict state then fail "Strict mode" else return ()
+ if stateStrict state then fail "strict mode" else return ()
-- | Parses backslash, then applies character parser.
escaped :: GenParser Char st Char -- ^ Parser for character to escape
-> GenParser Char st Inline
-escaped parser = try (do
- char '\\'
- result <- parser
- return (Str [result]))
+escaped parser = try $ do
+ char '\\'
+ result <- parser
+ return (Str [result])
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
upperRoman :: GenParser Char st (ListNumberStyle, Int)
upperRoman = do
- num <- romanNumeral True
- return (UpperRoman, num)
+ num <- romanNumeral True
+ return (UpperRoman, num)
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
lowerRoman :: GenParser Char st (ListNumberStyle, Int)
lowerRoman = do
- num <- romanNumeral False
- return (LowerRoman, num)
+ num <- romanNumeral False
+ return (LowerRoman, num)
-- | Parses a decimal numeral and returns (Decimal, number).
decimal :: GenParser Char st (ListNumberStyle, Int)
decimal = do
- num <- many1 digit
- return (Decimal, read num)
+ num <- many1 digit
+ return (Decimal, read num)
-- | Parses a '#' returns (DefaultStyle, 1).
defaultNum :: GenParser Char st (ListNumberStyle, Int)
defaultNum = do
- char '#'
- return (DefaultStyle, 1)
+ char '#'
+ return (DefaultStyle, 1)
-- | Parses a lowercase letter and returns (LowerAlpha, number).
lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
lowerAlpha = do
- ch <- oneOf ['a'..'z']
- return (LowerAlpha, ord ch - ord 'a' + 1)
+ ch <- oneOf ['a'..'z']
+ return (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parses an uppercase letter and returns (UpperAlpha, number).
upperAlpha :: GenParser Char st (ListNumberStyle, Int)
upperAlpha = do
- ch <- oneOf ['A'..'Z']
- return (UpperAlpha, ord ch - ord 'A' + 1)
+ ch <- oneOf ['A'..'Z']
+ return (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parses a roman numeral i or I
romanOne :: GenParser Char st (ListNumberStyle, Int)
-romanOne = (do char 'i'
- return (LowerRoman, 1)) <|>
- (do char 'I'
- return (UpperRoman, 1))
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+ (char 'I' >> return (UpperRoman, 1))
-- | Parses an ordered list marker and returns list attributes.
anyOrderedListMarker :: GenParser Char st ListAttributes
-anyOrderedListMarker = choice $ [delimParser numParser | delimParser <-
- [inPeriod, inOneParen, inTwoParens],
- numParser <- [decimal, defaultNum, romanOne,
- lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+anyOrderedListMarker = choice $
+ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+ numParser <- [decimal, defaultNum, romanOne,
+ lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-- | Parses a list number (num) followed by a period, returns list attributes.
inPeriod :: GenParser Char st (ListNumberStyle, Int)
-> GenParser Char st ListAttributes
inPeriod num = try $ do
- (style, start) <- num
- char '.'
- let delim = if style == DefaultStyle
- then DefaultDelim
- else Period
- return (start, style, delim)
+ (style, start) <- num
+ char '.'
+ let delim = if style == DefaultStyle
+ then DefaultDelim
+ else Period
+ return (start, style, delim)
-- | Parses a list number (num) followed by a paren, returns list attributes.
inOneParen :: GenParser Char st (ListNumberStyle, Int)
-> GenParser Char st ListAttributes
inOneParen num = try $ do
- (style, start) <- num
- char ')'
- return (start, style, OneParen)
+ (style, start) <- num
+ char ')'
+ return (start, style, OneParen)
-- | Parses a list number (num) enclosed in parens, returns list attributes.
inTwoParens :: GenParser Char st (ListNumberStyle, Int)
-> GenParser Char st ListAttributes
inTwoParens num = try $ do
- char '('
- (style, start) <- num
- char ')'
- return (start, style, TwoParens)
+ char '('
+ (style, start) <- num
+ char ')'
+ return (start, style, TwoParens)
-- | Parses an ordered list marker with a given style and delimiter,
-- returns number.
@@ -289,21 +482,158 @@ orderedListMarker :: ListNumberStyle
-> ListNumberDelim
-> GenParser Char st Int
orderedListMarker style delim = do
- let num = case style of
- DefaultStyle -> decimal <|> defaultNum
- Decimal -> decimal
- UpperRoman -> upperRoman
- LowerRoman -> lowerRoman
- UpperAlpha -> upperAlpha
- LowerAlpha -> lowerAlpha
- let context = case delim of
- DefaultDelim -> inPeriod
- Period -> inPeriod
- OneParen -> inOneParen
- TwoParens -> inTwoParens
- (start, style, delim) <- context num
- return start
+ let num = case style of
+ DefaultStyle -> decimal <|> defaultNum
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ let context = case delim of
+ DefaultDelim -> inPeriod
+ Period -> inPeriod
+ OneParen -> inOneParen
+ TwoParens -> inTwoParens
+ (start, style, delim) <- context num
+ return start
+
+-- | Parses a character reference and returns a Str element.
+charRef :: GenParser Char st Inline
+charRef = do
+ c <- characterReference
+ return $ Str [c]
+
+-- | Parse a string with a given parser and state.
+readWith :: GenParser Char ParserState a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> String -- ^ input string
+ -> a
+readWith parser state input =
+ case runParser parser state "source" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> result
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a) => GenParser Char ParserState a
+ -> String
+ -> IO ()
+testStringWith parser str = putStrLn $ show $
+ readWith parser defaultParserState str
+
+-- | Parsing options.
+data ParserState = ParserState
+ { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateKeys :: KeyTable, -- ^ List of reference keys
+ stateNotes :: NoteTable, -- ^ List of notes
+ stateTabStop :: Int, -- ^ Tab stop
+ stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateTitle :: [Inline], -- ^ Title of document
+ stateAuthors :: [String], -- ^ Authors of document
+ stateDate :: String, -- ^ Date of document
+ stateStrict :: Bool, -- ^ Use strict markdown syntax?
+ stateSmart :: Bool, -- ^ Use smart typography?
+ stateColumns :: Int, -- ^ Number of columns in terminal
+ stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
+ }
+ deriving Show
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateParseRaw = False,
+ stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
+ stateKeys = [],
+ stateNotes = [],
+ stateTabStop = 4,
+ stateStandalone = False,
+ stateTitle = [],
+ stateAuthors = [],
+ stateDate = [],
+ stateStrict = False,
+ stateSmart = False,
+ stateColumns = 80,
+ stateHeaderTable = [] }
+
+-- | References from preliminary parsing.
+data Reference
+ = KeyBlock [Inline] Target -- ^ Key for reference-style link (label URL title)
+ | NoteBlock String [Block] -- ^ Footnote reference and contents
+ | LineClump String -- ^ Raw clump of lines with blanks at end
+ deriving (Eq, Read, Show)
+
+-- | Auxiliary functions used in preliminary parsing.
+isNoteBlock :: Reference -> Bool
+isNoteBlock (NoteBlock _ _) = True
+isNoteBlock _ = False
+
+isKeyBlock :: Reference -> Bool
+isKeyBlock (KeyBlock _ _) = True
+isKeyBlock _ = False
+
+isLineClump :: Reference -> Bool
+isLineClump (LineClump _) = True
+isLineClump _ = False
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data QuoteContext
+ = InSingleQuote -- ^ Used when parsing inside single quotes
+ | InDoubleQuote -- ^ Used when parsing inside double quotes
+ | NoQuote -- ^ Used when not parsing inside quotes
+ deriving (Eq, Show)
+
+type NoteTable = [(String, [Block])]
+
+type KeyTable = [([Inline], Target)]
+
+-- | Look up key in key table and return target object.
+lookupKeySrc :: KeyTable -- ^ Key table
+ -> [Inline] -- ^ Key
+ -> Maybe Target
+lookupKeySrc table key = case find (refsMatch key . fst) table of
+ Nothing -> Nothing
+ Just (_, src) -> Just src
+
+-- | Returns @True@ if keys match (case insensitive).
+refsMatch :: [Inline] -> [Inline] -> Bool
+refsMatch ((Str x):restx) ((Str y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Emph x):restx) ((Emph y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strong x):restx) ((Strong y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Superscript x):restx) ((Superscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Subscript x):restx) ((Subscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
+ t == u && refsMatch x y && refsMatch restx resty
+refsMatch ((Code x):restx) ((Code y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((TeX x):restx) ((TeX y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
+refsMatch [] x = null x
+refsMatch x [] = null x
+--
+-- Native format prettyprinting
+--
+
-- | Indent string as a block.
indentBy :: Int -- ^ Number of spaces to indent the block
-> Int -- ^ Number of spaces (rel to block) to indent first line
@@ -311,9 +641,10 @@ indentBy :: Int -- ^ Number of spaces to indent the block
-> String
indentBy num first [] = ""
indentBy num first str =
- let (firstLine:restLines) = lines str
- firstLineIndent = num + first in
- (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ (joinWithSep "\n" $ map (\line -> (replicate num ' ') ++ line) restLines)
+ let (firstLine:restLines) = lines str
+ firstLineIndent = num + first
+ in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
+ (joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines)
-- | Prettyprint list of Pandoc blocks elements.
prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
@@ -321,142 +652,40 @@ prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
-> String
prettyBlockList indent [] = indentBy indent 0 "[]"
prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
- (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]"
+ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]"
-- | Prettyprint Pandoc block element.
prettyBlock :: Block -> String
prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
(prettyBlockList 2 blocks)
prettyBlock (OrderedList attribs blockLists) =
- "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
- (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks)
- blockLists)) ++ " ]"
+ "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
+ (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks)
+ blockLists)) ++ " ]"
prettyBlock (BulletList blockLists) = "BulletList\n" ++
- indentBy 2 0 ("[ " ++ (joinWithSep ", "
- (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+ indentBy 2 0 ("[ " ++ (joinWithSep ", "
+ (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++
- indentBy 2 0 ("[" ++ (joinWithSep ",\n"
- (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
- indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
+ indentBy 2 0 ("[" ++ (joinWithSep ",\n"
+ (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
+ indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
prettyBlock (Table caption aligns widths header rows) =
- "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
- show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
- (joinWithSep ",\n" (map prettyRow rows)) ++ " ]"
- where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", "
- (map (\blocks -> prettyBlockList 2 blocks)
- cols))) ++ " ]"
+ "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
+ show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
+ (joinWithSep ",\n" (map prettyRow rows)) ++ " ]"
+ where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", "
+ (map (\blocks -> prettyBlockList 2 blocks)
+ cols))) ++ " ]"
prettyBlock block = show block
-- | Prettyprint Pandoc document.
prettyPandoc :: Pandoc -> String
-prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++
- ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
-
--- | Convert tabs to spaces (with adjustable tab stop).
-tabsToSpaces :: Int -- ^ Tabstop
- -> String -- ^ String to convert
- -> String
-tabsToSpaces tabstop str =
- unlines (map (tabsInLine tabstop tabstop) (lines str))
+prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
+ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
--- | Convert tabs to spaces in one line.
-tabsInLine :: Int -- ^ Number of spaces to next tab stop
- -> Int -- ^ Tabstop
- -> String -- ^ Line to convert
- -> String
-tabsInLine num tabstop "" = ""
-tabsInLine num tabstop (c:cs) =
- let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in
- let nextnumraw = (num - (length replacement)) in
- let nextnum = if (nextnumraw < 1)
- then (nextnumraw + tabstop)
- else nextnumraw in
- replacement ++ (tabsInLine nextnum tabstop cs)
-
--- | Returns an association list of backslash escapes for the
--- designated characters.
-backslashEscapes :: [Char] -- ^ list of special characters to escape
- -> [(Char, String)]
-backslashEscapes = map (\ch -> (ch, ['\\',ch]))
-
--- | Escape a string of characters, using an association list of
--- characters and strings.
-escapeStringUsing :: [(Char, String)] -> String -> String
-escapeStringUsing escapeTable "" = ""
-escapeStringUsing escapeTable (x:xs) =
- case (lookup x escapeTable) of
- Just str -> str ++ rest
- Nothing -> x:rest
- where rest = escapeStringUsing escapeTable xs
-
--- | Returns @True@ if string ends with given character.
-endsWith :: Char -> [Char] -> Bool
-endsWith char [] = False
-endsWith char str = (char == last str)
-
--- | Joins a list of lists, separated by another list.
-joinWithSep :: [a] -- ^ List to use as separator
- -> [[a]] -- ^ Lists to join
- -> [a]
-joinWithSep sep [] = []
-joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
-
--- | Strip trailing newlines from string.
-stripTrailingNewlines :: String -> String
-stripTrailingNewlines "" = ""
-stripTrailingNewlines str =
- if (last str) == '\n'
- then stripTrailingNewlines (init str)
- else str
-
--- | Remove leading and trailing space (including newlines) from string.
-removeLeadingTrailingSpace :: String -> String
-removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
-
--- | Remove leading space (including newlines) from string.
-removeLeadingSpace :: String -> String
-removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') ||
- (x == '\t'))
-
--- | Remove trailing space (including newlines) from string.
-removeTrailingSpace :: String -> String
-removeTrailingSpace = reverse . removeLeadingSpace . reverse
-
--- | Strip leading and trailing characters from string
-stripFirstAndLast :: String -> String
-stripFirstAndLast str =
- drop 1 $ take ((length str) - 1) str
-
--- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-camelCaseToHyphenated :: String -> String
-camelCaseToHyphenated "" = ""
-camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
- a:'-':(toLower b):(camelCaseToHyphenated rest)
-camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
-
--- | Replace each occurrence of one sublist in a list with another.
-substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
-substitute _ _ [] = []
-substitute [] _ lst = lst
-substitute target replacement lst =
- if isPrefixOf target lst
- then replacement ++ (substitute target replacement $ drop (length target) lst)
- else (head lst):(substitute target replacement $ tail lst)
-
--- | Split list into groups separated by sep.
-splitBy :: (Eq a) => a -> [a] -> [[a]]
-splitBy _ [] = []
-splitBy sep lst =
- let (first, rest) = break (== sep) lst
- rest' = dropWhile (== sep) rest in
- first:(splitBy sep rest')
-
--- | Split list into chunks divided at specified indices.
-splitByIndices :: [Int] -> [a] -> [[a]]
-splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst =
- let (first, rest) = splitAt x lst in
- first:(splitByIndices (map (\y -> y - x) xs) rest)
+--
+-- Pandoc block and inline list processing
+--
-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.
@@ -466,8 +695,10 @@ orderedListMarkers (start, numstyle, numdelim) =
seq = case numstyle of
DefaultStyle -> map show [start..]
Decimal -> map show [start..]
- UpperAlpha -> drop (start - 1) $ cycle $ map singleton ['A'..'Z']
- LowerAlpha -> drop (start - 1) $ cycle $ map singleton ['a'..'z']
+ UpperAlpha -> drop (start - 1) $ cycle $
+ map singleton ['A'..'Z']
+ LowerAlpha -> drop (start - 1) $ cycle $
+ map singleton ['a'..'z']
UpperRoman -> map toRomanNumeral [start..]
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
inDelim str = case numdelim of
@@ -477,27 +708,6 @@ orderedListMarkers (start, numstyle, numdelim) =
TwoParens -> "(" ++ str ++ ")"
in map inDelim seq
--- | Convert number < 4000 to uppercase roman numeral.
-toRomanNumeral :: Int -> String
-toRomanNumeral x =
- if x >= 4000 || x < 0
- then "?"
- else case x of
- x | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
- x | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
- x | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
- x | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
- x | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
- x | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
- x | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
- x | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
- x | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
- x | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
- x | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
- x | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
- x | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
- 0 -> ""
-
-- | Normalize a list of inline elements: remove leading and trailing
-- @Space@ elements, collapse double @Space@s into singles, and
-- remove empty Str elements.
@@ -507,16 +717,14 @@ normalizeSpaces list =
let removeDoubles [] = []
removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
removeDoubles ((Str ""):rest) = removeDoubles rest
- removeDoubles (x:rest) = x:(removeDoubles rest) in
- let removeLeading [] = []
- removeLeading lst = if ((head lst) == Space)
- then tail lst
- else lst in
- let removeTrailing [] = []
- removeTrailing lst = if ((last lst) == Space)
- then init lst
- else lst in
- removeLeading $ removeTrailing $ removeDoubles list
+ removeDoubles (x:rest) = x:(removeDoubles rest)
+ removeLeading (Space:xs) = removeLeading xs
+ removeLeading x = x
+ removeTrailing [] = []
+ removeTrailing lst = if (last lst == Space)
+ then init lst
+ else lst
+ in removeLeading $ removeTrailing $ removeDoubles list
-- | Change final list item from @Para@ to @Plain@ if the list should
-- be compact.
@@ -524,122 +732,86 @@ compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
-> [[Block]]
compactify [] = []
compactify items =
- let final = last items
- others = init items in
- case final of
- [Para a] -> if any containsPara others
- then items
- else others ++ [[Plain a]]
- otherwise -> items
+ let final = last items
+ others = init items
+ in case final of
+ [Para a] -> if any containsPara others
+ then items
+ else others ++ [[Plain a]]
+ otherwise -> items
containsPara :: [Block] -> Bool
containsPara [] = False
containsPara ((Para a):rest) = True
-containsPara ((BulletList items):rest) = (any containsPara items) ||
- (containsPara rest)
-containsPara ((OrderedList _ items):rest) = (any containsPara items) ||
- (containsPara rest)
+containsPara ((BulletList items):rest) = any containsPara items ||
+ containsPara rest
+containsPara ((OrderedList _ items):rest) = any containsPara items ||
+ containsPara rest
+containsPara ((DefinitionList items):rest) = any containsPara (map snd items) ||
+ containsPara rest
containsPara (x:rest) = containsPara rest
-- | Data structure for defining hierarchical Pandoc documents
data Element = Blk Block
| Sec [Inline] [Element] deriving (Eq, Read, Show)
--- | Returns true on Header block with level at least 'level'
+-- | Returns @True@ on Header block with at least the specified level
headerAtLeast :: Int -> Block -> Bool
headerAtLeast level (Header x _) = x <= level
headerAtLeast level _ = False
--- | Convert list of Pandoc blocks into list of Elements (hierarchical)
+-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
hierarchicalize [] = []
hierarchicalize (block:rest) =
case block of
- (Header level title) -> let (thisSection, rest') = break (headerAtLeast
- level) rest in
- (Sec title (hierarchicalize thisSection)):
- (hierarchicalize rest')
- x -> (Blk x):(hierarchicalize rest)
+ (Header level title) ->
+ let (thisSection, rest') = break (headerAtLeast level) rest
+ in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
+ x -> (Blk x):(hierarchicalize rest)
-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _) = True
isHeaderBlock _ = False
+--
+-- Writer options
+--
+
-- | Options for writers
data WriterOptions = WriterOptions
- { writerStandalone :: Bool -- ^ Include header and footer
- , writerTitlePrefix :: String -- ^ Prefix for HTML titles
- , writerHeader :: String -- ^ Header for the document
- , writerIncludeBefore :: String -- ^ String to include before the body
- , writerIncludeAfter :: String -- ^ String to include after the body
- , writerTableOfContents :: Bool -- ^ Include table of contents
- , writerS5 :: Bool -- ^ We're writing S5
- , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML
- , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js
- , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
- , writerIncremental :: Bool -- ^ Incremental S5 lists
- , writerNumberSections :: Bool -- ^ Number sections in LaTeX
- , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
- , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
- , writerTabStop :: Int -- ^ Tabstop for conversion between
- -- spaces and tabs
- } deriving Show
+ { writerStandalone :: Bool -- ^ Include header and footer
+ , writerHeader :: String -- ^ Header for the document
+ , writerTitlePrefix :: String -- ^ Prefix for HTML titles
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerS5 :: Bool -- ^ We're writing S5
+ , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML
+ , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js
+ , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
+ , writerIncremental :: Bool -- ^ Incremental S5 lists
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerIncludeBefore :: String -- ^ String to include before the body
+ , writerIncludeAfter :: String -- ^ String to include after the body
+ , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ } deriving Show
-- | Default writer options.
defaultWriterOptions =
- WriterOptions { writerStandalone = False,
- writerHeader = "",
- writerTitlePrefix = "",
- writerTabStop = 4,
- writerTableOfContents = False,
- writerS5 = False,
- writerUseASCIIMathML = False,
- writerASCIIMathMLURL = Nothing,
- writerIgnoreNotes = False,
- writerIncremental = False,
- writerNumberSections = False,
- writerIncludeBefore = "",
- writerIncludeAfter = "",
- writerStrictMarkdown = False,
- writerReferenceLinks = False }
-
---
--- code to lookup reference keys in key table
---
-
--- | Look up key in key table and return target object.
-lookupKeySrc :: KeyTable -- ^ Key table
- -> [Inline] -- ^ Key
- -> Maybe Target
-lookupKeySrc table key = case table of
- [] -> Nothing
- (k, src):rest -> if (refsMatch k key)
- then Just src
- else lookupKeySrc rest key
-
--- | Returns @True@ if keys match (case insensitive).
-refsMatch :: [Inline] -> [Inline] -> Bool
-refsMatch ((Str x):restx) ((Str y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((Emph x):restx) ((Emph y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strong x):restx) ((Strong y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Superscript x):restx) ((Superscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Subscript x):restx) ((Subscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
- t == u && refsMatch x y && refsMatch restx resty
-refsMatch ((Code x):restx) ((Code y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((TeX x):restx) ((TeX y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
-refsMatch [] x = null x
-refsMatch x [] = null x
+ WriterOptions { writerStandalone = False,
+ writerHeader = "",
+ writerTitlePrefix = "",
+ writerTabStop = 4,
+ writerTableOfContents = False,
+ writerS5 = False,
+ writerUseASCIIMathML = False,
+ writerASCIIMathMLURL = Nothing,
+ writerIgnoreNotes = False,
+ writerIncremental = False,
+ writerNumberSections = False,
+ writerIncludeBefore = "",
+ writerIncludeAfter = "",
+ writerStrictMarkdown = False,
+ writerReferenceLinks = False }
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 1f93787b0..13912a9f3 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.ConTeXt
- Copyright : Copyright (C) 2006-7 John MacFarlane
+ Copyright : Copyright (C) 2007 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -27,9 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' format into ConTeXt.
-}
-module Text.Pandoc.Writers.ConTeXt (
- writeConTeXt
- ) where
+module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Printf ( printf )
@@ -40,8 +38,7 @@ type WriterState = Int -- number of next URL reference
-- | Convert Pandoc to ConTeXt.
writeConTeXt :: WriterOptions -> Pandoc -> String
-writeConTeXt options document =
- evalState (pandocToConTeXt options document) 1
+writeConTeXt options document = evalState (pandocToConTeXt options document) 1
pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
pandocToConTeXt options (Pandoc meta blocks) = do
@@ -111,8 +108,8 @@ stringToConTeXt = concatMap escapeCharForConTeXt
-- | Convert Pandoc block element to ConTeXt.
blockToConTeXt :: Block -> State WriterState String
blockToConTeXt Null = return ""
-blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= (return . (++ "\n"))
-blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= (return . (++ "\n\n"))
+blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n")
+blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n")
blockToConTeXt (BlockQuote lst) = do
contents <- blockListToConTeXt lst
return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n"
@@ -137,12 +134,12 @@ blockToConTeXt (OrderedList attribs lst) = case attribs of
return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++
"\\stopitemize\n"
blockToConTeXt (DefinitionList lst) =
- mapM defListItemToConTeXt lst >>= (return . (++ "\n") . concat)
+ mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat
blockToConTeXt HorizontalRule = return "\\thinrule\n\n"
blockToConTeXt (Header level lst) = do
contents <- inlineListToConTeXt lst
- return $ if (level > 0) && (level <= 3)
- then "\\" ++ (concat (replicate (level - 1) "sub")) ++
+ return $ if level > 0 && level <= 3
+ then "\\" ++ concat (replicate (level - 1) "sub") ++
"section{" ++ contents ++ "}\n\n"
else contents ++ "\n\n"
blockToConTeXt (Table caption aligns widths heads rows) = do
@@ -186,12 +183,12 @@ defListItemToConTeXt (term, def) = do
-- | Convert list of block elements to ConTeXt.
blockListToConTeXt :: [Block] -> State WriterState String
-blockListToConTeXt lst = mapM blockToConTeXt lst >>= (return . concat)
+blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat
-- | Convert list of inline elements to ConTeXt.
inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
-> State WriterState String
-inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= (return . concat)
+inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index ecd27ee0c..e34b1959c 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -30,16 +30,35 @@ Conversion of 'Pandoc' documents to Docbook XML.
module Text.Pandoc.Writers.Docbook ( writeDocbook) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( escapeStringForXML )
-import Data.Char ( toLower, ord )
-import Data.List ( isPrefixOf, partition, drop )
+import Data.List ( isPrefixOf, drop )
import Text.PrettyPrint.HughesPJ hiding ( Str )
-
--
-- code to format XML
--
+-- | Escape one character as needed for XML.
+escapeCharForXML :: Char -> String
+escapeCharForXML x = case x of
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ '\160' -> "&nbsp;"
+ c -> [c]
+
+-- | True if the character needs to be escaped.
+needsEscaping :: Char -> Bool
+needsEscaping c = c `elem` "&<>\"\160"
+
+-- | Escape string as needed for XML. Entity references are not preserved.
+escapeStringForXML :: String -> String
+escapeStringForXML "" = ""
+escapeStringForXML str =
+ case break needsEscaping str of
+ (okay, "") -> okay
+ (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
+
-- | Return a text object with a string of formatted XML attributes.
attributeList :: [(String, String)] -> Doc
attributeList = text . concatMap
@@ -52,10 +71,10 @@ inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
inTags isIndented tagType attribs contents =
let openTag = char '<' <> text tagType <> attributeList attribs <>
char '>'
- closeTag = text "</" <> text tagType <> char '>' in
- if isIndented
- then openTag $$ nest 2 contents $$ closeTag
- else openTag <> contents <> closeTag
+ closeTag = text "</" <> text tagType <> char '>'
+ in if isIndented
+ then openTag $$ nest 2 contents $$ closeTag
+ else openTag <> contents <> closeTag
-- | Return a self-closing tag of tagType with specified attributes
selfClosingTag :: String -> [(String, String)] -> Doc
@@ -79,42 +98,42 @@ authorToDocbook :: [Char] -> Doc
authorToDocbook name = inTagsIndented "author" $
if ',' `elem` name
then -- last name first
- let (lastname, rest) = break (==',') name
- firstname = removeLeadingSpace rest in
- inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
- inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ let (lastname, rest) = break (==',') name
+ firstname = removeLeadingSpace rest in
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
else -- last name last
- let namewords = words name
- lengthname = length namewords
- (firstname, lastname) = case lengthname of
- 0 -> ("","")
- 1 -> ("", name)
- n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
- inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
- inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ let namewords = words name
+ lengthname = length namewords
+ (firstname, lastname) = case lengthname of
+ 0 -> ("","")
+ 1 -> ("", name)
+ n -> (joinWithSep " " (take (n-1) namewords), last namewords)
+ in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
writeDocbook opts (Pandoc (Meta title authors date) blocks) =
- let head = if (writerStandalone opts)
- then text (writerHeader opts)
- else empty
- meta = if (writerStandalone opts)
- then inTagsIndented "articleinfo" $
- (inTagsSimple "title" (wrap opts title)) $$
- (vcat (map authorToDocbook authors)) $$
- (inTagsSimple "date" (text $ escapeStringForXML date))
- else empty
+ let head = if writerStandalone opts
+ then text (writerHeader opts)
+ else empty
+ meta = if writerStandalone opts
+ then inTagsIndented "articleinfo" $
+ (inTagsSimple "title" (wrap opts title)) $$
+ (vcat (map authorToDocbook authors)) $$
+ (inTagsSimple "date" (text $ escapeStringForXML date))
+ else empty
elements = hierarchicalize blocks
- before = writerIncludeBefore opts
- after = writerIncludeAfter opts
- body = (if null before then empty else text before) $$
- vcat (map (elementToDocbook opts) elements) $$
- (if null after then empty else text after)
- body' = if writerStandalone opts
- then inTagsIndented "article" (meta $$ body)
- else body in
- render $ head $$ body' $$ text ""
+ before = writerIncludeBefore opts
+ after = writerIncludeAfter opts
+ body = (if null before then empty else text before) $$
+ vcat (map (elementToDocbook opts) elements) $$
+ (if null after then empty else text after)
+ body' = if writerStandalone opts
+ then inTagsIndented "article" (meta $$ body)
+ else body
+ in render $ head $$ body' $$ text ""
-- | Convert an Element to Docbook.
elementToDocbook :: WriterOptions -> Element -> Doc
@@ -123,10 +142,10 @@ elementToDocbook opts (Sec title elements) =
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]
- else elements in
- inTagsIndented "section" $
- inTagsSimple "title" (wrap opts title) $$
- vcat (map (elementToDocbook opts) elements')
+ else elements
+ in inTagsIndented "section" $
+ inTagsSimple "title" (wrap opts title) $$
+ vcat (map (elementToDocbook opts) elements')
-- | Convert a list of Pandoc blocks to Docbook.
blocksToDocbook :: WriterOptions -> [Block] -> Doc
@@ -145,30 +164,27 @@ deflistItemsToDocbook opts items =
-- | Convert a term and a list of blocks into a Docbook varlistentry.
deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc
deflistItemToDocbook opts term def =
- let def' = map plainToPara def in
- inTagsIndented "varlistentry" $
- inTagsIndented "term" (inlinesToDocbook opts term) $$
- inTagsIndented "listitem" (blocksToDocbook opts def')
+ let def' = map plainToPara def
+ in inTagsIndented "varlistentry" $
+ inTagsIndented "term" (inlinesToDocbook opts term) $$
+ inTagsIndented "listitem" (blocksToDocbook opts def')
-- | Convert a list of lists of blocks to a list of Docbook list items.
listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
-listItemsToDocbook opts items =
- vcat $ map (listItemToDocbook opts) items
+listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items
-- | Convert a list of blocks into a Docbook list item.
listItemToDocbook :: WriterOptions -> [Block] -> Doc
listItemToDocbook opts item =
- let item' = map plainToPara item in
- inTagsIndented "listitem" (blocksToDocbook opts item')
+ inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item
-- | Convert a Pandoc block element to Docbook.
blockToDocbook :: WriterOptions -> Block -> Doc
blockToDocbook opts Null = empty
blockToDocbook opts (Plain lst) = wrap opts lst
-blockToDocbook opts (Para lst) =
- inTagsIndented "para" (wrap opts lst)
+blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
blockToDocbook opts (BlockQuote blocks) =
- inTagsIndented "blockquote" (blocksToDocbook opts blocks)
+ inTagsIndented "blockquote" $ blocksToDocbook opts blocks
blockToDocbook opts (CodeBlock str) =
text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>"
blockToDocbook opts (BulletList lst) =
@@ -198,16 +214,16 @@ blockToDocbook opts (Table caption aligns widths headers rows) =
then empty
else inTagsIndented "caption"
(inlinesToDocbook opts caption)
- tableType = if isEmpty captionDoc then "informaltable" else "table" in
- inTagsIndented tableType $ captionDoc $$
- (colHeadsToDocbook opts alignStrings widths headers) $$
- (vcat $ map (tableRowToDocbook opts alignStrings) rows)
+ tableType = if isEmpty captionDoc then "informaltable" else "table"
+ in inTagsIndented tableType $ captionDoc $$
+ (colHeadsToDocbook opts alignStrings widths headers) $$
+ (vcat $ map (tableRowToDocbook opts alignStrings) rows)
colHeadsToDocbook opts alignStrings widths headers =
- let heads = zipWith3
- (\align width item -> tableItemToDocbook opts "th" align width item)
- alignStrings widths headers in
- inTagsIndented "tr" $ vcat heads
+ let heads = zipWith3 (\align width item ->
+ tableItemToDocbook opts "th" align width item)
+ alignStrings widths headers
+ in inTagsIndented "tr" $ vcat heads
alignmentToString alignment = case alignment of
AlignLeft -> "left"
@@ -215,20 +231,16 @@ alignmentToString alignment = case alignment of
AlignCenter -> "center"
AlignDefault -> "left"
-tableRowToDocbook opts aligns cols =
- inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
+tableRowToDocbook opts aligns cols = inTagsIndented "tr" $
+ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
tableItemToDocbook opts tag align width item =
let attrib = [("align", align)] ++
- if (width /= 0)
- then [("style", "{width: " ++
- show (truncate (100*width)) ++ "%;}")]
- else [] in
- inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
-
--- | Put string in CDATA section
-cdata :: String -> Doc
-cdata str = text $ "<![CDATA[" ++ str ++ "]]>"
+ if width /= 0
+ then [("style", "{width: " ++
+ show (truncate (100*width)) ++ "%;}")]
+ else []
+ in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
-- | Take list of inline elements and return wrapped doc.
wrap :: WriterOptions -> [Inline] -> Doc
@@ -236,25 +248,24 @@ wrap opts lst = fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
-- | Convert a list of inline elements to Docbook.
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
-inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
+inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
-- | Convert an inline element to Docbook.
inlineToDocbook :: WriterOptions -> Inline -> Doc
inlineToDocbook opts (Str str) = text $ escapeStringForXML str
inlineToDocbook opts (Emph lst) =
- inTagsSimple "emphasis" (inlinesToDocbook opts lst)
+ inTagsSimple "emphasis" $ inlinesToDocbook opts lst
inlineToDocbook opts (Strong lst) =
- inTags False "emphasis" [("role", "strong")]
- (inlinesToDocbook opts lst)
+ inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
inlineToDocbook opts (Strikeout lst) =
- inTags False "emphasis" [("role", "strikethrough")]
- (inlinesToDocbook opts lst)
+ inTags False "emphasis" [("role", "strikethrough")] $
+ inlinesToDocbook opts lst
inlineToDocbook opts (Superscript lst) =
- inTagsSimple "superscript" (inlinesToDocbook opts lst)
+ inTagsSimple "superscript" $ inlinesToDocbook opts lst
inlineToDocbook opts (Subscript lst) =
- inTagsSimple "subscript" (inlinesToDocbook opts lst)
+ inTagsSimple "subscript" $ inlinesToDocbook opts lst
inlineToDocbook opts (Quoted _ lst) =
- inTagsSimple "quote" (inlinesToDocbook opts lst)
+ inTagsSimple "quote" $ inlinesToDocbook opts lst
inlineToDocbook opts Apostrophe = char '\''
inlineToDocbook opts Ellipses = text "&#8230;"
inlineToDocbook opts EmDash = text "&#8212;"
@@ -263,26 +274,24 @@ inlineToDocbook opts (Code str) =
inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
inlineToDocbook opts (HtmlInline str) = empty
-inlineToDocbook opts LineBreak =
- text $ "<literallayout></literallayout>"
+inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>"
inlineToDocbook opts Space = char ' '
inlineToDocbook opts (Link txt (src, tit)) =
if isPrefixOf "mailto:" src
- then let src' = drop 7 src
- emailLink = inTagsSimple "email" $ text (escapeStringForXML $ src')
- in if txt == [Code src']
- then emailLink
- else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
- char ')'
- else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
+ then let src' = drop 7 src
+ emailLink = inTagsSimple "email" $ text $
+ escapeStringForXML $ src'
+ in if txt == [Code src']
+ then emailLink
+ else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
+ char ')'
+ else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
inlineToDocbook opts (Image alt (src, tit)) =
let titleDoc = if null tit
then empty
else inTagsIndented "objectinfo" $
- inTagsIndented "title"
- (text $ escapeStringForXML tit) in
- inTagsIndented "inlinemediaobject" $
- inTagsIndented "imageobject" $
- titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
+ inTagsIndented "title" (text $ escapeStringForXML tit)
+ in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
+ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
inlineToDocbook opts (Note contents) =
inTagsIndented "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 34c59f334..ace5cfe5f 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -27,15 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to HTML.
-}
-module Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) where
+module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
import Text.Pandoc.Definition
import Text.Pandoc.ASCIIMathML
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.Pandoc.Shared
-import Text.Pandoc.Entities (decodeEntities)
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
-import Data.List ( isPrefixOf, partition, intersperse )
+import Data.List ( isPrefixOf, intersperse )
import qualified Data.Set as S
import Control.Monad.State
import Text.XHtml.Transitional
@@ -55,8 +55,8 @@ defaultWriterState = WriterState {stNotes= [], stIds = [],
writeHtmlString :: WriterOptions -> Pandoc -> String
writeHtmlString opts =
if writerStandalone opts
- then renderHtml . (writeHtml opts)
- else renderHtmlFragment . (writeHtml opts)
+ then renderHtml . writeHtml opts
+ else renderHtmlFragment . writeHtml opts
-- | Convert Pandoc document to Html structure.
writeHtml :: WriterOptions -> Pandoc -> Html
@@ -74,49 +74,51 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) =
map (\a -> meta ! [name "author", content a]) authors) +++
(if null date
then noHtml
- else meta ! [name "date", content date])
- titleHeader = if (writerStandalone opts) && (not (null tit)) &&
- (not (writerS5 opts))
+ else meta ! [name "date", content date])
+ titleHeader = if writerStandalone opts && not (null tit) &&
+ not (writerS5 opts)
then h1 ! [theclass "title"] $ topTitle
else noHtml
headerBlocks = filter isHeaderBlock blocks
- ids = uniqueIdentifiers $ map (\(Header _ lst) -> lst) headerBlocks
- toc = if writerTableOfContents opts
- then tableOfContents opts headerBlocks ids
- else noHtml
+ ids = uniqueIdentifiers $
+ map (\(Header _ lst) -> lst) headerBlocks
+ toc = if writerTableOfContents opts
+ then tableOfContents opts headerBlocks ids
+ else noHtml
(blocks', newstate) =
- runState (blockListToHtml opts blocks)
- (defaultWriterState {stIds = ids})
- cssLines = stCSS newstate
- css = if S.null cssLines
- then noHtml
- else style ! [thetype "text/css"] $ primHtml $
- '\n':(unlines $ S.toList cssLines)
- math = if stMath newstate
- then case writerASCIIMathMLURL opts of
- Just path -> script ! [src path,
- thetype "text/javascript"] $ noHtml
- Nothing -> primHtml asciiMathMLScript
- else noHtml
- head = header $ metadata +++ math +++ css +++
- primHtml (writerHeader opts)
- notes = reverse (stNotes newstate)
- before = primHtml $ writerIncludeBefore opts
- after = primHtml $ writerIncludeAfter opts
- thebody = before +++ titleHeader +++ toc +++ blocks' +++
- footnoteSection opts notes +++ after
+ runState (blockListToHtml opts blocks)
+ (defaultWriterState {stIds = ids})
+ cssLines = stCSS newstate
+ css = if S.null cssLines
+ then noHtml
+ else style ! [thetype "text/css"] $ primHtml $
+ '\n':(unlines $ S.toList cssLines)
+ math = if stMath newstate
+ then case writerASCIIMathMLURL opts of
+ Just path -> script ! [src path,
+ thetype "text/javascript"] $
+ noHtml
+ Nothing -> primHtml asciiMathMLScript
+ else noHtml
+ head = header $ metadata +++ math +++ css +++
+ primHtml (writerHeader opts)
+ notes = reverse (stNotes newstate)
+ before = primHtml $ writerIncludeBefore opts
+ after = primHtml $ writerIncludeAfter opts
+ thebody = before +++ titleHeader +++ toc +++ blocks' +++
+ footnoteSection opts notes +++ after
in if writerStandalone opts
- then head +++ (body thebody)
+ then head +++ body thebody
else thebody
-- | Construct table of contents from list of header blocks and identifiers.
-- Assumes there are as many identifiers as header blocks.
tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
tableOfContents opts headers ids =
- let opts' = opts { writerIgnoreNotes = True }
+ let opts' = opts { writerIgnoreNotes = True }
contentsTree = hierarchicalize headers
- contents = evalState (mapM (elementToListItem opts') contentsTree)
- (defaultWriterState {stIds = ids})
+ contents = evalState (mapM (elementToListItem opts') contentsTree)
+ (defaultWriterState {stIds = ids})
in thediv ! [identifier "toc"] $ unordList contents
-- | Converts an Element to a list item for a table of contents,
@@ -135,7 +137,8 @@ elementToListItem opts (Sec headerText subsecs) = do
let subList = if null subHeads
then noHtml
else unordList subHeads
- return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++ subList
+ return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++
+ subList
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
@@ -143,62 +146,61 @@ footnoteSection :: WriterOptions -> [Html] -> Html
footnoteSection opts notes =
if null notes
then noHtml
- else thediv ! [theclass "footnotes"] $
- hr +++ (olist << notes)
+ else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes)
-- | Obfuscate a "mailto:" link using Javascript.
obfuscateLink :: WriterOptions -> String -> String -> Html
obfuscateLink opts text src =
let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$"
- src' = map toLower src in
- case (matchRegex emailRegex src') of
- (Just [name, domain]) ->
- let domain' = substitute "." " dot " domain
- at' = obfuscateChar '@'
- (linkText, altText) =
- if text == drop 7 src' -- autolink
- then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
- else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
- domain' ++ ")") in
- if writerStrictMarkdown opts
- then -- need to use primHtml or &'s are escaped to &amp; in URL
- primHtml $ "<a href=\"" ++ (obfuscateString src')
- ++ "\">" ++ (obfuscateString text) ++ "</a>"
- else (script ! [thetype "text/javascript"] $
- primHtml ("\n<!--\nh='" ++
- obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
- obfuscateString name ++ "';e=n+a+h;\n" ++
- "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
- linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
- noscript (primHtml $ obfuscateString altText)
- _ -> anchor ! [href src] $ primHtml text -- malformed email
+ src' = map toLower src
+ in case (matchRegex emailRegex src') of
+ (Just [name, domain]) ->
+ let domain' = substitute "." " dot " domain
+ at' = obfuscateChar '@'
+ (linkText, altText) =
+ if text == drop 7 src' -- autolink
+ then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
+ else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
+ domain' ++ ")")
+ in if writerStrictMarkdown opts
+ then -- need to use primHtml or &'s are escaped to &amp; in URL
+ primHtml $ "<a href=\"" ++ (obfuscateString src')
+ ++ "\">" ++ (obfuscateString text) ++ "</a>"
+ else (script ! [thetype "text/javascript"] $
+ primHtml ("\n<!--\nh='" ++
+ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
+ obfuscateString name ++ "';e=n+a+h;\n" ++
+ "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
+ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
+ noscript (primHtml $ obfuscateString altText)
+ _ -> anchor ! [href src] $ primHtml text -- malformed email
-- | Obfuscate character as entity.
obfuscateChar :: Char -> String
obfuscateChar char =
- let num = ord char in
- let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
- "&#" ++ numstr ++ ";"
+ let num = ord char
+ numstr = if even num then show num else "x" ++ showHex num ""
+ in "&#" ++ numstr ++ ";"
-- | Obfuscate string using entities.
obfuscateString :: String -> String
-obfuscateString = (concatMap obfuscateChar) . decodeEntities
+obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
-- | True if character is a punctuation character (unicode).
isPunctuation :: Char -> Bool
isPunctuation c =
- let c' = ord c in
- if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) ||
- (c' >= 0xE000 && c' <= 0xE0FF)
- then True
- else False
+ let c' = ord c
+ in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+ c' >= 0xE000 && c' <= 0xE0FF
+ then True
+ else False
-- | Add CSS for document header.
addToCSS :: String -> State WriterState ()
addToCSS item = do
st <- get
let current = stCSS st
- put $ st {stCSS = (S.insert item current)}
+ put $ st {stCSS = S.insert item current}
-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: [Inline] -> String
@@ -206,27 +208,26 @@ inlineListToIdentifier [] = ""
inlineListToIdentifier (x:xs) =
xAsText ++ inlineListToIdentifier xs
where xAsText = case x of
- Str s -> filter
- (\c -> (c == '-') || not (isPunctuation c)) $
- concat $ intersperse "-" $ words $ map toLower s
- Emph lst -> inlineListToIdentifier lst
- Strikeout lst -> inlineListToIdentifier lst
- Superscript lst -> inlineListToIdentifier lst
- Subscript lst -> inlineListToIdentifier lst
- Strong lst -> inlineListToIdentifier lst
- Quoted _ lst -> inlineListToIdentifier lst
- Code s -> s
- Space -> "-"
- EmDash -> "-"
- EnDash -> "-"
- Apostrophe -> ""
- Ellipses -> ""
- LineBreak -> "-"
- TeX _ -> ""
- HtmlInline _ -> ""
- Link lst _ -> inlineListToIdentifier lst
- Image lst _ -> inlineListToIdentifier lst
- Note _ -> ""
+ Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
+ concat $ intersperse "-" $ words $ map toLower s
+ Emph lst -> inlineListToIdentifier lst
+ Strikeout lst -> inlineListToIdentifier lst
+ Superscript lst -> inlineListToIdentifier lst
+ Subscript lst -> inlineListToIdentifier lst
+ Strong lst -> inlineListToIdentifier lst
+ Quoted _ lst -> inlineListToIdentifier lst
+ Code s -> s
+ Space -> "-"
+ EmDash -> "-"
+ EnDash -> "-"
+ Apostrophe -> ""
+ Ellipses -> ""
+ LineBreak -> "-"
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> inlineListToIdentifier lst
+ Image lst _ -> inlineListToIdentifier lst
+ Note _ -> ""
-- | Return unique identifiers for list of inline lists.
uniqueIdentifiers :: [[Inline]] -> [String]
@@ -236,102 +237,99 @@ uniqueIdentifiers ls =
matches = length $ filter (== new) nonuniqueIds
new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
in (new:nonuniqueIds, new':uniqueIds)
- in reverse $ snd (foldl addIdentifier ([],[]) $ ls)
+ in reverse $ snd $ foldl addIdentifier ([],[]) ls
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> State WriterState Html
-blockToHtml opts block =
- case block of
- (Null) -> return $ noHtml
- (Plain lst) -> inlineListToHtml opts lst
- (Para lst) -> inlineListToHtml opts lst >>= (return . paragraph)
- (RawHtml str) -> return $ primHtml str
- (HorizontalRule) -> return $ hr
- (CodeBlock str) -> return $ pre $ thecode << (str ++ "\n")
+blockToHtml opts Null = return $ noHtml
+blockToHtml opts (Plain lst) = inlineListToHtml opts lst
+blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph)
+blockToHtml opts (RawHtml str) = return $ primHtml str
+blockToHtml opts (HorizontalRule) = return $ hr
+blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n")
-- the final \n for consistency with Markdown.pl
- (BlockQuote blocks) -> -- in S5, treat list in blockquote specially
- -- if default is incremental, make it nonincremental;
- -- otherwise incremental
- if writerS5 opts
- then let inc = not (writerIncremental opts) in
- case blocks of
- [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
- (BulletList lst)
- [OrderedList attribs lst] ->
- blockToHtml (opts {writerIncremental = inc})
- (OrderedList attribs lst)
- otherwise -> blockListToHtml opts blocks >>=
- (return . blockquote)
- else blockListToHtml opts blocks >>= (return . blockquote)
- (Header level lst) -> do contents <- inlineListToHtml opts lst
- st <- get
- let ids = stIds st
- let (id, rest) = if null ids
- then ("", [])
- else (head ids, tail ids)
- put $ st {stIds = rest}
- let attribs = [identifier id]
- let headerHtml = case level of
- 1 -> h1 contents ! attribs
- 2 -> h2 contents ! attribs
- 3 -> h3 contents ! attribs
- 4 -> h4 contents ! attribs
- 5 -> h5 contents ! attribs
- 6 -> h6 contents ! attribs
- _ -> paragraph contents ! attribs
- let headerHtml' = if writerTableOfContents opts
- then anchor ! [href ("#TOC-" ++ id)] $
- headerHtml
- else headerHtml
- return headerHtml'
- (BulletList lst) -> do contents <- mapM (blockListToHtml opts) lst
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else []
- return $ unordList ! attribs $ contents
- (OrderedList (startnum, numstyle, _) lst) -> do
- contents <- mapM (blockListToHtml opts) lst
- let numstyle' = camelCaseToHyphenated $ show numstyle
- let attribs = (if writerIncremental opts
- then [theclass "incremental"]
- else []) ++
- (if startnum /= 1
- then [start startnum]
- else []) ++
- (if numstyle /= DefaultStyle
- then [theclass numstyle']
- else [])
- if numstyle /= DefaultStyle
- then addToCSS $ "ol." ++ numstyle' ++
- " { list-style-type: " ++
- numstyle' ++ "; }"
- else return ()
- return $ ordList ! attribs $ contents
- (DefinitionList lst) -> do contents <- mapM (\(term, def) ->
- do term' <- inlineListToHtml opts term
- def' <- blockListToHtml opts def
- return $ (term', def'))
- lst
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else []
- return $ defList ! attribs $ contents
- (Table capt aligns widths headers rows) ->
- do let alignStrings = map alignmentToString aligns
- captionDoc <- if null capt
- then return noHtml
- else inlineListToHtml opts capt >>=
- (return . caption)
- colHeads <- colHeadsToHtml opts alignStrings
- widths headers
- rows' <- mapM (tableRowToHtml opts alignStrings) rows
- return $ table $ captionDoc +++ colHeads +++ rows'
+blockToHtml opts (BlockQuote blocks) =
+ -- in S5, treat list in blockquote specially
+ -- if default is incremental, make it nonincremental;
+ -- otherwise incremental
+ if writerS5 opts
+ then let inc = not (writerIncremental opts) in
+ case blocks of
+ [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
+ (BulletList lst)
+ [OrderedList attribs lst] ->
+ blockToHtml (opts {writerIncremental = inc})
+ (OrderedList attribs lst)
+ otherwise -> blockListToHtml opts blocks >>=
+ (return . blockquote)
+ else blockListToHtml opts blocks >>= (return . blockquote)
+blockToHtml opts (Header level lst) = do
+ contents <- inlineListToHtml opts lst
+ st <- get
+ let ids = stIds st
+ let (id, rest) = if null ids
+ then ("", [])
+ else (head ids, tail ids)
+ put $ st {stIds = rest}
+ let attribs = [identifier id]
+ let headerHtml = case level of
+ 1 -> h1 contents ! attribs
+ 2 -> h2 contents ! attribs
+ 3 -> h3 contents ! attribs
+ 4 -> h4 contents ! attribs
+ 5 -> h5 contents ! attribs
+ 6 -> h6 contents ! attribs
+ _ -> paragraph contents ! attribs
+ return $ if writerTableOfContents opts
+ then anchor ! [href ("#TOC-" ++ id)] $ headerHtml
+ else headerHtml
+blockToHtml opts (BulletList lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ unordList ! attribs $ contents
+blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ let attribs = (if writerIncremental opts
+ then [theclass "incremental"]
+ else []) ++
+ (if startnum /= 1
+ then [start startnum]
+ else []) ++
+ (if numstyle /= DefaultStyle
+ then [theclass numstyle']
+ else [])
+ if numstyle /= DefaultStyle
+ then addToCSS $ "ol." ++ numstyle' ++
+ " { list-style-type: " ++
+ numstyle' ++ "; }"
+ else return ()
+ return $ ordList ! attribs $ contents
+blockToHtml opts (DefinitionList lst) = do
+ contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term
+ def' <- blockListToHtml opts def
+ return $ (term', def')) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ defList ! attribs $ contents
+blockToHtml opts (Table capt aligns widths headers rows) = do
+ let alignStrings = map alignmentToString aligns
+ captionDoc <- if null capt
+ then return noHtml
+ else inlineListToHtml opts capt >>= return . caption
+ colHeads <- colHeadsToHtml opts alignStrings
+ widths headers
+ rows' <- mapM (tableRowToHtml opts alignStrings) rows
+ return $ table $ captionDoc +++ colHeads +++ rows'
-colHeadsToHtml opts alignStrings widths headers =
- do heads <- sequence $ zipWith3
- (\align width item -> tableItemToHtml opts th align width item)
- alignStrings widths headers
- return $ tr $ toHtmlFromList heads
+colHeadsToHtml opts alignStrings widths headers = do
+ heads <- sequence $ zipWith3
+ (\align width item -> tableItemToHtml opts th align width item)
+ alignStrings widths headers
+ return $ tr $ toHtmlFromList heads
alignmentToString alignment = case alignment of
AlignLeft -> "left"
@@ -339,24 +337,27 @@ alignmentToString alignment = case alignment of
AlignCenter -> "center"
AlignDefault -> "left"
-tableRowToHtml opts aligns cols =
- do contents <- sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols
- return $ tr $ toHtmlFromList contents
+tableRowToHtml opts aligns cols =
+ (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>=
+ return . tr . toHtmlFromList
-tableItemToHtml opts tag align' width item =
- do contents <- blockListToHtml opts item
- let attrib = [align align'] ++
- if (width /= 0)
- then [thestyle ("{width: " ++ show (truncate (100*width)) ++ "%;}")]
- else []
- return $ tag ! attrib $ contents
+tableItemToHtml opts tag align' width item = do
+ contents <- blockListToHtml opts item
+ let attrib = [align align'] ++
+ if width /= 0
+ then [thestyle ("{width: " ++ show (truncate (100*width)) ++
+ "%;}")]
+ else []
+ return $ tag ! attrib $ contents
blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
-blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= (return . toHtmlFromList)
+blockListToHtml opts lst =
+ mapM (blockToHtml opts) lst >>= return . toHtmlFromList
-- | Convert list of Pandoc inline elements to HTML.
inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
-inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= (return . toHtmlFromList)
+inlineListToHtml opts lst =
+ mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
-- | Convert Pandoc inline element to HTML.
inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
@@ -369,52 +370,58 @@ inlineToHtml opts inline =
(EnDash) -> return $ primHtmlChar "ndash"
(Ellipses) -> return $ primHtmlChar "hellip"
(Apostrophe) -> return $ primHtmlChar "rsquo"
- (Emph lst) -> inlineListToHtml opts lst >>= (return . emphasize)
- (Strong lst) -> inlineListToHtml opts lst >>= (return . strong)
+ (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
+ (Strong lst) -> inlineListToHtml opts lst >>= return . strong
(Code str) -> return $ thecode << str
- (Strikeout lst) -> addToCSS ".strikeout { text-decoration: line-through; }" >>
+ (Strikeout lst) -> addToCSS
+ ".strikeout { text-decoration: line-through; }" >>
inlineListToHtml opts lst >>=
- (return . (thespan ! [theclass "strikeout"]))
- (Superscript lst) -> inlineListToHtml opts lst >>= (return . sup)
- (Subscript lst) -> inlineListToHtml opts lst >>= (return . sub)
+ return . (thespan ! [theclass "strikeout"])
+ (Superscript lst) -> inlineListToHtml opts lst >>= return . sup
+ (Subscript lst) -> inlineListToHtml opts lst >>= return . sub
(Quoted quoteType lst) ->
let (leftQuote, rightQuote) = case quoteType of
SingleQuote -> (primHtmlChar "lsquo",
primHtmlChar "rsquo")
DoubleQuote -> (primHtmlChar "ldquo",
- primHtmlChar "rdquo") in
- do contents <- inlineListToHtml opts lst
- return $ leftQuote +++ contents +++ rightQuote
- (TeX str) -> do if writerUseASCIIMathML opts
- then modify (\st -> st {stMath = True})
- else return ()
- return $ stringToHtml str
+ primHtmlChar "rdquo")
+ in do contents <- inlineListToHtml opts lst
+ return $ leftQuote +++ contents +++ rightQuote
+ (TeX str) -> (if writerUseASCIIMathML opts
+ then modify (\st -> st {stMath = True})
+ else return ()) >> return (stringToHtml str)
(HtmlInline str) -> return $ primHtml str
(Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src ->
- do return $ obfuscateLink opts str src
- (Link txt (src,tit)) | "mailto:" `isPrefixOf` src ->
- do linkText <- inlineListToHtml opts txt
- return $ obfuscateLink opts (show linkText) src
- (Link txt (src,tit)) ->
- do linkText <- inlineListToHtml opts txt
- return $ anchor ! ([href src] ++
- if null tit then [] else [title tit]) $ linkText
- (Image txt (source,tit)) ->
- do alternate <- inlineListToHtml opts txt
- let alternate' = renderHtmlFragment alternate
- let attributes = [src source, title tit] ++
- if null txt then [] else [alt alternate']
- return $ image ! attributes
- -- note: null title included, as in Markdown.pl
- (Note contents) -> do st <- get
- let notes = stNotes st
- let number = (length notes) + 1
- let ref = show number
- htmlContents <- blockListToNote opts ref contents
- put $ st {stNotes = (htmlContents:notes)} -- push contents onto front of notes
- return $ anchor ! [href ("#fn" ++ ref),
- theclass "footnoteRef",
- identifier ("fnref" ++ ref)] << sup << ref
+ return $ obfuscateLink opts str src
+ (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do
+ linkText <- inlineListToHtml opts txt
+ return $ obfuscateLink opts (show linkText) src
+ (Link txt (src,tit)) -> do
+ linkText <- inlineListToHtml opts txt
+ return $ anchor ! ([href src] ++
+ if null tit then [] else [title tit]) $
+ linkText
+ (Image txt (source,tit)) -> do
+ alternate <- inlineListToHtml opts txt
+ let alternate' = renderHtmlFragment alternate
+ let attributes = [src source, title tit] ++
+ if null txt
+ then []
+ else [alt alternate']
+ return $ image ! attributes
+ -- note: null title included, as in Markdown.pl
+ (Note contents) -> do
+ st <- get
+ let notes = stNotes st
+ let number = (length notes) + 1
+ let ref = show number
+ htmlContents <- blockListToNote opts ref contents
+ -- push contents onto front of notes
+ put $ st {stNotes = (htmlContents:notes)}
+ return $ anchor ! [href ("#fn" ++ ref),
+ theclass "footnoteRef",
+ identifier ("fnref" ++ ref)] <<
+ sup << ref
blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
blockListToNote opts ref blocks =
@@ -434,6 +441,6 @@ blockListToNote opts ref blocks =
[Plain (lst ++ backlink)]
_ -> otherBlocks ++ [lastBlock,
Plain backlink]
- in do contents <- blockListToHtml opts blocks'
- return $ li ! [identifier ("fn" ++ ref)] $ contents
+ in do contents <- blockListToHtml opts blocks'
+ return $ li ! [identifier ("fn" ++ ref)] $ contents
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 3d0c66e45..ad1f3e45f 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -27,16 +27,14 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' format into LaTeX.
-}
-module Text.Pandoc.Writers.LaTeX (
- writeLaTeX
- ) where
+module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Printf ( printf )
import Data.List ( (\\), isInfixOf )
+import Data.Char ( toLower )
import qualified Data.Set as S
import Control.Monad.State
-import Data.Char ( toLower )
data WriterState =
WriterState { stIncludes :: S.Set String -- strings to include in header
@@ -77,16 +75,16 @@ latexHeader :: WriterOptions -- ^ Options, including LaTeX header
-> Meta -- ^ Meta with bibliographic information
-> State WriterState String
latexHeader options (Meta title authors date) = do
- titletext <- if null title
- then return ""
- else do title' <- inlineListToLaTeX title
- return $ "\\title{" ++ title' ++ "}\n"
- extras <- get >>= (return . unlines . S.toList. stIncludes)
+ titletext <- if null title
+ then return ""
+ else do title' <- inlineListToLaTeX title
+ return $ "\\title{" ++ title' ++ "}\n"
+ extras <- get >>= (return . unlines . S.toList. stIncludes)
let verbatim = if "\\usepackage{fancyvrb}" `isInfixOf` extras
then "\\VerbatimFootnotes % allows verbatim text in footnotes\n"
else ""
- let authorstext = "\\author{" ++ (joinWithSep "\\\\"
- (map stringToLaTeX authors)) ++ "}\n"
+ let authorstext = "\\author{" ++
+ joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}\n"
let datetext = if date == ""
then ""
else "\\date{" ++ stringToLaTeX date ++ "}\n"
@@ -124,8 +122,8 @@ deVerb (other:rest) = other:(deVerb rest)
blockToLaTeX :: Block -- ^ Block to convert
-> State WriterState String
blockToLaTeX Null = return ""
-blockToLaTeX (Plain lst) = (inlineListToLaTeX lst) >>= (return . (++ "\n"))
-blockToLaTeX (Para lst) = (inlineListToLaTeX lst) >>= (return . (++ "\n\n"))
+blockToLaTeX (Plain lst) = inlineListToLaTeX lst >>= return . (++ "\n")
+blockToLaTeX (Para lst) = inlineListToLaTeX lst >>= return . (++ "\n\n")
blockToLaTeX (BlockQuote lst) = do
contents <- blockListToLaTeX lst
return $ "\\begin{quote}\n" ++ contents ++ "\\end{quote}\n"
@@ -184,22 +182,22 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
colWidths aligns
let tableBody = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++
headers ++ "\\hline\n" ++ concat rows' ++ "\\end{tabular}\n"
- let centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n"
+ let centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n"
addToHeader "\\usepackage{array}\n\
- \% This is needed because raggedright in table elements redefines \\\\:\n\
- \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
- \\\let\\PBS=\\PreserveBackslash"
+ \% This is needed because raggedright in table elements redefines \\\\:\n\
+ \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
+ \\\let\\PBS=\\PreserveBackslash"
return $ if null captionText
then centered tableBody ++ "\n"
- else "\\begin{table}[h]\n" ++ centered tableBody ++ "\\caption{" ++
- captionText ++ "}\n" ++ "\\end{table}\n\n"
+ else "\\begin{table}[h]\n" ++ centered tableBody ++
+ "\\caption{" ++ captionText ++ "}\n" ++ "\\end{table}\n\n"
-blockListToLaTeX lst = mapM blockToLaTeX lst >>= (return . concat)
+blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . concat
tableRowToLaTeX cols =
- mapM blockListToLaTeX cols >>= (return . (++ "\\\\\n") . (joinWithSep " & "))
+ mapM blockListToLaTeX cols >>= return . (++ "\\\\\n") . (joinWithSep " & ")
-listItemToLaTeX lst = blockListToLaTeX lst >>= (return . ("\\item "++))
+listItemToLaTeX lst = blockListToLaTeX lst >>= return . ("\\item "++)
defListItemToLaTeX (term, def) = do
term' <- inlineListToLaTeX $ deVerb term
@@ -209,8 +207,7 @@ defListItemToLaTeX (term, def) = do
-- | Convert list of inline elements to LaTeX.
inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
-> State WriterState String
-inlineListToLaTeX lst =
- mapM inlineToLaTeX lst >>= (return . concat)
+inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . concat
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 3232a454a..b9596dc2d 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -28,14 +28,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to groff man page format.
-}
-module Text.Pandoc.Writers.Man (
- writeMan
- ) where
+module Text.Pandoc.Writers.Man ( writeMan) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Printf ( printf )
-import Data.Char ( toUpper )
-import Data.List ( group, isPrefixOf, drop, find, nub, intersperse )
+import Data.List ( isPrefixOf, drop, nub, intersperse )
import Text.PrettyPrint.HughesPJ hiding ( Str )
import Control.Monad.State
@@ -45,16 +42,15 @@ type WriterState = (Notes, Preprocessors)
-- | Convert Pandoc to Man.
writeMan :: WriterOptions -> Pandoc -> String
-writeMan opts document =
- render $ evalState (pandocToMan opts document) ([],[])
+writeMan opts document = render $ evalState (pandocToMan opts document) ([],[])
-- | Return groff man representation of document.
pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc
pandocToMan opts (Pandoc meta blocks) = do
let before = writerIncludeBefore opts
let after = writerIncludeAfter opts
- before' = if null before then empty else text before
- after' = if null after then empty else text after
+ let before' = if null before then empty else text before
+ let after' = if null after then empty else text after
(head, foot) <- metaToMan opts meta
body <- blockListToMan opts blocks
(notes, preprocessors) <- get
@@ -84,8 +80,8 @@ metaToMan options (Meta title authors date) = do
1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors)
2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors)
return $ if writerStandalone options
- then (head, foot)
- else (empty, empty)
+ then (head, foot)
+ else (empty, empty)
-- | Return man representation of notes.
notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
@@ -93,7 +89,7 @@ notesToMan opts notes =
if null notes
then return empty
else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
- (return . (text ".SH NOTES" $$) . vcat)
+ return . (text ".SH NOTES" $$) . vcat
-- | Return man representation of a note.
noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
@@ -110,8 +106,7 @@ wrappedMan opts sect = do
-- | Association list of characters to escape.
manEscapes :: [(Char, String)]
-manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++
- backslashEscapes "\".@\\"
+manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\"
-- | Escape special characters for Man.
escapeString :: String -> String
@@ -140,8 +135,7 @@ blockToMan opts (Header level inlines) = do
return $ text heading <> contents
blockToMan opts (CodeBlock str) = return $
text ".PP" $$ text "\\f[CR]" $$
- text ((unlines . map (" " ++) . lines) (escapeCode str)) <>
- text "\\f[]"
+ text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]"
blockToMan opts (BlockQuote blocks) = do
contents <- blockListToMan opts blocks
return $ text ".RS" $$ contents $$ text ".RE"
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index eb633166d..e7acd762c 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -29,9 +29,7 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text.
Markdown: <http://daringfireball.net/projects/markdown/>
-}
-module Text.Pandoc.Writers.Markdown (
- writeMarkdown
- ) where
+module Text.Pandoc.Writers.Markdown ( writeMarkdown) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Blocks
@@ -53,10 +51,10 @@ pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc
pandocToMarkdown opts (Pandoc meta blocks) = do
let before = writerIncludeBefore opts
let after = writerIncludeAfter opts
- before' = if null before then empty else text before
- after' = if null after then empty else text after
+ let before' = if null before then empty else text before
+ let after' = if null after then empty else text after
metaBlock <- metaToMarkdown opts meta
- let head = if (writerStandalone opts)
+ let head = if writerStandalone opts
then metaBlock $+$ text (writerHeader opts)
else empty
let headerBlocks = filter isHeaderBlock blocks
@@ -73,8 +71,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
-- | Return markdown representation of reference key table.
keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
-keyTableToMarkdown opts refs =
- mapM (keyToMarkdown opts) refs >>= (return . vcat)
+keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
-- | Return markdown representation of a reference key.
keyToMarkdown :: WriterOptions
@@ -90,7 +87,7 @@ keyToMarkdown opts (label, (src, tit)) = do
notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
notesToMarkdown opts notes =
mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
- (return . vcat)
+ return . vcat
-- | Return markdown representation of a note.
noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
@@ -143,8 +140,7 @@ tableOfContents opts headers =
-- | Converts an Element to a list item for a table of contents,
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
-elementToListItem (Sec headerText subsecs) =
- [Plain headerText] ++
+elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
if null subsecs
then []
else [BulletList $ map elementToListItem subsecs]
@@ -184,9 +180,8 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do
let makeRow = hsepBlocks . (zipWith alignHeader aligns) .
(zipWith docToBlock widthsInChars)
let head = makeRow headers'
- rows' <- mapM (\row -> do
- cols <- mapM (blockListToMarkdown opts) row
- return $ makeRow cols) rows
+ rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row
+ return $ makeRow cols) rows
let tableWidth = sum widthsInChars
let maxRowHeight = maximum $ map heightOfBlock (head:rows')
let isMultilineTable = maxRowHeight > 1
@@ -208,8 +203,7 @@ blockToMarkdown opts (OrderedList attribs items) = do
let markers = orderedListMarkers attribs
let markers' = map (\m -> if length m < 3
then m ++ replicate (3 - length m) ' '
- else m)
- markers
+ else m) markers
contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
zip markers' items
return $ (vcat contents) <> text "\n"
@@ -241,8 +235,8 @@ definitionListItemToMarkdown opts (label, items) = do
let tabStop = writerTabStop opts
let leader = char ':'
contents <- mapM (\item -> blockToMarkdown opts item >>=
- (\txt -> return (leader $$ nest tabStop txt)))
- items >>= (return . vcat)
+ (\txt -> return (leader $$ nest tabStop txt)))
+ items >>= return . vcat
return $ labelText $+$ contents
-- | Convert list of Pandoc block elements to markdown.
@@ -250,29 +244,30 @@ blockListToMarkdown :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
-> State WriterState Doc
blockListToMarkdown opts blocks =
- mapM (blockToMarkdown opts) blocks >>= (return . vcat)
+ mapM (blockToMarkdown opts) blocks >>= return . vcat
-- | Get reference for target; if none exists, create unique one and return.
-- Prefer label if possible; otherwise, generate a unique key.
getReference :: [Inline] -> Target -> State WriterState [Inline]
getReference label (src, tit) = do
- (_,refs) <- get
- case find ((== (src, tit)) . snd) refs of
- Just (ref, _) -> return ref
- Nothing -> do
- let label' = case find ((== label) . fst) refs of
- Just _ -> -- label is used; generate numerical label
- case find (\n -> not (any (== [Str (show n)])
- (map fst refs))) [1..10000] of
- Just x -> [Str (show x)]
- Nothing -> error "no unique label"
- Nothing -> label
- modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
- return label'
+ (_,refs) <- get
+ case find ((== (src, tit)) . snd) refs of
+ Just (ref, _) -> return ref
+ Nothing -> do
+ let label' = case find ((== label) . fst) refs of
+ Just _ -> -- label is used; generate numerical label
+ case find (\n -> not (any (== [Str (show n)])
+ (map fst refs))) [1..10000] of
+ Just x -> [Str (show x)]
+ Nothing -> error "no unique label"
+ Nothing -> label
+ modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
+ return label'
-- | Convert list of Pandoc inline elements to markdown.
inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToMarkdown opts lst = mapM (inlineToMarkdown opts) lst >>= (return . hcat)
+inlineListToMarkdown opts lst =
+ mapM (inlineToMarkdown opts) lst >>= return . hcat
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
@@ -327,13 +322,13 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
return $ if useAuto
then char '<' <> text srcSuffix <> char '>'
else if useRefLinks
- then let first = char '[' <> linktext <> char ']'
- second = if txt == ref
- then text "[]"
- else char '[' <> reftext <> char ']'
- in first <> second
- else char '[' <> linktext <> char ']' <>
- char '(' <> text src <> linktitle <> char ')'
+ then let first = char '[' <> linktext <> char ']'
+ second = if txt == ref
+ then text "[]"
+ else char '[' <> reftext <> char ']'
+ in first <> second
+ else char '[' <> linktext <> char ']' <>
+ char '(' <> text src <> linktitle <> char ')'
inlineToMarkdown opts (Image alternate (source, tit)) = do
let txt = if (null alternate) || (alternate == [Str ""]) ||
(alternate == [Str source]) -- to prevent autolinks
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index c39f7bdab..70df479b5 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -29,13 +29,11 @@ Conversion of 'Pandoc' documents to reStructuredText.
reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
-module Text.Pandoc.Writers.RST (
- writeRST
- ) where
+module Text.Pandoc.Writers.RST ( writeRST) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Blocks
-import Data.List ( group, isPrefixOf, drop, find, intersperse )
+import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse )
import Text.PrettyPrint.HughesPJ hiding ( Str )
import Control.Monad.State
@@ -70,8 +68,7 @@ pandocToRST opts (Pandoc meta blocks) = do
-- | Return RST representation of reference key table.
keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
-keyTableToRST opts refs =
- mapM (keyToRST opts) refs >>= (return . vcat)
+keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat
-- | Return RST representation of a reference key.
keyToRST :: WriterOptions
@@ -85,7 +82,7 @@ keyToRST opts (label, (src, tit)) = do
notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc
notesToRST opts notes =
mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>=
- (return . vcat)
+ return . vcat
-- | Return RST representation of a note.
noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc
@@ -96,8 +93,7 @@ noteToRST opts num note = do
-- | Return RST representation of picture reference table.
pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
-pictTableToRST opts refs =
- mapM (pictToRST opts) refs >>= (return . vcat)
+pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat
-- | Return RST representation of a picture substitution reference.
pictToRST :: WriterOptions
@@ -112,7 +108,7 @@ pictToRST opts (label, (src, _)) = do
wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
wrappedRST opts inlines =
mapM (wrappedRSTSection opts) (splitBy LineBreak inlines) >>=
- (return . vcat)
+ return . vcat
wrappedRSTSection :: WriterOptions -> [Inline] -> State WriterState Doc
wrappedRSTSection opts sect = do
@@ -160,21 +156,19 @@ blockToRST :: WriterOptions -- ^ Options
blockToRST opts Null = return empty
blockToRST opts (Plain inlines) = wrappedRST opts inlines
blockToRST opts (Para [TeX str]) =
- let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
- return $ hang (text "\n.. raw:: latex\n") 3
- (vcat $ map text (lines str'))
+ let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
+ return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str')
blockToRST opts (Para inlines) = do
contents <- wrappedRST opts inlines
return $ contents <> text "\n"
blockToRST opts (RawHtml str) =
- let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
- return $ hang (text "\n.. raw:: html\n") 3
- (vcat $ map text (lines str'))
+ let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
+ return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str')
blockToRST opts HorizontalRule = return $ text "--------------\n"
blockToRST opts (Header level inlines) = do
contents <- inlineListToRST opts inlines
let headerLength = length $ render contents
- let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1)
+ let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
let border = text $ replicate headerLength headerChar
return $ contents $+$ border <> text "\n"
blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$
@@ -200,11 +194,10 @@ blockToRST opts (Table caption aligns widths headers rows) = do
beg = TextBlock 2 height (replicate height "| ")
end = TextBlock 2 height (replicate height " |")
middle = hcatBlocks $ intersperse sep blocks
- let makeRow = hpipeBlocks . (zipWith docToBlock widthsInChars)
+ let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars
let head = makeRow headers'
- rows' <- mapM (\row -> do
- cols <- mapM (blockListToRST opts) row
- return $ makeRow cols) rows
+ rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row
+ return $ makeRow cols) rows
let tableWidth = sum widthsInChars
let maxRowHeight = maximum $ map heightOfBlock (head:rows')
let border ch = char '+' <> char ch <>
@@ -225,8 +218,7 @@ blockToRST opts (OrderedList (start, style, delim) items) = do
(start, style, delim)
let maxMarkerLength = maximum $ map length markers
let markers' = map (\m -> let s = maxMarkerLength - length m
- in m ++ replicate s ' ')
- markers
+ in m ++ replicate s ' ') markers
contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $
zip markers' items
-- ensure that sublists have preceding blank line
@@ -262,11 +254,11 @@ blockListToRST :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
-> State WriterState Doc
blockListToRST opts blocks =
- mapM (blockToRST opts) blocks >>= (return . vcat)
+ mapM (blockToRST opts) blocks >>= return . vcat
-- | Convert list of Pandoc inline elements to RST.
inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= (return . hcat)
+inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat
-- | Convert Pandoc inline element to RST.
inlineToRST :: WriterOptions -> Inline -> State WriterState Doc
@@ -319,8 +311,8 @@ inlineToRST opts (Link txt (src, tit)) = do
inlineToRST opts (Image alternate (source, tit)) = do
(notes, refs, pics) <- get
let labelsUsed = map fst pics
- let txt = if (null alternate) || (alternate == [Str ""]) ||
- (alternate `elem` labelsUsed)
+ let txt = if null alternate || alternate == [Str ""] ||
+ alternate `elem` labelsUsed
then [Str $ "image" ++ show (length refs)]
else alternate
let pics' = if (txt, (source, tit)) `elem` pics
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 9b3d6662c..3bd5c63b2 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -27,12 +27,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to RTF (rich text format).
-}
-module Text.Pandoc.Writers.RTF ( writeRTF) where
+module Text.Pandoc.Writers.RTF ( writeRTF ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Regex ( matchRegexAll, mkRegex )
import Data.List ( isSuffixOf )
-import Data.Char ( ord, chr )
+import Data.Char ( ord )
-- | Convert Pandoc to a string in rich text format.
writeRTF :: WriterOptions -> Pandoc -> String
@@ -44,22 +44,22 @@ writeRTF options (Pandoc meta blocks) =
then tableOfContents $ filter isHeaderBlock blocks
else ""
foot = if writerStandalone options then "\n}\n" else ""
- body = (writerIncludeBefore options) ++
+ body = writerIncludeBefore options ++
concatMap (blockToRTF 0 AlignDefault) blocks ++
- (writerIncludeAfter options) in
- head ++ toc ++ body ++ foot
+ writerIncludeAfter options
+ in head ++ toc ++ body ++ foot
-- | Construct table of contents from list of header blocks.
tableOfContents :: [Block] -> String
tableOfContents headers =
let contentsTree = hierarchicalize headers
- in concatMap (blockToRTF 0 AlignDefault) $ [Header 1 [Str "Contents"],
- BulletList (map elementToListItem contentsTree)]
+ in concatMap (blockToRTF 0 AlignDefault) $
+ [Header 1 [Str "Contents"],
+ BulletList (map elementToListItem contentsTree)]
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
-elementToListItem (Sec sectext subsecs) =
- [Plain sectext] ++
+elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
if null subsecs
then []
else [BulletList (map elementToListItem subsecs)]
@@ -67,10 +67,10 @@ elementToListItem (Sec sectext subsecs) =
-- | Convert unicode characters (> 127) into rich text format representation.
handleUnicode :: String -> String
handleUnicode [] = []
-handleUnicode (c:cs) = if (ord c) > 127
- then '\\':'u':(show (ord c)) ++ "?" ++
- (handleUnicode cs)
- else c:(handleUnicode cs)
+handleUnicode (c:cs) =
+ if ord c > 127
+ then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
+ else c:(handleUnicode cs)
-- | Escape special characters.
escapeSpecial :: String -> String
@@ -127,7 +127,7 @@ listIncrement = 360
-- | Returns appropriate bullet list marker for indent level.
bulletMarker :: Int -> String
-bulletMarker indent = case (indent `mod` 720) of
+bulletMarker indent = case indent `mod` 720 of
0 -> "\\bullet "
otherwise -> "\\endash "
@@ -135,7 +135,7 @@ bulletMarker indent = case (indent `mod` 720) of
orderedMarkers :: Int -> ListAttributes -> [String]
orderedMarkers indent (start, style, delim) =
if style == DefaultStyle && delim == DefaultDelim
- then case (indent `mod` 720) of
+ then case indent `mod` 720 of
0 -> orderedListMarkers (start, Decimal, Period)
otherwise -> orderedListMarkers (start, LowerAlpha, Period)
else orderedListMarkers (start, style, delim)
@@ -145,21 +145,21 @@ rtfHeader :: String -- ^ header text
-> Meta -- ^ bibliographic information
-> String
rtfHeader headerText (Meta title authors date) =
- let titletext = if null title
+ let titletext = if null title
+ then ""
+ else rtfPar 0 0 AlignCenter $
+ "\\b \\fs36 " ++ inlineListToRTF title
+ authorstext = if null authors
then ""
- else rtfPar 0 0 AlignCenter ("\\b \\fs36 " ++
- inlineListToRTF title)
- authorstext = if null authors
- then ""
- else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\"
- (map stringToRTF authors)))
- datetext = if date == ""
- then ""
- else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
- let spacer = if null (titletext ++ authorstext ++ datetext)
+ else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $
+ map stringToRTF authors))
+ datetext = if date == ""
then ""
- else rtfPar 0 0 AlignDefault "" in
- headerText ++ titletext ++ authorstext ++ datetext ++ spacer
+ else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
+ let spacer = if null (titletext ++ authorstext ++ datetext)
+ then ""
+ else rtfPar 0 0 AlignDefault "" in
+ headerText ++ titletext ++ authorstext ++ datetext ++ spacer
-- | Convert Pandoc block element to RTF.
blockToRTF :: Int -- ^ indent level
@@ -168,31 +168,27 @@ blockToRTF :: Int -- ^ indent level
-> String
blockToRTF _ _ Null = ""
blockToRTF indent alignment (Plain lst) =
- rtfCompact indent 0 alignment (inlineListToRTF lst)
+ rtfCompact indent 0 alignment $ inlineListToRTF lst
blockToRTF indent alignment (Para lst) =
- rtfPar indent 0 alignment (inlineListToRTF lst)
+ rtfPar indent 0 alignment $ inlineListToRTF lst
blockToRTF indent alignment (BlockQuote lst) =
concatMap (blockToRTF (indent + indentIncrement) alignment) lst
blockToRTF indent _ (CodeBlock str) =
rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
blockToRTF _ _ (RawHtml str) = ""
-blockToRTF indent alignment (BulletList lst) =
- spaceAtEnd $
+blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
-blockToRTF indent alignment (OrderedList attribs lst) =
- spaceAtEnd $ concat $
+blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
-blockToRTF indent alignment (DefinitionList lst) =
- spaceAtEnd $
+blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
concatMap (definitionListItemToRTF alignment indent) lst
blockToRTF indent _ HorizontalRule =
rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
-blockToRTF indent alignment (Header level lst) =
- rtfPar indent 0 alignment ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++
- (inlineListToRTF lst))
+blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $
+ "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
blockToRTF indent alignment (Table caption aligns sizes headers rows) =
- (tableRowToRTF True indent aligns sizes headers) ++ (concatMap
- (tableRowToRTF False indent aligns sizes) rows) ++
+ tableRowToRTF True indent aligns sizes headers ++
+ concatMap (tableRowToRTF False indent aligns sizes) rows ++
rtfPar indent 0 alignment (inlineListToRTF caption)
tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String
@@ -201,8 +197,10 @@ tableRowToRTF header indent aligns sizes cols =
totalTwips = 6 * 1440 -- 6 inches
rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
0 sizes
- cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs"
- else "") ++ "\\cellx" ++ show edge) rightEdges
+ cellDefs = map (\edge -> (if header
+ then "\\clbrdrb\\brdrs"
+ else "") ++ "\\cellx" ++ show edge)
+ rightEdges
start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
"\\trkeep\\intbl\n{\n"
end = "}\n\\intbl\\row}\n"
@@ -234,11 +232,12 @@ listItemToRTF alignment indent marker list =
let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in
-- insert the list marker into the (processed) first block
let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of
- Just (before, matched, after, _) -> before ++ "\\fi" ++
- show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++
- show listIncrement ++ "\\tab" ++ after
+ Just (before, matched, after, _) ->
+ before ++ "\\fi" ++ show (0 - listIncrement) ++
+ " " ++ marker ++ "\\tx" ++
+ show listIncrement ++ "\\tab" ++ after
Nothing -> first in
- modFirst ++ (concat rest)
+ modFirst ++ concat rest
-- | Convert definition list item (label, list of blocks) to RTF.
definitionListItemToRTF :: Alignment -- ^ alignment
@@ -285,4 +284,3 @@ inlineToRTF (Image alternate (source, tit)) =
inlineToRTF (Note contents) =
"{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
(concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"
-