From a8e2199034679c07411c76c42ab1ffb52b170029 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Wed, 15 Aug 2007 06:00:58 +0000 Subject: Major code cleanup in all modules. (Removed unneeded imports, reformatted, etc.) More major changes are documented below: + Removed Text.Pandoc.ParserCombinators and moved all its definitions to Text.Pandoc.Shared. + In Text.Pandoc.Shared: - Removed unneeded 'try' in blanklines. - Removed endsWith function and rewrote functions to use isSuffixOf instead. - Added >>~ combinator. - Rewrote stripTrailingNewlines, removeLeadingSpaces. + Moved Text.Pandoc.Entities -> Text.Pandoc.CharacterReferences. - Removed unneeded functions charToEntity, charToNumericalEntity. - Renamed functions using proper terminology (character references, not entities). decodeEntities -> decodeCharacterReferences, characterEntity -> characterReference. - Moved escapeStringToXML to Docbook writer, which is the only thing that uses it. - Removed old entity parser in HTML and Markdown readers; replaced with new charRef parser in Text.Pandoc.Shared. + Fixed accent bug in Text.Pandoc.Readers.LaTeX: \^{} now correctly parses as a '^' character. + Text.Pandoc.ASCIIMathML is no longer an exported module. git-svn-id: https://pandoc.googlecode.com/svn/trunk@835 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc.hs | 4 +- src/Text/Pandoc/Blocks.hs | 26 +- src/Text/Pandoc/CharacterReferences.hs | 335 +++++++++++ src/Text/Pandoc/Definition.hs | 15 +- src/Text/Pandoc/Entities.hs | 372 ------------- src/Text/Pandoc/ParserCombinators.hs | 198 ------- src/Text/Pandoc/Readers/HTML.hs | 362 ++++++------ src/Text/Pandoc/Readers/LaTeX.hs | 536 ++++++++---------- src/Text/Pandoc/Readers/Markdown.hs | 662 +++++++++------------- src/Text/Pandoc/Readers/RST.hs | 321 +++++------ src/Text/Pandoc/Shared.hs | 980 +++++++++++++++++++-------------- src/Text/Pandoc/Writers/ConTeXt.hs | 23 +- src/Text/Pandoc/Writers/Docbook.hs | 199 +++---- src/Text/Pandoc/Writers/HTML.hs | 475 ++++++++-------- src/Text/Pandoc/Writers/LaTeX.hs | 45 +- src/Text/Pandoc/Writers/Man.hs | 26 +- src/Text/Pandoc/Writers/Markdown.hs | 75 ++- src/Text/Pandoc/Writers/RST.hs | 46 +- src/Text/Pandoc/Writers/RTF.hs | 94 ++-- src/templates/DefaultHeaders.hs | 30 +- src/templates/S5.hs | 59 +- 21 files changed, 2308 insertions(+), 2575 deletions(-) create mode 100644 src/Text/Pandoc/CharacterReferences.hs delete mode 100644 src/Text/Pandoc/Entities.hs delete mode 100644 src/Text/Pandoc/ParserCombinators.hs (limited to 'src') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index fe724987c..df73ed325 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -39,8 +39,8 @@ inline links: > > markdownToRST :: String -> String > markdownToRST = toUTF8 . -> (writeRST defaultWriterOptions {writerReferenceLinks = True}) . -> (readMarkdown defaultParserState) . fromUTF8 +> (writeRST defaultWriterOptions {writerReferenceLinks = True}) . +> (readMarkdown defaultParserState) . fromUTF8 > > main = interact markdownToRST diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs index ffcd5bfe0..cfc22cb3e 100644 --- a/src/Text/Pandoc/Blocks.hs +++ b/src/Text/Pandoc/Blocks.hs @@ -43,9 +43,8 @@ module Text.Pandoc.Blocks rightAlignBlock ) where - import Text.PrettyPrint -import Data.List (transpose, intersperse) +import Data.List ( intersperse ) -- | A fixed-width block of text. Parameters are width of block, -- height of block, and list of lines. @@ -53,6 +52,17 @@ data TextBlock = TextBlock Int Int [String] instance Show TextBlock where show x = show $ blockToDoc x +-- | Break lines in a list of lines so that none are greater than +-- a given width. +breakLines :: Int -- ^ Maximum length of lines. + -> [String] -- ^ List of lines. + -> [String] +breakLines width [] = [] +breakLines width (l:ls) = + if length l > width + then (take width l):(breakLines width ((drop width l):ls)) + else l:(breakLines width ls) + -- | Convert a @Doc@ element into a @TextBlock@ with a specified width. docToBlock :: Int -- ^ Width of text block. -> Doc -- ^ @Doc@ to convert. @@ -60,13 +70,8 @@ docToBlock :: Int -- ^ Width of text block. docToBlock width doc = let rendered = renderStyle (style {lineLength = width, ribbonsPerLine = 1}) doc - lns = lines rendered - chop [] = [] - chop (l:ls) = if length l > width - then (take width l):(chop ((drop width l):ls)) - else l:(chop ls) - lns' = chop lns - in TextBlock width (length lns') lns' + lns = breakLines width $ lines rendered + in TextBlock width (length lns) lns -- | Convert a @TextBlock@ to a @Doc@ element. blockToDoc :: TextBlock -> Doc @@ -116,8 +121,7 @@ isWhitespace x = x `elem` " \t" -- | Left-aligns the contents of a @TextBlock@ within the block. leftAlignBlock :: TextBlock -> TextBlock leftAlignBlock (TextBlock width height lns) = - TextBlock width height $ - map (dropWhile isWhitespace) lns + TextBlock width height $ map (dropWhile isWhitespace) lns -- | Right-aligns the contents of a @TextBlock@ within the block. rightAlignBlock :: TextBlock -> TextBlock diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs new file mode 100644 index 000000000..deb2c3f1a --- /dev/null +++ b/src/Text/Pandoc/CharacterReferences.hs @@ -0,0 +1,335 @@ +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.CharacterReferences + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Functions for parsing character references. +-} +module Text.Pandoc.CharacterReferences ( + characterReference, + decodeCharacterReferences, + ) where +import Data.Char ( chr ) +import Text.ParserCombinators.Parsec +import qualified Data.Map as Map + +-- | Parse character entity. +characterReference :: GenParser Char st Char +characterReference = characterEntity <|> + hexadecimalCharacterReference <|> + decimalCharacterReference + "character entity" + +-- | Parse character entity. +characterEntity :: GenParser Char st Char +characterEntity = try $ do + st <- char '&' + body <- many1 alphaNum + end <- char ';' + let entity = "&" ++ body ++ ";" + return $ Map.findWithDefault '?' entity entityTable + +-- | Parse hexadecimal entity. +hexadecimalCharacterReference :: GenParser Char st Char +hexadecimalCharacterReference = try $ do + st <- string "&#" + hex <- oneOf "Xx" + body <- many1 (oneOf "0123456789ABCDEFabcdef") + end <- char ';' + return $ chr $ read ('0':'x':body) + +-- | Parse decimal entity. +decimalCharacterReference :: GenParser Char st Char +decimalCharacterReference = try $ do + st <- string "&#" + body <- many1 digit + end <- char ';' + return $ chr $ read body + +-- | Convert entities in a string to characters. +decodeCharacterReferences :: String -> String +decodeCharacterReferences str = + case parse (many (characterReference <|> anyChar)) str str of + Left err -> error $ "\nError: " ++ show err + Right result -> result + +entityTable :: Map.Map String Char +entityTable = Map.fromList entityTableList + +entityTableList :: [(String, Char)] +entityTableList = [ + (""", chr 34), + ("&", chr 38), + ("<", chr 60), + (">", chr 62), + (" ", chr 160), + ("¡", chr 161), + ("¢", chr 162), + ("£", chr 163), + ("¤", chr 164), + ("¥", chr 165), + ("¦", chr 166), + ("§", chr 167), + ("¨", chr 168), + ("©", chr 169), + ("ª", chr 170), + ("«", chr 171), + ("¬", chr 172), + ("­", chr 173), + ("®", chr 174), + ("¯", chr 175), + ("°", chr 176), + ("±", chr 177), + ("²", chr 178), + ("³", chr 179), + ("´", chr 180), + ("µ", chr 181), + ("¶", chr 182), + ("·", chr 183), + ("¸", chr 184), + ("¹", chr 185), + ("º", chr 186), + ("»", chr 187), + ("¼", chr 188), + ("½", chr 189), + ("¾", chr 190), + ("¿", chr 191), + ("À", chr 192), + ("Á", chr 193), + ("Â", chr 194), + ("Ã", chr 195), + ("Ä", chr 196), + ("Å", chr 197), + ("Æ", chr 198), + ("Ç", chr 199), + ("È", chr 200), + ("É", chr 201), + ("Ê", chr 202), + ("Ë", chr 203), + ("Ì", chr 204), + ("Í", chr 205), + ("Î", chr 206), + ("Ï", chr 207), + ("Ð", chr 208), + ("Ñ", chr 209), + ("Ò", chr 210), + ("Ó", chr 211), + ("Ô", chr 212), + ("Õ", chr 213), + ("Ö", chr 214), + ("×", chr 215), + ("Ø", chr 216), + ("Ù", chr 217), + ("Ú", chr 218), + ("Û", chr 219), + ("Ü", chr 220), + ("Ý", chr 221), + ("Þ", chr 222), + ("ß", chr 223), + ("à", chr 224), + ("á", chr 225), + ("â", chr 226), + ("ã", chr 227), + ("ä", chr 228), + ("å", chr 229), + ("æ", chr 230), + ("ç", chr 231), + ("è", chr 232), + ("é", chr 233), + ("ê", chr 234), + ("ë", chr 235), + ("ì", chr 236), + ("í", chr 237), + ("î", chr 238), + ("ï", chr 239), + ("ð", chr 240), + ("ñ", chr 241), + ("ò", chr 242), + ("ó", chr 243), + ("ô", chr 244), + ("õ", chr 245), + ("ö", chr 246), + ("÷", chr 247), + ("ø", chr 248), + ("ù", chr 249), + ("ú", chr 250), + ("û", chr 251), + ("ü", chr 252), + ("ý", chr 253), + ("þ", chr 254), + ("ÿ", chr 255), + ("Œ", chr 338), + ("œ", chr 339), + ("Š", chr 352), + ("š", chr 353), + ("Ÿ", chr 376), + ("ƒ", chr 402), + ("ˆ", chr 710), + ("˜", chr 732), + ("Α", chr 913), + ("Β", chr 914), + ("Γ", chr 915), + ("Δ", chr 916), + ("Ε", chr 917), + ("Ζ", chr 918), + ("Η", chr 919), + ("Θ", chr 920), + ("Ι", chr 921), + ("Κ", chr 922), + ("Λ", chr 923), + ("Μ", chr 924), + ("Ν", chr 925), + ("Ξ", chr 926), + ("Ο", chr 927), + ("Π", chr 928), + ("Ρ", chr 929), + ("Σ", chr 931), + ("Τ", chr 932), + ("Υ", chr 933), + ("Φ", chr 934), + ("Χ", chr 935), + ("Ψ", chr 936), + ("Ω", chr 937), + ("α", chr 945), + ("β", chr 946), + ("γ", chr 947), + ("δ", chr 948), + ("ε", chr 949), + ("ζ", chr 950), + ("η", chr 951), + ("θ", chr 952), + ("ι", chr 953), + ("κ", chr 954), + ("λ", chr 955), + ("μ", chr 956), + ("ν", chr 957), + ("ξ", chr 958), + ("ο", chr 959), + ("π", chr 960), + ("ρ", chr 961), + ("ς", chr 962), + ("σ", chr 963), + ("τ", chr 964), + ("υ", chr 965), + ("φ", chr 966), + ("χ", chr 967), + ("ψ", chr 968), + ("ω", chr 969), + ("ϑ", chr 977), + ("ϒ", chr 978), + ("ϖ", chr 982), + (" ", chr 8194), + (" ", chr 8195), + (" ", chr 8201), + ("‌", chr 8204), + ("‍", chr 8205), + ("‎", chr 8206), + ("‏", chr 8207), + ("–", chr 8211), + ("—", chr 8212), + ("‘", chr 8216), + ("’", chr 8217), + ("‚", chr 8218), + ("“", chr 8220), + ("”", chr 8221), + ("„", chr 8222), + ("†", chr 8224), + ("‡", chr 8225), + ("•", chr 8226), + ("…", chr 8230), + ("‰", chr 8240), + ("′", chr 8242), + ("″", chr 8243), + ("‹", chr 8249), + ("›", chr 8250), + ("‾", chr 8254), + ("⁄", chr 8260), + ("€", chr 8364), + ("ℑ", chr 8465), + ("℘", chr 8472), + ("ℜ", chr 8476), + ("™", chr 8482), + ("ℵ", chr 8501), + ("←", chr 8592), + ("↑", chr 8593), + ("→", chr 8594), + ("↓", chr 8595), + ("↔", chr 8596), + ("↵", chr 8629), + ("⇐", chr 8656), + ("⇑", chr 8657), + ("⇒", chr 8658), + ("⇓", chr 8659), + ("⇔", chr 8660), + ("∀", chr 8704), + ("∂", chr 8706), + ("∃", chr 8707), + ("∅", chr 8709), + ("∇", chr 8711), + ("∈", chr 8712), + ("∉", chr 8713), + ("∋", chr 8715), + ("∏", chr 8719), + ("∑", chr 8721), + ("−", chr 8722), + ("∗", chr 8727), + ("√", chr 8730), + ("∝", chr 8733), + ("∞", chr 8734), + ("∠", chr 8736), + ("∧", chr 8743), + ("∨", chr 8744), + ("∩", chr 8745), + ("∪", chr 8746), + ("∫", chr 8747), + ("∴", chr 8756), + ("∼", chr 8764), + ("≅", chr 8773), + ("≈", chr 8776), + ("≠", chr 8800), + ("≡", chr 8801), + ("≤", chr 8804), + ("≥", chr 8805), + ("⊂", chr 8834), + ("⊃", chr 8835), + ("⊄", chr 8836), + ("⊆", chr 8838), + ("⊇", chr 8839), + ("⊕", chr 8853), + ("⊗", chr 8855), + ("⊥", chr 8869), + ("⋅", chr 8901), + ("⌈", chr 8968), + ("⌉", chr 8969), + ("⌊", chr 8970), + ("⌋", chr 8971), + ("⟨", chr 9001), + ("⟩", chr 9002), + ("◊", chr 9674), + ("♠", chr 9824), + ("♣", chr 9827), + ("♥", chr 9829), + ("♦", chr 9830) + ] diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 3d3858b7e..7d1125c5a 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -34,10 +34,10 @@ data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show) -- | Bibliographic information for the document: title (list of 'Inline'), -- authors (list of strings), date (string). -data Meta = Meta [Inline] -- title - [String] -- authors - String -- date - deriving (Eq, Show, Read) +data Meta = Meta [Inline] -- title + [String] -- authors + String -- date + deriving (Eq, Show, Read) -- | Alignment of a table column. data Alignment = AlignLeft @@ -65,12 +65,11 @@ data ListNumberDelim = DefaultDelim -- | Block element. data Block = Plain [Inline] -- ^ Plain text, not a paragraph - | Null -- ^ Nothing | Para [Inline] -- ^ Paragraph | CodeBlock String -- ^ Code block (literal) | RawHtml String -- ^ Raw HTML block (literal) | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes, + | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes -- and a list of items, each a list of blocks) | BulletList [[Block]] -- ^ Bullet list (list of items, each -- a list of blocks) @@ -84,6 +83,7 @@ data Block -- relative column widths, column headers -- (each a list of blocks), and rows -- (each a list of lists of blocks) + | Null -- ^ Nothing deriving (Eq, Read, Show) -- | Type of quotation marks to use in Quoted inline. @@ -112,6 +112,5 @@ data Inline | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target | Image [Inline] Target -- ^ Image: alt text (list of inlines), target -- and target - | Note [Block] -- ^ Footnote or endnote - reference (string), - -- text (list of blocks) + | Note [Block] -- ^ Footnote or endnote deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs deleted file mode 100644 index 125774d4d..000000000 --- a/src/Text/Pandoc/Entities.hs +++ /dev/null @@ -1,372 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Entities - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Functions for encoding unicode characters as entity references, -and vice versa. --} -module Text.Pandoc.Entities ( - charToEntity, - charToNumericalEntity, - decodeEntities, - escapeCharForXML, - escapeStringForXML, - characterEntity - ) where -import Data.Char ( chr, ord ) -import Text.ParserCombinators.Parsec -import Data.Maybe ( fromMaybe ) -import qualified Data.Map as Map - --- | Returns a string containing an entity reference for the character. -charToEntity :: Char -> String -charToEntity char = Map.findWithDefault (charToNumericalEntity char) char reverseEntityTable - --- | Returns a string containing a numerical entity reference for the char. -charToNumericalEntity :: Char -> String -charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";" - --- | Parse character entity. -characterEntity :: GenParser Char st Char -characterEntity = namedEntity <|> hexEntity <|> decimalEntity "character entity" - --- | Parse character entity. -namedEntity :: GenParser Char st Char -namedEntity = try $ do - st <- char '&' - body <- many1 alphaNum - end <- char ';' - let entity = "&" ++ body ++ ";" - return $ Map.findWithDefault '?' entity entityTable - --- | Parse hexadecimal entity. -hexEntity :: GenParser Char st Char -hexEntity = try $ do - st <- string "&#" - hex <- oneOf "Xx" - body <- many1 (oneOf "0123456789ABCDEFabcdef") - end <- char ';' - return $ chr $ read ('0':'x':body) - --- | Parse decimal entity. -decimalEntity :: GenParser Char st Char -decimalEntity = try $ do - st <- string "&#" - body <- many1 digit - end <- char ';' - return $ chr $ read body - --- | Escape one character as needed for XML. -escapeCharForXML :: Char -> String -escapeCharForXML x = - case x of - '&' -> "&" - '<' -> "<" - '>' -> ">" - '"' -> """ - '\160' -> " " - c -> [c] - --- | True if the character needs to be escaped. -needsEscaping :: Char -> Bool -needsEscaping c = c `elem` "&<>\"\160" - --- | Escape string as needed for XML. Entity references are not preserved. -escapeStringForXML :: String -> String -escapeStringForXML "" = "" -escapeStringForXML str = - case break needsEscaping str of - (okay, "") -> okay - (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs - --- | Convert entities in a string to characters. -decodeEntities :: String -> String -decodeEntities str = - case parse (many (characterEntity <|> anyChar)) str str of - Left err -> error $ "\nError: " ++ show err - Right result -> result - -entityTable :: Map.Map String Char -entityTable = Map.fromList entityTableList - -reverseEntityTable :: Map.Map Char String -reverseEntityTable = Map.fromList $ map (\(a,b) -> (b,a)) entityTableList - -entityTableList :: [(String, Char)] -entityTableList = [ - (""", chr 34), - ("&", chr 38), - ("<", chr 60), - (">", chr 62), - (" ", chr 160), - ("¡", chr 161), - ("¢", chr 162), - ("£", chr 163), - ("¤", chr 164), - ("¥", chr 165), - ("¦", chr 166), - ("§", chr 167), - ("¨", chr 168), - ("©", chr 169), - ("ª", chr 170), - ("«", chr 171), - ("¬", chr 172), - ("­", chr 173), - ("®", chr 174), - ("¯", chr 175), - ("°", chr 176), - ("±", chr 177), - ("²", chr 178), - ("³", chr 179), - ("´", chr 180), - ("µ", chr 181), - ("¶", chr 182), - ("·", chr 183), - ("¸", chr 184), - ("¹", chr 185), - ("º", chr 186), - ("»", chr 187), - ("¼", chr 188), - ("½", chr 189), - ("¾", chr 190), - ("¿", chr 191), - ("À", chr 192), - ("Á", chr 193), - ("Â", chr 194), - ("Ã", chr 195), - ("Ä", chr 196), - ("Å", chr 197), - ("Æ", chr 198), - ("Ç", chr 199), - ("È", chr 200), - ("É", chr 201), - ("Ê", chr 202), - ("Ë", chr 203), - ("Ì", chr 204), - ("Í", chr 205), - ("Î", chr 206), - ("Ï", chr 207), - ("Ð", chr 208), - ("Ñ", chr 209), - ("Ò", chr 210), - ("Ó", chr 211), - ("Ô", chr 212), - ("Õ", chr 213), - ("Ö", chr 214), - ("×", chr 215), - ("Ø", chr 216), - ("Ù", chr 217), - ("Ú", chr 218), - ("Û", chr 219), - ("Ü", chr 220), - ("Ý", chr 221), - ("Þ", chr 222), - ("ß", chr 223), - ("à", chr 224), - ("á", chr 225), - ("â", chr 226), - ("ã", chr 227), - ("ä", chr 228), - ("å", chr 229), - ("æ", chr 230), - ("ç", chr 231), - ("è", chr 232), - ("é", chr 233), - ("ê", chr 234), - ("ë", chr 235), - ("ì", chr 236), - ("í", chr 237), - ("î", chr 238), - ("ï", chr 239), - ("ð", chr 240), - ("ñ", chr 241), - ("ò", chr 242), - ("ó", chr 243), - ("ô", chr 244), - ("õ", chr 245), - ("ö", chr 246), - ("÷", chr 247), - ("ø", chr 248), - ("ù", chr 249), - ("ú", chr 250), - ("û", chr 251), - ("ü", chr 252), - ("ý", chr 253), - ("þ", chr 254), - ("ÿ", chr 255), - ("Œ", chr 338), - ("œ", chr 339), - ("Š", chr 352), - ("š", chr 353), - ("Ÿ", chr 376), - ("ƒ", chr 402), - ("ˆ", chr 710), - ("˜", chr 732), - ("Α", chr 913), - ("Β", chr 914), - ("Γ", chr 915), - ("Δ", chr 916), - ("Ε", chr 917), - ("Ζ", chr 918), - ("Η", chr 919), - ("Θ", chr 920), - ("Ι", chr 921), - ("Κ", chr 922), - ("Λ", chr 923), - ("Μ", chr 924), - ("Ν", chr 925), - ("Ξ", chr 926), - ("Ο", chr 927), - ("Π", chr 928), - ("Ρ", chr 929), - ("Σ", chr 931), - ("Τ", chr 932), - ("Υ", chr 933), - ("Φ", chr 934), - ("Χ", chr 935), - ("Ψ", chr 936), - ("Ω", chr 937), - ("α", chr 945), - ("β", chr 946), - ("γ", chr 947), - ("δ", chr 948), - ("ε", chr 949), - ("ζ", chr 950), - ("η", chr 951), - ("θ", chr 952), - ("ι", chr 953), - ("κ", chr 954), - ("λ", chr 955), - ("μ", chr 956), - ("ν", chr 957), - ("ξ", chr 958), - ("ο", chr 959), - ("π", chr 960), - ("ρ", chr 961), - ("ς", chr 962), - ("σ", chr 963), - ("τ", chr 964), - ("υ", chr 965), - ("φ", chr 966), - ("χ", chr 967), - ("ψ", chr 968), - ("ω", chr 969), - ("ϑ", chr 977), - ("ϒ", chr 978), - ("ϖ", chr 982), - (" ", chr 8194), - (" ", chr 8195), - (" ", chr 8201), - ("‌", chr 8204), - ("‍", chr 8205), - ("‎", chr 8206), - ("‏", chr 8207), - ("–", chr 8211), - ("—", chr 8212), - ("‘", chr 8216), - ("’", chr 8217), - ("‚", chr 8218), - ("“", chr 8220), - ("”", chr 8221), - ("„", chr 8222), - ("†", chr 8224), - ("‡", chr 8225), - ("•", chr 8226), - ("…", chr 8230), - ("‰", chr 8240), - ("′", chr 8242), - ("″", chr 8243), - ("‹", chr 8249), - ("›", chr 8250), - ("‾", chr 8254), - ("⁄", chr 8260), - ("€", chr 8364), - ("ℑ", chr 8465), - ("℘", chr 8472), - ("ℜ", chr 8476), - ("™", chr 8482), - ("ℵ", chr 8501), - ("←", chr 8592), - ("↑", chr 8593), - ("→", chr 8594), - ("↓", chr 8595), - ("↔", chr 8596), - ("↵", chr 8629), - ("⇐", chr 8656), - ("⇑", chr 8657), - ("⇒", chr 8658), - ("⇓", chr 8659), - ("⇔", chr 8660), - ("∀", chr 8704), - ("∂", chr 8706), - ("∃", chr 8707), - ("∅", chr 8709), - ("∇", chr 8711), - ("∈", chr 8712), - ("∉", chr 8713), - ("∋", chr 8715), - ("∏", chr 8719), - ("∑", chr 8721), - ("−", chr 8722), - ("∗", chr 8727), - ("√", chr 8730), - ("∝", chr 8733), - ("∞", chr 8734), - ("∠", chr 8736), - ("∧", chr 8743), - ("∨", chr 8744), - ("∩", chr 8745), - ("∪", chr 8746), - ("∫", chr 8747), - ("∴", chr 8756), - ("∼", chr 8764), - ("≅", chr 8773), - ("≈", chr 8776), - ("≠", chr 8800), - ("≡", chr 8801), - ("≤", chr 8804), - ("≥", chr 8805), - ("⊂", chr 8834), - ("⊃", chr 8835), - ("⊄", chr 8836), - ("⊆", chr 8838), - ("⊇", chr 8839), - ("⊕", chr 8853), - ("⊗", chr 8855), - ("⊥", chr 8869), - ("⋅", chr 8901), - ("⌈", chr 8968), - ("⌉", chr 8969), - ("⌊", chr 8970), - ("⌋", chr 8971), - ("⟨", chr 9001), - ("⟩", chr 9002), - ("◊", chr 9674), - ("♠", chr 9824), - ("♣", chr 9827), - ("♥", chr 9829), - ("♦", chr 9830) - ] diff --git a/src/Text/Pandoc/ParserCombinators.hs b/src/Text/Pandoc/ParserCombinators.hs deleted file mode 100644 index 559a654cc..000000000 --- a/src/Text/Pandoc/ParserCombinators.hs +++ /dev/null @@ -1,198 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.ParserCombinators - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Parser combinators used in Pandoc readers. --} -module Text.Pandoc.ParserCombinators ( - anyLine, - many1Till, - notFollowedBy', - oneOfStrings, - spaceChar, - skipSpaces, - blankline, - blanklines, - enclosed, - stringAnyCase, - parseFromString, - lineClump, - charsInBalanced, - charsInBalanced', - romanNumeral, - withHorizDisplacement - ) where -import Text.ParserCombinators.Parsec -import Data.Char ( toUpper, toLower ) - ---- | Parse any line of text -anyLine :: GenParser Char st [Char] -anyLine = try (manyTill anyChar newline) <|> many1 anyChar - -- second alternative is for a line ending with eof - --- | Parses a space or tab. -spaceChar :: CharParser st Char -spaceChar = oneOf " \t" - --- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () -skipSpaces = skipMany spaceChar - --- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char -blankline = try (do - skipSpaces - newline) - --- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] -blanklines = try (many1 blankline) - --- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] -enclosed start end parser = try (do - start - notFollowedBy space - result <- many1Till parser (try end) - return result) - --- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] -many1Till p end = try (do - first <- p - rest <- manyTill p end - return (first:rest)) - --- | A more general form of @notFollowedBy@. This one allows any --- type of parser to be specified, and succeeds only if that parser fails. --- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () -notFollowedBy' parser = try (do { c <- try parser; unexpected (show c) } - <|> return ()) - --- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings - --- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = try (do - firstChar <- choice [ char (toUpper x), char (toLower x) ] - rest <- stringAnyCase xs - return (firstChar:rest)) - --- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a -parseFromString parser str = try $ do - oldInput <- getInput - setInput str - result <- parser - setInput oldInput - return result - --- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = do - lines <- many1 (do{notFollowedBy blankline; anyLine}) - blanks <- blanklines <|> (do{eof; return "\n"}) - return ((unlines lines) ++ blanks) - --- | Parse a string of characters between an open character --- and a close character, including text between balanced --- pairs of open and close. For example, --- @charsInBalanced '(' ')'@ will parse "(hello (there))" --- and return "hello (there)". Stop if a blank line is --- encountered. -charsInBalanced :: Char -> Char -> GenParser Char st String -charsInBalanced open close = try $ do - char open - raw <- manyTill ( (do res <- charsInBalanced open close - return $ [open] ++ res ++ [close]) - <|> (do notFollowedBy' (blankline >> blanklines) - count 1 anyChar)) - (char close) - return $ concat raw - --- | Like charsInBalanced, but allow blank lines in the content. -charsInBalanced' :: Char -> Char -> GenParser Char st String -charsInBalanced' open close = try $ do - char open - raw <- manyTill ( (do res <- charsInBalanced open close - return $ [open] ++ res ++ [close]) - <|> count 1 anyChar) - (char close) - return $ concat raw - --- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Bool -> -- ^ Uppercase if true - GenParser Char st Int -romanNumeral upper = try $ do - let char' c = char (if upper then toUpper c else c) - let one = char' 'i' - let five = char' 'v' - let ten = char' 'x' - let fifty = char' 'l' - let hundred = char' 'c' - let fivehundred = char' 'd' - let thousand = char' 'm' - thousands <- many thousand >>= (return . (1000 *) . length) - ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 - fivehundreds <- many fivehundred >>= (return . (500 *) . length) - fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 - hundreds <- many hundred >>= (return . (100 *) . length) - nineties <- option 0 $ try $ ten >> hundred >> return 90 - fifties <- many fifty >>= (return . (50 *) . length) - forties <- option 0 $ try $ ten >> fifty >> return 40 - tens <- many ten >>= (return . (10 *) . length) - nines <- option 0 $ try $ one >> ten >> return 9 - fives <- many five >>= (return . (5*) . length) - fours <- option 0 $ try $ one >> five >> return 4 - ones <- many one >>= (return . length) - let total = thousands + ninehundreds + fivehundreds + fourhundreds + - hundreds + nineties + fifties + forties + tens + nines + - fives + fours + ones - if total == 0 - then fail "not a roman numeral" - else return total - --- | Applies a parser, returns tuple of its results and its horizontal --- displacement (the difference between the source column at the end --- and the source column at the beginning). Vertical displacement --- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) -withHorizDisplacement parser = do - pos1 <- getPosition - result <- parser - pos2 <- getPosition - return (result, sourceColumn pos2 - sourceColumn pos1) - diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 1742667b8..1eb5d7b4a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -41,12 +41,12 @@ module Text.Pandoc.Readers.HTML ( ) where import Text.ParserCombinators.Parsec -import Text.Pandoc.ParserCombinators import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( characterEntity, decodeEntities ) +import Text.Pandoc.CharacterReferences ( characterReference, + decodeCharacterReferences ) import Data.Maybe ( fromMaybe ) -import Data.List ( intersect, takeWhile, dropWhile ) +import Data.List ( takeWhile, dropWhile ) import Data.Char ( toUpper, toLower, isAlphaNum ) -- | Convert HTML-formatted string to 'Pandoc' document. @@ -55,10 +55,6 @@ readHtml :: ParserState -- ^ Parser state -> Pandoc readHtml = readWith parseHtml --- for testing -testString :: String -> IO () -testString = testStringWith parseHtml - -- -- Constants -- @@ -74,26 +70,18 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", -- -- | Read blocks until end tag. -blocksTilEnd tag = try (do - blocks <- manyTill (do {b <- block; spaces; return b}) (htmlEndTag tag) - return $ filter (/= Null) blocks) +blocksTilEnd tag = do + blocks <- manyTill (block >>~ spaces) (htmlEndTag tag) + return $ filter (/= Null) blocks -- | Read inlines until end tag. -inlinesTilEnd tag = try (do - inlines <- manyTill inline (htmlEndTag tag) - return inlines) +inlinesTilEnd tag = manyTill inline (htmlEndTag tag) -- | Parse blocks between open and close tag. -blocksIn tag = try $ do - htmlTag tag - spaces - blocksTilEnd tag +blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag -- | Parse inlines between open and close tag. -inlinesIn tag = try $ do - htmlTag tag - spaces - inlinesTilEnd tag +inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag -- | Extract type from a tag: e.g. @br@ from @\@ extractTagType :: String -> String @@ -103,19 +91,19 @@ extractTagType ('<':rest) = extractTagType _ = "" -- | Parse any HTML tag (closing or opening) and return text of tag -anyHtmlTag = try (do +anyHtmlTag = try $ do char '<' spaces tag <- many1 alphaNum attribs <- htmlAttributes spaces ender <- option "" (string "/") - let ender' = if (null ender) then "" else " /" + let ender' = if null ender then "" else " /" spaces char '>' - return ("<" ++ tag ++ attribs ++ ender' ++ ">")) + return $ "<" ++ tag ++ attribs ++ ender' ++ ">" -anyHtmlEndTag = try (do +anyHtmlEndTag = try $ do char '<' spaces char '/' @@ -123,19 +111,19 @@ anyHtmlEndTag = try (do tagType <- many1 alphaNum spaces char '>' - return ("")) + return $ "" htmlTag :: String -> GenParser Char st (String, [(String, String)]) -htmlTag tag = try (do +htmlTag tag = try $ do char '<' spaces stringAnyCase tag attribs <- many htmlAttribute spaces - option "" (string "/") + optional (string "/") spaces char '>' - return (tag, (map (\(name, content, raw) -> (name, content)) attribs))) + return (tag, (map (\(name, content, raw) -> (name, content)) attribs)) -- parses a quoted html attribute value quoted quoteChar = do @@ -145,20 +133,20 @@ quoted quoteChar = do htmlAttributes = do attrList <- many htmlAttribute - return (concatMap (\(name, content, raw) -> raw) attrList) + return $ concatMap (\(name, content, raw) -> raw) attrList htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute --- minimized boolean attribute (no = and value) -htmlMinimizedAttribute = try (do +-- minimized boolean attribute +htmlMinimizedAttribute = try $ do many1 space name <- many1 (choice [letter, oneOf ".-_:"]) spaces notFollowedBy (char '=') let content = name - return (name, content, (" " ++ name))) + return (name, content, (" " ++ name)) -htmlRegularAttribute = try (do +htmlRegularAttribute = try $ do many1 space name <- many1 (choice [letter, oneOf ".-_:"]) spaces @@ -170,10 +158,10 @@ htmlRegularAttribute = try (do a <- many (alphaNum <|> (oneOf "-._:")) return (a,"")) ] return (name, content, - (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) + (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)) -- | Parse an end tag of type 'tag' -htmlEndTag tag = try (do +htmlEndTag tag = try $ do char '<' spaces char '/' @@ -181,87 +169,83 @@ htmlEndTag tag = try (do stringAnyCase tag spaces char '>' - return ("")) + return $ "" -- | Returns @True@ if the tag is an inline tag. isInline tag = (extractTagType tag) `elem` inlineHtmlTags -anyHtmlBlockTag = try (do - tag <- choice [anyHtmlTag, anyHtmlEndTag] - if isInline tag then fail "inline tag" else return tag) +anyHtmlBlockTag = try $ do + tag <- anyHtmlTag <|> anyHtmlEndTag + if isInline tag then fail "inline tag" else return tag -anyHtmlInlineTag = try (do - tag <- choice [ anyHtmlTag, anyHtmlEndTag ] - if isInline tag then return tag else fail "not an inline tag") +anyHtmlInlineTag = try $ do + tag <- anyHtmlTag <|> anyHtmlEndTag + if isInline tag then return tag else fail "not an inline tag" -- | Parses material between script tags. -- Scripts must be treated differently, because they can contain '<>' etc. -htmlScript = try (do +htmlScript = try $ do open <- string "")) + return $ open ++ rest ++ "" htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ] -rawHtmlBlock = try (do - notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"]) +rawHtmlBlock = try $ do + notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") body <- htmlBlockElement <|> anyHtmlBlockTag - sp <- (many space) + sp <- many space state <- getState - if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null) + if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null -- | Parses an HTML comment. -htmlComment = try (do +htmlComment = try $ do string "")) - return ("")) + return $ "" -- -- parsing documents -- -xmlDec = try (do +xmlDec = try $ do string "') - return ("")) + return $ "" -definition = try (do +definition = try $ do string "') - return ("")) + return $ "" -nonTitleNonHead = try (do - notFollowedBy' (htmlTag "title") - notFollowedBy' (htmlTag "/head") - result <- choice [do {rawHtmlBlock; return ' '}, anyChar] - return result) +nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >> + ((rawHtmlBlock >> return ' ') <|> anyChar) -parseTitle = try (do - (tag, attribs) <- htmlTag "title" +parseTitle = try $ do + (tag, _) <- htmlTag "title" contents <- inlinesTilEnd tag spaces - return contents) + return contents -- parse header and return meta-information (for now, just title) -parseHead = try (do +parseHead = try $ do htmlTag "head" spaces skipMany nonTitleNonHead contents <- option [] parseTitle skipMany nonTitleNonHead htmlTag "/head" - return (contents, [], "")) + return (contents, [], "") -skipHtmlTag tag = option ("",[]) (htmlTag tag) +skipHtmlTag tag = optional (htmlTag tag) -- h1 class="title" representation of title in body -bodyTitle = try (do +bodyTitle = try $ do (tag, attribs) <- htmlTag "h1" cl <- case (extractAttribute "class" attribs) of - Just "title" -> do {return ""} + Just "title" -> return "" otherwise -> fail "not title" inlinesTilEnd "h1" - return "") parseHtml = do sepEndBy (choice [xmlDec, definition, htmlComment]) spaces @@ -271,27 +255,30 @@ parseHtml = do spaces skipHtmlTag "body" spaces - option "" bodyTitle -- skip title in body, because it's represented in meta + optional bodyTitle -- skip title in body, because it's represented in meta blocks <- parseBlocks spaces - option "" (htmlEndTag "body") + optional (htmlEndTag "body") spaces - option "" (htmlEndTag "html") + optional (htmlEndTag "html") many anyChar -- ignore anything after eof - return (Pandoc (Meta title authors date) blocks) + return $ Pandoc (Meta title authors date) blocks -- -- parsing blocks -- -parseBlocks = do - spaces - result <- sepEndBy block spaces - return $ filter (/= Null) result +parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null)) -block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain, - rawHtmlBlock ] "block" +block = choice [ codeBlock + , header + , hrule + , list + , blockQuote + , para + , plain + , rawHtmlBlock ] "block" -- -- header blocks @@ -299,53 +286,49 @@ block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain, header = choice (map headerLevel (enumFromTo 1 5)) "header" -headerLevel n = try (do +headerLevel n = try $ do let level = "h" ++ show n (tag, attribs) <- htmlTag level contents <- inlinesTilEnd level - return (Header n (normalizeSpaces contents))) + return $ Header n (normalizeSpaces contents) -- -- hrule block -- -hrule = try (do +hrule = try $ do (tag, attribs) <- htmlTag "hr" state <- getState - if (not (null attribs)) && (stateParseRaw state) - then -- in this case we want to parse it as raw html - unexpected "attributes in hr" - else return HorizontalRule) + if not (null attribs) && stateParseRaw state + then unexpected "attributes in hr" -- parse as raw in this case + else return HorizontalRule -- -- code blocks -- -codeBlock = choice [ preCodeBlock, bareCodeBlock ] "code block" +codeBlock = preCodeBlock <|> bareCodeBlock "code block" -preCodeBlock = try (do +preCodeBlock = try $ do htmlTag "pre" spaces - htmlTag "code" - result <- manyTill anyChar (htmlEndTag "code") + result <- bareCodeBlock spaces htmlEndTag "pre" - return (CodeBlock (stripTrailingNewlines (decodeEntities result)))) + return result -bareCodeBlock = try (do +bareCodeBlock = try $ do htmlTag "code" result <- manyTill anyChar (htmlEndTag "code") - return (CodeBlock (stripTrailingNewlines (decodeEntities result)))) + return $ CodeBlock $ stripTrailingNewlines $ + decodeCharacterReferences result -- -- block quotes -- -blockQuote = try (do - tag <- htmlTag "blockquote" - spaces - blocks <- blocksTilEnd "blockquote" - return (BlockQuote blocks)) +blockQuote = try $ htmlTag "blockquote" >> spaces >> + blocksTilEnd "blockquote" >>= (return . BlockQuote) -- -- list blocks @@ -354,119 +337,105 @@ blockQuote = try (do list = choice [ bulletList, orderedList, definitionList ] "list" orderedList = try $ do - (_, attribs) <- htmlTag "ol" - (start, style) <- option (1, DefaultStyle) $ - do failIfStrict - let sta = fromMaybe "1" $ - lookup "start" attribs - let sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - let sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> DefaultStyle - return (read sta, sty') - spaces - items <- sepEndBy1 (blocksIn "li") spaces - htmlEndTag "ol" - return (OrderedList (start, style, DefaultDelim) items) + (_, attribs) <- htmlTag "ol" + (start, style) <- option (1, DefaultStyle) $ + do failIfStrict + let sta = fromMaybe "1" $ + lookup "start" attribs + let sty = fromMaybe (fromMaybe "" $ + lookup "style" attribs) $ + lookup "class" attribs + let sty' = case sty of + "lower-roman" -> LowerRoman + "upper-roman" -> UpperRoman + "lower-alpha" -> LowerAlpha + "upper-alpha" -> UpperAlpha + "decimal" -> Decimal + _ -> DefaultStyle + return (read sta, sty') + spaces + items <- sepEndBy1 (blocksIn "li") spaces + htmlEndTag "ol" + return $ OrderedList (start, style, DefaultDelim) items bulletList = try $ do - htmlTag "ul" - spaces - items <- sepEndBy1 (blocksIn "li") spaces - htmlEndTag "ul" - return (BulletList items) + htmlTag "ul" + spaces + items <- sepEndBy1 (blocksIn "li") spaces + htmlEndTag "ul" + return $ BulletList items definitionList = try $ do - failIfStrict -- def lists not part of standard markdown - tag <- htmlTag "dl" - spaces - items <- sepEndBy1 definitionListItem spaces - htmlEndTag "dl" - return (DefinitionList items) + failIfStrict -- def lists not part of standard markdown + tag <- htmlTag "dl" + spaces + items <- sepEndBy1 definitionListItem spaces + htmlEndTag "dl" + return $ DefinitionList items definitionListItem = try $ do - terms <- sepEndBy1 (inlinesIn "dt") spaces - defs <- sepEndBy1 (blocksIn "dd") spaces - let term = joinWithSep [LineBreak] terms - return (term, concat defs) + terms <- sepEndBy1 (inlinesIn "dt") spaces + defs <- sepEndBy1 (blocksIn "dd") spaces + let term = joinWithSep [LineBreak] terms + return (term, concat defs) -- -- paragraph block -- -para = try (do - tag <- htmlTag "p" - result <- inlinesTilEnd "p" - return (Para (normalizeSpaces result))) +para = htmlTag "p" >> inlinesTilEnd "p" >>= return . Para . normalizeSpaces -- -- plain block -- -plain = do - result <- many1 inline - return (Plain (normalizeSpaces result)) +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- inline -- -inline = choice [ text, special ] "inline" - -text = choice [ entity, strong, emph, superscript, subscript, - strikeout, spanStrikeout, code, str, - linebreak, whitespace ] "text" - -special = choice [ link, image, rawHtmlInline ] - "link, inline html, or image" - -entity = do - ent <- characterEntity - return $ Str [ent] - -code = try (do +inline = choice [ charRef + , strong + , emph + , superscript + , subscript + , strikeout + , spanStrikeout + , code + , str + , linebreak + , whitespace + , link + , image + , rawHtmlInline ] "inline" + +code = try $ do htmlTag "code" result <- manyTill anyChar (htmlEndTag "code") -- remove internal line breaks, leading and trailing space, - -- and decode entities - let result' = decodeEntities $ removeLeadingTrailingSpace $ - joinWithSep " " $ lines result - return (Code result')) + -- and decode character references + return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ + joinWithSep " " $ lines result rawHtmlInline = do - result <- choice [htmlScript, anyHtmlInlineTag] + result <- htmlScript <|> anyHtmlInlineTag state <- getState if stateParseRaw state then return (HtmlInline result) else return (Str "") -betweenTags tag = try (do - htmlTag tag - result <- inlinesTilEnd tag - return (normalizeSpaces result)) +betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= + return . normalizeSpaces -emph = try (do - result <- choice [betweenTags "em", betweenTags "it"] - return (Emph result)) +emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph -superscript = try $ do - failIfStrict -- strict markdown has no superscript, so treat as raw HTML - result <- betweenTags "sup" - return (Superscript result) +strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong -subscript = try $ do - failIfStrict -- strict markdown has no subscript, so treat as raw HTML - result <- betweenTags "sub" - return (Subscript result) +superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript -strikeout = try $ do - failIfStrict -- strict markdown has no strikeout, so treat as raw HTML - result <- choice [betweenTags "s", betweenTags "strike"] - return (Strikeout result) +subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript + +strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= + return . Strikeout spanStrikeout = try $ do failIfStrict -- strict markdown has no strikeout, so treat as raw HTML @@ -474,25 +443,14 @@ spanStrikeout = try $ do result <- case (extractAttribute "class" attributes) of Just "strikeout" -> inlinesTilEnd "span" _ -> fail "not a strikeout" - return (Strikeout result) + return $ Strikeout result -strong = try (do - result <- choice [betweenTags "b", betweenTags "strong"] - return (Strong result)) - -whitespace = do - many1 space - return Space +whitespace = many1 space >> return Space -- hard line break -linebreak = do - htmlTag "br" - option ' ' newline - return LineBreak +linebreak = htmlTag "br" >> optional newline >> return LineBreak -str = do - result <- many1 (noneOf "<& \t\n") - return (Str result) +str = many1 (noneOf "<& \t\n") >>= return . Str -- -- links and images @@ -501,27 +459,27 @@ str = do -- extract contents of attribute (attribute names are case-insensitive) extractAttribute name [] = Nothing extractAttribute name ((attrName, contents):rest) = - let name' = map toLower name - attrName' = map toLower attrName in - if (attrName' == name') - then Just (decodeEntities contents) - else extractAttribute name rest + let name' = map toLower name + attrName' = map toLower attrName + in if attrName' == name' + then Just (decodeCharacterReferences contents) + else extractAttribute name rest link = try $ do (tag, attributes) <- htmlTag "a" url <- case (extractAttribute "href" attributes) of - Just url -> do {return url} + Just url -> return url Nothing -> fail "no href" - let title = fromMaybe "" (extractAttribute "title" attributes) + let title = fromMaybe "" $ extractAttribute "title" attributes label <- inlinesTilEnd "a" return $ Link (normalizeSpaces label) (url, title) image = try $ do (tag, attributes) <- htmlTag "img" url <- case (extractAttribute "src" attributes) of - Just url -> do {return url} + Just url -> return url Nothing -> fail "no src" - let title = fromMaybe "" (extractAttribute "title" attributes) + let title = fromMaybe "" $ extractAttribute "title" attributes let alt = fromMaybe "" (extractAttribute "alt" attributes) return $ Image [Str alt] (url, title) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 73a3e4a8f..4b91b528c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -34,7 +34,6 @@ module Text.Pandoc.Readers.LaTeX ( ) where import Text.ParserCombinators.Parsec -import Text.Pandoc.ParserCombinators import Text.Pandoc.Definition import Text.Pandoc.Shared import Data.Maybe ( fromMaybe ) @@ -47,9 +46,6 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser -> Pandoc readLaTeX = readWith parseLaTeX --- for testing -testString = testStringWith parseLaTeX - -- characters with special meaning specialChars = "\\$%&^&_~#{}\n \t|<>'\"-" @@ -58,12 +54,12 @@ specialChars = "\\$%&^&_~#{}\n \t|<>'\"-" -- -- | Returns text between brackets and its matching pair. -bracketedText openB closeB = try (do +bracketedText openB closeB = do result <- charsInBalanced' openB closeB - return ([openB] ++ result ++ [closeB])) + return $ [openB] ++ result ++ [closeB] -- | Returns an option or argument of a LaTeX command. -optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ] +optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']' -- | True if the string begins with '{'. isArg ('{':rest) = True @@ -73,62 +69,55 @@ isArg other = False commandArgs = many optOrArg -- | Parses LaTeX command, returns (name, star, list of options or arguments). -command = try (do +command = try $ do char '\\' name <- many1 alphaNum star <- option "" (string "*") -- some commands have starred versions args <- commandArgs - return (name, star, args)) + return (name, star, args) -begin name = try (do - string "\\begin{" - string name - char '}' - option [] commandArgs +begin name = try $ do + string $ "\\begin{" ++ name ++ "}" + optional commandArgs spaces - return name) + return name -end name = try (do - string "\\end{" - string name - char '}' +end name = try $ do + string $ "\\end{" ++ name ++ "}" spaces - return name) + return name -- | Returns a list of block elements containing the contents of an -- environment. -environment name = try (do - begin name - spaces - contents <- manyTill block (end name) - return contents) +environment name = try $ begin name >> spaces >> manyTill block (end name) -anyEnvironment = try (do +anyEnvironment = try $ do string "\\begin{" name <- many alphaNum star <- option "" (string "*") -- some environments have starred variants char '}' - option [] commandArgs + optional commandArgs spaces contents <- manyTill block (end (name ++ star)) - return (BlockQuote contents)) + return $ BlockQuote contents -- -- parsing documents -- -- | Process LaTeX preamble, extracting metadata. -processLaTeXPreamble = try (do - manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) - (try (string "\\begin{document}")) - spaces) +processLaTeXPreamble = try $ manyTill + (choice [bibliographic, comment, unknownCommand, nullBlock]) + (try (string "\\begin{document}")) >> + spaces -- | Parse LaTeX and return 'Pandoc'. parseLaTeX = do - option () processLaTeXPreamble -- preamble might not be present (fragment) + optional processLaTeXPreamble -- preamble might not be present (fragment) + spaces blocks <- parseBlocks spaces - option "" (try (string "\\end{document}")) -- might not be present (in fragment) + optional $ try (string "\\end{document}") -- might not be present (fragment) spaces eof state <- getState @@ -136,21 +125,27 @@ parseLaTeX = do let title' = stateTitle state let authors' = stateAuthors state let date' = stateDate state - return (Pandoc (Meta title' authors' date') blocks') + return $ Pandoc (Meta title' authors' date') blocks' -- -- parsing blocks -- -parseBlocks = do - spaces - result <- many block - return result - -block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, - comment, bibliographic, para, specialEnvironment, - itemBlock, unknownEnvironment, unknownCommand ] - "block" +parseBlocks = spaces >> many block + +block = choice [ hrule + , codeBlock + , header + , list + , blockQuote + , mathBlock + , comment + , bibliographic + , para + , specialEnvironment + , itemBlock + , unknownEnvironment + , unknownCommand ] "block" -- -- header blocks @@ -158,24 +153,21 @@ block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, header = choice (map headerLevel (enumFromTo 1 5)) "header" -headerLevel n = try (do +headerLevel n = try $ do let subs = concat $ replicate (n - 1) "sub" string ("\\" ++ subs ++ "section") - option ' ' (char '*') + optional (char '*') char '{' title <- manyTill inline (char '}') spaces - return (Header n (normalizeSpaces title))) + return $ Header n (normalizeSpaces title) -- -- hrule block -- -hrule = try (do - oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", - "\\newpage" ] - spaces - return HorizontalRule) +hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", + "\\newpage" ] >> spaces >> return HorizontalRule -- -- code blocks @@ -183,37 +175,28 @@ hrule = try (do codeBlock = codeBlock1 <|> codeBlock2 -codeBlock1 = try (do +codeBlock1 = try $ do string "\\begin{verbatim}" -- don't use begin function because it -- gobbles whitespace - option "" blanklines -- we want to gobble blank lines, but not + optional blanklines -- we want to gobble blank lines, but not -- leading space contents <- manyTill anyChar (try (string "\\end{verbatim}")) spaces - return (CodeBlock (stripTrailingNewlines contents))) + return $ CodeBlock (stripTrailingNewlines contents) -codeBlock2 = try (do - string "\\begin{Verbatim}" -- used by fancyverb package +codeBlock2 = try $ do + string "\\begin{Verbatim}" -- used by fancyvrb package option "" blanklines contents <- manyTill anyChar (try (string "\\end{Verbatim}")) spaces - return (CodeBlock (stripTrailingNewlines contents))) + return $ CodeBlock (stripTrailingNewlines contents) -- -- block quotes -- -blockQuote = choice [ blockQuote1, blockQuote2 ] "blockquote" - -blockQuote1 = try (do - blocks <- environment "quote" - spaces - return (BlockQuote blocks)) - -blockQuote2 = try (do - blocks <- environment "quotation" - spaces - return (BlockQuote blocks)) +blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= + return . BlockQuote -- -- math block @@ -223,12 +206,12 @@ mathBlock = mathBlockWith (begin "equation") (end "equation") <|> mathBlockWith (begin "displaymath") (end "displaymath") <|> mathBlockWith (string "\\[") (string "\\]") "math block" -mathBlockWith start end = try (do +mathBlockWith start end = try $ do start spaces result <- manyTill anyChar end spaces - return (BlockQuote [Para [TeX ("$" ++ result ++ "$")]])) + return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]] -- -- list blocks @@ -237,69 +220,66 @@ mathBlockWith start end = try (do list = bulletList <|> orderedList <|> definitionList "list" listItem = try $ do - ("item", _, args) <- command - spaces - state <- getState - let oldParserContext = stateParserContext state - updateState (\state -> state {stateParserContext = ListItemState}) - blocks <- many block - updateState (\state -> state {stateParserContext = oldParserContext}) - opt <- case args of - ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> - parseFromString (many inline) $ tail $ init x - _ -> return [] - return (opt, blocks) + ("item", _, args) <- command + spaces + state <- getState + let oldParserContext = stateParserContext state + updateState (\state -> state {stateParserContext = ListItemState}) + blocks <- many block + updateState (\state -> state {stateParserContext = oldParserContext}) + opt <- case args of + ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> + parseFromString (many inline) $ tail $ init x + _ -> return [] + return (opt, blocks) orderedList = try $ do - string "\\begin{enumerate}" - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ do failIfStrict - char '[' - res <- anyOrderedListMarker - char ']' - return res - spaces - option "" $ try $ do string "\\setlength{\\itemindent}" - char '{' - manyTill anyChar (char '}') - spaces - start <- option 1 $ try $ do failIfStrict - string "\\setcounter{enum" - many1 (char 'i') - string "}{" - num <- many1 digit - char '}' - spaces - return $ (read num) + 1 - items <- many listItem - end "enumerate" - spaces - return $ OrderedList (start, style, delim) $ map snd items + string "\\begin{enumerate}" + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ + try $ do failIfStrict + char '[' + res <- anyOrderedListMarker + char ']' + return res + spaces + option "" $ try $ do string "\\setlength{\\itemindent}" + char '{' + manyTill anyChar (char '}') + spaces + start <- option 1 $ try $ do failIfStrict + string "\\setcounter{enum" + many1 (char 'i') + string "}{" + num <- many1 digit + char '}' + spaces + return $ (read num) + 1 + items <- many listItem + end "enumerate" + spaces + return $ OrderedList (start, style, delim) $ map snd items bulletList = try $ do - begin "itemize" - spaces - items <- many listItem - end "itemize" - spaces - return (BulletList $ map snd items) + begin "itemize" + spaces + items <- many listItem + end "itemize" + spaces + return (BulletList $ map snd items) definitionList = try $ do - begin "description" - spaces - items <- many listItem - end "description" - spaces - return (DefinitionList items) + begin "description" + spaces + items <- many listItem + end "description" + spaces + return (DefinitionList items) -- -- paragraph block -- -para = try (do - result <- many1 inline - spaces - return (Para (normalizeSpaces result))) +para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces -- -- title authors date @@ -307,33 +287,30 @@ para = try (do bibliographic = choice [ maketitle, title, authors, date ] -maketitle = try (do - string "\\maketitle" - spaces - return Null) +maketitle = try (string "\\maketitle") >> spaces >> return Null -title = try (do +title = try $ do string "\\title{" tit <- manyTill inline (char '}') spaces updateState (\state -> state { stateTitle = tit }) - return Null) + return Null -authors = try (do +authors = try $ do string "\\author{" authors <- manyTill anyChar (char '}') spaces let authors' = map removeLeadingTrailingSpace $ lines $ substitute "\\\\" "\n" authors updateState (\state -> state { stateAuthors = authors' }) - return Null) + return Null -date = try (do +date = try $ do string "\\date{" date' <- manyTill anyChar (char '}') spaces updateState (\state -> state { stateDate = date' }) - return Null) + return Null -- -- item block @@ -341,14 +318,14 @@ date = try (do -- -- this forces items to be parsed in different blocks -itemBlock = try (do +itemBlock = try $ do ("item", _, args) <- command state <- getState if (stateParserContext state == ListItemState) then fail "item should be handled by list block" else if null args then return Null - else return (Plain [Str (stripFirstAndLast (head args))])) + else return $ Plain [Str (stripFirstAndLast (head args))] -- -- raw LaTeX @@ -362,77 +339,93 @@ specialEnvironment = do -- these are always parsed as raw -- | Parse any LaTeX environment and return a Para block containing -- the whole literal environment as raw TeX. rawLaTeXEnvironment :: GenParser Char st Block -rawLaTeXEnvironment = try (do - string "\\begin" - char '{' +rawLaTeXEnvironment = try $ do + string "\\begin{" name <- many1 alphaNum star <- option "" (string "*") -- for starred variants let name' = name ++ star char '}' args <- option [] commandArgs let argStr = concat args - contents <- manyTill (choice [(many1 (noneOf "\\")), + contents <- manyTill (choice [ (many1 (noneOf "\\")), (do (Para [TeX str]) <- rawLaTeXEnvironment return str), string "\\" ]) (end name') spaces - return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++ - (concat contents) ++ "\\end{" ++ name' ++ "}")])) + return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++ + concat contents ++ "\\end{" ++ name' ++ "}"] -unknownEnvironment = try (do +unknownEnvironment = try $ do state <- getState result <- if stateParseRaw state -- check whether we should include raw TeX then rawLaTeXEnvironment -- if so, get whole raw environment else anyEnvironment -- otherwise just the contents - return result) + return result -unknownCommand = try (do - notFollowedBy' $ choice $ map end - ["itemize", "enumerate", "description", "document"] +unknownCommand = try $ do + notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", + "document"] (name, star, args) <- command spaces let argStr = concat args state <- getState - if (name == "item") && ((stateParserContext state) == ListItemState) + if name == "item" && (stateParserContext state) == ListItemState then fail "should not be parsed as raw" else string "" if stateParseRaw state - then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)]) - else return (Plain [Str (joinWithSep " " args)])) + then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)] + else return $ Plain [Str (joinWithSep " " args)] -- latex comment -comment = try (do - char '%' - result <- manyTill anyChar newline - spaces - return Null) +comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null -- -- inline -- -inline = choice [ strong, emph, strikeout, superscript, subscript, - ref, lab, code, linebreak, spacer, - math, ellipses, emDash, enDash, hyphen, quoted, apostrophe, - accentedChar, specialChar, specialInline, escapedChar, - unescapedChar, str, endline, whitespace ] "inline" - -specialInline = choice [ url, link, image, footnote, rawLaTeXInline ] - "link, raw TeX, note, or image" +inline = choice [ strong + , emph + , strikeout + , superscript + , subscript + , ref + , lab + , code + , linebreak + , spacer + , math + , ellipses + , emDash + , enDash + , hyphen + , quoted + , apostrophe + , accentedChar + , specialChar + , url + , link + , image + , footnote + , rawLaTeXInline + , escapedChar + , unescapedChar + , str + , endline + , whitespace ] "inline" accentedChar = normalAccentedChar <|> specialAccentedChar -normalAccentedChar = try (do +normalAccentedChar = try $ do char '\\' accent <- oneOf "'`^\"~" - character <- choice [ between (char '{') (char '}') anyChar, anyChar ] + character <- (try $ char '{' >> alphaNum >>~ char '}') <|> alphaNum let table = fromMaybe [] $ lookup character accentTable let result = case lookup accent table of Just num -> chr num Nothing -> '?' - return (Str [result])) + return $ Str [result] -- an association list of letters and association list of accents -- and decimal character numbers. @@ -451,245 +444,179 @@ accentTable = ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, - oslash, pound, euro, copyright, sect ] + oslash, pound, euro, copyright, sect ] -ccedil = try (do +ccedil = try $ do char '\\' letter <- oneOfStrings ["cc", "cC"] let num = if letter == "cc" then 231 else 199 - return (Str [chr num])) + return $ Str [chr num] -aring = try (do +aring = try $ do char '\\' letter <- oneOfStrings ["aa", "AA"] let num = if letter == "aa" then 229 else 197 - return (Str [chr num])) + return $ Str [chr num] -iuml = try (do - string "\\\"" - oneOfStrings ["\\i", "{\\i}"] - return (Str [chr 239])) +iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> + return (Str [chr 239]) -icirc = try (do - string "\\^" - oneOfStrings ["\\i", "{\\i}"] - return (Str [chr 238])) +icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >> + return (Str [chr 238]) -szlig = try (do - string "\\ss" - return (Str [chr 223])) +szlig = try (string "\\ss") >> return (Str [chr 223]) -oslash = try (do +oslash = try $ do char '\\' letter <- choice [char 'o', char 'O'] let num = if letter == 'o' then 248 else 216 - return (Str [chr num])) + return $ Str [chr num] -aelig = try (do +aelig = try $ do char '\\' letter <- oneOfStrings ["ae", "AE"] let num = if letter == "ae" then 230 else 198 - return (Str [chr num])) + return $ Str [chr num] -pound = try (do - string "\\pounds" - return (Str [chr 163])) +pound = try (string "\\pounds") >> return (Str [chr 163]) -euro = try (do - string "\\euro" - return (Str [chr 8364])) +euro = try (string "\\euro") >> return (Str [chr 8364]) -copyright = try (do - string "\\copyright" - return (Str [chr 169])) +copyright = try (string "\\copyright") >> return (Str [chr 169]) -sect = try (do - string "\\S" - return (Str [chr 167])) +sect = try (string "\\S") >> return (Str [chr 167]) escapedChar = do result <- escaped (oneOf " $%&_#{}\n") - return (if result == Str "\n" then Str " " else result) + return $ if result == Str "\n" then Str " " else result -unescapedChar = do -- ignore standalone, nonescaped special characters - oneOf "$^&_#{}|<>" - return (Str "") +-- ignore standalone, nonescaped special characters +unescapedChar = oneOf "$^&_#{}|<>" >> return (Str "") specialChar = choice [ backslash, tilde, caret, bar, lt, gt ] -backslash = try (do - string "\\textbackslash" - return (Str "\\")) +backslash = try (string "\\textbackslash") >> return (Str "\\") -tilde = try (do - string "\\ensuremath{\\sim}" - return (Str "~")) +tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~") -caret = try (do - string "\\^{}" - return (Str "^")) +caret = try (string "\\^{}") >> return (Str "^") -bar = try (do - string "\\textbar" - return (Str "\\")) +bar = try (string "\\textbar") >> return (Str "\\") -lt = try (do - string "\\textless" - return (Str "<")) +lt = try (string "\\textless") >> return (Str "<") -gt = try (do - string "\\textgreater" - return (Str ">")) +gt = try (string "\\textgreater") >> return (Str ">") -code = try (do +code = try $ do string "\\verb" marker <- anyChar result <- manyTill anyChar (char marker) - let result' = removeLeadingTrailingSpace result - return (Code result')) + return $ Code $ removeLeadingTrailingSpace result -emph = try (do - oneOfStrings [ "\\emph{", "\\textit{" ] - result <- manyTill inline (char '}') - return (Emph result)) +emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> + manyTill inline (char '}') >>= return . Emph -strikeout = try $ do - string "\\sout{" - result <- manyTill inline (char '}') - return (Strikeout result) +strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>= + return . Strikeout -superscript = try $ do - string "\\textsuperscript{" - result <- manyTill inline (char '}') - return (Superscript result) +superscript = try $ string "\\textsuperscript{" >> + manyTill inline (char '}') >>= return . Superscript -- note: \textsubscript isn't a standard latex command, but we use -- a defined version in pandoc. -subscript = try $ do - string "\\textsubscript{" - result <- manyTill inline (char '}') - return (Subscript result) +subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>= + return . Subscript -apostrophe = do - char '\'' - return Apostrophe +apostrophe = char '\'' >> return Apostrophe -quoted = do - doubleQuoted <|> singleQuoted +quoted = doubleQuoted <|> singleQuoted -singleQuoted = try (do - result <- enclosed singleQuoteStart singleQuoteEnd inline - return $ Quoted SingleQuote $ normalizeSpaces result) +singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>= + return . Quoted SingleQuote . normalizeSpaces -doubleQuoted = try (do - result <- enclosed doubleQuoteStart doubleQuoteEnd inline - return $ Quoted DoubleQuote $ normalizeSpaces result) +doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>= + return . Quoted DoubleQuote . normalizeSpaces singleQuoteStart = char '`' -singleQuoteEnd = char '\'' >> notFollowedBy alphaNum +singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum doubleQuoteStart = string "``" doubleQuoteEnd = string "''" -ellipses = try (do - string "\\ldots" - option "" (try (string "{}")) - return Ellipses) +ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >> + return Ellipses -enDash = try (do - string "--" - notFollowedBy (char '-') - return EnDash) +enDash = try (string "--") >> return EnDash -emDash = try (do - string "---" - return EmDash) +emDash = try (string "---") >> return EmDash -hyphen = do - char '-' - return (Str "-") +hyphen = char '-' >> return (Str "-") -lab = try (do +lab = try $ do string "\\label{" result <- manyTill anyChar (char '}') - return (Str ("(" ++ result ++ ")"))) + return $ Str $ "(" ++ result ++ ")" -ref = try (do - string "\\ref{" - result <- manyTill anyChar (char '}') - return (Str (result))) +ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str -strong = try (do - string "\\textbf{" - result <- manyTill inline (char '}') - return (Strong result)) +strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= + return . Strong -whitespace = do - many1 (oneOf "~ \t") - return Space +whitespace = many1 (oneOf "~ \t") >> return Space -- hard line break -linebreak = try (do - string "\\\\" - return LineBreak) +linebreak = try (string "\\\\") >> return LineBreak -spacer = try $ do - string "\\," - return (Str "") +spacer = try (string "\\,") >> return (Str "") -str = do - result <- many1 (noneOf specialChars) - return (Str result) +str = many1 (noneOf specialChars) >>= return . Str -- endline internal to paragraph -endline = try (do - newline - notFollowedBy blankline - return Space) +endline = try $ newline >> notFollowedBy blankline >> return Space -- math math = math1 <|> math2 "math" -math1 = try (do +math1 = try $ do char '$' result <- many (noneOf "$") char '$' - return (TeX ("$" ++ result ++ "$"))) + return $ TeX ("$" ++ result ++ "$") -math2 = try (do +math2 = try $ do string "\\(" result <- many (noneOf "$") string "\\)" - return (TeX ("$" ++ result ++ "$"))) + return $ TeX ("$" ++ result ++ "$") -- -- links and images -- -url = try (do +url = try $ do string "\\url" url <- charsInBalanced '{' '}' - return (Link [Code url] (url, ""))) + return $ Link [Code url] (url, "") -link = try (do +link = try $ do string "\\href{" url <- manyTill anyChar (char '}') char '{' label <- manyTill inline (char '}') - return (Link (normalizeSpaces label) (url, ""))) + return $ Link (normalizeSpaces label) (url, "") -image = try (do +image = try $ do ("includegraphics", _, args) <- command let args' = filter isArg args -- filter out options let src = if null args' then ("", "") else (stripFirstAndLast (head args'), "") - return (Image [Str "image"] src)) + return $ Image [Str "image"] src -footnote = try (do +footnote = try $ do (name, _, (contents:[])) <- command if ((name == "footnote") || (name == "thanks")) then string "" @@ -700,16 +627,15 @@ footnote = try (do setInput $ contents' blocks <- parseBlocks setInput rest - return (Note blocks)) + return $ Note blocks -- | Parse any LaTeX command and return it in a raw TeX inline element. rawLaTeXInline :: GenParser Char ParserState Inline -rawLaTeXInline = try (do +rawLaTeXInline = try $ do (name, star, args) <- command - let argStr = concat args state <- getState if ((name == "begin") || (name == "end") || (name == "item")) then fail "not an inline command" else string "" - return (TeX ("\\" ++ name ++ star ++ argStr))) + return $ TeX ("\\" ++ name ++ star ++ concat args) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 3ccb74ba7..80a8507b4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,28 +31,24 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup ) +import Data.List ( transpose, isSuffixOf, lookup, sortBy ) +import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) -import Text.Pandoc.ParserCombinators import Text.Pandoc.Definition -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) import Text.Pandoc.Shared +import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType, htmlBlockElement ) -import Text.Pandoc.Entities ( characterEntity, decodeEntities ) +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ParserState -> String -> Pandoc readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n") --- | Parse markdown string with default options and print result (for testing). -testString :: String -> IO () -testString = testStringWith parseMarkdown - -- -- Constants and data structure definitions -- @@ -70,19 +66,16 @@ specialChars = "\\[]*_~`<>$!^-.&'\"" -- auxiliary functions -- --- | Skip a single endline if there is one. -skipEndline = option Space endline - indentSpaces = try $ do state <- getState let tabStop = stateTabStop state try (count tabStop (char ' ')) <|> - (do{many (char ' '); string "\t"}) "indentation" + (many (char ' ') >> string "\t") "indentation" nonindentSpaces = do state <- getState let tabStop = stateTabStop state - choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)])) + choice $ map (\n -> (try (count n (char ' ')))) $ reverse [0..(tabStop - 1)] -- | Fail unless we're at beginning of a line. failUnlessBeginningOfLine = do @@ -94,20 +87,21 @@ failUnlessSmart = do state <- getState if stateSmart state then return () else fail "Smart typography feature" +-- | Parse an inline Str element with a given content. +inlineString str = try $ do + (Str res) <- inline + if res == str then return res else fail $ "unexpected Str content" + -- | Parse a sequence of inline elements between a string -- @opener@ and a string @closer@, including inlines -- between balanced pairs of @opener@ and a @closer@. inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] inlinesInBalanced opener closer = try $ do - let openerSymbol = try $ do - res <- inline - if res == Str opener - then return res - else pzero - try (string opener) - result <- manyTill ( (do lookAhead openerSymbol - bal <- inlinesInBalanced opener closer - return $ [Str opener] ++ bal ++ [Str closer]) + string opener + result <- manyTill ( (do lookAhead (inlineString opener) + -- because it might be a link... + bal <- inlinesInBalanced opener closer + return $ [Str opener] ++ bal ++ [Str closer]) <|> (count 1 inline)) (try (string closer)) return $ concat result @@ -116,59 +110,55 @@ inlinesInBalanced opener closer = try $ do -- document structure -- -titleLine = try (do - char '%' - skipSpaces - line <- manyTill inline newline - return line) +titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline -authorsLine = try (do +authorsLine = try $ do char '%' skipSpaces authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") newline - return (map (decodeEntities . removeLeadingTrailingSpace) authors)) + return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors -dateLine = try (do +dateLine = try $ do char '%' skipSpaces date <- many (noneOf "\n") newline - return (decodeEntities $ removeTrailingSpace date)) + return $ decodeCharacterReferences $ removeTrailingSpace date -titleBlock = try (do +titleBlock = try $ do failIfStrict title <- option [] titleLine author <- option [] authorsLine date <- option "" dateLine - option "" blanklines - return (title, author, date)) + optional blanklines + return (title, author, date) parseMarkdown = do - updateState (\state -> state { stateParseRaw = True }) -- markdown allows raw HTML + -- markdown allows raw HTML + updateState (\state -> state { stateParseRaw = True }) (title, author, date) <- option ([],[],"") titleBlock -- go through once just to get list of reference keys - refs <- manyTill (referenceKey <|> (do l <- lineClump - return (LineClump l))) eof + refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof let keys = map (\(KeyBlock label target) -> (label, target)) $ filter isKeyBlock refs let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs setInput $ concat rawlines -- with keys stripped out updateState (\state -> state { stateKeys = keys }) - -- now go through for notes - refs <- manyTill (noteBlock <|> (do l <- lineClump - return (LineClump l))) eof + -- now go through for notes (which may contain references - hence 2nd pass) + refs <- manyTill (noteBlock <|> (lineClump >>= return . LineClump)) eof let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $ filter isNoteBlock refs let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs - setInput $ concat rawlines -- with note blocks and keys stripped out + -- go through a 3rd time, with note blocks and keys stripped out + setInput $ concat rawlines updateState (\state -> state { stateNotes = notes }) - blocks <- parseBlocks -- go through again, for real + blocks <- parseBlocks let blocks' = filter (/= Null) blocks - return (Pandoc (Meta title author date) blocks') + return $ Pandoc (Meta title author date) blocks' -- --- initial pass for references +-- initial pass for references and notes -- referenceKey = try $ do @@ -176,9 +166,9 @@ referenceKey = try $ do label <- reference char ':' skipSpaces - option ' ' (char '<') + optional (char '<') src <- many (noneOf "> \n\t") - option ' ' (char '>') + optional (char '>') tit <- option "" title blanklines return $ KeyBlock label (removeTrailingSpace src, tit) @@ -189,33 +179,28 @@ noteMarker = try $ do manyTill (noneOf " \t\n") (char ']') rawLine = try $ do - notFollowedBy' blankline + notFollowedBy blankline notFollowedBy' noteMarker contents <- many1 nonEndline - end <- option "" (do - newline - option "" indentSpaces - return "\n") - return (contents ++ end) + end <- option "" (newline >> optional indentSpaces >> return "\n") + return $ contents ++ end -rawLines = do - lines <- many1 rawLine - return (concat lines) +rawLines = many1 rawLine >>= return . concat noteBlock = try $ do failIfStrict ref <- noteMarker char ':' - option ' ' blankline - option "" indentSpaces - raw <- sepBy rawLines (try (do {blankline; indentSpaces})) - option "" blanklines + optional blankline + optional indentSpaces + raw <- sepBy rawLines (try (blankline >> indentSpaces)) + optional blanklines -- parse the extracted text, which may contain various block elements: rest <- getInput setInput $ (joinWithSep "\n" raw) ++ "\n\n" contents <- parseBlocks setInput rest - return (NoteBlock ref contents) + return $ NoteBlock ref contents -- -- parsing blocks @@ -239,48 +224,39 @@ block = choice [ header -- header blocks -- -header = choice [ setextHeader, atxHeader ] "header" +header = setextHeader <|> atxHeader "header" -atxHeader = try (do +atxHeader = try $ do lead <- many1 (char '#') - notFollowedBy (char '.') -- this would be a list - notFollowedBy (char ')') + notFollowedBy (char '.' <|> char ')') -- this would be a list skipSpaces txt <- manyTill inline atxClosing - return (Header (length lead) (normalizeSpaces txt))) + return $ Header (length lead) (normalizeSpaces txt) -atxClosing = try (do - skipMany (char '#') - skipSpaces - newline - option "" blanklines) +atxClosing = try $ skipMany (char '#') >> skipSpaces >> newline >> + option "" blanklines setextHeader = choice $ - map (\x -> setextH x) (enumFromTo 1 (length setextHChars)) + map (\x -> setextH x) $ enumFromTo 1 (length setextHChars) -setextH n = try (do +setextH n = try $ do txt <- many1Till inline newline many1 (char (setextHChars !! (n-1))) skipSpaces newline - option "" blanklines - return (Header n (normalizeSpaces txt))) + optional blanklines + return $ Header n (normalizeSpaces txt) -- -- hrule block -- -hruleWith chr = try (do - skipSpaces - char chr - skipSpaces - char chr - skipSpaces - char chr - skipMany (oneOf (chr:spaceChars)) +hruleWith chr = try $ do + count 3 (skipSpaces >> char chr) + skipMany (skipSpaces >> char chr) newline - option "" blanklines - return HorizontalRule) + optional blanklines + return HorizontalRule hrule = choice (map hruleWith hruleChars) "hrule" @@ -288,67 +264,55 @@ hrule = choice (map hruleWith hruleChars) "hrule" -- code blocks -- -indentedLine = try (do +indentedLine = try $ do indentSpaces result <- manyTill anyChar newline - return (result ++ "\n")) + return $ result ++ "\n" -- two or more indented lines, possibly separated by blank lines -indentedBlock = try (do +indentedBlock = try $ do res1 <- indentedLine blanks <- many blankline - res2 <- choice [indentedBlock, indentedLine] - return (res1 ++ blanks ++ res2)) + res2 <- indentedBlock <|> indentedLine + return $ res1 ++ blanks ++ res2 -codeBlock = do - result <- choice [indentedBlock, indentedLine] - option "" blanklines - return (CodeBlock (stripTrailingNewlines result)) +codeBlock = (indentedBlock <|> indentedLine) >>~ optional blanklines >>= + return . CodeBlock . stripTrailingNewlines -- -- block quotes -- -emacsBoxQuote = try (do +emacsBoxQuote = try $ do failIfStrict string ",----" manyTill anyChar newline - raw <- manyTill (try (do - char '|' - option ' ' (char ' ') - result <- manyTill anyChar newline - return result)) - (string "`----") - manyTill anyChar newline - option "" blanklines - return raw) + raw <- manyTill + (try (char '|' >> optional (char ' ') >> manyTill anyChar newline)) + (try (string "`----")) + blanklines + return raw -emailBlockQuoteStart = try (do - nonindentSpaces - char '>' - option ' ' (char ' ') - return "> ") +emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') -emailBlockQuote = try (do +emailBlockQuote = try $ do emailBlockQuoteStart - raw <- sepBy (many (choice [nonEndline, - (try (do - endline - notFollowedBy' emailBlockQuoteStart - return '\n'))])) - (try (do {newline; emailBlockQuoteStart})) - newline <|> (do{ eof; return '\n' }) - option "" blanklines - return raw) + raw <- sepBy (many (nonEndline <|> + (try (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n')))) + (try (newline >> emailBlockQuoteStart)) + newline <|> (eof >> return '\n') + optional blanklines + return raw blockQuote = do - raw <- choice [ emailBlockQuote, emacsBoxQuote ] + raw <- emailBlockQuote <|> emacsBoxQuote -- parse the extracted block, which may contain various block elements: rest <- getInput setInput $ (joinWithSep "\n" raw) ++ "\n\n" contents <- parseBlocks setInput rest - return (BlockQuote contents) + return $ BlockQuote contents -- -- list blocks @@ -357,7 +321,7 @@ blockQuote = do list = choice [ bulletList, orderedList, definitionList ] "list" bulletListStart = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces notFollowedBy' hrule -- because hrules start out just like lists oneOf bulletListMarkers @@ -365,7 +329,7 @@ bulletListStart = try $ do skipSpaces anyOrderedListStart = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces state <- getState if stateStrict state @@ -375,7 +339,7 @@ anyOrderedListStart = try $ do else anyOrderedListMarker orderedListStart style delim = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces state <- getState if stateStrict state @@ -387,40 +351,39 @@ orderedListStart style delim = try $ do skipSpaces -- parse a line of a list item (start = parser for beginning of list item) -listLine start = try (do +listLine start = try $ do notFollowedBy' start notFollowedBy blankline - notFollowedBy' (do - indentSpaces - many (spaceChar) - choice [bulletListStart, anyOrderedListStart >> return ()]) + notFollowedBy' (do indentSpaces + many (spaceChar) + bulletListStart <|> (anyOrderedListStart >> return ())) line <- manyTill anyChar newline - return (line ++ "\n")) + return $ line ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem start = try (do +rawListItem start = try $ do start result <- many1 (listLine start) blanks <- many blankline - return ((concat result) ++ blanks)) + return $ concat result ++ blanks -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation start = try (do +listContinuation start = try $ do lookAhead indentSpaces result <- many1 (listContinuationLine start) blanks <- many blankline - return ((concat result) ++ blanks)) + return $ concat result ++ blanks -listContinuationLine start = try (do - notFollowedBy' blankline +listContinuationLine start = try $ do + notFollowedBy blankline notFollowedBy' start - option "" indentSpaces + optional indentSpaces result <- manyTill anyChar newline - return (result ++ "\n")) + return $ result ++ "\n" -listItem start = try (do +listItem start = try $ do first <- rawListItem start continuations <- many (listContinuation start) -- parsing with ListItemState forces markers at beginning of lines to @@ -436,18 +399,15 @@ listItem start = try (do contents <- parseBlocks setInput rest updateState (\st -> st {stateParserContext = oldContext}) - return contents) + return contents -orderedList = try (do +orderedList = do (start, style, delim) <- lookAhead anyOrderedListStart items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify items - return (OrderedList (start, style, delim) items')) + return $ OrderedList (start, style, delim) $ compactify items -bulletList = try (do - items <- many1 (listItem bulletListStart) - let items' = compactify items - return (BulletList items')) +bulletList = many1 (listItem bulletListStart) >>= + return . BulletList . compactify -- definition lists @@ -470,9 +430,9 @@ defRawBlock = try $ do char ':' state <- getState let tabStop = stateTabStop state - try (count (tabStop - 1) (char ' ')) <|> (do{many (char ' '); string "\t"}) + try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") firstline <- anyLine - rawlines <- many (do {notFollowedBy' blankline; indentSpaces; anyLine}) + rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) trailing <- option "" blanklines return $ firstline ++ "\n" ++ unlines rawlines ++ trailing @@ -488,71 +448,62 @@ definitionList = do -- paragraph block -- -para = try (do +para = try $ do result <- many1 inline newline st <- getState if stateStrict st - then choice [lookAhead blockQuote, lookAhead header, - (do{blanklines; return Null})] - else choice [(do{lookAhead emacsBoxQuote; return Null}), - (do{blanklines; return Null})] - let result' = normalizeSpaces result - return (Para result')) - -plain = do - result <- many1 inline - let result' = normalizeSpaces result - return (Plain result') + then choice [ lookAhead blockQuote, lookAhead header, + (blanklines >> return Null) ] + else choice [ lookAhead emacsBoxQuote >> return Null, + (blanklines >> return Null) ] + return $ Para $ normalizeSpaces result + +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- raw html -- -htmlElement = choice [strictHtmlBlock, - htmlBlockElement] "html element" +htmlElement = strictHtmlBlock <|> htmlBlockElement "html element" htmlBlock = do st <- getState if stateStrict st - then do - failUnlessBeginningOfLine - first <- htmlElement - finalSpace <- many (oneOf spaceChars) - finalNewlines <- many newline - return (RawHtml (first ++ finalSpace ++ finalNewlines)) + then try $ do failUnlessBeginningOfLine + first <- htmlElement + finalSpace <- many (oneOf spaceChars) + finalNewlines <- many newline + return $ RawHtml $ first ++ finalSpace ++ finalNewlines else rawHtmlBlocks -- True if tag is self-closing isSelfClosing tag = isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag -strictHtmlBlock = try (do +strictHtmlBlock = try $ do tag <- anyHtmlBlockTag let tag' = extractTagType tag if isSelfClosing tag || tag' == "hr" then return tag - else do - contents <- many (do{notFollowedBy' (htmlEndTag tag'); - htmlElement <|> (count 1 anyChar)}) - end <- htmlEndTag tag' - return $ tag ++ (concat contents) ++ end) + else do contents <- many (notFollowedBy' (htmlEndTag tag') >> + (htmlElement <|> (count 1 anyChar))) + end <- htmlEndTag tag' + return $ tag ++ concat contents ++ end -rawHtmlBlocks = try (do +rawHtmlBlocks = try $ do htmlBlocks <- many1 rawHtmlBlock let combined = concatMap (\(RawHtml str) -> str) htmlBlocks - let combined' = if (last combined == '\n') + let combined' = if not (null combined) && last combined == '\n' then init combined -- strip extra newline else combined - return (RawHtml combined')) + return $ RawHtml combined' -- -- LaTeX -- -rawLaTeXEnvironment' = do - failIfStrict - rawLaTeXEnvironment +rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment -- -- Tables @@ -560,54 +511,46 @@ rawLaTeXEnvironment' = do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine ch = do - dashes <- many1 (char ch) - sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) +dashedLine ch = try $ do + dashes <- many1 (char ch) + sp <- many spaceChar + return $ (length dashes, length $ dashes ++ sp) -- Parse a table header with dashed lines of '-' preceded by -- one line of text. -simpleTableHeader = do - rawContent <- anyLine - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeads = tail $ splitByIndices (init indices) rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths - return $ (rawHeads, aligns, indices) +simpleTableHeader = try $ do + rawContent <- anyLine + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeads = tail $ splitByIndices (init indices) rawContent + let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths + return (rawHeads, aligns, indices) -- Parse a table footer - dashed lines followed by blank line. -tableFooter = try $ do - nonindentSpaces - many1 (dashedLine '-') - blanklines +tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep = try $ do - nonindentSpaces - many1 (dashedLine '-') - string "\n" +tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" -- Parse a raw line and split it into chunks by indices. rawTableLine indices = do - notFollowedBy' (blanklines <|> tableFooter) - line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ - splitByIndices (init indices) line + notFollowedBy' (blanklines <|> tableFooter) + line <- many1Till anyChar newline + return $ map removeLeadingTrailingSpace $ tail $ + splitByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). -tableLine indices = try $ do - rawline <- rawTableLine indices - mapM (parseFromString (many plain)) rawline +tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow indices = try $ do - colLines <- many1 (rawTableLine indices) - option "" blanklines - let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols + colLines <- many1 (rawTableLine indices) + optional blanklines + let cols = map unlines $ transpose colLines + mapM (parseFromString (many plain)) cols -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal @@ -615,22 +558,22 @@ widthsFromIndices :: Int -- Number of columns on terminal -> [Float] -- Fractional relative sizes of columns widthsFromIndices _ [] = [] widthsFromIndices numColumns indices = - let lengths = zipWith (-) indices (0:indices) - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs + let lengths = zipWith (-) indices (0:indices) + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. tableCaption = try $ do - nonindentSpaces - string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result + nonindentSpaces + string "Table:" + result <- many1 inline + blanklines + return $ normalizeSpaces result -- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. tableWith headerParser lineParser footerParser = try $ do @@ -653,30 +596,19 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines multilineTable = tableWith multilineTableHeader multilineRow tableFooter multilineTableHeader = try $ do - tableSep - rawContent <- many1 (do{notFollowedBy' tableSep; - many1Till anyChar newline}) - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeadsList = transpose $ map - (\ln -> tail $ splitByIndices (init indices) ln) - rawContent - let rawHeads = map (joinWithSep " ") rawHeadsList - let aligns = zipWith alignType rawHeadsList lengths - return $ ((map removeLeadingTrailingSpace rawHeads), - aligns, indices) - --- Returns the longest of a list of strings. -longest :: [String] -> String -longest [] = "" -longest [x] = x -longest (x:xs) = - if (length x) >= (maximum $ map length xs) - then x - else longest xs + tableSep + rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeadsList = transpose $ map + (\ln -> tail $ splitByIndices (init indices) ln) + rawContent + let rawHeads = map (joinWithSep " ") rawHeadsList + let aligns = zipWith alignType rawHeadsList lengths + return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the @@ -684,19 +616,17 @@ longest (x:xs) = alignType :: [String] -> Int -> Alignment alignType [] len = AlignDefault alignType strLst len = - let str = longest $ map removeTrailingSpace strLst - leftSpace = if null str then False else ((str !! 0) `elem` " \t") - rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in - case (leftSpace, rightSpace) of + let str = head $ sortBy (comparing length) $ + map removeTrailingSpace strLst + leftSpace = if null str then False else (str !! 0) `elem` " \t" + rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" + in case (leftSpace, rightSpace) of (True, False) -> AlignRight (False, True) -> AlignLeft - (True, True) -> AlignCenter + (True, True) -> AlignCenter (False, False) -> AlignDefault -table = do - failIfStrict - result <- simpleTable <|> multilineTable "table" - return result +table = failIfStrict >> (simpleTable <|> multilineTable) "table" -- -- inline @@ -704,7 +634,7 @@ table = do inline = choice [ rawLaTeXInline' , escapedChar - , entity + , charRef , note , inlineNote , link @@ -734,80 +664,64 @@ escapedChar = try $ do result <- if stateStrict state then oneOf "\\`*_{}[]()>#+-.!~" else satisfy (not . isAlphaNum) - return (Str [result]) + return $ Str [result] -ltSign = try (do +ltSign = try $ do notFollowedBy (noneOf "<") -- continue only if it's a < notFollowedBy' rawHtmlBlocks -- don't return < if it starts html char '<' - return (Str ['<'])) + return $ Str ['<'] specialCharsMinusLt = filter (/= '<') specialChars symbol = do result <- oneOf specialCharsMinusLt - return (Str [result]) + return $ Str [result] -- parses inline code, between n `s and n `s -code = try (do +code = try $ do starts <- many1 (char '`') let num = length starts result <- many1Till anyChar (try (count num (char '`'))) -- get rid of any internal newlines - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result - return (Code result')) + return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result -mathWord = many1 (choice [ (noneOf " \t\n\\$"), - (try (do - c <- char '\\' - notFollowedBy (char '$') - return c))]) +mathWord = many1 ((noneOf " \t\n\\$") <|> + (try (char '\\') >>~ notFollowedBy (char '$'))) -math = try (do +math = try $ do failIfStrict char '$' notFollowedBy space words <- sepBy1 mathWord (many1 space) char '$' - return (TeX ("$" ++ (joinWithSep " " words) ++ "$"))) + return $ TeX ("$" ++ (joinWithSep " " words) ++ "$") -emph = do - result <- choice [ (enclosed (char '*') (char '*') inline), - (enclosed (char '_') (char '_') inline) ] - return $ Emph (normalizeSpaces result) +emph = ((enclosed (char '*') (char '*') inline) <|> + (enclosed (char '_') (char '_') inline)) >>= + return . Emph . normalizeSpaces -strong = do - result <- (enclosed (string "**") (string "**") inline) <|> - (enclosed (string "__") (string "__") inline) - return $ Strong (normalizeSpaces result) +strong = ((enclosed (string "**") (string "**") inline) <|> + (enclosed (string "__") (string "__") inline)) >>= + return . Strong . normalizeSpaces -strikeout = do - failIfStrict - result <- enclosed (string "~~") (string "~~") inline - return $ Strikeout (normalizeSpaces result) +strikeout = failIfStrict >> enclosed (string "~~") (string "~~") inline >>= + return . Strikeout . normalizeSpaces -superscript = do - failIfStrict - result <- enclosed (char '^') (char '^') - (notFollowedBy' whitespace >> inline) -- may not contain Space - return $ Superscript result +superscript = failIfStrict >> enclosed (char '^') (char '^') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Superscript -subscript = do - failIfStrict - result <- enclosed (char '~') (char '~') - (notFollowedBy' whitespace >> inline) -- may not contain Space - return $ Subscript result +subscript = failIfStrict >> enclosed (char '~') (char '~') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Subscript -smartPunctuation = do - failUnlessSmart - choice [ quoted, apostrophe, dash, ellipses ] +smartPunctuation = failUnlessSmart >> + choice [ quoted, apostrophe, dash, ellipses ] -apostrophe = do - char '\'' <|> char '\8217' - return Apostrophe +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe -quoted = do - doubleQuoted <|> singleQuoted +quoted = doubleQuoted <|> singleQuoted withQuoteContext context parser = do oldState <- getState @@ -820,15 +734,13 @@ withQuoteContext context parser = do singleQuoted = try $ do singleQuoteStart - withQuoteContext InSingleQuote $ do - result <- many1Till inline singleQuoteEnd - return $ Quoted SingleQuote $ normalizeSpaces result + withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= + return . Quoted SingleQuote . normalizeSpaces doubleQuoted = try $ do doubleQuoteStart - withQuoteContext InDoubleQuote $ do - result <- many1Till inline doubleQuoteEnd - return $ Quoted DoubleQuote $ normalizeSpaces result + withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= + return . Quoted DoubleQuote . normalizeSpaces failIfInQuoteContext context = do st <- getState @@ -836,88 +748,65 @@ failIfInQuoteContext context = do then fail "already inside quotes" else return () -singleQuoteStart = try $ do +singleQuoteStart = do failIfInQuoteContext InSingleQuote - char '\8216' <|> do - char '\'' - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (do -- possessive or contraction - oneOfStrings ["s","t","m","ve","ll","re"] - satisfy (not . isAlphaNum))) - return '\'' - -singleQuoteEnd = try $ do - char '\'' <|> char '\8217' - notFollowedBy alphaNum - -doubleQuoteStart = try $ do - failIfInQuoteContext InDoubleQuote - char '"' <|> char '\8220' - notFollowedBy (oneOf " \t\n") + char '\8216' <|> + do char '\'' + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) -- possess/contraction + return '\'' + +singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum + +doubleQuoteStart = failIfInQuoteContext InDoubleQuote >> + (char '"' <|> char '\8220') >> + notFollowedBy (oneOf " \t\n") doubleQuoteEnd = char '"' <|> char '\8221' -ellipses = try (do - oneOfStrings ["...", " . . . ", ". . .", " . . ."] - return Ellipses) +ellipses = try $ oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> + return Ellipses dash = enDash <|> emDash -enDash = try (do - char '-' - notFollowedBy (noneOf "0123456789") - return EnDash) +enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash -emDash = try (do - skipSpaces - oneOfStrings ["---", "--"] - skipSpaces - return EmDash) +emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >> + skipSpaces >> return EmDash -whitespace = do - many1 (oneOf spaceChars) "whitespace" - return Space +whitespace = (many1 (oneOf spaceChars) >> return Space) "whitespace" -tabchar = do - tab - return (Str "\t") +tabchar = tab >> return (Str "\t") -- hard line break -linebreak = try (do - oneOf spaceChars - many1 (oneOf spaceChars) - endline - return LineBreak ) +linebreak = try $ oneOf spaceChars >> many1 (oneOf spaceChars) >> + endline >> return LineBreak nonEndline = satisfy (/='\n') -entity = do - ent <- characterEntity - return $ Str [ent] - strChar = noneOf (specialChars ++ spaceChars ++ "\n") -str = do - result <- many1 strChar - return (Str result) +str = many1 strChar >>= return . Str -- an endline character that can be treated as a space, not a structural break -endline = try (do +endline = try $ do newline notFollowedBy blankline st <- getState if stateStrict st then do - notFollowedBy' emailBlockQuoteStart + notFollowedBy emailBlockQuoteStart notFollowedBy (char '#') -- atx header - notFollowedBy (try (do{manyTill anyChar newline; - oneOf setextHChars})) -- setext header + notFollowedBy (manyTill anyChar newline >> oneOf setextHChars) + -- setext header else return () -- parse potential list-starts differently if in a list: - if (stateParserContext st) == ListItemState - then notFollowedBy' $ choice [bulletListStart, anyOrderedListStart >> return ()] + if stateParserContext st == ListItemState + then notFollowedBy' (bulletListStart <|> + (anyOrderedListStart >> return ())) else return () - return Space) + return Space -- -- links @@ -930,24 +819,23 @@ reference = notFollowedBy' (string "[^") >> -- footnote reference -- source for a link, with optional title source = try $ do char '(' - option ' ' (char '<') + optional (char '<') src <- many (noneOf ")> \t\n") - option ' ' (char '>') + optional (char '>') tit <- option "" title skipSpaces char ')' return (removeTrailingSpace src, tit) -titleWith startChar endChar = try (do +titleWith startChar endChar = try $ do leadingSpace <- many1 (oneOf " \t\n") if length (filter (=='\n') leadingSpace) > 1 then fail "title must be separated by space and on same or next line" else return () char startChar - tit <- manyTill anyChar (try (do char endChar - skipSpaces - notFollowedBy (noneOf ")\n"))) - return $ decodeEntities tit) + tit <- manyTill anyChar (try (char endChar >> skipSpaces >> + notFollowedBy (noneOf ")\n"))) + return $ decodeCharacterReferences tit title = choice [ titleWith '(' ')', titleWith '"' '"', @@ -955,22 +843,20 @@ title = choice [ titleWith '(' ')', link = choice [explicitLink, referenceLink] "link" -explicitLink = try (do +explicitLink = try $ do label <- reference src <- source - return (Link label src)) + return $ Link label src -- a link like [this][ref] or [this][] or [this] referenceLink = try $ do label <- reference - ref <- option [] (try (do skipSpaces - option ' ' newline - skipSpaces - reference)) + ref <- option [] (try (skipSpaces >> optional newline >> + skipSpaces >> reference)) let ref' = if null ref then label else ref state <- getState case lookupKeySrc (stateKeys state) ref' of - Nothing -> fail "no corresponding key" + Nothing -> fail "no corresponding key" Just target -> return (Link label target) autoLink = autoLinkEmail <|> autoLinkRegular @@ -992,10 +878,10 @@ autoLinkRegular = try $ do let src = prot ++ rest return $ Link [Code src] (src, "") -image = try (do +image = try $ do char '!' (Link label src) <- link - return (Image label src)) + return $ Image label src note = try $ do failIfStrict @@ -1003,23 +889,21 @@ note = try $ do state <- getState let notes = stateNotes state case lookup ref notes of - Nothing -> fail "note not found" - Just contents -> return (Note contents) + Nothing -> fail "note not found" + Just contents -> return $ Note contents inlineNote = try $ do failIfStrict char '^' contents <- inlinesInBalanced "[" "]" - return (Note [Para contents]) + return $ Note [Para contents] -rawLaTeXInline' = do - failIfStrict - rawLaTeXInline +rawLaTeXInline' = failIfStrict >> rawLaTeXInline rawHtmlInline' = do st <- getState - result <- if stateStrict st - then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else choice [htmlBlockElement, anyHtmlInlineTag] - return (HtmlInline result) + result <- choice $ if stateStrict st + then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] + else [htmlBlockElement, anyHtmlInlineTag] + return $ HtmlInline result diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a36c33d92..ce8fedf02 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,23 +31,14 @@ module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition -import Text.Pandoc.ParserCombinators import Text.Pandoc.Shared -import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag ) -import Text.Regex ( matchRegex, mkRegex ) import Text.ParserCombinators.Parsec -import Data.Maybe ( fromMaybe ) import Data.List ( findIndex, delete ) -import Data.Char ( toUpper ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -> String -> Pandoc readRST state str = (readWith parseRST) state (str ++ "\n\n") --- | Parse a string and print result (for testing). -testString :: String -> IO () -testString = testStringWith parseRST - -- -- Constants and data structure definitions --- @@ -62,15 +53,11 @@ specialChars = "\\`|*_<>$:[-" -- parsing documents -- -isAnonKey (ref, src) = (ref == [Str "_"]) - -isHeader1 :: Block -> Bool -isHeader1 (Header 1 _) = True -isHeader1 _ = False +isAnonKey (ref, src) = ref == [Str "_"] -isHeader2 :: Block -> Bool -isHeader2 (Header 2 _) = True -isHeader2 _ = False +isHeader :: Int -> Block -> Bool +isHeader n (Header x _) = x == n +isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) @@ -86,23 +73,23 @@ promoteHeaders num [] = [] titleTransform :: [Block] -- ^ list of blocks -> ([Block], [Inline]) -- ^ modified list of blocks, title titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle - if (any isHeader1 rest) || (any isHeader2 rest) + if (any (isHeader 1) rest) || (any (isHeader 2) rest) then ((Header 1 head1):(Header 2 head2):rest, []) else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) titleTransform ((Header 1 head1):rest) = -- title, no subtitle - if (any isHeader1 rest) + if (any (isHeader 1) rest) then ((Header 1 head1):rest, []) else ((promoteHeaders 1 rest), head1) titleTransform blocks = (blocks, []) parseRST = do - -- first pass: get anonymous keys - refs <- manyTill (referenceKey <|> (do l <- lineClump - return (LineClump l))) eof + -- first pass: get keys + refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof let keys = map (\(KeyBlock label target) -> (label, target)) $ filter isKeyBlock refs + -- second pass, with keys stripped out let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs - setInput $ concat rawlines -- with keys stripped out + setInput $ concat rawlines updateState (\state -> state { stateKeys = keys }) blocks <- parseBlocks let blocks' = filter (/= Null) blocks @@ -113,7 +100,7 @@ parseRST = do let authors = stateAuthors state let date = stateDate state let title' = if (null title) then (stateTitle state) else title - return (Pandoc (Meta title' authors date) blocks'') + return $ Pandoc (Meta title' authors date) blocks'' -- -- parsing blocks @@ -121,32 +108,39 @@ parseRST = do parseBlocks = manyTill block eof -block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, - imageBlock, unknownDirective, header, - hrule, list, fieldList, lineBlock, para, plain, - nullBlock ] "block" +block = choice [ codeBlock + , rawHtmlBlock + , rawLaTeXBlock + , blockQuote + , imageBlock + , unknownDirective + , header + , hrule + , list + , fieldList + , lineBlock + , para + , plain + , nullBlock ] "block" -- -- field list -- -fieldListItem = try (do +fieldListItem = try $ do char ':' name <- many1 alphaNum string ": " skipSpaces first <- manyTill anyChar newline - rest <- many (do - notFollowedBy (char ':') - notFollowedBy blankline - skipSpaces - manyTill anyChar newline ) - return (name, (joinWithSep " " (first:rest)))) - -fieldList = try (do + rest <- many (notFollowedBy ((char ':') <|> blankline) >> + skipSpaces >> manyTill anyChar newline) + return $ (name, (joinWithSep " " (first:rest))) + +fieldList = try $ do items <- many1 fieldListItem blanklines - let authors = case (lookup "Authors" items) of + let authors = case lookup "Authors" items of Just auth -> [auth] Nothing -> map snd (filter (\(x,y) -> x == "Author") items) let date = case (lookup "Date" items) of @@ -162,82 +156,74 @@ fieldList = try (do updateState (\st -> st { stateAuthors = authors, stateDate = date, stateTitle = title }) - return (BlockQuote result)) + return $ BlockQuote result -- -- line block -- -lineBlockLine = try (do +lineBlockLine = try $ do string "| " white <- many (oneOf " \t") line <- manyTill inline newline - let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak] - return line') + return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] -lineBlock = try (do +lineBlock = try $ do lines <- many1 lineBlockLine blanklines - return $ Para (concat lines)) + return $ Para (concat lines) -- -- paragraph block -- -para = choice [ paraBeforeCodeBlock, paraNormal ] "paragraph" +para = paraBeforeCodeBlock <|> paraNormal "paragraph" -codeBlockStart = try (do - string "::" - blankline - blankline) +codeBlockStart = try $ string "::" >> blankline >> blankline -- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock = try (do - result <- many1 (do {notFollowedBy' codeBlockStart; inline}) +paraBeforeCodeBlock = try $ do + result <- many1 (notFollowedBy' codeBlockStart >> inline) lookAhead (string "::") - return (Para (if (last result == Space) - then normalizeSpaces result - else (normalizeSpaces result) ++ [Str ":"]))) + return $ Para $ if last result == Space + then normalizeSpaces result + else (normalizeSpaces result) ++ [Str ":"] -- regular paragraph -paraNormal = try (do +paraNormal = try $ do result <- many1 inline newline blanklines - let result' = normalizeSpaces result - return (Para result')) + return $ Para $ normalizeSpaces result -plain = do - result <- many1 inline - let result' = normalizeSpaces result - return (Plain result') +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- image block -- -imageBlock = try (do +imageBlock = try $ do string ".. image:: " src <- manyTill anyChar newline - return (Plain [Image [Str "image"] (src, "")])) + return $ Plain [Image [Str "image"] (src, "")] -- -- header blocks -- -header = choice [ doubleHeader, singleHeader ] "header" +header = doubleHeader <|> singleHeader "header" -- a header with lines on top and bottom -doubleHeader = try (do +doubleHeader = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line let lenTop = length (c:rest) skipSpaces newline - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition + txt <- many1 (notFollowedBy blankline >> inline) + pos <- getPosition let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else (do {return ()}) + if (len > lenTop) then fail "title longer than border" else return () blankline -- spaces and newline count lenTop (char c) -- the bottom line blanklines @@ -249,10 +235,10 @@ doubleHeader = try (do Just ind -> (headerTable, ind + 1) Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) setState (state { stateHeaderTable = headerTable' }) - return (Header level (normalizeSpaces txt))) + return $ Header level (normalizeSpaces txt) -- a header with line on the bottom only -singleHeader = try (do +singleHeader = try $ do notFollowedBy' whitespace txt <- many1 (do {notFollowedBy blankline; inline}) pos <- getPosition @@ -268,19 +254,19 @@ singleHeader = try (do Just ind -> (headerTable, ind + 1) Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) setState (state { stateHeaderTable = headerTable' }) - return (Header level (normalizeSpaces txt))) + return $ Header level (normalizeSpaces txt) -- -- hrule block -- -hruleWith chr = try (do +hruleWith chr = try $ do count 4 (char chr) skipMany (char chr) skipSpaces newline blanklines - return HorizontalRule) + return HorizontalRule hrule = choice (map hruleWith underlineChars) "hrule" @@ -289,15 +275,16 @@ hrule = choice (map hruleWith underlineChars) "hrule" -- -- read a line indented by a given string -indentedLine indents = try (do +indentedLine indents = try $ do string indents result <- manyTill anyChar newline - return (result ++ "\n")) + return $ result ++ "\n" --- two or more indented lines, possibly separated by blank lines --- if variable = True, then any indent will work, but it must be consistent through the block --- if variable = False, indent should be one tab or equivalent in spaces -indentedBlock variable = try (do +-- two or more indented lines, possibly separated by blank lines. +-- if variable = True, then any indent will work, but it must be +-- consistent through the block. +-- if variable = False, indent should be one tab or equivalent in spaces. +indentedBlock variable = try $ do state <- getState let tabStop = stateTabStop state indents <- if variable @@ -305,51 +292,47 @@ indentedBlock variable = try (do else oneOfStrings ["\t", (replicate tabStop ' ')] firstline <- manyTill anyChar newline rest <- many (choice [ indentedLine indents, - try (do - b <- blanklines - l <- indentedLine indents - return (b ++ l))]) - option "" blanklines - return (firstline ++ "\n" ++ (concat rest))) - -codeBlock = try (do + try (do b <- blanklines + l <- indentedLine indents + return (b ++ l))]) + optional blanklines + return $ firstline ++ "\n" ++ concat rest + +codeBlock = try $ do codeBlockStart result <- indentedBlock False -- the False means we want one tab stop indent on each line - return (CodeBlock (stripTrailingNewlines result))) + return $ CodeBlock $ stripTrailingNewlines result -- -- raw html -- -rawHtmlBlock = try (do - string ".. raw:: html" - blanklines - result <- indentedBlock True - return (RawHtml result)) +rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> + indentedBlock True >>= return . RawHtml -- -- raw latex -- -rawLaTeXBlock = try (do +rawLaTeXBlock = try $ do string ".. raw:: latex" blanklines result <- indentedBlock True - return (Para [(TeX result)])) + return $ Para [(TeX result)] -- -- block quotes -- -blockQuote = try (do +blockQuote = try $ do raw <- indentedBlock True -- parse the extracted block, which may contain various block elements: rest <- getInput setInput $ raw ++ "\n\n" contents <- parseBlocks setInput rest - return (BlockQuote contents)) + return $ BlockQuote contents -- -- list blocks @@ -369,15 +352,14 @@ definitionListItem = try $ do definitionList = try $ do items <- many1 definitionListItem - return (DefinitionList items) + return $ DefinitionList items -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart = try (do +bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers white <- many1 spaceChar - let len = length (marker:white) - return len) + return $ length (marker:white) -- parses ordered list start and returns its length (inc following whitespace) orderedListStart style delim = try $ do @@ -386,11 +368,11 @@ orderedListStart style delim = try $ do return $ markerLen + length white -- parse a line of a list item -listLine markerLength = try (do +listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength line <- manyTill anyChar newline - return (line ++ "\n")) + return $ line ++ "\n" -- indent by specified number of spaces (or equiv. tabs) indentWith num = do @@ -399,7 +381,7 @@ indentWith num = do if (num < tabStop) then count num (char ' ') else choice [ try (count num (char ' ')), - (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] + (try (char '\t' >> count (num - tabStop) (char ' '))) ] -- parse raw text for one list item, excluding start marker and continuations rawListItem start = try $ do @@ -411,19 +393,16 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation markerLength = try (do +listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) - return (blanks ++ (concat result))) + return $ blanks ++ concat result -listItem start = try (do +listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) - blanks <- choice [ try (do - b <- many blankline - lookAhead start - return b), - many1 blankline ] -- whole list must end with blank + blanks <- choice [ try (many blankline >>~ lookAhead start), + many1 blankline ] -- whole list must end with blank. -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. -- see definition of "endline" @@ -436,52 +415,44 @@ listItem start = try (do parsed <- parseBlocks setInput remaining updateState (\st -> st {stateParserContext = oldContext}) - return parsed) + return parsed orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListMarker items <- many1 (listItem (orderedListStart style delim)) let items' = compactify items - return (OrderedList (start, style, delim) items') + return $ OrderedList (start, style, delim) items' -bulletList = try (do +bulletList = try $ do items <- many1 (listItem bulletListStart) let items' = compactify items - return (BulletList items')) + return $ BulletList items' -- -- unknown directive (e.g. comment) -- -unknownDirective = try (do +unknownDirective = try $ do string ".. " manyTill anyChar newline - many (do - string " " - char ':' - many1 (noneOf "\n:") - char ':' - many1 (noneOf "\n") - newline) - option "" blanklines - return Null) + many (string " :" >> many1 (noneOf "\n:") >> char ':' >> + many1 (noneOf "\n") >> newline) + optional blanklines + return Null -- -- reference key -- -referenceKey = do - result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] - option "" blanklines - return result +referenceKey = + choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] >>~ + optional blanklines targetURI = try $ do skipSpaces - option ' ' newline - contents <- many1 (try (do many spaceChar - newline - many1 spaceChar - noneOf " \t\n") <|> noneOf "\n") + optional newline + contents <- many1 (try (many spaceChar >> newline >> + many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines return contents @@ -516,71 +487,73 @@ regularKey = try $ do -- inline -- -inline = choice [ superscript, subscript, - escapedChar, link, image, hyphens, strong, emph, code, - str, tabchar, whitespace, endline, symbol ] "inline" - -hyphens = try (do +inline = choice [ superscript + , subscript + , escapedChar + , link + , image + , hyphens + , strong + , emph + , code + , str + , tabchar + , whitespace + , endline + , symbol ] "inline" + +hyphens = try $ do result <- many1 (char '-') option Space endline -- don't want to treat endline after hyphen or dash as a space - return (Str result)) + return $ Str result escapedChar = escaped anyChar symbol = do result <- oneOf specialChars - return (Str [result]) + return $ Str [result] -- parses inline code, between codeStart and codeEnd -code = try (do +code = try $ do string "``" result <- manyTill anyChar (try (string "``")) - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result - return (Code result')) + return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result -emph = do - result <- enclosed (char '*') (char '*') inline - return (Emph (normalizeSpaces result)) +emph = enclosed (char '*') (char '*') inline >>= + return . Emph . normalizeSpaces -strong = do - result <- enclosed (string "**") (string "**") inline - return (Strong (normalizeSpaces result)) +strong = enclosed (string "**") (string "**") inline >>= + return . Strong . normalizeSpaces interpreted role = try $ do - option "" (try $ string "\\ ") + optional $ try $ string "\\ " result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar nextChar <- lookAhead anyChar try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") return [Str result] superscript = interpreted "sup" >>= (return . Superscript) + subscript = interpreted "sub" >>= (return . Subscript) -whitespace = do - many1 spaceChar "whitespace" - return Space +whitespace = many1 spaceChar >> return Space "whitespace" -tabchar = do - tab - return (Str "\t") +tabchar = tab >> return (Str "\t") -str = do - notFollowedBy' oneWordReference - result <- many1 (noneOf (specialChars ++ "\t\n ")) - return (Str result) +str = notFollowedBy' oneWordReference >> + many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str -- an endline character that can be treated as a space, not a structural break -endline = try (do +endline = try $ do newline notFollowedBy blankline -- parse potential list-starts at beginning of line differently in a list: st <- getState if ((stateParserContext st) == ListItemState) - then do notFollowedBy' anyOrderedListMarker - notFollowedBy' bulletListStart - else option () pzero - return Space) + then notFollowedBy' anyOrderedListMarker >> notFollowedBy' bulletListStart + else return () + return Space -- -- links @@ -628,10 +601,10 @@ referenceLink = try $ do uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:", "news:", "telnet:" ] -uri = try (do +uri = try $ do scheme <- uriScheme identifier <- many1 (noneOf " \t\n") - return (scheme ++ identifier)) + return $ scheme ++ identifier autoURI = try $ do src <- uri @@ -639,20 +612,20 @@ autoURI = try $ do emailChar = alphaNum <|> oneOf "-+_." -emailAddress = try (do +emailAddress = try $ do firstLetter <- alphaNum restAddr <- many emailChar let addr = firstLetter:restAddr char '@' dom <- domain - return (addr ++ '@':dom)) + return $ addr ++ '@':dom domainChar = alphaNum <|> char '-' -domain = try (do +domain = try $ do first <- many1 domainChar dom <- many1 (try (do{ char '.'; many1 domainChar })) - return (joinWithSep "." (first:dom))) + return $ joinWithSep "." (first:dom) autoEmail = try $ do src <- emailAddress @@ -669,5 +642,5 @@ image = try $ do src <- case lookupKeySrc keyTable ref of Nothing -> fail "no corresponding key" Just target -> return target - return (Image (normalizeSpaces ref) src) + return $ Image (normalizeSpaces ref) src diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 587e3891a..31ce1c348 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -32,12 +32,11 @@ module Text.Pandoc.Shared ( splitBy, splitByIndices, substitute, - -- * Text processing joinWithSep, + -- * Text processing tabsToSpaces, backslashEscapes, escapeStringUsing, - endsWith, stripTrailingNewlines, removeLeadingTrailingSpace, removeLeadingSpace, @@ -46,8 +45,33 @@ module Text.Pandoc.Shared ( camelCaseToHyphenated, toRomanNumeral, -- * Parsing + (>>~), + anyLine, + many1Till, + notFollowedBy', + oneOfStrings, + spaceChar, + skipSpaces, + blankline, + blanklines, + enclosed, + stringAnyCase, + parseFromString, + lineClump, + charsInBalanced, + charsInBalanced', + romanNumeral, + withHorizDisplacement, + nullBlock, + failIfStrict, + escaped, + anyOrderedListMarker, + orderedListMarker, + charRef, readWith, testStringWith, + ParserState (..), + defaultParserState, Reference (..), isNoteBlock, isKeyBlock, @@ -55,14 +79,10 @@ module Text.Pandoc.Shared ( HeaderType (..), ParserContext (..), QuoteContext (..), - ParserState (..), NoteTable, - defaultParserState, - nullBlock, - failIfStrict, - escaped, - anyOrderedListMarker, - orderedListMarker, + KeyTable, + lookupKeySrc, + refsMatch, -- * Native format prettyprinting prettyPandoc, -- * Pandoc block and inline list processing @@ -74,214 +94,387 @@ module Text.Pandoc.Shared ( isHeaderBlock, -- * Writer options WriterOptions (..), - defaultWriterOptions, - -- * Reference key lookup functions - KeyTable, - lookupKeySrc, - refsMatch, + defaultWriterOptions ) where + import Text.Pandoc.Definition import Text.ParserCombinators.Parsec -import Text.Pandoc.ParserCombinators -import Text.Pandoc.Entities ( decodeEntities, escapeStringForXML ) +import Text.Pandoc.CharacterReferences ( characterReference ) import Data.Char ( toLower, toUpper, ord, chr, isLower, isUpper ) -import Data.List ( find, groupBy, isPrefixOf ) +import Data.List ( find, groupBy, isPrefixOf, isSuffixOf ) --- | Parse a string with a given parser and state. -readWith :: GenParser Char ParserState a -- ^ parser - -> ParserState -- ^ initial state - -> String -- ^ input string - -> a -readWith parser state input = - case runParser parser state "source" input of - Left err -> error $ "\nError:\n" ++ show err - Right result -> result +-- +-- List processing +-- --- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => - GenParser Char ParserState a - -> String - -> IO () -testStringWith parser str = putStrLn $ show $ - readWith parser defaultParserState str +-- | Split list by groups of one or more sep. +splitBy :: (Eq a) => a -> [a] -> [[a]] +splitBy _ [] = [] +splitBy sep lst = + let (first, rest) = break (== sep) lst + rest' = dropWhile (== sep) rest + in first:(splitBy sep rest') -data HeaderType - = SingleHeader Char -- ^ Single line of characters underneath - | DoubleHeader Char -- ^ Lines of characters above and below - deriving (Eq, Show) +-- | Split list into chunks divided at specified indices. +splitByIndices :: [Int] -> [a] -> [[a]] +splitByIndices [] lst = [lst] +splitByIndices (x:xs) lst = + let (first, rest) = splitAt x lst in + first:(splitByIndices (map (\y -> y - x) xs) rest) -data ParserContext - = ListItemState -- ^ Used when running parser on list item contents - | NullState -- ^ Default state - deriving (Eq, Show) +-- | Replace each occurrence of one sublist in a list with another. +substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] +substitute _ _ [] = [] +substitute [] _ lst = lst +substitute target replacement lst = + if target `isPrefixOf` lst + then replacement ++ (substitute target replacement $ drop (length target) lst) + else (head lst):(substitute target replacement $ tail lst) -data QuoteContext - = InSingleQuote -- ^ Used when we're parsing inside single quotes - | InDoubleQuote -- ^ Used when we're parsing inside double quotes - | NoQuote -- ^ Used when we're not parsing inside quotes - deriving (Eq, Show) +-- | Joins a list of lists, separated by another list. +joinWithSep :: [a] -- ^ List to use as separator + -> [[a]] -- ^ Lists to join + -> [a] +joinWithSep sep [] = [] +joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst -type KeyTable = [([Inline], Target)] +-- +-- Text processing +-- -type NoteTable = [(String, [Block])] +-- | Convert tabs to spaces (with adjustable tab stop). +tabsToSpaces :: Int -- ^ Tabstop + -> String -- ^ String to convert + -> String +tabsToSpaces tabstop str = + unlines $ map (tabsInLine tabstop tabstop) (lines str) --- | References from preliminary parsing -data Reference - = KeyBlock [Inline] Target -- ^ Key for reference-style link (label URL title) - | NoteBlock String [Block] -- ^ Footnote reference and contents - | LineClump String -- ^ Raw clump of lines with blanks at end - deriving (Eq, Read, Show) +-- | Convert tabs to spaces in one line. +tabsInLine :: Int -- ^ Number of spaces to next tab stop + -> Int -- ^ Tabstop + -> String -- ^ Line to convert + -> String +tabsInLine num tabstop [] = "" +tabsInLine num tabstop (c:cs) = + let (replacement, nextnum) = if c == '\t' + then (replicate num ' ', tabstop) + else if num > 1 + then ([c], num - 1) + else ([c], tabstop) + in replacement ++ tabsInLine nextnum tabstop cs --- | Auxiliary functions used in preliminary parsing -isNoteBlock :: Reference -> Bool -isNoteBlock (NoteBlock _ _) = True -isNoteBlock _ = False +-- | Returns an association list of backslash escapes for the +-- designated characters. +backslashEscapes :: [Char] -- ^ list of special characters to escape + -> [(Char, String)] +backslashEscapes = map (\ch -> (ch, ['\\',ch])) -isKeyBlock :: Reference -> Bool -isKeyBlock (KeyBlock _ _) = True -isKeyBlock _ = False +-- | Escape a string of characters, using an association list of +-- characters and strings. +escapeStringUsing :: [(Char, String)] -> String -> String +escapeStringUsing escapeTable [] = "" +escapeStringUsing escapeTable (x:xs) = + case (lookup x escapeTable) of + Just str -> str ++ rest + Nothing -> x:rest + where rest = escapeStringUsing escapeTable xs -isLineClump :: Reference -> Bool -isLineClump (LineClump _) = True -isLineClump _ = False +-- | Strip trailing newlines from string. +stripTrailingNewlines :: String -> String +stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse -data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML - -- and LaTeX? - stateParserContext :: ParserContext, -- ^ What are we parsing? - stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateKeys :: KeyTable, -- ^ List of reference keys - stateNotes :: NoteTable, -- ^ List of notes - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ If @True@, parse - -- bibliographic info - stateTitle :: [Inline], -- ^ Title of document - stateAuthors :: [String], -- ^ Authors of document - stateDate :: String, -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax - stateSmart :: Bool, -- ^ Use smart typography - stateColumns :: Int, -- ^ Number of columns in - -- terminal (used for tables) - stateHeaderTable :: [HeaderType] -- ^ List of header types used, - -- in what order (rst only) - } - deriving Show +-- | Remove leading and trailing space (including newlines) from string. +removeLeadingTrailingSpace :: String -> String +removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace -defaultParserState :: ParserState -defaultParserState = - ParserState { stateParseRaw = False, - stateParserContext = NullState, - stateQuoteContext = NoQuote, - stateKeys = [], - stateNotes = [], - stateTabStop = 4, - stateStandalone = False, - stateTitle = [], - stateAuthors = [], - stateDate = [], - stateStrict = False, - stateSmart = False, - stateColumns = 80, - stateHeaderTable = [] } +-- | Remove leading space (including newlines) from string. +removeLeadingSpace :: String -> String +removeLeadingSpace = dropWhile (`elem` " \n\t") + +-- | Remove trailing space (including newlines) from string. +removeTrailingSpace :: String -> String +removeTrailingSpace = reverse . removeLeadingSpace . reverse + +-- | Strip leading and trailing characters from string +stripFirstAndLast :: String -> String +stripFirstAndLast str = + drop 1 $ take ((length str) - 1) str + +-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). +camelCaseToHyphenated :: String -> String +camelCaseToHyphenated [] = "" +camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = + a:'-':(toLower b):(camelCaseToHyphenated rest) +camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) + +-- | Convert number < 4000 to uppercase roman numeral. +toRomanNumeral :: Int -> String +toRomanNumeral x = + if x >= 4000 || x < 0 + then "?" + else case x of + x | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000) + x | x >= 900 -> "CM" ++ toRomanNumeral (x - 900) + x | x >= 500 -> "D" ++ toRomanNumeral (x - 500) + x | x >= 400 -> "CD" ++ toRomanNumeral (x - 400) + x | x >= 100 -> "C" ++ toRomanNumeral (x - 100) + x | x >= 90 -> "XC" ++ toRomanNumeral (x - 90) + x | x >= 50 -> "L" ++ toRomanNumeral (x - 50) + x | x >= 40 -> "XL" ++ toRomanNumeral (x - 40) + x | x >= 10 -> "X" ++ toRomanNumeral (x - 10) + x | x >= 9 -> "IX" ++ toRomanNumeral (x - 5) + x | x >= 5 -> "V" ++ toRomanNumeral (x - 5) + x | x >= 4 -> "IV" ++ toRomanNumeral (x - 4) + x | x >= 1 -> "I" ++ toRomanNumeral (x - 1) + 0 -> "" + +-- +-- Parsing +-- + +-- | Like >>, but returns the operation on the left. +-- (Suggested by Tillmann Rendel on Haskell-cafe list.) +(>>~) :: (Monad m) => m a -> m b -> m a +a >>~ b = a >>= \x -> b >> return x + +-- | Parse any line of text +anyLine :: GenParser Char st [Char] +anyLine = try (manyTill anyChar newline) <|> many1 anyChar + -- second alternative is for a line ending with eof + +-- | Like @manyTill@, but reads at least one item. +many1Till :: GenParser tok st a + -> GenParser tok st end + -> GenParser tok st [a] +many1Till p end = do + first <- p + rest <- manyTill p end + return (first:rest) + +-- | A more general form of @notFollowedBy@. This one allows any +-- type of parser to be specified, and succeeds only if that parser fails. +-- It does not consume any input. +notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' parser = try $ (do result <- try parser + unexpected (show result)) + <|> return () + +-- | Parses one of a list of strings (tried in order). +oneOfStrings :: [String] -> GenParser Char st String +oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings + +-- | Parses a space or tab. +spaceChar :: CharParser st Char +spaceChar = oneOf " \t" + +-- | Skips zero or more spaces or tabs. +skipSpaces :: GenParser Char st () +skipSpaces = skipMany spaceChar + +-- | Skips zero or more spaces or tabs, then reads a newline. +blankline :: GenParser Char st Char +blankline = try $ skipSpaces >> newline + +-- | Parses one or more blank lines and returns a string of newlines. +blanklines :: GenParser Char st [Char] +blanklines = many1 blankline + +-- | Parses material enclosed between start and end parsers. +enclosed :: GenParser Char st t -- ^ start parser + -> GenParser Char st end -- ^ end parser + -> GenParser Char st a -- ^ content parser (to be used repeatedly) + -> GenParser Char st [a] +enclosed start end parser = try $ + start >> notFollowedBy space >> many1Till parser (try end) + +-- | Parse string, case insensitive. +stringAnyCase :: [Char] -> CharParser st String +stringAnyCase [] = string "" +stringAnyCase (x:xs) = try $ do + firstChar <- choice [ char (toUpper x), char (toLower x) ] + rest <- stringAnyCase xs + return (firstChar:rest) + +-- | Parse contents of 'str' using 'parser' and return result. +parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString parser str = try $ do + oldInput <- getInput + setInput str + result <- parser + setInput oldInput + return result + +-- | Parse raw line block up to and including blank lines. +lineClump :: GenParser Char st String +lineClump = do + lines <- many1 (notFollowedBy blankline >> anyLine) + blanks <- blanklines <|> (eof >> return "\n") + return $ (unlines lines) ++ blanks + +-- | Parse a string of characters between an open character +-- and a close character, including text between balanced +-- pairs of open and close. For example, +-- @charsInBalanced '(' ')'@ will parse "(hello (there))" +-- and return "hello (there)". Stop if a blank line is +-- encountered. +charsInBalanced :: Char -> Char -> GenParser Char st String +charsInBalanced open close = try $ do + char open + raw <- manyTill ( (do res <- charsInBalanced open close + return $ [open] ++ res ++ [close]) + <|> (do notFollowedBy' (blankline >> blanklines) + count 1 anyChar)) + (char close) + return $ concat raw + +-- | Like @charsInBalanced@, but allow blank lines in the content. +charsInBalanced' :: Char -> Char -> GenParser Char st String +charsInBalanced' open close = try $ do + char open + raw <- manyTill ( (do res <- charsInBalanced open close + return $ [open] ++ res ++ [close]) + <|> count 1 anyChar) + (char close) + return $ concat raw + +-- | Parses a roman numeral (uppercase or lowercase), returns number. +romanNumeral :: Bool -- ^ Uppercase if true + -> GenParser Char st Int +romanNumeral upper = try $ do + let charAnyCase c = char (if upper then toUpper c else c) + let one = charAnyCase 'i' + let five = charAnyCase 'v' + let ten = charAnyCase 'x' + let fifty = charAnyCase 'l' + let hundred = charAnyCase 'c' + let fivehundred = charAnyCase 'd' + let thousand = charAnyCase 'm' + thousands <- many thousand >>= (return . (1000 *) . length) + ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 + fivehundreds <- many fivehundred >>= (return . (500 *) . length) + fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 + hundreds <- many hundred >>= (return . (100 *) . length) + nineties <- option 0 $ try $ ten >> hundred >> return 90 + fifties <- many fifty >>= (return . (50 *) . length) + forties <- option 0 $ try $ ten >> fifty >> return 40 + tens <- many ten >>= (return . (10 *) . length) + nines <- option 0 $ try $ one >> ten >> return 9 + fives <- many five >>= (return . (5 *) . length) + fours <- option 0 $ try $ one >> five >> return 4 + ones <- many one >>= (return . length) + let total = thousands + ninehundreds + fivehundreds + fourhundreds + + hundreds + nineties + fifties + forties + tens + nines + + fives + fours + ones + if total == 0 + then fail "not a roman numeral" + else return total + +-- | Applies a parser, returns tuple of its results and its horizontal +-- displacement (the difference between the source column at the end +-- and the source column at the beginning). Vertical displacement +-- (source row) is ignored. +withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply + -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement parser = do + pos1 <- getPosition + result <- parser + pos2 <- getPosition + return (result, sourceColumn pos2 - sourceColumn pos1) -- | Parses a character and returns 'Null' (so that the parser can move on -- if it gets stuck). nullBlock :: GenParser Char st Block -nullBlock = do - anyChar - return Null +nullBlock = anyChar >> return Null -- | Fail if reader is in strict markdown syntax mode. failIfStrict :: GenParser Char ParserState () failIfStrict = do state <- getState - if stateStrict state then fail "Strict mode" else return () + if stateStrict state then fail "strict mode" else return () -- | Parses backslash, then applies character parser. escaped :: GenParser Char st Char -- ^ Parser for character to escape -> GenParser Char st Inline -escaped parser = try (do - char '\\' - result <- parser - return (Str [result])) +escaped parser = try $ do + char '\\' + result <- parser + return (Str [result]) -- | Parses an uppercase roman numeral and returns (UpperRoman, number). upperRoman :: GenParser Char st (ListNumberStyle, Int) upperRoman = do - num <- romanNumeral True - return (UpperRoman, num) + num <- romanNumeral True + return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). lowerRoman :: GenParser Char st (ListNumberStyle, Int) lowerRoman = do - num <- romanNumeral False - return (LowerRoman, num) + num <- romanNumeral False + return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). decimal :: GenParser Char st (ListNumberStyle, Int) decimal = do - num <- many1 digit - return (Decimal, read num) + num <- many1 digit + return (Decimal, read num) -- | Parses a '#' returns (DefaultStyle, 1). defaultNum :: GenParser Char st (ListNumberStyle, Int) defaultNum = do - char '#' - return (DefaultStyle, 1) + char '#' + return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). lowerAlpha :: GenParser Char st (ListNumberStyle, Int) lowerAlpha = do - ch <- oneOf ['a'..'z'] - return (LowerAlpha, ord ch - ord 'a' + 1) + ch <- oneOf ['a'..'z'] + return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). upperAlpha :: GenParser Char st (ListNumberStyle, Int) upperAlpha = do - ch <- oneOf ['A'..'Z'] - return (UpperAlpha, ord ch - ord 'A' + 1) + ch <- oneOf ['A'..'Z'] + return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I romanOne :: GenParser Char st (ListNumberStyle, Int) -romanOne = (do char 'i' - return (LowerRoman, 1)) <|> - (do char 'I' - return (UpperRoman, 1)) +romanOne = (char 'i' >> return (LowerRoman, 1)) <|> + (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. anyOrderedListMarker :: GenParser Char st ListAttributes -anyOrderedListMarker = choice $ [delimParser numParser | delimParser <- - [inPeriod, inOneParen, inTwoParens], - numParser <- [decimal, defaultNum, romanOne, - lowerAlpha, lowerRoman, upperAlpha, upperRoman]] +anyOrderedListMarker = choice $ + [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], + numParser <- [decimal, defaultNum, romanOne, + lowerAlpha, lowerRoman, upperAlpha, upperRoman]] -- | Parses a list number (num) followed by a period, returns list attributes. inPeriod :: GenParser Char st (ListNumberStyle, Int) -> GenParser Char st ListAttributes inPeriod num = try $ do - (style, start) <- num - char '.' - let delim = if style == DefaultStyle - then DefaultDelim - else Period - return (start, style, delim) + (style, start) <- num + char '.' + let delim = if style == DefaultStyle + then DefaultDelim + else Period + return (start, style, delim) -- | Parses a list number (num) followed by a paren, returns list attributes. inOneParen :: GenParser Char st (ListNumberStyle, Int) -> GenParser Char st ListAttributes inOneParen num = try $ do - (style, start) <- num - char ')' - return (start, style, OneParen) + (style, start) <- num + char ')' + return (start, style, OneParen) -- | Parses a list number (num) enclosed in parens, returns list attributes. inTwoParens :: GenParser Char st (ListNumberStyle, Int) -> GenParser Char st ListAttributes inTwoParens num = try $ do - char '(' - (style, start) <- num - char ')' - return (start, style, TwoParens) + char '(' + (style, start) <- num + char ')' + return (start, style, TwoParens) -- | Parses an ordered list marker with a given style and delimiter, -- returns number. @@ -289,21 +482,158 @@ orderedListMarker :: ListNumberStyle -> ListNumberDelim -> GenParser Char st Int orderedListMarker style delim = do - let num = case style of - DefaultStyle -> decimal <|> defaultNum - Decimal -> decimal - UpperRoman -> upperRoman - LowerRoman -> lowerRoman - UpperAlpha -> upperAlpha - LowerAlpha -> lowerAlpha - let context = case delim of - DefaultDelim -> inPeriod - Period -> inPeriod - OneParen -> inOneParen - TwoParens -> inTwoParens - (start, style, delim) <- context num - return start + let num = case style of + DefaultStyle -> decimal <|> defaultNum + Decimal -> decimal + UpperRoman -> upperRoman + LowerRoman -> lowerRoman + UpperAlpha -> upperAlpha + LowerAlpha -> lowerAlpha + let context = case delim of + DefaultDelim -> inPeriod + Period -> inPeriod + OneParen -> inOneParen + TwoParens -> inTwoParens + (start, style, delim) <- context num + return start + +-- | Parses a character reference and returns a Str element. +charRef :: GenParser Char st Inline +charRef = do + c <- characterReference + return $ Str [c] + +-- | Parse a string with a given parser and state. +readWith :: GenParser Char ParserState a -- ^ parser + -> ParserState -- ^ initial state + -> String -- ^ input string + -> a +readWith parser state input = + case runParser parser state "source" input of + Left err -> error $ "\nError:\n" ++ show err + Right result -> result + +-- | Parse a string with @parser@ (for testing). +testStringWith :: (Show a) => GenParser Char ParserState a + -> String + -> IO () +testStringWith parser str = putStrLn $ show $ + readWith parser defaultParserState str + +-- | Parsing options. +data ParserState = ParserState + { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + stateParserContext :: ParserContext, -- ^ Inside list? + stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateKeys :: KeyTable, -- ^ List of reference keys + stateNotes :: NoteTable, -- ^ List of notes + stateTabStop :: Int, -- ^ Tab stop + stateStandalone :: Bool, -- ^ Parse bibliographic info? + stateTitle :: [Inline], -- ^ Title of document + stateAuthors :: [String], -- ^ Authors of document + stateDate :: String, -- ^ Date of document + stateStrict :: Bool, -- ^ Use strict markdown syntax? + stateSmart :: Bool, -- ^ Use smart typography? + stateColumns :: Int, -- ^ Number of columns in terminal + stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used + } + deriving Show + +defaultParserState :: ParserState +defaultParserState = + ParserState { stateParseRaw = False, + stateParserContext = NullState, + stateQuoteContext = NoQuote, + stateKeys = [], + stateNotes = [], + stateTabStop = 4, + stateStandalone = False, + stateTitle = [], + stateAuthors = [], + stateDate = [], + stateStrict = False, + stateSmart = False, + stateColumns = 80, + stateHeaderTable = [] } + +-- | References from preliminary parsing. +data Reference + = KeyBlock [Inline] Target -- ^ Key for reference-style link (label URL title) + | NoteBlock String [Block] -- ^ Footnote reference and contents + | LineClump String -- ^ Raw clump of lines with blanks at end + deriving (Eq, Read, Show) + +-- | Auxiliary functions used in preliminary parsing. +isNoteBlock :: Reference -> Bool +isNoteBlock (NoteBlock _ _) = True +isNoteBlock _ = False + +isKeyBlock :: Reference -> Bool +isKeyBlock (KeyBlock _ _) = True +isKeyBlock _ = False + +isLineClump :: Reference -> Bool +isLineClump (LineClump _) = True +isLineClump _ = False + +data HeaderType + = SingleHeader Char -- ^ Single line of characters underneath + | DoubleHeader Char -- ^ Lines of characters above and below + deriving (Eq, Show) + +data ParserContext + = ListItemState -- ^ Used when running parser on list item contents + | NullState -- ^ Default state + deriving (Eq, Show) + +data QuoteContext + = InSingleQuote -- ^ Used when parsing inside single quotes + | InDoubleQuote -- ^ Used when parsing inside double quotes + | NoQuote -- ^ Used when not parsing inside quotes + deriving (Eq, Show) + +type NoteTable = [(String, [Block])] + +type KeyTable = [([Inline], Target)] + +-- | Look up key in key table and return target object. +lookupKeySrc :: KeyTable -- ^ Key table + -> [Inline] -- ^ Key + -> Maybe Target +lookupKeySrc table key = case find (refsMatch key . fst) table of + Nothing -> Nothing + Just (_, src) -> Just src + +-- | Returns @True@ if keys match (case insensitive). +refsMatch :: [Inline] -> [Inline] -> Bool +refsMatch ((Str x):restx) ((Str y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Emph x):restx) ((Emph y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strong x):restx) ((Strong y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Superscript x):restx) ((Superscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Subscript x):restx) ((Subscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = + t == u && refsMatch x y && refsMatch restx resty +refsMatch ((Code x):restx) ((Code y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((TeX x):restx) ((TeX y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty +refsMatch [] x = null x +refsMatch x [] = null x +-- +-- Native format prettyprinting +-- + -- | Indent string as a block. indentBy :: Int -- ^ Number of spaces to indent the block -> Int -- ^ Number of spaces (rel to block) to indent first line @@ -311,9 +641,10 @@ indentBy :: Int -- ^ Number of spaces to indent the block -> String indentBy num first [] = "" indentBy num first str = - let (firstLine:restLines) = lines str - firstLineIndent = num + first in - (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ (joinWithSep "\n" $ map (\line -> (replicate num ' ') ++ line) restLines) + let (firstLine:restLines) = lines str + firstLineIndent = num + first + in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ + (joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines) -- | Prettyprint list of Pandoc blocks elements. prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks @@ -321,142 +652,40 @@ prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks -> String prettyBlockList indent [] = indentBy indent 0 "[]" prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ - (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" + (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" -- | Prettyprint Pandoc block element. prettyBlock :: Block -> String prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ (prettyBlockList 2 blocks) prettyBlock (OrderedList attribs blockLists) = - "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ - (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks) - blockLists)) ++ " ]" + "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ + (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks) + blockLists)) ++ " ]" prettyBlock (BulletList blockLists) = "BulletList\n" ++ - indentBy 2 0 ("[ " ++ (joinWithSep ", " - (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" + indentBy 2 0 ("[ " ++ (joinWithSep ", " + (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++ - indentBy 2 0 ("[" ++ (joinWithSep ",\n" - (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++ - indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]" + indentBy 2 0 ("[" ++ (joinWithSep ",\n" + (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++ + indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]" prettyBlock (Table caption aligns widths header rows) = - "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ - show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ - (joinWithSep ",\n" (map prettyRow rows)) ++ " ]" - where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", " - (map (\blocks -> prettyBlockList 2 blocks) - cols))) ++ " ]" + "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ + show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ + (joinWithSep ",\n" (map prettyRow rows)) ++ " ]" + where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", " + (map (\blocks -> prettyBlockList 2 blocks) + cols))) ++ " ]" prettyBlock block = show block -- | Prettyprint Pandoc document. prettyPandoc :: Pandoc -> String -prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++ - ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" - --- | Convert tabs to spaces (with adjustable tab stop). -tabsToSpaces :: Int -- ^ Tabstop - -> String -- ^ String to convert - -> String -tabsToSpaces tabstop str = - unlines (map (tabsInLine tabstop tabstop) (lines str)) +prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ + ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" --- | Convert tabs to spaces in one line. -tabsInLine :: Int -- ^ Number of spaces to next tab stop - -> Int -- ^ Tabstop - -> String -- ^ Line to convert - -> String -tabsInLine num tabstop "" = "" -tabsInLine num tabstop (c:cs) = - let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in - let nextnumraw = (num - (length replacement)) in - let nextnum = if (nextnumraw < 1) - then (nextnumraw + tabstop) - else nextnumraw in - replacement ++ (tabsInLine nextnum tabstop cs) - --- | Returns an association list of backslash escapes for the --- designated characters. -backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, String)] -backslashEscapes = map (\ch -> (ch, ['\\',ch])) - --- | Escape a string of characters, using an association list of --- characters and strings. -escapeStringUsing :: [(Char, String)] -> String -> String -escapeStringUsing escapeTable "" = "" -escapeStringUsing escapeTable (x:xs) = - case (lookup x escapeTable) of - Just str -> str ++ rest - Nothing -> x:rest - where rest = escapeStringUsing escapeTable xs - --- | Returns @True@ if string ends with given character. -endsWith :: Char -> [Char] -> Bool -endsWith char [] = False -endsWith char str = (char == last str) - --- | Joins a list of lists, separated by another list. -joinWithSep :: [a] -- ^ List to use as separator - -> [[a]] -- ^ Lists to join - -> [a] -joinWithSep sep [] = [] -joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst - --- | Strip trailing newlines from string. -stripTrailingNewlines :: String -> String -stripTrailingNewlines "" = "" -stripTrailingNewlines str = - if (last str) == '\n' - then stripTrailingNewlines (init str) - else str - --- | Remove leading and trailing space (including newlines) from string. -removeLeadingTrailingSpace :: String -> String -removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace - --- | Remove leading space (including newlines) from string. -removeLeadingSpace :: String -> String -removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || - (x == '\t')) - --- | Remove trailing space (including newlines) from string. -removeTrailingSpace :: String -> String -removeTrailingSpace = reverse . removeLeadingSpace . reverse - --- | Strip leading and trailing characters from string -stripFirstAndLast :: String -> String -stripFirstAndLast str = - drop 1 $ take ((length str) - 1) str - --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). -camelCaseToHyphenated :: String -> String -camelCaseToHyphenated "" = "" -camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = - a:'-':(toLower b):(camelCaseToHyphenated rest) -camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) - --- | Replace each occurrence of one sublist in a list with another. -substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] -substitute _ _ [] = [] -substitute [] _ lst = lst -substitute target replacement lst = - if isPrefixOf target lst - then replacement ++ (substitute target replacement $ drop (length target) lst) - else (head lst):(substitute target replacement $ tail lst) - --- | Split list into groups separated by sep. -splitBy :: (Eq a) => a -> [a] -> [[a]] -splitBy _ [] = [] -splitBy sep lst = - let (first, rest) = break (== sep) lst - rest' = dropWhile (== sep) rest in - first:(splitBy sep rest') - --- | Split list into chunks divided at specified indices. -splitByIndices :: [Int] -> [a] -> [[a]] -splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = - let (first, rest) = splitAt x lst in - first:(splitByIndices (map (\y -> y - x) xs) rest) +-- +-- Pandoc block and inline list processing +-- -- | Generate infinite lazy list of markers for an ordered list, -- depending on list attributes. @@ -466,8 +695,10 @@ orderedListMarkers (start, numstyle, numdelim) = seq = case numstyle of DefaultStyle -> map show [start..] Decimal -> map show [start..] - UpperAlpha -> drop (start - 1) $ cycle $ map singleton ['A'..'Z'] - LowerAlpha -> drop (start - 1) $ cycle $ map singleton ['a'..'z'] + UpperAlpha -> drop (start - 1) $ cycle $ + map singleton ['A'..'Z'] + LowerAlpha -> drop (start - 1) $ cycle $ + map singleton ['a'..'z'] UpperRoman -> map toRomanNumeral [start..] LowerRoman -> map (map toLower . toRomanNumeral) [start..] inDelim str = case numdelim of @@ -477,27 +708,6 @@ orderedListMarkers (start, numstyle, numdelim) = TwoParens -> "(" ++ str ++ ")" in map inDelim seq --- | Convert number < 4000 to uppercase roman numeral. -toRomanNumeral :: Int -> String -toRomanNumeral x = - if x >= 4000 || x < 0 - then "?" - else case x of - x | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000) - x | x >= 900 -> "CM" ++ toRomanNumeral (x - 900) - x | x >= 500 -> "D" ++ toRomanNumeral (x - 500) - x | x >= 400 -> "CD" ++ toRomanNumeral (x - 400) - x | x >= 100 -> "C" ++ toRomanNumeral (x - 100) - x | x >= 90 -> "XC" ++ toRomanNumeral (x - 90) - x | x >= 50 -> "L" ++ toRomanNumeral (x - 50) - x | x >= 40 -> "XL" ++ toRomanNumeral (x - 40) - x | x >= 10 -> "X" ++ toRomanNumeral (x - 10) - x | x >= 9 -> "IX" ++ toRomanNumeral (x - 5) - x | x >= 5 -> "V" ++ toRomanNumeral (x - 5) - x | x >= 4 -> "IV" ++ toRomanNumeral (x - 4) - x | x >= 1 -> "I" ++ toRomanNumeral (x - 1) - 0 -> "" - -- | Normalize a list of inline elements: remove leading and trailing -- @Space@ elements, collapse double @Space@s into singles, and -- remove empty Str elements. @@ -507,16 +717,14 @@ normalizeSpaces list = let removeDoubles [] = [] removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) removeDoubles ((Str ""):rest) = removeDoubles rest - removeDoubles (x:rest) = x:(removeDoubles rest) in - let removeLeading [] = [] - removeLeading lst = if ((head lst) == Space) - then tail lst - else lst in - let removeTrailing [] = [] - removeTrailing lst = if ((last lst) == Space) - then init lst - else lst in - removeLeading $ removeTrailing $ removeDoubles list + removeDoubles (x:rest) = x:(removeDoubles rest) + removeLeading (Space:xs) = removeLeading xs + removeLeading x = x + removeTrailing [] = [] + removeTrailing lst = if (last lst == Space) + then init lst + else lst + in removeLeading $ removeTrailing $ removeDoubles list -- | Change final list item from @Para@ to @Plain@ if the list should -- be compact. @@ -524,122 +732,86 @@ compactify :: [[Block]] -- ^ List of list items (each a list of blocks) -> [[Block]] compactify [] = [] compactify items = - let final = last items - others = init items in - case final of - [Para a] -> if any containsPara others - then items - else others ++ [[Plain a]] - otherwise -> items + let final = last items + others = init items + in case final of + [Para a] -> if any containsPara others + then items + else others ++ [[Plain a]] + otherwise -> items containsPara :: [Block] -> Bool containsPara [] = False containsPara ((Para a):rest) = True -containsPara ((BulletList items):rest) = (any containsPara items) || - (containsPara rest) -containsPara ((OrderedList _ items):rest) = (any containsPara items) || - (containsPara rest) +containsPara ((BulletList items):rest) = any containsPara items || + containsPara rest +containsPara ((OrderedList _ items):rest) = any containsPara items || + containsPara rest +containsPara ((DefinitionList items):rest) = any containsPara (map snd items) || + containsPara rest containsPara (x:rest) = containsPara rest -- | Data structure for defining hierarchical Pandoc documents data Element = Blk Block | Sec [Inline] [Element] deriving (Eq, Read, Show) --- | Returns true on Header block with level at least 'level' +-- | Returns @True@ on Header block with at least the specified level headerAtLeast :: Int -> Block -> Bool headerAtLeast level (Header x _) = x <= level headerAtLeast level _ = False --- | Convert list of Pandoc blocks into list of Elements (hierarchical) +-- | Convert list of Pandoc blocks into (hierarchical) list of Elements hierarchicalize :: [Block] -> [Element] hierarchicalize [] = [] hierarchicalize (block:rest) = case block of - (Header level title) -> let (thisSection, rest') = break (headerAtLeast - level) rest in - (Sec title (hierarchicalize thisSection)): - (hierarchicalize rest') - x -> (Blk x):(hierarchicalize rest) + (Header level title) -> + let (thisSection, rest') = break (headerAtLeast level) rest + in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') + x -> (Blk x):(hierarchicalize rest) -- | True if block is a Header block. isHeaderBlock :: Block -> Bool isHeaderBlock (Header _ _) = True isHeaderBlock _ = False +-- +-- Writer options +-- + -- | Options for writers data WriterOptions = WriterOptions - { writerStandalone :: Bool -- ^ Include header and footer - , writerTitlePrefix :: String -- ^ Prefix for HTML titles - , writerHeader :: String -- ^ Header for the document - , writerIncludeBefore :: String -- ^ String to include before the body - , writerIncludeAfter :: String -- ^ String to include after the body - , writerTableOfContents :: Bool -- ^ Include table of contents - , writerS5 :: Bool -- ^ We're writing S5 - , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML - , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js - , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) - , writerIncremental :: Bool -- ^ Incremental S5 lists - , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax - , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , writerTabStop :: Int -- ^ Tabstop for conversion between - -- spaces and tabs - } deriving Show + { writerStandalone :: Bool -- ^ Include header and footer + , writerHeader :: String -- ^ Header for the document + , writerTitlePrefix :: String -- ^ Prefix for HTML titles + , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs + , writerTableOfContents :: Bool -- ^ Include table of contents + , writerS5 :: Bool -- ^ We're writing S5 + , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML + , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js + , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) + , writerIncremental :: Bool -- ^ Incremental S5 lists + , writerNumberSections :: Bool -- ^ Number sections in LaTeX + , writerIncludeBefore :: String -- ^ String to include before the body + , writerIncludeAfter :: String -- ^ String to include after the body + , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax + , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + } deriving Show -- | Default writer options. defaultWriterOptions = - WriterOptions { writerStandalone = False, - writerHeader = "", - writerTitlePrefix = "", - writerTabStop = 4, - writerTableOfContents = False, - writerS5 = False, - writerUseASCIIMathML = False, - writerASCIIMathMLURL = Nothing, - writerIgnoreNotes = False, - writerIncremental = False, - writerNumberSections = False, - writerIncludeBefore = "", - writerIncludeAfter = "", - writerStrictMarkdown = False, - writerReferenceLinks = False } - --- --- code to lookup reference keys in key table --- - --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable -- ^ Key table - -> [Inline] -- ^ Key - -> Maybe Target -lookupKeySrc table key = case table of - [] -> Nothing - (k, src):rest -> if (refsMatch k key) - then Just src - else lookupKeySrc rest key - --- | Returns @True@ if keys match (case insensitive). -refsMatch :: [Inline] -> [Inline] -> Bool -refsMatch ((Str x):restx) ((Str y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Emph x):restx) ((Emph y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strong x):restx) ((Strong y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Superscript x):restx) ((Superscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Subscript x):restx) ((Subscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = - t == u && refsMatch x y && refsMatch restx resty -refsMatch ((Code x):restx) ((Code y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((TeX x):restx) ((TeX y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty -refsMatch [] x = null x -refsMatch x [] = null x + WriterOptions { writerStandalone = False, + writerHeader = "", + writerTitlePrefix = "", + writerTabStop = 4, + writerTableOfContents = False, + writerS5 = False, + writerUseASCIIMathML = False, + writerASCIIMathMLURL = Nothing, + writerIgnoreNotes = False, + writerIncremental = False, + writerNumberSections = False, + writerIncludeBefore = "", + writerIncludeAfter = "", + writerStrictMarkdown = False, + writerReferenceLinks = False } diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 1f93787b0..13912a9f3 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2007 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane @@ -27,9 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into ConTeXt. -} -module Text.Pandoc.Writers.ConTeXt ( - writeConTeXt - ) where +module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Printf ( printf ) @@ -40,8 +38,7 @@ type WriterState = Int -- number of next URL reference -- | Convert Pandoc to ConTeXt. writeConTeXt :: WriterOptions -> Pandoc -> String -writeConTeXt options document = - evalState (pandocToConTeXt options document) 1 +writeConTeXt options document = evalState (pandocToConTeXt options document) 1 pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String pandocToConTeXt options (Pandoc meta blocks) = do @@ -111,8 +108,8 @@ stringToConTeXt = concatMap escapeCharForConTeXt -- | Convert Pandoc block element to ConTeXt. blockToConTeXt :: Block -> State WriterState String blockToConTeXt Null = return "" -blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= (return . (++ "\n")) -blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= (return . (++ "\n\n")) +blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n") +blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n") blockToConTeXt (BlockQuote lst) = do contents <- blockListToConTeXt lst return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n" @@ -137,12 +134,12 @@ blockToConTeXt (OrderedList attribs lst) = case attribs of return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++ "\\stopitemize\n" blockToConTeXt (DefinitionList lst) = - mapM defListItemToConTeXt lst >>= (return . (++ "\n") . concat) + mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat blockToConTeXt HorizontalRule = return "\\thinrule\n\n" blockToConTeXt (Header level lst) = do contents <- inlineListToConTeXt lst - return $ if (level > 0) && (level <= 3) - then "\\" ++ (concat (replicate (level - 1) "sub")) ++ + return $ if level > 0 && level <= 3 + then "\\" ++ concat (replicate (level - 1) "sub") ++ "section{" ++ contents ++ "}\n\n" else contents ++ "\n\n" blockToConTeXt (Table caption aligns widths heads rows) = do @@ -186,12 +183,12 @@ defListItemToConTeXt (term, def) = do -- | Convert list of block elements to ConTeXt. blockListToConTeXt :: [Block] -> State WriterState String -blockListToConTeXt lst = mapM blockToConTeXt lst >>= (return . concat) +blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat -- | Convert list of inline elements to ConTeXt. inlineListToConTeXt :: [Inline] -- ^ Inlines to convert -> State WriterState String -inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= (return . concat) +inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index ecd27ee0c..e34b1959c 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -30,16 +30,35 @@ Conversion of 'Pandoc' documents to Docbook XML. module Text.Pandoc.Writers.Docbook ( writeDocbook) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( escapeStringForXML ) -import Data.Char ( toLower, ord ) -import Data.List ( isPrefixOf, partition, drop ) +import Data.List ( isPrefixOf, drop ) import Text.PrettyPrint.HughesPJ hiding ( Str ) - -- -- code to format XML -- +-- | Escape one character as needed for XML. +escapeCharForXML :: Char -> String +escapeCharForXML x = case x of + '&' -> "&" + '<' -> "<" + '>' -> ">" + '"' -> """ + '\160' -> " " + c -> [c] + +-- | True if the character needs to be escaped. +needsEscaping :: Char -> Bool +needsEscaping c = c `elem` "&<>\"\160" + +-- | Escape string as needed for XML. Entity references are not preserved. +escapeStringForXML :: String -> String +escapeStringForXML "" = "" +escapeStringForXML str = + case break needsEscaping str of + (okay, "") -> okay + (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs + -- | Return a text object with a string of formatted XML attributes. attributeList :: [(String, String)] -> Doc attributeList = text . concatMap @@ -52,10 +71,10 @@ inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc inTags isIndented tagType attribs contents = let openTag = char '<' <> text tagType <> attributeList attribs <> char '>' - closeTag = text " text tagType <> char '>' in - if isIndented - then openTag $$ nest 2 contents $$ closeTag - else openTag <> contents <> closeTag + closeTag = text " text tagType <> char '>' + in if isIndented + then openTag $$ nest 2 contents $$ closeTag + else openTag <> contents <> closeTag -- | Return a self-closing tag of tagType with specified attributes selfClosingTag :: String -> [(String, String)] -> Doc @@ -79,42 +98,42 @@ authorToDocbook :: [Char] -> Doc authorToDocbook name = inTagsIndented "author" $ if ',' `elem` name then -- last name first - let (lastname, rest) = break (==',') name - firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) + let (lastname, rest) = break (==',') name + firstname = removeLeadingSpace rest in + inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> + inTagsSimple "surname" (text $ escapeStringForXML lastname) else -- last name last - let namewords = words name - lengthname = length namewords - (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (joinWithSep " " (take (n-1) namewords), last namewords) in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) + let namewords = words name + lengthname = length namewords + (firstname, lastname) = case lengthname of + 0 -> ("","") + 1 -> ("", name) + n -> (joinWithSep " " (take (n-1) namewords), last namewords) + in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (text $ escapeStringForXML lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String writeDocbook opts (Pandoc (Meta title authors date) blocks) = - let head = if (writerStandalone opts) - then text (writerHeader opts) - else empty - meta = if (writerStandalone opts) - then inTagsIndented "articleinfo" $ - (inTagsSimple "title" (wrap opts title)) $$ - (vcat (map authorToDocbook authors)) $$ - (inTagsSimple "date" (text $ escapeStringForXML date)) - else empty + let head = if writerStandalone opts + then text (writerHeader opts) + else empty + meta = if writerStandalone opts + then inTagsIndented "articleinfo" $ + (inTagsSimple "title" (wrap opts title)) $$ + (vcat (map authorToDocbook authors)) $$ + (inTagsSimple "date" (text $ escapeStringForXML date)) + else empty elements = hierarchicalize blocks - before = writerIncludeBefore opts - after = writerIncludeAfter opts - body = (if null before then empty else text before) $$ - vcat (map (elementToDocbook opts) elements) $$ - (if null after then empty else text after) - body' = if writerStandalone opts - then inTagsIndented "article" (meta $$ body) - else body in - render $ head $$ body' $$ text "" + before = writerIncludeBefore opts + after = writerIncludeAfter opts + body = (if null before then empty else text before) $$ + vcat (map (elementToDocbook opts) elements) $$ + (if null after then empty else text after) + body' = if writerStandalone opts + then inTagsIndented "article" (meta $$ body) + else body + in render $ head $$ body' $$ text "" -- | Convert an Element to Docbook. elementToDocbook :: WriterOptions -> Element -> Doc @@ -123,10 +142,10 @@ elementToDocbook opts (Sec title elements) = -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] - else elements in - inTagsIndented "section" $ - inTagsSimple "title" (wrap opts title) $$ - vcat (map (elementToDocbook opts) elements') + else elements + in inTagsIndented "section" $ + inTagsSimple "title" (wrap opts title) $$ + vcat (map (elementToDocbook opts) elements') -- | Convert a list of Pandoc blocks to Docbook. blocksToDocbook :: WriterOptions -> [Block] -> Doc @@ -145,30 +164,27 @@ deflistItemsToDocbook opts items = -- | Convert a term and a list of blocks into a Docbook varlistentry. deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc deflistItemToDocbook opts term def = - let def' = map plainToPara def in - inTagsIndented "varlistentry" $ - inTagsIndented "term" (inlinesToDocbook opts term) $$ - inTagsIndented "listitem" (blocksToDocbook opts def') + let def' = map plainToPara def + in inTagsIndented "varlistentry" $ + inTagsIndented "term" (inlinesToDocbook opts term) $$ + inTagsIndented "listitem" (blocksToDocbook opts def') -- | Convert a list of lists of blocks to a list of Docbook list items. listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook opts items = - vcat $ map (listItemToDocbook opts) items +listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items -- | Convert a list of blocks into a Docbook list item. listItemToDocbook :: WriterOptions -> [Block] -> Doc listItemToDocbook opts item = - let item' = map plainToPara item in - inTagsIndented "listitem" (blocksToDocbook opts item') + inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook opts Null = empty blockToDocbook opts (Plain lst) = wrap opts lst -blockToDocbook opts (Para lst) = - inTagsIndented "para" (wrap opts lst) +blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst blockToDocbook opts (BlockQuote blocks) = - inTagsIndented "blockquote" (blocksToDocbook opts blocks) + inTagsIndented "blockquote" $ blocksToDocbook opts blocks blockToDocbook opts (CodeBlock str) = text "\n" <> text (escapeStringForXML str) <> text "\n" blockToDocbook opts (BulletList lst) = @@ -198,16 +214,16 @@ blockToDocbook opts (Table caption aligns widths headers rows) = then empty else inTagsIndented "caption" (inlinesToDocbook opts caption) - tableType = if isEmpty captionDoc then "informaltable" else "table" in - inTagsIndented tableType $ captionDoc $$ - (colHeadsToDocbook opts alignStrings widths headers) $$ - (vcat $ map (tableRowToDocbook opts alignStrings) rows) + tableType = if isEmpty captionDoc then "informaltable" else "table" + in inTagsIndented tableType $ captionDoc $$ + (colHeadsToDocbook opts alignStrings widths headers) $$ + (vcat $ map (tableRowToDocbook opts alignStrings) rows) colHeadsToDocbook opts alignStrings widths headers = - let heads = zipWith3 - (\align width item -> tableItemToDocbook opts "th" align width item) - alignStrings widths headers in - inTagsIndented "tr" $ vcat heads + let heads = zipWith3 (\align width item -> + tableItemToDocbook opts "th" align width item) + alignStrings widths headers + in inTagsIndented "tr" $ vcat heads alignmentToString alignment = case alignment of AlignLeft -> "left" @@ -215,20 +231,16 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableRowToDocbook opts aligns cols = - inTagsIndented "tr" $ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols +tableRowToDocbook opts aligns cols = inTagsIndented "tr" $ + vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols tableItemToDocbook opts tag align width item = let attrib = [("align", align)] ++ - if (width /= 0) - then [("style", "{width: " ++ - show (truncate (100*width)) ++ "%;}")] - else [] in - inTags True tag attrib $ vcat $ map (blockToDocbook opts) item - --- | Put string in CDATA section -cdata :: String -> Doc -cdata str = text $ "" + if width /= 0 + then [("style", "{width: " ++ + show (truncate (100*width)) ++ "%;}")] + else [] + in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item -- | Take list of inline elements and return wrapped doc. wrap :: WriterOptions -> [Inline] -> Doc @@ -236,25 +248,24 @@ wrap opts lst = fsep $ map (inlinesToDocbook opts) (splitBy Space lst) -- | Convert a list of inline elements to Docbook. inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst) +inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst -- | Convert an inline element to Docbook. inlineToDocbook :: WriterOptions -> Inline -> Doc inlineToDocbook opts (Str str) = text $ escapeStringForXML str inlineToDocbook opts (Emph lst) = - inTagsSimple "emphasis" (inlinesToDocbook opts lst) + inTagsSimple "emphasis" $ inlinesToDocbook opts lst inlineToDocbook opts (Strong lst) = - inTags False "emphasis" [("role", "strong")] - (inlinesToDocbook opts lst) + inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst inlineToDocbook opts (Strikeout lst) = - inTags False "emphasis" [("role", "strikethrough")] - (inlinesToDocbook opts lst) + inTags False "emphasis" [("role", "strikethrough")] $ + inlinesToDocbook opts lst inlineToDocbook opts (Superscript lst) = - inTagsSimple "superscript" (inlinesToDocbook opts lst) + inTagsSimple "superscript" $ inlinesToDocbook opts lst inlineToDocbook opts (Subscript lst) = - inTagsSimple "subscript" (inlinesToDocbook opts lst) + inTagsSimple "subscript" $ inlinesToDocbook opts lst inlineToDocbook opts (Quoted _ lst) = - inTagsSimple "quote" (inlinesToDocbook opts lst) + inTagsSimple "quote" $ inlinesToDocbook opts lst inlineToDocbook opts Apostrophe = char '\'' inlineToDocbook opts Ellipses = text "…" inlineToDocbook opts EmDash = text "—" @@ -263,26 +274,24 @@ inlineToDocbook opts (Code str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str) inlineToDocbook opts (HtmlInline str) = empty -inlineToDocbook opts LineBreak = - text $ "" +inlineToDocbook opts LineBreak = text $ "" inlineToDocbook opts Space = char ' ' inlineToDocbook opts (Link txt (src, tit)) = if isPrefixOf "mailto:" src - then let src' = drop 7 src - emailLink = inTagsSimple "email" $ text (escapeStringForXML $ src') - in if txt == [Code src'] - then emailLink - else inlinesToDocbook opts txt <+> char '(' <> emailLink <> - char ')' - else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt + then let src' = drop 7 src + emailLink = inTagsSimple "email" $ text $ + escapeStringForXML $ src' + in if txt == [Code src'] + then emailLink + else inlinesToDocbook opts txt <+> char '(' <> emailLink <> + char ')' + else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt inlineToDocbook opts (Image alt (src, tit)) = let titleDoc = if null tit then empty else inTagsIndented "objectinfo" $ - inTagsIndented "title" - (text $ escapeStringForXML tit) in - inTagsIndented "inlinemediaobject" $ - inTagsIndented "imageobject" $ - titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] + inTagsIndented "title" (text $ escapeStringForXML tit) + in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ + titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] inlineToDocbook opts (Note contents) = inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 34c59f334..ace5cfe5f 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -27,15 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to HTML. -} -module Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) where +module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition import Text.Pandoc.ASCIIMathML +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.Pandoc.Shared -import Text.Pandoc.Entities (decodeEntities) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, partition, intersperse ) +import Data.List ( isPrefixOf, intersperse ) import qualified Data.Set as S import Control.Monad.State import Text.XHtml.Transitional @@ -55,8 +55,8 @@ defaultWriterState = WriterState {stNotes= [], stIds = [], writeHtmlString :: WriterOptions -> Pandoc -> String writeHtmlString opts = if writerStandalone opts - then renderHtml . (writeHtml opts) - else renderHtmlFragment . (writeHtml opts) + then renderHtml . writeHtml opts + else renderHtmlFragment . writeHtml opts -- | Convert Pandoc document to Html structure. writeHtml :: WriterOptions -> Pandoc -> Html @@ -74,49 +74,51 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) = map (\a -> meta ! [name "author", content a]) authors) +++ (if null date then noHtml - else meta ! [name "date", content date]) - titleHeader = if (writerStandalone opts) && (not (null tit)) && - (not (writerS5 opts)) + else meta ! [name "date", content date]) + titleHeader = if writerStandalone opts && not (null tit) && + not (writerS5 opts) then h1 ! [theclass "title"] $ topTitle else noHtml headerBlocks = filter isHeaderBlock blocks - ids = uniqueIdentifiers $ map (\(Header _ lst) -> lst) headerBlocks - toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks ids - else noHtml + ids = uniqueIdentifiers $ + map (\(Header _ lst) -> lst) headerBlocks + toc = if writerTableOfContents opts + then tableOfContents opts headerBlocks ids + else noHtml (blocks', newstate) = - runState (blockListToHtml opts blocks) - (defaultWriterState {stIds = ids}) - cssLines = stCSS newstate - css = if S.null cssLines - then noHtml - else style ! [thetype "text/css"] $ primHtml $ - '\n':(unlines $ S.toList cssLines) - math = if stMath newstate - then case writerASCIIMathMLURL opts of - Just path -> script ! [src path, - thetype "text/javascript"] $ noHtml - Nothing -> primHtml asciiMathMLScript - else noHtml - head = header $ metadata +++ math +++ css +++ - primHtml (writerHeader opts) - notes = reverse (stNotes newstate) - before = primHtml $ writerIncludeBefore opts - after = primHtml $ writerIncludeAfter opts - thebody = before +++ titleHeader +++ toc +++ blocks' +++ - footnoteSection opts notes +++ after + runState (blockListToHtml opts blocks) + (defaultWriterState {stIds = ids}) + cssLines = stCSS newstate + css = if S.null cssLines + then noHtml + else style ! [thetype "text/css"] $ primHtml $ + '\n':(unlines $ S.toList cssLines) + math = if stMath newstate + then case writerASCIIMathMLURL opts of + Just path -> script ! [src path, + thetype "text/javascript"] $ + noHtml + Nothing -> primHtml asciiMathMLScript + else noHtml + head = header $ metadata +++ math +++ css +++ + primHtml (writerHeader opts) + notes = reverse (stNotes newstate) + before = primHtml $ writerIncludeBefore opts + after = primHtml $ writerIncludeAfter opts + thebody = before +++ titleHeader +++ toc +++ blocks' +++ + footnoteSection opts notes +++ after in if writerStandalone opts - then head +++ (body thebody) + then head +++ body thebody else thebody -- | Construct table of contents from list of header blocks and identifiers. -- Assumes there are as many identifiers as header blocks. tableOfContents :: WriterOptions -> [Block] -> [String] -> Html tableOfContents opts headers ids = - let opts' = opts { writerIgnoreNotes = True } + let opts' = opts { writerIgnoreNotes = True } contentsTree = hierarchicalize headers - contents = evalState (mapM (elementToListItem opts') contentsTree) - (defaultWriterState {stIds = ids}) + contents = evalState (mapM (elementToListItem opts') contentsTree) + (defaultWriterState {stIds = ids}) in thediv ! [identifier "toc"] $ unordList contents -- | Converts an Element to a list item for a table of contents, @@ -135,7 +137,8 @@ elementToListItem opts (Sec headerText subsecs) = do let subList = if null subHeads then noHtml else unordList subHeads - return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++ subList + return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++ + subList -- | Convert list of Note blocks to a footnote
. -- Assumes notes are sorted. @@ -143,62 +146,61 @@ footnoteSection :: WriterOptions -> [Html] -> Html footnoteSection opts notes = if null notes then noHtml - else thediv ! [theclass "footnotes"] $ - hr +++ (olist << notes) + else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) -- | Obfuscate a "mailto:" link using Javascript. obfuscateLink :: WriterOptions -> String -> String -> Html obfuscateLink opts text src = let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$" - src' = map toLower src in - case (matchRegex emailRegex src') of - (Just [name, domain]) -> - let domain' = substitute "." " dot " domain - at' = obfuscateChar '@' - (linkText, altText) = - if text == drop 7 src' -- autolink - then ("''+e+''", name ++ " at " ++ domain') - else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++ - domain' ++ ")") in - if writerStrictMarkdown opts - then -- need to use primHtml or &'s are escaped to & in URL - primHtml $ "" ++ (obfuscateString text) ++ "" - else (script ! [thetype "text/javascript"] $ - primHtml ("\n\n")) +++ - noscript (primHtml $ obfuscateString altText) - _ -> anchor ! [href src] $ primHtml text -- malformed email + src' = map toLower src + in case (matchRegex emailRegex src') of + (Just [name, domain]) -> + let domain' = substitute "." " dot " domain + at' = obfuscateChar '@' + (linkText, altText) = + if text == drop 7 src' -- autolink + then ("''+e+''", name ++ " at " ++ domain') + else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++ + domain' ++ ")") + in if writerStrictMarkdown opts + then -- need to use primHtml or &'s are escaped to & in URL + primHtml $ "" ++ (obfuscateString text) ++ "" + else (script ! [thetype "text/javascript"] $ + primHtml ("\n\n")) +++ + noscript (primHtml $ obfuscateString altText) + _ -> anchor ! [href src] $ primHtml text -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String obfuscateChar char = - let num = ord char in - let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in - "&#" ++ numstr ++ ";" + let num = ord char + numstr = if even num then show num else "x" ++ showHex num "" + in "&#" ++ numstr ++ ";" -- | Obfuscate string using entities. obfuscateString :: String -> String -obfuscateString = (concatMap obfuscateChar) . decodeEntities +obfuscateString = concatMap obfuscateChar . decodeCharacterReferences -- | True if character is a punctuation character (unicode). isPunctuation :: Char -> Bool isPunctuation c = - let c' = ord c in - if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) || - (c' >= 0xE000 && c' <= 0xE0FF) - then True - else False + let c' = ord c + in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || + c' >= 0xE000 && c' <= 0xE0FF + then True + else False -- | Add CSS for document header. addToCSS :: String -> State WriterState () addToCSS item = do st <- get let current = stCSS st - put $ st {stCSS = (S.insert item current)} + put $ st {stCSS = S.insert item current} -- | Convert Pandoc inline list to plain text identifier. inlineListToIdentifier :: [Inline] -> String @@ -206,27 +208,26 @@ inlineListToIdentifier [] = "" inlineListToIdentifier (x:xs) = xAsText ++ inlineListToIdentifier xs where xAsText = case x of - Str s -> filter - (\c -> (c == '-') || not (isPunctuation c)) $ - concat $ intersperse "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier lst - Strikeout lst -> inlineListToIdentifier lst - Superscript lst -> inlineListToIdentifier lst - Subscript lst -> inlineListToIdentifier lst - Strong lst -> inlineListToIdentifier lst - Quoted _ lst -> inlineListToIdentifier lst - Code s -> s - Space -> "-" - EmDash -> "-" - EnDash -> "-" - Apostrophe -> "" - Ellipses -> "" - LineBreak -> "-" - TeX _ -> "" - HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier lst - Image lst _ -> inlineListToIdentifier lst - Note _ -> "" + Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ + concat $ intersperse "-" $ words $ map toLower s + Emph lst -> inlineListToIdentifier lst + Strikeout lst -> inlineListToIdentifier lst + Superscript lst -> inlineListToIdentifier lst + Subscript lst -> inlineListToIdentifier lst + Strong lst -> inlineListToIdentifier lst + Quoted _ lst -> inlineListToIdentifier lst + Code s -> s + Space -> "-" + EmDash -> "-" + EnDash -> "-" + Apostrophe -> "" + Ellipses -> "" + LineBreak -> "-" + TeX _ -> "" + HtmlInline _ -> "" + Link lst _ -> inlineListToIdentifier lst + Image lst _ -> inlineListToIdentifier lst + Note _ -> "" -- | Return unique identifiers for list of inline lists. uniqueIdentifiers :: [[Inline]] -> [String] @@ -236,102 +237,99 @@ uniqueIdentifiers ls = matches = length $ filter (== new) nonuniqueIds new' = new ++ if matches > 0 then ("-" ++ show matches) else "" in (new:nonuniqueIds, new':uniqueIds) - in reverse $ snd (foldl addIdentifier ([],[]) $ ls) + in reverse $ snd $ foldl addIdentifier ([],[]) ls -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html -blockToHtml opts block = - case block of - (Null) -> return $ noHtml - (Plain lst) -> inlineListToHtml opts lst - (Para lst) -> inlineListToHtml opts lst >>= (return . paragraph) - (RawHtml str) -> return $ primHtml str - (HorizontalRule) -> return $ hr - (CodeBlock str) -> return $ pre $ thecode << (str ++ "\n") +blockToHtml opts Null = return $ noHtml +blockToHtml opts (Plain lst) = inlineListToHtml opts lst +blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) +blockToHtml opts (RawHtml str) = return $ primHtml str +blockToHtml opts (HorizontalRule) = return $ hr +blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n") -- the final \n for consistency with Markdown.pl - (BlockQuote blocks) -> -- in S5, treat list in blockquote specially - -- if default is incremental, make it nonincremental; - -- otherwise incremental - if writerS5 opts - then let inc = not (writerIncremental opts) in - case blocks of - [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) - (BulletList lst) - [OrderedList attribs lst] -> - blockToHtml (opts {writerIncremental = inc}) - (OrderedList attribs lst) - otherwise -> blockListToHtml opts blocks >>= - (return . blockquote) - else blockListToHtml opts blocks >>= (return . blockquote) - (Header level lst) -> do contents <- inlineListToHtml opts lst - st <- get - let ids = stIds st - let (id, rest) = if null ids - then ("", []) - else (head ids, tail ids) - put $ st {stIds = rest} - let attribs = [identifier id] - let headerHtml = case level of - 1 -> h1 contents ! attribs - 2 -> h2 contents ! attribs - 3 -> h3 contents ! attribs - 4 -> h4 contents ! attribs - 5 -> h5 contents ! attribs - 6 -> h6 contents ! attribs - _ -> paragraph contents ! attribs - let headerHtml' = if writerTableOfContents opts - then anchor ! [href ("#TOC-" ++ id)] $ - headerHtml - else headerHtml - return headerHtml' - (BulletList lst) -> do contents <- mapM (blockListToHtml opts) lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ unordList ! attribs $ contents - (OrderedList (startnum, numstyle, _) lst) -> do - contents <- mapM (blockListToHtml opts) lst - let numstyle' = camelCaseToHyphenated $ show numstyle - let attribs = (if writerIncremental opts - then [theclass "incremental"] - else []) ++ - (if startnum /= 1 - then [start startnum] - else []) ++ - (if numstyle /= DefaultStyle - then [theclass numstyle'] - else []) - if numstyle /= DefaultStyle - then addToCSS $ "ol." ++ numstyle' ++ - " { list-style-type: " ++ - numstyle' ++ "; }" - else return () - return $ ordList ! attribs $ contents - (DefinitionList lst) -> do contents <- mapM (\(term, def) -> - do term' <- inlineListToHtml opts term - def' <- blockListToHtml opts def - return $ (term', def')) - lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ defList ! attribs $ contents - (Table capt aligns widths headers rows) -> - do let alignStrings = map alignmentToString aligns - captionDoc <- if null capt - then return noHtml - else inlineListToHtml opts capt >>= - (return . caption) - colHeads <- colHeadsToHtml opts alignStrings - widths headers - rows' <- mapM (tableRowToHtml opts alignStrings) rows - return $ table $ captionDoc +++ colHeads +++ rows' +blockToHtml opts (BlockQuote blocks) = + -- in S5, treat list in blockquote specially + -- if default is incremental, make it nonincremental; + -- otherwise incremental + if writerS5 opts + then let inc = not (writerIncremental opts) in + case blocks of + [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) + (BulletList lst) + [OrderedList attribs lst] -> + blockToHtml (opts {writerIncremental = inc}) + (OrderedList attribs lst) + otherwise -> blockListToHtml opts blocks >>= + (return . blockquote) + else blockListToHtml opts blocks >>= (return . blockquote) +blockToHtml opts (Header level lst) = do + contents <- inlineListToHtml opts lst + st <- get + let ids = stIds st + let (id, rest) = if null ids + then ("", []) + else (head ids, tail ids) + put $ st {stIds = rest} + let attribs = [identifier id] + let headerHtml = case level of + 1 -> h1 contents ! attribs + 2 -> h2 contents ! attribs + 3 -> h3 contents ! attribs + 4 -> h4 contents ! attribs + 5 -> h5 contents ! attribs + 6 -> h6 contents ! attribs + _ -> paragraph contents ! attribs + return $ if writerTableOfContents opts + then anchor ! [href ("#TOC-" ++ id)] $ headerHtml + else headerHtml +blockToHtml opts (BulletList lst) = do + contents <- mapM (blockListToHtml opts) lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ unordList ! attribs $ contents +blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do + contents <- mapM (blockListToHtml opts) lst + let numstyle' = camelCaseToHyphenated $ show numstyle + let attribs = (if writerIncremental opts + then [theclass "incremental"] + else []) ++ + (if startnum /= 1 + then [start startnum] + else []) ++ + (if numstyle /= DefaultStyle + then [theclass numstyle'] + else []) + if numstyle /= DefaultStyle + then addToCSS $ "ol." ++ numstyle' ++ + " { list-style-type: " ++ + numstyle' ++ "; }" + else return () + return $ ordList ! attribs $ contents +blockToHtml opts (DefinitionList lst) = do + contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term + def' <- blockListToHtml opts def + return $ (term', def')) lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ defList ! attribs $ contents +blockToHtml opts (Table capt aligns widths headers rows) = do + let alignStrings = map alignmentToString aligns + captionDoc <- if null capt + then return noHtml + else inlineListToHtml opts capt >>= return . caption + colHeads <- colHeadsToHtml opts alignStrings + widths headers + rows' <- mapM (tableRowToHtml opts alignStrings) rows + return $ table $ captionDoc +++ colHeads +++ rows' -colHeadsToHtml opts alignStrings widths headers = - do heads <- sequence $ zipWith3 - (\align width item -> tableItemToHtml opts th align width item) - alignStrings widths headers - return $ tr $ toHtmlFromList heads +colHeadsToHtml opts alignStrings widths headers = do + heads <- sequence $ zipWith3 + (\align width item -> tableItemToHtml opts th align width item) + alignStrings widths headers + return $ tr $ toHtmlFromList heads alignmentToString alignment = case alignment of AlignLeft -> "left" @@ -339,24 +337,27 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableRowToHtml opts aligns cols = - do contents <- sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols - return $ tr $ toHtmlFromList contents +tableRowToHtml opts aligns cols = + (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>= + return . tr . toHtmlFromList -tableItemToHtml opts tag align' width item = - do contents <- blockListToHtml opts item - let attrib = [align align'] ++ - if (width /= 0) - then [thestyle ("{width: " ++ show (truncate (100*width)) ++ "%;}")] - else [] - return $ tag ! attrib $ contents +tableItemToHtml opts tag align' width item = do + contents <- blockListToHtml opts item + let attrib = [align align'] ++ + if width /= 0 + then [thestyle ("{width: " ++ show (truncate (100*width)) ++ + "%;}")] + else [] + return $ tag ! attrib $ contents blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html -blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= (return . toHtmlFromList) +blockListToHtml opts lst = + mapM (blockToHtml opts) lst >>= return . toHtmlFromList -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html -inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= (return . toHtmlFromList) +inlineListToHtml opts lst = + mapM (inlineToHtml opts) lst >>= return . toHtmlFromList -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> State WriterState Html @@ -369,52 +370,58 @@ inlineToHtml opts inline = (EnDash) -> return $ primHtmlChar "ndash" (Ellipses) -> return $ primHtmlChar "hellip" (Apostrophe) -> return $ primHtmlChar "rsquo" - (Emph lst) -> inlineListToHtml opts lst >>= (return . emphasize) - (Strong lst) -> inlineListToHtml opts lst >>= (return . strong) + (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize + (Strong lst) -> inlineListToHtml opts lst >>= return . strong (Code str) -> return $ thecode << str - (Strikeout lst) -> addToCSS ".strikeout { text-decoration: line-through; }" >> + (Strikeout lst) -> addToCSS + ".strikeout { text-decoration: line-through; }" >> inlineListToHtml opts lst >>= - (return . (thespan ! [theclass "strikeout"])) - (Superscript lst) -> inlineListToHtml opts lst >>= (return . sup) - (Subscript lst) -> inlineListToHtml opts lst >>= (return . sub) + return . (thespan ! [theclass "strikeout"]) + (Superscript lst) -> inlineListToHtml opts lst >>= return . sup + (Subscript lst) -> inlineListToHtml opts lst >>= return . sub (Quoted quoteType lst) -> let (leftQuote, rightQuote) = case quoteType of SingleQuote -> (primHtmlChar "lsquo", primHtmlChar "rsquo") DoubleQuote -> (primHtmlChar "ldquo", - primHtmlChar "rdquo") in - do contents <- inlineListToHtml opts lst - return $ leftQuote +++ contents +++ rightQuote - (TeX str) -> do if writerUseASCIIMathML opts - then modify (\st -> st {stMath = True}) - else return () - return $ stringToHtml str + primHtmlChar "rdquo") + in do contents <- inlineListToHtml opts lst + return $ leftQuote +++ contents +++ rightQuote + (TeX str) -> (if writerUseASCIIMathML opts + then modify (\st -> st {stMath = True}) + else return ()) >> return (stringToHtml str) (HtmlInline str) -> return $ primHtml str (Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src -> - do return $ obfuscateLink opts str src - (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> - do linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts (show linkText) src - (Link txt (src,tit)) -> - do linkText <- inlineListToHtml opts txt - return $ anchor ! ([href src] ++ - if null tit then [] else [title tit]) $ linkText - (Image txt (source,tit)) -> - do alternate <- inlineListToHtml opts txt - let alternate' = renderHtmlFragment alternate - let attributes = [src source, title tit] ++ - if null txt then [] else [alt alternate'] - return $ image ! attributes - -- note: null title included, as in Markdown.pl - (Note contents) -> do st <- get - let notes = stNotes st - let number = (length notes) + 1 - let ref = show number - htmlContents <- blockListToNote opts ref contents - put $ st {stNotes = (htmlContents:notes)} -- push contents onto front of notes - return $ anchor ! [href ("#fn" ++ ref), - theclass "footnoteRef", - identifier ("fnref" ++ ref)] << sup << ref + return $ obfuscateLink opts str src + (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do + linkText <- inlineListToHtml opts txt + return $ obfuscateLink opts (show linkText) src + (Link txt (src,tit)) -> do + linkText <- inlineListToHtml opts txt + return $ anchor ! ([href src] ++ + if null tit then [] else [title tit]) $ + linkText + (Image txt (source,tit)) -> do + alternate <- inlineListToHtml opts txt + let alternate' = renderHtmlFragment alternate + let attributes = [src source, title tit] ++ + if null txt + then [] + else [alt alternate'] + return $ image ! attributes + -- note: null title included, as in Markdown.pl + (Note contents) -> do + st <- get + let notes = stNotes st + let number = (length notes) + 1 + let ref = show number + htmlContents <- blockListToNote opts ref contents + -- push contents onto front of notes + put $ st {stNotes = (htmlContents:notes)} + return $ anchor ! [href ("#fn" ++ ref), + theclass "footnoteRef", + identifier ("fnref" ++ ref)] << + sup << ref blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html blockListToNote opts ref blocks = @@ -434,6 +441,6 @@ blockListToNote opts ref blocks = [Plain (lst ++ backlink)] _ -> otherBlocks ++ [lastBlock, Plain backlink] - in do contents <- blockListToHtml opts blocks' - return $ li ! [identifier ("fn" ++ ref)] $ contents + in do contents <- blockListToHtml opts blocks' + return $ li ! [identifier ("fn" ++ ref)] $ contents diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 3d0c66e45..ad1f3e45f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -27,16 +27,14 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into LaTeX. -} -module Text.Pandoc.Writers.LaTeX ( - writeLaTeX - ) where +module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Printf ( printf ) import Data.List ( (\\), isInfixOf ) +import Data.Char ( toLower ) import qualified Data.Set as S import Control.Monad.State -import Data.Char ( toLower ) data WriterState = WriterState { stIncludes :: S.Set String -- strings to include in header @@ -77,16 +75,16 @@ latexHeader :: WriterOptions -- ^ Options, including LaTeX header -> Meta -- ^ Meta with bibliographic information -> State WriterState String latexHeader options (Meta title authors date) = do - titletext <- if null title - then return "" - else do title' <- inlineListToLaTeX title - return $ "\\title{" ++ title' ++ "}\n" - extras <- get >>= (return . unlines . S.toList. stIncludes) + titletext <- if null title + then return "" + else do title' <- inlineListToLaTeX title + return $ "\\title{" ++ title' ++ "}\n" + extras <- get >>= (return . unlines . S.toList. stIncludes) let verbatim = if "\\usepackage{fancyvrb}" `isInfixOf` extras then "\\VerbatimFootnotes % allows verbatim text in footnotes\n" else "" - let authorstext = "\\author{" ++ (joinWithSep "\\\\" - (map stringToLaTeX authors)) ++ "}\n" + let authorstext = "\\author{" ++ + joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}\n" let datetext = if date == "" then "" else "\\date{" ++ stringToLaTeX date ++ "}\n" @@ -124,8 +122,8 @@ deVerb (other:rest) = other:(deVerb rest) blockToLaTeX :: Block -- ^ Block to convert -> State WriterState String blockToLaTeX Null = return "" -blockToLaTeX (Plain lst) = (inlineListToLaTeX lst) >>= (return . (++ "\n")) -blockToLaTeX (Para lst) = (inlineListToLaTeX lst) >>= (return . (++ "\n\n")) +blockToLaTeX (Plain lst) = inlineListToLaTeX lst >>= return . (++ "\n") +blockToLaTeX (Para lst) = inlineListToLaTeX lst >>= return . (++ "\n\n") blockToLaTeX (BlockQuote lst) = do contents <- blockListToLaTeX lst return $ "\\begin{quote}\n" ++ contents ++ "\\end{quote}\n" @@ -184,22 +182,22 @@ blockToLaTeX (Table caption aligns widths heads rows) = do colWidths aligns let tableBody = "\\begin{tabular}{" ++ colDescriptors ++ "}\n" ++ headers ++ "\\hline\n" ++ concat rows' ++ "\\end{tabular}\n" - let centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n" + let centered str = "\\begin{center}\n" ++ str ++ "\\end{center}\n" addToHeader "\\usepackage{array}\n\ - \% This is needed because raggedright in table elements redefines \\\\:\n\ - \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\ - \\\let\\PBS=\\PreserveBackslash" + \% This is needed because raggedright in table elements redefines \\\\:\n\ + \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\ + \\\let\\PBS=\\PreserveBackslash" return $ if null captionText then centered tableBody ++ "\n" - else "\\begin{table}[h]\n" ++ centered tableBody ++ "\\caption{" ++ - captionText ++ "}\n" ++ "\\end{table}\n\n" + else "\\begin{table}[h]\n" ++ centered tableBody ++ + "\\caption{" ++ captionText ++ "}\n" ++ "\\end{table}\n\n" -blockListToLaTeX lst = mapM blockToLaTeX lst >>= (return . concat) +blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . concat tableRowToLaTeX cols = - mapM blockListToLaTeX cols >>= (return . (++ "\\\\\n") . (joinWithSep " & ")) + mapM blockListToLaTeX cols >>= return . (++ "\\\\\n") . (joinWithSep " & ") -listItemToLaTeX lst = blockListToLaTeX lst >>= (return . ("\\item "++)) +listItemToLaTeX lst = blockListToLaTeX lst >>= return . ("\\item "++) defListItemToLaTeX (term, def) = do term' <- inlineListToLaTeX $ deVerb term @@ -209,8 +207,7 @@ defListItemToLaTeX (term, def) = do -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: [Inline] -- ^ Inlines to convert -> State WriterState String -inlineListToLaTeX lst = - mapM inlineToLaTeX lst >>= (return . concat) +inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . concat isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 3232a454a..b9596dc2d 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -28,14 +28,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to groff man page format. -} -module Text.Pandoc.Writers.Man ( - writeMan - ) where +module Text.Pandoc.Writers.Man ( writeMan) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Printf ( printf ) -import Data.Char ( toUpper ) -import Data.List ( group, isPrefixOf, drop, find, nub, intersperse ) +import Data.List ( isPrefixOf, drop, nub, intersperse ) import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State @@ -45,16 +42,15 @@ type WriterState = (Notes, Preprocessors) -- | Convert Pandoc to Man. writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = - render $ evalState (pandocToMan opts document) ([],[]) +writeMan opts document = render $ evalState (pandocToMan opts document) ([],[]) -- | Return groff man representation of document. pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc pandocToMan opts (Pandoc meta blocks) = do let before = writerIncludeBefore opts let after = writerIncludeAfter opts - before' = if null before then empty else text before - after' = if null after then empty else text after + let before' = if null before then empty else text before + let after' = if null after then empty else text after (head, foot) <- metaToMan opts meta body <- blockListToMan opts blocks (notes, preprocessors) <- get @@ -84,8 +80,8 @@ metaToMan options (Meta title authors date) = do 1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors) 2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors) return $ if writerStandalone options - then (head, foot) - else (empty, empty) + then (head, foot) + else (empty, empty) -- | Return man representation of notes. notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc @@ -93,7 +89,7 @@ notesToMan opts notes = if null notes then return empty else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= - (return . (text ".SH NOTES" $$) . vcat) + return . (text ".SH NOTES" $$) . vcat -- | Return man representation of a note. noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc @@ -110,8 +106,7 @@ wrappedMan opts sect = do -- | Association list of characters to escape. manEscapes :: [(Char, String)] -manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ - backslashEscapes "\".@\\" +manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\" -- | Escape special characters for Man. escapeString :: String -> String @@ -140,8 +135,7 @@ blockToMan opts (Header level inlines) = do return $ text heading <> contents blockToMan opts (CodeBlock str) = return $ text ".PP" $$ text "\\f[CR]" $$ - text ((unlines . map (" " ++) . lines) (escapeCode str)) <> - text "\\f[]" + text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]" blockToMan opts (BlockQuote blocks) = do contents <- blockListToMan opts blocks return $ text ".RS" $$ contents $$ text ".RE" diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index eb633166d..e7acd762c 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -29,9 +29,7 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text. Markdown: -} -module Text.Pandoc.Writers.Markdown ( - writeMarkdown - ) where +module Text.Pandoc.Writers.Markdown ( writeMarkdown) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Blocks @@ -53,10 +51,10 @@ pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc pandocToMarkdown opts (Pandoc meta blocks) = do let before = writerIncludeBefore opts let after = writerIncludeAfter opts - before' = if null before then empty else text before - after' = if null after then empty else text after + let before' = if null before then empty else text before + let after' = if null after then empty else text after metaBlock <- metaToMarkdown opts meta - let head = if (writerStandalone opts) + let head = if writerStandalone opts then metaBlock $+$ text (writerHeader opts) else empty let headerBlocks = filter isHeaderBlock blocks @@ -73,8 +71,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do -- | Return markdown representation of reference key table. keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc -keyTableToMarkdown opts refs = - mapM (keyToMarkdown opts) refs >>= (return . vcat) +keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat -- | Return markdown representation of a reference key. keyToMarkdown :: WriterOptions @@ -90,7 +87,7 @@ keyToMarkdown opts (label, (src, tit)) = do notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc notesToMarkdown opts notes = mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= - (return . vcat) + return . vcat -- | Return markdown representation of a note. noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc @@ -143,8 +140,7 @@ tableOfContents opts headers = -- | Converts an Element to a list item for a table of contents, elementToListItem :: Element -> [Block] elementToListItem (Blk _) = [] -elementToListItem (Sec headerText subsecs) = - [Plain headerText] ++ +elementToListItem (Sec headerText subsecs) = [Plain headerText] ++ if null subsecs then [] else [BulletList $ map elementToListItem subsecs] @@ -184,9 +180,8 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do let makeRow = hsepBlocks . (zipWith alignHeader aligns) . (zipWith docToBlock widthsInChars) let head = makeRow headers' - rows' <- mapM (\row -> do - cols <- mapM (blockListToMarkdown opts) row - return $ makeRow cols) rows + rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row + return $ makeRow cols) rows let tableWidth = sum widthsInChars let maxRowHeight = maximum $ map heightOfBlock (head:rows') let isMultilineTable = maxRowHeight > 1 @@ -208,8 +203,7 @@ blockToMarkdown opts (OrderedList attribs items) = do let markers = orderedListMarkers attribs let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' - else m) - markers + else m) markers contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ zip markers' items return $ (vcat contents) <> text "\n" @@ -241,8 +235,8 @@ definitionListItemToMarkdown opts (label, items) = do let tabStop = writerTabStop opts let leader = char ':' contents <- mapM (\item -> blockToMarkdown opts item >>= - (\txt -> return (leader $$ nest tabStop txt))) - items >>= (return . vcat) + (\txt -> return (leader $$ nest tabStop txt))) + items >>= return . vcat return $ labelText $+$ contents -- | Convert list of Pandoc block elements to markdown. @@ -250,29 +244,30 @@ blockListToMarkdown :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements -> State WriterState Doc blockListToMarkdown opts blocks = - mapM (blockToMarkdown opts) blocks >>= (return . vcat) + mapM (blockToMarkdown opts) blocks >>= return . vcat -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. getReference :: [Inline] -> Target -> State WriterState [Inline] getReference label (src, tit) = do - (_,refs) <- get - case find ((== (src, tit)) . snd) refs of - Just (ref, _) -> return ref - Nothing -> do - let label' = case find ((== label) . fst) refs of - Just _ -> -- label is used; generate numerical label - case find (\n -> not (any (== [Str (show n)]) - (map fst refs))) [1..10000] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" - Nothing -> label - modify (\(notes, refs) -> (notes, (label', (src,tit)):refs)) - return label' + (_,refs) <- get + case find ((== (src, tit)) . snd) refs of + Just (ref, _) -> return ref + Nothing -> do + let label' = case find ((== label) . fst) refs of + Just _ -> -- label is used; generate numerical label + case find (\n -> not (any (== [Str (show n)]) + (map fst refs))) [1..10000] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" + Nothing -> label + modify (\(notes, refs) -> (notes, (label', (src,tit)):refs)) + return label' -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = mapM (inlineToMarkdown opts) lst >>= (return . hcat) +inlineListToMarkdown opts lst = + mapM (inlineToMarkdown opts) lst >>= return . hcat -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc @@ -327,13 +322,13 @@ inlineToMarkdown opts (Link txt (src, tit)) = do return $ if useAuto then char '<' <> text srcSuffix <> char '>' else if useRefLinks - then let first = char '[' <> linktext <> char ']' - second = if txt == ref - then text "[]" - else char '[' <> reftext <> char ']' - in first <> second - else char '[' <> linktext <> char ']' <> - char '(' <> text src <> linktitle <> char ')' + then let first = char '[' <> linktext <> char ']' + second = if txt == ref + then text "[]" + else char '[' <> reftext <> char ']' + in first <> second + else char '[' <> linktext <> char ']' <> + char '(' <> text src <> linktitle <> char ')' inlineToMarkdown opts (Image alternate (source, tit)) = do let txt = if (null alternate) || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index c39f7bdab..70df479b5 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -29,13 +29,11 @@ Conversion of 'Pandoc' documents to reStructuredText. reStructuredText: -} -module Text.Pandoc.Writers.RST ( - writeRST - ) where +module Text.Pandoc.Writers.RST ( writeRST) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Blocks -import Data.List ( group, isPrefixOf, drop, find, intersperse ) +import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse ) import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State @@ -70,8 +68,7 @@ pandocToRST opts (Pandoc meta blocks) = do -- | Return RST representation of reference key table. keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc -keyTableToRST opts refs = - mapM (keyToRST opts) refs >>= (return . vcat) +keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat -- | Return RST representation of a reference key. keyToRST :: WriterOptions @@ -85,7 +82,7 @@ keyToRST opts (label, (src, tit)) = do notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc notesToRST opts notes = mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>= - (return . vcat) + return . vcat -- | Return RST representation of a note. noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc @@ -96,8 +93,7 @@ noteToRST opts num note = do -- | Return RST representation of picture reference table. pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc -pictTableToRST opts refs = - mapM (pictToRST opts) refs >>= (return . vcat) +pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat -- | Return RST representation of a picture substitution reference. pictToRST :: WriterOptions @@ -112,7 +108,7 @@ pictToRST opts (label, (src, _)) = do wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc wrappedRST opts inlines = mapM (wrappedRSTSection opts) (splitBy LineBreak inlines) >>= - (return . vcat) + return . vcat wrappedRSTSection :: WriterOptions -> [Inline] -> State WriterState Doc wrappedRSTSection opts sect = do @@ -160,21 +156,19 @@ blockToRST :: WriterOptions -- ^ Options blockToRST opts Null = return empty blockToRST opts (Plain inlines) = wrappedRST opts inlines blockToRST opts (Para [TeX str]) = - let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in - return $ hang (text "\n.. raw:: latex\n") 3 - (vcat $ map text (lines str')) + let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in + return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str') blockToRST opts (Para inlines) = do contents <- wrappedRST opts inlines return $ contents <> text "\n" blockToRST opts (RawHtml str) = - let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in - return $ hang (text "\n.. raw:: html\n") 3 - (vcat $ map text (lines str')) + let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in + return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str') blockToRST opts HorizontalRule = return $ text "--------------\n" blockToRST opts (Header level inlines) = do contents <- inlineListToRST opts inlines let headerLength = length $ render contents - let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) + let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) let border = text $ replicate headerLength headerChar return $ contents $+$ border <> text "\n" blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$ @@ -200,11 +194,10 @@ blockToRST opts (Table caption aligns widths headers rows) = do beg = TextBlock 2 height (replicate height "| ") end = TextBlock 2 height (replicate height " |") middle = hcatBlocks $ intersperse sep blocks - let makeRow = hpipeBlocks . (zipWith docToBlock widthsInChars) + let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars let head = makeRow headers' - rows' <- mapM (\row -> do - cols <- mapM (blockListToRST opts) row - return $ makeRow cols) rows + rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row + return $ makeRow cols) rows let tableWidth = sum widthsInChars let maxRowHeight = maximum $ map heightOfBlock (head:rows') let border ch = char '+' <> char ch <> @@ -225,8 +218,7 @@ blockToRST opts (OrderedList (start, style, delim) items) = do (start, style, delim) let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') - markers + in m ++ replicate s ' ') markers contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $ zip markers' items -- ensure that sublists have preceding blank line @@ -262,11 +254,11 @@ blockListToRST :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements -> State WriterState Doc blockListToRST opts blocks = - mapM (blockToRST opts) blocks >>= (return . vcat) + mapM (blockToRST opts) blocks >>= return . vcat -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= (return . hcat) +inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat -- | Convert Pandoc inline element to RST. inlineToRST :: WriterOptions -> Inline -> State WriterState Doc @@ -319,8 +311,8 @@ inlineToRST opts (Link txt (src, tit)) = do inlineToRST opts (Image alternate (source, tit)) = do (notes, refs, pics) <- get let labelsUsed = map fst pics - let txt = if (null alternate) || (alternate == [Str ""]) || - (alternate `elem` labelsUsed) + let txt = if null alternate || alternate == [Str ""] || + alternate `elem` labelsUsed then [Str $ "image" ++ show (length refs)] else alternate let pics' = if (txt, (source, tit)) `elem` pics diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 9b3d6662c..3bd5c63b2 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -27,12 +27,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to RTF (rich text format). -} -module Text.Pandoc.Writers.RTF ( writeRTF) where +module Text.Pandoc.Writers.RTF ( writeRTF ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Regex ( matchRegexAll, mkRegex ) import Data.List ( isSuffixOf ) -import Data.Char ( ord, chr ) +import Data.Char ( ord ) -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String @@ -44,22 +44,22 @@ writeRTF options (Pandoc meta blocks) = then tableOfContents $ filter isHeaderBlock blocks else "" foot = if writerStandalone options then "\n}\n" else "" - body = (writerIncludeBefore options) ++ + body = writerIncludeBefore options ++ concatMap (blockToRTF 0 AlignDefault) blocks ++ - (writerIncludeAfter options) in - head ++ toc ++ body ++ foot + writerIncludeAfter options + in head ++ toc ++ body ++ foot -- | Construct table of contents from list of header blocks. tableOfContents :: [Block] -> String tableOfContents headers = let contentsTree = hierarchicalize headers - in concatMap (blockToRTF 0 AlignDefault) $ [Header 1 [Str "Contents"], - BulletList (map elementToListItem contentsTree)] + in concatMap (blockToRTF 0 AlignDefault) $ + [Header 1 [Str "Contents"], + BulletList (map elementToListItem contentsTree)] elementToListItem :: Element -> [Block] elementToListItem (Blk _) = [] -elementToListItem (Sec sectext subsecs) = - [Plain sectext] ++ +elementToListItem (Sec sectext subsecs) = [Plain sectext] ++ if null subsecs then [] else [BulletList (map elementToListItem subsecs)] @@ -67,10 +67,10 @@ elementToListItem (Sec sectext subsecs) = -- | Convert unicode characters (> 127) into rich text format representation. handleUnicode :: String -> String handleUnicode [] = [] -handleUnicode (c:cs) = if (ord c) > 127 - then '\\':'u':(show (ord c)) ++ "?" ++ - (handleUnicode cs) - else c:(handleUnicode cs) +handleUnicode (c:cs) = + if ord c > 127 + then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs + else c:(handleUnicode cs) -- | Escape special characters. escapeSpecial :: String -> String @@ -127,7 +127,7 @@ listIncrement = 360 -- | Returns appropriate bullet list marker for indent level. bulletMarker :: Int -> String -bulletMarker indent = case (indent `mod` 720) of +bulletMarker indent = case indent `mod` 720 of 0 -> "\\bullet " otherwise -> "\\endash " @@ -135,7 +135,7 @@ bulletMarker indent = case (indent `mod` 720) of orderedMarkers :: Int -> ListAttributes -> [String] orderedMarkers indent (start, style, delim) = if style == DefaultStyle && delim == DefaultDelim - then case (indent `mod` 720) of + then case indent `mod` 720 of 0 -> orderedListMarkers (start, Decimal, Period) otherwise -> orderedListMarkers (start, LowerAlpha, Period) else orderedListMarkers (start, style, delim) @@ -145,21 +145,21 @@ rtfHeader :: String -- ^ header text -> Meta -- ^ bibliographic information -> String rtfHeader headerText (Meta title authors date) = - let titletext = if null title + let titletext = if null title + then "" + else rtfPar 0 0 AlignCenter $ + "\\b \\fs36 " ++ inlineListToRTF title + authorstext = if null authors then "" - else rtfPar 0 0 AlignCenter ("\\b \\fs36 " ++ - inlineListToRTF title) - authorstext = if null authors - then "" - else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" - (map stringToRTF authors))) - datetext = if date == "" - then "" - else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in - let spacer = if null (titletext ++ authorstext ++ datetext) + else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $ + map stringToRTF authors)) + datetext = if date == "" then "" - else rtfPar 0 0 AlignDefault "" in - headerText ++ titletext ++ authorstext ++ datetext ++ spacer + else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in + let spacer = if null (titletext ++ authorstext ++ datetext) + then "" + else rtfPar 0 0 AlignDefault "" in + headerText ++ titletext ++ authorstext ++ datetext ++ spacer -- | Convert Pandoc block element to RTF. blockToRTF :: Int -- ^ indent level @@ -168,31 +168,27 @@ blockToRTF :: Int -- ^ indent level -> String blockToRTF _ _ Null = "" blockToRTF indent alignment (Plain lst) = - rtfCompact indent 0 alignment (inlineListToRTF lst) + rtfCompact indent 0 alignment $ inlineListToRTF lst blockToRTF indent alignment (Para lst) = - rtfPar indent 0 alignment (inlineListToRTF lst) + rtfPar indent 0 alignment $ inlineListToRTF lst blockToRTF indent alignment (BlockQuote lst) = concatMap (blockToRTF (indent + indentIncrement) alignment) lst blockToRTF indent _ (CodeBlock str) = rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) blockToRTF _ _ (RawHtml str) = "" -blockToRTF indent alignment (BulletList lst) = - spaceAtEnd $ +blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = - spaceAtEnd $ concat $ +blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = - spaceAtEnd $ +blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ concatMap (definitionListItemToRTF alignment indent) lst blockToRTF indent _ HorizontalRule = rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF indent alignment (Header level lst) = - rtfPar indent 0 alignment ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ - (inlineListToRTF lst)) +blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ + "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst blockToRTF indent alignment (Table caption aligns sizes headers rows) = - (tableRowToRTF True indent aligns sizes headers) ++ (concatMap - (tableRowToRTF False indent aligns sizes) rows) ++ + tableRowToRTF True indent aligns sizes headers ++ + concatMap (tableRowToRTF False indent aligns sizes) rows ++ rtfPar indent 0 alignment (inlineListToRTF caption) tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String @@ -201,8 +197,10 @@ tableRowToRTF header indent aligns sizes cols = totalTwips = 6 * 1440 -- 6 inches rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) 0 sizes - cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs" - else "") ++ "\\cellx" ++ show edge) rightEdges + cellDefs = map (\edge -> (if header + then "\\clbrdrb\\brdrs" + else "") ++ "\\cellx" ++ show edge) + rightEdges start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ "\\trkeep\\intbl\n{\n" end = "}\n\\intbl\\row}\n" @@ -234,11 +232,12 @@ listItemToRTF alignment indent marker list = let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in -- insert the list marker into the (processed) first block let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of - Just (before, matched, after, _) -> before ++ "\\fi" ++ - show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++ - show listIncrement ++ "\\tab" ++ after + Just (before, matched, after, _) -> + before ++ "\\fi" ++ show (0 - listIncrement) ++ + " " ++ marker ++ "\\tx" ++ + show listIncrement ++ "\\tab" ++ after Nothing -> first in - modFirst ++ (concat rest) + modFirst ++ concat rest -- | Convert definition list item (label, list of blocks) to RTF. definitionListItemToRTF :: Alignment -- ^ alignment @@ -285,4 +284,3 @@ inlineToRTF (Image alternate (source, tit)) = inlineToRTF (Note contents) = "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" - diff --git a/src/templates/DefaultHeaders.hs b/src/templates/DefaultHeaders.hs index d7815fb8a..1bd9fe1d2 100644 --- a/src/templates/DefaultHeaders.hs +++ b/src/templates/DefaultHeaders.hs @@ -1,4 +1,32 @@ --- | Default headers for Pandoc writers. +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.DefaultHeaders + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Default headers for Pandoc writers. +-} module Text.Pandoc.Writers.DefaultHeaders ( defaultLaTeXHeader, defaultConTeXtHeader, diff --git a/src/templates/S5.hs b/src/templates/S5.hs index b015d8ca6..a0b69b132 100644 --- a/src/templates/S5.hs +++ b/src/templates/S5.hs @@ -1,5 +1,33 @@ --- | Definitions for creation of S5 powerpoint-like HTML. --- (See .) +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.S5 + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Definitions for creation of S5 powerpoint-like HTML. +(See .) +-} module Text.Pandoc.Writers.S5 ( -- * Strings s5Meta, @@ -60,13 +88,13 @@ layoutDiv :: [Inline] -- ^ Title of document (for header or footer) -> [Block] -- ^ List of block elements returned layoutDiv title date = [(RawHtml "
\n
\n
\n
\n
\n"), (Header 1 [Str date]), (Header 2 title), (RawHtml "
\n
\n")] -presentationStart = (RawHtml "
\n\n") +presentationStart = RawHtml "
\n\n" -presentationEnd = (RawHtml "
\n") +presentationEnd = RawHtml "
\n" -slideStart = (RawHtml "
\n") +slideStart = RawHtml "
\n" -slideEnd = (RawHtml "
\n") +slideEnd = RawHtml "
\n" -- | Returns 'True' if block is a Header 1. isH1 :: Block -> Bool @@ -84,15 +112,22 @@ insertSlides beginning blocks = beforeHead ++ [slideEnd] else if beginning then - beforeHead ++ slideStart:(head rest):(insertSlides False (tail rest)) + beforeHead ++ + slideStart:(head rest):(insertSlides False (tail rest)) else - beforeHead ++ slideEnd:slideStart:(head rest):(insertSlides False (tail rest)) + beforeHead ++ + slideEnd:slideStart:(head rest):(insertSlides False (tail rest)) -- | Insert blocks into 'Pandoc' for slide structure. insertS5Structure :: Pandoc -> Pandoc insertS5Structure (Pandoc meta []) = Pandoc meta [] insertS5Structure (Pandoc (Meta title authors date) blocks) = - let slides = insertSlides True blocks - firstSlide = if (not (null title)) then [slideStart, (Header 1 title), (Header 3 [Str (joinWithSep ", " authors)]), (Header 4 [Str date]), slideEnd] else [] in - let newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++ slides ++ [presentationEnd] in - Pandoc (Meta title authors date) newBlocks + let slides = insertSlides True blocks + firstSlide = if not (null title) + then [slideStart, (Header 1 title), + (Header 3 [Str (joinWithSep ", " authors)]), + (Header 4 [Str date]), slideEnd] + else [] + newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++ + slides ++ [presentationEnd] + in Pandoc (Meta title authors date) newBlocks -- cgit v1.2.3