summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-11-03 22:14:03 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-11-03 22:14:03 +0000
commit4a841bfc5464907adea4cdd655485565565b40ae (patch)
tree36c0a21e3639614c8d25b5fb1909c32d0ab11dcd /src/Text/Pandoc
parent3116d30133196e1bb258f7e74e03d4a85f3b21ae (diff)
Use template haskell to avoid the need for templates:
+ Added library Text.Pandoc.Include, with a template haskell function $(includeStrFrom fname) to include a file as a string constant at compile time. + This removes the need for the 'templates' directory or Makefile target. These have been removed. + The base source directory has been changed from src to . + A new 'data' directory has been added, containing the ASCIIMathML.js script, writer headers, and S5 files. + The src/wrappers directory has been moved to 'wrappers'. + The Text.Pandoc.ASCIIMathML library is no longer needed, since Text.Pandoc.Writers.HTML can use includeStrFrom to include the ASCIIMathML.js code directly. It has been removed. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1063 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Blocks.hs145
-rw-r--r--src/Text/Pandoc/CharacterReferences.hs327
-rw-r--r--src/Text/Pandoc/Definition.hs116
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs496
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs651
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs909
-rw-r--r--src/Text/Pandoc/Readers/RST.hs640
-rw-r--r--src/Text/Pandoc/Shared.hs792
-rw-r--r--src/Text/Pandoc/UTF8.hs45
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs248
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs299
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs458
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs310
-rw-r--r--src/Text/Pandoc/Writers/Man.hs293
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs373
-rw-r--r--src/Text/Pandoc/Writers/RST.hs325
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs286
17 files changed, 0 insertions, 6713 deletions
diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs
deleted file mode 100644
index cfc22cb3e..000000000
--- a/src/Text/Pandoc/Blocks.hs
+++ /dev/null
@@ -1,145 +0,0 @@
-{-
-Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Blocks
- Copyright : Copyright (C) 2007 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for the manipulation of fixed-width blocks of text.
-These are used in the construction of plain-text tables.
--}
-
-module Text.Pandoc.Blocks
- (
- TextBlock (..),
- docToBlock,
- blockToDoc,
- widthOfBlock,
- heightOfBlock,
- hcatBlocks,
- hsepBlocks,
- centerAlignBlock,
- leftAlignBlock,
- rightAlignBlock
- )
-where
-import Text.PrettyPrint
-import Data.List ( intersperse )
-
--- | A fixed-width block of text. Parameters are width of block,
--- height of block, and list of lines.
-data TextBlock = TextBlock Int Int [String]
-instance Show TextBlock where
- show x = show $ blockToDoc x
-
--- | Break lines in a list of lines so that none are greater than
--- a given width.
-breakLines :: Int -- ^ Maximum length of lines.
- -> [String] -- ^ List of lines.
- -> [String]
-breakLines width [] = []
-breakLines width (l:ls) =
- if length l > width
- then (take width l):(breakLines width ((drop width l):ls))
- else l:(breakLines width ls)
-
--- | Convert a @Doc@ element into a @TextBlock@ with a specified width.
-docToBlock :: Int -- ^ Width of text block.
- -> Doc -- ^ @Doc@ to convert.
- -> TextBlock
-docToBlock width doc =
- let rendered = renderStyle (style {lineLength = width,
- ribbonsPerLine = 1}) doc
- lns = breakLines width $ lines rendered
- in TextBlock width (length lns) lns
-
--- | Convert a @TextBlock@ to a @Doc@ element.
-blockToDoc :: TextBlock -> Doc
-blockToDoc (TextBlock _ _ lns) =
- if null lns
- then empty
- else vcat $ map text lns
-
--- | Returns width of a @TextBlock@ (number of columns).
-widthOfBlock :: TextBlock -> Int
-widthOfBlock (TextBlock width _ _) = width
-
--- | Returns height of a @TextBlock@ (number of rows).
-heightOfBlock :: TextBlock -> Int
-heightOfBlock (TextBlock _ height _) = height
-
--- | Pads a string out to a given width using spaces.
-hPad :: Int -- ^ Desired width.
- -> String -- ^ String to pad.
- -> String
-hPad width line =
- let lineLength = length line
- in if lineLength <= width
- then line ++ replicate (width - lineLength) ' '
- else take width line
-
--- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in
--- which they appear side by side.
-hcatBlocks :: [TextBlock] -> TextBlock
-hcatBlocks [] = TextBlock 0 0 []
-hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd.
-hcatBlocks ((TextBlock width1 height1 lns1):xs) =
- let (TextBlock width2 height2 lns2) = hcatBlocks xs
- height = max height1 height2
- width = width1 + width2
- lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) ""
- lns2' = lns2 ++ replicate (height - height2) ""
- lns = zipWith (++) lns1' lns2'
- in TextBlock width height lns
-
--- | Like @hcatBlocks@, but inserts space between the @TextBlock@s.
-hsepBlocks :: [TextBlock] -> TextBlock
-hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "]))
-
-isWhitespace x = x `elem` " \t"
-
--- | Left-aligns the contents of a @TextBlock@ within the block.
-leftAlignBlock :: TextBlock -> TextBlock
-leftAlignBlock (TextBlock width height lns) =
- TextBlock width height $ map (dropWhile isWhitespace) lns
-
--- | Right-aligns the contents of a @TextBlock@ within the block.
-rightAlignBlock :: TextBlock -> TextBlock
-rightAlignBlock (TextBlock width height lns) =
- let rightAlignLine ln =
- let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln
- in reverse (rest ++ spaces)
- in TextBlock width height $ map rightAlignLine lns
-
--- | Centers the contents of a @TextBlock@ within the block.
-centerAlignBlock :: TextBlock -> TextBlock
-centerAlignBlock (TextBlock width height lns) =
- let centerAlignLine ln =
- let ln' = hPad width ln
- (startSpaces, rest) = span isWhitespace ln'
- endSpaces = takeWhile isWhitespace (reverse ln')
- numSpaces = length (startSpaces ++ endSpaces)
- startSpaces' = replicate (quot numSpaces 2) ' '
- in startSpaces' ++ rest
- in TextBlock width height $ map centerAlignLine lns
-
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
deleted file mode 100644
index 466f5d8f4..000000000
--- a/src/Text/Pandoc/CharacterReferences.hs
+++ /dev/null
@@ -1,327 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.CharacterReferences
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for parsing character references.
--}
-module Text.Pandoc.CharacterReferences (
- characterReference,
- decodeCharacterReferences,
- ) where
-import Data.Char ( chr )
-import Text.ParserCombinators.Parsec
-import qualified Data.Map as Map
-
--- | Parse character entity.
-characterReference :: GenParser Char st Char
-characterReference = try $ do
- st <- char '&'
- character <- numRef <|> entity
- end <- char ';'
- return character
-
-numRef :: GenParser Char st Char
-numRef = do
- char '#'
- num <- hexNum <|> decNum
- return $ chr $ num
-
-hexNum :: GenParser Char st Int
-hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++)
-
-decNum :: GenParser Char st Int
-decNum = many1 digit >>= return . read
-
-entity :: GenParser Char st Char
-entity = do
- body <- many1 alphaNum
- return $ Map.findWithDefault '?' body entityTable
-
--- | Convert entities in a string to characters.
-decodeCharacterReferences :: String -> String
-decodeCharacterReferences str =
- case parse (many (characterReference <|> anyChar)) str str of
- Left err -> error $ "\nError: " ++ show err
- Right result -> result
-
-entityTable :: Map.Map String Char
-entityTable = Map.fromList entityTableList
-
-entityTableList :: [(String, Char)]
-entityTableList = [
- ("quot", chr 34),
- ("amp", chr 38),
- ("lt", chr 60),
- ("gt", chr 62),
- ("nbsp", chr 160),
- ("iexcl", chr 161),
- ("cent", chr 162),
- ("pound", chr 163),
- ("curren", chr 164),
- ("yen", chr 165),
- ("brvbar", chr 166),
- ("sect", chr 167),
- ("uml", chr 168),
- ("copy", chr 169),
- ("ordf", chr 170),
- ("laquo", chr 171),
- ("not", chr 172),
- ("shy", chr 173),
- ("reg", chr 174),
- ("macr", chr 175),
- ("deg", chr 176),
- ("plusmn", chr 177),
- ("sup2", chr 178),
- ("sup3", chr 179),
- ("acute", chr 180),
- ("micro", chr 181),
- ("para", chr 182),
- ("middot", chr 183),
- ("cedil", chr 184),
- ("sup1", chr 185),
- ("ordm", chr 186),
- ("raquo", chr 187),
- ("frac14", chr 188),
- ("frac12", chr 189),
- ("frac34", chr 190),
- ("iquest", chr 191),
- ("Agrave", chr 192),
- ("Aacute", chr 193),
- ("Acirc", chr 194),
- ("Atilde", chr 195),
- ("Auml", chr 196),
- ("Aring", chr 197),
- ("AElig", chr 198),
- ("Ccedil", chr 199),
- ("Egrave", chr 200),
- ("Eacute", chr 201),
- ("Ecirc", chr 202),
- ("Euml", chr 203),
- ("Igrave", chr 204),
- ("Iacute", chr 205),
- ("Icirc", chr 206),
- ("Iuml", chr 207),
- ("ETH", chr 208),
- ("Ntilde", chr 209),
- ("Ograve", chr 210),
- ("Oacute", chr 211),
- ("Ocirc", chr 212),
- ("Otilde", chr 213),
- ("Ouml", chr 214),
- ("times", chr 215),
- ("Oslash", chr 216),
- ("Ugrave", chr 217),
- ("Uacute", chr 218),
- ("Ucirc", chr 219),
- ("Uuml", chr 220),
- ("Yacute", chr 221),
- ("THORN", chr 222),
- ("szlig", chr 223),
- ("agrave", chr 224),
- ("aacute", chr 225),
- ("acirc", chr 226),
- ("atilde", chr 227),
- ("auml", chr 228),
- ("aring", chr 229),
- ("aelig", chr 230),
- ("ccedil", chr 231),
- ("egrave", chr 232),
- ("eacute", chr 233),
- ("ecirc", chr 234),
- ("euml", chr 235),
- ("igrave", chr 236),
- ("iacute", chr 237),
- ("icirc", chr 238),
- ("iuml", chr 239),
- ("eth", chr 240),
- ("ntilde", chr 241),
- ("ograve", chr 242),
- ("oacute", chr 243),
- ("ocirc", chr 244),
- ("otilde", chr 245),
- ("ouml", chr 246),
- ("divide", chr 247),
- ("oslash", chr 248),
- ("ugrave", chr 249),
- ("uacute", chr 250),
- ("ucirc", chr 251),
- ("uuml", chr 252),
- ("yacute", chr 253),
- ("thorn", chr 254),
- ("yuml", chr 255),
- ("OElig", chr 338),
- ("oelig", chr 339),
- ("Scaron", chr 352),
- ("scaron", chr 353),
- ("Yuml", chr 376),
- ("fnof", chr 402),
- ("circ", chr 710),
- ("tilde", chr 732),
- ("Alpha", chr 913),
- ("Beta", chr 914),
- ("Gamma", chr 915),
- ("Delta", chr 916),
- ("Epsilon", chr 917),
- ("Zeta", chr 918),
- ("Eta", chr 919),
- ("Theta", chr 920),
- ("Iota", chr 921),
- ("Kappa", chr 922),
- ("Lambda", chr 923),
- ("Mu", chr 924),
- ("Nu", chr 925),
- ("Xi", chr 926),
- ("Omicron", chr 927),
- ("Pi", chr 928),
- ("Rho", chr 929),
- ("Sigma", chr 931),
- ("Tau", chr 932),
- ("Upsilon", chr 933),
- ("Phi", chr 934),
- ("Chi", chr 935),
- ("Psi", chr 936),
- ("Omega", chr 937),
- ("alpha", chr 945),
- ("beta", chr 946),
- ("gamma", chr 947),
- ("delta", chr 948),
- ("epsilon", chr 949),
- ("zeta", chr 950),
- ("eta", chr 951),
- ("theta", chr 952),
- ("iota", chr 953),
- ("kappa", chr 954),
- ("lambda", chr 955),
- ("mu", chr 956),
- ("nu", chr 957),
- ("xi", chr 958),
- ("omicron", chr 959),
- ("pi", chr 960),
- ("rho", chr 961),
- ("sigmaf", chr 962),
- ("sigma", chr 963),
- ("tau", chr 964),
- ("upsilon", chr 965),
- ("phi", chr 966),
- ("chi", chr 967),
- ("psi", chr 968),
- ("omega", chr 969),
- ("thetasym", chr 977),
- ("upsih", chr 978),
- ("piv", chr 982),
- ("ensp", chr 8194),
- ("emsp", chr 8195),
- ("thinsp", chr 8201),
- ("zwnj", chr 8204),
- ("zwj", chr 8205),
- ("lrm", chr 8206),
- ("rlm", chr 8207),
- ("ndash", chr 8211),
- ("mdash", chr 8212),
- ("lsquo", chr 8216),
- ("rsquo", chr 8217),
- ("sbquo", chr 8218),
- ("ldquo", chr 8220),
- ("rdquo", chr 8221),
- ("bdquo", chr 8222),
- ("dagger", chr 8224),
- ("Dagger", chr 8225),
- ("bull", chr 8226),
- ("hellip", chr 8230),
- ("permil", chr 8240),
- ("prime", chr 8242),
- ("Prime", chr 8243),
- ("lsaquo", chr 8249),
- ("rsaquo", chr 8250),
- ("oline", chr 8254),
- ("frasl", chr 8260),
- ("euro", chr 8364),
- ("image", chr 8465),
- ("weierp", chr 8472),
- ("real", chr 8476),
- ("trade", chr 8482),
- ("alefsym", chr 8501),
- ("larr", chr 8592),
- ("uarr", chr 8593),
- ("rarr", chr 8594),
- ("darr", chr 8595),
- ("harr", chr 8596),
- ("crarr", chr 8629),
- ("lArr", chr 8656),
- ("uArr", chr 8657),
- ("rArr", chr 8658),
- ("dArr", chr 8659),
- ("hArr", chr 8660),
- ("forall", chr 8704),
- ("part", chr 8706),
- ("exist", chr 8707),
- ("empty", chr 8709),
- ("nabla", chr 8711),
- ("isin", chr 8712),
- ("notin", chr 8713),
- ("ni", chr 8715),
- ("prod", chr 8719),
- ("sum", chr 8721),
- ("minus", chr 8722),
- ("lowast", chr 8727),
- ("radic", chr 8730),
- ("prop", chr 8733),
- ("infin", chr 8734),
- ("ang", chr 8736),
- ("and", chr 8743),
- ("or", chr 8744),
- ("cap", chr 8745),
- ("cup", chr 8746),
- ("int", chr 8747),
- ("there4", chr 8756),
- ("sim", chr 8764),
- ("cong", chr 8773),
- ("asymp", chr 8776),
- ("ne", chr 8800),
- ("equiv", chr 8801),
- ("le", chr 8804),
- ("ge", chr 8805),
- ("sub", chr 8834),
- ("sup", chr 8835),
- ("nsub", chr 8836),
- ("sube", chr 8838),
- ("supe", chr 8839),
- ("oplus", chr 8853),
- ("otimes", chr 8855),
- ("perp", chr 8869),
- ("sdot", chr 8901),
- ("lceil", chr 8968),
- ("rceil", chr 8969),
- ("lfloor", chr 8970),
- ("rfloor", chr 8971),
- ("lang", chr 9001),
- ("rang", chr 9002),
- ("loz", chr 9674),
- ("spades", chr 9824),
- ("clubs", chr 9827),
- ("hearts", chr 9829),
- ("diams", chr 9830)
- ]
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
deleted file mode 100644
index 7d1125c5a..000000000
--- a/src/Text/Pandoc/Definition.hs
+++ /dev/null
@@ -1,116 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Definition
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Definition of 'Pandoc' data structure for format-neutral representation
-of documents.
--}
-module Text.Pandoc.Definition where
-
-data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show)
-
--- | Bibliographic information for the document: title (list of 'Inline'),
--- authors (list of strings), date (string).
-data Meta = Meta [Inline] -- title
- [String] -- authors
- String -- date
- deriving (Eq, Show, Read)
-
--- | Alignment of a table column.
-data Alignment = AlignLeft
- | AlignRight
- | AlignCenter
- | AlignDefault deriving (Eq, Show, Read)
-
--- | List attributes.
-type ListAttributes = (Int, ListNumberStyle, ListNumberDelim)
-
--- | Style of list numbers.
-data ListNumberStyle = DefaultStyle
- | Decimal
- | LowerRoman
- | UpperRoman
- | LowerAlpha
- | UpperAlpha deriving (Eq, Show, Read)
-
--- | Delimiter of list numbers.
-data ListNumberDelim = DefaultDelim
- | Period
- | OneParen
- | TwoParens deriving (Eq, Show, Read)
-
--- | Block element.
-data Block
- = Plain [Inline] -- ^ Plain text, not a paragraph
- | Para [Inline] -- ^ Paragraph
- | CodeBlock String -- ^ Code block (literal)
- | RawHtml String -- ^ Raw HTML block (literal)
- | BlockQuote [Block] -- ^ Block quote (list of blocks)
- | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes
- -- and a list of items, each a list of blocks)
- | BulletList [[Block]] -- ^ Bullet list (list of items, each
- -- a list of blocks)
- | DefinitionList [([Inline],[Block])] -- ^ Definition list
- -- (list of items, each a pair of an inline list,
- -- the term, and a block list)
- | Header Int [Inline] -- ^ Header - level (integer) and text (inlines)
- | HorizontalRule -- ^ Horizontal rule
- | Table [Inline] [Alignment] [Float] [[Block]] [[[Block]]] -- ^ Table,
- -- with caption, column alignments,
- -- relative column widths, column headers
- -- (each a list of blocks), and rows
- -- (each a list of lists of blocks)
- | Null -- ^ Nothing
- deriving (Eq, Read, Show)
-
--- | Type of quotation marks to use in Quoted inline.
-data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read)
-
-type Target = (String, String) -- ^ Link target (URL, title)
-
--- | Inline elements.
-data Inline
- = Str String -- ^ Text (string)
- | Emph [Inline] -- ^ Emphasized text (list of inlines)
- | Strong [Inline] -- ^ Strongly emphasized text (list of inlines)
- | Strikeout [Inline] -- ^ Strikeout text (list of inlines)
- | Superscript [Inline] -- ^ Superscripted text (list of inlines)
- | Subscript [Inline] -- ^ Subscripted text (list of inlines)
- | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines)
- | Code String -- ^ Inline code (literal)
- | Space -- ^ Inter-word space
- | EmDash -- ^ Em dash
- | EnDash -- ^ En dash
- | Apostrophe -- ^ Apostrophe
- | Ellipses -- ^ Ellipses
- | LineBreak -- ^ Hard line break
- | TeX String -- ^ LaTeX code (literal)
- | HtmlInline String -- ^ HTML code (literal)
- | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
- | Image [Inline] Target -- ^ Image: alt text (list of inlines), target
- -- and target
- | Note [Block] -- ^ Footnote or endnote
- deriving (Show, Eq, Read)
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
deleted file mode 100644
index 70a071152..000000000
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ /dev/null
@@ -1,496 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.HTML
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of HTML to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.HTML (
- readHtml,
- rawHtmlInline,
- rawHtmlBlock,
- anyHtmlBlockTag,
- anyHtmlInlineTag,
- anyHtmlTag,
- anyHtmlEndTag,
- htmlEndTag,
- extractTagType,
- htmlBlockElement
- ) where
-
-import Text.ParserCombinators.Parsec
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Pandoc.CharacterReferences ( characterReference,
- decodeCharacterReferences )
-import Data.Maybe ( fromMaybe )
-import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf )
-import Data.Char ( toUpper, toLower, isAlphaNum )
-
--- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: ParserState -- ^ Parser state
- -> String -- ^ String to parse
- -> Pandoc
-readHtml = readWith parseHtml
-
---
--- Constants
---
-
-eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
- "map", "area", "object", "script"]
-
-inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
- "br", "cite", "code", "dfn", "em", "font", "i", "img",
- "input", "kbd", "label", "q", "s", "samp", "select",
- "small", "span", "strike", "strong", "sub", "sup",
- "textarea", "tt", "u", "var"] ++ eitherBlockOrInline
-
-blockHtmlTags = ["address", "blockquote", "center", "dir", "div",
- "dl", "fieldset", "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "hr", "isindex", "menu", "noframes",
- "noscript", "ol", "p", "pre", "table", "ul", "dd",
- "dt", "frameset", "li", "tbody", "td", "tfoot",
- "th", "thead", "tr"] ++ eitherBlockOrInline
-
---
--- HTML utility functions
---
-
--- | Read blocks until end tag.
-blocksTilEnd tag = do
- blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
- return $ filter (/= Null) blocks
-
--- | Read inlines until end tag.
-inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-
--- | Parse blocks between open and close tag.
-blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
-
--- | Parse inlines between open and close tag.
-inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
-
--- | Extract type from a tag: e.g. @br@ from @\<br\>@
-extractTagType :: String -> String
-extractTagType ('<':rest) =
- let isSpaceOrSlash c = c `elem` "/ \n\t" in
- map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
-extractTagType _ = ""
-
--- | Parse any HTML tag (opening or self-closing) and return text of tag
-anyHtmlTag = try $ do
- char '<'
- spaces
- tag <- many1 alphaNum
- attribs <- many htmlAttribute
- spaces
- ender <- option "" (string "/")
- let ender' = if null ender then "" else " /"
- spaces
- char '>'
- return $ "<" ++ tag ++
- concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
-
-anyHtmlEndTag = try $ do
- char '<'
- spaces
- char '/'
- spaces
- tagType <- many1 alphaNum
- spaces
- char '>'
- return $ "</" ++ tagType ++ ">"
-
-htmlTag :: String -> GenParser Char st (String, [(String, String)])
-htmlTag tag = try $ do
- char '<'
- spaces
- stringAnyCase tag
- attribs <- many htmlAttribute
- spaces
- optional (string "/")
- spaces
- char '>'
- return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
-
--- parses a quoted html attribute value
-quoted quoteChar = do
- result <- between (char quoteChar) (char quoteChar)
- (many (noneOf [quoteChar]))
- return (result, [quoteChar])
-
-htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
-
--- minimized boolean attribute
-htmlMinimizedAttribute = try $ do
- many1 space
- name <- many1 (choice [letter, oneOf ".-_:"])
- return (name, name, name)
-
-htmlRegularAttribute = try $ do
- many1 space
- name <- many1 (choice [letter, oneOf ".-_:"])
- spaces
- char '='
- spaces
- (content, quoteStr) <- choice [ (quoted '\''),
- (quoted '"'),
- (do
- a <- many (alphaNum <|> (oneOf "-._:"))
- return (a,"")) ]
- return (name, content,
- (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
-
--- | Parse an end tag of type 'tag'
-htmlEndTag tag = try $ do
- char '<'
- spaces
- char '/'
- spaces
- stringAnyCase tag
- spaces
- char '>'
- return $ "</" ++ tag ++ ">"
-
--- | Returns @True@ if the tag is (or can be) an inline tag.
-isInline tag = (extractTagType tag) `elem` inlineHtmlTags
-
--- | Returns @True@ if the tag is (or can be) a block tag.
-isBlock tag = (extractTagType tag) `elem` blockHtmlTags
-
-anyHtmlBlockTag = try $ do
- tag <- anyHtmlTag <|> anyHtmlEndTag
- if isBlock tag then return tag else fail "inline tag"
-
-anyHtmlInlineTag = try $ do
- tag <- anyHtmlTag <|> anyHtmlEndTag
- if isInline tag then return tag else fail "not an inline tag"
-
--- | Parses material between script tags.
--- Scripts must be treated differently, because they can contain '<>' etc.
-htmlScript = try $ do
- open <- string "<script"
- rest <- manyTill anyChar (htmlEndTag "script")
- return $ open ++ rest ++ "</script>"
-
-htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
-
-rawHtmlBlock = try $ do
- notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
- body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag
- sp <- many space
- state <- getState
- if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
-
--- | Parses an HTML comment.
-htmlComment = try $ do
- string "<!--"
- comment <- manyTill anyChar (try (string "-->"))
- return $ "<!--" ++ comment ++ "-->"
-
---
--- parsing documents
---
-
-xmlDec = try $ do
- string "<?"
- rest <- manyTill anyChar (char '>')
- return $ "<?" ++ rest ++ ">"
-
-definition = try $ do
- string "<!"
- rest <- manyTill anyChar (char '>')
- return $ "<!" ++ rest ++ ">"
-
-nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
- ((rawHtmlBlock >> return ' ') <|> anyChar)
-
-parseTitle = try $ do
- (tag, _) <- htmlTag "title"
- contents <- inlinesTilEnd tag
- spaces
- return contents
-
--- parse header and return meta-information (for now, just title)
-parseHead = try $ do
- htmlTag "head"
- spaces
- skipMany nonTitleNonHead
- contents <- option [] parseTitle
- skipMany nonTitleNonHead
- htmlTag "/head"
- return (contents, [], "")
-
-skipHtmlTag tag = optional (htmlTag tag)
-
--- h1 class="title" representation of title in body
-bodyTitle = try $ do
- (tag, attribs) <- htmlTag "h1"
- cl <- case (extractAttribute "class" attribs) of
- Just "title" -> return ""
- otherwise -> fail "not title"
- inlinesTilEnd "h1"
-
-parseHtml = do
- sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
- skipHtmlTag "html"
- spaces
- (title, authors, date) <- option ([], [], "") parseHead
- spaces
- skipHtmlTag "body"
- spaces
- optional bodyTitle -- skip title in body, because it's represented in meta
- blocks <- parseBlocks
- spaces
- optional (htmlEndTag "body")
- spaces
- optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html>
- eof
- return $ Pandoc (Meta title authors date) blocks
-
---
--- parsing blocks
---
-
-parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
-
-block = choice [ codeBlock
- , header
- , hrule
- , list
- , blockQuote
- , para
- , plain
- , rawHtmlBlock ] <?> "block"
-
---
--- header blocks
---
-
-header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-
-headerLevel n = try $ do
- let level = "h" ++ show n
- (tag, attribs) <- htmlTag level
- contents <- inlinesTilEnd level
- return $ Header n (normalizeSpaces contents)
-
---
--- hrule block
---
-
-hrule = try $ do
- (tag, attribs) <- htmlTag "hr"
- state <- getState
- if not (null attribs) && stateParseRaw state
- then unexpected "attributes in hr" -- parse as raw in this case
- else return HorizontalRule
-
---
--- code blocks
---
-
--- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
--- skipped, because they are not portable to output formats other than HTML.
-codeBlock = try $ do
- htmlTag "pre"
- result <- manyTill
- (many1 (satisfy (/= '<')) <|>
- ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
- (htmlEndTag "pre")
- let result' = concat result
- -- drop leading newline if any
- let result'' = if "\n" `isPrefixOf` result'
- then drop 1 result'
- else result'
- -- drop trailing newline if any
- let result''' = if "\n" `isSuffixOf` result''
- then init result''
- else result''
- return $ CodeBlock $ decodeCharacterReferences result'''
-
---
--- block quotes
---
-
-blockQuote = try $ htmlTag "blockquote" >> spaces >>
- blocksTilEnd "blockquote" >>= (return . BlockQuote)
-
---
--- list blocks
---
-
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-orderedList = try $ do
- (_, attribs) <- htmlTag "ol"
- (start, style) <- option (1, DefaultStyle) $
- do failIfStrict
- let sta = fromMaybe "1" $
- lookup "start" attribs
- let sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- let sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ -> DefaultStyle
- return (read sta, sty')
- spaces
- items <- sepEndBy1 (blocksIn "li") spaces
- htmlEndTag "ol"
- return $ OrderedList (start, style, DefaultDelim) items
-
-bulletList = try $ do
- htmlTag "ul"
- spaces
- items <- sepEndBy1 (blocksIn "li") spaces
- htmlEndTag "ul"
- return $ BulletList items
-
-definitionList = try $ do
- failIfStrict -- def lists not part of standard markdown
- tag <- htmlTag "dl"
- spaces
- items <- sepEndBy1 definitionListItem spaces
- htmlEndTag "dl"
- return $ DefinitionList items
-
-definitionListItem = try $ do
- terms <- sepEndBy1 (inlinesIn "dt") spaces
- defs <- sepEndBy1 (blocksIn "dd") spaces
- let term = joinWithSep [LineBreak] terms
- return (term, concat defs)
-
---
--- paragraph block
---
-
-para = try $ htmlTag "p" >> inlinesTilEnd "p" >>=
- return . Para . normalizeSpaces
-
---
--- plain block
---
-
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
---
--- inline
---
-
-inline = choice [ charRef
- , strong
- , emph
- , superscript
- , subscript
- , strikeout
- , spanStrikeout
- , code
- , str
- , linebreak
- , whitespace
- , link
- , image
- , rawHtmlInline
- ] <?> "inline"
-
-code = try $ do
- htmlTag "code"
- result <- manyTill anyChar (htmlEndTag "code")
- -- remove internal line breaks, leading and trailing space,
- -- and decode character references
- return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
- joinWithSep " " $ lines result
-
-rawHtmlInline = do
- result <- htmlScript <|> htmlComment <|> anyHtmlInlineTag
- state <- getState
- if stateParseRaw state then return (HtmlInline result) else return (Str "")
-
-betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
- return . normalizeSpaces
-
-emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
-
-strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
-
-superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
-
-subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
-
-strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
- return . Strikeout
-
-spanStrikeout = try $ do
- failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
- (tag, attributes) <- htmlTag "span"
- result <- case (extractAttribute "class" attributes) of
- Just "strikeout" -> inlinesTilEnd "span"
- _ -> fail "not a strikeout"
- return $ Strikeout result
-
-whitespace = many1 space >> return Space
-
--- hard line break
-linebreak = htmlTag "br" >> optional newline >> return LineBreak
-
-str = many1 (noneOf "<& \t\n") >>= return . Str
-
---
--- links and images
---
-
--- extract contents of attribute (attribute names are case-insensitive)
-extractAttribute name [] = Nothing
-extractAttribute name ((attrName, contents):rest) =
- let name' = map toLower name
- attrName' = map toLower attrName
- in if attrName' == name'
- then Just (decodeCharacterReferences contents)
- else extractAttribute name rest
-
-link = try $ do
- (tag, attributes) <- htmlTag "a"
- url <- case (extractAttribute "href" attributes) of
- Just url -> return url
- Nothing -> fail "no href"
- let title = fromMaybe "" $ extractAttribute "title" attributes
- label <- inlinesTilEnd "a"
- return $ Link (normalizeSpaces label) (url, title)
-
-image = try $ do
- (tag, attributes) <- htmlTag "img"
- url <- case (extractAttribute "src" attributes) of
- Just url -> return url
- Nothing -> fail "no src"
- let title = fromMaybe "" $ extractAttribute "title" attributes
- let alt = fromMaybe "" (extractAttribute "alt" attributes)
- return $ Image [Str alt] (url, title)
-
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
deleted file mode 100644
index 37cc2bfe4..000000000
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ /dev/null
@@ -1,651 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.LaTeX
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of LaTeX to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.LaTeX (
- readLaTeX,
- rawLaTeXInline,
- rawLaTeXEnvironment
- ) where
-
-import Text.ParserCombinators.Parsec
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Data.Maybe ( fromMaybe )
-import Data.Char ( chr )
-import Data.List ( isPrefixOf, isSuffixOf )
-
--- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: ParserState -- ^ Parser state, including options for parser
- -> String -- ^ String to parse
- -> Pandoc
-readLaTeX = readWith parseLaTeX
-
--- characters with special meaning
-specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
-
---
--- utility functions
---
-
--- | Returns text between brackets and its matching pair.
-bracketedText openB closeB = do
- result <- charsInBalanced' openB closeB
- return $ [openB] ++ result ++ [closeB]
-
--- | Returns an option or argument of a LaTeX command.
-optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
-
--- | True if the string begins with '{'.
-isArg ('{':rest) = True
-isArg other = False
-
--- | Returns list of options and arguments of a LaTeX command.
-commandArgs = many optOrArg
-
--- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command = do
- char '\\'
- name <- many1 letter
- star <- option "" (string "*") -- some commands have starred versions
- args <- commandArgs
- return (name, star, args)
-
-begin name = try $ do
- string $ "\\begin{" ++ name ++ "}"
- optional commandArgs
- spaces
- return name
-
-end name = try $ do
- string $ "\\end{" ++ name ++ "}"
- spaces
- return name
-
--- | Returns a list of block elements containing the contents of an
--- environment.
-environment name = try $ begin name >> spaces >> manyTill block (end name)
-
-anyEnvironment = try $ do
- string "\\begin{"
- name <- many letter
- star <- option "" (string "*") -- some environments have starred variants
- char '}'
- optional commandArgs
- spaces
- contents <- manyTill block (end (name ++ star))
- return $ BlockQuote contents
-
---
--- parsing documents
---
-
--- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble = try $ manyTill
- (choice [bibliographic, comment, unknownCommand, nullBlock])
- (try (string "\\begin{document}")) >>
- spaces
-
--- | Parse LaTeX and return 'Pandoc'.
-parseLaTeX = do
- optional processLaTeXPreamble -- preamble might not be present (fragment)
- spaces
- blocks <- parseBlocks
- spaces
- optional $ try (string "\\end{document}" >> many anyChar)
- -- might not be present (fragment)
- spaces
- eof
- state <- getState
- let blocks' = filter (/= Null) blocks
- let title' = stateTitle state
- let authors' = stateAuthors state
- let date' = stateDate state
- return $ Pandoc (Meta title' authors' date') blocks'
-
---
--- parsing blocks
---
-
-parseBlocks = spaces >> many block
-
-block = choice [ hrule
- , codeBlock
- , header
- , list
- , blockQuote
- , mathBlock
- , comment
- , bibliographic
- , para
- , specialEnvironment
- , itemBlock
- , unknownEnvironment
- , unknownCommand ] <?> "block"
-
---
--- header blocks
---
-
-header = try $ do
- char '\\'
- subs <- many (try (string "sub"))
- string "section"
- optional (char '*')
- char '{'
- title <- manyTill inline (char '}')
- spaces
- return $ Header (length subs + 1) (normalizeSpaces title)
-
---
--- hrule block
---
-
-hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
- "\\newpage" ] >> spaces >> return HorizontalRule
-
---
--- code blocks
---
-
-codeBlock = codeBlock1 <|> codeBlock2
-
-codeBlock1 = try $ do
- string "\\begin{verbatim}" -- don't use begin function because it
- -- gobbles whitespace
- optional blanklines -- we want to gobble blank lines, but not
- -- leading space
- contents <- manyTill anyChar (try (string "\\end{verbatim}"))
- spaces
- return $ CodeBlock (stripTrailingNewlines contents)
-
-codeBlock2 = try $ do
- string "\\begin{Verbatim}" -- used by fancyvrb package
- option "" blanklines
- contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
- spaces
- return $ CodeBlock (stripTrailingNewlines contents)
-
---
--- block quotes
---
-
-blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
- return . BlockQuote
-
---
--- math block
---
-
-mathBlock = mathBlockWith (begin "equation") (end "equation") <|>
- mathBlockWith (begin "displaymath") (end "displaymath") <|>
- mathBlockWith (string "\\[") (string "\\]") <?> "math block"
-
-mathBlockWith start end = try $ do
- start
- spaces
- result <- manyTill anyChar end
- spaces
- return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
-
---
--- list blocks
---
-
-list = bulletList <|> orderedList <|> definitionList <?> "list"
-
-listItem = try $ do
- ("item", _, args) <- command
- spaces
- state <- getState
- let oldParserContext = stateParserContext state
- updateState (\state -> state {stateParserContext = ListItemState})
- blocks <- many block
- updateState (\state -> state {stateParserContext = oldParserContext})
- opt <- case args of
- ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
- parseFromString (many inline) $ tail $ init x
- _ -> return []
- return (opt, blocks)
-
-orderedList = try $ do
- string "\\begin{enumerate}"
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ do failIfStrict
- char '['
- res <- anyOrderedListMarker
- char ']'
- return res
- spaces
- option "" $ try $ do string "\\setlength{\\itemindent}"
- char '{'
- manyTill anyChar (char '}')
- spaces
- start <- option 1 $ try $ do failIfStrict
- string "\\setcounter{enum"
- many1 (oneOf "iv")
- string "}{"
- num <- many1 digit
- char '}'
- spaces
- return $ (read num) + 1
- items <- many listItem
- end "enumerate"
- spaces
- return $ OrderedList (start, style, delim) $ map snd items
-
-bulletList = try $ do
- begin "itemize"
- spaces
- items <- many listItem
- end "itemize"
- spaces
- return (BulletList $ map snd items)
-
-definitionList = try $ do
- begin "description"
- spaces
- items <- many listItem
- end "description"
- spaces
- return (DefinitionList items)
-
---
--- paragraph block
---
-
-para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
-
---
--- title authors date
---
-
-bibliographic = choice [ maketitle, title, authors, date ]
-
-maketitle = try (string "\\maketitle") >> spaces >> return Null
-
-title = try $ do
- string "\\title{"
- tit <- manyTill inline (char '}')
- spaces
- updateState (\state -> state { stateTitle = tit })
- return Null
-
-authors = try $ do
- string "\\author{"
- authors <- manyTill anyChar (char '}')
- spaces
- let authors' = map removeLeadingTrailingSpace $ lines $
- substitute "\\\\" "\n" authors
- updateState (\state -> state { stateAuthors = authors' })
- return Null
-
-date = try $ do
- string "\\date{"
- date' <- manyTill anyChar (char '}')
- spaces
- updateState (\state -> state { stateDate = date' })
- return Null
-
---
--- item block
--- for use in unknown environments that aren't being parsed as raw latex
---
-
--- this forces items to be parsed in different blocks
-itemBlock = try $ do
- ("item", _, args) <- command
- state <- getState
- if (stateParserContext state == ListItemState)
- then fail "item should be handled by list block"
- else if null args
- then return Null
- else return $ Plain [Str (stripFirstAndLast (head args))]
-
---
--- raw LaTeX
---
-
-specialEnvironment = do -- these are always parsed as raw
- lookAhead (choice (map (\name -> begin name) ["tabular", "figure",
- "tabbing", "eqnarry", "picture", "table", "verse", "theorem"]))
- rawLaTeXEnvironment
-
--- | Parse any LaTeX environment and return a Para block containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = try $ do
- string "\\begin{"
- name <- many1 letter
- star <- option "" (string "*") -- for starred variants
- let name' = name ++ star
- char '}'
- args <- option [] commandArgs
- let argStr = concat args
- contents <- manyTill (choice [ (many1 (noneOf "\\")),
- (do
- (Para [TeX str]) <- rawLaTeXEnvironment
- return str),
- string "\\" ])
- (end name')
- spaces
- return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++
- concat contents ++ "\\end{" ++ name' ++ "}"]
-
-unknownEnvironment = try $ do
- state <- getState
- result <- if stateParseRaw state -- check whether we should include raw TeX
- then rawLaTeXEnvironment -- if so, get whole raw environment
- else anyEnvironment -- otherwise just the contents
- return result
-
-unknownCommand = try $ do
- notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
- "document"]
- (name, star, args) <- command
- spaces
- let argStr = concat args
- state <- getState
- if name == "item" && (stateParserContext state) == ListItemState
- then fail "should not be parsed as raw"
- else string ""
- if stateParseRaw state
- then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
- else return $ Plain [Str (joinWithSep " " args)]
-
--- latex comment
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
-
---
--- inline
---
-
-inline = choice [ str
- , endline
- , whitespace
- , quoted
- , apostrophe
- , spacer
- , strong
- , math
- , ellipses
- , emDash
- , enDash
- , hyphen
- , emph
- , strikeout
- , superscript
- , subscript
- , ref
- , lab
- , code
- , url
- , link
- , image
- , footnote
- , linebreak
- , accentedChar
- , specialChar
- , rawLaTeXInline
- , escapedChar
- , unescapedChar
- ] <?> "inline"
-
-accentedChar = normalAccentedChar <|> specialAccentedChar
-
-normalAccentedChar = try $ do
- char '\\'
- accent <- oneOf "'`^\"~"
- character <- (try $ char '{' >> letter >>~ char '}') <|> letter
- let table = fromMaybe [] $ lookup character accentTable
- let result = case lookup accent table of
- Just num -> chr num
- Nothing -> '?'
- return $ Str [result]
-
--- an association list of letters and association list of accents
--- and decimal character numbers.
-accentTable =
- [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
- ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
- ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
- ('N', [('~', 209)]),
- ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
- ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
- ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
- ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
- ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
- ('n', [('~', 241)]),
- ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
- ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
- oslash, pound, euro, copyright, sect ]
-
-ccedil = try $ do
- char '\\'
- letter <- oneOfStrings ["cc", "cC"]
- let num = if letter == "cc" then 231 else 199
- return $ Str [chr num]
-
-aring = try $ do
- char '\\'
- letter <- oneOfStrings ["aa", "AA"]
- let num = if letter == "aa" then 229 else 197
- return $ Str [chr num]
-
-iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
- return (Str [chr 239])
-
-icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
- return (Str [chr 238])
-
-szlig = try (string "\\ss") >> return (Str [chr 223])
-
-oslash = try $ do
- char '\\'
- letter <- choice [char 'o', char 'O']
- let num = if letter == 'o' then 248 else 216
- return $ Str [chr num]
-
-aelig = try $ do
- char '\\'
- letter <- oneOfStrings ["ae", "AE"]
- let num = if letter == "ae" then 230 else 198
- return $ Str [chr num]
-
-pound = try (string "\\pounds") >> return (Str [chr 163])
-
-euro = try (string "\\euro") >> return (Str [chr 8364])
-
-copyright = try (string "\\copyright") >> return (Str [chr 169])
-
-sect = try (string "\\S") >> return (Str [chr 167])
-
-escapedChar = do
- result <- escaped (oneOf " $%&_#{}\n")
- return $ if result == Str "\n" then Str " " else result
-
--- ignore standalone, nonescaped special characters
-unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "")
-
-specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
-
-backslash = try (string "\\textbackslash") >> return (Str "\\")
-
-tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-
-caret = try (string "\\^{}") >> return (Str "^")
-
-bar = try (string "\\textbar") >> return (Str "\\")
-
-lt = try (string "\\textless") >> return (Str "<")
-
-gt = try (string "\\textgreater") >> return (Str ">")
-
-doubleQuote = char '"' >> return (Str "\"")
-
-code = code1 <|> code2
-
-code1 = try $ do
- string "\\verb"
- marker <- anyChar
- result <- manyTill anyChar (char marker)
- return $ Code $ removeLeadingTrailingSpace result
-
-code2 = try $ do
- string "\\texttt{"
- result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
- return $ Code result
-
-emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
- manyTill inline (char '}') >>= return . Emph
-
-strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
- return . Strikeout
-
-superscript = try $ string "\\textsuperscript{" >>
- manyTill inline (char '}') >>= return . Superscript
-
--- note: \textsubscript isn't a standard latex command, but we use
--- a defined version in pandoc.
-subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
- return . Subscript
-
-apostrophe = char '\'' >> return Apostrophe
-
-quoted = doubleQuoted <|> singleQuoted
-
-singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
- return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
- return . Quoted DoubleQuote . normalizeSpaces
-
-singleQuoteStart = char '`'
-
-singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
-
-doubleQuoteStart = string "``"
-
-doubleQuoteEnd = try $ string "''"
-
-ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
- return Ellipses
-
-enDash = try (string "--") >> return EnDash
-
-emDash = try (string "---") >> return EmDash
-
-hyphen = char '-' >> return (Str "-")
-
-lab = try $ do
- string "\\label{"
- result <- manyTill anyChar (char '}')
- return $ Str $ "(" ++ result ++ ")"
-
-ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
-
-strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
- return . Strong
-
-whitespace = many1 (oneOf "~ \t") >> return Space
-
--- hard line break
-linebreak = try (string "\\\\") >> return LineBreak
-
-spacer = try (string "\\,") >> return (Str "")
-
-str = many1 (noneOf specialChars) >>= return . Str
-
--- endline internal to paragraph
-endline = try $ newline >> notFollowedBy blankline >> return Space
-
--- math
-math = math1 <|> math2 <?> "math"
-
-math1 = try $ do
- char '$'
- result <- many (noneOf "$")
- char '$'
- return $ TeX ("$" ++ result ++ "$")
-
-math2 = try $ do
- string "\\("
- result <- many (noneOf "$")
- string "\\)"
- return $ TeX ("$" ++ result ++ "$")
-
---
--- links and images
---
-
-url = try $ do
- string "\\url"
- url <- charsInBalanced '{' '}'
- return $ Link [Code url] (url, "")
-
-link = try $ do
- string "\\href{"
- url <- manyTill anyChar (char '}')
- char '{'
- label <- manyTill inline (char '}')
- return $ Link (normalizeSpaces label) (url, "")
-
-image = try $ do
- ("includegraphics", _, args) <- command
- let args' = filter isArg args -- filter out options
- let src = if null args' then
- ("", "")
- else
- (stripFirstAndLast (head args'), "")
- return $ Image [Str "image"] src
-
-footnote = try $ do
- (name, _, (contents:[])) <- command
- if ((name == "footnote") || (name == "thanks"))
- then string ""
- else fail "not a footnote or thanks command"
- let contents' = stripFirstAndLast contents
- -- parse the extracted block, which may contain various block elements:
- rest <- getInput
- setInput $ contents'
- blocks <- parseBlocks
- setInput rest
- return $ Note blocks
-
--- | Parse any LaTeX command and return it in a raw TeX inline element.
-rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try $ do
- (name, star, args) <- command
- state <- getState
- if ((name == "begin") || (name == "end") || (name == "item"))
- then fail "not an inline command"
- else string ""
- return $ TeX ("\\" ++ name ++ star ++ concat args)
-
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
deleted file mode 100644
index df84c0ac7..000000000
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ /dev/null
@@ -1,909 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Markdown
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of markdown-formatted plain text to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.Markdown (
- readMarkdown
- ) where
-
-import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy )
-import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum )
-import Network.URI ( isURI )
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
-import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
- anyHtmlInlineTag, anyHtmlTag,
- anyHtmlEndTag, htmlEndTag, extractTagType,
- htmlBlockElement )
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
-import Text.ParserCombinators.Parsec
-
--- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: ParserState -> String -> Pandoc
-readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
-
---
--- Constants and data structure definitions
---
-
-spaceChars = " \t"
-bulletListMarkers = "*+-"
-hruleChars = "*-_"
-setextHChars = "=-"
-
--- treat these as potentially non-text when parsing inline:
-specialChars = "\\[]*_~`<>$!^-.&'\""
-
---
--- auxiliary functions
---
-
-indentSpaces = try $ do
- state <- getState
- let tabStop = stateTabStop state
- try (count tabStop (char ' ')) <|>
- (many (char ' ') >> string "\t") <?> "indentation"
-
-nonindentSpaces = do
- state <- getState
- let tabStop = stateTabStop state
- sps <- many (char ' ')
- if length sps < tabStop
- then return sps
- else unexpected "indented line"
-
--- | Fail unless we're at beginning of a line.
-failUnlessBeginningOfLine = do
- pos <- getPosition
- if sourceColumn pos == 1 then return () else fail "not beginning of line"
-
--- | Fail unless we're in "smart typography" mode.
-failUnlessSmart = do
- state <- getState
- if stateSmart state then return () else fail "Smart typography feature"
-
--- | Parse an inline Str element with a given content.
-inlineString str = try $ do
- (Str res) <- inline
- if res == str then return res else fail $ "unexpected Str content"
-
--- | Parse a sequence of inline elements between a string
--- @opener@ and a string @closer@, including inlines
--- between balanced pairs of @opener@ and a @closer@.
-inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
-inlinesInBalanced opener closer = try $ do
- string opener
- result <- manyTill ( (do lookAhead (inlineString opener)
- -- because it might be a link...
- bal <- inlinesInBalanced opener closer
- return $ [Str opener] ++ bal ++ [Str closer])
- <|> (count 1 inline))
- (try (string closer))
- return $ concat result
-
---
--- document structure
---
-
-titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
-
-authorsLine = try $ do
- char '%'
- skipSpaces
- authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
- newline
- return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
-
-dateLine = try $ do
- char '%'
- skipSpaces
- date <- many (noneOf "\n")
- newline
- return $ decodeCharacterReferences $ removeTrailingSpace date
-
-titleBlock = try $ do
- failIfStrict
- title <- option [] titleLine
- author <- option [] authorsLine
- date <- option "" dateLine
- optional blanklines
- return (title, author, date)
-
-parseMarkdown = do
- -- markdown allows raw HTML
- updateState (\state -> state { stateParseRaw = True })
- startPos <- getPosition
- -- go through once just to get list of reference keys
- -- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
- return . concat
- setInput docMinusKeys
- setPosition startPos
- st <- getState
- -- go through again for notes unless strict...
- if stateStrict st
- then return ()
- else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
- return . concat
- st <- getState
- let reversedNotes = stateNotes st
- updateState $ \st -> st { stateNotes = reverse reversedNotes }
- setInput docMinusNotes
- setPosition startPos
- -- now parse it for real...
- (title, author, date) <- option ([],[],"") titleBlock
- blocks <- parseBlocks
- return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
-
---
--- initial pass for references and notes
---
-
-referenceKey = try $ do
- startPos <- getPosition
- nonindentSpaces
- label <- reference
- char ':'
- skipSpaces
- optional (char '<')
- src <- many (noneOf "> \n\t")
- optional (char '>')
- tit <- option "" referenceTitle
- blanklines
- endPos <- getPosition
- let newkey = (label, (removeTrailingSpace src, tit))
- st <- getState
- let oldkeys = stateKeys st
- updateState $ \st -> st { stateKeys = newkey : oldkeys }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-referenceTitle = try $ do
- (many1 spaceChar >> option '\n' newline) <|> newline
- skipSpaces
- tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
- <|> do delim <- char '\'' <|> char '"'
- manyTill anyChar (try (char delim >> skipSpaces >>
- notFollowedBy (noneOf ")\n")))
- return $ decodeCharacterReferences tit
-
-noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
-
-rawLine = do
- notFollowedBy blankline
- notFollowedBy' noteMarker
- contents <- many1 nonEndline
- end <- option "" (newline >> optional indentSpaces >> return "\n")
- return $ contents ++ end
-
-rawLines = many1 rawLine >>= return . concat
-
-noteBlock = try $ do
- startPos <- getPosition
- ref <- noteMarker
- char ':'
- optional blankline
- optional indentSpaces
- raw <- sepBy rawLines (try (blankline >> indentSpaces))
- optional blanklines
- endPos <- getPosition
- -- parse the extracted text, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
- let newnote = (ref, contents)
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \st -> st { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
---
--- parsing blocks
---
-
-parseBlocks = manyTill block eof
-
-block = choice [ header
- , table
- , codeBlock
- , hrule
- , list
- , blockQuote
- , htmlBlock
- , rawLaTeXEnvironment'
- , para
- , plain
- , nullBlock ] <?> "block"
-
---
--- header blocks
---
-
-header = atxHeader <|> setextHeader <?> "header"
-
-atxHeader = try $ do
- level <- many1 (char '#') >>= return . length
- notFollowedBy (char '.' <|> char ')') -- this would be a list
- skipSpaces
- text <- manyTill inline atxClosing >>= return . normalizeSpaces
- return $ Header level text
-
-atxClosing = try $ skipMany (char '#') >> blanklines
-
-setextHeader = try $ do
- -- first, see if this block has any chance of being a setextHeader:
- lookAhead (anyLine >> oneOf setextHChars)
- text <- many1Till inline newline >>= return . normalizeSpaces
- level <- choice $ zipWith
- (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev)
- setextHChars [1..(length setextHChars)]
- return $ Header level text
-
---
--- hrule block
---
-
-hrule = try $ do
- skipSpaces
- start <- oneOf hruleChars
- count 2 (skipSpaces >> char start)
- skipMany (skipSpaces >> char start)
- newline
- optional blanklines
- return HorizontalRule
-
---
--- code blocks
---
-
-indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
-
-codeBlock = do
- contents <- many1 (indentedLine <|>
- try (do b <- blanklines
- l <- indentedLine
- return $ b ++ l))
- optional blanklines
- return $ CodeBlock $ stripTrailingNewlines $ concat contents
-
---
--- block quotes
---
-
-emacsBoxQuote = try $ do
- failIfStrict
- string ",----"
- manyTill anyChar newline
- raw <- manyTill
- (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
- (try (string "`----"))
- blanklines
- return raw
-
-emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
-
-emailBlockQuote = try $ do
- emailBlockQuoteStart
- raw <- sepBy (many (nonEndline <|>
- (try (endline >> notFollowedBy emailBlockQuoteStart >>
- return '\n'))))
- (try (newline >> emailBlockQuoteStart))
- newline <|> (eof >> return '\n')
- optional blanklines
- return raw
-
-blockQuote = do
- raw <- emailBlockQuote <|> emacsBoxQuote
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
- return $ BlockQuote contents
-
---
--- list blocks
---
-
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-bulletListStart = try $ do
- optional newline -- if preceded by a Plain block in a list context
- nonindentSpaces
- notFollowedBy' hrule -- because hrules start out just like lists
- oneOf bulletListMarkers
- spaceChar
- skipSpaces
-
-anyOrderedListStart = try $ do
- optional newline -- if preceded by a Plain block in a list context
- nonindentSpaces
- notFollowedBy $ string "p." >> spaceChar >> digit -- page number
- state <- getState
- if stateStrict state
- then do many1 digit
- char '.'
- spaceChar
- return (1, DefaultStyle, DefaultDelim)
- else anyOrderedListMarker >>~ spaceChar
-
-orderedListStart style delim = try $ do
- optional newline -- if preceded by a Plain block in a list context
- nonindentSpaces
- state <- getState
- num <- if stateStrict state
- then do many1 digit
- char '.'
- return 1
- else orderedListMarker style delim
- if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
- num `elem` [1, 5, 10, 50, 100, 500, 1000]))
- then char '\t' <|> (spaceChar >> spaceChar)
- else spaceChar
- skipSpaces
-
--- parse a line of a list item (start = parser for beginning of list item)
-listLine start = try $ do
- notFollowedBy' start
- notFollowedBy blankline
- notFollowedBy' (do indentSpaces
- many (spaceChar)
- bulletListStart <|> (anyOrderedListStart >> return ()))
- line <- manyTill anyChar newline
- return $ line ++ "\n"
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem start = try $ do
- start
- result <- many1 (listLine start)
- blanks <- many blankline
- return $ concat result ++ blanks
-
--- continuation of a list item - indented and separated by blankline
--- or (in compact lists) endline.
--- note: nested lists are parsed as continuations
-listContinuation start = try $ do
- lookAhead indentSpaces
- result <- many1 (listContinuationLine start)
- blanks <- many blankline
- return $ concat result ++ blanks
-
-listContinuationLine start = try $ do
- notFollowedBy blankline
- notFollowedBy' start
- optional indentSpaces
- result <- manyTill anyChar newline
- return $ result ++ "\n"
-
-listItem start = try $ do
- first <- rawListItem start
- continuations <- many (listContinuation start)
- -- parsing with ListItemState forces markers at beginning of lines to
- -- count as list item markers, even if not separated by blank space.
- -- see definition of "endline"
- state <- getState
- let oldContext = stateParserContext state
- setState $ state {stateParserContext = ListItemState}
- -- parse the extracted block, which may contain various block elements:
- let raw = concat (first:continuations)
- contents <- parseFromString parseBlocks raw
- updateState (\st -> st {stateParserContext = oldContext})
- return contents
-
-orderedList = try $ do
- (start, style, delim) <- lookAhead anyOrderedListStart
- items <- many1 (listItem (orderedListStart style delim))
- return $ OrderedList (start, style, delim) $ compactify items
-
-bulletList = many1 (listItem bulletListStart) >>=
- return . BulletList . compactify
-
--- definition lists
-
-definitionListItem = try $ do
- notFollowedBy blankline
- notFollowedBy' indentSpaces
- -- first, see if this has any chance of being a definition list:
- lookAhead (anyLine >> char ':')
- term <- manyTill inline newline
- raw <- many1 defRawBlock
- state <- getState
- let oldContext = stateParserContext state
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ concat raw
- updateState (\st -> st {stateParserContext = oldContext})
- return ((normalizeSpaces term), contents)
-
-defRawBlock = try $ do
- char ':'
- state <- getState
- let tabStop = stateTabStop state
- try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
- firstline <- anyLine
- rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
- trailing <- option "" blanklines
- return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
-
-definitionList = do
- failIfStrict
- items <- many1 definitionListItem
- let (terms, defs) = unzip items
- let defs' = compactify defs
- let items' = zip terms defs'
- return $ DefinitionList items'
-
---
--- paragraph block
---
-
-para = try $ do
- result <- many1 inline
- newline
- blanklines <|> do st <- getState
- if stateStrict st
- then lookAhead (blockQuote <|> header) >> return ""
- else lookAhead emacsBoxQuote >> return ""
- return $ Para $ normalizeSpaces result
-
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
---
--- raw html
---
-
-htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
-
-htmlBlock = do
- st <- getState
- if stateStrict st
- then try $ do failUnlessBeginningOfLine
- first <- htmlElement
- finalSpace <- many (oneOf spaceChars)
- finalNewlines <- many newline
- return $ RawHtml $ first ++ finalSpace ++ finalNewlines
- else rawHtmlBlocks
-
--- True if tag is self-closing
-isSelfClosing tag =
- isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
-
-strictHtmlBlock = try $ do
- tag <- anyHtmlBlockTag
- let tag' = extractTagType tag
- if isSelfClosing tag || tag' == "hr"
- then return tag
- else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
- (htmlElement <|> (count 1 anyChar)))
- end <- htmlEndTag tag'
- return $ tag ++ concat contents ++ end
-
-rawHtmlBlocks = do
- htmlBlocks <- many1 rawHtmlBlock
- let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
- let combined' = if not (null combined) && last combined == '\n'
- then init combined -- strip extra newline
- else combined
- return $ RawHtml combined'
-
---
--- LaTeX
---
-
-rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
-
---
--- Tables
---
-
--- Parse a dashed line with optional trailing spaces; return its length
--- and the length including trailing space.
-dashedLine ch = do
- dashes <- many1 (char ch)
- sp <- many spaceChar
- return $ (length dashes, length $ dashes ++ sp)
-
--- Parse a table header with dashed lines of '-' preceded by
--- one line of text.
-simpleTableHeader = try $ do
- rawContent <- anyLine
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines) = unzip dashes
- let indices = scanl (+) (length initSp) lines
- let rawHeads = tail $ splitByIndices (init indices) rawContent
- let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
- return (rawHeads, aligns, indices)
-
--- Parse a table footer - dashed lines followed by blank line.
-tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
-
--- Parse a table separator - dashed line.
-tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
-
--- Parse a raw line and split it into chunks by indices.
-rawTableLine indices = do
- notFollowedBy' (blanklines <|> tableFooter)
- line <- many1Till anyChar newline
- return $ map removeLeadingTrailingSpace $ tail $
- splitByIndices (init indices) line
-
--- Parse a table line and return a list of lists of blocks (columns).
-tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
-
--- Parse a multiline table row and return a list of blocks (columns).
-multilineRow indices = do
- colLines <- many1 (rawTableLine indices)
- optional blanklines
- let cols = map unlines $ transpose colLines
- mapM (parseFromString (many plain)) cols
-
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Float] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths = zipWith (-) indices (0:indices)
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
--- Parses a table caption: inlines beginning with 'Table:'
--- and followed by blank lines.
-tableCaption = try $ do
- nonindentSpaces
- string "Table:"
- result <- many1 inline
- blanklines
- return $ normalizeSpaces result
-
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith headerParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines <- many1Till (lineParser indices) footerParser
- caption <- option [] tableCaption
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table caption aligns widths heads lines
-
--- Parse a simple table with '---' header and one line per row.
-simpleTable = tableWith simpleTableHeader tableLine blanklines
-
--- Parse a multiline table: starts with row of '-' on top, then header
--- (which may be multiline), then the rows,
--- which may be multiline, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
-multilineTable = tableWith multilineTableHeader multilineRow tableFooter
-
-multilineTableHeader = try $ do
- tableSep
- rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines) = unzip dashes
- let indices = scanl (+) (length initSp) lines
- let rawHeadsList = transpose $ map
- (\ln -> tail $ splitByIndices (init indices) ln)
- rawContent
- let rawHeads = map (joinWithSep " ") rawHeadsList
- let aligns = zipWith alignType rawHeadsList lengths
- return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
-
--- Returns an alignment type for a table, based on a list of strings
--- (the rows of the column header) and a number (the length of the
--- dashed line under the rows.
-alignType :: [String] -> Int -> Alignment
-alignType [] len = AlignDefault
-alignType strLst len =
- let str = head $ sortBy (comparing length) $
- map removeTrailingSpace strLst
- leftSpace = if null str then False else (str !! 0) `elem` " \t"
- rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
- in case (leftSpace, rightSpace) of
- (True, False) -> AlignRight
- (False, True) -> AlignLeft
- (True, True) -> AlignCenter
- (False, False) -> AlignDefault
-
-table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
-
---
--- inline
---
-
-inline = choice [ str
- , smartPunctuation
- , whitespace
- , endline
- , code
- , charRef
- , strong
- , emph
- , note
- , inlineNote
- , link
- , image
- , math
- , strikeout
- , superscript
- , subscript
- , autoLink
- , rawHtmlInline'
- , rawLaTeXInline'
- , escapedChar
- , symbol
- , ltSign ] <?> "inline"
-
-escapedChar = do
- char '\\'
- state <- getState
- result <- option '\\' $ if stateStrict state
- then oneOf "\\`*_{}[]()>#+-.!~"
- else satisfy (not . isAlphaNum)
- return $ Str [result]
-
-ltSign = do
- st <- getState
- if stateStrict st
- then char '<'
- else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
- return $ Str ['<']
-
-specialCharsMinusLt = filter (/= '<') specialChars
-
-symbol = do
- result <- oneOf specialCharsMinusLt
- return $ Str [result]
-
--- parses inline code, between n `s and n `s
-code = try $ do
- starts <- many1 (char '`')
- skipSpaces
- result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
- (char '\n' >> return " "))
- (try (skipSpaces >> count (length starts) (char '`') >>
- notFollowedBy (char '`')))
- return $ Code $ removeLeadingTrailingSpace $ concat result
-
-mathWord = many1 ((noneOf " \t\n\\$") <|>
- (try (char '\\') >>~ notFollowedBy (char '$')))
-
-math = try $ do
- failIfStrict
- char '$'
- notFollowedBy space
- words <- sepBy1 mathWord (many1 space)
- char '$'
- return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
-
-emph = ((enclosed (char '*') (char '*') inline) <|>
- (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=
- return . Emph . normalizeSpaces
-
-strong = ((enclosed (string "**") (try $ string "**") inline) <|>
- (enclosed (string "__") (try $ string "__") inline)) >>=
- return . Strong . normalizeSpaces
-
-strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
- return . Strikeout . normalizeSpaces
-
-superscript = failIfStrict >> enclosed (char '^') (char '^')
- (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
- return . Superscript
-
-subscript = failIfStrict >> enclosed (char '~') (char '~')
- (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
- return . Subscript
-
-smartPunctuation = failUnlessSmart >>
- choice [ quoted, apostrophe, dash, ellipses ]
-
-apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-
-quoted = doubleQuoted <|> singleQuoted
-
-withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
- return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
- return . Quoted DoubleQuote . normalizeSpaces
-
-failIfInQuoteContext context = do
- st <- getState
- if stateQuoteContext st == context
- then fail "already inside quotes"
- else return ()
-
-singleQuoteStart = do
- failIfInQuoteContext InSingleQuote
- char '\8216' <|>
- do char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum))) -- possess/contraction
- return '\''
-
-singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
-
-doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
- (char '"' <|> char '\8220') >>
- notFollowedBy (oneOf " \t\n")
-
-doubleQuoteEnd = char '"' <|> char '\8221'
-
-ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
-
-dash = enDash <|> emDash
-
-enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
-
-emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
- skipSpaces >> return EmDash
-
-whitespace = do
- sps <- many1 (oneOf spaceChars)
- if length sps >= 2
- then option Space (endline >> return LineBreak)
- else return Space <?> "whitespace"
-
-nonEndline = satisfy (/='\n')
-
-strChar = noneOf (specialChars ++ spaceChars ++ "\n")
-
-str = many1 strChar >>= return . Str
-
--- an endline character that can be treated as a space, not a structural break
-endline = try $ do
- newline
- notFollowedBy blankline
- st <- getState
- if stateStrict st
- then do notFollowedBy emailBlockQuoteStart
- notFollowedBy (char '#') -- atx header
- else return ()
- -- parse potential list-starts differently if in a list:
- if stateParserContext st == ListItemState
- then notFollowedBy' (bulletListStart <|>
- (anyOrderedListStart >> return ()))
- else return ()
- return Space
-
---
--- links
---
-
--- a reference label for a link
-reference = notFollowedBy' (string "[^") >> -- footnote reference
- inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
-
--- source for a link, with optional title
-source = try $ do
- char '('
- optional (char '<')
- src <- many (noneOf ")> \t\n")
- optional (char '>')
- tit <- option "" linkTitle
- skipSpaces
- char ')'
- return (removeTrailingSpace src, tit)
-
-linkTitle = try $ do
- (many1 spaceChar >> option '\n' newline) <|> newline
- skipSpaces
- delim <- char '\'' <|> char '"'
- tit <- manyTill anyChar (try (char delim >> skipSpaces >>
- notFollowedBy (noneOf ")\n")))
- return $ decodeCharacterReferences tit
-
-link = try $ do
- label <- reference
- src <- source <|> referenceLink label
- return $ Link label src
-
--- a link like [this][ref] or [this][] or [this]
-referenceLink label = do
- ref <- option [] (try (optional (char ' ') >>
- optional (newline >> skipSpaces) >> reference))
- let ref' = if null ref then label else ref
- state <- getState
- case lookupKeySrc (stateKeys state) ref' of
- Nothing -> fail "no corresponding key"
- Just target -> return target
-
-emailAddress = try $ do
- name <- many1 (alphaNum <|> char '+')
- char '@'
- first <- many1 alphaNum
- rest <- many1 (char '.' >> many1 alphaNum)
- return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
-
-uri = try $ do
- str <- many1 (noneOf "\n\t >")
- if isURI str
- then return str
- else fail "not a URI"
-
-autoLink = try $ do
- char '<'
- src <- uri <|> emailAddress
- char '>'
- let src' = if "mailto:" `isPrefixOf` src
- then drop 7 src
- else src
- st <- getState
- return $ if stateStrict st
- then Link [Str src'] (src, "")
- else Link [Code src'] (src, "")
-
-image = try $ do
- char '!'
- (Link label src) <- link
- return $ Image label src
-
-note = try $ do
- failIfStrict
- ref <- noteMarker
- state <- getState
- let notes = stateNotes state
- case lookup ref notes of
- Nothing -> fail "note not found"
- Just contents -> return $ Note contents
-
-inlineNote = try $ do
- failIfStrict
- char '^'
- contents <- inlinesInBalanced "[" "]"
- return $ Note [Para contents]
-
-rawLaTeXInline' = failIfStrict >> rawLaTeXInline
-
-rawHtmlInline' = do
- st <- getState
- result <- choice $ if stateStrict st
- then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
- else [htmlBlockElement, anyHtmlInlineTag]
- return $ HtmlInline result
-
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
deleted file mode 100644
index 1239eb688..000000000
--- a/src/Text/Pandoc/Readers/RST.hs
+++ /dev/null
@@ -1,640 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion from reStructuredText to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.RST (
- readRST
- ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.ParserCombinators.Parsec
-import Data.List ( findIndex, delete )
-
--- | Parse reStructuredText string and return Pandoc document.
-readRST :: ParserState -> String -> Pandoc
-readRST state str = (readWith parseRST) state (str ++ "\n\n")
-
---
--- Constants and data structure definitions
----
-
-bulletListMarkers = "*+-"
-underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
-
--- treat these as potentially non-text when parsing inline:
-specialChars = "\\`|*_<>$:[-"
-
---
--- parsing documents
---
-
-isAnonKey (ref, src) = ref == [Str "_"]
-
-isHeader :: Int -> Block -> Bool
-isHeader n (Header x _) = x == n
-isHeader _ _ = False
-
--- | Promote all headers in a list of blocks. (Part of
--- title transformation for RST.)
-promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level text):rest) =
- (Header (level - num) text):(promoteHeaders num rest)
-promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
-promoteHeaders num [] = []
-
--- | If list of blocks starts with a header (or a header and subheader)
--- of level that are not found elsewhere, return it as a title and
--- promote all the other headers.
-titleTransform :: [Block] -- ^ list of blocks
- -> ([Block], [Inline]) -- ^ modified list of blocks, title
-titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
- if (any (isHeader 1) rest) || (any (isHeader 2) rest)
- then ((Header 1 head1):(Header 2 head2):rest, [])
- else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
-titleTransform ((Header 1 head1):rest) = -- title, no subtitle
- if (any (isHeader 1) rest)
- then ((Header 1 head1):rest, [])
- else ((promoteHeaders 1 rest), head1)
-titleTransform blocks = (blocks, [])
-
-parseRST = do
- startPos <- getPosition
- -- go through once just to get list of reference keys
- -- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
- setInput docMinusKeys
- setPosition startPos
- st <- getState
- let reversedKeys = stateKeys st
- updateState $ \st -> st { stateKeys = reverse reversedKeys }
- -- now parse it for real...
- blocks <- parseBlocks
- let blocks' = filter (/= Null) blocks
- state <- getState
- let (blocks'', title) = if stateStandalone state
- then titleTransform blocks'
- else (blocks', [])
- let authors = stateAuthors state
- let date = stateDate state
- let title' = if (null title) then (stateTitle state) else title
- return $ Pandoc (Meta title' authors date) blocks''
-
---
--- parsing blocks
---
-
-parseBlocks = manyTill block eof
-
-block = choice [ codeBlock
- , rawHtmlBlock
- , rawLaTeXBlock
- , fieldList
- , blockQuote
- , imageBlock
- , unknownDirective
- , header
- , hrule
- , list
- , lineBlock
- , para
- , plain
- , nullBlock ] <?> "block"
-
---
--- field list
---
-
-fieldListItem indent = try $ do
- string indent
- char ':'
- name <- many1 alphaNum
- string ": "
- skipSpaces
- first <- manyTill anyChar newline
- rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>
- indentedBlock
- return (name, joinWithSep " " (first:(lines rest)))
-
-fieldList = try $ do
- indent <- lookAhead $ many (oneOf " \t")
- items <- many1 $ fieldListItem indent
- blanklines
- let authors = case lookup "Authors" items of
- Just auth -> [auth]
- Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
- if null authors
- then return ()
- else updateState $ \st -> st {stateAuthors = authors}
- case (lookup "Date" items) of
- Just dat -> updateState $ \st -> st {stateDate = dat}
- Nothing -> return ()
- case (lookup "Title" items) of
- Just tit -> parseFromString (many inline) tit >>=
- \t -> updateState $ \st -> st {stateTitle = t}
- Nothing -> return ()
- let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") &&
- (x /= "Date") && (x /= "Title")) items
- if null remaining
- then return Null
- else do terms <- mapM (return . (:[]) . Str . fst) remaining
- defs <- mapM (parseFromString (many block) . snd)
- remaining
- return $ DefinitionList $ zip terms defs
-
---
--- line block
---
-
-lineBlockLine = try $ do
- string "| "
- white <- many (oneOf " \t")
- line <- manyTill inline newline
- return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
-
-lineBlock = try $ do
- lines <- many1 lineBlockLine
- blanklines
- return $ Para (concat lines)
-
---
--- paragraph block
---
-
-para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-
-codeBlockStart = string "::" >> blankline >> blankline
-
--- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock = try $ do
- result <- many1 (notFollowedBy' codeBlockStart >> inline)
- lookAhead (string "::")
- return $ Para $ if last result == Space
- then normalizeSpaces result
- else (normalizeSpaces result) ++ [Str ":"]
-
--- regular paragraph
-paraNormal = try $ do
- result <- many1 inline
- newline
- blanklines
- return $ Para $ normalizeSpaces result
-
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
---
--- image block
---
-
-imageBlock = try $ do
- string ".. image:: "
- src <- manyTill anyChar newline
- fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
- many1 $ fieldListItem indent
- optional blanklines
- case lookup "alt" fields of
- Just alt -> return $ Plain [Image [Str alt] (src, alt)]
- Nothing -> return $ Plain [Image [Str "image"] (src, "")]
---
--- header blocks
---
-
-header = doubleHeader <|> singleHeader <?> "header"
-
--- a header with lines on top and bottom
-doubleHeader = try $ do
- c <- oneOf underlineChars
- rest <- many (char c) -- the top line
- let lenTop = length (c:rest)
- skipSpaces
- newline
- txt <- many1 (notFollowedBy blankline >> inline)
- pos <- getPosition
- let len = (sourceColumn pos) - 1
- if (len > lenTop) then fail "title longer than border" else return ()
- blankline -- spaces and newline
- count lenTop (char c) -- the bottom line
- blanklines
- -- check to see if we've had this kind of header before.
- -- if so, get appropriate level. if not, add to list.
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- return $ Header level (normalizeSpaces txt)
-
--- a header with line on the bottom only
-singleHeader = try $ do
- notFollowedBy' whitespace
- txt <- many1 (do {notFollowedBy blankline; inline})
- pos <- getPosition
- let len = (sourceColumn pos) - 1
- blankline
- c <- oneOf underlineChars
- rest <- count (len - 1) (char c)
- many (char c)
- blanklines
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- return $ Header level (normalizeSpaces txt)
-
---
--- hrule block
---
-
-hrule = try $ do
- chr <- oneOf underlineChars
- count 3 (char chr)
- skipMany (char chr)
- blankline
- blanklines
- return HorizontalRule
-
---
--- code blocks
---
-
--- read a line indented by a given string
-indentedLine indents = try $ do
- string indents
- result <- manyTill anyChar newline
- return $ result ++ "\n"
-
--- two or more indented lines, possibly separated by blank lines.
--- any amount of indentation will work.
-indentedBlock = do
- indents <- lookAhead $ many1 (oneOf " \t")
- lns <- many $ choice $ [ indentedLine indents,
- try $ do b <- blanklines
- l <- indentedLine indents
- return (b ++ l) ]
- optional blanklines
- return $ concat lns
-
-codeBlock = try $ do
- codeBlockStart
- result <- indentedBlock
- return $ CodeBlock $ stripTrailingNewlines result
-
---
--- raw html
---
-
-rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
- indentedBlock >>= return . RawHtml
-
---
--- raw latex
---
-
-rawLaTeXBlock = try $ do
- string ".. raw:: latex"
- blanklines
- result <- indentedBlock
- return $ Para [(TeX result)]
-
---
--- block quotes
---
-
-blockQuote = do
- raw <- indentedBlock
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n\n"
- return $ BlockQuote contents
-
---
--- list blocks
---
-
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-definitionListItem = try $ do
- term <- many1Till inline endline
- raw <- indentedBlock
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n\n"
- return (normalizeSpaces term, contents)
-
-definitionList = many1 definitionListItem >>= return . DefinitionList
-
--- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart = try $ do
- notFollowedBy' hrule -- because hrules start out just like lists
- marker <- oneOf bulletListMarkers
- white <- many1 spaceChar
- return $ length (marker:white)
-
--- parses ordered list start and returns its length (inc following whitespace)
-orderedListStart style delim = try $ do
- (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
- white <- many1 spaceChar
- return $ markerLen + length white
-
--- parse a line of a list item
-listLine markerLength = try $ do
- notFollowedBy blankline
- indentWith markerLength
- line <- manyTill anyChar newline
- return $ line ++ "\n"
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith num = do
- state <- getState
- let tabStop = stateTabStop state
- if (num < tabStop)
- then count num (char ' ')
- else choice [ try (count num (char ' ')),
- (try (char '\t' >> count (num - tabStop) (char ' '))) ]
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem start = do
- markerLength <- start
- firstLine <- manyTill anyChar newline
- restLines <- many (listLine markerLength)
- return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
-
--- continuation of a list item - indented and separated by blankline or
--- (in compact lists) endline.
--- Note: nested lists are parsed as continuations.
-listContinuation markerLength = try $ do
- blanks <- many1 blankline
- result <- many1 (listLine markerLength)
- return $ blanks ++ concat result
-
-listItem start = try $ do
- (markerLength, first) <- rawListItem start
- rest <- many (listContinuation markerLength)
- blanks <- choice [ try (many blankline >>~ lookAhead start),
- many1 blankline ] -- whole list must end with blank.
- -- parsing with ListItemState forces markers at beginning of lines to
- -- count as list item markers, even if not separated by blank space.
- -- see definition of "endline"
- state <- getState
- let oldContext = stateParserContext state
- setState $ state {stateParserContext = ListItemState}
- -- parse the extracted block, which may itself contain block elements
- parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
- updateState (\st -> st {stateParserContext = oldContext})
- return parsed
-
-orderedList = do
- (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
- items <- many1 (listItem (orderedListStart style delim))
- let items' = compactify items
- return $ OrderedList (start, style, delim) items'
-
-bulletList = many1 (listItem bulletListStart) >>=
- return . BulletList . compactify
-
---
--- unknown directive (e.g. comment)
---
-
-unknownDirective = try $ do
- string ".. "
- manyTill anyChar newline
- many (string " :" >> many1 (noneOf "\n:") >> char ':' >>
- many1 (noneOf "\n") >> newline)
- optional blanklines
- return Null
-
---
--- reference key
---
-
-referenceKey = do
- startPos <- getPosition
- key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
- st <- getState
- let oldkeys = stateKeys st
- updateState $ \st -> st { stateKeys = key : oldkeys }
- optional blanklines
- endPos <- getPosition
- -- return enough blanks to replace key
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-targetURI = do
- skipSpaces
- optional newline
- contents <- many1 (try (many spaceChar >> newline >>
- many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
- blanklines
- return contents
-
-imageKey = try $ do
- string ".. |"
- ref <- manyTill inline (char '|')
- skipSpaces
- string "image::"
- src <- targetURI
- return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
-anonymousKey = try $ do
- oneOfStrings [".. __:", "__"]
- src <- targetURI
- state <- getState
- return ([Str "_"], (removeLeadingTrailingSpace src, ""))
-
-regularKeyQuoted = try $ do
- string ".. _`"
- ref <- manyTill inline (char '`')
- char ':'
- src <- targetURI
- return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
-regularKey = try $ do
- string ".. _"
- ref <- manyTill inline (char ':')
- src <- targetURI
- return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
- --
- -- inline
- --
-
-inline = choice [ link
- , str
- , whitespace
- , endline
- , strong
- , emph
- , code
- , image
- , hyphens
- , superscript
- , subscript
- , escapedChar
- , symbol ] <?> "inline"
-
-hyphens = do
- result <- many1 (char '-')
- option Space endline
- -- don't want to treat endline after hyphen or dash as a space
- return $ Str result
-
-escapedChar = escaped anyChar
-
-symbol = do
- result <- oneOf specialChars
- return $ Str [result]
-
--- parses inline code, between codeStart and codeEnd
-code = try $ do
- string "``"
- result <- manyTill anyChar (try (string "``"))
- return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-
-emph = enclosed (char '*') (char '*') inline >>=
- return . Emph . normalizeSpaces
-
-strong = enclosed (string "**") (try $ string "**") inline >>=
- return . Strong . normalizeSpaces
-
-interpreted role = try $ do
- optional $ try $ string "\\ "
- result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
- nextChar <- lookAhead anyChar
- try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
- return [Str result]
-
-superscript = interpreted "sup" >>= (return . Superscript)
-
-subscript = interpreted "sub" >>= (return . Subscript)
-
-whitespace = many1 spaceChar >> return Space <?> "whitespace"
-
-str = notFollowedBy' oneWordReference >>
- many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
-
--- an endline character that can be treated as a space, not a structural break
-endline = try $ do
- newline
- notFollowedBy blankline
- -- parse potential list-starts at beginning of line differently in a list:
- st <- getState
- if (stateParserContext st) == ListItemState
- then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
- notFollowedBy' bulletListStart
- else return ()
- return Space
-
---
--- links
---
-
-link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-
-explicitLink = try $ do
- char '`'
- notFollowedBy (char '`') -- `` is marks start of inline code
- label <- manyTill inline (try (do {spaces; char '<'}))
- src <- manyTill (noneOf ">\n ") (char '>')
- skipSpaces
- string "`_"
- return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
-
-reference = try $ do
- char '`'
- notFollowedBy (char '`')
- label <- many1Till inline (char '`')
- char '_'
- return label
-
-oneWordReference = do
- raw <- many1 alphaNum
- char '_'
- notFollowedBy alphaNum -- because this_is_not a link
- return [Str raw]
-
-referenceLink = try $ do
- label <- reference <|> oneWordReference
- key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
- state <- getState
- let keyTable = stateKeys state
- src <- case lookupKeySrc keyTable key of
- Nothing -> fail "no corresponding key"
- Just target -> return target
- -- if anonymous link, remove first anon key so it won't be used again
- let keyTable' = if (key == [Str "_"]) -- anonymous link?
- then delete ([Str "_"], src) keyTable -- remove first anon key
- else keyTable
- setState $ state { stateKeys = keyTable' }
- return $ Link (normalizeSpaces label) src
-
-uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
- "mailto:", "news:", "telnet:" ]
-
-uri = try $ do
- scheme <- uriScheme
- identifier <- many1 (noneOf " \t\n")
- return $ scheme ++ identifier
-
-autoURI = do
- src <- uri
- return $ Link [Str src] (src, "")
-
-emailChar = alphaNum <|> oneOf "-+_."
-
-emailAddress = try $ do
- firstLetter <- alphaNum
- restAddr <- many emailChar
- let addr = firstLetter:restAddr
- char '@'
- dom <- domain
- return $ addr ++ '@':dom
-
-domainChar = alphaNum <|> char '-'
-
-domain = do
- first <- many1 domainChar
- dom <- many1 (try (do{ char '.'; many1 domainChar }))
- return $ joinWithSep "." (first:dom)
-
-autoEmail = do
- src <- emailAddress
- return $ Link [Str src] ("mailto:" ++ src, "")
-
-autoLink = autoURI <|> autoEmail
-
--- For now, we assume that all substitution references are for images.
-image = try $ do
- char '|'
- ref <- manyTill inline (char '|')
- state <- getState
- let keyTable = stateKeys state
- src <- case lookupKeySrc keyTable ref of
- Nothing -> fail "no corresponding key"
- Just target -> return target
- return $ Image (normalizeSpaces ref) src
-
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
deleted file mode 100644
index f27c3ae75..000000000
--- a/src/Text/Pandoc/Shared.hs
+++ /dev/null
@@ -1,792 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Shared
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Utility functions and definitions used by the various Pandoc modules.
--}
-module Text.Pandoc.Shared (
- -- * List processing
- splitBy,
- splitByIndices,
- substitute,
- joinWithSep,
- -- * Text processing
- backslashEscapes,
- escapeStringUsing,
- stripTrailingNewlines,
- removeLeadingTrailingSpace,
- removeLeadingSpace,
- removeTrailingSpace,
- stripFirstAndLast,
- camelCaseToHyphenated,
- toRomanNumeral,
- wrapped,
- wrapIfNeeded,
- -- * Parsing
- (>>~),
- anyLine,
- many1Till,
- notFollowedBy',
- oneOfStrings,
- spaceChar,
- skipSpaces,
- blankline,
- blanklines,
- enclosed,
- stringAnyCase,
- parseFromString,
- lineClump,
- charsInBalanced,
- charsInBalanced',
- romanNumeral,
- withHorizDisplacement,
- nullBlock,
- failIfStrict,
- escaped,
- anyOrderedListMarker,
- orderedListMarker,
- charRef,
- readWith,
- testStringWith,
- ParserState (..),
- defaultParserState,
- HeaderType (..),
- ParserContext (..),
- QuoteContext (..),
- NoteTable,
- KeyTable,
- lookupKeySrc,
- refsMatch,
- -- * Native format prettyprinting
- prettyPandoc,
- -- * Pandoc block and inline list processing
- orderedListMarkers,
- normalizeSpaces,
- compactify,
- Element (..),
- hierarchicalize,
- isHeaderBlock,
- -- * Writer options
- WriterOptions (..),
- defaultWriterOptions
- ) where
-
-import Text.Pandoc.Definition
-import Text.ParserCombinators.Parsec
-import Text.PrettyPrint.HughesPJ ( Doc, fsep )
-import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
-import Data.List ( find, isPrefixOf )
-import Control.Monad ( join )
-
---
--- List processing
---
-
--- | Split list by groups of one or more sep.
-splitBy :: (Eq a) => a -> [a] -> [[a]]
-splitBy _ [] = []
-splitBy sep lst =
- let (first, rest) = break (== sep) lst
- rest' = dropWhile (== sep) rest
- in first:(splitBy sep rest')
-
--- | Split list into chunks divided at specified indices.
-splitByIndices :: [Int] -> [a] -> [[a]]
-splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst =
- let (first, rest) = splitAt x lst in
- first:(splitByIndices (map (\y -> y - x) xs) rest)
-
--- | Replace each occurrence of one sublist in a list with another.
-substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
-substitute _ _ [] = []
-substitute [] _ lst = lst
-substitute target replacement lst =
- if target `isPrefixOf` lst
- then replacement ++ (substitute target replacement $ drop (length target) lst)
- else (head lst):(substitute target replacement $ tail lst)
-
--- | Joins a list of lists, separated by another list.
-joinWithSep :: [a] -- ^ List to use as separator
- -> [[a]] -- ^ Lists to join
- -> [a]
-joinWithSep _ [] = []
-joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
-
---
--- Text processing
---
-
--- | Returns an association list of backslash escapes for the
--- designated characters.
-backslashEscapes :: [Char] -- ^ list of special characters to escape
- -> [(Char, String)]
-backslashEscapes = map (\ch -> (ch, ['\\',ch]))
-
--- | Escape a string of characters, using an association list of
--- characters and strings.
-escapeStringUsing :: [(Char, String)] -> String -> String
-escapeStringUsing _ [] = ""
-escapeStringUsing escapeTable (x:xs) =
- case (lookup x escapeTable) of
- Just str -> str ++ rest
- Nothing -> x:rest
- where rest = escapeStringUsing escapeTable xs
-
--- | Strip trailing newlines from string.
-stripTrailingNewlines :: String -> String
-stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
-
--- | Remove leading and trailing space (including newlines) from string.
-removeLeadingTrailingSpace :: String -> String
-removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
-
--- | Remove leading space (including newlines) from string.
-removeLeadingSpace :: String -> String
-removeLeadingSpace = dropWhile (`elem` " \n\t")
-
--- | Remove trailing space (including newlines) from string.
-removeTrailingSpace :: String -> String
-removeTrailingSpace = reverse . removeLeadingSpace . reverse
-
--- | Strip leading and trailing characters from string
-stripFirstAndLast :: String -> String
-stripFirstAndLast str =
- drop 1 $ take ((length str) - 1) str
-
--- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-camelCaseToHyphenated :: String -> String
-camelCaseToHyphenated [] = ""
-camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
- a:'-':(toLower b):(camelCaseToHyphenated rest)
-camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
-
--- | Convert number < 4000 to uppercase roman numeral.
-toRomanNumeral :: Int -> String
-toRomanNumeral x =
- if x >= 4000 || x < 0
- then "?"
- else case x of
- _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
- _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
- _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
- _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
- _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
- _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
- _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
- _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
- _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
- _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
- _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
- _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
- _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
- _ -> ""
-
--- | Wrap inlines to line length.
-wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
-wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
- return . fsep
-
-wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
- [Inline] -> m Doc
-wrapIfNeeded opts = if writerWrapText opts
- then wrapped
- else ($)
-
---
--- Parsing
---
-
--- | Like >>, but returns the operation on the left.
--- (Suggested by Tillmann Rendel on Haskell-cafe list.)
-(>>~) :: (Monad m) => m a -> m b -> m a
-a >>~ b = a >>= \x -> b >> return x
-
--- | Parse any line of text
-anyLine :: GenParser Char st [Char]
-anyLine = manyTill anyChar newline
-
--- | Like @manyTill@, but reads at least one item.
-many1Till :: GenParser tok st a
- -> GenParser tok st end
- -> GenParser tok st [a]
-many1Till p end = do
- first <- p
- rest <- manyTill p end
- return (first:rest)
-
--- | A more general form of @notFollowedBy@. This one allows any
--- type of parser to be specified, and succeeds only if that parser fails.
--- It does not consume any input.
-notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
-notFollowedBy' p = try $ join $ do a <- try p
- return (unexpected (show a))
- <|>
- return (return ())
--- (This version due to Andrew Pimlott on the Haskell mailing list.)
-
--- | Parses one of a list of strings (tried in order).
-oneOfStrings :: [String] -> GenParser Char st String
-oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
-
--- | Parses a space or tab.
-spaceChar :: CharParser st Char
-spaceChar = char ' ' <|> char '\t'
-
--- | Skips zero or more spaces or tabs.
-skipSpaces :: GenParser Char st ()
-skipSpaces = skipMany spaceChar
-
--- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: GenParser Char st Char
-blankline = try $ skipSpaces >> newline
-
--- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: GenParser Char st [Char]
-blanklines = many1 blankline
-
--- | Parses material enclosed between start and end parsers.
-enclosed :: GenParser Char st t -- ^ start parser
- -> GenParser Char st end -- ^ end parser
- -> GenParser Char st a -- ^ content parser (to be used repeatedly)
- -> GenParser Char st [a]
-enclosed start end parser = try $
- start >> notFollowedBy space >> many1Till parser end
-
--- | Parse string, case insensitive.
-stringAnyCase :: [Char] -> CharParser st String
-stringAnyCase [] = string ""
-stringAnyCase (x:xs) = do
- firstChar <- char (toUpper x) <|> char (toLower x)
- rest <- stringAnyCase xs
- return (firstChar:rest)
-
--- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
-parseFromString parser str = do
- oldPos <- getPosition
- oldInput <- getInput
- setInput str
- result <- parser
- setInput oldInput
- setPosition oldPos
- return result
-
--- | Parse raw line block up to and including blank lines.
-lineClump :: GenParser Char st String
-lineClump = blanklines
- <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
-
--- | Parse a string of characters between an open character
--- and a close character, including text between balanced
--- pairs of open and close, which must be different. For example,
--- @charsInBalanced '(' ')'@ will parse "(hello (there))"
--- and return "hello (there)". Stop if a blank line is
--- encountered.
-charsInBalanced :: Char -> Char -> GenParser Char st String
-charsInBalanced open close = try $ do
- char open
- raw <- many $ (many1 (noneOf [open, close, '\n']))
- <|> (do res <- charsInBalanced open close
- return $ [open] ++ res ++ [close])
- <|> try (string "\n" >>~ notFollowedBy' blanklines)
- char close
- return $ concat raw
-
--- | Like @charsInBalanced@, but allow blank lines in the content.
-charsInBalanced' :: Char -> Char -> GenParser Char st String
-charsInBalanced' open close = try $ do
- char open
- raw <- many $ (many1 (noneOf [open, close]))
- <|> (do res <- charsInBalanced' open close
- return $ [open] ++ res ++ [close])
- char close
- return $ concat raw
-
--- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Bool -- ^ Uppercase if true
- -> GenParser Char st Int
-romanNumeral upperCase = do
- let charAnyCase c = char (if upperCase then toUpper c else c)
- let one = charAnyCase 'i'
- let five = charAnyCase 'v'
- let ten = charAnyCase 'x'
- let fifty = charAnyCase 'l'
- let hundred = charAnyCase 'c'
- let fivehundred = charAnyCase 'd'
- let thousand = charAnyCase 'm'
- thousands <- many thousand >>= (return . (1000 *) . length)
- ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
- fivehundreds <- many fivehundred >>= (return . (500 *) . length)
- fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
- hundreds <- many hundred >>= (return . (100 *) . length)
- nineties <- option 0 $ try $ ten >> hundred >> return 90
- fifties <- many fifty >>= (return . (50 *) . length)
- forties <- option 0 $ try $ ten >> fifty >> return 40
- tens <- many ten >>= (return . (10 *) . length)
- nines <- option 0 $ try $ one >> ten >> return 9
- fives <- many five >>= (return . (5 *) . length)
- fours <- option 0 $ try $ one >> five >> return 4
- ones <- many one >>= (return . length)
- let total = thousands + ninehundreds + fivehundreds + fourhundreds +
- hundreds + nineties + fifties + forties + tens + nines +
- fives + fours + ones
- if total == 0
- then fail "not a roman numeral"
- else return total
-
--- | Applies a parser, returns tuple of its results and its horizontal
--- displacement (the difference between the source column at the end
--- and the source column at the beginning). Vertical displacement
--- (source row) is ignored.
-withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
- -> GenParser Char st (a, Int) -- ^ (result, displacement)
-withHorizDisplacement parser = do
- pos1 <- getPosition
- result <- parser
- pos2 <- getPosition
- return (result, sourceColumn pos2 - sourceColumn pos1)
-
--- | Parses a character and returns 'Null' (so that the parser can move on
--- if it gets stuck).
-nullBlock :: GenParser Char st Block
-nullBlock = anyChar >> return Null
-
--- | Fail if reader is in strict markdown syntax mode.
-failIfStrict :: GenParser Char ParserState ()
-failIfStrict = do
- state <- getState
- if stateStrict state then fail "strict mode" else return ()
-
--- | Parses backslash, then applies character parser.
-escaped :: GenParser Char st Char -- ^ Parser for character to escape
- -> GenParser Char st Inline
-escaped parser = try $ do
- char '\\'
- result <- parser
- return (Str [result])
-
--- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: GenParser Char st (ListNumberStyle, Int)
-upperRoman = do
- num <- romanNumeral True
- return (UpperRoman, num)
-
--- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: GenParser Char st (ListNumberStyle, Int)
-lowerRoman = do
- num <- romanNumeral False
- return (LowerRoman, num)
-
--- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: GenParser Char st (ListNumberStyle, Int)
-decimal = do
- num <- many1 digit
- return (Decimal, read num)
-
--- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: GenParser Char st (ListNumberStyle, Int)
-defaultNum = do
- char '#'
- return (DefaultStyle, 1)
-
--- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
-lowerAlpha = do
- ch <- oneOf ['a'..'z']
- return (LowerAlpha, ord ch - ord 'a' + 1)
-
--- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: GenParser Char st (ListNumberStyle, Int)
-upperAlpha = do
- ch <- oneOf ['A'..'Z']
- return (UpperAlpha, ord ch - ord 'A' + 1)
-
--- | Parses a roman numeral i or I
-romanOne :: GenParser Char st (ListNumberStyle, Int)
-romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
- (char 'I' >> return (UpperRoman, 1))
-
--- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: GenParser Char st ListAttributes
-anyOrderedListMarker = choice $
- [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
- numParser <- [decimal, defaultNum, romanOne,
- lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-
--- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inPeriod num = try $ do
- (style, start) <- num
- char '.'
- let delim = if style == DefaultStyle
- then DefaultDelim
- else Period
- return (start, style, delim)
-
--- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inOneParen num = try $ do
- (style, start) <- num
- char ')'
- return (start, style, OneParen)
-
--- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inTwoParens num = try $ do
- char '('
- (style, start) <- num
- char ')'
- return (start, style, TwoParens)
-
--- | Parses an ordered list marker with a given style and delimiter,
--- returns number.
-orderedListMarker :: ListNumberStyle
- -> ListNumberDelim
- -> GenParser Char st Int
-orderedListMarker style delim = do
- let num = case style of
- DefaultStyle -> decimal <|> defaultNum
- Decimal -> decimal
- UpperRoman -> upperRoman
- LowerRoman -> lowerRoman
- UpperAlpha -> upperAlpha
- LowerAlpha -> lowerAlpha
- let context = case delim of
- DefaultDelim -> inPeriod
- Period -> inPeriod
- OneParen -> inOneParen
- TwoParens -> inTwoParens
- (start, _, _) <- context num
- return start
-
--- | Parses a character reference and returns a Str element.
-charRef :: GenParser Char st Inline
-charRef = do
- c <- characterReference
- return $ Str [c]
-
--- | Parse a string with a given parser and state.
-readWith :: GenParser Char ParserState a -- ^ parser
- -> ParserState -- ^ initial state
- -> String -- ^ input string
- -> a
-readWith parser state input =
- case runParser parser state "source" input of
- Left err -> error $ "\nError:\n" ++ show err
- Right result -> result
-
--- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a) => GenParser Char ParserState a
- -> String
- -> IO ()
-testStringWith parser str = putStrLn $ show $
- readWith parser defaultParserState str
-
--- | Parsing options.
-data ParserState = ParserState
- { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
- stateParserContext :: ParserContext, -- ^ Inside list?
- stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateKeys :: KeyTable, -- ^ List of reference keys
- stateNotes :: NoteTable, -- ^ List of notes
- stateTabStop :: Int, -- ^ Tab stop
- stateStandalone :: Bool, -- ^ Parse bibliographic info?
- stateTitle :: [Inline], -- ^ Title of document
- stateAuthors :: [String], -- ^ Authors of document
- stateDate :: String, -- ^ Date of document
- stateStrict :: Bool, -- ^ Use strict markdown syntax?
- stateSmart :: Bool, -- ^ Use smart typography?
- stateColumns :: Int, -- ^ Number of columns in terminal
- stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
- }
- deriving Show
-
-defaultParserState :: ParserState
-defaultParserState =
- ParserState { stateParseRaw = False,
- stateParserContext = NullState,
- stateQuoteContext = NoQuote,
- stateKeys = [],
- stateNotes = [],
- stateTabStop = 4,
- stateStandalone = False,
- stateTitle = [],
- stateAuthors = [],
- stateDate = [],
- stateStrict = False,
- stateSmart = False,
- stateColumns = 80,
- stateHeaderTable = [] }
-
-data HeaderType
- = SingleHeader Char -- ^ Single line of characters underneath
- | DoubleHeader Char -- ^ Lines of characters above and below
- deriving (Eq, Show)
-
-data ParserContext
- = ListItemState -- ^ Used when running parser on list item contents
- | NullState -- ^ Default state
- deriving (Eq, Show)
-
-data QuoteContext
- = InSingleQuote -- ^ Used when parsing inside single quotes
- | InDoubleQuote -- ^ Used when parsing inside double quotes
- | NoQuote -- ^ Used when not parsing inside quotes
- deriving (Eq, Show)
-
-type NoteTable = [(String, [Block])]
-
-type KeyTable = [([Inline], Target)]
-
--- | Look up key in key table and return target object.
-lookupKeySrc :: KeyTable -- ^ Key table
- -> [Inline] -- ^ Key
- -> Maybe Target
-lookupKeySrc table key = case find (refsMatch key . fst) table of
- Nothing -> Nothing
- Just (_, src) -> Just src
-
--- | Returns @True@ if keys match (case insensitive).
-refsMatch :: [Inline] -> [Inline] -> Bool
-refsMatch ((Str x):restx) ((Str y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((Emph x):restx) ((Emph y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strong x):restx) ((Strong y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Superscript x):restx) ((Superscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Subscript x):restx) ((Subscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
- t == u && refsMatch x y && refsMatch restx resty
-refsMatch ((Code x):restx) ((Code y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((TeX x):restx) ((TeX y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
-refsMatch [] x = null x
-refsMatch x [] = null x
-
---
--- Native format prettyprinting
---
-
--- | Indent string as a block.
-indentBy :: Int -- ^ Number of spaces to indent the block
- -> Int -- ^ Number of spaces (rel to block) to indent first line
- -> String -- ^ Contents of block to indent
- -> String
-indentBy _ _ [] = ""
-indentBy num first str =
- let (firstLine:restLines) = lines str
- firstLineIndent = num + first
- in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
- (joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines)
-
--- | Prettyprint list of Pandoc blocks elements.
-prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
- -> [Block] -- ^ List of blocks
- -> String
-prettyBlockList indent [] = indentBy indent 0 "[]"
-prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
- (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]"
-
--- | Prettyprint Pandoc block element.
-prettyBlock :: Block -> String
-prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
- (prettyBlockList 2 blocks)
-prettyBlock (OrderedList attribs blockLists) =
- "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
- (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks)
- blockLists)) ++ " ]"
-prettyBlock (BulletList blockLists) = "BulletList\n" ++
- indentBy 2 0 ("[ " ++ (joinWithSep ", "
- (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
-prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++
- indentBy 2 0 ("[" ++ (joinWithSep ",\n"
- (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
- indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
-prettyBlock (Table caption aligns widths header rows) =
- "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
- show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
- (joinWithSep ",\n" (map prettyRow rows)) ++ " ]"
- where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", "
- (map (\blocks -> prettyBlockList 2 blocks)
- cols))) ++ " ]"
-prettyBlock block = show block
-
--- | Prettyprint Pandoc document.
-prettyPandoc :: Pandoc -> String
-prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
- ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
-
---
--- Pandoc block and inline list processing
---
-
--- | Generate infinite lazy list of markers for an ordered list,
--- depending on list attributes.
-orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
-orderedListMarkers (start, numstyle, numdelim) =
- let singleton c = [c]
- nums = case numstyle of
- DefaultStyle -> map show [start..]
- Decimal -> map show [start..]
- UpperAlpha -> drop (start - 1) $ cycle $
- map singleton ['A'..'Z']
- LowerAlpha -> drop (start - 1) $ cycle $
- map singleton ['a'..'z']
- UpperRoman -> map toRomanNumeral [start..]
- LowerRoman -> map (map toLower . toRomanNumeral) [start..]
- inDelim str = case numdelim of
- DefaultDelim -> str ++ "."
- Period -> str ++ "."
- OneParen -> str ++ ")"
- TwoParens -> "(" ++ str ++ ")"
- in map inDelim nums
-
--- | Normalize a list of inline elements: remove leading and trailing
--- @Space@ elements, collapse double @Space@s into singles, and
--- remove empty Str elements.
-normalizeSpaces :: [Inline] -> [Inline]
-normalizeSpaces [] = []
-normalizeSpaces list =
- let removeDoubles [] = []
- removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
- removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
- removeDoubles ((Str ""):rest) = removeDoubles rest
- removeDoubles (x:rest) = x:(removeDoubles rest)
- removeLeading (Space:xs) = removeLeading xs
- removeLeading x = x
- removeTrailing [] = []
- removeTrailing lst = if (last lst == Space)
- then init lst
- else lst
- in removeLeading $ removeTrailing $ removeDoubles list
-
--- | Change final list item from @Para@ to @Plain@ if the list should
--- be compact.
-compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
- -> [[Block]]
-compactify [] = []
-compactify items =
- let final = last items
- others = init items
- in case final of
- [Para a] -> if any containsPara others
- then items
- else others ++ [[Plain a]]
- _ -> items
-
-containsPara :: [Block] -> Bool
-containsPara [] = False
-containsPara ((Para _):_) = True
-containsPara ((BulletList items):rest) = any containsPara items ||
- containsPara rest
-containsPara ((OrderedList _ items):rest) = any containsPara items ||
- containsPara rest
-containsPara ((DefinitionList items):rest) = any containsPara (map snd items) ||
- containsPara rest
-containsPara (_:rest) = containsPara rest
-
--- | Data structure for defining hierarchical Pandoc documents
-data Element = Blk Block
- | Sec [Inline] [Element] deriving (Eq, Read, Show)
-
--- | Returns @True@ on Header block with at least the specified level
-headerAtLeast :: Int -> Block -> Bool
-headerAtLeast level (Header x _) = x <= level
-headerAtLeast _ _ = False
-
--- | Convert list of Pandoc blocks into (hierarchical) list of Elements
-hierarchicalize :: [Block] -> [Element]
-hierarchicalize [] = []
-hierarchicalize (block:rest) =
- case block of
- (Header level title) ->
- let (thisSection, rest') = break (headerAtLeast level) rest
- in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
- x -> (Blk x):(hierarchicalize rest)
-
--- | True if block is a Header block.
-isHeaderBlock :: Block -> Bool
-isHeaderBlock (Header _ _) = True
-isHeaderBlock _ = False
-
---
--- Writer options
---
-
--- | Options for writers
-data WriterOptions = WriterOptions
- { writerStandalone :: Bool -- ^ Include header and footer
- , writerHeader :: String -- ^ Header for the document
- , writerTitlePrefix :: String -- ^ Prefix for HTML titles
- , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
- , writerTableOfContents :: Bool -- ^ Include table of contents
- , writerS5 :: Bool -- ^ We're writing S5
- , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML
- , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js
- , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
- , writerIncremental :: Bool -- ^ Incremental S5 lists
- , writerNumberSections :: Bool -- ^ Number sections in LaTeX
- , writerIncludeBefore :: String -- ^ String to include before the body
- , writerIncludeAfter :: String -- ^ String to include after the body
- , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
- , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
- , writerWrapText :: Bool -- ^ Wrap text to line length
- } deriving Show
-
--- | Default writer options.
-defaultWriterOptions :: WriterOptions
-defaultWriterOptions =
- WriterOptions { writerStandalone = False
- , writerHeader = ""
- , writerTitlePrefix = ""
- , writerTabStop = 4
- , writerTableOfContents = False
- , writerS5 = False
- , writerUseASCIIMathML = False
- , writerASCIIMathMLURL = Nothing
- , writerIgnoreNotes = False
- , writerIncremental = False
- , writerNumberSections = False
- , writerIncludeBefore = ""
- , writerIncludeAfter = ""
- , writerStrictMarkdown = False
- , writerReferenceLinks = False
- , writerWrapText = True
- }
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
deleted file mode 100644
index 16bdb9218..000000000
--- a/src/Text/Pandoc/UTF8.hs
+++ /dev/null
@@ -1,45 +0,0 @@
--- | Functions for converting Unicode strings to UTF-8 and vice versa.
---
--- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
--- (c) 2003, OGI School of Science & Engineering, Oregon Health and
--- Science University.
---
--- Modified by Martin Norbaeck
--- to pass illegal UTF-8 sequences through unchanged.
-module Text.Pandoc.UTF8 (
- fromUTF8,
- toUTF8
- ) where
-
--- From the Char module supplied with HBC.
-
--- | Take a UTF-8 string and decode it into a Unicode string.
-fromUTF8 :: String -> String
-fromUTF8 "" = ""
-fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
-fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
- '\x80' <= c' && c' <= '\xbf' =
- toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
-fromUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' &&
- '\x80' <= c' && c' <= '\xbf' &&
- '\x80' <= c'' && c'' <= '\xbf' =
- toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs
-fromUTF8 (c:cs) = c : fromUTF8 cs
-
--- | Take a Unicode string and encode it as a UTF-8 string.
-toUTF8 :: String -> String
-toUTF8 "" = ""
-toUTF8 (c:cs) =
- if c > '\x0000' && c < '\x0080' then
- c : toUTF8 cs
- else if c < toEnum 0x0800 then
- let i = fromEnum c
- in toEnum (0xc0 + i `div` 0x40) :
- toEnum (0x80 + i `mod` 0x40) :
- toUTF8 cs
- else
- let i = fromEnum c
- in toEnum (0xe0 + i `div` 0x1000) :
- toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :
- toEnum (0x80 + i `mod` 0x40) :
- toUTF8 cs
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
deleted file mode 100644
index 13912a9f3..000000000
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ /dev/null
@@ -1,248 +0,0 @@
-{-
-Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.ConTeXt
- Copyright : Copyright (C) 2007 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' format into ConTeXt.
--}
-module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Printf ( printf )
-import Data.List ( (\\), intersperse )
-import Control.Monad.State
-
-type WriterState = Int -- number of next URL reference
-
--- | Convert Pandoc to ConTeXt.
-writeConTeXt :: WriterOptions -> Pandoc -> String
-writeConTeXt options document = evalState (pandocToConTeXt options document) 1
-
-pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
-pandocToConTeXt options (Pandoc meta blocks) = do
- main <- blockListToConTeXt blocks
- let body = writerIncludeBefore options ++ main ++ writerIncludeAfter options
- head <- if writerStandalone options
- then contextHeader options meta
- else return ""
- let toc = if writerTableOfContents options
- then "\\placecontent\n\n"
- else ""
- let foot = if writerStandalone options
- then "\n\\stoptext\n"
- else ""
- return $ head ++ toc ++ body ++ foot
-
--- | Insert bibliographic information into ConTeXt header.
-contextHeader :: WriterOptions -- ^ Options, including ConTeXt header
- -> Meta -- ^ Meta with bibliographic information
- -> State WriterState String
-contextHeader options (Meta title authors date) = do
- titletext <- if null title
- then return ""
- else inlineListToConTeXt title
- let authorstext = if null authors
- then ""
- else if length authors == 1
- then stringToConTeXt $ head authors
- else stringToConTeXt $ (joinWithSep ", " $
- init authors) ++ " & " ++ last authors
- let datetext = if date == ""
- then ""
- else stringToConTeXt date
- let titleblock = "\\doctitle{" ++ titletext ++ "}\n\
- \ \\author{" ++ authorstext ++ "}\n\
- \ \\date{" ++ datetext ++ "}\n\n"
- let setupheads = if (writerNumberSections options)
- then "\\setupheads[sectionnumber=yes, style=\\bf]\n"
- else "\\setupheads[sectionnumber=no, style=\\bf]\n"
- let header = writerHeader options
- return $ header ++ setupheads ++ titleblock ++ "\\starttext\n\\maketitle\n\n"
-
--- escape things as needed for ConTeXt
-
-escapeCharForConTeXt :: Char -> String
-escapeCharForConTeXt ch =
- case ch of
- '{' -> "\\letteropenbrace{}"
- '}' -> "\\letterclosebrace{}"
- '\\' -> "\\letterbackslash{}"
- '$' -> "\\$"
- '|' -> "\\letterbar{}"
- '^' -> "\\letterhat{}"
- '%' -> "\\%"
- '~' -> "\\lettertilde{}"
- '&' -> "\\&"
- '#' -> "\\#"
- '<' -> "\\letterless{}"
- '>' -> "\\lettermore{}"
- '_' -> "\\letterunderscore{}"
- x -> [x]
-
--- | Escape string for ConTeXt
-stringToConTeXt :: String -> String
-stringToConTeXt = concatMap escapeCharForConTeXt
-
--- | Convert Pandoc block element to ConTeXt.
-blockToConTeXt :: Block -> State WriterState String
-blockToConTeXt Null = return ""
-blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n")
-blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n")
-blockToConTeXt (BlockQuote lst) = do
- contents <- blockListToConTeXt lst
- return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n"
-blockToConTeXt (CodeBlock str) =
- return $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n"
-blockToConTeXt (RawHtml str) = return ""
-blockToConTeXt (BulletList lst) = do
- contents <- mapM listItemToConTeXt lst
- return $ "\\startltxitem\n" ++ concat contents ++ "\\stopltxitem\n"
-blockToConTeXt (OrderedList attribs lst) = case attribs of
- (1, DefaultStyle, DefaultDelim) -> do
- contents <- mapM listItemToConTeXt lst
- return $ "\\startltxenum\n" ++ concat contents ++ "\\stopltxenum\n"
- _ -> do
- let markers = take (length lst) $ orderedListMarkers attribs
- contents <- zipWithM orderedListItemToConTeXt markers lst
- let markerWidth = maximum $ map length markers
- let markerWidth' = if markerWidth < 3
- then ""
- else "[width=" ++
- show ((markerWidth + 2) `div` 2) ++ "em]"
- return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++
- "\\stopitemize\n"
-blockToConTeXt (DefinitionList lst) =
- mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat
-blockToConTeXt HorizontalRule = return "\\thinrule\n\n"
-blockToConTeXt (Header level lst) = do
- contents <- inlineListToConTeXt lst
- return $ if level > 0 && level <= 3
- then "\\" ++ concat (replicate (level - 1) "sub") ++
- "section{" ++ contents ++ "}\n\n"
- else contents ++ "\n\n"
-blockToConTeXt (Table caption aligns widths heads rows) = do
- let colWidths = map printDecimal widths
- let colDescriptor colWidth alignment = (case alignment of
- AlignLeft -> 'l'
- AlignRight -> 'r'
- AlignCenter -> 'c'
- AlignDefault -> 'l'):
- "p(" ++ colWidth ++ "\\textwidth)|"
- let colDescriptors = "|" ++ (concat $
- zipWith colDescriptor colWidths aligns)
- headers <- tableRowToConTeXt heads
- captionText <- inlineListToConTeXt caption
- let captionText' = if null caption then "none" else captionText
- rows' <- mapM tableRowToConTeXt rows
- return $ "\\placetable[here]{" ++ captionText' ++ "}\n\\starttable[" ++
- colDescriptors ++ "]\n" ++ "\\HL\n" ++ headers ++ "\\HL\n" ++
- concat rows' ++ "\\HL\n\\stoptable\n\n"
-
-printDecimal :: Float -> String
-printDecimal = printf "%.2f"
-
-tableRowToConTeXt cols = do
- cols' <- mapM blockListToConTeXt cols
- return $ "\\NC " ++ (concat $ intersperse "\\NC " cols') ++ "\\NC\\AR\n"
-
-listItemToConTeXt list = do
- contents <- blockListToConTeXt list
- return $ "\\item " ++ contents
-
-orderedListItemToConTeXt marker list = do
- contents <- blockListToConTeXt list
- return $ "\\sym{" ++ marker ++ "} " ++ contents
-
-defListItemToConTeXt (term, def) = do
- term' <- inlineListToConTeXt term
- def' <- blockListToConTeXt def
- return $ "\\startdescr{" ++ term' ++ "}\n" ++
- def' ++ "\n\\stopdescr\n"
-
--- | Convert list of block elements to ConTeXt.
-blockListToConTeXt :: [Block] -> State WriterState String
-blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat
-
--- | Convert list of inline elements to ConTeXt.
-inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
- -> State WriterState String
-inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat
-
-isQuoted :: Inline -> Bool
-isQuoted (Quoted _ _) = True
-isQuoted Apostrophe = True
-isQuoted _ = False
-
--- | Convert inline element to ConTeXt
-inlineToConTeXt :: Inline -- ^ Inline to convert
- -> State WriterState String
-inlineToConTeXt (Emph lst) = do
- contents <- inlineListToConTeXt lst
- return $ "{\\em " ++ contents ++ "}"
-inlineToConTeXt (Strong lst) = do
- contents <- inlineListToConTeXt lst
- return $ "{\\bf " ++ contents ++ "}"
-inlineToConTeXt (Strikeout lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\overstrikes{" ++ contents ++ "}"
-inlineToConTeXt (Superscript lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\high{" ++ contents ++ "}"
-inlineToConTeXt (Subscript lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\low{" ++ contents ++ "}"
-inlineToConTeXt (Code str) = return $ "\\type{" ++ str ++ "}"
-inlineToConTeXt (Quoted SingleQuote lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\quote{" ++ contents ++ "}"
-inlineToConTeXt (Quoted DoubleQuote lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\quotation{" ++ contents ++ "}"
-inlineToConTeXt Apostrophe = return "'"
-inlineToConTeXt EmDash = return "---"
-inlineToConTeXt EnDash = return "--"
-inlineToConTeXt Ellipses = return "\\ldots{}"
-inlineToConTeXt (Str str) = return $ stringToConTeXt str
-inlineToConTeXt (TeX str) = return str
-inlineToConTeXt (HtmlInline str) = return ""
-inlineToConTeXt (LineBreak) = return "\\crlf\n"
-inlineToConTeXt Space = return " "
-inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own
- inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links...
-inlineToConTeXt (Link text (src, _)) = do
- next <- get
- put (next + 1)
- let ref = show next
- label <- inlineListToConTeXt text
- return $ "\\useurl[" ++ ref ++ "][" ++ src ++ "][][" ++ label ++
- "]\\from[" ++ ref ++ "]"
-inlineToConTeXt (Image alternate (src, tit)) = do
- alt <- inlineListToConTeXt alternate
- return $ "\\placefigure\n[]\n[fig:" ++ alt ++ "]\n{" ++
- tit ++ "}\n{\\externalfigure[" ++ src ++ "]}"
-inlineToConTeXt (Note contents) = do
- contents' <- blockListToConTeXt contents
- return $ "\\footnote{" ++ contents' ++ "}"
-
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
deleted file mode 100644
index 13dc8585d..000000000
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ /dev/null
@@ -1,299 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Docbook
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to Docbook XML.
--}
-module Text.Pandoc.Writers.Docbook ( writeDocbook) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Data.List ( isPrefixOf, drop )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
-
---
--- code to format XML
---
-
--- | Escape one character as needed for XML.
-escapeCharForXML :: Char -> String
-escapeCharForXML x = case x of
- '&' -> "&amp;"
- '<' -> "&lt;"
- '>' -> "&gt;"
- '"' -> "&quot;"
- '\160' -> "&nbsp;"
- c -> [c]
-
--- | True if the character needs to be escaped.
-needsEscaping :: Char -> Bool
-needsEscaping c = c `elem` "&<>\"\160"
-
--- | Escape string as needed for XML. Entity references are not preserved.
-escapeStringForXML :: String -> String
-escapeStringForXML "" = ""
-escapeStringForXML str =
- case break needsEscaping str of
- (okay, "") -> okay
- (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
-
--- | Return a text object with a string of formatted XML attributes.
-attributeList :: [(String, String)] -> Doc
-attributeList = text . concatMap
- (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++
- escapeStringForXML b ++ "\"")
-
--- | Put the supplied contents between start and end tags of tagType,
--- with specified attributes and (if specified) indentation.
-inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
-inTags isIndented tagType attribs contents =
- let openTag = char '<' <> text tagType <> attributeList attribs <>
- char '>'
- closeTag = text "</" <> text tagType <> char '>'
- in if isIndented
- then openTag $$ nest 2 contents $$ closeTag
- else openTag <> contents <> closeTag
-
--- | Return a self-closing tag of tagType with specified attributes
-selfClosingTag :: String -> [(String, String)] -> Doc
-selfClosingTag tagType attribs =
- char '<' <> text tagType <> attributeList attribs <> text " />"
-
--- | Put the supplied contents between start and end tags of tagType.
-inTagsSimple :: String -> Doc -> Doc
-inTagsSimple tagType = inTags False tagType []
-
--- | Put the supplied contents in indented block btw start and end tags.
-inTagsIndented :: String -> Doc -> Doc
-inTagsIndented tagType = inTags True tagType []
-
---
--- Docbook writer
---
-
--- | Convert list of authors to a docbook <author> section
-authorToDocbook :: [Char] -> Doc
-authorToDocbook name = inTagsIndented "author" $
- if ',' `elem` name
- then -- last name first
- let (lastname, rest) = break (==',') name
- firstname = removeLeadingSpace rest in
- inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
- inTagsSimple "surname" (text $ escapeStringForXML lastname)
- else -- last name last
- let namewords = words name
- lengthname = length namewords
- (firstname, lastname) = case lengthname of
- 0 -> ("","")
- 1 -> ("", name)
- n -> (joinWithSep " " (take (n-1) namewords), last namewords)
- in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
- inTagsSimple "surname" (text $ escapeStringForXML lastname)
-
--- | Convert Pandoc document to string in Docbook format.
-writeDocbook :: WriterOptions -> Pandoc -> String
-writeDocbook opts (Pandoc (Meta title authors date) blocks) =
- let head = if writerStandalone opts
- then text (writerHeader opts)
- else empty
- meta = if writerStandalone opts
- then inTagsIndented "articleinfo" $
- (inTagsSimple "title" (wrap opts title)) $$
- (vcat (map authorToDocbook authors)) $$
- (inTagsSimple "date" (text $ escapeStringForXML date))
- else empty
- elements = hierarchicalize blocks
- before = writerIncludeBefore opts
- after = writerIncludeAfter opts
- body = (if null before then empty else text before) $$
- vcat (map (elementToDocbook opts) elements) $$
- (if null after then empty else text after)
- body' = if writerStandalone opts
- then inTagsIndented "article" (meta $$ body)
- else body
- in render $ head $$ body' $$ text ""
-
--- | Convert an Element to Docbook.
-elementToDocbook :: WriterOptions -> Element -> Doc
-elementToDocbook opts (Blk block) = blockToDocbook opts block
-elementToDocbook opts (Sec title elements) =
- -- Docbook doesn't allow sections with no content, so insert some if needed
- let elements' = if null elements
- then [Blk (Para [])]
- else elements
- in inTagsIndented "section" $
- inTagsSimple "title" (wrap opts title) $$
- vcat (map (elementToDocbook opts) elements')
-
--- | Convert a list of Pandoc blocks to Docbook.
-blocksToDocbook :: WriterOptions -> [Block] -> Doc
-blocksToDocbook opts = vcat . map (blockToDocbook opts)
-
--- | Auxiliary function to convert Plain block to Para.
-plainToPara (Plain x) = Para x
-plainToPara x = x
-
--- | Convert a list of pairs of terms and definitions into a list of
--- Docbook varlistentrys.
-deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc
-deflistItemsToDocbook opts items =
- vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items
-
--- | Convert a term and a list of blocks into a Docbook varlistentry.
-deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc
-deflistItemToDocbook opts term def =
- let def' = map plainToPara def
- in inTagsIndented "varlistentry" $
- inTagsIndented "term" (inlinesToDocbook opts term) $$
- inTagsIndented "listitem" (blocksToDocbook opts def')
-
--- | Convert a list of lists of blocks to a list of Docbook list items.
-listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
-listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items
-
--- | Convert a list of blocks into a Docbook list item.
-listItemToDocbook :: WriterOptions -> [Block] -> Doc
-listItemToDocbook opts item =
- inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item
-
--- | Convert a Pandoc block element to Docbook.
-blockToDocbook :: WriterOptions -> Block -> Doc
-blockToDocbook opts Null = empty
-blockToDocbook opts (Plain lst) = wrap opts lst
-blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
-blockToDocbook opts (BlockQuote blocks) =
- inTagsIndented "blockquote" $ blocksToDocbook opts blocks
-blockToDocbook opts (CodeBlock str) =
- text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>"
-blockToDocbook opts (BulletList lst) =
- inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
-blockToDocbook opts (OrderedList _ []) = empty
-blockToDocbook opts (OrderedList (start, numstyle, numdelim) (first:rest)) =
- let attribs = case numstyle of
- DefaultStyle -> []
- Decimal -> [("numeration", "arabic")]
- UpperAlpha -> [("numeration", "upperalpha")]
- LowerAlpha -> [("numeration", "loweralpha")]
- UpperRoman -> [("numeration", "upperroman")]
- LowerRoman -> [("numeration", "lowerroman")]
- items = if start == 1
- then listItemsToDocbook opts (first:rest)
- else (inTags True "listitem" [("override",show start)]
- (blocksToDocbook opts $ map plainToPara first)) $$
- listItemsToDocbook opts rest
- in inTags True "orderedlist" attribs items
-blockToDocbook opts (DefinitionList lst) =
- inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
-blockToDocbook opts (RawHtml str) = text str -- raw XML block
-blockToDocbook opts HorizontalRule = empty -- not semantic
-blockToDocbook opts (Table caption aligns widths headers rows) =
- let alignStrings = map alignmentToString aligns
- captionDoc = if null caption
- then empty
- else inTagsIndented "caption"
- (inlinesToDocbook opts caption)
- tableType = if isEmpty captionDoc then "informaltable" else "table"
- in inTagsIndented tableType $ captionDoc $$
- (colHeadsToDocbook opts alignStrings widths headers) $$
- (vcat $ map (tableRowToDocbook opts alignStrings) rows)
-
-colHeadsToDocbook opts alignStrings widths headers =
- let heads = zipWith3 (\align width item ->
- tableItemToDocbook opts "th" align width item)
- alignStrings widths headers
- in inTagsIndented "tr" $ vcat heads
-
-alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
- AlignDefault -> "left"
-
-tableRowToDocbook opts aligns cols = inTagsIndented "tr" $
- vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
-
-tableItemToDocbook opts tag align width item =
- let attrib = [("align", align)] ++
- if width /= 0
- then [("style", "{width: " ++
- show (truncate (100*width)) ++ "%;}")]
- else []
- in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
-
--- | Take list of inline elements and return wrapped doc.
-wrap :: WriterOptions -> [Inline] -> Doc
-wrap opts lst = if writerWrapText opts
- then fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
- else inlinesToDocbook opts lst
-
--- | Convert a list of inline elements to Docbook.
-inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
-inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
-
--- | Convert an inline element to Docbook.
-inlineToDocbook :: WriterOptions -> Inline -> Doc
-inlineToDocbook opts (Str str) = text $ escapeStringForXML str
-inlineToDocbook opts (Emph lst) =
- inTagsSimple "emphasis" $ inlinesToDocbook opts lst
-inlineToDocbook opts (Strong lst) =
- inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
-inlineToDocbook opts (Strikeout lst) =
- inTags False "emphasis" [("role", "strikethrough")] $
- inlinesToDocbook opts lst
-inlineToDocbook opts (Superscript lst) =
- inTagsSimple "superscript" $ inlinesToDocbook opts lst
-inlineToDocbook opts (Subscript lst) =
- inTagsSimple "subscript" $ inlinesToDocbook opts lst
-inlineToDocbook opts (Quoted _ lst) =
- inTagsSimple "quote" $ inlinesToDocbook opts lst
-inlineToDocbook opts Apostrophe = char '\''
-inlineToDocbook opts Ellipses = text "&#8230;"
-inlineToDocbook opts EmDash = text "&#8212;"
-inlineToDocbook opts EnDash = text "&#8211;"
-inlineToDocbook opts (Code str) =
- inTagsSimple "literal" $ text (escapeStringForXML str)
-inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
-inlineToDocbook opts (HtmlInline str) = empty
-inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>"
-inlineToDocbook opts Space = char ' '
-inlineToDocbook opts (Link txt (src, tit)) =
- if isPrefixOf "mailto:" src
- then let src' = drop 7 src
- emailLink = inTagsSimple "email" $ text $
- escapeStringForXML $ src'
- in if txt == [Code src']
- then emailLink
- else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
- char ')'
- else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
-inlineToDocbook opts (Image alt (src, tit)) =
- let titleDoc = if null tit
- then empty
- else inTagsIndented "objectinfo" $
- inTagsIndented "title" (text $ escapeStringForXML tit)
- in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
- titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
-inlineToDocbook opts (Note contents) =
- inTagsIndented "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
deleted file mode 100644
index 7ec95d8ef..000000000
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ /dev/null
@@ -1,458 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.HTML
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to HTML.
--}
-module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.ASCIIMathML
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
-import Text.Pandoc.Shared
-import Text.Regex ( mkRegex, matchRegex )
-import Numeric ( showHex )
-import Data.Char ( ord, toLower )
-import Data.List ( isPrefixOf, intersperse )
-import qualified Data.Set as S
-import Control.Monad.State
-import Text.XHtml.Transitional
-
-data WriterState = WriterState
- { stNotes :: [Html] -- ^ List of notes
- , stIds :: [String] -- ^ List of header identifiers
- , stMath :: Bool -- ^ Math is used in document
- , stCSS :: S.Set String -- ^ CSS to include in header
- } deriving Show
-
-defaultWriterState :: WriterState
-defaultWriterState = WriterState {stNotes= [], stIds = [],
- stMath = False, stCSS = S.empty}
-
--- Helpers to render HTML with the appropriate function.
-render opts = if writerWrapText opts then renderHtml else showHtml
-renderFragment opts = if writerWrapText opts
- then renderHtmlFragment
- else showHtmlFragment
-
--- | Convert Pandoc document to Html string.
-writeHtmlString :: WriterOptions -> Pandoc -> String
-writeHtmlString opts =
- if writerStandalone opts
- then render opts . writeHtml opts
- else renderFragment opts . writeHtml opts
-
--- | Convert Pandoc document to Html structure.
-writeHtml :: WriterOptions -> Pandoc -> Html
-writeHtml opts (Pandoc (Meta tit authors date) blocks) =
- let titlePrefix = writerTitlePrefix opts
- topTitle = evalState (inlineListToHtml opts tit) defaultWriterState
- topTitle' = if null titlePrefix
- then topTitle
- else titlePrefix +++ " - " +++ topTitle
- metadata = thetitle topTitle' +++
- meta ! [httpequiv "Content-Type",
- content "text/html; charset=UTF-8"] +++
- meta ! [name "generator", content "pandoc"] +++
- (toHtmlFromList $
- map (\a -> meta ! [name "author", content a]) authors) +++
- (if null date
- then noHtml
- else meta ! [name "date", content date])
- titleHeader = if writerStandalone opts && not (null tit) &&
- not (writerS5 opts)
- then h1 ! [theclass "title"] $ topTitle
- else noHtml
- headerBlocks = filter isHeaderBlock blocks
- ids = uniqueIdentifiers $
- map (\(Header _ lst) -> lst) headerBlocks
- toc = if writerTableOfContents opts
- then tableOfContents opts headerBlocks ids
- else noHtml
- (blocks', newstate) =
- runState (blockListToHtml opts blocks)
- (defaultWriterState {stIds = ids})
- cssLines = stCSS newstate
- css = if S.null cssLines
- then noHtml
- else style ! [thetype "text/css"] $ primHtml $
- '\n':(unlines $ S.toList cssLines)
- math = if stMath newstate
- then case writerASCIIMathMLURL opts of
- Just path -> script ! [src path,
- thetype "text/javascript"] $
- noHtml
- Nothing -> primHtml asciiMathMLScript
- else noHtml
- head = header $ metadata +++ math +++ css +++
- primHtml (writerHeader opts)
- notes = reverse (stNotes newstate)
- before = primHtml $ writerIncludeBefore opts
- after = primHtml $ writerIncludeAfter opts
- thebody = before +++ titleHeader +++ toc +++ blocks' +++
- footnoteSection opts notes +++ after
- in if writerStandalone opts
- then head +++ body thebody
- else thebody
-
--- | Construct table of contents from list of header blocks and identifiers.
--- Assumes there are as many identifiers as header blocks.
-tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
-tableOfContents _ [] _ = noHtml
-tableOfContents opts headers ids =
- let opts' = opts { writerIgnoreNotes = True }
- contentsTree = hierarchicalize headers
- contents = evalState (mapM (elementToListItem opts') contentsTree)
- (defaultWriterState {stIds = ids})
- in thediv ! [identifier "toc"] $ unordList contents
-
--- | Converts an Element to a list item for a table of contents,
--- retrieving the appropriate identifier from state.
-elementToListItem :: WriterOptions -> Element -> State WriterState Html
-elementToListItem opts (Blk _) = return noHtml
-elementToListItem opts (Sec headerText subsecs) = do
- st <- get
- let ids = stIds st
- let (id, rest) = if null ids
- then ("", [])
- else (head ids, tail ids)
- put $ st {stIds = rest}
- txt <- inlineListToHtml opts headerText
- subHeads <- mapM (elementToListItem opts) subsecs
- let subList = if null subHeads
- then noHtml
- else unordList subHeads
- return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++
- subList
-
--- | Convert list of Note blocks to a footnote <div>.
--- Assumes notes are sorted.
-footnoteSection :: WriterOptions -> [Html] -> Html
-footnoteSection opts notes =
- if null notes
- then noHtml
- else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes)
-
--- | Obfuscate a "mailto:" link using Javascript.
-obfuscateLink :: WriterOptions -> String -> String -> Html
-obfuscateLink opts text src =
- let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$"
- src' = map toLower src
- in case (matchRegex emailRegex src') of
- (Just [name, domain]) ->
- let domain' = substitute "." " dot " domain
- at' = obfuscateChar '@'
- (linkText, altText) =
- if text == drop 7 src' -- autolink
- then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
- else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
- domain' ++ ")")
- in if writerStrictMarkdown opts
- then -- need to use primHtml or &'s are escaped to &amp; in URL
- primHtml $ "<a href=\"" ++ (obfuscateString src')
- ++ "\">" ++ (obfuscateString text) ++ "</a>"
- else (script ! [thetype "text/javascript"] $
- primHtml ("\n<!--\nh='" ++
- obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
- obfuscateString name ++ "';e=n+a+h;\n" ++
- "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
- linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
- noscript (primHtml $ obfuscateString altText)
- _ -> anchor ! [href src] $ primHtml text -- malformed email
-
--- | Obfuscate character as entity.
-obfuscateChar :: Char -> String
-obfuscateChar char =
- let num = ord char
- numstr = if even num then show num else "x" ++ showHex num ""
- in "&#" ++ numstr ++ ";"
-
--- | Obfuscate string using entities.
-obfuscateString :: String -> String
-obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
-
--- | True if character is a punctuation character (unicode).
-isPunctuation :: Char -> Bool
-isPunctuation c =
- let c' = ord c
- in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
- c' >= 0xE000 && c' <= 0xE0FF
- then True
- else False
-
--- | Add CSS for document header.
-addToCSS :: String -> State WriterState ()
-addToCSS item = do
- st <- get
- let current = stCSS st
- put $ st {stCSS = S.insert item current}
-
--- | Convert Pandoc inline list to plain text identifier.
-inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier [] = ""
-inlineListToIdentifier (x:xs) =
- xAsText ++ inlineListToIdentifier xs
- where xAsText = case x of
- Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
- concat $ intersperse "-" $ words $ map toLower s
- Emph lst -> inlineListToIdentifier lst
- Strikeout lst -> inlineListToIdentifier lst
- Superscript lst -> inlineListToIdentifier lst
- Subscript lst -> inlineListToIdentifier lst
- Strong lst -> inlineListToIdentifier lst
- Quoted _ lst -> inlineListToIdentifier lst
- Code s -> s
- Space -> "-"
- EmDash -> "-"
- EnDash -> "-"
- Apostrophe -> ""
- Ellipses -> ""
- LineBreak -> "-"
- TeX _ -> ""
- HtmlInline _ -> ""
- Link lst _ -> inlineListToIdentifier lst
- Image lst _ -> inlineListToIdentifier lst
- Note _ -> ""
-
--- | Return unique identifiers for list of inline lists.
-uniqueIdentifiers :: [[Inline]] -> [String]
-uniqueIdentifiers ls =
- let addIdentifier (nonuniqueIds, uniqueIds) l =
- let new = inlineListToIdentifier l
- matches = length $ filter (== new) nonuniqueIds
- new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
- in (new:nonuniqueIds, new':uniqueIds)
- in reverse $ snd $ foldl addIdentifier ([],[]) ls
-
--- | Convert Pandoc block element to HTML.
-blockToHtml :: WriterOptions -> Block -> State WriterState Html
-blockToHtml opts Null = return $ noHtml
-blockToHtml opts (Plain lst) = inlineListToHtml opts lst
-blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph)
-blockToHtml opts (RawHtml str) = return $ primHtml str
-blockToHtml opts (HorizontalRule) = return $ hr
-blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n")
- -- the final \n for consistency with Markdown.pl
-blockToHtml opts (BlockQuote blocks) =
- -- in S5, treat list in blockquote specially
- -- if default is incremental, make it nonincremental;
- -- otherwise incremental
- if writerS5 opts
- then let inc = not (writerIncremental opts) in
- case blocks of
- [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
- (BulletList lst)
- [OrderedList attribs lst] ->
- blockToHtml (opts {writerIncremental = inc})
- (OrderedList attribs lst)
- otherwise -> blockListToHtml opts blocks >>=
- (return . blockquote)
- else blockListToHtml opts blocks >>= (return . blockquote)
-blockToHtml opts (Header level lst) = do
- contents <- inlineListToHtml opts lst
- st <- get
- let ids = stIds st
- let (id, rest) = if null ids
- then ("", [])
- else (head ids, tail ids)
- put $ st {stIds = rest}
- let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts)
- then []
- else [identifier id]
- let contents' = if writerTableOfContents opts
- then anchor ! [href ("#TOC-" ++ id)] $ contents
- else contents
- return $ case level of
- 1 -> h1 contents' ! attribs
- 2 -> h2 contents' ! attribs
- 3 -> h3 contents' ! attribs
- 4 -> h4 contents' ! attribs
- 5 -> h5 contents' ! attribs
- 6 -> h6 contents' ! attribs
- _ -> paragraph contents' ! attribs
-blockToHtml opts (BulletList lst) = do
- contents <- mapM (blockListToHtml opts) lst
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else []
- return $ unordList ! attribs $ contents
-blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
- contents <- mapM (blockListToHtml opts) lst
- let numstyle' = camelCaseToHyphenated $ show numstyle
- let attribs = (if writerIncremental opts
- then [theclass "incremental"]
- else []) ++
- (if startnum /= 1
- then [start startnum]
- else []) ++
- (if numstyle /= DefaultStyle
- then [theclass numstyle']
- else [])
- if numstyle /= DefaultStyle
- then addToCSS $ "ol." ++ numstyle' ++
- " { list-style-type: " ++
- numstyle' ++ "; }"
- else return ()
- return $ ordList ! attribs $ contents
-blockToHtml opts (DefinitionList lst) = do
- contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term
- def' <- blockListToHtml opts def
- return $ (term', def')) lst
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else []
- return $ defList ! attribs $ contents
-blockToHtml opts (Table capt aligns widths headers rows) = do
- let alignStrings = map alignmentToString aligns
- captionDoc <- if null capt
- then return noHtml
- else inlineListToHtml opts capt >>= return . caption
- colHeads <- colHeadsToHtml opts alignStrings
- widths headers
- rows' <- mapM (tableRowToHtml opts alignStrings) rows
- return $ table $ captionDoc +++ colHeads +++ rows'
-
-colHeadsToHtml opts alignStrings widths headers = do
- heads <- sequence $ zipWith3
- (\align width item -> tableItemToHtml opts th align width item)
- alignStrings widths headers
- return $ tr $ toHtmlFromList heads
-
-alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
- AlignDefault -> "left"
-
-tableRowToHtml opts aligns cols =
- (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>=
- return . tr . toHtmlFromList
-
-tableItemToHtml opts tag align' width item = do
- contents <- blockListToHtml opts item
- let attrib = [align align'] ++
- if width /= 0
- then [thestyle ("width: " ++ show (truncate (100*width)) ++
- "%;")]
- else []
- return $ tag ! attrib $ contents
-
-blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
-blockListToHtml opts lst =
- mapM (blockToHtml opts) lst >>= return . toHtmlFromList
-
--- | Convert list of Pandoc inline elements to HTML.
-inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
-inlineListToHtml opts lst =
- mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
-
--- | Convert Pandoc inline element to HTML.
-inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
-inlineToHtml opts inline =
- case inline of
- (Str str) -> return $ stringToHtml str
- (Space) -> return $ stringToHtml " "
- (LineBreak) -> return $ br
- (EmDash) -> return $ primHtmlChar "mdash"
- (EnDash) -> return $ primHtmlChar "ndash"
- (Ellipses) -> return $ primHtmlChar "hellip"
- (Apostrophe) -> return $ primHtmlChar "rsquo"
- (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
- (Strong lst) -> inlineListToHtml opts lst >>= return . strong
- (Code str) -> return $ thecode << str
- (Strikeout lst) -> addToCSS
- ".strikeout { text-decoration: line-through; }" >>
- inlineListToHtml opts lst >>=
- return . (thespan ! [theclass "strikeout"])
- (Superscript lst) -> inlineListToHtml opts lst >>= return . sup
- (Subscript lst) -> inlineListToHtml opts lst >>= return . sub
- (Quoted quoteType lst) ->
- let (leftQuote, rightQuote) = case quoteType of
- SingleQuote -> (primHtmlChar "lsquo",
- primHtmlChar "rsquo")
- DoubleQuote -> (primHtmlChar "ldquo",
- primHtmlChar "rdquo")
- in do contents <- inlineListToHtml opts lst
- return $ leftQuote +++ contents +++ rightQuote
- (TeX str) -> (if writerUseASCIIMathML opts
- then modify (\st -> st {stMath = True})
- else return ()) >> return (stringToHtml str)
- (HtmlInline str) -> return $ primHtml str
- (Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src ->
- return $ obfuscateLink opts str src
- (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do
- linkText <- inlineListToHtml opts txt
- return $ obfuscateLink opts (show linkText) src
- (Link txt (src,tit)) -> do
- linkText <- inlineListToHtml opts txt
- return $ anchor ! ([href src] ++
- if null tit then [] else [title tit]) $
- linkText
- (Image txt (source,tit)) -> do
- alternate <- inlineListToHtml opts txt
- let alternate' = renderFragment opts alternate
- let attributes = [src source] ++
- (if null tit
- then []
- else [title tit]) ++
- if null txt
- then []
- else [alt alternate']
- return $ image ! attributes
- -- note: null title included, as in Markdown.pl
- (Note contents) -> do
- st <- get
- let notes = stNotes st
- let number = (length notes) + 1
- let ref = show number
- htmlContents <- blockListToNote opts ref contents
- -- push contents onto front of notes
- put $ st {stNotes = (htmlContents:notes)}
- return $ anchor ! [href ("#fn" ++ ref),
- theclass "footnoteRef",
- identifier ("fnref" ++ ref)] <<
- sup << ref
-
-blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
-blockListToNote opts ref blocks =
- -- If last block is Para or Plain, include the backlink at the end of
- -- that block. Otherwise, insert a new Plain block with the backlink.
- let backlink = [HtmlInline $ " <a href=\"#fnref" ++ ref ++
- "\" class=\"footnoteBackLink\"" ++
- " title=\"Jump back to footnote " ++ ref ++ "\">&#8617;</a>"]
- blocks' = if null blocks
- then []
- else let lastBlock = last blocks
- otherBlocks = init blocks
- in case lastBlock of
- (Para lst) -> otherBlocks ++
- [Para (lst ++ backlink)]
- (Plain lst) -> otherBlocks ++
- [Plain (lst ++ backlink)]
- _ -> otherBlocks ++ [lastBlock,
- Plain backlink]
- in do contents <- blockListToHtml opts blocks'
- return $ li ! [identifier ("fn" ++ ref)] $ contents
-
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
deleted file mode 100644
index f64e06e24..000000000
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ /dev/null
@@ -1,310 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.LaTeX
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' format into LaTeX.
--}
-module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Printf ( printf )
-import Data.List ( (\\), isInfixOf, isSuffixOf, intersperse )
-import Data.Char ( toLower )
-import qualified Data.Set as S
-import Control.Monad.State
-import Text.PrettyPrint.HughesPJ hiding ( Str )
-
-data WriterState =
- WriterState { stIncludes :: S.Set String -- strings to include in header
- , stInNote :: Bool -- @True@ if we're in a note
- , stOLLevel :: Int } -- level of ordered list nesting
-
--- | Add line to header.
-addToHeader :: String -> State WriterState ()
-addToHeader str = do
- st <- get
- let includes = stIncludes st
- put st {stIncludes = S.insert str includes}
-
--- | Convert Pandoc to LaTeX.
-writeLaTeX :: WriterOptions -> Pandoc -> String
-writeLaTeX options document =
- render $ evalState (pandocToLaTeX options document) $
- WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1 }
-
-pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc
-pandocToLaTeX options (Pandoc meta blocks) = do
- main <- blockListToLaTeX blocks
- head <- if writerStandalone options
- then latexHeader options meta
- else return empty
- let before = if null (writerIncludeBefore options)
- then empty
- else text (writerIncludeBefore options)
- let after = if null (writerIncludeAfter options)
- then empty
- else text (writerIncludeAfter options)
- let body = before $$ main $$ after
- let toc = if writerTableOfContents options
- then text "\\tableofcontents\n"
- else empty
- let foot = if writerStandalone options
- then text "\\end{document}"
- else empty
- return $ head $$ toc $$ body $$ foot
-
--- | Insert bibliographic information into LaTeX header.
-latexHeader :: WriterOptions -- ^ Options, including LaTeX header
- -> Meta -- ^ Meta with bibliographic information
- -> State WriterState Doc
-latexHeader options (Meta title authors date) = do
- titletext <- if null title
- then return empty
- else inlineListToLaTeX title >>= return . inCmd "title"
- headerIncludes <- get >>= return . S.toList . stIncludes
- let extras = text $ unlines headerIncludes
- let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes
- then text "\\VerbatimFootnotes % allows verbatim text in footnotes"
- else empty
- let authorstext = text $ "\\author{" ++
- joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}"
- let datetext = if date == ""
- then empty
- else text $ "\\date{" ++ stringToLaTeX date ++ "}"
- let maketitle = if null title then empty else text "\\maketitle"
- let secnumline = if (writerNumberSections options)
- then empty
- else text "\\setcounter{secnumdepth}{0}"
- let baseHeader = text $ writerHeader options
- let header = baseHeader $$ extras
- return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$
- datetext $$ text "\\begin{document}" $$ maketitle $$ text ""
-
--- escape things as needed for LaTeX
-
-stringToLaTeX :: String -> String
-stringToLaTeX = escapeStringUsing latexEscapes
- where latexEscapes = backslashEscapes "{}$%&_#" ++
- [ ('^', "\\^{}")
- , ('\\', "\\textbackslash{}")
- , ('~', "\\ensuremath{\\sim}")
- , ('|', "\\textbar{}")
- , ('<', "\\textless{}")
- , ('>', "\\textgreater{}")
- ]
-
--- | Puts contents into LaTeX command.
-inCmd :: String -> Doc -> Doc
-inCmd cmd contents = char '\\' <> text cmd <> braces contents
-
--- | Remove all code elements from list of inline elements
--- (because it's illegal to have verbatim inside some command arguments)
-deVerb :: [Inline] -> [Inline]
-deVerb [] = []
-deVerb ((Code str):rest) =
- (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest)
-deVerb (other:rest) = other:(deVerb rest)
-
--- | Convert Pandoc block element to LaTeX.
-blockToLaTeX :: Block -- ^ Block to convert
- -> State WriterState Doc
-blockToLaTeX Null = return empty
-blockToLaTeX (Plain lst) = wrapped inlineListToLaTeX lst >>= return
-blockToLaTeX (Para lst) =
- wrapped inlineListToLaTeX lst >>= return . (<> char '\n')
-blockToLaTeX (BlockQuote lst) = do
- contents <- blockListToLaTeX lst
- return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}"
-blockToLaTeX (CodeBlock str) = do
- st <- get
- env <- if stInNote st
- then do addToHeader "\\usepackage{fancyvrb}"
- return "Verbatim"
- else return "verbatim"
- return $ text ("\\begin{" ++ env ++ "}\n") <> text str <>
- text ("\n\\end{" ++ env ++ "}")
-blockToLaTeX (RawHtml str) = return empty
-blockToLaTeX (BulletList lst) = do
- items <- mapM listItemToLaTeX lst
- return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}"
-blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
- st <- get
- let oldlevel = stOLLevel st
- put $ st {stOLLevel = oldlevel + 1}
- items <- mapM listItemToLaTeX lst
- modify (\st -> st {stOLLevel = oldlevel})
- exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim
- then do addToHeader "\\usepackage{enumerate}"
- return $ char '[' <>
- text (head (orderedListMarkers (1, numstyle,
- numdelim))) <> char ']'
- else return empty
- let resetcounter = if start /= 1 && oldlevel <= 4
- then text $ "\\setcounter{enum" ++
- map toLower (toRomanNumeral oldlevel) ++
- "}{" ++ show (start - 1) ++ "}"
- else empty
- return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$
- vcat items $$ text "\\end{enumerate}"
-blockToLaTeX (DefinitionList lst) = do
- items <- mapM defListItemToLaTeX lst
- return $ text "\\begin{description}" $$ vcat items $$
- text "\\end{description}"
-blockToLaTeX HorizontalRule = return $ text $
- "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n"
-blockToLaTeX (Header level lst) = do
- txt <- inlineListToLaTeX (deVerb lst)
- return $ if (level > 0) && (level <= 3)
- then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++
- "section{") <> txt <> text "}\n"
- else txt <> char '\n'
-blockToLaTeX (Table caption aligns widths heads rows) = do
- headers <- tableRowToLaTeX heads
- captionText <- inlineListToLaTeX caption
- rows' <- mapM tableRowToLaTeX rows
- let colWidths = map (printf "%.2f") widths
- let colDescriptors = concat $ zipWith
- (\width align -> ">{\\PBS" ++
- (case align of
- AlignLeft -> "\\raggedright"
- AlignRight -> "\\raggedleft"
- AlignCenter -> "\\centering"
- AlignDefault -> "\\raggedright") ++
- "\\hspace{0pt}}p{" ++ width ++
- "\\columnwidth}")
- colWidths aligns
- let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$
- headers $$ text "\\hline" $$ vcat rows' $$
- text "\\end{tabular}"
- let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}"
- addToHeader "\\usepackage{array}\n\
- \% This is needed because raggedright in table elements redefines \\\\:\n\
- \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
- \\\let\\PBS=\\PreserveBackslash"
- return $ if isEmpty captionText
- then centered tableBody <> char '\n'
- else text "\\begin{table}[h]" $$ centered tableBody $$
- inCmd "caption" captionText $$ text "\\end{table}\n"
-
-blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat
-
-tableRowToLaTeX cols = mapM blockListToLaTeX cols >>=
- return . ($$ text "\\\\") . foldl (\row item -> row $$
- (if isEmpty row then empty else text " & ") <> item) empty
-
-listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item " $$) .
- (nest 2)
-
-defListItemToLaTeX (term, def) = do
- term' <- inlineListToLaTeX $ deVerb term
- def' <- blockListToLaTeX def
- return $ text "\\item[" <> term' <> text "]" $$ def'
-
--- | Convert list of inline elements to LaTeX.
-inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
- -> State WriterState Doc
-inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat
-
-isQuoted :: Inline -> Bool
-isQuoted (Quoted _ _) = True
-isQuoted Apostrophe = True
-isQuoted _ = False
-
--- | Convert inline element to LaTeX
-inlineToLaTeX :: Inline -- ^ Inline to convert
- -> State WriterState Doc
-inlineToLaTeX (Emph lst) =
- inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph"
-inlineToLaTeX (Strong lst) =
- inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf"
-inlineToLaTeX (Strikeout lst) = do
- contents <- inlineListToLaTeX $ deVerb lst
- addToHeader "\\usepackage[normalem]{ulem}"
- return $ inCmd "sout" contents
-inlineToLaTeX (Superscript lst) =
- inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript"
-inlineToLaTeX (Subscript lst) = do
- contents <- inlineListToLaTeX $ deVerb lst
- -- oddly, latex includes \textsuperscript but not \textsubscript
- -- so we have to define it:
- addToHeader "\\newcommand{\\textsubscript}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}"
- return $ inCmd "textsubscript" contents
-inlineToLaTeX (Code str) = do
- st <- get
- if stInNote st
- then do addToHeader "\\usepackage{fancyvrb}"
- else return ()
- let chr = ((enumFromTo '!' '~') \\ str) !! 0
- return $ text $ "\\verb" ++ [chr] ++ str ++ [chr]
-inlineToLaTeX (Quoted SingleQuote lst) = do
- contents <- inlineListToLaTeX lst
- let s1 = if (not (null lst)) && (isQuoted (head lst))
- then text "\\,"
- else empty
- let s2 = if (not (null lst)) && (isQuoted (last lst))
- then text "\\,"
- else empty
- return $ char '`' <> s1 <> contents <> s2 <> char '\''
-inlineToLaTeX (Quoted DoubleQuote lst) = do
- contents <- inlineListToLaTeX lst
- let s1 = if (not (null lst)) && (isQuoted (head lst))
- then text "\\,"
- else empty
- let s2 = if (not (null lst)) && (isQuoted (last lst))
- then text "\\,"
- else empty
- return $ text "``" <> s1 <> contents <> s2 <> text "''"
-inlineToLaTeX Apostrophe = return $ char '\''
-inlineToLaTeX EmDash = return $ text "---"
-inlineToLaTeX EnDash = return $ text "--"
-inlineToLaTeX Ellipses = return $ text "\\ldots{}"
-inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str
-inlineToLaTeX (TeX str) = return $ text str
-inlineToLaTeX (HtmlInline str) = return empty
-inlineToLaTeX (LineBreak) = return $ text "\\\\"
-inlineToLaTeX Space = return $ char ' '
-inlineToLaTeX (Link txt (src, _)) = do
- addToHeader "\\usepackage[breaklinks=true]{hyperref}"
- case txt of
- [Code x] | x == src -> -- autolink
- do addToHeader "\\usepackage{url}"
- return $ text $ "\\url{" ++ x ++ "}"
- _ -> do contents <- inlineListToLaTeX $ deVerb txt
- return $ text ("\\href{" ++ src ++ "}{") <> contents <>
- char '}'
-inlineToLaTeX (Image alternate (source, tit)) = do
- addToHeader "\\usepackage{graphicx}"
- return $ text $ "\\includegraphics{" ++ source ++ "}"
-inlineToLaTeX (Note contents) = do
- st <- get
- put (st {stInNote = True})
- contents' <- blockListToLaTeX contents
- modify (\st -> st {stInNote = False})
- let rawnote = stripTrailingNewlines $ render contents'
- -- note: a \n before } is needed when note ends with a Verbatim environment
- let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote
- return $ text "%\n\\footnote{" <>
- text rawnote <> (if optNewline then char '\n' else empty) <> char '}'
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
deleted file mode 100644
index 8e14c2bf0..000000000
--- a/src/Text/Pandoc/Writers/Man.hs
+++ /dev/null
@@ -1,293 +0,0 @@
-{-
-Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Man
- Copyright : Copyright (C) 2007 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to groff man page format.
-
--}
-module Text.Pandoc.Writers.Man ( writeMan) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Printf ( printf )
-import Data.List ( isPrefixOf, drop, nub, intersperse )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
-import Control.Monad.State
-
-type Notes = [[Block]]
-type Preprocessors = [String] -- e.g. "t" for tbl
-type WriterState = (Notes, Preprocessors)
-
--- | Convert Pandoc to Man.
-writeMan :: WriterOptions -> Pandoc -> String
-writeMan opts document = render $ evalState (pandocToMan opts document) ([],[])
-
--- | Return groff man representation of document.
-pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc
-pandocToMan opts (Pandoc meta blocks) = do
- let before = writerIncludeBefore opts
- let after = writerIncludeAfter opts
- let before' = if null before then empty else text before
- let after' = if null after then empty else text after
- (head, foot) <- metaToMan opts meta
- body <- blockListToMan opts blocks
- (notes, preprocessors) <- get
- let preamble = if null preprocessors || not (writerStandalone opts)
- then empty
- else text $ ".\\\" " ++ concat (nub preprocessors)
- notes' <- notesToMan opts (reverse notes)
- return $ preamble $$ head $$ before' $$ body $$ notes' $$ foot $$ after'
-
--- | Insert bibliographic information into Man header and footer.
-metaToMan :: WriterOptions -- ^ Options, including Man header
- -> Meta -- ^ Meta with bibliographic information
- -> State WriterState (Doc, Doc)
-metaToMan options (Meta title authors date) = do
- titleText <- inlineListToMan options title
- let (cmdName, rest) = break (== ' ') $ render titleText
- let (title', section) = case reverse cmdName of
- (')':d:'(':xs) | d `elem` ['0'..'9'] ->
- (text (reverse xs), char d)
- xs -> (text (reverse xs), doubleQuotes empty)
- let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $
- splitBy '|' rest
- let head = (text ".TH") <+> title' <+> section <+>
- doubleQuotes (text date) <+> hsep extras
- let foot = case length authors of
- 0 -> empty
- 1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors)
- 2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors)
- return $ if writerStandalone options
- then (head, foot)
- else (empty, empty)
-
--- | Return man representation of notes.
-notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
-notesToMan opts notes =
- if null notes
- then return empty
- else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
- return . (text ".SH NOTES" $$) . vcat
-
--- | Return man representation of a note.
-noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
-noteToMan opts num note = do
- contents <- blockListToMan opts note
- let marker = text "\n.SS [" <> text (show num) <> char ']'
- return $ marker $$ contents
-
--- | Association list of characters to escape.
-manEscapes :: [(Char, String)]
-manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\"
-
--- | Escape special characters for Man.
-escapeString :: String -> String
-escapeString = escapeStringUsing manEscapes
-
--- | Escape a literal (code) section for Man.
-escapeCode :: String -> String
-escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ")
-
--- | Convert Pandoc block element to man.
-blockToMan :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState Doc
-blockToMan opts Null = return empty
-blockToMan opts (Plain inlines) =
- wrapIfNeeded opts (inlineListToMan opts) inlines
-blockToMan opts (Para inlines) = do
- contents <- wrapIfNeeded opts (inlineListToMan opts) inlines
- return $ text ".PP" $$ contents
-blockToMan opts (RawHtml str) = return $ text str
-blockToMan opts HorizontalRule = return $ text $ ".PP\n * * * * *"
-blockToMan opts (Header level inlines) = do
- contents <- inlineListToMan opts inlines
- let heading = case level of
- 1 -> ".SH "
- _ -> ".SS "
- return $ text heading <> contents
-blockToMan opts (CodeBlock str) = return $
- text ".PP" $$ text "\\f[CR]" $$
- text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]"
-blockToMan opts (BlockQuote blocks) = do
- contents <- blockListToMan opts blocks
- return $ text ".RS" $$ contents $$ text ".RE"
-blockToMan opts (Table caption alignments widths headers rows) =
- let aligncode AlignLeft = "l"
- aligncode AlignRight = "r"
- aligncode AlignCenter = "c"
- aligncode AlignDefault = "l"
- in do
- caption' <- inlineListToMan opts caption
- modify (\(notes, preprocessors) -> (notes, "t":preprocessors))
- let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths
- -- 78n default width - 8n indent = 70n
- let coldescriptions = text $ joinWithSep " "
- (zipWith (\align width -> aligncode align ++ width)
- alignments iwidths) ++ "."
- colheadings <- mapM (blockListToMan opts) headers
- let makeRow cols = text "T{" $$
- (vcat $ intersperse (text "T}@T{") cols) $$
- text "T}"
- let colheadings' = makeRow colheadings
- body <- mapM (\row -> do
- cols <- mapM (blockListToMan opts) row
- return $ makeRow cols) rows
- return $ text ".PP" $$ caption' $$
- text ".TS" $$ text "tab(@);" $$ coldescriptions $$
- colheadings' $$ char '_' $$ vcat body $$ text ".TE"
-
-blockToMan opts (BulletList items) = do
- contents <- mapM (bulletListItemToMan opts) items
- return (vcat contents)
-blockToMan opts (OrderedList attribs items) = do
- let markers = take (length items) $ orderedListMarkers attribs
- let indent = 1 + (maximum $ map length markers)
- contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
- zip markers items
- return (vcat contents)
-blockToMan opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToMan opts) items
- return (vcat contents)
-
--- | Convert bullet list item (list of blocks) to man.
-bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc
-bulletListItemToMan opts [] = return empty
-bulletListItemToMan opts ((Para first):rest) =
- bulletListItemToMan opts ((Plain first):rest)
-bulletListItemToMan opts ((Plain first):rest) = do
- first' <- blockToMan opts (Plain first)
- rest' <- blockListToMan opts rest
- let first'' = text ".IP \\[bu] 2" $$ first'
- let rest'' = if null rest
- then empty
- else text ".RS 2" $$ rest' $$ text ".RE"
- return (first'' $$ rest'')
-bulletListItemToMan opts (first:rest) = do
- first' <- blockToMan opts first
- rest' <- blockListToMan opts rest
- return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
-
--- | Convert ordered list item (a list of blocks) to man.
-orderedListItemToMan :: WriterOptions -- ^ options
- -> String -- ^ order marker for list item
- -> Int -- ^ number of spaces to indent
- -> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
-orderedListItemToMan _ _ _ [] = return empty
-orderedListItemToMan opts num indent ((Para first):rest) =
- orderedListItemToMan opts num indent ((Plain first):rest)
-orderedListItemToMan opts num indent (first:rest) = do
- first' <- blockToMan opts first
- rest' <- blockListToMan opts rest
- let num' = printf ("%" ++ show (indent - 1) ++ "s") num
- let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first'
- let rest'' = if null rest
- then empty
- else text ".RS 4" $$ rest' $$ text ".RE"
- return $ first'' $$ rest''
-
--- | Convert definition list item (label, list of blocks) to man.
-definitionListItemToMan :: WriterOptions
- -> ([Inline],[Block])
- -> State WriterState Doc
-definitionListItemToMan opts (label, items) = do
- labelText <- inlineListToMan opts label
- contents <- if null items
- then return empty
- else do
- let (first, rest) = case items of
- ((Para x):y) -> (Plain x,y)
- (x:y) -> (x,y)
- rest' <- mapM (\item -> blockToMan opts item)
- rest >>= (return . vcat)
- first' <- blockToMan opts first
- return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
- return $ text ".TP\n.B " <> labelText $+$ contents
-
--- | Convert list of Pandoc block elements to man.
-blockListToMan :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState Doc
-blockListToMan opts blocks =
- mapM (blockToMan opts) blocks >>= (return . vcat)
-
--- | Convert list of Pandoc inline elements to man.
-inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
-
--- | Convert Pandoc inline element to man.
-inlineToMan :: WriterOptions -> Inline -> State WriterState Doc
-inlineToMan opts (Emph lst) = do
- contents <- inlineListToMan opts lst
- return $ text "\\f[I]" <> contents <> text "\\f[]"
-inlineToMan opts (Strong lst) = do
- contents <- inlineListToMan opts lst
- return $ text "\\f[B]" <> contents <> text "\\f[]"
-inlineToMan opts (Strikeout lst) = do
- contents <- inlineListToMan opts lst
- return $ text "[STRIKEOUT:" <> contents <> char ']'
-inlineToMan opts (Superscript lst) = do
- contents <- inlineListToMan opts lst
- return $ char '^' <> contents <> char '^'
-inlineToMan opts (Subscript lst) = do
- contents <- inlineListToMan opts lst
- return $ char '~' <> contents <> char '~'
-inlineToMan opts (Quoted SingleQuote lst) = do
- contents <- inlineListToMan opts lst
- return $ char '`' <> contents <> char '\''
-inlineToMan opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToMan opts lst
- return $ text "\\[lq]" <> contents <> text "\\[rq]"
-inlineToMan opts EmDash = return $ text "\\[em]"
-inlineToMan opts EnDash = return $ text "\\[en]"
-inlineToMan opts Apostrophe = return $ char '\''
-inlineToMan opts Ellipses = return $ text "\\&..."
-inlineToMan opts (Code str) =
- return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]"
-inlineToMan opts (Str str) = return $ text $ escapeString str
-inlineToMan opts (TeX str) = return $ text $ escapeCode str
-inlineToMan opts (HtmlInline str) = return $ text $ escapeCode str
-inlineToMan opts (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n"
-inlineToMan opts Space = return $ char ' '
-inlineToMan opts (Link txt (src, _)) = do
- linktext <- inlineListToMan opts txt
- let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
- return $ if txt == [Code srcSuffix]
- then char '<' <> text srcSuffix <> char '>'
- else linktext <> text " (" <> text src <> char ')'
-inlineToMan opts (Image alternate (source, tit)) = do
- let txt = if (null alternate) || (alternate == [Str ""]) ||
- (alternate == [Str source]) -- to prevent autolinks
- then [Str "image"]
- else alternate
- linkPart <- inlineToMan opts (Link txt (source, tit))
- return $ char '[' <> text "IMAGE: " <> linkPart <> char ']'
-inlineToMan opts (Note contents) = do
- modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state
- (notes, _) <- get
- let ref = show $ (length notes)
- return $ char '[' <> text ref <> char ']'
-
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
deleted file mode 100644
index 4cecaae5d..000000000
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ /dev/null
@@ -1,373 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Markdown
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to markdown-formatted plain text.
-
-Markdown: <http://daringfireball.net/projects/markdown/>
--}
-module Text.Pandoc.Writers.Markdown ( writeMarkdown) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Pandoc.Blocks
-import Text.ParserCombinators.Parsec ( parse, (<|>), GenParser )
-import Data.List ( group, isPrefixOf, drop, find, intersperse )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
-import Control.Monad.State
-
-type Notes = [[Block]]
-type Refs = KeyTable
-type WriterState = (Notes, Refs)
-
--- | Convert Pandoc to Markdown.
-writeMarkdown :: WriterOptions -> Pandoc -> String
-writeMarkdown opts document =
- render $ evalState (pandocToMarkdown opts document) ([],[])
-
--- | Return markdown representation of document.
-pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc
-pandocToMarkdown opts (Pandoc meta blocks) = do
- let before = writerIncludeBefore opts
- let after = writerIncludeAfter opts
- let before' = if null before then empty else text before
- let after' = if null after then empty else text after
- metaBlock <- metaToMarkdown opts meta
- let head = if writerStandalone opts
- then metaBlock $+$ text (writerHeader opts)
- else empty
- let headerBlocks = filter isHeaderBlock blocks
- let toc = if writerTableOfContents opts
- then tableOfContents opts headerBlocks
- else empty
- body <- blockListToMarkdown opts blocks
- (notes, _) <- get
- notes' <- notesToMarkdown opts (reverse notes)
- (_, refs) <- get -- note that the notes may contain refs
- refs' <- keyTableToMarkdown opts (reverse refs)
- return $ head $+$ before' $+$ toc $+$ body $+$ text "" $+$
- notes' $+$ text "" $+$ refs' $+$ after'
-
--- | Return markdown representation of reference key table.
-keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
-keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
-
--- | Return markdown representation of a reference key.
-keyToMarkdown :: WriterOptions
- -> ([Inline], (String, String))
- -> State WriterState Doc
-keyToMarkdown opts (label, (src, tit)) = do
- label' <- inlineListToMarkdown opts label
- let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\""
- return $ text " " <> char '[' <> label' <> char ']' <> text ": " <>
- text src <> tit'
-
--- | Return markdown representation of notes.
-notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
-notesToMarkdown opts notes =
- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
- return . vcat
-
--- | Return markdown representation of a note.
-noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
-noteToMarkdown opts num blocks = do
- contents <- blockListToMarkdown opts blocks
- let marker = text "[^" <> text (show num) <> text "]:"
- return $ hang marker (writerTabStop opts) contents
-
--- | Escape special characters for Markdown.
-escapeString :: String -> String
-escapeString = escapeStringUsing markdownEscapes
- where markdownEscapes = ('\160', "&nbsp;"):(backslashEscapes "`<\\*_^~")
-
--- | Convert bibliographic information into Markdown header.
-metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc
-metaToMarkdown opts (Meta title authors date) = do
- title' <- titleToMarkdown opts title
- authors' <- authorsToMarkdown authors
- date' <- dateToMarkdown date
- return $ title' $+$ authors' $+$ date'
-
-titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
-titleToMarkdown opts [] = return empty
-titleToMarkdown opts lst = do
- contents <- inlineListToMarkdown opts lst
- return $ text "% " <> contents
-
-authorsToMarkdown :: [String] -> State WriterState Doc
-authorsToMarkdown [] = return empty
-authorsToMarkdown lst = return $
- text "% " <> text (joinWithSep ", " (map escapeString lst))
-
-dateToMarkdown :: String -> State WriterState Doc
-dateToMarkdown [] = return empty
-dateToMarkdown str = return $ text "% " <> text (escapeString str)
-
--- | Construct table of contents from list of header blocks.
-tableOfContents :: WriterOptions -> [Block] -> Doc
-tableOfContents opts headers =
- let opts' = opts { writerIgnoreNotes = True }
- contents = BulletList $ map elementToListItem $ hierarchicalize headers
- in evalState (blockToMarkdown opts' contents) ([],[])
-
--- | Converts an Element to a list item for a table of contents,
-elementToListItem :: Element -> [Block]
-elementToListItem (Blk _) = []
-elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
- if null subsecs
- then []
- else [BulletList $ map elementToListItem subsecs]
-
--- | Ordered list start parser for use in Para below.
-olMarker :: GenParser Char st Char
-olMarker = do (start, style, delim) <- anyOrderedListMarker
- if delim == Period &&
- (style == UpperAlpha || (style == UpperRoman &&
- start `elem` [1, 5, 10, 50, 100, 500, 1000]))
- then spaceChar >> spaceChar
- else spaceChar
-
--- | True if string begins with an ordered list marker
-beginsWithOrderedListMarker :: String -> Bool
-beginsWithOrderedListMarker str =
- case parse olMarker "para start" str of
- Left _ -> False
- Right _ -> True
-
-wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
-wrappedMarkdown opts inlines = do
- let chunks = splitBy LineBreak inlines
- let chunks' = if null chunks
- then []
- else (map (++ [Str " "]) $ init chunks) ++ [last chunks]
- lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks'
- return $ vcat lns
-
--- | Convert Pandoc block element to markdown.
-blockToMarkdown :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState Doc
-blockToMarkdown opts Null = return empty
-blockToMarkdown opts (Plain inlines) =
- wrappedMarkdown opts inlines
-blockToMarkdown opts (Para inlines) = do
- contents <- wrappedMarkdown opts inlines
- -- escape if para starts with ordered list marker
- let esc = if (not (writerStrictMarkdown opts)) &&
- beginsWithOrderedListMarker (render contents)
- then char '\\'
- else empty
- return $ esc <> contents <> text "\n"
-blockToMarkdown opts (RawHtml str) = return $ text str
-blockToMarkdown opts HorizontalRule = return $ text "\n* * * * *\n"
-blockToMarkdown opts (Header level inlines) = do
- contents <- inlineListToMarkdown opts inlines
- return $ text ((replicate level '#') ++ " ") <> contents <> text "\n"
-blockToMarkdown opts (CodeBlock str) = return $
- (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
-blockToMarkdown opts (BlockQuote blocks) = do
- contents <- blockListToMarkdown opts blocks
- return $ (vcat $ map (text . ("> " ++)) $ lines $ render contents) <>
- text "\n"
-blockToMarkdown opts (Table caption aligns widths headers rows) = do
- caption' <- inlineListToMarkdown opts caption
- let caption'' = if null caption
- then empty
- else text "" $+$ (text "Table: " <> caption')
- headers' <- mapM (blockListToMarkdown opts) headers
- let widthsInChars = map (floor . (78 *)) widths
- let alignHeader alignment = case alignment of
- AlignLeft -> leftAlignBlock
- AlignCenter -> centerAlignBlock
- AlignRight -> rightAlignBlock
- AlignDefault -> leftAlignBlock
- let makeRow = hsepBlocks . (zipWith alignHeader aligns) .
- (zipWith docToBlock widthsInChars)
- let head = makeRow headers'
- rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row
- return $ makeRow cols) rows
- let tableWidth = sum widthsInChars
- let maxRowHeight = maximum $ map heightOfBlock (head:rows')
- let isMultilineTable = maxRowHeight > 1
- let underline = hsep $
- map (\width -> text $ replicate width '-') widthsInChars
- let border = if isMultilineTable
- then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-'
- else empty
- let spacer = if isMultilineTable
- then text ""
- else empty
- let body = vcat $ intersperse spacer $ map blockToDoc rows'
- return $ (nest 2 $ border $+$ (blockToDoc head) $+$ underline $+$ body $+$
- border $+$ caption'') <> text "\n"
-blockToMarkdown opts (BulletList items) = do
- contents <- mapM (bulletListItemToMarkdown opts) items
- return $ (vcat contents) <> text "\n"
-blockToMarkdown opts (OrderedList attribs items) = do
- let markers = orderedListMarkers attribs
- let markers' = map (\m -> if length m < 3
- then m ++ replicate (3 - length m) ' '
- else m) markers
- contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
- zip markers' items
- return $ (vcat contents) <> text "\n"
-blockToMarkdown opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToMarkdown opts) items
- return $ (vcat contents) <> text "\n"
-
--- | Convert bullet list item (list of blocks) to markdown.
-bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc
-bulletListItemToMarkdown opts items = do
- contents <- blockListToMarkdown opts items
- return $ hang (text "- ") (writerTabStop opts) contents
-
--- | Convert ordered list item (a list of blocks) to markdown.
-orderedListItemToMarkdown :: WriterOptions -- ^ options
- -> String -- ^ list item marker
- -> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
-orderedListItemToMarkdown opts marker items = do
- contents <- blockListToMarkdown opts items
- -- The complexities here are needed to ensure that if the list
- -- marker is 4 characters or longer, the second and following
- -- lines are indented 4 spaces but the list item begins after the marker.
- return $ sep [nest (min (3 - length marker) 0) (text marker),
- nest (writerTabStop opts) contents]
-
--- | Convert definition list item (label, list of blocks) to markdown.
-definitionListItemToMarkdown :: WriterOptions
- -> ([Inline],[Block])
- -> State WriterState Doc
-definitionListItemToMarkdown opts (label, items) = do
- labelText <- inlineListToMarkdown opts label
- let tabStop = writerTabStop opts
- let leader = char ':'
- contents <- mapM (\item -> blockToMarkdown opts item >>=
- (\txt -> return (leader $$ nest tabStop txt)))
- items >>= return . vcat
- return $ labelText $+$ contents
-
--- | Convert list of Pandoc block elements to markdown.
-blockListToMarkdown :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState Doc
-blockListToMarkdown opts blocks =
- mapM (blockToMarkdown opts) blocks >>= return . vcat
-
--- | Get reference for target; if none exists, create unique one and return.
--- Prefer label if possible; otherwise, generate a unique key.
-getReference :: [Inline] -> Target -> State WriterState [Inline]
-getReference label (src, tit) = do
- (_,refs) <- get
- case find ((== (src, tit)) . snd) refs of
- Just (ref, _) -> return ref
- Nothing -> do
- let label' = case find ((== label) . fst) refs of
- Just _ -> -- label is used; generate numerical label
- case find (\n -> not (any (== [Str (show n)])
- (map fst refs))) [1..10000] of
- Just x -> [Str (show x)]
- Nothing -> error "no unique label"
- Nothing -> label
- modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
- return label'
-
--- | Convert list of Pandoc inline elements to markdown.
-inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToMarkdown opts lst =
- mapM (inlineToMarkdown opts) lst >>= return . hcat
-
--- | Convert Pandoc inline element to markdown.
-inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
-inlineToMarkdown opts (Emph lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ char '*' <> contents <> char '*'
-inlineToMarkdown opts (Strong lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ text "**" <> contents <> text "**"
-inlineToMarkdown opts (Strikeout lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ text "~~" <> contents <> text "~~"
-inlineToMarkdown opts (Superscript lst) = do
- contents <- inlineListToMarkdown opts lst
- let contents' = text $ substitute " " "\\ " $ render contents
- return $ char '^' <> contents' <> char '^'
-inlineToMarkdown opts (Subscript lst) = do
- contents <- inlineListToMarkdown opts lst
- let contents' = text $ substitute " " "\\ " $ render contents
- return $ char '~' <> contents' <> char '~'
-inlineToMarkdown opts (Quoted SingleQuote lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ char '\'' <> contents <> char '\''
-inlineToMarkdown opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ char '"' <> contents <> char '"'
-inlineToMarkdown opts EmDash = return $ text "--"
-inlineToMarkdown opts EnDash = return $ char '-'
-inlineToMarkdown opts Apostrophe = return $ char '\''
-inlineToMarkdown opts Ellipses = return $ text "..."
-inlineToMarkdown opts (Code str) =
- let tickGroups = filter (\s -> '`' `elem` s) $ group str
- longest = if null tickGroups
- then 0
- else maximum $ map length tickGroups
- marker = replicate (longest + 1) '`'
- spacer = if (longest == 0) then "" else " " in
- return $ text (marker ++ spacer ++ str ++ spacer ++ marker)
-inlineToMarkdown opts (Str str) = return $ text $ escapeString str
-inlineToMarkdown opts (TeX str) = return $ text str
-inlineToMarkdown opts (HtmlInline str) = return $ text str
-inlineToMarkdown opts (LineBreak) = return $ text " \n"
-inlineToMarkdown opts Space = return $ char ' '
-inlineToMarkdown opts (Link txt (src, tit)) = do
- linktext <- inlineListToMarkdown opts txt
- let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\""
- let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
- let useRefLinks = writerReferenceLinks opts
- let useAuto = null tit && txt == [Code srcSuffix]
- ref <- if useRefLinks then getReference txt (src, tit) else return []
- reftext <- inlineListToMarkdown opts ref
- return $ if useAuto
- then char '<' <> text srcSuffix <> char '>'
- else if useRefLinks
- then let first = char '[' <> linktext <> char ']'
- second = if txt == ref
- then text "[]"
- else char '[' <> reftext <> char ']'
- in first <> second
- else char '[' <> linktext <> char ']' <>
- char '(' <> text src <> linktitle <> char ')'
-inlineToMarkdown opts (Image alternate (source, tit)) = do
- let txt = if (null alternate) || (alternate == [Str ""]) ||
- (alternate == [Str source]) -- to prevent autolinks
- then [Str "image"]
- else alternate
- linkPart <- inlineToMarkdown opts (Link txt (source, tit))
- return $ char '!' <> linkPart
-inlineToMarkdown opts (Note contents) = do
- modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state
- (notes, _) <- get
- let ref = show $ (length notes)
- return $ text "[^" <> text ref <> char ']'
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
deleted file mode 100644
index ddcbf95c0..000000000
--- a/src/Text/Pandoc/Writers/RST.hs
+++ /dev/null
@@ -1,325 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.RST
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to reStructuredText.
-
-reStructuredText: <http://docutils.sourceforge.net/rst.html>
--}
-module Text.Pandoc.Writers.RST ( writeRST) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Pandoc.Blocks
-import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
-import Control.Monad.State
-
-type Notes = [[Block]]
-type Refs = KeyTable
-type WriterState = (Notes, Refs, Refs) -- first Refs is links, second pictures
-
--- | Convert Pandoc to RST.
-writeRST :: WriterOptions -> Pandoc -> String
-writeRST opts document =
- render $ evalState (pandocToRST opts document) ([],[],[])
-
--- | Return RST representation of document.
-pandocToRST :: WriterOptions -> Pandoc -> State WriterState Doc
-pandocToRST opts (Pandoc meta blocks) = do
- let before = writerIncludeBefore opts
- let after = writerIncludeAfter opts
- before' = if null before then empty else text before
- after' = if null after then empty else text after
- metaBlock <- metaToRST opts meta
- let head = if (writerStandalone opts)
- then metaBlock $+$ text (writerHeader opts)
- else empty
- body <- blockListToRST opts blocks
- (notes, _, _) <- get
- notes' <- notesToRST opts (reverse notes)
- (_, refs, pics) <- get -- note that the notes may contain refs
- refs' <- keyTableToRST opts (reverse refs)
- pics' <- pictTableToRST opts (reverse pics)
- return $ head $+$ before' $+$ body $+$ notes' $+$ text "" $+$ refs' $+$
- pics' $+$ after'
-
--- | Return RST representation of reference key table.
-keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
-keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat
-
--- | Return RST representation of a reference key.
-keyToRST :: WriterOptions
- -> ([Inline], (String, String))
- -> State WriterState Doc
-keyToRST opts (label, (src, tit)) = do
- label' <- inlineListToRST opts label
- let label'' = if ':' `elem` (render label')
- then char '`' <> label' <> char '`'
- else label'
- return $ text ".. _" <> label'' <> text ": " <> text src
-
--- | Return RST representation of notes.
-notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc
-notesToRST opts notes =
- mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>=
- return . vcat
-
--- | Return RST representation of a note.
-noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc
-noteToRST opts num note = do
- contents <- blockListToRST opts note
- let marker = text ".. [" <> text (show num) <> text "] "
- return $ hang marker 3 contents
-
--- | Return RST representation of picture reference table.
-pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
-pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat
-
--- | Return RST representation of a picture substitution reference.
-pictToRST :: WriterOptions
- -> ([Inline], (String, String))
- -> State WriterState Doc
-pictToRST opts (label, (src, _)) = do
- label' <- inlineListToRST opts label
- return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <>
- text src
-
--- | Take list of inline elements and return wrapped doc.
-wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
-wrappedRST opts inlines = mapM (wrapIfNeeded opts (inlineListToRST opts))
- (splitBy LineBreak inlines) >>= return . vcat
-
--- | Escape special characters for RST.
-escapeString :: String -> String
-escapeString = escapeStringUsing (backslashEscapes "`\\|*_")
-
--- | Convert bibliographic information into RST header.
-metaToRST :: WriterOptions -> Meta -> State WriterState Doc
-metaToRST opts (Meta title authors date) = do
- title' <- titleToRST opts title
- authors' <- authorsToRST authors
- date' <- dateToRST date
- let toc = if writerTableOfContents opts
- then text "" $+$ text ".. contents::"
- else empty
- return $ title' $+$ authors' $+$ date' $+$ toc
-
-titleToRST :: WriterOptions -> [Inline] -> State WriterState Doc
-titleToRST opts [] = return empty
-titleToRST opts lst = do
- contents <- inlineListToRST opts lst
- let titleLength = length $ render contents
- let border = text (replicate titleLength '=')
- return $ border $+$ contents $+$ border <> text "\n"
-
-authorsToRST :: [String] -> State WriterState Doc
-authorsToRST [] = return empty
-authorsToRST (first:rest) = do
- rest' <- authorsToRST rest
- return $ (text ":Author: " <> text first) $+$ rest'
-
-dateToRST :: String -> State WriterState Doc
-dateToRST [] = return empty
-dateToRST str = return $ text ":Date: " <> text (escapeString str)
-
--- | Convert Pandoc block element to RST.
-blockToRST :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState Doc
-blockToRST opts Null = return empty
-blockToRST opts (Plain inlines) = wrappedRST opts inlines
-blockToRST opts (Para [TeX str]) =
- let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
- return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str')
-blockToRST opts (Para inlines) = do
- contents <- wrappedRST opts inlines
- return $ contents <> text "\n"
-blockToRST opts (RawHtml str) =
- let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
- return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str')
-blockToRST opts HorizontalRule = return $ text "--------------\n"
-blockToRST opts (Header level inlines) = do
- contents <- inlineListToRST opts inlines
- let headerLength = length $ render contents
- let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
- let border = text $ replicate headerLength headerChar
- return $ contents $+$ border <> text "\n"
-blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$
- (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
-blockToRST opts (BlockQuote blocks) = do
- contents <- blockListToRST opts blocks
- return $ (nest (writerTabStop opts) contents) <> text "\n"
-blockToRST opts (Table caption aligns widths headers rows) = do
- caption' <- inlineListToRST opts caption
- let caption'' = if null caption
- then empty
- else text "" $+$ (text "Table: " <> caption')
- headers' <- mapM (blockListToRST opts) headers
- let widthsInChars = map (floor . (78 *)) widths
- let alignHeader alignment = case alignment of
- AlignLeft -> leftAlignBlock
- AlignCenter -> centerAlignBlock
- AlignRight -> rightAlignBlock
- AlignDefault -> leftAlignBlock
- let hpipeBlocks blocks = hcatBlocks [beg, middle, end]
- where height = maximum (map heightOfBlock blocks)
- sep = TextBlock 3 height (replicate height " | ")
- beg = TextBlock 2 height (replicate height "| ")
- end = TextBlock 2 height (replicate height " |")
- middle = hcatBlocks $ intersperse sep blocks
- let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars
- let head = makeRow headers'
- rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row
- return $ makeRow cols) rows
- let tableWidth = sum widthsInChars
- let maxRowHeight = maximum $ map heightOfBlock (head:rows')
- let border ch = char '+' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
- map (\l -> text $ replicate l ch) widthsInChars) <>
- char ch <> char '+'
- let body = vcat $ intersperse (border '-') $ map blockToDoc rows'
- return $ border '-' $+$ blockToDoc head $+$ border '=' $+$ body $+$
- border '-' $$ caption'' $$ text ""
-blockToRST opts (BulletList items) = do
- contents <- mapM (bulletListItemToRST opts) items
- -- ensure that sublists have preceding blank line
- return $ text "" $+$ vcat contents <> text "\n"
-blockToRST opts (OrderedList (start, style, delim) items) = do
- let markers = if start == 1 && style == DefaultStyle && delim == DefaultDelim
- then take (length items) $ repeat "#."
- else take (length items) $ orderedListMarkers
- (start, style, delim)
- let maxMarkerLength = maximum $ map length markers
- let markers' = map (\m -> let s = maxMarkerLength - length m
- in m ++ replicate s ' ') markers
- contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $
- zip markers' items
- -- ensure that sublists have preceding blank line
- return $ text "" $+$ vcat contents <> text "\n"
-blockToRST opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToRST opts) items
- return $ (vcat contents) <> text "\n"
-
--- | Convert bullet list item (list of blocks) to RST.
-bulletListItemToRST :: WriterOptions -> [Block] -> State WriterState Doc
-bulletListItemToRST opts items = do
- contents <- blockListToRST opts items
- return $ hang (text "- ") 3 contents
-
--- | Convert ordered list item (a list of blocks) to RST.
-orderedListItemToRST :: WriterOptions -- ^ options
- -> String -- ^ marker for list item
- -> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
-orderedListItemToRST opts marker items = do
- contents <- blockListToRST opts items
- return $ hang (text marker) (length marker + 1) contents
-
--- | Convert defintion list item (label, list of blocks) to RST.
-definitionListItemToRST :: WriterOptions -> ([Inline], [Block]) -> State WriterState Doc
-definitionListItemToRST opts (label, items) = do
- label <- inlineListToRST opts label
- contents <- blockListToRST opts items
- return $ label $+$ nest (writerTabStop opts) contents
-
--- | Convert list of Pandoc block elements to RST.
-blockListToRST :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState Doc
-blockListToRST opts blocks =
- mapM (blockToRST opts) blocks >>= return . vcat
-
--- | Convert list of Pandoc inline elements to RST.
-inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat
-
--- | Convert Pandoc inline element to RST.
-inlineToRST :: WriterOptions -> Inline -> State WriterState Doc
-inlineToRST opts (Emph lst) = do
- contents <- inlineListToRST opts lst
- return $ char '*' <> contents <> char '*'
-inlineToRST opts (Strong lst) = do
- contents <- inlineListToRST opts lst
- return $ text "**" <> contents <> text "**"
-inlineToRST opts (Strikeout lst) = do
- contents <- inlineListToRST opts lst
- return $ text "[STRIKEOUT:" <> contents <> char ']'
-inlineToRST opts (Superscript lst) = do
- contents <- inlineListToRST opts lst
- return $ text "\\ :sup:`" <> contents <> text "`\\ "
-inlineToRST opts (Subscript lst) = do
- contents <- inlineListToRST opts lst
- return $ text "\\ :sub:`" <> contents <> text "`\\ "
-inlineToRST opts (Quoted SingleQuote lst) = do
- contents <- inlineListToRST opts lst
- return $ char '\'' <> contents <> char '\''
-inlineToRST opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToRST opts lst
- return $ char '"' <> contents <> char '"'
-inlineToRST opts EmDash = return $ text "--"
-inlineToRST opts EnDash = return $ char '-'
-inlineToRST opts Apostrophe = return $ char '\''
-inlineToRST opts Ellipses = return $ text "..."
-inlineToRST opts (Code str) = return $ text $ "``" ++ str ++ "``"
-inlineToRST opts (Str str) = return $ text $ escapeString str
-inlineToRST opts (TeX str) = return $ text str
-inlineToRST opts (HtmlInline str) = return empty
-inlineToRST opts (LineBreak) = return $ char ' ' -- RST doesn't have linebreaks
-inlineToRST opts Space = return $ char ' '
-inlineToRST opts (Link [Code str] (src, tit)) | src == str ||
- src == "mailto:" ++ str = do
- let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
- return $ text srcSuffix
-inlineToRST opts (Link txt (src, tit)) = do
- let useReferenceLinks = writerReferenceLinks opts
- linktext <- inlineListToRST opts $ normalizeSpaces txt
- if useReferenceLinks
- then do (notes, refs, pics) <- get
- let refs' = if (txt, (src, tit)) `elem` refs
- then refs
- else (txt, (src, tit)):refs
- put (notes, refs', pics)
- return $ char '`' <> linktext <> text "`_"
- else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_"
-inlineToRST opts (Image alternate (source, tit)) = do
- (notes, refs, pics) <- get
- let labelsUsed = map fst pics
- let txt = if null alternate || alternate == [Str ""] ||
- alternate `elem` labelsUsed
- then [Str $ "image" ++ show (length refs)]
- else alternate
- let pics' = if (txt, (source, tit)) `elem` pics
- then pics
- else (txt, (source, tit)):pics
- put (notes, refs, pics')
- label <- inlineListToRST opts txt
- return $ char '|' <> label <> char '|'
-inlineToRST opts (Note contents) = do
- -- add to notes in state
- modify (\(notes, refs, pics) -> (contents:notes, refs, pics))
- (notes, _, _) <- get
- let ref = show $ (length notes)
- return $ text " [" <> text ref <> text "]_"
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
deleted file mode 100644
index 3bd5c63b2..000000000
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ /dev/null
@@ -1,286 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.RTF
- Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to RTF (rich text format).
--}
-module Text.Pandoc.Writers.RTF ( writeRTF ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Regex ( matchRegexAll, mkRegex )
-import Data.List ( isSuffixOf )
-import Data.Char ( ord )
-
--- | Convert Pandoc to a string in rich text format.
-writeRTF :: WriterOptions -> Pandoc -> String
-writeRTF options (Pandoc meta blocks) =
- let head = if writerStandalone options
- then rtfHeader (writerHeader options) meta
- else ""
- toc = if writerTableOfContents options
- then tableOfContents $ filter isHeaderBlock blocks
- else ""
- foot = if writerStandalone options then "\n}\n" else ""
- body = writerIncludeBefore options ++
- concatMap (blockToRTF 0 AlignDefault) blocks ++
- writerIncludeAfter options
- in head ++ toc ++ body ++ foot
-
--- | Construct table of contents from list of header blocks.
-tableOfContents :: [Block] -> String
-tableOfContents headers =
- let contentsTree = hierarchicalize headers
- in concatMap (blockToRTF 0 AlignDefault) $
- [Header 1 [Str "Contents"],
- BulletList (map elementToListItem contentsTree)]
-
-elementToListItem :: Element -> [Block]
-elementToListItem (Blk _) = []
-elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
- if null subsecs
- then []
- else [BulletList (map elementToListItem subsecs)]
-
--- | Convert unicode characters (> 127) into rich text format representation.
-handleUnicode :: String -> String
-handleUnicode [] = []
-handleUnicode (c:cs) =
- if ord c > 127
- then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
- else c:(handleUnicode cs)
-
--- | Escape special characters.
-escapeSpecial :: String -> String
-escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}"))
-
--- | Escape strings as needed for rich text format.
-stringToRTF :: String -> String
-stringToRTF = handleUnicode . escapeSpecial
-
--- | Escape things as needed for code block in RTF.
-codeStringToRTF :: String -> String
-codeStringToRTF str = joinWithSep "\\line\n" $ lines (stringToRTF str)
-
--- | Deal with raw LaTeX.
-latexToRTF :: String -> String
-latexToRTF str = "{\\cf1 " ++ (stringToRTF str) ++ "\\cf0 } "
-
--- | Make a paragraph with first-line indent, block indent, and space after.
-rtfParSpaced :: Int -- ^ space after (in twips)
- -> Int -- ^ block indent (in twips)
- -> Int -- ^ first line indent (relative to block) (in twips)
- -> Alignment -- ^ alignment
- -> String -- ^ string with content
- -> String
-rtfParSpaced spaceAfter indent firstLineIndent alignment content =
- let alignString = case alignment of
- AlignLeft -> "\\ql "
- AlignRight -> "\\qr "
- AlignCenter -> "\\qc "
- AlignDefault -> "\\ql "
- in "{\\pard " ++ alignString ++
- "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
- " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
-
--- | Default paragraph.
-rtfPar :: Int -- ^ block indent (in twips)
- -> Int -- ^ first line indent (relative to block) (in twips)
- -> Alignment -- ^ alignment
- -> String -- ^ string with content
- -> String
-rtfPar = rtfParSpaced 180
-
--- | Compact paragraph (e.g. for compact list items).
-rtfCompact :: Int -- ^ block indent (in twips)
- -> Int -- ^ first line indent (relative to block) (in twips)
- -> Alignment -- ^ alignment
- -> String -- ^ string with content
- -> String
-rtfCompact = rtfParSpaced 0
-
--- number of twips to indent
-indentIncrement = 720
-listIncrement = 360
-
--- | Returns appropriate bullet list marker for indent level.
-bulletMarker :: Int -> String
-bulletMarker indent = case indent `mod` 720 of
- 0 -> "\\bullet "
- otherwise -> "\\endash "
-
--- | Returns appropriate (list of) ordered list markers for indent level.
-orderedMarkers :: Int -> ListAttributes -> [String]
-orderedMarkers indent (start, style, delim) =
- if style == DefaultStyle && delim == DefaultDelim
- then case indent `mod` 720 of
- 0 -> orderedListMarkers (start, Decimal, Period)
- otherwise -> orderedListMarkers (start, LowerAlpha, Period)
- else orderedListMarkers (start, style, delim)
-
--- | Returns RTF header.
-rtfHeader :: String -- ^ header text
- -> Meta -- ^ bibliographic information
- -> String
-rtfHeader headerText (Meta title authors date) =
- let titletext = if null title
- then ""
- else rtfPar 0 0 AlignCenter $
- "\\b \\fs36 " ++ inlineListToRTF title
- authorstext = if null authors
- then ""
- else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $
- map stringToRTF authors))
- datetext = if date == ""
- then ""
- else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
- let spacer = if null (titletext ++ authorstext ++ datetext)
- then ""
- else rtfPar 0 0 AlignDefault "" in
- headerText ++ titletext ++ authorstext ++ datetext ++ spacer
-
--- | Convert Pandoc block element to RTF.
-blockToRTF :: Int -- ^ indent level
- -> Alignment -- ^ alignment
- -> Block -- ^ block to convert
- -> String
-blockToRTF _ _ Null = ""
-blockToRTF indent alignment (Plain lst) =
- rtfCompact indent 0 alignment $ inlineListToRTF lst
-blockToRTF indent alignment (Para lst) =
- rtfPar indent 0 alignment $ inlineListToRTF lst
-blockToRTF indent alignment (BlockQuote lst) =
- concatMap (blockToRTF (indent + indentIncrement) alignment) lst
-blockToRTF indent _ (CodeBlock str) =
- rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
-blockToRTF _ _ (RawHtml str) = ""
-blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
- concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
-blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
- zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
-blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
- concatMap (definitionListItemToRTF alignment indent) lst
-blockToRTF indent _ HorizontalRule =
- rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
-blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $
- "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
-blockToRTF indent alignment (Table caption aligns sizes headers rows) =
- tableRowToRTF True indent aligns sizes headers ++
- concatMap (tableRowToRTF False indent aligns sizes) rows ++
- rtfPar indent 0 alignment (inlineListToRTF caption)
-
-tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String
-tableRowToRTF header indent aligns sizes cols =
- let columns = concat $ zipWith (tableItemToRTF indent) aligns cols
- totalTwips = 6 * 1440 -- 6 inches
- rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
- 0 sizes
- cellDefs = map (\edge -> (if header
- then "\\clbrdrb\\brdrs"
- else "") ++ "\\cellx" ++ show edge)
- rightEdges
- start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
- "\\trkeep\\intbl\n{\n"
- end = "}\n\\intbl\\row}\n"
- in start ++ columns ++ end
-
-tableItemToRTF :: Int -> Alignment -> [Block] -> String
-tableItemToRTF indent alignment item =
- let contents = concatMap (blockToRTF indent alignment) item
- in "{\\intbl " ++ contents ++ "\\cell}\n"
-
--- | Ensure that there's the same amount of space after compact
--- lists as after regular lists.
-spaceAtEnd :: String -> String
-spaceAtEnd str =
- if isSuffixOf "\\par}\n" str
- then (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
- else str
-
--- | Convert list item (list of blocks) to RTF.
-listItemToRTF :: Alignment -- ^ alignment
- -> Int -- ^ indent level
- -> String -- ^ list start marker
- -> [Block] -- ^ list item (list of blocks)
- -> [Char]
-listItemToRTF alignment indent marker [] =
- rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
- (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
-listItemToRTF alignment indent marker list =
- let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in
- -- insert the list marker into the (processed) first block
- let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of
- Just (before, matched, after, _) ->
- before ++ "\\fi" ++ show (0 - listIncrement) ++
- " " ++ marker ++ "\\tx" ++
- show listIncrement ++ "\\tab" ++ after
- Nothing -> first in
- modFirst ++ concat rest
-
--- | Convert definition list item (label, list of blocks) to RTF.
-definitionListItemToRTF :: Alignment -- ^ alignment
- -> Int -- ^ indent level
- -> ([Inline],[Block]) -- ^ list item (list of blocks)
- -> [Char]
-definitionListItemToRTF alignment indent (label, items) =
- let labelText = blockToRTF indent alignment (Plain label)
- itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items
- in labelText ++ itemsText
-
--- | Convert list of inline items to RTF.
-inlineListToRTF :: [Inline] -- ^ list of inlines to convert
- -> String
-inlineListToRTF lst = concatMap inlineToRTF lst
-
--- | Convert inline item to RTF.
-inlineToRTF :: Inline -- ^ inline to convert
- -> String
-inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "} "
-inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "} "
-inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "} "
-inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "} "
-inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "} "
-inlineToRTF (Quoted SingleQuote lst) =
- "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
-inlineToRTF (Quoted DoubleQuote lst) =
- "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
-inlineToRTF Apostrophe = "\\u8217'"
-inlineToRTF Ellipses = "\\u8230?"
-inlineToRTF EmDash = "\\u8212-"
-inlineToRTF EnDash = "\\u8211-"
-inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} "
-inlineToRTF (Str str) = stringToRTF str
-inlineToRTF (TeX str) = latexToRTF str
-inlineToRTF (HtmlInline str) = ""
-inlineToRTF (LineBreak) = "\\line "
-inlineToRTF Space = " "
-inlineToRTF (Link text (src, tit)) =
- "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
- "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n"
-inlineToRTF (Image alternate (source, tit)) =
- "{\\cf1 [image: " ++ source ++ "]\\cf0}"
-inlineToRTF (Note contents) =
- "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
- (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"