From df7b68225101966051f8b592a27127bf789eb81e Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Tue, 17 Oct 2006 14:22:29 +0000 Subject: initial import git-svn-id: https://pandoc.googlecode.com/svn/trunk@2 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/ASCIIMathML.hs | 14 + src/Text/Pandoc/Definition.hs | 50 +++ src/Text/Pandoc/HtmlEntities.hs | 306 ++++++++++++++ src/Text/Pandoc/Readers/HTML.hs | 434 ++++++++++++++++++++ src/Text/Pandoc/Readers/LaTeX.hs | 585 +++++++++++++++++++++++++++ src/Text/Pandoc/Readers/Markdown.hs | 582 +++++++++++++++++++++++++++ src/Text/Pandoc/Readers/RST.hs | 644 ++++++++++++++++++++++++++++++ src/Text/Pandoc/Shared.hs | 417 +++++++++++++++++++ src/Text/Pandoc/UTF8.hs | 43 ++ src/Text/Pandoc/Writers/DefaultHeaders.hs | 27 ++ src/Text/Pandoc/Writers/HTML.hs | 197 +++++++++ src/Text/Pandoc/Writers/LaTeX.hs | 164 ++++++++ src/Text/Pandoc/Writers/Markdown.hs | 149 +++++++ src/Text/Pandoc/Writers/RST.hs | 188 +++++++++ src/Text/Pandoc/Writers/RTF.hs | 194 +++++++++ src/Text/Pandoc/Writers/S5.hs | 95 +++++ 16 files changed, 4089 insertions(+) create mode 100644 src/Text/Pandoc/ASCIIMathML.hs create mode 100644 src/Text/Pandoc/Definition.hs create mode 100644 src/Text/Pandoc/HtmlEntities.hs create mode 100644 src/Text/Pandoc/Readers/HTML.hs create mode 100644 src/Text/Pandoc/Readers/LaTeX.hs create mode 100644 src/Text/Pandoc/Readers/Markdown.hs create mode 100644 src/Text/Pandoc/Readers/RST.hs create mode 100644 src/Text/Pandoc/Shared.hs create mode 100644 src/Text/Pandoc/UTF8.hs create mode 100644 src/Text/Pandoc/Writers/DefaultHeaders.hs create mode 100644 src/Text/Pandoc/Writers/HTML.hs create mode 100644 src/Text/Pandoc/Writers/LaTeX.hs create mode 100644 src/Text/Pandoc/Writers/Markdown.hs create mode 100644 src/Text/Pandoc/Writers/RST.hs create mode 100644 src/Text/Pandoc/Writers/RTF.hs create mode 100644 src/Text/Pandoc/Writers/S5.hs (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/ASCIIMathML.hs b/src/Text/Pandoc/ASCIIMathML.hs new file mode 100644 index 000000000..85d7c46b9 --- /dev/null +++ b/src/Text/Pandoc/ASCIIMathML.hs @@ -0,0 +1,14 @@ +---------------------------------------------------- +-- Do not edit this file by hand. Edit +-- 'templates/ASCIIMathML.hs' +-- and run ./fillTemplates.pl Text/Pandoc/ASCIIMathML.hs +---------------------------------------------------- + +-- | Definitions for use of Pandoc.ASCIIMathML in HTML. +-- (See .) +module Text.Pandoc.ASCIIMathML ( asciiMathMLScript ) where + +-- | String containing Pandoc.ASCIIMathML javascript. +asciiMathMLScript :: String +asciiMathMLScript = "\n" + diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs new file mode 100644 index 000000000..08ff3905e --- /dev/null +++ b/src/Text/Pandoc/Definition.hs @@ -0,0 +1,50 @@ +-- | Definition of 'Pandoc' data structure for format-neutral representation +-- of documents. +module Text.Pandoc.Definition where + +data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show) + +-- | Bibliographic information for the document: title (list of 'Inline'), +-- authors (list of strings), date (string). +data Meta = Meta [Inline] -- title + [String] -- authors + String -- date + deriving (Eq, Show, Read) + +-- | Block element. +data Block + = Plain [Inline] -- ^ Plain text, not a paragraph + | Blank -- ^ A blank line + | Null -- ^ Nothing + | Para [Inline] -- ^ Paragraph + | Key [Inline] Target -- ^ Reference key: name (list of inlines) and 'Target' + | CodeBlock String -- ^ Code block (literal) + | RawHtml String -- ^ Raw HTML block (literal) + | BlockQuote [Block] -- ^ Block quote (list of blocks) + | OrderedList [[Block]] -- ^ Ordered list (list of items, each a list of blocks) + | BulletList [[Block]] -- ^ Bullet list (list of items, each a list of blocks) + | Header Int [Inline] -- ^ Header - level (integer) and text (list of inlines) + | HorizontalRule -- ^ Horizontal rule + | Note String [Block] -- ^ Footnote or endnote - reference (string), text (list of blocks) + deriving (Eq, Read, Show) + +-- | Target for a link: either a URL or an indirect (labeled) reference. +data Target + = Src String String -- ^ First string is URL, second is title + | Ref [Inline] -- ^ Label (list of inlines) for an indirect reference + deriving (Show, Eq, Read) + +-- | Inline elements. +data Inline + = Str String -- ^ Text (string) + | Emph [Inline] -- ^ Emphasized text (list of inlines) + | Strong [Inline] -- ^ Strongly emphasized text (list of inlines) + | Code String -- ^ Inline code (literal) + | Space -- ^ Inter-word space + | LineBreak -- ^ Hard line break + | TeX String -- ^ LaTeX code (literal) + | HtmlInline String -- ^ HTML code (literal) + | Link [Inline] Target -- ^ Hyperlink: text (list of inlines) and target + | Image [Inline] Target -- ^ Image: alternative text (list of inlines) and target + | NoteRef String -- ^ Footnote or endnote reference + deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/HtmlEntities.hs b/src/Text/Pandoc/HtmlEntities.hs new file mode 100644 index 000000000..bbb438ef5 --- /dev/null +++ b/src/Text/Pandoc/HtmlEntities.hs @@ -0,0 +1,306 @@ +-- | Functions for encoding unicode characters as HTML entity +-- references, and vice versa. +module Text.Pandoc.HtmlEntities ( + htmlEntityToChar, + charToHtmlEntity, + decodeEntities, + encodeEntities + ) where +import Char ( chr, ord ) +import Text.Regex ( mkRegex, matchRegexAll ) +import Maybe ( fromMaybe ) + +-- regexs for entities +decimalCodedEntity = mkRegex "&#([0-9]+);" +characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;" + +-- | Return a string with all entity references decoded to unicode characters +-- where possible. +decodeEntities :: String -> String +decodeEntities str = + case (matchRegexAll characterEntity str) of + Nothing -> str + Just (before, match, rest, _) -> before ++ replacement ++ (decodeEntities rest) + where replacement = case (htmlEntityToChar match) of + Just ch -> [ch] + Nothing -> match + +-- | Returns a string with characters replaced with entity references where possible. +encodeEntities :: String -> String +encodeEntities = concatMap (\c -> fromMaybe [c] (charToHtmlEntity c)) + +-- | If the string is a valid entity reference, returns @Just@ the character, +-- otherwise @Nothing@. +htmlEntityToChar :: String -> Maybe Char +htmlEntityToChar entity = + case (lookup entity htmlEntityTable) of + Just ch -> Just ch + Nothing -> case (matchRegexAll decimalCodedEntity entity) of + Just (_, _, _, [sub]) -> Just (chr (read sub)) + Nothing -> Nothing + +-- | If there is an entity reference corresponding to the character, returns +-- @Just@ the entity reference, otherwise @Nothing@. +charToHtmlEntity :: Char -> Maybe String +charToHtmlEntity char = + let matches = filter (\(entity, character) -> (character == char)) htmlEntityTable in + if (length matches) == 0 then + Nothing + else + Just (fst (head matches)) + +htmlEntityTable :: [(String, Char)] +htmlEntityTable = [ + (""", chr 34), + ("&", chr 38), + ("<", chr 60), + (">", chr 62), + (" ", chr 160), + ("¡", chr 161), + ("¢", chr 162), + ("£", chr 163), + ("¤", chr 164), + ("¥", chr 165), + ("¦", chr 166), + ("§", chr 167), + ("¨", chr 168), + ("©", chr 169), + ("ª", chr 170), + ("«", chr 171), + ("¬", chr 172), + ("­", chr 173), + ("®", chr 174), + ("¯", chr 175), + ("°", chr 176), + ("±", chr 177), + ("²", chr 178), + ("³", chr 179), + ("´", chr 180), + ("µ", chr 181), + ("¶", chr 182), + ("·", chr 183), + ("¸", chr 184), + ("¹", chr 185), + ("º", chr 186), + ("»", chr 187), + ("¼", chr 188), + ("½", chr 189), + ("¾", chr 190), + ("¿", chr 191), + ("À", chr 192), + ("Á", chr 193), + ("Â", chr 194), + ("Ã", chr 195), + ("Ä", chr 196), + ("Å", chr 197), + ("Æ", chr 198), + ("Ç", chr 199), + ("È", chr 200), + ("É", chr 201), + ("Ê", chr 202), + ("Ë", chr 203), + ("Ì", chr 204), + ("Í", chr 205), + ("Î", chr 206), + ("Ï", chr 207), + ("Ð", chr 208), + ("Ñ", chr 209), + ("Ò", chr 210), + ("Ó", chr 211), + ("Ô", chr 212), + ("Õ", chr 213), + ("Ö", chr 214), + ("×", chr 215), + ("Ø", chr 216), + ("Ù", chr 217), + ("Ú", chr 218), + ("Û", chr 219), + ("Ü", chr 220), + ("Ý", chr 221), + ("Þ", chr 222), + ("ß", chr 223), + ("à", chr 224), + ("á", chr 225), + ("â", chr 226), + ("ã", chr 227), + ("ä", chr 228), + ("å", chr 229), + ("æ", chr 230), + ("ç", chr 231), + ("è", chr 232), + ("é", chr 233), + ("ê", chr 234), + ("ë", chr 235), + ("ì", chr 236), + ("í", chr 237), + ("î", chr 238), + ("ï", chr 239), + ("ð", chr 240), + ("ñ", chr 241), + ("ò", chr 242), + ("ó", chr 243), + ("ô", chr 244), + ("õ", chr 245), + ("ö", chr 246), + ("÷", chr 247), + ("ø", chr 248), + ("ù", chr 249), + ("ú", chr 250), + ("û", chr 251), + ("ü", chr 252), + ("ý", chr 253), + ("þ", chr 254), + ("ÿ", chr 255), + ("Œ", chr 338), + ("œ", chr 339), + ("Š", chr 352), + ("š", chr 353), + ("Ÿ", chr 376), + ("ƒ", chr 402), + ("ˆ", chr 710), + ("˜", chr 732), + ("Α", chr 913), + ("Β", chr 914), + ("Γ", chr 915), + ("Δ", chr 916), + ("Ε", chr 917), + ("Ζ", chr 918), + ("Η", chr 919), + ("Θ", chr 920), + ("Ι", chr 921), + ("Κ", chr 922), + ("Λ", chr 923), + ("Μ", chr 924), + ("Ν", chr 925), + ("Ξ", chr 926), + ("Ο", chr 927), + ("Π", chr 928), + ("Ρ", chr 929), + ("Σ", chr 931), + ("Τ", chr 932), + ("Υ", chr 933), + ("Φ", chr 934), + ("Χ", chr 935), + ("Ψ", chr 936), + ("Ω", chr 937), + ("α", chr 945), + ("β", chr 946), + ("γ", chr 947), + ("δ", chr 948), + ("ε", chr 949), + ("ζ", chr 950), + ("η", chr 951), + ("θ", chr 952), + ("ι", chr 953), + ("κ", chr 954), + ("λ", chr 955), + ("μ", chr 956), + ("ν", chr 957), + ("ξ", chr 958), + ("ο", chr 959), + ("π", chr 960), + ("ρ", chr 961), + ("ς", chr 962), + ("σ", chr 963), + ("τ", chr 964), + ("υ", chr 965), + ("φ", chr 966), + ("χ", chr 967), + ("ψ", chr 968), + ("ω", chr 969), + ("ϑ", chr 977), + ("ϒ", chr 978), + ("ϖ", chr 982), + (" ", chr 8194), + (" ", chr 8195), + (" ", chr 8201), + ("‌", chr 8204), + ("‍", chr 8205), + ("‎", chr 8206), + ("‏", chr 8207), + ("–", chr 8211), + ("—", chr 8212), + ("‘", chr 8216), + ("’", chr 8217), + ("‚", chr 8218), + ("“", chr 8220), + ("”", chr 8221), + ("„", chr 8222), + ("†", chr 8224), + ("‡", chr 8225), + ("•", chr 8226), + ("…", chr 8230), + ("‰", chr 8240), + ("′", chr 8242), + ("″", chr 8243), + ("‹", chr 8249), + ("›", chr 8250), + ("‾", chr 8254), + ("⁄", chr 8260), + ("€", chr 8364), + ("ℑ", chr 8465), + ("℘", chr 8472), + ("ℜ", chr 8476), + ("™", chr 8482), + ("ℵ", chr 8501), + ("←", chr 8592), + ("↑", chr 8593), + ("→", chr 8594), + ("↓", chr 8595), + ("↔", chr 8596), + ("↵", chr 8629), + ("⇐", chr 8656), + ("⇑", chr 8657), + ("⇒", chr 8658), + ("⇓", chr 8659), + ("⇔", chr 8660), + ("∀", chr 8704), + ("∂", chr 8706), + ("∃", chr 8707), + ("∅", chr 8709), + ("∇", chr 8711), + ("∈", chr 8712), + ("∉", chr 8713), + ("∋", chr 8715), + ("∏", chr 8719), + ("∑", chr 8721), + ("−", chr 8722), + ("∗", chr 8727), + ("√", chr 8730), + ("∝", chr 8733), + ("∞", chr 8734), + ("∠", chr 8736), + ("∧", chr 8743), + ("∨", chr 8744), + ("∩", chr 8745), + ("∪", chr 8746), + ("∫", chr 8747), + ("∴", chr 8756), + ("∼", chr 8764), + ("≅", chr 8773), + ("≈", chr 8776), + ("≠", chr 8800), + ("≡", chr 8801), + ("≤", chr 8804), + ("≥", chr 8805), + ("⊂", chr 8834), + ("⊃", chr 8835), + ("⊄", chr 8836), + ("⊆", chr 8838), + ("⊇", chr 8839), + ("⊕", chr 8853), + ("⊗", chr 8855), + ("⊥", chr 8869), + ("⋅", chr 8901), + ("⌈", chr 8968), + ("⌉", chr 8969), + ("⌊", chr 8970), + ("⌋", chr 8971), + ("⟨", chr 9001), + ("⟩", chr 9002), + ("◊", chr 9674), + ("♠", chr 9824), + ("♣", chr 9827), + ("♥", chr 9829), + ("♦", chr 9830) + ] diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs new file mode 100644 index 000000000..054d9eb72 --- /dev/null +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -0,0 +1,434 @@ +-- | Converts HTML to 'Pandoc' document. +module Text.Pandoc.Readers.HTML ( + readHtml, + rawHtmlInline, + rawHtmlBlock, + anyHtmlBlockTag, + anyHtmlInlineTag + ) where + +import Text.Regex ( matchRegex, mkRegex ) +import Text.ParserCombinators.Parsec +import Text.ParserCombinators.Pandoc +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.HtmlEntities ( decodeEntities, htmlEntityToChar ) +import Maybe ( fromMaybe ) +import Char ( toUpper, toLower ) + +-- | Convert HTML-formatted string to 'Pandoc' document. +readHtml :: ParserState -- ^ Parser state + -> String -- ^ String to parse + -> Pandoc +readHtml = readWith parseHtml + +-- for testing +testString :: String -> IO () +testString = testStringWith parseHtml + +-- +-- Constants +-- + +inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite", + "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q", + "s", "samp", "select", "small", "span", "strike", "strong", "sub", + "sup", "textarea", "tt", "u", "var"] + +-- +-- HTML utility functions +-- + +-- | Read blocks until end tag. +blocksTilEnd tag = try (do + blocks <- manyTill (do {b <- block; spaces; return b}) (htmlEndTag tag) + return blocks) + +-- | Read inlines until end tag. +inlinesTilEnd tag = try (do + inlines <- manyTill inline (htmlEndTag tag) + return inlines) + +-- extract type from a tag: e.g. br from
, < br >,
, etc. +extractTagType tag = case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of + Just [match] -> (map toLower match) + Nothing -> "" + +anyHtmlTag = try (do + char '<' + spaces + tag <- many1 alphaNum + attribs <- htmlAttributes + spaces + ender <- option "" (string "/") + let ender' = if (null ender) then "" else " /" + spaces + char '>' + return ("<" ++ tag ++ attribs ++ ender' ++ ">")) + +anyHtmlEndTag = try (do + char '<' + spaces + char '/' + spaces + tagType <- many1 alphaNum + spaces + char '>' + return ("")) + +htmlTag :: String -> GenParser Char st (String, [(String, String)]) +htmlTag tag = try (do + char '<' + spaces + stringAnyCase tag + attribs <- many htmlAttribute + spaces + option "" (string "/") + spaces + char '>' + return (tag, (map (\(name, content, raw) -> (name, content)) attribs))) + +-- parses a quoted html attribute value +quoted quoteChar = do + result <- between (char quoteChar) (char quoteChar) (many (noneOf [quoteChar])) + return (result, [quoteChar]) + +htmlAttributes = do + attrList <- many htmlAttribute + return (concatMap (\(name, content, raw) -> raw) attrList) + +htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute + +-- minimized boolean attribute (no = and value) +htmlMinimizedAttribute = try (do + spaces + name <- many1 (choice [letter, oneOf ".-_:"]) + spaces + notFollowedBy (char '=') + let content = name + return (name, content, (" " ++ name))) + +htmlRegularAttribute = try (do + spaces + name <- many1 (choice [letter, oneOf ".-_:"]) + spaces + char '=' + spaces + (content, quoteStr) <- choice [ (quoted '\''), + (quoted '"'), + (do{ a <- (many (alphaNum <|> (oneOf "-._:"))); + return (a,"")} ) ] + return (name, content, (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) + +htmlEndTag tag = try (do + char '<' + spaces + char '/' + spaces + stringAnyCase tag + spaces + char '>' + return ("")) + +-- | Returns @True@ if the tag is an inline tag. +isInline tag = (extractTagType tag) `elem` inlineHtmlTags + +anyHtmlBlockTag = try (do + tag <- choice [anyHtmlTag, anyHtmlEndTag] + if isInline tag then + fail "inline tag" + else + return tag) + +anyHtmlInlineTag = try (do + tag <- choice [ anyHtmlTag, anyHtmlEndTag ] + if isInline tag then + return tag + else + fail "not an inline tag") + +-- scripts must be treated differently, because they can contain <> etc. +htmlScript = try (do + open <- string "")) + +rawHtmlBlock = do + notFollowedBy (do {choice [htmlTag "/body", htmlTag "/html"]; return ' '}) + body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition] + sp <- (many space) + state <- getState + if stateParseRaw state then + return (RawHtml (body ++ sp)) + else + return Null + +htmlComment = try (do + string "")) + return ("")) + +-- +-- parsing documents +-- + +xmlDec = try (do + string "') + return ("")) + +definition = try (do + string "') + return ("")) + +nonTitleNonHead = try (do + notFollowedBy' (htmlTag "title") + notFollowedBy' (htmlTag "/head") + result <- choice [do {rawHtmlBlock; return ' '}, anyChar] + return result) + +parseTitle = try (do + (tag, attribs) <- htmlTag "title" + contents <- inlinesTilEnd tag + spaces + return contents) + +-- parse header and return meta-information (for now, just title) +parseHead = try (do + htmlTag "head" + spaces + skipMany nonTitleNonHead + contents <- option [] parseTitle + skipMany nonTitleNonHead + htmlTag "/head" + return (contents, [], "")) + +skipHtmlTag tag = option ("",[]) (htmlTag tag) + +-- h1 class="title" representation of title in body +bodyTitle = try (do + (tag, attribs) <- htmlTag "h1" + cl <- case (extractAttribute "class" attribs) of + Just "title" -> do {return ""} + otherwise -> fail "not title" + inlinesTilEnd "h1" + return "") + +parseHtml = do + sepEndBy (choice [xmlDec, definition, htmlComment]) spaces + skipHtmlTag "html" + spaces + (title, authors, date) <- option ([], [], "") parseHead + spaces + skipHtmlTag "body" + spaces + option "" bodyTitle -- skip title in body, because it's represented in meta + blocks <- parseBlocks + spaces + option "" (htmlEndTag "body") + spaces + option "" (htmlEndTag "html") + many anyChar -- ignore anything after + eof + state <- getState + let keyBlocks = stateKeyBlocks state + return (Pandoc (Meta title authors date) (blocks ++ (reverse keyBlocks))) + +-- +-- parsing blocks +-- + +parseBlocks = do + spaces + result <- sepEndBy block spaces + return result + +block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain, + rawHtmlBlock ] "block" + +-- +-- header blocks +-- + +header = choice (map headerLevel (enumFromTo 1 5)) "header" + +headerLevel n = try (do + let level = "h" ++ show n + (tag, attribs) <- htmlTag level + contents <- inlinesTilEnd level + return (Header n (normalizeSpaces contents))) + +-- +-- hrule block +-- + +hrule = try (do + (tag, attribs) <- htmlTag "hr" + state <- getState + if (not (null attribs)) && (stateParseRaw state) then + unexpected "attributes in hr" -- in this case we want to parse it as raw html + else + return HorizontalRule) + +-- +-- code blocks +-- + +codeBlock = choice [ preCodeBlock, bareCodeBlock ] "code block" + +preCodeBlock = try (do + htmlTag "pre" + spaces + htmlTag "code" + result <- manyTill anyChar (htmlEndTag "code") + spaces + htmlEndTag "pre" + return (CodeBlock (decodeEntities result))) + +bareCodeBlock = try (do + htmlTag "code" + result <- manyTill anyChar (htmlEndTag "code") + return (CodeBlock (decodeEntities result))) + +-- +-- block quotes +-- + +blockQuote = try (do + tag <- htmlTag "blockquote" + spaces + blocks <- blocksTilEnd "blockquote" + return (BlockQuote blocks)) + +-- +-- list blocks +-- + +list = choice [ bulletList, orderedList ] "list" + +orderedList = try (do + tag <- htmlTag "ol" + spaces + items <- sepEndBy1 listItem spaces + htmlEndTag "ol" + return (OrderedList items)) + +bulletList = try (do + tag <- htmlTag "ul" + spaces + items <- sepEndBy1 listItem spaces + htmlEndTag "ul" + return (BulletList items)) + +listItem = try (do + tag <- htmlTag "li" + spaces + blocks <- blocksTilEnd "li" + return blocks) + +-- +-- paragraph block +-- + +para = try (do + tag <- htmlTag "p" + result <- inlinesTilEnd "p" + return (Para (normalizeSpaces result))) + +-- +-- plain block +-- + +plain = do + result <- many1 inline + return (Plain (normalizeSpaces result)) + +-- +-- inline +-- + +inline = choice [ text, special ] "inline" + +text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] "text" + +special = choice [ link, image, rawHtmlInline ] "link, inline html, or image" + +entity = try (do + char '&' + body <- choice [(many1 letter), + (try (do{ char '#'; num <- many1 digit; return ("#" ++ num)}))] + char ';' + return (Str [fromMaybe '?' (htmlEntityToChar ("&" ++ body ++ ";"))])) + +code = try (do + htmlTag "code" + result <- manyTill anyChar (htmlEndTag "code") + -- remove internal line breaks, leading and trailing space, and decode entities + let result' = decodeEntities $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result + return (Code result')) + +rawHtmlInline = do + result <- choice [htmlScript, anyHtmlInlineTag] + state <- getState + if stateParseRaw state then + return (HtmlInline result) + else + return (Str "") + +betweenTags tag = try (do + htmlTag tag + result <- inlinesTilEnd tag + return (normalizeSpaces result)) + +emph = try (do + result <- choice [betweenTags "em", betweenTags "it"] + return (Emph result)) + +strong = try (do + result <- choice [betweenTags "b", betweenTags "strong"] + return (Strong result)) + +whitespace = do + many1 space + return Space + +-- hard line break +linebreak = do + htmlTag "br" + return LineBreak + +str = do + result <- many1 (noneOf "<& \t\n") + return (Str (decodeEntities result)) + +-- +-- links and images +-- + +-- extract contents of attribute (attribute names are case-insensitive) +extractAttribute name [] = Nothing +extractAttribute name ((attrName, contents):rest) = + let name' = map toLower name + attrName' = map toLower attrName in + if (attrName' == name') then Just contents else extractAttribute name rest + +link = try (do + (tag, attributes) <- htmlTag "a" + url <- case (extractAttribute "href" attributes) of + Just url -> do {return url} + Nothing -> fail "no href" + let title = fromMaybe "" (extractAttribute "title" attributes) + label <- inlinesTilEnd "a" + ref <- generateReference url title + return (Link (normalizeSpaces label) ref)) + +image = try (do + (tag, attributes) <- htmlTag "img" + url <- case (extractAttribute "src" attributes) of + Just url -> do {return url} + Nothing -> fail "no src" + let title = fromMaybe "" (extractAttribute "title" attributes) + let alt = fromMaybe "" (extractAttribute "alt" attributes) + ref <- generateReference url title + return (Image [Str alt] ref)) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs new file mode 100644 index 000000000..3bf3dfd23 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -0,0 +1,585 @@ +-- | Converts LaTeX to 'Pandoc' document. +module Text.Pandoc.Readers.LaTeX ( + readLaTeX, + rawLaTeXInline, + rawLaTeXEnvironment + ) where + +import Text.ParserCombinators.Parsec +import Text.ParserCombinators.Pandoc +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Maybe ( fromMaybe ) +import Char ( chr ) + +-- | Parse LaTeX from string and return 'Pandoc' document. +readLaTeX :: ParserState -- ^ Parser state, including options for parser + -> String -- ^ String to parse + -> Pandoc +readLaTeX = readWith parseLaTeX + +-- for testing +testString = testStringWith parseLaTeX + +-- characters with special meaning +specialChars = "\\$%&^&_~#{}\n \t|<>" + +-- +-- utility functions +-- + +-- | Change quotation marks in a string back to "basic" quotes. +normalizeQuotes :: String -> String +normalizeQuotes = gsub "''" "\"" . gsub "`" "'" + +-- | Change LaTeX En dashes between digits to hyphens. +normalizeDashes :: String -> String +normalizeDashes = gsub "([0-9])--([0-9])" "\\1-\\2" + +normalizePunctuation :: String -> String +normalizePunctuation = normalizeDashes . normalizeQuotes + +-- | Returns command option (between []) if any, or empty string. +commandOpt = option "" (between (char '[') (char ']') (many1 (noneOf "]"))) + +-- | Returns text between brackets and its matching pair. +bracketedText = try (do + char '{' + result <- many (choice [ try (do{ char '\\'; + b <- oneOf "{}"; + return (['\\', b])}), -- escaped bracket + count 1 (noneOf "{}"), + do {text <- bracketedText; return ("{" ++ text ++ "}")} ]) + char '}' + return (concat result)) + +-- | Parses list of arguments of LaTeX command. +commandArgs = many bracketedText + +-- | Parses LaTeX command, returns (name, star, option, list of arguments). +command = try (do + char '\\' + name <- many1 alphaNum + star <- option "" (string "*") -- some commands have starred versions + opt <- commandOpt + args <- commandArgs + return (name, star, opt, args)) + +begin name = try (do + string "\\begin{" + string name + char '}' + option "" commandOpt + option [] commandArgs + spaces + return name) + +end name = try (do + string "\\end{" + string name + char '}' + spaces + return name) + +-- | Returns a list of block elements containing the contents of an environment. +environment name = try (do + begin name + spaces + contents <- manyTill block (end name) + return contents) + +anyEnvironment = try (do + string "\\begin{" + name <- many alphaNum + star <- option "" (string "*") -- some environments have starred variants + char '}' + option "" commandOpt + option [] commandArgs + spaces + contents <- manyTill block (end (name ++ star)) + return (BlockQuote contents)) + +-- +-- parsing documents +-- + +-- | Skip everything up through \begin{document} +skipLaTeXHeader = try (do + manyTill anyChar (begin "document") + spaces + return "") + +-- | Parse LaTeX and return 'Pandoc'. +parseLaTeX = do + option "" skipLaTeXHeader -- if parsing a fragment, this might not be present + blocks <- parseBlocks + spaces + option "" (string "\\end{document}") -- if parsing a fragment, this might not be present + spaces + eof + state <- getState + let keyBlocks = stateKeyBlocks state + let noteBlocks = stateNoteBlocks state + let blocks' = filter (/= Null) blocks + return (Pandoc (Meta [] [] "") (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks))) + +-- +-- parsing blocks +-- + +parseBlocks = do + spaces + result <- many block + return result + +block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, comment, + bibliographic, para, specialEnvironment, itemBlock, unknownEnvironment, + unknownCommand ] "block" + +-- +-- header blocks +-- + +header = choice (map headerLevel (enumFromTo 1 5)) "header" + +headerLevel n = try (do + let subs = concat $ replicate (n - 1) "sub" + string ("\\" ++ subs ++ "section") + option ' ' (char '*') + char '{' + title <- manyTill inline (char '}') + spaces + return (Header n (normalizeSpaces title))) + +-- +-- hrule block +-- + +hrule = try (do + oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", "\\newpage" ] + spaces + return HorizontalRule) + +-- +-- code blocks +-- + +codeBlock = try (do + string "\\begin{verbatim}" -- don't use begin function because it gobbles whitespace + option "" blanklines -- we want to gobble blank lines, but not leading space + contents <- manyTill anyChar (try (string "\\end{verbatim}")) + spaces + return (CodeBlock (stripTrailingNewlines contents))) + +-- +-- block quotes +-- + +blockQuote = choice [ blockQuote1, blockQuote2 ] "blockquote" + +blockQuote1 = try (do + blocks <- environment "quote" + spaces + return (BlockQuote blocks)) + +blockQuote2 = try (do + blocks <- environment "quotation" + spaces + return (BlockQuote blocks)) + +-- +-- math block +-- + +mathBlock = mathBlockWith (begin "equation") (end "equation") <|> + mathBlockWith (begin "displaymath") (end "displaymath") <|> + mathBlockWith (string "\\[") (string "\\]") "math block" + +mathBlockWith start end = try (do + start + spaces + result <- manyTill anyChar end + spaces + return (BlockQuote [Para [TeX ("$" ++ result ++ "$")]])) + +-- +-- list blocks +-- + +list = bulletList <|> orderedList "list" + +listItem = try (do + ("item", _, _, _) <- command + spaces + state <- getState + let oldParserContext = stateParserContext state + updateState (\state -> state {stateParserContext = ListItemState}) + blocks <- many block + updateState (\state -> state {stateParserContext = oldParserContext}) + return blocks) + +orderedList = try (do + begin "enumerate" + spaces + items <- many listItem + end "enumerate" + spaces + return (OrderedList items)) + +bulletList = try (do + begin "itemize" + spaces + items <- many listItem + end "itemize" + spaces + return (BulletList items)) + +-- +-- paragraph block +-- + +para = try (do + result <- many1 inline + spaces + return (Para (normalizeSpaces result))) + +-- +-- title authors date +-- + +bibliographic = choice [ maketitle, title, authors, date ] + +maketitle = try (do + string "\\maketitle" + spaces + return Null) + +title = try (do + string "\\title{" + tit <- manyTill inline (char '}') + spaces + updateState (\state -> state { stateTitle = tit }) + return Null) + +authors = try (do + string "\\author{" + authors <- manyTill anyChar (char '}') + spaces + let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\" "\n" authors + updateState (\state -> state { stateAuthors = authors' }) + return Null) + +date = try (do + string "\\date{" + date' <- manyTill anyChar (char '}') + spaces + updateState (\state -> state { stateDate = date' }) + return Null) + +-- +-- item block +-- for use in unknown environments that aren't being parsed as raw latex +-- + +-- this forces items to be parsed in different blocks +itemBlock = try (do + ("item", _, opt, _) <- command + state <- getState + if (stateParserContext state == ListItemState) then + fail "item should be handled by list block" + else + if null opt then + return Null + else + return (Plain [Str opt])) + +-- +-- raw LaTeX +-- + +specialEnvironment = do -- these are always parsed as raw + followedBy' (choice (map (\name -> begin name) ["tabular", "figure", "tabbing", "eqnarry", + "picture", "table", "verse", "theorem"])) + rawLaTeXEnvironment + +-- | Parse any LaTeX environment and return a Para block containing +-- the whole literal environment as raw TeX. +rawLaTeXEnvironment :: GenParser Char st Block +rawLaTeXEnvironment = try (do + string "\\begin" + char '{' + name <- many1 alphaNum + star <- option "" (string "*") -- for starred variants + let name' = name ++ star + char '}' + opt <- option "" commandOpt + args <- option [] commandArgs + let optStr = if (null opt) then "" else "[" ++ opt ++ "]" + let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args + contents <- manyTill (choice [(many1 (noneOf "\\")), + (do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }), + string "\\"]) (end name') + spaces + return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ optStr ++ argStr ++ + (concat contents) ++ "\\end{" ++ name' ++ "}")])) + +unknownEnvironment = try (do + state <- getState + result <- if stateParseRaw state then -- check to see whether we should include raw TeX + rawLaTeXEnvironment -- if so, get the whole raw environment + else + anyEnvironment -- otherwise just the contents + return result) + +unknownCommand = try (do + notFollowedBy' (string "\\end{itemize}") + notFollowedBy' (string "\\end{enumerate}") + notFollowedBy' (string "\\end{document}") + (name, star, opt, args) <- command + spaces + let optStr = if null opt then "" else "[" ++ opt ++ "]" + let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args + state <- getState + if (name == "item") && ((stateParserContext state) == ListItemState) then + fail "should not be parsed as raw" + else + string "" + if stateParseRaw state then + return (Plain [TeX ("\\" ++ name ++ star ++ optStr ++ argStr)]) + else + return (Plain [Str (joinWithSep " " args)])) + +-- latex comment +comment = try (do + char '%' + result <- manyTill anyChar newline + spaces + return Null) + +-- +-- inline +-- + +inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, accentedChar, + specialChar, specialInline, escapedChar, unescapedChar, str, + endline, whitespace ] "inline" + +specialInline = choice [ link, image, footnote, rawLaTeXInline ] + "link, raw TeX, note, or image" + +ldots = try (do + string "\\ldots" + return (Str "...")) + +accentedChar = normalAccentedChar <|> specialAccentedChar + +normalAccentedChar = try (do + char '\\' + accent <- oneOf "'`^\"~" + character <- choice [ between (char '{') (char '}') anyChar, anyChar ] + let table = fromMaybe [] $ lookup character accentTable + let result = case lookup accent table of + Just num -> chr num + Nothing -> '?' + return (Str [result])) + +-- an association list of letters and association list of accents +-- and decimal character numbers. +accentTable = + [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]), + ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]), + ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]), + ('N', [('~', 209)]), + ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]), + ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]), + ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]), + ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]), + ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]), + ('n', [('~', 241)]), + ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), + ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] + +specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, oslash, pound, + euro, copyright, sect ] + +ccedil = try (do + char '\\' + letter <- choice [try (string "cc"), try (string "cC")] + let num = if letter == "cc" then 231 else 199 + return (Str [chr num])) + +aring = try (do + char '\\' + letter <- choice [try (string "aa"), try (string "AA")] + let num = if letter == "aa" then 229 else 197 + return (Str [chr num])) + +iuml = try (do + string "\\\"" + choice [try (string "\\i"), try (string "{\\i}")] + return (Str [chr 239])) + +icirc = try (do + string "\\^" + choice [try (string "\\i"), try (string "{\\i}")] + return (Str [chr 238])) + +szlig = try (do + string "\\ss" + return (Str [chr 223])) + +oslash = try (do + char '\\' + letter <- choice [char 'o', char 'O'] + let num = if letter == 'o' then 248 else 216 + return (Str [chr num])) + +aelig = try (do + char '\\' + letter <- choice [try (string "ae"), try (string "AE")] + let num = if letter == "ae" then 230 else 198 + return (Str [chr num])) + +pound = try (do + string "\\pounds" + return (Str [chr 163])) + +euro = try (do + string "\\euro" + return (Str [chr 8364])) + +copyright = try (do + string "\\copyright" + return (Str [chr 169])) + +sect = try (do + string "\\S" + return (Str [chr 167])) + +escapedChar = escaped (oneOf " $%^&_#{}") + +unescapedChar = do -- ignore standalone, nonescaped special characters + oneOf "$^&_#{}|<>" + return (Str "") + +specialChar = choice [ backslash, bar, lt, gt ] + +backslash = try (do + string "\\textbackslash" + return (Str "\\")) + +bar = try (do + string "\\textbar" + return (Str "\\")) + +lt = try (do + string "\\textless" + return (Str "<")) + +gt = try (do + string "\\textgreater" + return (Str ">")) + +code = try (do + string "\\verb" + marker <- anyChar + result <- manyTill anyChar (char marker) + let result' = removeLeadingTrailingSpace result + return (Code result')) + +emph = try (do + oneOfStrings [ "\\emph{", "\\textit{" ] + result <- manyTill inline (char '}') + return (Emph result)) + +lab = try (do + string "\\label{" + result <- manyTill anyChar (char '}') + return (Str ("(" ++ result ++ ")"))) + +ref = try (do + string "\\ref{" + result <- manyTill anyChar (char '}') + return (Str (result))) + +strong = try (do + string "\\textbf{" + result <- manyTill inline (char '}') + return (Strong result)) + +whitespace = do + many1 (oneOf "~ \t") + return Space + +-- hard line break +linebreak = try (do + string "\\\\" + return LineBreak) + +str = do + result <- many1 (noneOf specialChars) + return (Str (normalizePunctuation result)) + +-- endline internal to paragraph +endline = try (do + newline + notFollowedBy blankline + return Space) + +-- math +math = math1 <|> math2 "math" + +math1 = try (do + char '$' + result <- many (noneOf "$") + char '$' + return (TeX ("$" ++ result ++ "$"))) + +math2 = try (do + string "\\(" + result <- many (noneOf "$") + string "\\)" + return (TeX ("$" ++ result ++ "$"))) + +-- +-- links and images +-- + +link = try (do + string "\\href{" + url <- manyTill anyChar (char '}') + char '{' + label <- manyTill inline (char '}') + ref <- generateReference url "" + return (Link (normalizeSpaces label) ref)) + +image = try (do + ("includegraphics", _, _, (src:lst)) <- command + return (Image [Str "image"] (Src src ""))) + +footnote = try (do + ("footnote", _, _, (contents:[])) <- command + let blocks = case runParser parseBlocks defaultParserState "footnote" contents of + Left err -> error $ "Input:\n" ++ show contents ++ + "\nError:\n" ++ show err + Right result -> result + state <- getState + let notes = stateNoteBlocks state + let nextRef = case notes of + [] -> "1" + (Note ref body):rest -> (show ((read ref) + 1)) + setState (state { stateNoteBlocks = (Note nextRef blocks):notes }) + return (NoteRef nextRef)) + +-- | Parse any LaTeX command and return it in a raw TeX inline element. +rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline = try (do + (name, star, opt, args) <- command + let optStr = if (null opt) then "" else "[" ++ opt ++ "]" + let argStr = concatMap (\arg -> "{" ++ arg ++ "}") args + state <- getState + if ((name == "begin") || (name == "end") || (name == "item")) then + fail "not an inline command" + else + string "" + return (TeX ("\\" ++ name ++ star ++ optStr ++ argStr))) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs new file mode 100644 index 000000000..60ac40fd7 --- /dev/null +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -0,0 +1,582 @@ +-- | Convert markdown to Pandoc document. +module Text.Pandoc.Readers.Markdown ( + readMarkdown + ) where + +import Text.ParserCombinators.Pandoc +import Text.Pandoc.Definition +import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) +import Text.Pandoc.Shared +import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock, anyHtmlBlockTag, + anyHtmlInlineTag ) +import Text.Pandoc.HtmlEntities ( decodeEntities ) +import Text.Regex ( matchRegex, mkRegex ) +import Text.ParserCombinators.Parsec + +-- | Read markdown from an input string and return a Pandoc document. +readMarkdown :: ParserState -> String -> Pandoc +readMarkdown = readWith parseMarkdown + +-- | Parse markdown string with default options and print result (for testing). +testString :: String -> IO () +testString = testStringWith parseMarkdown + +-- +-- Constants and data structure definitions +-- + +spaceChars = " \t" +endLineChars = "\n" +labelStart = '[' +labelEnd = ']' +labelSep = ':' +srcStart = '(' +srcEnd = ')' +imageStart = '!' +noteStart = '^' +codeStart = '`' +codeEnd = '`' +emphStart = '*' +emphEnd = '*' +emphStartAlt = '_' +emphEndAlt = '_' +autoLinkStart = '<' +autoLinkEnd = '>' +mathStart = '$' +mathEnd = '$' +bulletListMarkers = "*+-" +orderedListDelimiters = "." +escapeChar = '\\' +hruleChars = "*-_" +quoteChars = "'\"" +atxHChar = '#' +titleOpeners = "\"'(" +setextHChars = ['=','-'] +blockQuoteChar = '>' +hyphenChar = '-' + +-- treat these as potentially non-text when parsing inline: +specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, emphStartAlt, + emphEndAlt, codeStart, codeEnd, autoLinkEnd, autoLinkStart, mathStart, + mathEnd, imageStart, noteStart, hyphenChar] + +-- +-- auxiliary functions +-- + +-- | Skip a single endline if there is one. +skipEndline = option Space endline + +indentSpaces = do + state <- getState + let tabStop = stateTabStop state + oneOfStrings [ "\t", (replicate tabStop ' ') ] "indentation" + +skipNonindentSpaces = do + state <- getState + let tabStop = stateTabStop state + choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)])) + +-- +-- document structure +-- + +titleLine = try (do + char '%' + skipSpaces + line <- manyTill inline newline + return line) + +authorsLine = try (do + char '%' + skipSpaces + authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") + newline + return (map removeLeadingTrailingSpace authors)) + +dateLine = try (do + char '%' + skipSpaces + date <- many (noneOf "\n") + newline + return (removeTrailingSpace date)) + +titleBlock = try (do + title <- option [] titleLine + author <- option [] authorsLine + date <- option "" dateLine + option "" blanklines + return (title, author, date)) + +parseMarkdown = do + updateState (\state -> state { stateParseRaw = True }) -- need to parse raw HTML + (title, author, date) <- option ([],[],"") titleBlock + blocks <- parseBlocks + state <- getState + let keys = reverse $ stateKeyBlocks state + return (Pandoc (Meta title author date) (blocks ++ keys)) + +-- +-- parsing blocks +-- + +parseBlocks = do + result <- manyTill block eof + return result + +block = choice [ codeBlock, referenceKey, note, header, hrule, list, blockQuote, rawHtmlBlocks, + rawLaTeXEnvironment, para, plain, blankBlock, nullBlock ] "block" + +-- +-- header blocks +-- + +header = choice [ setextHeader, atxHeader ] "header" + +atxHeader = try (do + lead <- many1 (char atxHChar) + skipSpaces + txt <- many1 (do {notFollowedBy' atxClosing; inline}) + atxClosing + return (Header (length lead) (normalizeSpaces txt))) + +atxClosing = try (do + skipMany (char atxHChar) + skipSpaces + newline + option "" blanklines) + +setextHeader = choice (map (\x -> setextH x) (enumFromTo 1 (length setextHChars))) + +setextH n = try (do + txt <- many1 (do {notFollowedBy newline; inline}) + endline + many1 (char (setextHChars !! (n-1))) + skipSpaces + newline + option "" blanklines + return (Header n (normalizeSpaces txt))) + +-- +-- hrule block +-- + +hruleWith chr = + try (do + skipSpaces + char chr + skipSpaces + char chr + skipSpaces + char chr + skipMany (oneOf (chr:spaceChars)) + newline + option "" blanklines + return HorizontalRule) + +hrule = choice (map hruleWith hruleChars) "hrule" + +-- +-- code blocks +-- + +indentedLine = try (do + indentSpaces + result <- manyTill anyChar newline + return (result ++ "\n")) + +-- two or more indented lines, possibly separated by blank lines +indentedBlock = try (do + res1 <- indentedLine + blanks <- many blankline + res2 <- choice [indentedBlock, indentedLine] + return (res1 ++ blanks ++ res2)) + +codeBlock = do + result <- choice [indentedBlock, indentedLine] + option "" blanklines + return (CodeBlock result) + +-- +-- note block +-- + +note = try (do + (NoteRef ref) <- noteRef + skipSpaces + raw <- sepBy (many (choice [nonEndline, + (try (do {endline; notFollowedBy (char noteStart); return '\n'})) + ])) (try (do {newline; char noteStart; option ' ' (char ' ')})) + newline + blanklines + -- parse the extracted block, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err + Right result -> result + return (Note ref parsed)) + +-- +-- block quotes +-- + +emacsBoxQuote = try (do + string ",----" + manyTill anyChar newline + raw <- manyTill (try (do{ char '|'; + option ' ' (char ' '); + result <- manyTill anyChar newline; + return result})) + (string "`----") + manyTill anyChar newline + option "" blanklines + return raw) + +emailBlockQuoteStart = try (do + skipNonindentSpaces + char blockQuoteChar + option ' ' (char ' ') + return "> ") + +emailBlockQuote = try (do + emailBlockQuoteStart + raw <- sepBy (many (choice [nonEndline, + (try (do{ endline; + notFollowedBy' emailBlockQuoteStart; + return '\n'}))])) + (try (do {newline; emailBlockQuoteStart})) + newline <|> (do{ eof; return '\n'}) + option "" blanklines + return raw) + +blockQuote = do + raw <- choice [ emailBlockQuote, emacsBoxQuote ] + -- parse the extracted block, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err + Right result -> result + return (BlockQuote parsed) + +-- +-- list blocks +-- + +list = choice [ bulletList, orderedList ] "list" + +bulletListStart = + try (do + option ' ' newline -- if preceded by a Plain block in a list context + skipNonindentSpaces + notFollowedBy' hrule -- because hrules start out just like lists + oneOf bulletListMarkers + spaceChar + skipSpaces) + +orderedListStart = + try (do + option ' ' newline -- if preceded by a Plain block in a list context + skipNonindentSpaces + many1 digit + oneOf orderedListDelimiters + oneOf spaceChars + skipSpaces) + +-- parse a line of a list item (start = parser for beginning of list item) +listLine start = try (do + notFollowedBy' start + notFollowedBy blankline + notFollowedBy' (try (do{ indentSpaces; + many (spaceChar); + choice [bulletListStart, orderedListStart]})) + line <- manyTill anyChar newline + return (line ++ "\n")) + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem start = + try (do + start + result <- many1 (listLine start) + blanks <- many blankline + return ((concat result) ++ blanks)) + +-- continuation of a list item - indented and separated by blankline +-- or (in compact lists) endline. +-- note: nested lists are parsed as continuations +listContinuation start = + try (do + followedBy' indentSpaces + result <- many1 (listContinuationLine start) + blanks <- many blankline + return ((concat result) ++ blanks)) + +listContinuationLine start = try (do + notFollowedBy blankline + notFollowedBy' start + option "" indentSpaces + result <- manyTill anyChar newline + return (result ++ "\n")) + +listItem start = + try (do + first <- rawListItem start + rest <- many (listContinuation start) + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState}) + "block" raw of + Left err -> error $ "Raw block:\n" ++ raw ++ "\nError:\n" ++ show err + Right result -> result + where raw = concat (first:rest) + return parsed) + +orderedList = + try (do + items <- many1 (listItem orderedListStart) + let items' = compactify items + return (OrderedList items')) + +bulletList = + try (do + items <- many1 (listItem bulletListStart) + let items' = compactify items + return (BulletList items')) + +-- +-- paragraph block +-- + +para = try (do + result <- many1 inline + newline + choice [ (do{ followedBy' (oneOfStrings [">", ",----"]); return "" }), blanklines ] + let result' = normalizeSpaces result + return (Para result')) + +plain = do + result <- many1 inline + let result' = normalizeSpaces result + return (Plain result') + +-- +-- raw html +-- + +rawHtmlBlocks = try (do + htmlBlocks <- many1 rawHtmlBlock + let combined = concatMap (\(RawHtml str) -> str) htmlBlocks + let combined' = if (last combined == '\n') then + init combined -- strip extra newline + else + combined + return (RawHtml combined')) + +-- +-- reference key +-- + +referenceKey = + try (do + skipSpaces + label <- reference + char labelSep + skipSpaces + option ' ' (char autoLinkStart) + src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars)) + option ' ' (char autoLinkEnd) + tit <- option "" title + blanklines + return (Key label (Src (removeTrailingSpace src) tit))) + +-- +-- inline +-- + +text = choice [ math, strong, emph, code2, code1, str, linebreak, tabchar, + whitespace, endline ] "text" + +inline = choice [ rawLaTeXInline, escapedChar, special, hyphens, text, ltSign, symbol ] "inline" + +special = choice [ link, referenceLink, rawHtmlInline, autoLink, + image, noteRef ] "link, inline html, note, or image" + +escapedChar = escaped anyChar + +ltSign = do + notFollowedBy' rawHtmlBlocks -- don't return < if it starts html + char '<' + return (Str ['<']) + +specialCharsMinusLt = filter (/= '<') specialChars + +symbol = do + result <- oneOf specialCharsMinusLt + return (Str [result]) + +hyphens = try (do + result <- many1 (char '-') + if (length result) == 1 then + skipEndline -- don't want to treat endline after hyphen as a space + else + do{ string ""; return Space } + return (Str result)) + +-- parses inline code, between codeStart and codeEnd +code1 = + try (do + char codeStart + result <- many (noneOf [codeEnd]) + char codeEnd + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines + return (Code result')) + +-- parses inline code, between 2 codeStarts and 2 codeEnds +code2 = + try (do + string [codeStart, codeStart] + result <- manyTill anyChar (try (string [codeEnd, codeEnd])) + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines + return (Code result')) + +mathWord = many1 (choice [(noneOf (" \t\n\\" ++ [mathEnd])), (try (do {c <- char '\\'; notFollowedBy (char mathEnd); return c}))]) + +math = try (do + char mathStart + notFollowedBy space + words <- sepBy1 mathWord (many1 space) + char mathEnd + return (TeX ("$" ++ (joinWithSep " " words) ++ "$"))) + +emph = do + result <- choice [ (enclosed (char emphStart) (char emphEnd) inline), + (enclosed (char emphStartAlt) (char emphEndAlt) inline) ] + return (Emph (normalizeSpaces result)) + +strong = do + result <- choice [ (enclosed (count 2 (char emphStart)) (count 2 (char emphEnd)) inline), + (enclosed (count 2 (char emphStartAlt)) (count 2 (char emphEndAlt)) inline)] + return (Strong (normalizeSpaces result)) + +whitespace = do + many1 (oneOf spaceChars) "whitespace" + return Space + +tabchar = do + tab + return (Str "\t") + +-- hard line break +linebreak = try (do + oneOf spaceChars + many1 (oneOf spaceChars) + endline + return LineBreak ) + +nonEndline = noneOf endLineChars + +str = do + result <- many1 ((noneOf (specialChars ++ spaceChars ++ endLineChars))) + return (Str (decodeEntities result)) + +-- an endline character that can be treated as a space, not a structural break +endline = + try (do + newline + -- next line would allow block quotes without preceding blank line + -- Markdown.pl does allow this, but there's a chance of a wrapped + -- greater-than sign triggering a block quote by accident... +-- notFollowedBy (try (do { choice [emailBlockQuoteStart, string ",----"]; return ' ' })) + notFollowedBy blankline + -- parse potential list starts at beginning of line differently if in a list: + st <- getState + if (stateParserContext st) == ListItemState then + do + notFollowedBy' orderedListStart + notFollowedBy' bulletListStart + else + option () pzero + return Space) + +-- +-- links +-- + +-- a reference label for a link +reference = do + char labelStart + label <- manyTill inline (char labelEnd) + return (normalizeSpaces label) + +-- source for a link, with optional title +source = + try (do + char srcStart + option ' ' (char autoLinkStart) + src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners)) + option ' ' (char autoLinkEnd) + tit <- option "" title + skipSpaces + char srcEnd + return (Src (removeTrailingSpace src) tit)) + +titleWith startChar endChar = + try (do + skipSpaces + skipEndline -- a title can be on the next line from the source + skipSpaces + char startChar + tit <- manyTill (choice [ try (do {char '\\'; char endChar}), + (noneOf (endChar:endLineChars)) ]) (char endChar) + let tit' = gsub "\"" """ tit + return tit') + +title = choice [titleWith '(' ')', titleWith '"' '"', titleWith '\'' '\''] "title" + +link = choice [explicitLink, referenceLink] "link" + +explicitLink = + try (do + label <- reference + src <- source + return (Link label src)) + +referenceLink = choice [referenceLinkDouble, referenceLinkSingle] + +referenceLinkDouble = -- a link like [this][/url/] + try (do + label <- reference + skipSpaces + skipEndline + skipSpaces + ref <- reference + return (Link label (Ref ref))) + +referenceLinkSingle = -- a link like [this] + try (do + label <- reference + return (Link label (Ref []))) + +autoLink = -- a link + try (do + notFollowedBy (do {anyHtmlBlockTag; return ' '}) + src <- between (char autoLinkStart) (char autoLinkEnd) + (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd]))) + case (matchRegex emailAddress src) of + Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) "")) + Nothing -> return (Link [Str src] (Src src ""))) + +emailAddress = mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace + +image = + try (do + char imageStart + (Link label src) <- link + return (Image label src)) + +noteRef = try (do + char noteStart + ref <- between (char '(') (char ')') (many1 (noneOf " \t\n)")) + return (NoteRef ref)) + diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs new file mode 100644 index 000000000..82e5ea303 --- /dev/null +++ b/src/Text/Pandoc/Readers/RST.hs @@ -0,0 +1,644 @@ +-- | Parse reStructuredText and return Pandoc document. +module Text.Pandoc.Readers.RST ( + readRST + ) where +import Text.Pandoc.Definition +import Text.ParserCombinators.Pandoc +import Text.Pandoc.Shared +import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag ) +import Text.Regex ( matchRegex, mkRegex ) +import Text.ParserCombinators.Parsec +import Data.Maybe ( fromMaybe ) +import List ( findIndex ) +import Char ( toUpper ) + +-- | Parse reStructuredText string and return Pandoc document. +readRST :: ParserState -> String -> Pandoc +readRST = readWith parseRST + +-- | Parse a string and print result (for testing). +testString :: String -> IO () +testString = testStringWith parseRST + +-- +-- Constants and data structure definitions +--- + +bulletListMarkers = "*+-" +underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~" + +-- treat these as potentially non-text when parsing inline: +specialChars = "\\`|*_<>$:[-" + +-- +-- parsing documents +-- + +isAnonKeyBlock block = case block of + (Key [Str "_"] str) -> True + otherwise -> False + +isNotAnonKeyBlock block = not (isAnonKeyBlock block) + +isHeader1 :: Block -> Bool +isHeader1 (Header 1 _) = True +isHeader1 _ = False + +isHeader2 :: Block -> Bool +isHeader2 (Header 2 _) = True +isHeader2 _ = False + +-- | Promote all headers in a list of blocks. (Part of +-- title transformation for RST.) +promoteHeaders :: Int -> [Block] -> [Block] +promoteHeaders num ((Header level text):rest) = + (Header (level - num) text):(promoteHeaders num rest) +promoteHeaders num (other:rest) = other:(promoteHeaders num rest) +promoteHeaders num [] = [] + +-- | If list of blocks starts with a header (or a header and subheader) +-- of level that are not found elsewhere, return it as a title and +-- promote all the other headers. +titleTransform :: [Block] -- ^ list of blocks + -> ([Block], [Inline]) -- ^ modified list of blocks, title +titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title and subtitle + if (any isHeader1 rest) || (any isHeader2 rest) then + ((Header 1 head1):(Header 2 head2):rest, []) + else + ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) +titleTransform ((Header 1 head1):rest) = -- title, no subtitle + if (any isHeader1 rest) then + ((Header 1 head1):rest, []) + else + ((promoteHeaders 1 rest), head1) +titleTransform blocks = (blocks, []) + +parseRST = do + state <- getState + input <- getInput + blocks <- parseBlocks -- first pass + let anonymousKeys = filter isAnonKeyBlock blocks + let blocks' = if (null anonymousKeys) then + blocks + else -- run parser again to fill in anonymous links... + case runParser parseBlocks (state { stateKeyBlocks = anonymousKeys }) + "RST source, second pass" input of + Left err -> error $ "\nError:\n" ++ show err + Right result -> (filter isNotAnonKeyBlock result) + let (blocks'', title) = if stateStandalone state then + titleTransform blocks' + else + (blocks', []) + state <- getState + let authors = stateAuthors state + let date = stateDate state + let title' = if (null title) then (stateTitle state) else title + return (Pandoc (Meta title' authors date) blocks'') + +-- +-- parsing blocks +-- + +parseBlocks = do + result <- manyTill block eof + return result + +block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, referenceKey, + imageBlock, unknownDirective, header, hrule, list, fieldList, lineBlock, + para, plain, blankBlock, nullBlock ] "block" + +-- +-- field list +-- + +fieldListItem = try (do + char ':' + name <- many1 alphaNum + string ": " + skipSpaces + first <- manyTill anyChar newline + rest <- many (do{ notFollowedBy (char ':'); + notFollowedBy blankline; + skipSpaces; + manyTill anyChar newline }) + return (name, (joinWithSep " " (first:rest)))) + +fieldList = try (do + items <- many1 fieldListItem + blanklines + let authors = case (lookup "Authors" items) of + Just auth -> [auth] + Nothing -> map snd (filter (\(x,y) -> x == "Author") items) + let date = case (lookup "Date" items) of + Just dat -> dat + Nothing -> "" + let title = case (lookup "Title" items) of + Just tit -> [Str tit] + Nothing -> [] + let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && (x /= "Date") && + (x /= "Title")) items + let result = map (\(x,y) -> Para [Strong [Str x], Str ":", Space, Str y]) remaining + updateState (\st -> st { stateAuthors = authors, stateDate = date, stateTitle = title }) + return (BlockQuote result)) + +-- +-- line block +-- + +lineBlockLine = try (do + string "| " + white <- many (oneOf " \t") + line <- manyTill inline newline + let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak] + return line') + +lineBlock = try (do + lines <- many1 lineBlockLine + blanklines + return $ Para (concat lines)) + +-- +-- paragraph block +-- + +para = choice [ paraBeforeCodeBlock, paraNormal ] "paragraph" + +codeBlockStart = try (do + string "::" + blankline + blankline) + +-- paragraph that ends in a :: starting a code block +paraBeforeCodeBlock = try (do + result <- many1 (do {notFollowedBy' codeBlockStart; inline}) + followedBy' (string "::") + return (Para (if (last result == Space) then + normalizeSpaces result + else + (normalizeSpaces result) ++ [Str ":"]))) + +-- regular paragraph +paraNormal = try (do + result <- many1 inline + newline + blanklines + let result' = normalizeSpaces result + return (Para result')) + +plain = do + result <- many1 inline + let result' = normalizeSpaces result + return (Plain result') + +-- +-- image block +-- + +imageBlock = try (do + string ".. image:: " + src <- manyTill anyChar newline + return (Plain [Image [Str "image"] (Src src "")])) + +-- +-- header blocks +-- + +header = choice [ doubleHeader, singleHeader ] "header" + +-- a header with lines on top and bottom +doubleHeader = try (do + c <- oneOf underlineChars + rest <- many (char c) -- the top line + let lenTop = length (c:rest) + skipSpaces + newline + txt <- many1 (do {notFollowedBy blankline; inline}) + pos <- getPosition + let len = (sourceColumn pos) - 1 + if (len > lenTop) then fail "title longer than border" else (do {return ()}) + blankline -- spaces and newline + count lenTop (char c) -- the bottom line + blanklines + -- check to see if we've had this kind of header before. + -- if so, get appropriate level. if not, add to list. + state <- getState + let headerTable = stateHeaderTable state + let (headerTable', level) = case findIndex (== DoubleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return (Header level (normalizeSpaces txt))) + +-- a header with line on the bottom only +singleHeader = try (do + notFollowedBy' whitespace + txt <- many1 (do {notFollowedBy blankline; inline}) + pos <- getPosition + let len = (sourceColumn pos) - 1 + blankline + c <- oneOf underlineChars + rest <- count (len - 1) (char c) + many (char c) + blanklines + state <- getState + let headerTable = stateHeaderTable state + let (headerTable', level) = case findIndex (== SingleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return (Header level (normalizeSpaces txt))) + +-- +-- hrule block +-- + +hruleWith chr = + try (do + count 4 (char chr) + skipMany (char chr) + skipSpaces + newline + blanklines + return HorizontalRule) + +hrule = choice (map hruleWith underlineChars) "hrule" + +-- +-- code blocks +-- + +-- read a line indented by a given string +indentedLine indents = try (do + string indents + result <- manyTill anyChar newline + return (result ++ "\n")) + +-- two or more indented lines, possibly separated by blank lines +-- if variable = True, then any indent will work, but it must be consistent through the block +-- if variable = False, indent should be one tab or equivalent in spaces +indentedBlock variable = try (do + state <- getState + let tabStop = stateTabStop state + indents <- if variable then + many1 (oneOf " \t") + else + oneOfStrings ["\t", (replicate tabStop ' ')] + firstline <- manyTill anyChar newline + rest <- many (choice [ indentedLine indents, + try (do {b <- blanklines; l <- indentedLine indents; return (b ++ l)})]) + option "" blanklines + return (firstline ++ "\n" ++ (concat rest))) + +codeBlock = try (do + codeBlockStart + result <- indentedBlock False -- the False means we want one tab stop indent on each line + return (CodeBlock result)) + +-- +-- raw html +-- + +rawHtmlBlock = try (do + string ".. raw:: html" + blanklines + result <- indentedBlock True + return (RawHtml result)) + +-- +-- raw latex +-- + +rawLaTeXBlock = try (do + string ".. raw:: latex" + blanklines + result <- indentedBlock True + return (Para [(TeX result)])) + +-- +-- block quotes +-- + +blockQuote = try (do + block <- indentedBlock True + -- parse the extracted block, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) + "block" (block ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show block ++ "\nError:\n" ++ show err + Right result -> result + return (BlockQuote parsed)) + +-- +-- list blocks +-- + +list = choice [ bulletList, orderedList ] "list" + +-- parses bullet list start and returns its length (inc. following whitespace) +bulletListStart = + try (do + notFollowedBy' hrule -- because hrules start out just like lists + marker <- oneOf bulletListMarkers + white <- many1 spaceChar + let len = length (marker:white) + return len) + +withPeriodSuffix parser = try (do + a <- parser + b <- char '.' + return (a ++ [b])) + +withParentheses parser = try (do + a <- char '(' + b <- parser + c <- char ')' + return ([a] ++ b ++ [c])) + +withRightParen parser = try (do + a <- parser + b <- char ')' + return (a ++ [b])) + +upcaseWord = map toUpper + +romanNumeral = do + let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii", "xxiv" ] + let upperNumerals = map upcaseWord lowerNumerals + result <- choice $ map string (lowerNumerals ++ upperNumerals) + return result + +orderedListEnumerator = choice [ many1 digit, + string "#", + count 1 letter, + romanNumeral ] + +-- parses ordered list start and returns its length (inc. following whitespace) +orderedListStart = + try (do + marker <- choice [ withPeriodSuffix orderedListEnumerator, + withParentheses orderedListEnumerator, + withRightParen orderedListEnumerator ] + white <- many1 spaceChar + let len = length (marker ++ white) + return len) + +-- parse a line of a list item +listLine markerLength = try (do + notFollowedBy blankline + indentWith markerLength + line <- manyTill anyChar newline + return (line ++ "\n")) + +-- indent by specified number of spaces (or equiv. tabs) +indentWith num = do + state <- getState + let tabStop = stateTabStop state + if (num < tabStop) then + count num (char ' ') + else + choice [ try (count num (char ' ')), + (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem start = + try (do + markerLength <- start + firstLine <- manyTill anyChar newline + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))) + +-- continuation of a list item - indented and separated by blankline or (in compact lists) +-- endline. Note: nested lists are parsed as continuations. +listContinuation markerLength = + try (do + blanks <- many1 blankline + result <- many1 (listLine markerLength) + return (blanks ++ (concat result))) + +listItem start = + try (do + (markerLength, first) <- rawListItem start + rest <- many (listContinuation markerLength) + blanks <- choice [ try (do {b <- many blankline; followedBy' start; return b}), + many1 blankline ] -- whole list must end with blank + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState}) + "list item" raw of + Left err -> error $ "Raw:\n" ++ raw ++ "\nError:\n" ++ show err + Right result -> result + where raw = concat (first:rest) ++ blanks + return parsed) + +orderedList = + try (do + items <- many1 (listItem orderedListStart) + let items' = compactify items + return (OrderedList items')) + +bulletList = + try (do + items <- many1 (listItem bulletListStart) + let items' = compactify items + return (BulletList items')) + +-- +-- unknown directive (e.g. comment) +-- + +unknownDirective = try (do + string ".. " + manyTill anyChar newline + many (do {string " "; + char ':'; + many1 (noneOf "\n:"); + char ':'; + many1 (noneOf "\n"); + newline}) + option "" blanklines + return Null) + +-- +-- reference key +-- + +referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] + +imageKey = try (do + string ".. |" + ref <- manyTill inline (char '|') + skipSpaces + string "image::" + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + +anonymousKey = try (do + choice [string ".. __:", string "__"] + skipSpaces + src <- manyTill anyChar newline + state <- getState + return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) ""))) + +regularKeyQuoted = try (do + string ".. _`" + ref <- manyTill inline (string "`:") + skipSpaces + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + +regularKey = try (do + string ".. _" + ref <- manyTill inline (char ':') + skipSpaces + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + + -- + -- inline + -- + +text = choice [ strong, emph, code, str, tabchar, whitespace, endline ] "text" + +inline = choice [ escapedChar, special, hyphens, text, symbol ] "inline" + +special = choice [ link, image ] "link, inline html, or image" + +hyphens = try (do + result <- many1 (char '-') + option Space endline -- don't want to treat endline after hyphen or dash as a space + return (Str result)) + +escapedChar = escaped anyChar + +symbol = do + result <- oneOf specialChars + return (Str [result]) + +-- parses inline code, between codeStart and codeEnd +code = + try (do + string "``" + result <- manyTill anyChar (string "``") + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result + return (Code result')) + +emph = do + result <- enclosed (char '*') (char '*') inline + return (Emph (normalizeSpaces result)) + +strong = do + result <- enclosed (string "**") (string "**") inline + return (Strong (normalizeSpaces result)) + +whitespace = do + many1 spaceChar "whitespace" + return Space + +tabchar = do + tab + return (Str "\t") + +str = do + notFollowedBy' oneWordReferenceLink + result <- many1 (noneOf (specialChars ++ "\t\n ")) + return (Str result) + +-- an endline character that can be treated as a space, not a structural break +endline = + try (do + newline + notFollowedBy blankline + -- parse potential list starts at beginning of line differently if in a list: + st <- getState + if ((stateParserContext st) == ListItemState) then + notFollowedBy' (choice [orderedListStart, bulletListStart]) + else + option () pzero + return Space) + +-- +-- links +-- + +link = choice [explicitLink, referenceLink, autoLink, oneWordReferenceLink] "link" + +explicitLink = + try (do + char '`' + label <- manyTill inline (try (do {spaces; char '<'})) + src <- manyTill (noneOf ">\n ") (char '>') + skipSpaces + string "`_" + return (Link (normalizeSpaces label) (Src (removeLeadingTrailingSpace src) ""))) + +anonymousLinkEnding = + try (do + char '_' + state <- getState + let anonKeys = stateKeyBlocks state + -- if there's a list of anon key refs (from previous pass), pop one off. + -- otherwise return an anon key ref for the next pass to take care of... + case anonKeys of + (Key [Str "_"] src):rest -> + do{ setState (state { stateKeyBlocks = rest }); + return src } + otherwise -> return (Ref [Str "_"])) + +referenceLink = + try (do + char '`' + label <- manyTill inline (string "`_") + src <- option (Ref []) anonymousLinkEnding + return (Link (normalizeSpaces label) src)) + +oneWordReferenceLink = + try (do + label <- many1 alphaNum + char '_' + src <- option (Ref []) anonymousLinkEnding + notFollowedBy alphaNum -- because this_is_not a link + return (Link [Str label] src)) + +uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:", + "news:", "telnet:" ] + +uri = try (do + scheme <- uriScheme + identifier <- many1 (noneOf " \t\n") + return (scheme ++ identifier)) + +autoURI = try (do + src <- uri + return (Link [Str src] (Src src ""))) + +emailChar = alphaNum <|> oneOf "-+_." + +emailAddress = try (do + firstLetter <- alphaNum + restAddr <- many emailChar + let addr = firstLetter:restAddr + char '@' + dom <- domain + return (addr ++ '@':dom)) + +domainChar = alphaNum <|> char '-' + +domain = try (do + first <- many1 domainChar + dom <- many1 (try (do{ char '.'; many1 domainChar })) + return (joinWithSep "." (first:dom))) + +autoEmail = try (do + src <- emailAddress + return (Link [Str src] (Src ("mailto:" ++ src) ""))) + +autoLink = autoURI <|> autoEmail + +-- For now, we assume that all substitution references are for images. +image = + try (do + char '|' + ref <- manyTill inline (char '|') + return (Image (normalizeSpaces ref) (Ref ref))) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs new file mode 100644 index 000000000..dc58dd6b4 --- /dev/null +++ b/src/Text/Pandoc/Shared.hs @@ -0,0 +1,417 @@ +-- | Utility functions and definitions used by the various Pandoc modules. +module Text.Pandoc.Shared ( + -- * Text processing + gsub, + joinWithSep, + tabsToSpaces, + backslashEscape, + escapePreservingRegex, + endsWith, + stripTrailingNewlines, + removeLeadingTrailingSpace, + removeLeadingSpace, + removeTrailingSpace, + -- * Parsing + readWith, + testStringWith, + HeaderType (..), + ParserContext (..), + ParserState (..), + defaultParserState, + -- * Native format prettyprinting + prettyPandoc, + -- * Pandoc block list processing + consolidateList, + isNoteBlock, + splitBySpace, + normalizeSpaces, + compactify, + generateReference, + WriterOptions (..), + KeyTable, + keyTable, + lookupKeySrc, + refsMatch, + replaceReferenceLinks, + replaceRefLinksBlockList + ) where +import Text.Pandoc.Definition +import Text.ParserCombinators.Parsec +import Text.Pandoc.HtmlEntities ( decodeEntities ) +import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex ) +import Char ( toLower ) +import List ( find, groupBy ) + +-- | Parse a string with a given parser and state. +readWith :: GenParser Char ParserState a -- ^ parser + -> ParserState -- ^ initial state + -> String -- ^ input string + -> a +readWith parser state input = + case runParser parser state "source" input of + Left err -> error $ "\nError:\n" ++ show err + Right result -> result + +-- | Parse a string with @parser@ (for testing). +testStringWith :: (Show a) => + GenParser Char ParserState a + -> String + -> IO () +testStringWith parser str = putStrLn $ show $ readWith parser defaultParserState str + +-- | Parser state + +data HeaderType + = SingleHeader Char -- ^ Single line of characters underneath + | DoubleHeader Char -- ^ Lines of characters above and below + deriving (Eq, Show) + +data ParserContext + = BlockQuoteState -- ^ Used when running parser on contents of blockquote + | ListItemState -- ^ Used when running parser on list item contents + | NullState -- ^ Default state + deriving (Eq, Show) + +data ParserState = ParserState + { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML and LaTeX? + stateParserContext :: ParserContext, -- ^ What are we parsing? + stateKeyBlocks :: [Block], -- ^ List of reference key blocks + stateKeysUsed :: [[Inline]], -- ^ List of references used so far + stateNoteBlocks :: [Block], -- ^ List of note blocks + stateTabStop :: Int, -- ^ Tab stop + stateStandalone :: Bool, -- ^ If @True@, parse bibliographic info + stateTitle :: [Inline], -- ^ Title of document + stateAuthors :: [String], -- ^ Authors of document + stateDate :: String, -- ^ Date of document + stateHeaderTable :: [HeaderType] } -- ^ List of header types used, in what order (for reStructuredText only) + deriving Show + +defaultParserState :: ParserState +defaultParserState = + ParserState { stateParseRaw = False, + stateParserContext = NullState, + stateKeyBlocks = [], + stateKeysUsed = [], + stateNoteBlocks = [], + stateTabStop = 4, + stateStandalone = False, + stateTitle = [], + stateAuthors = [], + stateDate = [], + stateHeaderTable = [] } + +-- | Consolidate @Str@s and @Space@s in an inline list into one big @Str@. +-- Collapse adjacent @Space@s. +consolidateList :: [Inline] -> [Inline] +consolidateList ((Str a):(Str b):rest) = consolidateList ((Str (a ++ b)):rest) +consolidateList ((Str a):Space:rest) = consolidateList ((Str (a ++ " ")):rest) +consolidateList (Space:(Str a):rest) = consolidateList ((Str (" " ++ a)):rest) +consolidateList (Space:Space:rest) = consolidateList ((Str " "):rest) +consolidateList (inline:rest) = inline:(consolidateList rest) +consolidateList [] = [] + +-- | Indent string as a block. +indentBy :: Int -- ^ Number of spaces to indent the block + -> Int -- ^ Number of spaces to indent first line, relative to block + -> String -- ^ Contents of block to indent + -> String +indentBy num first [] = "" +indentBy num first str = + let (firstLine:restLines) = lines str + firstLineIndent = num + first in + (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ (joinWithSep "\n" $ map (\line -> (replicate num ' ') ++ line) restLines) + +-- | Prettyprint list of Pandoc blocks elements. +prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks + -> [Block] -- ^ List of blocks + -> String +prettyBlockList indent [] = indentBy indent 0 "[]" +prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" + +-- | Prettyprint Pandoc block element. +prettyBlock :: Block -> String +prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ (prettyBlockList 2 blocks) +prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++ (prettyBlockList 2 blocks) +prettyBlock (OrderedList blockLists) = "OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock (BulletList blockLists) = "BulletList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock block = show block + +-- | Prettyprint Pandoc document. +prettyPandoc :: Pandoc -> String +prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++ ")\n" ++ (prettyBlockList 0 blocks) + +-- | Convert tabs to spaces (with adjustable tab stop). +tabsToSpaces :: Int -- ^ Tabstop + -> String -- ^ String to convert + -> String +tabsToSpaces tabstop str = + unlines (map (tabsInLine tabstop tabstop) (lines str)) + +-- | Convert tabs to spaces in one line. +tabsInLine :: Int -- ^ Number of spaces to next tab stop + -> Int -- ^ Tabstop + -> String -- ^ Line to convert + -> String +tabsInLine num tabstop "" = "" +tabsInLine num tabstop (c:cs) = + let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in + let nextnumraw = (num - (length replacement)) in + let nextnum = if (nextnumraw < 1) then (nextnumraw + tabstop) else nextnumraw in + replacement ++ (tabsInLine nextnum tabstop cs) + +-- | Substitute string for every occurrence of regular expression. +gsub :: String -- ^ Regular expression (as string) to substitute for + -> String -- ^ String to substitute for the regex + -> String -- ^ String to be substituted in + -> String +gsub regex replacement str = subRegex (mkRegex regex) str replacement + +-- | Escape designated characters with backslash. +backslashEscape :: [Char] -- ^ list of special characters to escape + -> String -- ^ string input + -> String +backslashEscape special [] = [] +backslashEscape special (x:xs) = if x `elem` special then + '\\':x:(backslashEscape special xs) + else + x:(backslashEscape special xs) + +-- | Escape string by applying a function, but don't touch anything that matches regex. +escapePreservingRegex :: (String -> String) -- ^ Escaping function + -> Regex -- ^ Regular expression + -> String -- ^ String to be escaped + -> String +escapePreservingRegex escapeFunction regex str = + case (matchRegexAll regex str) of + Nothing -> escapeFunction str + Just (before, matched, after, _) -> + (escapeFunction before) ++ matched ++ + (escapePreservingRegex escapeFunction regex after) + +-- | Returns @True@ if string ends with given character. +endsWith :: Char -> [Char] -> Bool +endsWith char [] = False +endsWith char str = (char == last str) + +-- | Returns @True@ if block is a @Note@ block +isNoteBlock :: Block -> Bool +isNoteBlock (Note ref blocks) = True +isNoteBlock _ = False + +-- | Joins a list of lists, separated by another list. +joinWithSep :: [a] -- ^ List to use as separator + -> [[a]] -- ^ Lists to join + -> [a] +joinWithSep sep [] = [] +joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst + +-- | Strip trailing newlines from string. +stripTrailingNewlines :: String -> String +stripTrailingNewlines "" = "" +stripTrailingNewlines str = + if (last str) == '\n' then + stripTrailingNewlines (init str) + else + str + +-- | Remove leading and trailing space (including newlines) from string. +removeLeadingTrailingSpace :: String -> String +removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace + +-- | Remove leading space (including newlines) from string. +removeLeadingSpace :: String -> String +removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || (x == '\t')) + +-- | Remove trailing space (including newlines) from string. +removeTrailingSpace :: String -> String +removeTrailingSpace = reverse . removeLeadingSpace . reverse + +-- | Split list of inlines into groups separated by a space. +splitBySpace :: [Inline] -> [[Inline]] +splitBySpace lst = filter (\a -> (/= Space) (head a)) + (groupBy (\a b -> (/= Space) a && (/= Space) b) lst) + +-- | Normalize a list of inline elements: remove leading and trailing +-- @Space@ elements, and collapse double @Space@s into singles. +normalizeSpaces :: [Inline] -> [Inline] +normalizeSpaces [] = [] +normalizeSpaces list = + let removeDoubles [] = [] + removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) + removeDoubles (x:rest) = x:(removeDoubles rest) in + let removeLeading [] = [] + removeLeading lst = if ((head lst) == Space) then tail lst else lst in + let removeTrailing [] = [] + removeTrailing lst = if ((last lst) == Space) then init lst else lst in + removeLeading $ removeTrailing $ removeDoubles list + +-- | Change final list item from @Para@ to @Plain@ if the list should be compact. +compactify :: [[Block]] -- ^ List of list items (each a list of blocks) + -> [[Block]] +compactify [] = [] +compactify items = + let final = last items + others = init items in + case final of + [Para a] -> if any containsPara others then items else others ++ [[Plain a]] + otherwise -> items + +containsPara :: [Block] -> Bool +containsPara [] = False +containsPara ((Para a):rest) = True +containsPara ((BulletList items):rest) = (any containsPara items) || (containsPara rest) +containsPara ((OrderedList items):rest) = (any containsPara items) || (containsPara rest) +containsPara (x:rest) = containsPara rest + +-- | Options for writers +data WriterOptions = WriterOptions + { writerStandalone :: Bool -- ^ If @True@, writer header and footer + , writerTitlePrefix :: String -- ^ Prefix for HTML titles + , writerHeader :: String -- ^ Header for the document + , writerIncludeBefore :: String -- ^ String to include before the document body + , writerIncludeAfter :: String -- ^ String to include after the document body + , writerSmartypants :: Bool -- ^ If @True@, use smart quotes, dashes, and ellipses + , writerS5 :: Bool -- ^ @True@ if we're writing S5 instead of normal HTML + , writerIncremental :: Bool -- ^ If @True@, display S5 lists incrementally + , writerNumberSections :: Bool -- ^ If @True@, number sections in LaTeX + , writerTabStop :: Int } -- ^ Tabstop for conversion between spaces and tabs + deriving Show + +-- +-- Functions for constructing lists of reference keys +-- + +-- | Returns @Just@ numerical key reference if there's already a key +-- for the specified target in the list of blocks, otherwise @Nothing@. +keyFoundIn :: [Block] -- ^ List of key blocks to search + -> Target -- ^ Target to search for + -> Maybe String +keyFoundIn [] src = Nothing +keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src) then + Just num + else + keyFoundIn rest src +keyFoundIn (_:rest) src = keyFoundIn rest src + +-- | Return next unique numerical key, given keyList +nextUniqueKey :: [[Inline]] -> String +nextUniqueKey keys = + let nums = [1..10000] + notAKey n = not (any (== [Str (show n)]) keys) in + case (find notAKey nums) of + Just x -> show x + Nothing -> error "Could not find unique key for reference link" + +-- | Generate a reference for a URL (either an existing reference, if +-- there is one, or a new one, if there isn't) and update parser state. +generateReference :: String -- ^ URL + -> String -- ^ Title + -> GenParser tok ParserState Target +generateReference url title = do + let src = Src (decodeEntities url) (decodeEntities title) + state <- getState + let keyBlocks = stateKeyBlocks state + let keysUsed = stateKeysUsed state + case (keyFoundIn keyBlocks src) of + Just num -> return (Ref [Str num]) + Nothing -> do + let nextNum = nextUniqueKey keysUsed + updateState (\st -> st {stateKeyBlocks = (Key [Str nextNum] src):keyBlocks, + stateKeysUsed = [Str nextNum]:keysUsed}) + return (Ref [Str nextNum]) + +-- +-- code to replace reference links with real links and remove unneeded key blocks +-- + +type KeyTable = [([Inline], Target)] + +-- | Returns @True@ if block is a Key block +isRefBlock :: Block -> Bool +isRefBlock (Key _ _) = True +isRefBlock _ = False + +-- | Returns a pair of a list of pairs of keys and associated sources, and a new +-- list of blocks with the included key blocks deleted. +keyTable :: [Block] -> (KeyTable, [Block]) +keyTable [] = ([],[]) +keyTable ((Key ref target):lst) = (((ref, target):table), rest) + where (table, rest) = keyTable lst +keyTable (Null:lst) = keyTable lst -- get rid of Nulls +keyTable (Blank:lst) = keyTable lst -- get rid of Blanks +keyTable ((BlockQuote blocks):lst) = ((table1 ++ table2), ((BlockQuote rest1):rest2)) + where (table1, rest1) = keyTable blocks + (table2, rest2) = keyTable lst +keyTable ((Note ref blocks):lst) = ((table1 ++ table2), ((Note ref rest1):rest2)) + where (table1, rest1) = keyTable blocks + (table2, rest2) = keyTable lst +keyTable ((OrderedList blockLists):lst) = ((table1 ++ table2), ((OrderedList rest1):rest2)) + where results = map keyTable blockLists + rest1 = map snd results + table1 = concatMap fst results + (table2, rest2) = keyTable lst +keyTable ((BulletList blockLists):lst) = ((table1 ++ table2), ((BulletList rest1):rest2)) + where results = map keyTable blockLists + rest1 = map snd results + table1 = concatMap fst results + (table2, rest2) = keyTable lst +keyTable (other:lst) = (table, (other:rest)) + where (table, rest) = keyTable lst + +-- | Look up key in key table and return target object. +lookupKeySrc :: KeyTable -- ^ Key table + -> [Inline] -- ^ Key + -> Maybe Target +lookupKeySrc table key = case table of + [] -> Nothing + (k, src):rest -> if (refsMatch k key) then Just src else lookupKeySrc rest key + +-- | Returns @True@ if keys match (case insensitive). +refsMatch :: [Inline] -> [Inline] -> Bool +refsMatch ((Str x):restx) ((Str y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Code x):restx) ((Code y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((TeX x):restx) ((TeX y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((NoteRef x):restx) ((NoteRef y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Emph x):restx) ((Emph y):resty) = refsMatch x y && refsMatch restx resty +refsMatch ((Strong x):restx) ((Strong y):resty) = refsMatch x y && refsMatch restx resty +refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty +refsMatch [] x = null x +refsMatch x [] = null x + +-- | Replace reference links with explicit links in list of blocks, removing key blocks. +replaceReferenceLinks :: [Block] -> [Block] +replaceReferenceLinks blocks = + let (keytable, purged) = keyTable blocks in + replaceRefLinksBlockList keytable purged + +-- | Use key table to replace reference links with explicit links in a list of blocks +replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block] +replaceRefLinksBlockList keytable lst = map (replaceRefLinksBlock keytable) lst + +-- | Use key table to replace reference links with explicit links in a block +replaceRefLinksBlock :: KeyTable -> Block -> Block +replaceRefLinksBlock keytable (Plain lst) = Plain (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (Para lst) = Para (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (Header lvl lst) = Header lvl (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (BlockQuote lst) = BlockQuote (map (replaceRefLinksBlock keytable) lst) +replaceRefLinksBlock keytable (Note ref lst) = Note ref (map (replaceRefLinksBlock keytable) lst) +replaceRefLinksBlock keytable (OrderedList lst) = OrderedList (map (replaceRefLinksBlockList keytable) lst) +replaceRefLinksBlock keytable (BulletList lst) = BulletList (map (replaceRefLinksBlockList keytable) lst) +replaceRefLinksBlock keytable other = other + +-- | Use key table to replace reference links with explicit links in an inline element. +replaceRefLinksInline :: KeyTable -> Inline -> Inline +replaceRefLinksInline keytable (Link text (Ref ref)) = (Link newText newRef) + where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of + Nothing -> (Ref ref) + Just src -> src + newText = map (replaceRefLinksInline keytable) text +replaceRefLinksInline keytable (Image text (Ref ref)) = (Image newText newRef) + where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of + Nothing -> (Ref ref) + Just src -> src + newText = map (replaceRefLinksInline keytable) text +replaceRefLinksInline keytable (Emph lst) = Emph (map (replaceRefLinksInline keytable) lst) +replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst) +replaceRefLinksInline keytable other = other diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs new file mode 100644 index 000000000..66590809f --- /dev/null +++ b/src/Text/Pandoc/UTF8.hs @@ -0,0 +1,43 @@ +-- | Functions for converting Unicode strings to UTF-8 and vice versa. +-- +-- Taken from . +-- (c) 2003, OGI School of Science & Engineering, Oregon Health and +-- Science University. +-- +-- Modified by Martin Norbaeck to pass illegal UTF-8 sequences through unchanged. +module Text.Pandoc.UTF8 ( + decodeUTF8, + encodeUTF8 + ) where + +-- From the Char module supplied with HBC. + +-- | Take a UTF-8 string and decode it into a Unicode string. +decodeUTF8 :: String -> String +decodeUTF8 "" = "" +decodeUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' && + '\x80' <= c' && c' <= '\xbf' = + toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : decodeUTF8 cs +decodeUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' && + '\x80' <= c' && c' <= '\xbf' && + '\x80' <= c'' && c'' <= '\xbf' = + toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : decodeUTF8 cs +decodeUTF8 (c:cs) = c : decodeUTF8 cs + +-- | Take a Unicode string and encode it as a UTF-8 string. +encodeUTF8 :: String -> String +encodeUTF8 "" = "" +encodeUTF8 (c:cs) = + if c > '\x0000' && c < '\x0080' then + c : encodeUTF8 cs + else if c < toEnum 0x0800 then + let i = fromEnum c + in toEnum (0xc0 + i `div` 0x40) : + toEnum (0x80 + i `mod` 0x40) : + encodeUTF8 cs + else + let i = fromEnum c + in toEnum (0xe0 + i `div` 0x1000) : + toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : + toEnum (0x80 + i `mod` 0x40) : + encodeUTF8 cs diff --git a/src/Text/Pandoc/Writers/DefaultHeaders.hs b/src/Text/Pandoc/Writers/DefaultHeaders.hs new file mode 100644 index 000000000..87dd7d8ff --- /dev/null +++ b/src/Text/Pandoc/Writers/DefaultHeaders.hs @@ -0,0 +1,27 @@ +---------------------------------------------------- +-- Do not edit this file by hand. Edit +-- 'templates/DefaultHeaders.hs' +-- and run ./fillTemplates.pl Text/Pandoc/Writers/DefaultHeaders.hs +---------------------------------------------------- + +-- | Default headers for Pandoc writers. +module Text.Pandoc.Writers.DefaultHeaders ( + defaultLaTeXHeader, + defaultHtmlHeader, + defaultS5Header, + defaultRTFHeader + ) where +import Text.Pandoc.Writers.S5 + +defaultLaTeXHeader :: String +defaultLaTeXHeader = "\\documentclass{article}\n\\usepackage{hyperref}\n\\usepackage{ucs}\n\\usepackage[utf8x]{inputenc}\n\\usepackage{graphicx}\n\\setlength{\\parindent}{0pt}\n\\setlength{\\parskip}{6pt plus 2pt minus 1pt}\n% This is needed for code blocks in footnotes:\n\\usepackage{fancyvrb}\n\\VerbatimFootnotes\n" + +defaultHtmlHeader :: String +defaultHtmlHeader = "\n\n\n\n\n\n" + +defaultS5Header :: String +defaultS5Header = "\n\n\n\n\n\n\n\n" ++ s5CSS ++ s5Javascript + +defaultRTFHeader :: String +defaultRTFHeader = "{\\rtf1\\ansi\\deff0{\\fonttbl{\\f0 Times New Roman;}{\\f1 Courier;}}\n{\\colortbl;\\red255\\green0\\blue0;\\red0\\green0\\blue255;}\n\\widowctrl\\hyphauto\n\n" + diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs new file mode 100644 index 000000000..9eecf2761 --- /dev/null +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -0,0 +1,197 @@ +-- | Converts Pandoc to HTML. +module Text.Pandoc.Writers.HTML ( + writeHtml + ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Html ( stringToHtmlString ) +import Text.Regex ( mkRegex ) +import Numeric ( showHex ) +import Char ( ord ) +import List ( isPrefixOf ) + +-- | Convert Pandoc document to string in HTML format. +writeHtml :: WriterOptions -> Pandoc -> String +writeHtml options (Pandoc (Meta title authors date) blocks) = + let titlePrefix = writerTitlePrefix options in + let topTitle = if not (null titlePrefix) then + [Str titlePrefix] ++ (if not (null title) then [Str " - "] ++ title else []) + else + title in + let head = if (writerStandalone options) then + htmlHeader options (Meta topTitle authors date) + else + "" + titleBlocks = if (writerStandalone options) && (not (null title)) && + (not (writerS5 options)) then + [RawHtml "

", Plain title, RawHtml "

\n"] + else + [] + foot = if (writerStandalone options) then "\n\n" else "" + body = (writerIncludeBefore options) ++ + concatMap (blockToHtml options) (replaceReferenceLinks (titleBlocks ++ blocks)) ++ + (writerIncludeAfter options) in + head ++ body ++ foot + +-- | Obfuscate a "mailto:" link using Javascript. +obfuscateLink :: WriterOptions -> [Inline] -> String -> String +obfuscateLink options text src = + let text' = inlineListToHtml options text in + let linkText = if src == ("mailto:" ++ text') then "e" else "'" ++ text' ++ "'" + altText = if src == ("mailto:" ++ text') then "\\1 [at] \\2" else text' ++ " (\\1 [at] \\2)" in + gsub "mailto:([^@]*)@(.*)" ("") src + +-- | Obfuscate character as entity. +obfuscateChar :: Char -> String +obfuscateChar char = let num = ord char in + let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in + "&#" ++ numstr ++ ";" + +-- | Escape string, preserving character entities and quote. +stringToHtml :: String -> String +stringToHtml str = escapePreservingRegex stringToHtmlString (mkRegex "\"|(&[[:alnum:]]*;)") str + +-- | Escape string as in 'stringToHtml' but add smartypants filter. +stringToSmartHtml :: String -> String +stringToSmartHtml = + let escapeDoubleQuotes = + gsub "(\"|"|'')" "”" . -- rest are right quotes + gsub "([[:space:]])(\"|")" "\\1“" . -- never right quo after space + gsub "(\"|"|``)('|`|‘)([^[:punct:][:space:]])" "“‘\\3" . -- "'word left + gsub "(\"|"|``)([^[:punct:][:space:]])" "“\\2" -- "word left + escapeSingleQuotes = + gsub "'" "’" . -- otherwise right + gsub "([[:space:]])'" "\\1‘" . -- never right quo after space + gsub "`" "‘" . -- ` is left + gsub "([^[:punct:][:space:]])'" "\\1’" . -- word' right + gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left + gsub "^('|`)([^[:punct:][:space:]])" "‘\\2" . -- 'word left + gsub "([^[:punct:][:space:]])'(s|S)" "\\1’\\2" . -- possessive + gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1‘\\2" . -- 'word left + gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. + escapeDashes = gsub " ?-- ?" "—" . + gsub " ?--- ?" "—" . + gsub "([0-9])--?([0-9])" "\\1–\\2" + escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" in + escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses . stringToHtml + +-- | Escape code string as needed for HTML. +codeStringToHtml :: String -> String +codeStringToHtml [] = [] +codeStringToHtml (x:xs) = case x of + '&' -> "&" ++ codeStringToHtml xs + '<' -> "<" ++ codeStringToHtml xs + _ -> x:(codeStringToHtml xs) + +-- | Escape string to HTML appropriate for attributes +attributeStringToHtml :: String -> String +attributeStringToHtml = gsub "\"" """ + +-- | Returns an HTML header with appropriate bibliographic information. +htmlHeader :: WriterOptions -> Meta -> String +htmlHeader options (Meta title authors date) = + let titletext = if (null title) then + "" + else + "" ++ (inlineListToHtml options title) ++ "\n" + authortext = if (null authors) then + "" + else + "\n" + datetext = if (date == "") then + "" + else + "\n" in + (writerHeader options) ++ authortext ++ datetext ++ titletext ++ "\n\n" + +-- | Convert Pandoc block element to HTML. +blockToHtml :: WriterOptions -> Block -> String +blockToHtml options Blank = "\n" +blockToHtml options Null = "" +blockToHtml options (Plain lst) = inlineListToHtml options lst +blockToHtml options (Para lst) = "

" ++ (inlineListToHtml options lst) ++ "

\n" +blockToHtml options (BlockQuote blocks) = + if (writerS5 options) then -- in S5, treat list in blockquote specially + -- if default is incremental, make it nonincremental; otherwise incremental + let inc = not (writerIncremental options) in + case blocks of + [BulletList lst] -> blockToHtml (options {writerIncremental = inc}) (BulletList lst) + [OrderedList lst] -> blockToHtml (options {writerIncremental = inc}) (OrderedList lst) + otherwise -> "
\n" ++ (concatMap (blockToHtml options) blocks) ++ + "
\n" + else + "
\n" ++ (concatMap (blockToHtml options) blocks) ++ "
\n" +blockToHtml options (Note ref lst) = + let marker = "(" ++ ref ++ ") " in + let contents = (concatMap (blockToHtml options) lst) in + let contents' = case contents of + ('<':'p':'>':rest) -> "

" ++ marker ++ rest ++ "\n" + otherwise -> marker ++ contents ++ "\n" in + "

\n" ++ contents' ++ "
\n" +blockToHtml options (Key _ _) = "" +blockToHtml options (CodeBlock str) = "
" ++ (codeStringToHtml str) ++ 
+                                      "
\n" +blockToHtml options (RawHtml str) = str +blockToHtml options (BulletList lst) = + let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in + "\n" ++ (concatMap (listItemToHtml options) lst) ++ "\n" +blockToHtml options (OrderedList lst) = + let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in + "\n" ++ (concatMap (listItemToHtml options) lst) ++ "\n" +blockToHtml options HorizontalRule = "
\n" +blockToHtml options (Header level lst) = if ((level > 0) && (level <= 6)) then + "" ++ + (inlineListToHtml options lst) ++ + "\n" + else + "

" ++ (inlineListToHtml options lst) ++ "

\n" +listItemToHtml options list = "
  • " ++ (concatMap (blockToHtml options) list) ++ "
  • \n" + +-- | Convert list of Pandoc inline elements to HTML. +inlineListToHtml :: WriterOptions -> [Inline] -> String +inlineListToHtml options lst = + -- consolidate adjacent Str and Space elements for more intelligent + -- smartypants filtering + let lst' = consolidateList lst in + concatMap (inlineToHtml options) lst' + +-- | Convert Pandoc inline element to HTML. +inlineToHtml :: WriterOptions -> Inline -> String +inlineToHtml options (Emph lst) = "" ++ (inlineListToHtml options lst) ++ "" +inlineToHtml options (Strong lst) = "" ++ (inlineListToHtml options lst) ++ "" +inlineToHtml options (Code str) = "" ++ (codeStringToHtml str) ++ "" +inlineToHtml options (Str str) = if (writerSmartypants options) then + stringToSmartHtml str + else + stringToHtml str +inlineToHtml options (TeX str) = (codeStringToHtml str) +inlineToHtml options (HtmlInline str) = str +inlineToHtml options (LineBreak) = "
    \n" +inlineToHtml options Space = " " +inlineToHtml options (Link text (Src src tit)) = + let title = attributeStringToHtml tit in + if (isPrefixOf "mailto:" src) then + obfuscateLink options text src + else + "" else ">") ++ + (inlineListToHtml options text) ++ "" +inlineToHtml options (Link text (Ref [])) = "[" ++ (inlineListToHtml options text) ++ "]" +inlineToHtml options (Link text (Ref ref)) = "[" ++ (inlineListToHtml options text) ++ "][" ++ + (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse +inlineToHtml options (Image alt (Src source tit)) = + let title = attributeStringToHtml tit + alternate = inlineListToHtml options alt in + "\""" +inlineToHtml options (Image alternate (Ref [])) = + "![" ++ (inlineListToHtml options alternate) ++ "]" +inlineToHtml options (Image alternate (Ref ref)) = + "![" ++ (inlineListToHtml options alternate) ++ "][" ++ (inlineListToHtml options ref) ++ "]" +inlineToHtml options (NoteRef ref) = + "(" ++ ref ++ ")" + diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs new file mode 100644 index 000000000..b77789e90 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -0,0 +1,164 @@ +-- | Convert Pandoc to LaTeX. +module Text.Pandoc.Writers.LaTeX ( + writeLaTeX + ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import List ( (\\) ) + +-- | Convert Pandoc to LaTeX. +writeLaTeX :: WriterOptions -> Pandoc -> String +writeLaTeX options (Pandoc meta blocks) = + let notes = filter isNoteBlock blocks in -- assumes all notes are at outer level + let body = (writerIncludeBefore options) ++ + (concatMap (blockToLaTeX notes) (replaceReferenceLinks blocks)) ++ + (writerIncludeAfter options) in + let head = if writerStandalone options then + latexHeader notes options meta + else + "" in + let foot = if writerStandalone options then "\n\\end{document}\n" else "" in + head ++ body ++ foot + +-- | Insert bibliographic information into LaTeX header. +latexHeader :: [Block] -- ^ List of note blocks to use in resolving note refs + -> WriterOptions -- ^ Options, including LaTeX header + -> Meta -- ^ Meta with bibliographic information + -> String +latexHeader notes options (Meta title authors date) = + let titletext = if null title then + "" + else + "\\title{" ++ inlineListToLaTeX notes title ++ "}\n" + authorstext = if null authors then + "" + else + "\\author{" ++ (joinWithSep "\\\\" (map stringToLaTeX authors)) ++ "}\n" + datetext = if date == "" then + "" + else + "\\date{" ++ stringToLaTeX date ++ "}\n" + maketitle = if null title then + "" + else + "\\maketitle\n" + secnumline = if (writerNumberSections options) then + "" + else + "\\setcounter{secnumdepth}{0}\n" + header = writerHeader options in + header ++ secnumline ++ titletext ++ authorstext ++ datetext ++ "\\begin{document}\n" ++ maketitle + + +-- escape things as needed for LaTeX (also ldots, dashes, quotes, etc.) + +escapeBrackets = backslashEscape "{}" +escapeSpecial = backslashEscape "$%&~_#" + +escapeBackslash = gsub "\\\\" "\\\\textbackslash{}" +fixBackslash = gsub "\\\\textbackslash\\\\\\{\\\\\\}" "\\\\textbackslash{}" +escapeHat = gsub "\\^" "\\\\^{}" +escapeBar = gsub "\\|" "\\\\textbar{}" +escapeLt = gsub "<" "\\\\textless{}" +escapeGt = gsub ">" "\\\\textgreater{}" + +escapeDoubleQuotes = + gsub "\"" "''" . -- rest are right quotes + gsub "([[:space:]])\"" "\\1``" . -- never right quote after space + gsub "\"('|`)([^[:punct:][:space:]])" "``{}`\\2" . -- "'word left + gsub "\"([^[:punct:][:space:]])" "``\\1" -- "word left + +escapeSingleQuotes = + gsub "('|`)(\"|``)" "`{}``" . -- '"word left + gsub "([^[:punct:][:space:]])`(s|S)" "\\1'\\2" . -- catch possessives + gsub "^'([^[:punct:][:space:]])" "`\\1" . -- 'word left + gsub "([[:space:]])'" "\\1`" . -- never right quote after space + gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1`\\2" -- 'word left (leave possessives) + +escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "\\ldots{}" + +escapeDashes = gsub "([0-9])-([0-9])" "\\1--\\2" . + gsub " -- " "---" . + gsub "([^[:punct:][:space:]])--([^[:punct:][:space:]])" "\\1---\\2" + +escapeSmart = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses + +-- | Escape string for LaTeX (including smart quotes, dashes, ellipses) +stringToLaTeX :: String -> String +stringToLaTeX = escapeSmart . escapeGt . escapeLt . escapeBar . escapeHat . + escapeSpecial . fixBackslash . escapeBrackets . escapeBackslash + +-- | Remove all code elements from list of inline elements +-- (because it's illegal to have a \\verb inside a command argument) +deVerb :: [Inline] -> [Inline] +deVerb [] = [] +deVerb ((Code str):rest) = (Str str):(deVerb rest) +deVerb (other:rest) = other:(deVerb rest) + +-- | Convert Pandoc block element to LaTeX. +blockToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs + -> Block -- ^ Block to convert + -> String +blockToLaTeX notes Blank = "\n" +blockToLaTeX notes Null = "" +blockToLaTeX notes (Plain lst) = inlineListToLaTeX notes lst ++ "\n" +blockToLaTeX notes (Para lst) = (inlineListToLaTeX notes lst) ++ "\n\n" +blockToLaTeX notes (BlockQuote lst) = + "\\begin{quote}\n" ++ (concatMap (blockToLaTeX notes) lst) ++ "\\end{quote}\n" +blockToLaTeX notes (Note ref lst) = "" +blockToLaTeX notes (Key _ _) = "" +blockToLaTeX notes (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++ "\\end{verbatim}\n" +blockToLaTeX notes (RawHtml str) = "" +blockToLaTeX notes (BulletList lst) = + "\\begin{itemize}\n" ++ (concatMap (listItemToLaTeX notes) lst) ++ "\\end{itemize}\n" +blockToLaTeX notes (OrderedList lst) = + "\\begin{enumerate}\n" ++ (concatMap (listItemToLaTeX notes) lst) ++ "\\end{enumerate}\n" +blockToLaTeX notes HorizontalRule = "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n" +blockToLaTeX notes (Header level lst) = + if (level > 0) && (level <= 3) then + "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++ + (inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n" + else + (inlineListToLaTeX notes lst) ++ "\n\n" +listItemToLaTeX notes list = "\\item " ++ (concatMap (blockToLaTeX notes) list) + +-- | Convert list of inline elements to LaTeX. +inlineListToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs + -> [Inline] -- ^ Inlines to convert + -> String +inlineListToLaTeX notes lst = + -- first, consolidate Str and Space for more effective smartquotes: + let lst' = consolidateList lst in + concatMap (inlineToLaTeX notes) lst' + +-- | Convert inline element to LaTeX +inlineToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs + -> Inline -- ^ Inline to convert + -> String +inlineToLaTeX notes (Emph lst) = "\\emph{" ++ (inlineListToLaTeX notes (deVerb lst)) ++ "}" +inlineToLaTeX notes (Strong lst) = "\\textbf{" ++ (inlineListToLaTeX notes (deVerb lst)) ++ "}" +inlineToLaTeX notes (Code str) = "\\verb" ++ [chr] ++ stuffing ++ [chr] + where stuffing = str + chr = ((enumFromTo '!' '~') \\ stuffing) !! 0 +inlineToLaTeX notes (Str str) = stringToLaTeX str +inlineToLaTeX notes (TeX str) = str +inlineToLaTeX notes (HtmlInline str) = "" +inlineToLaTeX notes (LineBreak) = "\\\\\n" +inlineToLaTeX notes Space = " " +inlineToLaTeX notes (Link text (Src src tit)) = + "\\href{" ++ src ++ "}{" ++ (inlineListToLaTeX notes (deVerb text)) ++ "}" +inlineToLaTeX notes (Link text (Ref [])) = "[" ++ (inlineListToLaTeX notes text) ++ "]" +inlineToLaTeX notes (Link text (Ref ref)) = "[" ++ (inlineListToLaTeX notes text) ++ "][" ++ + (inlineListToLaTeX notes ref) ++ "]" -- this is what markdown does, for better or worse +inlineToLaTeX notes (Image alternate (Src source tit)) = "\\includegraphics{" ++ source ++ "}" +inlineToLaTeX notes (Image alternate (Ref [])) = + "![" ++ (inlineListToLaTeX notes alternate) ++ "]" +inlineToLaTeX notes (Image alternate (Ref ref)) = + "![" ++ (inlineListToLaTeX notes alternate) ++ "][" ++ (inlineListToLaTeX notes ref) ++ "]" +inlineToLaTeX [] (NoteRef ref) = "" +inlineToLaTeX ((Note firstref firstblocks):rest) (NoteRef ref) = + if (firstref == ref) then + "\\footnote{" ++ (stripTrailingNewlines (concatMap (blockToLaTeX rest) firstblocks)) ++ "}" + else + inlineToLaTeX rest (NoteRef ref) + diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs new file mode 100644 index 000000000..b73090f62 --- /dev/null +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -0,0 +1,149 @@ +-- | Converts Pandoc to Markdown. +module Text.Pandoc.Writers.Markdown ( + writeMarkdown + ) where +import Text.Regex ( matchRegex, mkRegex ) +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +-- | Convert Pandoc to Markdown. +writeMarkdown :: WriterOptions -> Pandoc -> String +writeMarkdown options (Pandoc meta blocks) = + let body = text (writerIncludeBefore options) <> + vcat (map (blockToMarkdown (writerTabStop options)) (formatKeys blocks)) $$ + text (writerIncludeAfter options) in + let head = if (writerStandalone options) then + ((metaToMarkdown meta) $$ text (writerHeader options)) + else + empty in + render $ head <> body + +-- | Escape special characters for Markdown. +escapeString :: String -> String +escapeString = backslashEscape "`<\\*_^" + +-- | Escape embedded \" in link title. +escapeLinkTitle :: String -> String +escapeLinkTitle = gsub "\"" "\\\\\"" + +-- | Take list of inline elements and return wrapped doc. +wrappedMarkdown :: [Inline] -> Doc +wrappedMarkdown lst = fsep $ map (fcat . (map inlineToMarkdown)) (splitBySpace lst) + +-- | Insert Blank block between key and non-key +formatKeys :: [Block] -> [Block] +formatKeys [] = [] +formatKeys [x] = [x] +formatKeys ((Key x1 y1):(Key x2 y2):rest) = (Key x1 y1):(formatKeys ((Key x2 y2):rest)) +formatKeys ((Key x1 y1):rest) = (Key x1 y1):Blank:(formatKeys rest) +formatKeys (x:(Key x1 y1):rest) = x:Blank:(formatKeys ((Key x1 y1):rest)) +formatKeys (x:rest) = x:(formatKeys rest) + +-- | Convert bibliographic information into Markdown header. +metaToMarkdown :: Meta -> Doc +metaToMarkdown (Meta [] [] "") = empty +metaToMarkdown (Meta title [] "") = (titleToMarkdown title) <> (text "\n") +metaToMarkdown (Meta title authors "") = + (titleToMarkdown title) <> (text "\n") <> (authorsToMarkdown authors) <> (text "\n") +metaToMarkdown (Meta title authors date) = + (titleToMarkdown title) <> (text "\n") <> (authorsToMarkdown authors) <> + (text "\n") <> (dateToMarkdown date) <> (text "\n") + +titleToMarkdown :: [Inline] -> Doc +titleToMarkdown lst = text "% " <> (inlineListToMarkdown lst) + +authorsToMarkdown :: [String] -> Doc +authorsToMarkdown lst = text "% " <> text (joinWithSep ", " (map escapeString lst)) + +dateToMarkdown :: String -> Doc +dateToMarkdown str = text "% " <> text (escapeString str) + +-- | Convert Pandoc block element to markdown. +blockToMarkdown :: Int -- ^ Tab stop + -> Block -- ^ Block element + -> Doc +blockToMarkdown tabStop Blank = text "" +blockToMarkdown tabStop Null = empty +blockToMarkdown tabStop (Plain lst) = wrappedMarkdown lst +blockToMarkdown tabStop (Para lst) = (wrappedMarkdown lst) <> (text "\n") +blockToMarkdown tabStop (BlockQuote lst) = + (vcat $ map (\line -> (text "> ") <> (text line)) $ lines $ render $ vcat $ + map (blockToMarkdown tabStop) lst) <> (text "\n") +blockToMarkdown tabStop (Note ref lst) = + let lns = lines $ render $ vcat $ map (blockToMarkdown tabStop) lst in + if null lns then + empty + else + let first = head lns + rest = tail lns in + text ("^(" ++ (escapeString ref) ++ ") ") <> (text first) $$ (vcat $ + map (\line -> (text "^ ") <> (text line)) rest) <> (text "\n") +blockToMarkdown tabStop (Key txt (Src src tit)) = + text " " <> char '[' <> inlineListToMarkdown txt <> char ']' <> text ": " <> text src <> + (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty) +blockToMarkdown tabStop (CodeBlock str) = (nest tabStop $ vcat $ map text (lines str)) <> + (if (endsWith '\n' str) then empty else text "\n") <> text "\n" +blockToMarkdown tabStop (RawHtml str) = text str +blockToMarkdown tabStop (BulletList lst) = + vcat (map (bulletListItemToMarkdown tabStop) lst) <> text "\n" +blockToMarkdown tabStop (OrderedList lst) = + vcat (zipWith (orderedListItemToMarkdown tabStop) (enumFromTo 1 (length lst)) lst) <> + text "\n" +blockToMarkdown tabStop HorizontalRule = text "\n* * * * *\n" +blockToMarkdown tabStop (Header level lst) = + text ((replicate level '#') ++ " ") <> (inlineListToMarkdown lst) <> (text "\n") +bulletListItemToMarkdown tabStop list = + hang (text "- ") tabStop (vcat (map (blockToMarkdown tabStop) list)) + +-- | Convert ordered list item (a list of blocks) to markdown. +orderedListItemToMarkdown :: Int -- ^ tab stop + -> Int -- ^ ordinal number of list item + -> [Block] -- ^ list item (list of blocks) + -> Doc +orderedListItemToMarkdown tabStop num list = + hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat (map (blockToMarkdown tabStop) list)) + where spacer = if (num < 10) then " " else "" + +-- | Convert list of Pandoc inline elements to markdown. +inlineListToMarkdown :: [Inline] -> Doc +inlineListToMarkdown lst = hcat $ map inlineToMarkdown lst + +-- | Convert Pandoc inline element to markdown. +inlineToMarkdown :: Inline -> Doc +inlineToMarkdown (Emph lst) = text "*" <> (inlineListToMarkdown lst) <> text "*" +inlineToMarkdown (Strong lst) = text "**" <> (inlineListToMarkdown lst) <> text "**" +inlineToMarkdown (Code str) = + case (matchRegex (mkRegex "``") str) of + Just match -> text ("` " ++ str ++ " `") + Nothing -> case (matchRegex (mkRegex "`") str) of + Just match -> text ("`` " ++ str ++ " ``") + Nothing -> text ("`" ++ str ++ "`") +inlineToMarkdown (Str str) = text $ escapeString str +inlineToMarkdown (TeX str) = text str +inlineToMarkdown (HtmlInline str) = text str +inlineToMarkdown (LineBreak) = text " \n" +inlineToMarkdown Space = char ' ' +inlineToMarkdown (Link txt (Src src tit)) = + let linktext = if (null txt) || (txt == [Str ""]) then + text "link" + else + inlineListToMarkdown txt in + char '[' <> linktext <> char ']' <> char '(' <> text src <> + (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty) <> char ')' +inlineToMarkdown (Link txt (Ref [])) = char '[' <> inlineListToMarkdown txt <> text "][]" +inlineToMarkdown (Link txt (Ref ref)) = char '[' <> inlineListToMarkdown txt <> char ']' <> + char '[' <> inlineListToMarkdown ref <> char ']' +inlineToMarkdown (Image alternate (Src source tit)) = + let alt = if (null alternate) || (alternate == [Str ""]) then + text "image" + else + inlineListToMarkdown alternate in + char '!' <> char '[' <> alt <> char ']' <> char '(' <> text source <> + (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty) <> char ')' +inlineToMarkdown (Image alternate (Ref [])) = + char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']' +inlineToMarkdown (Image alternate (Ref ref)) = + char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']' <> + char '[' <> inlineListToMarkdown ref <> char ']' +inlineToMarkdown (NoteRef ref) = char '^' <> char '(' <> text (escapeString ref) <> char ')' diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs new file mode 100644 index 000000000..37d895336 --- /dev/null +++ b/src/Text/Pandoc/Writers/RST.hs @@ -0,0 +1,188 @@ +-- | Converts Pandoc to reStructuredText. +module Text.Pandoc.Writers.RST ( + writeRST + ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import List ( nubBy ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +-- | Convert Pandoc to reStructuredText. +writeRST :: WriterOptions -> Pandoc -> String +writeRST options (Pandoc meta blocks) = + let (main, refs) = unzip $ map (blockToRST (writerTabStop options)) + (reformatBlocks $ replaceReferenceLinks blocks) + top = if (writerStandalone options) then + (metaToRST meta) $$ text (writerHeader options) + else + empty in + let refs' = nubBy (\x y -> (render x) == (render y)) refs in -- remove duplicate keys + let body = text (writerIncludeBefore options) <> + vcat main $$ text (writerIncludeAfter options) in + render $ top <> body $$ vcat refs' + +-- | Escape special RST characters. +escapeString :: String -> String +escapeString = backslashEscape "`\\|*_" + +-- | Convert list of inline elements into one 'Doc' of wrapped text and another +-- containing references. +wrappedRST :: [Inline] -> (Doc, Doc) +wrappedRST lst = + let words = splitBySpace lst in + (fsep $ map (fcat . (map (fst . inlineToRST))) words, vcat (map (snd . inlineToRST) lst)) + +-- | Remove reference keys, and make sure there are blanks before each list. +reformatBlocks :: [Block] -> [Block] +reformatBlocks [] = [] +reformatBlocks ((Plain x):(OrderedList y):rest) = + (Para x):(reformatBlocks ((OrderedList y):rest)) +reformatBlocks ((Plain x):(BulletList y):rest) = (Para x):(reformatBlocks ((BulletList y):rest)) +reformatBlocks ((OrderedList x):rest) = + (OrderedList (map reformatBlocks x)):(reformatBlocks rest) +reformatBlocks ((BulletList x):rest) = (BulletList (map reformatBlocks x)):(reformatBlocks rest) +reformatBlocks ((BlockQuote x):rest) = (BlockQuote (reformatBlocks x)):(reformatBlocks rest) +reformatBlocks ((Note ref x):rest) = (Note ref (reformatBlocks x)):(reformatBlocks rest) +reformatBlocks ((Key x1 y1):rest) = reformatBlocks rest +reformatBlocks (x:rest) = x:(reformatBlocks rest) + +-- | Convert bibliographic information to 'Doc'. +metaToRST :: Meta -> Doc +metaToRST (Meta title authors date) = + (titleToRST title) <> (authorsToRST authors) <> (dateToRST date) + +-- | Convert title to 'Doc'. +titleToRST :: [Inline] -> Doc +titleToRST [] = empty +titleToRST lst = + let title = fst $ inlineListToRST lst in + let titleLength = length $ render title in + let border = text (replicate titleLength '=') in + border <> char '\n' <> title <> char '\n' <> border <> text "\n\n" + +-- | Convert author list to 'Doc'. +authorsToRST :: [String] -> Doc +authorsToRST [] = empty +authorsToRST (first:rest) = text ":Author: " <> text first <> char '\n' <> (authorsToRST rest) + +-- | Convert date to 'Doc'. +dateToRST :: String -> Doc +dateToRST [] = empty +dateToRST str = text ":Date: " <> text (escapeString str) <> char '\n' + +-- | Convert Pandoc block element to a 'Doc' containing the main text and +-- another one containing any references. +blockToRST :: Int -- ^ tab stop + -> Block -- ^ block element to convert + -> (Doc, Doc) -- ^ first element is text, second is references for end of file +blockToRST tabStop Blank = (text "\n", empty) +blockToRST tabStop Null = (empty, empty) +blockToRST tabStop (Plain lst) = wrappedRST lst +blockToRST tabStop (Para [TeX str]) = -- raw latex block + let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in + (hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str')), empty) +blockToRST tabStop (Para lst) = ((fst $ wrappedRST lst) <> (text "\n"), snd $ wrappedRST lst) +blockToRST tabStop (BlockQuote lst) = + let (main, refs) = unzip $ map (blockToRST tabStop) lst in + ((nest tabStop $ vcat $ main) <> text "\n", vcat refs) +blockToRST tabStop (Note ref blocks) = + let (main, refs) = unzip $ map (blockToRST tabStop) blocks in + ((hang (text ".. [" <> text (escapeString ref) <> text "] ") 3 (vcat main)), vcat refs) +blockToRST tabStop (Key txt (Src src tit)) = + (text "ERROR - KEY FOUND", empty) -- shouldn't have a key here +blockToRST tabStop (CodeBlock str) = + (hang (text "::\n") tabStop (vcat $ map text (lines ('\n':(str ++ "\n")))), empty) +blockToRST tabStop (RawHtml str) = + let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in + (hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')), empty) +blockToRST tabStop (BulletList lst) = + let (main, refs) = unzip $ map (bulletListItemToRST tabStop) lst in + (vcat main <> text "\n", vcat refs) +blockToRST tabStop (OrderedList lst) = + let (main, refs) = + unzip $ zipWith (orderedListItemToRST tabStop) (enumFromTo 1 (length lst)) lst in + (vcat main <> text "\n", vcat refs) +blockToRST tabStop HorizontalRule = (text "--------------\n", empty) +blockToRST tabStop (Header level lst) = + let (headerText, refs) = inlineListToRST lst in + let headerLength = length $ render headerText in + let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in + let border = text $ replicate headerLength headerChar in + (headerText <> char '\n' <> border <> char '\n', refs) + +-- | Convert bullet list item (list of blocks) to reStructuredText. +-- Returns a pair of 'Doc', the first the main text, the second references +bulletListItemToRST :: Int -- ^ tab stop + -> [Block] -- ^ list item (list of blocks) + -> (Doc, Doc) +bulletListItemToRST tabStop list = + let (main, refs) = unzip $ map (blockToRST tabStop) list in + (hang (text "- ") tabStop (vcat main), (vcat refs)) + +-- | Convert an ordered list item (list of blocks) to reStructuredText. +-- Returns a pair of 'Doc', the first the main text, the second references +orderedListItemToRST :: Int -- ^ tab stop + -> Int -- ^ ordinal number of list item + -> [Block] -- ^ list item (list of blocks) + -> (Doc, Doc) +orderedListItemToRST tabStop num list = + let (main, refs) = unzip $ map (blockToRST tabStop) list + spacer = if (length (show num) < 2) then " " else "" in + (hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat main), (vcat refs)) + +-- | Convert a list of inline elements to reStructuredText. +-- Returns a pair of 'Doc', the first the main text, the second references. +inlineListToRST :: [Inline] -> (Doc, Doc) +inlineListToRST lst = let (main, refs) = unzip $ map inlineToRST lst in + (hcat main, hcat refs) + +-- | Convert an inline element to reStructuredText. +-- Returns a pair of 'Doc', the first the main text, the second references. +inlineToRST :: Inline -> (Doc, Doc) -- second Doc is list of refs for end of file +inlineToRST (Emph lst) = let (main, refs) = inlineListToRST lst in + (text "*" <> main <> text "*", refs) +inlineToRST (Strong lst) = let (main, refs) = inlineListToRST lst in + (text "**" <> main <> text "**", refs) +inlineToRST (Code str) = (text $ "``" ++ str ++ "``", empty) +inlineToRST (Str str) = (text $ escapeString str, empty) +inlineToRST (TeX str) = (text str, empty) +inlineToRST (HtmlInline str) = (empty, empty) +inlineToRST (LineBreak) = inlineToRST Space -- RST doesn't have line breaks +inlineToRST Space = (char ' ', empty) +-- +-- Note: can assume reference links have been replaced where possible with explicit links. +-- +inlineToRST (Link txt (Src src tit)) = + let (linktext, ref') = if (null txt) || (txt == [Str ""]) then + (text "link", empty) + else + inlineListToRST $ normalizeSpaces txt in + let link = char '`' <> linktext <> text "`_" + linktext' = render linktext in + let linktext'' = if (':' `elem` linktext') then "`" ++ linktext' ++ "`" else linktext' in + let ref = text ".. _" <> text linktext'' <> text ": " <> text src in + (link, ref' $$ ref) +inlineToRST (Link txt (Ref [])) = + let (linktext, refs) = inlineListToRST txt in + (char '[' <> linktext <> char ']', refs) +inlineToRST (Link txt (Ref ref)) = + let (linktext, refs1) = inlineListToRST txt + (reftext, refs2) = inlineListToRST ref in + (char '[' <> linktext <> text "][" <> reftext <> char ']', refs1 $$ refs2) +inlineToRST (Image alternate (Src source tit)) = + let (alt, ref') = if (null alternate) || (alternate == [Str ""]) then + (text "image", empty) + else + inlineListToRST $ normalizeSpaces alternate in + let link = char '|' <> alt <> char '|' in + let ref = text ".. " <> link <> text " image:: " <> text source in + (link, ref' $$ ref) +inlineToRST (Image alternate (Ref [])) = + let (alttext, refs) = inlineListToRST alternate in + (char '|' <> alttext <> char '|', refs) +-- The following case won't normally occur... +inlineToRST (Image alternate (Ref ref)) = + let (alttext, refs1) = inlineListToRST alternate + (reftext, refs2) = inlineListToRST ref in + (char '|' <> alttext <> char '|', refs1 $$ refs2) +inlineToRST (NoteRef ref) = (text " [" <> text (escapeString ref) <> char ']' <> char '_', empty) diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs new file mode 100644 index 000000000..64f17cc74 --- /dev/null +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -0,0 +1,194 @@ +-- | Convert Pandoc to rich text format. +module Text.Pandoc.Writers.RTF ( + writeRTF + ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import List ( isSuffixOf ) +import Char ( ord, chr ) + +-- | Convert Pandoc to a string in rich text format. +writeRTF :: WriterOptions -> Pandoc -> String +writeRTF options (Pandoc meta blocks) = + let notes = filter isNoteBlock blocks in -- assumes all notes are at outer level + let head = if writerStandalone options then + rtfHeader notes (writerHeader options) meta + else + "" + foot = if writerStandalone options then "\n}\n" else "" + body = (writerIncludeBefore options) ++ + (concatMap (blockToRTF notes 0) (replaceReferenceLinks blocks)) ++ + (writerIncludeAfter options) in + head ++ body ++ foot + +-- | Convert unicode characters (> 127) into rich text format representation. +handleUnicode :: String -> String +handleUnicode [] = [] +handleUnicode (c:cs) = if (ord c) > 127 then + '\\':'u':(show (ord c)) ++ "?" ++ (handleUnicode cs) + else + c:(handleUnicode cs) + +escapeSpecial = backslashEscape "{\\}" +escapeTab = gsub "\\\\t" "\\\\tab " + +-- | Escape strings as needed for rich text format. +stringToRTF :: String -> String +stringToRTF = handleUnicode . escapeSpecial . escapeTab + +-- | Escape raw LaTeX strings for RTF. Don't escape \t; it might +-- be the first letter of a command! +latexStringToRTF :: String -> String +latexStringToRTF = handleUnicode . escapeSpecial + +-- | Escape things as needed for code block in RTF. +codeStringToRTF :: String -> String +codeStringToRTF str = joinWithSep "\\line\n" (lines (stringToRTF str)) + +-- | Deal with raw LaTeX. +latexToRTF :: String -> String +latexToRTF str = "{\\cf1 " ++ (latexStringToRTF str) ++ "\\cf0 } " + +-- | Make a paragraph with first-line indent, block indent, and space after. +rtfParSpaced :: Int -- ^ space after (in twips) + -> Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> String -- ^ string with content + -> String +rtfParSpaced spaceAfter indent firstLineIndent content = + "{\\pard \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ + " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" + +-- | Default paragraph. +rtfPar :: Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> String -- ^ string with content + -> String +rtfPar = rtfParSpaced 180 + +-- | Compact paragraph (e.g. for compact list items). +rtfCompact :: Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> String -- ^ string with content + -> String +rtfCompact = rtfParSpaced 0 + +-- number of twips to indent +indentIncrement = 720 +listIncrement = 360 + +-- | Returns appropriate bullet list marker for indent level. +bulletMarker :: Int -> String +bulletMarker indent = case (indent `mod` 720) of + 0 -> "\\bullet " + otherwise -> "\\endash " + +-- | Returns appropriate (list of) ordered list markers for indent level. +orderedMarkers :: Int -> [String] +orderedMarkers indent = case (indent `mod` 720) of + 0 -> map (\x -> show x ++ ".") [1..] + otherwise -> map (\x -> show x ++ ".") $ cycle ['a'..'z'] + +-- | Returns RTF header. +rtfHeader :: [Block] -- ^ list of note blocks + -> String -- ^ header text + -> Meta -- ^ bibliographic information + -> String +rtfHeader notes headerText (Meta title authors date) = + let titletext = if null title then + "" + else + rtfPar 0 0 ("\\qc \\b \\fs36 " ++ inlineListToRTF notes title) + authorstext = if null authors then + "" + else + rtfPar 0 0 ("\\qc " ++ (joinWithSep "\\" (map stringToRTF authors))) + datetext = if date == "" then "" else rtfPar 0 0 ("\\qc " ++ stringToRTF date) in + let spacer = if null (titletext ++ authorstext ++ datetext) then "" else rtfPar 0 0 "" in + headerText ++ titletext ++ authorstext ++ datetext ++ spacer + +-- | Convert Pandoc block element to RTF. +blockToRTF :: [Block] -- ^ list of note blocks + -> Int -- ^ indent level + -> Block -- ^ block to convert + -> String +blockToRTF notes indent Blank = rtfPar indent 0 "" +blockToRTF notes indent Null = "" +blockToRTF notes indent (Plain lst) = rtfCompact indent 0 (inlineListToRTF notes lst) +blockToRTF notes indent (Para lst) = rtfPar indent 0 (inlineListToRTF notes lst) +blockToRTF notes indent (BlockQuote lst) = + concatMap (blockToRTF notes (indent + indentIncrement)) lst +blockToRTF notes indent (Note ref lst) = "" -- there shouldn't be any after filtering +blockToRTF notes indent (Key _ _) = "" +blockToRTF notes indent (CodeBlock str) = rtfPar indent 0 ("\\f1 " ++ (codeStringToRTF str)) +blockToRTF notes indent (RawHtml str) = "" +blockToRTF notes indent (BulletList lst) = + spaceAtEnd $ concatMap (listItemToRTF notes indent (bulletMarker indent)) lst +blockToRTF notes indent (OrderedList lst) = + spaceAtEnd $ concat $ zipWith (listItemToRTF notes indent) (orderedMarkers indent) lst +blockToRTF notes indent HorizontalRule = + rtfPar indent 0 "\\qc \\emdash\\emdash\\emdash\\emdash\\emdash" +blockToRTF notes indent (Header level lst) = + rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ + (inlineListToRTF notes lst)) + +-- | Ensure that there's the same amount of space after compact +-- lists as after regular lists. +spaceAtEnd :: String -> String +spaceAtEnd str = + if isSuffixOf "\\par}\n" str then + (take ((length str) - 6) str) ++ "\\sa180\\par}\n" + else + str + +-- | Convert list item (list of blocks) to RTF. +listItemToRTF :: [Block] -- ^ list of note blocks + -> Int -- ^ indent level + -> String -- ^ list start marker + -> [Block] -- ^ list item (list of blocks) + -> [Char] +listItemToRTF notes indent marker [] = + rtfCompact (indent + listIncrement) (0 - listIncrement) + (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") +listItemToRTF notes indent marker list = + let (first:rest) = map (blockToRTF notes (indent + listIncrement)) list in + let modFirst = gsub "\\\\fi-?[0-9]+" ("\\\\fi" ++ (show (0 - listIncrement)) ++ + " " ++ marker ++ "\\\\tx" ++ (show listIncrement) ++ "\\\\tab") first in + modFirst ++ (concat rest) + +-- | Convert list of inline items to RTF. +inlineListToRTF :: [Block] -- ^ list of note blocks + -> [Inline] -- ^ list of inlines to convert + -> String +inlineListToRTF notes lst = concatMap (inlineToRTF notes) lst + +-- | Convert inline item to RTF. +inlineToRTF :: [Block] -- ^ list of note blocks + -> Inline -- ^ inline to convert + -> String +inlineToRTF notes (Emph lst) = "{\\i " ++ (inlineListToRTF notes lst) ++ "} " +inlineToRTF notes (Strong lst) = "{\\b " ++ (inlineListToRTF notes lst) ++ "} " +inlineToRTF notes (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} " +inlineToRTF notes (Str str) = stringToRTF str +inlineToRTF notes (TeX str) = latexToRTF str +inlineToRTF notes (HtmlInline str) = "" +inlineToRTF notes (LineBreak) = "\\line " +inlineToRTF notes Space = " " +inlineToRTF notes (Link text (Src src tit)) = + "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ "\"}}{\\fldrslt{\\ul\n" + ++ (inlineListToRTF notes text) ++ "\n}}}\n" +inlineToRTF notes (Link text (Ref [])) = "[" ++ (inlineListToRTF notes text) ++ "]" +inlineToRTF notes (Link text (Ref ref)) = "[" ++ (inlineListToRTF notes text) ++ "][" ++ + (inlineListToRTF notes ref) ++ "]" -- this is what markdown does, for better or worse +inlineToRTF notes (Image alternate (Src source tit)) = "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF notes (Image alternate (Ref [])) = "![" ++ (inlineListToRTF notes alternate) ++ "]" +inlineToRTF notes (Image alternate (Ref ref)) = "![" ++ (inlineListToRTF notes alternate) ++ + "][" ++ (inlineListToRTF notes ref) ++ "]" +inlineToRTF [] (NoteRef ref) = "" +inlineToRTF ((Note firstref firstblocks):rest) (NoteRef ref) = + if firstref == ref then + "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + (concatMap (blockToRTF rest 0) firstblocks) ++ "}" + else + inlineToRTF rest (NoteRef ref) + diff --git a/src/Text/Pandoc/Writers/S5.hs b/src/Text/Pandoc/Writers/S5.hs new file mode 100644 index 000000000..2d0b913a3 --- /dev/null +++ b/src/Text/Pandoc/Writers/S5.hs @@ -0,0 +1,95 @@ +---------------------------------------------------- +-- Do not edit this file by hand. Edit +-- 'templates/S5.hs' +-- and run ./fillTemplates.pl Text/Pandoc/Writers/S5.hs +---------------------------------------------------- + +-- | Definitions for creation of S5 powerpoint-like HTML. +-- (See .) +module Text.Pandoc.Writers.S5 ( + -- * Strings + s5Javascript, + s5CSS, + s5Links, + -- * Functions + writeS5, + insertS5Structure + ) where +import Text.Pandoc.Shared ( joinWithSep, WriterOptions ) +import Text.Pandoc.Writers.HTML ( writeHtml ) +import Text.Pandoc.Definition + +s5Javascript :: String +s5Javascript = "\n" + +s5CoreCSS :: String +s5CoreCSS = "/* Do not edit or override these styles! The system will likely break if you do. */\n\ndiv#header, div#footer, div#controls, .slide {position: absolute;}\nhtml>body div#header, html>body div#footer, \n html>body div#controls, html>body .slide {position: fixed;}\n.handout {display: none;}\n.layout {display: block;}\n.slide, .hideme, .incremental {visibility: hidden;}\n#slide0 {visibility: visible;}\n" + +s5FramingCSS :: String +s5FramingCSS = "/* The following styles size, place, and layer the slide components.\n Edit these if you want to change the overall slide layout.\n The commented lines can be uncommented (and modified, if necessary) \n to help you with the rearrangement process. */\n\n/* target = 1024x768 */\n\ndiv#header, div#footer, .slide {width: 100%; top: 0; left: 0;}\ndiv#header {top: 0; height: 3em; z-index: 1;}\ndiv#footer {top: auto; bottom: 0; height: 2.5em; z-index: 5;}\n.slide {top: 0; width: 92%; padding: 3.5em 4% 4%; z-index: 2; list-style: none;}\ndiv#controls {left: 50%; bottom: 0; width: 50%; z-index: 100;}\ndiv#controls form {position: absolute; bottom: 0; right: 0; width: 100%;\n margin: 0;}\n#currentSlide {position: absolute; width: 10%; left: 45%; bottom: 1em; z-index: 10;}\nhtml>body #currentSlide {position: fixed;}\n\n/*\ndiv#header {background: #FCC;}\ndiv#footer {background: #CCF;}\ndiv#controls {background: #BBD;}\ndiv#currentSlide {background: #FFC;}\n*/\n" + +s5PrettyCSS :: String +s5PrettyCSS = "/* Following are the presentation styles -- edit away! */\n\nbody {background: #FFF url(bodybg.gif) -16px 0 no-repeat; color: #000; font-size: 2em;}\n:link, :visited {text-decoration: none; color: #00C;}\n#controls :active {color: #88A !important;}\n#controls :focus {outline: 1px dotted #227;}\nh1, h2, h3, h4 {font-size: 100%; margin: 0; padding: 0; font-weight: inherit;}\nul, pre {margin: 0; line-height: 1em;}\nhtml, body {margin: 0; padding: 0;}\n\nblockquote, q {font-style: italic;}\nblockquote {padding: 0 2em 0.5em; margin: 0 1.5em 0.5em; text-align: center; font-size: 1em;}\nblockquote p {margin: 0;}\nblockquote i {font-style: normal;}\nblockquote b {display: block; margin-top: 0.5em; font-weight: normal; font-size: smaller; font-style: normal;}\nblockquote b i {font-style: italic;}\n\nkbd {font-weight: bold; font-size: 1em;}\nsup {font-size: smaller; line-height: 1px;}\n\n.slide code {padding: 2px 0.25em; font-weight: bold; color: #533;}\n.slide code.bad, code del {color: red;}\n.slide code.old {color: silver;}\n.slide pre {padding: 0; margin: 0.25em 0 0.5em 0.5em; color: #533; font-size: 90%;}\n.slide pre code {display: block;}\n.slide ul {margin-left: 5%; margin-right: 7%; list-style: disc;}\n.slide li {margin-top: 0.75em; margin-right: 0;}\n.slide ul ul {line-height: 1;}\n.slide ul ul li {margin: .2em; font-size: 85%; list-style: square;}\n.slide img.leader {display: block; margin: 0 auto;}\n\ndiv#header, div#footer {background: #005; color: #AAB;\n font-family: Verdana, Helvetica, sans-serif;}\ndiv#header {background: #005 url(bodybg.gif) -16px 0 no-repeat;\n line-height: 1px;}\ndiv#footer {font-size: 0.5em; font-weight: bold; padding: 1em 0;}\n#footer h1, #footer h2 {display: block; padding: 0 1em;}\n#footer h2 {font-style: italic;}\n\ndiv.long {font-size: 0.75em;}\n.slide h1 {position: absolute; top: 0.7em; left: 87px; z-index: 1;\n margin: 0; padding: 0.3em 0 0 50px; white-space: nowrap;\n font: bold 150%/1em Helvetica, sans-serif; text-transform: capitalize;\n color: #DDE; background: #005;}\n.slide h3 {font-size: 130%;}\nh1 abbr {font-variant: small-caps;}\n\ndiv#controls {position: absolute; left: 50%; bottom: 0;\n width: 50%;\n text-align: right; font: bold 0.9em Verdana, Helvetica, sans-serif;}\nhtml>body div#controls {position: fixed; padding: 0 0 1em 0;\n top: auto;}\ndiv#controls form {position: absolute; bottom: 0; right: 0; width: 100%;\n margin: 0; padding: 0;}\n#controls #navLinks a {padding: 0; margin: 0 0.5em; \n background: #005; border: none; color: #779; \n cursor: pointer;}\n#controls #navList {height: 1em;}\n#controls #navList #jumplist {position: absolute; bottom: 0; right: 0; background: #DDD; color: #227;}\n\n#currentSlide {text-align: center; font-size: 0.5em; color: #449;}\n\n#slide0 {padding-top: 3.5em; font-size: 90%;}\n#slide0 h1 {position: static; margin: 1em 0 0; padding: 0;\n font: bold 2em Helvetica, sans-serif; white-space: normal;\n color: #000; background: transparent;}\n#slide0 h2 {font: bold italic 1em Helvetica, sans-serif; margin: 0.25em;}\n#slide0 h3 {margin-top: 1.5em; font-size: 1.5em;}\n#slide0 h4 {margin-top: 0; font-size: 1em;}\n\nul.urls {list-style: none; display: inline; margin: 0;}\n.urls li {display: inline; margin: 0;}\n.note {display: none;}\n.external {border-bottom: 1px dotted gray;}\nhtml>body .external {border-bottom: none;}\n.external:after {content: \" \\274F\"; font-size: smaller; color: #77B;}\n\n.incremental, .incremental *, .incremental *:after {color: #DDE; visibility: visible;}\nimg.incremental {visibility: hidden;}\n.slide .current {color: #B02;}\n\n\n/* diagnostics\n\nli:after {content: \" [\" attr(class) \"]\"; color: #F88;}\n */" + +s5OperaCSS :: String +s5OperaCSS = "/* DO NOT CHANGE THESE unless you really want to break Opera Show */\n.slide {\n\tvisibility: visible !important;\n\tposition: static !important;\n\tpage-break-before: always;\n}\n#slide0 {page-break-before: avoid;}\n" + +s5OutlineCSS :: String +s5OutlineCSS = "/* don't change this unless you want the layout stuff to show up in the outline view! */\n\n.layout div, #footer *, #controlForm * {display: none;}\n#footer, #controls, #controlForm, #navLinks, #toggle {\n display: block; visibility: visible; margin: 0; padding: 0;}\n#toggle {float: right; padding: 0.5em;}\nhtml>body #toggle {position: fixed; top: 0; right: 0;}\n\n/* making the outline look pretty-ish */\n\n#slide0 h1, #slide0 h2, #slide0 h3, #slide0 h4 {border: none; margin: 0;}\n#slide0 h1 {padding-top: 1.5em;}\n.slide h1 {margin: 1.5em 0 0; padding-top: 0.25em;\n border-top: 1px solid #888; border-bottom: 1px solid #AAA;}\n#toggle {border: 1px solid; border-width: 0 0 1px 1px; background: #FFF;}\n" + +s5PrintCSS :: String +s5PrintCSS = "/* The following rule is necessary to have all slides appear in print! DO NOT REMOVE IT! */ .slide, ul {page-break-inside: avoid; visibility: visible !important;} h1 {page-break-after: avoid;} body {font-size: 12pt; background: white;} * {color: black;} #slide0 h1 {font-size: 200%; border: none; margin: 0.5em 0 0.25em;} #slide0 h3 {margin: 0; padding: 0;} #slide0 h4 {margin: 0 0 0.5em; padding: 0;} #slide0 {margin-bottom: 3em;} h1 {border-top: 2pt solid gray; border-bottom: 1px dotted silver;} .extra {background: transparent !important;} div.extra, pre.extra, .example {font-size: 10pt; color: #333;} ul.extra a {font-weight: bold;} p.example {display: none;} #header {display: none;} #footer h1 {margin: 0; border-bottom: 1px solid; color: gray; font-style: italic;} #footer h2, #controls {display: none;} /* The following rule keeps the layout stuff out of print. Remove at your own risk! */ .layout, .layout * {display: none !important;} " + +s5CSS :: String +s5CSS = "\n\n\n\n" + +s5Links :: String +s5Links = "\n\n\n\n\n\n\n" + +-- | Converts 'Pandoc' to an S5 HTML presentation. +writeS5 :: WriterOptions -> Pandoc -> String +writeS5 options = writeHtml options . insertS5Structure + +-- | Inserts HTML needed for an S5 presentation (e.g. around slides). +layoutDiv :: [Inline] -- ^ Title of document (for header or footer) + -> String -- ^ Date of document (for header or footer) + -> [Block] -- ^ List of block elements returned +layoutDiv title date = [(RawHtml "
    \n
    \n
    \n
    \n
    \n"), (Header 1 [Str date]), (Header 2 title), (RawHtml "
    \n
    \n")] + +presentationStart = (RawHtml "
    \n\n") + +presentationEnd = (RawHtml "
    \n") + +slideStart = (RawHtml "
    \n") + +slideEnd = (RawHtml "
    \n") + +-- | Returns 'True' if block is a Header 1. +isH1 :: Block -> Bool +isH1 (Header 1 _) = True +isH1 _ = False + +-- | Insert HTML around sections to make individual slides. +insertSlides :: Bool -> [Block] -> [Block] +insertSlides beginning blocks = + let (beforeHead, rest) = break isH1 blocks in + if (null rest) then + if beginning then + beforeHead + else + beforeHead ++ [slideEnd] + else + if beginning then + beforeHead ++ slideStart:(head rest):(insertSlides False (tail rest)) + else + beforeHead ++ slideEnd:slideStart:(head rest):(insertSlides False (tail rest)) + +-- | Insert blocks into 'Pandoc' for slide structure. +insertS5Structure :: Pandoc -> Pandoc +insertS5Structure (Pandoc meta []) = Pandoc meta [] +insertS5Structure (Pandoc (Meta title authors date) blocks) = + let slides = insertSlides True blocks + firstSlide = if (not (null title)) then [slideStart, (Header 1 title), (Header 3 [Str (joinWithSep ", " authors)]), (Header 4 [Str date]), slideEnd] else [] in + let newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++ slides ++ [presentationEnd] in + Pandoc (Meta title authors date) newBlocks + -- cgit v1.2.3