diff options
author | dr@jones.dk <dr@jones.dk> | 2010-03-22 12:40:10 +0100 |
---|---|---|
committer | dr@jones.dk <dr@jones.dk> | 2010-03-22 12:40:10 +0100 |
commit | 96d4f941026a8eca3ba211facdc8ce66b2ab38bb (patch) | |
tree | aae68ec157e85fe9590d1dd5216fc6b7916e08d3 /src/Text | |
parent | 789d0772d8b5d9c066fb8624bd51576cbde5e30b (diff) |
Imported Upstream version 1.5.0.1
Diffstat (limited to 'src/Text')
26 files changed, 1420 insertions, 1124 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 390c27765..ec2dc19f5 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -36,14 +36,16 @@ inline links: > module Main where > import Text.Pandoc -> import qualified System.IO.UTF8 as U +> -- include the following two lines only if you're using ghc < 6.12: +> import Prelude hiding (getContents, putStrLn) +> import System.IO.UTF8 > > markdownToRST :: String -> String > markdownToRST = > (writeRST defaultWriterOptions {writerReferenceLinks = True}) . > readMarkdown defaultParserState > -> main = U.getContents >>= U.putStrLn . markdownToRST +> main = getContents >>= putStrLn . markdownToRST Note: all of the readers assume that the input text has @'\n'@ line endings. So if you get your input text from a web form, @@ -70,6 +72,7 @@ module Text.Pandoc , HeaderType (..) -- * Writers: converting /from/ Pandoc format , writeMarkdown + , writePlain , writeRST , writeLaTeX , writeConTeXt @@ -88,8 +91,8 @@ module Text.Pandoc , WriterOptions (..) , HTMLMathMethod (..) , defaultWriterOptions - -- * Default headers for various output formats - , module Text.Pandoc.DefaultHeaders + -- * Rendering templates and default templates + , module Text.Pandoc.Templates -- * Version , pandocVersion ) where @@ -111,7 +114,7 @@ import Text.Pandoc.Writers.OpenDocument import Text.Pandoc.Writers.Man import Text.Pandoc.Writers.RTF import Text.Pandoc.Writers.MediaWiki -import Text.Pandoc.DefaultHeaders +import Text.Pandoc.Templates import Text.Pandoc.Shared import Data.Version (showVersion) import Paths_pandoc (version) diff --git a/src/Text/Pandoc/DefaultHeaders.hs b/src/Text/Pandoc/DefaultHeaders.hs deleted file mode 100644 index e9c1f17e5..000000000 --- a/src/Text/Pandoc/DefaultHeaders.hs +++ /dev/null @@ -1,69 +0,0 @@ -{-# LANGUAGE CPP, TemplateHaskell #-} -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.DefaultHeaders - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Default headers for Pandoc writers. --} -module Text.Pandoc.DefaultHeaders ( - defaultLaTeXHeader, - defaultConTeXtHeader, - defaultDocbookHeader, - defaultOpenDocumentHeader, - defaultS5Header, - defaultRTFHeader - ) where -import Text.Pandoc.Writers.S5 -import System.FilePath ( (</>) ) -import Text.Pandoc.TH ( contentsOf ) - -defaultLaTeXHeader :: String -#ifndef __HADDOCK__ -defaultLaTeXHeader = $(contentsOf $ "data" </> "headers" </> "LaTeX.header") -#endif - -defaultConTeXtHeader :: String -#ifndef __HADDOCK__ -defaultConTeXtHeader = $(contentsOf $ "data" </> "headers" </> "ConTeXt.header") -#endif - -defaultDocbookHeader :: String -#ifndef __HADDOCK__ -defaultDocbookHeader = $(contentsOf $ "data" </> "headers" </> "Docbook.header") -#endif - -defaultOpenDocumentHeader :: String -#ifndef __HADDOCK__ -defaultOpenDocumentHeader = $(contentsOf $ "data" </> "headers" </> "OpenDocument.header") -#endif - -defaultS5Header :: String -defaultS5Header = s5Meta ++ s5CSS ++ s5Javascript - -defaultRTFHeader :: String -#ifndef __HADDOCK__ -defaultRTFHeader = $(contentsOf $ "data" </> "headers" </> "RTF.header") -#endif diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 94183c500..02bf5efbb 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -35,11 +35,10 @@ import Data.Generics data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show, Typeable, Data) --- | Bibliographic information for the document: title (list of 'Inline'), --- authors (list of strings), date (string). -data Meta = Meta [Inline] -- title - [String] -- authors - String -- date +-- | Bibliographic information for the document: title, authors, date. +data Meta = Meta { docTitle :: [Inline] + , docAuthors :: [[Inline]] + , docDate :: [Inline] } deriving (Eq, Show, Read, Typeable, Data) -- | Alignment of a table column. diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 457e605a5..f29106262 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {- Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu> @@ -34,7 +33,7 @@ import Text.XHtml import Text.Pandoc.Definition #ifdef _HIGHLIGHTING import Text.Highlighting.Kate ( languages, highlightAs, formatAsXHtml, FormatOption (..), defaultHighlightingCss, languagesByExtension ) -import Data.List (find, lookup) +import Data.List (find) import Data.Maybe (fromMaybe) import Data.Char (toLower) diff --git a/src/Text/Pandoc/LaTeXMathML.hs b/src/Text/Pandoc/LaTeXMathML.hs deleted file mode 100644 index 1eb3c23cc..000000000 --- a/src/Text/Pandoc/LaTeXMathML.hs +++ /dev/null @@ -1,14 +0,0 @@ -{-# LANGUAGE CPP, TemplateHaskell #-} --- | Definitions for use of LaTeXMathML in HTML. --- (See <http://math.etsu.edu/LaTeXMathML/>) -module Text.Pandoc.LaTeXMathML ( latexMathMLScript ) where -import Text.Pandoc.TH ( contentsOf ) -import System.FilePath ( (</>) ) - --- | String containing LaTeXMathML javascript. -latexMathMLScript :: String -#ifndef __HADDOCK__ -latexMathMLScript = "<script type=\"text/javascript\">\n" ++ - $(contentsOf $ "data" </> "LaTeXMathML.js.comment") ++ - $(contentsOf $ "data" </> "LaTeXMathML.js.packed") ++ "</script>\n" -#endif diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs index f9e4dd8f1..6d602fb2a 100644 --- a/src/Text/Pandoc/ODT.hs +++ b/src/Text/Pandoc/ODT.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE TemplateHaskell #-} {- Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu> @@ -29,7 +28,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Functions for producing an ODT file from OpenDocument XML. -} module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where -import Text.Pandoc.TH ( makeZip ) import Data.List ( find ) import System.FilePath ( (</>), takeFileName ) import qualified Data.ByteString.Lazy as B @@ -39,14 +37,30 @@ import Codec.Archive.Zip import Control.Applicative ( (<$>) ) import Text.ParserCombinators.Parsec import System.Time +import Paths_pandoc ( getDataFileName ) +import System.Directory +import Control.Monad (liftM) -- | Produce an ODT file from OpenDocument XML. -saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. - -> FilePath -- ^ Relative directory of source file. - -> String -- ^ OpenDocument XML contents. +saveOpenDocumentAsODT :: Maybe FilePath -- ^ Path of user data directory + -> FilePath -- ^ Pathname of ODT file to be produced + -> FilePath -- ^ Relative directory of source file + -> Maybe FilePath -- ^ Path specified by --reference-odt + -> String -- ^ OpenDocument XML contents -> IO () -saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do - let refArchive = read $(makeZip $ "data" </> "odt-styles") +saveOpenDocumentAsODT datadir destinationODTPath sourceDirRelative mbRefOdt xml = do + refArchive <- liftM toArchive $ + case mbRefOdt of + Just f -> B.readFile f + Nothing -> do + let defaultODT = getDataFileName "reference.odt" >>= B.readFile + case datadir of + Nothing -> defaultODT + Just d -> do + exists <- doesFileExist (d </> "reference.odt") + if exists + then B.readFile (d </> "reference.odt") + else defaultODT -- handle pictures let (newContents, pics) = case runParser pPictures [] "OpenDocument XML contents" xml of diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e6ca05d87..2e5473992 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -47,9 +47,10 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Data.Maybe ( fromMaybe ) -import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf, intercalate ) +import Data.List ( isPrefixOf, isSuffixOf, intercalate ) import Data.Char ( toLower, isAlphaNum ) import Network.URI ( parseURIReference, URI (..) ) +import Control.Monad ( liftM ) -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ParserState -- ^ Parser state @@ -71,7 +72,7 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite", "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q", "s", "samp", "select", "small", "span", "strike", "strong", "sub", "sup", - "textarea", "tt", "u", "var"] ++ eitherBlockOrInline + "textarea", "tt", "u", "var"] -} blockHtmlTags :: [[Char]] @@ -80,7 +81,7 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div", "h5", "h6", "head", "hr", "html", "isindex", "menu", "noframes", "noscript", "ol", "p", "pre", "table", "ul", "dd", "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script"] ++ eitherBlockOrInline + "th", "thead", "tr", "script", "style"] sanitaryTags :: [[Char]] sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big", @@ -112,6 +113,40 @@ sanitaryAttributes = ["abbr", "accept", "accept-charset", "summary", "tabindex", "target", "title", "type", "usemap", "valign", "value", "vspace", "width"] +-- taken from HXT and extended + +closes :: String -> String -> Bool +"EOF" `closes` _ = True +_ `closes` "body" = False +_ `closes` "html" = False +"a" `closes` "a" = True +"li" `closes` "li" = True +"th" `closes` t | t `elem` ["th","td"] = True +"td" `closes` t | t `elem` ["th","td"] = True +"tr" `closes` t | t `elem` ["th","td","tr"] = True +"dt" `closes` t | t `elem` ["dt","dd"] = True +"dd" `closes` t | t `elem` ["dt","dd"] = True +"hr" `closes` "p" = True +"p" `closes` "p" = True +"meta" `closes` "meta" = True +"colgroup" `closes` "colgroup" = True +"form" `closes` "form" = True +"label" `closes` "label" = True +"map" `closes` "map" = True +"object" `closes` "object" = True +_ `closes` t | t `elem` ["option","style","script","textarea","title"] = True +t `closes` "select" | t /= "option" = True +"thead" `closes` t | t `elem` ["colgroup"] = True +"tfoot" `closes` t | t `elem` ["thead","colgroup"] = True +"tbody" `closes` t | t `elem` ["tbody","tfoot","thead","colgroup"] = True +t `closes` t2 | + t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] && + t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" +t1 `closes` t2 | + t1 `elem` blockHtmlTags && + t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True +_ `closes` _ = False + -- -- HTML utility functions -- @@ -176,6 +211,19 @@ extractTagType ('<':rest) = map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest extractTagType _ = "" +-- Parse any HTML tag (opening or self-closing) and return tag type +anyOpener :: GenParser Char ParserState [Char] +anyOpener = try $ do + char '<' + spaces + tag <- many1 alphaNum + skipMany htmlAttribute + spaces + option "" (string "/") + spaces + char '>' + return $ map toLower tag + -- | Parse any HTML tag (opening or self-closing) and return text of tag anyHtmlTag :: GenParser Char ParserState [Char] anyHtmlTag = try $ do @@ -257,32 +305,30 @@ htmlRegularAttribute = try $ do (content, quoteStr) <- choice [ (quoted '\''), (quoted '"'), (do - a <- many (alphaNum <|> (oneOf "-._:")) + a <- many (noneOf " \t\n\r\"'<>") return (a,"")) ] return (name, content, (name ++ "=" ++ quoteStr ++ content ++ quoteStr)) -- | Parse an end tag of type 'tag' -htmlEndTag :: [Char] -> GenParser Char st [Char] +htmlEndTag :: [Char] -> GenParser Char ParserState [Char] htmlEndTag tag = try $ do - char '<' - spaces - char '/' - spaces - stringAnyCase tag - spaces - char '>' - return $ "</" ++ tag ++ ">" - -{- --- | Returns @True@ if the tag is (or can be) an inline tag. -isInline :: String -> Bool -isInline tag = (extractTagType tag) `elem` inlineHtmlTags --} + closedByNext <- lookAhead $ option False $ liftM (`closes` tag) $ + anyOpener <|> (eof >> return "EOF") + if closedByNext + then return "" + else do char '<' + spaces + char '/' + spaces + stringAnyCase tag + spaces + char '>' + return $ "</" ++ tag ++ ">" -- | Returns @True@ if the tag is (or can be) a block tag. isBlock :: String -> Bool -isBlock tag = (extractTagType tag) `elem` blockHtmlTags +isBlock tag = (extractTagType tag) `elem` (blockHtmlTags ++ eitherBlockOrInline) anyHtmlBlockTag :: GenParser Char ParserState [Char] anyHtmlBlockTag = try $ do @@ -298,18 +344,43 @@ anyHtmlInlineTag = try $ do -- Scripts must be treated differently, because they can contain '<>' etc. htmlScript :: GenParser Char ParserState [Char] htmlScript = try $ do - open <- string "<script" - rest <- manyTill anyChar (htmlEndTag "script") + lookAhead $ htmlTag "script" + open <- anyHtmlTag + rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script") st <- getState if stateSanitizeHTML st && not ("script" `elem` sanitaryTags) then return "<!-- unsafe HTML removed -->" else return $ open ++ rest ++ "</script>" +scriptChunk :: GenParser Char ParserState [Char] +scriptChunk = jsComment <|> jsString <|> jsChars + where jsComment = jsEndlineComment <|> jsMultilineComment + jsString = jsSingleQuoteString <|> jsDoubleQuoteString + jsChars = many1 (noneOf "<\"'*/") <|> count 1 anyChar + jsEndlineComment = try $ do + string "//" + res <- manyTill anyChar newline + return ("//" ++ res) + jsMultilineComment = try $ do + string "/*" + res <- manyTill anyChar (try $ string "*/") + return ("/*" ++ res ++ "*/") + jsSingleQuoteString = stringwith '\'' + jsDoubleQuoteString = stringwith '"' + charWithEsc escapable = try $ + (try $ char '\\' >> oneOf ('\\':escapable) >>= \x -> return ['\\',x]) + <|> count 1 anyChar + stringwith c = try $ do + char c + res <- liftM concat $ manyTill (charWithEsc [c]) (char c) + return (c : (res ++ [c])) + -- | Parses material between style tags. -- Style tags must be treated differently, because they can contain CSS htmlStyle :: GenParser Char ParserState [Char] htmlStyle = try $ do - open <- string "<style" + lookAhead $ htmlTag "style" + open <- anyHtmlTag rest <- manyTill anyChar (htmlEndTag "style") st <- getState if stateSanitizeHTML st && not ("style" `elem` sanitaryTags) @@ -382,7 +453,7 @@ parseTitle = try $ do return contents -- parse header and return meta-information (for now, just title) -parseHead :: GenParser Char ParserState ([Inline], [a], [Char]) +parseHead :: GenParser Char ParserState Meta parseHead = try $ do htmlTag "head" spaces @@ -390,7 +461,7 @@ parseHead = try $ do contents <- option [] parseTitle skipMany nonTitleNonHead htmlEndTag "head" - return (contents, [], "") + return $ Meta contents [] [] skipHtmlTag :: String -> GenParser Char ParserState () skipHtmlTag tag = optional (htmlTag tag) @@ -404,23 +475,28 @@ bodyTitle = try $ do _ -> fail "not title" inlinesTilEnd "h1" +endOfDoc :: GenParser Char ParserState () +endOfDoc = try $ do + spaces + optional (htmlEndTag "body") + spaces + optional (htmlEndTag "html" >> many anyChar) -- ignore stuff after </html> + eof + parseHtml :: GenParser Char ParserState Pandoc parseHtml = do sepEndBy (choice [xmlDec, definition, htmlComment]) spaces + spaces skipHtmlTag "html" spaces - (title, authors, date) <- option ([], [], "") parseHead + meta <- option (Meta [] [] []) parseHead spaces skipHtmlTag "body" spaces optional bodyTitle -- skip title in body, because it's represented in meta blocks <- parseBlocks - spaces - optional (htmlEndTag "body") - spaces - optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html> - eof - return $ Pandoc (Meta title authors date) blocks + endOfDoc + return $ Pandoc meta blocks -- -- parsing blocks @@ -438,6 +514,7 @@ block = choice [ codeBlock , para , plain , rawHtmlBlock' + , notFollowedBy' endOfDoc >> char '<' >> return Null ] <?> "block" -- @@ -524,7 +601,9 @@ orderedList = try $ do _ -> DefaultStyle return (read sta, sty') spaces - items <- sepEndBy1 (blocksIn "li") spaces + -- note: if they have an <ol> or <ul> not in scope of a <li>, + -- treat it as a list item, though it's not valid xhtml... + items <- sepEndBy1 (blocksIn "li" <|> liftM (:[]) list) spaces htmlEndTag "ol" return $ OrderedList (start, style, DefaultDelim) items @@ -532,7 +611,9 @@ bulletList :: GenParser Char ParserState Block bulletList = try $ do htmlTag "ul" spaces - items <- sepEndBy1 (blocksIn "li") spaces + -- note: if they have an <ol> or <ul> not in scope of a <li>, + -- treat it as a list item, though it's not valid xhtml... + items <- sepEndBy1 (blocksIn "li" <|> liftM (:[]) list) spaces htmlEndTag "ul" return $ BulletList items @@ -586,6 +667,7 @@ inline = choice [ charRef , link , image , rawHtmlInline + , char '&' >> return (Str "&") -- common HTML error ] <?> "inline" code :: GenParser Char ParserState Inline @@ -599,7 +681,7 @@ code = try $ do rawHtmlInline :: GenParser Char ParserState Inline rawHtmlInline = do - result <- htmlScript <|> htmlStyle <|> htmlComment <|> anyHtmlInlineTag + result <- anyHtmlInlineTag <|> htmlComment state <- getState if stateParseRaw state then return (HtmlInline result) else return (Str "") @@ -640,7 +722,7 @@ linebreak :: GenParser Char ParserState Inline linebreak = htmlTag "br" >> optional newline >> return LineBreak str :: GenParser Char st Inline -str = many1 (noneOf "<& \t\n") >>= return . Str +str = many1 (noneOf "< \t\n&") >>= return . Str -- -- links and images diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index b4c01fe19..f10f0e219 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -39,6 +39,7 @@ import Text.Pandoc.Shared import Data.Maybe ( fromMaybe ) import Data.Char ( chr ) import Data.List ( isPrefixOf, isSuffixOf ) +import Control.Monad ( when ) -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ParserState -- ^ Parser state, including options for parser @@ -169,12 +170,13 @@ header :: GenParser Char ParserState Block header = try $ do char '\\' subs <- many (try (string "sub")) - string "section" + base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4) optional (char '*') + optional $ bracketedText '[' ']' -- alt title char '{' title' <- manyTill inline (char '}') spaces - return $ Header (length subs + 1) (normalizeSpaces title') + return $ Header (length subs + base) (normalizeSpaces title') -- -- hrule block @@ -317,19 +319,19 @@ title = try $ do authors :: GenParser Char ParserState Block authors = try $ do string "\\author{" - authors' <- manyTill anyChar (char '}') + raw <- many1 (notFollowedBy (char '}') >> inline) + let authors' = map normalizeSpaces $ splitBy LineBreak raw + char '}' spaces - let authors'' = map removeLeadingTrailingSpace $ lines $ - substitute "\\\\" "\n" authors' - updateState (\s -> s { stateAuthors = authors'' }) + updateState (\s -> s { stateAuthors = authors' }) return Null date :: GenParser Char ParserState Block date = try $ do string "\\date{" - date' <- manyTill anyChar (char '}') + date' <- manyTill inline (char '}') spaces - updateState (\state -> state { stateDate = date' }) + updateState (\state -> state { stateDate = normalizeSpaces date' }) return Null -- @@ -399,21 +401,22 @@ unknownCommand = try $ do notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", "document"] state <- getState - if stateParserContext state == ListItemState - then notFollowedBy' $ string "\\item" - else return () + when (stateParserContext state == ListItemState) $ + notFollowedBy' (string "\\item") if stateParseRaw state then do (name, star, args) <- command spaces return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)] - else do -- skip unknown command, leaving arguments to be parsed - char '\\' - letter - many (letter <|> digit) - optional (try $ string "{}") + else do + (name, _, args) <- command spaces - return Null + if name `elem` commandsToIgnore + then return Null + else return $ Plain [Str $ concat args] + +commandsToIgnore :: [String] +commandsToIgnore = ["special","pdfannot","pdfstringdef"] -- latex comment comment :: GenParser Char st Block @@ -429,7 +432,6 @@ inline = choice [ str , whitespace , quoted , apostrophe - , spacer , strong , math , ellipses @@ -449,6 +451,7 @@ inline = choice [ str , footnote , linebreak , accentedChar + , nonbreakingSpace , specialChar , rawLaTeXInline , escapedChar @@ -539,7 +542,7 @@ sect = try (string "\\S") >> return (Str [chr 167]) escapedChar :: GenParser Char st Inline escapedChar = do - result <- escaped (oneOf " $%&_#{}\n") + result <- escaped (oneOf specialChars) return $ if result == Str "\n" then Str " " else result -- nonescaped special characters @@ -547,7 +550,15 @@ unescapedChar :: GenParser Char st Inline unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c]) specialChar :: GenParser Char st Inline -specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ] +specialChar = choice [ spacer, interwordSpace, + backslash, tilde, caret, + bar, lt, gt, doubleQuote ] + +spacer :: GenParser Char st Inline +spacer = try (string "\\,") >> return (Str "") + +interwordSpace :: GenParser Char st Inline +interwordSpace = try (string "\\ ") >> return (Str "\160") backslash :: GenParser Char st Inline backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\") @@ -664,15 +675,15 @@ strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= return . Strong whitespace :: GenParser Char st Inline -whitespace = many1 (oneOf "~ \t") >> return Space +whitespace = many1 (oneOf " \t") >> return Space + +nonbreakingSpace :: GenParser Char st Inline +nonbreakingSpace = char '~' >> return (Str "\160") -- hard line break linebreak :: GenParser Char st Inline linebreak = try (string "\\\\") >> return LineBreak -spacer :: GenParser Char st Inline -spacer = try (string "\\,") >> return (Str "") - str :: GenParser Char st Inline str = many1 (noneOf specialChars) >>= return . Str @@ -764,15 +775,16 @@ footnote = try $ do -- | Parse any LaTeX command and return it in a raw TeX inline element. rawLaTeXInline :: GenParser Char ParserState Inline rawLaTeXInline = try $ do - notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"] + notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore", + "\\section"] state <- getState if stateParseRaw state then do (name, star, args) <- command return $ TeX ("\\" ++ name ++ star ++ concat args) - else do -- skip unknown command, leaving arguments to be parsed - char '\\' - letter - many (letter <|> digit) - optional (try $ string "{}") - return $ Str "" + else do + (name, _, args) <- command + spaces + if name `elem` commandsToIgnore + then return $ Str "" + else return $ Str (concat args) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0de700537..82c761685 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {- Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> @@ -32,9 +31,9 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex, intercalate ) +import Data.List ( transpose, isPrefixOf, isSuffixOf, sortBy, findIndex, intercalate ) import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum, isUpper ) +import Data.Char ( isAlphaNum ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Shared @@ -68,12 +67,16 @@ setextHChars = "=-" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" +specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221;" -- -- auxiliary functions -- +-- | Replace spaces with %20 +uriEscapeSpaces :: String -> String +uriEscapeSpaces = substitute " " "%20" + indentSpaces :: GenParser Char ParserState [Char] indentSpaces = try $ do state <- getState @@ -130,30 +133,37 @@ inlinesInBalancedBrackets parser = try $ do -- titleLine :: GenParser Char ParserState [Inline] -titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline +titleLine = try $ do + char '%' + skipSpaces + res <- many $ (notFollowedBy newline >> inline) + <|> try (endline >> whitespace) + newline + return $ normalizeSpaces res -authorsLine :: GenParser Char st [String] +authorsLine :: GenParser Char ParserState [[Inline]] authorsLine = try $ do char '%' skipSpaces - authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") + authors <- sepEndBy (many (notFollowedBy (oneOf ";\n") >> inline)) + (char ';' <|> + try (newline >> notFollowedBy blankline >> spaceChar)) newline - return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors + return $ filter (not . null) $ map normalizeSpaces authors -dateLine :: GenParser Char st String +dateLine :: GenParser Char ParserState [Inline] dateLine = try $ do char '%' skipSpaces - date <- many (noneOf "\n") - newline - return $ decodeCharacterReferences $ removeTrailingSpace date + date <- manyTill inline newline + return $ normalizeSpaces date -titleBlock :: GenParser Char ParserState ([Inline], [String], [Char]) +titleBlock :: GenParser Char ParserState ([Inline], [[Inline]], [Inline]) titleBlock = try $ do failIfStrict title <- option [] titleLine author <- option [] authorsLine - date <- option "" dateLine + date <- option [] dateLine optional blanklines return (title, author, date) @@ -175,7 +185,7 @@ parseMarkdown = do let reversedNotes = stateNotes st' updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... - (title, author, date) <- option ([],[],"") titleBlock + (title, author, date) <- option ([],[],[]) titleBlock blocks <- parseBlocks return $ Pandoc (Meta title author date) $ filter (/= Null) blocks @@ -196,7 +206,7 @@ referenceKey = try $ do tit <- option "" referenceTitle blanklines endPos <- getPosition - let newkey = (lab, (intercalate "+" $ words $ removeTrailingSpace src, tit)) + let newkey = (lab, (uriEscapeSpaces $ removeTrailingSpace src, tit)) st <- getState let oldkeys = stateKeys st updateState $ \s -> s { stateKeys = newkey : oldkeys } @@ -212,8 +222,8 @@ referenceTitle = try $ do notFollowedBy (noneOf ")\n"))) return $ decodeCharacterReferences tit -noteMarker :: GenParser Char st [Char] -noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']') +noteMarker :: GenParser Char ParserState [Char] +noteMarker = skipNonindentSpaces >> string "[^" >> manyTill (noneOf " \t\n") (char ']') rawLine :: GenParser Char ParserState [Char] rawLine = do @@ -399,8 +409,10 @@ codeBlockIndented = do lhsCodeBlock :: GenParser Char ParserState Block lhsCodeBlock = do failUnlessLHS - contents <- lhsCodeBlockBird <|> lhsCodeBlockLaTeX - return $ CodeBlock ("",["sourceCode","literate","haskell"],[]) contents + liftM (CodeBlock ("",["sourceCode","literate","haskell"],[])) + (lhsCodeBlockBird <|> lhsCodeBlockLaTeX) + <|> liftM (CodeBlock ("",["sourceCode","haskell"],[])) + lhsCodeBlockInverseBird lhsCodeBlockLaTeX :: GenParser Char ParserState String lhsCodeBlockLaTeX = try $ do @@ -411,10 +423,16 @@ lhsCodeBlockLaTeX = try $ do return $ stripTrailingNewlines contents lhsCodeBlockBird :: GenParser Char ParserState String -lhsCodeBlockBird = try $ do +lhsCodeBlockBird = lhsCodeBlockBirdWith '>' + +lhsCodeBlockInverseBird :: GenParser Char ParserState String +lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' + +lhsCodeBlockBirdWith :: Char -> GenParser Char ParserState String +lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" - lns <- many1 birdTrackLine + lns <- many1 $ birdTrackLine c -- if (as is normal) there is always a space after >, drop it let lns' = if all (\ln -> null ln || take 1 ln == " ") lns then map (drop 1) lns @@ -422,9 +440,9 @@ lhsCodeBlockBird = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: GenParser Char st [Char] -birdTrackLine = do - char '>' +birdTrackLine :: Char -> GenParser Char st [Char] +birdTrackLine c = do + char c manyTill anyChar newline @@ -481,7 +499,7 @@ anyOrderedListStart = try $ do -- if it could be an abbreviated first name, insist on more than one space if delim == Period && (style == UpperAlpha || (style == UpperRoman && num `elem` [1, 5, 10, 50, 100, 500, 1000])) - then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper)) + then char '\t' <|> (try $ char ' ' >> spaceChar) else spaceChar skipSpaces return (num, style, delim) @@ -947,7 +965,7 @@ code = try $ do starts <- many1 (char '`') skipSpaces result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> - (char '\n' >> return " ")) + (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) return $ Code $ removeLeadingTrailingSpace $ concat result @@ -1126,7 +1144,8 @@ likelyAbbrev x = let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.", - "Rev.", "Ph.D.", "M.D.", "M.A." ] + "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.", + "ch.", "sec." ] abbrPairs = map (break (=='.')) abbrevs in map snd $ filter (\(y,_) -> y == x) abbrPairs @@ -1175,7 +1194,7 @@ source' = do tit <- option "" linkTitle skipSpaces eof - return (intercalate "+" $ words $ removeTrailingSpace src, tit) + return (uriEscapeSpaces $ removeTrailingSpace src, tit) linkTitle :: GenParser Char st String linkTitle = try $ do diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index d1515c4d5..2496d1823 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -33,8 +33,8 @@ module Text.Pandoc.Readers.RST ( import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.ParserCombinators.Parsec -import Control.Monad ( when ) -import Data.List ( findIndex, delete, intercalate ) +import Control.Monad ( when, unless, liftM ) +import Data.List ( findIndex, delete, intercalate, transpose ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -- ^ Parser state, including options for parser @@ -127,6 +127,7 @@ block = choice [ codeBlock , header , hrule , lineBlock -- must go before definitionList + , table , list , lhsCodeBlock , para @@ -157,11 +158,13 @@ fieldList = try $ do let authors = case lookup "Authors" items of Just auth -> [auth] Nothing -> map snd (filter (\(x,_) -> x == "Author") items) - if null authors - then return () - else updateState $ \st -> st {stateAuthors = authors} + unless (null authors) $ do + authors' <- mapM (parseFromString (many inline)) authors + updateState $ \st -> st {stateAuthors = map normalizeSpaces authors'} case (lookup "Date" items) of - Just dat -> updateState $ \st -> st {stateDate = dat} + Just dat -> do + dat' <- parseFromString (many inline) dat + updateState $ \st -> st{ stateDate = normalizeSpaces dat' } Nothing -> return () case (lookup "Title" items) of Just tit -> parseFromString (many inline) tit >>= @@ -345,6 +348,7 @@ customCodeBlock = try $ do lhsCodeBlock :: GenParser Char ParserState Block lhsCodeBlock = try $ do failUnlessLHS + optional codeBlockStart pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" lns <- many1 birdTrackLine @@ -577,6 +581,211 @@ regularKey = try $ do src <- targetURI return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) +-- +-- tables +-- + +-- General tables TODO: +-- - figure out if leading spaces are acceptable and if so, add +-- support for them +-- +-- Simple tables TODO: +-- - column spans +-- - multiline support +-- - ensure that rightmost column span does not need to reach end +-- - require at least 2 columns +-- +-- Grid tables TODO: +-- - column spans + +dashedLine :: Char -> GenParser Char st (Int, Int) +dashedLine ch = do + dashes <- many1 (char ch) + sp <- many (char ' ') + return (length dashes, length $ dashes ++ sp) + +simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] +simpleDashedLines ch = try $ many1 (dashedLine ch) + +gridPart :: Char -> GenParser Char st (Int, Int) +gridPart ch = do + dashes <- many1 (char ch) + char '+' + return (length dashes, length dashes + 1) + +gridDashedLines :: Char -> GenParser Char st [(Int,Int)] +gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline + +-- Parse a table row separator +simpleTableSep :: Char -> GenParser Char ParserState Char +simpleTableSep ch = try $ simpleDashedLines ch >> newline + +gridTableSep :: Char -> GenParser Char ParserState Char +gridTableSep ch = try $ gridDashedLines ch >> return '\n' + +-- Parse a table footer +simpleTableFooter :: GenParser Char ParserState [Char] +simpleTableFooter = try $ simpleTableSep '=' >> blanklines + +gridTableFooter :: GenParser Char ParserState [Char] +gridTableFooter = blanklines + +-- Parse a raw line and split it into chunks by indices. +simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] +simpleTableRawLine indices = do + line <- many1Till anyChar newline + return (simpleTableSplitLine indices line) + +gridTableRawLine :: [Int] -> GenParser Char ParserState [String] +gridTableRawLine indices = do + char '|' + line <- many1Till anyChar newline + return (gridTableSplitLine indices $ removeTrailingSpace line) + +-- Parse a table row and return a list of blocks (columns). +simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] +simpleTableRow indices = do + notFollowedBy' simpleTableFooter + firstLine <- simpleTableRawLine indices + colLines <- return [] -- TODO + let cols = map unlines . transpose $ firstLine : colLines + mapM (parseFromString (many plain)) cols + +gridTableRow :: [Int] + -> GenParser Char ParserState [[Block]] +gridTableRow indices = do + colLines <- many1 (gridTableRawLine indices) + let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ + transpose colLines + mapM (liftM compactifyCell . parseFromString (many block)) cols + +compactifyCell :: [Block] -> [Block] +compactifyCell bs = head $ compactify [bs] + +simpleTableSplitLine :: [Int] -> String -> [String] +simpleTableSplitLine indices line = + map removeLeadingTrailingSpace + $ tail $ splitByIndices (init indices) line + +gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine indices line = + map removeFinalBar $ tail $ splitByIndices (init indices) line + +removeFinalBar :: String -> String +removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") . + reverse + +removeOneLeadingSpace :: [String] -> [String] +removeOneLeadingSpace xs = + if all startsWithSpace xs + then map (drop 1) xs + else xs + where startsWithSpace "" = True + startsWithSpace (y:_) = y == ' ' + +-- Calculate relative widths of table columns, based on indices +widthsFromIndices :: Int -- Number of columns on terminal + -> [Int] -- Indices + -> [Double] -- Fractional relative sizes of columns +widthsFromIndices _ [] = [] +widthsFromIndices numColumns indices = + let lengths' = zipWith (-) indices (0:indices) + lengths = reverse $ + case reverse lengths' of + [] -> [] + [x] -> [x] + -- compensate for the fact that intercolumn + -- spaces are counted in widths of all columns + -- but the last... + (x:y:zs) -> if x < y && y - x <= 2 + then y:y:zs + else x:y:zs + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs + +simpleTableHeader :: Bool -- ^ Headerless table + -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) +simpleTableHeader headless = try $ do + optional blanklines + rawContent <- if headless + then return "" + else simpleTableSep '=' >> anyLine + dashes <- simpleDashedLines '=' + newline + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault + let rawHeads = if headless + then replicate (length dashes) "" + else simpleTableSplitLine indices rawContent + return (rawHeads, aligns, indices) + +gridTableHeader :: Bool -- ^ Headerless table + -> GenParser Char ParserState ([String], [Alignment], [Int]) +gridTableHeader headless = try $ do + optional blanklines + dashes <- gridDashedLines '-' + rawContent <- if headless + then return $ repeat "" + else many1 + (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) + if headless + then return () + else gridTableSep '=' >> return () + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments + let rawHeads = if headless + then replicate (length dashes) "" + else map (intercalate " ") $ transpose + $ map (gridTableSplitLine indices) rawContent + return (rawHeads, aligns, indices) + +-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. +tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> ([Int] -> GenParser Char ParserState [[Block]]) + -> GenParser Char ParserState sep + -> GenParser Char ParserState end + -> GenParser Char ParserState Block +tableWith headerParser rowParser lineParser footerParser = try $ do + (rawHeads, aligns, indices) <- headerParser + lines' <- rowParser indices `sepEndBy` lineParser + footerParser + heads <- mapM (parseFromString (many plain)) rawHeads + state <- getState + let captions = [] -- no notion of captions in RST + let numColumns = stateColumns state + let widths = widthsFromIndices numColumns indices + return $ Table captions aligns widths heads lines' + +-- Parse a simple table with '---' header and one line per row. +simpleTable :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block +simpleTable headless = do + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + -- Simple tables get 0s for relative column widths (i.e., use default) + return $ Table c a (replicate (length a) 0) h l + where + sep = return () -- optional (simpleTableSep '-') + +-- Parse a grid table: starts with row of '-' on top, then header +-- (which may be grid), then the rows, +-- which may be grid, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). +gridTable :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block +gridTable headless = + tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter + +table :: GenParser Char ParserState Block +table = gridTable False <|> simpleTable False <|> + gridTable True <|> simpleTable True <?> "table" + + -- -- inline -- @@ -716,4 +925,3 @@ image = try $ do Nothing -> fail "no corresponding key" Just target -> return target return $ Image (normalizeSpaces ref) src - diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index c99fa3e9e..f093ddbee 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, DeriveDataTypeable #-} +{-# LANGUAGE DeriveDataTypeable #-} {- Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> @@ -97,6 +97,7 @@ module Text.Pandoc.Shared ( compactify, Element (..), hierarchicalize, + uniqueIdent, isHeaderBlock, -- * Writer options HTMLMathMethod (..), @@ -104,7 +105,8 @@ module Text.Pandoc.Shared ( WriterOptions (..), defaultWriterOptions, -- * File handling - inDirectory + inDirectory, + readDataFile ) where import Text.Pandoc.Definition @@ -117,12 +119,18 @@ import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, import Data.List ( find, isPrefixOf, intercalate ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) import System.Directory -import Prelude hiding ( putStrLn, writeFile, readFile, getContents ) +import System.FilePath ( (</>) ) +-- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv +-- So we use System.IO.UTF8 only if we have an earlier version +#if MIN_VERSION_base(4,2,0) +#else +import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) import System.IO.UTF8 +#endif import Data.Generics import qualified Control.Monad.State as S import Control.Monad (join) - +import Paths_pandoc (getDataFileName) -- -- List processing -- @@ -665,8 +673,8 @@ data ParserState = ParserState stateTabStop :: Int, -- ^ Tab stop stateStandalone :: Bool, -- ^ Parse bibliographic info? stateTitle :: [Inline], -- ^ Title of document - stateAuthors :: [String], -- ^ Authors of document - stateDate :: String, -- ^ Date of document + stateAuthors :: [[Inline]], -- ^ Authors of document + stateDate :: [Inline], -- ^ Date of document stateStrict :: Bool, -- ^ Use strict markdown syntax? stateSmart :: Bool, -- ^ Use smart typography? stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell @@ -895,7 +903,7 @@ inlineListToIdentifier' [] = "" inlineListToIdentifier' (x:xs) = xAsText ++ inlineListToIdentifier' xs where xAsText = case x of - Str s -> filter (\c -> c `elem` "_-.~" || not (isPunctuation c)) $ + Str s -> filter (\c -> c `elem` "_-." || not (isPunctuation c)) $ intercalate "-" $ words $ map toLower s Emph lst -> inlineListToIdentifier' lst Strikeout lst -> inlineListToIdentifier' lst @@ -945,6 +953,8 @@ headerLtEq :: Int -> Block -> Bool headerLtEq level (Header l _) = l <= level headerLtEq _ _ = False +-- | Generate a unique identifier from a list of inlines. +-- Second argument is a list of already used identifiers. uniqueIdent :: [Inline] -> [String] -> String uniqueIdent title' usedIdents = let baseIdent = inlineListToIdentifier title' @@ -969,6 +979,7 @@ data HTMLMathMethod = PlainMath | JsMath (Maybe String) -- url of jsMath load script | GladTeX | MimeTeX String -- url of mimetex.cgi + | MathML (Maybe String) -- url of MathMLinHTML.js deriving (Show, Read, Eq) -- | Methods for obfuscating email addresses in HTML. @@ -980,17 +991,18 @@ data ObfuscationMethod = NoObfuscation -- | Options for writers data WriterOptions = WriterOptions { writerStandalone :: Bool -- ^ Include header and footer - , writerHeader :: String -- ^ Header for the document - , writerTitlePrefix :: String -- ^ Prefix for HTML titles + , writerTemplate :: String -- ^ Template to use in standalone mode + , writerVariables :: [(String, String)] -- ^ Variables to set in template + , writerIncludeBefore :: String -- ^ Text to include before the body + , writerIncludeAfter :: String -- ^ Text to include after the body , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs , writerTableOfContents :: Bool -- ^ Include table of contents , writerS5 :: Bool -- ^ We're writing S5 + , writerXeTeX :: Bool -- ^ Create latex suitable for use by xetex , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) , writerIncremental :: Bool -- ^ Incremental S5 lists , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerIncludeBefore :: String -- ^ String to include before the body - , writerIncludeAfter :: String -- ^ String to include after the body , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , writerWrapText :: Bool -- ^ Wrap text to line length @@ -1003,17 +1015,18 @@ data WriterOptions = WriterOptions defaultWriterOptions :: WriterOptions defaultWriterOptions = WriterOptions { writerStandalone = False - , writerHeader = "" - , writerTitlePrefix = "" + , writerTemplate = "" + , writerVariables = [] + , writerIncludeBefore = "" + , writerIncludeAfter = "" , writerTabStop = 4 , writerTableOfContents = False , writerS5 = False + , writerXeTeX = False , writerHTMLMathMethod = PlainMath , writerIgnoreNotes = False , writerIncremental = False , writerNumberSections = False - , writerIncludeBefore = "" - , writerIncludeAfter = "" , writerStrictMarkdown = False , writerReferenceLinks = False , writerWrapText = True @@ -1034,3 +1047,12 @@ inDirectory path action = do result <- action setCurrentDirectory oldDir return result + +-- | Read file from specified user data directory or, if not found there, from +-- Cabal data directory. +readDataFile :: Maybe FilePath -> FilePath -> IO String +readDataFile userDir fname = + case userDir of + Nothing -> getDataFileName fname >>= readFile + Just u -> catch (readFile $ u </> fname) + (\_ -> getDataFileName fname >>= readFile) diff --git a/src/Text/Pandoc/TH.hs b/src/Text/Pandoc/TH.hs deleted file mode 100644 index 0dc5a6719..000000000 --- a/src/Text/Pandoc/TH.hs +++ /dev/null @@ -1,65 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{- -Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.TH - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Template haskell functions used by Pandoc modules. --} -module Text.Pandoc.TH ( - contentsOf, - binaryContentsOf, - makeZip - ) where - -import Language.Haskell.TH -import Language.Haskell.TH.Syntax (Lift (..)) -import qualified Data.ByteString as B -import Data.ByteString.Internal ( w2c ) -import Prelude hiding ( readFile ) -import System.IO.UTF8 -import Codec.Archive.Zip -import Text.Pandoc.Shared ( inDirectory ) - --- | Insert contents of text file into a template. -contentsOf :: FilePath -> ExpQ -contentsOf p = lift =<< (runIO $ readFile p) - --- | Insert contents of binary file into a template. --- Note that @Data.ByteString.readFile@ uses binary mode on windows. -binaryContentsOf :: FilePath -> ExpQ -binaryContentsOf p = lift =<< (runIO $ B.readFile p) - -instance Lift B.ByteString where - lift x = return (LitE (StringL $ map w2c $ B.unpack x)) - -instance Lift Archive where - lift x = return (LitE (StringL $ show x )) - --- | Construct zip file from files in a directory, and --- insert into a template. -makeZip :: FilePath -> ExpQ -makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive ["."]) - diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs new file mode 100644 index 000000000..c30af0bfc --- /dev/null +++ b/src/Text/Pandoc/Templates.hs @@ -0,0 +1,211 @@ +{-# LANGUAGE TypeSynonymInstances #-} +{- +Copyright (C) 2009 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Templates + Copyright : Copyright (C) 2009 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +A simple templating system with variable substitution and conditionals. +Example: + +> renderTemplate [("name","Sam"),("salary","50,000")] $ +> "Hi, $name$. $if(salary)$You make $$$salary$.$else$No salary data.$endif$" +> "Hi, John. You make $50,000." + +A slot for an interpolated variable is a variable name surrounded +by dollar signs. To include a literal @$@ in your template, use +@$$@. Variable names must begin with a letter and can contain letters, +numbers, @_@, and @-@. + +The value of a variable will be indented to the same level as the +variable. + +A conditional begins with @$if(variable_name)$@ and ends with @$endif$@. +It may optionally contain an @$else$@ section. The if section is +used if @variable_name@ has a non-null value, otherwise the else section +is used. + +Conditional keywords should not be indented, or unexpected spacing +problems may occur. + +If a variable name is associated with multiple values in the association +list passed to 'renderTemplate', you may use the @$for$@ keyword to +iterate over them: + +> renderTemplate [("name","Sam"),("name","Joe")] $ +> "$for(name)$\nHi, $name$.\n$endfor$" +> "Hi, Sam.\nHi, Joe." + +You may optionally specify separators using @$sep$@: + +> renderTemplate [("name","Sam"),("name","Joe"),("name","Lynn")] $ +> "Hi, $for(name)$$name$$sep$, $endfor$" +> "Hi, Sam, Joe, Lynn." +-} + +module Text.Pandoc.Templates ( renderTemplate + , TemplateTarget + , getDefaultTemplate ) where + +import Text.ParserCombinators.Parsec +import Control.Monad (liftM, when, forM) +import System.FilePath +import Data.List (intercalate, intersperse) +import Text.PrettyPrint (text, Doc) +import Text.XHtml (primHtml, Html) +import Data.ByteString.Lazy.UTF8 (ByteString, fromString) +import Text.Pandoc.Shared (readDataFile) +import qualified Control.Exception.Extensible as E (try, IOException) + +-- | Get default template for the specified writer. +getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first + -> String -- ^ Name of writer + -> IO (Either E.IOException String) +getDefaultTemplate _ "native" = return $ Right "" +getDefaultTemplate user "s5" = getDefaultTemplate user "html" +getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument" +getDefaultTemplate user writer = do + let format = takeWhile (/='+') writer -- strip off "+lhs" if present + let fname = "templates" </> format <.> "template" + E.try $ readDataFile user fname + +data TemplateState = TemplateState Int [(String,String)] + +adjustPosition :: String -> GenParser Char TemplateState String +adjustPosition str = do + let lastline = takeWhile (/= '\n') $ reverse str + updateState $ \(TemplateState pos x) -> + if str == lastline + then TemplateState (pos + length lastline) x + else TemplateState (length lastline) x + return str + +class TemplateTarget a where + toTarget :: String -> a + +instance TemplateTarget String where + toTarget = id + +instance TemplateTarget ByteString where + toTarget = fromString + +instance TemplateTarget Html where + toTarget = primHtml + +instance TemplateTarget Doc where + toTarget = text + +-- | Renders a template +renderTemplate :: TemplateTarget a + => [(String,String)] -- ^ Assoc. list of values for variables + -> String -- ^ Template + -> a +renderTemplate vals templ = + case runParser (do x <- parseTemplate; eof; return x) (TemplateState 0 vals) "template" templ of + Left e -> error $ show e + Right r -> toTarget $ concat r + +reservedWords :: [String] +reservedWords = ["else","endif","for","endfor","sep"] + +parseTemplate :: GenParser Char TemplateState [String] +parseTemplate = + many $ (plaintext <|> escapedDollar <|> conditional <|> for <|> variable) + >>= adjustPosition + +plaintext :: GenParser Char TemplateState String +plaintext = many1 $ noneOf "$" + +escapedDollar :: GenParser Char TemplateState String +escapedDollar = try $ string "$$" >> return "$" + +skipEndline :: GenParser Char st () +skipEndline = try $ skipMany (oneOf " \t") >> newline >> return () + +conditional :: GenParser Char TemplateState String +conditional = try $ do + TemplateState pos vars <- getState + string "$if(" + id' <- ident + string ")$" + -- if newline after the "if", then a newline after "endif" will be swallowed + multiline <- option False $ try $ skipEndline >> return True + ifContents <- liftM concat parseTemplate + -- reset state for else block + setState $ TemplateState pos vars + elseContents <- option "" $ do try (string "$else$") + when multiline $ optional skipEndline + liftM concat parseTemplate + string "$endif$" + when multiline $ optional skipEndline + let conditionSatisfied = case lookup id' vars of + Nothing -> False + Just "" -> False + Just _ -> True + return $ if conditionSatisfied + then ifContents + else elseContents + +for :: GenParser Char TemplateState String +for = try $ do + TemplateState pos vars <- getState + string "$for(" + id' <- ident + string ")$" + -- if newline after the "if", then a newline after "endif" will be swallowed + multiline <- option False $ try $ skipEndline >> return True + let matches = filter (\(k,_) -> k == id') vars + let indent = replicate pos ' ' + contents <- forM matches $ \m -> do + updateState $ \(TemplateState p v) -> TemplateState p (m:v) + raw <- liftM concat $ lookAhead parseTemplate + return $ intercalate ('\n':indent) $ lines $ raw ++ "\n" + parseTemplate + sep <- option "" $ do try (string "$sep$") + when multiline $ optional skipEndline + liftM concat parseTemplate + string "$endfor$" + when multiline $ optional skipEndline + setState $ TemplateState pos vars + return $ concat $ intersperse sep contents + +ident :: GenParser Char TemplateState String +ident = do + first <- letter + rest <- many (alphaNum <|> oneOf "_-") + let id' = first : rest + if id' `elem` reservedWords + then pzero + else return id' + +variable :: GenParser Char TemplateState String +variable = try $ do + char '$' + id' <- ident + char '$' + TemplateState pos vars <- getState + let indent = replicate pos ' ' + return $ case lookup id' vars of + Just val -> intercalate ('\n' : indent) $ lines val + Nothing -> "" diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 142c862ef..545acded5 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -33,8 +33,8 @@ import Text.Pandoc.Shared import Text.Printf ( printf ) import Data.List ( isSuffixOf, intercalate, intersperse ) import Control.Monad.State -import Control.Monad (liftM) import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Text.Pandoc.Templates ( renderTemplate ) data WriterState = WriterState { stNextRef :: Int -- number of next URL reference @@ -52,52 +52,28 @@ writeConTeXt options document = , stOrderedListLevel = 0 , stOptions = options } - in render $ - evalState (pandocToConTeXt options document) defaultWriterState + in evalState (pandocToConTeXt options document) defaultWriterState -pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToConTeXt options (Pandoc meta blocks) = do - main <- blockListToConTeXt blocks - let before = if null (writerIncludeBefore options) - then empty - else text $ writerIncludeBefore options - let after = if null (writerIncludeAfter options) - then empty - else text $ writerIncludeAfter options - let body = before $$ main $$ after - head' <- if writerStandalone options - then contextHeader options meta - else return empty - let toc = if writerTableOfContents options - then text "\\placecontent\n" - else empty - let foot = if writerStandalone options - then text "\\stoptext\n" - else empty - return $ head' $$ toc $$ body $$ foot - --- | Insert bibliographic information into ConTeXt header. -contextHeader :: WriterOptions -- ^ Options, including ConTeXt header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState Doc -contextHeader options (Meta title authors date) = do - titletext <- if null title - then return empty - else inlineListToConTeXt title - let authorstext = if null authors - then "" - else if length authors == 1 - then stringToConTeXt $ head authors - else stringToConTeXt $ (intercalate ", " $ - init authors) ++ " & " ++ last authors - let datetext = if date == "" - then "" - else stringToConTeXt date - let titleblock = text "\\doctitle{" <> titletext <> char '}' $$ - text ("\\author{" ++ authorstext ++ "}") $$ - text ("\\date{" ++ datetext ++ "}") - let header = text $ writerHeader options - return $ header $$ titleblock $$ text "\\starttext\n\\maketitle\n" +pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String +pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do + titletext <- if null title + then return "" + else liftM render $ inlineListToConTeXt title + authorstext <- mapM (liftM render . inlineListToConTeXt) authors + datetext <- if null date + then return "" + else liftM render $ inlineListToConTeXt date + body <- blockListToConTeXt blocks + let main = render body + let context = writerVariables options ++ + [ ("toc", if writerTableOfContents options then "yes" else "") + , ("body", main) + , ("title", titletext) + , ("date", datetext) ] ++ + [ ("author", a) | a <- authorstext ] + return $ if writerStandalone options + then renderTemplate context $ writerTemplate options + else main -- escape things as needed for ConTeXt @@ -133,6 +109,10 @@ blockToConTeXt (Plain lst) = do let options = stOptions st contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst return $ Reg contents +blockToConTeXt (Para [Image txt (src,_)]) = do + capt <- inlineListToConTeXt txt + return $ Pad $ text "\\placefigure[here,nonumber]{" <> capt <> + text "}{\\externalfigure[" <> text src <> text "]}" blockToConTeXt (Para lst) = do st <- get let options = stOptions st @@ -203,13 +183,15 @@ blockToConTeXt (Table caption aligns widths heads rows) = do else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|") let colDescriptors = "|" ++ (concat $ zipWith colDescriptor widths aligns) - headers <- tableRowToConTeXt heads + headers <- if all null heads + then return empty + else liftM ($$ text "\\HL") $ tableRowToConTeXt heads captionText <- inlineListToConTeXt caption let captionText' = if null caption then text "none" else captionText rows' <- mapM tableRowToConTeXt rows return $ Pad $ text "\\placetable[here]{" <> captionText' <> char '}' $$ text "\\starttable[" <> text colDescriptors <> char ']' $$ - text "\\HL" $$ headers $$ text "\\HL" $$ + text "\\HL" $$ headers $$ vcat rows' $$ text "\\HL\n\\stoptable" tableRowToConTeXt :: [[Block]] -> State WriterState Doc @@ -287,10 +269,8 @@ inlineToConTeXt (Link txt (src, _)) = do label <- inlineListToConTeXt txt return $ text "\\useURL[" <> text ref <> text "][" <> text src <> text "][][" <> label <> text "]\\from[" <> text ref <> char ']' -inlineToConTeXt (Image alternate (src, tit)) = do - alt <- inlineListToConTeXt alternate - return $ text "\\placefigure\n[]\n[fig:" <> alt <> text "]\n{" <> - text tit <> text "}\n{\\externalfigure[" <> text src <> text "]}" +inlineToConTeXt (Image _ (src, _)) = do + return $ text "{\\externalfigure[" <> text src <> text "]}" inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents let rawnote = stripTrailingNewlines $ render contents' diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index b46bb0eb4..fc97ed3ac 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -31,53 +31,49 @@ module Text.Pandoc.Writers.Docbook ( writeDocbook) where import Text.Pandoc.Definition import Text.Pandoc.XML import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath -import Data.List ( isPrefixOf, drop, intercalate ) +import Data.List ( isPrefixOf, intercalate ) import Data.Char ( toLower ) import Text.PrettyPrint.HughesPJ hiding ( Str ) import Text.Pandoc.Highlighting (languages, languagesByExtension) -- | Convert list of authors to a docbook <author> section -authorToDocbook :: [Char] -> Doc -authorToDocbook name = inTagsIndented "author" $ - if ',' `elem` name - then -- last name first - let (lastname, rest) = break (==',') name - firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) - else -- last name last - let namewords = words name - lengthname = length namewords - (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (intercalate " " (take (n-1) namewords), last namewords) - in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) +authorToDocbook :: WriterOptions -> [Inline] -> Doc +authorToDocbook opts name' = + let name = render $ inlinesToDocbook opts name' + in if ',' `elem` name + then -- last name first + let (lastname, rest) = break (==',') name + firstname = removeLeadingSpace rest in + inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> + inTagsSimple "surname" (text $ escapeStringForXML lastname) + else -- last name last + let namewords = words name + lengthname = length namewords + (firstname, lastname) = case lengthname of + 0 -> ("","") + 1 -> ("", name) + n -> (intercalate " " (take (n-1) namewords), last namewords) + in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (text $ escapeStringForXML lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc (Meta title authors date) blocks) = - let head' = if writerStandalone opts - then text (writerHeader opts) - else empty - meta = if writerStandalone opts - then inTagsIndented "articleinfo" $ - (inTagsSimple "title" (wrap opts title)) $$ - (vcat (map authorToDocbook authors)) $$ - (inTagsSimple "date" (text $ escapeStringForXML date)) - else empty +writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = + let title = wrap opts tit + authors = map (authorToDocbook opts) auths + date = inlinesToDocbook opts dat elements = hierarchicalize blocks - before = writerIncludeBefore opts - after = writerIncludeAfter opts - body = (if null before then empty else text before) $$ - vcat (map (elementToDocbook opts) elements) $$ - (if null after then empty else text after) - body' = if writerStandalone opts - then inTagsIndented "article" (meta $$ body) - else body - in render $ head' $$ body' $$ text "" + main = render $ vcat (map (elementToDocbook opts) elements) + context = writerVariables opts ++ + [ ("body", main) + , ("title", render title) + , ("date", render date) ] ++ + [ ("author", render a) | a <- authors ] + in if writerStandalone opts + then renderTemplate context $ writerTemplate opts + else main -- | Convert an Element to Docbook. elementToDocbook :: WriterOptions -> Element -> Doc @@ -128,6 +124,14 @@ blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = wrap opts lst +blockToDocbook opts (Para [Image txt (src,_)]) = + let capt = inlinesToDocbook opts txt + in inTagsIndented "figure" $ + inTagsSimple "title" capt $$ + (inTagsIndented "mediaobject" $ + (inTagsIndented "imageobject" + (selfClosingTag "imagedata" [("fileref",src)])) $$ + inTagsSimple "textobject" (inTagsSimple "phrase" capt)) blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" $ blocksToDocbook opts blocks @@ -167,24 +171,22 @@ blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = let alignStrings = map alignmentToString aligns captionDoc = if null caption - then empty - else inTagsIndented "caption" - (inlinesToDocbook opts caption) + then empty + else inTagsIndented "caption" + (inlinesToDocbook opts caption) tableType = if isEmpty captionDoc then "informaltable" else "table" - in inTagsIndented tableType $ captionDoc $$ - (colHeadsToDocbook opts alignStrings widths headers) $$ - (vcat $ map (tableRowToDocbook opts alignStrings) rows) - -colHeadsToDocbook :: WriterOptions - -> [[Char]] - -> [Double] - -> [[Block]] - -> Doc -colHeadsToDocbook opts alignStrings widths headers = - let heads = zipWith3 (\align width item -> - tableItemToDocbook opts "th" align width item) - alignStrings widths headers - in inTagsIndented "tr" $ vcat heads + percent w = show (truncate (100*w) :: Integer) ++ "%" + coltags = if all (== 0.0) widths + then empty + else vcat $ map (\w -> + selfClosingTag "col" [("width", percent w)]) widths + head' = if all null headers + then empty + else inTagsIndented "thead" $ + tableRowToDocbook opts alignStrings "th" headers + body' = inTagsIndented "tbody" $ + vcat $ map (tableRowToDocbook opts alignStrings "td") rows + in inTagsIndented tableType $ captionDoc $$ coltags $$ head' $$ body' alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -193,22 +195,22 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableRowToDocbook :: WriterOptions -> [[Char]] -> [[Block]] -> Doc -tableRowToDocbook opts aligns cols = inTagsIndented "tr" $ - vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols +tableRowToDocbook :: WriterOptions + -> [String] + -> String + -> [[Block]] + -> Doc +tableRowToDocbook opts aligns celltype cols = + inTagsIndented "tr" $ vcat $ + zipWith (tableItemToDocbook opts celltype) aligns cols tableItemToDocbook :: WriterOptions -> [Char] -> [Char] - -> Double -> [Block] -> Doc -tableItemToDocbook opts tag align width item = - let attrib = [("align", align)] ++ - if width /= 0 - then [("style", "{width: " ++ - show (truncate (100*width) :: Integer) ++ "%;}")] - else [] +tableItemToDocbook opts tag align item = + let attrib = [("align", align)] in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item -- | Take list of inline elements and return wrapped doc. @@ -243,9 +245,9 @@ inlineToDocbook opts (Quoted _ lst) = inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst inlineToDocbook _ Apostrophe = char '\'' -inlineToDocbook _ Ellipses = text "…" -inlineToDocbook _ EmDash = text "—" -inlineToDocbook _ EnDash = text "–" +inlineToDocbook _ Ellipses = text "…" +inlineToDocbook _ EmDash = text "—" +inlineToDocbook _ EnDash = text "–" inlineToDocbook _ (Code str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index e0e3882fe..d33dcff27 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -30,132 +30,145 @@ Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition -import Text.Pandoc.LaTeXMathML import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.Pandoc.Shared +import Text.Pandoc.Templates import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss ) -import Text.Pandoc.XML (stripTags) +import Text.Pandoc.Highlighting ( highlightHtml ) +import Text.Pandoc.XML (stripTags, escapeStringForXML) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.Maybe ( catMaybes ) -import qualified Data.Set as S import Control.Monad.State import Text.XHtml.Transitional hiding ( stringToHtml ) +import Text.TeXMath +import Text.XML.Light.Output data WriterState = WriterState - { stNotes :: [Html] -- ^ List of notes - , stMath :: Bool -- ^ Math is used in document - , stCSS :: S.Set String -- ^ CSS to include in header - , stSecNum :: [Int] -- ^ Number of current section + { stNotes :: [Html] -- ^ List of notes + , stMath :: Bool -- ^ Math is used in document + , stHighlighting :: Bool -- ^ Syntax highlighting is used + , stSecNum :: [Int] -- ^ Number of current section } deriving Show defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stMath = False, stCSS = S.empty, stSecNum = []} +defaultWriterState = WriterState {stNotes= [], stMath = False, stHighlighting = False, stSecNum = []} -- Helpers to render HTML with the appropriate function. -render :: (HTML html) => WriterOptions -> html -> String -render opts = if writerWrapText opts then renderHtml else showHtml - renderFragment :: (HTML html) => WriterOptions -> html -> String renderFragment opts = if writerWrapText opts then renderHtmlFragment else showHtmlFragment --- | Slightly modified version of Text.XHtml's stringToHtml. --- Only uses numerical entities for 0xff and greater. --- Adds . +-- | Modified version of Text.XHtml's stringToHtml. +-- Use unicode characters wherever possible. stringToHtml :: String -> Html -stringToHtml = primHtml . concatMap fixChar - where - fixChar '<' = "<" - fixChar '>' = ">" - fixChar '&' = "&" - fixChar '"' = """ - fixChar '\160' = " " - fixChar c | ord c < 0xff = [c] - fixChar c = "&#" ++ show (ord c) ++ ";" +stringToHtml = primHtml . escapeStringForXML -- | Convert Pandoc document to Html string. writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts = - if writerStandalone opts - then render opts . writeHtml opts - else renderFragment opts . writeHtml opts +writeHtmlString opts d = + let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d) + defaultWriterState + in if writerStandalone opts + then inTemplate opts tit auths date toc body' newvars + else renderFragment opts body' -- | Convert Pandoc document to Html structure. writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts (Pandoc (Meta tit authors date) blocks) = - let titlePrefix = writerTitlePrefix opts - (topTitle,st) = runState (inlineListToHtml opts tit) defaultWriterState - topTitle'' = stripTags $ showHtmlFragment topTitle - topTitle' = titlePrefix ++ - (if null topTitle'' || null titlePrefix - then "" - else " - ") ++ topTitle'' - metadata = thetitle << topTitle' +++ - meta ! [httpequiv "Content-Type", - content "text/html; charset=UTF-8"] +++ - meta ! [name "generator", content "pandoc"] +++ - (toHtmlFromList $ - map (\a -> meta ! [name "author", content a]) authors) +++ - (if null date - then noHtml - else meta ! [name "date", content date]) - titleHeader = if writerStandalone opts && not (null tit) && - not (writerS5 opts) - then h1 ! [theclass "title"] $ topTitle - else noHtml - sects = hierarchicalize blocks - toc = if writerTableOfContents opts - then evalState (tableOfContents opts sects) st - else noHtml - (blocks', st') = runState - (mapM (elementToHtml opts) sects >>= return . toHtmlFromList) - st - cssLines = stCSS st' - css = if S.null cssLines - then noHtml - else style ! [thetype "text/css"] $ primHtml $ - '\n':(unlines $ S.toList cssLines) - math = if stMath st' - then case writerHTMLMathMethod opts of - LaTeXMathML Nothing -> - primHtml latexMathMLScript - LaTeXMathML (Just url) -> - script ! - [src url, thetype "text/javascript"] $ - noHtml - JsMath (Just url) -> - script ! - [src url, thetype "text/javascript"] $ - noHtml - _ -> noHtml - else noHtml - head' = header $ metadata +++ math +++ css +++ - primHtml (writerHeader opts) - notes = reverse (stNotes st') - before = primHtml $ writerIncludeBefore opts - after = primHtml $ writerIncludeAfter opts - thebody = before +++ titleHeader +++ toc +++ blocks' +++ - footnoteSection notes +++ after +writeHtml opts d = + let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d) + defaultWriterState in if writerStandalone opts - then head' +++ body thebody - else thebody + then inTemplate opts tit auths date toc body' newvars + else body' + +-- result is (title, authors, date, toc, body, new variables) +pandocToHtml :: WriterOptions + -> Pandoc + -> State WriterState (Html, [Html], Html, Maybe Html, Html, [(String,String)]) +pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do + let standalone = writerStandalone opts + tit <- if standalone + then inlineListToHtml opts title' + else return noHtml + auths <- if standalone + then mapM (inlineListToHtml opts) authors' + else return [] + date <- if standalone + then inlineListToHtml opts date' + else return noHtml + let sects = hierarchicalize blocks + toc <- if writerTableOfContents opts + then tableOfContents opts sects + else return Nothing + blocks' <- liftM toHtmlFromList $ mapM (elementToHtml opts) sects + st <- get + let notes = reverse (stNotes st) + let thebody = blocks' +++ footnoteSection notes + let math = if stMath st + then case writerHTMLMathMethod opts of + LaTeXMathML (Just url) -> + script ! + [src url, thetype "text/javascript"] $ noHtml + MathML (Just url) -> + script ! + [src url, thetype "text/javascript"] $ noHtml + JsMath (Just url) -> + script ! + [src url, thetype "text/javascript"] $ noHtml + _ -> case lookup "mathml-script" (writerVariables opts) of + Just s -> + script ! [thetype "text/javascript"] << + primHtml s + Nothing -> noHtml + else noHtml + let newvars = [("highlighting","yes") | stHighlighting st] ++ + [("math", renderHtmlFragment math) | stMath st] + return (tit, auths, date, toc, thebody, newvars) + +inTemplate :: TemplateTarget a + => WriterOptions + -> Html + -> [Html] + -> Html + -> Maybe Html + -> Html + -> [(String,String)] + -> a +inTemplate opts tit auths date toc body' newvars = + let renderedTit = showHtmlFragment tit + topTitle' = stripTags renderedTit + authors = map (stripTags . showHtmlFragment) auths + date' = stripTags $ showHtmlFragment date + variables = writerVariables opts ++ newvars + context = variables ++ + [ ("body", renderHtmlFragment body') + , ("pagetitle", topTitle') + , ("title", renderHtmlFragment tit) + , ("date", date') ] ++ + (case toc of + Just t -> [ ("toc", renderHtmlFragment t)] + Nothing -> []) ++ + [ ("author", a) | a <- authors ] + in renderTemplate context $ writerTemplate opts -- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix prefixedId :: WriterOptions -> String -> HtmlAttr prefixedId opts s = identifier $ writerIdentifierPrefix opts ++ s -- | Construct table of contents from list of elements. -tableOfContents :: WriterOptions -> [Element] -> State WriterState Html -tableOfContents _ [] = return noHtml +tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html) +tableOfContents _ [] = return Nothing tableOfContents opts sects = do let opts' = opts { writerIgnoreNotes = True } contents <- mapM (elementToListItem opts') sects - return $ thediv ! [prefixedId opts' "TOC"] $ unordList $ catMaybes contents + let tocList = catMaybes contents + return $ if null tocList + then Nothing + else Just $ thediv ! [prefixedId opts' "TOC"] $ unordList tocList -- | Convert section number to string showSecNum :: [Int] -> String @@ -236,7 +249,7 @@ obfuscateLink opts txt s = linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ noscript (primHtml $ obfuscateString altText) _ -> error $ "Unknown obfuscation method: " ++ show meth - _ -> anchor ! [href s] $ primHtml txt -- malformed email + _ -> anchor ! [href s] $ stringToHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String @@ -249,17 +262,15 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . decodeCharacterReferences --- | Add CSS for document header. -addToCSS :: String -> State WriterState () -addToCSS item = do - st <- get - let current = stCSS st - put $ st {stCSS = S.insert item current} - -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html blockToHtml _ Null = return $ noHtml blockToHtml opts (Plain lst) = inlineListToHtml opts lst +blockToHtml opts (Para [Image txt (s,tit)]) = do + img <- inlineToHtml opts (Image txt (s,tit)) + capt <- inlineListToHtml opts txt + return $ thediv ! [theclass "figure"] << + [img, paragraph ! [theclass "caption"] << capt] blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) blockToHtml _ (RawHtml str) = return $ primHtml str blockToHtml _ (HorizontalRule) = return $ hr @@ -277,7 +288,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do in return $ pre ! attrs $ thecode << (replicate (length leadingBreaks) br +++ [stringToHtml $ rawCode' ++ "\n"]) - Right h -> addToCSS defaultHighlightingCss >> return h + Right h -> modify (\st -> st{ stHighlighting = True }) >> return h blockToHtml opts (BlockQuote blocks) = -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; @@ -344,21 +355,33 @@ blockToHtml opts (Table capt aligns widths headers rows') = do captionDoc <- if null capt then return noHtml else inlineListToHtml opts capt >>= return . caption - colHeads <- colHeadsToHtml opts alignStrings - widths headers - rows'' <- zipWithM (tableRowToHtml opts alignStrings) (cycle ["odd", "even"]) rows' - return $ table $ captionDoc +++ colHeads +++ rows'' - -colHeadsToHtml :: WriterOptions - -> [[Char]] - -> [Double] + let percent w = show (truncate (100*w) :: Integer) ++ "%" + let coltags = if all (== 0.0) widths + then noHtml + else concatHtml $ map + (\w -> col ! [width $ percent w] $ noHtml) widths + head' <- if all null headers + then return noHtml + else liftM (thead <<) $ tableRowToHtml opts alignStrings 0 headers + body' <- liftM (tbody <<) $ + zipWithM (tableRowToHtml opts alignStrings) [1..] rows' + return $ table $ captionDoc +++ coltags +++ head' +++ body' + +tableRowToHtml :: WriterOptions + -> [String] + -> Int -> [[Block]] -> State WriterState Html -colHeadsToHtml opts alignStrings widths headers = do - heads <- sequence $ zipWith3 - (\alignment columnwidth item -> tableItemToHtml opts th alignment columnwidth item) - alignStrings widths headers - return $ tr ! [theclass "header"] $ toHtmlFromList heads +tableRowToHtml opts alignStrings rownum cols' = do + let mkcell = if rownum == 0 then th else td + let rowclass = case rownum of + 0 -> "header" + x | x `rem` 2 == 1 -> "odd" + _ -> "even" + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToHtml opts mkcell alignment item) + alignStrings cols' + return $ tr ! [theclass rowclass] $ toHtmlFromList cols'' alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -367,28 +390,14 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableRowToHtml :: WriterOptions - -> [[Char]] - -> String - -> [[Block]] - -> State WriterState Html -tableRowToHtml opts aligns rowclass columns = - (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) columns) >>= - return . (tr ! [theclass rowclass]) . toHtmlFromList - tableItemToHtml :: WriterOptions -> (Html -> Html) -> [Char] - -> Double -> [Block] -> State WriterState Html -tableItemToHtml opts tag' align' width' item = do +tableItemToHtml opts tag' align' item = do contents <- blockListToHtml opts item - let attrib = [align align'] ++ - if width' /= 0 - then [thestyle ("width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;")] - else [] - return $ tag' ! attrib $ contents + return $ tag' ! [align align'] $ contents blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = @@ -405,11 +414,11 @@ inlineToHtml opts inline = case inline of (Str str) -> return $ stringToHtml str (Space) -> return $ stringToHtml " " - (LineBreak) -> return $ br - (EmDash) -> return $ primHtmlChar "mdash" - (EnDash) -> return $ primHtmlChar "ndash" - (Ellipses) -> return $ primHtmlChar "hellip" - (Apostrophe) -> return $ primHtmlChar "rsquo" + (LineBreak) -> return br + (EmDash) -> return $ stringToHtml "—" + (EnDash) -> return $ stringToHtml "–" + (Ellipses) -> return $ stringToHtml "…" + (Apostrophe) -> return $ stringToHtml "’" (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize (Strong lst) -> inlineListToHtml opts lst >>= return . strong (Code str) -> return $ thecode << str @@ -421,10 +430,10 @@ inlineToHtml opts inline = (Subscript lst) -> inlineListToHtml opts lst >>= return . sub (Quoted quoteType lst) -> let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (primHtmlChar "lsquo", - primHtmlChar "rsquo") - DoubleQuote -> (primHtmlChar "ldquo", - primHtmlChar "rdquo") + SingleQuote -> (stringToHtml "‘", + stringToHtml "’") + DoubleQuote -> (stringToHtml "“", + stringToHtml "”") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote (Math t str) -> @@ -447,7 +456,20 @@ inlineToHtml opts inline = alt str, title str] GladTeX -> return $ primHtml $ "<EQ>" ++ str ++ "</EQ>" - PlainMath -> + MathML _ -> do + let dt = if t == InlineMath + then DisplayInline + else DisplayBlock + let conf = useShortEmptyTags (const False) + defaultConfigPP + case texMathToMathML dt str of + Right r -> return $ primHtml $ + ppcElement conf r + Left _ -> inlineListToHtml opts + (readTeXMath str) >>= + return . (thespan ! + [theclass "math"]) + PlainMath -> inlineListToHtml opts (readTeXMath str) >>= return . (thespan ! [theclass "math"]) ) (TeX str) -> case writerHTMLMathMethod opts of @@ -485,10 +507,10 @@ inlineToHtml opts inline = htmlContents <- blockListToNote opts ref contents -- push contents onto front of notes put $ st {stNotes = (htmlContents:notes)} - return $ anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref), - theclass "footnoteRef", - prefixedId opts ("fnref" ++ ref)] << - sup << ref + return $ sup << + anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref), + theclass "footnoteRef", + prefixedId opts ("fnref" ++ ref)] << ref (Cite _ il) -> inlineListToHtml opts il blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html @@ -497,7 +519,7 @@ blockListToNote opts ref blocks = -- that block. Otherwise, insert a new Plain block with the backlink. let backlink = [HtmlInline $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ "\" class=\"footnoteBackLink\"" ++ - " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] + " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] blocks' = if null blocks then [] else let lastBlock = last blocks diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index af23f9285..02fbf4add 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -30,81 +30,71 @@ Conversion of 'Pandoc' format into LaTeX. module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Templates import Text.Printf ( printf ) -import Data.List ( (\\), isSuffixOf, intercalate, intersperse ) +import Data.List ( (\\), isSuffixOf, isPrefixOf, intersperse ) import Data.Char ( toLower ) -import qualified Data.Set as S import Control.Monad.State -import Control.Monad (liftM) import Text.PrettyPrint.HughesPJ hiding ( Str ) data WriterState = - WriterState { stIncludes :: S.Set String -- strings to include in header - , stInNote :: Bool -- @True@ if we're in a note - , stOLLevel :: Int -- level of ordered list nesting - , stOptions :: WriterOptions -- writer options, so they don't have to be parameter + WriterState { stInNote :: Bool -- @True@ if we're in a note + , stOLLevel :: Int -- level of ordered list nesting + , stOptions :: WriterOptions -- writer options, so they don't have to be parameter + , stVerbInNote :: Bool -- true if document has verbatim text in note + , stEnumerate :: Bool -- true if document needs fancy enumerated lists + , stTable :: Bool -- true if document has a table + , stStrikeout :: Bool -- true if document has strikeout + , stSubscript :: Bool -- true if document has subscript + , stUrl :: Bool -- true if document has visible URL link + , stGraphics :: Bool -- true if document contains images + , stLHS :: Bool -- true if document has literate haskell code + , stBook :: Bool -- true if document uses book or memoir class } --- | Add line to header. -addToHeader :: String -> State WriterState () -addToHeader str = do - st <- get - let includes = stIncludes st - put st {stIncludes = S.insert str includes} - -- | Convert Pandoc to LaTeX. writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = - render $ evalState (pandocToLaTeX options document) $ - WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1, stOptions = options } + evalState (pandocToLaTeX options document) $ + WriterState { stInNote = False, stOLLevel = 1, stOptions = options, + stVerbInNote = False, stEnumerate = False, + stTable = False, stStrikeout = False, stSubscript = False, + stUrl = False, stGraphics = False, + stLHS = False, stBook = False } -pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToLaTeX options (Pandoc meta blocks) = do - main <- blockListToLaTeX blocks - head' <- if writerStandalone options - then latexHeader options meta - else return empty - let before = if null (writerIncludeBefore options) - then empty - else text (writerIncludeBefore options) - let after = if null (writerIncludeAfter options) - then empty - else text (writerIncludeAfter options) - let body = before $$ main $$ after - let toc = if writerTableOfContents options - then text "\\tableofcontents\n" - else empty - let foot = if writerStandalone options - then text "\\end{document}" - else empty - return $ head' $$ toc $$ body $$ foot - --- | Insert bibliographic information into LaTeX header. -latexHeader :: WriterOptions -- ^ Options, including LaTeX header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState Doc -latexHeader options (Meta title authors date) = do - titletext <- if null title - then return empty - else inlineListToLaTeX title >>= return . inCmd "title" - headerIncludes <- get >>= return . S.toList . stIncludes - let extras = text $ unlines headerIncludes - let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes - then text "\\VerbatimFootnotes % allows verbatim text in footnotes" - else empty - let authorstext = text $ "\\author{" ++ - intercalate "\\\\" (map stringToLaTeX authors) ++ "}" - let datetext = if date == "" - then empty - else text $ "\\date{" ++ stringToLaTeX date ++ "}" - let maketitle = if null title then empty else text "\\maketitle" - let secnumline = if (writerNumberSections options) - then empty - else text "\\setcounter{secnumdepth}{0}" - let baseHeader = text $ writerHeader options - let header = baseHeader $$ extras - return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$ - datetext $$ text "\\begin{document}" $$ maketitle $$ text "" +pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String +pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do + let template = writerTemplate options + let usesBookClass x = "\\documentclass" `isPrefixOf` x && + ("{memoir}" `isSuffixOf` x || "{book}" `isSuffixOf` x || + "{report}" `isSuffixOf` x) + when (any usesBookClass (lines template)) $ + modify $ \s -> s{stBook = True} + titletext <- liftM render $ inlineListToLaTeX title + authorsText <- mapM (liftM render . inlineListToLaTeX) authors + dateText <- liftM render $ inlineListToLaTeX date + body <- blockListToLaTeX blocks + let main = render body + st <- get + let context = writerVariables options ++ + [ ("toc", if writerTableOfContents options then "yes" else "") + , ("body", main) + , ("title", titletext) + , ("date", dateText) ] ++ + [ ("author", a) | a <- authorsText ] ++ + [ ("xetex", "yes") | writerXeTeX options ] ++ + [ ("verbatim-in-note", "yes") | stVerbInNote st ] ++ + [ ("fancy-enums", "yes") | stEnumerate st ] ++ + [ ("tables", "yes") | stTable st ] ++ + [ ("strikeout", "yes") | stStrikeout st ] ++ + [ ("subscript", "yes") | stSubscript st ] ++ + [ ("url", "yes") | stUrl st ] ++ + [ ("numbersections", "yes") | writerNumberSections options ] ++ + [ ("lhs", "yes") | stLHS st ] ++ + [ ("graphics", "yes") | stGraphics st ] + return $ if writerStandalone options + then renderTemplate context template + else main -- escape things as needed for LaTeX @@ -140,6 +130,11 @@ blockToLaTeX (Plain lst) = do st <- get let opts = stOptions st wrapTeXIfNeeded opts True inlineListToLaTeX lst +blockToLaTeX (Para [Image txt (src,tit)]) = do + capt <- inlineListToLaTeX txt + img <- inlineToLaTeX (Image txt (src,tit)) + return $ text "\\begin{figure}[htb]" $$ text "\\centering" $$ img $$ + (text "\\caption{" <> capt <> char '}') $$ text "\\end{figure}\n" blockToLaTeX (Para lst) = do st <- get let opts = stOptions st @@ -152,10 +147,13 @@ blockToLaTeX (CodeBlock (_,classes,_) str) = do st <- get env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes && "literate" `elem` classes - then return "code" + then do + modify $ \s -> s{ stLHS = True } + return "code" else if stInNote st - then do addToHeader "\\usepackage{fancyvrb}" - return "Verbatim" + then do + modify $ \s -> s{ stVerbInNote = True } + return "Verbatim" else return "verbatim" return $ text ("\\begin{" ++ env ++ "}\n") <> text str <> text ("\n\\end{" ++ env ++ "}") @@ -170,10 +168,11 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do items <- mapM listItemToLaTeX lst modify (\s -> s {stOLLevel = oldlevel}) exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim - then do addToHeader "\\usepackage{enumerate}" - return $ char '[' <> - text (head (orderedListMarkers (1, numstyle, - numdelim))) <> char ']' + then do + modify $ \s -> s{ stEnumerate = True } + return $ char '[' <> + text (head (orderedListMarkers (1, numstyle, + numdelim))) <> char ']' else return empty let resetcounter = if start /= 1 && oldlevel <= 4 then text $ "\\setcounter{enum" ++ @@ -201,23 +200,28 @@ blockToLaTeX (Header level lst) = do else do res <- inlineListToLaTeX lstNoNotes return $ char '[' <> res <> char ']' - return $ if (level > 0) && (level <= 3) - then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++ - "section") <> optional <> char '{' <> txt <> text "}\n" - else txt <> char '\n' + let stuffing = optional <> char '{' <> txt <> char '}' + book <- liftM stBook get + return $ case (book, level) of + (True, 1) -> text "\\chapter" <> stuffing <> char '\n' + (True, 2) -> text "\\section" <> stuffing <> char '\n' + (True, 3) -> text "\\subsection" <> stuffing <> char '\n' + (True, 4) -> text "\\subsubsection" <> stuffing <> char '\n' + (False, 1) -> text "\\section" <> stuffing <> char '\n' + (False, 2) -> text "\\subsection" <> stuffing <> char '\n' + (False, 3) -> text "\\subsubsection" <> stuffing <> char '\n' + _ -> txt <> char '\n' blockToLaTeX (Table caption aligns widths heads rows) = do - headers <- tableRowToLaTeX heads + headers <- if all null heads + then return empty + else liftM ($$ text "\\hline") $ tableRowToLaTeX heads captionText <- inlineListToLaTeX caption rows' <- mapM tableRowToLaTeX rows let colDescriptors = concat $ zipWith toColDescriptor widths aligns let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$ - headers $$ text "\\hline" $$ vcat rows' $$ - text "\\end{tabular}" + headers $$ vcat rows' $$ text "\\end{tabular}" let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}" - addToHeader $ "\\usepackage{array}\n" ++ - "% This is needed because raggedright in table elements redefines \\\\:\n" ++ - "\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n" ++ - "\\let\\PBS=\\PreserveBackslash" + modify $ \s -> s{ stTable = True } return $ if isEmpty captionText then centered tableBody <> char '\n' else text "\\begin{table}[h]" $$ centered tableBody $$ @@ -276,15 +280,15 @@ inlineToLaTeX (Strong lst) = inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf" inlineToLaTeX (Strikeout lst) = do contents <- inlineListToLaTeX $ deVerb lst - addToHeader "\\usepackage[normalem]{ulem}" + modify $ \s -> s{ stStrikeout = True } return $ inCmd "sout" contents -inlineToLaTeX (Superscript lst) = +inlineToLaTeX (Superscript lst) = inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript" inlineToLaTeX (Subscript lst) = do + modify $ \s -> s{ stSubscript = True } contents <- inlineListToLaTeX $ deVerb lst -- oddly, latex includes \textsuperscript but not \textsubscript -- so we have to define it (using a different name so as not to conflict with memoir class): - addToHeader "\\newcommand{\\textsubscr}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}" return $ inCmd "textsubscr" contents inlineToLaTeX (SmallCaps lst) = inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsc" @@ -292,9 +296,7 @@ inlineToLaTeX (Cite _ lst) = inlineListToLaTeX lst inlineToLaTeX (Code str) = do st <- get - if stInNote st - then do addToHeader "\\usepackage{fancyvrb}" - else return () + when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True } let chr = ((enumFromTo '!' '~') \\ str) !! 0 return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] inlineToLaTeX (Quoted SingleQuote lst) = do @@ -326,17 +328,16 @@ inlineToLaTeX (TeX str) = return $ text str inlineToLaTeX (HtmlInline _) = return empty inlineToLaTeX (LineBreak) = return $ text "\\\\" inlineToLaTeX Space = return $ char ' ' -inlineToLaTeX (Link txt (src, _)) = do - addToHeader "\\usepackage[breaklinks=true]{hyperref}" +inlineToLaTeX (Link txt (src, _)) = case txt of [Code x] | x == src -> -- autolink - do addToHeader "\\usepackage{url}" + do modify $ \s -> s{ stUrl = True } return $ text $ "\\url{" ++ x ++ "}" _ -> do contents <- inlineListToLaTeX $ deVerb txt return $ text ("\\href{" ++ src ++ "}{") <> contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do - addToHeader "\\usepackage{graphicx}" + modify $ \s -> s{ stGraphics = True } return $ text $ "\\includegraphics{" ++ source ++ "}" inlineToLaTeX (Note contents) = do st <- get diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index f6f656c4e..62bb90f8e 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -30,59 +30,51 @@ Conversion of 'Pandoc' documents to groff man page format. -} module Text.Pandoc.Writers.Man ( writeMan) where import Text.Pandoc.Definition +import Text.Pandoc.Templates import Text.Pandoc.Shared import Text.Printf ( printf ) -import Data.List ( isPrefixOf, drop, nub, intersperse, intercalate ) +import Data.List ( isPrefixOf, intersperse, intercalate ) import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State -import Control.Monad ( liftM ) type Notes = [[Block]] -type Preprocessors = [String] -- e.g. "t" for tbl -type WriterState = (Notes, Preprocessors) +data WriterState = WriterState { stNotes :: Notes + , stHasTables :: Bool } -- | Convert Pandoc to Man. writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = render $ evalState (pandocToMan opts document) ([],[]) +writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False) -- | Return groff man representation of document. -pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToMan opts (Pandoc meta blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - let before' = if null before then empty else text before - let after' = if null after then empty else text after - (head', foot) <- metaToMan opts meta - body <- blockListToMan opts blocks - (notes, preprocessors) <- get - let preamble = if null preprocessors || not (writerStandalone opts) - then empty - else text $ ".\\\" " ++ concat (nub preprocessors) - notes' <- notesToMan opts (reverse notes) - return $ preamble $$ head' $$ before' $$ body $$ notes' $$ foot $$ after' - --- | Insert bibliographic information into Man header and footer. -metaToMan :: WriterOptions -- ^ Options, including Man header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState (Doc, Doc) -metaToMan options (Meta title authors date) = do - titleText <- inlineListToMan options title +pandocToMan :: WriterOptions -> Pandoc -> State WriterState String +pandocToMan opts (Pandoc (Meta title authors date) blocks) = do + titleText <- inlineListToMan opts title + authors' <- mapM (inlineListToMan opts) authors + date' <- inlineListToMan opts date let (cmdName, rest) = break (== ' ') $ render titleText let (title', section) = case reverse cmdName of (')':d:'(':xs) | d `elem` ['0'..'9'] -> (text (reverse xs), char d) xs -> (text (reverse xs), doubleQuotes empty) - let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $ - splitBy '|' rest - let head' = (text ".TH") <+> title' <+> section <+> - doubleQuotes (text date) <+> hsep extras - let foot = case length authors of - 0 -> empty - 1 -> text ".SH AUTHOR" $$ (text $ intercalate ", " authors) - _ -> text ".SH AUTHORS" $$ (text $ intercalate ", " authors) - return $ if writerStandalone options - then (head', foot) - else (empty, empty) + let description = hsep $ + map (doubleQuotes . text . removeLeadingTrailingSpace) $ + splitBy '|' rest + body <- blockListToMan opts blocks + notes <- liftM stNotes get + notes' <- notesToMan opts (reverse notes) + let main = render $ body $$ notes' + hasTables <- liftM stHasTables get + let context = writerVariables opts ++ + [ ("body", main) + , ("title", render title') + , ("section", render section) + , ("date", render date') + , ("description", render description) ] ++ + [ ("has-tables", "yes") | hasTables ] ++ + [ ("author", render a) | a <- authors' ] + if writerStandalone opts + then return $ renderTemplate context $ writerTemplate opts + else return main -- | Return man representation of notes. notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc @@ -170,7 +162,7 @@ blockToMan opts (Table caption alignments widths headers rows) = aligncode AlignDefault = "l" in do caption' <- inlineListToMan opts caption - modify (\(notes, preprocessors) -> (notes, "t":preprocessors)) + modify $ \st -> st{ stHasTables = True } let iwidths = if all (== 0) widths then repeat "" else map (printf "w(%0.2fn)" . (70 *)) widths @@ -182,13 +174,15 @@ blockToMan opts (Table caption alignments widths headers rows) = let makeRow cols = text "T{" $$ (vcat $ intersperse (text "T}@T{") cols) $$ text "T}" - let colheadings' = makeRow colheadings + let colheadings' = if all null headers + then empty + else makeRow colheadings $$ char '_' body <- mapM (\row -> do cols <- mapM (blockListToMan opts) row return $ makeRow cols) rows return $ text ".PP" $$ caption' $$ text ".TS" $$ text "tab(@);" $$ coldescriptions $$ - colheadings' $$ char '_' $$ vcat body $$ text ".TE" + colheadings' $$ vcat body $$ text ".TE" blockToMan opts (BulletList items) = do contents <- mapM (bulletListItemToMan opts) items @@ -329,8 +323,9 @@ inlineToMan opts (Image alternate (source, tit)) = do linkPart <- inlineToMan opts (Link txt (source, tit)) return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' inlineToMan _ (Note contents) = do - modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state - (notes, _) <- get + -- add to notes in state + modify $ \st -> st{ stNotes = contents : stNotes st } + notes <- liftM stNotes get let ref = show $ (length notes) return $ char '[' <> text ref <> char ']' diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 0e1231b62..777784704 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -29,48 +29,84 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text. Markdown: <http://daringfireball.net/projects/markdown/> -} -module Text.Pandoc.Writers.Markdown ( writeMarkdown) where +module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Text.Pandoc.Definition +import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared import Text.Pandoc.Blocks import Text.ParserCombinators.Parsec ( parse, GenParser ) -import Data.List ( group, isPrefixOf, drop, find, intersperse, intercalate, transpose ) +import Data.List ( group, isPrefixOf, find, intersperse, transpose ) import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State type Notes = [[Block]] type Refs = KeyTable -type WriterState = (Notes, Refs) +data WriterState = WriterState { stNotes :: Notes + , stRefs :: Refs + , stPlain :: Bool } -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String writeMarkdown opts document = - render $ evalState (pandocToMarkdown opts document) ([],[]) + evalState (pandocToMarkdown opts document) WriterState{ stNotes = [] + , stRefs = [] + , stPlain = False } + +-- | Convert Pandoc to plain text (like markdown, but without links, +-- pictures, or inline formatting). +writePlain :: WriterOptions -> Pandoc -> String +writePlain opts document = + evalState (pandocToMarkdown opts document') WriterState{ stNotes = [] + , stRefs = [] + , stPlain = True } + where document' = plainify document + +plainify :: Pandoc -> Pandoc +plainify = processWith go + where go :: [Inline] -> [Inline] + go (Emph xs : ys) = go xs ++ go ys + go (Strong xs : ys) = go xs ++ go ys + go (Strikeout xs : ys) = go xs ++ go ys + go (Superscript xs : ys) = go xs ++ go ys + go (Subscript xs : ys) = go xs ++ go ys + go (SmallCaps xs : ys) = go xs ++ go ys + go (Code s : ys) = Str s : go ys + go (Math _ s : ys) = Str s : go ys + go (TeX _ : ys) = Str "" : go ys + go (HtmlInline _ : ys) = Str "" : go ys + go (Link xs _ : ys) = go xs ++ go ys + go (Image _ _ : ys) = go ys + go (x : ys) = x : go ys + go [] = [] -- | Return markdown representation of document. -pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToMarkdown opts (Pandoc meta blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - let header = writerHeader opts - let before' = if null before then empty else text before - let after' = if null after then empty else text after - let header' = if null header then empty else text header - metaBlock <- metaToMarkdown opts meta - let head' = if writerStandalone opts - then metaBlock $+$ header' - else empty +pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState String +pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do + title' <- inlineListToMarkdown opts title + authors' <- mapM (inlineListToMarkdown opts) authors + date' <- inlineListToMarkdown opts date + let titleblock = not $ null title && null authors && null date let headerBlocks = filter isHeaderBlock blocks let toc = if writerTableOfContents opts then tableOfContents opts headerBlocks else empty body <- blockListToMarkdown opts blocks - (notes, _) <- get - notes' <- notesToMarkdown opts (reverse notes) - (_, refs) <- get -- note that the notes may contain refs - refs' <- keyTableToMarkdown opts (reverse refs) - return $ head' $+$ before' $+$ toc $+$ body $+$ text "" $+$ - notes' $+$ text "" $+$ refs' $+$ after' + st <- get + notes' <- notesToMarkdown opts (reverse $ stNotes st) + st' <- get -- note that the notes may contain refs + refs' <- keyTableToMarkdown opts (reverse $ stRefs st') + let main = render $ body $+$ text "" $+$ notes' $+$ text "" $+$ refs' + let context = writerVariables opts ++ + [ ("toc", render toc) + , ("body", main) + , ("title", render title') + , ("date", render date') + ] ++ + [ ("titleblock", "yes") | titleblock ] ++ + [ ("author", render a) | a <- authors' ] + if writerStandalone opts + then return $ renderTemplate context $ writerTemplate opts + else return main -- | Return markdown representation of reference key table. keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc @@ -104,36 +140,14 @@ escapeString :: String -> String escapeString = escapeStringUsing markdownEscapes where markdownEscapes = backslashEscapes "\\`*_>#~^" --- | Convert bibliographic information into Markdown header. -metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc -metaToMarkdown _ (Meta [] [] []) = return empty -metaToMarkdown opts (Meta title authors date) = do - title' <- titleToMarkdown opts title - authors' <- authorsToMarkdown authors - date' <- dateToMarkdown date - return $ title' $+$ authors' $+$ date' $+$ text "" - -titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -titleToMarkdown _ [] = return empty -titleToMarkdown opts lst = do - contents <- inlineListToMarkdown opts lst - return $ text "% " <> contents - -authorsToMarkdown :: [String] -> State WriterState Doc -authorsToMarkdown [] = return empty -authorsToMarkdown lst = return $ - text "% " <> text (intercalate ", " (map escapeString lst)) - -dateToMarkdown :: String -> State WriterState Doc -dateToMarkdown [] = return empty -dateToMarkdown str = return $ text "% " <> text (escapeString str) - -- | Construct table of contents from list of header blocks. tableOfContents :: WriterOptions -> [Block] -> Doc tableOfContents opts headers = let opts' = opts { writerIgnoreNotes = True } contents = BulletList $ map elementToListItem $ hierarchicalize headers - in evalState (blockToMarkdown opts' contents) ([],[]) + in evalState (blockToMarkdown opts' contents) WriterState{ stNotes = [] + , stRefs = [] + , stPlain = False } -- | Converts an Element to a list item for a table of contents, elementToListItem :: Element -> [Block] @@ -183,13 +197,18 @@ blockToMarkdown opts (Para inlines) = do then char '\\' else empty return $ esc <> contents <> text "\n" -blockToMarkdown _ (RawHtml str) = return $ text str +blockToMarkdown _ (RawHtml str) = do + st <- get + if stPlain st + then return empty + else return $ text str blockToMarkdown _ HorizontalRule = return $ text "\n* * * * *\n" blockToMarkdown opts (Header level inlines) = do contents <- inlineListToMarkdown opts inlines + st <- get -- use setext style headers if in literate haskell mode. -- ghc interprets '#' characters in column 1 as line number specifiers. - if writerLiterateHaskell opts + if writerLiterateHaskell opts || stPlain st then let len = length $ render contents in return $ contents <> text "\n" <> case level of @@ -204,11 +223,14 @@ blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && blockToMarkdown opts (CodeBlock _ str) = return $ (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" blockToMarkdown opts (BlockQuote blocks) = do + st <- get -- if we're writing literate haskell, put a space before the bird tracks -- so they won't be interpreted as lhs... let leader = if writerLiterateHaskell opts then text . (" > " ++) - else text . ("> " ++) + else if stPlain st + then text . (" " ++) + else text . ("> " ++) contents <- blockListToMarkdown opts blocks return $ (vcat $ map leader $ lines $ render contents) <> text "\n" @@ -232,20 +254,28 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do else map (floor . (78 *)) widths let makeRow = hsepBlocks . (zipWith alignHeader aligns) . (zipWith docToBlock widthsInChars) - let head' = makeRow headers' let rows' = map makeRow rawRows + let head' = makeRow headers' let maxRowHeight = maximum $ map heightOfBlock (head':rows') let underline = hsep $ map (\width -> text $ replicate width '-') widthsInChars let border = if maxRowHeight > 1 then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-' - else empty + else if all null headers + then underline + else empty + let head'' = if all null headers + then empty + else border $+$ blockToDoc head' let spacer = if maxRowHeight > 1 then text "" else empty let body = vcat $ intersperse spacer $ map blockToDoc rows' - return $ (nest 2 $ border $+$ (blockToDoc head') $+$ underline $+$ body $+$ - border $+$ caption'') <> text "\n" + let bottom = if all null headers + then underline + else border + return $ (nest 2 $ head'' $+$ underline $+$ body $+$ + bottom $+$ caption'') <> text "\n" blockToMarkdown opts (BulletList items) = do contents <- mapM (bulletListItemToMarkdown opts) items return $ (vcat contents) <> text "\n" @@ -284,7 +314,8 @@ definitionListItemToMarkdown :: WriterOptions definitionListItemToMarkdown opts (label, defs) = do labelText <- inlineListToMarkdown opts label let tabStop = writerTabStop opts - let leader = char ':' + st <- get + let leader = if stPlain st then empty else text " ~" contents <- liftM vcat $ mapM (liftM ((leader $$) . nest tabStop . vcat) . mapM (blockToMarkdown opts)) defs return $ labelText $+$ contents @@ -300,18 +331,18 @@ blockListToMarkdown opts blocks = -- Prefer label if possible; otherwise, generate a unique key. getReference :: [Inline] -> Target -> State WriterState [Inline] getReference label (src, tit) = do - (_,refs) <- get - case find ((== (src, tit)) . snd) refs of + st <- get + case find ((== (src, tit)) . snd) (stRefs st) of Just (ref, _) -> return ref Nothing -> do - let label' = case find ((== label) . fst) refs of + let label' = case find ((== label) . fst) (stRefs st) of Just _ -> -- label is used; generate numerical label case find (\n -> not (any (== [Str (show n)]) - (map fst refs))) [1..(10000 :: Integer)] of + (map fst (stRefs st)))) [1..(10000 :: Integer)] of Just x -> [Str (show x)] Nothing -> error "no unique label" Nothing -> label - modify (\(notes, refs') -> (notes, (label', (src,tit)):refs')) + modify (\s -> s{ stRefs = (label', (src,tit)) : stRefs st }) return label' -- | Convert list of Pandoc inline elements to markdown. @@ -357,20 +388,18 @@ inlineToMarkdown _ (Code str) = marker = replicate (longest + 1) '`' spacer = if (longest == 0) then "" else " " in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) -inlineToMarkdown _ (Str str) = return $ text $ escapeString str +inlineToMarkdown _ (Str str) = do + st <- get + if stPlain st + then return $ text str + else return $ text $ escapeString str inlineToMarkdown _ (Math InlineMath str) = return $ char '$' <> text str <> char '$' inlineToMarkdown _ (Math DisplayMath str) = return $ text "$$" <> text str <> text "$$" inlineToMarkdown _ (TeX str) = return $ text str inlineToMarkdown _ (HtmlInline str) = return $ text str inlineToMarkdown _ (LineBreak) = return $ text " \n" inlineToMarkdown _ Space = return $ char ' ' -inlineToMarkdown _ (Cite cits _ ) = do - let format (a,b) xs = text a <> - (if b /= [] then char '@' else empty) <> - text b <> - (if isEmpty xs then empty else text "; ") <> - xs - return $ char '[' <> foldr format empty cits <> char ']' +inlineToMarkdown opts (Cite _ cits) = inlineListToMarkdown opts cits inlineToMarkdown opts (Link txt (src, tit)) = do linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" @@ -397,7 +426,7 @@ inlineToMarkdown opts (Image alternate (source, tit)) = do linkPart <- inlineToMarkdown opts (Link txt (source, tit)) return $ char '!' <> linkPart inlineToMarkdown _ (Note contents) = do - modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state - (notes, _) <- get - let ref = show $ (length notes) + modify (\st -> st{ stNotes = contents : stNotes st }) + st <- get + let ref = show $ (length $ stNotes st) return $ text "[^" <> text ref <> char ']' diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 1e7194621..f1e985bb7 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -32,6 +32,7 @@ MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki> module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intersect, intercalate ) import Network.URI ( isURI ) @@ -52,20 +53,18 @@ writeMediaWiki opts document = -- | Return MediaWiki representation of document. pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String pandocToMediaWiki opts (Pandoc _ blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - let head' = if writerStandalone opts - then writerHeader opts - else "" - let toc = if writerTableOfContents opts - then "__TOC__\n" - else "" body <- blockListToMediaWiki opts blocks notesExist <- get >>= return . stNotes let notes = if notesExist - then "\n== Notes ==\n<references />" + then "\n<references />" else "" - return $ head' ++ before ++ toc ++ body ++ after ++ notes + let main = body ++ notes + let context = writerVariables opts ++ + [ ("body", main) ] ++ + [ ("toc", "yes") | writerTableOfContents opts ] + if writerStandalone opts + then return $ renderTemplate context $ writerTemplate opts + else return main -- | Escape special characters for MediaWiki. escapeString :: String -> String @@ -81,6 +80,14 @@ blockToMediaWiki _ Null = return "" blockToMediaWiki opts (Plain inlines) = inlineListToMediaWiki opts inlines +blockToMediaWiki opts (Para [Image txt (src,tit)]) = do + capt <- inlineListToMediaWiki opts txt + let opt = if null txt + then "" + else "|alt=" ++ if null tit then capt else tit ++ + "|caption " ++ capt + return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" + blockToMediaWiki opts (Para inlines) = do useTags <- get >>= return . stUseTags listLevel <- get >>= return . stListLevel @@ -95,7 +102,7 @@ blockToMediaWiki _ HorizontalRule = return "\n-----\n" blockToMediaWiki opts (Header level inlines) = do contents <- inlineListToMediaWiki opts inlines - let eqs = replicate (level + 1) '=' + let eqs = replicate level '=' return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do @@ -117,17 +124,27 @@ blockToMediaWiki opts (BlockQuote blocks) = do contents <- blockListToMediaWiki opts blocks return $ "<blockquote>" ++ contents ++ "</blockquote>" -blockToMediaWiki opts (Table caption aligns widths headers rows) = do +blockToMediaWiki opts (Table capt aligns widths headers rows') = do let alignStrings = map alignmentToString aligns - captionDoc <- if null caption + captionDoc <- if null capt then return "" else do - c <- inlineListToMediaWiki opts caption - return $ "<caption>" ++ c ++ "</caption>" - colHeads <- colHeadsToMediaWiki opts alignStrings widths headers - rows' <- mapM (tableRowToMediaWiki opts alignStrings) rows - return $ "<table>\n" ++ captionDoc ++ colHeads ++ vcat rows' ++ "\n</table>" - + c <- inlineListToMediaWiki opts capt + return $ "<caption>" ++ c ++ "</caption>\n" + let percent w = show (truncate (100*w) :: Integer) ++ "%" + let coltags = if all (== 0.0) widths + then "" + else unlines $ map + (\w -> "<col width=\"" ++ percent w ++ "\" />") widths + head' <- if all null headers + then return "" + else do + hs <- tableRowToMediaWiki opts alignStrings 0 headers + return $ "<thead>\n" ++ hs ++ "\n</thead>\n" + body' <- zipWithM (tableRowToMediaWiki opts alignStrings) [1..] rows' + return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++ + "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n" + blockToMediaWiki opts x@(BulletList items) = do oldUseTags <- get >>= return . stUseTags let useTags = oldUseTags || not (isSimpleList x) @@ -248,25 +265,27 @@ isPlainOrPara (Plain _) = True isPlainOrPara (Para _) = True isPlainOrPara _ = False -tr :: String -> String -tr x = "<tr>\n" ++ x ++ "\n</tr>" - -- | Concatenates strings with line breaks between them. vcat :: [String] -> String vcat = intercalate "\n" -- Auxiliary functions for tables: -colHeadsToMediaWiki :: WriterOptions - -> [[Char]] - -> [Double] +tableRowToMediaWiki :: WriterOptions + -> [String] + -> Int -> [[Block]] -> State WriterState String -colHeadsToMediaWiki opts alignStrings widths headers = do - heads <- sequence $ zipWith3 - (\alignment columnwidth item -> tableItemToMediaWiki opts "th" alignment columnwidth item) - alignStrings widths headers - return $ tr $ vcat heads +tableRowToMediaWiki opts alignStrings rownum cols' = do + let celltype = if rownum == 0 then "th" else "td" + let rowclass = case rownum of + 0 -> "header" + x | x `rem` 2 == 1 -> "odd" + _ -> "even" + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToMediaWiki opts celltype alignment item) + alignStrings cols' + return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -275,27 +294,16 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableRowToMediaWiki :: WriterOptions - -> [[Char]] - -> [[Block]] - -> State WriterState String -tableRowToMediaWiki opts aligns columns = - (sequence $ zipWith3 (tableItemToMediaWiki opts "td") aligns (repeat 0) columns) >>= - return . tr . vcat - tableItemToMediaWiki :: WriterOptions - -> [Char] - -> [Char] - -> Double + -> String + -> String -> [Block] -> State WriterState String -tableItemToMediaWiki opts tag' align' width' item = do +tableItemToMediaWiki opts celltype align' item = do + let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ + x ++ "</" ++ celltype ++ ">" contents <- blockListToMediaWiki opts item - let attrib = " align=\"" ++ align' ++ "\"" ++ - if width' /= 0 - then " style=\"width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;\"" - else "" - return $ "<" ++ tag' ++ attrib ++ ">" ++ contents ++ "</" ++ tag' ++ ">" + return $ mkcell contents -- | Convert list of Pandoc block elements to MediaWiki. blockListToMediaWiki :: WriterOptions -- ^ Options @@ -369,17 +377,15 @@ inlineToMediaWiki _ (LineBreak) = return "<br />\n" inlineToMediaWiki _ Space = return " " inlineToMediaWiki opts (Link txt (src, _)) = do - link <- inlineListToMediaWiki opts txt - let useAuto = txt == [Code src] - let src' = if isURI src - then src - else if take 1 src == "/" - then "http://{{SERVERNAME}}" ++ src - else "http://{{SERVERNAME}}/" ++ src - return $ if useAuto - then src' - else "[" ++ src' ++ " " ++ link ++ "]" - + label <- inlineListToMediaWiki opts txt + if txt == [Code src] -- autolink + then return src + else if isURI src + then return $ "[" ++ src ++ " " ++ label ++ "]" + else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" + where src' = case src of + '/':xs -> xs -- with leading / it's a + _ -> src -- link to a help page inlineToMediaWiki opts (Image alt (source, tit)) = do alt' <- inlineListToMediaWiki opts alt let txt = if (null tit) diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 15e7f30bd..347072cf1 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.XML +import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath import Text.PrettyPrint.HughesPJ hiding ( Str ) import Text.Printf ( printf ) @@ -39,7 +40,6 @@ import Control.Applicative ( (<$>) ) import Control.Arrow ( (***), (>>>) ) import Control.Monad.State hiding ( when ) import Data.Char (chr) -import Data.List (intercalate) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -156,58 +156,38 @@ handleSpaces s rm ( x:xs) = char x <> rm xs rm [] = empty --- | Convert list of authors to a docbook <author> section -authorToOpenDocument :: [Char] -> Doc -authorToOpenDocument name = - if ',' `elem` name - then -- last name first - let (lastname, rest) = break (==',') name - firstname = removeLeadingSpace rest - in inParagraphTagsWithStyle "Author" $ - (text $ escapeStringForXML firstname) <+> - (text $ escapeStringForXML lastname) - else -- last name last - let namewords = words name - lengthname = length namewords - (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (intercalate " " (take (n-1) namewords), last namewords) - in inParagraphTagsWithStyle "Author" $ - (text $ escapeStringForXML firstname) <+> - (text $ escapeStringForXML lastname) - -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: WriterOptions -> Pandoc -> String writeOpenDocument opts (Pandoc (Meta title authors date) blocks) = - let root = inTags True "office:document-content" openDocumentNameSpaces - header = when (writerStandalone opts) $ text (writerHeader opts) - title' = case runState (wrap opts title) defaultWriterState of - (t,_) -> if isEmpty t then empty else inHeaderTags 1 t - authors' = when (authors /= []) $ vcat (map authorToOpenDocument authors) - date' = when (date /= []) $ - inParagraphTagsWithStyle "Date" (text $ escapeStringForXML date) - meta = when (writerStandalone opts) $ title' $$ authors' $$ date' - before = writerIncludeBefore opts - after = writerIncludeAfter opts - (doc, s) = runState (blocksToOpenDocument opts blocks) defaultWriterState - body = (if null before then empty else text before) $$ - doc $$ - (if null after then empty else text after) - body' = if writerStandalone opts - then inTagsIndented "office:body" $ - inTagsIndented "office:text" (meta $$ body) - else body + let ((doc, title', authors', date'),s) = flip runState + defaultWriterState $ do + title'' <- inlinesToOpenDocument opts title + authors'' <- mapM (inlinesToOpenDocument opts) authors + date'' <- inlinesToOpenDocument opts date + doc'' <- blocksToOpenDocument opts blocks + return (doc'', title'', authors'', date'') + body' = render doc styles = stTableStyles s ++ stParaStyles s ++ stTextStyles s - listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) + listStyle (n,l) = inTags True "text:list-style" + [("style:name", "L" ++ show n)] (vcat l) listStyles = map listStyle (stListStyles s) - in render $ header $$ root (generateStyles (styles ++ listStyles) $$ body' $$ text "") + automaticStyles = inTagsIndented "office:automatic-styles" $ vcat $ + reverse $ styles ++ listStyles + context = writerVariables opts ++ + [ ("body", body') + , ("automatic-styles", render automaticStyles) + , ("title", render title') + , ("date", render date') ] ++ + [ ("author", render a) | a <- authors' ] + in if writerStandalone opts + then renderTemplate context $ writerTemplate opts + else body' withParagraphStyle :: WriterOptions -> String -> [Block] -> State WriterState Doc withParagraphStyle o s (b:bs) | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b - where go i = ($$) i <$> withParagraphStyle o s bs + where go i = (<>) i <$> withParagraphStyle o s bs withParagraphStyle _ _ [] = return empty inPreformattedTags :: String -> State WriterState Doc @@ -287,9 +267,9 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc blockToOpenDocument o bs - | Plain b <- bs = inParagraphTags <$> wrap o b - | Para b <- bs = inParagraphTags <$> wrap o b - | Header i b <- bs = inHeaderTags i <$> wrap o b + | Plain b <- bs = inParagraphTags <$> inlinesToOpenDocument o b + | Para b <- bs = inParagraphTags <$> inlinesToOpenDocument o b + | Header i b <- bs = inHeaderTags i <$> inlinesToOpenDocument o b | BlockQuote b <- bs = mkBlockQuote b | CodeBlock _ s <- bs = preformatted s | RawHtml _ <- bs = return empty @@ -328,7 +308,9 @@ blockToOpenDocument o bs captionDoc <- if null c then return empty else withParagraphStyle o "Caption" [Para c] - th <- colHeadsToOpenDocument o name (map fst paraHStyles) h + th <- if all null h + then return empty + else colHeadsToOpenDocument o name (map fst paraHStyles) h tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r return $ inTags True "table:table" [ ("table:name" , name) , ("table:style-name", name) @@ -352,12 +334,6 @@ tableItemToOpenDocument o tn (n,i) = in inTags True "table:table-cell" a <$> withParagraphStyle o n (map plainToPara i) --- | Take list of inline elements and return wrapped doc. -wrap :: WriterOptions -> [Inline] -> State WriterState Doc -wrap o l = if writerWrapText o - then fsep <$> mapM (inlinesToOpenDocument o) (splitBy Space l) - else inlinesToOpenDocument o l - -- | Convert a list of inline elements to OpenDocument. inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l @@ -404,22 +380,12 @@ inlineToOpenDocument o ils let footNote t = inTags False "text:note" [ ("text:id" , "ftn" ++ show n) , ("text:note-class", "footnote" )] $ - inTagsSimple "text:note-citation" (text . show $ n + 1) $$ + inTagsSimple "text:note-citation" (text . show $ n + 1) <> inTagsSimple "text:note-body" t nn <- footNote <$> withParagraphStyle o "Footnote" l addNote nn return nn -generateStyles :: [Doc] -> Doc -generateStyles acc = - let scripts = selfClosingTag "office:scripts" [] - fonts = inTagsIndented "office:font-face-decls" - (vcat $ map font ["Lucida Sans Unicode", "Tahoma", "Times New Roman"]) - font fn = selfClosingTag "style:font-face" - [ ("style:name" , "'" ++ fn ++ "'") - , ("svg:font-family", fn )] - in scripts $$ fonts $$ inTagsIndented "office:automatic-styles" (vcat $ reverse acc) - bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc])) bulletListStyle l = let doStyles i = inTags True "text:list-level-style-bullet" @@ -541,30 +507,3 @@ textStyleAttr s | SmallC <- s = [("fo:font-variant" ,"small-caps")] | otherwise = [] -openDocumentNameSpaces :: [(String, String)] -openDocumentNameSpaces = - [ ("xmlns:office" , "urn:oasis:names:tc:opendocument:xmlns:office:1.0" ) - , ("xmlns:style" , "urn:oasis:names:tc:opendocument:xmlns:style:1.0" ) - , ("xmlns:text" , "urn:oasis:names:tc:opendocument:xmlns:text:1.0" ) - , ("xmlns:table" , "urn:oasis:names:tc:opendocument:xmlns:table:1.0" ) - , ("xmlns:draw" , "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" ) - , ("xmlns:fo" , "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0") - , ("xmlns:xlink" , "http://www.w3.org/1999/xlink" ) - , ("xmlns:dc" , "http://purl.org/dc/elements/1.1/" ) - , ("xmlns:meta" , "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" ) - , ("xmlns:number" , "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" ) - , ("xmlns:svg" , "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" ) - , ("xmlns:chart" , "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" ) - , ("xmlns:dr3d" , "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" ) - , ("xmlns:math" , "http://www.w3.org/1998/Math/MathML" ) - , ("xmlns:form" , "urn:oasis:names:tc:opendocument:xmlns:form:1.0" ) - , ("xmlns:script" , "urn:oasis:names:tc:opendocument:xmlns:script:1.0" ) - , ("xmlns:ooo" , "http://openoffice.org/2004/office" ) - , ("xmlns:ooow" , "http://openoffice.org/2004/writer" ) - , ("xmlns:oooc" , "http://openoffice.org/2004/calc" ) - , ("xmlns:dom" , "http://www.w3.org/2001/xml-events" ) - , ("xmlns:xforms" , "http://www.w3.org/2002/xforms" ) - , ("xmlns:xsd" , "http://www.w3.org/2001/XMLSchema" ) - , ("xmlns:xsi" , "http://www.w3.org/2001/XMLSchema-instance" ) - , ("office:version", "1.0" ) - ] diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 31c039bd7..534c34c09 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -33,7 +33,8 @@ module Text.Pandoc.Writers.RST ( writeRST) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Blocks -import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse, transpose ) +import Text.Pandoc.Templates (renderTemplate) +import Data.List ( isPrefixOf, isSuffixOf, intersperse, transpose ) import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State import Control.Applicative ( (<$>) ) @@ -42,7 +43,7 @@ data WriterState = WriterState { stNotes :: [[Block]] , stLinks :: KeyTable , stImages :: KeyTable - , stIncludes :: [String] + , stHasMath :: Bool , stOptions :: WriterOptions } @@ -50,33 +51,33 @@ data WriterState = writeRST :: WriterOptions -> Pandoc -> String writeRST opts document = let st = WriterState { stNotes = [], stLinks = [], - stImages = [], stIncludes = [], + stImages = [], stHasMath = False, stOptions = opts } - in render $ evalState (pandocToRST document) st + in evalState (pandocToRST document) st -- | Return RST representation of document. -pandocToRST :: Pandoc -> State WriterState Doc -pandocToRST (Pandoc meta blocks) = do - opts <- get >>= (return . stOptions) - let before = writerIncludeBefore opts - after = writerIncludeAfter opts - header = writerHeader opts - before' = if null before then empty else text before - after' = if null after then empty else text after - header' = if null header then empty else text header - metaBlock <- metaToRST opts meta - let head' = if (writerStandalone opts) - then metaBlock $+$ header' - else empty +pandocToRST :: Pandoc -> State WriterState String +pandocToRST (Pandoc (Meta tit auth dat) blocks) = do + opts <- liftM stOptions get + title <- titleToRST tit + authors <- mapM inlineListToRST auth + date <- inlineListToRST dat body <- blockListToRST blocks - includes <- get >>= (return . concat . stIncludes) - let includes' = if null includes then empty else text includes - notes <- get >>= (notesToRST . reverse . stNotes) + notes <- liftM (reverse . stNotes) get >>= notesToRST -- note that the notes may contain refs, so we do them first - refs <- get >>= (keyTableToRST . reverse . stLinks) - pics <- get >>= (pictTableToRST . reverse . stImages) - return $ head' $+$ before' $+$ includes' $+$ body $+$ notes $+$ text "" $+$ - refs $+$ pics $+$ after' + refs <- liftM (reverse . stLinks) get >>= keyTableToRST + pics <- liftM (reverse . stImages) get >>= pictTableToRST + hasMath <- liftM stHasMath get + let main = render $ body $+$ notes $+$ text "" $+$ refs $+$ pics + let context = writerVariables opts ++ + [ ("body", main) + , ("title", render title) + , ("date", render date) ] ++ + [ ("math", "yes") | hasMath ] ++ + [ ("author", render a) | a <- authors ] + if writerStandalone opts + then return $ renderTemplate context $ writerTemplate opts + else return main -- | Return RST representation of reference key table. keyTableToRST :: KeyTable -> State WriterState Doc @@ -129,35 +130,13 @@ wrappedRST opts inlines = do escapeString :: String -> String escapeString = escapeStringUsing (backslashEscapes "`\\|*_") --- | Convert bibliographic information into RST header. -metaToRST :: WriterOptions -> Meta -> State WriterState Doc -metaToRST _ (Meta [] [] []) = return empty -metaToRST opts (Meta title authors date) = do - title' <- titleToRST title - authors' <- authorsToRST authors - date' <- dateToRST date - let toc = if writerTableOfContents opts - then text "" $+$ text ".. contents::" - else empty - return $ title' $+$ authors' $+$ date' $+$ toc $+$ text "" - titleToRST :: [Inline] -> State WriterState Doc titleToRST [] = return empty titleToRST lst = do contents <- inlineListToRST lst let titleLength = length $ render contents let border = text (replicate titleLength '=') - return $ border $+$ contents $+$ border <> text "\n" - -authorsToRST :: [String] -> State WriterState Doc -authorsToRST [] = return empty -authorsToRST (first:rest) = do - rest' <- authorsToRST rest - return $ (text ":Author: " <> text first) $+$ rest' - -dateToRST :: String -> State WriterState Doc -dateToRST [] = return empty -dateToRST str = return $ text ":Date: " <> text (escapeString str) + return $ border $+$ contents $+$ border -- | Convert Pandoc block element to RST. blockToRST :: Block -- ^ Block element @@ -166,6 +145,12 @@ blockToRST Null = return empty blockToRST (Plain inlines) = do opts <- get >>= (return . stOptions) wrappedRST opts inlines +blockToRST (Para [Image txt (src,tit)]) = do + capt <- inlineListToRST txt + let fig = text "figure:: " <> text src + let align = text ":align: center" + let alt = text ":alt: " <> if null tit then capt else text tit + return $ (text ".. " <> (fig $$ align $$ alt $$ text "" $$ capt)) $$ text "" blockToRST (Para inlines) = do opts <- get >>= (return . stOptions) contents <- wrappedRST opts inlines @@ -220,8 +205,10 @@ blockToRST (Table caption _ widths headers rows) = do map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '+' let body = vcat $ intersperse (border '-') $ map blockToDoc rows' - return $ border '-' $+$ blockToDoc head' $+$ border '=' $+$ body $+$ - border '-' $$ caption'' $$ text "" + let head'' = if all null headers + then empty + else blockToDoc head' $+$ border '=' + return $ border '-' $+$ head'' $+$ body $+$ border '-' $$ caption'' $$ text "" blockToRST (BulletList items) = do contents <- mapM bulletListItemToRST items -- ensure that sublists have preceding blank line @@ -306,12 +293,7 @@ inlineToRST Ellipses = return $ text "..." inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``" inlineToRST (Str str) = return $ text $ escapeString str inlineToRST (Math t str) = do - includes <- get >>= (return . stIncludes) - let rawMathRole = ".. role:: math(raw)\n" ++ - " :format: html latex\n" - if not (rawMathRole `elem` includes) - then modify $ \st -> st { stIncludes = rawMathRole : includes } - else return () + modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath then text $ ":math:`$" ++ str ++ "$`" else text $ ":math:`$$" ++ str ++ "$$`" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 15bac115d..c0c3d0536 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -31,23 +31,29 @@ module Text.Pandoc.Writers.RTF ( writeRTF ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Templates (renderTemplate) import Data.List ( isSuffixOf, intercalate ) import Data.Char ( ord, isDigit ) -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc meta blocks) = - let head' = if writerStandalone options - then rtfHeader (writerHeader options) meta - else "" - toc = if writerTableOfContents options - then tableOfContents $ filter isHeaderBlock blocks - else "" - foot = if writerStandalone options then "\n}\n" else "" - body = writerIncludeBefore options ++ - concatMap (blockToRTF 0 AlignDefault) blocks ++ - writerIncludeAfter options - in head' ++ toc ++ body ++ foot +writeRTF options (Pandoc (Meta title authors date) blocks) = + let titletext = inlineListToRTF title + authorstext = map inlineListToRTF authors + datetext = inlineListToRTF date + spacer = not $ all null $ titletext : datetext : authorstext + body = concatMap (blockToRTF 0 AlignDefault) blocks + context = writerVariables options ++ + [ ("body", body) + , ("title", titletext) + , ("date", datetext) ] ++ + [ ("author", a) | a <- authorstext ] ++ + [ ("spacer", "yes") | spacer ] ++ + [ ("toc", tableOfContents $ filter isHeaderBlock blocks) | + writerTableOfContents options ] + in if writerStandalone options + then renderTemplate context $ writerTemplate options + else body -- | Construct table of contents from list of header blocks. tableOfContents :: [Block] -> String @@ -139,27 +145,6 @@ orderedMarkers indent (start, style, delim) = _ -> orderedListMarkers (start, LowerAlpha, Period) else orderedListMarkers (start, style, delim) --- | Returns RTF header. -rtfHeader :: String -- ^ header text - -> Meta -- ^ bibliographic information - -> String -rtfHeader headerText (Meta title authors date) = - let titletext = if null title - then "" - else rtfPar 0 0 AlignCenter $ - "\\b \\fs36 " ++ inlineListToRTF title - authorstext = if null authors - then "" - else rtfPar 0 0 AlignCenter (" " ++ (intercalate "\\" $ - map stringToRTF authors)) - datetext = if date == "" - then "" - else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in - let spacer = if null (titletext ++ authorstext ++ datetext) - then "" - else rtfPar 0 0 AlignDefault "" in - headerText ++ titletext ++ authorstext ++ datetext ++ spacer - -- | Convert Pandoc block element to RTF. blockToRTF :: Int -- ^ indent level -> Alignment -- ^ alignment @@ -186,7 +171,9 @@ blockToRTF indent _ HorizontalRule = blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst blockToRTF indent alignment (Table caption aligns sizes headers rows) = - tableRowToRTF True indent aligns sizes headers ++ + (if all null headers + then "" + else tableRowToRTF True indent aligns sizes headers) ++ concatMap (tableRowToRTF False indent aligns sizes) rows ++ rtfPar indent 0 alignment (inlineListToRTF caption) diff --git a/src/Text/Pandoc/Writers/S5.hs b/src/Text/Pandoc/Writers/S5.hs index 6f528503a..1dff06e62 100644 --- a/src/Text/Pandoc/Writers/S5.hs +++ b/src/Text/Pandoc/Writers/S5.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP, TemplateHaskell #-} {- Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> @@ -30,66 +29,46 @@ Definitions for creation of S5 powerpoint-like HTML. (See <http://meyerweb.com/eric/tools/s5/>.) -} module Text.Pandoc.Writers.S5 ( - -- * Strings - s5Meta, - s5Javascript, - s5CSS, + -- * Header includes + s5HeaderIncludes, s5Links, -- * Functions writeS5, writeS5String, insertS5Structure ) where -import Text.Pandoc.Shared ( WriterOptions ) -import Text.Pandoc.TH ( contentsOf ) +import Text.Pandoc.Shared ( WriterOptions, readDataFile ) import Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) import Text.Pandoc.Definition import Text.XHtml.Strict import System.FilePath ( (</>) ) import Data.List ( intercalate ) +s5HeaderIncludes :: Maybe FilePath -> IO String +s5HeaderIncludes datadir = do + c <- s5CSS datadir + j <- s5Javascript datadir + return $ s5Meta ++ c ++ j + s5Meta :: String s5Meta = "<!-- configuration parameters -->\n<meta name=\"defaultView\" content=\"slideshow\" />\n<meta name=\"controlVis\" content=\"hidden\" />\n" -s5Javascript :: String -#ifndef __HADDOCK__ -s5Javascript = "<script type=\"text/javascript\">\n" ++ - $(contentsOf $ "data" </> "ui" </> "default" </> "slides.js.comment") ++ - $(contentsOf $ "data" </> "ui" </> "default" </> "slides.js.packed") ++ "</script>\n" -#endif - -s5CoreCSS :: String -#ifndef __HADDOCK__ -s5CoreCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "s5-core.css") -#endif - -s5FramingCSS :: String -#ifndef __HADDOCK__ -s5FramingCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "framing.css") -#endif - -s5PrettyCSS :: String -#ifndef __HADDOCK__ -s5PrettyCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "pretty.css") -#endif - -s5OperaCSS :: String -#ifndef __HADDOCK__ -s5OperaCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "opera.css") -#endif - -s5OutlineCSS :: String -#ifndef __HADDOCK__ -s5OutlineCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "outline.css") -#endif - -s5PrintCSS :: String -#ifndef __HADDOCK__ -s5PrintCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "print.css") -#endif - -s5CSS :: String -s5CSS = "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n" +s5Javascript :: Maybe FilePath -> IO String +s5Javascript datadir = do + jsCom <- readDataFile datadir $ "s5" </> "default" </> "slides.js.comment" + jsPacked <- readDataFile datadir $ "s5" </> "default" </> "slides.js.packed" + return $ "<script type=\"text/javascript\">\n" ++ jsCom ++ jsPacked ++ + "</script>\n" + +s5CSS :: Maybe FilePath -> IO String +s5CSS datadir = do + s5CoreCSS <- readDataFile datadir $ "s5" </> "default" </> "s5-core.css" + s5FramingCSS <- readDataFile datadir $ "s5" </> "default" </> "framing.css" + s5PrettyCSS <- readDataFile datadir $ "s5" </> "default" </> "pretty.css" + s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css" + s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css" + s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css" + return $ "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n" s5Links :: String s5Links = "<!-- style sheet links -->\n<link rel=\"stylesheet\" href=\"ui/default/slides.css\" type=\"text/css\" media=\"projection\" id=\"slideProj\" />\n<link rel=\"stylesheet\" href=\"ui/default/outline.css\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" />\n<link rel=\"stylesheet\" href=\"ui/default/print.css\" type=\"text/css\" media=\"print\" id=\"slidePrint\" />\n<link rel=\"stylesheet\" href=\"ui/default/opera.css\" type=\"text/css\" media=\"projection\" id=\"operaFix\" />\n<!-- S5 JS -->\n<script src=\"ui/default/slides.js\" type=\"text/javascript\"></script>\n" @@ -104,9 +83,9 @@ writeS5String options = (writeHtmlString options) . insertS5Structure -- | Inserts HTML needed for an S5 presentation (e.g. around slides). layoutDiv :: [Inline] -- ^ Title of document (for header or footer) - -> String -- ^ Date of document (for header or footer) + -> [Inline] -- ^ Date of document (for header or footer) -> [Block] -- ^ List of block elements returned -layoutDiv title' date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 [Str date]), (Header 2 title'), (RawHtml "</div>\n</div>\n")] +layoutDiv title' date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 date), (Header 2 title'), (RawHtml "</div>\n</div>\n")] presentationStart :: Block presentationStart = RawHtml "<div class=\"presentation\">\n\n" @@ -149,8 +128,8 @@ insertS5Structure (Pandoc (Meta title' authors date) blocks) = let slides = insertSlides True blocks firstSlide = if not (null title') then [slideStart, (Header 1 title'), - (Header 3 [Str (intercalate ", " authors)]), - (Header 4 [Str date]), slideEnd] + (Header 3 (intercalate [LineBreak] authors)), + (Header 4 date), slideEnd] else [] newBlocks = (layoutDiv title' date) ++ presentationStart:firstSlide ++ slides ++ [presentationEnd] diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 5b706d24b..47a318631 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -30,17 +30,19 @@ Conversion of 'Pandoc' format into Texinfo. module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath import Text.Printf ( printf ) import Data.List ( isSuffixOf, transpose, maximumBy ) import Data.Ord ( comparing ) import Data.Char ( chr, ord ) -import qualified Data.Set as S import Control.Monad.State import Text.PrettyPrint.HughesPJ hiding ( Str ) data WriterState = - WriterState { stIncludes :: S.Set String -- strings to include in header + WriterState { stStrikeout :: Bool -- document contains strikeout + , stSuperscript :: Bool -- document contains superscript + , stSubscript :: Bool -- document contains subscript } {- TODO: @@ -48,79 +50,39 @@ data WriterState = - generated .texi files don't work when run through texi2dvi -} --- | Add line to header. -addToHeader :: String -> State WriterState () -addToHeader str = do - st <- get - let includes = stIncludes st - put st {stIncludes = S.insert str includes} - -- | Convert Pandoc to Texinfo. writeTexinfo :: WriterOptions -> Pandoc -> String writeTexinfo options document = - render $ evalState (pandocToTexinfo options $ wrapTop document) $ - WriterState { stIncludes = S.empty } + evalState (pandocToTexinfo options $ wrapTop document) $ + WriterState { stStrikeout = False, stSuperscript = False, stSubscript = False } -- | Add a "Top" node around the document, needed by Texinfo. wrapTop :: Pandoc -> Pandoc wrapTop (Pandoc (Meta title authors date) blocks) = Pandoc (Meta title authors date) (Header 0 title : blocks) -pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToTexinfo options (Pandoc meta blocks) = do - main <- blockListToTexinfo blocks - head' <- if writerStandalone options - then texinfoHeader options meta - else return empty - let before = if null (writerIncludeBefore options) - then empty - else text (writerIncludeBefore options) - let after = if null (writerIncludeAfter options) - then empty - else text (writerIncludeAfter options) - let body = before $$ main $$ after - -- XXX toc untested - let toc = if writerTableOfContents options - then text "@contents" - else empty - let foot = if writerStandalone options - then text "@bye" - else empty - return $ head' $$ toc $$ body $$ foot - --- | Insert bibliographic information into Texinfo header. -texinfoHeader :: WriterOptions -- ^ Options, including Texinfo header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState Doc -texinfoHeader options (Meta title authors date) = do - titletext <- if null title - then return empty - else do - t <- inlineListToTexinfo title - return $ text "@title " <> t - headerIncludes <- get >>= return . S.toList . stIncludes - let extras = text $ unlines headerIncludes - let authorstext = map makeAuthor authors - let datetext = if date == "" - then empty - else text $ stringToTexinfo date - - let baseHeader = case writerHeader options of - "" -> empty - x -> text x - let header = text "@documentencoding utf-8" $$ baseHeader $$ extras - return $ text "\\input texinfo" $$ - header $$ - text "@ifnottex" $$ - text "@paragraphindent 0" $$ - text "@end ifnottex" $$ - text "@titlepage" $$ - titletext $$ vcat authorstext $$ - datetext $$ - text "@end titlepage" - -makeAuthor :: String -> Doc -makeAuthor author = text $ "@author " ++ (stringToTexinfo author) +pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState String +pandocToTexinfo options (Pandoc (Meta title authors date) blocks) = do + titleText <- inlineListToTexinfo title + authorsText <- mapM inlineListToTexinfo authors + dateText <- inlineListToTexinfo date + let titlePage = not $ all null $ title : date : authors + main <- blockListToTexinfo blocks + st <- get + let body = render main + let context = writerVariables options ++ + [ ("body", body) + , ("title", render titleText) + , ("date", render dateText) ] ++ + [ ("toc", "yes") | writerTableOfContents options ] ++ + [ ("titlepage", "yes") | titlePage ] ++ + [ ("subscript", "yes") | stSubscript st ] ++ + [ ("superscript", "yes") | stSuperscript st ] ++ + [ ("strikeout", "yes") | stStrikeout st ] ++ + [ ("author", render a) | a <- authorsText ] + if writerStandalone options + then return $ renderTemplate context $ writerTemplate options + else return body -- | Escape things as needed for Texinfo. stringToTexinfo :: String -> String @@ -145,6 +107,12 @@ blockToTexinfo Null = return empty blockToTexinfo (Plain lst) = inlineListToTexinfo lst +blockToTexinfo (Para [Image txt (src,tit)]) = do + capt <- inlineListToTexinfo txt + img <- inlineToTexinfo (Image txt (src,tit)) + return $ text "@float" $$ img $$ (text "@caption{" <> capt <> char '}') $$ + text "@end float" + blockToTexinfo (Para lst) = inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo @@ -223,7 +191,9 @@ blockToTexinfo (Header level lst) = do seccmd _ = error "illegal seccmd level" blockToTexinfo (Table caption aligns widths heads rows) = do - headers <- tableHeadToTexinfo aligns heads + headers <- if all null heads + then return empty + else tableHeadToTexinfo aligns heads captionText <- inlineListToTexinfo caption rowsText <- mapM (tableRowToTexinfo aligns) rows colDescriptors <- @@ -395,33 +365,17 @@ inlineToTexinfo (Strong lst) = inlineListToTexinfo lst >>= return . inCmd "strong" inlineToTexinfo (Strikeout lst) = do - addToHeader $ "@macro textstrikeout{text}\n" ++ - "~~\\text\\~~\n" ++ - "@end macro\n" + modify $ \st -> st{ stStrikeout = True } contents <- inlineListToTexinfo lst return $ text "@textstrikeout{" <> contents <> text "}" inlineToTexinfo (Superscript lst) = do - addToHeader $ "@macro textsuperscript{text}\n" ++ - "@iftex\n" ++ - "@textsuperscript{\\text\\}\n" ++ - "@end iftex\n" ++ - "@ifnottex\n" ++ - "^@{\\text\\@}\n" ++ - "@end ifnottex\n" ++ - "@end macro\n" + modify $ \st -> st{ stSuperscript = True } contents <- inlineListToTexinfo lst return $ text "@textsuperscript{" <> contents <> char '}' inlineToTexinfo (Subscript lst) = do - addToHeader $ "@macro textsubscript{text}\n" ++ - "@iftex\n" ++ - "@textsubscript{\\text\\}\n" ++ - "@end iftex\n" ++ - "@ifnottex\n" ++ - "_@{\\text\\@}\n" ++ - "@end ifnottex\n" ++ - "@end macro\n" + modify $ \st -> st{ stSubscript = True } contents <- inlineListToTexinfo lst return $ text "@textsubscript{" <> contents <> char '}' diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index a5d0202e5..68c5c3c5c 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -53,12 +53,11 @@ escapeCharForXML x = case x of '<' -> "<" '>' -> ">" '"' -> """ - '\160' -> " " c -> [c] -- | True if the character needs to be escaped. needsEscaping :: Char -> Bool -needsEscaping c = c `elem` "&<>\"\160" +needsEscaping c = c `elem` "&<>\"" -- | Escape string as needed for XML. Entity references are not preserved. escapeStringForXML :: String -> String |