diff options
author | Sean Whitton <spwhitton@spwhitton.name> | 2018-04-24 10:49:04 -0700 |
---|---|---|
committer | Sean Whitton <spwhitton@spwhitton.name> | 2018-04-24 10:49:04 -0700 |
commit | a080dde1efb823e6e25e6ba0ead2afeb76012e43 (patch) | |
tree | 6165e39a24544d1387a201790541147e0f7478ab /src/Text/Pandoc/Writers | |
parent | a9ae23fa15d769ab9b05f483c8511e96cc684403 (diff) | |
parent | de5ee82ed0e287ada3a5b272d8365a04fe8e9f95 (diff) |
Merge tag 'upstream/2.1.2_dfsg'
Upstream version 2.1.2~dfsg
# gpg: Signature made Tue 24 Apr 2018 10:48:48 AM MST
# gpg: using RSA key 9B917007AE030E36E4FC248B695B7AE4BF066240
# gpg: issuer "spwhitton@spwhitton.name"
# gpg: Good signature from "Sean Whitton <spwhitton@spwhitton.name>" [ultimate]
# Primary key fingerprint: 8DC2 487E 51AB DD90 B5C4 753F 0F56 D055 3B6D 411B
# Subkey fingerprint: 9B91 7007 AE03 0E36 E4FC 248B 695B 7AE4 BF06 6240
Diffstat (limited to 'src/Text/Pandoc/Writers')
36 files changed, 11217 insertions, 4178 deletions
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e9d3dccf1..f91fa8fa0 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -37,49 +37,59 @@ that it has omitted the construct. AsciiDoc: <http://www.methods.co.nz/asciidoc/> -} module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where +import Control.Monad.State.Strict +import Data.Aeson (Result (..), Value (String), fromJSON, toJSON) +import Data.Char (isPunctuation, isSpace) +import Data.List (intercalate, intersperse, stripPrefix) +import qualified Data.Map as M +import Data.Maybe (fromMaybe, isJust) +import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, space) -import Data.Maybe (fromMaybe) -import Data.List ( stripPrefix, intersperse, intercalate ) import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Control.Monad.State -import qualified Data.Map as M -import Data.Aeson (Value(String), fromJSON, toJSON, Result(..)) -import qualified Data.Text as T -import Data.Char (isSpace, isPunctuation) +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared -data WriterState = WriterState { defListMarker :: String +data WriterState = WriterState { defListMarker :: String , orderedListLevel :: Int , bulletListLevel :: Int , intraword :: Bool + , autoIds :: Set.Set String } -- | Convert Pandoc to AsciiDoc. -writeAsciiDoc :: WriterOptions -> Pandoc -> String +writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeAsciiDoc opts document = - evalState (pandocToAsciiDoc opts document) WriterState{ + evalStateT (pandocToAsciiDoc opts document) WriterState{ defListMarker = "::" , orderedListLevel = 1 , bulletListLevel = 1 , intraword = False + , autoIds = Set.empty } +type ADW = StateT WriterState + -- | Return asciidoc representation of document. -pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String +pandocToAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> ADW m Text pandocToAsciiDoc opts (Pandoc meta blocks) = do let titleblock = not $ null (docTitle meta) && null (docAuthors meta) && null (docDate meta) let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToAsciiDoc opts) - (fmap (render colwidth) . inlineListToAsciiDoc opts) + (fmap render' . blockListToAsciiDoc opts) + (fmap render' . inlineListToAsciiDoc opts) meta let addTitleLine (String t) = String $ t <> "\n" <> T.replicate (T.length t) "=" @@ -93,12 +103,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do let context = defField "body" main $ defField "toc" (writerTableOfContents opts && - writerTemplate opts /= Nothing) - $ defField "titleblock" titleblock - $ metadata' + isJust (writerTemplate opts)) + $defField "titleblock" titleblock metadata' case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape special characters for AsciiDoc. escapeString :: String -> String @@ -118,18 +127,19 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker beginsWithOrderedListMarker :: String -> Bool beginsWithOrderedListMarker str = case runParser olMarker defaultParserState "para start" (take 10 str) of - Left _ -> False - Right _ -> True + Left _ -> False + Right _ -> True -- | Convert Pandoc block element to asciidoc. -blockToAsciiDoc :: WriterOptions -- ^ Options +blockToAsciiDoc :: PandocMonad m + => WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState Doc + -> ADW m Doc blockToAsciiDoc _ Null = return empty blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines return $ contents <> blankline -blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do +blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = blockToAsciiDoc opts (Para [Image attr alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do contents <- inlineListToAsciiDoc opts inlines @@ -145,9 +155,11 @@ blockToAsciiDoc opts (LineBlock lns) = do let joinWithLinefeeds = nowrap . mconcat . intersperse cr contents <- joinWithLinefeeds <$> mapM docify lns return $ "[verse]" $$ text "--" $$ contents $$ text "--" $$ blankline -blockToAsciiDoc _ (RawBlock f s) +blockToAsciiDoc _ b@(RawBlock f s) | f == "asciidoc" = return $ text s - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToAsciiDoc _ HorizontalRule = return $ blankline <> text "'''''" <> blankline blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do @@ -155,21 +167,25 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do let len = offset contents -- ident seem to be empty most of the time and asciidoc will generate them automatically -- so lets make them not show up when null - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") + ids <- gets autoIds + let autoId = uniqueIdent inlines ids + modify $ \st -> st{ autoIds = Set.insert autoId ids } + let identifier = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) + then empty else "[[" <> text ident <> "]]" let setext = writerSetextHeaders opts - return $ + return (if setext then identifier $$ contents $$ (case level of - 1 -> text $ replicate len '-' - 2 -> text $ replicate len '~' - 3 -> text $ replicate len '^' - 4 -> text $ replicate len '+' - _ -> empty) <> blankline + 1 -> text $ replicate len '-' + 2 -> text $ replicate len '~' + 3 -> text $ replicate len '^' + 4 -> text $ replicate len '+' + _ -> empty) <> blankline else identifier $$ text (replicate level '=') <> space <> contents <> blankline) -blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ (flush $ +blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush ( if null classes then "...." $$ text str $$ "...." else attrs $$ "----" $$ text str $$ "----") @@ -194,7 +210,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do let isSimple = all (== 0) widths let relativePercentWidths = if isSimple then widths - else map (/ (sum widths)) widths + else map (/ sum widths) widths let widths'' :: [Integer] widths'' = map (floor . (* 100)) relativePercentWidths -- ensure that the widths sum to 100 @@ -210,7 +226,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do AlignCenter -> "^" AlignRight -> ">" AlignDefault -> "") ++ - if wi == 0 then "" else (show wi ++ "%") + if wi == 0 then "" else show wi ++ "%" let headerspec = if all null headers then empty else text "options=\"header\"," @@ -256,21 +272,21 @@ blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $ - zip markers' items + contents <- zipWithM (orderedListItemToAsciiDoc opts) markers' items return $ cat contents <> blankline blockToAsciiDoc opts (DefinitionList items) = do contents <- mapM (definitionListItemToAsciiDoc opts) items return $ cat contents <> blankline blockToAsciiDoc opts (Div (ident,_,_) bs) = do - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") + let identifier = if null ident then empty else ("[[" <> text ident <> "]]") contents <- blockListToAsciiDoc opts bs return $ identifier $$ contents -- | Convert bullet list item (list of blocks) to asciidoc. -bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToAsciiDoc :: PandocMonad m + => WriterOptions -> [Block] -> ADW m Doc bulletListItemToAsciiDoc opts blocks = do - let addBlock :: Doc -> Block -> State WriterState Doc + let addBlock :: PandocMonad m => Doc -> Block -> ADW m Doc addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b return $ d <> cr <> chomp x @@ -278,7 +294,7 @@ bulletListItemToAsciiDoc opts blocks = do return $ d <> cr <> chomp x addBlock d b = do x <- blockToAsciiDoc opts b return $ d <> cr <> text "+" <> cr <> chomp x - lev <- bulletListLevel `fmap` get + lev <- gets bulletListLevel modify $ \s -> s{ bulletListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ bulletListLevel = lev } @@ -286,37 +302,38 @@ bulletListItemToAsciiDoc opts blocks = do return $ marker <> text " " <> contents <> cr -- | Convert ordered list item (a list of blocks) to asciidoc. -orderedListItemToAsciiDoc :: WriterOptions -- ^ options +orderedListItemToAsciiDoc :: PandocMonad m + => WriterOptions -- ^ options -> String -- ^ list item marker -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc + -> ADW m Doc orderedListItemToAsciiDoc opts marker blocks = do - let addBlock :: Doc -> Block -> State WriterState Doc - addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b + let addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b return $ d <> cr <> chomp x addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b return $ d <> cr <> chomp x addBlock d b = do x <- blockToAsciiDoc opts b return $ d <> cr <> text "+" <> cr <> chomp x - lev <- orderedListLevel `fmap` get + lev <- gets orderedListLevel modify $ \s -> s{ orderedListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ orderedListLevel = lev } return $ text marker <> text " " <> contents <> cr -- | Convert definition list item (label, list of blocks) to asciidoc. -definitionListItemToAsciiDoc :: WriterOptions +definitionListItemToAsciiDoc :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> State WriterState Doc + -> ADW m Doc definitionListItemToAsciiDoc opts (label, defs) = do labelText <- inlineListToAsciiDoc opts label - marker <- defListMarker `fmap` get + marker <- gets defListMarker if marker == "::" then modify (\st -> st{ defListMarker = ";;"}) else modify (\st -> st{ defListMarker = "::"}) let divider = cr <> text "+" <> cr - let defsToAsciiDoc :: [Block] -> State WriterState Doc + let defsToAsciiDoc :: PandocMonad m => [Block] -> ADW m Doc defsToAsciiDoc ds = (vcat . intersperse divider . map chomp) `fmap` mapM (blockToAsciiDoc opts) ds defs' <- mapM defsToAsciiDoc defs @@ -325,15 +342,16 @@ definitionListItemToAsciiDoc opts (label, defs) = do return $ labelText <> text marker <> cr <> contents <> cr -- | Convert list of Pandoc block elements to asciidoc. -blockListToAsciiDoc :: WriterOptions -- ^ Options +blockListToAsciiDoc :: PandocMonad m + => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState Doc + -> ADW m Doc blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks data SpacyLocation = End | Start -- | Convert list of Pandoc inline elements to asciidoc. -inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToAsciiDoc :: PandocMonad m => WriterOptions -> [Inline] -> ADW m Doc inlineListToAsciiDoc opts lst = do oldIntraword <- gets intraword setIntraword False @@ -369,14 +387,14 @@ inlineListToAsciiDoc opts lst = do isSpacy Start (Str (c:_)) = isPunctuation c || isSpace c isSpacy _ _ = False -setIntraword :: Bool -> State WriterState () +setIntraword :: PandocMonad m => Bool -> ADW m () setIntraword b = modify $ \st -> st{ intraword = b } -withIntraword :: State WriterState a -> State WriterState a +withIntraword :: PandocMonad m => ADW m a -> ADW m a withIntraword p = setIntraword True *> p <* setIntraword False -- | Convert Pandoc inline element to asciidoc. -inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc +inlineToAsciiDoc :: PandocMonad m => WriterOptions -> Inline -> ADW m Doc inlineToAsciiDoc opts (Emph lst) = do contents <- inlineListToAsciiDoc opts lst isIntraword <- gets intraword @@ -408,16 +426,19 @@ inlineToAsciiDoc _ (Math InlineMath str) = return $ "latexmath:[$" <> text str <> "$]" inlineToAsciiDoc _ (Math DisplayMath str) = return $ "latexmath:[\\[" <> text str <> "\\]]" -inlineToAsciiDoc _ (RawInline f s) +inlineToAsciiDoc _ il@(RawInline f s) | f == "asciidoc" = return $ text s + | otherwise = do + report $ InlineNotRendered il + return empty | otherwise = return empty -inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr +inlineToAsciiDoc _ LineBreak = return $ " +" <> cr inlineToAsciiDoc _ Space = return space inlineToAsciiDoc opts SoftBreak = case writerWrapText opts of - WrapAuto -> return space + WrapAuto -> return space WrapPreserve -> return cr - WrapNone -> return space + WrapNone -> return space inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do -- relative: link:downloads/foo.zip[download foo.zip] @@ -431,20 +452,20 @@ inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) let useAuto = case txt of [Str s] | escapeURI s == srcSuffix -> True - _ -> False + _ -> False return $ if useAuto then text srcSuffix else prefix <> text src <> "[" <> linktext <> "]" inlineToAsciiDoc opts (Image attr alternate (src, tit)) = do -- image:images/logo.png[Company logo, title="blah"] - let txt = if (null alternate) || (alternate == [Str ""]) + let txt = if null alternate || (alternate == [Str ""]) then [Str "image"] else alternate linktext <- inlineListToAsciiDoc opts txt let linktitle = if null tit then empty else ",title=\"" <> text tit <> "\"" - showDim dir = case (dimension dir attr) of + showDim dir = case dimension dir attr of Just (Percent a) -> ["scaledwidth=" <> text (show (Percent a))] Just dim -> @@ -464,6 +485,6 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do -- asciidoc can't handle blank lines in notes inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]" inlineToAsciiDoc opts (Span (ident,_,_) ils) = do - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") + let identifier = if null ident then empty else ("[[" <> text ident <> "]]") contents <- inlineListToAsciiDoc opts ils return $ identifier <> contents diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 88a92eb47..7a6eb2948 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2015-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.CommonMark - Copyright : Copyright (C) 2015 John MacFarlane + Copyright : Copyright (C) 2015-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,34 +32,43 @@ CommonMark: <http://commonmark.org> -} module Text.Pandoc.Writers.CommonMark (writeCommonMark) where -import Text.Pandoc.Writers.HTML (writeHtmlString) +import CMarkGFM +import Control.Monad.State.Strict (State, get, modify, runState) +import Data.Foldable (foldrM) +import Data.List (transpose) +import Data.Monoid (Any (..), (<>)) +import Data.Text (Text) +import qualified Data.Text as T +import Network.HTTP (urlEncode) +import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition -import Text.Pandoc.Shared (isTightList, linesToPara) +import Text.Pandoc.Options +import Text.Pandoc.Shared (isTightList, linesToPara, substitute) import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Walk (query, walk, walkM) +import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import CMark -import qualified Data.Text as T -import Control.Monad.Identity (runIdentity, Identity) -import Control.Monad.State (runState, State, modify, get) -import Text.Pandoc.Walk (walkM) -- | Convert Pandoc to CommonMark. -writeCommonMark :: WriterOptions -> Pandoc -> String -writeCommonMark opts (Pandoc meta blocks) = rendered - where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') - (blocks', notes) = runState (walkM processNotes blocks) [] - notes' = if null notes - then [] - else [OrderedList (1, Decimal, Period) $ reverse notes] - metadata = runIdentity $ metaToJSON opts - (blocksToCommonMark opts) - (inlinesToCommonMark opts) - meta - context = defField "body" main $ metadata - rendered = case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context +writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeCommonMark opts (Pandoc meta blocks) = do + let (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + main <- blocksToCommonMark opts (blocks' ++ notes') + metadata <- metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + let context = defField "body" main metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +softBreakToSpace :: Inline -> Inline +softBreakToSpace SoftBreak = Space +softBreakToSpace x = x processNotes :: Inline -> State [[Block]] Inline processNotes (Note bs) = do @@ -70,111 +80,235 @@ processNotes x = return x node :: NodeType -> [Node] -> Node node = Node Nothing -blocksToCommonMark :: WriterOptions -> [Block] -> Identity String -blocksToCommonMark opts bs = return $ - T.unpack $ nodeToCommonmark cmarkOpts colwidth - $ node DOCUMENT (blocksToNodes bs) - where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing +blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m Text +blocksToCommonMark opts bs = do + let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + nodes <- blocksToNodes opts bs + return $ T.stripEnd $ + nodeToCommonmark cmarkOpts colwidth $ + node DOCUMENT nodes -inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m Text inlinesToCommonMark opts ils = return $ - T.unpack $ nodeToCommonmark cmarkOpts colwidth - $ node PARAGRAPH (inlinesToNodes ils) + nodeToCommonmark cmarkOpts colwidth $ + node PARAGRAPH (inlinesToNodes opts ils) where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing -blocksToNodes :: [Block] -> [Node] -blocksToNodes = foldr blockToNodes [] - -blockToNodes :: Block -> [Node] -> [Node] -blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) -blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) -blockToNodes (LineBlock lns) = blockToNodes $ linesToPara lns -blockToNodes (CodeBlock (_,classes,_) xs) = - (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) -blockToNodes (RawBlock fmt xs) - | fmt == Format "html" = (node (HTML_BLOCK (T.pack xs)) [] :) - | otherwise = (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] :) -blockToNodes (BlockQuote bs) = - (node BLOCK_QUOTE (blocksToNodes bs) :) -blockToNodes (BulletList items) = - (node (LIST ListAttributes{ - listType = BULLET_LIST, - listDelim = PERIOD_DELIM, - listTight = isTightList items, - listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) -blockToNodes (OrderedList (start, _sty, delim) items) = - (node (LIST ListAttributes{ - listType = ORDERED_LIST, - listDelim = case delim of - OneParen -> PAREN_DELIM - TwoParens -> PAREN_DELIM - _ -> PERIOD_DELIM, - listTight = isTightList items, - listStart = start }) (map (node ITEM . blocksToNodes) items) :) -blockToNodes HorizontalRule = (node THEMATIC_BREAK [] :) -blockToNodes (Header lev _ ils) = (node (HEADING lev) (inlinesToNodes ils) :) -blockToNodes (Div _ bs) = (blocksToNodes bs ++) -blockToNodes (DefinitionList items) = blockToNodes (BulletList items') +blocksToNodes :: PandocMonad m => WriterOptions -> [Block] -> m [Node] +blocksToNodes opts = foldrM (blockToNodes opts) [] + +blockToNodes :: PandocMonad m => WriterOptions -> Block -> [Node] -> m [Node] +blockToNodes opts (Plain xs) ns = + return (node PARAGRAPH (inlinesToNodes opts xs) : ns) +blockToNodes opts (Para xs) ns = + return (node PARAGRAPH (inlinesToNodes opts xs) : ns) +blockToNodes opts (LineBlock lns) ns = blockToNodes opts (linesToPara lns) ns +blockToNodes _ (CodeBlock (_,classes,_) xs) ns = return + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns) +blockToNodes opts (RawBlock fmt xs) ns + | fmt == Format "html" && isEnabled Ext_raw_html opts + = return (node (HTML_BLOCK (T.pack xs)) [] : ns) + | fmt == Format "latex" || fmt == Format "tex" && isEnabled Ext_raw_tex opts + = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns) + | otherwise = return ns +blockToNodes opts (BlockQuote bs) ns = do + nodes <- blocksToNodes opts bs + return (node BLOCK_QUOTE nodes : ns) +blockToNodes opts (BulletList items) ns = do + nodes <- mapM (blocksToNodes opts) items + return (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM) nodes) : ns) +blockToNodes opts (OrderedList (start, _sty, delim) items) ns = do + nodes <- mapM (blocksToNodes opts) items + return (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM) nodes) : ns) +blockToNodes _ HorizontalRule ns = return (node THEMATIC_BREAK [] : ns) +blockToNodes opts (Header lev _ ils) ns = + return (node (HEADING lev) (inlinesToNodes opts ils) : ns) +blockToNodes opts (Div attr bs) ns = do + nodes <- blocksToNodes opts bs + let op = tagWithAttributes opts True False "div" attr + if isEnabled Ext_raw_html opts + then return (node (HTML_BLOCK op) [] : nodes ++ + [node (HTML_BLOCK (T.pack "</div>")) []] ++ ns) + else return (nodes ++ ns) +blockToNodes opts (DefinitionList items) ns = + blockToNodes opts (BulletList items') ns where items' = map dlToBullet items - dlToBullet (term, ((Para xs : ys) : zs)) = + dlToBullet (term, (Para xs : ys) : zs) = Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs - dlToBullet (term, ((Plain xs : ys) : zs)) = + dlToBullet (term, (Plain xs : ys) : zs) = Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs dlToBullet (term, xs) = Para term : concat xs -blockToNodes t@(Table _ _ _ _ _) = - (node (HTML_BLOCK (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :) -blockToNodes Null = id - -inlinesToNodes :: [Inline] -> [Node] -inlinesToNodes = foldr inlineToNodes [] - -inlineToNodes :: Inline -> [Node] -> [Node] -inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) -inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) -inlineToNodes LineBreak = (node LINEBREAK [] :) -inlineToNodes SoftBreak = (node SOFTBREAK [] :) -inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) -inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) -inlineToNodes (Strikeout xs) = - ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes xs ++ - [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) -inlineToNodes (Superscript xs) = - ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes xs ++ +blockToNodes opts t@(Table capt aligns _widths headers rows) ns = do + let allcells = concat (headers:rows) + let isLineBreak LineBreak = Any True + isLineBreak _ = Any False + let isPlainOrPara [Para _] = True + isPlainOrPara [Plain _] = True + isPlainOrPara [] = True + isPlainOrPara _ = False + let isSimple = all isPlainOrPara allcells && + not ( getAny (query isLineBreak allcells) ) + if isEnabled Ext_pipe_tables opts && isSimple + then do + -- We construct a table manually as a CUSTOM_BLOCK, for + -- two reasons: (1) cmark-gfm currently doesn't support + -- rendering TABLE nodes; (2) we can align the column sides; + -- (3) we can render the caption as a regular paragraph. + let capt' = node PARAGRAPH (inlinesToNodes opts capt) + -- backslash | in code and raw: + let fixPipe (Code attr xs) = + Code attr (substitute "|" "\\|" xs) + fixPipe (RawInline format xs) = + RawInline format (substitute "|" "\\|" xs) + fixPipe x = x + let toCell [Plain ils] = T.strip + $ nodeToCommonmark [] Nothing + $ node (CUSTOM_INLINE mempty mempty) + $ inlinesToNodes opts + $ walk (fixPipe . softBreakToSpace) ils + toCell [Para ils] = T.strip + $ nodeToCommonmark [] Nothing + $ node (CUSTOM_INLINE mempty mempty) + $ inlinesToNodes opts + $ walk (fixPipe . softBreakToSpace) ils + toCell [] = "" + toCell xs = error $ "toCell encountered " ++ show xs + let separator = " | " + let starter = "| " + let ender = " |" + let rawheaders = map toCell headers + let rawrows = map (map toCell) rows + let maximum' [] = 0 + maximum' xs = maximum xs + let colwidths = map (maximum' . map T.length) $ + transpose (rawheaders:rawrows) + let toHeaderLine len AlignDefault = T.replicate len "-" + toHeaderLine len AlignLeft = ":" <> + T.replicate (max (len - 1) 1) "-" + toHeaderLine len AlignRight = + T.replicate (max (len - 1) 1) "-" <> ":" + toHeaderLine len AlignCenter = ":" <> + T.replicate (max (len - 2) 1) (T.pack "-") <> ":" + let rawheaderlines = zipWith toHeaderLine colwidths aligns + let headerlines = starter <> T.intercalate separator rawheaderlines <> + ender + let padContent (align, w) t' = + let padding = w - T.length t' + halfpadding = padding `div` 2 + in case align of + AlignRight -> T.replicate padding " " <> t' + AlignCenter -> T.replicate halfpadding " " <> t' <> + T.replicate (padding - halfpadding) " " + _ -> t' <> T.replicate padding " " + let toRow xs = starter <> T.intercalate separator + (zipWith padContent (zip aligns colwidths) xs) <> + ender + let table' = toRow rawheaders <> "\n" <> headerlines <> "\n" <> + T.intercalate "\n" (map toRow rawrows) + return (node (CUSTOM_BLOCK table' mempty) [] : + if null capt + then ns + else capt' : ns) + else do -- fall back to raw HTML + s <- writeHtml5String def $! Pandoc nullMeta [t] + return (node (HTML_BLOCK s) [] : ns) +blockToNodes _ Null ns = return ns + +inlinesToNodes :: WriterOptions -> [Inline] -> [Node] +inlinesToNodes opts = foldr (inlineToNodes opts) [] + +inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node] +inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :) + where s' = if isEnabled Ext_smart opts + then unsmartify opts s + else s +inlineToNodes _ Space = (node (TEXT (T.pack " ")) [] :) +inlineToNodes _ LineBreak = (node LINEBREAK [] :) +inlineToNodes opts SoftBreak + | isEnabled Ext_hard_line_breaks opts = (node LINEBREAK [] :) + | writerWrapText opts == WrapNone = (node (TEXT " ") [] :) + | otherwise = (node SOFTBREAK [] :) +inlineToNodes opts (Emph xs) = (node EMPH (inlinesToNodes opts xs) :) +inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :) +inlineToNodes opts (Strikeout xs) = + if isEnabled Ext_strikeout opts + then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :) + else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) +inlineToNodes opts (Superscript xs) = + ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) -inlineToNodes (Subscript xs) = - ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes xs ++ +inlineToNodes opts (Subscript xs) = + ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) -inlineToNodes (SmallCaps xs) = - ((node (HTML_INLINE (T.pack "<span style=\"font-variant:small-caps;\">")) [] - : inlinesToNodes xs ++ +inlineToNodes opts (SmallCaps xs) = + ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] + : inlinesToNodes opts xs ++ [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) -inlineToNodes (Link _ ils (url,tit)) = - (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) -inlineToNodes (Image _ ils (url,tit)) = - (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) -inlineToNodes (RawInline fmt xs) - | fmt == Format "html" = (node (HTML_INLINE (T.pack xs)) [] :) - | otherwise = (node (CUSTOM_INLINE (T.pack xs) (T.empty)) [] :) -inlineToNodes (Quoted qt ils) = - ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) +inlineToNodes opts (Link _ ils (url,tit)) = + (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) +-- title beginning with fig: indicates implicit figure +inlineToNodes opts (Image alt ils (url,'f':'i':'g':':':tit)) = + inlineToNodes opts (Image alt ils (url,tit)) +inlineToNodes opts (Image _ ils (url,tit)) = + (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) +inlineToNodes opts (RawInline fmt xs) + | fmt == Format "html" && isEnabled Ext_raw_html opts + = (node (HTML_INLINE (T.pack xs)) [] :) + | (fmt == Format "latex" || fmt == Format "tex") && isEnabled Ext_raw_tex opts + = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :) + | otherwise = id +inlineToNodes opts (Quoted qt ils) = + ((node (TEXT start) [] : + inlinesToNodes opts ils ++ [node (TEXT end) []]) ++) where (start, end) = case qt of - SingleQuote -> (T.pack "‘", T.pack "’") - DoubleQuote -> (T.pack "“", T.pack "”") -inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) -inlineToNodes (Math mt str) = - case mt of - InlineMath -> - (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) - DisplayMath -> - (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) -inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) -inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) -inlineToNodes (Note _) = id -- should not occur + SingleQuote + | isEnabled Ext_smart opts -> ("'","'") + | otherwise -> ("‘", "’") + DoubleQuote + | isEnabled Ext_smart opts -> ("\"", "\"") + | otherwise -> ("“", "”") +inlineToNodes _ (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes opts (Math mt str) = + case writerHTMLMathMethod opts of + WebTeX url -> + let core = inlineToNodes opts + (Image nullAttr [Str str] (url ++ urlEncode str, str)) + sep = if mt == DisplayMath + then (node LINEBREAK [] :) + else id + in (sep . core . sep) + _ -> + case mt of + InlineMath -> + (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + DisplayMath -> + (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes opts (Span attr ils) = + let nodes = inlinesToNodes opts ils + op = tagWithAttributes opts True False "span" attr + in if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE op) [] : nodes ++ + [node (HTML_INLINE (T.pack "</span>")) []]) ++) + else (nodes ++) +inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++) +inlineToNodes _ (Note _) = id -- should not occur -- we remove Note elements in preprocessing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index c663c75ce..f94c12d89 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2015 John MacFarlane + Copyright : Copyright (C) 2007-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,20 +30,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into ConTeXt. -} module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where +import Control.Monad.State.Strict +import Data.Char (ord, isDigit) +import Data.List (intercalate, intersperse) +import Data.Maybe (mapMaybe) +import Data.Text (Text) +import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 +import Text.Pandoc.Class (PandocMonad, report, toLang) import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Walk (query) -import Text.Printf ( printf ) -import Data.List ( intercalate, intersperse ) -import Data.Char ( ord ) -import Data.Maybe ( catMaybes ) -import Control.Monad.State import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Network.URI ( isURI, unEscapeString ) +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Walk (query) +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) data WriterState = WriterState { stNextRef :: Int -- number of next URL reference @@ -50,37 +55,43 @@ data WriterState = , stOptions :: WriterOptions -- writer options } +data Tabl = Xtb | Ntb deriving (Show, Eq) + orderedListStyles :: [Char] orderedListStyles = cycle "narg" -- | Convert Pandoc to ConTeXt. -writeConTeXt :: WriterOptions -> Pandoc -> String +writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeConTeXt options document = let defaultWriterState = WriterState { stNextRef = 1 , stOrderedListLevel = 0 , stOptions = options } - in evalState (pandocToConTeXt options document) defaultWriterState + in evalStateT (pandocToConTeXt options document) defaultWriterState -pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String +type WM = StateT WriterState + +pandocToConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> WM m Text pandocToConTeXt options (Pandoc meta blocks) = do let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToConTeXt) - (fmap (render colwidth) . inlineListToConTeXt) + (fmap render' . blockListToConTeXt) + (fmap render' . inlineListToConTeXt) meta body <- mapM (elementToConTeXt options) $ hierarchicalize blocks - let main = (render colwidth . vcat) body - let layoutFromMargins = intercalate [','] $ catMaybes $ - map (\(x,y) -> + let main = (render' . vcat) body + let layoutFromMargins = intercalate [','] $ mapMaybe (\(x,y) -> ((x ++ "=") ++) <$> getField y metadata) [("leftmargin","margin-left") ,("rightmargin","margin-right") ,("top","margin-top") ,("bottom","margin-bottom") ] + mblang <- fromBCP47 (getLang options meta) let context = defField "toc" (writerTableOfContents options) $ defField "placelist" (intercalate ("," :: String) $ take (writerTOCDepth options + @@ -93,14 +104,17 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "body" main $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) - $ metadata - let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $ - getField "lang" context) - $ defField "context-dir" (toContextDir $ getField "dir" context) - $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' + $ maybe id (defField "context-lang") mblang + $ (case getField "papersize" metadata of + Just (('a':d:ds) :: String) + | all isDigit (d:ds) -> resetField "papersize" + (('A':d:ds) :: String) + _ -> id) metadata + let context' = defField "context-dir" (toContextDir + $ getField "dir" context) context + case writerTemplate options of + Nothing -> return main + Just tpl -> renderTemplate' tpl context' toContextDir :: Maybe String -> String toContextDir (Just "rtl") = "r2l" @@ -110,24 +124,24 @@ toContextDir _ = "" -- | escape things as needed for ConTeXt escapeCharForConTeXt :: WriterOptions -> Char -> String escapeCharForConTeXt opts ch = - let ligatures = writerTeXLigatures opts in + let ligatures = isEnabled Ext_smart opts in case ch of - '{' -> "\\{" - '}' -> "\\}" - '\\' -> "\\letterbackslash{}" - '$' -> "\\$" - '|' -> "\\letterbar{}" - '%' -> "\\letterpercent{}" - '~' -> "\\lettertilde{}" - '#' -> "\\#" - '[' -> "{[}" - ']' -> "{]}" - '\160' -> "~" + '{' -> "\\{" + '}' -> "\\}" + '\\' -> "\\letterbackslash{}" + '$' -> "\\$" + '|' -> "\\letterbar{}" + '%' -> "\\letterpercent{}" + '~' -> "\\lettertilde{}" + '#' -> "\\#" + '[' -> "{[}" + ']' -> "{]}" + '\160' -> "~" '\x2014' | ligatures -> "---" '\x2013' | ligatures -> "--" '\x2019' | ligatures -> "'" '\x2026' -> "\\ldots{}" - x -> [x] + x -> [x] -- | Escape string for ConTeXt stringToConTeXt :: WriterOptions -> String -> String @@ -137,20 +151,20 @@ stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) toLabel :: String -> String toLabel z = concatMap go z where go x - | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) + | x `elem` ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) | otherwise = [x] -- | Convert Elements to ConTeXt -elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc +elementToConTeXt :: PandocMonad m => WriterOptions -> Element -> WM m Doc elementToConTeXt _ (Blk block) = blockToConTeXt block elementToConTeXt opts (Sec level _ attr title' elements) = do header' <- sectionHeader attr level title' + footer' <- sectionFooter attr level innerContents <- mapM (elementToConTeXt opts) elements - return $ vcat (header' : innerContents) + return $ header' $$ vcat innerContents $$ footer' -- | Convert Pandoc block element to ConTeXt. -blockToConTeXt :: Block - -> State WriterState Doc +blockToConTeXt :: PandocMonad m => Block -> WM m Doc blockToConTeXt Null = return empty blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure @@ -175,9 +189,12 @@ blockToConTeXt (CodeBlock _ str) = return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline -blockToConTeXt (RawBlock _ _ ) = return empty +blockToConTeXt b@(RawBlock _ _ ) = do + report $ BlockNotRendered b + return empty blockToConTeXt (Div (ident,_,kvs) bs) = do let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" + mblang <- fromBCP47 (lookup "lang" kvs) let wrapRef txt = if null ident then txt else ("\\reference" <> brackets (text $ toLabel ident) <> @@ -186,12 +203,12 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do Just "rtl" -> align "righttoleft" Just "ltr" -> align "lefttoright" _ -> id - wrapLang txt = case lookup "lang" kvs of + wrapLang txt = case mblang of Just lng -> "\\start\\language[" - <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" + <> text lng <> "]" $$ txt $$ "\\stop" Nothing -> txt wrapBlank txt = blankline <> txt <> blankline - fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs + (wrapBlank . wrapLang . wrapDir . wrapRef) <$> blockListToConTeXt bs blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst return $ ("\\startitemize" <> if isTightList lst @@ -201,9 +218,9 @@ blockToConTeXt (BulletList lst) = do blockToConTeXt (OrderedList (start, style', delim) lst) = do st <- get let level = stOrderedListLevel st - put $ st {stOrderedListLevel = level + 1} + put st {stOrderedListLevel = level + 1} contents <- mapM listItemToConTeXt lst - put $ st {stOrderedListLevel = level} + put st {stOrderedListLevel = level} let start' = if start == 1 then "" else "start=" ++ show start let delim' = case delim of DefaultDelim -> "" @@ -238,39 +255,83 @@ blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline -- If this is ever executed, provide a default for the reference identifier. blockToConTeXt (Header level attr lst) = sectionHeader attr level lst blockToConTeXt (Table caption aligns widths heads rows) = do - let colDescriptor colWidth alignment = (case alignment of - AlignLeft -> 'l' - AlignRight -> 'r' - AlignCenter -> 'c' - AlignDefault -> 'l'): - if colWidth == 0 - then "|" - else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|") - let colDescriptors = "|" ++ (concat $ - zipWith colDescriptor widths aligns) - headers <- if all null heads - then return empty - else liftM ($$ "\\HL") $ tableRowToConTeXt heads + opts <- gets stOptions + let tabl = if isEnabled Ext_ntb opts + then Ntb + else Xtb captionText <- inlineListToConTeXt caption - rows' <- mapM tableRowToConTeXt rows - return $ "\\placetable" <> (if null caption - then brackets "none" - else empty) - <> braces captionText $$ - "\\starttable" <> brackets (text colDescriptors) $$ - "\\HL" $$ headers $$ - vcat rows' $$ "\\HL" $$ "\\stoptable" <> blankline - -tableRowToConTeXt :: [[Block]] -> State WriterState Doc -tableRowToConTeXt cols = do - cols' <- mapM blockListToConTeXt cols - return $ (vcat (map ("\\NC " <>) cols')) $$ "\\NC\\AR" - -listItemToConTeXt :: [Block] -> State WriterState Doc + headers <- if all null heads + then return empty + else tableRowToConTeXt tabl aligns widths heads + rows' <- mapM (tableRowToConTeXt tabl aligns widths) rows + body <- tableToConTeXt tabl headers rows' + return $ "\\startplacetable" <> brackets ( + if null caption + then "location=none" + else "title=" <> braces captionText + ) $$ body $$ "\\stopplacetable" <> blankline + +tableToConTeXt :: PandocMonad m => Tabl -> Doc -> [Doc] -> WM m Doc +tableToConTeXt Xtb heads rows = + return $ "\\startxtable" $$ + (if isEmpty heads + then empty + else "\\startxtablehead[head]" $$ heads $$ "\\stopxtablehead") $$ + (if null rows + then empty + else "\\startxtablebody[body]" $$ vcat (init rows) $$ "\\stopxtablebody" $$ + "\\startxtablefoot[foot]" $$ last rows $$ "\\stopxtablefoot") $$ + "\\stopxtable" +tableToConTeXt Ntb heads rows = + return $ "\\startTABLE" $$ + (if isEmpty heads + then empty + else "\\startTABLEhead" $$ heads $$ "\\stopTABLEhead") $$ + (if null rows + then empty + else "\\startTABLEbody" $$ vcat (init rows) $$ "\\stopTABLEbody" $$ + "\\startTABLEfoot" $$ last rows $$ "\\stopTABLEfoot") $$ + "\\stopTABLE" + +tableRowToConTeXt :: PandocMonad m => Tabl -> [Alignment] -> [Double] -> [[Block]] -> WM m Doc +tableRowToConTeXt Xtb aligns widths cols = do + cells <- mapM (tableColToConTeXt Xtb) $ zip3 aligns widths cols + return $ "\\startxrow" $$ vcat cells $$ "\\stopxrow" +tableRowToConTeXt Ntb aligns widths cols = do + cells <- mapM (tableColToConTeXt Ntb) $ zip3 aligns widths cols + return $ vcat cells $$ "\\NC\\NR" + +tableColToConTeXt :: PandocMonad m => Tabl -> (Alignment, Double, [Block]) -> WM m Doc +tableColToConTeXt tabl (align, width, blocks) = do + cellContents <- blockListToConTeXt blocks + let colwidth = if width == 0 + then empty + else "width=" <> braces (text (printf "%.2f\\textwidth" width)) + let halign = alignToConTeXt align + let options = (if keys == empty + then empty + else brackets keys) <> space + where keys = hcat $ intersperse "," $ filter (empty /=) [halign, colwidth] + tableCellToConTeXt tabl options cellContents + +tableCellToConTeXt :: PandocMonad m => Tabl -> Doc -> Doc -> WM m Doc +tableCellToConTeXt Xtb options cellContents = + return $ "\\startxcell" <> options <> cellContents <> " \\stopxcell" +tableCellToConTeXt Ntb options cellContents = + return $ "\\NC" <> options <> cellContents + +alignToConTeXt :: Alignment -> Doc +alignToConTeXt align = case align of + AlignLeft -> "align=right" + AlignRight -> "align=left" + AlignCenter -> "align=middle" + AlignDefault -> empty + +listItemToConTeXt :: PandocMonad m => [Block] -> WM m Doc listItemToConTeXt list = blockListToConTeXt list >>= - return . ("\\item" $$) . (nest 2) + return . ("\\item" $$) . nest 2 -defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc +defListItemToConTeXt :: PandocMonad m => ([Inline], [[Block]]) -> WM m Doc defListItemToConTeXt (term, defs) = do term' <- inlineListToConTeXt term def' <- liftM vsep $ mapM blockListToConTeXt defs @@ -278,12 +339,13 @@ defListItemToConTeXt (term, defs) = do "\\stopdescription" <> blankline -- | Convert list of block elements to ConTeXt. -blockListToConTeXt :: [Block] -> State WriterState Doc +blockListToConTeXt :: PandocMonad m => [Block] -> WM m Doc blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst -- | Convert list of inline elements to ConTeXt. -inlineListToConTeXt :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListToConTeXt :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> WM m Doc inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst -- We add a \strut after a line break that precedes a space, -- or the space gets swallowed @@ -292,13 +354,14 @@ inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst addStruts xs addStruts (x:xs) = x : addStruts xs addStruts [] = [] - isSpacey Space = True + isSpacey Space = True isSpacey (Str ('\160':_)) = True - isSpacey _ = False + isSpacey _ = False -- | Convert inline element to ConTeXt -inlineToConTeXt :: Inline -- ^ Inline to convert - -> State WriterState Doc +inlineToConTeXt :: PandocMonad m + => Inline -- ^ Inline to convert + -> WM m Doc inlineToConTeXt (Emph lst) = do contents <- inlineListToConTeXt lst return $ braces $ "\\em " <> contents @@ -338,8 +401,10 @@ inlineToConTeXt (Math DisplayMath str) = return $ text "\\startformula " <> text str <> text " \\stopformula" <> space inlineToConTeXt (RawInline "context" str) = return $ text str inlineToConTeXt (RawInline "tex" str) = return $ text str -inlineToConTeXt (RawInline _ _) = return empty -inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr +inlineToConTeXt il@(RawInline _ _) = do + report $ InlineNotRendered il + return empty +inlineToConTeXt LineBreak = return $ text "\\crlf" <> cr inlineToConTeXt SoftBreak = do wrapText <- gets (writerWrapText . stOptions) return $ case wrapText of @@ -348,7 +413,7 @@ inlineToConTeXt SoftBreak = do WrapPreserve -> cr inlineToConTeXt Space = return space -- Handle HTML-like internal document references to sections -inlineToConTeXt (Link _ txt (('#' : ref), _)) = do +inlineToConTeXt (Link _ txt ('#' : ref, _)) = do opts <- gets stOptions contents <- inlineListToConTeXt txt let ref' = toLabel $ stringToConTeXt opts ref @@ -374,7 +439,7 @@ inlineToConTeXt (Link _ txt (src, _)) = do inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do opts <- gets stOptions let showDim dir = let d = text (show dir) <> "=" - in case (dimension dir attr) of + in case dimension dir attr of Just (Pixel a) -> [d <> text (showInInch opts (Pixel a)) <> "in"] Just (Percent a) -> @@ -397,84 +462,104 @@ inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents let codeBlock x@(CodeBlock _ _) = [x] - codeBlock _ = [] + codeBlock _ = [] let codeBlocks = query codeBlock contents return $ if null codeBlocks then text "\\footnote{" <> nest 2 contents' <> char '}' else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" inlineToConTeXt (Span (_,_,kvs) ils) = do + mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of Just "rtl" -> braces $ "\\righttoleft " <> txt Just "ltr" -> braces $ "\\lefttoright " <> txt _ -> txt - wrapLang txt = case lookup "lang" kvs of - Just lng -> "\\start\\language[" <> text (fromBcp47' lng) + wrapLang txt = case mblang of + Just lng -> "\\start\\language[" <> text lng <> "]" <> txt <> "\\stop " Nothing -> txt - fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils + (wrapLang . wrapDir) <$> inlineListToConTeXt ils -- | Craft the section header, inserting the section reference, if supplied. -sectionHeader :: Attr +sectionHeader :: PandocMonad m + => Attr -> Int -> [Inline] - -> State WriterState Doc -sectionHeader (ident,classes,_) hdrLevel lst = do + -> WM m Doc +sectionHeader (ident,classes,kvs) hdrLevel lst = do + opts <- gets stOptions contents <- inlineListToConTeXt lst - st <- get - let opts = stOptions st + levelText <- sectionLevelToText opts (ident,classes,kvs) hdrLevel + let ident' = if null ident + then empty + else "reference=" <> braces (text (toLabel ident)) + let contents' = if contents == empty + then empty + else "title=" <> braces contents + let options = if keys == empty || levelText == empty + then empty + else brackets keys + where keys = hcat $ intersperse "," $ filter (empty /=) [contents', ident'] + let starter = if writerSectionDivs opts + then "\\start" + else "\\" + return $ starter <> levelText <> options <> blankline + +-- | Craft the section footer +sectionFooter :: PandocMonad m => Attr -> Int -> WM m Doc +sectionFooter attr hdrLevel = do + opts <- gets stOptions + levelText <- sectionLevelToText opts attr hdrLevel + return $ if writerSectionDivs opts + then "\\stop" <> levelText <> blankline + else empty + +-- | Generate a textual representation of the section level +sectionLevelToText :: PandocMonad m => WriterOptions -> Attr -> Int -> WM m Doc +sectionLevelToText opts (_,classes,_) hdrLevel = do let level' = case writerTopLevelDivision opts of TopLevelPart -> hdrLevel - 2 TopLevelChapter -> hdrLevel - 1 TopLevelSection -> hdrLevel TopLevelDefault -> hdrLevel - let ident' = toLabel ident let (section, chapter) = if "unnumbered" `elem` classes then (text "subject", text "title") else (text "section", text "chapter") return $ case level' of - -1 -> text "\\part" <> braces contents - 0 -> char '\\' <> chapter <> braces contents - n | n >= 1 && n <= 5 -> char '\\' - <> text (concat (replicate (n - 1) "sub")) - <> section - <> (if (not . null) ident' - then brackets (text ident') - else empty) - <> braces contents - <> blankline - _ -> contents <> blankline - -fromBcp47' :: String -> String -fromBcp47' = fromBcp47 . splitBy (=='-') + -1 -> text "part" + 0 -> chapter + n | n >= 1 -> text (concat (replicate (n - 1) "sub")) + <> section + _ -> empty -- cannot happen + +fromBCP47 :: PandocMonad m => Maybe String -> WM m (Maybe String) +fromBCP47 mbs = fromBCP47' <$> toLang mbs -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes -fromBcp47 :: [String] -> String -fromBcp47 [] = "" -fromBcp47 ("ar":"SY":_) = "ar-sy" -fromBcp47 ("ar":"IQ":_) = "ar-iq" -fromBcp47 ("ar":"JO":_) = "ar-jo" -fromBcp47 ("ar":"LB":_) = "ar-lb" -fromBcp47 ("ar":"DZ":_) = "ar-dz" -fromBcp47 ("ar":"MA":_) = "ar-ma" -fromBcp47 ("de":"1901":_) = "deo" -fromBcp47 ("de":"DE":_) = "de-de" -fromBcp47 ("de":"AT":_) = "de-at" -fromBcp47 ("de":"CH":_) = "de-ch" -fromBcp47 ("el":"poly":_) = "agr" -fromBcp47 ("en":"US":_) = "en-us" -fromBcp47 ("en":"GB":_) = "en-gb" -fromBcp47 ("grc":_) = "agr" -fromBcp47 x = fromIso $ head x - where - fromIso "el" = "gr" - fromIso "eu" = "ba" - fromIso "he" = "il" - fromIso "jp" = "ja" - fromIso "uk" = "ua" - fromIso "vi" = "vn" - fromIso "zh" = "cn" - fromIso l = l +fromBCP47' :: Maybe Lang -> Maybe String +fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" +fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" +fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" +fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb" +fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz" +fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma" +fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo" +fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de" +fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at" +fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch" +fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr" +fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us" +fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb" +fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr" +fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr" +fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba" +fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il" +fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja" +fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua" +fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn" +fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn" +fromBCP47' (Just (Lang l _ _ _) ) = Just l +fromBCP47' Nothing = Nothing diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index cf641dcd6..3daa8d0cf 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,11 +1,6 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE FlexibleInstances, OverloadedStrings, - ScopedTypeVariables, DeriveDataTypeable, CPP #-} -#if MIN_VERSION_base(4,8,0) -#else -{-# LANGUAGE OverlappingInstances #-} -#endif -{- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE FlexibleInstances #-} +{- Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,20 +30,27 @@ Conversion of 'Pandoc' documents to custom markup using a lua writer. -} module Text.Pandoc.Writers.Custom ( writeCustom ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Data.List ( intersperse ) -import Data.Char ( toLower ) -import Data.Typeable -import Scripting.Lua (LuaState, StackValue, callfunc) -import Text.Pandoc.Writers.Shared -import qualified Scripting.Lua as Lua -import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad (when) +import Control.Arrow ((***)) import Control.Exception +import Control.Monad (when) +import Control.Monad.Trans (MonadIO (liftIO)) +import Data.Char (toLower) +import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text, pack) +import Data.Typeable +import Foreign.Lua (Lua, ToLuaStack (..), callFunc) +import Foreign.Lua.Api +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath) +import Text.Pandoc.Lua.StackInstances () +import Text.Pandoc.Lua.Util (addValue, dostring') +import Text.Pandoc.Options import Text.Pandoc.Templates -import GHC.IO.Encoding (getForeignEncoding,setForeignEncoding, utf8) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Writers.Shared attrToMap :: Attr -> M.Map String String attrToMap (id',classes,keyvals) = M.fromList @@ -56,119 +58,43 @@ attrToMap (id',classes,keyvals) = M.fromList : ("class", unwords classes) : keyvals -#if MIN_VERSION_hslua(0,4,0) -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Char] where -#else -instance StackValue [Char] where -#endif - push lua cs = Lua.push lua (UTF8.fromString cs) - peek lua i = do - res <- Lua.peek lua i - return $ UTF8.toString `fmap` res - valuetype _ = Lua.TSTRING -#else -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue a => StackValue [a] where -#else -instance StackValue a => StackValue [a] where -#endif - push lua xs = do - Lua.createtable lua (length xs + 1) 0 - let addValue (i, x) = Lua.push lua x >> Lua.rawseti lua (-2) i - mapM_ addValue $ zip [1..] xs - peek lua i = do - top <- Lua.gettop lua - let i' = if i < 0 then top + i + 1 else i - Lua.pushnil lua - lst <- getList lua i' - Lua.pop lua 1 - return (Just lst) - valuetype _ = Lua.TTABLE - -getList :: StackValue a => LuaState -> Int -> IO [a] -getList lua i' = do - continue <- Lua.next lua i' - if continue - then do - next <- Lua.peek lua (-1) - Lua.pop lua 1 - x <- maybe (fail "peek returned Nothing") return next - rest <- getList lua i' - return (x : rest) - else return [] -#endif - -instance StackValue Format where - push lua (Format f) = Lua.push lua (map toLower f) - peek l n = fmap Format `fmap` Lua.peek l n - valuetype _ = Lua.TSTRING - -instance (StackValue a, StackValue b) => StackValue (M.Map a b) where - push lua m = do - let xs = M.toList m - Lua.createtable lua (length xs + 1) 0 - let addValue (k, v) = Lua.push lua k >> Lua.push lua v >> - Lua.rawset lua (-3) - mapM_ addValue xs - peek _ _ = undefined -- not needed for our purposes - valuetype _ = Lua.TTABLE - -instance (StackValue a, StackValue b) => StackValue (a,b) where - push lua (k,v) = do - Lua.createtable lua 2 0 - Lua.push lua k - Lua.push lua v - Lua.rawset lua (-3) - peek _ _ = undefined -- not needed for our purposes - valuetype _ = Lua.TTABLE - -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Inline] where -#else -instance StackValue [Inline] where -#endif - push l ils = Lua.push l =<< inlineListToCustom l ils - peek _ _ = undefined - valuetype _ = Lua.TSTRING - -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Block] where -#else -instance StackValue [Block] where -#endif - push l ils = Lua.push l =<< blockListToCustom l ils - peek _ _ = undefined - valuetype _ = Lua.TSTRING - -instance StackValue MetaValue where - push l (MetaMap m) = Lua.push l m - push l (MetaList xs) = Lua.push l xs - push l (MetaBool x) = Lua.push l x - push l (MetaString s) = Lua.push l s - push l (MetaInlines ils) = Lua.push l ils - push l (MetaBlocks bs) = Lua.push l bs - peek _ _ = undefined - valuetype (MetaMap _) = Lua.TTABLE - valuetype (MetaList _) = Lua.TTABLE - valuetype (MetaBool _) = Lua.TBOOLEAN - valuetype (MetaString _) = Lua.TSTRING - valuetype (MetaInlines _) = Lua.TSTRING - valuetype (MetaBlocks _) = Lua.TSTRING - -instance StackValue Citation where - push lua cit = do - Lua.createtable lua 6 0 - let addValue (k :: String, v) = Lua.push lua k >> Lua.push lua v >> - Lua.rawset lua (-3) - addValue ("citationId", citationId cit) - addValue ("citationPrefix", citationPrefix cit) - addValue ("citationSuffix", citationSuffix cit) - addValue ("citationMode", show (citationMode cit)) - addValue ("citationNoteNum", citationNoteNum cit) - addValue ("citationHash", citationHash cit) - peek = undefined - valuetype _ = Lua.TTABLE +newtype Stringify a = Stringify a + +instance ToLuaStack (Stringify Format) where + push (Stringify (Format f)) = push (map toLower f) + +instance ToLuaStack (Stringify [Inline]) where + push (Stringify ils) = push =<< inlineListToCustom ils + +instance ToLuaStack (Stringify [Block]) where + push (Stringify blks) = push =<< blockListToCustom blks + +instance ToLuaStack (Stringify MetaValue) where + push (Stringify (MetaMap m)) = push (fmap Stringify m) + push (Stringify (MetaList xs)) = push (map Stringify xs) + push (Stringify (MetaBool x)) = push x + push (Stringify (MetaString s)) = push s + push (Stringify (MetaInlines ils)) = push (Stringify ils) + push (Stringify (MetaBlocks bs)) = push (Stringify bs) + +instance ToLuaStack (Stringify Citation) where + push (Stringify cit) = do + createtable 6 0 + addValue "citationId" $ citationId cit + addValue "citationPrefix" . Stringify $ citationPrefix cit + addValue "citationSuffix" . Stringify $ citationSuffix cit + addValue "citationMode" $ show (citationMode cit) + addValue "citationNoteNum" $ citationNoteNum cit + addValue "citationHash" $ citationHash cit + +-- | Key-value pair, pushed as a table with @a@ as the only key and @v@ as the +-- associated value. +newtype KeyValue a b = KeyValue (a, b) + +instance (ToLuaStack a, ToLuaStack b) => ToLuaStack (KeyValue a b) where + push (KeyValue (k, v)) = do + newtable + addValue k v data PandocLuaException = PandocLuaException String deriving (Show, Typeable) @@ -176,147 +102,147 @@ data PandocLuaException = PandocLuaException String instance Exception PandocLuaException -- | Convert Pandoc to custom markup. -writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String +writeCustom :: FilePath -> WriterOptions -> Pandoc -> PandocIO Text writeCustom luaFile opts doc@(Pandoc meta _) = do - luaScript <- UTF8.readFile luaFile - enc <- getForeignEncoding - setForeignEncoding utf8 - lua <- Lua.newstate - Lua.openlibs lua - status <- Lua.loadstring lua luaScript luaFile - -- check for error in lua script (later we'll change the return type - -- to handle this more gracefully): - when (status /= 0) $ -#if MIN_VERSION_hslua(0,4,0) - Lua.tostring lua 1 >>= throw . PandocLuaException . UTF8.toString -#else - Lua.tostring lua 1 >>= throw . PandocLuaException -#endif - Lua.call lua 0 0 - -- TODO - call hierarchicalize, so we have that info - rendered <- docToCustom lua opts doc - context <- metaToJSON opts - (blockListToCustom lua) - (inlineListToCustom lua) - meta - Lua.close lua - setForeignEncoding enc - let body = rendered + luaScript <- liftIO $ UTF8.readFile luaFile + res <- runPandocLua $ do + registerScriptPath luaFile + stat <- dostring' luaScript + -- check for error in lua script (later we'll change the return type + -- to handle this more gracefully): + when (stat /= OK) $ + tostring 1 >>= throw . PandocLuaException . UTF8.toString + -- TODO - call hierarchicalize, so we have that info + rendered <- docToCustom opts doc + context <- metaToJSON opts + blockListToCustom + inlineListToCustom + meta + return (rendered, context) + let (body, context) = case res of + Left e -> throw (PandocLuaException (show e)) + Right x -> x case writerTemplate opts of - Nothing -> return body - Just tpl -> return $ renderTemplate' tpl $ setField "body" body context + Nothing -> return $ pack body + Just tpl -> + case applyTemplate (pack tpl) $ setField "body" body context of + Left e -> throw (PandocTemplateError e) + Right r -> return (pack r) -docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String -docToCustom lua opts (Pandoc (Meta metamap) blocks) = do - body <- blockListToCustom lua blocks - callfunc lua "Doc" body metamap (writerVariables opts) +docToCustom :: WriterOptions -> Pandoc -> Lua String +docToCustom opts (Pandoc (Meta metamap) blocks) = do + body <- blockListToCustom blocks + callFunc "Doc" body (fmap Stringify metamap) (writerVariables opts) -- | Convert Pandoc block element to Custom. -blockToCustom :: LuaState -- ^ Lua state - -> Block -- ^ Block element - -> IO String +blockToCustom :: Block -- ^ Block element + -> Lua String -blockToCustom _ Null = return "" +blockToCustom Null = return "" -blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines +blockToCustom (Plain inlines) = callFunc "Plain" (Stringify inlines) -blockToCustom lua (Para [Image attr txt (src,tit)]) = - callfunc lua "CaptionedImage" src tit txt (attrToMap attr) +blockToCustom (Para [Image attr txt (src,tit)]) = + callFunc "CaptionedImage" src tit (Stringify txt) (attrToMap attr) -blockToCustom lua (Para inlines) = callfunc lua "Para" inlines +blockToCustom (Para inlines) = callFunc "Para" (Stringify inlines) -blockToCustom lua (LineBlock linesList) = callfunc lua "LineBlock" linesList +blockToCustom (LineBlock linesList) = callFunc "LineBlock" (map Stringify linesList) -blockToCustom lua (RawBlock format str) = - callfunc lua "RawBlock" format str +blockToCustom (RawBlock format str) = + callFunc "RawBlock" (Stringify format) str -blockToCustom lua HorizontalRule = callfunc lua "HorizontalRule" +blockToCustom HorizontalRule = callFunc "HorizontalRule" -blockToCustom lua (Header level attr inlines) = - callfunc lua "Header" level inlines (attrToMap attr) +blockToCustom (Header level attr inlines) = + callFunc "Header" level (Stringify inlines) (attrToMap attr) -blockToCustom lua (CodeBlock attr str) = - callfunc lua "CodeBlock" str (attrToMap attr) +blockToCustom (CodeBlock attr str) = + callFunc "CodeBlock" str (attrToMap attr) -blockToCustom lua (BlockQuote blocks) = callfunc lua "BlockQuote" blocks +blockToCustom (BlockQuote blocks) = callFunc "BlockQuote" (Stringify blocks) -blockToCustom lua (Table capt aligns widths headers rows') = - callfunc lua "Table" capt (map show aligns) widths headers rows' +blockToCustom (Table capt aligns widths headers rows) = + let aligns' = map show aligns + capt' = Stringify capt + headers' = map Stringify headers + rows' = map (map Stringify) rows + in callFunc "Table" capt' aligns' widths headers' rows' -blockToCustom lua (BulletList items) = callfunc lua "BulletList" items +blockToCustom (BulletList items) = callFunc "BulletList" (map Stringify items) -blockToCustom lua (OrderedList (num,sty,delim) items) = - callfunc lua "OrderedList" items num (show sty) (show delim) +blockToCustom (OrderedList (num,sty,delim) items) = + callFunc "OrderedList" (map Stringify items) num (show sty) (show delim) -blockToCustom lua (DefinitionList items) = - callfunc lua "DefinitionList" items +blockToCustom (DefinitionList items) = + callFunc "DefinitionList" + (map (KeyValue . (Stringify *** map Stringify)) items) -blockToCustom lua (Div attr items) = - callfunc lua "Div" items (attrToMap attr) +blockToCustom (Div attr items) = + callFunc "Div" (Stringify items) (attrToMap attr) -- | Convert list of Pandoc block elements to Custom. -blockListToCustom :: LuaState -- ^ Options - -> [Block] -- ^ List of block elements - -> IO String -blockListToCustom lua xs = do - blocksep <- callfunc lua "Blocksep" - bs <- mapM (blockToCustom lua) xs +blockListToCustom :: [Block] -- ^ List of block elements + -> Lua String +blockListToCustom xs = do + blocksep <- callFunc "Blocksep" + bs <- mapM blockToCustom xs return $ mconcat $ intersperse blocksep bs -- | Convert list of Pandoc inline elements to Custom. -inlineListToCustom :: LuaState -> [Inline] -> IO String -inlineListToCustom lua lst = do - xs <- mapM (inlineToCustom lua) lst - return $ concat xs +inlineListToCustom :: [Inline] -> Lua String +inlineListToCustom lst = do + xs <- mapM inlineToCustom lst + return $ mconcat xs -- | Convert Pandoc inline element to Custom. -inlineToCustom :: LuaState -> Inline -> IO String +inlineToCustom :: Inline -> Lua String -inlineToCustom lua (Str str) = callfunc lua "Str" str +inlineToCustom (Str str) = callFunc "Str" str -inlineToCustom lua Space = callfunc lua "Space" +inlineToCustom Space = callFunc "Space" -inlineToCustom lua SoftBreak = callfunc lua "SoftBreak" +inlineToCustom SoftBreak = callFunc "SoftBreak" -inlineToCustom lua (Emph lst) = callfunc lua "Emph" lst +inlineToCustom (Emph lst) = callFunc "Emph" (Stringify lst) -inlineToCustom lua (Strong lst) = callfunc lua "Strong" lst +inlineToCustom (Strong lst) = callFunc "Strong" (Stringify lst) -inlineToCustom lua (Strikeout lst) = callfunc lua "Strikeout" lst +inlineToCustom (Strikeout lst) = callFunc "Strikeout" (Stringify lst) -inlineToCustom lua (Superscript lst) = callfunc lua "Superscript" lst +inlineToCustom (Superscript lst) = callFunc "Superscript" (Stringify lst) -inlineToCustom lua (Subscript lst) = callfunc lua "Subscript" lst +inlineToCustom (Subscript lst) = callFunc "Subscript" (Stringify lst) -inlineToCustom lua (SmallCaps lst) = callfunc lua "SmallCaps" lst +inlineToCustom (SmallCaps lst) = callFunc "SmallCaps" (Stringify lst) -inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst +inlineToCustom (Quoted SingleQuote lst) = callFunc "SingleQuoted" (Stringify lst) -inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst +inlineToCustom (Quoted DoubleQuote lst) = callFunc "DoubleQuoted" (Stringify lst) -inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs +inlineToCustom (Cite cs lst) = callFunc "Cite" (Stringify lst) (map Stringify cs) -inlineToCustom lua (Code attr str) = - callfunc lua "Code" str (attrToMap attr) +inlineToCustom (Code attr str) = + callFunc "Code" str (attrToMap attr) -inlineToCustom lua (Math DisplayMath str) = - callfunc lua "DisplayMath" str +inlineToCustom (Math DisplayMath str) = + callFunc "DisplayMath" str -inlineToCustom lua (Math InlineMath str) = - callfunc lua "InlineMath" str +inlineToCustom (Math InlineMath str) = + callFunc "InlineMath" str -inlineToCustom lua (RawInline format str) = - callfunc lua "RawInline" format str +inlineToCustom (RawInline format str) = + callFunc "RawInline" (Stringify format) str -inlineToCustom lua (LineBreak) = callfunc lua "LineBreak" +inlineToCustom LineBreak = callFunc "LineBreak" -inlineToCustom lua (Link attr txt (src,tit)) = - callfunc lua "Link" txt src tit (attrToMap attr) +inlineToCustom (Link attr txt (src,tit)) = + callFunc "Link" (Stringify txt) src tit (attrToMap attr) -inlineToCustom lua (Image attr alt (src,tit)) = - callfunc lua "Image" alt src tit (attrToMap attr) +inlineToCustom (Image attr alt (src,tit)) = + callFunc "Image" (Stringify alt) src tit (attrToMap attr) -inlineToCustom lua (Note contents) = callfunc lua "Note" contents +inlineToCustom (Note contents) = callFunc "Note" (Stringify contents) -inlineToCustom lua (Span attr items) = - callfunc lua "Span" items (attrToMap attr) +inlineToCustom (Span attr items) = + callFunc "Span" (Stringify items) (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 44f96d700..3034fade5 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE OverloadedStrings, PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,34 +29,43 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to Docbook XML. -} -module Text.Pandoc.Writers.Docbook ( writeDocbook) where +module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where +import Control.Monad.Reader +import Data.Char (toLower) +import Data.Generics (everywhere, mkT) +import Data.List (isPrefixOf, isSuffixOf, stripPrefix) +import Data.Monoid (Any (..)) +import Data.Text (Text) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.XML +import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Pretty import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk +import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Readers.TeXMath -import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf ) -import Data.Char ( toLower ) -import Data.Monoid ( Any(..) ) -import Text.Pandoc.Highlighting ( languages, languagesByExtension ) -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import qualified Text.Pandoc.Builder as B +import Text.Pandoc.XML import Text.TeXMath import qualified Text.XML.Light as Xml -import Data.Generics (everywhere, mkT) + +data DocBookVersion = DocBook4 | DocBook5 + deriving (Eq, Show) + +type DB = ReaderT DocBookVersion -- | Convert list of authors to a docbook <author> section -authorToDocbook :: WriterOptions -> [Inline] -> B.Inlines -authorToDocbook opts name' = - let name = render Nothing $ inlinesToDocbook opts name' - colwidth = if writerWrapText opts == WrapAuto +authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines +authorToDocbook opts name' = do + name <- render Nothing <$> inlinesToDocbook opts name' + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - in B.rawInline "docbook" $ render colwidth $ + return $ B.rawInline "docbook" $ render colwidth $ if ',' `elem` name then -- last name first let (lastname, rest) = break (==',') name @@ -66,52 +76,63 @@ authorToDocbook opts name' = let namewords = words name lengthname = length namewords (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (intercalate " " (take (n-1) namewords), last namewords) + 0 -> ("","") + 1 -> ("", name) + n -> (unwords (take (n-1) namewords), last namewords) in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ inTagsSimple "surname" (text $ escapeStringForXML lastname) +writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeDocbook4 opts d = + runReaderT (writeDocbook opts d) DocBook4 + +writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeDocbook5 opts d = + runReaderT (writeDocbook opts d) DocBook5 + -- | Convert Pandoc document to string in Docbook format. -writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc meta blocks) = +writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m Text +writeDocbook opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks - colwidth = if writerWrapText opts == WrapAuto + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - render' = render colwidth - opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr) + let render' :: Doc -> Text + render' = render colwidth + let opts' = if maybe False (("/book>" `isSuffixOf`) . trimr) (writerTemplate opts) && - TopLevelDefault == writerTopLevelDivision opts) + TopLevelDefault == writerTopLevelDivision opts then opts{ writerTopLevelDivision = TopLevelChapter } else opts - -- The numbering here follows LaTeX's internal numbering - startLvl = case writerTopLevelDivision opts' of + -- The numbering here follows LaTeX's internal numbering + let startLvl = case writerTopLevelDivision opts' of TopLevelPart -> -1 TopLevelChapter -> 0 TopLevelSection -> 1 TopLevelDefault -> 1 - auths' = map (authorToDocbook opts) $ docAuthors meta - meta' = B.setMeta "author" auths' meta - Just metadata = metaToJSON opts - (Just . render colwidth . (vcat . - (map (elementToDocbook opts' startLvl)) . hierarchicalize)) - (Just . render colwidth . inlinesToDocbook opts') + auths' <- mapM (authorToDocbook opts) $ docAuthors meta + let meta' = B.setMeta "author" auths' meta + metadata <- metaToJSON opts + (fmap (render' . vcat) . + mapM (elementToDocbook opts' startLvl) . + hierarchicalize) + (fmap render' . inlinesToDocbook opts') meta' - main = render' $ vcat (map (elementToDocbook opts' startLvl) elements) - context = defField "body" main - $ defField "mathml" (case writerHTMLMathMethod opts of - MathML _ -> True - _ -> False) - $ metadata - in case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + main <- (render' . vcat) <$> mapM (elementToDocbook opts' startLvl) elements + let context = defField "body" main + $ + defField "mathml" (case writerHTMLMathMethod opts of + MathML -> True + _ -> False) metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to Docbook. -elementToDocbook :: WriterOptions -> Int -> Element -> Doc +elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc elementToDocbook opts _ (Blk block) = blockToDocbook opts block -elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = +elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = do + version <- ask -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] @@ -119,24 +140,25 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = tag = case lvl of -1 -> "part" 0 -> "chapter" - n | n >= 1 && n <= 5 -> if writerDocbook5 opts + n | n >= 1 && n <= 5 -> if version == DocBook5 then "section" else "sect" ++ show n _ -> "simplesect" - idName = if writerDocbook5 opts + idName = if version == DocBook5 then "xml:id" else "id" idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')] - nsAttr = if writerDocbook5 opts && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] + nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] else [] attribs = nsAttr ++ idAttr - in inTags True tag attribs $ - inTagsSimple "title" (inlinesToDocbook opts title) $$ - vcat (map (elementToDocbook opts (lvl + 1)) elements') + contents <- mapM (elementToDocbook opts (lvl + 1)) elements' + title' <- inlinesToDocbook opts title + return $ inTags True tag attribs $ + inTagsSimple "title" title' $$ vcat contents -- | Convert a list of Pandoc blocks to Docbook. -blocksToDocbook :: WriterOptions -> [Block] -> Doc -blocksToDocbook opts = vcat . map (blockToDocbook opts) +blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc +blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -145,74 +167,82 @@ plainToPara x = x -- | Convert a list of pairs of terms and definitions into a list of -- Docbook varlistentrys. -deflistItemsToDocbook :: WriterOptions -> [([Inline],[[Block]])] -> Doc +deflistItemsToDocbook :: PandocMonad m + => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc deflistItemsToDocbook opts items = - vcat $ map (\(term, defs) -> deflistItemToDocbook opts term defs) items + vcat <$> mapM (uncurry (deflistItemToDocbook opts)) items -- | Convert a term and a list of blocks into a Docbook varlistentry. -deflistItemToDocbook :: WriterOptions -> [Inline] -> [[Block]] -> Doc -deflistItemToDocbook opts term defs = - let def' = concatMap (map plainToPara) defs - in inTagsIndented "varlistentry" $ - inTagsIndented "term" (inlinesToDocbook opts term) $$ - inTagsIndented "listitem" (blocksToDocbook opts def') +deflistItemToDocbook :: PandocMonad m + => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc +deflistItemToDocbook opts term defs = do + term' <- inlinesToDocbook opts term + def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs + return $ inTagsIndented "varlistentry" $ + inTagsIndented "term" term' $$ + inTagsIndented "listitem" def' -- | Convert a list of lists of blocks to a list of Docbook list items. -listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items +listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m Doc +listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items -- | Convert a list of blocks into a Docbook list item. -listItemToDocbook :: WriterOptions -> [Block] -> Doc +listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc listItemToDocbook opts item = - inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item + inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item) imageToDocbook :: WriterOptions -> Attr -> String -> Doc imageToDocbook _ attr src = selfClosingTag "imagedata" $ ("fileref", src) : idAndRole attr ++ dims where dims = go Width "width" ++ go Height "depth" - go dir dstr = case (dimension dir attr) of + go dir dstr = case dimension dir attr of Just a -> [(dstr, show a)] Nothing -> [] -- | Convert a Pandoc block element to Docbook. -blockToDocbook :: WriterOptions -> Block -> Doc -blockToDocbook _ Null = empty +blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m Doc +blockToDocbook _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: blockToDocbook opts (Div (ident,_,_) [Para lst]) = let attribs = [("id", ident) | not (null ident)] in if hasLineBreaks lst - then flush $ nowrap $ inTags False "literallayout" attribs - $ inlinesToDocbook opts lst - else inTags True "para" attribs $ inlinesToDocbook opts lst -blockToDocbook opts (Div (ident,_,_) bs) = - (if null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) $$ - blocksToDocbook opts (map plainToPara bs) -blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize + then (flush . nowrap . inTags False "literallayout" attribs) + <$> inlinesToDocbook opts lst + else inTags True "para" attribs <$> inlinesToDocbook opts lst +blockToDocbook opts (Div (ident,_,_) bs) = do + contents <- blocksToDocbook opts (map plainToPara bs) + return $ + (if null ident + then mempty + else selfClosingTag "anchor" [("id", ident)]) $$ contents +blockToDocbook _ h@Header{} = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure -blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = - let alt = inlinesToDocbook opts txt - capt = if null txt +blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do + alt <- inlinesToDocbook opts txt + let capt = if null txt then empty else inTagsSimple "title" alt - in inTagsIndented "figure" $ + return $ inTagsIndented "figure" $ capt $$ - (inTagsIndented "mediaobject" $ - (inTagsIndented "imageobject" - (imageToDocbook opts attr src)) $$ + inTagsIndented "mediaobject" ( + inTagsIndented "imageobject" + (imageToDocbook opts attr src) $$ inTagsSimple "textobject" (inTagsSimple "phrase" alt)) blockToDocbook opts (Para lst) - | hasLineBreaks lst = flush $ nowrap $ inTagsSimple "literallayout" $ inlinesToDocbook opts lst - | otherwise = inTagsIndented "para" $ inlinesToDocbook opts lst + | hasLineBreaks lst = (flush . nowrap . inTagsSimple "literallayout") + <$> inlinesToDocbook opts lst + | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst blockToDocbook opts (LineBlock lns) = blockToDocbook opts $ linesToPara lns blockToDocbook opts (BlockQuote blocks) = - inTagsIndented "blockquote" $ blocksToDocbook opts blocks -blockToDocbook _ (CodeBlock (_,classes,_) str) = + inTagsIndented "blockquote" <$> blocksToDocbook opts blocks +blockToDocbook _ (CodeBlock (_,classes,_) str) = return $ text ("<programlisting" ++ lang ++ ">") <> cr <> flush (text (escapeStringForXML str) <> cr <> text "</programlisting>") where lang = if null langs @@ -224,11 +254,11 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = then [s] else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes -blockToDocbook opts (BulletList lst) = +blockToDocbook opts (BulletList lst) = do let attribs = [("spacing", "compact") | isTightList lst] - in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst -blockToDocbook _ (OrderedList _ []) = empty -blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = + inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst +blockToDocbook _ (OrderedList _ []) = return empty +blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do let numeration = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] @@ -239,40 +269,46 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerRoman -> [("numeration", "lowerroman")] spacing = [("spacing", "compact") | isTightList (first:rest)] attribs = numeration ++ spacing - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest - in inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = + items <- if start == 1 + then listItemsToDocbook opts (first:rest) + else do + first' <- blocksToDocbook opts (map plainToPara first) + rest' <- listItemsToDocbook opts rest + return $ + inTags True "listitem" [("override",show start)] first' $$ + rest' + return $ inTags True "orderedlist" attribs items +blockToDocbook opts (DefinitionList lst) = do let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] - in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst -blockToDocbook opts (RawBlock f str) - | f == "docbook" = text str -- raw XML block - | f == "html" = if writerDocbook5 opts - then empty -- No html in Docbook5 - else text str -- allow html for backwards compatibility - | otherwise = empty -blockToDocbook _ HorizontalRule = empty -- not semantic -blockToDocbook opts (Table caption aligns widths headers rows) = - let captionDoc = if null caption - then empty - else inTagsIndented "title" - (inlinesToDocbook opts caption) - tableType = if isEmpty captionDoc then "informaltable" else "table" + inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst +blockToDocbook _ b@(RawBlock f str) + | f == "docbook" = return $ text str -- raw XML block + | f == "html" = do + version <- ask + if version == DocBook5 + then return empty -- No html in Docbook5 + else return $ text str -- allow html for backwards compatibility + | otherwise = do + report $ BlockNotRendered b + return empty +blockToDocbook _ HorizontalRule = return empty -- not semantic +blockToDocbook opts (Table caption aligns widths headers rows) = do + captionDoc <- if null caption + then return empty + else inTagsIndented "title" <$> + inlinesToDocbook opts caption + let tableType = if isEmpty captionDoc then "informaltable" else "table" percent w = show (truncate (100*w) :: Integer) ++ "*" coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec" ([("colwidth", percent w) | w > 0] ++ [("align", alignmentToString al)])) widths aligns - head' = if all null headers - then empty - else inTagsIndented "thead" $ - tableRowToDocbook opts headers - body' = inTagsIndented "tbody" $ - vcat $ map (tableRowToDocbook opts) rows - in inTagsIndented tableType $ captionDoc $$ - (inTags True "tgroup" [("cols", show (length headers))] $ + head' <- if all null headers + then return empty + else inTagsIndented "thead" <$> tableRowToDocbook opts headers + body' <- (inTagsIndented "tbody" . vcat) <$> + mapM (tableRowToDocbook opts) rows + return $ inTagsIndented tableType $ captionDoc $$ + inTags True "tgroup" [("cols", show (length headers))] ( coltags $$ head' $$ body') hasLineBreaks :: [Inline] -> Bool @@ -280,101 +316,111 @@ hasLineBreaks = getAny . query isLineBreak . walk removeNote where removeNote :: Inline -> Inline removeNote (Note _) = Str "" - removeNote x = x + removeNote x = x isLineBreak :: Inline -> Any isLineBreak LineBreak = Any True - isLineBreak _ = Any False + isLineBreak _ = Any False alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" AlignDefault -> "left" -tableRowToDocbook :: WriterOptions +tableRowToDocbook :: PandocMonad m + => WriterOptions -> [[Block]] - -> Doc + -> DB m Doc tableRowToDocbook opts cols = - inTagsIndented "row" $ vcat $ map (tableItemToDocbook opts) cols + (inTagsIndented "row" . vcat) <$> mapM (tableItemToDocbook opts) cols -tableItemToDocbook :: WriterOptions +tableItemToDocbook :: PandocMonad m + => WriterOptions -> [Block] - -> Doc + -> DB m Doc tableItemToDocbook opts item = - inTags True "entry" [] $ vcat $ map (blockToDocbook opts) item + (inTags True "entry" [] . vcat) <$> mapM (blockToDocbook opts) item -- | Convert a list of inline elements to Docbook. -inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst +inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc +inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst -- | Convert an inline element to Docbook. -inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook _ (Str str) = text $ escapeStringForXML str +inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m Doc +inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str inlineToDocbook opts (Emph lst) = - inTagsSimple "emphasis" $ inlinesToDocbook opts lst + inTagsSimple "emphasis" <$> inlinesToDocbook opts lst inlineToDocbook opts (Strong lst) = - inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst + inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Strikeout lst) = - inTags False "emphasis" [("role", "strikethrough")] $ + inTags False "emphasis" [("role", "strikethrough")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Superscript lst) = - inTagsSimple "superscript" $ inlinesToDocbook opts lst + inTagsSimple "superscript" <$> inlinesToDocbook opts lst inlineToDocbook opts (Subscript lst) = - inTagsSimple "subscript" $ inlinesToDocbook opts lst + inTagsSimple "subscript" <$> inlinesToDocbook opts lst inlineToDocbook opts (SmallCaps lst) = - inTags False "emphasis" [("role", "smallcaps")] $ + inTags False "emphasis" [("role", "smallcaps")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Quoted _ lst) = - inTagsSimple "quote" $ inlinesToDocbook opts lst + inTagsSimple "quote" <$> inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst inlineToDocbook opts (Span (ident,_,_) ils) = - (if null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) <> + ((if null ident + then mempty + else selfClosingTag "anchor" [("id", ident)]) <>) <$> inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = - inTagsSimple "literal" $ text (escapeStringForXML str) + return $ inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) - | isMathML (writerHTMLMathMethod opts) = - case writeMathML dt <$> readTeX str of - Right r -> inTagsSimple tagtype - $ text $ Xml.ppcElement conf - $ fixNS - $ removeAttr r - Left _ -> inlinesToDocbook opts - $ texMathToInlines t str - | otherwise = inlinesToDocbook opts $ texMathToInlines t str - where (dt, tagtype) = case t of - InlineMath -> (DisplayInline,"inlineequation") - DisplayMath -> (DisplayBlock,"informalequation") + | isMathML (writerHTMLMathMethod opts) = do + res <- convertMath writeMathML t str + case res of + Right r -> return $ inTagsSimple tagtype + $ text $ Xml.ppcElement conf + $ fixNS + $ removeAttr r + Left il -> inlineToDocbook opts il + | otherwise = + texMathToInlines t str >>= inlinesToDocbook opts + where tagtype = case t of + InlineMath -> "inlineequation" + DisplayMath -> "informalequation" conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP removeAttr e = e{ Xml.elAttribs = [] } fixNS' qname = qname{ Xml.qPrefix = Just "mml" } fixNS = everywhere (mkT fixNS') -inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x - | otherwise = empty -inlineToDocbook _ LineBreak = text "\n" -inlineToDocbook _ Space = space +inlineToDocbook _ il@(RawInline f x) + | f == "html" || f == "docbook" = return $ text x + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToDocbook _ LineBreak = return $ text "\n" +-- currently ignore, would require the option to add custom +-- styles to the document +inlineToDocbook _ Space = return space -- because we use \n for LineBreak, we can't do soft breaks: -inlineToDocbook _ SoftBreak = space +inlineToDocbook _ SoftBreak = return space inlineToDocbook opts (Link attr txt (src, _)) | Just email <- stripPrefix "mailto:" src = let emailLink = inTagsSimple "email" $ text $ - escapeStringForXML $ email + escapeStringForXML email in case txt of - [Str s] | escapeURI s == email -> emailLink - _ -> inlinesToDocbook opts txt <+> - char '(' <> emailLink <> char ')' - | otherwise = - (if isPrefixOf "#" src - then inTags False "link" $ ("linkend", drop 1 src) : idAndRole attr - else if writerDocbook5 opts + [Str s] | escapeURI s == email -> return emailLink + _ -> do contents <- inlinesToDocbook opts txt + return $ contents <+> + char '(' <> emailLink <> char ')' + | otherwise = do + version <- ask + (if "#" `isPrefixOf` src + then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr + else if version == DocBook5 then inTags False "link" $ ("xlink:href", src) : idAndRole attr - else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ - inlinesToDocbook opts txt -inlineToDocbook opts (Image attr _ (src, tit)) = + else inTags False "ulink" $ ("url", src) : idAndRole attr ) + <$> inlinesToDocbook opts txt +inlineToDocbook opts (Image attr _ (src, tit)) = return $ let titleDoc = if null tit then empty else inTagsIndented "objectinfo" $ @@ -382,11 +428,11 @@ inlineToDocbook opts (Image attr _ (src, tit)) = in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ imageToDocbook opts attr src inlineToDocbook opts (Note contents) = - inTagsIndented "footnote" $ blocksToDocbook opts contents + inTagsIndented "footnote" <$> blocksToDocbook opts contents isMathML :: HTMLMathMethod -> Bool -isMathML (MathML _) = True -isMathML _ = False +isMathML MathML = True +isMathML _ = False idAndRole :: Attr -> [(String, String)] idAndRole (id',cls,_) = ident ++ role diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 3fc5d22a2..4542389a2 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,6 +1,9 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,44 +32,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import Codec.Archive.Zip +import Control.Applicative ((<|>)) +import Control.Monad.Except (catchError) +import Control.Monad.Reader +import Control.Monad.State.Strict import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString.Lazy.Char8 as BL8 +import Data.Char (isSpace, ord, toLower) +import Data.List (intercalate, isPrefixOf, isSuffixOf) import qualified Data.Map as M +import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set -import qualified Text.Pandoc.UTF8 as UTF8 -import Codec.Archive.Zip +import qualified Data.Text as T import Data.Time.Clock.POSIX -import System.Environment +import Skylighting +import System.Random (randomR, StdGen, mkStdGen) +import Text.Pandoc.BCP47 (getLang, renderLang) +import Text.Pandoc.Class (PandocMonad, report, toLang) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Compat.Time import Text.Pandoc.Definition import Text.Pandoc.Generic +import Text.Pandoc.Highlighting (highlight) import Text.Pandoc.ImageSize -import Text.Pandoc.Shared hiding (Element) -import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.Logging +import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType, + getMimeTypeDef) import Text.Pandoc.Options -import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Highlighting ( highlight ) -import Text.Pandoc.Walk -import Text.XML.Light as XML -import Text.TeXMath import Text.Pandoc.Readers.Docx.StyleMap -import Text.Pandoc.Readers.Docx.Util (elemName) -import Control.Monad.Reader -import Control.Monad.State -import Skylighting -import Data.Unique (hashUnique, newUnique) -import System.Random (randomRIO) +import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Walk +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared (fixDisplayMath, metaValueToInlines) import Text.Printf (printf) -import qualified Control.Exception as E -import Data.Monoid ((<>)) -import qualified Data.Text as T -import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, - extensionFromMimeType) -import Control.Applicative ((<|>)) -import Data.Maybe (fromMaybe, mapMaybe, maybeToList, isNothing) -import Data.Char (ord, isSpace, toLower) +import Text.TeXMath +import Text.XML.Light as XML +import Text.XML.Light.Cursor as XMLC +import Text.Pandoc.Writers.OOXML data ListMarker = NoMarker | BulletMarker @@ -79,28 +82,28 @@ listMarkerToId BulletMarker = "991" listMarkerToId (NumberMarker sty delim n) = '9' : '9' : styNum : delimNum : show n where styNum = case sty of - DefaultStyle -> '2' - Example -> '3' - Decimal -> '4' - LowerRoman -> '5' - UpperRoman -> '6' - LowerAlpha -> '7' - UpperAlpha -> '8' + DefaultStyle -> '2' + Example -> '3' + Decimal -> '4' + LowerRoman -> '5' + UpperRoman -> '6' + LowerAlpha -> '7' + UpperAlpha -> '8' delimNum = case delim of - DefaultDelim -> '0' - Period -> '1' - OneParen -> '2' - TwoParens -> '3' + DefaultDelim -> '0' + Period -> '1' + OneParen -> '2' + TwoParens -> '3' data WriterEnv = WriterEnv{ envTextProperties :: [Element] , envParaProperties :: [Element] - , envRTL :: Bool - , envListLevel :: Int - , envListNumId :: Int - , envInDel :: Bool - , envChangesAuthor :: String - , envChangesDate :: String - , envPrintWidth :: Integer + , envRTL :: Bool + , envListLevel :: Int + , envListNumId :: Int + , envInDel :: Bool + , envChangesAuthor :: String + , envChangesDate :: String + , envPrintWidth :: Integer } defaultWriterEnv :: WriterEnv @@ -117,22 +120,25 @@ defaultWriterEnv = WriterEnv{ envTextProperties = [] data WriterState = WriterState{ stFootnotes :: [Element] + , stComments :: [([(String,String)], [Inline])] , stSectionIds :: Set.Set String , stExternalLinks :: M.Map String String - , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString) + , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) , stLists :: [ListMarker] , stInsId :: Int , stDelId :: Int , stStyleMaps :: StyleMaps , stFirstPara :: Bool , stTocTitle :: [Inline] - , stDynamicParaProps :: [String] - , stDynamicTextProps :: [String] + , stDynamicParaProps :: Set.Set String + , stDynamicTextProps :: Set.Set String + , stCurId :: Int } defaultWriterState :: WriterState defaultWriterState = WriterState{ stFootnotes = defaultFootnotes + , stComments = [] , stSectionIds = Set.empty , stExternalLinks = M.empty , stImages = M.empty @@ -141,44 +147,29 @@ defaultWriterState = WriterState{ , stDelId = 1 , stStyleMaps = defaultStyleMaps , stFirstPara = False - , stTocTitle = normalizeInlines [Str "Table of Contents"] - , stDynamicParaProps = [] - , stDynamicTextProps = [] + , stTocTitle = [Str "Table of Contents"] + , stDynamicParaProps = Set.empty + , stDynamicTextProps = Set.empty + , stCurId = 20 } -type WS = ReaderT WriterEnv (StateT WriterState IO) - -mknode :: Node t => String -> [(String,String)] -> t -> Element -mknode s attrs = - add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) +type WS m = ReaderT WriterEnv (StateT WriterState m) -nodename :: String -> QName -nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } - where (name, prefix) = case break (==':') s of - (xs,[]) -> (xs, Nothing) - (ys, _:zs) -> (zs, Just ys) - -toLazy :: B.ByteString -> BL.ByteString -toLazy = BL.fromChunks . (:[]) - -renderXml :: Element -> BL.ByteString -renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> - UTF8.fromStringLazy (showElement elt) renumIdMap :: Int -> [Element] -> M.Map String String renumIdMap _ [] = M.empty renumIdMap n (e:es) | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = - M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es) + M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es) | otherwise = renumIdMap n es replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] replaceAttr _ _ [] = [] replaceAttr f val (a:as) | f (attrKey a) = - (XML.Attr (attrKey a) val) : (replaceAttr f val as) - | otherwise = a : (replaceAttr f val as) + XML.Attr (attrKey a) val : replaceAttr f val as + | otherwise = a : replaceAttr f val as -renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element +renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element renumId f renumMap e | Just oldId <- findAttrBy f e , Just newId <- M.lookup oldId renumMap = @@ -187,7 +178,7 @@ renumId f renumMap e e { elAttribs = attrs' } | otherwise = e -renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] +renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) -- | Certain characters are invalid in XML even if escaped. @@ -206,38 +197,36 @@ isValidChar (ord -> c) | 0x10000 <= c && c <= 0x10FFFF = True | otherwise = False -metaValueToInlines :: MetaValue -> [Inline] -metaValueToInlines (MetaString s) = normalizeInlines [Str s] -metaValueToInlines (MetaInlines ils) = ils -metaValueToInlines (MetaBlocks bs) = query return bs -metaValueToInlines (MetaBool b) = [Str $ show b] -metaValueToInlines _ = [] - --- | Produce an Docx file from a Pandoc document. -writeDocx :: WriterOptions -- ^ Writer options +writeDocx :: (PandocMonad m) + => WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO BL.ByteString + -> m BL.ByteString writeDocx opts doc@(Pandoc meta _) = do - let datadir = writerUserDataDir opts - let doc' = walk fixDisplayMath $ doc - username <- lookup "USERNAME" <$> getEnvironment - utctime <- getCurrentTime - distArchive <- getDefaultReferenceDocx datadir - refArchive <- case writerReferenceDocx opts of - Just f -> liftM (toArchive . toLazy) $ B.readFile f - Nothing -> getDefaultReferenceDocx datadir + let doc' = walk fixDisplayMath doc + username <- P.lookupEnv "USERNAME" + utctime <- P.getCurrentTime + distArchive <- (toArchive . BL.fromStrict) <$> do + oldUserDataDir <- P.getUserDataDir + P.setUserDataDir Nothing + res <- P.readDefaultDataFile "reference.docx" + P.setUserDataDir oldUserDataDir + return res + refArchive <- case writerReferenceDoc opts of + Just f -> toArchive <$> P.readFileLazy f + Nothing -> (toArchive . BL.fromStrict) <$> + P.readDataFile "reference.docx" parsedDoc <- parseXml refArchive distArchive "word/document.xml" let wname f qn = qPrefix qn == Just "w" && f (qName qn) let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc -- Gets the template size - let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz"))) - let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName)) + let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz")) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= lookupAttrBy ((=="w") . qName) - let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar"))) - let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName)) - let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName)) + let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar")) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="left") . qName) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="right") . qName) -- Get the avaible area (converting the size and the margins to int and -- doing the difference @@ -248,8 +237,29 @@ writeDocx opts doc@(Pandoc meta _) = do ) -- styles + mblang <- toLang $ getLang opts meta + let addLang :: Element -> Element + addLang e = case mblang >>= \l -> + (return . XMLC.toTree . go (renderLang l) + . XMLC.fromElement) e of + Just (Elem e') -> e' + _ -> e -- return original + where go :: String -> Cursor -> Cursor + go l cursor = case XMLC.findRec (isLangElt . current) cursor of + Nothing -> cursor + Just t -> XMLC.modifyContent (setval l) t + setval :: String -> Content -> Content + setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $ + elAttribs e' } + setval _ x = x + setvalattr :: String -> XML.Attr -> XML.Attr + setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l + setvalattr _ x = x + isLangElt (Elem e') = qName (elName e') == "lang" + isLangElt _ = False + let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive distArchive stylepath + styledoc <- addLang <$> parseXml refArchive distArchive stylepath -- parse styledoc for heading styles let styleMaps = getStyleMaps styledoc @@ -271,20 +281,20 @@ writeDocx opts doc@(Pandoc meta _) = do envRTL = isRTLmeta , envChangesAuthor = fromMaybe "unknown" username , envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime - , envPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) + , envPrintWidth = maybe 420 (\x -> quot x 20) pgContentWidth } - ((contents, footnotes), st) <- runStateT - (runReaderT - (writeOpenXML opts{writerWrapText = WrapNone} doc') - env) - initialSt + ((contents, footnotes, comments), st) <- runStateT + (runReaderT + (writeOpenXML opts{writerWrapText = WrapNone} doc') + env) + initialSt let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st -- create entries for images in word/media/... - let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img + let toImageEntry (_,path,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs let stdAttributes = @@ -316,7 +326,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- [Content_Types].xml let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () - let mkImageOverride (_, imgpath, mbMimeType, _, _) = + let mkImageOverride (_, imgpath, mbMimeType, _) = mkOverrideNode ("/word/" ++ imgpath, fromMaybe "application/octet-stream" mbMimeType) let mkMediaOverride imgpath = @@ -340,6 +350,8 @@ writeDocx opts doc@(Pandoc meta _) = do "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml") ,("/word/document.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") + ,("/word/comments.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml") ,("/word/footnotes.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") ] ++ @@ -386,13 +398,16 @@ writeDocx opts doc@(Pandoc meta _) = do ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", "footnotes.xml") + ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments", + "rId8", + "comments.xml") ] let idMap = renumIdMap (length baserels' + 1) (headers ++ footers) let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers let renumFooters = renumIds (\q -> qName q == "Id") idMap footers let baserels = baserels' ++ renumHeaders ++ renumFooters - let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () + let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () let linkrels = map toLinkRel $ M.toList $ stExternalLinks st @@ -409,7 +424,7 @@ writeDocx opts doc@(Pandoc meta _) = do (elChildren sectpr') in add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs - Nothing -> (mknode "w:sectPr" [] ()) + Nothing -> mknode "w:sectPr" [] () -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' let contents' = contents ++ [sectpr] @@ -431,6 +446,10 @@ writeDocx opts doc@(Pandoc meta _) = do $ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] linkrels + -- comments + let commentsEntry = toEntry "word/comments.xml" epochtime + $ renderXml $ mknode "w:comments" stdAttributes comments + -- styles -- We only want to inject paragraph and text properties that @@ -438,26 +457,19 @@ writeDocx opts doc@(Pandoc meta _) = do -- are normalized as lowercase. let newDynamicParaProps = filter (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sParaStyleMap styleMaps) - (stDynamicParaProps st) + (Set.toList $ stDynamicParaProps st) newDynamicTextProps = filter (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sCharStyleMap styleMaps) - (stDynamicTextProps st) + (Set.toList $ stDynamicTextProps st) let newstyles = map newParaPropToOpenXml newDynamicParaProps ++ map newTextPropToOpenXml newDynamicTextProps ++ - (styleToOpenXml styleMaps $ writerHighlightStyle opts) - let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } - where - modifyContent - | writerHighlight opts = (++ map Elem newstyles) - | otherwise = filter notTokStyle - notTokStyle (Elem el) = notStyle el || notTokId el - notTokStyle _ = True - notStyle = (/= elemName' "style") . elName - notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") - tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) - elemName' = elemName (sNameSpaces styleMaps) "w" + (case writerHighlightStyle opts of + Nothing -> [] + Just sty -> styleToOpenXml styleMaps sty) + let styledoc' = styledoc{ elContent = elContent styledoc ++ + map Elem newstyles } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -472,6 +484,11 @@ writeDocx opts doc@(Pandoc meta _) = do , qName (elName e) == "abstractNum" ] ++ [Elem e | e <- allElts , qName (elName e) == "num" ] } + + let keywords = case lookupMeta "keywords" meta of + Just (MetaList xs) -> map stringify xs + _ -> [] + let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -481,6 +498,7 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ mknode "dc:title" [] (stringify $ docTitle meta) : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) + : mknode "cp:keywords" [] (intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -509,6 +527,7 @@ writeDocx opts doc@(Pandoc meta _) = do , "w:consecutiveHyphenLimit" , "w:hyphenationZone" , "w:doNotHyphenateCap" + , "w:evenAndOddHeaders" ] settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList @@ -535,6 +554,7 @@ writeDocx opts doc@(Pandoc meta _) = do let archive = foldr addEntryToArchive emptyArchive $ contentTypesEntry : relsEntry : contentEntry : relEntry : footnoteRelEntry : numEntry : styleEntry : footnotesEntry : + commentsEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : imageEntries ++ headerFooterEntries ++ @@ -583,12 +603,12 @@ styleToOpenXml sm style = [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ] ] tokStyles = tokenStyles style - tokFeature f toktype = maybe False f $ lookup toktype tokStyles + tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles tokCol toktype = maybe "auto" (drop 1 . fromColor) - $ (tokenColor =<< lookup toktype tokStyles) + $ (tokenColor =<< M.lookup toktype tokStyles) `mplus` defaultColor style tokBg toktype = maybe "auto" (drop 1 . fromColor) - $ (tokenBackground =<< lookup toktype tokStyles) + $ (tokenBackground =<< M.lookup toktype tokStyles) `mplus` backgroundColor style parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing | otherwise = Just $ @@ -599,11 +619,11 @@ styleToOpenXml sm style = , mknode "w:link" [("w:val","VerbatimChar")] () , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () - : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) - $ backgroundColor style ) + : + maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style) ] -copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry +copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path @@ -622,11 +642,14 @@ copyChildren refArchive distArchive path timestamp elNames = do baseListId :: Int baseListId = 1000 -mkNumbering :: [ListMarker] -> IO [Element] +mkNumbering :: (PandocMonad m) => [ListMarker] -> m [Element] mkNumbering lists = do - elts <- mapM mkAbstractNum (ordNub lists) + elts <- evalStateT (mapM mkAbstractNum (ordNub lists)) (mkStdGen 1848) return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] +maxListLevel :: Int +maxListLevel = 8 + mkNum :: ListMarker -> Int -> Element mkNum marker numid = mknode "w:num" [("w:numId",show numid)] @@ -636,15 +659,19 @@ mkNum marker numid = BulletMarker -> [] NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] + $ mknode "w:startOverride" [("w:val",show start)] ()) + [0..maxListLevel] -mkAbstractNum :: ListMarker -> IO Element +mkAbstractNum :: (PandocMonad m) => ListMarker -> StateT StdGen m Element mkAbstractNum marker = do - nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) + gen <- get + let (nsid, gen') = randomR (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) gen + put gen' return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () - : map (mkLvl marker) [0..6] + : map (mkLvl marker) + [0..maxListLevel] mkLvl :: ListMarker -> Int -> Element mkLvl marker lvl = @@ -675,33 +702,35 @@ mkLvl marker lvl = bulletFor 3 = "\x2013" bulletFor 4 = "\x2022" bulletFor 5 = "\x2013" - bulletFor _ = "\x2022" - styleFor UpperAlpha _ = "upperLetter" - styleFor LowerAlpha _ = "lowerLetter" - styleFor UpperRoman _ = "upperRoman" - styleFor LowerRoman _ = "lowerRoman" - styleFor Decimal _ = "decimal" + bulletFor x = bulletFor (x `mod` 6) + styleFor UpperAlpha _ = "upperLetter" + styleFor LowerAlpha _ = "lowerLetter" + styleFor UpperRoman _ = "upperRoman" + styleFor LowerRoman _ = "lowerRoman" + styleFor Decimal _ = "decimal" styleFor DefaultStyle 1 = "decimal" styleFor DefaultStyle 2 = "lowerLetter" styleFor DefaultStyle 3 = "lowerRoman" styleFor DefaultStyle 4 = "decimal" styleFor DefaultStyle 5 = "lowerLetter" - styleFor DefaultStyle 6 = "lowerRoman" - styleFor _ _ = "decimal" - patternFor OneParen s = s ++ ")" + styleFor DefaultStyle 0 = "lowerRoman" + styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6) + styleFor _ _ = "decimal" + patternFor OneParen s = s ++ ")" patternFor TwoParens s = "(" ++ s ++ ")" - patternFor _ s = s ++ "." + patternFor _ s = s ++ "." -getNumId :: WS Int +getNumId :: (PandocMonad m) => WS m Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists -makeTOC :: WriterOptions -> WS [Element] + +makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] makeTOC opts | writerTableOfContents opts = do - let depth = "1-"++(show (writerTOCDepth opts)) + let depth = "1-"++show (writerTOCDepth opts) let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" tocTitle <- gets stTocTitle title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) - return $ + return [mknode "w:sdt" [] ([ mknode "w:sdtPr" [] ( mknode "w:docPartObj" [] ( @@ -725,22 +754,20 @@ makeTOC _ = return [] -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). -writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) +writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element],[Element]) writeOpenXML opts (Pandoc meta blocks) = do - let tit = docTitle meta ++ case lookupMeta "subtitle" meta of - Just (MetaBlocks [Plain xs]) -> LineBreak : xs - _ -> [] + let tit = docTitle meta let auths = docAuthors meta let dat = docDate meta let abstract' = case lookupMeta "abstract" meta of - Just (MetaBlocks bs) -> bs + Just (MetaBlocks bs) -> bs Just (MetaInlines ils) -> [Plain ils] - _ -> [] + _ -> [] let subtitle' = case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> xs Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs - _ -> [] + _ -> [] title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ @@ -750,23 +777,40 @@ writeOpenXML opts (Pandoc meta blocks) = do then return [] else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs - convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs - convertSpace xs = xs + convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs + convertSpace xs = xs let blocks' = bottomUp convertSpace blocks - doc' <- (setFirstPara >> blocksToOpenXML opts blocks') - notes' <- reverse `fmap` gets stFootnotes + doc' <- setFirstPara >> blocksToOpenXML opts blocks' + notes' <- reverse <$> gets stFootnotes + comments <- reverse <$> gets stComments + let toComment (kvs, ils) = do + annotation <- inlinesToOpenXML opts ils + return $ + mknode "w:comment" [('w':':':k,v) | (k,v) <- kvs] + [ mknode "w:p" [] $ + [ mknode "w:pPr" [] + [ mknode "w:pStyle" [("w:val", "CommentText")] () ] + , mknode "w:r" [] + [ mknode "w:rPr" [] + [ mknode "w:rStyle" [("w:val", "CommentReference")] () + , mknode "w:annotationRef" [] () + ] + ] + ] ++ annotation + ] + comments' <- mapM toComment comments toc <- makeTOC opts let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc - return (meta' ++ doc', notes') + return (meta' ++ doc', notes', comments') -- | Convert a list of Pandoc blocks to OpenXML. -blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] +blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls pCustomStyle :: String -> Element pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () -pStyleM :: String -> WS XML.Element +pStyleM :: (PandocMonad m) => String -> WS m XML.Element pStyleM styleName = do styleMaps <- gets stStyleMaps let sty' = getStyleId styleName $ sParaStyleMap styleMaps @@ -775,61 +819,75 @@ pStyleM styleName = do rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyleM :: String -> WS XML.Element +rStyleM :: (PandocMonad m) => String -> WS m XML.Element rStyleM styleName = do styleMaps <- gets stStyleMaps let sty' = getStyleId styleName $ sCharStyleMap styleMaps return $ mknode "w:rStyle" [("w:val",sty')] () -getUniqueId :: MonadIO m => m String +getUniqueId :: (PandocMonad m) => WS m String -- the + 20 is to ensure that there are no clashes with the rIds -- already in word/document.xml.rel -getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique +getUniqueId = do + n <- gets stCurId + modify $ \st -> st{stCurId = n + 1} + return $ show n -- | Key for specifying user-defined docx styles. dynamicStyleKey :: String dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. -blockToOpenXML :: WriterOptions -> Block -> WS [Element] +blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk -blockToOpenXML' :: WriterOptions -> Block -> WS [Element] +blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML' _ Null = return [] -blockToOpenXML' opts (Div (ident,classes,kvs) bs) - | Just sty <- lookup dynamicStyleKey kvs = do - modify $ \s -> s{stDynamicParaProps = sty : (stDynamicParaProps s)} - withParaPropM (pStyleM sty) $ blocksToOpenXML opts bs - | Just "rtl" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "rtl")/=) kvs - local (\env -> env { envRTL = True }) $ - blockToOpenXML opts (Div (ident,classes,kvs') bs) - | Just "ltr" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "ltr")/=) kvs - local (\env -> env { envRTL = False }) $ - blockToOpenXML opts (Div (ident,classes,kvs') bs) -blockToOpenXML' opts (Div (_,["references"],_) bs) = do - let (hs, bs') = span isHeaderBlock bs - header <- blocksToOpenXML opts hs - -- We put the Bibliography style on paragraphs after the header - rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' - return (header ++ rest) -blockToOpenXML' opts (Div _ bs) = blocksToOpenXML opts bs +blockToOpenXML' opts (Div (ident,classes,kvs) bs) = do + stylemod <- case lookup dynamicStyleKey kvs of + Just sty -> do + modify $ \s -> + s{stDynamicParaProps = Set.insert sty + (stDynamicParaProps s)} + return $ withParaPropM (pStyleM sty) + _ -> return id + dirmod <- case lookup "dir" kvs of + Just "rtl" -> return $ local (\env -> env { envRTL = True }) + Just "ltr" -> return $ local (\env -> env { envRTL = False }) + _ -> return id + let (hs, bs') = if "references" `elem` classes + then span isHeaderBlock bs + else ([], bs) + let bibmod = if "references" `elem` classes + then withParaPropM (pStyleM "Bibliography") + else id + header <- dirmod $ stylemod $ blocksToOpenXML opts hs + contents <- dirmod $ bibmod $ stylemod $ blocksToOpenXML opts bs' + if null ident + then return $ header ++ contents + else do + id' <- getUniqueId + let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') + ,("w:name",ident)] () + let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () + return $ bookmarkStart : header ++ contents ++ [bookmarkEnd] blockToOpenXML' opts (Header lev (ident,_,_) lst) = do setFirstPara paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst - usedIdents <- gets stSectionIds - let bookmarkName = if null ident - then uniqueIdent lst usedIdents - else ident - modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s } - id' <- getUniqueId - let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') + if null ident + then return [mknode "w:p" [] (paraProps ++contents)] + else do + let bookmarkName = ident + modify $ \s -> s{ stSectionIds = Set.insert bookmarkName + $ stSectionIds s } + id' <- getUniqueId + let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') ,("w:name",bookmarkName)] () - let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () - return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] + let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () + return [mknode "w:p" [] (paraProps ++ + [bookmarkStart] ++ contents ++ [bookmarkEnd])] blockToOpenXML' opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure @@ -838,31 +896,34 @@ blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do let prop = pCustomStyle $ if null alt then "Figure" - else "FigureWithCaption" + else "CaptionedFigure" paraProps <- local (\env -> env { envParaProperties = prop : envParaProperties env }) (getParaProps False) contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode --- fixDisplayMath sometimes produces a Para [] as artifact -blockToOpenXML' _ (Para []) = return [] -blockToOpenXML' opts (Para lst) = do - isFirstPara <- gets stFirstPara - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False - bodyTextStyle <- pStyleM "Body Text" - let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [bodyTextStyle]] - ps -> ps - modify $ \s -> s { stFirstPara = False } - contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps' ++ contents)] +blockToOpenXML' opts (Para lst) + | null lst && not (isEnabled Ext_empty_paragraphs opts) = return [] + | otherwise = do + isFirstPara <- gets stFirstPara + paraProps <- getParaProps $ case lst of + [Math DisplayMath _] -> True + _ -> False + bodyTextStyle <- pStyleM "Body Text" + let paraProps' = case paraProps of + [] | isFirstPara -> [mknode "w:pPr" [] + [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] + ps -> ps + modify $ \s -> s { stFirstPara = False } + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns -blockToOpenXML' _ (RawBlock format str) +blockToOpenXML' _ b@(RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | otherwise = do + report $ BlockNotRendered b + return [] blockToOpenXML' opts (BlockQuote blocks) = do p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks setFirstPara @@ -914,9 +975,9 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do caption' ++ [mknode "w:tbl" [] ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","TableNormal")] () : + ( mknode "w:tblStyle" [("w:val","Table")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : - mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : + mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] @@ -945,7 +1006,7 @@ blockToOpenXML' opts (DefinitionList items) = do setFirstPara return l -definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] +definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element] definitionListItemToOpenXML opts (term,defs) = do term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) @@ -953,12 +1014,12 @@ definitionListItemToOpenXML opts (term,defs) = do $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' -addList :: ListMarker -> WS () +addList :: (PandocMonad m) => ListMarker -> WS m () addList marker = do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } -listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] +listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element] listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do first' <- withNumId numid $ blockToOpenXML opts first @@ -968,121 +1029,162 @@ listItemToOpenXML opts numid (first:rest) = do alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" AlignDefault -> "left" -- | Convert a list of inline elements to OpenXML. -inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element] +inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element] inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst -withNumId :: Int -> WS a -> WS a +withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a withNumId numid = local $ \env -> env{ envListNumId = numid } -asList :: WS a -> WS a +asList :: (PandocMonad m) => WS m a -> WS m a asList = local $ \env -> env{ envListLevel = envListLevel env + 1 } -getTextProps :: WS [Element] +getTextProps :: (PandocMonad m) => WS m [Element] getTextProps = do props <- asks envTextProperties return $ if null props then [] else [mknode "w:rPr" [] props] -withTextProp :: Element -> WS a -> WS a +withTextProp :: PandocMonad m => Element -> WS m a -> WS m a withTextProp d p = local (\env -> env {envTextProperties = d : envTextProperties env}) p -withTextPropM :: WS Element -> WS a -> WS a +withTextPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withTextPropM = (. flip withTextProp) . (>>=) -getParaProps :: Bool -> WS [Element] +getParaProps :: PandocMonad m => Bool -> WS m [Element] getParaProps displayMathPara = do props <- asks envParaProperties listLevel <- asks envListLevel numid <- asks envListNumId - let listPr = if listLevel >= 0 && not displayMathPara - then [ mknode "w:numPr" [] - [ mknode "w:numId" [("w:val",show numid)] () - , mknode "w:ilvl" [("w:val",show listLevel)] () ] - ] - else [] + let listPr = [mknode "w:numPr" [] + [ mknode "w:numId" [("w:val",show numid)] () + , mknode "w:ilvl" [("w:val",show listLevel)] () ] | listLevel >= 0 && not displayMathPara] return $ case props ++ listPr of [] -> [] ps -> [mknode "w:pPr" [] ps] -withParaProp :: Element -> WS a -> WS a +withParaProp :: PandocMonad m => Element -> WS m a -> WS m a withParaProp d p = local (\env -> env {envParaProperties = d : envParaProperties env}) p -withParaPropM :: WS Element -> WS a -> WS a +withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withParaPropM = (. flip withParaProp) . (>>=) -formattedString :: String -> WS [Element] -formattedString str = do - props <- getTextProps +formattedString :: PandocMonad m => String -> WS m [Element] +formattedString str = + -- properly handle soft hyphens + case splitBy (=='\173') str of + [w] -> formattedString' w + ws -> do + sh <- formattedRun [mknode "w:softHyphen" [] ()] + intercalate sh <$> mapM formattedString' ws + +formattedString' :: PandocMonad m => String -> WS m [Element] +formattedString' str = do inDel <- asks envInDel - return [ mknode "w:r" [] $ - props ++ - [ mknode (if inDel then "w:delText" else "w:t") - [("xml:space","preserve")] (stripInvalidChars str) ] ] + formattedRun [ mknode (if inDel then "w:delText" else "w:t") + [("xml:space","preserve")] (stripInvalidChars str) ] -setFirstPara :: WS () +formattedRun :: PandocMonad m => [Element] -> WS m [Element] +formattedRun els = do + props <- getTextProps + return [ mknode "w:r" [] $ props ++ els ] + +setFirstPara :: PandocMonad m => WS m () setFirstPara = modify $ \s -> s { stFirstPara = True } -- | Convert an inline element to OpenXML. -inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] +inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il -inlineToOpenXML' :: WriterOptions -> Inline -> WS [Element] -inlineToOpenXML' _ (Str str) = formattedString str +inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] +inlineToOpenXML' _ (Str str) = + formattedString str inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ") -inlineToOpenXML' opts (Span (ident,classes,kvs) ils) - | Just sty <- lookup dynamicStyleKey kvs = do - let kvs' = filter ((dynamicStyleKey, sty)/=) kvs - modify $ \s -> s{stDynamicTextProps = sty : (stDynamicTextProps s)} - withTextProp (rCustomStyle sty) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | Just "rtl" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "rtl")/=) kvs - local (\env -> env { envRTL = True }) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | Just "ltr" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "ltr")/=) kvs - local (\env -> env { envRTL = False }) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | "insertion" `elem` classes = do - defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate - let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) - insId <- gets stInsId - modify $ \s -> s{stInsId = (insId + 1)} - x <- inlinesToOpenXML opts ils - return [ mknode "w:ins" [("w:id", (show insId)), - ("w:author", author), - ("w:date", date)] - x ] - | "deletion" `elem` classes = do - defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate - let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) - delId <- gets stDelId - modify $ \s -> s{stDelId = (delId + 1)} - x <- local (\env -> env {envInDel = True}) (inlinesToOpenXML opts ils) - return [ mknode "w:del" [("w:id", (show delId)), - ("w:author", author), - ("w:date", date)] - x ] - | otherwise = do - let off x = withTextProp (mknode x [("w:val","0")] ()) - ((if "csl-no-emph" `elem` classes then off "w:i" else id) . - (if "csl-no-strong" `elem` classes then off "w:b" else id) . - (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id)) - $ inlinesToOpenXML opts ils +inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do + -- prefer the "id" in kvs, since that is the one produced by the docx + -- reader. + let ident' = fromMaybe ident (lookup "id" kvs) + kvs' = filter (("id" /=) . fst) kvs + modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } + return [ mknode "w:commentRangeStart" [("w:id", ident')] () ] +inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = + -- prefer the "id" in kvs, since that is the one produced by the docx + -- reader. + let ident' = fromMaybe ident (lookup "id" kvs) + in + return [ mknode "w:commentRangeEnd" [("w:id", ident')] () + , mknode "w:r" [] + [ mknode "w:rPr" [] + [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] + , mknode "w:commentReference" [("w:id", ident')] () ] + ] +inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do + stylemod <- case lookup dynamicStyleKey kvs of + Just sty -> do + modify $ \s -> + s{stDynamicTextProps = Set.insert sty + (stDynamicTextProps s)} + return $ withTextPropM (rStyleM sty) + _ -> return id + let dirmod = case lookup "dir" kvs of + Just "rtl" -> local (\env -> env { envRTL = True }) + Just "ltr" -> local (\env -> env { envRTL = False }) + _ -> id + let off x = withTextProp (mknode x [("w:val","0")] ()) + let pmod = (if "csl-no-emph" `elem` classes then off "w:i" else id) . + (if "csl-no-strong" `elem` classes then off "w:b" else id) . + (if "csl-no-smallcaps" `elem` classes + then off "w:smallCaps" + else id) + insmod <- if "insertion" `elem` classes + then do + defaultAuthor <- asks envChangesAuthor + defaultDate <- asks envChangesDate + let author = fromMaybe defaultAuthor (lookup "author" kvs) + date = fromMaybe defaultDate (lookup "date" kvs) + insId <- gets stInsId + modify $ \s -> s{stInsId = insId + 1} + return $ \f -> do + x <- f + return [ mknode "w:ins" + [("w:id", show insId), + ("w:author", author), + ("w:date", date)] x ] + else return id + delmod <- if "deletion" `elem` classes + then do + defaultAuthor <- asks envChangesAuthor + defaultDate <- asks envChangesDate + let author = fromMaybe defaultAuthor (lookup "author" kvs) + date = fromMaybe defaultDate (lookup "date" kvs) + delId <- gets stDelId + modify $ \s -> s{stDelId = delId + 1} + return $ \f -> local (\env->env{envInDel=True}) $ do + x <- f + return [mknode "w:del" + [("w:id", show delId), + ("w:author", author), + ("w:date", date)] x] + else return id + contents <- insmod $ delmod $ dirmod $ stylemod $ pmod + $ inlinesToOpenXML opts ils + if null ident + then return contents + else do + id' <- getUniqueId + let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') + ,("w:name",ident)] () + let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () + return $ bookmarkStart : contents ++ [bookmarkEnd] inlineToOpenXML' opts (Strong lst) = withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML' opts (Emph lst) = @@ -1100,40 +1202,40 @@ inlineToOpenXML' opts (Strikeout lst) = withTextProp (mknode "w:strike" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML' _ LineBreak = return [br] -inlineToOpenXML' _ (RawInline f str) +inlineToOpenXML' _ il@(RawInline f str) | f == Format "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | otherwise = do + report $ InlineNotRendered il + return [] inlineToOpenXML' opts (Quoted quoteType lst) = inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close] where (open, close) = case quoteType of SingleQuote -> ("\x2018", "\x2019") DoubleQuote -> ("\x201C", "\x201D") inlineToOpenXML' opts (Math mathType str) = do - let displayType = if mathType == DisplayMath - then DisplayBlock - else DisplayInline - when (displayType == DisplayBlock) setFirstPara - case writeOMML displayType <$> readTeX str of - Right r -> return [r] - Left e -> do - warn $ "Cannot convert the following TeX math, skipping:\n" ++ str ++ - "\n" ++ e - inlinesToOpenXML opts (texMathToInlines mathType str) + when (mathType == DisplayMath) setFirstPara + res <- (lift . lift) (convertMath writeOMML mathType str) + case res of + Right r -> return [r] + Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do let unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) + mapM formattedString (lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] [ rCustomStyle (show toktype) ] , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] withTextProp (rCustomStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted + $ if isNothing (writerHighlightStyle opts) + then unhighlighted + else case highlight (writerSyntaxMap opts) + formatOpenXML attrs str of + Right h -> return h + Left msg -> do + unless (null msg) $ report $ CouldNotHighlight msg + unhighlighted inlineToOpenXML' opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId @@ -1151,7 +1253,7 @@ inlineToOpenXML' opts (Note bs) = do , envTextProperties = [] }) (withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs) - let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents + let newnote = mknode "w:footnote" [("w:id", notenum)] contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle @@ -1173,81 +1275,109 @@ inlineToOpenXML' opts (Link _ txt (src,_)) = do return i return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr alt (src, title)) = do - -- first, check to see if we've already done this image pageWidth <- asks envPrintWidth imgs <- gets stImages - case M.lookup src imgs of - Just (_,_,_,elt,_) -> return [elt] - Nothing -> do - res <- liftIO $ - fetchItem' (writerMediaBag opts) (writerSourceURL opts) src - case res of - Left (_ :: E.SomeException) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - -- emit alt text - inlinesToOpenXML opts alt - Right (img, mt) -> do - ident <- ("rId"++) `fmap` getUniqueId - let (xpt,ypt) = desiredSizeInPoints opts attr - (either (const def) id (imageSize img)) - -- 12700 emu = 1 pt - let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) - let cNvPicPr = mknode "pic:cNvPicPr" [] $ - mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () - let nvPicPr = mknode "pic:nvPicPr" [] - [ mknode "pic:cNvPr" - [("descr",src),("id","0"),("name","Picture")] () - , cNvPicPr ] - let blipFill = mknode "pic:blipFill" [] - [ mknode "a:blip" [("r:embed",ident)] () - , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] - let xfrm = mknode "a:xfrm" [] - [ mknode "a:off" [("x","0"),("y","0")] () - , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ] - let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ - mknode "a:avLst" [] () - let ln = mknode "a:ln" [("w","9525")] - [ mknode "a:noFill" [] () - , mknode "a:headEnd" [] () - , mknode "a:tailEnd" [] () ] - let spPr = mknode "pic:spPr" [("bwMode","auto")] - [xfrm, prstGeom, mknode "a:noFill" [] (), ln] - let graphic = mknode "a:graphic" [] $ - mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")] - [ mknode "pic:pic" [] - [ nvPicPr - , blipFill - , spPr ] ] - let imgElt = mknode "w:r" [] $ - mknode "w:drawing" [] $ - mknode "wp:inline" [] - [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () - , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () - , mknode "wp:docPr" [("descr",stringify alt), ("title", title), ("id","1"),("name","Picture")] () - , graphic ] - let imgext = case mt >>= extensionFromMimeType of - Just x -> '.':x - Nothing -> case imageType img of - Just Png -> ".png" - Just Jpeg -> ".jpeg" - Just Gif -> ".gif" - Just Pdf -> ".pdf" - Just Eps -> ".eps" - Nothing -> "" - if null imgext - then -- without an extension there is no rule for content type - inlinesToOpenXML opts alt -- return alt to avoid corrupted docx - else do - let imgpath = "media/" ++ ident ++ imgext - let mbMimeType = mt <|> getMimeType imgpath - -- insert mime type to use in constructing [Content_Types].xml - modify $ \st -> st{ stImages = - M.insert src (ident, imgpath, mbMimeType, imgElt, img) - $ stImages st } - return [imgElt] + let + stImage = M.lookup src imgs + generateImgElt (ident, _, _, img) = + let + (xpt,ypt) = desiredSizeInPoints opts attr + (either (const def) id (imageSize opts img)) + -- 12700 emu = 1 pt + (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) + (pageWidth * 12700) + cNvPicPr = mknode "pic:cNvPicPr" [] $ + mknode "a:picLocks" [("noChangeArrowheads","1") + ,("noChangeAspect","1")] () + nvPicPr = mknode "pic:nvPicPr" [] + [ mknode "pic:cNvPr" + [("descr",src),("id","0"),("name","Picture")] () + , cNvPicPr ] + blipFill = mknode "pic:blipFill" [] + [ mknode "a:blip" [("r:embed",ident)] () + , mknode "a:stretch" [] $ + mknode "a:fillRect" [] () + ] + xfrm = mknode "a:xfrm" [] + [ mknode "a:off" [("x","0"),("y","0")] () + , mknode "a:ext" [("cx",show xemu) + ,("cy",show yemu)] () ] + prstGeom = mknode "a:prstGeom" [("prst","rect")] $ + mknode "a:avLst" [] () + ln = mknode "a:ln" [("w","9525")] + [ mknode "a:noFill" [] () + , mknode "a:headEnd" [] () + , mknode "a:tailEnd" [] () ] + spPr = mknode "pic:spPr" [("bwMode","auto")] + [xfrm, prstGeom, mknode "a:noFill" [] (), ln] + graphic = mknode "a:graphic" [] $ + mknode "a:graphicData" + [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")] + [ mknode "pic:pic" [] + [ nvPicPr + , blipFill + , spPr + ] + ] + imgElt = mknode "w:r" [] $ + mknode "w:drawing" [] $ + mknode "wp:inline" [] + [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () + , mknode "wp:effectExtent" + [("b","0"),("l","0"),("r","0"),("t","0")] () + , mknode "wp:docPr" + [ ("descr", stringify alt) + , ("title", title) + , ("id","1") + , ("name","Picture") + ] () + , graphic + ] + in + imgElt + + case stImage of + Just imgData -> return $ [generateImgElt imgData] + Nothing -> ( do --try + (img, mt) <- P.fetchItem src + ident <- ("rId"++) `fmap` getUniqueId + + let + imgext = case mt >>= extensionFromMimeType of + Just x -> '.':x + Nothing -> case imageType img of + Just Png -> ".png" + Just Jpeg -> ".jpeg" + Just Gif -> ".gif" + Just Pdf -> ".pdf" + Just Eps -> ".eps" + Just Svg -> ".svg" + Just Emf -> ".emf" + Nothing -> "" + imgpath = "media/" ++ ident ++ imgext + mbMimeType = mt <|> getMimeType imgpath + + imgData = (ident, imgpath, mbMimeType, img) + + if null imgext + then -- without an extension there is no rule for content type + inlinesToOpenXML opts alt -- return alt to avoid corrupted docx + else do + -- insert mime type to use in constructing [Content_Types].xml + modify $ \st -> st { stImages = M.insert src imgData $ stImages st } + return [generateImgElt imgData] + ) + `catchError` ( \e -> do + report $ CouldNotFetchResource src (show e) + -- emit alt text + inlinesToOpenXML opts alt + ) br :: Element -br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] +br = breakElement "textWrapping" + +breakElement :: String -> Element +breakElement kind = mknode "w:r" [] [mknode "w:br" [("w:type",kind)] () ] -- Word will insert these footnotes into the settings.xml file -- (whether or not they're visible in the document). If they're in the @@ -1255,35 +1385,18 @@ br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] -- problems. So we want to make sure we insert them into our document. defaultFootnotes :: [Element] defaultFootnotes = [ mknode "w:footnote" - [("w:type", "separator"), ("w:id", "-1")] $ + [("w:type", "separator"), ("w:id", "-1")] [ mknode "w:p" [] $ [mknode "w:r" [] $ [ mknode "w:separator" [] ()]]] , mknode "w:footnote" - [("w:type", "continuationSeparator"), ("w:id", "0")] $ + [("w:type", "continuationSeparator"), ("w:id", "0")] [ mknode "w:p" [] $ [ mknode "w:r" [] $ [ mknode "w:continuationSeparator" [] ()]]]] -parseXml :: Archive -> Archive -> String -> IO Element -parseXml refArchive distArchive relpath = - case findEntryByPath relpath refArchive `mplus` - findEntryByPath relpath distArchive of - Nothing -> fail $ relpath ++ " missing in reference docx" - Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of - Nothing -> fail $ relpath ++ " corrupt in reference docx" - Just d -> return d - --- | Scales the image to fit the page --- sizes are passed in emu -fitToPage :: (Double, Double) -> Integer -> (Integer, Integer) -fitToPage (x, y) pageWidth - -- Fixes width to the page width and scales the height - | x > fromIntegral pageWidth = - (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y) - | otherwise = (floor x, floor y) - -withDirection :: WS a -> WS a + +withDirection :: PandocMonad m => WS m a -> WS m a withDirection x = do isRTL <- asks envRTL paraProps <- asks envParaProperties @@ -1296,8 +1409,8 @@ withDirection x = do if isRTL -- if we are going right-to-left, we (re?)add the properties. then flip local x $ - \env -> env { envParaProperties = (mknode "w:bidi" [] ()) : paraProps' - , envTextProperties = (mknode "w:rtl" [] ()) : textProps' + \env -> env { envParaProperties = mknode "w:bidi" [] () : paraProps' + , envTextProperties = mknode "w:rtl" [] () : textProps' } else flip local x $ \env -> env { envParaProperties = paraProps' , envTextProperties = textProps' diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 7459f1b42..dda21d23d 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.DokuWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Clare Macrae <clare.macrae@googlemail.com> @@ -39,71 +39,64 @@ DokuWiki: <https://www.dokuwiki.org/dokuwiki> -} module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where +import Control.Monad (zipWithM) +import Control.Monad.Reader (ReaderT, ask, local, runReaderT) +import Control.Monad.State.Strict (StateT, evalStateT) +import Data.Default (Default (..)) +import Data.List (intercalate, intersect, isPrefixOf, transpose) +import Data.Text (Text, pack) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Options ( WriterOptions( - writerTableOfContents - , writerTemplate - , writerWrapText), WrapOption(..) ) -import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting - , camelCaseToHyphenated, trimr, normalize, substitute ) -import Text.Pandoc.Writers.Shared ( defField, metaToJSON ) import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Data.List ( intersect, intercalate, isPrefixOf, transpose ) -import Data.Default (Default(..)) -import Network.URI ( isURI ) -import Control.Monad ( zipWithM ) -import Control.Monad.State ( modify, State, get, evalState ) -import Control.Monad.Reader ( ReaderT, runReaderT, ask, local ) +import Text.Pandoc.Logging +import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) +import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara, + removeFormatting, substitute, trimr) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared (defField, metaToJSON) data WriterState = WriterState { - stNotes :: Bool -- True if there are notes } data WriterEnvironment = WriterEnvironment { - stIndent :: String -- Indent after the marker at the beginning of list items - , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list + stIndent :: String -- Indent after the marker at the beginning of list items + , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list , stBackSlashLB :: Bool -- True if we should produce formatted strings with newlines (as in a table cell) } instance Default WriterState where - def = WriterState { stNotes = False } + def = WriterState {} instance Default WriterEnvironment where def = WriterEnvironment { stIndent = "" , stUseTags = False , stBackSlashLB = False } -type DokuWiki = ReaderT WriterEnvironment (State WriterState) +type DokuWiki m = ReaderT WriterEnvironment (StateT WriterState m) -- | Convert Pandoc to DokuWiki. -writeDokuWiki :: WriterOptions -> Pandoc -> String +writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeDokuWiki opts document = - runDokuWiki (pandocToDokuWiki opts $ normalize document) + runDokuWiki (pandocToDokuWiki opts document) -runDokuWiki :: DokuWiki a -> a -runDokuWiki = flip evalState def . flip runReaderT def +runDokuWiki :: PandocMonad m => DokuWiki m a -> m a +runDokuWiki = flip evalStateT def . flip runReaderT def -- | Return DokuWiki representation of document. -pandocToDokuWiki :: WriterOptions -> Pandoc -> DokuWiki String +pandocToDokuWiki :: PandocMonad m + => WriterOptions -> Pandoc -> DokuWiki m Text pandocToDokuWiki opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (fmap trimr . blockListToDokuWiki opts) (inlineListToDokuWiki opts) meta body <- blockListToDokuWiki opts blocks - notesExist <- stNotes <$> get - let notes = if notesExist - then "" -- TODO Was "\n<references />" Check whether I can really remove this: - -- if it is definitely to do with footnotes, can remove this whole bit - else "" - let main = body ++ notes + let main = pack body let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) - $ metadata + $ defField "toc" (writerTableOfContents opts) metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape special characters for DokuWiki. escapeString :: String -> String @@ -112,9 +105,10 @@ escapeString = substitute "__" "%%__%%" . substitute "//" "%%//%%" -- | Convert Pandoc block element to DokuWiki. -blockToDokuWiki :: WriterOptions -- ^ Options +blockToDokuWiki :: PandocMonad m + => WriterOptions -- ^ Options -> Block -- ^ Block element - -> DokuWiki String + -> DokuWiki m String blockToDokuWiki _ Null = return "" @@ -149,12 +143,13 @@ blockToDokuWiki opts (Para inlines) = do blockToDokuWiki opts (LineBlock lns) = blockToDokuWiki opts $ linesToPara lns -blockToDokuWiki _ (RawBlock f str) +blockToDokuWiki _ b@(RawBlock f str) | f == Format "dokuwiki" = return str -- See https://www.dokuwiki.org/wiki:syntax -- use uppercase HTML tag for block-level content: | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>" - | otherwise = return "" + | otherwise = "" <$ + report (BlockNotRendered b) blockToDokuWiki _ HorizontalRule = return "\n----\n" @@ -177,7 +172,7 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do "visualfoxpro", "winbatch", "xml", "xpp", "z80"] return $ "<code" ++ (case at of - [] -> ">\n" + [] -> ">\n" (x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>" blockToDokuWiki opts (BlockQuote blocks) = do @@ -198,7 +193,7 @@ blockToDokuWiki opts (Table capt aligns _ headers rows) = do rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows let widths = map (maximum . map length) $ transpose (headers':rows') let padTo (width, al) s = - case (width - length s) of + case width - length s of x | x > 0 -> if al == AlignLeft || al == AlignDefault then s ++ replicate x ' ' @@ -278,21 +273,27 @@ listAttribsToString (startnum, numstyle, _) = else "") -- | Convert bullet list item (list of blocks) to DokuWiki. -listItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String +listItemToDokuWiki :: PandocMonad m + => WriterOptions -> [Block] -> DokuWiki m String listItemToDokuWiki opts items = do - contents <- blockListToDokuWiki opts items useTags <- stUseTags <$> ask if useTags - then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" + then do + contents <- blockListToDokuWiki opts items + return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" else do + bs <- mapM (blockToDokuWiki opts) items + let contents = case items of + [_, CodeBlock _ _] -> concat bs + _ -> vcat bs indent <- stIndent <$> ask backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent + let indent' = if backSlash then drop 2 indent else indent return $ indent' ++ "* " ++ contents -- | Convert ordered list item (list of blocks) to DokuWiki. -- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki -orderedListItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String +orderedListItemToDokuWiki :: PandocMonad m => WriterOptions -> [Block] -> DokuWiki m String orderedListItemToDokuWiki opts items = do contents <- blockListToDokuWiki opts items useTags <- stUseTags <$> ask @@ -301,24 +302,25 @@ orderedListItemToDokuWiki opts items = do else do indent <- stIndent <$> ask backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent + let indent' = if backSlash then drop 2 indent else indent return $ indent' ++ "- " ++ contents -- | Convert definition list item (label, list of blocks) to DokuWiki. -definitionListItemToDokuWiki :: WriterOptions +definitionListItemToDokuWiki :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> DokuWiki String + -> DokuWiki m String definitionListItemToDokuWiki opts (label, items) = do labelText <- inlineListToDokuWiki opts label contents <- mapM (blockListToDokuWiki opts) items useTags <- stUseTags <$> ask if useTags then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++ - (intercalate "\n" $ map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents) + intercalate "\n" (map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents) else do indent <- stIndent <$> ask backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent + let indent' = if backSlash then drop 2 indent else indent return $ indent' ++ "* **" ++ labelText ++ "** " ++ concat contents -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. @@ -337,18 +339,19 @@ isSimpleListItem :: [Block] -> Bool isSimpleListItem [] = True isSimpleListItem [x] = case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - DefinitionList _ -> isSimpleList x - _ -> False + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + DefinitionList _ -> isSimpleList x + _ -> False isSimpleListItem [x, y] | isPlainOrPara x = case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - DefinitionList _ -> isSimpleList y - _ -> False + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + DefinitionList _ -> isSimpleList y + CodeBlock _ _ -> True + _ -> False isSimpleListItem _ = False isPlainOrPara :: Block -> Bool @@ -368,14 +371,15 @@ backSlashLineBreaks cs = reverse $ g $ reverse $ concatMap f cs where f '\n' = "\\\\ " f c = [c] g (' ' : '\\':'\\': xs) = xs - g s = s + g s = s -- Auxiliary functions for tables: -tableItemToDokuWiki :: WriterOptions - -> Alignment - -> [Block] - -> DokuWiki String +tableItemToDokuWiki :: PandocMonad m + => WriterOptions + -> Alignment + -> [Block] + -> DokuWiki m String tableItemToDokuWiki opts align' item = do let mkcell x = (if align' == AlignRight || align' == AlignCenter then " " @@ -388,22 +392,32 @@ tableItemToDokuWiki opts align' item = do return $ mkcell contents -- | Convert list of Pandoc block elements to DokuWiki. -blockListToDokuWiki :: WriterOptions -- ^ Options +blockListToDokuWiki :: PandocMonad m + => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> DokuWiki String + -> DokuWiki m String blockListToDokuWiki opts blocks = do backSlash <- stBackSlashLB <$> ask + let blocks' = consolidateRawBlocks blocks if backSlash - then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks - else vcat <$> mapM (blockToDokuWiki opts) blocks + then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks' + else vcat <$> mapM (blockToDokuWiki opts) blocks' + +consolidateRawBlocks :: [Block] -> [Block] +consolidateRawBlocks [] = [] +consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs) + | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs) +consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs -- | Convert list of Pandoc inline elements to DokuWiki. -inlineListToDokuWiki :: WriterOptions -> [Inline] -> DokuWiki String +inlineListToDokuWiki :: PandocMonad m + => WriterOptions -> [Inline] -> DokuWiki m String inlineListToDokuWiki opts lst = - concat <$> (mapM (inlineToDokuWiki opts) lst) + concat <$> mapM (inlineToDokuWiki opts) lst -- | Convert Pandoc inline element to DokuWiki. -inlineToDokuWiki :: WriterOptions -> Inline -> DokuWiki String +inlineToDokuWiki :: PandocMonad m + => WriterOptions -> Inline -> DokuWiki m String inlineToDokuWiki opts (Span _attrs ils) = inlineListToDokuWiki opts ils @@ -460,12 +474,12 @@ inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim DisplayMath -> "$$" InlineMath -> "$" -inlineToDokuWiki _ (RawInline f str) +inlineToDokuWiki _ il@(RawInline f str) | f == Format "dokuwiki" = return str | f == Format "html" = return $ "<html>" ++ str ++ "</html>" - | otherwise = return "" + | otherwise = "" <$ report (InlineNotRendered il) -inlineToDokuWiki _ (LineBreak) = return "\\\\\n" +inlineToDokuWiki _ LineBreak = return "\\\\\n" inlineToDokuWiki opts SoftBreak = case writerWrapText opts of @@ -498,7 +512,6 @@ inlineToDokuWiki opts (Image attr alt (source, tit)) = do inlineToDokuWiki opts (Note contents) = do contents' <- blockListToDokuWiki opts contents - modify (\s -> s { stNotes = True }) return $ "((" ++ contents' ++ "))" -- note - may not work for notes with multiple blocks @@ -507,7 +520,7 @@ imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height where toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim + checkPct maybeDim = maybeDim go (Just w) Nothing = "?" ++ w go (Just w) (Just h) = "?" ++ w ++ "x" ++ h go Nothing (Just h) = "?0x" ++ h diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 00bf4a81c..7b4853a24 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,6 +1,9 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,47 +31,47 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to EPUB. -} -module Text.Pandoc.Writers.EPUB ( writeEPUB ) where -import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) -import qualified Data.Map as M -import qualified Data.Set as Set -import Data.Maybe ( fromMaybe, catMaybes ) -import Data.List ( isPrefixOf, isInfixOf, intercalate ) -import System.Environment ( getEnv ) -import Text.Printf (printf) -import System.FilePath ( takeExtension, takeFileName ) -import System.FilePath.Glob ( namesMatching ) -import Network.HTTP ( urlEncode ) +module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where +import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive, + fromArchive, fromEntry, toEntry) +import Control.Monad (mplus, unless, when, zipWithM) +import Control.Monad.Except (catchError, throwError) +import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get, + gets, lift, modify, put) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import qualified Text.Pandoc.UTF8 as UTF8 -import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) -import Data.Time.Clock.POSIX ( getPOSIXTime ) -import Text.Pandoc.Compat.Time -import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim - , normalizeDate, readDataFile, stringify, warn - , hierarchicalize, fetchItem' ) -import qualified Text.Pandoc.Shared as S (Element(..)) +import Data.Char (isAlphaNum, isAscii, isDigit, toLower) +import Data.List (intercalate, isInfixOf, isPrefixOf) +import qualified Data.Map as M +import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) +import qualified Data.Set as Set +import qualified Data.Text as TS +import qualified Data.Text.Lazy as TL +import Network.HTTP (urlEncode) +import System.FilePath (takeExtension, takeFileName, makeRelative) +import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags) import Text.Pandoc.Builder (fromList, setMeta) -import Text.Pandoc.Options ( WriterOptions(..) - , WrapOption(..) - , HTMLMathMethod(..) - , EPUBVersion(..) - , ObfuscationMethod(NoObfuscation) ) +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P +import Text.Pandoc.Compat.Time import Text.Pandoc.Definition -import Text.Pandoc.Walk (walk, walkM, query) -import Control.Monad.State (modify, get, State, put, evalState) -import Control.Monad (mplus, liftM, when) -import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs - , strContent, lookupAttr, Node(..), QName(..), parseXML - , onlyElems, node, ppElement) -import Text.Pandoc.UUID (getRandomUUID) -import Text.Pandoc.Writers.HTML ( writeHtml ) -import Data.Char ( toLower, isDigit, isAlphaNum ) -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import qualified Control.Exception as E -import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType) +import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), + ObfuscationMethod (NoObfuscation), WrapOption (..), + WriterOptions (..)) +import Text.Pandoc.Shared (hierarchicalize, normalizeDate, renderTags', + safeRead, stringify, trim, uniqueIdent) +import qualified Text.Pandoc.Shared as S (Element (..)) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.UUID (getUUID) +import Text.Pandoc.Walk (query, walk, walkM) +import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) +import Text.Printf (printf) +import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), + add_attrs, lookupAttr, node, onlyElems, parseXML, + ppElement, showElement, strContent, unode, unqual) -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -76,51 +79,55 @@ import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] +data EPUBState = EPUBState { + stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] + , stEpubSubdir :: String + } + +type E m = StateT EPUBState m + data EPUBMetadata = EPUBMetadata{ - epubIdentifier :: [Identifier] - , epubTitle :: [Title] - , epubDate :: [Date] - , epubLanguage :: String - , epubCreator :: [Creator] - , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubCoverImage :: Maybe String - , epubStylesheet :: Maybe Stylesheet - , epubPageDirection :: Maybe ProgressionDirection + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: [Date] + , epubLanguage :: String + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [String] + , epubDescription :: Maybe String + , epubType :: Maybe String + , epubFormat :: Maybe String + , epubPublisher :: Maybe String + , epubSource :: Maybe String + , epubRelation :: Maybe String + , epubCoverage :: Maybe String + , epubRights :: Maybe String + , epubCoverImage :: Maybe String + , epubStylesheets :: [FilePath] + , epubPageDirection :: Maybe ProgressionDirection + , epubIbooksFields :: [(String, String)] } deriving Show -data Stylesheet = StylesheetPath FilePath - | StylesheetContents String - deriving Show - data Date = Date{ - dateText :: String - , dateEvent :: Maybe String + dateText :: String + , dateEvent :: Maybe String } deriving Show data Creator = Creator{ - creatorText :: String - , creatorRole :: Maybe String - , creatorFileAs :: Maybe String + creatorText :: String + , creatorRole :: Maybe String + , creatorFileAs :: Maybe String } deriving Show data Identifier = Identifier{ - identifierText :: String - , identifierScheme :: Maybe String + identifierText :: String + , identifierScheme :: Maybe String } deriving Show data Title = Title{ - titleText :: String - , titleFileAs :: Maybe String - , titleType :: Maybe String + titleText :: String + , titleFileAs :: Maybe String + , titleType :: Maybe String } deriving Show data ProgressionDirection = LTR | RTL deriving Show @@ -143,15 +150,29 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x -getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata +mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry +mkEntry path content = do + epubSubdir <- gets stEpubSubdir + let addEpubSubdir :: Entry -> Entry + addEpubSubdir e = e{ eRelativePath = + (if null epubSubdir + then "" + else epubSubdir ++ "/") ++ eRelativePath e } + epochtime <- floor <$> lift P.getPOSIXTime + return $ + (if path == "mimetype" || "META-INF" `isPrefixOf` path + then id + else addEpubSubdir) $ toEntry path epochtime content + +getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta - let elts = onlyElems $ parseXML $ writerEpubMetadata opts + let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts let md' = foldr addMetadataFromXML md elts let addIdentifier m = if null (epubIdentifier m) then do - randomId <- fmap show getRandomUUID + randomId <- (show . getUUID) <$> lift P.newStdGen return $ m{ epubIdentifier = [Identifier randomId Nothing] } else return m let addLanguage m = @@ -159,16 +180,19 @@ getEPUBMetadata opts meta = do then case lookup "lang" (writerVariables opts) of Just x -> return m{ epubLanguage = x } Nothing -> do - localeLang <- E.catch (liftM - (map (\c -> if c == '_' then '-' else c) . - takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: E.SomeException) in return "en-US") + mLang <- lift $ P.lookupEnv "LANG" + let localeLang = + case mLang of + Just lang -> + map (\c -> if c == '_' then '-' else c) $ + takeWhile (/='.') lang + Nothing -> "en-US" return m{ epubLanguage = localeLang } else return m let fixDate m = if null (epubDate m) then do - currentTime <- getCurrentTime + currentTime <- lift P.getCurrentTime return $ m{ epubDate = [ Date{ dateText = showDateTimeISO8601 currentTime , dateEvent = Nothing } ] } @@ -225,12 +249,16 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md addMetadataFromXML _ md = md metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = s +metaValueToString (MetaString s) = s metaValueToString (MetaInlines ils) = stringify ils -metaValueToString (MetaBlocks bs) = stringify bs -metaValueToString (MetaBool True) = "true" -metaValueToString (MetaBool False) = "false" -metaValueToString _ = "" +metaValueToString (MetaBlocks bs) = stringify bs +metaValueToString (MetaBool True) = "true" +metaValueToString (MetaBool False) = "false" +metaValueToString _ = "" + +metaValueToPaths:: MetaValue -> [FilePath] +metaValueToPaths (MetaList xs) = map metaValueToString xs +metaValueToPaths x = [metaValueToString x] getList :: String -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = @@ -267,19 +295,18 @@ getCreator s meta = getList s meta handleMetaValue getDate :: String -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = - Date{ dateText = maybe "" id $ + Date{ dateText = fromMaybe "" $ M.lookup "text" m >>= normalizeDate' . metaValueToString , dateEvent = metaValueToString <$> M.lookup "event" m } - handleMetaValue mv = Date { dateText = maybe "" - id $ normalizeDate' $ metaValueToString mv + handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } simpleList :: String -> Meta -> [String] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs - Just x -> [metaValueToString x] - Nothing -> [] + Just x -> [metaValueToString x] + Nothing -> [] metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata metadataFromMeta opts meta = EPUBMetadata{ @@ -299,8 +326,9 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverage = coverage , epubRights = rights , epubCoverImage = coverImage - , epubStylesheet = stylesheet + , epubStylesheets = stylesheets , epubPageDirection = pageDirection + , epubIbooksFields = ibooksFields } where identifiers = getIdentifier meta titles = getTitle meta @@ -320,70 +348,125 @@ metadataFromMeta opts meta = EPUBMetadata{ rights = metaValueToString <$> lookupMeta "rights" meta coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) - stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` - ((StylesheetPath . metaValueToString) <$> - lookupMeta "stylesheet" meta) + stylesheets = fromMaybe [] + (metaValueToPaths <$> lookupMeta "stylesheet" meta) ++ + [f | ("css",f) <- writerVariables opts] pageDirection = case map toLower . metaValueToString <$> lookupMeta "page-progression-direction" meta of Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing + ibooksFields = case lookupMeta "ibooks" meta of + Just (MetaMap mp) + -> M.toList $ M.map metaValueToString mp + _ -> [] + +-- | Produce an EPUB2 file from a Pandoc document. +writeEPUB2 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB2 = writeEPUB EPUB2 + +-- | Produce an EPUB3 file from a Pandoc document. +writeEPUB3 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB3 = writeEPUB EPUB3 -- | Produce an EPUB file from a Pandoc document. -writeEPUB :: WriterOptions -- ^ Writer options +writeEPUB :: PandocMonad m + => EPUBVersion + -> WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO B.ByteString -writeEPUB opts doc@(Pandoc meta _) = do - let version = fromMaybe EPUB2 (writerEpubVersion opts) + -> m B.ByteString +writeEPUB epubVersion opts doc = do + let epubSubdir = writerEpubSubdirectory opts + -- sanity check on epubSubdir + unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + throwError $ PandocEpubSubdirectoryError epubSubdir + let initState = EPUBState { stMediaPaths = [], stEpubSubdir = epubSubdir } + evalStateT (pandocToEPUB epubVersion opts doc) initState + +pandocToEPUB :: PandocMonad m + => EPUBVersion + -> WriterOptions + -> Pandoc + -> E m B.ByteString +pandocToEPUB version opts doc@(Pandoc meta _) = do + epubSubdir <- gets stEpubSubdir let epub3 = version == EPUB3 - epochtime <- floor `fmap` getPOSIXTime - let mkEntry path content = toEntry path epochtime content + let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) . + writeHtmlStringForEPUB version o + metadata <- getEPUBMetadata opts meta + + -- stylesheet + stylesheets <- case epubStylesheets metadata of + [] -> (\x -> [B.fromChunks [x]]) <$> + P.readDataFile "epub.css" + fs -> mapM P.readFileLazy fs + stylesheetEntries <- zipWithM + (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs) + stylesheets [(1 :: Int)..] + let vars = ("epub3", if epub3 then "true" else "false") - : ("css", "stylesheet.css") - : writerVariables opts + : [(x,y) | (x,y) <- writerVariables opts, x /= "css"] + + let cssvars useprefix = map (\e -> ("css", + (if useprefix + then "../" + else "") + ++ makeRelative epubSubdir (eRelativePath e))) + stylesheetEntries + let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerSectionDivs = True - , writerHtml5 = epub3 , writerVariables = vars , writerHTMLMathMethod = if epub3 - then MathML Nothing + then MathML else writerHTMLMathMethod opts , writerWrapText = WrapAuto } - metadata <- getEPUBMetadata opts' meta -- cover page (cpgEntry, cpicEntry) <- case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do - let coverImage = "media/" ++ takeFileName img - let cpContent = renderHtml $ writeHtml - opts'{ writerVariables = ("coverpage","true"):vars } - (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) - imgContent <- B.readFile img - return ( [mkEntry "cover.xhtml" cpContent] - , [mkEntry coverImage imgContent] ) + let coverImage = takeFileName img + cpContent <- lift $ writeHtml + opts'{ writerVariables = + ("coverpage","true"): + cssvars True ++ vars } + (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"../media/" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) + imgContent <- lift $ P.readFileLazy img + coverEntry <- mkEntry "text/cover.xhtml" cpContent + coverImageEntry <- mkEntry ("media/" ++ coverImage) + imgContent + return ( [ coverEntry ] + , [ coverImageEntry ] ) -- title page - let tpContent = renderHtml $ writeHtml opts'{ - writerVariables = ("titlepage","true"):vars } - (Pandoc meta []) - let tpEntry = mkEntry "title_page.xhtml" tpContent + tpContent <- lift $ writeHtml opts'{ + writerVariables = ("titlepage","true"): + cssvars True ++ vars } + (Pandoc meta []) + tpEntry <- mkEntry "text/title_page.xhtml" tpContent -- handle pictures - mediaRef <- newIORef [] - Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= - walkM (transformBlock opts' mediaRef) - picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef - + -- mediaRef <- P.newIORef [] + Pandoc _ blocks <- walkM (transformInline opts') doc >>= + walkM transformBlock + picEntries <- (mapMaybe (snd . snd)) <$> gets stMediaPaths -- handle fonts let matchingGlob f = do - xs <- namesMatching f + xs <- lift $ P.glob f when (null xs) $ - warn $ f ++ " did not match any font files." + report $ CouldNotFetchResource f "glob did not match any font files" return xs - let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f + let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< + lift (P.readFileLazy f) fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') fontEntries <- mapM mkFontEntry fontFiles @@ -420,7 +503,7 @@ writeEPUB opts doc@(Pandoc meta _) = do mbnum <- if "unnumbered" `elem` classes then return Nothing else case splitAt (n - 1) nums of - (ks, (m:_)) -> do + (ks, m:_) -> do let nums' = ks ++ [m+1] put nums' return $ Just (ks ++ [m]) @@ -467,77 +550,93 @@ writeEPUB opts doc@(Pandoc meta _) = do Chapter mbnum $ walk fixInternalReferences bs) chapters' - let chapToEntry :: Int -> Chapter -> Entry - chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) - $ renderHtml - $ writeHtml opts'{ writerNumberOffset = - fromMaybe [] mbnum } - $ case bs of - (Header _ _ xs : _) -> - -- remove notes or we get doubled footnotes - Pandoc (setMeta "title" (walk removeNote $ fromList xs) - nullMeta) bs - _ -> - Pandoc nullMeta bs - - let chapterEntries = zipWith chapToEntry [1..] chapters + let chapToEntry num (Chapter mbnum bs) = + mkEntry ("text/" ++ showChapter num) =<< + writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum + , writerVariables = cssvars True ++ vars } + (case bs of + (Header _ _ xs : _) -> + -- remove notes or we get doubled footnotes + Pandoc (setMeta "title" (walk removeNote $ fromList xs) + nullMeta) bs + _ -> Pandoc nullMeta bs) + + chapterEntries <- zipWithM chapToEntry [1..] chapters -- incredibly inefficient (TODO): let containsMathML ent = epub3 && - "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + "<math" `isInfixOf` + B8.unpack (fromEntry ent) let containsSVG ent = epub3 && - "<svg" `isInfixOf` (B8.unpack $ fromEntry ent) + "<svg" `isInfixOf` + B8.unpack (fromEntry ent) let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] -- contents.opf let chapterNode ent = unode "item" ! - ([("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), + ([("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), ("media-type", "application/xhtml+xml")] ++ case props ent of - [] -> [] - xs -> [("properties", unwords xs)]) + [] -> [] + xs -> [("properties", unwords xs)]) $ () + let chapterRefNode ent = unode "itemref" ! - [("idref", toId $ eRelativePath ent)] $ () + [("idref", toId $ makeRelative epubSubdir + $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! - [("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", fromMaybe "application/octet-stream" + [("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), + ("media-type", + fromMaybe "application/octet-stream" $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! - [("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () + [("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), + ("media-type", fromMaybe "" $ + getMimeType $ eRelativePath ent)] $ () let plainTitle = case docTitle' meta of [] -> case epubTitle metadata of - [] -> "UNTITLED" + [] -> "UNTITLED" (x:_) -> titleText x x -> stringify x let tocTitle = fromMaybe plainTitle $ metaValueToString <$> lookupMeta "toc-title" meta - let uuid = case epubIdentifier metadata of - (x:_) -> identifierText x -- use first identifier as UUID - [] -> error "epubIdentifier is null" -- shouldn't happen - currentTime <- getCurrentTime + uuid <- case epubIdentifier metadata of + (x:_) -> return $ identifierText x -- use first identifier as UUID + [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen + currentTime <- lift P.getCurrentTime let contentsData = UTF8.fromStringLazy $ ppTopElement $ - unode "package" ! [("version", case version of - EPUB2 -> "2.0" - EPUB3 -> "3.0") - ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","epub-id-1")] $ + unode "package" ! + ([("version", case version of + EPUB2 -> "2.0" + EPUB3 -> "3.0") + ,("xmlns","http://www.idpf.org/2007/opf") + ,("unique-identifier","epub-id-1") + ] ++ + [("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/") | version == EPUB3]) $ [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () - , unode "item" ! [("id","style"), ("href","stylesheet.css") - ,("media-type","text/css")] $ () , unode "item" ! ([("id","nav") ,("href","nav.xhtml") ,("media-type","application/xhtml+xml")] ++ [("properties","nav") | epub3 ]) $ () ] ++ + [ unode "item" ! [("id","style"), ("href",fp) + ,("media-type","text/css")] $ () | + fp <- map + (makeRelative epubSubdir . eRelativePath) + stylesheetEntries ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ (case cpicEntry of [] -> [] @@ -546,7 +645,8 @@ writeEPUB opts doc@(Pandoc meta _) = do (pictureNode x)]) ++ map pictureNode picEntries ++ map fontNode fontEntries - , unode "spine" ! ([("toc","ncx")] ++ progressionDirection) $ + , unode "spine" ! ( + ("toc","ncx") : progressionDirection) $ case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! @@ -565,48 +665,54 @@ writeEPUB opts doc@(Pandoc meta _) = do ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! - [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing + [("type","cover") + ,("title","Cover") + ,("href","text/cover.xhtml")] $ () + | isJust (epubCoverImage metadata) ] ] - let contentsEntry = mkEntry "content.opf" contentsData + contentsEntry <- mkEntry "content.opf" contentsData -- toc.ncx let secs = hierarchicalize blocks' let tocLevel = writerTOCDepth opts - let navPointNode :: (Int -> String -> String -> [Element] -> Element) - -> S.Element -> State Int Element + let navPointNode :: PandocMonad m + => (Int -> [Inline] -> String -> [Element] -> Element) + -> S.Element -> StateT Int m Element navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do n <- get modify (+1) let showNums :: [Int] -> String showNums = intercalate "." . map show - let tit' = stringify ils let tit = if writerNumberSections opts && not (null nums) - then showNums nums ++ " " ++ tit' - else tit' - let src = case lookup ident reftable of - Just x -> x - Nothing -> error (ident ++ " not found in reftable") + then Span ("", ["section-header-number"], []) + [Str (showNums nums)] : Space : ils + else ils + src <- case lookup ident reftable of + Just x -> return x + Nothing -> throwError $ PandocSomeError $ ident ++ " not found in reftable" let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel - isSec _ = False + isSec _ = False let subsecs = filter isSec children subs <- mapM (navPointNode formatter) subsecs return $ formatter n tit src subs - navPointNode _ (S.Blk _) = error "navPointNode encountered Blk" + navPointNode _ (S.Blk _) = throwError $ PandocSomeError "navPointNode encountered Blk" - let navMapFormatter :: Int -> String -> String -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" tit - , unode "content" ! [("src", src)] $ () + [ unode "navLabel" $ unode "text" $ stringify tit + , unode "content" ! [("src", "text/" ++ src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) - , unode "content" ! [("src","title_page.xhtml")] $ () ] + , unode "content" ! [("src", "text/title_page.xhtml")] + $ () ] + navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1 let tocData = UTF8.fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ @@ -623,34 +729,48 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> [] Just img -> [unode "meta" ! [("name","cover"), ("content", toId img)] $ ()] - , unode "docTitle" $ unode "text" $ plainTitle + , unode "docTitle" $ unode "text" plainTitle , unode "navMap" $ - tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 + tpNode : navMap ] - let tocEntry = mkEntry "toc.ncx" tocData + tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> String -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ - (unode "a" ! [("href",src)] - $ tit) + (unode "a" ! + [("href", "text/" ++ src)] + $ titElements) : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] + where titElements = parseXML titRendered + titRendered = case P.runPure + (writeHtmlStringForEPUB version + opts{ writerTemplate = Nothing } + (Pandoc nullMeta + [Plain $ walk delink tit])) of + Left _ -> TS.pack $ stringify tit + Right x -> x + -- can't have a element inside a... + delink (Link _ ils _) = Span ("", [], []) ils + delink x = x let navtag = if epub3 then "nav" else "div" - let navBlocks = [RawBlock (Format "html") $ ppElement $ + tocBlocks <- lift $ evalStateT (mapM (navPointNode navXhtmlFormatter) secs) 1 + let navBlocks = [RawBlock (Format "html") + $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle - , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] + , unode "ol" ! [("class","toc")] $ tocBlocks ]] let landmarks = if epub3 then [RawBlock (Format "html") $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("hidden","hidden")] $ [ unode "ol" $ [ unode "li" - [ unode "a" ! [("href", "cover.xhtml") + [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ "Cover"] | epubCoverImage metadata /= Nothing @@ -664,52 +784,50 @@ writeEPUB opts doc@(Pandoc meta _) = do ] ] else [] - let navData = renderHtml $ writeHtml - opts'{ writerVariables = ("navpage","true"):vars } + navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"): + cssvars False ++ vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) (navBlocks ++ landmarks)) - let navEntry = mkEntry "nav.xhtml" navData + navEntry <- mkEntry "nav.xhtml" navData -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip" + mimetypeEntry <- mkEntry "mimetype" $ + UTF8.fromStringLazy "application/epub+zip" -- container.xml let containerData = UTF8.fromStringLazy $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ - unode "rootfile" ! [("full-path","content.opf") + unode "rootfile" ! [("full-path", + (if null epubSubdir + then "" + else epubSubdir ++ "/") ++ "content.opf") ,("media-type","application/oebps-package+xml")] $ () - let containerEntry = mkEntry "META-INF/container.xml" containerData + containerEntry <- mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml let apple = UTF8.fromStringLazy $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ unode "option" ! [("name","specified-fonts")] $ "true" - let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple - - -- stylesheet - stylesheet <- case epubStylesheet metadata of - Just (StylesheetPath fp) -> UTF8.readFile fp - Just (StylesheetContents s) -> return s - Nothing -> UTF8.toString `fmap` - readDataFile (writerUserDataDir opts) "epub.css" - let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet + appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- construct archive - let archive = foldr addEntryToArchive emptyArchive - (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry : - contentsEntry : tocEntry : navEntry : - (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) + let archive = foldr addEntryToArchive emptyArchive $ + [mimetypeEntry, containerEntry, appleEntry, + contentsEntry, tocEntry, navEntry, tpEntry] ++ + stylesheetEntries ++ picEntries ++ cpicEntry ++ + cpgEntry ++ chapterEntries ++ fontEntries return $ fromArchive archive metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element metadataElement version md currentTime = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes - where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes + ++ languageNodes ++ ibooksNodes ++ creatorNodes ++ contributorNodes ++ subjectNodes ++ descriptionNodes ++ typeNodes ++ formatNodes ++ publisherNodes ++ sourceNodes ++ relationNodes @@ -728,6 +846,8 @@ metadataElement version md currentTime = [] -> [] (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] + ibooksNodes = map ibooksNode (epubIbooksFields md) + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ epubCreator md @@ -747,7 +867,7 @@ metadataElement version md currentTime = ("content",toId img)] $ ()]) $ epubCoverImage md modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ - (showDateTimeISO8601 currentTime) | version == EPUB3 ] + showDateTimeISO8601 currentTime | version == EPUB3 ] dcTag n s = unode ("dc:" ++ n) s dcTag' n s = [dcTag n s] toIdentifierNode id' (Identifier txt scheme) @@ -791,99 +911,92 @@ metadataElement version md currentTime = (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] - schemeToOnix "ISBN-10" = "02" - schemeToOnix "GTIN-13" = "03" - schemeToOnix "UPC" = "04" - schemeToOnix "ISMN-10" = "05" - schemeToOnix "DOI" = "06" - schemeToOnix "LCCN" = "13" - schemeToOnix "GTIN-14" = "14" - schemeToOnix "ISBN-13" = "15" + schemeToOnix "ISBN-10" = "02" + schemeToOnix "GTIN-13" = "03" + schemeToOnix "UPC" = "04" + schemeToOnix "ISMN-10" = "05" + schemeToOnix "DOI" = "06" + schemeToOnix "LCCN" = "13" + schemeToOnix "GTIN-14" = "14" + schemeToOnix "ISBN-13" = "15" schemeToOnix "Legal deposit number" = "17" - schemeToOnix "URN" = "22" - schemeToOnix "OCLC" = "23" - schemeToOnix "ISMN-13" = "25" - schemeToOnix "ISBN-A" = "26" - schemeToOnix "JP" = "27" - schemeToOnix "OLCC" = "28" - schemeToOnix _ = "01" + schemeToOnix "URN" = "22" + schemeToOnix "OCLC" = "23" + schemeToOnix "ISMN-13" = "25" + schemeToOnix "ISBN-A" = "26" + schemeToOnix "JP" = "27" + schemeToOnix "OLCC" = "28" + schemeToOnix _ = "01" showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media - -> Tag String - -> IO (Tag String) -transformTag opts mediaRef tag@(TagOpen name attr) +transformTag :: PandocMonad m + => Tag String + -> E m (Tag String) +transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && - lookup "data-external" attr == Nothing = do + isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef opts mediaRef src - newposter <- modifyMediaRef opts mediaRef poster + newsrc <- modifyMediaRef src + newposter <- modifyMediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", newsrc) | not (null newsrc)] ++ - [("poster", newposter) | not (null newposter)] + [("src", "../" ++ newsrc) | not (null newsrc)] ++ + [("poster", "../" ++ newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ _ tag = return tag - -modifyMediaRef :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] - -> FilePath - -> IO FilePath -modifyMediaRef _ _ "" = return "" -modifyMediaRef opts mediaRef oldsrc = do - media <- readIORef mediaRef +transformTag tag = return tag + +modifyMediaRef :: PandocMonad m + => FilePath + -> E m FilePath +modifyMediaRef "" = return "" +modifyMediaRef oldsrc = do + media <- gets stMediaPaths case lookup oldsrc media of Just (n,_) -> return n - Nothing -> do - res <- fetchItem' (writerMediaBag opts) - (writerSourceURL opts) oldsrc - (new, mbEntry) <- - case res of - Left _ -> do - warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." - return (oldsrc, Nothing) - Right (img,mbMime) -> do - let new = "media/file" ++ show (length media) ++ - fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) - epochtime <- floor `fmap` getPOSIXTime - let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img - return (new, Just entry) - modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): ) - return new - -transformBlock :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media - -> Block - -> IO Block -transformBlock opts mediaRef (RawBlock fmt raw) + Nothing -> catchError + (do (img, mbMime) <- P.fetchItem oldsrc + let new = "media/file" ++ show (length media) ++ + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) + entry <- mkEntry new (B.fromChunks . (:[]) $ img) + modify $ \st -> st{ stMediaPaths = + (oldsrc, (new, Just entry)):media} + return new) + (\e -> do + report $ CouldNotFetchResource oldsrc (show e) + return oldsrc) + +transformBlock :: PandocMonad m + => Block + -> E m Block +transformBlock (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM transformTag tags return $ RawBlock fmt (renderTags' tags') -transformBlock _ _ b = return b +transformBlock b = return b -transformInline :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media +transformInline :: PandocMonad m + => WriterOptions -> Inline - -> IO Inline -transformInline opts mediaRef (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef opts mediaRef src - return $ Image attr lab (newsrc, tit) -transformInline opts mediaRef (x@(Math t m)) + -> E m Inline +transformInline _opts (Image attr lab (src,tit)) = do + newsrc <- modifyMediaRef src + return $ Image attr lab ("../" ++ newsrc, tit) +transformInline opts (x@(Math t m)) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef opts mediaRef (url ++ urlEncode m) + newsrc <- modifyMediaRef (url ++ urlEncode m) let mathclass = if t == DisplayMath then "display" else "inline" - return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")] -transformInline opts mediaRef (RawInline fmt raw) + return $ Span ("",["math",mathclass],[]) + [Image nullAttr [x] ("../" ++ newsrc, "")] +transformInline _opts (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM transformTag tags return $ RawInline fmt (renderTags' tags') -transformInline _ _ x = return x +transformInline _ x = return x (!) :: (t -> Element) -> [(String, String)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) @@ -898,8 +1011,8 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . let (ds,ys) = break (==';') xs rest = drop 1 ys in case safeRead ('\'':'\\':ds ++ "'") of - Just x -> x : unEntity rest - Nothing -> '&':'#':unEntity xs + Just x -> x : unEntity rest + Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs mediaTypeOf :: FilePath -> Maybe MimeType @@ -907,7 +1020,7 @@ mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y - _ -> Nothing + _ -> Nothing -- Returns filename for chapter number. showChapter :: Int -> String diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 5538ca061..e322c7d98 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -1,8 +1,8 @@ {-# LANGUAGE PatternGuards #-} {- -Copyright (c) 2011-2012, Sergey Astanin -All rights reserved. +Copyright (c) 2011-2012 Sergey Astanin + 2012-2018 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,17 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. +{- | +Module : Text.Pandoc.Writers.FB2 +Copyright : Copyright (C) 2011-2012 Sergey Astanin + 2012-2018 John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : John MacFarlane +Stability : alpha +Portability : portable + +Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. FictionBook is an XML-based e-book format. For more information see: <http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1> @@ -27,44 +37,42 @@ FictionBook is an XML-based e-book format. For more information see: -} module Text.Pandoc.Writers.FB2 (writeFB2) where -import Control.Monad.State (StateT, evalStateT, get, modify) -import Control.Monad.State (liftM, liftM2, liftIO) +import Control.Monad (zipWithM) +import Control.Monad.Except (catchError) +import Control.Monad.State.Strict (StateT, evalStateT, get, lift, liftM, modify) import Data.ByteString.Base64 (encode) -import Data.Char (toLower, isSpace, isAscii, isControl) -import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix) +import qualified Data.ByteString.Char8 as B8 +import Data.Char (isAscii, isControl, isSpace, toLower) import Data.Either (lefts, rights) -import Network.Browser (browse, request, setAllowRedirects, setOutHandler) -import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody) -import Network.HTTP (lookupHeader, HeaderName(..), urlEncode) -import Network.URI (isURI, unEscapeString) -import System.FilePath (takeExtension) +import Data.List (intercalate, intersperse, isPrefixOf, stripPrefix) +import Data.Text (Text, pack) +import Network.HTTP (urlEncode) import Text.XML.Light -import qualified Control.Exception as E -import qualified Data.ByteString as B import qualified Text.XML.Light as X import qualified Text.XML.Light.Cursor as XC +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition -import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize, - linesToPara) +import Text.Pandoc.Logging +import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) +import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, orderedListMarkers) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. data FbRenderState = FbRenderState - { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text - , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path - , parentListMarker :: String -- ^ list marker of the parent ordered list - , parentBulletLevel :: Int -- ^ nesting level of the unordered list - , writerOptions :: WriterOptions + { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text + , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path + , parentListMarker :: String -- ^ list marker of the parent ordered list + , writerOptions :: WriterOptions } deriving (Show) -- | FictionBook building monad. -type FBM = StateT FbRenderState IO +type FBM m = StateT FbRenderState m newFB :: FbRenderState newFB = FbRenderState { footnotes = [], imagesToFetch = [] - , parentListMarker = "", parentBulletLevel = 0 + , parentListMarker = "" , writerOptions = def } data ImageMode = NormalImage | InlineImage deriving (Eq) @@ -73,20 +81,27 @@ instance Show ImageMode where show InlineImage = "inlineImageType" -- | Produce an FB2 document from a 'Pandoc' document. -writeFB2 :: WriterOptions -- ^ conversion options +writeFB2 :: PandocMonad m + => WriterOptions -- ^ conversion options -> Pandoc -- ^ document to convert - -> IO String -- ^ FictionBook2 document (not encoded yet) -writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do + -> m Text -- ^ FictionBook2 document (not encoded yet) +writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc + +pandocToFB2 :: PandocMonad m + => WriterOptions + -> Pandoc + -> FBM m Text +pandocToFB2 opts (Pandoc meta blocks) = do modify (\s -> s { writerOptions = opts }) desc <- description meta - fp <- frontpage meta + title <- cMapM toXml . docTitle $ meta secs <- renderSections 1 blocks - let body = el "body" $ fp ++ secs + let body = el "body" $ el "title" (el "p" title) : secs notes <- renderFootnotes - (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) + (imgs,missing) <- fmap imagesToFetch get >>= \s -> lift (fetchImages s) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ xml_head ++ (showContent fb2_xml) ++ "\n" + return $ pack $ xml_head ++ showContent fb2_xml ++ "\n" where xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" fb2_attrs = @@ -94,67 +109,77 @@ writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do xlink = "http://www.w3.org/1999/xlink" in [ uattr "xmlns" xmlns , attr ("xmlns", "l") xlink ] - -- - frontpage :: Meta -> FBM [Content] - frontpage meta' = do - t <- cMapM toXml . docTitle $ meta' - return $ - [ el "title" (el "p" t) - , el "annotation" (map (el "p" . cMap plain) - (docAuthors meta' ++ [docDate meta'])) - ] - description :: Meta -> FBM Content - description meta' = do - bt <- booktitle meta' - let as = authors meta' - dd <- docdate meta' - return $ el "description" - [ el "title-info" (bt ++ as ++ dd) - , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version - ] - booktitle :: Meta -> FBM [Content] - booktitle meta' = do - t <- cMapM toXml . docTitle $ meta' - return $ if null t - then [] - else [ el "book-title" t ] - authors :: Meta -> [Content] - authors meta' = cMap author (docAuthors meta') - author :: [Inline] -> [Content] - author ss = - let ws = words . cMap plain $ ss - email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws) - ws' = filter ('@' `notElem`) ws - names = case ws' of - (nickname:[]) -> [ el "nickname" nickname ] - (fname:lname:[]) -> [ el "first-name" fname - , el "last-name" lname ] - (fname:rest) -> [ el "first-name" fname - , el "middle-name" (concat . init $ rest) - , el "last-name" (last rest) ] - ([]) -> [] - in list $ el "author" (names ++ email) - docdate :: Meta -> FBM [Content] - docdate meta' = do - let ss = docDate meta' - d <- cMapM toXml ss - return $ if null d - then [] - else [el "date" d] + +description :: PandocMonad m => Meta -> FBM m Content +description meta' = do + let genre = el "genre" "unrecognised" + bt <- booktitle meta' + let as = authors meta' + dd <- docdate meta' + let lang = case lookupMeta "lang" meta' of + Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] + Just (MetaString s) -> [el "lang" $ iso639 s] + _ -> [] + where iso639 = takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + let coverimage url = do + let img = Image nullAttr mempty (url, "") + im <- insertImage InlineImage img + return [el "coverpage" im] + coverpage <- case lookupMeta "cover-image" meta' of + Just (MetaInlines [Str s]) -> coverimage s + Just (MetaString s) -> coverimage s + _ -> return [] + return $ el "description" + [ el "title-info" (genre : (bt ++ as ++ dd ++ lang)) + , el "document-info" (el "program-used" "pandoc" : coverpage) + ] + +booktitle :: PandocMonad m => Meta -> FBM m [Content] +booktitle meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ if null t + then [] + else [ el "book-title" t ] + +authors :: Meta -> [Content] +authors meta' = cMap author (docAuthors meta') + +author :: [Inline] -> [Content] +author ss = + let ws = words . cMap plain $ ss + email = el "email" <$> take 1 (filter ('@' `elem`) ws) + ws' = filter ('@' `notElem`) ws + names = case ws' of + [nickname] -> [ el "nickname" nickname ] + [fname, lname] -> [ el "first-name" fname + , el "last-name" lname ] + (fname:rest) -> [ el "first-name" fname + , el "middle-name" (concat . init $ rest) + , el "last-name" (last rest) ] + [] -> [] + in list $ el "author" (names ++ email) + +docdate :: PandocMonad m => Meta -> FBM m [Content] +docdate meta' = do + let ss = docDate meta' + d <- cMapM toXml ss + return $ if null d + then [] + else [el "date" d] -- | Divide the stream of blocks into sections and convert to XML -- representation. -renderSections :: Int -> [Block] -> FBM [Content] +renderSections :: PandocMonad m => Int -> [Block] -> FBM m [Content] renderSections level blocks = do let secs = splitSections level blocks mapM (renderSection level) secs -renderSection :: Int -> ([Inline], [Block]) -> FBM Content +renderSection :: PandocMonad m => Int -> ([Inline], [Block]) -> FBM m Content renderSection level (ttl, body) = do title <- if null ttl then return [] else return . list . el "title" . formatTitle $ ttl - content <- if (hasSubsections body) + content <- if hasSubsections body then renderSections (level + 1) body else cMapM blockToXml body return $ el "section" (title ++ content) @@ -175,7 +200,7 @@ split cond xs = let (b,a) = break cond xs isLineBreak :: Inline -> Bool isLineBreak LineBreak = True -isLineBreak _ = False +isLineBreak _ = False -- | Divide the stream of block elements into sections: [(title, blocks)]. splitSections :: Int -> [Block] -> [([Inline], [Block])] @@ -186,17 +211,17 @@ splitSections level blocks = reverse $ revSplit (reverse blocks) let (lastsec, before) = break sameLevel rblocks (header, prevblocks) = case before of - ((Header n _ title):prevblocks') -> + (Header n _ title:prevblocks') -> if n == level then (title, prevblocks') else ([], before) _ -> ([], before) in (header, reverse lastsec) : revSplit prevblocks sameLevel (Header n _ _) = n == level - sameLevel _ = False + sameLevel _ = False -- | Make another FictionBook body with footnotes. -renderFootnotes :: FBM [Content] +renderFootnotes :: PandocMonad m => FBM m [Content] renderFootnotes = do fns <- footnotes `liftM` get if null fns @@ -205,19 +230,19 @@ renderFootnotes = do el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) where renderFN (n, idstr, cs) = - let fn_texts = (el "title" (el "p" (show n))) : cs + let fn_texts = el "title" (el "p" (show n)) : cs in el "section" ([uattr "id" idstr], fn_texts) -- | Fetch images and encode them for the FictionBook XML. -- Return image data and a list of hrefs of the missing images. -fetchImages :: [(String,String)] -> IO ([Content],[String]) +fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String]) fetchImages links = do imgs <- mapM (uncurry fetchImage) links - return $ (rights imgs, lefts imgs) + return (rights imgs, lefts imgs) -- | Fetch image data from disk or from network and make a <binary> XML section. -- Return either (Left hrefOfMissingImage) or (Right xmlContent). -fetchImage :: String -> String -> IO (Either String Content) +fetchImage :: PandocMonad m => String -> String -> m (Either String Content) fetchImage href link = do mbimg <- case (isURI link, readDataURI link) of @@ -227,28 +252,25 @@ fetchImage href link = do then return (Just (mime',base64)) else return Nothing (True, Just _) -> return Nothing -- not base64-encoded - (True, Nothing) -> fetchURL link - (False, _) -> do - d <- nothingOnError $ B.readFile (unEscapeString link) - let t = case map toLower (takeExtension link) of - ".png" -> Just "image/png" - ".jpg" -> Just "image/jpeg" - ".jpeg" -> Just "image/jpeg" - ".jpe" -> Just "image/jpeg" - _ -> Nothing -- only PNG and JPEG are supported in FB2 - return $ liftM2 (,) t (liftM (toStr . encode) d) + _ -> + catchError (do (bs, mbmime) <- P.fetchItem link + case mbmime of + Nothing -> do + report $ CouldNotDetermineMimeType link + return Nothing + Just mime -> return $ Just (mime, + B8.unpack $ encode bs)) + (\e -> + do report $ CouldNotFetchResource link (show e) + return Nothing) case mbimg of - Just (imgtype, imgdata) -> do + Just (imgtype, imgdata) -> return . Right $ el "binary" ( [uattr "id" href , uattr "content-type" imgtype] , txt imgdata ) _ -> return (Left ('#':href)) - where - nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString)) - nothingOnError action = liftM Just action `E.catch` omnihandler - omnihandler :: E.SomeException -> IO (Maybe B.ByteString) - omnihandler _ = return Nothing + -- | Extract mime type and encoded data from the Data URI. readDataURI :: String -- ^ URI @@ -276,8 +298,8 @@ isMimeType :: String -> Bool isMimeType s = case split (=='/') s of [mtype,msubtype] -> - ((map toLower mtype) `elem` types - || "x-" `isPrefixOf` (map toLower mtype)) + (map toLower mtype `elem` types + || "x-" `isPrefixOf` map toLower mtype) && all valid mtype && all valid msubtype _ -> False @@ -286,85 +308,63 @@ isMimeType s = valid c = isAscii c && not (isControl c) && not (isSpace c) && c `notElem` "()<>@,;:\\\"/[]?=" --- | Fetch URL, return its Content-Type and binary data on success. -fetchURL :: String -> IO (Maybe (String, String)) -fetchURL url = do - flip catchIO_ (return Nothing) $ do - r <- browse $ do - setOutHandler (const (return ())) - setAllowRedirects True - liftM snd . request . getRequest $ url - let content_type = lookupHeader HdrContentType (getHeaders r) - content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r - return $ liftM2 (,) content_type content - -toBS :: String -> B.ByteString -toBS = B.pack . map (toEnum . fromEnum) - -toStr :: B.ByteString -> String -toStr = map (toEnum . fromEnum) . B.unpack - footnoteID :: Int -> String -footnoteID i = "n" ++ (show i) +footnoteID i = "n" ++ show i linkID :: Int -> String -linkID i = "l" ++ (show i) +linkID i = "l" ++ show i -- | Convert a block-level Pandoc's element to FictionBook XML representation. -blockToXml :: Block -> FBM [Content] +blockToXml :: PandocMonad m => Block -> FBM m [Content] blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula -- title beginning with fig: indicates that the image is a figure blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) = insertImage NormalImage (Image atr alt (src,tit)) -blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss +blockToXml (Para ss) = (list . el "p") <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code") . lines $ s -blockToXml (RawBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code") . lines $ s +blockToXml b@(RawBlock _ _) = do + report $ BlockNotRendered b + return [] blockToXml (Div _ bs) = cMapM blockToXml bs -blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs -blockToXml (LineBlock lns) = blockToXml $ linesToPara lns +blockToXml (BlockQuote bs) = (list . el "cite") <$> cMapM blockToXml bs +blockToXml (LineBlock lns) = + (list . el "poem") <$> mapM stanza (split null lns) + where + v xs = el "v" <$> cMapM toXml xs + stanza xs = el "stanza" <$> mapM v xs blockToXml (OrderedList a bss) = do state <- get let pmrk = parentListMarker state - let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a + let markers = (pmrk ++) <$> orderedListMarkers a let mkitem mrk bs = do - modify (\s -> s { parentListMarker = mrk }) - itemtext <- cMapM blockToXml . paraToPlain $ bs + modify (\s -> s { parentListMarker = mrk ++ " "}) + item <- cMapM blockToXml $ plainToPara $ indentBlocks (mrk ++ " ") bs modify (\s -> s { parentListMarker = pmrk }) -- old parent marker - return . el "p" $ [ txt mrk, txt " " ] ++ itemtext - mapM (uncurry mkitem) (zip markers bss) + return item + concat <$> zipWithM mkitem markers bss blockToXml (BulletList bss) = do state <- get - let level = parentBulletLevel state let pmrk = parentListMarker state - let prefix = replicate (length pmrk) ' ' - let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"] - let mrk = prefix ++ bullets !! (level `mod` (length bullets)) + let mrk = pmrk ++ "•" let mkitem bs = do - modify (\s -> s { parentBulletLevel = (level+1) }) - itemtext <- cMapM blockToXml . paraToPlain $ bs - modify (\s -> s { parentBulletLevel = level }) -- restore bullet level - return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext - mapM mkitem bss + modify (\s -> s { parentListMarker = mrk ++ " "}) + item <- cMapM blockToXml $ plainToPara $ indentBlocks (mrk ++ " ") bs + modify (\s -> s { parentListMarker = pmrk }) -- old parent marker + return item + cMapM mkitem bss blockToXml (DefinitionList defs) = cMapM mkdef defs where mkdef (term, bss) = do - def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss + items <- cMapM (cMapM blockToXml . plainToPara . indentBlocks (replicate 4 ' ')) bss t <- wrap "strong" term - return [ el "p" t, el "p" def' ] - sep blocks = - if all needsBreak blocks then - blocks ++ [Plain [LineBreak]] - else - blocks - needsBreak (Para _) = False - needsBreak (Plain ins) = LineBreak `notElem` ins - needsBreak _ = True -blockToXml (Header _ _ _) = -- should never happen, see renderSections - error "unexpected header in section text" + return (el "p" t : items) +blockToXml h@Header{} = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return [] blockToXml HorizontalRule = return [ el "empty-line" () , el "p" (txt (replicate 10 '—')) @@ -372,45 +372,42 @@ blockToXml HorizontalRule = return blockToXml (Table caption aligns _ headers rows) = do hd <- mkrow "th" headers aligns bd <- mapM (\r -> mkrow "td" r aligns) rows - c <- return . el "emphasis" =<< cMapM toXml caption + c <- el "emphasis" <$> cMapM toXml caption return [el "table" (hd : bd), el "p" c] where - mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content + mkrow :: PandocMonad m => String -> [TableCell] -> [Alignment] -> FBM m Content mkrow tag cells aligns' = - (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns')) + el "tr" <$> mapM (mkcell tag) (zip cells aligns') -- - mkcell :: String -> (TableCell, Alignment) -> FBM Content + mkcell :: PandocMonad m => String -> (TableCell, Alignment) -> FBM m Content mkcell tag (cell, align) = do cblocks <- cMapM blockToXml cell return $ el tag ([align_attr align], cblocks) -- align_attr a = Attr (QName "align" Nothing Nothing) (align_str a) - align_str AlignLeft = "left" - align_str AlignCenter = "center" - align_str AlignRight = "right" + align_str AlignLeft = "left" + align_str AlignCenter = "center" + align_str AlignRight = "right" align_str AlignDefault = "left" blockToXml Null = return [] --- Replace paragraphs with plain text and line break. --- Necessary to simulate multi-paragraph lists in FB2. -paraToPlain :: [Block] -> [Block] -paraToPlain [] = [] -paraToPlain (Para inlines : rest) = - let p = (Plain (inlines ++ [LineBreak])) - in p : paraToPlain rest -paraToPlain (p:rest) = p : paraToPlain rest +-- Replace plain text with paragraphs and add line break after paragraphs. +-- It is used to convert plain text from tight list items to paragraphs. +plainToPara :: [Block] -> [Block] +plainToPara [] = [] +plainToPara (Plain inlines : rest) = + Para inlines : plainToPara rest +plainToPara (Para inlines : rest) = + Para inlines : Plain [LineBreak] : plainToPara rest +plainToPara (p:rest) = p : plainToPara rest -- Simulate increased indentation level. Will not really work -- for multi-line paragraphs. -indent :: Block -> Block -indent = indentBlock +indentPrefix :: String -> Block -> Block +indentPrefix spacer = indentBlock where - -- indentation space - spacer :: String - spacer = replicate 4 ' ' - -- - indentBlock (Plain ins) = Plain ((Str spacer):ins) - indentBlock (Para ins) = Para ((Str spacer):ins) + indentBlock (Plain ins) = Plain (Str spacer:ins) + indentBlock (Para ins) = Para (Str spacer:ins) indentBlock (CodeBlock a s) = let s' = unlines . map (spacer++) . lines $ s in CodeBlock a s' @@ -420,10 +417,21 @@ indent = indentBlock -- indent every (explicit) line indentLines :: [Inline] -> [Inline] indentLines ins = let lns = split isLineBreak ins :: [[Inline]] - in intercalate [LineBreak] $ map ((Str spacer):) lns + in intercalate [LineBreak] $ map (Str spacer:) lns + +indent :: Block -> Block +indent = indentPrefix spacer + where + -- indentation space + spacer :: String + spacer = replicate 4 ' ' + +indentBlocks :: String -> [Block] -> [Block] +indentBlocks _ [] = [] +indentBlocks prefix (x:xs) = indentPrefix prefix x : map (indentPrefix $ replicate (length prefix) ' ') xs -- | Convert a Pandoc's Inline element to FictionBook XML representation. -toXml :: Inline -> FBM [Content] +toXml :: PandocMonad m => Inline -> FBM m [Content] toXml (Str s) = return [txt s] toXml (Span _ ils) = cMapM toXml ils toXml (Emph ss) = list `liftM` wrap "emphasis" ss @@ -444,7 +452,9 @@ toXml Space = return [txt " "] toXml SoftBreak = return [txt " "] toXml LineBreak = return [el "empty-line" ()] toXml (Math _ formula) = insertMath InlineImage formula -toXml (RawInline _ _) = return [] -- raw TeX and raw HTML are suppressed +toXml il@(RawInline _ _) = do + report $ InlineNotRendered il + return [] -- raw TeX and raw HTML are suppressed toXml (Link _ text (url,ttl)) = do fns <- footnotes `liftM` get let n = 1 + length fns @@ -462,7 +472,7 @@ toXml (Link _ text (url,ttl)) = do ( [ attr ("l","href") ('#':ln_id) , uattr "type" "note" ] , ln_ref) ] -toXml img@(Image _ _ _) = insertImage InlineImage img +toXml img@Image{} = insertImage InlineImage img toXml (Note bs) = do fns <- footnotes `liftM` get let n = 1 + length fns @@ -474,9 +484,9 @@ toXml (Note bs) = do , uattr "type" "note" ] , fn_ref ) -insertMath :: ImageMode -> String -> FBM [Content] +insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content] insertMath immode formula = do - htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get + htmlMath <- fmap (writerHTMLMathMethod . writerOptions) get case htmlMath of WebTeX url -> do let alt = [Code nullAttr formula] @@ -485,7 +495,7 @@ insertMath immode formula = do insertImage immode img _ -> return [el "code" formula] -insertImage :: ImageMode -> Inline -> FBM [Content] +insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do images <- imagesToFetch `liftM` get let n = 1 + length images @@ -493,7 +503,7 @@ insertImage immode (Image _ alt (url,ttl)) = do modify (\s -> s { imagesToFetch = (fname, url) : images }) let ttlattr = case (immode, null ttl) of (NormalImage, False) -> [ uattr "title" ttl ] - _ -> [] + _ -> [] return . list $ el "image" $ [ attr ("l","href") ('#':fname) @@ -517,20 +527,20 @@ replaceImagesWithAlt missingHrefs body = else c in case XC.nextDF c' of (Just cnext) -> replaceAll cnext - Nothing -> c' -- end of document + Nothing -> c' -- end of document -- isImage :: Content -> Bool - isImage (Elem e) = (elName e) == (uname "image") - isImage _ = False + isImage (Elem e) = elName e == uname "image" + isImage _ = False -- - isMissing (Elem img@(Element _ _ _ _)) = + isMissing (Elem img@Element{}) = let imgAttrs = elAttribs img badAttrs = map (attr ("l","href")) missingHrefs in any (`elem` imgAttrs) badAttrs isMissing _ = False -- replaceNode :: Content -> Content - replaceNode n@(Elem img@(Element _ _ _ _)) = + replaceNode n@(Elem img@Element{}) = let attrs = elAttribs img alt = getAttrVal attrs (uname "alt") imtype = getAttrVal attrs (qname "l" "type") @@ -551,7 +561,7 @@ replaceImagesWithAlt missingHrefs body = -- | Wrap all inlines with an XML tag (given its unqualified name). -wrap :: String -> [Inline] -> FBM Content +wrap :: PandocMonad m => String -> [Inline] -> FBM m Content wrap tagname inlines = el tagname `liftM` cMapM toXml inlines -- " Create a singleton list. @@ -560,25 +570,25 @@ list = (:[]) -- | Convert an 'Inline' to plaintext. plain :: Inline -> String -plain (Str s) = s -plain (Emph ss) = concat (map plain ss) -plain (Span _ ss) = concat (map plain ss) -plain (Strong ss) = concat (map plain ss) -plain (Strikeout ss) = concat (map plain ss) -plain (Superscript ss) = concat (map plain ss) -plain (Subscript ss) = concat (map plain ss) -plain (SmallCaps ss) = concat (map plain ss) -plain (Quoted _ ss) = concat (map plain ss) -plain (Cite _ ss) = concat (map plain ss) -- FIXME -plain (Code _ s) = s -plain Space = " " -plain SoftBreak = " " -plain LineBreak = "\n" -plain (Math _ s) = s -plain (RawInline _ s) = s +plain (Str s) = s +plain (Emph ss) = cMap plain ss +plain (Span _ ss) = cMap plain ss +plain (Strong ss) = cMap plain ss +plain (Strikeout ss) = cMap plain ss +plain (Superscript ss) = cMap plain ss +plain (Subscript ss) = cMap plain ss +plain (SmallCaps ss) = cMap plain ss +plain (Quoted _ ss) = cMap plain ss +plain (Cite _ ss) = cMap plain ss -- FIXME +plain (Code _ s) = s +plain Space = " " +plain SoftBreak = " " +plain LineBreak = "\n" +plain (Math _ s) = s +plain (RawInline _ _) = "" plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"]) -plain (Image _ alt _) = concat (map plain alt) -plain (Note _) = "" -- FIXME +plain (Image _ alt _) = cMap plain alt +plain (Note _) = "" -- FIXME -- | Create an XML element. el :: (Node t) @@ -599,11 +609,11 @@ txt s = Text $ CData CDataText s Nothing -- | Create an XML attribute with an unqualified name. uattr :: String -> String -> Text.XML.Light.Attr -uattr name val = Attr (uname name) val +uattr name = Attr (uname name) -- | Create an XML attribute with a qualified name from given namespace. attr :: (String, String) -> String -> Text.XML.Light.Attr -attr (ns, name) val = Attr (qname ns name) val +attr (ns, name) = Attr (qname ns name) -- | Unqualified name uname :: String -> QName diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 3c8c264d2..1647df7ea 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,6 +1,10 @@ -{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE MultiWayIf #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +23,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,60 +32,92 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to HTML. -} -module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where -import Text.Pandoc.Definition +module Text.Pandoc.Writers.HTML ( + writeHtml4, + writeHtml4String, + writeHtml5, + writeHtml5String, + writeHtmlStringForEPUB, + writeS5, + writeSlidy, + writeSlideous, + writeDZSlides, + writeRevealJs, + tagWithAttributes + ) where +import Control.Monad.State.Strict +import Data.Char (ord, toLower) +import Data.List (intercalate, intersperse, isPrefixOf, partition) +import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options +import qualified Data.Set as Set +import Data.String (fromString) +import Data.Text (Text) +import qualified Data.Text.Lazy as TL +import Network.HTTP (urlEncode) +import Network.URI (URI (..), parseURIReference, unEscapeString) +import Numeric (showHex) +import Text.Blaze.Internal (customLeaf, MarkupM(Empty)) +#if MIN_VERSION_blaze_markup(0,6,3) +#else +import Text.Blaze.Internal (preEscapedString, preEscapedText) +#endif +import Text.Blaze.Html hiding (contents) +import Text.Pandoc.Definition +import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtmlInline, highlight, + styleToCss) import Text.Pandoc.ImageSize -import Text.Pandoc.Templates -import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Slides -import Text.Pandoc.Highlighting ( highlight, styleToCss, - formatHtmlInline, formatHtmlBlock ) -import Text.Pandoc.XML (fromEntities, escapeStringForXML) -import Network.URI ( parseURIReference, URI(..), unEscapeString ) -import Network.HTTP ( urlEncode ) -import Numeric ( showHex ) -import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, intersperse ) -import Data.String ( fromString ) -import Data.Maybe ( catMaybes, fromMaybe, isJust ) -import Control.Monad.State -import Text.Blaze.Html hiding(contents) +import Text.Pandoc.Templates +import Text.Pandoc.Walk +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML (escapeStringForXML, fromEntities) #if MIN_VERSION_blaze_markup(0,6,3) #else -import Text.Blaze.Internal(preEscapedString) +import Text.Blaze.Internal (preEscapedString, preEscapedText) #endif #if MIN_VERSION_blaze_html(0,5,1) import qualified Text.Blaze.XHtml5 as H5 #else import qualified Text.Blaze.Html5 as H5 #endif +import Control.Monad.Except (throwError) +import Data.Aeson (Value) +import System.FilePath (takeBaseName, takeExtension) +import Text.Blaze.Html.Renderer.Text (renderHtml) import qualified Text.Blaze.XHtml1.Transitional as H import qualified Text.Blaze.XHtml1.Transitional.Attributes as A -import Text.Blaze.Html.Renderer.String (renderHtml) +import Text.Pandoc.Class (PandocMonad, report, runPure) +import Text.Pandoc.Error +import Text.Pandoc.Logging import Text.TeXMath -import Text.XML.Light.Output -import Text.XML.Light (unode, elChildren, unqual) +import Text.XML.Light (elChildren, unode, unqual) import qualified Text.XML.Light as XML -import System.FilePath (takeExtension) -import Data.Aeson (Value) +import Text.XML.Light.Output data WriterState = WriterState - { stNotes :: [Html] -- ^ List of notes - , stMath :: Bool -- ^ Math is used in document - , stQuotes :: Bool -- ^ <q> tag is used - , stHighlighting :: Bool -- ^ Syntax highlighting is used - , stSecNum :: [Int] -- ^ Number of current section - , stElement :: Bool -- ^ Processing an Element + { stNotes :: [Html] -- ^ List of notes + , stMath :: Bool -- ^ Math is used in document + , stQuotes :: Bool -- ^ <q> tag is used + , stHighlighting :: Bool -- ^ Syntax highlighting is used + , stSecNum :: [Int] -- ^ Number of current section + , stElement :: Bool -- ^ Processing an Element + , stHtml5 :: Bool -- ^ Use HTML5 + , stEPUBVersion :: Maybe EPUBVersion -- ^ EPUB version if for epub + , stSlideVariant :: HTMLSlideVariant + , stCodeBlockNum :: Int -- ^ Number of code block } defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, stHighlighting = False, stSecNum = [], - stElement = False} + stElement = False, stHtml5 = False, + stEPUBVersion = Nothing, + stSlideVariant = NoSlides, + stCodeBlockNum = 0} -- Helpers to render HTML with the appropriate function. @@ -98,62 +134,141 @@ nl opts = if writerWrapText opts == WrapNone then mempty else preEscapedString "\n" --- | Convert Pandoc document to Html string. -writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts d = - let (body, context) = evalState (pandocToHtml opts d) defaultWriterState - in case writerTemplate opts of - Nothing -> renderHtml body - Just tpl -> renderTemplate' tpl $ - defField "body" (renderHtml body) context - --- | Convert Pandoc document to Html structure. -writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts d = - let (body, context) = evalState (pandocToHtml opts d) defaultWriterState - in case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl $ - defField "body" (renderHtml body) context +-- | Convert Pandoc document to Html 5 string. +writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeHtml5String = writeHtmlString' + defaultWriterState{ stHtml5 = True } + +-- | Convert Pandoc document to Html 5 structure. +writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html +writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True } + +-- | Convert Pandoc document to Html 4 string. +writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeHtml4String = writeHtmlString' + defaultWriterState{ stHtml5 = False } + +-- | Convert Pandoc document to Html 4 structure. +writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html +writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False } + +-- | Convert Pandoc document to Html appropriate for an epub version. +writeHtmlStringForEPUB :: PandocMonad m + => EPUBVersion -> WriterOptions -> Pandoc + -> m Text +writeHtmlStringForEPUB version o = writeHtmlString' + defaultWriterState{ stHtml5 = version == EPUB3, + stEPUBVersion = Just version } o + +-- | Convert Pandoc document to Reveal JS HTML slide show. +writeRevealJs :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeRevealJs = writeHtmlSlideShow' RevealJsSlides + +-- | Convert Pandoc document to S5 HTML slide show. +writeS5 :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeS5 = writeHtmlSlideShow' S5Slides + +-- | Convert Pandoc document to Slidy HTML slide show. +writeSlidy :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeSlidy = writeHtmlSlideShow' SlidySlides + +-- | Convert Pandoc document to Slideous HTML slide show. +writeSlideous :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeSlideous = writeHtmlSlideShow' SlideousSlides + +-- | Convert Pandoc document to DZSlides HTML slide show. +writeDZSlides :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeDZSlides = writeHtmlSlideShow' DZSlides + +writeHtmlSlideShow' :: PandocMonad m + => HTMLSlideVariant -> WriterOptions -> Pandoc -> m Text +writeHtmlSlideShow' variant = writeHtmlString' + defaultWriterState{ stSlideVariant = variant + , stHtml5 = case variant of + RevealJsSlides -> True + S5Slides -> False + SlidySlides -> False + DZSlides -> True + SlideousSlides -> False + NoSlides -> False + } + +renderHtml' :: Html -> Text +renderHtml' = TL.toStrict . renderHtml + +writeHtmlString' :: PandocMonad m + => WriterState -> WriterOptions -> Pandoc -> m Text +writeHtmlString' st opts d = do + (body, context) <- evalStateT (pandocToHtml opts d) st + case writerTemplate opts of + Nothing -> return $ renderHtml' body + Just tpl -> do + -- warn if empty lang + when (isNothing (getField "lang" context :: Maybe String)) $ + report NoLangSpecified + -- check for empty pagetitle + context' <- + case getField "pagetitle" context of + Just (s :: String) | not (null s) -> return context + _ -> do + let fallback = fromMaybe "Untitled" $ takeBaseName <$> + lookup "sourcefile" (writerVariables opts) + report $ NoTitleElement fallback + return $ resetField "pagetitle" fallback context + renderTemplate' tpl $ + defField "body" (renderHtml' body) context' + +writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html +writeHtml' st opts d = + case writerTemplate opts of + Just _ -> preEscapedText <$> writeHtmlString' st opts d + Nothing -> do + (body, _) <- evalStateT (pandocToHtml opts d) st + return body -- result is (title, authors, date, toc, body, new variables) -pandocToHtml :: WriterOptions +pandocToHtml :: PandocMonad m + => WriterOptions -> Pandoc - -> State WriterState (Html, Value) + -> StateT WriterState m (Html, Value) pandocToHtml opts (Pandoc meta blocks) = do metadata <- metaToJSON opts - (fmap renderHtml . blockListToHtml opts) - (fmap renderHtml . inlineListToHtml opts) + (fmap renderHtml' . blockListToHtml opts) + (fmap renderHtml' . inlineListToHtml opts) meta let stringifyHTML = escapeStringForXML . stringify let authsMeta = map stringifyHTML $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts + slideVariant <- gets stSlideVariant let sects = hierarchicalize $ - if writerSlideVariant opts == NoSlides + if slideVariant == NoSlides then blocks else prepSlides slideLevel blocks - toc <- if writerTableOfContents opts - then tableOfContents opts sects + toc <- if writerTableOfContents opts && slideVariant /= S5Slides + then fmap renderHtml' <$> tableOfContents opts sects else return Nothing blocks' <- liftM (mconcat . intersperse (nl opts)) $ mapM (elementToHtml slideLevel opts) sects st <- get - let notes = reverse (stNotes st) - let thebody = blocks' >> footnoteSection opts notes + notes <- footnoteSection opts (reverse (stNotes st)) + let thebody = blocks' >> notes let math = case writerHTMLMathMethod opts of LaTeXMathML (Just url) -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty - MathML (Just url) -> + MathJax url + | slideVariant /= RevealJsSlides -> + -- mathjax is handled via a special plugin in revealjs H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ mempty - MathJax url -> - H.script ! A.src (toValue url) - ! A.type_ "text/javascript" - $ case writerSlideVariant opts of + $ case slideVariant of SlideousSlides -> preEscapedString "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" @@ -162,37 +277,55 @@ pandocToHtml opts (Pandoc meta blocks) = do H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty - KaTeX js css -> - (H.script ! A.src (toValue js) $ mempty) <> - (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> - (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) + KaTeX url -> + (H.script ! + A.src (toValue $ url ++ "katex.min.js") $ mempty) <> + (H.script ! + A.src (toValue $ url ++ "contrib/auto-render.min.js") + $ mempty) <> + ( + H.script + "document.addEventListener(\"DOMContentLoaded\", function() {\n renderMathInElement(document.body);\n});") <> + (H.link ! A.rel "stylesheet" ! + A.href (toValue $ url ++ "katex.min.css")) + _ -> case lookup "mathml-script" (writerVariables opts) of - Just s | not (writerHtml5 opts) -> + Just s | not (stHtml5 st) -> H.script ! A.type_ "text/javascript" $ preEscapedString ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n") | otherwise -> mempty Nothing -> mempty let context = (if stHighlighting st - then defField "highlighting-css" - (styleToCss $ writerHighlightStyle opts) + then case writerHighlightStyle opts of + Just sty -> defField "highlighting-css" + (styleToCss sty) + Nothing -> id else id) $ (if stMath st - then defField "math" (renderHtml math) + then defField "math" (renderHtml' math) else id) $ + defField "mathjax" + (case writerHTMLMathMethod opts of + MathJax _ -> True + _ -> False) $ defField "quotes" (stQuotes st) $ - maybe id (defField "toc" . renderHtml) toc $ + -- for backwards compatibility we populate toc + -- with the contents of the toc, rather than a + -- boolean: + maybe id (defField "toc") toc $ + maybe id (defField "table-of-contents") toc $ defField "author-meta" authsMeta $ maybe id (defField "date-meta") (normalizeDate dateMeta) $ - defField "pagetitle" (stringifyHTML $ docTitle meta) $ + defField "pagetitle" (stringifyHTML (docTitle meta)) $ defField "idprefix" (writerIdentifierPrefix opts) $ -- these should maybe be set in pandoc.hs defField "slidy-url" - ("http://www.w3.org/Talks/Tools/Slidy2" :: String) $ + ("https://www.w3.org/Talks/Tools/Slidy2" :: String) $ defField "slideous-url" ("slideous" :: String) $ defField "revealjs-url" ("reveal.js" :: String) $ defField "s5-url" ("s5/default" :: String) $ - defField "html5" (writerHtml5 opts) $ + defField "html5" (stHtml5 st) metadata return (thebody, context) @@ -200,44 +333,52 @@ pandocToHtml opts (Pandoc meta blocks) = do prefixedId :: WriterOptions -> String -> Attribute prefixedId opts s = case s of - "" -> mempty - _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s + "" -> mempty + _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s -toList :: (Html -> Html) -> WriterOptions -> ([Html] -> Html) +toList :: PandocMonad m + => (Html -> Html) + -> WriterOptions + -> [Html] + -> StateT WriterState m Html toList listop opts items = do - if (writerIncremental opts) - then if (writerSlideVariant opts /= RevealJsSlides) - then (listop $ mconcat items) ! A.class_ "incremental" - else listop $ mconcat $ map (! A.class_ "fragment") items - else listop $ mconcat items + slideVariant <- gets stSlideVariant + return $ + if writerIncremental opts + then if slideVariant /= RevealJsSlides + then listop (mconcat items) ! A.class_ "incremental" + else listop $ mconcat $ map (! A.class_ "fragment") items + else listop $ mconcat items -unordList :: WriterOptions -> [Html] -> Html +unordList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html unordList opts = toList H.ul opts . toListItems opts -ordList :: WriterOptions -> [Html] -> Html +ordList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html ordList opts = toList H.ol opts . toListItems opts -defList :: WriterOptions -> [Html] -> Html +defList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html defList opts items = toList H.dl opts (items ++ [nl opts]) -- | Construct table of contents from list of elements. -tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html) +tableOfContents :: PandocMonad m => WriterOptions -> [Element] -> StateT WriterState m (Maybe Html) tableOfContents _ [] = return Nothing tableOfContents opts sects = do - let opts' = opts { writerIgnoreNotes = True } - contents <- mapM (elementToListItem opts') sects + contents <- mapM (elementToListItem opts) sects let tocList = catMaybes contents - return $ if null tocList - then Nothing - else Just $ unordList opts tocList + if null tocList + then return Nothing + else Just <$> unordList opts tocList -- | Convert section number to string showSecNum :: [Int] -> String -showSecNum = concat . intersperse "." . map show +showSecNum = intercalate "." . map show -- | Converts an Element to a list item for a table of contents, -- retrieving the appropriate identifier from state. -elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html) +elementToListItem :: PandocMonad m => WriterOptions -> Element -> StateT WriterState m (Maybe Html) -- Don't include the empty headers created in slide shows -- shows when an hrule is used to separate slides without a new title: elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing @@ -249,45 +390,52 @@ elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs) then (H.span ! A.class_ "toc-section-number" $ toHtml $ showSecNum num') >> preEscapedString " " else mempty - txt <- liftM (sectnum >>) $ inlineListToHtml opts headerText + txt <- liftM (sectnum >>) $ inlineListToHtml opts $ walk deNote headerText subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes - let subList = if null subHeads - then mempty - else unordList opts subHeads + subList <- if null subHeads + then return mempty + else unordList opts subHeads -- in reveal.js, we need #/apples, not #apples: - let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides] + slideVariant <- gets stSlideVariant + let revealSlash = ['/' | slideVariant== RevealJsSlides] return $ Just $ if null id' - then (H.a $ toHtml txt) >> subList + then H.a (toHtml txt) >> subList else (H.a ! A.href (toValue $ "#" ++ revealSlash ++ writerIdentifierPrefix opts ++ id') $ toHtml txt) >> subList elementToListItem _ _ = return Nothing -- | Convert an Element to Html. -elementToHtml :: Int -> WriterOptions -> Element -> State WriterState Html +elementToHtml :: PandocMonad m => Int -> WriterOptions -> Element -> StateT WriterState m Html elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elements) = do - let slide = writerSlideVariant opts /= NoSlides && level <= slideLevel + slideVariant <- gets stSlideVariant + let slide = slideVariant /= NoSlides && level <= slideLevel let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0) modify $ \st -> st{stSecNum = num'} -- update section number + html5 <- gets stHtml5 let titleSlide = slide && level < slideLevel header' <- if title' == [Str "\0"] -- marker for hrule then return mempty else do modify (\st -> st{ stElement = True}) + let level' = if level <= slideLevel && + slideVariant == SlidySlides + then 1 -- see #3566 + else level res <- blockToHtml opts - (Header level (id',classes,keyvals) title') + (Header level' (id',classes,keyvals) title') modify (\st -> st{ stElement = False}) return res - let isSec (Sec _ _ _ _ _) = True - isSec (Blk _) = False + let isSec Sec{} = True + isSec (Blk _) = False let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] isPause _ = False - let fragmentClass = case writerSlideVariant opts of - RevealJsSlides -> "fragment" - _ -> "incremental" + let fragmentClass = case slideVariant of + RevealJsSlides -> "fragment" + _ -> "incremental" let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\"" ++ fragmentClass ++ "\">")) : (xs ++ [Blk (RawBlock (Format "html") "</div>")]) @@ -299,45 +447,51 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen [] -> [] (x:xs) -> x ++ concatMap inDiv xs let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] - let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ + let classes' = ["title-slide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && - not (writerHtml5 opts) ] ++ + not html5 ] ++ ["level" ++ show level | slide || writerSectionDivs opts ] ++ classes - let secttag = if writerHtml5 opts + let secttag = if html5 then H5.section else H.div let attr = (id',classes',keyvals) - return $ if titleSlide - then (if writerSlideVariant opts == RevealJsSlides - then H5.section - else id) $ mconcat $ - (addAttrs opts attr $ secttag $ header') : innerContents - else if writerSectionDivs opts || slide - then addAttrs opts attr - $ secttag $ inNl $ header' : innerContents - else mconcat $ intersperse (nl opts) - $ addAttrs opts attr header' : innerContents + if titleSlide + then do + t <- addAttrs opts attr $ + secttag header' + return $ + (if slideVariant == RevealJsSlides + then H5.section + else id) $ mconcat $ t : innerContents + else if writerSectionDivs opts || slide + then addAttrs opts attr + $ secttag $ inNl $ header' : innerContents + else do + t <- addAttrs opts attr header' + return $ mconcat $ intersperse (nl opts) (t : innerContents) -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. -footnoteSection :: WriterOptions -> [Html] -> Html -footnoteSection opts notes = - if null notes - then mempty - else nl opts >> (container - $ nl opts >> hrtag >> nl opts >> - H.ol (mconcat notes >> nl opts) >> nl opts) - where container x = if writerHtml5 opts - then H5.section ! A.class_ "footnotes" $ x - else if writerSlideVariant opts /= NoSlides - then H.div ! A.class_ "footnotes slide" $ x - else H.div ! A.class_ "footnotes" $ x - hrtag = if writerHtml5 opts then H5.hr else H.hr +footnoteSection :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html +footnoteSection opts notes = do + html5 <- gets stHtml5 + slideVariant <- gets stSlideVariant + let hrtag = if html5 then H5.hr else H.hr + let container x + | html5 = H5.section ! A.class_ "footnotes" $ x + | slideVariant /= NoSlides = H.div ! A.class_ "footnotes slide" $ x + | otherwise = H.div ! A.class_ "footnotes" $ x + return $ + if null notes + then mempty + else nl opts >> container (nl opts >> hrtag >> nl opts >> + H.ol (mconcat notes >> nl opts) >> nl opts) -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: String -> Maybe (String, String) -parseMailto s = do +parseMailto s = case break (==':') s of (xs,':':addr) | map toLower xs == "mailto" -> do let (name', rest) = span (/='@') addr @@ -346,10 +500,12 @@ parseMailto s = do _ -> fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. -obfuscateLink :: WriterOptions -> Attr -> Html -> String -> Html +obfuscateLink :: PandocMonad m + => WriterOptions -> Attr -> Html -> String + -> StateT WriterState m Html obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = addAttrs opts attr $ H.a ! A.href (toValue s) $ txt -obfuscateLink opts attr (renderHtml -> txt) s = +obfuscateLink opts attr (TL.unpack . renderHtml -> txt) s = let meth = writerEmailObfuscation opts s' = map toLower (take 7 s) ++ drop 7 s in case parseMailto s' of @@ -361,20 +517,25 @@ obfuscateLink opts attr (renderHtml -> txt) s = then ("e", name' ++ " at " ++ domain') else ("'" ++ obfuscateString txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") + (_, classNames, _) = attr + classNamesStr = concatMap (' ':) classNames in case meth of ReferenceObfuscation -> -- need to use preEscapedString or &'s are escaped to & in URL - preEscapedString $ "<a href=\"" ++ (obfuscateString s') - ++ "\" class=\"email\">" ++ (obfuscateString txt) ++ "</a>" + return $ + preEscapedString $ "<a href=\"" ++ obfuscateString s' + ++ "\" class=\"email\">" ++ obfuscateString txt ++ "</a>" JavascriptObfuscation -> + return $ (H.script ! A.type_ "text/javascript" $ preEscapedString ("\n<!--\nh='" ++ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ obfuscateString name' ++ "';e=n+a+h;\n" ++ - "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail\">'+" ++ + "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail" ++ + classNamesStr ++ "\">'+" ++ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >> H.noscript (preEscapedString $ obfuscateString altText) - _ -> error $ "Unknown obfuscation method: " ++ show meth + _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth _ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email -- | Obfuscate character as entity. @@ -388,36 +549,71 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . fromEntities -addAttrs :: WriterOptions -> Attr -> Html -> Html -addAttrs opts attr h = foldl (!) h (attrsToHtml opts attr) +-- | Create HTML tag with attributes. +tagWithAttributes :: WriterOptions + -> Bool -- ^ True for HTML5 + -> Bool -- ^ True if self-closing tag + -> Text -- ^ Tag text + -> Attr -- ^ Pandoc style tag attributes + -> Text +tagWithAttributes opts html5 selfClosing tagname attr = + let mktag = (TL.toStrict . renderHtml <$> evalStateT + (addAttrs opts attr (customLeaf (textTag tagname) selfClosing)) + defaultWriterState{ stHtml5 = html5 }) + in case runPure mktag of + Left _ -> mempty + Right t -> t -toAttrs :: [(String, String)] -> [Attribute] -toAttrs kvs = map (\(x,y) -> customAttribute (fromString x) (toValue y)) kvs +addAttrs :: PandocMonad m + => WriterOptions -> Attr -> Html -> StateT WriterState m Html +addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr -attrsToHtml :: WriterOptions -> Attr -> [Attribute] -attrsToHtml opts (id',classes',keyvals) = - [prefixedId opts id' | not (null id')] ++ - [A.class_ (toValue $ unwords classes') | not (null classes')] ++ toAttrs keyvals +toAttrs :: PandocMonad m + => [(String, String)] -> StateT WriterState m [Attribute] +toAttrs kvs = do + html5 <- gets stHtml5 + return $ map (\(x,y) -> + customAttribute + (fromString (if not html5 || x `Set.member` html5Attributes + || "data-" `isPrefixOf` x + then x + else "data-" ++ x)) (toValue y)) kvs -imgAttrsToHtml :: WriterOptions -> Attr -> [Attribute] -imgAttrsToHtml opts attr = - attrsToHtml opts (ident,cls,kvs') ++ - toAttrs (dimensionsToAttrList opts attr) +attrsToHtml :: PandocMonad m + => WriterOptions -> Attr -> StateT WriterState m [Attribute] +attrsToHtml opts (id',classes',keyvals) = do + attrs <- toAttrs keyvals + return $ + [prefixedId opts id' | not (null id')] ++ + [A.class_ (toValue $ unwords classes') | not (null classes')] ++ attrs + +imgAttrsToHtml :: PandocMonad m + => WriterOptions -> Attr -> StateT WriterState m [Attribute] +imgAttrsToHtml opts attr = do + attrs <- attrsToHtml opts (ident,cls,kvs') + dimattrs <- toAttrs (dimensionsToAttrList attr) + return $ attrs ++ dimattrs where (ident,cls,kvs) = attr kvs' = filter isNotDim kvs isNotDim ("width", _) = False isNotDim ("height", _) = False - isNotDim _ = True + isNotDim _ = True -dimensionsToAttrList :: WriterOptions -> Attr -> [(String, String)] -dimensionsToAttrList opts attr = (go Width) ++ (go Height) +dimensionsToAttrList :: Attr -> [(String, String)] +dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height where - go dir = case (dimension dir attr) of - (Just (Percent a)) -> [("style", show dir ++ ":" ++ show (Percent a))] - (Just dim) -> [(show dir, showInPixel opts dim)] - _ -> [] - + consolidateStyles :: [(String, String)] -> [(String, String)] + consolidateStyles xs = + case partition isStyle xs of + ([], _) -> xs + (ss, rest) -> ("style", intercalate ";" $ map snd ss) : rest + isStyle ("style", _) = True + isStyle _ = False + go dir = case dimension dir attr of + (Just (Pixel a)) -> [(show dir, show a)] + (Just x) -> [("style", show dir ++ ":" ++ show x)] + Nothing -> [] imageExts :: [String] imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", @@ -427,71 +623,121 @@ imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", treatAsImage :: FilePath -> Bool treatAsImage fp = - let path = case uriPath `fmap` parseURIReference fp of - Nothing -> fp - Just up -> up + let path = maybe fp uriPath (parseURIReference fp) ext = map toLower $ drop 1 $ takeExtension path in null ext || ext `elem` imageExts --- | Convert Pandoc block element to HTML. -blockToHtml :: WriterOptions -> Block -> State WriterState Html -blockToHtml _ Null = return mempty -blockToHtml opts (Plain lst) = inlineListToHtml opts lst --- title beginning with fig: indicates that the image is a figure -blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do +figure :: PandocMonad m + => WriterOptions -> Attr -> [Inline] -> (String, String) + -> StateT WriterState m Html +figure opts attr txt (s,tit) = do img <- inlineToHtml opts (Image attr txt (s,tit)) - let tocapt = if writerHtml5 opts + html5 <- gets stHtml5 + let tocapt = if html5 then H5.figcaption else H.p ! A.class_ "caption" capt <- if null txt then return mempty else tocapt `fmap` inlineListToHtml opts txt - return $ if writerHtml5 opts + return $ if html5 then H5.figure $ mconcat [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat [nl opts, img, nl opts, capt, nl opts] + +-- | Convert Pandoc block element to HTML. +blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html +blockToHtml _ Null = return mempty +blockToHtml opts (Plain lst) = inlineListToHtml opts lst +blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) + | "stretch" `elem` classes = do + slideVariant <- gets stSlideVariant + case slideVariant of + RevealJsSlides -> + -- a "stretched" image in reveal.js must be a direct child + -- of the slide container + inlineToHtml opts (Image attr txt (src, tit)) + _ -> figure opts attr txt (src, tit) +-- title beginning with fig: indicates that the image is a figure +blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = + figure opts attr txt (s,tit) blockToHtml opts (Para lst) | isEmptyRaw lst = return mempty + | null lst && not (isEnabled Ext_empty_paragraphs opts) = return mempty | otherwise = do contents <- inlineListToHtml opts lst return $ H.p contents where - isEmptyRaw [RawInline f _] = f /= (Format "html") - isEmptyRaw _ = False + isEmptyRaw [RawInline f _] = f `notElem` [Format "html", + Format "html4", Format "html5"] + isEmptyRaw _ = False blockToHtml opts (LineBlock lns) = if writerWrapText opts == WrapNone then blockToHtml opts $ linesToPara lns else do - let lf = preEscapedString "\n" - htmlLines <- mconcat . intersperse lf <$> mapM (inlineListToHtml opts) lns - return $ H.div ! A.style "white-space: pre-line;" $ htmlLines -blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do + htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns + return $ H.div ! A.class_ "line-block" $ htmlLines +blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do + html5 <- gets stHtml5 + slideVariant <- gets stSlideVariant + let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++ + [("style", "width:" ++ w ++ ";") + | ("width",w) <- kvs', "column" `elem` classes] let speakerNotes = "notes" `elem` classes -- we don't want incremental output inside speaker notes, see #1394 - let opts' = if speakerNotes then opts{ writerIncremental = False } else opts - contents <- blockListToHtml opts' bs + let opts' = if | speakerNotes -> opts{ writerIncremental = False } + | "incremental" `elem` classes -> opts{ writerIncremental = True } + | "nonincremental" `elem` classes -> opts{ writerIncremental = False } + | otherwise -> opts + -- we remove "incremental" and "nonincremental" if we're in a + -- slide presentaiton format. + classes' = case slideVariant of + NoSlides -> classes + _ -> filter (\k -> k /= "incremental" && k /= "nonincremental") classes + contents <- if "columns" `elem` classes' + then -- we don't use blockListToHtml because it inserts + -- a newline between the column divs, which throws + -- off widths! see #4028 + mconcat <$> mapM (blockToHtml opts) bs + else blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts - let (divtag, classes') = if writerHtml5 opts && "section" `elem` classes - then (H5.section, filter (/= "section") classes) - else (H.div, classes) - return $ - if speakerNotes - then case writerSlideVariant opts of - RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' - DZSlides -> (addAttrs opts' attr $ H5.div $ contents') - ! (H5.customAttribute "role" "note") - NoSlides -> addAttrs opts' attr $ H.div $ contents' - _ -> mempty - else addAttrs opts (ident, classes', kvs) $ divtag $ contents' -blockToHtml opts (RawBlock f str) - | f == Format "html" = return $ preEscapedString str - | (f == Format "latex" || f == Format "tex") && - allowsMathEnvironments (writerHTMLMathMethod opts) && - isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str] - | otherwise = return mempty -blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr + let (divtag, classes'') = if html5 && "section" `elem` classes' + then (H5.section, filter (/= "section") classes') + else (H.div, classes') + if speakerNotes + then case slideVariant of + RevealJsSlides -> addAttrs opts' attr $ + H5.aside contents' + DZSlides -> do + t <- addAttrs opts' attr $ + H5.div contents' + return $ t ! H5.customAttribute "role" "note" + NoSlides -> addAttrs opts' attr $ + H.div contents' + _ -> return mempty + else addAttrs opts (ident, classes'', kvs) $ + divtag contents' +blockToHtml opts (RawBlock f str) = do + ishtml <- isRawHtml f + if ishtml + then return $ preEscapedString str + else if (f == Format "latex" || f == Format "tex") && + allowsMathEnvironments (writerHTMLMathMethod opts) && + isMathEnvironment str + then blockToHtml opts $ Plain [Math DisplayMath str] + else do + report $ BlockNotRendered (RawBlock f str) + return mempty +blockToHtml _ HorizontalRule = do + html5 <- gets stHtml5 + return $ if html5 then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do + id'' <- if null id' + then do + modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 } + codeblocknum <- gets stCodeBlockNum + return ("cb" ++ show codeblocknum) + else return id' let tolhs = isEnabled Ext_literate_haskell opts && any (\c -> map toLower c == "haskell") classes && any (\c -> map toLower c == "literate") classes @@ -503,19 +749,24 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do adjCode = if tolhs then unlines . map ("> " ++) . lines $ rawCode else rawCode - hlCode = if writerHighlight opts -- check highlighting options - then highlight formatHtmlBlock (id',classes',keyvals) adjCode - else Nothing + hlCode = if isJust (writerHighlightStyle opts) + then highlight (writerSyntaxMap opts) formatHtmlBlock + (id'',classes',keyvals) adjCode + else Left "" case hlCode of - Nothing -> return $ addAttrs opts (id',classes,keyvals) - $ H.pre $ H.code $ toHtml adjCode - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (addAttrs opts (id',[],keyvals) h) -blockToHtml opts (BlockQuote blocks) = + Left msg -> do + unless (null msg) $ + report $ CouldNotHighlight msg + addAttrs opts (id',classes,keyvals) + $ H.pre $ H.code $ toHtml adjCode + Right h -> modify (\st -> st{ stHighlighting = True }) >> + addAttrs opts (id'',[],keyvals) h +blockToHtml opts (BlockQuote blocks) = do -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental - if writerSlideVariant opts /= NoSlides + slideVariant <- gets stSlideVariant + if slideVariant /= NoSlides then let inc = not (writerIncremental opts) in case blocks of [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) @@ -541,7 +792,7 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do $ showSecNum secnum) >> strToHtml " " >> contents else contents inElement <- gets stElement - return $ (if inElement then id else addAttrs opts attr) + (if inElement then return else addAttrs opts attr) $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' @@ -552,20 +803,17 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do _ -> H.p contents' blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst - return $ unordList opts contents + unordList opts contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst + html5 <- gets stHtml5 let numstyle' = case numstyle of Example -> "decimal" _ -> camelCaseToHyphenated $ show numstyle - let attribs = (if startnum /= 1 - then [A.start $ toValue startnum] - else []) ++ - (if numstyle == Example - then [A.class_ "example"] - else []) ++ + let attribs = [A.start $ toValue startnum | startnum /= 1] ++ + [A.class_ "example" | numstyle == Example] ++ (if numstyle /= DefaultStyle - then if writerHtml5 opts + then if html5 then [A.type_ $ case numstyle of Decimal -> "1" @@ -577,23 +825,25 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do else [A.style $ toValue $ "list-style-type: " ++ numstyle'] else []) - return $ foldl (!) (ordList opts contents) attribs + l <- ordList opts contents + return $ foldl (!) l attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- if null term then return mempty else liftM H.dt $ inlineListToHtml opts term - defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . + defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) . blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : intersperse (nl opts) defs') lst - return $ defList opts contents + defList opts contents blockToHtml opts (Table capt aligns widths headers rows') = do captionDoc <- if null capt then return mempty else do cs <- inlineListToHtml opts capt return $ H.caption cs >> nl opts + html5 <- gets stHtml5 let percent w = show (truncate (100*w) :: Integer) ++ "%" let coltags = if all (== 0.0) widths then mempty @@ -601,7 +851,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do H.colgroup $ do nl opts mapM_ (\w -> do - if writerHtml5 opts + if html5 then H.col ! A.style (toValue $ "width: " ++ percent w) else H.col ! A.width (toValue $ percent w) @@ -624,18 +874,19 @@ blockToHtml opts (Table capt aligns widths headers rows') = do else tbl ! A.style (toValue $ "width:" ++ show (round (totalWidth * 100) :: Int) ++ "%;") -tableRowToHtml :: WriterOptions +tableRowToHtml :: PandocMonad m + => WriterOptions -> [Alignment] -> Int -> [[Block]] - -> State WriterState Html + -> StateT WriterState m Html tableRowToHtml opts aligns rownum cols' = do let mkcell = if rownum == 0 then H.th else H.td let rowclass = case rownum of - 0 -> "header" + 0 -> "header" x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith + _ -> "even" + cols'' <- zipWithM (\alignment item -> tableItemToHtml opts mkcell alignment item) aligns cols' return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'') @@ -648,21 +899,23 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "" -tableItemToHtml :: WriterOptions +tableItemToHtml :: PandocMonad m + => WriterOptions -> (Html -> Html) -> Alignment -> [Block] - -> State WriterState Html + -> StateT WriterState m Html tableItemToHtml opts tag' align' item = do contents <- blockListToHtml opts item + html5 <- gets stHtml5 let alignStr = alignmentToString align' - let attribs = if writerHtml5 opts + let attribs = if html5 then A.style (toValue $ "text-align: " ++ alignStr ++ ";") else A.align (toValue alignStr) let tag'' = if null alignStr then tag' else tag' ! attribs - return $ (tag'' $ contents) >> nl opts + return $ tag'' contents >> nl opts toListItems :: WriterOptions -> [Html] -> [Html] toListItems opts items = map (toListItem opts) items ++ [nl opts] @@ -670,12 +923,16 @@ toListItems opts items = map (toListItem opts) items ++ [nl opts] toListItem :: WriterOptions -> Html -> Html toListItem opts item = nl opts >> H.li item -blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html +blockListToHtml :: PandocMonad m + => WriterOptions -> [Block] -> StateT WriterState m Html blockListToHtml opts lst = - fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst + (mconcat . intersperse (nl opts) . filter nonempty) + <$> mapM (blockToHtml opts) lst + where nonempty (Empty _) = False + nonempty _ = True -- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html +inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . mconcat @@ -684,9 +941,9 @@ annotateMML :: XML.Element -> String -> XML.Element annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)]) where cs = case elChildren e of - [] -> unode "mrow" () + [] -> unode "mrow" () [x] -> x - xs -> unode "mrow" xs + xs -> unode "mrow" xs math childs = XML.Element q as [XML.Elem childs] l where (XML.Element q as _ l) = e @@ -694,27 +951,29 @@ annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, -- | Convert Pandoc inline element to HTML. -inlineToHtml :: WriterOptions -> Inline -> State WriterState Html -inlineToHtml opts inline = +inlineToHtml :: PandocMonad m + => WriterOptions -> Inline -> StateT WriterState m Html +inlineToHtml opts inline = do + html5 <- gets stHtml5 case inline of (Str str) -> return $ strToHtml str - (Space) -> return $ strToHtml " " - (SoftBreak) -> return $ case writerWrapText opts of - WrapNone -> preEscapedString " " - WrapAuto -> preEscapedString " " - WrapPreserve -> preEscapedString "\n" - (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br) + Space -> return $ strToHtml " " + SoftBreak -> return $ case writerWrapText opts of + WrapNone -> preEscapedString " " + WrapAuto -> preEscapedString " " + WrapPreserve -> preEscapedString "\n" + LineBreak -> return $ (if html5 then H5.br else H.br) <> strToHtml "\n" (Span (id',classes,kvs) ils) -> inlineListToHtml opts ils >>= - return . addAttrs opts attr' . H.span + addAttrs opts attr' . H.span where attr' = (id',classes',kvs') classes' = filter (`notElem` ["csl-no-emph", "csl-no-strong", "csl-no-smallcaps"]) classes kvs' = if null styles then kvs - else (("style", concat styles) : kvs) + else ("style", concat styles) : kvs styles = ["font-style:normal;" | "csl-no-emph" `elem` classes] ++ ["font-weight:normal;" @@ -724,20 +983,23 @@ inlineToHtml opts inline = (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong (Code attr str) -> case hlCode of - Nothing -> return - $ addAttrs opts attr - $ H.code $ strToHtml str - Just h -> do + Left msg -> do + unless (null msg) $ + report $ CouldNotHighlight msg + addAttrs opts attr $ H.code $ strToHtml str + Right h -> do modify $ \st -> st{ stHighlighting = True } - return $ addAttrs opts (id',[],keyvals) h + addAttrs opts (id',[],keyvals) h where (id',_,keyvals) = attr - hlCode = if writerHighlight opts - then highlight formatHtmlInline attr str - else Nothing + hlCode = if isJust (writerHighlightStyle opts) + then highlight + (writerSyntaxMap opts) + formatHtmlInline attr str + else Left "" (Strikeout lst) -> inlineListToHtml opts lst >>= return . H.del (SmallCaps lst) -> inlineListToHtml opts lst >>= - return . (H.span ! A.style "font-variant: small-caps;") + return . (H.span ! A.class_ "smallcaps") (Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup (Subscript lst) -> inlineListToHtml opts lst >>= return . H.sub (Quoted quoteType lst) -> @@ -768,15 +1030,15 @@ inlineToHtml opts inline = JsMath _ -> do let m = preEscapedString str return $ case t of - InlineMath -> H.span ! A.class_ mathClass $ m + InlineMath -> H.span ! A.class_ mathClass $ m DisplayMath -> H.div ! A.class_ mathClass $ m WebTeX url -> do - let imtag = if writerHtml5 opts then H5.img else H.img + let imtag = if html5 then H5.img else H.img let m = imtag ! A.style "vertical-align:middle" ! A.src (toValue $ url ++ urlEncode str) ! A.alt (toValue str) ! A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if html5 then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag @@ -784,103 +1046,124 @@ inlineToHtml opts inline = return $ case t of InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" - MathML _ -> do - let dt = if t == InlineMath - then DisplayInline - else DisplayBlock + MathML -> do let conf = useShortEmptyTags (const False) defaultConfigPP - case writeMathML dt <$> readTeX str of + res <- lift $ convertMath writeMathML t str + case res of Right r -> return $ preEscapedString $ ppcElement conf (annotateMML r str) - Left _ -> inlineListToHtml opts - (texMathToInlines t str) >>= - return . (H.span ! A.class_ mathClass) + Left il -> (H.span ! A.class_ mathClass) <$> + inlineToHtml opts il MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" - KaTeX _ _ -> return $ H.span ! A.class_ mathClass $ - toHtml (case t of - InlineMath -> str - DisplayMath -> "\\displaystyle " ++ str) + KaTeX _ -> return $ H.span ! A.class_ mathClass $ toHtml $ + case t of + InlineMath -> "\\(" ++ str ++ "\\)" + DisplayMath -> "\\[" ++ str ++ "\\]" PlainMath -> do - x <- inlineListToHtml opts (texMathToInlines t str) + x <- lift (texMathToInlines t str) >>= inlineListToHtml opts let m = H.span ! A.class_ mathClass $ x - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if html5 then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag - (RawInline f str) - | f == Format "html" -> return $ preEscapedString str - | otherwise -> return mempty + (RawInline f str) -> do + ishtml <- isRawHtml f + if ishtml + then return $ preEscapedString str + else if (f == Format "latex" || f == Format "tex") && + "\\begin" `isPrefixOf` str && + allowsMathEnvironments (writerHTMLMathMethod opts) && + isMathEnvironment str + then inlineToHtml opts $ Math DisplayMath str + else do + report $ InlineNotRendered inline + return mempty (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts attr linkText s - (Link attr txt (s,tit)) -> do + obfuscateLink opts attr linkText s + (Link (ident,classes,kvs) txt (s,tit)) -> do linkText <- inlineListToHtml opts txt + slideVariant <- gets stSlideVariant let s' = case s of - '#':xs | writerSlideVariant opts == - RevealJsSlides -> '#':'/':xs - _ -> s + '#':xs -> let prefix = if slideVariant == RevealJsSlides + then "/" + else writerIdentifierPrefix opts + in '#' : prefix ++ xs + _ -> s let link = H.a ! A.href (toValue s') $ linkText - let link' = if txt == [Str (unEscapeString s)] - then link ! A.class_ "uri" - else link - let link'' = addAttrs opts attr link' + let attr = if txt == [Str (unEscapeString s)] + then (ident, "uri" : classes, kvs) + else (ident, classes, kvs) + link' <- addAttrs opts attr link return $ if null tit - then link'' - else link'' ! A.title (toValue tit) + then link' + else link' ! A.title (toValue tit) (Image attr txt (s,tit)) | treatAsImage s -> do let alternate' = stringify txt - let attributes = [A.src $ toValue s] ++ - [A.title $ toValue tit | not (null tit)] ++ - [A.alt $ toValue alternate' | not (null txt)] ++ - imgAttrsToHtml opts attr - let tag = if writerHtml5 opts then H5.img else H.img + slideVariant <- gets stSlideVariant + let isReveal = slideVariant == RevealJsSlides + attrs <- imgAttrsToHtml opts attr + let attributes = + -- reveal.js uses data-src for lazy loading + (if isReveal + then customAttribute "data-src" $ toValue s + else A.src $ toValue s) : + [A.title $ toValue tit | not (null tit)] ++ + [A.alt $ toValue alternate' | not (null txt)] ++ + attrs + let tag = if html5 then H5.img else H.img return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image attr _ (s,tit)) -> do - let attributes = [A.src $ toValue s] ++ - [A.title $ toValue tit | not (null tit)] ++ - imgAttrsToHtml opts attr + slideVariant <- gets stSlideVariant + let isReveal = slideVariant == RevealJsSlides + attrs <- imgAttrsToHtml opts attr + let attributes = + (if isReveal + then customAttribute "data-src" $ toValue s + else A.src $ toValue s) : + [A.title $ toValue tit | not (null tit)] ++ + attrs return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl - (Note contents) - | writerIgnoreNotes opts -> return mempty - | otherwise -> do + (Note contents) -> do notes <- gets stNotes - let number = (length notes) + 1 + let number = length notes + 1 let ref = show number htmlContents <- blockListToNote opts ref contents + epubVersion <- gets stEPUBVersion -- push contents onto front of notes - modify $ \st -> st {stNotes = (htmlContents:notes)} - let revealSlash = ['/' | writerSlideVariant opts - == RevealJsSlides] + modify $ \st -> st {stNotes = htmlContents:notes} + slideVariant <- gets stSlideVariant + let revealSlash = ['/' | slideVariant == RevealJsSlides] let link = H.a ! A.href (toValue $ "#" ++ revealSlash ++ writerIdentifierPrefix opts ++ "fn" ++ ref) - ! A.class_ "footnoteRef" + ! A.class_ "footnote-ref" ! prefixedId opts ("fnref" ++ ref) - $ (if isJust (writerEpubVersion opts) + $ (if isJust epubVersion then id else H.sup) $ toHtml ref - return $ case writerEpubVersion opts of + return $ case epubVersion of Just EPUB3 -> link ! customAttribute "epub:type" "noteref" _ -> link (Cite cits il)-> do contents <- inlineListToHtml opts il let citationIds = unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents - return $ if writerHtml5 opts + return $ if html5 then result ! customAttribute "data-cites" (toValue citationIds) else result -blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html +blockListToNote :: PandocMonad m => WriterOptions -> String -> [Block] -> StateT WriterState m Html blockListToNote opts ref blocks = -- If last block is Para or Plain, include the backlink at the end of -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [Link nullAttr [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])] + let backlink = [Link ("",["footnote-back"],[]) [Str "↩"] ("#" ++ "fnref" ++ ref,[])] blocks' = if null blocks then [] else let lastBlock = last blocks @@ -893,23 +1176,13 @@ blockListToNote opts ref blocks = _ -> otherBlocks ++ [lastBlock, Plain backlink] in do contents <- blockListToHtml opts blocks' - let noteItem = H.li ! (prefixedId opts ("fn" ++ ref)) $ contents - let noteItem' = case writerEpubVersion opts of + let noteItem = H.li ! prefixedId opts ("fn" ++ ref) $ contents + epubVersion <- gets stEPUBVersion + let noteItem' = case epubVersion of Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote" _ -> noteItem return $ nl opts >> noteItem' --- Javascript snippet to render all KaTeX elements -renderKaTeX :: String -renderKaTeX = unlines [ - "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");" - , "for (var i=0; i < mathElements.length; i++)" - , "{" - , " var texText = mathElements[i].firstChild" - , " katex.render(texText.data, mathElements[i])" - , "}}" - ] - isMathEnvironment :: String -> Bool isMathEnvironment s = "\\begin{" `isPrefixOf` s && envName `elem` mathmlenvs @@ -944,6 +1217,219 @@ isMathEnvironment s = "\\begin{" `isPrefixOf` s && allowsMathEnvironments :: HTMLMathMethod -> Bool allowsMathEnvironments (MathJax _) = True -allowsMathEnvironments (MathML _) = True +allowsMathEnvironments MathML = True allowsMathEnvironments (WebTeX _) = True allowsMathEnvironments _ = False + +isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool +isRawHtml f = do + html5 <- gets stHtml5 + return $ f == Format "html" || + ((html5 && f == Format "html5") || f == Format "html4") + +html5Attributes :: Set.Set String +html5Attributes = Set.fromList + [ "abbr" + , "accept" + , "accept-charset" + , "accesskey" + , "action" + , "allowfullscreen" + , "allowpaymentrequest" + , "allowusermedia" + , "alt" + , "as" + , "async" + , "autocomplete" + , "autofocus" + , "autoplay" + , "charset" + , "checked" + , "cite" + , "class" + , "color" + , "cols" + , "colspan" + , "content" + , "contenteditable" + , "controls" + , "coords" + , "crossorigin" + , "data" + , "datetime" + , "default" + , "defer" + , "dir" + , "dirname" + , "disabled" + , "download" + , "draggable" + , "enctype" + , "for" + , "form" + , "formaction" + , "formenctype" + , "formmethod" + , "formnovalidate" + , "formtarget" + , "headers" + , "height" + , "hidden" + , "high" + , "href" + , "hreflang" + , "http-equiv" + , "id" + , "inputmode" + , "integrity" + , "is" + , "ismap" + , "itemid" + , "itemprop" + , "itemref" + , "itemscope" + , "itemtype" + , "kind" + , "label" + , "lang" + , "list" + , "loop" + , "low" + , "manifest" + , "max" + , "maxlength" + , "media" + , "method" + , "min" + , "minlength" + , "multiple" + , "muted" + , "name" + , "nomodule" + , "nonce" + , "novalidate" + , "onabort" + , "onafterprint" + , "onauxclick" + , "onbeforeprint" + , "onbeforeunload" + , "onblur" + , "oncancel" + , "oncanplay" + , "oncanplaythrough" + , "onchange" + , "onclick" + , "onclose" + , "oncontextmenu" + , "oncopy" + , "oncuechange" + , "oncut" + , "ondblclick" + , "ondrag" + , "ondragend" + , "ondragenter" + , "ondragexit" + , "ondragleave" + , "ondragover" + , "ondragstart" + , "ondrop" + , "ondurationchange" + , "onemptied" + , "onended" + , "onerror" + , "onfocus" + , "onhashchange" + , "oninput" + , "oninvalid" + , "onkeydown" + , "onkeypress" + , "onkeyup" + , "onlanguagechange" + , "onload" + , "onloadeddata" + , "onloadedmetadata" + , "onloadend" + , "onloadstart" + , "onmessage" + , "onmessageerror" + , "onmousedown" + , "onmouseenter" + , "onmouseleave" + , "onmousemove" + , "onmouseout" + , "onmouseover" + , "onmouseup" + , "onoffline" + , "ononline" + , "onpagehide" + , "onpageshow" + , "onpaste" + , "onpause" + , "onplay" + , "onplaying" + , "onpopstate" + , "onprogress" + , "onratechange" + , "onrejectionhandled" + , "onreset" + , "onresize" + , "onscroll" + , "onsecuritypolicyviolation" + , "onseeked" + , "onseeking" + , "onselect" + , "onstalled" + , "onstorage" + , "onsubmit" + , "onsuspend" + , "ontimeupdate" + , "ontoggle" + , "onunhandledrejection" + , "onunload" + , "onvolumechange" + , "onwaiting" + , "onwheel" + , "open" + , "optimum" + , "pattern" + , "ping" + , "placeholder" + , "playsinline" + , "poster" + , "preload" + , "readonly" + , "referrerpolicy" + , "rel" + , "required" + , "reversed" + , "rows" + , "rowspan" + , "sandbox" + , "scope" + , "selected" + , "shape" + , "size" + , "sizes" + , "slot" + , "span" + , "spellcheck" + , "src" + , "srcdoc" + , "srclang" + , "srcset" + , "start" + , "step" + , "style" + , "tabindex" + , "target" + , "title" + , "translate" + , "type" + , "typemustmatch" + , "updateviacache" + , "usemap" + , "value" + , "width" + , "workertype" + , "wrap" + ] diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 29fdafe15..688c1f390 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} {- -Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2014-2015, 2017-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Haddock - Copyright : Copyright (C) 2014 John MacFarlane + Copyright : Copyright (C) 2014-2015,2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,17 +33,19 @@ Conversion of 'Pandoc' documents to haddock markup. Haddock: <http://www.haskell.org/haddock/doc/html/> -} module Text.Pandoc.Writers.Haddock (writeHaddock) where +import Control.Monad.State.Strict +import Data.Default +import Data.List (intersperse, transpose) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Logging import Text.Pandoc.Options -import Data.List ( intersperse, transpose ) import Text.Pandoc.Pretty -import Control.Monad.State -import Text.Pandoc.Readers.TeXMath (texMathToInlines) -import Network.URI (isURI) -import Data.Default +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Math (texMathToInlines) +import Text.Pandoc.Writers.Shared type Notes = [[Block]] data WriterState = WriterState { stNotes :: Notes } @@ -49,13 +53,14 @@ instance Default WriterState where def = WriterState{ stNotes = [] } -- | Convert Pandoc to Haddock. -writeHaddock :: WriterOptions -> Pandoc -> String +writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeHaddock opts document = - evalState (pandocToHaddock opts{ + evalStateT (pandocToHaddock opts{ writerWrapText = writerWrapText opts } document) def -- | Return haddock representation of document. -pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String +pandocToHaddock :: PandocMonad m + => WriterOptions -> Pandoc -> StateT WriterState m Text pandocToHaddock opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts @@ -63,22 +68,22 @@ pandocToHaddock opts (Pandoc meta blocks) = do body <- blockListToHaddock opts blocks st <- get notes' <- notesToHaddock opts (reverse $ stNotes st) - let render' :: Doc -> String + let render' :: Doc -> Text render' = render colwidth let main = render' $ body <> (if isEmpty notes' then empty else blankline <> notes') metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToHaddock opts) - (fmap (render colwidth) . inlineListToHaddock opts) + (fmap render' . blockListToHaddock opts) + (fmap render' . inlineListToHaddock opts) meta - let context = defField "body" main - $ metadata + let context = defField "body" main metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return haddock representation of notes. -notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToHaddock :: PandocMonad m + => WriterOptions -> [[Block]] -> StateT WriterState m Doc notesToHaddock opts notes = if null notes then return empty @@ -92,9 +97,10 @@ escapeString = escapeStringUsing haddockEscapes where haddockEscapes = backslashEscapes "\\/'`\"@<" -- | Convert Pandoc block element to haddock. -blockToHaddock :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc +blockToHaddock :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> StateT WriterState m Doc blockToHaddock _ Null = return empty blockToHaddock opts (Div _ ils) = do contents <- blockListToHaddock opts ils @@ -110,10 +116,12 @@ blockToHaddock opts (Para inlines) = (<> blankline) `fmap` blockToHaddock opts (Plain inlines) blockToHaddock opts (LineBlock lns) = blockToHaddock opts $ linesToPara lns -blockToHaddock _ (RawBlock f str) - | f == "haddock" = do +blockToHaddock _ b@(RawBlock f str) + | f == "haddock" = return $ text str <> text "\n" - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToHaddock opts HorizontalRule = return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline blockToHaddock opts (Header level (ident,_,_) inlines) = do @@ -141,16 +149,16 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do isPlainBlock _ = False let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows) (nst,tbl) <- case True of - _ | isSimple -> fmap (nest 2,) $ + _ | isSimple -> (nest 2,) <$> pandocTable opts (all null headers) aligns widths rawHeaders rawRows - | not hasBlocks -> fmap (nest 2,) $ + | not hasBlocks -> (nest 2,) <$> pandocTable opts (all null headers) aligns widths rawHeaders rawRows - | otherwise -> fmap (id,) $ - gridTable opts (all null headers) aligns widths - rawHeaders rawRows - return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline + | otherwise -> (id,) <$> + gridTable opts blockListToHaddock + (all null headers) aligns widths headers rows + return $ prefixed "> " (nst $ tbl $$ blankline $$ caption'') $$ blankline blockToHaddock opts (BulletList items) = do contents <- mapM (bulletListItemToHaddock opts) items return $ cat contents <> blankline @@ -160,15 +168,15 @@ blockToHaddock opts (OrderedList (start,_,delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $ - zip markers' items + contents <- zipWithM (orderedListItemToHaddock opts) markers' items return $ cat contents <> blankline blockToHaddock opts (DefinitionList items) = do contents <- mapM (definitionListItemToHaddock opts) items return $ cat contents <> blankline -pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> State WriterState Doc +pandocTable :: PandocMonad m + => WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> StateT WriterState m Doc pandocTable opts headless aligns widths rawHeaders rawRows = do let isSimple = all (==0) widths let alignHeader alignment = case alignment of @@ -184,18 +192,17 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do (floor . (fromIntegral (writerColumns opts) *)) widths let makeRow = hcat . intersperse (lblock 1 (text " ")) . - (zipWith3 alignHeader aligns widthsInChars) + zipWith3 alignHeader aligns widthsInChars let rows' = map makeRow rawRows let head' = makeRow rawHeaders let maxRowHeight = maximum $ map height (head':rows') let underline = cat $ intersperse (text " ") $ map (\width -> text (replicate width '-')) widthsInChars - let border = if maxRowHeight > 1 - then text (replicate (sum widthsInChars + - length widthsInChars - 1) '-') - else if headless - then underline - else empty + let border + | maxRowHeight > 1 = text (replicate (sum widthsInChars + + length widthsInChars - 1) '-') + | headless = underline + | otherwise = empty let head'' = if headless then empty else border <> cr <> head' @@ -207,35 +214,9 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do else border return $ head'' $$ underline $$ body $$ bottom -gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> State WriterState Doc -gridTable opts headless _aligns widths headers' rawRows = do - let numcols = length headers' - let widths' = if all (==0) widths - then replicate numcols (1.0 / fromIntegral numcols) - else widths - let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths' - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = chomp $ hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map (makeRow . map chomp) rawRows - let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '+' - let body = vcat $ intersperse (border '-') rows' - let head'' = if headless - then empty - else head' $$ border '=' - return $ border '-' $$ head'' $$ body $$ border '-' - -- | Convert bullet list item (list of blocks) to haddock -bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToHaddock :: PandocMonad m + => WriterOptions -> [Block] -> StateT WriterState m Doc bulletListItemToHaddock opts items = do contents <- blockListToHaddock opts items let sps = replicate (writerTabStop opts - 2) ' ' @@ -250,22 +231,24 @@ bulletListItemToHaddock opts items = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to haddock -orderedListItemToHaddock :: WriterOptions -- ^ options - -> String -- ^ list item marker - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc +orderedListItemToHaddock :: PandocMonad m + => WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> StateT WriterState m Doc orderedListItemToHaddock opts marker items = do contents <- blockListToHaddock opts items let sps = case length marker - writerTabStop opts of n | n > 0 -> text $ replicate n ' ' - _ -> text " " + _ -> text " " let start = text marker <> sps return $ hang (writerTabStop opts) start $ contents <> cr -- | Convert definition list item (label, list of blocks) to haddock -definitionListItemToHaddock :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState Doc +definitionListItemToHaddock :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> StateT WriterState m Doc definitionListItemToHaddock opts (label, defs) = do labelText <- inlineListToHaddock opts label defs' <- mapM (mapM (blockToHaddock opts)) defs @@ -273,19 +256,22 @@ definitionListItemToHaddock opts (label, defs) = do return $ nowrap (brackets labelText) <> cr <> contents <> cr -- | Convert list of Pandoc block elements to haddock -blockListToHaddock :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc +blockListToHaddock :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> StateT WriterState m Doc blockListToHaddock opts blocks = mapM (blockToHaddock opts) blocks >>= return . cat -- | Convert list of Pandoc inline elements to haddock. -inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToHaddock :: PandocMonad m + => WriterOptions -> [Inline] -> StateT WriterState m Doc inlineListToHaddock opts lst = mapM (inlineToHaddock opts) lst >>= return . cat -- | Convert Pandoc inline element to haddock. -inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc +inlineToHaddock :: PandocMonad m + => WriterOptions -> Inline -> StateT WriterState m Doc inlineToHaddock opts (Span (ident,_,_) ils) = do contents <- inlineListToHaddock opts ils if not (null ident) && null ils @@ -315,18 +301,20 @@ inlineToHaddock opts (Quoted DoubleQuote lst) = do return $ "“" <> contents <> "”" inlineToHaddock _ (Code _ str) = return $ "@" <> text (escapeString str) <> "@" -inlineToHaddock _ (Str str) = do +inlineToHaddock _ (Str str) = return $ text $ escapeString str inlineToHaddock opts (Math mt str) = do let adjust x = case mt of DisplayMath -> cr <> x <> cr InlineMath -> x - adjust `fmap` (inlineListToHaddock opts $ texMathToInlines mt str) -inlineToHaddock _ (RawInline f str) + adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts) +inlineToHaddock _ il@(RawInline f str) | f == "haddock" = return $ text str - | otherwise = return empty + | otherwise = do + report $ InlineNotRendered il + return empty -- no line break in haddock (see above on CodeBlock) -inlineToHaddock _ (LineBreak) = return cr +inlineToHaddock _ LineBreak = return cr inlineToHaddock opts SoftBreak = case writerWrapText opts of WrapAuto -> return space @@ -339,7 +327,7 @@ inlineToHaddock _ (Link _ txt (src, _)) = do let useAuto = isURI src && case txt of [Str s] | escapeURI s == src -> True - _ -> False + _ -> False return $ nowrap $ "<" <> text src <> (if useAuto then empty else space <> linktext) <> ">" inlineToHaddock opts (Image attr alternate (source, tit)) = do diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 8f0d21cf5..a5d851e40 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -1,8 +1,10 @@ -{-# LANGUAGE OverloadedStrings, FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Writers.ICML - Copyright : Copyright (C) 2013-2016 github.com/mb21 + Copyright : Copyright (C) 2013-2018 github.com/mb21 License : GNU GPL, version 2 or above Stability : alpha @@ -14,20 +16,25 @@ InCopy is the companion word-processor to Adobe InDesign and ICML documents can into InDesign with File -> Place. -} module Text.Pandoc.Writers.ICML (writeICML) where +import Control.Monad.Except (catchError) +import Control.Monad.State.Strict +import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix) +import Data.Maybe (fromMaybe) +import qualified Data.Set as Set +import Data.Text as Text (breakOnAll, pack) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Readers.TeXMath (texMathToInlines) -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Shared (linesToPara, splitBy, fetchItem, warn) +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Data.List (isPrefixOf, isInfixOf, stripPrefix, intersperse) -import Data.Text as Text (breakOnAll, pack) -import Control.Monad.State -import Network.URI (isURI) -import qualified Data.Set as Set +import Text.Pandoc.Shared (isURI, linesToPara, splitBy) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Math (texMathToInlines) +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML type Style = [String] type Hyperlink = [(Int, String)] @@ -40,7 +47,7 @@ data WriterState = WriterState{ , maxListDepth :: Int } -type WS a = StateT WriterState IO a +type WS m = StateT WriterState m defaultWriterState :: WriterState defaultWriterState = WriterState{ @@ -121,13 +128,13 @@ subListParName = "subParagraph" footnoteName = "Footnote" citeName = "Cite" - -- | Convert Pandoc document to string in ICML format. -writeICML :: WriterOptions -> Pandoc -> IO String +writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeICML opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + render' :: Doc -> Text render' = render colwidth renderMeta f s = liftM (render' . fst) $ runStateT (f opts [] s) defaultWriterState metadata <- metaToJSON opts @@ -139,18 +146,15 @@ writeICML opts (Pandoc meta blocks) = do context = defField "body" main $ defField "charStyles" (render' $ charStylesToDoc st) $ defField "parStyles" (render' $ parStylesToDoc st) - $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) - $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Auxilary functions for parStylesToDoc and charStylesToDoc. contains :: String -> (String, (String, String)) -> [(String, String)] contains s rule = - if isInfixOf (fst rule) s - then [snd rule] - else [] + [snd rule | (fst rule) `isInfixOf` s] -- | The monospaced font to use as default. monospacedFont :: Doc @@ -174,7 +178,7 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st where makeStyle s = let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) - attrs = concat $ map (contains s) $ [ + attrs = concatMap (contains s) [ (defListTermName, ("BulletsAndNumberingListType", "BulletList")) , (defListTermName, ("FontStyle", "Bold")) , (tableHeaderName, ("FontStyle", "Bold")) @@ -200,9 +204,9 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st where numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] | otherwise = [] - listType | isOrderedList && (not $ isInfixOf subListParName s) + listType | isOrderedList && not (subListParName `isInfixOf` s) = [("BulletsAndNumberingListType", "NumberedList")] - | isBulletList && (not $ isInfixOf subListParName s) + | isBulletList && not (subListParName `isInfixOf` s) = [("BulletsAndNumberingListType", "BulletList")] | otherwise = [] indent = [("LeftIndent", show indt)] @@ -210,9 +214,9 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st nBlockQuotes = countSubStrs blockQuoteName s nDefLists = countSubStrs defListDefName s indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) - props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm) + props = inTags True "Properties" [] (basedOn $$ tabList $$ numbForm) where - font = if isInfixOf codeBlockName s + font = if codeBlockName `isInfixOf` s then monospacedFont else empty basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font @@ -239,7 +243,7 @@ charStylesToDoc :: WriterState -> Doc charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st where makeStyle s = - let attrs = concat $ map (contains s) [ + let attrs = concatMap (contains s) [ (strikeoutName, ("StrikeThru", "true")) , (superscriptName, ("Position", "Superscript")) , (subscriptName, ("Position", "Subscript")) @@ -253,7 +257,7 @@ charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font where font = - if isInfixOf codeName s + if codeName `isInfixOf` s then monospacedFont else empty in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props @@ -273,23 +277,22 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs hyp (ident, url) = hdest $$ hlink where hdest = selfClosingTag "HyperlinkURLDestination" - [("Self", "HyperlinkURLDestination/"++(escapeColons url)), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 + [("Self", "HyperlinkURLDestination/"++escapeColons url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] $ inTags True "Properties" [] $ inTags False "BorderColor" [("type","enumeration")] (text "Black") - $$ (inTags False "Destination" [("type","object")] - $ text $ "HyperlinkURLDestination/"++(escapeColons (escapeStringForXML url))) -- HyperlinkURLDestination with more than one colon crashes CS6 + $$ inTags False "Destination" [("type","object")] (text $ "HyperlinkURLDestination/"++escapeColons (escapeStringForXML url)) -- HyperlinkURLDestination with more than one colon crashes CS6 -- | Convert a list of Pandoc blocks to ICML. -blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc +blocksToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc blocksToICML opts style lst = do docs <- mapM (blockToICML opts style) lst return $ intersperseBrs docs -- | Convert a Pandoc block element to ICML. -blockToICML :: WriterOptions -> Style -> Block -> WS Doc +blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m Doc blockToICML opts style (Plain lst) = parStyle opts style lst -- title beginning with fig: indicates that the image is a figure blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do @@ -299,10 +302,12 @@ blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst blockToICML opts style (LineBlock lns) = blockToICML opts style $ linesToPara lns -blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] -blockToICML _ _ (RawBlock f str) +blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) [Str str] +blockToICML _ _ b@(RawBlock f str) | f == Format "icml" = return $ text str - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst @@ -343,11 +348,10 @@ blockToICML opts style (Table caption aligns widths headers rows) = then rows else headers:rows cells <- rowsToICML tabl (0::Int) - let colWidths w = if w > 0 - then [("SingleColumnWidth",show $ 500 * w)] - else [] - let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup) - let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths + let colWidths w = + [("SingleColumnWidth",show $ 500 * w) | w > 0] + let tupToDoc tup = selfClosingTag "Column" $ ("Name",show $ fst tup) : colWidths (snd tup) + let colDescs = vcat $ zipWith (curry tupToDoc) [0..nrCols-1] widths let tableDoc = return $ inTags True "Table" [ ("AppliedTableStyle","TableStyle/Table") , ("HeaderRowCount", nrHeaders) @@ -359,7 +363,7 @@ blockToICML opts style (Div _ lst) = blocksToICML opts style lst blockToICML _ _ Null = return empty -- | Convert a list of lists of blocks to ICML list items. -listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc +listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m Doc listItemsToICML _ _ _ _ [] = return empty listItemsToICML opts listType style attribs (first:rest) = do st <- get @@ -374,18 +378,17 @@ listItemsToICML opts listType style attribs (first:rest) = do return $ intersperseBrs docs -- | Convert a list of blocks to ICML list items. -listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc +listItemToICML :: PandocMonad m => WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS m Doc listItemToICML opts style isFirst attribs item = let makeNumbStart (Just (beginsWith, numbStl, _)) = let doN DefaultStyle = [] - doN LowerRoman = [lowerRomanName] - doN UpperRoman = [upperRomanName] - doN LowerAlpha = [lowerAlphaName] - doN UpperAlpha = [upperAlphaName] - doN _ = [] - bw = if beginsWith > 1 - then [beginsWithName ++ show beginsWith] - else [] + doN LowerRoman = [lowerRomanName] + doN UpperRoman = [upperRomanName] + doN LowerAlpha = [lowerAlphaName] + doN UpperAlpha = [upperAlphaName] + doN _ = [] + bw = + [beginsWithName ++ show beginsWith | beginsWith > 1] in doN numbStl ++ bw makeNumbStart Nothing = [] stl = if isFirst @@ -394,26 +397,26 @@ listItemToICML opts style isFirst attribs item = stl' = makeNumbStart attribs ++ stl in if length item > 1 then do - let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst + let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ Str "\t":lst insertTab block = blockToICML opts style block f <- blockToICML opts stl' $ head item r <- mapM insertTab $ tail item return $ intersperseBrs (f : r) else blocksToICML opts stl' item -definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc +definitionListItemToICML :: PandocMonad m => WriterOptions -> Style -> ([Inline],[[Block]]) -> WS m Doc definitionListItemToICML opts style (term,defs) = do term' <- parStyle opts (defListTermName:style) term defs' <- mapM (blocksToICML opts (defListDefName:style)) defs - return $ intersperseBrs $ (term' : defs') + return $ intersperseBrs (term' : defs') -- | Convert a list of inline elements to ICML. -inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc +inlinesToICML :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst) -- | Convert an inline element to ICML. -inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc +inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m Doc inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst @@ -433,17 +436,20 @@ inlineToICML opts style SoftBreak = WrapPreserve -> charStyle style cr inlineToICML _ style LineBreak = charStyle style $ text lineSeparator inlineToICML opts style (Math mt str) = - cat <$> mapM (inlineToICML opts style) (texMathToInlines mt str) -inlineToICML _ _ (RawInline f str) + lift (texMathToInlines mt str) >>= + (fmap cat . mapM (inlineToICML opts style)) +inlineToICML _ _ il@(RawInline f str) | f == Format "icml" = return $ text str - | otherwise = return empty + | otherwise = do + report $ InlineNotRendered il + return empty inlineToICML opts style (Link _ lst (url, title)) = do content <- inlinesToICML opts (linkName:style) lst state $ \st -> let ident = if null $ links st then 1::Int - else 1 + (fst $ head $ links st) - newst = st{ links = (ident, url):(links st) } + else 1 + fst (head $ links st) + newst = st{ links = (ident, url):links st } cont = inTags True "HyperlinkTextSource" [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content in (cont, newst) @@ -452,9 +458,9 @@ inlineToICML opts style (Note lst) = footnoteToICML opts style lst inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst -- | Convert a list of block elements to an ICML footnote. -footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc +footnoteToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc footnoteToICML opts style lst = - let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls + let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ Str "\t":ls insertTab block = blockToICML opts (footnoteName:style) block in do contents <- mapM insertTab lst @@ -466,24 +472,24 @@ footnoteToICML opts style lst = -- | Auxiliary function to merge Space elements into the adjacent Strs. mergeSpaces :: [Inline] -> [Inline] -mergeSpaces ((Str s):(x:((Str s'):xs))) | isSp x = +mergeSpaces (Str s:(x:(Str s':xs))) | isSp x = mergeSpaces $ Str(s++" "++s') : xs -mergeSpaces (x:((Str s):xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs -mergeSpaces ((Str s):(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs -mergeSpaces (x:xs) = x : (mergeSpaces xs) +mergeSpaces (x:(Str s:xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs +mergeSpaces (Str s:(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs +mergeSpaces (x:xs) = x : mergeSpaces xs mergeSpaces [] = [] isSp :: Inline -> Bool -isSp Space = True +isSp Space = True isSp SoftBreak = True -isSp _ = False +isSp _ = False -- | Intersperse line breaks intersperseBrs :: [Doc] -> Doc intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isEmpty) -- | Wrap a list of inline elements in an ICML Paragraph Style -parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc +parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc parStyle opts style lst = let slipIn x y = if null y then x @@ -498,7 +504,7 @@ parStyle opts style lst = begins = filter (isPrefixOf beginsWithName) style in if null begins then ats - else let i = maybe "" id $ stripPrefix beginsWithName $ head begins + else let i = fromMaybe "" $ stripPrefix beginsWithName $ head begins in ("NumberingStartAt", i) : ats else [attrs] in do @@ -507,16 +513,16 @@ parStyle opts style lst = state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st }) -- | Wrap a Doc in an ICML Character Style. -charStyle :: Style -> Doc -> WS Doc +charStyle :: PandocMonad m => Style -> Doc -> WS m Doc charStyle style content = let (stlStr, attrs) = styleToStrAttr style doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content - in do + in state $ \st -> - let styles = if null stlStr - then st - else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } - in (doc, styles) + let styles = if null stlStr + then st + else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } + in (doc, styles) -- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. styleToStrAttr :: Style -> (String, [(String, String)]) @@ -529,20 +535,18 @@ styleToStrAttr style = in (stlStr, attrs) -- | Assemble an ICML Image. -imageICML :: WriterOptions -> Style -> Attr -> Target -> WS Doc +imageICML :: PandocMonad m => WriterOptions -> Style -> Attr -> Target -> WS m Doc imageICML opts style attr (src, _) = do - res <- liftIO $ fetchItem (writerSourceURL opts) src - imgS <- case res of - Left (_) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - return def - Right (img, _) -> do - case imageSize img of + imgS <- catchError + (do (img, _) <- P.fetchItem src + case imageSize opts img of Right size -> return size Left msg -> do - warn $ "Could not determine image size in `" ++ - src ++ "': " ++ msg - return def + report $ CouldNotDetermineImageSize src msg + return def) + (\e -> do + report $ CouldNotFetchResource src (show e) + return def) let (ow, oh) = sizeInPoints imgS (imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS hw = showFl $ ow / 2 @@ -571,6 +575,5 @@ imageICML opts style attr (src, _) = do ] doc = inTags True "CharacterStyleRange" attrs $ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"), - ("ItemTransform", scale++" "++hw++" -"++hh)] - $ (props $$ image) + ("ItemTransform", scale++" "++hw++" -"++hh)] (props $$ image) state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs new file mode 100644 index 000000000..639961acd --- /dev/null +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -0,0 +1,455 @@ +{-# LANGUAGE OverloadedStrings #-} + +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Writers.JATS + Copyright : Copyright (C) 2017-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to JATS XML. +Reference: +https://jats.nlm.nih.gov/publishing/tag-library/1.1d3/element/mml-math.html +-} +module Text.Pandoc.Writers.JATS ( writeJATS ) where +import Control.Monad.Reader +import Data.Char (toLower) +import Data.Generics (everywhere, mkT) +import Data.List (isSuffixOf, partition) +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.Logging +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML +import Text.TeXMath +import qualified Text.XML.Light as Xml + +data JATSVersion = JATS1_1 + deriving (Eq, Show) + +type JATS = ReaderT JATSVersion + +writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeJATS opts d = + runReaderT (docToJATS opts d) JATS1_1 + +-- | Convert Pandoc document to string in JATS format. +docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text +docToJATS opts (Pandoc meta blocks) = do + let isBackBlock (Div ("refs",_,_) _) = True + isBackBlock _ = False + let (backblocks, bodyblocks) = partition isBackBlock blocks + let elements = hierarchicalize bodyblocks + let backElements = hierarchicalize backblocks + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + let render' :: Doc -> Text + render' = render colwidth + let opts' = if maybe False (("/book>" `isSuffixOf`) . trimr) + (writerTemplate opts) && + TopLevelDefault == writerTopLevelDivision opts + then opts{ writerTopLevelDivision = TopLevelChapter } + else opts + -- The numbering here follows LaTeX's internal numbering + let startLvl = case writerTopLevelDivision opts' of + TopLevelPart -> -1 + TopLevelChapter -> 0 + TopLevelSection -> 1 + TopLevelDefault -> 1 + metadata <- metaToJSON opts + (fmap (render' . vcat) . + mapM (elementToJATS opts' startLvl) . + hierarchicalize) + (fmap render' . inlinesToJATS opts') + meta + main <- (render' . vcat) <$> + mapM (elementToJATS opts' startLvl) elements + back <- (render' . vcat) <$> + mapM (elementToJATS opts' startLvl) backElements + let context = defField "body" main + $ defField "back" back + $ defField "mathml" (case writerHTMLMathMethod opts of + MathML -> True + _ -> False) metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +-- | Convert an Element to JATS. +elementToJATS :: PandocMonad m => WriterOptions -> Int -> Element -> JATS m Doc +elementToJATS opts _ (Blk block) = blockToJATS opts block +elementToJATS opts lvl (Sec _ _num (id',_,kvs) title elements) = do + let idAttr = [("id", writerIdentifierPrefix opts ++ id') | not (null id')] + let otherAttrs = ["sec-type", "specific-use"] + let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs] + contents <- mapM (elementToJATS opts (lvl + 1)) elements + title' <- inlinesToJATS opts title + return $ inTags True "sec" attribs $ + inTagsSimple "title" title' $$ vcat contents + +-- | Convert a list of Pandoc blocks to JATS. +blocksToJATS :: PandocMonad m => WriterOptions -> [Block] -> JATS m Doc +blocksToJATS opts = fmap vcat . mapM (blockToJATS opts) + +-- | Auxiliary function to convert Plain block to Para. +plainToPara :: Block -> Block +plainToPara (Plain x) = Para x +plainToPara x = x + +-- | Convert a list of pairs of terms and definitions into a list of +-- JATS varlistentrys. +deflistItemsToJATS :: PandocMonad m + => WriterOptions -> [([Inline],[[Block]])] -> JATS m Doc +deflistItemsToJATS opts items = + vcat <$> mapM (uncurry (deflistItemToJATS opts)) items + +-- | Convert a term and a list of blocks into a JATS varlistentry. +deflistItemToJATS :: PandocMonad m + => WriterOptions -> [Inline] -> [[Block]] -> JATS m Doc +deflistItemToJATS opts term defs = do + term' <- inlinesToJATS opts term + def' <- blocksToJATS opts $ concatMap (map plainToPara) defs + return $ inTagsIndented "def-item" $ + inTagsIndented "term" term' $$ + inTagsIndented "def" def' + +-- | Convert a list of lists of blocks to a list of JATS list items. +listItemsToJATS :: PandocMonad m + => WriterOptions -> Maybe [String] -> [[Block]] -> JATS m Doc +listItemsToJATS opts markers items = + case markers of + Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items + Just ms -> vcat <$> zipWithM (listItemToJATS opts) (map Just ms) items + +-- | Convert a list of blocks into a JATS list item. +listItemToJATS :: PandocMonad m + => WriterOptions -> Maybe String -> [Block] -> JATS m Doc +listItemToJATS opts mbmarker item = do + contents <- blocksToJATS opts item + return $ inTagsIndented "list-item" $ + maybe empty (\lbl -> inTagsIndented "label" (text lbl)) mbmarker + $$ contents + +imageMimeType :: String -> [(String, String)] -> (String, String) +imageMimeType src kvs = + let mbMT = getMimeType src + maintype = fromMaybe "image" $ + lookup "mimetype" kvs `mplus` + (takeWhile (/='/') <$> mbMT) + subtype = fromMaybe "" $ + lookup "mime-subtype" kvs `mplus` + ((drop 1 . dropWhile (/='/')) <$> mbMT) + in (maintype, subtype) + +languageFor :: [String] -> String +languageFor classes = + case langs of + (l:_) -> escapeStringForXML l + [] -> "" + where isLang l = map toLower l `elem` map (map toLower) languages + langsFrom s = if isLang s + then [s] + else languagesByExtension . map toLower $ s + langs = concatMap langsFrom classes + +codeAttr :: Attr -> (String, [(String, String)]) +codeAttr (ident,classes,kvs) = (lang, attr) + where + attr = [("id",ident) | not (null ident)] ++ + [("language",lang) | not (null lang)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["code-type", + "code-version", "executable", + "language-version", "orientation", + "platforms", "position", "specific-use"]] + lang = languageFor classes + +-- | Convert a Pandoc block element to JATS. +blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m Doc +blockToJATS _ Null = return empty +-- Bibliography reference: +blockToJATS opts (Div ('r':'e':'f':'-':_,_,_) [Para lst]) = + inlinesToJATS opts lst +blockToJATS opts (Div ("refs",_,_) xs) = do + contents <- blocksToJATS opts xs + return $ inTagsIndented "ref-list" contents +blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do + contents <- blocksToJATS opts bs + let attr = [("id", ident) | not (null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", + "content-type", "orientation", "position"]] + return $ inTags True cls attr contents +blockToJATS opts (Div (ident,_,kvs) bs) = do + contents <- blocksToJATS opts bs + let attr = [("id", ident) | not (null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", + "content-type", "orientation", "position"]] + return $ inTags True "boxed-text" attr contents +blockToJATS opts (Header _ _ title) = do + title' <- inlinesToJATS opts title + return $ inTagsSimple "title" title' +-- No Plain, everything needs to be in a block-level tag +blockToJATS opts (Plain lst) = blockToJATS opts (Para lst) +-- title beginning with fig: indicates that the image is a figure +blockToJATS opts (Para [Image (ident,_,kvs) txt + (src,'f':'i':'g':':':tit)]) = do + alt <- inlinesToJATS opts txt + let (maintype, subtype) = imageMimeType src kvs + let capt = if null txt + then empty + else inTagsSimple "caption" alt + let attr = [("id", ident) | not (null ident)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation", + "position", "specific-use"]] + let graphicattr = [("mimetype",maintype), + ("mime-subtype",subtype), + ("xlink:href",src), -- do we need to URL escape this? + ("xlink:title",tit)] + return $ inTags True "fig" attr $ + capt $$ selfClosingTag "graphic" graphicattr +blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do + let (maintype, subtype) = imageMimeType src kvs + let attr = [("id", ident) | not (null ident)] ++ + [("mimetype", maintype), + ("mime-subtype", subtype), + ("xlink:href", src)] ++ + [("xlink:title", tit) | not (null tit)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift", + "content-type", "specific-use", "xlink:actuate", + "xlink:href", "xlink:role", "xlink:show", + "xlink:type"]] + return $ selfClosingTag "graphic" attr +blockToJATS opts (Para lst) = + inTagsIndented "p" <$> inlinesToJATS opts lst +blockToJATS opts (LineBlock lns) = + blockToJATS opts $ linesToPara lns +blockToJATS opts (BlockQuote blocks) = + inTagsIndented "disp-quote" <$> blocksToJATS opts blocks +blockToJATS _ (CodeBlock a str) = return $ + inTags False tag attr (flush (text (escapeStringForXML str))) + where (lang, attr) = codeAttr a + tag = if null lang then "preformat" else "code" +blockToJATS _ (BulletList []) = return empty +blockToJATS opts (BulletList lst) = + inTags True "list" [("list-type", "bullet")] <$> + listItemsToJATS opts Nothing lst +blockToJATS _ (OrderedList _ []) = return empty +blockToJATS opts (OrderedList (start, numstyle, delimstyle) items) = do + let listType = case numstyle of + DefaultStyle -> "order" + Decimal -> "order" + Example -> "order" + UpperAlpha -> "alpha-upper" + LowerAlpha -> "alpha-lower" + UpperRoman -> "roman-upper" + LowerRoman -> "roman-lower" + let simpleList = start == 1 && (delimstyle == DefaultDelim || + delimstyle == Period) + let markers = if simpleList + then Nothing + else Just $ + orderedListMarkers (start, numstyle, delimstyle) + inTags True "list" [("list-type", listType)] <$> + listItemsToJATS opts markers items +blockToJATS opts (DefinitionList lst) = + inTags True "def-list" [] <$> deflistItemsToJATS opts lst +blockToJATS _ b@(RawBlock f str) + | f == "jats" = return $ text str -- raw XML block + | otherwise = do + report $ BlockNotRendered b + return empty +blockToJATS _ HorizontalRule = return empty -- not semantic +blockToJATS opts (Table [] aligns widths headers rows) = do + let percent w = show (truncate (100*w) :: Integer) ++ "*" + let coltags = vcat $ zipWith (\w al -> selfClosingTag "col" + ([("width", percent w) | w > 0] ++ + [("align", alignmentToString al)])) widths aligns + thead <- if all null headers + then return empty + else inTagsIndented "thead" <$> tableRowToJATS opts True headers + tbody <- (inTagsIndented "tbody" . vcat) <$> + mapM (tableRowToJATS opts False) rows + return $ inTags True "table" [] $ coltags $$ thead $$ tbody +blockToJATS opts (Table caption aligns widths headers rows) = do + captionDoc <- inTagsIndented "caption" <$> blockToJATS opts (Para caption) + tbl <- blockToJATS opts (Table [] aligns widths headers rows) + return $ inTags True "table-wrap" [] $ captionDoc $$ tbl + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToJATS :: PandocMonad m + => WriterOptions + -> Bool + -> [[Block]] + -> JATS m Doc +tableRowToJATS opts isHeader cols = + (inTagsIndented "tr" . vcat) <$> mapM (tableItemToJATS opts isHeader) cols + +tableItemToJATS :: PandocMonad m + => WriterOptions + -> Bool + -> [Block] + -> JATS m Doc +tableItemToJATS opts isHeader [Plain item] = + inTags True (if isHeader then "th" else "td") [] <$> + inlinesToJATS opts item +tableItemToJATS opts isHeader item = + (inTags True (if isHeader then "th" else "td") [] . vcat) <$> + mapM (blockToJATS opts) item + +-- | Convert a list of inline elements to JATS. +inlinesToJATS :: PandocMonad m => WriterOptions -> [Inline] -> JATS m Doc +inlinesToJATS opts lst = hcat <$> mapM (inlineToJATS opts) lst + +-- | Convert an inline element to JATS. +inlineToJATS :: PandocMonad m => WriterOptions -> Inline -> JATS m Doc +inlineToJATS _ (Str str) = return $ text $ escapeStringForXML str +inlineToJATS opts (Emph lst) = + inTagsSimple "italic" <$> inlinesToJATS opts lst +inlineToJATS opts (Strong lst) = + inTags False "bold" [("role", "strong")] <$> inlinesToJATS opts lst +inlineToJATS opts (Strikeout lst) = + inTagsSimple "strike" <$> inlinesToJATS opts lst +inlineToJATS opts (Superscript lst) = + inTagsSimple "sup" <$> inlinesToJATS opts lst +inlineToJATS opts (Subscript lst) = + inTagsSimple "sub" <$> inlinesToJATS opts lst +inlineToJATS opts (SmallCaps lst) = + inTags False "sc" [("role", "smallcaps")] <$> + inlinesToJATS opts lst +inlineToJATS opts (Quoted SingleQuote lst) = do + contents <- inlinesToJATS opts lst + return $ char '‘' <> contents <> char '’' +inlineToJATS opts (Quoted DoubleQuote lst) = do + contents <- inlinesToJATS opts lst + return $ char '“' <> contents <> char '”' +inlineToJATS _ (Code a str) = + return $ inTags False tag attr $ text (escapeStringForXML str) + where (lang, attr) = codeAttr a + tag = if null lang then "monospace" else "code" +inlineToJATS _ il@(RawInline f x) + | f == "jats" = return $ text x + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToJATS _ LineBreak = return $ selfClosingTag "break" [] +inlineToJATS _ Space = return space +inlineToJATS opts SoftBreak + | writerWrapText opts == WrapPreserve = return cr + | otherwise = return space +inlineToJATS opts (Note contents) = + -- TODO technically only <p> tags are allowed inside + inTagsIndented "fn" <$> blocksToJATS opts contents +inlineToJATS opts (Cite _ lst) = + -- TODO revisit this after examining the jats.csl pipeline + inlinesToJATS opts lst +inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils +inlineToJATS opts (Span (ident,_,kvs) ils) = do + contents <- inlinesToJATS opts ils + let attr = [("id",ident) | not (null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs + , k `elem` ["content-type", "rationale", + "rid", "specific-use"]] + return $ selfClosingTag "milestone-start" attr <> contents <> + selfClosingTag "milestone-end" [] +inlineToJATS _ (Math t str) = do + let addPref (Xml.Attr q v) + | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v + | otherwise = Xml.Attr q v + let fixNS' e = e{ Xml.elName = + (Xml.elName e){ Xml.qPrefix = Just "mml" } } + let fixNS = everywhere (mkT fixNS') . + (\e -> e{ Xml.elAttribs = map addPref (Xml.elAttribs e) }) + let conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP + res <- convertMath writeMathML t str + let tagtype = case t of + DisplayMath -> "disp-formula" + InlineMath -> "inline-formula" + let rawtex = inTagsSimple "tex-math" + $ text "<![CDATA[" <> + text str <> + text "]]>" + return $ inTagsSimple tagtype $ + case res of + Right r -> inTagsSimple "alternatives" $ + cr <> rawtex $$ + text (Xml.ppcElement conf $ fixNS r) + Left _ -> rawtex +inlineToJATS _ (Link _attr [Str t] ('m':'a':'i':'l':'t':'o':':':email, _)) + | escapeURI t == email = + return $ inTagsSimple "email" $ text (escapeStringForXML email) +inlineToJATS opts (Link (ident,_,kvs) txt ('#':src, _)) = do + let attr = [("id", ident) | not (null ident)] ++ + [("alt", stringify txt) | not (null txt)] ++ + [("rid", src)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] + if null txt + then return $ selfClosingTag "xref" attr + else do + contents <- inlinesToJATS opts txt + return $ inTags False "xref" attr contents +inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do + let attr = [("id", ident) | not (null ident)] ++ + [("ext-link-type", "uri"), + ("xlink:href", src)] ++ + [("xlink:title", tit) | not (null tit)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["assigning-authority", + "specific-use", "xlink:actuate", + "xlink:role", "xlink:show", + "xlink:type"]] + contents <- inlinesToJATS opts txt + return $ inTags False "ext-link" attr contents +inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do + let mbMT = getMimeType src + let maintype = fromMaybe "image" $ + lookup "mimetype" kvs `mplus` + (takeWhile (/='/') <$> mbMT) + let subtype = fromMaybe "" $ + lookup "mime-subtype" kvs `mplus` + ((drop 1 . dropWhile (/='/')) <$> mbMT) + let attr = [("id", ident) | not (null ident)] ++ + [("mimetype", maintype), + ("mime-subtype", subtype), + ("xlink:href", src)] ++ + [("xlink:title", tit) | not (null tit)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift", + "content-type", "specific-use", "xlink:actuate", + "xlink:href", "xlink:role", "xlink:show", + "xlink:type"]] + return $ selfClosingTag "inline-graphic" attr diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 88934eb44..f61c878e5 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, - PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,37 +30,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into LaTeX. -} -module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where +module Text.Pandoc.Writers.LaTeX ( + writeLaTeX + , writeBeamer + ) where +import Control.Applicative ((<|>)) +import Control.Monad.State.Strict +import Data.Aeson (FromJSON, object, (.=)) +import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord, + toLower) +import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy, + stripPrefix, (\\)) +import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import Text.Pandoc.Class (PandocMonad, report, toLang) import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, + styleToLaTeX, toListingsLanguage) +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Templates -import Text.Printf ( printf ) -import Network.URI ( isURI, unEscapeString ) -import Data.Aeson (object, (.=), FromJSON) -import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, - nub, nubBy, foldl' ) -import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, - ord, isAlphaNum ) -import Data.Maybe ( fromMaybe, isJust, catMaybes ) -import qualified Data.Text as T -import Control.Applicative ((<|>)) -import Control.Monad.State -import qualified Text.Parsec as P import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize +import Text.Pandoc.Shared import Text.Pandoc.Slides -import Text.Pandoc.Highlighting (highlight, styleToLaTeX, - formatLaTeXInline, formatLaTeXBlock, - toListingsLanguage) +import Text.Pandoc.Templates +import Text.Pandoc.Walk +import Text.Pandoc.Writers.Shared +import qualified Text.Parsec as P +import Text.Printf (printf) data WriterState = WriterState { stInNote :: Bool -- true if we're in a note , stInQuote :: Bool -- true if in a blockquote , stInMinipage :: Bool -- true if in minipage , stInHeading :: Bool -- true if in a section heading + , stInItem :: Bool -- true if in \item[..] , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter @@ -74,49 +82,75 @@ data WriterState = , stHighlighting :: Bool -- true if document has highlighted code , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit , stInternalLinks :: [String] -- list of internal link targets - , stUsesEuro :: Bool -- true if euro symbol used + , stBeamer :: Bool -- produce beamer + , stEmptyLine :: Bool -- true if no content on line } +startingState :: WriterOptions -> WriterState +startingState options = WriterState { + stInNote = False + , stInQuote = False + , stInMinipage = False + , stInHeading = False + , stInItem = False + , stNotes = [] + , stOLLevel = 1 + , stOptions = options + , stVerbInNote = False + , stTable = False + , stStrikeout = False + , stUrl = False + , stGraphics = False + , stLHS = False + , stBook = case writerTopLevelDivision options of + TopLevelPart -> True + TopLevelChapter -> True + _ -> False + , stCsquotes = False + , stHighlighting = False + , stIncremental = writerIncremental options + , stInternalLinks = [] + , stBeamer = False + , stEmptyLine = True } + -- | Convert Pandoc to LaTeX. -writeLaTeX :: WriterOptions -> Pandoc -> String +writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeLaTeX options document = - evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, stInQuote = False, - stInMinipage = False, stInHeading = False, - stNotes = [], stOLLevel = 1, - stOptions = options, stVerbInNote = False, - stTable = False, stStrikeout = False, - stUrl = False, stGraphics = False, - stLHS = False, - stBook = (case writerTopLevelDivision options of - TopLevelPart -> True - TopLevelChapter -> True - _ -> False), - stCsquotes = False, stHighlighting = False, - stIncremental = writerIncremental options, - stInternalLinks = [], stUsesEuro = False } - -pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String + evalStateT (pandocToLaTeX options document) $ + startingState options + +-- | Convert Pandoc to LaTeX Beamer. +writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeBeamer options document = + evalStateT (pandocToLaTeX options document) $ + (startingState options){ stBeamer = True } + +type LW m = StateT WriterState m + +pandocToLaTeX :: PandocMonad m + => WriterOptions -> Pandoc -> LW m Text pandocToLaTeX options (Pandoc meta blocks) = do -- Strip off final 'references' header if --natbib or --biblatex let method = writerCiteMethod options let blocks' = if method == Biblatex || method == Natbib then case reverse blocks of - (Div (_,["references"],_) _):xs -> reverse xs - _ -> blocks + Div (_,["references"],_) _:xs -> reverse xs + _ -> blocks else blocks -- see if there are internal links - let isInternalLink (Link _ _ ('#':xs,_)) = [xs] - isInternalLink _ = [] + let isInternalLink (Link _ _ ('#':xs,_)) = [xs] + isInternalLink _ = [] modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } - let template = maybe "" id $ writerTemplate options + let template = fromMaybe "" $ writerTemplate options -- set stBook depending on documentclass let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) + (fmap render' . blockListToLaTeX) + (fmap render' . inlineListToLaTeX) meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] let documentClass = case P.parse pDocumentClass "template" template of @@ -143,25 +177,38 @@ pandocToLaTeX options (Pandoc meta blocks) = do else case last blocks' of Header 1 _ il -> (init blocks', il) _ -> (blocks', []) - blocks''' <- if writerBeamer options + beamer <- gets stBeamer + blocks''' <- if beamer then toSlides blocks'' else return blocks'' body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''' - (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader - let main = render colwidth $ vsep body + (biblioTitle :: Text) <- render' <$> inlineListToLaTeX lastHeader + let main = render' $ vsep body st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta - let docLangs = nub $ query (extract "lang") blocks + docLangs <- catMaybes <$> + mapM (toLang . Just) (ordNub (query (extract "lang") blocks)) let hasStringValue x = isJust (getField x metadata :: Maybe String) - let geometryFromMargins = intercalate [','] $ catMaybes $ - map (\(x,y) -> + let geometryFromMargins = intercalate [','] $ mapMaybe (\(x,y) -> ((x ++ "=") ++) <$> getField y metadata) [("lmargin","margin-left") ,("rmargin","margin-right") ,("tmargin","margin-top") ,("bmargin","margin-bottom") ] + let toPolyObj lang = object [ "name" .= T.pack name + , "options" .= T.pack opts ] + where + (name, opts) = toPolyglossia lang + mblang <- toLang $ case getLang options meta of + Just l -> Just l + Nothing | null docLangs -> Nothing + | otherwise -> Just "en" + -- we need a default here since lang is used in template conditionals + + let dirs = query (extract "dir") blocks + let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if stBook st @@ -170,7 +217,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "body" main $ defField "title-meta" titleMeta $ defField "author-meta" (intercalate "; " authorsMeta) $ - defField "documentclass" (if writerBeamer options + defField "documentclass" (if beamer then ("beamer" :: String) else if stBook st then "book" @@ -183,12 +230,14 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "lhs" (stLHS st) $ defField "graphics" (stGraphics st) $ defField "book-class" (stBook st) $ - defField "euro" (stUsesEuro st) $ defField "listings" (writerListings options || stLHS st) $ - defField "beamer" (writerBeamer options) $ + defField "beamer" beamer $ (if stHighlighting st - then defField "highlighting-macros" (styleToLaTeX - $ writerHighlightStyle options ) + then case writerHighlightStyle options of + Just sty -> + defField "highlighting-macros" + (styleToLaTeX sty) + Nothing -> id else id) $ (case writerCiteMethod options of Natbib -> defField "biblio-title" biblioTitle . @@ -196,26 +245,26 @@ pandocToLaTeX options (Pandoc meta blocks) = do Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ - -- set lang to something so polyglossia/babel is included - defField "lang" (if null docLangs then ""::String else "en") $ - defField "otherlangs" docLangs $ defField "colorlinks" (any hasStringValue ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $ - defField "dir" (if (null $ query (extract "dir") blocks) - then ""::String - else "ltr") $ + (if null dirs + then id + else defField "dir" ("ltr" :: String)) $ defField "section-titles" True $ defField "geometry" geometryFromMargins $ + (case getField "papersize" metadata of + -- uppercase a4, a5, etc. + Just (('A':d:ds) :: String) + | all isDigit (d:ds) -> resetField "papersize" + (('a':d:ds) :: String) + _ -> id) metadata - let toPolyObj lang = object [ "name" .= T.pack name - , "options" .= T.pack opts ] - where - (name, opts) = toPolyglossia lang - let lang = maybe [] (splitBy (=='-')) $ getField "lang" context - otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context let context' = - defField "babel-lang" (toBabel lang) - $ defField "babel-otherlangs" (map toBabel otherlangs) + -- note: lang is used in some conditionals in the template, + -- so we need to set it if we have any babel/polyglossia: + maybe id (defField "lang" . renderLang) mblang + $ maybe id (defField "babel-lang" . toBabel) mblang + $ defField "babel-otherlangs" (map toBabel docLangs) $ defField "babel-newcommands" (concatMap (\(poly, babel) -> -- \textspanish and \textgalician are already used by babel -- save them as \oritext... and let babel use that @@ -227,31 +276,30 @@ pandocToLaTeX options (Pandoc meta blocks) = do "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++ "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" ++ poly ++ "}{##2}}}\n" - else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" - ++ babel ++ "}{#2}}\n" ++ - "\\newenvironment{" ++ poly ++ "}[2][]{\\begin{otherlanguage}{" - ++ babel ++ "}}{\\end{otherlanguage}}\n" + else (if poly == "latin" -- see #4161 + then "\\providecommand{\\textlatin}{}\n\\renewcommand" + else "\\newcommand") ++ "{\\text" ++ poly ++ + "}[2][]{\\foreignlanguage{" ++ babel ++ "}{#2}}\n" ++ + "\\newenvironment{" ++ poly ++ + "}[2][]{\\begin{otherlanguage}{" ++ + babel ++ "}}{\\end{otherlanguage}}\n" ) -- eliminate duplicates that have same polyglossia name $ nubBy (\a b -> fst a == fst b) -- find polyglossia and babel names of languages used in the document - $ map (\l -> - let lng = splitBy (=='-') l - in (fst $ toPolyglossia lng, toBabel lng) - ) - docLangs ) - $ defField "polyglossia-lang" (toPolyObj lang) - $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs) - $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of - Just "rtl" -> True - _ -> False) - $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' + $ map (\l -> (fst $ toPolyglossia l, toBabel l)) docLangs + ) + $ maybe id (defField "polyglossia-lang" . toPolyObj) mblang + $ defField "polyglossia-otherlangs" (map toPolyObj docLangs) + $ + defField "latex-dir-rtl" + (getField "dir" context == Just ("rtl" :: String)) context + case writerTemplate options of + Nothing -> return main + Just tpl -> renderTemplate' tpl context' -- | Convert Elements to LaTeX -elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc +elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc elementToLaTeX _ (Blk block) = blockToLaTeX block elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do modify $ \s -> s{stInHeading = True} @@ -266,18 +314,15 @@ data StringContext = TextString deriving (Eq) -- escape things as needed for LaTeX -stringToLaTeX :: StringContext -> String -> State WriterState String +stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && ctx == TextString + let ligatures = isEnabled Ext_smart opts && ctx == TextString let isUrl = ctx == URLString - when (x == '€') $ - modify $ \st -> st{ stUsesEuro = True } return $ case x of - '€' -> "\\euro{}" ++ rest '{' -> "\\{" ++ rest '}' -> "\\}" ++ rest '`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest @@ -311,26 +356,30 @@ stringToLaTeX ctx (x:xs) = do '\x2013' | ligatures -> "--" ++ rest _ -> x : rest -toLabel :: String -> State WriterState String +toLabel :: PandocMonad m => String -> LW m String toLabel z = go `fmap` stringToLaTeX URLString z where go [] = "" go (x:xs) | (isLetter x || isDigit x) && isAscii x = x:go xs - | elem x ("_-+=:;." :: String) = x:go xs + | x `elem` ("_-+=:;." :: String) = x:go xs | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc inCmd cmd contents = char '\\' <> text cmd <> braces contents -toSlides :: [Block] -> State WriterState [Block] +toSlides :: PandocMonad m => [Block] -> LW m [Block] toSlides bs = do opts <- gets stOptions let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts let bs' = prepSlides slideLevel bs - concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') + concat `fmap` mapM (elementToBeamer slideLevel) (hierarchicalize bs') -elementToBeamer :: Int -> Element -> State WriterState [Block] +elementToBeamer :: PandocMonad m => Int -> Element -> LW m [Block] +elementToBeamer _slideLevel (Blk (Div attr bs)) = do + -- make sure we support "blocks" inside divs + bs' <- concat `fmap` mapM (elementToBeamer 0) (hierarchicalize bs) + return [Div attr bs'] elementToBeamer _slideLevel (Blk b) = return [b] elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) | lvl > slideLevel = do @@ -340,7 +389,7 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) : bs ++ [RawBlock "latex" "\\end{block}"] | lvl < slideLevel = do bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts - return $ (Header lvl (ident,classes,kvs) tit) : bs + return $ Header lvl (ident,classes,kvs) tit : bs | otherwise = do -- lvl == slideLevel -- note: [fragile] is required or verbatim breaks let hasCodeBlock (CodeBlock _ _) = [True] @@ -349,19 +398,28 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) hasCode _ = [] let fragile = "fragile" `elem` classes || not (null $ query hasCodeBlock elts ++ query hasCode elts) - let frameoptions = ["allowdisplaybreaks", "allowframebreaks", + let frameoptions = ["allowdisplaybreaks", "allowframebreaks", "fragile", "b", "c", "t", "environment", "label", "plain", "shrink", "standout"] - let optionslist = ["fragile" | fragile] ++ + let optionslist = ["fragile" | fragile && isNothing (lookup "fragile" kvs)] ++ [k | k <- classes, k `elem` frameoptions] ++ [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions] let options = if null optionslist then "" else "[" ++ intercalate "," optionslist ++ "]" - let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) : - if tit == [Str "\0"] -- marker for hrule - then [] - else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"] + let latex = RawInline (Format "latex") + slideTitle <- + if tit == [Str "\0"] -- marker for hrule + then return [] + else + if null ident + then return $ latex "{" : tit ++ [latex "}"] + else do + ref <- toLabel ident + return $ latex ("{%\n\\protect\\hypertarget{" ++ + ref ++ "}{%\n") : tit ++ [latex "}}"] + let slideStart = Para $ + RawInline "latex" ("\\begin{frame}" ++ options) : slideTitle let slideEnd = RawBlock "latex" "\\end{frame}" -- now carve up slide into blocks if there are sections inside bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts @@ -376,43 +434,89 @@ isListBlock _ = False isLineBreakOrSpace :: Inline -> Bool isLineBreakOrSpace LineBreak = True isLineBreakOrSpace SoftBreak = True -isLineBreakOrSpace Space = True -isLineBreakOrSpace _ = False +isLineBreakOrSpace Space = True +isLineBreakOrSpace _ = False -- | Convert Pandoc block element to LaTeX. -blockToLaTeX :: Block -- ^ Block to convert - -> State WriterState Doc +blockToLaTeX :: PandocMonad m + => Block -- ^ Block to convert + -> LW m Doc blockToLaTeX Null = return empty -blockToLaTeX (Div (identifier,classes,kvs) bs) = do - beamer <- writerBeamer `fmap` gets stOptions - ref <- toLabel identifier - let linkAnchor = if null identifier - then empty - else "\\hypertarget" <> braces (text ref) <> - braces empty - let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir - let wrapDir = case lookup "dir" kvs of - Just "rtl" -> align "RTL" - Just "ltr" -> align "LTR" - _ -> id - wrapLang txt = case lookup "lang" kvs of - Just lng -> let (l, o) = toPolyglossiaEnv lng - ops = if null o - then "" - else brackets $ text o - in inCmd "begin" (text l) <> ops - $$ blankline <> txt <> blankline - $$ inCmd "end" (text l) - Nothing -> txt - wrapNotes txt = if beamer && "notes" `elem` classes +blockToLaTeX (Div (identifier,classes,kvs) bs) + | "incremental" `elem` classes = do + let classes' = filter ("incremental"/=) classes + beamer <- gets stBeamer + if beamer + then do oldIncremental <- gets stIncremental + modify $ \s -> s{ stIncremental = True } + result <- blockToLaTeX $ Div (identifier,classes',kvs) bs + modify $ \s -> s{ stIncremental = oldIncremental } + return result + else blockToLaTeX $ Div (identifier,classes',kvs) bs + | "nonincremental" `elem` classes = do + let classes' = filter ("nonincremental"/=) classes + beamer <- gets stBeamer + if beamer + then do oldIncremental <- gets stIncremental + modify $ \s -> s{ stIncremental = False } + result <- blockToLaTeX $ Div (identifier,classes',kvs) bs + modify $ \s -> s{ stIncremental = oldIncremental } + return result + else blockToLaTeX $ Div (identifier,classes',kvs) bs + | otherwise = do + beamer <- gets stBeamer + linkAnchor' <- hypertarget True identifier empty + -- see #2704 for the motivation for adding \leavevmode: + let linkAnchor = + case bs of + Para _ : _ + | not (isEmpty linkAnchor') + -> "\\leavevmode" <> linkAnchor' <> "%" + _ -> linkAnchor' + let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + lang <- toLang $ lookup "lang" kvs + let wrapColumns = if "columns" `elem` classes + then \contents -> + inCmd "begin" "columns" <> brackets "T" + $$ contents + $$ inCmd "end" "columns" + else id + wrapColumn = if "column" `elem` classes + then \contents -> + let fromPct xs = + case reverse xs of + '%':ds -> '0':'.': reverse ds + _ -> xs + w = maybe "0.48" fromPct (lookup "width" kvs) + in inCmd "begin" "column" <> + braces (text w <> "\\textwidth") + $$ contents + $$ inCmd "end" "column" + else id + wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "RTL" + Just "ltr" -> align "LTR" + _ -> id + wrapLang txt = case lang of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if null o + then "" + else brackets $ text o + in inCmd "begin" (text l) <> ops + $$ blankline <> txt <> blankline + $$ inCmd "end" (text l) + Nothing -> txt + wrapNotes txt = if beamer && "notes" `elem` classes then "\\note" <> braces txt -- speaker notes else linkAnchor $$ txt - fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs + (wrapColumns . wrapColumn . wrapDir . wrapLang . wrapNotes) + <$> blockListToLaTeX bs blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do inNote <- gets stInNote + inMinipage <- gets stInMinipage modify $ \st -> st{ stInMinipage = True, stNotes = [] } capt <- inlineListToLaTeX txt notes <- gets stNotes @@ -426,25 +530,26 @@ blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = d let footnotes = notesToLaTeX notes lab <- labelFor ident let caption = "\\caption" <> captForLof <> braces capt <> lab - figure <- hypertarget ident (cr <> - "\\begin{figure}" $$ "\\centering" $$ img $$ - caption $$ "\\end{figure}" <> cr) - return $ if inNote - -- can't have figures in notes + innards <- hypertarget True ident $ + "\\centering" $$ img $$ caption <> cr + let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}" + return $ if inNote || inMinipage + -- can't have figures in notes or minipage (here, table cell) + -- http://www.tex.ac.uk/FAQ-ouparmd.html then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" else figure $$ footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer if beamer then blockToLaTeX (RawBlock "latex" "\\pause") else inlineListToLaTeX [Str ".",Space,Str ".",Space,Str "."] blockToLaTeX (Para lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -blockToLaTeX (LineBlock lns) = do +blockToLaTeX (LineBlock lns) = blockToLaTeX $ linesToPara lns blockToLaTeX (BlockQuote lst) = do - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer case lst of [b] | beamer && isListBlock b -> do oldIncremental <- gets stIncremental @@ -460,11 +565,11 @@ blockToLaTeX (BlockQuote lst) = do return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions - ref <- toLabel identifier - let linkAnchor = if null identifier + lab <- labelFor identifier + linkAnchor' <- hypertarget True identifier lab + let linkAnchor = if isEmpty linkAnchor' then empty - else "\\hypertarget" <> braces (text ref) <> - braces ("\\label" <> braces (text ref)) + else linkAnchor' <> "%" let lhsCodeBlock = do modify $ \s -> s{ stLHS = True } return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ @@ -479,6 +584,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do text str $$ text ("\\end{" ++ env ++ "}")) <> cr let listingsCodeBlock = do st <- get + ref <- toLabel identifier let params = if writerListings (stOptions st) then (case getListingsLanguage classes of Just l -> [ "language=" ++ mbBraced l ] @@ -495,9 +601,6 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do else [ "label=" ++ ref ]) else [] - mbBraced x = if not (all isAlphaNum x) - then "{" <> x <> "}" - else x printParams | null params = empty | otherwise = brackets $ hcat (intersperse ", " @@ -505,24 +608,34 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ "\\end{lstlisting}") $$ cr let highlightedCodeBlock = - case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of - Nothing -> rawCodeBlock - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (flush $ linkAnchor $$ text (T.unpack h)) + case highlight (writerSyntaxMap opts) + formatLaTeXBlock ("",classes,keyvalAttr) str of + Left msg -> do + unless (null msg) $ + report $ CouldNotHighlight msg + rawCodeBlock + Right h -> do + st <- get + when (stInNote st) $ modify (\s -> s{ stVerbInNote = True }) + modify (\s -> s{ stHighlighting = True }) + return (flush $ linkAnchor $$ text (T.unpack h)) case () of _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && - "literate" `elem` classes -> lhsCodeBlock - | writerListings opts -> listingsCodeBlock - | writerHighlight opts && not (null classes) -> highlightedCodeBlock - | otherwise -> rawCodeBlock -blockToLaTeX (RawBlock f x) + "literate" `elem` classes -> lhsCodeBlock + | writerListings opts -> listingsCodeBlock + | not (null classes) && isJust (writerHighlightStyle opts) + -> highlightedCodeBlock + | otherwise -> rawCodeBlock +blockToLaTeX b@(RawBlock f x) | f == Format "latex" || f == Format "tex" = return $ text x - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToLaTeX (BulletList []) = return empty -- otherwise latex error blockToLaTeX (BulletList lst) = do incremental <- gets stIncremental - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer let inc = if beamer && incremental then "[<+->]" else "" items <- mapM listItemToLaTeX lst let spacing = if isTightList lst @@ -538,6 +651,7 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do put $ st {stOLLevel = oldlevel + 1} items <- mapM listItemToLaTeX lst modify (\s -> s {stOLLevel = oldlevel}) + let beamer = stBeamer st let tostyle x = case numstyle of Decimal -> "\\arabic" <> braces x UpperRoman -> "\\Roman" <> braces x @@ -547,15 +661,24 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do Example -> "\\arabic" <> braces x DefaultStyle -> "\\arabic" <> braces x let todelim x = case numdelim of - OneParen -> x <> ")" - TwoParens -> parens x - Period -> x <> "." - _ -> x <> "." + OneParen -> x <> ")" + TwoParens -> parens x + Period -> x <> "." + _ -> x <> "." + let exemplar = case numstyle of + Decimal -> "1" + UpperRoman -> "I" + LowerRoman -> "i" + UpperAlpha -> "A" + LowerAlpha -> "a" + Example -> "1" + DefaultStyle -> "1" let enum = text $ "enum" ++ map toLower (toRomanNumeral oldlevel) - let stylecommand = if numstyle == DefaultStyle && numdelim == DefaultDelim - then empty - else "\\def" <> "\\label" <> enum <> - braces (todelim $ tostyle enum) + let stylecommand + | numstyle == DefaultStyle && numdelim == DefaultDelim = empty + | beamer = brackets (todelim exemplar) + | otherwise = "\\def" <> "\\label" <> enum <> + braces (todelim $ tostyle enum) let resetcounter = if start == 1 || oldlevel > 4 then empty else "\\setcounter" <> braces enum <> @@ -579,7 +702,8 @@ blockToLaTeX (DefinitionList lst) = do else empty return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ "\\end{description}" -blockToLaTeX HorizontalRule = return $ +blockToLaTeX HorizontalRule = + return "\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}" blockToLaTeX (Header level (id',classes,_) lst) = do modify $ \s -> s{stInHeading = True} @@ -587,33 +711,34 @@ blockToLaTeX (Header level (id',classes,_) lst) = do modify $ \s -> s{stInHeading = False} return hdr blockToLaTeX (Table caption aligns widths heads rows) = do - headers <- if all null heads - then return empty - else do - contents <- (tableRowToLaTeX True aligns widths) heads - return ("\\toprule" $$ contents $$ "\\midrule") - let endhead = if all null heads - then empty - else text "\\endhead" - let endfirsthead = if all null heads - then empty - else text "\\endfirsthead" + let toHeaders hs = do contents <- tableRowToLaTeX True aligns widths hs + return ("\\toprule" $$ contents $$ "\\midrule") + let removeNote (Note _) = Span ("", [], []) [] + removeNote x = x captionText <- inlineListToLaTeX caption + firsthead <- if isEmpty captionText || all null heads + then return empty + else ($$ text "\\endfirsthead") <$> toHeaders heads + head' <- if all null heads + then return "\\toprule" + -- avoid duplicate notes in head and firsthead: + else toHeaders (if isEmpty firsthead + then heads + else walk removeNote heads) let capt = if isEmpty captionText then empty - else text "\\caption" <> braces captionText <> "\\tabularnewline" - $$ headers - $$ endfirsthead + else text "\\caption" <> + braces captionText <> "\\tabularnewline" rows' <- mapM (tableRowToLaTeX False aligns widths) rows - let colDescriptors = text $ concat $ map toColDescriptor aligns + let colDescriptors = text $ concatMap toColDescriptor aligns modify $ \s -> s{ stTable = True } return $ "\\begin{longtable}[]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end $$ capt - $$ (if all null heads then "\\toprule" else empty) - $$ headers - $$ endhead + $$ firsthead + $$ head' + $$ "\\endhead" $$ vcat rows' $$ "\\bottomrule" $$ "\\end{longtable}" @@ -626,14 +751,16 @@ toColDescriptor align = AlignCenter -> "c" AlignDefault -> "l" -blockListToLaTeX :: [Block] -> State WriterState Doc -blockListToLaTeX lst = vsep `fmap` mapM blockToLaTeX lst +blockListToLaTeX :: PandocMonad m => [Block] -> LW m Doc +blockListToLaTeX lst = + vsep `fmap` mapM (\b -> setEmptyLine True >> blockToLaTeX b) lst -tableRowToLaTeX :: Bool +tableRowToLaTeX :: PandocMonad m + => Bool -> [Alignment] -> [Double] -> [[Block]] - -> State WriterState Doc + -> LW m Doc tableRowToLaTeX header aligns widths cols = do -- scale factor compensates for extra space between columns -- so the whole table isn't larger than columnwidth @@ -643,9 +770,9 @@ tableRowToLaTeX header aligns widths cols = do isSimple [] = True isSimple _ = False -- simple tables have to have simple cells: - let widths' = if not (all isSimple cols) + let widths' = if all (== 0) widths && not (all isSimple cols) then replicate (length aligns) - (0.97 / fromIntegral (length aligns)) + (scaleFactor / fromIntegral (length aligns)) else map (scaleFactor *) widths cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols return $ hsep (intersperse "&" cells) <> "\\tabularnewline" @@ -672,10 +799,10 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of -- math breaks in simple tables. displayMathToInline :: Inline -> Inline displayMathToInline (Math DisplayMath x) = Math InlineMath x -displayMathToInline x = x +displayMathToInline x = x -tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) - -> State WriterState Doc +tableCellToLaTeX :: PandocMonad m => Bool -> (Double, Alignment, [Block]) + -> LW m Doc tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks tableCellToLaTeX header (width, align, blocks) = do @@ -691,7 +818,7 @@ tableCellToLaTeX header (width, align, blocks) = do AlignDefault -> "\\raggedright" return $ ("\\begin{minipage}" <> valign <> braces (text (printf "%.2f\\columnwidth" width)) <> - (halign <> "\\strut" <> cr <> cellContents <> "\\strut" <> cr) <> + (halign <> cr <> cellContents <> "\\strut" <> cr) <> "\\end{minipage}") $$ notesToLaTeX notes @@ -708,19 +835,22 @@ notesToLaTeX ns = (case length ns of $ map (\x -> "\\footnotetext" <> braces x) $ reverse ns) -listItemToLaTeX :: [Block] -> State WriterState Doc +listItemToLaTeX :: PandocMonad m => [Block] -> LW m Doc listItemToLaTeX lst -- we need to put some text before a header if it's the first -- element in an item. This will look ugly in LaTeX regardless, but -- this will keep the typesetter from throwing an error. - | ((Header _ _ _) :_) <- lst = - blockListToLaTeX lst >>= return . (text "\\item ~" $$) . (nest 2) + | (Header{} :_) <- lst = + blockListToLaTeX lst >>= return . (text "\\item ~" $$) . nest 2 | otherwise = blockListToLaTeX lst >>= return . (text "\\item" $$) . - (nest 2) + nest 2 -defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc +defListItemToLaTeX :: PandocMonad m => ([Inline], [[Block]]) -> LW m Doc defListItemToLaTeX (term, defs) = do + -- needed to turn off 'listings' because it breaks inside \item[...]: + modify $ \s -> s{stInItem = True} term' <- inlineListToLaTeX term + modify $ \s -> s{stInItem = False} -- put braces around term if it contains an internal link, -- since otherwise we get bad bracket interactions: \item[\hyperref[..] let isInternalLink (Link _ _ ('#':_,_)) = True @@ -730,32 +860,33 @@ defListItemToLaTeX (term, defs) = do else term' def' <- liftM vsep $ mapM blockListToLaTeX defs return $ case defs of - (((Header _ _ _) : _) : _) -> + ((Header{} : _) : _) -> "\\item" <> brackets term'' <> " ~ " $$ def' _ -> "\\item" <> brackets term'' $$ def' -- | Craft the section header, inserting the secton reference, if supplied. -sectionHeader :: Bool -- True for unnumbered +sectionHeader :: PandocMonad m + => Bool -- True for unnumbered -> [Char] -> Int -> [Inline] - -> State WriterState Doc + -> LW m Doc sectionHeader unnumbered ident level lst = do txt <- inlineListToLaTeX lst plain <- stringToLaTeX TextString $ concatMap stringify lst - let removeInvalidInline (Note _) = [] + let removeInvalidInline (Note _) = [] removeInvalidInline (Span (id', _, _) _) | not (null id') = [] - removeInvalidInline (Image _ _ _) = [] - removeInvalidInline x = [x] + removeInvalidInline Image{} = [] + removeInvalidInline x = [x] let lstNoNotes = foldr (mappend . (\x -> walkM removeInvalidInline x)) mempty lst txtNoNotes <- inlineListToLaTeX lstNoNotes -- footnotes in sections don't work (except for starred variants) -- unless you specify an optional argument: -- \section[mysec]{mysec\footnote{blah}} - optional <- if unnumbered || lstNoNotes == lst || lstNoNotes == [] + optional <- if unnumbered || lstNoNotes == lst || null lstNoNotes then return empty - else do + else return $ brackets txtNoNotes let contents = if render Nothing txt == plain then braces txt @@ -767,7 +898,8 @@ sectionHeader unnumbered ident level lst = do let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault then TopLevelChapter else writerTopLevelDivision opts - let level' = if writerBeamer opts && + beamer <- gets stBeamer + let level' = if beamer && topLevelDivision `elem` [TopLevelPart, TopLevelChapter] -- beamer has parts but no chapters then if level == 1 then -1 else level - 1 @@ -794,7 +926,8 @@ sectionHeader unnumbered ident level lst = do lab <- labelFor ident let star = if unnumbered && level' < 4 then text "*" else empty let stuffing = star <> optional <> contents - stuffing' <- hypertarget ident $ text ('\\':sectionType) <> stuffing <> lab + stuffing' <- hypertarget True ident $ + text ('\\':sectionType) <> stuffing <> lab return $ if level' > 5 then txt else prefix $$ stuffing' @@ -804,28 +937,28 @@ sectionHeader unnumbered ident level lst = do braces txtNoNotes else empty -hypertarget :: String -> Doc -> State WriterState Doc -hypertarget ident x = do +hypertarget :: PandocMonad m => Bool -> String -> Doc -> LW m Doc +hypertarget _ "" x = return x +hypertarget addnewline ident x = do ref <- text `fmap` toLabel ident - internalLinks <- gets stInternalLinks - return $ - if ident `elem` internalLinks - then text "\\hypertarget" + return $ text "\\hypertarget" <> braces ref - <> braces x - else x + <> braces ((if addnewline && not (isEmpty x) + then ("%" <> cr) + else empty) <> x) -labelFor :: String -> State WriterState Doc +labelFor :: PandocMonad m => String -> LW m Doc labelFor "" = return empty labelFor ident = do ref <- text `fmap` toLabel ident return $ text "\\label" <> braces ref -- | Convert list of inline elements to LaTeX. -inlineListToLaTeX :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListToLaTeX :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> LW m Doc inlineListToLaTeX lst = - mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst) + mapM inlineToLaTeX (fixLineInitialSpaces lst) >>= return . hcat -- nonbreaking spaces (~) in LaTeX don't work after line breaks, -- so we turn nbsps after hard breaks to \hspace commands. @@ -837,43 +970,35 @@ inlineListToLaTeX lst = fixNbsps s = let (ys,zs) = span (=='\160') s in replicate (length ys) hspace ++ [Str zs] hspace = RawInline "latex" "\\hspace*{0.333em}" - -- linebreaks after blank lines cause problems: - fixBreaks [] = [] - fixBreaks ys@(LineBreak : LineBreak : _) = - case span (== LineBreak) ys of - (lbs, rest) -> RawInline "latex" - ("\\\\[" ++ show (length lbs) ++ - "\\baselineskip]") : fixBreaks rest - fixBreaks (y:ys) = y : fixBreaks ys isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True -isQuoted _ = False +isQuoted _ = False -- | Convert inline element to LaTeX -inlineToLaTeX :: Inline -- ^ Inline to convert - -> State WriterState Doc +inlineToLaTeX :: PandocMonad m + => Inline -- ^ Inline to convert + -> LW m Doc inlineToLaTeX (Span (id',classes,kvs) ils) = do - ref <- toLabel id' - let linkAnchor = if null id' - then empty - else "\\protect\\hypertarget" <> braces (text ref) <> - braces empty + linkAnchor <- hypertarget False id' empty + lang <- toLang $ lookup "lang" kvs let cmds = ["textup" | "csl-no-emph" `elem` classes] ++ ["textnormal" | "csl-no-strong" `elem` classes || "csl-no-smallcaps" `elem` classes] ++ ["RL" | ("dir", "rtl") `elem` kvs] ++ ["LR" | ("dir", "ltr") `elem` kvs] ++ - (case lookup "lang" kvs of - Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng + (case lang of + Just lng -> let (l, o) = toPolyglossia lng ops = if null o then "" else ("[" ++ o ++ "]") in ["text" ++ l ++ ops] Nothing -> []) contents <- inlineListToLaTeX ils - return $ linkAnchor <> - if null cmds - then braces contents - else foldr inCmd contents cmds + return $ (if null id' + then empty + else "\\protect" <> linkAnchor) <> + (if null cmds + then braces contents + else foldr inCmd contents cmds) inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = @@ -886,7 +1011,7 @@ inlineToLaTeX (Strikeout lst) = do return $ inCmd "sout" contents inlineToLaTeX (Superscript lst) = inlineListToLaTeX lst >>= return . inCmd "textsuperscript" -inlineToLaTeX (Subscript lst) = do +inlineToLaTeX (Subscript lst) = inlineListToLaTeX lst >>= return . inCmd "textsubscript" inlineToLaTeX (SmallCaps lst) = inlineListToLaTeX lst >>= return . inCmd "textsc" @@ -901,26 +1026,39 @@ inlineToLaTeX (Cite cits lst) = do inlineToLaTeX (Code (_,classes,_) str) = do opts <- gets stOptions inHeading <- gets stInHeading + inItem <- gets stInItem + let listingsCode = do + let listingsopt = case getListingsLanguage classes of + Just l -> "[language=" ++ mbBraced l ++ "]" + Nothing -> "" + inNote <- gets stInNote + when inNote $ modify $ \s -> s{ stVerbInNote = True } + let chr = case "!\"&'()*,-./:;?@_" \\ str of + (c:_) -> c + [] -> '!' + let str' = escapeStringUsing (backslashEscapes "\\{}%~_&") str + -- we always put lstinline in a dummy 'passthrough' command + -- (defined in the default template) so that we don't have + -- to change the way we escape characters depending on whether + -- the lstinline is inside another command. See #1629: + return $ text $ "\\passthrough{\\lstinline" ++ listingsopt ++ [chr] ++ str' ++ [chr] ++ "}" + let rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) + $ stringToLaTeX CodeString str + where escapeSpaces = concatMap + (\c -> if c == ' ' then "\\ " else [c]) + let highlightCode = + case highlight (writerSyntaxMap opts) + formatLaTeXInline ("",classes,[]) str of + Left msg -> do + unless (null msg) $ report $ CouldNotHighlight msg + rawCode + Right h -> modify (\st -> st{ stHighlighting = True }) >> + return (text (T.unpack h)) case () of - _ | writerListings opts && not inHeading -> listingsCode - | writerHighlight opts && not (null classes) -> highlightCode + _ | writerListings opts && not (inHeading || inItem) -> listingsCode + | isJust (writerHighlightStyle opts) && not (null classes) + -> highlightCode | otherwise -> rawCode - where listingsCode = do - inNote <- gets stInNote - when inNote $ modify $ \s -> s{ stVerbInNote = True } - let chr = case "!\"&'()*,-./:;?@_" \\ str of - (c:_) -> c - [] -> '!' - return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] - highlightCode = do - case highlight formatLaTeXInline ("",classes,[]) str of - Nothing -> rawCode - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (text (T.unpack h)) - rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) - $ stringToLaTeX CodeString str - where - escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c]) inlineToLaTeX (Quoted qt lst) = do contents <- inlineListToLaTeX lst csquotes <- liftM stCsquotes get @@ -928,32 +1066,43 @@ inlineToLaTeX (Quoted qt lst) = do if csquotes then return $ "\\enquote" <> braces contents else do - let s1 = if (not (null lst)) && (isQuoted (head lst)) + let s1 = if not (null lst) && isQuoted (head lst) then "\\," else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) + let s2 = if not (null lst) && isQuoted (last lst) then "\\," else empty let inner = s1 <> contents <> s2 return $ case qt of DoubleQuote -> - if writerTeXLigatures opts + if isEnabled Ext_smart opts then text "``" <> inner <> text "''" else char '\x201C' <> inner <> char '\x201D' SingleQuote -> - if writerTeXLigatures opts + if isEnabled Ext_smart opts then char '`' <> inner <> char '\'' else char '\x2018' <> inner <> char '\x2019' -inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str -inlineToLaTeX (Math InlineMath str) = +inlineToLaTeX (Str str) = do + setEmptyLine False + liftM text $ stringToLaTeX TextString str +inlineToLaTeX (Math InlineMath str) = do + setEmptyLine False return $ "\\(" <> text str <> "\\)" -inlineToLaTeX (Math DisplayMath str) = +inlineToLaTeX (Math DisplayMath str) = do + setEmptyLine False return $ "\\[" <> text str <> "\\]" -inlineToLaTeX (RawInline f str) +inlineToLaTeX il@(RawInline f str) | f == Format "latex" || f == Format "tex" - = return $ text str - | otherwise = return empty -inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr + = do + setEmptyLine False + return $ text str + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToLaTeX LineBreak = do + emptyLine <- gets stEmptyLine + setEmptyLine True + return $ (if emptyLine then "~" else "") <> "\\\\" <> cr inlineToLaTeX SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of @@ -982,19 +1131,32 @@ inlineToLaTeX (Link _ txt (src, _)) = src' <- stringToLaTeX URLString (escapeURI src) return $ text ("\\href{" ++ src' ++ "}{") <> contents <> char '}' +inlineToLaTeX il@(Image _ _ ('d':'a':'t':'a':':':_, _)) = do + report $ InlineNotRendered il + return empty inlineToLaTeX (Image attr _ (source, _)) = do + setEmptyLine False modify $ \s -> s{ stGraphics = True } opts <- gets stOptions let showDim dir = let d = text (show dir) <> "=" - in case (dimension dir attr) of + in case dimension dir attr of Just (Pixel a) -> [d <> text (showInInch opts (Pixel a)) <> "in"] Just (Percent a) -> - [d <> text (showFl (a / 100)) <> "\\textwidth"] + [d <> text (showFl (a / 100)) <> + case dir of + Width -> "\\textwidth" + Height -> "\\textheight" + ] Just dim -> [d <> text (show dim)] Nothing -> - [] + case dir of + Width | isJust (dimension Height attr) -> + [d <> "\\textwidth"] + Height | isJust (dimension Width attr) -> + [d <> "\\textheight"] + _ -> [] dimList = showDim Width ++ showDim Height dims = if null dimList then empty @@ -1008,6 +1170,7 @@ inlineToLaTeX (Image attr _ (source, _)) = do (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <> dims <> braces (text source'') inlineToLaTeX (Note contents) = do + setEmptyLine False inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents @@ -1016,9 +1179,9 @@ inlineToLaTeX (Note contents) = do (CodeBlock _ _ : _) -> cr _ -> empty let noteContents = nest 2 contents' <> optnl - opts <- gets stOptions + beamer <- gets stBeamer -- in beamer slides, display footnote from current overlay forward - let beamerMark = if writerBeamer opts + let beamerMark = if beamer then text "<.->" else empty modify $ \st -> st{ stNotes = noteContents : stNotes st } @@ -1035,8 +1198,12 @@ protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs where ltx = RawInline (Format "latex") protectCode (x : xs) = x : protectCode xs -citationsToNatbib :: [Citation] -> State WriterState Doc -citationsToNatbib (one:[]) +setEmptyLine :: PandocMonad m => Bool -> LW m () +setEmptyLine b = modify $ \st -> st{ stEmptyLine = b } + +citationsToNatbib :: PandocMonad m => [Citation] -> LW m Doc +citationsToNatbib + [one] = citeCommand c p s k where Citation { citationId = k @@ -1046,8 +1213,8 @@ citationsToNatbib (one:[]) } = one c = case m of - AuthorInText -> "citet" - SuppressAuthor -> "citeyearpar" + AuthorInText -> "citet" + SuppressAuthor -> "citeyearpar" NormalCitation -> "citep" citationsToNatbib cits @@ -1056,9 +1223,11 @@ citationsToNatbib cits where noPrefix = all (null . citationPrefix) noSuffix = all (null . citationSuffix) - ismode m = all (((==) m) . citationMode) - p = citationPrefix $ head $ cits - s = citationSuffix $ last $ cits + ismode m = all ((==) m . citationMode) + p = citationPrefix $ + head cits + s = citationSuffix $ + last cits ks = intercalate ", " $ map citationId cits citationsToNatbib (c:cs) | citationMode c == AuthorInText = do @@ -1082,17 +1251,20 @@ citationsToNatbib cits = do SuppressAuthor -> citeCommand "citeyear" p s k NormalCitation -> citeCommand "citealp" p s k -citeCommand :: String -> [Inline] -> [Inline] -> String -> State WriterState Doc +citeCommand :: PandocMonad m + => String -> [Inline] -> [Inline] -> String -> LW m Doc citeCommand c p s k = do args <- citeArguments p s k return $ text ("\\" ++ c) <> args -citeArguments :: [Inline] -> [Inline] -> String -> State WriterState Doc +citeArguments :: PandocMonad m + => [Inline] -> [Inline] -> String -> LW m Doc citeArguments p s k = do let s' = case s of - (Str (x:[]) : r) | isPunctuation x -> dropWhile (== Space) r + (Str + [x] : r) | isPunctuation x -> dropWhile (== Space) r (Str (x:xs) : r) | isPunctuation x -> Str xs : r - _ -> s + _ -> s pdoc <- inlineListToLaTeX p sdoc <- inlineListToLaTeX s' let optargs = case (isEmpty pdoc, isEmpty sdoc) of @@ -1101,8 +1273,9 @@ citeArguments p s k = do (_ , _ ) -> brackets pdoc <> brackets sdoc return $ optargs <> braces (text k) -citationsToBiblatex :: [Citation] -> State WriterState Doc -citationsToBiblatex (one:[]) +citationsToBiblatex :: PandocMonad m => [Citation] -> LW m Doc +citationsToBiblatex + [one] = citeCommand cmd p s k where Citation { citationId = k @@ -1133,15 +1306,20 @@ citationsToBiblatex _ = return empty -- Determine listings language from list of class attributes. getListingsLanguage :: [String] -> Maybe String -getListingsLanguage [] = Nothing -getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs +getListingsLanguage xs + = foldr ((<|>) . toListingsLanguage) Nothing xs + +mbBraced :: String -> String +mbBraced x = if not (all isAlphaNum x) + then "{" <> x <> "}" + else x -- Extract a key from divs and spans extract :: String -> Block -> [String] extract key (Div attr _) = lookKey key attr -extract key (Plain ils) = concatMap (extractInline key) ils -extract key (Para ils) = concatMap (extractInline key) ils -extract key (Header _ _ ils) = concatMap (extractInline key) ils +extract key (Plain ils) = query (extractInline key) ils +extract key (Para ils) = query (extractInline key) ils +extract key (Header _ _ ils) = query (extractInline key) ils extract _ _ = [] -- Extract a key from spans @@ -1154,85 +1332,95 @@ lookKey :: String -> Attr -> [String] lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs -- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: String -> (String, String) +toPolyglossiaEnv :: Lang -> (String, String) toPolyglossiaEnv l = - case toPolyglossia $ (splitBy (=='-')) l of + case toPolyglossia l of ("arabic", o) -> ("Arabic", o) x -> x -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: [String] -> (String, String) -toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria") -toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya") -toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco") -toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania") -toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia") -toPolyglossia ("de":"1901":_) = ("german", "spelling=old") -toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old") -toPolyglossia ("de":"AT":_) = ("german", "variant=austrian") -toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old") -toPolyglossia ("de":"CH":_) = ("german", "variant=swiss") -toPolyglossia ("de":_) = ("german", "") -toPolyglossia ("dsb":_) = ("lsorbian", "") -toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly") -toPolyglossia ("en":"AU":_) = ("english", "variant=australian") -toPolyglossia ("en":"CA":_) = ("english", "variant=canadian") -toPolyglossia ("en":"GB":_) = ("english", "variant=british") -toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand") -toPolyglossia ("en":"UK":_) = ("english", "variant=british") -toPolyglossia ("en":"US":_) = ("english", "variant=american") -toPolyglossia ("grc":_) = ("greek", "variant=ancient") -toPolyglossia ("hsb":_) = ("usorbian", "") -toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic") -toPolyglossia ("sl":_) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") +toPolyglossia :: Lang -> (String, String) +toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ "AT" vars) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ "CH" vars) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Babel language string. -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: [String] -> String -toBabel ("de":"1901":_) = "german" -toBabel ("de":"AT":"1901":_) = "austrian" -toBabel ("de":"AT":_) = "naustrian" -toBabel ("de":"CH":"1901":_) = "swissgerman" -toBabel ("de":"CH":_) = "nswissgerman" -toBabel ("de":_) = "ngerman" -toBabel ("dsb":_) = "lowersorbian" -toBabel ("el":"polyton":_) = "polutonikogreek" -toBabel ("en":"AU":_) = "australian" -toBabel ("en":"CA":_) = "canadian" -toBabel ("en":"GB":_) = "british" -toBabel ("en":"NZ":_) = "newzealand" -toBabel ("en":"UK":_) = "british" -toBabel ("en":"US":_) = "american" -toBabel ("fr":"CA":_) = "canadien" -toBabel ("fra":"aca":_) = "acadian" -toBabel ("grc":_) = "polutonikogreek" -toBabel ("hsb":_) = "uppersorbian" -toBabel ("la":"x":"classic":_) = "classiclatin" -toBabel ("sl":_) = "slovene" -toBabel x = commonFromBcp47 x +toBabel :: Lang -> String +toBabel (Lang "de" _ "AT" vars) + | "1901" `elem` vars = "austrian" + | otherwise = "naustrian" +toBabel (Lang "de" _ "CH" vars) + | "1901" `elem` vars = "swissgerman" + | otherwise = "nswissgerman" +toBabel (Lang "de" _ _ vars) + | "1901" `elem` vars = "german" + | otherwise = "ngerman" +toBabel (Lang "dsb" _ _ _) = "lowersorbian" +toBabel (Lang "el" _ _ vars) + | "polyton" `elem` vars = "polutonikogreek" +toBabel (Lang "en" _ "AU" _) = "australian" +toBabel (Lang "en" _ "CA" _) = "canadian" +toBabel (Lang "en" _ "GB" _) = "british" +toBabel (Lang "en" _ "NZ" _) = "newzealand" +toBabel (Lang "en" _ "UK" _) = "british" +toBabel (Lang "en" _ "US" _) = "american" +toBabel (Lang "fr" _ "CA" _) = "canadien" +toBabel (Lang "fra" _ _ vars) + | "aca" `elem` vars = "acadian" +toBabel (Lang "grc" _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars) + | "x-classic" `elem` vars = "classiclatin" +toBabel (Lang "sl" _ _ _) = "slovene" +toBabel x = commonFromBcp47 x -- Takes a list of the constituents of a BCP 47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: [String] -> String -commonFromBcp47 [] = "" -commonFromBcp47 ("pt":"BR":_) = "brazil" +commonFromBcp47 :: Lang -> String +commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil" -- Note: documentation says "brazilian" works too, but it doesn't seem to work -- on some systems. See #2953. -commonFromBcp47 ("sr":"Cyrl":_) = "serbianc" -commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin" -commonFromBcp47 x = fromIso $ head x +commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" +commonFromBcp47 (Lang "zh" "Latn" _ vars) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _) = fromIso l where fromIso "af" = "afrikaans" fromIso "am" = "amharic" @@ -1316,10 +1504,6 @@ commonFromBcp47 x = fromIso $ head x fromIso "vi" = "vietnamese" fromIso _ = "" -deNote :: Inline -> Inline -deNote (Note _) = RawInline (Format "latex") "" -deNote x = x - pDocumentOptions :: P.Parsec String () [String] pDocumentOptions = do P.char '[' diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 98b08b08b..1be955fe3 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2015 John MacFarlane + Copyright : Copyright (C) 2007-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,79 +30,95 @@ Conversion of 'Pandoc' documents to groff man page format. -} module Text.Pandoc.Writers.Man ( writeMan) where +import Control.Monad.State.Strict +import Data.List (intercalate, intersperse, sort, stripPrefix) +import qualified Data.Map as Map +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Builder (deleteMeta) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Templates -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Readers.TeXMath -import Text.Printf ( printf ) -import Data.List ( stripPrefix, intersperse, intercalate ) -import Data.Maybe (fromMaybe) import Text.Pandoc.Pretty -import Text.Pandoc.Builder (deleteMeta) -import Control.Monad.State +import Text.Pandoc.Shared +import Text.Pandoc.Templates +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) type Notes = [[Block]] -data WriterState = WriterState { stNotes :: Notes - , stHasTables :: Bool } +data WriterState = WriterState { stNotes :: Notes + , stFontFeatures :: Map.Map Char Bool + , stHasTables :: Bool } + +defaultWriterState :: WriterState +defaultWriterState = WriterState { stNotes = [] + , stFontFeatures = Map.fromList [ + ('I',False) + , ('B',False) + , ('C',False) + ] + , stHasTables = False } -- | Convert Pandoc to Man. -writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False) +writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeMan opts document = + evalStateT (pandocToMan opts document) defaultWriterState -- | Return groff man representation of document. -pandocToMan :: WriterOptions -> Pandoc -> State WriterState String +pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m Text pandocToMan opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let render' = render colwidth + let render' :: Doc -> Text + render' = render colwidth titleText <- inlineListToMan opts $ docTitle meta let title' = render' titleText let setFieldsFromTitle = - case break (== ' ') title' of - (cmdName, rest) -> case break (=='(') cmdName of - (xs, '(':ys) | not (null ys) && - last ys == ')' -> + case T.break (== ' ') title' of + (cmdName, rest) -> case T.break (=='(') cmdName of + (xs, ys) | "(" `T.isPrefixOf` ys + && ")" `T.isSuffixOf` ys -> defField "title" xs . - defField "section" (init ys) . - case splitBy (=='|') rest of + defField "section" (T.init $ T.drop 1 ys) . + case T.splitOn "|" rest of (ft:hds) -> - defField "footer" (trim ft) . + defField "footer" (T.strip ft) . defField "header" - (trim $ concat hds) + (T.strip $ mconcat hds) [] -> id _ -> defField "title" title' metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToMan opts) - (fmap (render colwidth) . inlineListToMan opts) + (fmap render' . blockListToMan opts) + (fmap render' . inlineListToMan opts) $ deleteMeta "title" meta body <- blockListToMan opts blocks - notes <- liftM stNotes get + notes <- gets stNotes notes' <- notesToMan opts (reverse notes) let main = render' $ body $$ notes' $$ text "" - hasTables <- liftM stHasTables get + hasTables <- gets stHasTables let context = defField "body" main $ setFieldsFromTitle $ defField "has-tables" hasTables $ defField "hyphenate" True - $ defField "pandoc-version" pandocVersion - $ metadata + $ defField "pandoc-version" pandocVersion metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return man representation of notes. -notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc notesToMan opts notes = if null notes then return empty - else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= + else zipWithM (noteToMan opts) [1..] notes >>= return . (text ".SH NOTES" $$) . vcat -- | Return man representation of a note. -noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToMan :: PandocMonad m => WriterOptions -> Int -> [Block] -> StateT WriterState m Doc noteToMan opts num note = do contents <- blockListToMan opts note let marker = cr <> text ".SS " <> brackets (text (show num)) @@ -123,11 +140,11 @@ escapeString = escapeStringUsing manEscapes -- | Escape a literal (code) section for Man. escapeCode :: String -> String -escapeCode = concat . intersperse "\n" . map escapeLine . lines where +escapeCode = intercalate "\n" . map escapeLine . lines where escapeLine codeline = case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of a@('.':_) -> "\\&" ++ a - b -> b + b -> b -- We split inline lists into sentences, and print one sentence per -- line. groff/troff treats the line-ending period differently. @@ -139,8 +156,8 @@ breakSentence [] = ([],[]) breakSentence xs = let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True - isSentenceEndInline (LineBreak) = True - isSentenceEndInline _ = False + isSentenceEndInline LineBreak = True + isSentenceEndInline _ = False (as, bs) = break isSentenceEndInline xs in case bs of [] -> (as, []) @@ -160,9 +177,10 @@ splitSentences xs = in if null rest then [sent] else sent : splitSentences rest -- | Convert Pandoc block element to man. -blockToMan :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc +blockToMan :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> StateT WriterState m Doc blockToMan _ Null = return empty blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = @@ -173,9 +191,11 @@ blockToMan opts (Para inlines) = do return $ text ".PP" $$ contents blockToMan opts (LineBlock lns) = blockToMan opts $ linesToPara lns -blockToMan _ (RawBlock f str) +blockToMan _ b@(RawBlock f str) | f == Format "man" = return $ text str - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" blockToMan opts (Header level _ inlines) = do contents <- inlineListToMan opts inlines @@ -205,12 +225,12 @@ blockToMan opts (Table caption alignments widths headers rows) = then repeat "" else map (printf "w(%0.1fn)" . (70 *)) widths -- 78n default width - 8n indent = 70n - let coldescriptions = text $ intercalate " " + let coldescriptions = text $ unwords (zipWith (\align width -> aligncode align ++ width) alignments iwidths) ++ "." colheadings <- mapM (blockListToMan opts) headers let makeRow cols = text "T{" $$ - (vcat $ intersperse (text "T}@T{") cols) $$ + vcat (intersperse (text "T}@T{") cols) $$ text "T}" let colheadings' = if all null headers then empty @@ -227,7 +247,8 @@ blockToMan opts (BulletList items) = do return (vcat contents) blockToMan opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs - let indent = 1 + (maximum $ map length markers) + let indent = 1 + + maximum (map length markers) contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ zip markers items return (vcat contents) @@ -236,11 +257,11 @@ blockToMan opts (DefinitionList items) = do return (vcat contents) -- | Convert bullet list item (list of blocks) to man. -bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToMan :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Doc bulletListItemToMan _ [] = return empty -bulletListItemToMan opts ((Para first):rest) = - bulletListItemToMan opts ((Plain first):rest) -bulletListItemToMan opts ((Plain first):rest) = do +bulletListItemToMan opts (Para first:rest) = + bulletListItemToMan opts (Plain first:rest) +bulletListItemToMan opts (Plain first:rest) = do first' <- blockToMan opts (Plain first) rest' <- blockListToMan opts rest let first'' = text ".IP \\[bu] 2" $$ first' @@ -254,14 +275,15 @@ bulletListItemToMan opts (first:rest) = do return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" -- | Convert ordered list item (a list of blocks) to man. -orderedListItemToMan :: WriterOptions -- ^ options - -> String -- ^ order marker for list item - -> Int -- ^ number of spaces to indent - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc +orderedListItemToMan :: PandocMonad m + => WriterOptions -- ^ options + -> String -- ^ order marker for list item + -> Int -- ^ number of spaces to indent + -> [Block] -- ^ list item (list of blocks) + -> StateT WriterState m Doc orderedListItemToMan _ _ _ [] = return empty -orderedListItemToMan opts num indent ((Para first):rest) = - orderedListItemToMan opts num indent ((Plain first):rest) +orderedListItemToMan opts num indent (Para first:rest) = + orderedListItemToMan opts num indent (Plain first:rest) orderedListItemToMan opts num indent (first:rest) = do first' <- blockToMan opts first rest' <- blockListToMan opts rest @@ -273,44 +295,47 @@ orderedListItemToMan opts num indent (first:rest) = do return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to man. -definitionListItemToMan :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState Doc +definitionListItemToMan :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> StateT WriterState m Doc definitionListItemToMan opts (label, defs) = do labelText <- inlineListToMan opts label contents <- if null defs then return empty - else liftM vcat $ forM defs $ \blocks -> do - let (first, rest) = case blocks of - ((Para x):y) -> (Plain x,y) - (x:y) -> (x,y) - [] -> error "blocks is null" - rest' <- liftM vcat $ - mapM (\item -> blockToMan opts item) rest - first' <- blockToMan opts first - return $ first' $$ text ".RS" $$ rest' $$ text ".RE" + else liftM vcat $ forM defs $ \blocks -> + case blocks of + (x:xs) -> do + first' <- blockToMan opts $ + case x of + Para y -> Plain y + _ -> x + rest' <- liftM vcat $ mapM + (\item -> blockToMan opts item) xs + return $ first' $$ + text ".RS" $$ rest' $$ text ".RE" + [] -> return empty return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents -- | Convert list of Pandoc block elements to man. -blockListToMan :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc +blockListToMan :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> StateT WriterState m Doc blockListToMan opts blocks = mapM (blockToMan opts) blocks >>= (return . vcat) -- | Convert list of Pandoc inline elements to man. -inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. -inlineToMan :: WriterOptions -> Inline -> State WriterState Doc +inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc inlineToMan opts (Span _ ils) = inlineListToMan opts ils -inlineToMan opts (Emph lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[I]" <> contents <> text "\\f[]" -inlineToMan opts (Strong lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[B]" <> contents <> text "\\f[]" +inlineToMan opts (Emph lst) = + withFontFeature 'I' (inlineListToMan opts lst) +inlineToMan opts (Strong lst) = + withFontFeature 'B' (inlineListToMan opts lst) inlineToMan opts (Strikeout lst) = do contents <- inlineListToMan opts lst return $ text "[STRIKEOUT:" <> contents <> char ']' @@ -330,22 +355,26 @@ inlineToMan opts (Quoted DoubleQuote lst) = do inlineToMan opts (Cite _ lst) = inlineListToMan opts lst inlineToMan _ (Code _ str) = - return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" + withFontFeature 'C' (return (text $ escapeCode str)) inlineToMan _ (Str str@('.':_)) = return $ afterBreak "\\&" <> text (escapeString str) inlineToMan _ (Str str) = return $ text $ escapeString str inlineToMan opts (Math InlineMath str) = - inlineListToMan opts $ texMathToInlines InlineMath str + lift (texMathToInlines InlineMath str) >>= inlineListToMan opts inlineToMan opts (Math DisplayMath str) = do - contents <- inlineListToMan opts $ texMathToInlines DisplayMath str + contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts return $ cr <> text ".RS" $$ contents $$ text ".RE" -inlineToMan _ (RawInline f str) +inlineToMan _ il@(RawInline f str) | f == Format "man" = return $ text str - | otherwise = return empty -inlineToMan _ (LineBreak) = return $ + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToMan _ LineBreak = return $ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr inlineToMan _ SoftBreak = return space inlineToMan _ Space = return space +inlineToMan opts (Link _ txt ('#':_, _)) = + inlineListToMan opts txt -- skip internal links inlineToMan opts (Link _ txt (src, _)) = do linktext <- inlineListToMan opts txt let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) @@ -355,7 +384,7 @@ inlineToMan opts (Link _ txt (src, _)) = do char '<' <> text srcSuffix <> char '>' _ -> linktext <> text " (" <> text src <> char ')' inlineToMan opts (Image attr alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || + let txt = if null alternate || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks then [Str "image"] else alternate @@ -364,6 +393,24 @@ inlineToMan opts (Image attr alternate (source, tit)) = do inlineToMan _ (Note contents) = do -- add to notes in state modify $ \st -> st{ stNotes = contents : stNotes st } - notes <- liftM stNotes get - let ref = show $ (length notes) + notes <- gets stNotes + let ref = show (length notes) return $ char '[' <> text ref <> char ']' + +fontChange :: PandocMonad m => StateT WriterState m Doc +fontChange = do + features <- gets stFontFeatures + let filling = sort [c | (c,True) <- Map.toList features] + return $ text $ "\\f[" ++ filling ++ "]" + +withFontFeature :: PandocMonad m + => Char + -> StateT WriterState m Doc + -> StateT WriterState m Doc +withFontFeature c action = do + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + begin <- fontChange + d <- action + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + end <- fontChange + return $ begin <> d <> end diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index e3bb3eea0..cdd8f3b66 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,9 @@ -{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables, MultiWayIf #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,47 +34,50 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text. Markdown: <http://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) -import Data.Maybe (fromMaybe) -import Data.Monoid (Any(..)) -import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy ) -import Data.Char ( isSpace, isPunctuation, ord, chr ) -import Data.Ord ( comparing ) -import Text.Pandoc.Pretty import Control.Monad.Reader -import Control.Monad.State -import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (texMathToInlines) -import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) -import Network.URI (isURI) +import Control.Monad.State.Strict +import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum) import Data.Default -import Data.Yaml (Value(Object,String,Array,Bool,Number)) import qualified Data.HashMap.Strict as H -import qualified Data.Vector as V -import qualified Data.Text as T +import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose) +import qualified Data.Map as M +import Data.Maybe (fromMaybe) +import Data.Monoid (Any (..)) +import Data.Ord (comparing) import qualified Data.Set as Set -import Network.HTTP ( urlEncode ) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Vector as V +import Data.Yaml (Value (Array, Bool, Number, Object, String)) +import Network.HTTP (urlEncode) +import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) +import Text.Pandoc.Pretty +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Walk +import Text.Pandoc.Writers.HTML (writeHtml5String) +import Text.Pandoc.Writers.Math (texMathToInlines) +import Text.Pandoc.Writers.Shared type Notes = [[Block]] -type Ref = ([Inline], Target, Attr) +type Ref = (Doc, Target, Attr) type Refs = [Ref] -type MD = ReaderT WriterEnv (State WriterState) +type MD m = ReaderT WriterEnv (StateT WriterState m) -evalMD :: MD a -> WriterEnv -> WriterState -> a -evalMD md env st = evalState (runReaderT md env) st +evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a +evalMD md env st = evalStateT (runReaderT md env) st -data WriterEnv = WriterEnv { envInList :: Bool - , envPlain :: Bool +data WriterEnv = WriterEnv { envInList :: Bool + , envPlain :: Bool , envRefShortcutable :: Bool , envBlockLevel :: Int - , envEscapeSpaces :: Bool + , envEscapeSpaces :: Bool } instance Default WriterEnv @@ -82,21 +88,26 @@ instance Default WriterEnv , envEscapeSpaces = False } -data WriterState = WriterState { stNotes :: Notes - , stRefs :: Refs - , stIds :: Set.Set String +data WriterState = WriterState { stNotes :: Notes + , stRefs :: Refs + , stKeys :: M.Map Key + (M.Map (Target, Attr) Int) + , stLastIdx :: Int + , stIds :: Set.Set String , stNoteNum :: Int } instance Default WriterState where def = WriterState{ stNotes = [] , stRefs = [] + , stKeys = M.empty + , stLastIdx = 0 , stIds = Set.empty , stNoteNum = 1 } -- | Convert Pandoc to Markdown. -writeMarkdown :: WriterOptions -> Pandoc -> String +writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMarkdown opts document = evalMD (pandocToMarkdown opts{ writerWrapText = if isEnabled Ext_hard_line_breaks opts @@ -106,7 +117,7 @@ writeMarkdown opts document = -- | Convert Pandoc to plain text (like markdown, but without links, -- pictures, or inline formatting). -writePlain :: WriterOptions -> Pandoc -> String +writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m Text writePlain opts document = evalMD (pandocToMarkdown opts document) def{ envPlain = True } def @@ -155,8 +166,7 @@ jsonToYaml (Object hashmap) = | otherwise -> (k' <> ":") $$ x (k', Object _, x) -> (k' <> ":") $$ nest 2 x (_, String "", _) -> empty - (k', _, x) | k == "meta-json" -> empty - | otherwise -> k' <> ":" <> space <> hang 2 "" x) + (k', _, x) -> k' <> ":" <> space <> hang 2 "" x) $ sortBy (comparing fst) $ H.toList hashmap jsonToYaml (Array vec) = vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec @@ -171,15 +181,17 @@ jsonToYaml (Number n) = text $ show n jsonToYaml _ = empty -- | Return markdown representation of document. -pandocToMarkdown :: WriterOptions -> Pandoc -> MD String +pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m Text pandocToMarkdown opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing isPlain <- asks envPlain - metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToMarkdown opts) - (fmap (render colwidth) . inlineListToMarkdown opts) + let render' :: Doc -> Text + render' = render colwidth . chomp + metadata <- metaToJSON' + (fmap render' . blockListToMarkdown opts) + (fmap render' . blockToMarkdown opts . Plain) meta let title' = maybe empty text $ getField "title" metadata let authors' = maybe [] (map text) $ getField "author" metadata @@ -196,49 +208,51 @@ pandocToMarkdown opts (Pandoc meta blocks) = do | otherwise -> empty Nothing -> empty let headerBlocks = filter isHeaderBlock blocks - let toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks - else empty + toc <- if writerTableOfContents opts + then render' <$> tableOfContents opts headerBlocks + else return "" -- Strip off final 'references' header if markdown citations enabled let blocks' = if isEnabled Ext_citations opts then case reverse blocks of (Div (_,["references"],_) _):xs -> reverse xs - _ -> blocks + _ -> blocks else blocks body <- blockListToMarkdown opts blocks' notesAndRefs' <- notesAndRefs opts - let render' :: Doc -> String - render' = render colwidth let main = render' $ body <> notesAndRefs' - let context = defField "toc" (render' toc) + let context = -- for backwards compatibility we populate toc + -- with the contents of the toc, rather than a + -- boolean: + defField "toc" toc + $ defField "table-of-contents" toc $ defField "body" main $ (if isNullMeta meta then id else defField "titleblock" (render' titleblock)) - $ metadata + $ addVariablesToJSON opts metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return markdown representation of reference key table. -refsToMarkdown :: WriterOptions -> Refs -> MD Doc +refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat -- | Return markdown representation of a reference key. -keyToMarkdown :: WriterOptions +keyToMarkdown :: PandocMonad m + => WriterOptions -> Ref - -> MD Doc -keyToMarkdown opts (label, (src, tit), attr) = do - label' <- inlineListToMarkdown opts label + -> MD m Doc +keyToMarkdown opts (label', (src, tit), attr) = do let tit' = if null tit then empty else space <> "\"" <> text tit <> "\"" return $ nest 2 $ hang 2 ("[" <> label' <> "]:" <> space) (text src <> tit') - <> linkAttributes opts attr + <+> linkAttributes opts attr -- | Return markdown representation of notes. -notesToMarkdown :: WriterOptions -> [[Block]] -> MD Doc +notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc notesToMarkdown opts notes = do n <- gets stNoteNum notes' <- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [n..] notes) @@ -246,7 +260,7 @@ notesToMarkdown opts notes = do return $ vsep notes' -- | Return markdown representation of a note. -noteToMarkdown :: WriterOptions -> Int -> [Block] -> MD Doc +noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m Doc noteToMarkdown opts num blocks = do contents <- blockListToMarkdown opts blocks let num' = text $ writerIdentifierPrefix opts ++ show num @@ -256,45 +270,65 @@ noteToMarkdown opts num blocks = do let markerSize = 4 + offset num' let spacer = case writerTabStop opts - markerSize of n | n > 0 -> text $ replicate n ' ' - _ -> text " " + _ -> text " " return $ if isEnabled Ext_footnotes opts then hang (writerTabStop opts) (marker <> spacer) contents else marker <> spacer <> contents -- | Escape special characters for Markdown. escapeString :: WriterOptions -> String -> String -escapeString opts = escapeStringUsing markdownEscapes - where markdownEscapes = ('<', "<") : ('>', ">") : - backslashEscapes specialChars - specialChars = - (if isEnabled Ext_superscript opts - then ('^':) - else id) . - (if isEnabled Ext_subscript opts - then ('~':) - else id) . - (if isEnabled Ext_tex_math_dollars opts - then ('$':) - else id) $ - "\\`*_[]#" +escapeString _ [] = [] +escapeString opts (c:cs) = + case c of + '<' | isEnabled Ext_all_symbols_escapable opts -> + '\\' : '<' : escapeString opts cs + | otherwise -> "<" ++ escapeString opts cs + '>' | isEnabled Ext_all_symbols_escapable opts -> + '\\' : '>' : escapeString opts cs + | otherwise -> ">" ++ escapeString opts cs + '@' | isEnabled Ext_citations opts -> + case cs of + (d:_) + | isAlphaNum d || d == '_' + -> '\\':'@':escapeString opts cs + _ -> '@':escapeString opts cs + _ | c `elem` ['\\','`','*','_','[',']','#'] -> + '\\':c:escapeString opts cs + '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':escapeString opts cs + '^' | isEnabled Ext_superscript opts -> '\\':'^':escapeString opts cs + '~' | isEnabled Ext_subscript opts -> '\\':'~':escapeString opts cs + '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':escapeString opts cs + '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs + '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs + '-' | isEnabled Ext_smart opts -> + case cs of + '-':_ -> '\\':'-':escapeString opts cs + _ -> '-':escapeString opts cs + '.' | isEnabled Ext_smart opts -> + case cs of + '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest + _ -> '.':escapeString opts cs + _ -> c : escapeString opts cs -- | Construct table of contents from list of header blocks. -tableOfContents :: WriterOptions -> [Block] -> Doc -tableOfContents opts headers = - let opts' = opts { writerIgnoreNotes = True } - contents = BulletList $ map (elementToListItem opts) $ hierarchicalize headers - in evalMD (blockToMarkdown opts' contents) def def +tableOfContents :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc +tableOfContents opts headers = do + contents <- BulletList <$> mapM (elementToListItem opts) (hierarchicalize headers) + blockToMarkdown opts contents -- | Converts an Element to a list item for a table of contents, -elementToListItem :: WriterOptions -> Element -> [Block] +elementToListItem :: PandocMonad m => WriterOptions -> Element -> MD m [Block] elementToListItem opts (Sec lev _nums (ident,_,_) headerText subsecs) - = Plain headerLink : - [ BulletList (map (elementToListItem opts) subsecs) | - not (null subsecs) && lev < writerTOCDepth opts ] - where headerLink = if null ident - then headerText - else [Link nullAttr headerText ('#':ident, "")] -elementToListItem _ (Blk _) = [] + = do isPlain <- asks envPlain + let headerLink = if null ident || isPlain + then walk deNote headerText + else [Link nullAttr (walk deNote headerText) + ('#':ident, "")] + listContents <- if null subsecs || lev >= writerTOCDepth opts + then return [] + else mapM (elementToListItem opts) subsecs + return [Plain headerLink, BulletList listContents] +elementToListItem _ (Blk _) = return [] attrsToMarkdown :: Attr -> Doc attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] @@ -331,10 +365,10 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker beginsWithOrderedListMarker :: String -> Bool beginsWithOrderedListMarker str = case runParser olMarker defaultParserState "para start" (take 10 str) of - Left _ -> False - Right _ -> True + Left _ -> False + Right _ -> True -notesAndRefs :: WriterOptions -> MD Doc +notesAndRefs :: PandocMonad m => WriterOptions -> MD m Doc notesAndRefs opts = do notes' <- reverse <$> gets stNotes >>= notesToMarkdown opts modify $ \s -> s { stNotes = [] } @@ -345,16 +379,17 @@ notesAndRefs opts = do if | writerReferenceLocation opts == EndOfDocument -> empty | isEmpty notes' && isEmpty refs' -> empty | otherwise -> blankline - + return $ (if isEmpty notes' then empty else blankline <> notes') <> (if isEmpty refs' then empty else blankline <> refs') <> endSpacing -- | Convert Pandoc block element to markdown. -blockToMarkdown :: WriterOptions -- ^ Options +blockToMarkdown :: PandocMonad m + => WriterOptions -- ^ Options -> Block -- ^ Block element - -> MD Doc + -> MD m Doc blockToMarkdown opts blk = local (\env -> env {envBlockLevel = envBlockLevel env + 1}) $ do doc <- blockToMarkdown' opts blk @@ -363,17 +398,26 @@ blockToMarkdown opts blk = then notesAndRefs opts >>= (\d -> return $ doc <> d) else return doc -blockToMarkdown' :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> MD Doc +blockToMarkdown' :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> MD m Doc blockToMarkdown' _ Null = return empty blockToMarkdown' opts (Div attrs ils) = do contents <- blockListToMarkdown opts ils - return $ if isEnabled Ext_raw_html opts && - isEnabled Ext_markdown_in_html_blocks opts - then tagWithAttrs "div" attrs <> blankline <> - contents <> blankline <> "</div>" <> blankline - else contents <> blankline + return $ + case () of + _ | isEnabled Ext_fenced_divs opts && + attrs /= nullAttr -> + nowrap (text ":::" <+> attrsToMarkdown attrs) $$ + chomp contents $$ + text ":::" <> blankline + | isEnabled Ext_native_divs opts || + (isEnabled Ext_raw_html opts && + isEnabled Ext_markdown_in_html_blocks opts) -> + tagWithAttrs "div" attrs <> blankline <> + contents <> blankline <> "</div>" <> blankline + | otherwise -> contents <> blankline blockToMarkdown' opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines -- escape if para starts with ordered list marker @@ -382,13 +426,28 @@ blockToMarkdown' opts (Plain inlines) = do then Just $ writerColumns opts else Nothing let rendered = render colwidth contents - let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs - | otherwise = x : escapeDelimiter xs - escapeDelimiter [] = [] - let contents' = if isEnabled Ext_all_symbols_escapable opts && - not isPlain && beginsWithOrderedListMarker rendered - then text $ escapeDelimiter rendered - else contents + let escapeMarker (x:xs) | x `elem` (".()" :: String) = '\\':x:xs + | otherwise = x : escapeMarker xs + escapeMarker [] = [] + let contents' = + case rendered of + '%':_ | isEnabled Ext_pandoc_title_block opts && + isEnabled Ext_all_symbols_escapable opts -> + "\\" <> contents + '+':s:_ | not isPlain && isSpace s -> "\\" <> contents + '*':s:_ | not isPlain && isSpace s -> "\\" <> contents + '-':s:_ | not isPlain && isSpace s -> "\\" <> contents + '+':[] | not isPlain -> "\\" <> contents + '*':[] | not isPlain -> "\\" <> contents + '-':[] | not isPlain -> "\\" <> contents + '|':_ | (isEnabled Ext_line_blocks opts || + isEnabled Ext_pipe_tables opts) + && isEnabled Ext_all_symbols_escapable opts + -> "\\" <> contents + _ | not isPlain && beginsWithOrderedListMarker rendered + && isEnabled Ext_all_symbols_escapable opts + -> text $ escapeMarker rendered + | otherwise -> contents return $ contents' <> cr -- title beginning with fig: indicates figure blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = @@ -401,9 +460,11 @@ blockToMarkdown' opts (LineBlock lns) = mdLines <- mapM (inlineListToMarkdown opts) lns return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline else blockToMarkdown opts $ linesToPara lns -blockToMarkdown' opts (RawBlock f str) - | f == "markdown" = return $ text str <> text "\n" - | f == "html" && isEnabled Ext_raw_html opts = do +blockToMarkdown' opts b@(RawBlock f str) + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + = return $ text str <> text "\n" + | f `elem` ["html", "html5", "html4"] && isEnabled Ext_raw_html opts = do plain <- asks envPlain return $ if plain then empty @@ -415,7 +476,9 @@ blockToMarkdown' opts (RawBlock f str) return $ if plain then empty else text str <> text "\n" - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToMarkdown' opts HorizontalRule = do return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline blockToMarkdown' opts (Header level attr inlines) = do @@ -442,6 +505,8 @@ blockToMarkdown' opts (Header level attr inlines) = do space <> attrsToMarkdown attr | otherwise -> empty contents <- inlineListToMarkdown opts $ + -- ensure no newlines; see #3736 + walk lineBreakToSpace $ if level == 1 && plain then capitalize inlines else inlines @@ -497,39 +562,70 @@ blockToMarkdown' opts (BlockQuote blocks) = do contents <- blockListToMarkdown opts blocks return $ (prefixed leader contents) <> blankline blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do + let numcols = maximum (length aligns : length widths : + map length (headers:rows)) caption' <- inlineListToMarkdown opts caption let caption'' = if null caption || not (isEnabled Ext_table_captions opts) - then empty - else blankline <> ": " <> caption' <> blankline - rawHeaders <- mapM (blockListToMarkdown opts) headers - rawRows <- mapM (mapM (blockListToMarkdown opts)) rows + then blankline + else blankline $$ (": " <> caption') $$ blankline let isLineBreak LineBreak = Any True isLineBreak _ = Any False - let isSimple = all (==0) widths && - not ( getAny (query isLineBreak (headers:rows)) ) + let hasLineBreak = getAny . query isLineBreak + let isSimpleCell [Plain ils] = not (hasLineBreak ils) + isSimpleCell [Para ils ] = not (hasLineBreak ils) + isSimpleCell [] = True + isSimpleCell _ = False + let hasSimpleCells = all isSimpleCell (concat (headers:rows)) + let isSimple = hasSimpleCells && all (==0) widths let isPlainBlock (Plain _) = True isPlainBlock _ = False let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows) - (nst,tbl) <- case True of - _ | isSimple && - isEnabled Ext_simple_tables opts -> fmap (nest 2,) $ - pandocTable opts (all null headers) aligns widths - rawHeaders rawRows - | isSimple && - isEnabled Ext_pipe_tables opts -> fmap (id,) $ - pipeTable (all null headers) aligns rawHeaders rawRows - | not hasBlocks && - isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $ - pandocTable opts (all null headers) aligns widths - rawHeaders rawRows - | isEnabled Ext_grid_tables opts -> fmap (id,) $ - gridTable opts (all null headers) aligns widths - rawHeaders rawRows - | isEnabled Ext_raw_html opts -> fmap (id,) $ - return $ text $ writeHtmlString def - $ Pandoc nullMeta [t] - | otherwise -> return $ (id, text "[TABLE]") - return $ nst $ tbl $$ blankline $$ caption'' $$ blankline + let padRow r = case numcols - length r of + x | x > 0 -> r ++ replicate x empty + | otherwise -> r + let aligns' = case numcols - length aligns of + x | x > 0 -> aligns ++ replicate x AlignDefault + | otherwise -> aligns + let widths' = case numcols - length widths of + x | x > 0 -> widths ++ replicate x 0.0 + | otherwise -> widths + (nst,tbl) <- + case True of + _ | isSimple && + isEnabled Ext_simple_tables opts -> do + rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers + rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts)) + rows + (nest 2,) <$> pandocTable opts False (all null headers) + aligns' widths' rawHeaders rawRows + | isSimple && + isEnabled Ext_pipe_tables opts -> do + rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers + rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts)) + rows + (id,) <$> pipeTable (all null headers) aligns' rawHeaders rawRows + | not hasBlocks && + isEnabled Ext_multiline_tables opts -> do + rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers + rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts)) + rows + (nest 2,) <$> pandocTable opts True (all null headers) + aligns' widths' rawHeaders rawRows + | isEnabled Ext_grid_tables opts && + writerColumns opts >= 8 * numcols -> (id,) <$> + gridTable opts blockListToMarkdown + (all null headers) aligns' widths' headers rows + | isEnabled Ext_raw_html opts -> fmap (id,) $ + (text . T.unpack) <$> + (writeHtml5String def $ Pandoc nullMeta [t]) + | hasSimpleCells && + isEnabled Ext_pipe_tables opts -> do + rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers + rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts)) + rows + (id,) <$> pipeTable (all null headers) aligns' rawHeaders rawRows + | otherwise -> return $ (id, text "[TABLE]") + return $ nst $ tbl $$ caption'' $$ blankline blockToMarkdown' opts (BulletList items) = do contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ cat contents <> blankline @@ -550,7 +646,7 @@ blockToMarkdown' opts (DefinitionList items) = do contents <- inList $ mapM (definitionListItemToMarkdown opts) items return $ cat contents <> blankline -inList :: MD a -> MD a +inList :: Monad m => MD m a -> MD m a inList p = local (\env -> env {envInList = True}) p addMarkdownAttribute :: String -> String @@ -562,7 +658,7 @@ addMarkdownAttribute s = x /= "markdown"] _ -> s -pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD Doc +pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD m Doc pipeTable headless aligns rawHeaders rawRows = do let sp = text " " let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty @@ -590,9 +686,10 @@ pipeTable headless aligns rawHeaders rawRows = do let body = vcat $ map torow rawRows return $ header $$ border $$ body -pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> MD Doc -pandocTable opts headless aligns widths rawHeaders rawRows = do +pandocTable :: PandocMonad m + => WriterOptions -> Bool -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> MD m Doc +pandocTable opts multiline headless aligns widths rawHeaders rawRows = do let isSimple = all (==0) widths let alignHeader alignment = case alignment of AlignLeft -> lblock @@ -609,23 +706,21 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do let minNumChars = (+ 2) . maximum . map minOffset let columns = transpose (rawHeaders : rawRows) -- minimal column width without wrapping a single word - let noWordWrapWidth - | writerWrapText opts == WrapAuto - = fromIntegral $ maximum (map minNumChars columns) - | otherwise = fromIntegral $ maximum (map numChars columns) - let relWidth w = floor $ max (fromIntegral (writerColumns opts) * w) - (noWordWrapWidth * w / minimum widths) + let relWidth w col = + max (floor $ fromIntegral (writerColumns opts - 1) * w) + (if writerWrapText opts == WrapAuto + then minNumChars col + else numChars col) let widthsInChars | isSimple = map numChars columns - | otherwise = map relWidth widths + | otherwise = zipWith relWidth widths columns let makeRow = hcat . intersperse (lblock 1 (text " ")) . (zipWith3 alignHeader aligns widthsInChars) let rows' = map makeRow rawRows let head' = makeRow rawHeaders - let maxRowHeight = maximum $ map height (head':rows') let underline = cat $ intersperse (text " ") $ map (\width -> text (replicate width '-')) widthsInChars - let border = if maxRowHeight > 1 + let border = if multiline then text (replicate (sum widthsInChars + length widthsInChars - 1) '-') else if headless @@ -634,7 +729,7 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do let head'' = if headless then empty else border <> cr <> head' - let body = if maxRowHeight > 1 + let body = if multiline then vsep rows' else vcat rows' let bottom = if headless @@ -642,62 +737,15 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do else border return $ head'' $$ underline $$ body $$ bottom -gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> MD Doc -gridTable opts headless aligns widths headers' rawRows = do - let numcols = length headers' - let widths' = if all (==0) widths - then replicate numcols (1.0 / fromIntegral numcols) - else widths - let widthsInChars = map - ((\x -> x - 3) . floor . (fromIntegral (writerColumns opts) *)) widths' - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (1 : map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = chomp $ hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map (makeRow . map chomp) rawRows - let borderpart ch align widthInChars = - let widthInChars' = if widthInChars < 1 then 1 else widthInChars - in (if (align == AlignLeft || align == AlignCenter) - then char ':' - else char ch) <> - text (replicate widthInChars' ch) <> - (if (align == AlignRight || align == AlignCenter) - then char ':' - else char ch) - let border ch aligns' widthsInChars' = - char '+' <> - hcat (intersperse (char '+') (zipWith (borderpart ch) - aligns' widthsInChars')) <> char '+' - let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars) - rows' - let head'' = if headless - then empty - else head' $$ border '=' aligns widthsInChars - if headless - then return $ - border '-' aligns widthsInChars $$ - body $$ - border '-' (repeat AlignDefault) widthsInChars - else return $ - border '-' (repeat AlignDefault) widthsInChars $$ - head'' $$ - body $$ - border '-' (repeat AlignDefault) widthsInChars - itemEndsWithTightList :: [Block] -> Bool itemEndsWithTightList bs = case bs of [Plain _, BulletList xs] -> isTightList xs [Plain _, OrderedList _ xs] -> isTightList xs - _ -> False + _ -> False -- | Convert bullet list item (list of blocks) to markdown. -bulletListItemToMarkdown :: WriterOptions -> [Block] -> MD Doc +bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc bulletListItemToMarkdown opts bs = do contents <- blockListToMarkdown opts bs let sps = replicate (writerTabStop opts - 2) ' ' @@ -709,15 +757,16 @@ bulletListItemToMarkdown opts bs = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to markdown. -orderedListItemToMarkdown :: WriterOptions -- ^ options +orderedListItemToMarkdown :: PandocMonad m + => WriterOptions -- ^ options -> String -- ^ list item marker -> [Block] -- ^ list item (list of blocks) - -> MD Doc + -> MD m Doc orderedListItemToMarkdown opts marker bs = do contents <- blockListToMarkdown opts bs let sps = case length marker - writerTabStop opts of n | n > 0 -> text $ replicate n ' ' - _ -> text " " + _ -> text " " let start = text marker <> sps -- remove trailing blank line if item ends with a tight list let contents' = if itemEndsWithTightList bs @@ -726,9 +775,10 @@ orderedListItemToMarkdown opts marker bs = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert definition list item (label, list of blocks) to markdown. -definitionListItemToMarkdown :: WriterOptions +definitionListItemToMarkdown :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> MD Doc + -> MD m Doc definitionListItemToMarkdown opts (label, defs) = do labelText <- inlineListToMarkdown opts label defs' <- mapM (mapM (blockToMarkdown opts)) defs @@ -739,7 +789,7 @@ definitionListItemToMarkdown opts (label, defs) = do let leader = if isPlain then " " else ": " let sps = case writerTabStop opts - 3 of n | n > 0 -> text $ replicate n ' ' - _ -> text " " + _ -> text " " if isEnabled Ext_compact_definition_lists opts then do let contents = vcat $ map (\d -> hang tabStop (leader <> sps) @@ -750,7 +800,7 @@ definitionListItemToMarkdown opts (label, defs) = do $ vcat d <> cr) defs' let isTight = case defs of ((Plain _ : _): _) -> True - _ -> False + _ -> False return $ blankline <> nowrap labelText <> (if isTight then cr else blankline) <> contents <> blankline else do @@ -758,75 +808,129 @@ definitionListItemToMarkdown opts (label, defs) = do vsep (map vsep defs') <> blankline -- | Convert list of Pandoc block elements to markdown. -blockListToMarkdown :: WriterOptions -- ^ Options +blockListToMarkdown :: PandocMonad m + => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> MD Doc -blockListToMarkdown opts blocks = + -> MD m Doc +blockListToMarkdown opts blocks = do + inlist <- asks envInList + isPlain <- asks envPlain + -- a) insert comment between list and indented code block, or the + -- code block will be treated as a list continuation paragraph + -- b) change Plain to Para unless it's followed by a RawBlock + -- or has a list as its parent (#3487) + let fixBlocks (b : CodeBlock attr x : rest) + | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) + && isListBlock b = b : commentSep : CodeBlock attr x : + fixBlocks rest + fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (Plain ils : bs@(RawBlock{}:_)) = + Plain ils : fixBlocks bs + fixBlocks (Plain ils : bs) | inlist = + Plain ils : fixBlocks bs + fixBlocks (Plain ils : bs) = + Para ils : fixBlocks bs + fixBlocks (x : xs) = x : fixBlocks xs + fixBlocks [] = [] + isListBlock (BulletList _) = True + isListBlock (OrderedList _ _) = True + isListBlock (DefinitionList _) = True + isListBlock _ = False + commentSep = if isPlain + then Null + else if isEnabled Ext_raw_html opts + then RawBlock "html" "<!-- -->\n" + else RawBlock "markdown" " \n" mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat - -- insert comment between list and indented code block, or the - -- code block will be treated as a list continuation paragraph - where fixBlocks (b : CodeBlock attr x : rest) - | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) - && isListBlock b = b : commentSep : CodeBlock attr x : - fixBlocks rest - fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (x : xs) = x : fixBlocks xs - fixBlocks [] = [] - isListBlock (BulletList _) = True - isListBlock (OrderedList _ _) = True - isListBlock (DefinitionList _) = True - isListBlock _ = False - commentSep = if isEnabled Ext_raw_html opts - then RawBlock "html" "<!-- -->\n" - else RawBlock "markdown" " " + +getKey :: Doc -> Key +getKey = toKey . render Nothing -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. -getReference :: Attr -> [Inline] -> Target -> MD [Inline] +getReference :: PandocMonad m => Attr -> Doc -> Target -> MD m Doc getReference attr label target = do - st <- get - case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of + refs <- gets stRefs + case find (\(_,t,a) -> t == target && a == attr) refs of Just (ref, _, _) -> return ref Nothing -> do - let label' = case find (\(l,_,_) -> l == label) (stRefs st) of - Just _ -> -- label is used; generate numerical label - case find (\n -> notElem [Str (show n)] - (map (\(l,_,_) -> l) (stRefs st))) - [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" - Nothing -> label - modify (\s -> s{ stRefs = (label', target, attr) : stRefs st }) - return label' + keys <- gets stKeys + case M.lookup (getKey label) keys of + Nothing -> do -- no other refs with this label + (lab', idx) <- if isEmpty label + then do + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + return (text (show i), i) + else return (label, 0) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) idx mempty) + (stKeys s) }) + return lab' + + Just km -> do -- we have refs with this label + case M.lookup (target, attr) km of + Just i -> do + let lab' = label <> if i == 0 + then mempty + else text (show i) + -- make sure it's in stRefs; it may be + -- a duplicate that was printed in a previous + -- block: + when ((lab', target, attr) `notElem` refs) $ + modify (\s -> s{ + stRefs = (lab', target, attr) : refs }) + return lab' + Nothing -> do -- but this one is to a new target + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + let lab' = text (show i) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) i km) + (stKeys s) }) + return lab' -- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: WriterOptions -> [Inline] -> MD Doc +inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc inlineListToMarkdown opts lst = do inlist <- asks envInList go (if inlist then avoidBadWrapsInList lst else lst) where go [] = return empty go (i:is) = case i of (Link _ _ _) -> case is of - -- If a link is followed by another link or '[' we don't shortcut - (Link _ _ _):_ -> unshortcutable - Space:(Link _ _ _):_ -> unshortcutable - Space:(Str('[':_)):_ -> unshortcutable - Space:(RawInline _ ('[':_)):_ -> unshortcutable - Space:(Cite _ _):_ -> unshortcutable - SoftBreak:(Link _ _ _):_ -> unshortcutable - SoftBreak:(Str('[':_)):_ -> unshortcutable + -- If a link is followed by another link, or '[', '(' or ':' + -- then we don't shortcut + (Link _ _ _):_ -> unshortcutable + Space:(Link _ _ _):_ -> unshortcutable + Space:(Str('[':_)):_ -> unshortcutable + Space:(RawInline _ ('[':_)):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + SoftBreak:(Link _ _ _):_ -> unshortcutable + SoftBreak:(Str('[':_)):_ -> unshortcutable SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable - SoftBreak:(Cite _ _):_ -> unshortcutable - (Cite _ _):_ -> unshortcutable - Str ('[':_):_ -> unshortcutable - (RawInline _ ('[':_)):_ -> unshortcutable - (RawInline _ (' ':'[':_)):_ -> unshortcutable - _ -> shortcutable + SoftBreak:(Cite _ _):_ -> unshortcutable + LineBreak:(Link _ _ _):_ -> unshortcutable + LineBreak:(Str('[':_)):_ -> unshortcutable + LineBreak:(RawInline _ ('[':_)):_ -> unshortcutable + LineBreak:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str ('[':_):_ -> unshortcutable + Str ('(':_):_ -> unshortcutable + Str (':':_):_ -> unshortcutable + (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ ('(':_)):_ -> unshortcutable + (RawInline _ (':':_)):_ -> unshortcutable + (RawInline _ (' ':'[':_)):_ -> unshortcutable + _ -> shortcutable _ -> shortcutable where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) unshortcutable = do @@ -836,9 +940,9 @@ inlineListToMarkdown opts lst = do fmap (iMark <>) (go is) isSp :: Inline -> Bool -isSp Space = True +isSp Space = True isSp SoftBreak = True -isSp _ = False +isSp _ = False avoidBadWrapsInList :: [Inline] -> [Inline] avoidBadWrapsInList [] = [] @@ -857,7 +961,7 @@ avoidBadWrapsInList (s:Str cs:[]) avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs isOrderedListMarker :: String -> Bool -isOrderedListMarker xs = (last xs `elem` ['.',')']) && +isOrderedListMarker xs = not (null xs) && (last xs `elem` ['.',')']) && isRight (runParser (anyOrderedListMarker >> eof) defaultParserState "" xs) @@ -866,27 +970,30 @@ isRight (Right _) = True isRight (Left _) = False -- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: WriterOptions -> Inline -> MD Doc +inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc inlineToMarkdown opts (Span attrs ils) = do plain <- asks envPlain contents <- inlineListToMarkdown opts ils return $ case plain of True -> contents - False | isEnabled Ext_bracketed_spans opts -> - "[" <> contents <> "]" <> - if attrs == nullAttr - then "{}" - else linkAttributes opts attrs + False | attrs == nullAttr -> contents + | isEnabled Ext_bracketed_spans opts -> + let attrs' = if attrs /= nullAttr + then attrsToMarkdown attrs + else empty + in "[" <> contents <> "]" <> attrs' | isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts -> tagWithAttrs "span" attrs <> contents <> text "</span>" | otherwise -> contents +inlineToMarkdown _ (Emph []) = return empty inlineToMarkdown opts (Emph lst) = do plain <- asks envPlain contents <- inlineListToMarkdown opts lst return $ if plain then "_" <> contents <> "_" else "*" <> contents <> "*" +inlineToMarkdown _ (Strong []) = return empty inlineToMarkdown opts (Strong lst) = do plain <- asks envPlain if plain @@ -894,6 +1001,7 @@ inlineToMarkdown opts (Strong lst) = do else do contents <- inlineListToMarkdown opts lst return $ "**" <> contents <> "**" +inlineToMarkdown _ (Strikeout []) = return empty inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst return $ if isEnabled Ext_strikeout opts @@ -901,6 +1009,7 @@ inlineToMarkdown opts (Strikeout lst) = do else if isEnabled Ext_raw_html opts then "<s>" <> contents <> "</s>" else contents +inlineToMarkdown _ (Superscript []) = return empty inlineToMarkdown opts (Superscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do contents <- inlineListToMarkdown opts lst @@ -908,14 +1017,12 @@ inlineToMarkdown opts (Superscript lst) = then "^" <> contents <> "^" else if isEnabled Ext_raw_html opts then "<sup>" <> contents <> "</sup>" - else case (render Nothing contents) of - ds | all (\d -> d >= '0' && d <= '9') ds - -> text (map toSuperscript ds) - _ -> contents - where toSuperscript '1' = '\x00B9' - toSuperscript '2' = '\x00B2' - toSuperscript '3' = '\x00B3' - toSuperscript c = chr (0x2070 + (ord c - 48)) + else + let rendered = render Nothing contents + in case mapM toSuperscript rendered of + Just r -> text r + Nothing -> text $ "^(" ++ rendered ++ ")" +inlineToMarkdown _ (Subscript []) = return empty inlineToMarkdown opts (Subscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do contents <- inlineListToMarkdown opts lst @@ -923,27 +1030,27 @@ inlineToMarkdown opts (Subscript lst) = then "~" <> contents <> "~" else if isEnabled Ext_raw_html opts then "<sub>" <> contents <> "</sub>" - else case (render Nothing contents) of - ds | all (\d -> d >= '0' && d <= '9') ds - -> text (map toSubscript ds) - _ -> contents - where toSubscript c = chr (0x2080 + (ord c - 48)) + else + let rendered = render Nothing contents + in case mapM toSubscript rendered of + Just r -> text r + Nothing -> text $ "_(" ++ rendered ++ ")" inlineToMarkdown opts (SmallCaps lst) = do plain <- asks envPlain if not plain && (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) - then do - contents <- inlineListToMarkdown opts lst - return $ tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) - <> contents <> text "</span>" + then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst) else inlineListToMarkdown opts $ capitalize lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ "‘" <> contents <> "’" + return $ if isEnabled Ext_smart opts + then "'" <> contents <> "'" + else "‘" <> contents <> "’" inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ "“" <> contents <> "”" + return $ if isEnabled Ext_smart opts + then "\"" <> contents <> "\"" + else "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = do let tickGroups = filter (\s -> '`' `elem` s) $ group str let longest = if null tickGroups @@ -960,14 +1067,17 @@ inlineToMarkdown opts (Code attr str) = do else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown opts (Str str) = do isPlain <- asks envPlain - if isPlain - then return $ text str - else return $ text $ escapeString opts str + let str' = (if isEnabled Ext_smart opts + then unsmartify opts + else id) $ + if isPlain + then str + else escapeString opts str + return $ text str' inlineToMarkdown opts (Math InlineMath str) = case writerHTMLMathMethod opts of - WebTeX url -> - inlineToMarkdown opts (Image nullAttr [Str str] - (url ++ urlEncode str, str)) + WebTeX url -> inlineToMarkdown opts + (Image nullAttr [Str str] (url ++ urlEncode str, str)) _ | isEnabled Ext_tex_math_dollars opts -> return $ "$" <> text str <> "$" | isEnabled Ext_tex_math_single_backslash opts -> @@ -976,9 +1086,9 @@ inlineToMarkdown opts (Math InlineMath str) = return $ "\\\\(" <> text str <> "\\\\)" | otherwise -> do plain <- asks envPlain - inlineListToMarkdown opts $ - (if plain then makeMathPlainer else id) $ - texMathToInlines InlineMath str + texMathToInlines InlineMath str >>= + inlineListToMarkdown opts . + (if plain then makeMathPlainer else id) inlineToMarkdown opts (Math DisplayMath str) = case writerHTMLMathMethod opts of WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` @@ -991,15 +1101,19 @@ inlineToMarkdown opts (Math DisplayMath str) = | isEnabled Ext_tex_math_double_backslash opts -> return $ "\\\\[" <> text str <> "\\\\]" | otherwise -> (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (texMathToInlines DisplayMath str) -inlineToMarkdown opts (RawInline f str) = do + (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) +inlineToMarkdown opts il@(RawInline f str) = do plain <- asks envPlain - if not plain && - ( f == "markdown" || + if (plain && f == "plain") || (not plain && + ( f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] || (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) || - (isEnabled Ext_raw_html opts && f == "html") ) + (isEnabled Ext_raw_html opts && f `elem` ["html", "html4", "html5"]) + )) then return $ text str - else return empty + else do + report $ InlineNotRendered il + return empty inlineToMarkdown opts (LineBreak) = do plain <- asks envPlain if plain || isEnabled Ext_hard_line_breaks opts @@ -1052,7 +1166,8 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - return $ text $ trim $ writeHtmlString def $ Pandoc nullMeta [Plain [lnk]] + (text . T.unpack . T.strip) <$> + writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt @@ -1063,20 +1178,20 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True - _ -> False + _ -> False let useRefLinks = writerReferenceLinks opts && not useAuto shortcutable <- asks envRefShortcutable let useShortcutRefLinks = shortcutable && isEnabled Ext_shortcut_reference_links opts - ref <- if useRefLinks then getReference attr txt (src, tit) else return [] - reftext <- inlineListToMarkdown opts ref + reftext <- if useRefLinks then getReference attr linktext (src, tit) + else return empty return $ if useAuto then if plain then text srcSuffix else "<" <> text srcSuffix <> ">" else if useRefLinks then let first = "[" <> linktext <> "]" - second = if txt == ref + second = if getKey linktext == getKey reftext then if useShortcutRefLinks then "" else "[]" @@ -1091,7 +1206,8 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - return $ text $ trim $ writeHtmlString def $ Pandoc nullMeta [Plain [img]] + (text . T.unpack . T.strip) <$> + writeHtml5String def (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain let txt = if null alternate || alternate == [Str source] @@ -1114,4 +1230,36 @@ makeMathPlainer :: [Inline] -> [Inline] makeMathPlainer = walk go where go (Emph xs) = Span nullAttr xs - go x = x + go x = x + +toSuperscript :: Char -> Maybe Char +toSuperscript '1' = Just '\x00B9' +toSuperscript '2' = Just '\x00B2' +toSuperscript '3' = Just '\x00B3' +toSuperscript '+' = Just '\x207A' +toSuperscript '-' = Just '\x207B' +toSuperscript '=' = Just '\x207C' +toSuperscript '(' = Just '\x207D' +toSuperscript ')' = Just '\x207E' +toSuperscript c + | c >= '0' && c <= '9' = + Just $ chr (0x2070 + (ord c - 48)) + | isSpace c = Just c + | otherwise = Nothing + +toSubscript :: Char -> Maybe Char +toSubscript '+' = Just '\x208A' +toSubscript '-' = Just '\x208B' +toSubscript '=' = Just '\x208C' +toSubscript '(' = Just '\x208D' +toSubscript ')' = Just '\x208E' +toSubscript c + | c >= '0' && c <= '9' = + Just $ chr (0x2080 + (ord c - 48)) + | isSpace c = Just c + | otherwise = Nothing + +lineBreakToSpace :: Inline -> Inline +lineBreakToSpace LineBreak = Space +lineBreakToSpace SoftBreak = Space +lineBreakToSpace x = x diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs new file mode 100644 index 000000000..477f5a0b1 --- /dev/null +++ b/src/Text/Pandoc/Writers/Math.hs @@ -0,0 +1,56 @@ +module Text.Pandoc.Writers.Math + ( texMathToInlines + , convertMath + , defaultMathJaxURL + , defaultKaTeXURL + ) +where + +import Text.Pandoc.Class +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.TeXMath (DisplayType (..), Exp, readTeX, writePandoc) + +-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. +-- Defaults to raw formula between @$@ or @$$@ characters if entire formula +-- can't be converted. +texMathToInlines :: PandocMonad m + => MathType + -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> m [Inline] +texMathToInlines mt inp = do + res <- convertMath writePandoc mt inp + case res of + Right (Just ils) -> return ils + Right Nothing -> do + report $ CouldNotConvertTeXMath inp "" + return [mkFallback mt inp] + Left il -> return [il] + +mkFallback :: MathType -> String -> Inline +mkFallback mt str = Str (delim ++ str ++ delim) + where delim = case mt of + DisplayMath -> "$$" + InlineMath -> "$" + +-- | Converts a raw TeX math formula using a writer function, +-- issuing a warning and producing a fallback (a raw string) +-- on failure. +convertMath :: PandocMonad m + => (DisplayType -> [Exp] -> a) -> MathType -> String + -> m (Either Inline a) +convertMath writer mt str = + case writer dt <$> readTeX str of + Right r -> return (Right r) + Left e -> do + report $ CouldNotConvertTeXMath str e + return (Left $ mkFallback mt str) + where dt = case mt of + DisplayMath -> DisplayBlock + InlineMath -> DisplayInline + +defaultMathJaxURL :: String +defaultMathJaxURL = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/" + +defaultKaTeXURL :: String +defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.8.3/" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 78d4651e7..2470d9200 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,41 +30,44 @@ Conversion of 'Pandoc' documents to MediaWiki markup. MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki> -} module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.List (intercalate) +import qualified Data.Set as Set +import Data.Text (Text, pack) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared import Text.Pandoc.Pretty (render) -import Text.Pandoc.ImageSize +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intersect, intercalate ) -import Network.URI ( isURI ) -import Control.Monad.Reader -import Control.Monad.State +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML (escapeStringForXML) data WriterState = WriterState { - stNotes :: Bool -- True if there are notes - , stOptions :: WriterOptions -- writer options + stNotes :: Bool -- True if there are notes + , stOptions :: WriterOptions -- writer options } data WriterReader = WriterReader { - options :: WriterOptions -- Writer options - , listLevel :: String -- String at beginning of list items, e.g. "**" - , useTags :: Bool -- True if we should use HTML tags because we're in a complex list + options :: WriterOptions -- Writer options + , listLevel :: String -- String at beginning of list items, e.g. "**" + , useTags :: Bool -- True if we should use HTML tags because we're in a complex list } -type MediaWikiWriter = ReaderT WriterReader (State WriterState) +type MediaWikiWriter m = ReaderT WriterReader (StateT WriterState m) -- | Convert Pandoc to MediaWiki. -writeMediaWiki :: WriterOptions -> Pandoc -> String +writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMediaWiki opts document = let initialState = WriterState { stNotes = False, stOptions = opts } env = WriterReader { options = opts, listLevel = [], useTags = False } - in evalState (runReaderT (pandocToMediaWiki document) env) initialState + in evalStateT (runReaderT (pandocToMediaWiki document) env) initialState -- | Return MediaWiki representation of document. -pandocToMediaWiki :: Pandoc -> MediaWikiWriter String +pandocToMediaWiki :: PandocMonad m => Pandoc -> MediaWikiWriter m Text pandocToMediaWiki (Pandoc meta blocks) = do opts <- asks options metadata <- metaToJSON opts @@ -79,8 +82,8 @@ pandocToMediaWiki (Pandoc meta blocks) = do let main = body ++ notes let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata - return $ case writerTemplate opts of - Nothing -> main + pack <$> case writerTemplate opts of + Nothing -> return main Just tpl -> renderTemplate' tpl context -- | Escape special characters for MediaWiki. @@ -88,8 +91,9 @@ escapeString :: String -> String escapeString = escapeStringForXML -- | Convert Pandoc block element to MediaWiki. -blockToMediaWiki :: Block -- ^ Block element - -> MediaWikiWriter String +blockToMediaWiki :: PandocMonad m + => Block -- ^ Block element + -> MediaWikiWriter m String blockToMediaWiki Null = return "" @@ -123,10 +127,10 @@ blockToMediaWiki (Para inlines) = do blockToMediaWiki (LineBlock lns) = blockToMediaWiki $ linesToPara lns -blockToMediaWiki (RawBlock f str) +blockToMediaWiki b@(RawBlock f str) | f == Format "mediawiki" = return str | f == Format "html" = return str - | otherwise = return "" + | otherwise = "" <$ report (BlockNotRendered b) blockToMediaWiki HorizontalRule = return "\n-----\n" @@ -136,23 +140,15 @@ blockToMediaWiki (Header level _ inlines) = do return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" blockToMediaWiki (CodeBlock (_,classes,_) str) = do - let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", - "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", - "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", - "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5", - "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", - "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", - "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", - "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", - "visualfoxpro", "winbatch", "xml", "xpp", "z80"] + let at = Set.fromList classes `Set.intersection` highlightingLangs return $ - if null at - then "<pre" ++ (if null classes - then ">" - else " class=\"" ++ unwords classes ++ "\">") ++ - escapeString str ++ "</pre>" - else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>" - -- note: no escape! + case Set.toList at of + [] -> "<pre" ++ (if null classes + then ">" + else " class=\"" ++ unwords classes ++ "\">") ++ + escapeString str ++ "</pre>" + (l:_) -> "<source lang=\"" ++ l ++ "\">" ++ str ++ "</source>" + -- note: no escape! even for <! blockToMediaWiki (BlockQuote blocks) = do contents <- blockListToMediaWiki blocks @@ -218,7 +214,7 @@ listAttribsToString (startnum, numstyle, _) = else "") -- | Convert bullet or ordered list item (list of blocks) to MediaWiki. -listItemToMediaWiki :: [Block] -> MediaWikiWriter String +listItemToMediaWiki :: PandocMonad m => [Block] -> MediaWikiWriter m String listItemToMediaWiki items = do contents <- blockListToMediaWiki items tags <- asks useTags @@ -229,8 +225,9 @@ listItemToMediaWiki items = do return $ marker ++ " " ++ contents -- | Convert definition list item (label, list of blocks) to MediaWiki. -definitionListItemToMediaWiki :: ([Inline],[[Block]]) - -> MediaWikiWriter String +definitionListItemToMediaWiki :: PandocMonad m + => ([Inline],[[Block]]) + -> MediaWikiWriter m String definitionListItemToMediaWiki (label, items) = do labelText <- inlineListToMediaWiki label contents <- mapM blockListToMediaWiki items @@ -259,18 +256,18 @@ isSimpleListItem :: [Block] -> Bool isSimpleListItem [] = True isSimpleListItem [x] = case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - DefinitionList _ -> isSimpleList x - _ -> False + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + DefinitionList _ -> isSimpleList x + _ -> False isSimpleListItem [x, y] | isPlainOrPara x = case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - DefinitionList _ -> isSimpleList y - _ -> False + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + DefinitionList _ -> isSimpleList y + _ -> False isSimpleListItem _ = False isPlainOrPara :: Block -> Bool @@ -284,20 +281,22 @@ vcat = intercalate "\n" -- Auxiliary functions for tables: -tableRowToMediaWiki :: Bool +tableRowToMediaWiki :: PandocMonad m + => Bool -> [Alignment] -> [Double] -> (Int, [[Block]]) - -> MediaWikiWriter String + -> MediaWikiWriter m String tableRowToMediaWiki headless alignments widths (rownum, cells) = do cells' <- mapM (tableCellToMediaWiki headless rownum) $ zip3 alignments widths cells return $ unlines cells' -tableCellToMediaWiki :: Bool +tableCellToMediaWiki :: PandocMonad m + => Bool -> Int -> (Alignment, Double, [Block]) - -> MediaWikiWriter String + -> MediaWikiWriter m String tableCellToMediaWiki headless rownum (alignment, width, bs) = do contents <- blockListToMediaWiki bs let marker = if rownum == 1 && not headless then "!" else "|" @@ -322,13 +321,13 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -imageToMediaWiki :: Attr -> MediaWikiWriter String +imageToMediaWiki :: PandocMonad m => Attr -> MediaWikiWriter m String imageToMediaWiki attr = do opts <- gets stOptions let (_, cls, _) = attr toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim + checkPct maybeDim = maybeDim go (Just w) Nothing = '|':w ++ "px" go (Just w) (Just h) = '|':w ++ "x" ++ h ++ "px" go Nothing (Just h) = "|x" ++ h ++ "px" @@ -340,18 +339,19 @@ imageToMediaWiki attr = do return $ dims ++ classes -- | Convert list of Pandoc block elements to MediaWiki. -blockListToMediaWiki :: [Block] -- ^ List of block elements - -> MediaWikiWriter String +blockListToMediaWiki :: PandocMonad m + => [Block] -- ^ List of block elements + -> MediaWikiWriter m String blockListToMediaWiki blocks = fmap vcat $ mapM blockToMediaWiki blocks -- | Convert list of Pandoc inline elements to MediaWiki. -inlineListToMediaWiki :: [Inline] -> MediaWikiWriter String +inlineListToMediaWiki :: PandocMonad m => [Inline] -> MediaWikiWriter m String inlineListToMediaWiki lst = fmap concat $ mapM inlineToMediaWiki lst -- | Convert Pandoc inline element to MediaWiki. -inlineToMediaWiki :: Inline -> MediaWikiWriter String +inlineToMediaWiki :: PandocMonad m => Inline -> MediaWikiWriter m String inlineToMediaWiki (Span attrs ils) = do contents <- inlineListToMediaWiki ils @@ -394,22 +394,28 @@ inlineToMediaWiki (Code _ str) = inlineToMediaWiki (Str str) = return $ escapeString str -inlineToMediaWiki (Math _ str) = return $ "<math>" ++ str ++ "</math>" - -- note: str should NOT be escaped +inlineToMediaWiki (Math mt str) = return $ + "<math display=\"" ++ + (if mt == DisplayMath then "block" else "inline") ++ + "\">" ++ str ++ "</math>" + -- note: str should NOT be escaped -inlineToMediaWiki (RawInline f str) +inlineToMediaWiki il@(RawInline f str) | f == Format "mediawiki" = return str | f == Format "html" = return str - | otherwise = return "" + | otherwise = "" <$ report (InlineNotRendered il) -inlineToMediaWiki (LineBreak) = return "<br />\n" +inlineToMediaWiki LineBreak = return "<br />\n" inlineToMediaWiki SoftBreak = do wrapText <- gets (writerWrapText . stOptions) + listlevel <- asks listLevel case wrapText of WrapAuto -> return " " WrapNone -> return " " - WrapPreserve -> return "\n" + WrapPreserve -> if null listlevel + then return "\n" + else return " " inlineToMediaWiki Space = return " " @@ -437,5 +443,636 @@ inlineToMediaWiki (Image attr alt (source, tit)) = do inlineToMediaWiki (Note contents) = do contents' <- blockListToMediaWiki contents modify (\s -> s { stNotes = True }) - return $ "<ref>" ++ contents' ++ "</ref>" - -- note - may not work for notes with multiple blocks + return $ "<ref>" ++ stripTrailingNewlines contents' ++ "</ref>" + -- note - does not work for notes with multiple blocks + +highlightingLangs :: Set.Set String +highlightingLangs = Set.fromList [ + "abap", + "abl", + "abnf", + "aconf", + "actionscript", + "actionscript3", + "ada", + "ada2005", + "ada95", + "adl", + "agda", + "ahk", + "alloy", + "ambienttalk", + "ambienttalk/2", + "antlr", + "antlr-actionscript", + "antlr-as", + "antlr-c#", + "antlr-cpp", + "antlr-csharp", + "antlr-java", + "antlr-objc", + "antlr-perl", + "antlr-python", + "antlr-rb", + "antlr-ruby", + "apache", + "apacheconf", + "apl", + "applescript", + "arduino", + "arexx", + "as", + "as3", + "asm", + "aspectj", + "aspx-cs", + "aspx-vb", + "asy", + "asymptote", + "at", + "autohotkey", + "autoit", + "awk", + "b3d", + "basemake", + "bash", + "basic", + "bat", + "batch", + "bbcode", + "because", + "befunge", + "bf", + "blitzbasic", + "blitzmax", + "bmax", + "bnf", + "boo", + "boogie", + "bplus", + "brainfuck", + "bro", + "bsdmake", + "bugs", + "c", + "c#", + "c++", + "c++-objdumb", + "c-objdump", + "ca65", + "cadl", + "camkes", + "cbmbas", + "ceylon", + "cf3", + "cfc", + "cfengine3", + "cfg", + "cfm", + "cfs", + "chai", + "chaiscript", + "chapel", + "cheetah", + "chpl", + "cirru", + "cl", + "clay", + "clipper", + "clj", + "cljs", + "clojure", + "clojurescript", + "cmake", + "cobol", + "cobolfree", + "coffee", + "coffee-script", + "coffeescript", + "common-lisp", + "componentpascal", + "console", + "control", + "coq", + "cp", + "cpp", + "cpp-objdump", + "cpsa", + "crmsh", + "croc", + "cry", + "cryptol", + "csh", + "csharp", + "csound", + "csound-csd", + "csound-document", + "csound-orc", + "csound-sco", + "csound-score", + "css", + "css+django", + "css+erb", + "css+genshi", + "css+genshitext", + "css+jinja", + "css+lasso", + "css+mako", + "css+mozpreproc", + "css+myghty", + "css+php", + "css+ruby", + "css+smarty", + "cu", + "cucumber", + "cuda", + "cxx-objdump", + "cypher", + "cython", + "d", + "d-objdump", + "dart", + "debcontrol", + "debsources", + "delphi", + "dg", + "diff", + "django", + "docker", + "dockerfile", + "dosbatch", + "doscon", + "dosini", + "dpatch", + "dtd", + "duby", + "duel", + "dylan", + "dylan-console", + "dylan-lid", + "dylan-repl", + "earl-grey", + "earlgrey", + "easytrieve", + "ebnf", + "ec", + "ecl", + "eg", + "eiffel", + "elisp", + "elixir", + "elm", + "emacs", + "erb", + "erl", + "erlang", + "evoque", + "ex", + "exs", + "ezhil", + "factor", + "fan", + "fancy", + "felix", + "fish", + "fishshell", + "flx", + "fortran", + "fortranfixed", + "foxpro", + "fsharp", + "fy", + "gap", + "gas", + "gawk", + "genshi", + "genshitext", + "gherkin", + "glsl", + "gnuplot", + "go", + "golo", + "gooddata-cl", + "gosu", + "groff", + "groovy", + "gst", + "haml", + "handlebars", + "haskell", + "haxe", + "haxeml", + "hexdump", + "hs", + "html", + "html+cheetah", + "html+django", + "html+erb", + "html+evoque", + "html+genshi", + "html+handlebars", + "html+jinja", + "html+kid", + "html+lasso", + "html+mako", + "html+myghty", + "html+php", + "html+ruby", + "html+smarty", + "html+spitfire", + "html+twig", + "html+velocity", + "htmlcheetah", + "htmldjango", + "http", + "hx", + "hxml", + "hxsl", + "hy", + "hybris", + "hylang", + "i6", + "i6t", + "i7", + "idl", + "idl4", + "idr", + "idris", + "iex", + "igor", + "igorpro", + "ik", + "inform6", + "inform7", + "ini", + "io", + "ioke", + "irb", + "irc", + "isabelle", + "j", + "jade", + "jags", + "jasmin", + "jasminxt", + "java", + "javascript", + "javascript+cheetah", + "javascript+django", + "javascript+erb", + "javascript+genshi", + "javascript+genshitext", + "javascript+jinja", + "javascript+lasso", + "javascript+mako", + "javascript+mozpreproc", + "javascript+myghty", + "javascript+php", + "javascript+ruby", + "javascript+smarty", + "javascript+spitfire", + "jbst", + "jcl", + "jinja", + "jl", + "jlcon", + "jproperties", + "js", + "js+cheetah", + "js+django", + "js+erb", + "js+genshi", + "js+genshitext", + "js+jinja", + "js+lasso", + "js+mako", + "js+myghty", + "js+php", + "js+ruby", + "js+smarty", + "js+spitfire", + "json", + "json-ld", + "jsonld", + "jsonml+bst", + "jsp", + "julia", + "kal", + "kconfig", + "kernel-config", + "kid", + "koka", + "kotlin", + "ksh", + "lagda", + "lasso", + "lassoscript", + "latex", + "lcry", + "lcryptol", + "lean", + "less", + "lhaskell", + "lhs", + "lid", + "lidr", + "lidris", + "lighttpd", + "lighty", + "limbo", + "linux-config", + "liquid", + "lisp", + "literate-agda", + "literate-cryptol", + "literate-haskell", + "literate-idris", + "live-script", + "livescript", + "llvm", + "logos", + "logtalk", + "lsl", + "lua", + "m2", + "make", + "makefile", + "mako", + "man", + "maql", + "mask", + "mason", + "mathematica", + "matlab", + "matlabsession", + "mawk", + "menuconfig", + "mf", + "minid", + "mma", + "modelica", + "modula2", + "moin", + "monkey", + "moo", + "moocode", + "moon", + "moonscript", + "mozhashpreproc", + "mozpercentpreproc", + "mq4", + "mq5", + "mql", + "mql4", + "mql5", + "msc", + "mscgen", + "mupad", + "mxml", + "myghty", + "mysql", + "nasm", + "nawk", + "nb", + "nemerle", + "nesc", + "newlisp", + "newspeak", + "nginx", + "nim", + "nimrod", + "nit", + "nix", + "nixos", + "nroff", + "nsh", + "nsi", + "nsis", + "numpy", + "obj-c", + "obj-c++", + "obj-j", + "objc", + "objc++", + "objdump", + "objdump-nasm", + "objective-c", + "objective-c++", + "objective-j", + "objectivec", + "objectivec++", + "objectivej", + "objectpascal", + "objj", + "ocaml", + "octave", + "odin", + "ooc", + "opa", + "openbugs", + "openedge", + "pacmanconf", + "pan", + "parasail", + "pas", + "pascal", + "pawn", + "pcmk", + "perl", + "perl6", + "php", + "php3", + "php4", + "php5", + "pig", + "pike", + "pkgconfig", + "pl", + "pl6", + "plpgsql", + "po", + "posh", + "postgres", + "postgres-console", + "postgresql", + "postgresql-console", + "postscr", + "postscript", + "pot", + "pov", + "powershell", + "praat", + "progress", + "prolog", + "properties", + "proto", + "protobuf", + "ps1", + "ps1con", + "psm1", + "psql", + "puppet", + "py", + "py3", + "py3tb", + "pycon", + "pypy", + "pypylog", + "pyrex", + "pytb", + "python", + "python3", + "pyx", + "qbasic", + "qbs", + "qml", + "qvt", + "qvto", + "r", + "racket", + "ragel", + "ragel-c", + "ragel-cpp", + "ragel-d", + "ragel-em", + "ragel-java", + "ragel-objc", + "ragel-rb", + "ragel-ruby", + "raw", + "rb", + "rbcon", + "rconsole", + "rd", + "rebol", + "red", + "red/system", + "redcode", + "registry", + "resource", + "resourcebundle", + "rest", + "restructuredtext", + "rexx", + "rhtml", + "rkt", + "roboconf-graph", + "roboconf-instances", + "robotframework", + "rout", + "rql", + "rsl", + "rst", + "rts", + "ruby", + "rust", + "s", + "sage", + "salt", + "sass", + "sc", + "scala", + "scaml", + "scheme", + "scilab", + "scm", + "scss", + "sh", + "shell", + "shell-session", + "shen", + "slim", + "sls", + "smali", + "smalltalk", + "smarty", + "sml", + "snobol", + "sources.list", + "sourceslist", + "sp", + "sparql", + "spec", + "spitfire", + "splus", + "sql", + "sqlite3", + "squeak", + "squid", + "squid.conf", + "squidconf", + "ssp", + "st", + "stan", + "supercollider", + "sv", + "swift", + "swig", + "systemverilog", + "tads3", + "tap", + "tcl", + "tcsh", + "tcshcon", + "tea", + "termcap", + "terminfo", + "terraform", + "tex", + "text", + "tf", + "thrift", + "todotxt", + "trac-wiki", + "trafficscript", + "treetop", + "ts", + "turtle", + "twig", + "typescript", + "udiff", + "urbiscript", + "v", + "vala", + "vapi", + "vb.net", + "vbnet", + "vctreestatus", + "velocity", + "verilog", + "vfp", + "vgl", + "vhdl", + "vim", + "winbatch", + "winbugs", + "x10", + "xbase", + "xml", + "xml+cheetah", + "xml+django", + "xml+erb", + "xml+evoque", + "xml+genshi", + "xml+jinja", + "xml+kid", + "xml+lasso", + "xml+mako", + "xml+myghty", + "xml+php", + "xml+ruby", + "xml+smarty", + "xml+spitfire", + "xml+velocity", + "xq", + "xql", + "xqm", + "xquery", + "xqy", + "xslt", + "xten", + "xtend", + "xul+mozpreproc", + "yaml", + "yaml+jinja", + "zephir" ] diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs new file mode 100644 index 000000000..83d80cd4a --- /dev/null +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -0,0 +1,639 @@ +{- +Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Ms + Copyright : Copyright (C) 2007-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to groff ms format. + +TODO: + +[ ] use base URL to construct absolute URLs from relative ones for external + links +[ ] is there a better way to do strikeout? +[ ] tight/loose list distinction +-} + +module Text.Pandoc.Writers.Ms ( writeMs ) where +import Control.Monad.State.Strict +import Data.Char (isLower, isUpper, toUpper) +import Data.List (intercalate, intersperse, sort) +import qualified Data.Map as Map +import Data.Maybe (catMaybes, fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI (escapeURIString, isAllowedInURI) +import Skylighting +import System.FilePath (takeExtension) +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Highlighting +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Shared +import Text.Pandoc.Templates +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) +import Text.TeXMath (writeEqn) + +data WriterState = WriterState { stHasInlineMath :: Bool + , stFirstPara :: Bool + , stNotes :: [Note] + , stSmallCaps :: Bool + , stHighlighting :: Bool + , stFontFeatures :: Map.Map Char Bool + } + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ stHasInlineMath = False + , stFirstPara = True + , stNotes = [] + , stSmallCaps = False + , stHighlighting = False + , stFontFeatures = Map.fromList [ + ('I',False) + , ('B',False) + , ('C',False) + ] + } + +type Note = [Block] + +type MS = StateT WriterState + +-- | Convert Pandoc to Ms. +writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeMs opts document = + evalStateT (pandocToMs opts document) defaultWriterState + +-- | Return groff ms representation of document. +pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text +pandocToMs opts (Pandoc meta blocks) = do + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + let render' :: Doc -> Text + render' = render colwidth + metadata <- metaToJSON opts + (fmap render' . blockListToMs opts) + (fmap render' . inlineListToMs' opts) + meta + body <- blockListToMs opts blocks + let main = render' body + hasInlineMath <- gets stHasInlineMath + let titleMeta = (escapeString . stringify) $ docTitle meta + let authorsMeta = map (escapeString . stringify) $ docAuthors meta + hasHighlighting <- gets stHighlighting + let highlightingMacros = if hasHighlighting + then case writerHighlightStyle opts of + Nothing -> mempty + Just sty -> render' $ styleToMs sty + else mempty + + let context = defField "body" main + $ defField "has-inline-math" hasInlineMath + $ defField "hyphenate" True + $ defField "pandoc-version" pandocVersion + $ defField "toc" (writerTableOfContents opts) + $ defField "title-meta" titleMeta + $ defField "author-meta" (intercalate "; " authorsMeta) + $ defField "highlighting-macros" highlightingMacros metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +-- | Association list of characters to escape. +msEscapes :: Map.Map Char String +msEscapes = Map.fromList + [ ('\160', "\\~") + , ('\'', "\\[aq]") + , ('`', "\\`") + , ('\8217', "'") + , ('"', "\\[dq]") + , ('\x2014', "\\[em]") + , ('\x2013', "\\[en]") + , ('\x2026', "\\&...") + , ('~', "\\[ti]") + , ('^', "\\[ha]") + , ('-', "\\-") + , ('@', "\\@") + , ('\\', "\\\\") + ] + +escapeChar :: Char -> String +escapeChar c = fromMaybe [c] (Map.lookup c msEscapes) + +-- | Escape | character, used to mark inline math, inside math. +escapeBar :: String -> String +escapeBar = concatMap go + where go '|' = "\\[u007C]" + go c = [c] + +-- | Escape special characters for Ms. +escapeString :: String -> String +escapeString = concatMap escapeChar + +escapeUri :: String -> String +escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) + +toSmallCaps :: String -> String +toSmallCaps [] = [] +toSmallCaps (c:cs) + | isLower c = let (lowers,rest) = span isLower (c:cs) + in "\\s-2" ++ escapeString (map toUpper lowers) ++ + "\\s0" ++ toSmallCaps rest + | isUpper c = let (uppers,rest) = span isUpper (c:cs) + in escapeString uppers ++ toSmallCaps rest + | otherwise = escapeChar c ++ toSmallCaps cs + +-- | Escape a literal (code) section for Ms. +escapeCode :: String -> String +escapeCode = intercalate "\n" . map escapeLine . lines + where escapeCodeChar ' ' = "\\ " + escapeCodeChar '\t' = "\\\t" + escapeCodeChar c = escapeChar c + escapeLine codeline = + case concatMap escapeCodeChar codeline of + a@('.':_) -> "\\&" ++ a + b -> b + +-- We split inline lists into sentences, and print one sentence per +-- line. groff/troff treats the line-ending period differently. +-- See http://code.google.com/p/pandoc/issues/detail?id=148. + +-- | Returns the first sentence in a list of inlines, and the rest. +breakSentence :: [Inline] -> ([Inline], [Inline]) +breakSentence [] = ([],[]) +breakSentence xs = + let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True + isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True + isSentenceEndInline LineBreak = True + isSentenceEndInline _ = False + (as, bs) = break isSentenceEndInline xs + in case bs of + [] -> (as, []) + [c] -> (as ++ [c], []) + (c:Space:cs) -> (as ++ [c], cs) + (c:SoftBreak:cs) -> (as ++ [c], cs) + (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) + (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) + (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) + (c:cs) -> (as ++ [c] ++ ds, es) + where (ds, es) = breakSentence cs + +-- | Split a list of inlines into sentences. +splitSentences :: [Inline] -> [[Inline]] +splitSentences xs = + let (sent, rest) = breakSentence xs + in if null rest then [sent] else sent : splitSentences rest + +blockToMs :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> MS m Doc +blockToMs _ Null = return empty +blockToMs opts (Div _ bs) = do + setFirstPara + res <- blockListToMs opts bs + setFirstPara + return res +blockToMs opts (Plain inlines) = + liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines +blockToMs opts (Para [Image attr alt (src,_tit)]) + | let ext = takeExtension src in (ext == ".ps" || ext == ".eps") = do + let (mbW,mbH) = (inPoints opts <$> dimension Width attr, + inPoints opts <$> dimension Height attr) + let sizeAttrs = case (mbW, mbH) of + (Just wp, Nothing) -> space <> doubleQuotes + (text (show (floor wp :: Int) ++ "p")) + (Just wp, Just hp) -> space <> doubleQuotes + (text (show (floor wp :: Int) ++ "p")) <> + space <> + doubleQuotes (text (show (floor hp :: Int))) + _ -> empty + capt <- inlineListToMs' opts alt + return $ nowrap (text ".PSPIC -C " <> + doubleQuotes (text (escapeString src)) <> + sizeAttrs) $$ + text ".ce 1000" $$ + capt $$ + text ".ce 0" +blockToMs opts (Para inlines) = do + firstPara <- gets stFirstPara + resetFirstPara + contents <- liftM vcat $ mapM (inlineListToMs' opts) $ + splitSentences inlines + return $ text (if firstPara then ".LP" else ".PP") $$ contents +blockToMs _ b@(RawBlock f str) + | f == Format "ms" = return $ text str + | otherwise = do + report $ BlockNotRendered b + return empty +blockToMs _ HorizontalRule = do + resetFirstPara + return $ text ".HLINE" +blockToMs opts (Header level (ident,classes,_) inlines) = do + setFirstPara + contents <- inlineListToMs' opts $ map breakToSpace inlines + let (heading, secnum) = if writerNumberSections opts && + "unnumbered" `notElem` classes + then (".NH", "\\*[SN]") + else (".SH", "") + let anchor = if null ident + then empty + else nowrap $ + text ".pdfhref M " <> doubleQuotes (text ident) + let bookmark = text ".pdfhref O " <> text (show level ++ " ") <> + doubleQuotes (text $ secnum ++ + (if null secnum + then "" + else " ") ++ + escapeString (stringify inlines)) + let backlink = nowrap (text ".pdfhref L -D " <> + doubleQuotes (text ident) <> space <> text "\\") <> cr <> + text " -- " + let tocEntry = if writerTableOfContents opts && + level <= writerTOCDepth opts + then text ".XS" + $$ backlink <> doubleQuotes ( + nowrap (text (replicate level '\t') <> + (if null secnum + then empty + else text secnum <> text "\\~\\~") + <> contents)) + $$ text ".XE" + else empty + modify $ \st -> st{ stFirstPara = True } + return $ (text heading <> space <> text (show level)) $$ + contents $$ + bookmark $$ + anchor $$ + tocEntry +blockToMs opts (CodeBlock attr str) = do + hlCode <- highlightCode opts attr str + setFirstPara + return $ + text ".IP" $$ + text ".nf" $$ + text "\\f[C]" $$ + hlCode $$ + text "\\f[]" $$ + text ".fi" +blockToMs opts (LineBlock ls) = do + resetFirstPara + blockToMs opts $ Para $ intercalate [LineBreak] ls +blockToMs opts (BlockQuote blocks) = do + setFirstPara + contents <- blockListToMs opts blocks + setFirstPara + return $ text ".RS" $$ contents $$ text ".RE" +blockToMs opts (Table caption alignments widths headers rows) = + let aligncode AlignLeft = "l" + aligncode AlignRight = "r" + aligncode AlignCenter = "c" + aligncode AlignDefault = "l" + in do + caption' <- inlineListToMs' opts caption + let iwidths = if all (== 0) widths + then repeat "" + else map (printf "w(%0.1fn)" . (70 *)) widths + -- 78n default width - 8n indent = 70n + let coldescriptions = text $ unwords + (zipWith (\align width -> aligncode align ++ width) + alignments iwidths) ++ "." + colheadings <- mapM (blockListToMs opts) headers + let makeRow cols = text "T{" $$ + vcat (intersperse (text "T}\tT{") cols) $$ + text "T}" + let colheadings' = if all null headers + then empty + else makeRow colheadings $$ char '_' + body <- mapM (\row -> do + cols <- mapM (blockListToMs opts) row + return $ makeRow cols) rows + setFirstPara + return $ text ".PP" $$ caption' $$ + text ".TS" $$ text "delim(@@) tab(\t);" $$ coldescriptions $$ + colheadings' $$ vcat body $$ text ".TE" + +blockToMs opts (BulletList items) = do + contents <- mapM (bulletListItemToMs opts) items + setFirstPara + return (vcat contents) +blockToMs opts (OrderedList attribs items) = do + let markers = take (length items) $ orderedListMarkers attribs + let indent = 2 + + maximum (map length markers) + contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $ + zip markers items + setFirstPara + return (vcat contents) +blockToMs opts (DefinitionList items) = do + contents <- mapM (definitionListItemToMs opts) items + setFirstPara + return (vcat contents) + +-- | Convert bullet list item (list of blocks) to ms. +bulletListItemToMs :: PandocMonad m => WriterOptions -> [Block] -> MS m Doc +bulletListItemToMs _ [] = return empty +bulletListItemToMs opts (Para first:rest) = + bulletListItemToMs opts (Plain first:rest) +bulletListItemToMs opts (Plain first:rest) = do + first' <- blockToMs opts (Plain first) + rest' <- blockListToMs opts rest + let first'' = text ".IP \\[bu] 3" $$ first' + let rest'' = if null rest + then empty + else text ".RS 3" $$ rest' $$ text ".RE" + return (first'' $$ rest'') +bulletListItemToMs opts (first:rest) = do + first' <- blockToMs opts first + rest' <- blockListToMs opts rest + return $ text "\\[bu] .RS 3" $$ first' $$ rest' $$ text ".RE" + +-- | Convert ordered list item (a list of blocks) to ms. +orderedListItemToMs :: PandocMonad m + => WriterOptions -- ^ options + -> String -- ^ order marker for list item + -> Int -- ^ number of spaces to indent + -> [Block] -- ^ list item (list of blocks) + -> MS m Doc +orderedListItemToMs _ _ _ [] = return empty +orderedListItemToMs opts num indent (Para first:rest) = + orderedListItemToMs opts num indent (Plain first:rest) +orderedListItemToMs opts num indent (first:rest) = do + first' <- blockToMs opts first + rest' <- blockListToMs opts rest + let num' = printf ("%" ++ show (indent - 1) ++ "s") num + let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' + let rest'' = if null rest + then empty + else text ".RS " <> text (show indent) $$ + rest' $$ text ".RE" + return $ first'' $$ rest'' + +-- | Convert definition list item (label, list of blocks) to ms. +definitionListItemToMs :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> MS m Doc +definitionListItemToMs opts (label, defs) = do + labelText <- inlineListToMs' opts $ map breakToSpace label + contents <- if null defs + then return empty + else liftM vcat $ forM defs $ \blocks -> do + let (first, rest) = case blocks of + (Para x:y) -> (Plain x,y) + (x:y) -> (x,y) + [] -> (Plain [], []) + -- should not happen + rest' <- liftM vcat $ + mapM (\item -> blockToMs opts item) rest + first' <- blockToMs opts first + return $ first' $$ text ".RS" $$ rest' $$ text ".RE" + return $ nowrap (text ".IP " <> doubleQuotes labelText) $$ contents + +-- | Convert list of Pandoc block elements to ms. +blockListToMs :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> MS m Doc +blockListToMs opts blocks = + mapM (blockToMs opts) blocks >>= (return . vcat) + +-- | Convert list of Pandoc inline elements to ms. +inlineListToMs :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc +-- if list starts with ., insert a zero-width character \& so it +-- won't be interpreted as markup if it falls at the beginning of a line. +inlineListToMs opts lst = hcat <$> mapM (inlineToMs opts) lst + +-- This version to be used when there is no further inline content; +-- forces a note at the end. +inlineListToMs' :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc +inlineListToMs' opts lst = do + x <- hcat <$> mapM (inlineToMs opts) lst + y <- handleNotes opts empty + return $ x <> y + +-- | Convert Pandoc inline element to ms. +inlineToMs :: PandocMonad m => WriterOptions -> Inline -> MS m Doc +inlineToMs opts (Span _ ils) = inlineListToMs opts ils +inlineToMs opts (Emph lst) = + withFontFeature 'I' (inlineListToMs opts lst) +inlineToMs opts (Strong lst) = + withFontFeature 'B' (inlineListToMs opts lst) +inlineToMs opts (Strikeout lst) = do + contents <- inlineListToMs opts lst + -- we use grey color instead of strikeout, which seems quite + -- hard to do in groff for arbitrary bits of text + return $ text "\\m[strikecolor]" <> contents <> text "\\m[]" +inlineToMs opts (Superscript lst) = do + contents <- inlineListToMs opts lst + return $ text "\\*{" <> contents <> text "\\*}" +inlineToMs opts (Subscript lst) = do + contents <- inlineListToMs opts lst + return $ text "\\*<" <> contents <> text "\\*>" +inlineToMs opts (SmallCaps lst) = do + -- see https://lists.gnu.org/archive/html/groff/2015-01/msg00016.html + modify $ \st -> st{ stSmallCaps = not (stSmallCaps st) } + res <- inlineListToMs opts lst + modify $ \st -> st{ stSmallCaps = not (stSmallCaps st) } + return res +inlineToMs opts (Quoted SingleQuote lst) = do + contents <- inlineListToMs opts lst + return $ char '`' <> contents <> char '\'' +inlineToMs opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMs opts lst + return $ text "\\[lq]" <> contents <> text "\\[rq]" +inlineToMs opts (Cite _ lst) = + inlineListToMs opts lst +inlineToMs opts (Code attr str) = do + hlCode <- highlightCode opts attr str + withFontFeature 'C' (return hlCode) +inlineToMs _ (Str str) = do + let shim = case str of + '.':_ -> afterBreak "\\&" + _ -> empty + smallcaps <- gets stSmallCaps + if smallcaps + then return $ shim <> text (toSmallCaps str) + else return $ shim <> text (escapeString str) +inlineToMs opts (Math InlineMath str) = do + modify $ \st -> st{ stHasInlineMath = True } + res <- convertMath writeEqn InlineMath str + case res of + Left il -> inlineToMs opts il + Right r -> return $ text "@" <> text (escapeBar r) <> text "@" +inlineToMs opts (Math DisplayMath str) = do + res <- convertMath writeEqn InlineMath str + case res of + Left il -> do + contents <- inlineToMs opts il + return $ cr <> text ".RS" $$ contents $$ text ".RE" + Right r -> return $ + cr <> text ".EQ" $$ text (escapeBar r) $$ text ".EN" +inlineToMs _ il@(RawInline f str) + | f == Format "ms" = return $ text str + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToMs _ LineBreak = return $ cr <> text ".br" <> cr +inlineToMs opts SoftBreak = + handleNotes opts $ + case writerWrapText opts of + WrapAuto -> space + WrapNone -> space + WrapPreserve -> cr +inlineToMs opts Space = handleNotes opts space +inlineToMs opts (Link _ txt ('#':ident, _)) = do + -- internal link + contents <- inlineListToMs' opts $ map breakToSpace txt + return $ text "\\c" <> cr <> nowrap (text ".pdfhref L -D " <> + doubleQuotes (text ident) <> text " -A " <> + doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> + text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" +inlineToMs opts (Link _ txt (src, _)) = do + -- external link + contents <- inlineListToMs' opts $ map breakToSpace txt + return $ text "\\c" <> cr <> nowrap (text ".pdfhref W -D " <> + doubleQuotes (text (escapeUri src)) <> text " -A " <> + doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> + text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" +inlineToMs _ (Image _ alternate (_, _)) = + return $ char '[' <> text "IMAGE: " <> + text (escapeString (stringify alternate)) <> char ']' +inlineToMs _ (Note contents) = do + modify $ \st -> st{ stNotes = contents : stNotes st } + return $ text "\\**" + +handleNotes :: PandocMonad m => WriterOptions -> Doc -> MS m Doc +handleNotes opts fallback = do + notes <- gets stNotes + if null notes + then return fallback + else do + modify $ \st -> st{ stNotes = [] } + vcat <$> mapM (handleNote opts) notes + +handleNote :: PandocMonad m => WriterOptions -> Note -> MS m Doc +handleNote opts bs = do + -- don't start with Paragraph or we'll get a spurious blank + -- line after the note ref: + let bs' = case bs of + (Para ils : rest) -> Plain ils : rest + _ -> bs + contents <- blockListToMs opts bs' + return $ cr <> text ".FS" $$ contents $$ text ".FE" <> cr + +fontChange :: PandocMonad m => MS m Doc +fontChange = do + features <- gets stFontFeatures + let filling = sort [c | (c,True) <- Map.toList features] + return $ text $ "\\f[" ++ filling ++ "]" + +withFontFeature :: PandocMonad m => Char -> MS m Doc -> MS m Doc +withFontFeature c action = do + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + begin <- fontChange + d <- action + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + end <- fontChange + return $ begin <> d <> end + +setFirstPara :: PandocMonad m => MS m () +setFirstPara = modify $ \st -> st{ stFirstPara = True } + +resetFirstPara :: PandocMonad m => MS m () +resetFirstPara = modify $ \st -> st{ stFirstPara = False } + +breakToSpace :: Inline -> Inline +breakToSpace SoftBreak = Space +breakToSpace LineBreak = Space +breakToSpace x = x + +-- Highlighting + +styleToMs :: Style -> Doc +styleToMs sty = vcat $ colordefs ++ map (toMacro sty) alltoktypes + where alltoktypes = enumFromTo KeywordTok NormalTok + colordefs = map toColorDef allcolors + toColorDef c = text (".defcolor " ++ + hexColor c ++ " rgb #" ++ hexColor c) + allcolors = catMaybes $ ordNub $ + [defaultColor sty, backgroundColor sty, + lineNumberColor sty, lineNumberBackgroundColor sty] ++ + concatMap (colorsForToken. snd) (Map.toList (tokenStyles sty)) + colorsForToken ts = [tokenColor ts, tokenBackground ts] + +hexColor :: Color -> String +hexColor (RGB r g b) = printf "%02x%02x%02x" r g b + +toMacro :: Style -> TokenType -> Doc +toMacro sty toktype = + nowrap (text ".ds " <> text (show toktype) <> text " " <> + setbg <> setcolor <> setfont <> + text "\\\\$1" <> + resetfont <> resetcolor <> resetbg) + where setcolor = maybe empty fgcol tokCol + resetcolor = maybe empty (const $ text "\\\\m[]") tokCol + setbg = empty -- maybe empty bgcol tokBg + resetbg = empty -- maybe empty (const $ text "\\\\M[]") tokBg + fgcol c = text $ "\\\\m[" ++ hexColor c ++ "]" + -- bgcol c = text $ "\\\\M[" ++ hexColor c ++ "]" + setfont = if tokBold || tokItalic + then text $ "\\\\f[C" ++ ['B' | tokBold] ++ + ['I' | tokItalic] ++ "]" + else empty + resetfont = if tokBold || tokItalic + then text "\\\\f[C]" + else empty + tokSty = Map.lookup toktype (tokenStyles sty) + tokCol = (tokSty >>= tokenColor) `mplus` defaultColor sty + -- tokBg = (tokSty >>= tokenBackground) `mplus` backgroundColor sty + tokBold = fromMaybe False (tokenBold <$> tokSty) + tokItalic = fromMaybe False (tokenItalic <$> tokSty) + -- tokUnderline = fromMaybe False (tokSty >>= tokUnderline) + -- lnColor = lineNumberColor sty + -- lnBkgColor = lineNumberBackgroundColor sty + +msFormatter :: FormatOptions -> [SourceLine] -> Doc +msFormatter _fmtopts = + vcat . map fmtLine + where fmtLine = hcat . map fmtToken + fmtToken (toktype, tok) = text "\\*" <> + brackets (text (show toktype) <> text " \"" + <> text (escapeCode (T.unpack tok)) <> text "\"") + +highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m Doc +highlightCode opts attr str = + case highlight (writerSyntaxMap opts) msFormatter attr str of + Left msg -> do + unless (null msg) $ report $ CouldNotHighlight msg + return $ text (escapeCode str) + Right h -> do + modify (\st -> st{ stHighlighting = True }) + return h diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs new file mode 100644 index 000000000..ad67e489d --- /dev/null +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -0,0 +1,408 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2017-2018 Alexander Krotov <ilabdsf@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Muse + Copyright : Copyright (C) 2017-2018 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov <ilabdsf@gmail.com> + Stability : stable + Portability : portable + +Conversion of 'Pandoc' documents to Muse. + +This module is mostly intended for <https://amusewiki.org/ Amusewiki> markup support, +as described by <https://amusewiki.org/library/manual Text::Amuse markup manual>. +Original <https://www.gnu.org/software/emacs-muse/ Emacs Muse> markup support +is a secondary goal. + +Where Text::Amuse markup +<https://metacpan.org/pod/Text::Amuse#DIFFERENCES-WITH-THE-ORIGINAL-EMACS-MUSE-MARKUP differs> +from <https://www.gnu.org/software/emacs-muse/manual/ Emacs Muse markup>, +Text::Amuse markup is supported. +For example, native tables are always used instead of Org Mode tables. +However, @\<literal style="html">@ tag is used for HTML raw blocks +even though it is supported only in Emacs Muse. +-} +module Text.Pandoc.Writers.Muse (writeMuse) where +import Control.Monad.State.Strict +import Data.Text (Text) +import Data.List (intersperse, transpose, isInfixOf) +import System.FilePath (takeExtension) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import qualified Data.Set as Set + +type Notes = [[Block]] +data WriterState = + WriterState { stNotes :: Notes + , stOptions :: WriterOptions + , stTopLevel :: Bool + , stInsideBlock :: Bool + , stIds :: Set.Set String + } + +-- | Convert Pandoc to Muse. +writeMuse :: PandocMonad m + => WriterOptions + -> Pandoc + -> m Text +writeMuse opts document = + let st = WriterState { stNotes = [] + , stOptions = opts + , stTopLevel = True + , stInsideBlock = False + , stIds = Set.empty + } + in evalStateT (pandocToMuse document) st + +-- | Return Muse representation of document. +pandocToMuse :: PandocMonad m + => Pandoc + -> StateT WriterState m Text +pandocToMuse (Pandoc meta blocks) = do + opts <- gets stOptions + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + let render' :: Doc -> Text + render' = render Nothing + metadata <- metaToJSON opts + (fmap render' . blockListToMuse) + (fmap render' . inlineListToMuse) + meta + body <- blockListToMuse blocks + notes <- liftM (reverse . stNotes) get >>= notesToMuse + let main = render colwidth $ body $+$ notes + let context = defField "body" main metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +-- | Helper function for flatBlockListToMuse +-- | Render all blocks and insert blank lines between the first two +catWithBlankLines :: PandocMonad m + => [Block] -- ^ List of block elements + -> Int -- ^ Number of blank lines + -> StateT WriterState m Doc +catWithBlankLines (b : bs) n = do + b' <- blockToMuse b + bs' <- flatBlockListToMuse bs + return $ b' <> blanklines n <> bs' +catWithBlankLines _ _ = error "Expected at least one block" + +-- | Convert list of Pandoc block elements to Muse +-- | without setting stTopLevel. +flatBlockListToMuse :: PandocMonad m + => [Block] -- ^ List of block elements + -> StateT WriterState m Doc +flatBlockListToMuse bs@(BulletList _ : BulletList _ : _) = catWithBlankLines bs 2 +flatBlockListToMuse bs@(OrderedList (_, style1, _) _ : OrderedList (_, style2, _) _ : _) = + catWithBlankLines bs (if style1' == style2' then 2 else 0) + where + style1' = normalizeStyle style1 + style2' = normalizeStyle style2 + normalizeStyle DefaultStyle = Decimal + normalizeStyle s = s +flatBlockListToMuse bs@(DefinitionList _ : DefinitionList _ : _) = catWithBlankLines bs 2 +flatBlockListToMuse bs@(_ : _) = catWithBlankLines bs 0 +flatBlockListToMuse [] = return mempty + +-- | Convert list of Pandoc block elements to Muse. +blockListToMuse :: PandocMonad m + => [Block] -- ^ List of block elements + -> StateT WriterState m Doc +blockListToMuse blocks = do + oldState <- get + modify $ \s -> s { stTopLevel = not $ stInsideBlock s + , stInsideBlock = True + } + result <- flatBlockListToMuse blocks + modify $ \s -> s { stTopLevel = stTopLevel oldState + , stInsideBlock = stInsideBlock oldState + } + return result + +-- | Convert Pandoc block element to Muse. +blockToMuse :: PandocMonad m + => Block -- ^ Block element + -> StateT WriterState m Doc +blockToMuse (Plain inlines) = inlineListToMuse inlines +blockToMuse (Para inlines) = do + contents <- inlineListToMuse inlines + return $ contents <> blankline +blockToMuse (LineBlock lns) = do + lns' <- mapM inlineListToMuse lns + return $ nowrap $ vcat (map (text "> " <>) lns') <> blankline +blockToMuse (CodeBlock (_,_,_) str) = + return $ "<example>" $$ text str $$ "</example>" $$ blankline +blockToMuse (RawBlock (Format format) str) = + return $ blankline $$ "<literal style=\"" <> text format <> "\">" $$ + text str $$ "</literal>" $$ blankline +blockToMuse (BlockQuote blocks) = do + contents <- flatBlockListToMuse blocks + return $ blankline + <> "<quote>" + $$ nest 0 contents -- nest 0 to remove trailing blank lines + $$ "</quote>" + <> blankline +blockToMuse (OrderedList (start, style, _) items) = do + let markers = take (length items) $ orderedListMarkers + (start, style, Period) + let maxMarkerLength = maximum $ map length markers + let markers' = map (\m -> let s = maxMarkerLength - length m + in m ++ replicate s ' ') markers + contents <- zipWithM orderedListItemToMuse markers' items + -- ensure that sublists have preceding blank line + topLevel <- gets stTopLevel + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline + where orderedListItemToMuse :: PandocMonad m + => String -- ^ marker for list item + -> [Block] -- ^ list item (list of blocks) + -> StateT WriterState m Doc + orderedListItemToMuse marker item = do + contents <- blockListToMuse item + return $ hang (length marker + 1) (text marker <> space) contents +blockToMuse (BulletList items) = do + contents <- mapM bulletListItemToMuse items + -- ensure that sublists have preceding blank line + topLevel <- gets stTopLevel + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline + where bulletListItemToMuse :: PandocMonad m + => [Block] + -> StateT WriterState m Doc + bulletListItemToMuse item = do + contents <- blockListToMuse item + return $ hang 2 "- " contents +blockToMuse (DefinitionList items) = do + contents <- mapM definitionListItemToMuse items + -- ensure that sublists have preceding blank line + topLevel <- gets stTopLevel + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline + where definitionListItemToMuse :: PandocMonad m + => ([Inline], [[Block]]) + -> StateT WriterState m Doc + definitionListItemToMuse (label, defs) = do + label' <- inlineListToMuse label + contents <- liftM vcat $ mapM descriptionToMuse defs + let ind = offset label' + return $ hang ind label' contents + descriptionToMuse :: PandocMonad m + => [Block] + -> StateT WriterState m Doc + descriptionToMuse desc = hang 4 " :: " <$> blockListToMuse desc +blockToMuse (Header level (ident,_,_) inlines) = do + opts <- gets stOptions + contents <- inlineListToMuse inlines + + ids <- gets stIds + let autoId = uniqueIdent inlines ids + modify $ \st -> st{ stIds = Set.insert autoId ids } + + let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) + then empty + else "#" <> text ident <> cr + let header' = text $ replicate level '*' + return $ blankline <> nowrap (header' <> space <> contents) + $$ attr' <> blankline +-- https://www.gnu.org/software/emacs-muse/manual/muse.html#Horizontal-Rules-and-Anchors +blockToMuse HorizontalRule = return $ blankline $$ "----" $$ blankline +blockToMuse (Table caption _ _ headers rows) = do + caption' <- inlineListToMuse caption + headers' <- mapM blockListToMuse headers + rows' <- mapM (mapM blockListToMuse) rows + let noHeaders = all null headers + + let numChars = maximum . map offset + -- FIXME: width is not being used. + let widthsInChars = + map numChars $ transpose (headers' : rows') + -- FIXME: Muse doesn't allow blocks with height more than 1. + let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks + where h = maximum (1 : map height blocks) + sep' = lblock (length sep) $ vcat (replicate h (text sep)) + let makeRow sep = (" " <>) . hpipeBlocks sep . zipWith lblock widthsInChars + let head' = makeRow " || " headers' + let rowSeparator = if noHeaders then " | " else " | " + rows'' <- mapM (\row -> do cols <- mapM blockListToMuse row + return $ makeRow rowSeparator cols) rows + let body = vcat rows'' + return $ (if noHeaders then empty else head') + $$ body + $$ (if null caption then empty else " |+ " <> caption' <> " +|") + $$ blankline +blockToMuse (Div _ bs) = flatBlockListToMuse bs +blockToMuse Null = return empty + +-- | Return Muse representation of notes. +notesToMuse :: PandocMonad m + => Notes + -> StateT WriterState m Doc +notesToMuse notes = liftM vsep (zipWithM noteToMuse [1 ..] notes) + +-- | Return Muse representation of a note. +noteToMuse :: PandocMonad m + => Int + -> [Block] + -> StateT WriterState m Doc +noteToMuse num note = do + contents <- blockListToMuse note + let marker = "[" ++ show num ++ "] " + return $ hang (length marker) (text marker) contents + +-- | Escape special characters for Muse. +escapeString :: String -> String +escapeString s = + "<verbatim>" ++ + substitute "</verbatim>" "<</verbatim><verbatim>/verbatim>" s ++ + "</verbatim>" + +-- | Escape special characters for Muse if needed. +conditionalEscapeString :: String -> String +conditionalEscapeString s = + if any (`elem` ("#*<=>[]|" :: String)) s || + "::" `isInfixOf` s || + "----" `isInfixOf` s || + "~~" `isInfixOf` s + then escapeString s + else s + +normalizeInlineList :: [Inline] -> [Inline] +normalizeInlineList (x : Str "" : xs) + = normalizeInlineList (x:xs) +normalizeInlineList (Str x1 : Str x2 : xs) + = normalizeInlineList $ Str (x1 ++ x2) : xs +normalizeInlineList (Emph x1 : Emph x2 : ils) + = normalizeInlineList $ Emph (x1 ++ x2) : ils +normalizeInlineList (Strong x1 : Strong x2 : ils) + = normalizeInlineList $ Strong (x1 ++ x2) : ils +normalizeInlineList (Strikeout x1 : Strikeout x2 : ils) + = normalizeInlineList $ Strikeout (x1 ++ x2) : ils +normalizeInlineList (Superscript x1 : Superscript x2 : ils) + = normalizeInlineList $ Superscript (x1 ++ x2) : ils +normalizeInlineList (Subscript x1 : Subscript x2 : ils) + = normalizeInlineList $ Subscript (x1 ++ x2) : ils +normalizeInlineList (SmallCaps x1 : SmallCaps x2 : ils) + = normalizeInlineList $ SmallCaps (x1 ++ x2) : ils +normalizeInlineList (Code _ x1 : Code _ x2 : ils) + = normalizeInlineList $ Code nullAttr (x1 ++ x2) : ils +normalizeInlineList (RawInline f1 x1 : RawInline f2 x2 : ils) | f1 == f2 + = normalizeInlineList $ RawInline f1 (x1 ++ x2) : ils +normalizeInlineList (Span a1 x1 : Span a2 x2 : ils) | a1 == a2 + = normalizeInlineList $ Span a1 (x1 ++ x2) : ils +normalizeInlineList (x:xs) = x : normalizeInlineList xs +normalizeInlineList [] = [] + +fixNotes :: [Inline] -> [Inline] +fixNotes [] = [] +fixNotes (Space : n@Note{} : rest) = Str " " : n : fixNotes rest +fixNotes (SoftBreak : n@Note{} : rest) = Str " " : n : fixNotes rest +fixNotes (x:xs) = x : fixNotes xs + +-- | Convert list of Pandoc inline elements to Muse. +inlineListToMuse :: PandocMonad m + => [Inline] + -> StateT WriterState m Doc +inlineListToMuse lst = hcat <$> mapM inlineToMuse (fixNotes $ normalizeInlineList lst) + +-- | Convert Pandoc inline element to Muse. +inlineToMuse :: PandocMonad m + => Inline + -> StateT WriterState m Doc +inlineToMuse (Str str) = return $ text $ conditionalEscapeString str +inlineToMuse (Emph lst) = do + contents <- inlineListToMuse lst + return $ "<em>" <> contents <> "</em>" +inlineToMuse (Strong lst) = do + contents <- inlineListToMuse lst + return $ "<strong>" <> contents <> "</strong>" +inlineToMuse (Strikeout lst) = do + contents <- inlineListToMuse lst + return $ "<del>" <> contents <> "</del>" +inlineToMuse (Superscript lst) = do + contents <- inlineListToMuse lst + return $ "<sup>" <> contents <> "</sup>" +inlineToMuse (Subscript lst) = do + contents <- inlineListToMuse lst + return $ "<sub>" <> contents <> "</sub>" +inlineToMuse (SmallCaps lst) = inlineListToMuse lst +inlineToMuse (Quoted SingleQuote lst) = do + contents <- inlineListToMuse lst + return $ "‘" <> contents <> "’" +inlineToMuse (Quoted DoubleQuote lst) = do + contents <- inlineListToMuse lst + return $ "“" <> contents <> "”" +-- Amusewiki does not support <cite> tag, +-- and Emacs Muse citation support is limited +-- (https://www.gnu.org/software/emacs-muse/manual/html_node/Citations.html#Citation) +-- so just fallback to expanding inlines. +inlineToMuse (Cite _ lst) = inlineListToMuse lst +inlineToMuse (Code _ str) = return $ + "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>" +inlineToMuse (Math t str) = + lift (texMathToInlines t str) >>= inlineListToMuse +inlineToMuse (RawInline (Format f) str) = + return $ "<literal style=\"" <> text f <> "\">" <> text str <> "</literal>" +inlineToMuse LineBreak = return $ "<br>" <> cr +inlineToMuse Space = return space +inlineToMuse SoftBreak = do + wrapText <- gets $ writerWrapText . stOptions + return $ if wrapText == WrapPreserve then cr else space +inlineToMuse (Link _ txt (src, _)) = + case txt of + [Str x] | escapeURI x == src -> + return $ "[[" <> text (escapeLink x) <> "]]" + _ -> do contents <- inlineListToMuse txt + return $ "[[" <> text (escapeLink src) <> "][" <> contents <> "]]" + where escapeLink lnk = if isImageUrl lnk then "URL:" ++ lnk else lnk + -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el + imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] + isImageUrl = (`elem` imageExtensions) . takeExtension +inlineToMuse (Image attr alt (source,'f':'i':'g':':':title)) = + inlineToMuse (Image attr alt (source,title)) +inlineToMuse (Image attr inlines (source, title)) = do + opts <- gets stOptions + alt <- inlineListToMuse inlines + let title' = if null title + then if null inlines + then "" + else "[" <> alt <> "]" + else "[" <> text title <> "]" + let width = case dimension Width attr of + Just (Percent x) | isEnabled Ext_amuse opts -> " " ++ show (round x :: Integer) + _ -> "" + return $ "[[" <> text (source ++ width) <> "]" <> title' <> "]" +inlineToMuse (Note contents) = do + -- add to notes in state + notes <- gets stNotes + modify $ \st -> st { stNotes = contents:notes } + let ref = show $ length notes + 1 + return $ "[" <> text ref <> "]" +inlineToMuse (Span (_,name:_,_) inlines) = do + contents <- inlineListToMuse inlines + return $ "<class name=\"" <> text name <> "\">" <> contents <> "</class>" +inlineToMuse (Span _ lst) = inlineListToMuse lst diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 87e23aeeb..f852bad96 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,14 +30,17 @@ Conversion of a 'Pandoc' document to a string representation. -} module Text.Pandoc.Writers.Native ( writeNative ) where -import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) -import Data.List ( intersperse ) +import Data.List (intersperse) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition +import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.Pandoc.Pretty prettyList :: [Doc] -> Doc prettyList ds = - "[" <> (cat $ intersperse (cr <> ",") $ map (nest 1) ds) <> "]" + "[" <> + cat (intersperse (cr <> ",") $ map (nest 1) ds) <> "]" -- | Prettyprint Pandoc block element. prettyBlock :: Block -> Doc @@ -47,12 +50,12 @@ prettyBlock (BlockQuote blocks) = "BlockQuote" $$ prettyList (map prettyBlock blocks) prettyBlock (OrderedList attribs blockLists) = "OrderedList" <> space <> text (show attribs) $$ - (prettyList $ map (prettyList . map prettyBlock) blockLists) + prettyList (map (prettyList . map prettyBlock) blockLists) prettyBlock (BulletList blockLists) = "BulletList" $$ - (prettyList $ map (prettyList . map prettyBlock) blockLists) + prettyList (map (prettyList . map prettyBlock) blockLists) prettyBlock (DefinitionList items) = "DefinitionList" $$ - (prettyList $ map deflistitem items) + prettyList (map deflistitem items) where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <> nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")" prettyBlock (Table caption aligns widths header rows) = @@ -66,8 +69,8 @@ prettyBlock (Div attr blocks) = prettyBlock block = text $ show block -- | Prettyprint Pandoc document. -writeNative :: WriterOptions -> Pandoc -> String -writeNative opts (Pandoc meta blocks) = +writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeNative opts (Pandoc meta blocks) = return $ let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index ce4d456a3..63a3f915a 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,49 +29,70 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where -import Data.IORef -import Data.List ( isPrefixOf ) -import Data.Maybe ( fromMaybe ) -import Text.XML.Light.Output -import Text.TeXMath -import qualified Data.ByteString.Lazy as B -import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip -import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) -import Text.Pandoc.Shared ( stringify, fetchItem', warn, - getDefaultReferenceODT ) -import Text.Pandoc.ImageSize -import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) +import Control.Monad.Except (catchError) +import Control.Monad.State.Strict +import qualified Data.ByteString.Lazy as B +import Data.Generics (everywhere', mkT) +import Data.List (isPrefixOf) +import Data.Maybe (fromMaybe) +import qualified Data.Text.Lazy as TL +import System.FilePath (takeDirectory, takeExtension, (<.>)) +import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import Text.Pandoc.Class (PandocMonad, report, toLang) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging +import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) +import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) +import Text.Pandoc.Pretty +import Text.Pandoc.Shared (stringify) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk -import Text.Pandoc.Writers.Shared ( fixDisplayMath ) -import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) -import Control.Monad (liftM) +import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) +import Text.Pandoc.Writers.Shared (fixDisplayMath) import Text.Pandoc.XML -import Text.Pandoc.Pretty -import qualified Control.Exception as E -import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension, takeDirectory, (<.>)) +import Text.TeXMath +import Text.XML.Light + +newtype ODTState = ODTState { stEntries :: [Entry] + } + +type O m = StateT ODTState m -- | Produce an ODT file from a Pandoc document. -writeODT :: WriterOptions -- ^ Writer options +writeODT :: PandocMonad m + => WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO B.ByteString -writeODT opts doc@(Pandoc meta _) = do - let datadir = writerUserDataDir opts + -> m B.ByteString +writeODT opts doc = + let initState = ODTState{ stEntries = [] + } + in + evalStateT (pandocToODT opts doc) initState + +-- | Produce an ODT file from a Pandoc document. +pandocToODT :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> O m B.ByteString +pandocToODT opts doc@(Pandoc meta _) = do let title = docTitle meta + lang <- toLang (getLang opts meta) refArchive <- - case writerReferenceODT opts of - Just f -> liftM toArchive $ B.readFile f - Nothing -> getDefaultReferenceODT datadir + case writerReferenceDoc opts of + Just f -> liftM toArchive $ lift $ P.readFileLazy f + Nothing -> lift $ (toArchive . B.fromStrict) <$> + P.readDataFile "reference.odt" -- handle formulas and pictures - picEntriesRef <- newIORef ([] :: [Entry]) - doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc - let newContents = writeOpenDocument opts{writerWrapText = WrapNone} doc' - epochtime <- floor `fmap` getPOSIXTime + -- picEntriesRef <- P.newIORef ([] :: [Entry]) + doc' <- walkM (transformPicMath opts) $ walk fixDisplayMath doc + newContents <- lift $ writeOpenDocument opts{writerWrapText = WrapNone} doc' + epochtime <- floor `fmap` lift P.getPOSIXTime let contentEntry = toEntry "content.xml" epochtime - $ fromStringLazy newContents - picEntries <- readIORef picEntriesRef + $ fromTextLazy $ TL.fromStrict newContents + picEntries <- gets stEntries let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive @@ -90,14 +111,13 @@ writeODT opts doc@(Pandoc meta _) = do $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" $$ - ( inTags True "manifest:manifest" + (inTags True "manifest:manifest" [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0") - ,("manifest:version","1.2")] - $ ( selfClosingTag "manifest:file-entry" + ,("manifest:version","1.2")] ( selfClosingTag "manifest:file-entry" [("manifest:media-type","application/vnd.oasis.opendocument.text") ,("manifest:full-path","/")] - $$ vcat ( map toFileEntry $ files ) - $$ vcat ( map toFileEntry $ formulas ) + $$ vcat ( map toFileEntry files ) + $$ vcat ( map toFileEntry formulas ) ) ) let archive' = addEntryToArchive manifestEntry archive @@ -105,88 +125,138 @@ writeODT opts doc@(Pandoc meta _) = do $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" $$ - ( inTags True "office:document-meta" + (inTags True "office:document-meta" [("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0") ,("xmlns:xlink","http://www.w3.org/1999/xlink") ,("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:meta","urn:oasis:names:tc:opendocument:xmlns:meta:1.0") ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") - ,("office:version","1.2")] - $ ( inTagsSimple "office:meta" - $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title)) - ) + ,("office:version","1.2")] ( inTagsSimple "office:meta" $ + ( inTagsSimple "dc:title" + (text $ escapeStringForXML (stringify title)) + $$ + case lang of + Just l -> inTagsSimple "dc:language" + (text (escapeStringForXML (renderLang l))) + Nothing -> empty + ) ) ) -- make sure mimetype is first let mimetypeEntry = toEntry "mimetype" epochtime $ fromStringLazy "application/vnd.oasis.opendocument.text" - let archive'' = addEntryToArchive mimetypeEntry + archive'' <- updateStyleWithLang lang + $ addEntryToArchive mimetypeEntry $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' +updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive +updateStyleWithLang Nothing arch = return arch +updateStyleWithLang (Just lang) arch = do + epochtime <- floor `fmap` lift P.getPOSIXTime + return arch{ zEntries = [if eRelativePath e == "styles.xml" + then case parseXMLDoc + (toStringLazy (fromEntry e)) of + Nothing -> e + Just d -> + toEntry "styles.xml" epochtime + ( fromStringLazy + . ppTopElement + . addLang lang $ d ) + else e + | e <- zEntries arch] } + +addLang :: Lang -> Element -> Element +addLang lang = everywhere' (mkT updateLangAttr) + where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) + = Attr n (langLanguage lang) + updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) + = Attr n (langRegion lang) + updateLangAttr x = x + -- | transform both Image and Math elements -transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPicMath opts entriesRef (Image attr@(id', cls, _) lab (src,t)) = do - res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src - case res of - Left (_ :: E.SomeException) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - return $ Emph lab - Right (img, mbMimeType) -> do - (ptX, ptY) <- case imageSize img of +transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline +transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError + (do (img, mbMimeType) <- P.fetchItem src + (ptX, ptY) <- case imageSize opts img of Right s -> return $ sizeInPoints s Left msg -> do - warn $ "Could not determine image size in `" ++ - src ++ "': " ++ msg + report $ CouldNotDetermineImageSize src msg return (100, 100) let dims = case (getDim Width, getDim Height) of (Just w, Just h) -> [("width", show w), ("height", show h)] - (Just w@(Percent _), Nothing) -> [("width", show w), ("style:rel-height", "scale")] - (Nothing, Just h@(Percent _)) -> [("style:rel-width", "scale"), ("height", show h)] + (Just w@(Percent p), Nothing) -> [("width", show w), ("height", show (p / ratio) ++ "%")] + (Nothing, Just h@(Percent p)) -> [("width", show (p * ratio) ++ "%"), ("height", show h)] (Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")] (Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)] _ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")] where ratio = ptX / ptY - getDim dir = case (dimension dir attr) of + getDim dir = case dimension dir attr of Just (Percent i) -> Just $ Percent i Just dim -> Just $ Inch $ inInch opts dim Nothing -> Nothing let newattr = (id', cls, dims) - entries <- readIORef entriesRef + entries <- gets stEntries let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) (mbMimeType >>= extensionFromMimeType) let newsrc = "Pictures/" ++ show (length entries) <.> extension let toLazy = B.fromChunks . (:[]) - epochtime <- floor `fmap` getPOSIXTime + epochtime <- floor `fmap` lift P.getPOSIXTime let entry = toEntry newsrc epochtime $ toLazy img - modifyIORef entriesRef (entry:) - return $ Image newattr lab (newsrc, t) -transformPicMath _ entriesRef (Math t math) = do - entries <- readIORef entriesRef + modify $ \st -> st{ stEntries = entry : entries } + return $ Image newattr lab (newsrc, t)) + (\e -> do + report $ CouldNotFetchResource src (show e) + return $ Emph lab) + +transformPicMath _ (Math t math) = do + entries <- gets stEntries let dt = if t == InlineMath then DisplayInline else DisplayBlock case writeMathML dt <$> readTeX math of Left _ -> return $ Math t math Right r -> do let conf = useShortEmptyTags (const False) defaultConfigPP let mathml = ppcTopElement conf r - epochtime <- floor `fmap` getPOSIXTime + epochtime <- floor `fmap` (lift P.getPOSIXTime) let dirname = "Formula-" ++ show (length entries) ++ "/" let fname = dirname ++ "content.xml" let entry = toEntry fname epochtime (fromStringLazy mathml) - modifyIORef entriesRef (entry:) + let fname' = dirname ++ "settings.xml" + let entry' = toEntry fname' epochtime $ documentSettings (t == InlineMath) + modify $ \st -> st{ stEntries = entry' : (entry : entries) } return $ RawInline (Format "opendocument") $ render Nothing $ - inTags False "draw:frame" [("text:anchor-type", - if t == DisplayMath - then "paragraph" - else "as-char") - ,("style:vertical-pos", "middle") - ,("style:vertical-rel", "text")] $ + inTags False "draw:frame" (if t == DisplayMath + then [("draw:style-name","fr2") + -- `draw:frame` does not support either + -- `style:vertical-pos` or `style:vertical-rel`, + -- therefore those attributes must go into the + -- `style:style` element + ,("text:anchor-type","paragraph")] + else [("draw:style-name","fr1") + ,("text:anchor-type","as-char")]) $ selfClosingTag "draw:object" [("xlink:href", dirname) , ("xlink:type", "simple") , ("xlink:show", "embed") , ("xlink:actuate", "onLoad")] -transformPicMath _ _ x = return x +transformPicMath _ x = return x + +documentSettings :: Bool -> B.ByteString +documentSettings isTextMode = fromStringLazy $ render Nothing + $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + $$ + (inTags True "office:document-settings" + [("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0") + ,("xmlns:xlink","http://www.w3.org/1999/xlink") + ,("xmlns:config","urn:oasis:names:tc:opendocument:xmlns:config:1.0") + ,("xmlns:ooo","http://openoffice.org/2004/office") + ,("office:version","1.2")] $ + inTagsSimple "office:settings" $ + inTags False "config:config-item-set" + [("config:name", "ooo:configuration-settings")] $ + inTags False "config:config-item" [("config:name", "IsTextMode") + ,("config:type", "boolean")] $ + text $ if isTextMode then "true" else "false") diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs new file mode 100644 index 000000000..30d8d72dd --- /dev/null +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -0,0 +1,108 @@ +{- +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.OOXML + Copyright : Copyright (C) 2012-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions common to OOXML writers (Docx and Powerpoint) +-} +module Text.Pandoc.Writers.OOXML ( mknode + , nodename + , toLazy + , renderXml + , parseXml + , elemToNameSpaces + , elemName + , isElem + , NameSpaces + , fitToPage + ) where + +import Codec.Archive.Zip +import Control.Monad.Reader +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Lazy.Char8 as BL8 +import Data.Maybe (mapMaybe) +import Data.Monoid ((<>)) +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.XML.Light as XML + +mknode :: Node t => String -> [(String,String)] -> t -> Element +mknode s attrs = + add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) + +nodename :: String -> QName +nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } + where (name, prefix) = case break (==':') s of + (xs,[]) -> (xs, Nothing) + (ys, _:zs) -> (zs, Just ys) + +toLazy :: B.ByteString -> BL.ByteString +toLazy = BL.fromChunks . (:[]) + +renderXml :: Element -> BL.ByteString +renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> + UTF8.fromStringLazy (showElement elt) + +parseXml :: (PandocMonad m) => Archive -> Archive -> String -> m Element +parseXml refArchive distArchive relpath = + case findEntryByPath relpath refArchive `mplus` + findEntryByPath relpath distArchive of + Nothing -> fail $ relpath ++ " missing in reference file" + Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of + Nothing -> fail $ relpath ++ " corrupt in reference file" + Just d -> return d + +-- Copied from Util + +attrToNSPair :: XML.Attr -> Maybe (String, String) +attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = + QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + let ns' = ns ++ elemToNameSpaces element + in qName (elName element) == name && + qURI (elName element) == lookup prefix ns' + +type NameSpaces = [(String, String)] + +-- | Scales the image to fit the page +-- sizes are passed in emu +fitToPage :: (Double, Double) -> Integer -> (Integer, Integer) +fitToPage (x, y) pageWidth + -- Fixes width to the page width and scales the height + | x > fromIntegral pageWidth = + (pageWidth, floor $ (fromIntegral pageWidth / x) * y) + | otherwise = (floor x, floor y) diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 20c2c5cbc..29e1bc80c 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013-2015 John MacFarlane + Copyright : Copyright (C) 2013-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,40 +29,45 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to OPML XML. -} module Text.Pandoc.Writers.OPML ( writeOPML) where +import Control.Monad.Except (throwError) +import Data.Text (Text, unpack) +import qualified Data.Text as T +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Compat.Time import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Shared +import Text.Pandoc.Error import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Writers.Markdown (writeMarkdown) -import Text.Pandoc.Pretty -import Text.Pandoc.Compat.Time -import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML -- | Convert Pandoc document to string in OPML format. -writeOPML :: WriterOptions -> Pandoc -> String -writeOPML opts (Pandoc meta blocks) = +writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeOPML opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta - Just metadata = metaToJSON opts - (Just . writeMarkdown def . Pandoc nullMeta) - (Just . trimr . writeMarkdown def . Pandoc nullMeta . - (\ils -> [Plain ils])) - meta' - main = render colwidth $ vcat (map (elementToOPML opts) elements) - context = defField "body" main metadata - in case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + metadata <- metaToJSON opts + (writeMarkdown def . Pandoc nullMeta) + (\ils -> T.stripEnd <$> writeMarkdown def (Pandoc nullMeta [Plain ils])) + meta' + main <- (render colwidth . vcat) <$> mapM (elementToOPML opts) elements + let context = defField "body" main metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -writeHtmlInlines :: [Inline] -> String -writeHtmlInlines ils = trim $ writeHtmlString def - $ Pandoc nullMeta [Plain ils] + +writeHtmlInlines :: PandocMonad m => [Inline] -> m Text +writeHtmlInlines ils = + T.strip <$> writeHtml5String def (Pandoc nullMeta [Plain ils]) -- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT showDateTimeRFC822 :: UTCTime -> String @@ -75,20 +80,27 @@ convertDate ils = maybe "" showDateTimeRFC822 $ #else parseTime #endif - defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils) + defaultTimeLocale "%F" =<< normalizeDate (stringify ils) -- | Convert an Element to OPML. -elementToOPML :: WriterOptions -> Element -> Doc -elementToOPML _ (Blk _) = empty -elementToOPML opts (Sec _ _num _ title elements) = - let isBlk (Blk _) = True - isBlk _ = False - fromBlk (Blk x) = x - fromBlk _ = error "fromBlk called on non-block" +elementToOPML :: PandocMonad m => WriterOptions -> Element -> m Doc +elementToOPML _ (Blk _) = return empty +elementToOPML opts (Sec _ _num _ title elements) = do + let isBlk :: Element -> Bool + isBlk (Blk _) = True + isBlk _ = False + + fromBlk :: PandocMonad m => Element -> m Block + fromBlk (Blk x) = return x + fromBlk _ = throwError $ PandocSomeError "fromBlk called on non-block" + (blocks, rest) = span isBlk elements - attrs = [("text", writeHtmlInlines title)] ++ - [("_note", writeMarkdown def (Pandoc nullMeta - (map fromBlk blocks))) - | not (null blocks)] - in inTags True "outline" attrs $ - vcat (map (elementToOPML opts) rest) + htmlIls <- writeHtmlInlines title + md <- if null blocks + then return mempty + else do blks <- mapM fromBlk blocks + writeMarkdown def $ Pandoc nullMeta blks + let attrs = ("text", unpack htmlIls) : + [("_note", unpack md) | not (null blocks)] + o <- mapM (elementToOPML opts) rest + return $ inTags True "outline" attrs $ vcat o diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 8f0e037c5..17edc0cbd 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2008-2015 Andrea Rossato <andrea.rossato@ing.unitn.it> +Copyright (C) 2008-2018 Andrea Rossato <andrea.rossato@ing.unitn.it> and John MacFarlane. This program is free software; you can redistribute it and/or modify @@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008-2015 Andrea Rossato and John MacFarlane + Copyright : Copyright (C) 2008-2018 Andrea Rossato and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> @@ -30,32 +32,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to OpenDocument XML. -} module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where +import Control.Arrow ((***), (>>>)) +import Control.Monad.State.Strict hiding (when) +import Data.Char (chr) +import Data.List (sortBy) +import qualified Data.Map as Map +import Data.Ord (comparing) +import qualified Data.Set as Set +import Data.Text (Text) +import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.XML +import Text.Pandoc.Pretty import Text.Pandoc.Shared (linesToPara) import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Pretty -import Text.Printf ( printf ) -import Control.Arrow ( (***), (>>>) ) -import Control.Monad.State hiding ( when ) -import Data.Char (chr) -import qualified Data.Set as Set -import qualified Data.Map as Map +import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Data.List (sortBy) -import Data.Ord (comparing) +import Text.Pandoc.XML +import Text.Printf (printf) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block plainToPara (Plain x) = Para x -plainToPara x = x +plainToPara x = x -- -- OpenDocument writer -- +type OD m = StateT WriterState m + data WriterState = WriterState { stNotes :: [Doc] , stTableStyles :: [Doc] @@ -88,46 +96,45 @@ defaultWriterState = when :: Bool -> Doc -> Doc when p a = if p then a else empty -addTableStyle :: Doc -> State WriterState () +addTableStyle :: PandocMonad m => Doc -> OD m () addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s } -addNote :: Doc -> State WriterState () +addNote :: PandocMonad m => Doc -> OD m () addNote i = modify $ \s -> s { stNotes = i : stNotes s } -addParaStyle :: Doc -> State WriterState () +addParaStyle :: PandocMonad m => Doc -> OD m () addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } -addTextStyle :: Set.Set TextStyle -> (String, Doc) -> State WriterState () +addTextStyle :: PandocMonad m => Set.Set TextStyle -> (String, Doc) -> OD m () addTextStyle attrs i = modify $ \s -> s { stTextStyles = Map.insert attrs i (stTextStyles s) } -addTextStyleAttr :: TextStyle -> State WriterState () +addTextStyleAttr :: PandocMonad m => TextStyle -> OD m () addTextStyleAttr t = modify $ \s -> s { stTextStyleAttr = Set.insert t (stTextStyleAttr s) } -increaseIndent :: State WriterState () +increaseIndent :: PandocMonad m => OD m () increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } -resetIndent :: State WriterState () -resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 } +resetIndent :: PandocMonad m => OD m () +resetIndent = modify $ \s -> s { stIndentPara = stIndentPara s - 1 } -inTightList :: State WriterState a -> State WriterState a +inTightList :: PandocMonad m => OD m a -> OD m a inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r -> modify (\s -> s { stTight = False }) >> return r -setInDefinitionList :: Bool -> State WriterState () +setInDefinitionList :: PandocMonad m => Bool -> OD m () setInDefinitionList b = modify $ \s -> s { stInDefinition = b } -setFirstPara :: State WriterState () +setFirstPara :: PandocMonad m => OD m () setFirstPara = modify $ \s -> s { stFirstPara = True } -inParagraphTags :: Doc -> State WriterState Doc -inParagraphTags d | isEmpty d = return empty +inParagraphTags :: PandocMonad m => Doc -> OD m Doc inParagraphTags d = do b <- gets stFirstPara a <- if b then do modify $ \st -> st { stFirstPara = False } - return $ [("text:style-name", "First_20_paragraph")] + return [("text:style-name", "First_20_paragraph")] else return [("text:style-name", "Text_20_body")] return $ inTags False "text:p" a d @@ -137,7 +144,7 @@ inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] inSpanTags :: String -> Doc -> Doc inSpanTags s = inTags False "text:span" [("text:style-name",s)] -withTextStyle :: TextStyle -> State WriterState a -> State WriterState a +withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a withTextStyle s f = do oldTextStyleAttr <- gets stTextStyleAttr addTextStyleAttr s @@ -145,7 +152,7 @@ withTextStyle s f = do modify $ \st -> st{ stTextStyleAttr = oldTextStyleAttr } return res -inTextStyle :: Doc -> State WriterState Doc +inTextStyle :: PandocMonad m => Doc -> OD m Doc inTextStyle d = do at <- gets stTextStyleAttr if Set.null at @@ -161,12 +168,30 @@ inTextStyle d = do inTags False "style:style" [("style:name", styleName) ,("style:family", "text")] - $ selfClosingTag "style:text-properties" - (concatMap textStyleAttr (Set.toList at))) + $ selfClosingTag "style:text-properties" + (concatMap textStyleAttr (Set.toList at))) return $ inTags False "text:span" [("text:style-name",styleName)] d -inHeaderTags :: Int -> Doc -> State WriterState Doc +formulaStyles :: [Doc] +formulaStyles = [formulaStyle InlineMath, formulaStyle DisplayMath] + +formulaStyle :: MathType -> Doc +formulaStyle mt = inTags False "style:style" + [("style:name", if mt == InlineMath then "fr1" else "fr2") + ,("style:family", "graphic") + ,("style:parent-style-name", "Formula")] + $ selfClosingTag "style:graphic-properties" $ if mt == InlineMath then + [("style:vertical-pos", "middle") + ,("style:vertical-rel", "text")] + else + [("style:vertical-pos", "middle") + ,("style:vertical-rel", "paragraph-content") + ,("style:horizontal-pos", "center") + ,("style:horizontal-rel", "paragraph-content") + ,("style:wrap", "none")] + +inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc inHeaderTags i d = return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) , ("text:outline-level", show i)] d @@ -189,64 +214,67 @@ handleSpaces s rm [] = empty -- | Convert Pandoc document to string in OpenDocument format. -writeOpenDocument :: WriterOptions -> Pandoc -> String -writeOpenDocument opts (Pandoc meta blocks) = +writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeOpenDocument opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text render' = render colwidth - ((body, metadata),s) = flip runState + ((body, metadata),s) <- flip runStateT defaultWriterState $ do m <- metaToJSON opts - (fmap (render colwidth) . blocksToOpenDocument opts) - (fmap (render colwidth) . inlinesToOpenDocument opts) + (fmap render' . blocksToOpenDocument opts) + (fmap render' . inlinesToOpenDocument opts) meta b <- render' `fmap` blocksToOpenDocument opts blocks return (b, m) - styles = stTableStyles s ++ stParaStyles s ++ - map snd (reverse $ sortBy (comparing fst) $ - Map.elems (stTextStyles s)) + let styles = stTableStyles s ++ stParaStyles s ++ formulaStyles ++ + map snd (sortBy (flip (comparing fst)) ( + Map.elems (stTextStyles s))) listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) - listStyles = map listStyle (stListStyles s) - automaticStyles = vcat $ reverse $ styles ++ listStyles - context = defField "body" body - $ defField "automatic-styles" (render' automaticStyles) - $ metadata - in case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl context - -withParagraphStyle :: WriterOptions -> String -> [Block] -> State WriterState Doc + let listStyles = map listStyle (stListStyles s) + let automaticStyles = vcat $ reverse $ styles ++ listStyles + let context = defField "body" body + $ defField "toc" (writerTableOfContents opts) + $defField "automatic-styles" (render' automaticStyles) metadata + case writerTemplate opts of + Nothing -> return body + Just tpl -> renderTemplate' tpl context + +withParagraphStyle :: PandocMonad m + => WriterOptions -> String -> [Block] -> OD m Doc withParagraphStyle o s (b:bs) | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b where go i = (<>) i <$> withParagraphStyle o s bs withParagraphStyle _ _ [] = return empty -inPreformattedTags :: String -> State WriterState Doc +inPreformattedTags :: PandocMonad m => String -> OD m Doc inPreformattedTags s = do n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")] return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s -orderedListToOpenDocument :: WriterOptions -> Int -> [[Block]] -> State WriterState Doc +orderedListToOpenDocument :: PandocMonad m + => WriterOptions -> Int -> [[Block]] -> OD m Doc orderedListToOpenDocument o pn bs = vcat . map (inTagsIndented "text:list-item") <$> mapM (orderedItemToOpenDocument o pn . map plainToPara) bs -orderedItemToOpenDocument :: WriterOptions -> Int -> [Block] -> State WriterState Doc -orderedItemToOpenDocument o n (b:bs) - | OrderedList a l <- b = newLevel a l - | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l - | otherwise = go =<< blockToOpenDocument o b - where - go i = ($$) i <$> orderedItemToOpenDocument o n bs - newLevel a l = do - nn <- length <$> gets stParaStyles - ls <- head <$> gets stListStyles - modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) } - inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l -orderedItemToOpenDocument _ _ [] = return empty +orderedItemToOpenDocument :: PandocMonad m + => WriterOptions -> Int -> [Block] -> OD m Doc +orderedItemToOpenDocument o n bs = vcat <$> mapM go bs + where go (OrderedList a l) = newLevel a l + go (Para l) = inParagraphTagsWithStyle ("P" ++ show n) <$> + inlinesToOpenDocument o l + go b = blockToOpenDocument o b + newLevel a l = do + nn <- length <$> gets stParaStyles + ls <- head <$> gets stListStyles + modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : + drop 1 (stListStyles s) } + inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l isTightList :: [[Block]] -> Bool isTightList [] = False @@ -254,7 +282,8 @@ isTightList (b:_) | Plain {} : _ <- b = True | otherwise = False -newOrderedListStyle :: Bool -> ListAttributes -> State WriterState (Int,Int) +newOrderedListStyle :: PandocMonad m + => Bool -> ListAttributes -> OD m (Int,Int) newOrderedListStyle b a = do ln <- (+) 1 . length <$> gets stListStyles let nbs = orderedListLevelStyle a (ln, []) @@ -262,7 +291,8 @@ newOrderedListStyle b a = do modify $ \s -> s { stListStyles = nbs : stListStyles s } return (ln,pn) -bulletListToOpenDocument :: WriterOptions -> [[Block]] -> State WriterState Doc +bulletListToOpenDocument :: PandocMonad m + => WriterOptions -> [[Block]] -> OD m Doc bulletListToOpenDocument o b = do ln <- (+) 1 . length <$> gets stListStyles (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln @@ -270,48 +300,53 @@ bulletListToOpenDocument o b = do is <- listItemsToOpenDocument ("P" ++ show pn) o b return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is -listItemsToOpenDocument :: String -> WriterOptions -> [[Block]] -> State WriterState Doc +listItemsToOpenDocument :: PandocMonad m + => String -> WriterOptions -> [[Block]] -> OD m Doc listItemsToOpenDocument s o is = vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is -deflistItemToOpenDocument :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState Doc +deflistItemToOpenDocument :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) -> OD m Doc deflistItemToOpenDocument o (t,d) = do let ts = if isTightList d then "Definition_20_Term_20_Tight" else "Definition_20_Term" ds = if isTightList d then "Definition_20_Definition_20_Tight" else "Definition_20_Definition" t' <- withParagraphStyle o ts [Para t] - d' <- liftM vcat $ mapM (withParagraphStyle o ds . (map plainToPara)) d + d' <- liftM vcat $ mapM (withParagraphStyle o ds . map plainToPara) d return $ t' $$ d' -inBlockQuote :: WriterOptions -> Int -> [Block] -> State WriterState Doc +inBlockQuote :: PandocMonad m + => WriterOptions -> Int -> [Block] -> OD m Doc inBlockQuote o i (b:bs) | BlockQuote l <- b = do increaseIndent ni <- paraStyle [("style:parent-style-name","Quotations")] go =<< inBlockQuote o ni (map plainToPara l) - | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l - | otherwise = do go =<< blockToOpenDocument o b + | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l + | otherwise = go =<< blockToOpenDocument o b where go block = ($$) block <$> inBlockQuote o i bs inBlockQuote _ _ [] = resetIndent >> return empty -- | Convert a list of Pandoc blocks to OpenDocument. -blocksToOpenDocument :: WriterOptions -> [Block] -> State WriterState Doc +blocksToOpenDocument :: PandocMonad m => WriterOptions -> [Block] -> OD m Doc blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. -blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc +blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m Doc blockToOpenDocument o bs | Plain b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs = figure attr c s t - | Para b <- bs = if null b + | Para b <- bs = if null b && + not (isEnabled Ext_empty_paragraphs o) then return empty else inParagraphTags =<< inlinesToOpenDocument o b | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b - | Div _ xs <- bs = blocksToOpenDocument o xs + | Div attr xs <- bs = withLangFromAttr attr + (blocksToOpenDocument o xs) | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b @@ -324,7 +359,9 @@ blockToOpenDocument o bs [ ("text:style-name", "Horizontal_20_Line") ]) | RawBlock f s <- bs = if f == Format "opendocument" then return $ text s - else return empty + else do + report $ BlockNotRendered bs + return empty | Null <- bs = return empty | otherwise = return empty where @@ -370,17 +407,23 @@ blockToOpenDocument o bs captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] return $ imageDoc $$ captionDoc -colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc +colHeadsToOpenDocument :: PandocMonad m + => WriterOptions -> String -> [String] -> [[Block]] + -> OD m Doc colHeadsToOpenDocument o tn ns hs = inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o tn) (zip ns hs) -tableRowToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc +tableRowToOpenDocument :: PandocMonad m + => WriterOptions -> String -> [String] -> [[Block]] + -> OD m Doc tableRowToOpenDocument o tn ns cs = inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o tn) (zip ns cs) -tableItemToOpenDocument :: WriterOptions -> String -> (String,[Block])-> State WriterState Doc +tableItemToOpenDocument :: PandocMonad m + => WriterOptions -> String -> (String,[Block]) + -> OD m Doc tableItemToOpenDocument o tn (n,i) = let a = [ ("table:style-name" , tn ++ ".A1" ) , ("office:value-type", "string" ) @@ -389,10 +432,10 @@ tableItemToOpenDocument o tn (n,i) = withParagraphStyle o n (map plainToPara i) -- | Convert a list of inline elements to OpenDocument. -inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc +inlinesToOpenDocument :: PandocMonad m => WriterOptions -> [Inline] -> OD m Doc inlinesToOpenDocument o l = hcat <$> toChunks o l -toChunks :: WriterOptions -> [Inline] -> State WriterState [Doc] +toChunks :: PandocMonad m => WriterOptions -> [Inline] -> OD m [Doc] toChunks _ [] = return [] toChunks o (x : xs) | isChunkable x = do @@ -407,21 +450,21 @@ toChunks o (x : xs) where (ys, zs) = span isChunkable xs isChunkable :: Inline -> Bool -isChunkable (Str _) = True -isChunkable Space = True +isChunkable (Str _) = True +isChunkable Space = True isChunkable SoftBreak = True -isChunkable _ = False +isChunkable _ = False -- | Convert an inline element to OpenDocument. -inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc +inlineToOpenDocument :: PandocMonad m => WriterOptions -> Inline -> OD m Doc inlineToOpenDocument o ils = case ils of Space -> return space SoftBreak | writerWrapText o == WrapPreserve -> return $ preformatted "\n" - | otherwise -> return $ space - Span _ xs -> inlinesToOpenDocument o xs + | otherwise ->return space + Span attr xs -> withLangFromAttr attr (inlinesToOpenDocument o xs) LineBreak -> return $ selfClosingTag "text:line-break" [] Str s -> return $ handleSpaces $ escapeStringForXML s Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l @@ -432,11 +475,14 @@ inlineToOpenDocument o ils SmallCaps l -> withTextStyle SmallC $ inlinesToOpenDocument o l Quoted t l -> inQuotes t <$> inlinesToOpenDocument o l Code _ s -> inlinedCode $ preformatted s - Math t s -> inlinesToOpenDocument o (texMathToInlines t s) + Math t s -> lift (texMathToInlines t s) >>= + inlinesToOpenDocument o Cite _ l -> inlinesToOpenDocument o l RawInline f s -> if f == Format "opendocument" then return $ text s - else return empty + else do + report $ InlineNotRendered ils + return empty Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l Image attr _ (s,t) -> mkImg attr s t Note l -> mkNote l @@ -453,8 +499,6 @@ inlineToOpenDocument o ils let getDims [] = [] getDims (("width", w) :xs) = ("svg:width", w) : getDims xs getDims (("height", h):xs) = ("svg:height", h) : getDims xs - getDims (x@("style:rel-width", _) :xs) = x : getDims xs - getDims (x@("style:rel-height", _):xs) = x : getDims xs getDims (_:xs) = getDims xs return $ inTags False "draw:frame" (("draw:name", "img" ++ show id') : getDims kvs) $ @@ -473,18 +517,18 @@ inlineToOpenDocument o ils addNote nn return nn -bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc])) -bulletListStyle l = - let doStyles i = inTags True "text:list-level-style-bullet" - [ ("text:level" , show (i + 1) ) - , ("text:style-name" , "Bullet_20_Symbols") - , ("style:num-suffix", "." ) - , ("text:bullet-char", [bulletList !! i] ) - ] (listLevelStyle (1 + i)) - bulletList = map chr $ cycle [8226,8227,8259] - listElStyle = map doStyles [0..9] - in do pn <- paraListStyle l - return (pn, (l, listElStyle)) +bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc])) +bulletListStyle l = do + let doStyles i = inTags True "text:list-level-style-bullet" + [ ("text:level" , show (i + 1) ) + , ("text:style-name" , "Bullet_20_Symbols") + , ("style:num-suffix", "." ) + , ("text:bullet-char", [bulletList !! i] ) + ] (listLevelStyle (1 + i)) + bulletList = map chr $ cycle [8226,9702,9642] + listElStyle = map doStyles [0..9] + pn <- paraListStyle l + return (pn, (l, listElStyle)) orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc]) orderedListLevelStyle (s,n, d) (l,ls) = @@ -494,11 +538,11 @@ orderedListLevelStyle (s,n, d) (l,ls) = ,("style:num-suffix", ")")] _ -> [("style:num-suffix", ".")] format = case n of - UpperAlpha -> "A" - LowerAlpha -> "a" - UpperRoman -> "I" - LowerRoman -> "i" - _ -> "1" + UpperAlpha -> "A" + LowerAlpha -> "a" + UpperRoman -> "I" + LowerRoman -> "i" + _ -> "1" listStyle = inTags True "text:list-level-style-number" ([ ("text:level" , show $ 1 + length ls ) , ("text:style-name" , "Numbering_20_Symbols") @@ -509,10 +553,10 @@ orderedListLevelStyle (s,n, d) (l,ls) = listLevelStyle :: Int -> Doc listLevelStyle i = - let indent = show (0.25 * fromIntegral i :: Double) in + let indent = show (0.4 * fromIntegral (i - 1) :: Double) in selfClosingTag "style:list-level-properties" [ ("text:space-before" , indent ++ "in") - , ("text:min-label-width", "0.25in")] + , ("text:min-label-width", "0.4in")] tableStyle :: Int -> [(Char,Double)] -> Doc tableStyle num wcs = @@ -529,7 +573,7 @@ tableStyle num wcs = [ ("style:name" , tableId ++ "." ++ [c]) , ("style:family", "table-column" )] $ selfClosingTag "style:table-column-properties" - [("style:rel-column-width", printf "%d*" $ (floor $ w * 65535 :: Integer))] + [("style:rel-column-width", printf "%d*" (floor $ w * 65535 :: Integer))] cellStyle = inTags True "style:style" [ ("style:name" , tableId ++ ".A1") , ("style:family", "table-cell" )] $ @@ -538,31 +582,33 @@ tableStyle num wcs = columnStyles = map colStyle wcs in table $$ vcat columnStyles $$ cellStyle -paraStyle :: [(String,String)] -> State WriterState Int +paraStyle :: PandocMonad m => [(String,String)] -> OD m Int paraStyle attrs = do pn <- (+) 1 . length <$> gets stParaStyles - i <- (*) 0.5 . fromIntegral <$> gets stIndentPara :: State WriterState Double + i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara b <- gets stInDefinition t <- gets stTight let styleAttr = [ ("style:name" , "P" ++ show pn) , ("style:family" , "paragraph" )] - indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i + indentVal = flip (++) "in" . show $ if b then max 0.5 i else i tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] - indent = if (i /= 0 || b) + indent = if i /= 0 || b then [ ("fo:margin-left" , indentVal) , ("fo:margin-right" , "0in" ) , ("fo:text-indent" , "0in" ) , ("style:auto-text-indent" , "false" )] else [] attributes = indent ++ tight - paraProps = when (not $ null attributes) $ - selfClosingTag "style:paragraph-properties" attributes + paraProps = if null attributes + then mempty + else selfClosingTag + "style:paragraph-properties" attributes addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps return pn -paraListStyle :: Int -> State WriterState Int +paraListStyle :: PandocMonad m => Int -> OD m Int paraListStyle l = paraStyle [("style:parent-style-name","Text_20_body") ,("style:list-style-name", "L" ++ show l )] @@ -582,7 +628,14 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre +data TextStyle = Italic + | Bold + | Strike + | Sub + | Sup + | SmallC + | Pre + | Language Lang deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] @@ -600,4 +653,18 @@ textStyleAttr s | Pre <- s = [("style:font-name" ,"Courier New") ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] + | Language lang <- s + = [("fo:language" ,langLanguage lang) + ,("fo:country" ,langRegion lang)] | otherwise = [] + +withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a +withLangFromAttr (_,_,kvs) action = + case lookup "lang" kvs of + Nothing -> action + Just l -> + case parseBCP47 l of + Right lang -> withTextStyle (Language lang) action + Left _ -> do + report $ InvalidLang l + action diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 4302459cc..2307204a5 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -1,8 +1,8 @@ {-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2010-2015 Puneeth Chaganti <punchagan@gmail.com> - Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>, - and John MacFarlane <jgm@berkeley.edu> + 2010-2018 John MacFarlane <jgm@berkeley.edu> + 2016-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,10 +21,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org - Copyright : Copyright (C) 2010-2015 Puneeth Chaganti and John MacFarlane + Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> + 2010-2018 John MacFarlane <jgm@berkeley.edu> + 2016-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> License : GNU GPL, version 2 or above - Maintainer : Puneeth Chaganti <punchagan@gmail.com> + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha Portability : portable @@ -32,64 +34,67 @@ Conversion of 'Pandoc' documents to Emacs Org-Mode. Org-Mode: <http://orgmode.org> -} -module Text.Pandoc.Writers.Org ( writeOrg) where +module Text.Pandoc.Writers.Org (writeOrg) where +import Control.Monad.State.Strict +import Data.Char (isAlphaNum, toLower) +import Data.List (intersect, intersperse, isPrefixOf, partition, transpose) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared import Text.Pandoc.Pretty +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Data.Char ( isAlphaNum, toLower ) -import Data.List ( isPrefixOf, intersect, intersperse, partition, transpose ) -import Control.Monad.State +import Text.Pandoc.Writers.Shared data WriterState = - WriterState { stNotes :: [[Block]] - , stLinks :: Bool - , stImages :: Bool - , stHasMath :: Bool - , stOptions :: WriterOptions + WriterState { stNotes :: [[Block]] + , stHasMath :: Bool + , stOptions :: WriterOptions } +type Org = StateT WriterState + -- | Convert Pandoc to Org. -writeOrg :: WriterOptions -> Pandoc -> String -writeOrg opts document = - let st = WriterState { stNotes = [], stLinks = False, - stImages = False, stHasMath = False, +writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeOrg opts document = do + let st = WriterState { stNotes = [], + stHasMath = False, stOptions = opts } - in evalState (pandocToOrg document) st + evalStateT (pandocToOrg document) st -- | Return Org representation of document. -pandocToOrg :: Pandoc -> State WriterState String +pandocToOrg :: PandocMonad m => Pandoc -> Org m Text pandocToOrg (Pandoc meta blocks) = do - opts <- liftM stOptions get + opts <- gets stOptions let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToOrg) - (fmap (render colwidth) . inlineListToOrg) + (fmap render' . blockListToOrg) + (fmap render' . inlineListToOrg) meta body <- blockListToOrg blocks - notes <- liftM (reverse . stNotes) get >>= notesToOrg - -- note that the notes may contain refs, so we do them first - hasMath <- liftM stHasMath get - let main = render colwidth $ foldl ($+$) empty $ [body, notes] + notes <- gets (reverse . stNotes) >>= notesToOrg + hasMath <- gets stHasMath + let main = render colwidth . foldl ($+$) empty $ [body, notes] let context = defField "body" main - $ defField "math" hasMath + . defField "math" hasMath $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return Org representation of notes. -notesToOrg :: [[Block]] -> State WriterState Doc +notesToOrg :: PandocMonad m => [[Block]] -> Org m Doc notesToOrg notes = - mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= - return . vsep + vsep <$> zipWithM noteToOrg [1..] notes -- | Return Org representation of a note. -noteToOrg :: Int -> [Block] -> State WriterState Doc +noteToOrg :: PandocMonad m => Int -> [Block] -> Org m Doc noteToOrg num note = do contents <- blockListToOrg note let marker = "[fn:" ++ show num ++ "] " @@ -109,8 +114,9 @@ isRawFormat f = f == Format "latex" || f == Format "tex" || f == Format "org" -- | Convert Pandoc block element to Org. -blockToOrg :: Block -- ^ Block element - -> State WriterState Doc +blockToOrg :: PandocMonad m + => Block -- ^ Block element + -> Org m Doc blockToOrg Null = return empty blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do contents <- blockListToOrg bs @@ -123,36 +129,25 @@ blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do blankline $$ contents $$ blankline $$ drawerEndTag $$ blankline -blockToOrg (Div attrs bs) = do +blockToOrg (Div (ident, classes, kv) bs) = do contents <- blockListToOrg bs + -- if one class looks like the name of a greater block then output as such: + -- The ID, if present, is added via the #+NAME keyword; other classes and + -- key-value pairs are kept as #+ATTR_HTML attributes. let isGreaterBlockClass = (`elem` ["center", "quote"]) . map toLower - return $ case attrs of - ("", [], []) -> - -- nullAttr, treat contents as if it wasn't wrapped - blankline $$ contents $$ blankline - (ident, [], []) -> - -- only an id: add id as an anchor, unwrap the rest - blankline $$ "<<" <> text ident <> ">>" $$ contents $$ blankline - (ident, classes, kv) -> - -- if one class looks like the name of a greater block then output as - -- such: The ID, if present, is added via the #+NAME keyword; other - -- classes and key-value pairs are kept as #+ATTR_HTML attributes. - let - (blockTypeCand, classes') = partition isGreaterBlockClass classes - in case blockTypeCand of - (blockType:classes'') -> - blankline $$ attrHtml (ident, classes'' <> classes', kv) $$ - "#+BEGIN_" <> text blockType $$ contents $$ - "#+END_" <> text blockType $$ blankline - _ -> - -- fallback: wrap in div tags - let - startTag = tagWithAttrs "div" attrs - endTag = text "</div>" - in blankline $$ "#+BEGIN_HTML" $$ - nest 2 startTag $$ "#+END_HTML" $$ blankline $$ - contents $$ blankline $$ "#+BEGIN_HTML" $$ - nest 2 endTag $$ "#+END_HTML" $$ blankline + (blockTypeCand, classes') = partition isGreaterBlockClass classes + return $ case blockTypeCand of + (blockType:classes'') -> + blankline $$ attrHtml (ident, classes'' <> classes', kv) $$ + "#+BEGIN_" <> text blockType $$ contents $$ + "#+END_" <> text blockType $$ blankline + _ -> + -- fallback with id: add id as an anchor if present, discard classes and + -- key-value pairs, unwrap the content. + let contents' = if not (null ident) + then "<<" <> text ident <> ">>" $$ contents + else contents + in blankline $$ contents' $$ blankline blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure blockToOrg (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do @@ -167,20 +162,22 @@ blockToOrg (Para inlines) = do blockToOrg (LineBlock lns) = do let splitStanza [] = [] splitStanza xs = case break (== mempty) xs of - (l, []) -> l : [] + (l, []) -> [l] (l, _:r) -> l : splitStanza r let joinWithLinefeeds = nowrap . mconcat . intersperse cr let joinWithBlankLines = mconcat . intersperse blankline - let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls - contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns) + let prettifyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls + contents <- joinWithBlankLines <$> mapM prettifyStanza (splitStanza lns) return $ blankline $$ "#+BEGIN_VERSE" $$ nest 2 contents $$ "#+END_VERSE" <> blankline blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | isRawFormat f = - return $ text str -blockToOrg (RawBlock _ _) = return empty +blockToOrg b@(RawBlock f str) + | isRawFormat f = return $ text str + | otherwise = do + report $ BlockNotRendered b + return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToOrg (Header level attr inlines) = do contents <- inlineListToOrg inlines @@ -190,7 +187,7 @@ blockToOrg (Header level attr inlines) = do else cr <> nest (level + 1) (propertiesDrawer attr) return $ headerStr <> " " <> contents <> drawerStr <> blankline blockToOrg (CodeBlock (_,classes,_) str) = do - opts <- stOptions <$> get + opts <- gets stOptions let tabstop = writerTabStop opts let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers let (beg, end) = case at of @@ -205,7 +202,7 @@ blockToOrg (Table caption' _ _ headers rows) = do caption'' <- inlineListToOrg caption' let caption = if null caption' then empty - else ("#+CAPTION: " <> caption'') + else "#+CAPTION: " <> caption'' headers' <- mapM blockListToOrg headers rawRows <- mapM (mapM blockListToOrg) rows let numChars = maximum . map offset @@ -215,16 +212,16 @@ blockToOrg (Table caption' _ _ headers rows) = do -- FIXME: Org doesn't allow blocks with height more than 1. let hpipeBlocks blocks = hcat [beg, middle, end] where h = maximum (1 : map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") + sep' = lblock 3 $ vcat (replicate h (text " | ")) + beg = lblock 2 $ vcat (replicate h (text "| ")) + end = lblock 2 $ vcat (replicate h (text " |")) middle = hcat $ intersperse sep' blocks let makeRow = hpipeBlocks . zipWith lblock widthsInChars let head' = makeRow headers' rows' <- mapM (\row -> do cols <- mapM blockListToOrg row return $ makeRow cols) rows let border ch = char '|' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ + (hcat . intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '|' let body = vcat rows' @@ -245,8 +242,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $ - zip markers' items + contents <- zipWithM orderedListItemToOrg markers' items -- ensure that sublists have preceding blank line return $ blankline $$ vcat contents $$ blankline blockToOrg (DefinitionList items) = do @@ -254,25 +250,27 @@ blockToOrg (DefinitionList items) = do return $ vcat contents $$ blankline -- | Convert bullet list item (list of blocks) to Org. -bulletListItemToOrg :: [Block] -> State WriterState Doc +bulletListItemToOrg :: PandocMonad m => [Block] -> Org m Doc bulletListItemToOrg items = do contents <- blockListToOrg items - return $ hang 3 "- " (contents <> cr) + return $ hang 2 "- " (contents <> cr) -- | Convert ordered list item (a list of blocks) to Org. -orderedListItemToOrg :: String -- ^ marker for list item +orderedListItemToOrg :: PandocMonad m + => String -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc + -> Org m Doc orderedListItemToOrg marker items = do contents <- blockListToOrg items return $ hang (length marker + 1) (text marker <> space) (contents <> cr) -- | Convert defintion list item (label, list of blocks) to Org. -definitionListItemToOrg :: ([Inline], [[Block]]) -> State WriterState Doc +definitionListItemToOrg :: PandocMonad m + => ([Inline], [[Block]]) -> Org m Doc definitionListItemToOrg (label, defs) = do label' <- inlineListToOrg label - contents <- liftM vcat $ mapM blockListToOrg defs - return $ hang 3 "- " $ label' <> " :: " <> (contents <> cr) + contents <- vcat <$> mapM blockListToOrg defs + return . hang 2 "- " $ label' <> " :: " <> (contents <> cr) -- | Convert list of key/value pairs to Org :PROPERTIES: drawer. propertiesDrawer :: Attr -> Doc @@ -280,8 +278,8 @@ propertiesDrawer (ident, classes, kv) = let drawerStart = text ":PROPERTIES:" drawerEnd = text ":END:" - kv' = if (classes == mempty) then kv else ("CLASS", unwords classes):kv - kv'' = if (ident == mempty) then kv' else ("CUSTOM_ID", ident):kv' + kv' = if classes == mempty then kv else ("CLASS", unwords classes):kv + kv'' = if ident == mempty then kv' else ("CUSTOM_ID", ident):kv' properties = vcat $ map kvToOrgProperty kv'' in drawerStart <> cr <> properties <> cr <> drawerEnd @@ -294,23 +292,37 @@ attrHtml :: Attr -> Doc attrHtml ("" , [] , []) = mempty attrHtml (ident, classes, kvs) = let - name = if (null ident) then mempty else "#+NAME: " <> text ident <> cr + name = if null ident then mempty else "#+NAME: " <> text ident <> cr keyword = "#+ATTR_HTML" classKv = ("class", unwords classes) kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs) in name <> keyword <> ": " <> text (unwords kvStrings) <> cr -- | Convert list of Pandoc block elements to Org. -blockListToOrg :: [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat +blockListToOrg :: PandocMonad m + => [Block] -- ^ List of block elements + -> Org m Doc +blockListToOrg blocks = vcat <$> mapM blockToOrg blocks -- | Convert list of Pandoc inline elements to Org. -inlineListToOrg :: [Inline] -> State WriterState Doc -inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat +inlineListToOrg :: PandocMonad m + => [Inline] + -> Org m Doc +inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst) + where fixMarkers [] = [] -- prevent note refs and list markers from wrapping, see #4171 + fixMarkers (Space : x : rest) | shouldFix x = + Str " " : x : fixMarkers rest + fixMarkers (SoftBreak : x : rest) | shouldFix x = + Str " " : x : fixMarkers rest + fixMarkers (x : rest) = x : fixMarkers rest + + shouldFix Note{} = True -- Prevent footnotes + shouldFix (Str "-") = True -- Prevent bullet list items + -- TODO: prevent ordered list items + shouldFix _ = False -- | Convert Pandoc inline element to Org. -inlineToOrg :: Inline -> State WriterState Doc +inlineToOrg :: PandocMonad m => Inline -> Org m Doc inlineToOrg (Span (uid, [], []) []) = return $ "<<" <> text uid <> ">>" inlineToOrg (Span _ lst) = @@ -339,50 +351,48 @@ inlineToOrg (Quoted DoubleQuote lst) = do return $ "\"" <> contents <> "\"" inlineToOrg (Cite _ lst) = inlineListToOrg lst inlineToOrg (Code _ str) = return $ "=" <> text str <> "=" -inlineToOrg (Str str) = return $ text $ escapeString str +inlineToOrg (Str str) = return . text $ escapeString str inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath then "$" <> text str <> "$" else "$$" <> text str <> "$$" -inlineToOrg (RawInline f@(Format f') str) = - return $ if isRawFormat f - then text str - else "@@" <> text f' <> ":" <> text str <> "@@" -inlineToOrg (LineBreak) = return (text "\\\\" <> cr) +inlineToOrg il@(RawInline f str) + | isRawFormat f = return $ text str + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToOrg LineBreak = return (text "\\\\" <> cr) inlineToOrg Space = return space inlineToOrg SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of - WrapPreserve -> return cr - WrapAuto -> return space - WrapNone -> return space -inlineToOrg (Link _ txt (src, _)) = do + WrapPreserve -> return cr + WrapAuto -> return space + WrapNone -> return space +inlineToOrg (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink - do modify $ \s -> s{ stLinks = True } - return $ "[[" <> text (orgPath x) <> "]]" + return $ "[[" <> text (orgPath x) <> "]]" _ -> do contents <- inlineListToOrg txt - modify $ \s -> s{ stLinks = True } return $ "[[" <> text (orgPath src) <> "][" <> contents <> "]]" -inlineToOrg (Image _ _ (source, _)) = do - modify $ \s -> s{ stImages = True } +inlineToOrg (Image _ _ (source, _)) = return $ "[[" <> text (orgPath source) <> "]]" inlineToOrg (Note contents) = do -- add to notes in state - notes <- get >>= (return . stNotes) + notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 + let ref = show $ length notes + 1 return $ "[fn:" <> text ref <> "]" orgPath :: String -> String orgPath src = case src of - [] -> mempty -- wiki link - ('#':xs) -> xs -- internal link - _ | isUrl src -> src - _ | isFilePath src -> src - _ -> "file:" <> src + [] -> mempty -- wiki link + ('#':_) -> src -- internal link + _ | isUrl src -> src + _ | isFilePath src -> src + _ -> "file:" <> src where isFilePath :: String -> Bool isFilePath cs = any (`isPrefixOf` cs) ["/", "./", "../", "file:"] diff --git a/src/Text/Pandoc/Writers/Powerpoint.hs b/src/Text/Pandoc/Writers/Powerpoint.hs new file mode 100644 index 000000000..645a4cb86 --- /dev/null +++ b/src/Text/Pandoc/Writers/Powerpoint.hs @@ -0,0 +1,63 @@ + + +{- +Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Powerpoint + Copyright : Copyright (C) 2017-2018 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to powerpoint (pptx). -} + +{- +This is a wrapper around two modules: + + - Text.Pandoc.Writers.Powerpoint.Presentation (which converts a + pandoc document into a Presentation datatype), and + + - Text.Pandoc.Writers.Powerpoint.Output (which converts a + Presentation into a zip archive, which can be output). +-} + +module Text.Pandoc.Writers.Powerpoint (writePowerpoint) where + +import Codec.Archive.Zip +import Text.Pandoc.Definition +import Text.Pandoc.Walk +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Options (WriterOptions) +import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.Writers.Powerpoint.Presentation (documentToPresentation) +import Text.Pandoc.Writers.Powerpoint.Output (presentationToArchive) +import qualified Data.ByteString.Lazy as BL + +writePowerpoint :: (PandocMonad m) + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m BL.ByteString +writePowerpoint opts (Pandoc meta blks) = do + let blks' = walk fixDisplayMath blks + let (pres, logMsgs) = documentToPresentation opts (Pandoc meta blks') + mapM_ report logMsgs + archv <- presentationToArchive opts pres + return $ fromArchive archv diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs new file mode 100644 index 000000000..410b6c20c --- /dev/null +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -0,0 +1,1834 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Powerpoint.Output + Copyright : Copyright (C) 2017-2018 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Conversion of Presentation datatype (defined in +Text.Pandoc.Writers.Powerpoint.Presentation) to a zip archive. +-} + +module Text.Pandoc.Writers.Powerpoint.Output ( presentationToArchive + ) where + +import Control.Monad.Except (throwError, catchError) +import Control.Monad.Reader +import Control.Monad.State +import Codec.Archive.Zip +import Data.Char (toUpper) +import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) +import Data.Default +import Text.Pandoc.Compat.Time (formatTime, defaultTimeLocale) +import Data.Time.Clock (UTCTime) +import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) +import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) +import Text.XML.Light +import Text.Pandoc.Definition +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Error (PandocError(..)) +import qualified Text.Pandoc.Class as P +import Text.Pandoc.Options +import Text.Pandoc.MIME +import qualified Data.ByteString.Lazy as BL +import Text.Pandoc.Writers.OOXML +import qualified Data.Map as M +import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, isJust, maybeToList, catMaybes) +import Text.Pandoc.ImageSize +import Control.Applicative ((<|>)) +import System.FilePath.Glob +import Text.TeXMath +import Text.Pandoc.Writers.Math (convertMath) +import Text.Pandoc.Writers.Powerpoint.Presentation +import Skylighting (fromColor) + +-- This populates the global ids map with images already in the +-- template, so the ids won't be used by images introduced by the +-- user. +initialGlobalIds :: Archive -> Archive -> M.Map FilePath Int +initialGlobalIds refArchive distArchive = + let archiveFiles = filesInArchive refArchive `union` filesInArchive distArchive + mediaPaths = filter (isPrefixOf "ppt/media/image") archiveFiles + + go :: FilePath -> Maybe (FilePath, Int) + go fp = do + s <- stripPrefix "ppt/media/image" $ fst $ splitExtension fp + (n, _) <- listToMaybe $ reads s + return (fp, n) + in + M.fromList $ mapMaybe go mediaPaths + +getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer) +getPresentationSize refArchive distArchive = do + entry <- findEntryByPath "ppt/presentation.xml" refArchive `mplus` + findEntryByPath "ppt/presentation.xml" distArchive + presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry + let ns = elemToNameSpaces presElement + sldSize <- findChild (elemName ns "p" "sldSz") presElement + cxS <- findAttr (QName "cx" Nothing Nothing) sldSize + cyS <- findAttr (QName "cy" Nothing Nothing) sldSize + (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String) + (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String) + return (cx `div` 12700, cy `div` 12700) + +data WriterEnv = WriterEnv { envRefArchive :: Archive + , envDistArchive :: Archive + , envUTCTime :: UTCTime + , envOpts :: WriterOptions + , envPresentationSize :: (Integer, Integer) + , envSlideHasHeader :: Bool + , envInList :: Bool + , envInNoteSlide :: Bool + , envCurSlideId :: Int + -- the difference between the number at + -- the end of the slide file name and + -- the rId number + , envSlideIdOffset :: Int + , envContentType :: ContentType + , envSlideIdMap :: M.Map SlideId Int + -- maps the slide number to the + -- corresponding notes id number. If there + -- are no notes for a slide, there will be + -- no entry in the map for it. + , envSpeakerNotesIdMap :: M.Map Int Int + } + deriving (Show) + +instance Default WriterEnv where + def = WriterEnv { envRefArchive = emptyArchive + , envDistArchive = emptyArchive + , envUTCTime = posixSecondsToUTCTime 0 + , envOpts = def + , envPresentationSize = (720, 540) + , envSlideHasHeader = False + , envInList = False + , envInNoteSlide = False + , envCurSlideId = 1 + , envSlideIdOffset = 1 + , envContentType = NormalContent + , envSlideIdMap = mempty + , envSpeakerNotesIdMap = mempty + } + +data ContentType = NormalContent + | TwoColumnLeftContent + | TwoColumnRightContent + deriving (Show, Eq) + +data MediaInfo = MediaInfo { mInfoFilePath :: FilePath + , mInfoLocalId :: Int + , mInfoGlobalId :: Int + , mInfoMimeType :: Maybe MimeType + , mInfoExt :: Maybe String + , mInfoCaption :: Bool + } deriving (Show, Eq) + +data WriterState = WriterState { stLinkIds :: M.Map Int (M.Map Int LinkTarget) + -- (FP, Local ID, Global ID, Maybe Mime) + , stMediaIds :: M.Map Int [MediaInfo] + , stMediaGlobalIds :: M.Map FilePath Int + } deriving (Show, Eq) + +instance Default WriterState where + def = WriterState { stLinkIds = mempty + , stMediaIds = mempty + , stMediaGlobalIds = mempty + } + +type P m = ReaderT WriterEnv (StateT WriterState m) + +runP :: Monad m => WriterEnv -> WriterState -> P m a -> m a +runP env st p = evalStateT (runReaderT p env) st + +-------------------------------------------------------------------- + +copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive +copyFileToArchive arch fp = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + case findEntryByPath fp refArchive `mplus` findEntryByPath fp distArchive of + Nothing -> fail $ fp ++ " missing in reference file" + Just e -> return $ addEntryToArchive e arch + +alwaysInheritedPatterns :: [Pattern] +alwaysInheritedPatterns = + map compile [ "docProps/app.xml" + , "ppt/slideLayouts/slideLayout*.xml" + , "ppt/slideLayouts/_rels/slideLayout*.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/theme/theme1.xml" + , "ppt/theme/_rels/theme1.xml.rels" + , "ppt/presProps.xml" + , "ppt/viewProps.xml" + , "ppt/tableStyles.xml" + , "ppt/media/image*" + ] + +-- We only look for these under special conditions +contingentInheritedPatterns :: Presentation -> [Pattern] +contingentInheritedPatterns pres = [] ++ + if presHasSpeakerNotes pres + then map compile [ "ppt/notesMasters/notesMaster*.xml" + , "ppt/notesMasters/_rels/notesMaster*.xml.rels" + , "ppt/theme/theme2.xml" + , "ppt/theme/_rels/theme2.xml.rels" + ] + else [] + +inheritedPatterns :: Presentation -> [Pattern] +inheritedPatterns pres = + alwaysInheritedPatterns ++ contingentInheritedPatterns pres + +patternToFilePaths :: PandocMonad m => Pattern -> P m [FilePath] +patternToFilePaths pat = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + + let archiveFiles = filesInArchive refArchive `union` filesInArchive distArchive + return $ filter (match pat) archiveFiles + +patternsToFilePaths :: PandocMonad m => [Pattern] -> P m [FilePath] +patternsToFilePaths pats = concat <$> mapM patternToFilePaths pats + +-- Here are the files we'll require to make a Powerpoint document. If +-- any of these are missing, we should error out of our build. +requiredFiles :: [FilePath] +requiredFiles = [ "docProps/app.xml" + , "ppt/presProps.xml" + , "ppt/slideLayouts/slideLayout1.xml" + , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" + , "ppt/slideLayouts/slideLayout2.xml" + , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" + , "ppt/slideLayouts/slideLayout3.xml" + , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" + , "ppt/slideLayouts/slideLayout4.xml" + , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/theme/theme1.xml" + , "ppt/viewProps.xml" + , "ppt/tableStyles.xml" + ] + +presentationToArchiveP :: PandocMonad m => Presentation -> P m Archive +presentationToArchiveP p@(Presentation docProps slides) = do + filePaths <- patternsToFilePaths $ inheritedPatterns p + + -- make sure all required files are available: + let missingFiles = filter (\fp -> not (fp `elem` filePaths)) requiredFiles + unless (null missingFiles) + (throwError $ + PandocSomeError $ + "The following required files are missing:\n" ++ + (unlines $ map (" " ++) missingFiles) + ) + + newArch' <- foldM copyFileToArchive emptyArchive filePaths + -- we make a docProps/core.xml entry out of the presentation docprops + docPropsEntry <- docPropsToEntry docProps + -- we make this ourself in case there's something unexpected in the + -- one in the reference doc. + relsEntry <- topLevelRelsEntry + -- presentation entry and rels. We have to do the rels first to make + -- sure we know the correct offset for the rIds. + presEntry <- presentationToPresEntry p + presRelsEntry <- presentationToRelsEntry p + slideEntries <- mapM slideToEntry slides + slideRelEntries <- mapM slideToSlideRelEntry slides + spkNotesEntries <- catMaybes <$> mapM slideToSpeakerNotesEntry slides + spkNotesRelEntries <- catMaybes <$> mapM slideToSpeakerNotesRelEntry slides + -- These have to come after everything, because they need the info + -- built up in the state. + mediaEntries <- makeMediaEntries + contentTypesEntry <- presentationToContentTypes p >>= contentTypesToEntry + -- fold everything into our inherited archive and return it. + return $ foldr addEntryToArchive newArch' $ + slideEntries ++ + slideRelEntries ++ + spkNotesEntries ++ + spkNotesRelEntries ++ + mediaEntries ++ + [contentTypesEntry, docPropsEntry, relsEntry, presEntry, presRelsEntry] + +makeSlideIdMap :: Presentation -> M.Map SlideId Int +makeSlideIdMap (Presentation _ slides) = + M.fromList $ (map slideId slides) `zip` [1..] + +makeSpeakerNotesMap :: Presentation -> M.Map Int Int +makeSpeakerNotesMap (Presentation _ slides) = + M.fromList $ (mapMaybe f $ slides `zip` [1..]) `zip` [1..] + where f (Slide _ _ Nothing, _) = Nothing + f (Slide _ _ (Just _), n) = Just n + +presentationToArchive :: PandocMonad m => WriterOptions -> Presentation -> m Archive +presentationToArchive opts pres = do + distArchive <- (toArchive . BL.fromStrict) <$> + P.readDefaultDataFile "reference.pptx" + refArchive <- case writerReferenceDoc opts of + Just f -> toArchive <$> P.readFileLazy f + Nothing -> (toArchive . BL.fromStrict) <$> + P.readDataFile "reference.pptx" + + utctime <- P.getCurrentTime + + presSize <- case getPresentationSize refArchive distArchive of + Just sz -> return sz + Nothing -> throwError $ + PandocSomeError $ + "Could not determine presentation size" + + let env = def { envRefArchive = refArchive + , envDistArchive = distArchive + , envUTCTime = utctime + , envOpts = opts + , envPresentationSize = presSize + , envSlideIdMap = makeSlideIdMap pres + , envSpeakerNotesIdMap = makeSpeakerNotesMap pres + } + + let st = def { stMediaGlobalIds = initialGlobalIds refArchive distArchive + } + + runP env st $ presentationToArchiveP pres + + + +-------------------------------------------------- + +-- Check to see if the presentation has speaker notes. This will +-- influence whether we import the notesMaster template. +presHasSpeakerNotes :: Presentation -> Bool +presHasSpeakerNotes (Presentation _ slides) = any isJust $ map slideSpeakerNotes slides + +curSlideHasSpeakerNotes :: PandocMonad m => P m Bool +curSlideHasSpeakerNotes = do + sldId <- asks envCurSlideId + notesIdMap <- asks envSpeakerNotesIdMap + return $ isJust $ M.lookup sldId notesIdMap + +-------------------------------------------------- + +getLayout :: PandocMonad m => Layout -> P m Element +getLayout layout = do + let layoutpath = case layout of + (MetadataSlide _ _ _ _) -> "ppt/slideLayouts/slideLayout1.xml" + (TitleSlide _) -> "ppt/slideLayouts/slideLayout3.xml" + (ContentSlide _ _) -> "ppt/slideLayouts/slideLayout2.xml" + (TwoColumnSlide _ _ _) -> "ppt/slideLayouts/slideLayout4.xml" + distArchive <- asks envDistArchive + root <- case findEntryByPath layoutpath distArchive of + Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of + Just element -> return $ element + Nothing -> throwError $ + PandocSomeError $ + layoutpath ++ " corrupt in reference file" + Nothing -> throwError $ + PandocSomeError $ + layoutpath ++ " missing in reference file" + return root + +shapeHasId :: NameSpaces -> String -> Element -> Bool +shapeHasId ns ident element + | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element + , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr + , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = + nm == ident + | otherwise = False + +-- The content shape in slideLayout2 (Title/Content) has id=3 In +-- slideLayout4 (two column) the left column is id=3, and the right +-- column is id=4. +getContentShape :: PandocMonad m => NameSpaces -> Element -> P m Element +getContentShape ns spTreeElem + | isElem ns "p" "spTree" spTreeElem = do + contentType <- asks envContentType + let idx = case contentType of + NormalContent -> "1" + TwoColumnLeftContent -> "1" + TwoColumnRightContent -> "2" + case getShapeByPlaceHolderIndex ns spTreeElem idx of + Just e -> return e + Nothing -> throwError $ + PandocSomeError $ + "Could not find shape for Powerpoint content" +getContentShape _ _ = throwError $ + PandocSomeError $ + "Attempted to find content on non shapeTree" + +getShapeDimensions :: NameSpaces + -> Element + -> Maybe ((Integer, Integer), (Integer, Integer)) +getShapeDimensions ns element + | isElem ns "p" "sp" element = do + spPr <- findChild (elemName ns "p" "spPr") element + xfrm <- findChild (elemName ns "a" "xfrm") spPr + off <- findChild (elemName ns "a" "off") xfrm + xS <- findAttr (QName "x" Nothing Nothing) off + yS <- findAttr (QName "y" Nothing Nothing) off + ext <- findChild (elemName ns "a" "ext") xfrm + cxS <- findAttr (QName "cx" Nothing Nothing) ext + cyS <- findAttr (QName "cy" Nothing Nothing) ext + (x, _) <- listToMaybe $ reads xS + (y, _) <- listToMaybe $ reads yS + (cx, _) <- listToMaybe $ reads cxS + (cy, _) <- listToMaybe $ reads cyS + return $ ((x `div` 12700, y `div` 12700), (cx `div` 12700, cy `div` 12700)) + | otherwise = Nothing + + +getMasterShapeDimensionsById :: String + -> Element + -> Maybe ((Integer, Integer), (Integer, Integer)) +getMasterShapeDimensionsById ident master = do + let ns = elemToNameSpaces master + cSld <- findChild (elemName ns "p" "cSld") master + spTree <- findChild (elemName ns "p" "spTree") cSld + sp <- filterChild (\e -> (isElem ns "p" "sp" e) && (shapeHasId ns ident e)) spTree + getShapeDimensions ns sp + +getContentShapeSize :: PandocMonad m + => NameSpaces + -> Element + -> Element + -> P m ((Integer, Integer), (Integer, Integer)) +getContentShapeSize ns layout master + | isElem ns "p" "sldLayout" layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + sp <- getContentShape ns spTree + case getShapeDimensions ns sp of + Just sz -> return sz + Nothing -> do let mbSz = + findChild (elemName ns "p" "nvSpPr") sp >>= + findChild (elemName ns "p" "cNvPr") >>= + findAttr (QName "id" Nothing Nothing) >>= + flip getMasterShapeDimensionsById master + case mbSz of + Just sz' -> return sz' + Nothing -> throwError $ + PandocSomeError $ + "Couldn't find necessary content shape size" +getContentShapeSize _ _ _ = throwError $ + PandocSomeError $ + "Attempted to find content shape size in non-layout" + +replaceNamedChildren :: NameSpaces + -> String + -> String + -> [Element] + -> Element + -> Element +replaceNamedChildren ns prefix name newKids element = + element { elContent = concat $ fun True $ elContent element } + where + fun :: Bool -> [Content] -> [[Content]] + fun _ [] = [] + fun switch ((Elem e) : conts) | isElem ns prefix name e = + if switch + then (map Elem $ newKids) : fun False conts + else fun False conts + fun switch (cont : conts) = [cont] : fun switch conts + +---------------------------------------------------------------- + +registerLink :: PandocMonad m => LinkTarget -> P m Int +registerLink link = do + curSlideId <- asks envCurSlideId + linkReg <- gets stLinkIds + mediaReg <- gets stMediaIds + hasSpeakerNotes <- curSlideHasSpeakerNotes + let maxLinkId = case M.lookup curSlideId linkReg of + Just mp -> case M.keys mp of + [] -> if hasSpeakerNotes then 2 else 1 + ks -> maximum ks + Nothing -> if hasSpeakerNotes then 2 else 1 + maxMediaId = case M.lookup curSlideId mediaReg of + Just [] -> if hasSpeakerNotes then 2 else 1 + Just mInfos -> maximum $ map mInfoLocalId mInfos + Nothing -> if hasSpeakerNotes then 2 else 1 + maxId = max maxLinkId maxMediaId + slideLinks = case M.lookup curSlideId linkReg of + Just mp -> M.insert (maxId + 1) link mp + Nothing -> M.singleton (maxId + 1) link + modify $ \st -> st{ stLinkIds = M.insert curSlideId slideLinks linkReg} + return $ maxId + 1 + +registerMedia :: PandocMonad m => FilePath -> [ParaElem] -> P m MediaInfo +registerMedia fp caption = do + curSlideId <- asks envCurSlideId + linkReg <- gets stLinkIds + mediaReg <- gets stMediaIds + globalIds <- gets stMediaGlobalIds + hasSpeakerNotes <- curSlideHasSpeakerNotes + let maxLinkId = case M.lookup curSlideId linkReg of + Just mp -> case M.keys mp of + [] -> if hasSpeakerNotes then 2 else 1 + ks -> maximum ks + Nothing -> if hasSpeakerNotes then 2 else 1 + maxMediaId = case M.lookup curSlideId mediaReg of + Just [] -> if hasSpeakerNotes then 2 else 1 + Just mInfos -> maximum $ map mInfoLocalId mInfos + Nothing -> if hasSpeakerNotes then 2 else 1 + maxLocalId = max maxLinkId maxMediaId + + maxGlobalId = case M.elems globalIds of + [] -> 0 + ids -> maximum ids + + (imgBytes, mbMt) <- P.fetchItem fp + let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ '.':x)) + <|> + case imageType imgBytes of + Just Png -> Just ".png" + Just Jpeg -> Just ".jpeg" + Just Gif -> Just ".gif" + Just Pdf -> Just ".pdf" + Just Eps -> Just ".eps" + Just Svg -> Just ".svg" + Just Emf -> Just ".emf" + Nothing -> Nothing + + let newGlobalId = case M.lookup fp globalIds of + Just ident -> ident + Nothing -> maxGlobalId + 1 + + let newGlobalIds = M.insert fp newGlobalId globalIds + + let mediaInfo = MediaInfo { mInfoFilePath = fp + , mInfoLocalId = maxLocalId + 1 + , mInfoGlobalId = newGlobalId + , mInfoMimeType = mbMt + , mInfoExt = imgExt + , mInfoCaption = (not . null) caption + } + + let slideMediaInfos = case M.lookup curSlideId mediaReg of + Just minfos -> mediaInfo : minfos + Nothing -> [mediaInfo] + + + modify $ \st -> st{ stMediaIds = M.insert curSlideId slideMediaInfos mediaReg + , stMediaGlobalIds = newGlobalIds + } + return mediaInfo + +makeMediaEntry :: PandocMonad m => MediaInfo -> P m Entry +makeMediaEntry mInfo = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> asks envUTCTime + (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo) + let ext = case mInfoExt mInfo of + Just e -> e + Nothing -> "" + let fp = "ppt/media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext + return $ toEntry fp epochtime $ BL.fromStrict imgBytes + +makeMediaEntries :: PandocMonad m => P m [Entry] +makeMediaEntries = do + mediaInfos <- gets stMediaIds + let allInfos = mconcat $ M.elems mediaInfos + mapM makeMediaEntry allInfos + +-- -- | Scales the image to fit the page +-- -- sizes are passed in emu +-- fitToPage' :: (Double, Double) -- image size in emu +-- -> Integer -- pageWidth +-- -> Integer -- pageHeight +-- -> (Integer, Integer) -- imagesize +-- fitToPage' (x, y) pageWidth pageHeight +-- -- Fixes width to the page width and scales the height +-- | x <= fromIntegral pageWidth && y <= fromIntegral pageHeight = +-- (floor x, floor y) +-- | x / fromIntegral pageWidth > y / fromIntegral pageWidth = +-- (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y) +-- | otherwise = +-- (floor $ ((fromIntegral pageHeight) / y) * x, pageHeight) + +-- positionImage :: (Double, Double) -> Integer -> Integer -> (Integer, Integer) +-- positionImage (x, y) pageWidth pageHeight = +-- let (x', y') = fitToPage' (x, y) pageWidth pageHeight +-- in +-- ((pageWidth - x') `div` 2, (pageHeight - y') `div` 2) + +getMaster :: PandocMonad m => P m Element +getMaster = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + parseXml refArchive distArchive "ppt/slideMasters/slideMaster1.xml" + +-- We want to get the header dimensions, so we can make sure that the +-- image goes underneath it. We only use this in a content slide if it +-- has a header. + +-- getHeaderSize :: PandocMonad m => P m ((Integer, Integer), (Integer, Integer)) +-- getHeaderSize = do +-- master <- getMaster +-- let ns = elemToNameSpaces master +-- sps = [master] >>= +-- findChildren (elemName ns "p" "cSld") >>= +-- findChildren (elemName ns "p" "spTree") >>= +-- findChildren (elemName ns "p" "sp") +-- mbXfrm = +-- listToMaybe (filter (shapeHasName ns "Title Placeholder 1") sps) >>= +-- findChild (elemName ns "p" "spPr") >>= +-- findChild (elemName ns "a" "xfrm") +-- xoff = mbXfrm >>= +-- findChild (elemName ns "a" "off") >>= +-- findAttr (QName "x" Nothing Nothing) >>= +-- (listToMaybe . (\s -> reads s :: [(Integer, String)])) +-- yoff = mbXfrm >>= +-- findChild (elemName ns "a" "off") >>= +-- findAttr (QName "y" Nothing Nothing) >>= +-- (listToMaybe . (\s -> reads s :: [(Integer, String)])) +-- xext = mbXfrm >>= +-- findChild (elemName ns "a" "ext") >>= +-- findAttr (QName "cx" Nothing Nothing) >>= +-- (listToMaybe . (\s -> reads s :: [(Integer, String)])) +-- yext = mbXfrm >>= +-- findChild (elemName ns "a" "ext") >>= +-- findAttr (QName "cy" Nothing Nothing) >>= +-- (listToMaybe . (\s -> reads s :: [(Integer, String)])) +-- off = case xoff of +-- Just (xoff', _) | Just (yoff',_) <- yoff -> (xoff', yoff') +-- _ -> (1043490, 1027664) +-- ext = case xext of +-- Just (xext', _) | Just (yext',_) <- yext -> (xext', yext') +-- _ -> (7024744, 1143000) +-- return $ (off, ext) + +-- Hard-coded for now +-- captionPosition :: ((Integer, Integer), (Integer, Integer)) +-- captionPosition = ((457200, 6061972), (8229600, 527087)) + +captionHeight :: Integer +captionHeight = 40 + +createCaption :: PandocMonad m + => ((Integer, Integer), (Integer, Integer)) + -> [ParaElem] + -> P m Element +createCaption contentShapeDimensions paraElements = do + let para = Paragraph def{pPropAlign = Just AlgnCenter} paraElements + elements <- mapM paragraphToElement [para] + let ((x, y), (cx, cy)) = contentShapeDimensions + let txBody = mknode "p:txBody" [] $ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + return $ + mknode "p:sp" [] [ mknode "p:nvSpPr" [] + [ mknode "p:cNvPr" [("id","1"), ("name","TextBox 3")] () + , mknode "p:cNvSpPr" [("txBox", "1")] () + , mknode "p:nvPr" [] () + ] + , mknode "p:spPr" [] + [ mknode "a:xfrm" [] + [ mknode "a:off" [("x", show $ 12700 * x), + ("y", show $ 12700 * (y + cy - captionHeight))] () + , mknode "a:ext" [("cx", show $ 12700 * cx), + ("cy", show $ 12700 * captionHeight)] () + ] + , mknode "a:prstGeom" [("prst", "rect")] + [ mknode "a:avLst" [] () + ] + , mknode "a:noFill" [] () + ] + , txBody + ] + +makePicElements :: PandocMonad m + => Element + -> PicProps + -> MediaInfo + -> [ParaElem] + -> P m [Element] +makePicElements layout picProps mInfo alt = do + opts <- asks envOpts + (pageWidth, pageHeight) <- asks envPresentationSize + -- hasHeader <- asks envSlideHasHeader + let hasCaption = mInfoCaption mInfo + (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo) + let (pxX, pxY) = case imageSize opts imgBytes of + Right sz -> sizeInPixels $ sz + Left _ -> sizeInPixels $ def + master <- getMaster + let ns = elemToNameSpaces layout + ((x, y), (cx, cytmp)) <- getContentShapeSize ns layout master + `catchError` + (\_ -> return ((0, 0), (pageWidth, pageHeight))) + + let cy = if hasCaption then cytmp - captionHeight else cytmp + + let imgRatio = fromIntegral pxX / fromIntegral pxY :: Double + boxRatio = fromIntegral cx / fromIntegral cy :: Double + (dimX, dimY) = if imgRatio > boxRatio + then (fromIntegral cx, fromIntegral cx / imgRatio) + else (fromIntegral cy * imgRatio, fromIntegral cy) + + (dimX', dimY') = (round dimX * 12700, round dimY * 12700) :: (Integer, Integer) + (xoff, yoff) = (fromIntegral x + (fromIntegral cx - dimX) / 2, + fromIntegral y + (fromIntegral cy - dimY) / 2) + (xoff', yoff') = (round xoff * 12700, round yoff * 12700) :: (Integer, Integer) + + let cNvPicPr = mknode "p:cNvPicPr" [] $ + mknode "a:picLocks" [("noGrp","1") + ,("noChangeAspect","1")] () + -- cNvPr will contain the link information so we do that separately, + -- and register the link if necessary. + let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")] + cNvPr <- case picPropLink picProps of + Just link -> do idNum <- registerLink link + return $ mknode "p:cNvPr" cNvPrAttr $ + mknode "a:hlinkClick" [("r:id", "rId" ++ show idNum)] () + Nothing -> return $ mknode "p:cNvPr" cNvPrAttr () + let nvPicPr = mknode "p:nvPicPr" [] + [ cNvPr + , cNvPicPr + , mknode "p:nvPr" [] ()] + let blipFill = mknode "p:blipFill" [] + [ mknode "a:blip" [("r:embed", "rId" ++ (show $ mInfoLocalId mInfo))] () + , mknode "a:stretch" [] $ + mknode "a:fillRect" [] () ] + let xfrm = mknode "a:xfrm" [] + [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] () + , mknode "a:ext" [("cx",show dimX') + ,("cy",show dimY')] () ] + let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ + mknode "a:avLst" [] () + let ln = mknode "a:ln" [("w","9525")] + [ mknode "a:noFill" [] () + , mknode "a:headEnd" [] () + , mknode "a:tailEnd" [] () ] + let spPr = mknode "p:spPr" [("bwMode","auto")] + [xfrm, prstGeom, mknode "a:noFill" [] (), ln] + + let picShape = mknode "p:pic" [] + [ nvPicPr + , blipFill + , spPr ] + + -- And now, maybe create the caption: + if hasCaption + then do cap <- createCaption ((x, y), (cx, cytmp)) alt + return [picShape, cap] + else return [picShape] + + +paraElemToElement :: PandocMonad m => ParaElem -> P m Element +paraElemToElement Break = return $ mknode "a:br" [] () +paraElemToElement (Run rpr s) = do + let sizeAttrs = case rPropForceSize rpr of + Just n -> [("sz", (show $ n * 100))] + Nothing -> if rPropCode rpr + -- hardcoded size for code for now + then [("sz", "1800")] + else [] + attrs = sizeAttrs ++ + (if rPropBold rpr then [("b", "1")] else []) ++ + (if rPropItalics rpr then [("i", "1")] else []) ++ + (if rPropUnderline rpr then [("u", "sng")] else []) ++ + (case rStrikethrough rpr of + Just NoStrike -> [("strike", "noStrike")] + Just SingleStrike -> [("strike", "sngStrike")] + Just DoubleStrike -> [("strike", "dblStrike")] + Nothing -> []) ++ + (case rBaseline rpr of + Just n -> [("baseline", show n)] + Nothing -> []) ++ + (case rCap rpr of + Just NoCapitals -> [("cap", "none")] + Just SmallCapitals -> [("cap", "small")] + Just AllCapitals -> [("cap", "all")] + Nothing -> []) ++ + [] + linkProps <- case rLink rpr of + Just link -> do + idNum <- registerLink link + -- first we have to make sure that if it's an + -- anchor, it's in the anchor map. If not, there's + -- no link. + return $ case link of + InternalTarget _ -> + let linkAttrs = + [ ("r:id", "rId" ++ show idNum) + , ("action", "ppaction://hlinksldjump") + ] + in [mknode "a:hlinkClick" linkAttrs ()] + -- external + ExternalTarget _ -> + let linkAttrs = + [ ("r:id", "rId" ++ show idNum) + ] + in [mknode "a:hlinkClick" linkAttrs ()] + Nothing -> return [] + let colorContents = case rSolidFill rpr of + Just color -> + case fromColor color of + '#':hx -> [mknode "a:solidFill" [] + [mknode "a:srgbClr" [("val", map toUpper hx)] ()] + ] + _ -> [] + Nothing -> [] + let codeContents = if rPropCode rpr + then [mknode "a:latin" [("typeface", "Courier")] ()] + else [] + let propContents = linkProps ++ colorContents ++ codeContents + return $ mknode "a:r" [] [ mknode "a:rPr" attrs $ propContents + , mknode "a:t" [] s + ] +paraElemToElement (MathElem mathType texStr) = do + res <- convertMath writeOMML mathType (unTeXString texStr) + case res of + Right r -> return $ mknode "a14:m" [] $ addMathInfo r + Left (Str s) -> paraElemToElement (Run def s) + Left _ -> throwError $ PandocShouldNeverHappenError "non-string math fallback" + +-- This is a bit of a kludge -- really requires adding an option to +-- TeXMath, but since that's a different package, we'll do this one +-- step at a time. +addMathInfo :: Element -> Element +addMathInfo element = + let mathspace = Attr { attrKey = (QName "m" Nothing (Just "xmlns")) + , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" + } + in add_attr mathspace element + +-- We look through the element to see if it contains an a14:m +-- element. If so, we surround it. This is a bit ugly, but it seems +-- more dependable than looking through shapes for math. Plus this is +-- an xml implementation detail, so it seems to make sense to do it at +-- the xml level. +surroundWithMathAlternate :: Element -> Element +surroundWithMathAlternate element = + case findElement (QName "m" Nothing (Just "a14")) element of + Just _ -> + mknode "mc:AlternateContent" + [("xmlns:mc", "http://schemas.openxmlformats.org/markup-compatibility/2006") + ] [ mknode "mc:Choice" + [ ("xmlns:a14", "http://schemas.microsoft.com/office/drawing/2010/main") + , ("Requires", "a14")] [ element ] + ] + Nothing -> element + +paragraphToElement :: PandocMonad m => Paragraph -> P m Element +paragraphToElement par = do + let + attrs = [("lvl", show $ pPropLevel $ paraProps par)] ++ + (case pPropMarginLeft (paraProps par) of + Just px -> [("marL", show $ 12700 * px), ("indent", "0")] + Nothing -> [] + ) ++ + (case pPropAlign (paraProps par) of + Just AlgnLeft -> [("algn", "l")] + Just AlgnRight -> [("algn", "r")] + Just AlgnCenter -> [("algn", "ctr")] + Nothing -> [] + ) + props = [] ++ + (case pPropSpaceBefore $ paraProps par of + Just px -> [mknode "a:spcBef" [] [ + mknode "a:spcPts" [("val", show $ 100 * px)] () + ] + ] + Nothing -> [] + ) ++ + (case pPropBullet $ paraProps par of + Just Bullet -> [] + Just (AutoNumbering attrs') -> + [mknode "a:buAutoNum" [("type", autoNumberingToType attrs')] ()] + Nothing -> [mknode "a:buNone" [] ()] + ) + paras <- mapM paraElemToElement (paraElems par) + return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] ++ paras + +shapeToElement :: PandocMonad m => Element -> Shape -> P m Element +shapeToElement layout (TextBox paras) + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + sp <- getContentShape ns spTree + elements <- mapM paragraphToElement paras + let txBody = mknode "p:txBody" [] $ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + emptySpPr = mknode "p:spPr" [] () + return $ + surroundWithMathAlternate $ + replaceNamedChildren ns "p" "txBody" [txBody] $ + replaceNamedChildren ns "p" "spPr" [emptySpPr] $ + sp +-- GraphicFrame and Pic should never reach this. +shapeToElement _ _ = return $ mknode "p:sp" [] () + +shapeToElements :: PandocMonad m => Element -> Shape -> P m [Element] +shapeToElements layout (Pic picProps fp alt) = do + mInfo <- registerMedia fp alt + case mInfoExt mInfo of + Just _ -> do + makePicElements layout picProps mInfo alt + Nothing -> shapeToElements layout $ TextBox [Paragraph def alt] +shapeToElements layout (GraphicFrame tbls cptn) = + graphicFrameToElements layout tbls cptn +shapeToElements layout shp = do + element <- shapeToElement layout shp + return [element] + +shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Element] +shapesToElements layout shps = do + concat <$> mapM (shapeToElements layout) shps + +graphicFrameToElements :: PandocMonad m => Element -> [Graphic] -> [ParaElem] -> P m [Element] +graphicFrameToElements layout tbls caption = do + -- get the sizing + master <- getMaster + (pageWidth, pageHeight) <- asks envPresentationSize + let ns = elemToNameSpaces layout + ((x, y), (cx, cytmp)) <- getContentShapeSize ns layout master + `catchError` + (\_ -> return ((0, 0), (pageWidth, pageHeight))) + + let cy = if (not $ null caption) then cytmp - captionHeight else cytmp + + elements <- mapM (graphicToElement cx) tbls + let graphicFrameElts = + mknode "p:graphicFrame" [] $ + [ mknode "p:nvGraphicFramePr" [] $ + [ mknode "p:cNvPr" [("id", "6"), ("name", "Content Placeholder 5")] () + , mknode "p:cNvGraphicFramePr" [] $ + [mknode "a:graphicFrameLocks" [("noGrp", "1")] ()] + , mknode "p:nvPr" [] $ + [mknode "p:ph" [("idx", "1")] ()] + ] + , mknode "p:xfrm" [] $ + [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] () + , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] () + ] + ] ++ elements + + if (not $ null caption) + then do capElt <- createCaption ((x, y), (cx, cytmp)) caption + return [graphicFrameElts, capElt] + else return [graphicFrameElts] + +getDefaultTableStyle :: PandocMonad m => P m (Maybe String) +getDefaultTableStyle = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml" + return $ findAttr (QName "def" Nothing Nothing) tblStyleLst + +graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element +graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do + let colWidths = if null hdrCells + then case rows of + r : _ | not (null r) -> replicate (length r) $ + (tableWidth `div` (toInteger $ length r)) + -- satisfy the compiler. This is the same as + -- saying that rows is empty, but the compiler + -- won't understand that `[]` exhausts the + -- alternatives. + _ -> [] + else replicate (length hdrCells) $ + (tableWidth `div` (toInteger $ length hdrCells)) + + let cellToOpenXML paras = + do elements <- mapM paragraphToElement paras + let elements' = if null elements + then [mknode "a:p" [] [mknode "a:endParaRPr" [] ()]] + else elements + return $ + [mknode "a:txBody" [] $ + ([ mknode "a:bodyPr" [] () + , mknode "a:lstStyle" [] ()] + ++ elements')] + headers' <- mapM cellToOpenXML hdrCells + rows' <- mapM (mapM cellToOpenXML) rows + let borderProps = mknode "a:tcPr" [] () + let emptyCell = [mknode "a:p" [] [mknode "a:pPr" [] ()]] + let mkcell border contents = mknode "a:tc" [] + $ (if null contents + then emptyCell + else contents) ++ [ borderProps | border ] + let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells + + let mkgridcol w = mknode "a:gridCol" + [("w", show ((12700 * w) :: Integer))] () + let hasHeader = not (all null hdrCells) + + mbDefTblStyle <- getDefaultTableStyle + let tblPrElt = mknode "a:tblPr" + [ ("firstRow", if tblPrFirstRow tblPr then "1" else "0") + , ("bandRow", if tblPrBandRow tblPr then "1" else "0") + ] (case mbDefTblStyle of + Nothing -> [] + Just sty -> [mknode "a:tableStyleId" [] sty]) + + return $ mknode "a:graphic" [] $ + [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] $ + [mknode "a:tbl" [] $ + [ tblPrElt + , mknode "a:tblGrid" [] (if all (==0) colWidths + then [] + else map mkgridcol colWidths) + ] + ++ [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' + ] + ] + +getShapeByPlaceHolderType :: NameSpaces -> Element -> String -> Maybe Element +getShapeByPlaceHolderType ns spTreeElem phType + | isElem ns "p" "spTree" spTreeElem = + let findPhType element = isElem ns "p" "sp" element && + Just phType == (Just element >>= + findChild (elemName ns "p" "nvSpPr") >>= + findChild (elemName ns "p" "nvPr") >>= + findChild (elemName ns "p" "ph") >>= + findAttr (QName "type" Nothing Nothing)) + in + filterChild findPhType spTreeElem + | otherwise = Nothing + +getShapeByPlaceHolderIndex :: NameSpaces -> Element -> String -> Maybe Element +getShapeByPlaceHolderIndex ns spTreeElem phIdx + | isElem ns "p" "spTree" spTreeElem = + let findPhType element = isElem ns "p" "sp" element && + Just phIdx == (Just element >>= + findChild (elemName ns "p" "nvSpPr") >>= + findChild (elemName ns "p" "nvPr") >>= + findChild (elemName ns "p" "ph") >>= + findAttr (QName "idx" Nothing Nothing)) + in + filterChild findPhType spTreeElem + | otherwise = Nothing + + +nonBodyTextToElement :: PandocMonad m => Element -> String -> [ParaElem] -> P m Element +nonBodyTextToElement layout phType paraElements + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld + , Just sp <- getShapeByPlaceHolderType ns spTree phType = do + let hdrPara = Paragraph def paraElements + element <- paragraphToElement hdrPara + let txBody = mknode "p:txBody" [] $ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ + [element] + return $ replaceNamedChildren ns "p" "txBody" [txBody] sp + -- XXX: TODO + | otherwise = return $ mknode "p:sp" [] () + +contentToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> P m Element +contentToElement layout hdrShape shapes + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + element <- nonBodyTextToElement layout "title" hdrShape + let hdrShapeElements = if null hdrShape + then [] + else [element] + contentElements <- local + (\env -> env {envContentType = NormalContent}) + (shapesToElements layout shapes) + return $ + replaceNamedChildren ns "p" "sp" + (hdrShapeElements ++ contentElements) + spTree +contentToElement _ _ _ = return $ mknode "p:sp" [] () + +twoColumnToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> [Shape] -> P m Element +twoColumnToElement layout hdrShape shapesL shapesR + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + element <- nonBodyTextToElement layout "title" hdrShape + let hdrShapeElements = if null hdrShape + then [] + else [element] + contentElementsL <- local + (\env -> env {envContentType =TwoColumnLeftContent}) + (shapesToElements layout shapesL) + contentElementsR <- local + (\env -> env {envContentType =TwoColumnRightContent}) + (shapesToElements layout shapesR) + -- let contentElementsL' = map (setIdx ns "1") contentElementsL + -- contentElementsR' = map (setIdx ns "2") contentElementsR + return $ + replaceNamedChildren ns "p" "sp" + (hdrShapeElements ++ contentElementsL ++ contentElementsR) + spTree +twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] () + + +titleToElement :: PandocMonad m => Element -> [ParaElem] -> P m Element +titleToElement layout titleElems + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + element <- nonBodyTextToElement layout "title" titleElems + let titleShapeElements = if null titleElems + then [] + else [element] + return $ replaceNamedChildren ns "p" "sp" titleShapeElements spTree +titleToElement _ _ = return $ mknode "p:sp" [] () + +metadataToElement :: PandocMonad m => Element -> [ParaElem] -> [ParaElem] -> [[ParaElem]] -> [ParaElem] -> P m Element +metadataToElement layout titleElems subtitleElems authorsElems dateElems + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + titleShapeElements <- if null titleElems + then return [] + else sequence [nonBodyTextToElement layout "ctrTitle" titleElems] + let combinedAuthorElems = intercalate [Break] authorsElems + subtitleAndAuthorElems = intercalate [Break, Break] [subtitleElems, combinedAuthorElems] + subtitleShapeElements <- if null subtitleAndAuthorElems + then return [] + else sequence [nonBodyTextToElement layout "subTitle" subtitleAndAuthorElems] + dateShapeElements <- if null dateElems + then return [] + else sequence [nonBodyTextToElement layout "dt" dateElems] + return $ replaceNamedChildren ns "p" "sp" + (titleShapeElements ++ subtitleShapeElements ++ dateShapeElements) + spTree +metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] () + +slideToElement :: PandocMonad m => Slide -> P m Element +slideToElement (Slide _ l@(ContentSlide hdrElems shapes) _ )= do + layout <- getLayout l + spTree <- local (\env -> if null hdrElems + then env + else env{envSlideHasHeader=True}) $ + contentToElement layout hdrElems shapes + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] [spTree]] +slideToElement (Slide _ l@(TwoColumnSlide hdrElems shapesL shapesR) _) = do + layout <- getLayout l + spTree <- local (\env -> if null hdrElems + then env + else env{envSlideHasHeader=True}) $ + twoColumnToElement layout hdrElems shapesL shapesR + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] [spTree]] +slideToElement (Slide _ l@(TitleSlide hdrElems) _) = do + layout <- getLayout l + spTree <- titleToElement layout hdrElems + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] [spTree]] +slideToElement (Slide _ l@(MetadataSlide titleElems subtitleElems authorElems dateElems) _) = do + layout <- getLayout l + spTree <- metadataToElement layout titleElems subtitleElems authorElems dateElems + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] [spTree]] + + +-------------------------------------------------------------------- +-- Notes: + +getNotesMaster :: PandocMonad m => P m Element +getNotesMaster = do + let notesMasterPath = "ppt/notesMasters/notesMaster1.xml" + distArchive <- asks envDistArchive + root <- case findEntryByPath notesMasterPath distArchive of + Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of + Just element -> return $ element + Nothing -> throwError $ + PandocSomeError $ + notesMasterPath ++ " corrupt in reference file" + Nothing -> throwError $ + PandocSomeError $ + notesMasterPath ++ " missing in reference file" + return root + +getSlideNumberFieldId :: PandocMonad m => Element -> P m String +getSlideNumberFieldId notesMaster + | ns <- elemToNameSpaces notesMaster + , Just cSld <- findChild (elemName ns "p" "cSld") notesMaster + , Just spTree <- findChild (elemName ns "p" "spTree") cSld + , Just sp <- getShapeByPlaceHolderType ns spTree "sldNum" + , Just txBody <- findChild (elemName ns "p" "txBody") sp + , Just p <- findChild (elemName ns "a" "p") txBody + , Just fld <- findChild (elemName ns "a" "fld") p + , Just fldId <- findAttr (QName "id" Nothing Nothing) fld = + return fldId + | otherwise = throwError $ + PandocSomeError $ + "No field id for slide numbers in notesMaster.xml" + +speakerNotesSlideImage :: Element +speakerNotesSlideImage = + mknode "p:sp" [] $ + [ mknode "p:nvSpPr" [] $ + [ mknode "p:cNvPr" [ ("id", "2") + , ("name", "Slide Image Placeholder 1") + ] () + , mknode "p:cNvSpPr" [] $ + [ mknode "a:spLocks" [ ("noGrp", "1") + , ("noRot", "1") + , ("noChangeAspect", "1") + ] () + ] + , mknode "p:nvPr" [] $ + [ mknode "p:ph" [("type", "sldImg")] ()] + ] + , mknode "p:spPr" [] () + ] + +-- we want to wipe links from the speaker notes in the +-- paragraphs. Powerpoint doesn't allow you to input them, and it +-- would provide extra complications. +removeParaLinks :: Paragraph -> Paragraph +removeParaLinks paragraph = paragraph{paraElems = map f (paraElems paragraph)} + where f (Run rProps s) = Run rProps{rLink=Nothing} s + f pe = pe + +-- put an empty paragraph between paragraphs for more expected spacing. +spaceParas :: [Paragraph] -> [Paragraph] +spaceParas = intersperse (Paragraph def []) + +speakerNotesBody :: PandocMonad m => [Paragraph] -> P m Element +speakerNotesBody paras = do + elements <- mapM paragraphToElement $ spaceParas $ map removeParaLinks paras + let txBody = mknode "p:txBody" [] $ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + return $ + mknode "p:sp" [] $ + [ mknode "p:nvSpPr" [] $ + [ mknode "p:cNvPr" [ ("id", "3") + , ("name", "Notes Placeholder 2") + ] () + , mknode "p:cNvSpPr" [] $ + [ mknode "a:spLocks" [("noGrp", "1")] ()] + , mknode "p:nvPr" [] $ + [ mknode "p:ph" [("type", "body"), ("idx", "1")] ()] + ] + , mknode "p:spPr" [] () + , txBody + ] + +speakerNotesSlideNumber :: Int -> String -> Element +speakerNotesSlideNumber pgNum fieldId = + mknode "p:sp" [] $ + [ mknode "p:nvSpPr" [] $ + [ mknode "p:cNvPr" [ ("id", "4") + , ("name", "Slide Number Placeholder 3") + ] () + , mknode "p:cNvSpPr" [] $ + [ mknode "a:spLocks" [("noGrp", "1")] ()] + , mknode "p:nvPr" [] $ + [ mknode "p:ph" [ ("type", "sldNum") + , ("sz", "quarter") + , ("idx", "10") + ] () + ] + ] + , mknode "p:spPr" [] () + , mknode "p:txBody" [] $ + [ mknode "a:bodyPr" [] () + , mknode "a:lstStyle" [] () + , mknode "a:p" [] $ + [ mknode "a:fld" [ ("id", fieldId) + , ("type", "slidenum") + ] + [ mknode "a:rPr" [("lang", "en-US")] () + , mknode "a:t" [] (show pgNum) + ] + , mknode "a:endParaRPr" [("lang", "en-US")] () + ] + ] + ] + +slideToSpeakerNotesElement :: PandocMonad m => Slide -> P m (Maybe Element) +slideToSpeakerNotesElement slide + | Slide _ _ mbNotes <- slide + , Just (SpeakerNotes paras) <- mbNotes = do + master <- getNotesMaster + fieldId <- getSlideNumberFieldId master + num <- slideNum slide + let imgShape = speakerNotesSlideImage + sldNumShape = speakerNotesSlideNumber num fieldId + bodyShape <- speakerNotesBody paras + return $ Just $ + mknode "p:notes" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main") + , ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships") + , ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [ mknode "p:cSld" [] + [ mknode "p:spTree" [] + [ mknode "p:nvGrpSpPr" [] + [ mknode "p:cNvPr" [("id", "1"), ("name", "")] () + , mknode "p:cNvGrpSpPr" [] () + , mknode "p:nvPr" [] () + ] + , mknode "p:grpSpPr" [] + [ mknode "a:xfrm" [] + [ mknode "a:off" [("x", "0"), ("y", "0")] () + , mknode "a:ext" [("cx", "0"), ("cy", "0")] () + , mknode "a:chOff" [("x", "0"), ("y", "0")] () + , mknode "a:chExt" [("cx", "0"), ("cy", "0")] () + ] + ] + , imgShape + , bodyShape + , sldNumShape + ] + ] + ] +slideToSpeakerNotesElement _ = return Nothing + +----------------------------------------------------------------------- + +getSlideIdNum :: PandocMonad m => SlideId -> P m Int +getSlideIdNum sldId = do + slideIdMap <- asks envSlideIdMap + case M.lookup sldId slideIdMap of + Just n -> return n + Nothing -> throwError $ + PandocShouldNeverHappenError $ + "Slide Id " ++ (show sldId) ++ " not found." + +slideNum :: PandocMonad m => Slide -> P m Int +slideNum slide = getSlideIdNum $ slideId slide + +idNumToFilePath :: Int -> FilePath +idNumToFilePath idNum = "slide" ++ (show $ idNum) ++ ".xml" + +slideToFilePath :: PandocMonad m => Slide -> P m FilePath +slideToFilePath slide = do + idNum <- slideNum slide + return $ "slide" ++ (show $ idNum) ++ ".xml" + +slideToRelId :: PandocMonad m => Slide -> P m String +slideToRelId slide = do + n <- slideNum slide + offset <- asks envSlideIdOffset + return $ "rId" ++ (show $ n + offset) + + +data Relationship = Relationship { relId :: Int + , relType :: MimeType + , relTarget :: FilePath + } deriving (Show, Eq) + +elementToRel :: Element -> Maybe Relationship +elementToRel element + | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing = + do rId <- findAttr (QName "Id" Nothing Nothing) element + numStr <- stripPrefix "rId" rId + num <- case reads numStr :: [(Int, String)] of + (n, _) : _ -> Just n + [] -> Nothing + type' <- findAttr (QName "Type" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element + return $ Relationship num type' target + | otherwise = Nothing + +slideToPresRel :: PandocMonad m => Slide -> P m Relationship +slideToPresRel slide = do + idNum <- slideNum slide + n <- asks envSlideIdOffset + let rId = idNum + n + fp = "slides/" ++ idNumToFilePath idNum + return $ Relationship { relId = rId + , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" + , relTarget = fp + } + +getRels :: PandocMonad m => P m [Relationship] +getRels = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + relsElem <- parseXml refArchive distArchive "ppt/_rels/presentation.xml.rels" + let globalNS = "http://schemas.openxmlformats.org/package/2006/relationships" + let relElems = findChildren (QName "Relationship" (Just globalNS) Nothing) relsElem + return $ mapMaybe elementToRel relElems + +presentationToRels :: PandocMonad m => Presentation -> P m [Relationship] +presentationToRels pres@(Presentation _ slides) = do + mySlideRels <- mapM slideToPresRel slides + let notesMasterRels = + if presHasSpeakerNotes pres + then [Relationship { relId = length mySlideRels + 2 + , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster" + , relTarget = "notesMasters/notesMaster1.xml" + }] + else [] + insertedRels = mySlideRels ++ notesMasterRels + rels <- getRels + -- we remove the slide rels and the notesmaster (if it's + -- there). We'll put these back in ourselves, if necessary. + let relsWeKeep = filter + (\r -> relType r /= "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" && + relType r /= "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") + rels + -- We want to make room for the slides in the id space. The slides + -- will start at Id2 (since Id1 is for the slide master). There are + -- two slides in the data file, but that might change in the future, + -- so we will do this: + -- + -- 1. We look to see what the minimum relWithoutSlide id (greater than 1) is. + -- 2. We add the difference between this and the number of slides to + -- all relWithoutSlide rels (unless they're 1) + -- 3. If we have a notesmaster slide, we make space for that as well. + + let minRelNotOne = case filter (1<) $ map relId relsWeKeep of + [] -> 0 -- doesn't matter in this case, since + -- there will be nothing to map the + -- function over + l -> minimum l + + modifyRelNum :: Int -> Int + modifyRelNum 1 = 1 + modifyRelNum n = n - minRelNotOne + 2 + length insertedRels + + relsWeKeep' = map (\r -> r{relId = modifyRelNum $ relId r}) relsWeKeep + + return $ insertedRels ++ relsWeKeep' + +-- We make this ourselves, in case there's a thumbnail in the one from +-- the template. +topLevelRels :: [Relationship] +topLevelRels = + [ Relationship { relId = 1 + , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" + , relTarget = "ppt/presentation.xml" + } + , Relationship { relId = 2 + , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" + , relTarget = "docProps/core.xml" + } + , Relationship { relId = 3 + , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/extended-properties" + , relTarget = "docProps/app.xml" + } + ] + +topLevelRelsEntry :: PandocMonad m => P m Entry +topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels + +relToElement :: Relationship -> Element +relToElement rel = mknode "Relationship" [ ("Id", "rId" ++ (show $ relId rel)) + , ("Type", relType rel) + , ("Target", relTarget rel) ] () + +relsToElement :: [Relationship] -> Element +relsToElement rels = mknode "Relationships" + [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] + (map relToElement rels) + +presentationToRelsEntry :: PandocMonad m => Presentation -> P m Entry +presentationToRelsEntry pres = do + rels <- presentationToRels pres + elemToEntry "ppt/_rels/presentation.xml.rels" $ relsToElement rels + +elemToEntry :: PandocMonad m => FilePath -> Element -> P m Entry +elemToEntry fp element = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> asks envUTCTime + return $ toEntry fp epochtime $ renderXml element + +slideToEntry :: PandocMonad m => Slide -> P m Entry +slideToEntry slide = do + idNum <- slideNum slide + local (\env -> env{envCurSlideId = idNum}) $ do + element <- slideToElement slide + elemToEntry ("ppt/slides/" ++ idNumToFilePath idNum) element + +slideToSpeakerNotesEntry :: PandocMonad m => Slide -> P m (Maybe Entry) +slideToSpeakerNotesEntry slide = do + idNum <- slideNum slide + local (\env -> env{envCurSlideId = idNum}) $ do + mbElement <- slideToSpeakerNotesElement slide + mbNotesIdNum <- do mp <- asks envSpeakerNotesIdMap + return $ M.lookup idNum mp + case mbElement of + Just element | Just notesIdNum <- mbNotesIdNum -> + Just <$> + elemToEntry + ("ppt/notesSlides/notesSlide" ++ show notesIdNum ++ ".xml") + element + _ -> return Nothing + +slideToSpeakerNotesRelElement :: PandocMonad m => Slide -> P m (Maybe Element) +slideToSpeakerNotesRelElement slide + | Slide _ _ mbNotes <- slide + , Just _ <- mbNotes = do + idNum <- slideNum slide + return $ Just $ + mknode "Relationships" + [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] + [ mknode "Relationship" [ ("Id", "rId2") + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") + , ("Target", "../slides/slide" ++ show idNum ++ ".xml") + ] () + , mknode "Relationship" [ ("Id", "rId1") + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") + , ("Target", "../notesMasters/notesMaster1.xml") + ] () + ] +slideToSpeakerNotesRelElement _ = return Nothing + +slideToSpeakerNotesRelEntry :: PandocMonad m => Slide -> P m (Maybe Entry) +slideToSpeakerNotesRelEntry slide = do + idNum <- slideNum slide + mbElement <- slideToSpeakerNotesRelElement slide + mp <- asks envSpeakerNotesIdMap + let mbNotesIdNum = M.lookup idNum mp + case mbElement of + Just element | Just notesIdNum <- mbNotesIdNum -> + Just <$> + elemToEntry + ("ppt/notesSlides/_rels/notesSlide" ++ show notesIdNum ++ ".xml.rels") + element + _ -> return Nothing + +slideToSlideRelEntry :: PandocMonad m => Slide -> P m Entry +slideToSlideRelEntry slide = do + idNum <- slideNum slide + element <- slideToSlideRelElement slide + elemToEntry ("ppt/slides/_rels/" ++ idNumToFilePath idNum ++ ".rels") element + +linkRelElement :: PandocMonad m => Int -> LinkTarget -> P m Element +linkRelElement rIdNum (InternalTarget targetId) = do + targetIdNum <- getSlideIdNum targetId + return $ + mknode "Relationship" [ ("Id", "rId" ++ show rIdNum) + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") + , ("Target", "slide" ++ show targetIdNum ++ ".xml") + ] () +linkRelElement rIdNum (ExternalTarget (url, _)) = do + return $ + mknode "Relationship" [ ("Id", "rId" ++ show rIdNum) + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") + , ("Target", url) + , ("TargetMode", "External") + ] () + +linkRelElements :: PandocMonad m => M.Map Int LinkTarget -> P m [Element] +linkRelElements mp = mapM (\(n, lnk) -> linkRelElement n lnk) (M.toList mp) + +mediaRelElement :: MediaInfo -> Element +mediaRelElement mInfo = + let ext = case mInfoExt mInfo of + Just e -> e + Nothing -> "" + in + mknode "Relationship" [ ("Id", "rId" ++ (show $ mInfoLocalId mInfo)) + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image") + , ("Target", "../media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext) + ] () + +speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element) +speakerNotesSlideRelElement slide = do + idNum <- slideNum slide + mp <- asks envSpeakerNotesIdMap + return $ case M.lookup idNum mp of + Nothing -> Nothing + Just n -> + let target = "../notesSlides/notesSlide" ++ show n ++ ".xml" + in Just $ + mknode "Relationship" [ ("Id", "rId2") + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide") + , ("Target", target) + ] () + +slideToSlideRelElement :: PandocMonad m => Slide -> P m Element +slideToSlideRelElement slide = do + idNum <- slideNum slide + let target = case slide of + (Slide _ (MetadataSlide _ _ _ _) _) -> "../slideLayouts/slideLayout1.xml" + (Slide _ (TitleSlide _) _) -> "../slideLayouts/slideLayout3.xml" + (Slide _ (ContentSlide _ _) _) -> "../slideLayouts/slideLayout2.xml" + (Slide _ (TwoColumnSlide _ _ _) _) -> "../slideLayouts/slideLayout4.xml" + + speakerNotesRels <- maybeToList <$> speakerNotesSlideRelElement slide + + linkIds <- gets stLinkIds + mediaIds <- gets stMediaIds + + linkRels <- case M.lookup idNum linkIds of + Just mp -> linkRelElements mp + Nothing -> return [] + let mediaRels = case M.lookup idNum mediaIds of + Just mInfos -> map mediaRelElement mInfos + Nothing -> [] + + return $ + mknode "Relationships" + [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] + ([mknode "Relationship" [ ("Id", "rId1") + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout") + , ("Target", target)] () + ] ++ speakerNotesRels ++ linkRels ++ mediaRels) + +slideToSldIdElement :: PandocMonad m => Slide -> P m Element +slideToSldIdElement slide = do + n <- slideNum slide + let id' = show $ n + 255 + rId <- slideToRelId slide + return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () + +presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element +presentationToSldIdLst (Presentation _ slides) = do + ids <- mapM slideToSldIdElement slides + return $ mknode "p:sldIdLst" [] ids + +presentationToPresentationElement :: PandocMonad m => Presentation -> P m Element +presentationToPresentationElement pres@(Presentation _ slds) = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + element <- parseXml refArchive distArchive "ppt/presentation.xml" + sldIdLst <- presentationToSldIdLst pres + + let modifySldIdLst :: Content -> Content + modifySldIdLst (Elem e) = case elName e of + (QName "sldIdLst" _ _) -> Elem sldIdLst + _ -> Elem e + modifySldIdLst ct = ct + + notesMasterRId = length slds + 2 + + notesMasterElem = mknode "p:notesMasterIdLst" [] + [ mknode + "p:NotesMasterId" + [("r:id", "rId" ++ show notesMasterRId)] + () + ] + + -- if there's a notesMasterIdLst in the presentation.xml file, + -- we want to remove it. We then want to put our own, if + -- necessary, after the slideMasterIdLst element. + + removeNotesMaster' :: Content -> [Content] + removeNotesMaster' (Elem e) = case elName e of + (QName "notesMasterIdLst" _ _) -> [] + _ -> [Elem e] + removeNotesMaster' ct = [ct] + + removeNotesMaster :: [Content] -> [Content] + removeNotesMaster = concatMap removeNotesMaster' + + insertNotesMaster' :: Content -> [Content] + insertNotesMaster' (Elem e) = case elName e of + (QName "sldMasterIdLst" _ _) -> [Elem e, Elem notesMasterElem] + _ -> [Elem e] + insertNotesMaster' ct = [ct] + + insertNotesMaster :: [Content] -> [Content] + insertNotesMaster = if presHasSpeakerNotes pres + then concatMap insertNotesMaster' + else id + + newContent = insertNotesMaster $ + removeNotesMaster $ + map modifySldIdLst $ + elContent element + + return $ element{elContent = newContent} + +presentationToPresEntry :: PandocMonad m => Presentation -> P m Entry +presentationToPresEntry pres = presentationToPresentationElement pres >>= + elemToEntry "ppt/presentation.xml" + +-- adapted from the Docx writer +docPropsElement :: PandocMonad m => DocProps -> P m Element +docPropsElement docProps = do + utctime <- asks envUTCTime + let keywords = case dcKeywords docProps of + Just xs -> intercalate "," xs + Nothing -> "" + return $ + mknode "cp:coreProperties" + [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") + ,("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:dcterms","http://purl.org/dc/terms/") + ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") + ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] + $ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps) + : (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps) + : (mknode "cp:keywords" [] keywords) + : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x + ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + +docPropsToEntry :: PandocMonad m => DocProps -> P m Entry +docPropsToEntry docProps = docPropsElement docProps >>= + elemToEntry "docProps/core.xml" + + +defaultContentTypeToElem :: DefaultContentType -> Element +defaultContentTypeToElem dct = + mknode "Default" + [("Extension", defContentTypesExt dct), + ("ContentType", defContentTypesType dct)] + () + +overrideContentTypeToElem :: OverrideContentType -> Element +overrideContentTypeToElem oct = + mknode "Override" + [("PartName", overrideContentTypesPart oct), + ("ContentType", overrideContentTypesType oct)] + () + +contentTypesToElement :: ContentTypes -> Element +contentTypesToElement ct = + let ns = "http://schemas.openxmlformats.org/package/2006/content-types" + in + mknode "Types" [("xmlns", ns)] $ + (map defaultContentTypeToElem $ contentTypesDefaults ct) ++ + (map overrideContentTypeToElem $ contentTypesOverrides ct) + +data DefaultContentType = DefaultContentType + { defContentTypesExt :: String + , defContentTypesType:: MimeType + } + deriving (Show, Eq) + +data OverrideContentType = OverrideContentType + { overrideContentTypesPart :: FilePath + , overrideContentTypesType :: MimeType + } + deriving (Show, Eq) + +data ContentTypes = ContentTypes { contentTypesDefaults :: [DefaultContentType] + , contentTypesOverrides :: [OverrideContentType] + } + deriving (Show, Eq) + +contentTypesToEntry :: PandocMonad m => ContentTypes -> P m Entry +contentTypesToEntry ct = elemToEntry "[Content_Types].xml" $ contentTypesToElement ct + +pathToOverride :: FilePath -> Maybe OverrideContentType +pathToOverride fp = OverrideContentType ("/" ++ fp) <$> (getContentType fp) + +mediaFileContentType :: FilePath -> Maybe DefaultContentType +mediaFileContentType fp = case takeExtension fp of + '.' : ext -> Just $ + DefaultContentType { defContentTypesExt = ext + , defContentTypesType = + case getMimeType fp of + Just mt -> mt + Nothing -> "application/octet-stream" + } + _ -> Nothing + +mediaContentType :: MediaInfo -> Maybe DefaultContentType +mediaContentType mInfo + | Just ('.' : ext) <- mInfoExt mInfo = + Just $ DefaultContentType { defContentTypesExt = ext + , defContentTypesType = + case mInfoMimeType mInfo of + Just mt -> mt + Nothing -> "application/octet-stream" + } + | otherwise = Nothing + +getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath] +getSpeakerNotesFilePaths = do + mp <- asks envSpeakerNotesIdMap + let notesIdNums = M.elems mp + return $ map (\n -> "ppt/notesSlides/notesSlide" ++ show n ++ ".xml") notesIdNums + +presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes +presentationToContentTypes p@(Presentation _ slides) = do + mediaInfos <- (mconcat . M.elems) <$> gets stMediaIds + filePaths <- patternsToFilePaths $ inheritedPatterns p + let mediaFps = filter (match (compile "ppt/media/image*")) filePaths + let defaults = [ DefaultContentType "xml" "application/xml" + , DefaultContentType "rels" "application/vnd.openxmlformats-package.relationships+xml" + ] + mediaDefaults = nub $ + (mapMaybe mediaContentType $ mediaInfos) ++ + (mapMaybe mediaFileContentType $ mediaFps) + + inheritedOverrides = mapMaybe pathToOverride filePaths + docPropsOverride = mapMaybe pathToOverride ["docProps/core.xml"] + presOverride = mapMaybe pathToOverride ["ppt/presentation.xml"] + relativePaths <- mapM slideToFilePath slides + let slideOverrides = mapMaybe + (\fp -> pathToOverride $ "ppt/slides/" ++ fp) + relativePaths + speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths + return $ ContentTypes + (defaults ++ mediaDefaults) + (inheritedOverrides ++ docPropsOverride ++ presOverride ++ slideOverrides ++ speakerNotesOverrides) + +presML :: String +presML = "application/vnd.openxmlformats-officedocument.presentationml" + +noPresML :: String +noPresML = "application/vnd.openxmlformats-officedocument" + +getContentType :: FilePath -> Maybe MimeType +getContentType fp + | fp == "ppt/presentation.xml" = Just $ presML ++ ".presentation.main+xml" + | fp == "ppt/presProps.xml" = Just $ presML ++ ".presProps+xml" + | fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml" + | fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml" + | fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml" + | fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml" + | "ppt" : "slideMasters" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ presML ++ ".slideMaster+xml" + | "ppt" : "slides" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ presML ++ ".slide+xml" + | "ppt" : "notesMasters" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ presML ++ ".notesMaster+xml" + | "ppt" : "notesSlides" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ presML ++ ".notesSlide+xml" + | "ppt" : "theme" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ noPresML ++ ".theme+xml" + | "ppt" : "slideLayouts" : _ : [] <- splitDirectories fp= + Just $ presML ++ ".slideLayout+xml" + | otherwise = Nothing + +autoNumberingToType :: ListAttributes -> String +autoNumberingToType (_, numStyle, numDelim) = + typeString ++ delimString + where + typeString = case numStyle of + Decimal -> "arabic" + UpperAlpha -> "alphaUc" + LowerAlpha -> "alphaLc" + UpperRoman -> "romanUc" + LowerRoman -> "romanLc" + _ -> "arabic" + delimString = case numDelim of + Period -> "Period" + OneParen -> "ParenR" + TwoParens -> "ParenBoth" + _ -> "Period" diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs new file mode 100644 index 000000000..396469edd --- /dev/null +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -0,0 +1,987 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Powerpoint.Presentation + Copyright : Copyright (C) 2017-2018 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Definition of Presentation datatype, modeling a MS Powerpoint (pptx) +document, and functions for converting a Pandoc document to +Presentation. +-} + +module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation + , Presentation(..) + , DocProps(..) + , Slide(..) + , Layout(..) + , SpeakerNotes(..) + , SlideId(..) + , Shape(..) + , Graphic(..) + , BulletType(..) + , Algnment(..) + , Paragraph(..) + , ParaElem(..) + , ParaProps(..) + , RunProps(..) + , TableProps(..) + , Strikethrough(..) + , Capitals(..) + , PicProps(..) + , URL + , TeXString(..) + , LinkTarget(..) + ) where + + +import Control.Monad.Reader +import Control.Monad.State +import Data.List (intercalate) +import Data.Default +import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Slides (getSlideLevel) +import Text.Pandoc.Options +import Text.Pandoc.Logging +import Text.Pandoc.Walk +import Text.Pandoc.Compat.Time (UTCTime) +import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element" +import Text.Pandoc.Writers.Shared (metaValueToInlines) +import qualified Data.Map as M +import qualified Data.Set as S +import Data.Maybe (maybeToList, fromMaybe) +import Text.Pandoc.Highlighting +import qualified Data.Text as T +import Control.Applicative ((<|>)) +import Skylighting + +data WriterEnv = WriterEnv { envMetadata :: Meta + , envRunProps :: RunProps + , envParaProps :: ParaProps + , envSlideLevel :: Int + , envOpts :: WriterOptions + , envSlideHasHeader :: Bool + , envInList :: Bool + , envInNoteSlide :: Bool + , envCurSlideId :: SlideId + , envInSpeakerNotes :: Bool + } + deriving (Show) + +instance Default WriterEnv where + def = WriterEnv { envMetadata = mempty + , envRunProps = def + , envParaProps = def + , envSlideLevel = 2 + , envOpts = def + , envSlideHasHeader = False + , envInList = False + , envInNoteSlide = False + , envCurSlideId = SlideId "Default" + , envInSpeakerNotes = False + } + + +data WriterState = WriterState { stNoteIds :: M.Map Int [Block] + -- associate anchors with slide id + , stAnchorMap :: M.Map String SlideId + , stSlideIdSet :: S.Set SlideId + , stLog :: [LogMessage] + , stSpeakerNotesMap :: M.Map SlideId [[Paragraph]] + } deriving (Show, Eq) + +instance Default WriterState where + def = WriterState { stNoteIds = mempty + , stAnchorMap = mempty + -- we reserve this s + , stSlideIdSet = reservedSlideIds + , stLog = [] + , stSpeakerNotesMap = mempty + } + +metadataSlideId :: SlideId +metadataSlideId = SlideId "Metadata" + +tocSlideId :: SlideId +tocSlideId = SlideId "TOC" + +endNotesSlideId :: SlideId +endNotesSlideId = SlideId "EndNotes" + +reservedSlideIds :: S.Set SlideId +reservedSlideIds = S.fromList [ metadataSlideId + , tocSlideId + , endNotesSlideId + ] + +uniqueSlideId' :: Integer -> S.Set SlideId -> String -> SlideId +uniqueSlideId' n idSet s = + let s' = if n == 0 then s else s ++ "-" ++ show n + in if SlideId s' `S.member` idSet + then uniqueSlideId' (n+1) idSet s + else SlideId s' + +uniqueSlideId :: S.Set SlideId -> String -> SlideId +uniqueSlideId = uniqueSlideId' 0 + +runUniqueSlideId :: String -> Pres SlideId +runUniqueSlideId s = do + idSet <- gets stSlideIdSet + let sldId = uniqueSlideId idSet s + modify $ \st -> st{stSlideIdSet = S.insert sldId idSet} + return sldId + +addLogMessage :: LogMessage -> Pres () +addLogMessage msg = modify $ \st -> st{stLog = msg : stLog st} + +type Pres = ReaderT WriterEnv (State WriterState) + +runPres :: WriterEnv -> WriterState -> Pres a -> (a, [LogMessage]) +runPres env st p = (pres, reverse $ stLog finalSt) + where (pres, finalSt) = runState (runReaderT p env) st + +-- GHC 7.8 will still complain about concat <$> mapM unless we specify +-- Functor. We can get rid of this when we stop supporting GHC 7.8. +concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] +concatMapM f xs = liftM concat (mapM f xs) + +type Pixels = Integer + +data Presentation = Presentation DocProps [Slide] + deriving (Show) + +data DocProps = DocProps { dcTitle :: Maybe String + , dcSubject :: Maybe String + , dcCreator :: Maybe String + , dcKeywords :: Maybe [String] + , dcCreated :: Maybe UTCTime + } deriving (Show, Eq) + + +data Slide = Slide { slideId :: SlideId + , slideLayout :: Layout + , slideSpeakerNotes :: Maybe SpeakerNotes + } deriving (Show, Eq) + +newtype SlideId = SlideId String + deriving (Show, Eq, Ord) + +-- In theory you could have anything on a notes slide but it seems +-- designed mainly for one textbox, so we'll just put in the contents +-- of that textbox, to avoid other shapes that won't work as well. +newtype SpeakerNotes = SpeakerNotes {fromSpeakerNotes :: [Paragraph]} + deriving (Show, Eq) + +data Layout = MetadataSlide { metadataSlideTitle :: [ParaElem] + , metadataSlideSubtitle :: [ParaElem] + , metadataSlideAuthors :: [[ParaElem]] + , metadataSlideDate :: [ParaElem] + } + | TitleSlide { titleSlideHeader :: [ParaElem]} + | ContentSlide { contentSlideHeader :: [ParaElem] + , contentSlideContent :: [Shape] + } + | TwoColumnSlide { twoColumnSlideHeader :: [ParaElem] + , twoColumnSlideLeft :: [Shape] + , twoColumnSlideRight :: [Shape] + } + deriving (Show, Eq) + +data Shape = Pic PicProps FilePath [ParaElem] + | GraphicFrame [Graphic] [ParaElem] + | TextBox [Paragraph] + deriving (Show, Eq) + +type Cell = [Paragraph] + +data TableProps = TableProps { tblPrFirstRow :: Bool + , tblPrBandRow :: Bool + } deriving (Show, Eq) + +data Graphic = Tbl TableProps [Cell] [[Cell]] + deriving (Show, Eq) + + +data Paragraph = Paragraph { paraProps :: ParaProps + , paraElems :: [ParaElem] + } deriving (Show, Eq) + +data BulletType = Bullet + | AutoNumbering ListAttributes + deriving (Show, Eq) + +data Algnment = AlgnLeft | AlgnRight | AlgnCenter + deriving (Show, Eq) + +data ParaProps = ParaProps { pPropMarginLeft :: Maybe Pixels + , pPropMarginRight :: Maybe Pixels + , pPropLevel :: Int + , pPropBullet :: Maybe BulletType + , pPropAlign :: Maybe Algnment + , pPropSpaceBefore :: Maybe Pixels + } deriving (Show, Eq) + +instance Default ParaProps where + def = ParaProps { pPropMarginLeft = Just 0 + , pPropMarginRight = Just 0 + , pPropLevel = 0 + , pPropBullet = Nothing + , pPropAlign = Nothing + , pPropSpaceBefore = Nothing + } + +newtype TeXString = TeXString {unTeXString :: String} + deriving (Eq, Show) + +data ParaElem = Break + | Run RunProps String + -- It would be more elegant to have native TeXMath + -- Expressions here, but this allows us to use + -- `convertmath` from T.P.Writers.Math. Will perhaps + -- revisit in the future. + | MathElem MathType TeXString + deriving (Show, Eq) + +data Strikethrough = NoStrike | SingleStrike | DoubleStrike + deriving (Show, Eq) + +data Capitals = NoCapitals | SmallCapitals | AllCapitals + deriving (Show, Eq) + +type URL = String + +data LinkTarget = ExternalTarget (URL, String) + | InternalTarget SlideId + deriving (Show, Eq) + +data RunProps = RunProps { rPropBold :: Bool + , rPropItalics :: Bool + , rStrikethrough :: Maybe Strikethrough + , rBaseline :: Maybe Int + , rCap :: Maybe Capitals + , rLink :: Maybe LinkTarget + , rPropCode :: Bool + , rPropBlockQuote :: Bool + , rPropForceSize :: Maybe Pixels + , rSolidFill :: Maybe Color + -- TODO: Make a full underline data type with + -- the different options. + , rPropUnderline :: Bool + } deriving (Show, Eq) + +instance Default RunProps where + def = RunProps { rPropBold = False + , rPropItalics = False + , rStrikethrough = Nothing + , rBaseline = Nothing + , rCap = Nothing + , rLink = Nothing + , rPropCode = False + , rPropBlockQuote = False + , rPropForceSize = Nothing + , rSolidFill = Nothing + , rPropUnderline = False + } + +data PicProps = PicProps { picPropLink :: Maybe LinkTarget + , picWidth :: Maybe Dimension + , picHeight :: Maybe Dimension + } deriving (Show, Eq) + +instance Default PicProps where + def = PicProps { picPropLink = Nothing + , picWidth = Nothing + , picHeight = Nothing + } + +-------------------------------------------------- + +inlinesToParElems :: [Inline] -> Pres [ParaElem] +inlinesToParElems ils = concatMapM inlineToParElems ils + +inlineToParElems :: Inline -> Pres [ParaElem] +inlineToParElems (Str s) = do + pr <- asks envRunProps + return [Run pr s] +inlineToParElems (Emph ils) = + local (\r -> r{envRunProps = (envRunProps r){rPropItalics=True}}) $ + inlinesToParElems ils +inlineToParElems (Strong ils) = + local (\r -> r{envRunProps = (envRunProps r){rPropBold=True}}) $ + inlinesToParElems ils +inlineToParElems (Strikeout ils) = + local (\r -> r{envRunProps = (envRunProps r){rStrikethrough=Just SingleStrike}}) $ + inlinesToParElems ils +inlineToParElems (Superscript ils) = + local (\r -> r{envRunProps = (envRunProps r){rBaseline=Just 30000}}) $ + inlinesToParElems ils +inlineToParElems (Subscript ils) = + local (\r -> r{envRunProps = (envRunProps r){rBaseline=Just (-25000)}}) $ + inlinesToParElems ils +inlineToParElems (SmallCaps ils) = + local (\r -> r{envRunProps = (envRunProps r){rCap = Just SmallCapitals}}) $ + inlinesToParElems ils +inlineToParElems Space = inlineToParElems (Str " ") +inlineToParElems SoftBreak = inlineToParElems (Str " ") +inlineToParElems LineBreak = return [Break] +inlineToParElems (Link _ ils (url, title)) = + local (\r ->r{envRunProps = (envRunProps r){rLink = Just $ ExternalTarget (url, title)}}) $ + inlinesToParElems ils +inlineToParElems (Code _ str) = + local (\r ->r{envRunProps = (envRunProps r){rPropCode = True}}) $ + inlineToParElems $ Str str +inlineToParElems (Math mathtype str) = + return [MathElem mathtype (TeXString str)] +-- We ignore notes if we're in a speaker notes div. Otherwise this +-- would add an entry to the endnotes slide, which would put speaker +-- notes in the public presentation. In the future, we can entertain a +-- way of adding a speakernotes-specific note that would just add +-- paragraphs to the bottom of the notes page. +inlineToParElems (Note blks) = do + inSpNotes <- asks envInSpeakerNotes + if inSpNotes + then return [] + else do + notes <- gets stNoteIds + let maxNoteId = case M.keys notes of + [] -> 0 + lst -> maximum lst + curNoteId = maxNoteId + 1 + modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes } + local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $ + inlineToParElems $ Superscript [Str $ show curNoteId] +inlineToParElems (Span _ ils) = concatMapM inlineToParElems ils +inlineToParElems (RawInline _ _) = return [] +inlineToParElems _ = return [] + +isListType :: Block -> Bool +isListType (OrderedList _ _) = True +isListType (BulletList _) = True +isListType (DefinitionList _) = True +isListType _ = False + +registerAnchorId :: String -> Pres () +registerAnchorId anchor = do + anchorMap <- gets stAnchorMap + sldId <- asks envCurSlideId + unless (null anchor) $ + modify $ \st -> st {stAnchorMap = M.insert anchor sldId anchorMap} + +-- Currently hardcoded, until I figure out how to make it dynamic. +blockQuoteSize :: Pixels +blockQuoteSize = 20 + +noteSize :: Pixels +noteSize = 18 + +blockToParagraphs :: Block -> Pres [Paragraph] +blockToParagraphs (Plain ils) = do + parElems <- inlinesToParElems ils + pProps <- asks envParaProps + return [Paragraph pProps parElems] +blockToParagraphs (Para ils) = do + parElems <- inlinesToParElems ils + pProps <- asks envParaProps + return [Paragraph pProps parElems] +blockToParagraphs (LineBlock ilsList) = do + parElems <- inlinesToParElems $ intercalate [LineBreak] ilsList + pProps <- asks envParaProps + return [Paragraph pProps parElems] +-- TODO: work out the attributes +blockToParagraphs (CodeBlock attr str) = + local (\r -> r{ envParaProps = def{pPropMarginLeft = Just 100} + , envRunProps = (envRunProps r){rPropCode = True}}) $ do + mbSty <- writerHighlightStyle <$> asks envOpts + synMap <- writerSyntaxMap <$> asks envOpts + case mbSty of + Just sty -> + case highlight synMap (formatSourceLines sty) attr str of + Right pElems -> do pProps <- asks envParaProps + return [Paragraph pProps pElems] + Left _ -> blockToParagraphs $ Para [Str str] + Nothing -> blockToParagraphs $ Para [Str str] +-- We can't yet do incremental lists, but we should render a +-- (BlockQuote List) as a list to maintain compatibility with other +-- formats. +blockToParagraphs (BlockQuote (blk : blks)) | isListType blk = do + ps <- blockToParagraphs blk + ps' <- blockToParagraphs $ BlockQuote blks + return $ ps ++ ps' +blockToParagraphs (BlockQuote blks) = + local (\r -> r{ envParaProps = (envParaProps r){pPropMarginLeft = Just 100} + , envRunProps = (envRunProps r){rPropForceSize = Just blockQuoteSize}})$ + concatMapM blockToParagraphs blks +-- TODO: work out the format +blockToParagraphs (RawBlock _ _) = return [] +blockToParagraphs (Header _ (ident, _, _) ils) = do + -- Note that this function only deals with content blocks, so it + -- will only touch headers that are above the current slide level -- + -- slides at or below the slidelevel will be taken care of by + -- `blocksToSlide'`. We have the register anchors in both of them. + registerAnchorId ident + -- we set the subeader to bold + parElems <- local (\e->e{envRunProps = (envRunProps e){rPropBold=True}}) $ + inlinesToParElems ils + -- and give it a bit of space before it. + return [Paragraph def{pPropSpaceBefore = Just 30} parElems] +blockToParagraphs (BulletList blksLst) = do + pProps <- asks envParaProps + let lvl = pPropLevel pProps + local (\env -> env{ envInList = True + , envParaProps = pProps{ pPropLevel = lvl + 1 + , pPropBullet = Just Bullet + , pPropMarginLeft = Nothing + }}) $ + concatMapM multiParBullet blksLst +blockToParagraphs (OrderedList listAttr blksLst) = do + pProps <- asks envParaProps + let lvl = pPropLevel pProps + local (\env -> env{ envInList = True + , envParaProps = pProps{ pPropLevel = lvl + 1 + , pPropBullet = Just (AutoNumbering listAttr) + , pPropMarginLeft = Nothing + }}) $ + concatMapM multiParBullet blksLst +blockToParagraphs (DefinitionList entries) = do + let go :: ([Inline], [[Block]]) -> Pres [Paragraph] + go (ils, blksLst) = do + term <-blockToParagraphs $ Para [Strong ils] + -- For now, we'll treat each definition term as a + -- blockquote. We can extend this further later. + definition <- concatMapM (blockToParagraphs . BlockQuote) blksLst + return $ term ++ definition + concatMapM go entries +blockToParagraphs (Div (_, "notes" : [], _) blks) = + local (\env -> env{envInSpeakerNotes=True}) $ do + sldId <- asks envCurSlideId + spkNotesMap <- gets stSpeakerNotesMap + paras <- concatMapM blockToParagraphs blks + let spkNotesMap' = case M.lookup sldId spkNotesMap of + Just lst -> M.insert sldId (paras : lst) spkNotesMap + Nothing -> M.insert sldId [paras] spkNotesMap + modify $ \st -> st{stSpeakerNotesMap = spkNotesMap'} + return [] +blockToParagraphs (Div _ blks) = concatMapM blockToParagraphs blks +blockToParagraphs blk = do + addLogMessage $ BlockNotRendered blk + return [] + +-- Make sure the bullet env gets turned off after the first para. +multiParBullet :: [Block] -> Pres [Paragraph] +multiParBullet [] = return [] +multiParBullet (b:bs) = do + pProps <- asks envParaProps + p <- blockToParagraphs b + ps <- local (\env -> env{envParaProps = pProps{pPropBullet = Nothing}}) $ + concatMapM blockToParagraphs bs + return $ p ++ ps + +cellToParagraphs :: Alignment -> TableCell -> Pres [Paragraph] +cellToParagraphs algn tblCell = do + paras <- mapM blockToParagraphs tblCell + let alignment = case algn of + AlignLeft -> Just AlgnLeft + AlignRight -> Just AlgnRight + AlignCenter -> Just AlgnCenter + AlignDefault -> Nothing + paras' = map (map (\p -> p{paraProps = (paraProps p){pPropAlign = alignment}})) paras + return $ concat paras' + +rowToParagraphs :: [Alignment] -> [TableCell] -> Pres [[Paragraph]] +rowToParagraphs algns tblCells = do + -- We have to make sure we have the right number of alignments + let pairs = zip (algns ++ repeat AlignDefault) tblCells + mapM (uncurry cellToParagraphs) pairs + +withAttr :: Attr -> Shape -> Shape +withAttr attr (Pic picPr url caption) = + let picPr' = picPr { picWidth = dimension Width attr + , picHeight = dimension Height attr + } + in + Pic picPr' url caption +withAttr _ sp = sp + +blockToShape :: Block -> Pres Shape +blockToShape (Plain (il:_)) | Image attr ils (url, _) <- il = + (withAttr attr . Pic def url) <$> inlinesToParElems ils +blockToShape (Para (il:_)) | Image attr ils (url, _) <- il = + (withAttr attr . Pic def url) <$> inlinesToParElems ils +blockToShape (Plain (il:_)) | Link _ (il':_) target <- il + , Image attr ils (url, _) <- il' = + (withAttr attr . Pic def {picPropLink = Just $ ExternalTarget target} url) <$> + inlinesToParElems ils +blockToShape (Para (il:_)) | Link _ (il':_) target <- il + , Image attr ils (url, _) <- il' = + (withAttr attr . Pic def{picPropLink = Just $ ExternalTarget target} url) <$> + inlinesToParElems ils +blockToShape (Table caption algn _ hdrCells rows) = do + caption' <- inlinesToParElems caption + hdrCells' <- rowToParagraphs algn hdrCells + rows' <- mapM (rowToParagraphs algn) rows + let tblPr = if null hdrCells + then TableProps { tblPrFirstRow = False + , tblPrBandRow = True + } + else TableProps { tblPrFirstRow = True + , tblPrBandRow = True + } + + return $ GraphicFrame [Tbl tblPr hdrCells' rows'] caption' +blockToShape blk = do paras <- blockToParagraphs blk + let paras' = map (\par -> par{paraElems = combineParaElems $ paraElems par}) paras + return $ TextBox paras' + +combineShapes :: [Shape] -> [Shape] +combineShapes [] = [] +combineShapes[s] = [s] +combineShapes (pic@(Pic{}) : ss) = pic : combineShapes ss +combineShapes (TextBox [] : ss) = combineShapes ss +combineShapes (s : TextBox [] : ss) = combineShapes (s : ss) +combineShapes (TextBox (p:ps) : TextBox (p':ps') : ss) = + combineShapes $ TextBox ((p:ps) ++ (p':ps')) : ss +combineShapes (s:ss) = s : combineShapes ss + +blocksToShapes :: [Block] -> Pres [Shape] +blocksToShapes blks = combineShapes <$> mapM blockToShape blks + +isImage :: Inline -> Bool +isImage (Image{}) = True +isImage (Link _ (Image _ _ _ : _) _) = True +isImage _ = False + +splitBlocks' :: [Block] -> [[Block]] -> [Block] -> Pres [[Block]] +splitBlocks' cur acc [] = return $ acc ++ (if null cur then [] else [cur]) +splitBlocks' cur acc (HorizontalRule : blks) = + splitBlocks' [] (acc ++ (if null cur then [] else [cur])) blks +splitBlocks' cur acc (h@(Header n _ _) : blks) = do + slideLevel <- asks envSlideLevel + case compare n slideLevel of + LT -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[h]]) blks + EQ -> splitBlocks' [h] (acc ++ (if null cur then [] else [cur])) blks + GT -> splitBlocks' (cur ++ [h]) acc blks +-- `blockToParagraphs` treats Plain and Para the same, so we can save +-- some code duplication by treating them the same here. +splitBlocks' cur acc (Plain ils : blks) = splitBlocks' cur acc (Para ils : blks) +splitBlocks' cur acc (Para (il:ils) : blks) | isImage il = do + slideLevel <- asks envSlideLevel + case cur of + [(Header n _ _)] | n == slideLevel -> + splitBlocks' [] + (acc ++ [cur ++ [Para [il]]]) + (if null ils then blks else Para ils : blks) + _ -> splitBlocks' [] + (acc ++ (if null cur then [] else [cur]) ++ [[Para [il]]]) + (if null ils then blks else Para ils : blks) +splitBlocks' cur acc (tbl@(Table{}) : blks) = do + slideLevel <- asks envSlideLevel + case cur of + [(Header n _ _)] | n == slideLevel -> + splitBlocks' [] (acc ++ [cur ++ [tbl]]) blks + _ -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[tbl]]) blks +splitBlocks' cur acc (d@(Div (_, classes, _) _): blks) | "columns" `elem` classes = do + slideLevel <- asks envSlideLevel + case cur of + [(Header n _ _)] | n == slideLevel -> + splitBlocks' [] (acc ++ [cur ++ [d]]) blks + _ -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[d]]) blks +splitBlocks' cur acc (blk : blks) = splitBlocks' (cur ++ [blk]) acc blks + +splitBlocks :: [Block] -> Pres [[Block]] +splitBlocks = splitBlocks' [] [] + +getSpeakerNotes :: Pres (Maybe SpeakerNotes) +getSpeakerNotes = do + sldId <- asks envCurSlideId + spkNtsMap <- gets stSpeakerNotesMap + return $ (SpeakerNotes . concat . reverse) <$> (M.lookup sldId spkNtsMap) + +blocksToSlide' :: Int -> [Block] -> Pres Slide +blocksToSlide' lvl (Header n (ident, _, _) ils : blks) + | n < lvl = do + registerAnchorId ident + sldId <- asks envCurSlideId + hdr <- inlinesToParElems ils + return $ Slide sldId TitleSlide {titleSlideHeader = hdr} Nothing + | n == lvl = do + registerAnchorId ident + hdr <- inlinesToParElems ils + -- Now get the slide without the header, and then add the header + -- in. + slide <- blocksToSlide' lvl blks + let layout = case slideLayout slide of + ContentSlide _ cont -> ContentSlide hdr cont + TwoColumnSlide _ contL contR -> TwoColumnSlide hdr contL contR + layout' -> layout' + return $ slide{slideLayout = layout} +blocksToSlide' _ (blk : blks) + | Div (_, classes, _) divBlks <- blk + , "columns" `elem` classes + , Div (_, clsL, _) blksL : Div (_, clsR, _) blksR : remaining <- divBlks + , "column" `elem` clsL, "column" `elem` clsR = do + unless (null blks) + (mapM (addLogMessage . BlockNotRendered) blks >> return ()) + unless (null remaining) + (mapM (addLogMessage . BlockNotRendered) remaining >> return ()) + mbSplitBlksL <- splitBlocks blksL + mbSplitBlksR <- splitBlocks blksR + let blksL' = case mbSplitBlksL of + bs : _ -> bs + [] -> [] + let blksR' = case mbSplitBlksR of + bs : _ -> bs + [] -> [] + shapesL <- blocksToShapes blksL' + shapesR <- blocksToShapes blksR' + sldId <- asks envCurSlideId + return $ Slide + sldId + TwoColumnSlide { twoColumnSlideHeader = [] + , twoColumnSlideLeft = shapesL + , twoColumnSlideRight = shapesR + } + Nothing +blocksToSlide' _ (blk : blks) = do + inNoteSlide <- asks envInNoteSlide + shapes <- if inNoteSlide + then forceFontSize noteSize $ blocksToShapes (blk : blks) + else blocksToShapes (blk : blks) + sldId <- asks envCurSlideId + return $ + Slide + sldId + ContentSlide { contentSlideHeader = [] + , contentSlideContent = shapes + } + Nothing +blocksToSlide' _ [] = do + sldId <- asks envCurSlideId + return $ + Slide + sldId + ContentSlide { contentSlideHeader = [] + , contentSlideContent = [] + } + Nothing + +blocksToSlide :: [Block] -> Pres Slide +blocksToSlide blks = do + slideLevel <- asks envSlideLevel + sld <- blocksToSlide' slideLevel blks + spkNotes <- getSpeakerNotes + return $ sld{slideSpeakerNotes = spkNotes} + +makeNoteEntry :: Int -> [Block] -> [Block] +makeNoteEntry n blks = + let enum = Str (show n ++ ".") + in + case blks of + (Para ils : blks') -> (Para $ enum : Space : ils) : blks' + _ -> Para [enum] : blks + +forceFontSize :: Pixels -> Pres a -> Pres a +forceFontSize px x = do + rpr <- asks envRunProps + local (\r -> r {envRunProps = rpr{rPropForceSize = Just px}}) x + +-- We leave these as blocks because we will want to include them in +-- the TOC. +makeEndNotesSlideBlocks :: Pres [Block] +makeEndNotesSlideBlocks = do + noteIds <- gets stNoteIds + slideLevel <- asks envSlideLevel + meta <- asks envMetadata + -- Get identifiers so we can give the notes section a unique ident. + anchorSet <- M.keysSet <$> gets stAnchorMap + if M.null noteIds + then return [] + else do let title = case lookupMeta "notes-title" meta of + Just val -> metaValueToInlines val + Nothing -> [Str "Notes"] + ident = Shared.uniqueIdent title anchorSet + hdr = Header slideLevel (ident, [], []) title + blks <- return $ + concatMap (\(n, bs) -> makeNoteEntry n bs) $ + M.toList noteIds + return $ hdr : blks + +getMetaSlide :: Pres (Maybe Slide) +getMetaSlide = do + meta <- asks envMetadata + title <- inlinesToParElems $ docTitle meta + subtitle <- inlinesToParElems $ + case lookupMeta "subtitle" meta of + Just (MetaString s) -> [Str s] + Just (MetaInlines ils) -> ils + Just (MetaBlocks [Plain ils]) -> ils + Just (MetaBlocks [Para ils]) -> ils + _ -> [] + authors <- mapM inlinesToParElems $ docAuthors meta + date <- inlinesToParElems $ docDate meta + if null title && null subtitle && null authors && null date + then return Nothing + else return $ + Just $ + Slide + metadataSlideId + MetadataSlide { metadataSlideTitle = title + , metadataSlideSubtitle = subtitle + , metadataSlideAuthors = authors + , metadataSlideDate = date + } + Nothing + +-- adapted from the markdown writer +elementToListItem :: Shared.Element -> Pres [Block] +elementToListItem (Shared.Sec lev _nums (ident,_,_) headerText subsecs) = do + opts <- asks envOpts + let headerLink = if null ident + then walk Shared.deNote headerText + else [Link nullAttr (walk Shared.deNote headerText) + ('#':ident, "")] + listContents <- if null subsecs || lev >= writerTOCDepth opts + then return [] + else mapM elementToListItem subsecs + return [Plain headerLink, BulletList listContents] +elementToListItem (Shared.Blk _) = return [] + +makeTOCSlide :: [Block] -> Pres Slide +makeTOCSlide blks = local (\env -> env{envCurSlideId = tocSlideId}) $ do + contents <- BulletList <$> mapM elementToListItem (Shared.hierarchicalize blks) + meta <- asks envMetadata + slideLevel <- asks envSlideLevel + let tocTitle = case lookupMeta "toc-title" meta of + Just val -> metaValueToInlines val + Nothing -> [Str "Table of Contents"] + hdr = Header slideLevel nullAttr tocTitle + sld <- blocksToSlide [hdr, contents] + return sld + +combineParaElems' :: Maybe ParaElem -> [ParaElem] -> [ParaElem] +combineParaElems' mbPElem [] = maybeToList mbPElem +combineParaElems' Nothing (pElem : pElems) = + combineParaElems' (Just pElem) pElems +combineParaElems' (Just pElem') (pElem : pElems) + | Run rPr' s' <- pElem' + , Run rPr s <- pElem + , rPr == rPr' = + combineParaElems' (Just $ Run rPr' $ s' ++ s) pElems + | otherwise = + pElem' : combineParaElems' (Just pElem) pElems + +combineParaElems :: [ParaElem] -> [ParaElem] +combineParaElems = combineParaElems' Nothing + +applyToParagraph :: Monad m => (ParaElem -> m ParaElem) -> Paragraph -> m Paragraph +applyToParagraph f para = do + paraElems' <- mapM f $ paraElems para + return $ para {paraElems = paraElems'} + +applyToShape :: Monad m => (ParaElem -> m ParaElem) -> Shape -> m Shape +applyToShape f (Pic pPr fp pes) = do + pes' <- mapM f pes + return $ Pic pPr fp pes' +applyToShape f (GraphicFrame gfx pes) = do + pes' <- mapM f pes + return $ GraphicFrame gfx pes' +applyToShape f (TextBox paras) = do + paras' <- mapM (applyToParagraph f) paras + return $ TextBox paras' + +applyToLayout :: Monad m => (ParaElem -> m ParaElem) -> Layout -> m Layout +applyToLayout f (MetadataSlide title subtitle authors date) = do + title' <- mapM f title + subtitle' <- mapM f subtitle + authors' <- mapM (mapM f) authors + date' <- mapM f date + return $ MetadataSlide title' subtitle' authors' date' +applyToLayout f (TitleSlide title) = do + title' <- mapM f title + return $ TitleSlide title' +applyToLayout f (ContentSlide hdr content) = do + hdr' <- mapM f hdr + content' <- mapM (applyToShape f) content + return $ ContentSlide hdr' content' +applyToLayout f (TwoColumnSlide hdr contentL contentR) = do + hdr' <- mapM f hdr + contentL' <- mapM (applyToShape f) contentL + contentR' <- mapM (applyToShape f) contentR + return $ TwoColumnSlide hdr' contentL' contentR' + +applyToSlide :: Monad m => (ParaElem -> m ParaElem) -> Slide -> m Slide +applyToSlide f slide = do + layout' <- applyToLayout f $ slideLayout slide + mbNotes' <- case slideSpeakerNotes slide of + Just (SpeakerNotes notes) -> (Just . SpeakerNotes) <$> + mapM (applyToParagraph f) notes + Nothing -> return Nothing + return slide{slideLayout = layout', slideSpeakerNotes = mbNotes'} + +replaceAnchor :: ParaElem -> Pres ParaElem +replaceAnchor (Run rProps s) + | Just (ExternalTarget ('#':anchor, _)) <- rLink rProps = do + anchorMap <- gets stAnchorMap + -- If the anchor is not in the anchormap, we just remove the + -- link. + let rProps' = case M.lookup anchor anchorMap of + Just n -> rProps{rLink = Just $ InternalTarget n} + Nothing -> rProps{rLink = Nothing} + return $ Run rProps' s +replaceAnchor pe = return pe + +emptyParaElem :: ParaElem -> Bool +emptyParaElem (Run _ s) = + null $ Shared.trim s +emptyParaElem (MathElem _ ts) = + null $ Shared.trim $ unTeXString ts +emptyParaElem _ = False + +emptyParagraph :: Paragraph -> Bool +emptyParagraph para = all emptyParaElem $ paraElems para + + +emptyShape :: Shape -> Bool +emptyShape (TextBox paras) = all emptyParagraph $ paras +emptyShape _ = False + +emptyLayout :: Layout -> Bool +emptyLayout layout = case layout of + MetadataSlide title subtitle authors date -> + all emptyParaElem title && + all emptyParaElem subtitle && + all (all emptyParaElem) authors && + all emptyParaElem date + TitleSlide hdr -> all emptyParaElem hdr + ContentSlide hdr shapes -> + all emptyParaElem hdr && + all emptyShape shapes + TwoColumnSlide hdr shapes1 shapes2 -> + all emptyParaElem hdr && + all emptyShape shapes1 && + all emptyShape shapes2 + +emptySlide :: Slide -> Bool +emptySlide (Slide _ layout Nothing) = emptyLayout layout +emptySlide _ = False + +blocksToPresentationSlides :: [Block] -> Pres [Slide] +blocksToPresentationSlides blks = do + opts <- asks envOpts + metadataslides <- maybeToList <$> getMetaSlide + -- As far as I can tell, if we want to have a variable-length toc in + -- the future, we'll have to make it twice. Once to get the length, + -- and a second time to include the notes slide. We can't make the + -- notes slide before the body slides because we need to know if + -- there are notes, and we can't make either before the toc slide, + -- because we need to know its length to get slide numbers right. + -- + -- For now, though, since the TOC slide is only length 1, if it + -- exists, we'll just get the length, and then come back to make the + -- slide later + blksLst <- splitBlocks blks + bodySlideIds <- mapM + (\n -> runUniqueSlideId $ "BodySlide" ++ show n) + (take (length blksLst) [1..] :: [Integer]) + bodyslides <- mapM + (\(bs, ident) -> + local (\st -> st{envCurSlideId = ident}) (blocksToSlide bs)) + (zip blksLst bodySlideIds) + endNotesSlideBlocks <- makeEndNotesSlideBlocks + -- now we come back and make the real toc... + tocSlides <- if writerTableOfContents opts + then do toc <- makeTOCSlide $ blks ++ endNotesSlideBlocks + return [toc] + else return [] + -- ... and the notes slide. We test to see if the blocks are empty, + -- because we don't want to make an empty slide. + endNotesSlides <- if null endNotesSlideBlocks + then return [] + else do endNotesSlide <- local + (\env -> env { envCurSlideId = endNotesSlideId + , envInNoteSlide = True + }) + (blocksToSlide endNotesSlideBlocks) + return [endNotesSlide] + + let slides = metadataslides ++ tocSlides ++ bodyslides ++ endNotesSlides + slides' = filter (not . emptySlide) slides + mapM (applyToSlide replaceAnchor) slides' + +metaToDocProps :: Meta -> DocProps +metaToDocProps meta = + let keywords = case lookupMeta "keywords" meta of + Just (MetaList xs) -> Just $ map Shared.stringify xs + _ -> Nothing + + authors = case map Shared.stringify $ docAuthors meta of + [] -> Nothing + ss -> Just $ intercalate ";" ss + in + DocProps{ dcTitle = Shared.stringify <$> lookupMeta "title" meta + , dcSubject = Shared.stringify <$> lookupMeta "subject" meta + , dcCreator = authors + , dcKeywords = keywords + , dcCreated = Nothing + } + +documentToPresentation :: WriterOptions + -> Pandoc + -> (Presentation, [LogMessage]) +documentToPresentation opts (Pandoc meta blks) = + let env = def { envOpts = opts + , envMetadata = meta + , envSlideLevel = fromMaybe (getSlideLevel blks) (writerSlideLevel opts) + } + (presSlides, msgs) = runPres env def $ blocksToPresentationSlides blks + docProps = metaToDocProps meta + in + (Presentation docProps presSlides, msgs) + +-- -------------------------------------------------------------- + +applyTokStyToRunProps :: TokenStyle -> RunProps -> RunProps +applyTokStyToRunProps tokSty rProps = + rProps{ rSolidFill = tokenColor tokSty <|> rSolidFill rProps + , rPropBold = tokenBold tokSty || rPropBold rProps + , rPropItalics = tokenItalic tokSty || rPropItalics rProps + , rPropUnderline = tokenUnderline tokSty || rPropUnderline rProps + } + +formatToken :: Style -> Token -> ParaElem +formatToken sty (tokType, txt) = + let rProps = def{rPropCode = True, rSolidFill = defaultColor sty} + rProps' = case M.lookup tokType (tokenStyles sty) of + Just tokSty -> applyTokStyToRunProps tokSty rProps + Nothing -> rProps + in + Run rProps' $ T.unpack txt + +formatSourceLine :: Style -> FormatOptions -> SourceLine -> [ParaElem] +formatSourceLine sty _ srcLn = map (formatToken sty) srcLn + +formatSourceLines :: Style -> FormatOptions -> [SourceLine] -> [ParaElem] +formatSourceLines sty opts srcLns = intercalate [Break] $ + map (formatSourceLine sty opts) srcLns diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 064434483..95cb46643 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,19 +31,21 @@ Conversion of 'Pandoc' documents to reStructuredText. reStructuredText: <http://docutils.sourceforge.net/rst.html> -} module Text.Pandoc.Writers.RST ( writeRST ) where +import Control.Monad.State.Strict +import Data.Char (isSpace, toLower) +import Data.List (isPrefixOf, stripPrefix) +import Data.Maybe (fromMaybe) +import Data.Text (Text, stripEnd) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options +import Text.Pandoc.Pretty import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.ImageSize import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Builder (deleteMeta) -import Data.Maybe (fromMaybe) -import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose ) -import Network.URI (isURI) -import Text.Pandoc.Pretty -import Control.Monad.State -import Data.Char (isSpace, toLower) +import Text.Pandoc.Writers.Shared type Refs = [([Inline], Target)] @@ -57,95 +59,99 @@ data WriterState = , stTopLevel :: Bool } +type RST = StateT WriterState + -- | Convert Pandoc to RST. -writeRST :: WriterOptions -> Pandoc -> String -writeRST opts document = +writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeRST opts document = do let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, stHasRawTeX = False, stOptions = opts, - stTopLevel = True} - in evalState (pandocToRST document) st + stTopLevel = True } + evalStateT (pandocToRST document) st -- | Return RST representation of document. -pandocToRST :: Pandoc -> State WriterState String +pandocToRST :: PandocMonad m => Pandoc -> RST m Text pandocToRST (Pandoc meta blocks) = do - opts <- liftM stOptions get + opts <- gets stOptions let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth let subtit = case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> xs - _ -> [] + _ -> [] title <- titleToRST (docTitle meta) subtit metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToRST) - (fmap (trimr . render colwidth) . inlineListToRST) - $ deleteMeta "title" $ deleteMeta "subtitle" meta + (fmap render' . blockListToRST) + (fmap (stripEnd . render') . inlineListToRST) + $ B.deleteMeta "title" $ B.deleteMeta "subtitle" meta body <- blockListToRST' True $ case writerTemplate opts of Just _ -> normalizeHeadings 1 blocks Nothing -> blocks - notes <- liftM (reverse . stNotes) get >>= notesToRST + notes <- gets (reverse . stNotes) >>= notesToRST -- note that the notes may contain refs, so we do them first - refs <- liftM (reverse . stLinks) get >>= refsToRST - pics <- liftM (reverse . stImages) get >>= pictRefsToRST - hasMath <- liftM stHasMath get - rawTeX <- liftM stHasRawTeX get - let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] + refs <- gets (reverse . stLinks) >>= refsToRST + pics <- gets (reverse . stImages) >>= pictRefsToRST + hasMath <- gets stHasMath + rawTeX <- gets stHasRawTeX + let main = render' $ foldl ($+$) empty [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) $ defField "toc-depth" (show $ writerTOCDepth opts) $ defField "math" hasMath $ defField "title" (render Nothing title :: String) $ defField "math" hasMath - $ defField "rawtex" rawTeX - $ metadata + $ defField "rawtex" rawTeX metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context where normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' where (cont,bs') = break (headerLtEq l) bs headerLtEq level (Header l' _ _) = l' <= level - headerLtEq _ _ = False + headerLtEq _ _ = False normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs normalizeHeadings _ [] = [] -- | Return RST representation of reference key table. -refsToRST :: Refs -> State WriterState Doc +refsToRST :: PandocMonad m => Refs -> RST m Doc refsToRST refs = mapM keyToRST refs >>= return . vcat -- | Return RST representation of a reference key. -keyToRST :: ([Inline], (String, String)) - -> State WriterState Doc +keyToRST :: PandocMonad m => ([Inline], (String, String)) -> RST m Doc keyToRST (label, (src, _)) = do label' <- inlineListToRST label - let label'' = if ':' `elem` ((render Nothing label') :: String) + let label'' = if ':' `elem` (render Nothing label' :: String) then char '`' <> label' <> char '`' else label' return $ nowrap $ ".. _" <> label'' <> ": " <> text src -- | Return RST representation of notes. -notesToRST :: [[Block]] -> State WriterState Doc +notesToRST :: PandocMonad m => [[Block]] -> RST m Doc notesToRST notes = - mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= + zipWithM noteToRST [1..] notes >>= return . vsep -- | Return RST representation of a note. -noteToRST :: Int -> [Block] -> State WriterState Doc +noteToRST :: PandocMonad m => Int -> [Block] -> RST m Doc noteToRST num note = do contents <- blockListToRST note let marker = ".. [" <> text (show num) <> "]" return $ nowrap $ marker $$ nest 3 contents -- | Return RST representation of picture reference table. -pictRefsToRST :: [([Inline], (Attr, String, String, Maybe String))] - -> State WriterState Doc +pictRefsToRST :: PandocMonad m + => [([Inline], (Attr, String, String, Maybe String))] + -> RST m Doc pictRefsToRST refs = mapM pictToRST refs >>= return . vcat -- | Return RST representation of a picture substitution reference. -pictToRST :: ([Inline], (Attr, String, String, Maybe String)) - -> State WriterState Doc +pictToRST :: PandocMonad m + => ([Inline], (Attr, String, String, Maybe String)) + -> RST m Doc pictToRST (label, (attr, src, _, mbtarget)) = do label' <- inlineListToRST label dims <- imageDimsToRST attr @@ -160,10 +166,27 @@ pictToRST (label, (attr, src, _, mbtarget)) = do Just t -> " :target: " <> text t -- | Escape special characters for RST. -escapeString :: String -> String -escapeString = escapeStringUsing (backslashEscapes "`\\|*_") +escapeString :: WriterOptions -> String -> String +escapeString = escapeString' True + where + escapeString' _ _ [] = [] + escapeString' firstChar opts (c:cs) = + case c of + _ | c `elem` ['\\','`','*','_','|'] && + (firstChar || null cs) -> '\\':c:escapeString' False opts cs + '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString' False opts cs + '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs + '-' | isEnabled Ext_smart opts -> + case cs of + '-':_ -> '\\':'-':escapeString' False opts cs + _ -> '-':escapeString' False opts cs + '.' | isEnabled Ext_smart opts -> + case cs of + '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest + _ -> '.':escapeString' False opts cs + _ -> c : escapeString' False opts cs -titleToRST :: [Inline] -> [Inline] -> State WriterState Doc +titleToRST :: PandocMonad m => [Inline] -> [Inline] -> RST m Doc titleToRST [] _ = return empty titleToRST tit subtit = do title <- inlineListToRST tit @@ -179,8 +202,9 @@ bordered contents c = border = text (replicate len c) -- | Convert Pandoc block element to RST. -blockToRST :: Block -- ^ Block element - -> State WriterState Doc +blockToRST :: PandocMonad m + => Block -- ^ Block element + -> RST m Doc blockToRST Null = return empty blockToRST (Div attr bs) = do contents <- blockListToRST bs @@ -200,7 +224,7 @@ blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do else ":figclass: " <> text (unwords cls) return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline blockToRST (Para inlines) - | LineBreak `elem` inlines = do -- use line block if LineBreaks + | LineBreak `elem` inlines = linesToLineBlock $ splitBy (==LineBreak) inlines | otherwise = do contents <- inlineListToRST inlines @@ -211,17 +235,22 @@ blockToRST (RawBlock f@(Format f') str) | f == "rst" = return $ text str | otherwise = return $ blankline <> ".. raw:: " <> text (map toLower f') $+$ - (nest 3 $ text str) $$ blankline + nest 3 (text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToRST (Header level (name,classes,_) inlines) = do contents <- inlineListToRST inlines + -- we calculate the id that would be used by auto_identifiers + -- so we know whether to print an explicit identifier + let autoId = uniqueIdent inlines mempty isTopLevel <- gets stTopLevel if isTopLevel then do let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) let border = text $ replicate (offset contents) headerChar - return $ nowrap $ contents $$ border $$ blankline + let anchor | null name || name == autoId = empty + | otherwise = ".. _" <> text name <> ":" $$ blankline + return $ nowrap $ anchor $$ contents $$ border $$ blankline else do let rub = "rubric:: " <> contents let name' | null name = empty @@ -230,7 +259,7 @@ blockToRST (Header level (name,classes,_) inlines) = do | otherwise = ":class: " <> text (unwords classes) return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline blockToRST (CodeBlock (_,classes,kvs) str) = do - opts <- stOptions <$> get + opts <- gets stOptions let tabstop = writerTabStop opts let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs let numberlines = if "numberLines" `elem` classes @@ -246,59 +275,38 @@ blockToRST (CodeBlock (_,classes,kvs) str) = do (lang:_) -> (".. code:: " <> text lang) $$ numberlines) $+$ nest tabstop (text str) $$ blankline blockToRST (BlockQuote blocks) = do - tabstop <- get >>= (return . writerTabStop . stOptions) + tabstop <- gets $ writerTabStop . stOptions contents <- blockListToRST blocks - return $ (nest tabstop contents) <> blankline -blockToRST (Table caption _ widths headers rows) = do + return $ nest tabstop contents <> blankline +blockToRST (Table caption aligns widths headers rows) = do caption' <- inlineListToRST caption - let caption'' = if null caption - then empty - else blankline <> text "Table: " <> caption' - headers' <- mapM blockListToRST headers - rawRows <- mapM (mapM blockListToRST) rows - -- let isSimpleCell [Plain _] = True - -- isSimpleCell [Para _] = True - -- isSimpleCell [] = True - -- isSimpleCell _ = False - -- let isSimple = all (==0) widths && all (all isSimpleCell) rows - let numChars = maximum . map offset - opts <- get >>= return . stOptions - let widthsInChars = - if all (== 0) widths - then map ((+2) . numChars) $ transpose (headers' : rawRows) - else map (floor . (fromIntegral (writerColumns opts) *)) widths - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = height (hcat blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map makeRow rawRows - let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '+' - let body = vcat $ intersperse (border '-') rows' - let head'' = if all null headers - then empty - else head' $$ border '=' - return $ border '-' $$ head'' $$ body $$ border '-' $$ caption'' $$ blankline + let blocksToDoc opts bs = do + oldOpts <- gets stOptions + modify $ \st -> st{ stOptions = opts } + result <- blockListToRST bs + modify $ \st -> st{ stOptions = oldOpts } + return result + opts <- gets stOptions + tbl <- gridTable opts blocksToDoc (all null headers) + (map (const AlignDefault) aligns) widths + headers rows + return $ if null caption + then tbl $$ blankline + else (".. table:: " <> caption') $$ blankline $$ nest 3 tbl $$ + blankline blockToRST (BulletList items) = do contents <- mapM bulletListItemToRST items -- ensure that sublists have preceding blank line return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (OrderedList (start, style', delim) items) = do let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim - then take (length items) $ repeat "#." + then replicate (length items) "#." else take (length items) $ orderedListMarkers (start, style', delim) let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ - zip markers' items + contents <- zipWithM orderedListItemToRST markers' items -- ensure that sublists have preceding blank line return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (DefinitionList items) = do @@ -307,51 +315,69 @@ blockToRST (DefinitionList items) = do return $ blankline $$ chomp (vcat contents) $$ blankline -- | Convert bullet list item (list of blocks) to RST. -bulletListItemToRST :: [Block] -> State WriterState Doc +bulletListItemToRST :: PandocMonad m => [Block] -> RST m Doc bulletListItemToRST items = do contents <- blockListToRST items return $ hang 3 "- " $ contents <> cr -- | Convert ordered list item (a list of blocks) to RST. -orderedListItemToRST :: String -- ^ marker for list item +orderedListItemToRST :: PandocMonad m + => String -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc + -> RST m Doc orderedListItemToRST marker items = do contents <- blockListToRST items let marker' = marker ++ " " return $ hang (length marker') (text marker') $ contents <> cr -- | Convert defintion list item (label, list of blocks) to RST. -definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc +definitionListItemToRST :: PandocMonad m => ([Inline], [[Block]]) -> RST m Doc definitionListItemToRST (label, defs) = do label' <- inlineListToRST label contents <- liftM vcat $ mapM blockListToRST defs - tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $$ nest tabstop (nestle contents <> cr) + tabstop <- gets $ writerTabStop . stOptions + return $ nowrap label' $$ nest tabstop (nestle contents <> cr) -- | Format a list of lines as line block. -linesToLineBlock :: [[Inline]] -> State WriterState Doc +linesToLineBlock :: PandocMonad m => [[Inline]] -> RST m Doc linesToLineBlock inlineLines = do lns <- mapM inlineListToRST inlineLines - return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline + return $ + vcat (map (hang 2 (text "| ")) lns) <> blankline -- | Convert list of Pandoc block elements to RST. -blockListToRST' :: Bool +blockListToRST' :: PandocMonad m + => Bool -> [Block] -- ^ List of block elements - -> State WriterState Doc + -> RST m Doc blockListToRST' topLevel blocks = do + -- insert comment between list and quoted blocks, see #4248 and #3675 + let fixBlocks (b1:b2@(BlockQuote _):bs) + | toClose b1 = b1 : commentSep : b2 : fixBlocks bs + where + toClose Plain{} = False + toClose Header{} = False + toClose LineBlock{} = False + toClose HorizontalRule = False + toClose (Para [Image _ _ (_,'f':'i':'g':':':_)]) = True + toClose Para{} = False + toClose _ = True + commentSep = RawBlock "rst" "..\n\n" + fixBlocks (b:bs) = b : fixBlocks bs + fixBlocks [] = [] tl <- gets stTopLevel modify (\s->s{stTopLevel=topLevel}) - res <- vcat `fmap` mapM blockToRST blocks + res <- vcat `fmap` mapM blockToRST (fixBlocks blocks) modify (\s->s{stTopLevel=tl}) return res -blockListToRST :: [Block] -- ^ List of block elements - -> State WriterState Doc +blockListToRST :: PandocMonad m + => [Block] -- ^ List of block elements + -> RST m Doc blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. -inlineListToRST :: [Inline] -> State WriterState Doc +inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc inlineListToRST lst = mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat @@ -362,7 +388,7 @@ inlineListToRST lst = removeSpaceAfterDisplayMath [] = [] insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed insertBS (x:y:z:zs) - | isComplex y && (surroundComplex x z) = + | isComplex y && surroundComplex x z = x : y : insertBS (z : zs) insertBS (x:y:zs) | isComplex x && not (okAfterComplex y) = @@ -396,23 +422,28 @@ inlineListToRST lst = okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) okBeforeComplex _ = False isComplex :: Inline -> Bool - isComplex (Emph _) = True - isComplex (Strong _) = True - isComplex (SmallCaps _) = True - isComplex (Strikeout _) = True + isComplex (Emph _) = True + isComplex (Strong _) = True + isComplex (SmallCaps _) = True + isComplex (Strikeout _) = True isComplex (Superscript _) = True - isComplex (Subscript _) = True - isComplex (Link _ _ _) = True - isComplex (Image _ _ _) = True - isComplex (Code _ _) = True - isComplex (Math _ _) = True - isComplex (Cite _ (x:_)) = isComplex x - isComplex (Span _ (x:_)) = isComplex x - isComplex _ = False + isComplex (Subscript _) = True + isComplex Link{} = True + isComplex Image{} = True + isComplex (Code _ _) = True + isComplex (Math _ _) = True + isComplex (Cite _ (x:_)) = isComplex x + isComplex (Span _ (x:_)) = isComplex x + isComplex _ = False -- | Convert Pandoc inline element to RST. -inlineToRST :: Inline -> State WriterState Doc -inlineToRST (Span _ ils) = inlineListToRST ils +inlineToRST :: PandocMonad m => Inline -> RST m Doc +inlineToRST (Span (_,_,kvs) ils) = do + contents <- inlineListToRST ils + return $ + case lookup "role" kvs of + Just role -> ":" <> text role <> ":`" <> contents <> "`" + Nothing -> contents inlineToRST (Emph lst) = do contents <- inlineListToRST lst return $ "*" <> contents <> "*" @@ -431,14 +462,33 @@ inlineToRST (Subscript lst) = do inlineToRST (SmallCaps lst) = inlineListToRST lst inlineToRST (Quoted SingleQuote lst) = do contents <- inlineListToRST lst - return $ "‘" <> contents <> "’" + opts <- gets stOptions + if isEnabled Ext_smart opts + then return $ "'" <> contents <> "'" + else return $ "‘" <> contents <> "’" inlineToRST (Quoted DoubleQuote lst) = do contents <- inlineListToRST lst - return $ "“" <> contents <> "”" + opts <- gets stOptions + if isEnabled Ext_smart opts + then return $ "\"" <> contents <> "\"" + else return $ "“" <> contents <> "”" inlineToRST (Cite _ lst) = inlineListToRST lst -inlineToRST (Code _ str) = return $ "``" <> text str <> "``" -inlineToRST (Str str) = return $ text $ escapeString str +inlineToRST (Code _ str) = do + opts <- gets stOptions + -- we trim the string because the delimiters must adjoin a + -- non-space character; see #3496 + -- we use :literal: when the code contains backticks, since + -- :literal: allows backslash-escapes; see #3974 + return $ if '`' `elem` str + then ":literal:`" <> text (escapeString opts (trim str)) <> "`" + else "``" <> text (trim str) <> "``" +inlineToRST (Str str) = do + opts <- gets stOptions + return $ text $ + (if isEnabled Ext_smart opts + then unsmartify opts + else id) $ escapeString opts str inlineToRST (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath @@ -447,20 +497,20 @@ inlineToRST (Math t str) = do then blankline $$ ".. math::" $$ blankline $$ nest 3 (text str) $$ blankline else blankline $$ (".. math:: " <> text str) $$ blankline -inlineToRST (RawInline f x) +inlineToRST il@(RawInline f x) | f == "rst" = return $ text x | f == "latex" || f == "tex" = do modify $ \st -> st{ stHasRawTeX = True } return $ ":raw-latex:`" <> text x <> "`" - | otherwise = return empty -inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) + | otherwise = empty <$ report (InlineNotRendered il) +inlineToRST LineBreak = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space inlineToRST SoftBreak = do - wrapText <- gets (writerWrapText . stOptions) + wrapText <- gets $ writerWrapText . stOptions case wrapText of - WrapPreserve -> return cr - WrapAuto -> return space - WrapNone -> return space + WrapPreserve -> return cr + WrapAuto -> return space + WrapNone -> return space -- autolink inlineToRST (Link _ [Str str] (src, _)) | isURI src && @@ -473,15 +523,15 @@ inlineToRST (Link _ [Image attr alt (imgsrc,imgtit)] (src, _tit)) = do label <- registerImage attr alt (imgsrc,imgtit) (Just src) return $ "|" <> label <> "|" inlineToRST (Link _ txt (src, tit)) = do - useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions - linktext <- inlineListToRST $ normalizeSpaces txt + useReferenceLinks <- gets $ writerReferenceLinks . stOptions + linktext <- inlineListToRST $ B.toList . B.trimInlines . B.fromList $ txt if useReferenceLinks - then do refs <- get >>= return . stLinks + then do refs <- gets stLinks case lookup txt refs of Just (src',tit') -> if src == src' && tit == tit' then return $ "`" <> linktext <> "`_" - else do -- duplicate label, use non-reference link + else return $ "`" <> linktext <> " <" <> text src <> ">`__" Nothing -> do modify $ \st -> st { stLinks = (txt,(src,tit)):refs } @@ -494,12 +544,12 @@ inlineToRST (Note contents) = do -- add to notes in state notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 + let ref = show $ length notes + 1 return $ " [" <> text ref <> "]_" -registerImage :: Attr -> [Inline] -> Target -> Maybe String -> State WriterState Doc +registerImage :: PandocMonad m => Attr -> [Inline] -> Target -> Maybe String -> RST m Doc registerImage attr alt (src,tit) mbtarget = do - pics <- get >>= return . stImages + pics <- gets stImages txt <- case lookup alt pics of Just (a,s,t,mbt) | (a,s,t,mbt) == (attr,src,tit,mbtarget) -> return alt @@ -512,14 +562,14 @@ registerImage attr alt (src,tit) mbtarget = do return alt' inlineListToRST txt -imageDimsToRST :: Attr -> State WriterState Doc +imageDimsToRST :: PandocMonad m => Attr -> RST m Doc imageDimsToRST attr = do let (ident, _, _) = attr name = if null ident then empty else ":name: " <> text ident showDim dir = let cols d = ":" <> text (show dir) <> ": " <> text (show d) - in case (dimension dir attr) of + in case dimension dir attr of Just (Percent a) -> case dir of Height -> empty diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 8f942b4d0..7006b58d1 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -27,103 +28,125 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to RTF (rich text format). -} -module Text.Pandoc.Writers.RTF ( writeRTF, writeRTFWithEmbeddedImages ) where +module Text.Pandoc.Writers.RTF ( writeRTF + ) where +import Control.Monad.Except (catchError, throwError) +import Control.Monad +import qualified Data.ByteString as B +import Data.Char (chr, isDigit, ord) +import Data.List (intercalate, isSuffixOf) +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk -import Data.List ( isSuffixOf, intercalate ) -import Data.Char ( ord, chr, isDigit ) -import qualified Data.ByteString as B -import qualified Data.Map as M -import Text.Printf ( printf ) -import Text.Pandoc.ImageSize +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) -- | Convert Image inlines into a raw RTF embedded image, read from a file, -- or a MediaBag, or the internet. -- If file not found or filetype not jpeg or png, leave the inline unchanged. -rtfEmbedImage :: WriterOptions -> Inline -> IO Inline -rtfEmbedImage opts x@(Image attr _ (src,_)) = do - result <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src - case result of - Right (imgdata, Just mime) - | mime == "image/jpeg" || mime == "image/png" -> do - let bytes = map (printf "%02x") $ B.unpack imgdata - let filetype = case mime of - "image/jpeg" -> "\\jpegblip" - "image/png" -> "\\pngblip" - _ -> error "Unknown file type" - sizeSpec <- case imageSize imgdata of - Left msg -> do - warn $ "Could not determine image size in `" ++ - src ++ "': " ++ msg - return "" - Right sz -> return $ "\\picw" ++ show xpx ++ - "\\pich" ++ show ypx ++ - "\\picwgoal" ++ show (floor (xpt * 20) :: Integer) - ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer) - -- twip = 1/1440in = 1/20pt - where (xpx, ypx) = sizeInPixels sz - (xpt, ypt) = desiredSizeInPoints opts attr sz - let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++ - concat bytes ++ "}" - return $ if B.null imgdata - then x - else RawInline (Format "rtf") raw - _ -> return x +rtfEmbedImage :: PandocMonad m => WriterOptions -> Inline -> m Inline +rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError + (do result <- P.fetchItem src + case result of + (imgdata, Just mime) + | mime == "image/jpeg" || mime == "image/png" -> do + let bytes = map (printf "%02x") $ B.unpack imgdata + filetype <- + case mime of + "image/jpeg" -> return "\\jpegblip" + "image/png" -> return "\\pngblip" + _ -> throwError $ + PandocShouldNeverHappenError $ + "Unknown file type " ++ mime + sizeSpec <- + case imageSize opts imgdata of + Left msg -> do + report $ CouldNotDetermineImageSize src msg + return "" + Right sz -> return $ "\\picw" ++ show xpx ++ + "\\pich" ++ show ypx ++ + "\\picwgoal" ++ show (floor (xpt * 20) :: Integer) + ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer) + -- twip = 1/1440in = 1/20pt + where (xpx, ypx) = sizeInPixels sz + (xpt, ypt) = desiredSizeInPoints opts attr sz + let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++ + concat bytes ++ "}" + if B.null imgdata + then do + report $ CouldNotFetchResource src "image contained no data" + return x + else return $ RawInline (Format "rtf") raw + | otherwise -> do + report $ CouldNotFetchResource src "image is not a jpeg or png" + return x + (_, Nothing) -> do + report $ CouldNotDetermineMimeType src + return x) + (\e -> do + report $ CouldNotFetchResource src (show e) + return x) rtfEmbedImage _ x = return x --- | Convert Pandoc to a string in rich text format, with --- images embedded as encoded binary data. -writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String -writeRTFWithEmbeddedImages options doc = - writeRTF options `fmap` walkM (rtfEmbedImage options) doc - -- | Convert Pandoc to a string in rich text format. -writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc meta@(Meta metamap) blocks) = +writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeRTF options doc = do + -- handle images + Pandoc meta@(Meta metamap) blocks <- walkM (rtfEmbedImage options) doc let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta - toPlain (MetaBlocks [Para ils]) = MetaInlines ils - toPlain x = x - -- adjust title, author, date so we don't get para inside para - meta' = Meta $ M.adjust toPlain "title" + let toPlain (MetaBlocks [Para ils]) = MetaInlines ils + toPlain x = x + -- adjust title, author, date so we don't get para inside para + let meta' = Meta $ M.adjust toPlain "title" . M.adjust toPlain "author" . M.adjust toPlain "date" $ metamap - Just metadata = metaToJSON options - (Just . concatMap (blockToRTF 0 AlignDefault)) - (Just . inlineListToRTF) + metadata <- metaToJSON options + (fmap concat . mapM (blockToRTF 0 AlignDefault)) + inlinesToRTF meta' - body = concatMap (blockToRTF 0 AlignDefault) blocks - isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options - isTOCHeader _ = False - context = defField "body" body + body <- blocksToRTF 0 AlignDefault blocks + let isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options + isTOCHeader _ = False + toc <- tableOfContents $ filter isTOCHeader blocks + let context = defField "body" body $ defField "spacer" spacer - $ (if writerTableOfContents options - then defField "toc" - (tableOfContents $ filter isTOCHeader blocks) - else id) - $ metadata - in case writerTemplate options of + $(if writerTableOfContents options + then defField "table-of-contents" toc + -- for backwards compatibility, + -- we populate toc with the contents + -- of the toc rather than a boolean: + . defField "toc" toc + else id) metadata + T.pack <$> + case writerTemplate options of Just tpl -> renderTemplate' tpl context - Nothing -> case reverse body of + Nothing -> return $ + case reverse body of ('\n':_) -> body _ -> body ++ "\n" -- | Construct table of contents from list of header blocks. -tableOfContents :: [Block] -> String -tableOfContents headers = - let contentsTree = hierarchicalize headers - in concatMap (blockToRTF 0 AlignDefault) $ - [Header 1 nullAttr [Str "Contents"], - BulletList (map elementToListItem contentsTree)] +tableOfContents :: PandocMonad m => [Block] -> m String +tableOfContents headers = do + let contents = map elementToListItem $ hierarchicalize headers + blocksToRTF 0 AlignDefault + [Header 1 nullAttr [Str "Contents"], BulletList contents] elementToListItem :: Element -> [Block] elementToListItem (Blk _) = [] -elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++ +elementToListItem (Sec _ _ _ sectext subsecs) = Plain sectext : if null subsecs then [] else [BulletList (map elementToListItem subsecs)] @@ -140,11 +163,11 @@ handleUnicode (c:cs) = lower = r + 0xDC00 in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs else enc c ++ handleUnicode cs - else c:(handleUnicode cs) + else c:handleUnicode cs where surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff) || (0xe000 <= ord x && ord x <= 0xffff) ) - enc x = '\\':'u':(show (ord x)) ++ "?" + enc x = '\\':'u':show (ord x) ++ "?" -- | Escape special characters. escapeSpecial :: String -> String @@ -175,13 +198,13 @@ rtfParSpaced :: Int -- ^ space after (in twips) -> String rtfParSpaced spaceAfter indent firstLineIndent alignment content = let alignString = case alignment of - AlignLeft -> "\\ql " - AlignRight -> "\\qr " - AlignCenter -> "\\qc " + AlignLeft -> "\\ql " + AlignRight -> "\\qr " + AlignCenter -> "\\qc " AlignDefault -> "\\ql " in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ - " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" + "\\f0 \\sa" ++ show spaceAfter ++ " \\li" ++ show indent ++ + " \\fi" ++ show firstLineIndent ++ " " ++ content ++ "\\par}\n" -- | Default paragraph. rtfPar :: Int -- ^ block indent (in twips) @@ -221,143 +244,180 @@ orderedMarkers indent (start, style, delim) = _ -> orderedListMarkers (start, LowerAlpha, Period) else orderedListMarkers (start, style, delim) +blocksToRTF :: PandocMonad m + => Int + -> Alignment + -> [Block] + -> m String +blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align) + -- | Convert Pandoc block element to RTF. -blockToRTF :: Int -- ^ indent level +blockToRTF :: PandocMonad m + => Int -- ^ indent level -> Alignment -- ^ alignment -> Block -- ^ block to convert - -> String -blockToRTF _ _ Null = "" + -> m String +blockToRTF _ _ Null = return "" blockToRTF indent alignment (Div _ bs) = - concatMap (blockToRTF indent alignment) bs + blocksToRTF indent alignment bs blockToRTF indent alignment (Plain lst) = - rtfCompact indent 0 alignment $ inlineListToRTF lst + rtfCompact indent 0 alignment <$> inlinesToRTF lst blockToRTF indent alignment (Para lst) = - rtfPar indent 0 alignment $ inlineListToRTF lst + rtfPar indent 0 alignment <$> inlinesToRTF lst blockToRTF indent alignment (LineBlock lns) = blockToRTF indent alignment $ linesToPara lns blockToRTF indent alignment (BlockQuote lst) = - concatMap (blockToRTF (indent + indentIncrement) alignment) lst + blocksToRTF (indent + indentIncrement) alignment lst blockToRTF indent _ (CodeBlock _ str) = - rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawBlock f str) - | f == Format "rtf" = str - | otherwise = "" -blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ - concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ - zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ - concatMap (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule = + return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ codeStringToRTF str) +blockToRTF _ _ b@(RawBlock f str) + | f == Format "rtf" = return str + | otherwise = do + report $ BlockNotRendered b + return "" +blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$> + mapM (listItemToRTF alignment indent (bulletMarker indent)) lst +blockToRTF indent alignment (OrderedList attribs lst) = + (spaceAtEnd . concat) <$> + zipWithM (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst +blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$> + mapM (definitionListItemToRTF alignment indent) lst +blockToRTF indent _ HorizontalRule = return $ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF indent alignment (Header level _ lst) = rtfPar indent 0 alignment $ - "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst -blockToRTF indent alignment (Table caption aligns sizes headers rows) = - (if all null headers - then "" - else tableRowToRTF True indent aligns sizes headers) ++ - concatMap (tableRowToRTF False indent aligns sizes) rows ++ - rtfPar indent 0 alignment (inlineListToRTF caption) +blockToRTF indent alignment (Header level _ lst) = do + contents <- inlinesToRTF lst + return $ rtfPar indent 0 alignment $ + "\\b \\fs" ++ show (40 - (level * 4)) ++ " " ++ contents +blockToRTF indent alignment (Table caption aligns sizes headers rows) = do + caption' <- inlinesToRTF caption + header' <- if all null headers + then return "" + else tableRowToRTF True indent aligns sizes headers + rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows + return $ header' ++ rows' ++ rtfPar indent 0 alignment caption' -tableRowToRTF :: Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> String -tableRowToRTF header indent aligns sizes' cols = +tableRowToRTF :: PandocMonad m + => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String +tableRowToRTF header indent aligns sizes' cols = do let totalTwips = 6 * 1440 -- 6 inches - sizes = if all (== 0) sizes' - then take (length cols) $ repeat (1.0 / fromIntegral (length cols)) + let sizes = if all (== 0) sizes' + then replicate (length cols) (1.0 / fromIntegral (length cols)) else sizes' - columns = concat $ zipWith (tableItemToRTF indent) aligns cols - rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) + columns <- concat <$> + zipWithM (tableItemToRTF indent) aligns cols + let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) (0 :: Integer) sizes - cellDefs = map (\edge -> (if header + let cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs" else "") ++ "\\cellx" ++ show edge) rightEdges - start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ + let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ "\\trkeep\\intbl\n{\n" - end = "}\n\\intbl\\row}\n" - in start ++ columns ++ end + let end = "}\n\\intbl\\row}\n" + return $ start ++ columns ++ end -tableItemToRTF :: Int -> Alignment -> [Block] -> String -tableItemToRTF indent alignment item = - let contents = concatMap (blockToRTF indent alignment) item - in "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" +tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String +tableItemToRTF indent alignment item = do + contents <- blocksToRTF indent alignment item + return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. spaceAtEnd :: String -> String spaceAtEnd str = - if isSuffixOf "\\par}\n" str - then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" + if "\\par}\n" `isSuffixOf` str + then take (length str - 6) str ++ "\\sa180\\par}\n" else str -- | Convert list item (list of blocks) to RTF. -listItemToRTF :: Alignment -- ^ alignment +listItemToRTF :: PandocMonad m + => Alignment -- ^ alignment -> Int -- ^ indent level -> String -- ^ list start marker -> [Block] -- ^ list item (list of blocks) - -> [Char] -listItemToRTF alignment indent marker [] = - rtfCompact (indent + listIncrement) (0 - listIncrement) alignment - (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF alignment indent marker list = - let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list - listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++ - show listIncrement ++ "\\tab" - insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = + -> m String +listItemToRTF alignment indent marker [] = return $ + rtfCompact (indent + listIncrement) (negate listIncrement) alignment + (marker ++ "\\tx" ++ show listIncrement ++ "\\tab ") +listItemToRTF alignment indent marker list = do + (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list + let listMarker = "\\fi" ++ show (negate listIncrement) ++ " " ++ marker ++ + "\\tx" ++ show listIncrement ++ "\\tab" + let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = listMarker ++ dropWhile isDigit xs insertListMarker ('\\':'f':'i':d:xs) | isDigit d = listMarker ++ dropWhile isDigit xs insertListMarker (x:xs) = x : insertListMarker xs insertListMarker [] = [] - -- insert the list marker into the (processed) first block - in insertListMarker first ++ concat rest + -- insert the list marker into the (processed) first block + return $ insertListMarker first ++ concat rest -- | Convert definition list item (label, list of blocks) to RTF. -definitionListItemToRTF :: Alignment -- ^ alignment +definitionListItemToRTF :: PandocMonad m + => Alignment -- ^ alignment -> Int -- ^ indent level -> ([Inline],[[Block]]) -- ^ list item (list of blocks) - -> [Char] -definitionListItemToRTF alignment indent (label, defs) = - let labelText = blockToRTF indent alignment (Plain label) - itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) $ - concat defs - in labelText ++ itemsText + -> m String +definitionListItemToRTF alignment indent (label, defs) = do + labelText <- blockToRTF indent alignment (Plain label) + itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs) + return $ labelText ++ itemsText -- | Convert list of inline items to RTF. -inlineListToRTF :: [Inline] -- ^ list of inlines to convert - -> String -inlineListToRTF lst = concatMap inlineToRTF lst +inlinesToRTF :: PandocMonad m + => [Inline] -- ^ list of inlines to convert + -> m String +inlinesToRTF lst = concat <$> mapM inlineToRTF lst -- | Convert inline item to RTF. -inlineToRTF :: Inline -- ^ inline to convert - -> String -inlineToRTF (Span _ lst) = inlineListToRTF lst -inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Quoted SingleQuote lst) = - "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) = - "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" -inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" -inlineToRTF (Str str) = stringToRTF str -inlineToRTF (Math t str) = inlineListToRTF $ texMathToInlines t str -inlineToRTF (Cite _ lst) = inlineListToRTF lst -inlineToRTF (RawInline f str) - | f == Format "rtf" = str - | otherwise = "" -inlineToRTF (LineBreak) = "\\line " -inlineToRTF SoftBreak = " " -inlineToRTF Space = " " -inlineToRTF (Link _ text (src, _)) = - "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ - "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" +inlineToRTF :: PandocMonad m + => Inline -- ^ inline to convert + -> m String +inlineToRTF (Span _ lst) = inlinesToRTF lst +inlineToRTF (Emph lst) = do + contents <- inlinesToRTF lst + return $ "{\\i " ++ contents ++ "}" +inlineToRTF (Strong lst) = do + contents <- inlinesToRTF lst + return $ "{\\b " ++ contents ++ "}" +inlineToRTF (Strikeout lst) = do + contents <- inlinesToRTF lst + return $ "{\\strike " ++ contents ++ "}" +inlineToRTF (Superscript lst) = do + contents <- inlinesToRTF lst + return $ "{\\super " ++ contents ++ "}" +inlineToRTF (Subscript lst) = do + contents <- inlinesToRTF lst + return $ "{\\sub " ++ contents ++ "}" +inlineToRTF (SmallCaps lst) = do + contents <- inlinesToRTF lst + return $ "{\\scaps " ++ contents ++ "}" +inlineToRTF (Quoted SingleQuote lst) = do + contents <- inlinesToRTF lst + return $ "\\u8216'" ++ contents ++ "\\u8217'" +inlineToRTF (Quoted DoubleQuote lst) = do + contents <- inlinesToRTF lst + return $ "\\u8220\"" ++ contents ++ "\\u8221\"" +inlineToRTF (Code _ str) = return $ "{\\f1 " ++ codeStringToRTF str ++ "}" +inlineToRTF (Str str) = return $ stringToRTF str +inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF +inlineToRTF (Cite _ lst) = inlinesToRTF lst +inlineToRTF il@(RawInline f str) + | f == Format "rtf" = return str + | otherwise = do + return $ InlineNotRendered il + return "" +inlineToRTF LineBreak = return "\\line " +inlineToRTF SoftBreak = return " " +inlineToRTF Space = return " " +inlineToRTF (Link _ text (src, _)) = do + contents <- inlinesToRTF text + return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ codeStringToRTF src ++ + "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n" inlineToRTF (Image _ _ (source, _)) = - "{\\cf1 [image: " ++ source ++ "]\\cf0}" -inlineToRTF (Note contents) = - "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" + return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF (Note contents) = do + body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents + return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + body ++ "}" diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 845d22077..ae4cc5cc5 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013-2015 John MacFarlane + Copyright : Copyright (C) 2013-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,62 +30,91 @@ Shared utility functions for pandoc writers. -} module Text.Pandoc.Writers.Shared ( metaToJSON + , metaToJSON' + , addVariablesToJSON , getField , setField + , resetField , defField , tagWithAttrs , fixDisplayMath + , unsmartify + , gridTable + , metaValueToInlines ) where -import Text.Pandoc.Definition -import Text.Pandoc.Pretty -import Text.Pandoc.XML (escapeStringForXML) -import Control.Monad (liftM) -import Text.Pandoc.Options (WriterOptions(..)) +import Control.Monad (zipWithM) +import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), + encode, fromJSON) import qualified Data.HashMap.Strict as H +import Data.List (groupBy, intersperse, transpose) import qualified Data.Map as M +import Data.Maybe (isJust) import qualified Data.Text as T -import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..), encode) -import Text.Pandoc.UTF8 (toStringLazy) import qualified Data.Traversable as Traversable -import Data.List ( groupBy ) -import Data.Maybe ( isJust ) +import qualified Text.Pandoc.Builder as Builder +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Walk (query) +import Text.Pandoc.UTF8 (toStringLazy) +import Text.Pandoc.XML (escapeStringForXML) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. -- Variables overwrite metadata fields with the same names. -- If multiple variables are set with the same name, a list is --- assigned. -metaToJSON :: Monad m +-- assigned. Does nothing if 'writerTemplate' is Nothing. +metaToJSON :: (Functor m, Monad m, ToJSON a) => WriterOptions - -> ([Block] -> m String) - -> ([Inline] -> m String) + -> ([Block] -> m a) + -> ([Inline] -> m a) -> Meta -> m Value -metaToJSON opts blockWriter inlineWriter (Meta metamap) - | isJust (writerTemplate opts) = do - let baseContext = foldl (\acc (x,y) -> setField x y acc) (Object H.empty) - $ writerVariables opts - renderedMap <- Traversable.mapM - (metaValueToJSON blockWriter inlineWriter) - metamap - let metadata = M.foldWithKey defField baseContext renderedMap - return $ defField "meta-json" (toStringLazy $ encode metadata) metadata +metaToJSON opts blockWriter inlineWriter meta + | isJust (writerTemplate opts) = + addVariablesToJSON opts <$> metaToJSON' blockWriter inlineWriter meta | otherwise = return (Object H.empty) -metaValueToJSON :: Monad m - => ([Block] -> m String) - -> ([Inline] -> m String) +-- | Like 'metaToJSON', but does not include variables and is +-- not sensitive to 'writerTemplate'. +metaToJSON' :: (Functor m, Monad m, ToJSON a) + => ([Block] -> m a) + -> ([Inline] -> m a) + -> Meta + -> m Value +metaToJSON' blockWriter inlineWriter (Meta metamap) = do + renderedMap <- Traversable.mapM + (metaValueToJSON blockWriter inlineWriter) + metamap + return $ M.foldrWithKey defField (Object H.empty) renderedMap + +-- | Add variables to JSON object, replacing any existing values. +-- Also include @meta-json@, a field containing a string representation +-- of the original JSON object itself, prior to addition of variables. +addVariablesToJSON :: WriterOptions -> Value -> Value +addVariablesToJSON opts metadata = + foldl (\acc (x,y) -> setField x y acc) + (defField "meta-json" (toStringLazy $ encode metadata) (Object mempty)) + (writerVariables opts) + `combineMetadata` metadata + where combineMetadata (Object o1) (Object o2) = Object $ H.union o1 o2 + combineMetadata x _ = x + +metaValueToJSON :: (Functor m, Monad m, ToJSON a) + => ([Block] -> m a) + -> ([Inline] -> m a) -> MetaValue -> m Value -metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $ +metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = toJSON <$> Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap -metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $ +metaValueToJSON blockWriter inlineWriter (MetaList xs) = toJSON <$> Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs metaValueToJSON _ _ (MetaBool b) = return $ toJSON b -metaValueToJSON _ _ (MetaString s) = return $ toJSON s -metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs -metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs +metaValueToJSON _ inlineWriter (MetaString s) = toJSON <$> + inlineWriter (Builder.toList (Builder.text s)) +metaValueToJSON blockWriter _ (MetaBlocks bs) = toJSON <$> blockWriter bs +metaValueToJSON _ inlineWriter (MetaInlines is) = toJSON <$> inlineWriter is -- | Retrieve a field value from a JSON object. getField :: FromJSON a @@ -111,10 +140,22 @@ setField field val (Object hashmap) = Object $ H.insertWith combine (T.pack field) (toJSON val) hashmap where combine newval oldval = case fromJSON oldval of - Success xs -> toJSON $ xs ++ [newval] - _ -> toJSON [oldval, newval] + Success xs -> toJSON $ xs ++ [newval] + _ -> toJSON [oldval, newval] setField _ _ x = x +resetField :: ToJSON a + => String + -> a + -> Value + -> Value +-- | Reset a field of a JSON object. If the field already has a value, +-- the new value replaces it. +-- This is a utility function to be used in preparing template contexts. +resetField field val (Object hashmap) = + Object $ H.insert (T.pack field) (toJSON val) hashmap +resetField _ _ x = x + defField :: ToJSON a => String -> a @@ -148,22 +189,131 @@ isDisplayMath _ = False stripLeadingTrailingSpace :: [Inline] -> [Inline] stripLeadingTrailingSpace = go . reverse . go . reverse - where go (Space:xs) = xs + where go (Space:xs) = xs go (SoftBreak:xs) = xs - go xs = xs + go xs = xs -- Put display math in its own block (for ODT/DOCX). fixDisplayMath :: Block -> Block fixDisplayMath (Plain lst) | any isDisplayMath lst && not (all isDisplayMath lst) = -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ + Div ("",["math"],[]) $ + map Plain $ + filter (not . null) $ + map stripLeadingTrailingSpace $ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || not (isDisplayMath x || isDisplayMath y)) lst fixDisplayMath (Para lst) | any isDisplayMath lst && not (all isDisplayMath lst) = -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ + Div ("",["math"],[]) $ + map Para $ + filter (not . null) $ + map stripLeadingTrailingSpace $ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || not (isDisplayMath x || isDisplayMath y)) lst fixDisplayMath x = x + +unsmartify :: WriterOptions -> String -> String +unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs +unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs +unsmartify opts ('\8211':xs) + | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs + | otherwise = "--" ++ unsmartify opts xs +unsmartify opts ('\8212':xs) + | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs + | otherwise = "---" ++ unsmartify opts xs +unsmartify opts ('\8220':xs) = '"' : unsmartify opts xs +unsmartify opts ('\8221':xs) = '"' : unsmartify opts xs +unsmartify opts ('\8216':xs) = '\'' : unsmartify opts xs +unsmartify opts (x:xs) = x : unsmartify opts xs +unsmartify _ [] = [] + +gridTable :: Monad m + => WriterOptions + -> (WriterOptions -> [Block] -> m Doc) + -> Bool -- ^ headless + -> [Alignment] + -> [Double] + -> [[Block]] + -> [[[Block]]] + -> m Doc +gridTable opts blocksToDoc headless aligns widths headers rows = do + let numcols = maximum (length aligns : length widths : + map length (headers:rows)) + let handleGivenWidths widths' = do + let widthsInChars' = map ( + (\x -> if x < 1 then 1 else x) . + (\x -> x - 3) . floor . + (fromIntegral (writerColumns opts) *) + ) widths' + rawHeaders' <- zipWithM blocksToDoc + (map (\w -> opts{writerColumns = + min (w - 2) (writerColumns opts)}) widthsInChars') + headers + rawRows' <- mapM + (\cs -> zipWithM blocksToDoc + (map (\w -> opts{writerColumns = + min (w - 2) (writerColumns opts)}) widthsInChars') + cs) + rows + return (widthsInChars', rawHeaders', rawRows') + let handleZeroWidths = do + rawHeaders' <- mapM (blocksToDoc opts) headers + rawRows' <- mapM (mapM (blocksToDoc opts)) rows + let numChars [] = 0 + numChars xs = maximum . map offset $ xs + let widthsInChars' = + map numChars $ transpose (rawHeaders' : rawRows') + if sum widthsInChars' > writerColumns opts + then -- use even widths + handleGivenWidths + (replicate numcols (1.0 / fromIntegral numcols) :: [Double]) + else return (widthsInChars', rawHeaders', rawRows') + (widthsInChars, rawHeaders, rawRows) <- if all (== 0) widths + then handleZeroWidths + else handleGivenWidths widths + let hpipeBlocks blocks = hcat [beg, middle, end] + where h = maximum (1 : map height blocks) + sep' = lblock 3 $ vcat (replicate h (text " | ")) + beg = lblock 2 $ vcat (replicate h (text "| ")) + end = lblock 2 $ vcat (replicate h (text " |")) + middle = chomp $ hcat $ intersperse sep' blocks + let makeRow = hpipeBlocks . zipWith lblock widthsInChars + let head' = makeRow rawHeaders + let rows' = map (makeRow . map chomp) rawRows + let borderpart ch align widthInChars = + (if align == AlignLeft || align == AlignCenter + then char ':' + else char ch) <> + text (replicate widthInChars ch) <> + (if align == AlignRight || align == AlignCenter + then char ':' + else char ch) + let border ch aligns' widthsInChars' = + char '+' <> + hcat (intersperse (char '+') (zipWith (borderpart ch) + aligns' widthsInChars')) <> char '+' + let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars) + rows' + let head'' = if headless + then empty + else head' $$ border '=' aligns widthsInChars + if headless + then return $ + border '-' aligns widthsInChars $$ + body $$ + border '-' (repeat AlignDefault) widthsInChars + else return $ + border '-' (repeat AlignDefault) widthsInChars $$ + head'' $$ + body $$ + border '-' (repeat AlignDefault) widthsInChars + +metaValueToInlines :: MetaValue -> [Inline] +metaValueToInlines (MetaString s) = [Str s] +metaValueToInlines (MetaInlines ils) = ils +metaValueToInlines (MetaBlocks bs) = query return bs +metaValueToInlines (MetaBool b) = [Str $ show b] +metaValueToInlines _ = [] diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index 9bd23ac3b..4936c743e 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE OverloadedStrings, PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,63 +30,67 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to Docbook XML. -} module Text.Pandoc.Writers.TEI (writeTEI) where +import Data.Char (toLower) +import Data.List (isPrefixOf, stripPrefix) +import Data.Text (Text) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') -import Data.List ( stripPrefix, isPrefixOf ) -import Data.Char ( toLower ) -import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML -- | Convert list of authors to a docbook <author> section -authorToTEI :: WriterOptions -> [Inline] -> B.Inlines -authorToTEI opts name' = - let name = render Nothing $ inlinesToTEI opts name' - colwidth = if writerWrapText opts == WrapAuto +authorToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m B.Inlines +authorToTEI opts name' = do + name <- render Nothing <$> inlinesToTEI opts name' + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - in B.rawInline "tei" $ render colwidth $ + return $ B.rawInline "tei" $ render colwidth $ inTagsSimple "author" (text $ escapeStringForXML name) -- | Convert Pandoc document to string in Docbook format. -writeTEI :: WriterOptions -> Pandoc -> String -writeTEI opts (Pandoc meta blocks) = +writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeTEI opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + render' :: Doc -> Text render' = render colwidth startLvl = case writerTopLevelDivision opts of TopLevelPart -> -1 TopLevelChapter -> 0 TopLevelSection -> 1 TopLevelDefault -> 1 - auths' = map (authorToTEI opts) $ docAuthors meta - meta' = B.setMeta "author" auths' meta - Just metadata = metaToJSON opts - (Just . render colwidth . (vcat . - (map (elementToTEI opts startLvl)) . hierarchicalize)) - (Just . render colwidth . inlinesToTEI opts) + auths' <- mapM (authorToTEI opts) $ docAuthors meta + let meta' = B.setMeta "author" auths' meta + metadata <- metaToJSON opts + (fmap (render' . vcat) . + mapM (elementToTEI opts startLvl) . hierarchicalize) + (fmap render' . inlinesToTEI opts) meta' - main = render' $ vcat (map (elementToTEI opts startLvl) elements) - context = defField "body" main - $ defField "mathml" (case writerHTMLMathMethod opts of - MathML _ -> True - _ -> False) - $ metadata - in case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + main <- (render' . vcat) <$> mapM (elementToTEI opts startLvl) elements + let context = defField "body" main + $ + defField "mathml" (case writerHTMLMathMethod opts of + MathML -> True + _ -> False) metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to TEI. -elementToTEI :: WriterOptions -> Int -> Element -> Doc +elementToTEI :: PandocMonad m => WriterOptions -> Int -> Element -> m Doc elementToTEI opts _ (Blk block) = blockToTEI opts block -elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) = +elementToTEI opts lvl (Sec _ _num attr title elements) = do -- TEI doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] @@ -96,14 +101,14 @@ elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) = | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "level" ++ show n | otherwise -> "section" - in inTags True "div" [("type", divType) | not (null id')] $ --- ("id", writerIdentifierPrefix opts ++ id') | not (null id')] $ - inTagsSimple "head" (inlinesToTEI opts title) $$ - vcat (map (elementToTEI opts (lvl + 1)) elements') + contents <- vcat <$> mapM (elementToTEI opts (lvl + 1)) elements' + titleContents <- inlinesToTEI opts title + return $ inTags True "div" (("type", divType) : idFromAttr opts attr) $ + inTagsSimple "head" titleContents $$ contents -- | Convert a list of Pandoc blocks to TEI. -blocksToTEI :: WriterOptions -> [Block] -> Doc -blocksToTEI opts = vcat . map (blockToTEI opts) +blocksToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc +blocksToTEI opts bs = vcat <$> mapM (blockToTEI opts) bs -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -112,48 +117,54 @@ plainToPara x = x -- | Convert a list of pairs of terms and definitions into a TEI -- list with labels and items. -deflistItemsToTEI :: WriterOptions -> [([Inline],[[Block]])] -> Doc +deflistItemsToTEI :: PandocMonad m + => WriterOptions -> [([Inline],[[Block]])] -> m Doc deflistItemsToTEI opts items = - vcat $ map (\(term, defs) -> deflistItemToTEI opts term defs) items + vcat <$> mapM (uncurry (deflistItemToTEI opts)) items -- | Convert a term and a list of blocks into a TEI varlistentry. -deflistItemToTEI :: WriterOptions -> [Inline] -> [[Block]] -> Doc -deflistItemToTEI opts term defs = - let def' = concatMap (map plainToPara) defs - in inTagsIndented "label" (inlinesToTEI opts term) $$ - inTagsIndented "item" (blocksToTEI opts def') +deflistItemToTEI :: PandocMonad m + => WriterOptions -> [Inline] -> [[Block]] -> m Doc +deflistItemToTEI opts term defs = do + term' <- inlinesToTEI opts term + defs' <- blocksToTEI opts $ concatMap (map plainToPara) defs + return $ inTagsIndented "label" term' $$ + inTagsIndented "item" defs' -- | Convert a list of lists of blocks to a list of TEI list items. -listItemsToTEI :: WriterOptions -> [[Block]] -> Doc -listItemsToTEI opts items = vcat $ map (listItemToTEI opts) items +listItemsToTEI :: PandocMonad m => WriterOptions -> [[Block]] -> m Doc +listItemsToTEI opts items = vcat <$> mapM (listItemToTEI opts) items -- | Convert a list of blocks into a TEI list item. -listItemToTEI :: WriterOptions -> [Block] -> Doc +listItemToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc listItemToTEI opts item = - inTagsIndented "item" $ blocksToTEI opts $ map plainToPara item + inTagsIndented "item" <$> blocksToTEI opts (map plainToPara item) -imageToTEI :: WriterOptions -> Attr -> String -> Doc -imageToTEI _ attr src = selfClosingTag "graphic" $ - ("url", src) : idAndRole attr ++ dims +imageToTEI :: PandocMonad m => WriterOptions -> Attr -> String -> m Doc +imageToTEI opts attr src = return $ selfClosingTag "graphic" $ + ("url", src) : idFromAttr opts attr ++ dims where - dims = go Width "width" ++ go Height "depth" - go dir dstr = case (dimension dir attr) of + dims = go Width "width" ++ go Height "height" + go dir dstr = case dimension dir attr of Just a -> [(dstr, show a)] Nothing -> [] -- | Convert a Pandoc block element to TEI. -blockToTEI :: WriterOptions -> Block -> Doc -blockToTEI _ Null = empty +blockToTEI :: PandocMonad m => WriterOptions -> Block -> m Doc +blockToTEI _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: -blockToTEI opts (Div (ident,_,_) [Para lst]) = - let attribs = [("id", ident) | not (null ident)] in - inTags False "p" attribs $ inlinesToTEI opts lst +blockToTEI opts (Div attr [Para lst]) = do + let attribs = idFromAttr opts attr + inTags False "p" attribs <$> inlinesToTEI opts lst blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs -blockToTEI _ (Header _ _ _) = empty -- should not occur after hierarchicalize +blockToTEI _ h@Header{} = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty -- For TEI simple, text must be within containing block element, so --- we use plainToPara to ensure that Plain text ends up contained by --- something. +-- we use treat as Para to ensure that Plain text ends up contained by +-- something: blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst -- title beginning with fig: indicates that the image is a figure --blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = @@ -168,13 +179,13 @@ blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst -- (imageToTEI opts attr src)) $$ -- inTagsSimple "textobject" (inTagsSimple "phrase" alt)) blockToTEI opts (Para lst) = - inTags False "p" [] $ inlinesToTEI opts lst + inTags False "p" [] <$> inlinesToTEI opts lst blockToTEI opts (LineBlock lns) = blockToTEI opts $ linesToPara lns blockToTEI opts (BlockQuote blocks) = - inTagsIndented "quote" $ blocksToTEI opts blocks + inTagsIndented "quote" <$> blocksToTEI opts blocks blockToTEI _ (CodeBlock (_,classes,_) str) = - text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <> + return $ text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <> flush (text (escapeStringForXML str) <> cr <> text "</ab>") where lang = if null langs then "" @@ -184,11 +195,11 @@ blockToTEI _ (CodeBlock (_,classes,_) str) = then [s] else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes -blockToTEI opts (BulletList lst) = +blockToTEI opts (BulletList lst) = do let attribs = [("type", "unordered")] - in inTags True "list" attribs $ listItemsToTEI opts lst -blockToTEI _ (OrderedList _ []) = empty -blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = + inTags True "list" attribs <$> listItemsToTEI opts lst +blockToTEI _ (OrderedList _ []) = return empty +blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = do let attribs = case numstyle of DefaultStyle -> [] Decimal -> [("type", "ordered:arabic")] @@ -197,127 +208,138 @@ blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("type", "ordered:loweralpha")] UpperRoman -> [("type", "ordered:upperroman")] LowerRoman -> [("type", "ordered:lowerroman")] - items = if start == 1 - then listItemsToTEI opts (first:rest) - else (inTags True "item" [("n",show start)] - (blocksToTEI opts $ map plainToPara first)) $$ - listItemsToTEI opts rest - in inTags True "list" attribs items -blockToTEI opts (DefinitionList lst) = + items <- if start == 1 + then listItemsToTEI opts (first:rest) + else do + fi <- blocksToTEI opts $ map plainToPara first + re <- listItemsToTEI opts rest + return $ inTags True "item" [("n",show start)] fi $$ re + return $ inTags True "list" attribs items +blockToTEI opts (DefinitionList lst) = do let attribs = [("type", "definition")] - in inTags True "list" attribs $ deflistItemsToTEI opts lst -blockToTEI _ (RawBlock f str) - | f == "tei" = text str -- raw TEI block (should such a thing exist). --- | f == "html" = text str -- allow html for backwards compatibility - | otherwise = empty -blockToTEI _ HorizontalRule = - selfClosingTag "milestone" [("unit","undefined"), ("type","separator"),("rendition","line")] + inTags True "list" attribs <$> deflistItemsToTEI opts lst +blockToTEI _ b@(RawBlock f str) + | f == "tei" = return $ text str + -- raw TEI block (should such a thing exist). + | otherwise = do + report $ BlockNotRendered b + return empty +blockToTEI _ HorizontalRule = return $ + selfClosingTag "milestone" [("unit","undefined") + ,("type","separator") + ,("rendition","line")] -- | TEI Tables -- TEI Simple's tables are composed of cells and rows; other -- table info in the AST is here lossily discard. -blockToTEI opts (Table _ _ _ headers rows) = - let - headers' = tableHeadersToTEI opts headers --- headers' = if all null headers --- then return empty --- else tableRowToTEI opts headers - in - inTags True "table" [] $ - vcat $ [headers'] <> map (tableRowToTEI opts) rows +blockToTEI opts (Table _ _ _ headers rows) = do + headers' <- tableHeadersToTEI opts headers + rows' <- mapM (tableRowToTEI opts) rows + return $ inTags True "table" [] $ headers' $$ vcat rows' -tableRowToTEI :: WriterOptions - -> [[Block]] - -> Doc +tableRowToTEI :: PandocMonad m + => WriterOptions + -> [[Block]] + -> m Doc tableRowToTEI opts cols = - inTagsIndented "row" $ vcat $ map (tableItemToTEI opts) cols + (inTagsIndented "row" . vcat) <$> mapM (tableItemToTEI opts) cols -tableHeadersToTEI :: WriterOptions +tableHeadersToTEI :: PandocMonad m + => WriterOptions -> [[Block]] - -> Doc + -> m Doc tableHeadersToTEI opts cols = - inTags True "row" [("role","label")] $ vcat $ map (tableItemToTEI opts) cols + (inTags True "row" [("role","label")] . vcat) <$> + mapM (tableItemToTEI opts) cols -tableItemToTEI :: WriterOptions - -> [Block] - -> Doc +tableItemToTEI :: PandocMonad m + => WriterOptions + -> [Block] + -> m Doc tableItemToTEI opts item = - inTags False "cell" [] $ vcat $ map (blockToTEI opts) item + (inTags False "cell" [] . vcat) <$> mapM (blockToTEI opts) item -- | Convert a list of inline elements to TEI. -inlinesToTEI :: WriterOptions -> [Inline] -> Doc -inlinesToTEI opts lst = hcat $ map (inlineToTEI opts) lst +inlinesToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m Doc +inlinesToTEI opts lst = hcat <$> mapM (inlineToTEI opts) lst -- | Convert an inline element to TEI. -inlineToTEI :: WriterOptions -> Inline -> Doc -inlineToTEI _ (Str str) = text $ escapeStringForXML str +inlineToTEI :: PandocMonad m => WriterOptions -> Inline -> m Doc +inlineToTEI _ (Str str) = return $ text $ escapeStringForXML str inlineToTEI opts (Emph lst) = - inTags False "hi" [("rendition","simple:italic")] $ inlinesToTEI opts lst + inTags False "hi" [("rendition","simple:italic")] <$> inlinesToTEI opts lst inlineToTEI opts (Strong lst) = - inTags False "hi" [("rendition", "simple:bold")] $ inlinesToTEI opts lst + inTags False "hi" [("rendition", "simple:bold")] <$> inlinesToTEI opts lst inlineToTEI opts (Strikeout lst) = - inTags False "hi" [("rendition", "simple:strikethrough")] $ + inTags False "hi" [("rendition", "simple:strikethrough")] <$> inlinesToTEI opts lst inlineToTEI opts (Superscript lst) = - inTags False "hi" [("rendition", "simple:superscript")] $ inlinesToTEI opts lst + inTags False "hi" [("rendition", "simple:superscript")] <$> + inlinesToTEI opts lst inlineToTEI opts (Subscript lst) = - inTags False "hi" [("rendition", "simple:subscript")] $ inlinesToTEI opts lst + inTags False "hi" [("rendition", "simple:subscript")] <$> + inlinesToTEI opts lst inlineToTEI opts (SmallCaps lst) = - inTags False "hi" [("rendition", "simple:smallcaps")] $ - inlinesToTEI opts lst + inTags False "hi" [("rendition", "simple:smallcaps")] <$> + inlinesToTEI opts lst inlineToTEI opts (Quoted _ lst) = - inTagsSimple "quote" $ inlinesToTEI opts lst + inTagsSimple "quote" <$> inlinesToTEI opts lst inlineToTEI opts (Cite _ lst) = inlinesToTEI opts lst inlineToTEI opts (Span _ ils) = inlinesToTEI opts ils -inlineToTEI _ (Code _ str) = +inlineToTEI _ (Code _ str) = return $ inTags False "seg" [("type","code")] $ text (escapeStringForXML str) -- Distinguish display from inline math by wrapping the former in a "figure." -inlineToTEI _ (Math t str) = +inlineToTEI _ (Math t str) = return $ case t of InlineMath -> inTags False "formula" [("notation","TeX")] $ - text (str) + text str DisplayMath -> inTags True "figure" [("type","math")] $ - inTags False "formula" [("notation","TeX")] $ text (str) + inTags False "formula" [("notation","TeX")] $ text str -inlineToTEI _ (RawInline f x) | f == "tei" = text x - | otherwise = empty -inlineToTEI _ LineBreak = selfClosingTag "lb" [] -inlineToTEI _ Space = space +inlineToTEI _ il@(RawInline f x) | f == "tei" = return $ text x + | otherwise = empty <$ + report (InlineNotRendered il) +inlineToTEI _ LineBreak = return $ selfClosingTag "lb" [] +inlineToTEI _ Space = + return space -- because we use \n for LineBreak, we can't do soft breaks: -inlineToTEI _ SoftBreak = space +inlineToTEI _ SoftBreak = + return space inlineToTEI opts (Link attr txt (src, _)) - | Just email <- stripPrefix "mailto:" src = + | Just email <- stripPrefix "mailto:" src = do let emailLink = text $ - escapeStringForXML $ email - in case txt of - [Str s] | escapeURI s == email -> emailLink - _ -> inlinesToTEI opts txt <+> - char '(' <> emailLink <> char ')' + escapeStringForXML email + case txt of + [Str s] | escapeURI s == email -> + return emailLink + _ -> do + linktext <- inlinesToTEI opts txt + return $ linktext <+> char '(' <> emailLink <> char ')' | otherwise = - (if isPrefixOf "#" src - then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr - else inTags False "ref" $ ("target", src) : idAndRole attr ) $ + (if "#" `isPrefixOf` src + then inTags False "ref" $ ("target", drop 1 src) + : idFromAttr opts attr + else inTags False "ref" $ ("target", src) + : idFromAttr opts attr ) <$> inlinesToTEI opts txt -inlineToTEI opts (Image attr description (src, tit)) = +inlineToTEI opts (Image attr description (src, tit)) = do let titleDoc = if null tit then empty - else inTags False "figDesc" [] (text $ escapeStringForXML tit) - imageDesc = if null description - then empty - else inTags False "head" [] (inlinesToTEI opts description) - in inTagsIndented "figure" $ imageDesc $$ - imageToTEI opts attr src $$ titleDoc + else inTags False "figDesc" [] + (text $ escapeStringForXML tit) + imageDesc <- if null description + then return empty + else inTags False "head" [] + <$> inlinesToTEI opts description + img <- imageToTEI opts attr src + return $ inTagsIndented "figure" $ imageDesc $$ img $$ titleDoc inlineToTEI opts (Note contents) = - inTagsIndented "note" $ blocksToTEI opts contents + inTagsIndented "note" <$> blocksToTEI opts contents -idAndRole :: Attr -> [(String, String)] -idAndRole (id',cls,_) = ident ++ role - where - ident = if null id' - then [] - else [("id", id')] - role = if null cls - then [] - else [("role", unwords cls)] +idFromAttr :: WriterOptions -> Attr -> [(String, String)] +idFromAttr opts (id',_,_) = + if null id' + then [] + else [("xml:id", writerIdentifierPrefix opts ++ id')] diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index f2b9aa15f..bf434642e 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2008-2015 John MacFarlane and Peter Wang +Copyright (C) 2008-2018 John MacFarlane + 2012 Peter Wang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2015 John MacFarlane and Peter Wang + Copyright : Copyright (C) 2008-2018 John MacFarlane + 2012 Peter Wang License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,21 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into Texinfo. -} module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where +import Control.Monad.Except (throwError) +import Control.Monad.State.Strict +import Data.Char (chr, ord) +import Data.List (maximumBy, transpose) +import Data.Ord (comparing) +import qualified Data.Set as Set +import Data.Text (Text) +import Network.URI (unEscapeString) +import System.FilePath +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options +import Text.Pandoc.Pretty import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Printf ( printf ) -import Data.List ( transpose, maximumBy ) -import Data.Ord ( comparing ) -import Data.Char ( chr, ord ) -import Control.Monad.State -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Network.URI ( isURI, unEscapeString ) -import System.FilePath -import qualified Data.Set as Set +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout @@ -59,10 +66,12 @@ data WriterState = - generated .texi files don't work when run through texi2dvi -} +type TI m = StateT WriterState m + -- | Convert Pandoc to Texinfo. -writeTexinfo :: WriterOptions -> Pandoc -> String +writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTexinfo options document = - evalState (pandocToTexinfo options $ wrapTop document) $ + evalStateT (pandocToTexinfo options $ wrapTop document) WriterState { stStrikeout = False, stSuperscript = False, stEscapeComma = False, stSubscript = False, stIdentifiers = Set.empty, stOptions = options} @@ -72,16 +81,18 @@ wrapTop :: Pandoc -> Pandoc wrapTop (Pandoc meta blocks) = Pandoc meta (Header 0 nullAttr (docTitle meta) : blocks) -pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState String +pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m Text pandocToTexinfo options (Pandoc meta blocks) = do let titlePage = not $ all null $ docTitle meta : docDate meta : docAuthors meta let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToTexinfo) - (fmap (render colwidth) . inlineListToTexinfo) + (fmap render' . blockListToTexinfo) + (fmap render' . inlineListToTexinfo) meta main <- blockListToTexinfo blocks st <- get @@ -91,11 +102,11 @@ pandocToTexinfo options (Pandoc meta blocks) = do $ defField "titlepage" titlePage $ defField "subscript" (stSubscript st) $ defField "superscript" (stSuperscript st) - $ defField "strikeout" (stStrikeout st) - $ metadata + $ + defField "strikeout" (stStrikeout st) metadata case writerTemplate options of Nothing -> return body - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape things as needed for Texinfo. stringToTexinfo :: String -> String @@ -110,7 +121,7 @@ stringToTexinfo = escapeStringUsing texinfoEscapes , ('\x2019', "'") ] -escapeCommas :: State WriterState Doc -> State WriterState Doc +escapeCommas :: PandocMonad m => TI m Doc -> TI m Doc escapeCommas parser = do oldEscapeComma <- gets stEscapeComma modify $ \st -> st{ stEscapeComma = True } @@ -123,8 +134,9 @@ inCmd :: String -> Doc -> Doc inCmd cmd contents = char '@' <> text cmd <> braces contents -- | Convert Pandoc block element to Texinfo. -blockToTexinfo :: Block -- ^ Block to convert - -> State WriterState Doc +blockToTexinfo :: PandocMonad m + => Block -- ^ Block to convert + -> TI m Doc blockToTexinfo Null = return empty @@ -154,17 +166,19 @@ blockToTexinfo (BlockQuote lst) = do contents $$ text "@end quotation" -blockToTexinfo (CodeBlock _ str) = do +blockToTexinfo (CodeBlock _ str) = return $ blankline $$ - text "@verbatim" $$ - flush (text str) $$ - text "@end verbatim" <> blankline + text "@verbatim" $$ + flush (text str) $$ + text "@end verbatim" <> blankline -blockToTexinfo (RawBlock f str) +blockToTexinfo b@(RawBlock f str) | f == "texinfo" = return $ text str | f == "latex" || f == "tex" = return $ text "@tex" $$ text str $$ text "@end tex" - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToTexinfo (BulletList lst) = do items <- mapM listItemToTexinfo lst @@ -204,7 +218,7 @@ blockToTexinfo HorizontalRule = text "@bigskip@hrule@bigskip" $$ text "@end iftex" $$ text "@ifnottex" $$ - text (take 72 $ repeat '-') $$ + text (replicate 72 '-') $$ text "@end ifnottex" blockToTexinfo (Header 0 _ lst) = do @@ -214,23 +228,27 @@ blockToTexinfo (Header 0 _ lst) = do return $ text "@node Top" $$ text "@top " <> txt <> blankline -blockToTexinfo (Header level _ lst) = do - node <- inlineListForNode lst - txt <- inlineListToTexinfo lst - idsUsed <- gets stIdentifiers - let id' = uniqueIdent lst idsUsed - modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } - return $ if (level > 0) && (level <= 4) - then blankline <> text "@node " <> node $$ - text (seccmd level) <> txt $$ - text "@anchor" <> braces (text $ '#':id') - else txt - where - seccmd 1 = "@chapter " - seccmd 2 = "@section " - seccmd 3 = "@subsection " - seccmd 4 = "@subsubsection " - seccmd _ = error "illegal seccmd level" +blockToTexinfo (Header level _ lst) + | level < 1 || level > 4 = blockToTexinfo (Para lst) + | otherwise = do + node <- inlineListForNode lst + txt <- inlineListToTexinfo lst + idsUsed <- gets stIdentifiers + let id' = uniqueIdent lst idsUsed + modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } + sec <- seccmd level + return $ if (level > 0) && (level <= 4) + then blankline <> text "@node " <> node $$ + text sec <> txt $$ + text "@anchor" <> braces (text $ '#':id') + else txt + where + seccmd :: PandocMonad m => Int -> TI m String + seccmd 1 = return "@chapter " + seccmd 2 = return "@section " + seccmd 3 = return "@subsection " + seccmd 4 = return "@subsubsection " + seccmd _ = throwError $ PandocSomeError "illegal seccmd level" blockToTexinfo (Table caption aligns widths heads rows) = do headers <- if all null heads @@ -256,28 +274,32 @@ blockToTexinfo (Table caption aligns widths heads rows) = do inCmd "caption" captionText $$ text "@end float" -tableHeadToTexinfo :: [Alignment] +tableHeadToTexinfo :: PandocMonad m + => [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem " -tableRowToTexinfo :: [Alignment] +tableRowToTexinfo :: PandocMonad m + => [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableRowToTexinfo = tableAnyRowToTexinfo "@item " -tableAnyRowToTexinfo :: String +tableAnyRowToTexinfo :: PandocMonad m + => String -> [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableAnyRowToTexinfo itemtype aligns cols = zipWithM alignedBlock aligns cols >>= return . (text itemtype $$) . foldl (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty -alignedBlock :: Alignment +alignedBlock :: PandocMonad m + => Alignment -> [Block] - -> State WriterState Doc + -> TI m Doc -- XXX @flushleft and @flushright text won't get word wrapped. Since word -- wrapping is more important than alignment, we ignore the alignment. alignedBlock _ = blockListToTexinfo @@ -292,8 +314,9 @@ alignedBlock _ col = blockListToTexinfo col -} -- | Convert Pandoc block elements to Texinfo. -blockListToTexinfo :: [Block] - -> State WriterState Doc +blockListToTexinfo :: PandocMonad m + => [Block] + -> TI m Doc blockListToTexinfo [] = return empty blockListToTexinfo (x:xs) = do x' <- blockToTexinfo x @@ -316,8 +339,8 @@ blockListToTexinfo (x:xs) = do Para _ -> do xs' <- blockListToTexinfo xs case xs of - ((CodeBlock _ _):_) -> return $ x' $$ xs' - _ -> return $ x' $+$ xs' + (CodeBlock _ _:_) -> return $ x' $$ xs' + _ -> return $ x' $+$ xs' _ -> do xs' <- blockListToTexinfo xs return $ x' $$ xs' @@ -335,15 +358,17 @@ collectNodes level (x:xs) = _ -> collectNodes level xs -makeMenuLine :: Block - -> State WriterState Doc +makeMenuLine :: PandocMonad m + => Block + -> TI m Doc makeMenuLine (Header _ _ lst) = do txt <- inlineListForNode lst return $ text "* " <> txt <> text "::" -makeMenuLine _ = error "makeMenuLine called with non-Header block" +makeMenuLine _ = throwError $ PandocSomeError "makeMenuLine called with non-Header block" -listItemToTexinfo :: [Block] - -> State WriterState Doc +listItemToTexinfo :: PandocMonad m + => [Block] + -> TI m Doc listItemToTexinfo lst = do contents <- blockListToTexinfo lst let spacer = case reverse lst of @@ -351,8 +376,9 @@ listItemToTexinfo lst = do _ -> empty return $ text "@item" $$ contents <> spacer -defListItemToTexinfo :: ([Inline], [[Block]]) - -> State WriterState Doc +defListItemToTexinfo :: PandocMonad m + => ([Inline], [[Block]]) + -> TI m Doc defListItemToTexinfo (term, defs) = do term' <- inlineListToTexinfo term let defToTexinfo bs = do d <- blockListToTexinfo bs @@ -363,13 +389,15 @@ defListItemToTexinfo (term, defs) = do return $ text "@item " <> term' $+$ vcat defs' -- | Convert list of inline elements to Texinfo. -inlineListToTexinfo :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListToTexinfo :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> TI m Doc inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat -- | Convert list of inline elements to Texinfo acceptable for a node name. -inlineListForNode :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListForNode :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> TI m Doc inlineListForNode = return . text . stringToTexinfo . filter (not . disallowedInNode) . stringify @@ -378,8 +406,9 @@ disallowedInNode :: Char -> Bool disallowedInNode c = c `elem` (".,:()" :: String) -- | Convert inline element to Texinfo -inlineToTexinfo :: Inline -- ^ Inline to convert - -> State WriterState Doc +inlineToTexinfo :: PandocMonad m + => Inline -- ^ Inline to convert + -> TI m Doc inlineToTexinfo (Span _ lst) = inlineListToTexinfo lst @@ -408,7 +437,7 @@ inlineToTexinfo (Subscript lst) = do inlineToTexinfo (SmallCaps lst) = inlineListToTexinfo lst >>= return . inCmd "sc" -inlineToTexinfo (Code _ str) = do +inlineToTexinfo (Code _ str) = return $ text $ "@code{" ++ stringToTexinfo str ++ "}" inlineToTexinfo (Quoted SingleQuote lst) = do @@ -423,12 +452,14 @@ inlineToTexinfo (Cite _ lst) = inlineListToTexinfo lst inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str -inlineToTexinfo (RawInline f str) +inlineToTexinfo il@(RawInline f str) | f == "latex" || f == "tex" = return $ text "@tex" $$ text str $$ text "@end tex" | f == "texinfo" = return $ text str - | otherwise = return empty -inlineToTexinfo (LineBreak) = return $ text "@*" <> cr + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToTexinfo LineBreak = return $ text "@*" <> cr inlineToTexinfo SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of @@ -441,10 +472,10 @@ inlineToTexinfo (Link _ txt (src@('#':_), _)) = do contents <- escapeCommas $ inlineListToTexinfo txt return $ text "@ref" <> braces (text (stringToTexinfo src) <> text "," <> contents) -inlineToTexinfo (Link _ txt (src, _)) = do +inlineToTexinfo (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink - do return $ text $ "@url{" ++ x ++ "}" + return $ text $ "@url{" ++ x ++ "}" _ -> do contents <- escapeCommas $ inlineListToTexinfo txt let src1 = stringToTexinfo src return $ text ("@uref{" ++ src1 ++ ",") <> contents <> @@ -453,7 +484,7 @@ inlineToTexinfo (Link _ txt (src, _)) = do inlineToTexinfo (Image attr alternate (source, _)) = do content <- escapeCommas $ inlineListToTexinfo alternate opts <- gets stOptions - let showDim dim = case (dimension dim attr) of + let showDim dim = case dimension dim attr of (Just (Pixel a)) -> showInInch opts (Pixel a) ++ "in" (Just (Percent _)) -> "" (Just d) -> show d diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index f73876fd2..f46eb43bc 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,17 +30,20 @@ Conversion of 'Pandoc' documents to Textile markup. Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual> -} module Text.Pandoc.Writers.Textile ( writeTextile ) where +import Control.Monad.State.Strict +import Data.Char (isSpace) +import Data.List (intercalate) +import Data.Text (Text, pack) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared import Text.Pandoc.Pretty (render) -import Text.Pandoc.ImageSize -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intercalate ) -import Control.Monad.State -import Data.Char ( isSpace ) +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML (escapeStringForXML) data WriterState = WriterState { stNotes :: [String] -- Footnotes @@ -49,29 +52,34 @@ data WriterState = WriterState { , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list } +type TW = StateT WriterState + -- | Convert Pandoc to Textile. -writeTextile :: WriterOptions -> Pandoc -> String +writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTextile opts document = - evalState (pandocToTextile opts document) - WriterState { stNotes = [], stListLevel = [], stStartNum = Nothing, + evalStateT (pandocToTextile opts document) + WriterState { stNotes = [], + stListLevel = [], + stStartNum = Nothing, stUseTags = False } -- | Return Textile representation of document. -pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String +pandocToTextile :: PandocMonad m + => WriterOptions -> Pandoc -> TW m Text pandocToTextile opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (blockListToTextile opts) (inlineListToTextile opts) meta body <- blockListToTextile opts blocks - notes <- liftM (unlines . reverse . stNotes) get - let main = body ++ if null notes then "" else ("\n\n" ++ notes) + notes <- gets $ unlines . reverse . stNotes + let main = pack $ body ++ if null notes then "" else ("\n\n" ++ notes) let context = defField "body" main metadata case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Nothing -> return main + Just tpl -> renderTemplate' tpl context -withUseTags :: State WriterState a -> State WriterState a +withUseTags :: PandocMonad m => TW m a -> TW m a withUseTags action = do - oldUseTags <- liftM stUseTags get + oldUseTags <- gets stUseTags modify $ \s -> s { stUseTags = True } result <- action modify $ \s -> s { stUseTags = oldUseTags } @@ -101,9 +109,10 @@ escapeStringForTextile :: String -> String escapeStringForTextile = concatMap escapeCharForTextile -- | Convert Pandoc block element to Textile. -blockToTextile :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState String +blockToTextile :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> TW m String blockToTextile _ Null = return "" @@ -123,8 +132,8 @@ blockToTextile opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do return $ im ++ "\n" ++ capt blockToTextile opts (Para inlines) = do - useTags <- liftM stUseTags get - listLevel <- liftM stListLevel get + useTags <- gets stUseTags + listLevel <- gets stListLevel contents <- inlineListToTextile opts inlines return $ if useTags then "<p>" ++ contents ++ "</p>" @@ -133,9 +142,11 @@ blockToTextile opts (Para inlines) = do blockToTextile opts (LineBlock lns) = blockToTextile opts $ linesToPara lns -blockToTextile _ (RawBlock f str) +blockToTextile _ b@(RawBlock f str) | f == Format "html" || f == Format "textile" = return str - | otherwise = return "" + | otherwise = do + report $ BlockNotRendered b + return "" blockToTextile _ HorizontalRule = return "<hr />\n" @@ -211,7 +222,7 @@ blockToTextile opts (Table capt aligns widths headers rows') = do "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n" blockToTextile opts x@(BulletList items) = do - oldUseTags <- liftM stUseTags get + oldUseTags <- gets stUseTags let useTags = oldUseTags || not (isSimpleList x) if useTags then do @@ -219,13 +230,13 @@ blockToTextile opts x@(BulletList items) = do return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n" else do modify $ \s -> s { stListLevel = stListLevel s ++ "*" } - level <- get >>= return . length . stListLevel + level <- gets $ length . stListLevel contents <- mapM (listItemToTextile opts) items modify $ \s -> s { stListLevel = init (stListLevel s) } return $ vcat contents ++ (if level > 1 then "" else "\n") blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do - oldUseTags <- liftM stUseTags get + oldUseTags <- gets stUseTags let useTags = oldUseTags || not (isSimpleList x) if useTags then do @@ -237,7 +248,7 @@ blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do , stStartNum = if start > 1 then Just start else Nothing } - level <- get >>= return . length . stListLevel + level <- gets $ length . stListLevel contents <- mapM (listItemToTextile opts) items modify $ \s -> s { stListLevel = init (stListLevel s), stStartNum = Nothing } @@ -261,10 +272,11 @@ listAttribsToString (startnum, numstyle, _) = else "") -- | Convert bullet or ordered list item (list of blocks) to Textile. -listItemToTextile :: WriterOptions -> [Block] -> State WriterState String +listItemToTextile :: PandocMonad m + => WriterOptions -> [Block] -> TW m String listItemToTextile opts items = do contents <- blockListToTextile opts items - useTags <- get >>= return . stUseTags + useTags <- gets stUseTags if useTags then return $ "<li>" ++ contents ++ "</li>" else do @@ -277,14 +289,15 @@ listItemToTextile opts items = do Nothing -> return $ marker ++ " " ++ contents -- | Convert definition list item (label, list of blocks) to Textile. -definitionListItemToTextile :: WriterOptions +definitionListItemToTextile :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> State WriterState String + -> TW m String definitionListItemToTextile opts (label, items) = do labelText <- inlineListToTextile opts label contents <- mapM (blockListToTextile opts) items return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents) + intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents) -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -301,16 +314,16 @@ isSimpleListItem :: [Block] -> Bool isSimpleListItem [] = True isSimpleListItem [x] = case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - _ -> False + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + _ -> False isSimpleListItem [x, y] | isPlainOrPara x = case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - _ -> False + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + _ -> False isSimpleListItem _ = False isPlainOrPara :: Block -> Bool @@ -325,18 +338,19 @@ vcat = intercalate "\n" -- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki, -- and Textile writers, and should be abstracted out.) -tableRowToTextile :: WriterOptions - -> [String] - -> Int - -> [[Block]] - -> State WriterState String +tableRowToTextile :: PandocMonad m + => WriterOptions + -> [String] + -> Int + -> [[Block]] + -> TW m String tableRowToTextile opts alignStrings rownum cols' = do let celltype = if rownum == 0 then "th" else "td" let rowclass = case rownum of - 0 -> "header" + 0 -> "header" x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith + _ -> "even" + cols'' <- zipWithM (\alignment item -> tableItemToTextile opts celltype alignment item) alignStrings cols' return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" @@ -348,11 +362,12 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableItemToTextile :: WriterOptions - -> String - -> String - -> [Block] - -> State WriterState String +tableItemToTextile :: PandocMonad m + => WriterOptions + -> String + -> String + -> [Block] + -> TW m String tableItemToTextile opts celltype align' item = do let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ x ++ "</" ++ celltype ++ ">" @@ -360,19 +375,21 @@ tableItemToTextile opts celltype align' item = do return $ mkcell contents -- | Convert list of Pandoc block elements to Textile. -blockListToTextile :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState String +blockListToTextile :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> TW m String blockListToTextile opts blocks = mapM (blockToTextile opts) blocks >>= return . vcat -- | Convert list of Pandoc inline elements to Textile. -inlineListToTextile :: WriterOptions -> [Inline] -> State WriterState String +inlineListToTextile :: PandocMonad m + => WriterOptions -> [Inline] -> TW m String inlineListToTextile opts lst = mapM (inlineToTextile opts) lst >>= return . concat -- | Convert Pandoc inline element to Textile. -inlineToTextile :: WriterOptions -> Inline -> State WriterState String +inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m String inlineToTextile opts (Span _ lst) = inlineListToTextile opts lst @@ -429,13 +446,15 @@ inlineToTextile _ (Str str) = return $ escapeStringForTextile str inlineToTextile _ (Math _ str) = return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>" -inlineToTextile opts (RawInline f str) +inlineToTextile opts il@(RawInline f str) | f == Format "html" || f == Format "textile" = return str | (f == Format "latex" || f == Format "tex") && isEnabled Ext_raw_tex opts = return str - | otherwise = return "" + | otherwise = do + report $ InlineNotRendered il + return "" -inlineToTextile _ (LineBreak) = return "\n" +inlineToTextile _ LineBreak = return "\n" inlineToTextile _ SoftBreak = return " " @@ -464,7 +483,7 @@ inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do then "" else "(" ++ unwords cls ++ ")" showDim dir = let toCss str = Just $ show dir ++ ":" ++ str ++ ";" - in case (dimension dir attr) of + in case dimension dir attr of Just (Percent a) -> toCss $ show (Percent a) Just dim -> toCss $ showInPixel opts dim ++ "px" Nothing -> Nothing @@ -476,7 +495,7 @@ inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do return $ "!" ++ classes ++ styles ++ source ++ txt ++ "!" inlineToTextile opts (Note contents) = do - curNotes <- liftM stNotes get + curNotes <- gets stNotes let newnum = length curNotes + 1 contents' <- blockListToTextile opts contents let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n" diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 423928c8a..dec1f9d4a 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -1,5 +1,6 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> + 2017-2018 Alex Ivkin This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,11 +19,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ZimWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane, 2016 Alex Ivkin + Copyright : Copyright (C) 2008-2018 John MacFarlane, 2017-2018 Alex Ivkin License : GNU GPL, version 2 or above Maintainer : Alex Ivkin <alex@ivkin.net> - Stability : alpha + Stability : beta Portability : portable Conversion of 'Pandoc' documents to ZimWiki markup. @@ -31,48 +32,53 @@ http://zim-wiki.org/manual/Help/Wiki_Syntax.html -} module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where +import Control.Monad (zipWithM) +import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) +import Data.Default (Default (..)) +import Data.List (intercalate, isInfixOf, isPrefixOf, transpose) +import qualified Data.Map as Map +import Data.Maybe (fromMaybe) +import Data.Text (Text, breakOnAll, pack) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Options ( WriterOptions(writerTableOfContents, writerTemplate, writerWrapText), WrapOption(..) ) -import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting, trimr - , substitute ) -import Text.Pandoc.Writers.Shared ( defField, metaToJSON ) import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Data.List ( intercalate, isPrefixOf, transpose, isInfixOf ) -import Data.Text ( breakOnAll, pack ) -import Data.Default (Default(..)) -import Network.URI ( isURI ) -import Control.Monad ( zipWithM ) -import Control.Monad.State ( modify, State, get, evalState ) ---import Control.Monad.Reader ( ReaderT, runReaderT, ask, local ) +import Text.Pandoc.Logging +import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) +import Text.Pandoc.Shared (escapeURI, isURI, linesToPara, removeFormatting, + substitute, trimr) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared (defField, metaToJSON) data WriterState = WriterState { - stItemNum :: Int, - stIndent :: String -- Indent after the marker at the beginning of list items + stItemNum :: Int, + stIndent :: String, -- Indent after the marker at the beginning of list items + stInTable :: Bool, -- Inside a table + stInLink :: Bool -- Inside a link description } instance Default WriterState where - def = WriterState { stItemNum = 1, stIndent = "" } + def = WriterState { stItemNum = 1, stIndent = "", stInTable = False, stInLink = False } + +type ZW = StateT WriterState -- | Convert Pandoc to ZimWiki. -writeZimWiki :: WriterOptions -> Pandoc -> String -writeZimWiki opts document = evalState (pandocToZimWiki opts document) (WriterState 1 "") +writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeZimWiki opts document = evalStateT (pandocToZimWiki opts document) def -- | Return ZimWiki representation of document. -pandocToZimWiki :: WriterOptions -> Pandoc -> State WriterState String +pandocToZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> ZW m Text pandocToZimWiki opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (fmap trimr . blockListToZimWiki opts) (inlineListToZimWiki opts) meta - body <- blockListToZimWiki opts blocks + body <- pack <$> blockListToZimWiki opts blocks --let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n" let main = body let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) - $ metadata + $ defField "toc" (writerTableOfContents opts) metadata case writerTemplate opts of - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context Nothing -> return main -- | Escape special characters for ZimWiki. @@ -83,7 +89,7 @@ escapeString = substitute "__" "''__''" . substitute "//" "''//''" -- | Convert Pandoc block element to ZimWiki. -blockToZimWiki :: WriterOptions -> Block -> State WriterState String +blockToZimWiki :: PandocMonad m => WriterOptions -> Block -> ZW m String blockToZimWiki _ Null = return "" @@ -107,18 +113,20 @@ blockToZimWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n" blockToZimWiki opts (Para inlines) = do - indent <- stIndent <$> get - -- useTags <- stUseTags <$> get + indent <- gets stIndent + -- useTags <- gets stUseTags contents <- inlineListToZimWiki opts inlines return $ contents ++ if null indent then "\n" else "" -blockToZimWiki opts (LineBlock lns) = do +blockToZimWiki opts (LineBlock lns) = blockToZimWiki opts $ linesToPara lns -blockToZimWiki opts (RawBlock f str) +blockToZimWiki opts b@(RawBlock f str) | f == Format "zimwiki" = return str - | f == Format "html" = do cont <- indentFromHTML opts str; return cont - | otherwise = return "" + | f == Format "html" = indentFromHTML opts str + | otherwise = do + report $ BlockNotRendered b + return "" blockToZimWiki _ HorizontalRule = return "\n----\n" @@ -128,9 +136,13 @@ blockToZimWiki opts (Header level _ inlines) = do return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" blockToZimWiki _ (CodeBlock (_,classes,_) str) = do + -- Remap languages into the gtksourceview2 convention that ZimWiki source code plugin is using + let langal = [("javascript", "js"), ("bash", "sh"), ("winbatch", "dosbatch")] + let langmap = Map.fromList langal return $ case classes of - [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- no lang block is a quote block - (x:_) -> "{{{code: lang=\"" ++ x ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec + [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- turn no lang block into a quote block + (x:_) -> "{{{code: lang=\"" ++ + fromMaybe x (Map.lookup x langmap) ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec blockToZimWiki opts (BlockQuote blocks) = do contents <- blockListToZimWiki opts blocks @@ -143,12 +155,12 @@ blockToZimWiki opts (Table capt aligns _ headers rows) = do c <- inlineListToZimWiki opts capt return $ "" ++ c ++ "\n" headers' <- if all null headers - then zipWithM (tableItemToZimWiki opts) aligns (rows !! 0) - else zipWithM (tableItemToZimWiki opts) aligns headers + then zipWithM (tableItemToZimWiki opts) aligns (head rows) + else mapM (inlineListToZimWiki opts . removeFormatting)headers -- emphasis, links etc. are not allowed in table headers rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows let widths = map (maximum . map length) $ transpose (headers':rows') let padTo (width, al) s = - case (width - length s) of + case width - length s of x | x > 0 -> if al == AlignLeft || al == AlignDefault then s ++ replicate x ' ' @@ -157,63 +169,63 @@ blockToZimWiki opts (Table capt aligns _ headers rows) = do else replicate (x `div` 2) ' ' ++ s ++ replicate (x - x `div` 2) ' ' | otherwise -> s - let borderCell (width, al) _ = - if al == AlignLeft - then ":"++ replicate (width-1) '-' - else if al == AlignDefault - then replicate width '-' - else if al == AlignRight - then replicate (width-1) '-' ++ ":" - else ":" ++ replicate (width-2) '-' ++ ":" + let borderCell (width, al) _ + | al == AlignLeft = ":"++ replicate (width-1) '-' + | al == AlignDefault = replicate width '-' + | al == AlignRight = replicate (width-1) '-' ++ ":" + | otherwise = ":" ++ replicate (width-2) '-' ++ ":" let underheader = "|" ++ intercalate "|" (zipWith borderCell (zip widths aligns) headers') ++ "|" - let renderRow sep cells = sep ++ intercalate sep (zipWith padTo (zip widths aligns) cells) ++ sep + let renderRow cells = "|" ++ intercalate "|" (zipWith padTo (zip widths aligns) cells) ++ "|" return $ captionDoc ++ - (if null headers' then "" else renderRow "|" headers' ++ "\n") ++ underheader ++ "\n" ++ - unlines (map (renderRow "|") rows') + (if null headers' then "" else renderRow headers' ++ "\n") ++ underheader ++ "\n" ++ + unlines (map renderRow rows') blockToZimWiki opts (BulletList items) = do - indent <- stIndent <$> get + indent <- gets stIndent modify $ \s -> s { stIndent = stIndent s ++ "\t" } - contents <- (mapM (listItemToZimWiki opts) items) + contents <- mapM (listItemToZimWiki opts) items modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) } return $ vcat contents ++ if null indent then "\n" else "" blockToZimWiki opts (OrderedList _ items) = do - indent <- stIndent <$> get + indent <- gets stIndent modify $ \s -> s { stIndent = stIndent s ++ "\t", stItemNum = 1 } - contents <- (mapM (orderedListItemToZimWiki opts) items) + contents <- mapM (orderedListItemToZimWiki opts) items modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) } return $ vcat contents ++ if null indent then "\n" else "" blockToZimWiki opts (DefinitionList items) = do - contents <- (mapM (definitionListItemToZimWiki opts) items) + contents <- mapM (definitionListItemToZimWiki opts) items return $ vcat contents -definitionListItemToZimWiki :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState String +definitionListItemToZimWiki :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> ZW m String definitionListItemToZimWiki opts (label, items) = do labelText <- inlineListToZimWiki opts label contents <- mapM (blockListToZimWiki opts) items - indent <- stIndent <$> get + indent <- gets stIndent return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents -- Auxiliary functions for lists: -indentFromHTML :: WriterOptions -> String -> State WriterState String +indentFromHTML :: PandocMonad m => WriterOptions -> String -> ZW m String indentFromHTML _ str = do - indent <- stIndent <$> get - itemnum <- stItemNum <$> get - if isInfixOf "<li>" str then return $ indent ++ show itemnum ++ "." - else if isInfixOf "</li>" str then return "\n" - else if isInfixOf "<li value=" str then do + indent <- gets stIndent + itemnum <- gets stItemNum + if "<li>" `isInfixOf` str then return $ indent ++ show itemnum ++ "." + else if "</li>" `isInfixOf` str then return "\n" + else if "<li value=" `isInfixOf` str then do -- poor man's cut let val = drop 10 $ reverse $ drop 1 $ reverse str --let val = take ((length valls) - 2) valls modify $ \s -> s { stItemNum = read val } return "" - else if isInfixOf "<ol>" str then do + else if "<ol>" `isInfixOf` str then do let olcount=countSubStrs "<ol>" str modify $ \s -> s { stIndent = stIndent s ++ replicate olcount '\t', stItemNum = 1 } return "" - else if isInfixOf "</ol>" str then do + else if "</ol>" `isInfixOf` str then do let olcount=countSubStrs "/<ol>" str modify $ \s -> s{ stIndent = drop olcount (stIndent s) } return "" @@ -230,23 +242,25 @@ vcat :: [String] -> String vcat = intercalate "\n" -- | Convert bullet list item (list of blocks) to ZimWiki. -listItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String +listItemToZimWiki :: PandocMonad m => WriterOptions -> [Block] -> ZW m String listItemToZimWiki opts items = do contents <- blockListToZimWiki opts items - indent <- stIndent <$> get + indent <- gets stIndent return $ indent ++ "* " ++ contents -- | Convert ordered list item (list of blocks) to ZimWiki. -orderedListItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String +orderedListItemToZimWiki :: PandocMonad m + => WriterOptions -> [Block] -> ZW m String orderedListItemToZimWiki opts items = do contents <- blockListToZimWiki opts items - indent <- stIndent <$> get - itemnum <- stItemNum <$> get + indent <- gets stIndent + itemnum <- gets stItemNum --modify $ \s -> s { stItemNum = itemnum + 1 } -- this is not strictly necessary for zim as zim does its own renumbering return $ indent ++ show itemnum ++ ". " ++ contents -- Auxiliary functions for tables: -tableItemToZimWiki :: WriterOptions -> Alignment -> [Block] -> State WriterState String +tableItemToZimWiki :: PandocMonad m + => WriterOptions -> Alignment -> [Block] -> ZW m String tableItemToZimWiki opts align' item = do let mkcell x = (if align' == AlignRight || align' == AlignCenter then " " @@ -254,19 +268,24 @@ tableItemToZimWiki opts align' item = do (if align' == AlignLeft || align' == AlignCenter then " " else "") - contents <- blockListToZimWiki opts item -- local (\s -> s { stBackSlashLB = True }) $ + modify $ \s -> s { stInTable = True } + contents <- blockListToZimWiki opts item + modify $ \s -> s { stInTable = False } return $ mkcell contents -- | Convert list of Pandoc block elements to ZimWiki. -blockListToZimWiki :: WriterOptions -> [Block] -> State WriterState String +blockListToZimWiki :: PandocMonad m + => WriterOptions -> [Block] -> ZW m String blockListToZimWiki opts blocks = vcat <$> mapM (blockToZimWiki opts) blocks -- | Convert list of Pandoc inline elements to ZimWiki. -inlineListToZimWiki :: WriterOptions -> [Inline] -> State WriterState String -inlineListToZimWiki opts lst = concat <$> (mapM (inlineToZimWiki opts) lst) +inlineListToZimWiki :: PandocMonad m + => WriterOptions -> [Inline] -> ZW m String +inlineListToZimWiki opts lst = concat <$> mapM (inlineToZimWiki opts) lst -- | Convert Pandoc inline element to ZimWiki. -inlineToZimWiki :: WriterOptions -> Inline -> State WriterState String +inlineToZimWiki :: PandocMonad m + => WriterOptions -> Inline -> ZW m String inlineToZimWiki opts (Emph lst) = do contents <- inlineListToZimWiki opts lst @@ -304,7 +323,15 @@ inlineToZimWiki opts (Cite _ lst) = inlineListToZimWiki opts lst inlineToZimWiki _ (Code _ str) = return $ "''" ++ str ++ "''" -inlineToZimWiki _ (Str str) = return $ escapeString str +inlineToZimWiki _ (Str str) = do + inTable <- gets stInTable + inLink <- gets stInLink + if inTable + then return $ substitute "|" "\\|" . escapeString $ str + else + if inLink + then return str + else return $ escapeString str inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped where delim = case mathType of @@ -312,12 +339,18 @@ inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note InlineMath -> "$" -- | f == Format "html" = return $ "<html>" ++ str ++ "</html>" -inlineToZimWiki opts (RawInline f str) +inlineToZimWiki opts il@(RawInline f str) | f == Format "zimwiki" = return str - | f == Format "html" = do cont <- indentFromHTML opts str; return cont - | otherwise = return "" + | f == Format "html" = indentFromHTML opts str + | otherwise = do + report $ InlineNotRendered il + return "" -inlineToZimWiki _ (LineBreak) = return "\n" -- was \\\\ +inlineToZimWiki _ LineBreak = do + inTable <- gets stInTable + if inTable + then return "\\n" + else return "\n" inlineToZimWiki opts SoftBreak = case writerWrapText opts of @@ -328,37 +361,45 @@ inlineToZimWiki opts SoftBreak = inlineToZimWiki _ Space = return " " inlineToZimWiki opts (Link _ txt (src, _)) = do - label <- inlineListToZimWiki opts txt + inTable <- gets stInTable + modify $ \s -> s { stInLink = True } + label <- inlineListToZimWiki opts $ removeFormatting txt -- zim does not allow formatting in link text, it takes the text verbatim, no need to escape it + modify $ \s -> s { stInLink = False } + let label'= if inTable + then "" -- no label is allowed in a table + else "|"++label case txt of [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" | escapeURI s == src -> return src _ -> if isURI src - then return $ "[[" ++ src ++ "|" ++ label ++ "]]" - else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" + then return $ "[[" ++ src ++ label' ++ "]]" + else return $ "[[" ++ src' ++ label' ++ "]]" where src' = case src of '/':xs -> xs -- with leading / it's a _ -> src -- link to a help page inlineToZimWiki opts (Image attr alt (source, tit)) = do alt' <- inlineListToZimWiki opts alt - let txt = case (tit, alt) of - ("", []) -> "" - ("", _ ) -> "|" ++ alt' - (_ , _ ) -> "|" ++ tit + inTable <- gets stInTable + let txt = case (tit, alt, inTable) of + ("",[], _) -> "" + ("", _, False ) -> "|" ++ alt' + (_ , _, False ) -> "|" ++ tit + (_ , _, True ) -> "" -- Relative links fail isURI and receive a colon prefix = if isURI source then "" else ":" return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}" inlineToZimWiki opts (Note contents) = do + -- no concept of notes in zim wiki, use a text block contents' <- blockListToZimWiki opts contents - return $ "((" ++ contents' ++ "))" - -- note - may not work for notes with multiple blocks + return $ " **{Note:** " ++ trimr contents' ++ "**}**" imageDims :: WriterOptions -> Attr -> String imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) where toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim + checkPct maybeDim = maybeDim go (Just w) Nothing = "?" ++ w go (Just w) (Just h) = "?" ++ w ++ "x" ++ h go Nothing (Just h) = "?0x" ++ h |