diff options
-rw-r--r-- | README | 90 | ||||
-rw-r--r-- | benchmark/benchmark-pandoc.hs | 7 | ||||
-rw-r--r-- | data/docx/word/styles.xml | 19 | ||||
-rw-r--r-- | pandoc.cabal | 3 | ||||
-rw-r--r-- | src/Text/Pandoc.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/CommonMark.hs | 178 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 80 | ||||
-rw-r--r-- | tests/Tests/Readers/Org.hs | 4 |
9 files changed, 331 insertions, 55 deletions
@@ -158,22 +158,22 @@ General options : Specify input format. *FORMAT* can be `native` (native Haskell), `json` (JSON version of native AST), `markdown` (pandoc's - extended markdown), `markdown_strict` (original unextended markdown), - `markdown_phpextra` (PHP Markdown Extra extended markdown), - `markdown_github` (github extended markdown), - `commonmark` (CommonMark markdown), - `textile` (Textile), `rst` (reStructuredText), `html` (HTML), - `docbook` (DocBook), `t2t` (txt2tags), `docx` (docx), `epub` (EPUB), - `opml` (OPML), `org` (Emacs Org-mode), `mediawiki` (MediaWiki markup), - `twiki` (TWiki markup), `haddock` (Haddock markup), or `latex` (LaTeX). - If `+lhs` is appended to `markdown`, `rst`, - `latex`, or `html`, the input will be treated as literate Haskell - source: see [Literate Haskell support](#literate-haskell-support), - below. Markdown syntax extensions can be individually enabled or - disabled by appending `+EXTENSION` or `-EXTENSION` to the format - name. So, for example, `markdown_strict+footnotes+definition_lists` - is strict markdown with footnotes and definition lists enabled, - and `markdown-pipe_tables+hard_line_breaks` is pandoc's markdown + extended markdown), `markdown_strict` (original unextended + markdown), `markdown_phpextra` (PHP Markdown Extra extended + markdown), `markdown_github` (github extended markdown), + `commonmark` (CommonMark markdown), `textile` (Textile), `rst` + (reStructuredText), `html` (HTML), `docbook` (DocBook), `t2t` + (txt2tags), `docx` (docx), `epub` (EPUB), `opml` (OPML), `org` + (Emacs Org-mode), `mediawiki` (MediaWiki markup), `twiki` (TWiki + markup), `haddock` (Haddock markup), or `latex` (LaTeX). If + `+lhs` is appended to `markdown`, `rst`, `latex`, or `html`, the + input will be treated as literate Haskell source: see [Literate + Haskell support](#literate-haskell-support), below. Markdown + syntax extensions can be individually enabled or disabled by + appending `+EXTENSION` or `-EXTENSION` to the format name. So, for + example, `markdown_strict+footnotes+definition_lists` is strict + markdown with footnotes and definition lists enabled, and + `markdown-pipe_tables+hard_line_breaks` is pandoc's markdown without pipe tables and with hard line breaks. See [Pandoc's markdown](#pandocs-markdown), below, for a list of extensions and their names. @@ -182,30 +182,33 @@ General options : Specify output format. *FORMAT* can be `native` (native Haskell), `json` (JSON version of native AST), `plain` (plain text), - `markdown` (pandoc's extended markdown), `markdown_strict` (original - unextended markdown), `markdown_phpextra` (PHP Markdown extra - extended markdown), `markdown_github` (github extended markdown), - `rst` (reStructuredText), `html` (XHTML 1), `html5` (HTML 5), - `latex` (LaTeX), `beamer` (LaTeX beamer slide show), - `context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup), - `dokuwiki` (DokuWiki markup), - `textile` (Textile), `org` (Emacs Org-Mode), `texinfo` (GNU Texinfo), - `opml` (OPML), `docbook` (DocBook), `opendocument` (OpenDocument), `odt` - (OpenOffice text document), `docx` (Word docx), `haddock` (Haddock - markup), `rtf` (rich text format), `epub` (EPUB v2 book), `epub3` - (EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc), - `icml` (InDesign ICML), `slidy` (Slidy HTML and javascript slide show), - `slideous` (Slideous HTML and javascript slide show), `dzslides` - (DZSlides HTML5 + javascript slide show), `revealjs` (reveal.js - HTML5 + javascript slide show), `s5` (S5 HTML and javascript slide show), - or the path of a custom lua writer (see [Custom writers](#custom-writers), - below). Note that `odt`, `epub`, and `epub3` output will not be directed - to *stdout*; an output filename must be specified using the `-o/--output` - option. If `+lhs` is appended to `markdown`, `rst`, `latex`, `beamer`, - `html`, or `html5`, the output will be rendered as literate Haskell - source: see [Literate Haskell support](#literate-haskell-support), below. - Markdown syntax extensions can be individually enabled or disabled by - appending `+EXTENSION` or `-EXTENSION` to the format name, as described + `markdown` (pandoc's extended markdown), `markdown_strict` + (original unextended markdown), `markdown_phpextra` (PHP Markdown + extra extended markdown), `markdown_github` (github extended + markdown), `commonmark` (CommonMark markdown), `rst` + (reStructuredText), `html` (XHTML 1), `html5` (HTML 5), `latex` + (LaTeX), `beamer` (LaTeX beamer slide show), `context` (ConTeXt), + `man` (groff man), `mediawiki` (MediaWiki markup), `dokuwiki` + (DokuWiki markup), `textile` (Textile), `org` (Emacs Org-Mode), + `texinfo` (GNU Texinfo), `opml` (OPML), `docbook` (DocBook), + `opendocument` (OpenDocument), `odt` (OpenOffice text document), + `docx` (Word docx), `haddock` (Haddock markup), `rtf` (rich text + format), `epub` (EPUB v2 book), `epub3` (EPUB v3), `fb2` + (FictionBook2 e-book), `asciidoc` (AsciiDoc), `icml` (InDesign + ICML), `slidy` (Slidy HTML and javascript slide show), `slideous` + (Slideous HTML and javascript slide show), `dzslides` (DZSlides + HTML5 + javascript slide show), `revealjs` (reveal.js HTML5 + + javascript slide show), `s5` (S5 HTML and javascript slide show), + or the path of a custom lua writer (see [Custom + writers](#custom-writers), below). Note that `odt`, `epub`, and + `epub3` output will not be directed to *stdout*; an output + filename must be specified using the `-o/--output` option. If + `+lhs` is appended to `markdown`, `rst`, `latex`, `beamer`, + `html`, or `html5`, the output will be rendered as literate + Haskell source: see [Literate Haskell + support](#literate-haskell-support), below. Markdown syntax + extensions can be individually enabled or disabled by appending + `+EXTENSION` or `-EXTENSION` to the format name, as described above under `-f`. `-o` *FILE*, `--output=`*FILE* @@ -319,7 +322,12 @@ Reader options Those who would prefer to write filters in python can use the module `pandocfilters`, installable from PyPI. See <http://github.com/jgm/pandocfilters> for the module and several - examples. Note that the *EXECUTABLE* will be sought in the user's + examples. There are also pandoc filter libraries in + [PHP](https://github.com/vinai/pandocfilters-php), + [perl](https://metacpan.org/pod/Pandoc::Filter), and + [javascript/node.js](https://github.com/mvhenderson/pandoc-filter-node). + + Note that the *EXECUTABLE* will be sought in the user's `PATH`, and not in the working directory, if no directory is provided. If you want to run a script in the working directory, preface the filename with `./`. diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 5c0dab460..3fc70331f 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -34,12 +34,7 @@ readerBench doc (name, reader) = let inp = writer def{ writerWrapText = True} doc in return $ bench (name ++ " reader") $ nfIO $ (fmap handleError <$> reader def{ readerSmart = True }) inp - _ | name == "commonmark" -> - let inp = writeMarkdown def{ writerWrapText = True} doc - in return $ bench (name ++ " reader") $ nfIO $ - (fmap handleError <$> reader def{ readerSmart = True }) inp - | otherwise -> trace ("\nCould not find writer for " ++ name ++ - "\n") Nothing + _ -> trace ("\nCould not find writer for " ++ name ++ "\n") Nothing writerBench :: Pandoc -> (String, WriterOptions -> Pandoc -> String) diff --git a/data/docx/word/styles.xml b/data/docx/word/styles.xml index d2e35b4c3..347e7ff07 100644 --- a/data/docx/word/styles.xml +++ b/data/docx/word/styles.xml @@ -354,4 +354,23 @@ <w:color w:val="4F81BD" w:themeColor="accent1" /> </w:rPr> </w:style> + <w:style w:type="paragraph" w:styleId="TOCHeading"> + <w:name w:val="TOC Heading" /> + <w:basedOn w:val="Heading1" /> + <w:next w:val="BodyText" /> + <w:uiPriority w:val="39" /> + <w:unhideWhenUsed /> + <w:qFormat /> + <w:pPr> + <w:spacing w:before="240" w:line="259" w:lineRule="auto" /> + <w:outlineLvl w:val="9" /> + </w:pPr> + <w:rPr> + <w:rFonts w:asciiTheme="majorHAnsi" w:eastAsiaTheme="majorEastAsia" w:hAnsiTheme="majorHAnsi" w:cstheme="majorBidi" /> + <w:b w:val="0" /> + <w:bCs w:val="0" /> + <w:color w:val="365F91" w:themeColor="accent1" + w:themeShade="BF" /> + </w:rPr> + </w:style> </w:styles> diff --git a/pandoc.cabal b/pandoc.cabal index c9f583c68..9b1001ace 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -279,7 +279,7 @@ Library deepseq-generics >= 0.1 && < 0.2, JuicyPixels >= 3.1.6.1 && < 3.3, filemanip >= 0.3 && < 0.4, - cmark >= 0.3 && < 0.4 + cmark >= 0.3.1 && < 0.4 if flag(old-locale) Build-Depends: old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5 @@ -342,6 +342,7 @@ Library Text.Pandoc.Writers.Texinfo, Text.Pandoc.Writers.Man, Text.Pandoc.Writers.Markdown, + Text.Pandoc.Writers.CommonMark, Text.Pandoc.Writers.Haddock, Text.Pandoc.Writers.RST, Text.Pandoc.Writers.Org, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 3387a7d64..dd361f8d7 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -112,6 +112,7 @@ module Text.Pandoc , writeOrg , writeAsciiDoc , writeHaddock + , writeCommonMark , writeCustom -- * Rendering templates and default templates , module Text.Pandoc.Templates @@ -165,6 +166,7 @@ import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Writers.Haddock +import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options @@ -305,6 +307,7 @@ writers = [ ,("org" , PureStringWriter writeOrg) ,("asciidoc" , PureStringWriter writeAsciiDoc) ,("haddock" , PureStringWriter writeHaddock) + ,("commonmark" , PureStringWriter writeCommonMark) ] getDefaultExtensions :: String -> Set Extension diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 1dfbdd700..fc63cc11e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1093,7 +1093,7 @@ explicitOrImageLink = try $ do char ']' alt <- internalLink src title' return $ - (if isImageFilename src && isImageFilename title + (if isImageFilename title then B.link src "" $ B.image title mempty mempty else fromMaybe alt (linkToInlines src title')) diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs new file mode 100644 index 000000000..706b27175 --- /dev/null +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -0,0 +1,178 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to CommonMark. + +CommonMark: <http://commonmark.org> +-} +module Text.Pandoc.Writers.CommonMark (writeCommonMark) where + +import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Definition +import Text.Pandoc.Shared (isTightList) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import CMark +import qualified Data.Text as T +import Control.Monad.Identity (runIdentity, Identity) +import Control.Monad.State (runState, State, modify, get) +import Text.Pandoc.Walk (walkM) + +-- | Convert Pandoc to CommonMark. +writeCommonMark :: WriterOptions -> Pandoc -> String +writeCommonMark opts (Pandoc meta blocks) = rendered + where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') + (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + metadata = runIdentity $ metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + context = defField "body" main $ metadata + rendered = if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +processNotes :: Inline -> State [[Block]] Inline +processNotes (Note bs) = do + modify (bs :) + notes <- get + return $ Str $ "[" ++ show (length notes) ++ "]" +processNotes x = return x + +node :: NodeType -> [Node] -> Node +node = Node Nothing + +blocksToCommonMark :: WriterOptions -> [Block] -> Identity String +blocksToCommonMark opts bs = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node DOCUMENT (blocksToNodes bs) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then writerColumns opts + else 0 + +inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +inlinesToCommonMark opts ils = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node PARAGRAPH (inlinesToNodes ils) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then writerColumns opts + else 0 + +blocksToNodes :: [Block] -> [Node] +blocksToNodes = foldr blockToNodes [] + +blockToNodes :: Block -> [Node] -> [Node] +blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (CodeBlock (_,classes,_) xs) = + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) +blockToNodes (RawBlock fmt xs) + | fmt == Format "html" = (node (HTML (T.pack xs)) [] :) + | otherwise = id +blockToNodes (BlockQuote bs) = + (node BLOCK_QUOTE (blocksToNodes bs) :) +blockToNodes (BulletList items) = + (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes (OrderedList (start, _sty, delim) items) = + (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes HorizontalRule = (node HRULE [] :) +blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :) +blockToNodes (Div _ bs) = (blocksToNodes bs ++) +blockToNodes (DefinitionList items) = blockToNodes (BulletList items') + where items' = map dlToBullet items + dlToBullet (term, ((Para xs : ys) : zs)) = + Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, ((Plain xs : ys) : zs)) = + Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, xs) = + Para term : concat xs +blockToNodes t@(Table _ _ _ _ _) = + (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :) +blockToNodes Null = id + +inlinesToNodes :: [Inline] -> [Node] +inlinesToNodes = foldr inlineToNodes [] + +inlineToNodes :: Inline -> [Node] -> [Node] +inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) +inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) +inlineToNodes LineBreak = (node LINEBREAK [] :) +inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) +inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) +inlineToNodes (Strikeout xs) = + ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</s>")) []]) ++ ) +inlineToNodes (Superscript xs) = + ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sub>")) []]) ++ ) +inlineToNodes (Subscript xs) = + ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sup>")) []]) ++ ) +inlineToNodes (SmallCaps xs) = + ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) [] + : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</span>")) []]) ++ ) +inlineToNodes (Link ils (url,tit)) = + (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (Image ils (url,tit)) = + (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (RawInline fmt xs) + | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :) + | otherwise = id +inlineToNodes (Quoted qt ils) = + ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) + where (start, end) = case qt of + SingleQuote -> (T.pack "‘", T.pack "’") + DoubleQuote -> (T.pack "“", T.pack "”") +inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes (Math mt str) = + case mt of + InlineMath -> + (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + DisplayMath -> + (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Note _) = id -- should not occur +-- we remove Note elements in preprocessing diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 3c32434e7..4e81def60 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -66,6 +66,7 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) import Data.Maybe (fromMaybe, mapMaybe, maybeToList) +import Data.Char (ord) data ListMarker = NoMarker | BulletMarker @@ -176,13 +177,29 @@ renumId f renumMap e renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) +-- | Certain characters are invalid in XML even if escaped. +-- See #1992 +stripInvalidChars :: Pandoc -> Pandoc +stripInvalidChars = bottomUp (filter isValidChar) + +-- | See XML reference +isValidChar :: Char -> Bool +isValidChar (ord -> c) + | c == 0x9 = True + | c == 0xA = True + | c == 0xD = True + | 0x20 <= c && c <= 0xD7FF = True + | 0xE000 <= c && c <= 0xFFFD = True + | 0x10000 <= c && c <= 0x10FFFF = True + | otherwise = False + -- | Produce an Docx file from a Pandoc document. writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = walk fixDisplayMath doc + let doc' = stripInvalidChars . walk fixDisplayMath $ doc username <- lookup "USERNAME" <$> getEnvironment utctime <- getCurrentTime refArchive <- liftM (toArchive . toLazy) $ @@ -432,6 +449,17 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... + -- we do, however, copy some settings over from reference + let settingsPath = "word/settings.xml" + settingsList = [ "w:autoHyphenation" + , "w:consecutiveHyphenLimit" + , "w:hyphenationZone" + , "w:doNotHyphenateCap" + ] + settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList + let entryFromArchive arch path = maybe (fail $ path ++ " corrupt or missing in reference docx") return @@ -439,9 +467,6 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" - -- we use dist archive for settings.xml, because Word sometimes - -- adds references to footnotes or endnotes we don't have... - settingsEntry <- entryFromArchive distArchive "word/settings.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive) $ mapMaybe (fmap ("word/" ++) . extractTarget) @@ -504,6 +529,21 @@ styleToOpenXml sm style = $ backgroundColor style ) ] +copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry +copyChildren refArchive distArchive path timestamp elNames = do + ref <- parseXml refArchive distArchive path + dist <- parseXml distArchive distArchive path + return $ toEntry path timestamp $ renderXml dist{ + elContent = elContent dist ++ copyContent ref + } + where + strName QName{qName=name, qPrefix=prefix} + | Just p <- prefix = p++":"++name + | otherwise = name + shouldCopy = (`elem` elNames) . strName + cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} + copyContent = map cleanElem . filterChildrenName shouldCopy + -- this is the lowest number used for a list numId baseListId :: Int baseListId = 1000 @@ -581,6 +621,33 @@ mkLvl marker lvl = getNumId :: WS Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists +makeTOC :: WriterOptions -> WS [Element] +makeTOC opts | writerTableOfContents opts = do + let depth = "1-"++(show (writerTOCDepth opts)) + let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para [Str "Table of Contents"]]) + return $ + [mknode "w:sdt" [] ([ + mknode "w:sdtPr" [] ( + mknode "w:docPartObj" [] ( + [mknode "w:docPartGallery" [("w:val","Table of Contents")] (), + mknode "w:docPartUnique" [] ()] + ) -- w:docPartObj + ), -- w:sdtPr + mknode "w:sdtContent" [] (title++[ + mknode "w:p" [] ( + mknode "w:r" [] ([ + mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), + mknode "w:instrText" [("xml:space","preserve")] tocCmd, + mknode "w:fldChar" [("w:fldCharType","separate")] (), + mknode "w:fldChar" [("w:fldCharType","end")] () + ]) -- w:r + ) -- w:p + ]) + ])] -- w:sdt +makeTOC _ = return [] + + -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) @@ -613,7 +680,8 @@ writeOpenXML opts (Pandoc meta blocks) = do let blocks' = bottomUp convertSpace blocks doc' <- (setFirstPara >> blocksToOpenXML opts blocks') notes' <- reverse `fmap` gets stFootnotes - let meta' = title ++ subtitle ++ authors ++ date ++ abstract + toc <- makeTOC opts + let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index f555447c7..4cec54a68 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -216,6 +216,10 @@ tests = "[[sunset.png][dusk.svg]]" =?> (para $ link "sunset.png" "" (image "dusk.svg" "" "")) + , "Image link with non-image target" =: + "[[http://example.com][logo.png]]" =?> + (para $ link "http://example.com" "" (image "logo.png" "" "")) + , "Plain link" =: "Posts on http://zeitlens.com/ can be funny at times." =?> (para $ spcSep [ "Posts", "on" |