summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2015-03-23 11:35:44 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2015-03-29 23:42:42 -0700
commitccb828894b6c9aae056a6861c20c4d6807035d9f (patch)
tree5d9da5731ab132db0b8423b1f3178bad04d21ea7
parentcbe32c2aebc26ee3de27500929e4b623b971fc8b (diff)
Added CommonMark writer.
Added `Text.Pandoc.Writers.CommonMark`, exporting `writeCommonMark`.
-rw-r--r--README83
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc.hs3
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs178
4 files changed, 225 insertions, 40 deletions
diff --git a/README b/README
index db3d93ae1..412235c09 100644
--- a/README
+++ b/README
@@ -158,22 +158,22 @@ General options
: Specify input format. *FORMAT* can be `native` (native Haskell),
`json` (JSON version of native AST), `markdown` (pandoc's
- extended markdown), `markdown_strict` (original unextended markdown),
- `markdown_phpextra` (PHP Markdown Extra extended markdown),
- `markdown_github` (github extended markdown),
- `commonmark` (CommonMark markdown),
- `textile` (Textile), `rst` (reStructuredText), `html` (HTML),
- `docbook` (DocBook), `t2t` (txt2tags), `docx` (docx), `epub` (EPUB),
- `opml` (OPML), `org` (Emacs Org-mode), `mediawiki` (MediaWiki markup),
- `twiki` (TWiki markup), `haddock` (Haddock markup), or `latex` (LaTeX).
- If `+lhs` is appended to `markdown`, `rst`,
- `latex`, or `html`, the input will be treated as literate Haskell
- source: see [Literate Haskell support](#literate-haskell-support),
- below. Markdown syntax extensions can be individually enabled or
- disabled by appending `+EXTENSION` or `-EXTENSION` to the format
- name. So, for example, `markdown_strict+footnotes+definition_lists`
- is strict markdown with footnotes and definition lists enabled,
- and `markdown-pipe_tables+hard_line_breaks` is pandoc's markdown
+ extended markdown), `markdown_strict` (original unextended
+ markdown), `markdown_phpextra` (PHP Markdown Extra extended
+ markdown), `markdown_github` (github extended markdown),
+ `commonmark` (CommonMark markdown), `textile` (Textile), `rst`
+ (reStructuredText), `html` (HTML), `docbook` (DocBook), `t2t`
+ (txt2tags), `docx` (docx), `epub` (EPUB), `opml` (OPML), `org`
+ (Emacs Org-mode), `mediawiki` (MediaWiki markup), `twiki` (TWiki
+ markup), `haddock` (Haddock markup), or `latex` (LaTeX). If
+ `+lhs` is appended to `markdown`, `rst`, `latex`, or `html`, the
+ input will be treated as literate Haskell source: see [Literate
+ Haskell support](#literate-haskell-support), below. Markdown
+ syntax extensions can be individually enabled or disabled by
+ appending `+EXTENSION` or `-EXTENSION` to the format name. So, for
+ example, `markdown_strict+footnotes+definition_lists` is strict
+ markdown with footnotes and definition lists enabled, and
+ `markdown-pipe_tables+hard_line_breaks` is pandoc's markdown
without pipe tables and with hard line breaks. See [Pandoc's
markdown](#pandocs-markdown), below, for a list of extensions and
their names.
@@ -182,30 +182,33 @@ General options
: Specify output format. *FORMAT* can be `native` (native Haskell),
`json` (JSON version of native AST), `plain` (plain text),
- `markdown` (pandoc's extended markdown), `markdown_strict` (original
- unextended markdown), `markdown_phpextra` (PHP Markdown extra
- extended markdown), `markdown_github` (github extended markdown),
- `rst` (reStructuredText), `html` (XHTML 1), `html5` (HTML 5),
- `latex` (LaTeX), `beamer` (LaTeX beamer slide show),
- `context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup),
- `dokuwiki` (DokuWiki markup),
- `textile` (Textile), `org` (Emacs Org-Mode), `texinfo` (GNU Texinfo),
- `opml` (OPML), `docbook` (DocBook), `opendocument` (OpenDocument), `odt`
- (OpenOffice text document), `docx` (Word docx), `haddock` (Haddock
- markup), `rtf` (rich text format), `epub` (EPUB v2 book), `epub3`
- (EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc),
- `icml` (InDesign ICML), `slidy` (Slidy HTML and javascript slide show),
- `slideous` (Slideous HTML and javascript slide show), `dzslides`
- (DZSlides HTML5 + javascript slide show), `revealjs` (reveal.js
- HTML5 + javascript slide show), `s5` (S5 HTML and javascript slide show),
- or the path of a custom lua writer (see [Custom writers](#custom-writers),
- below). Note that `odt`, `epub`, and `epub3` output will not be directed
- to *stdout*; an output filename must be specified using the `-o/--output`
- option. If `+lhs` is appended to `markdown`, `rst`, `latex`, `beamer`,
- `html`, or `html5`, the output will be rendered as literate Haskell
- source: see [Literate Haskell support](#literate-haskell-support), below.
- Markdown syntax extensions can be individually enabled or disabled by
- appending `+EXTENSION` or `-EXTENSION` to the format name, as described
+ `markdown` (pandoc's extended markdown), `markdown_strict`
+ (original unextended markdown), `markdown_phpextra` (PHP Markdown
+ extra extended markdown), `markdown_github` (github extended
+ markdown), `commonmark` (CommonMark markdown), `rst`
+ (reStructuredText), `html` (XHTML 1), `html5` (HTML 5), `latex`
+ (LaTeX), `beamer` (LaTeX beamer slide show), `context` (ConTeXt),
+ `man` (groff man), `mediawiki` (MediaWiki markup), `dokuwiki`
+ (DokuWiki markup), `textile` (Textile), `org` (Emacs Org-Mode),
+ `texinfo` (GNU Texinfo), `opml` (OPML), `docbook` (DocBook),
+ `opendocument` (OpenDocument), `odt` (OpenOffice text document),
+ `docx` (Word docx), `haddock` (Haddock markup), `rtf` (rich text
+ format), `epub` (EPUB v2 book), `epub3` (EPUB v3), `fb2`
+ (FictionBook2 e-book), `asciidoc` (AsciiDoc), `icml` (InDesign
+ ICML), `slidy` (Slidy HTML and javascript slide show), `slideous`
+ (Slideous HTML and javascript slide show), `dzslides` (DZSlides
+ HTML5 + javascript slide show), `revealjs` (reveal.js HTML5 +
+ javascript slide show), `s5` (S5 HTML and javascript slide show),
+ or the path of a custom lua writer (see [Custom
+ writers](#custom-writers), below). Note that `odt`, `epub`, and
+ `epub3` output will not be directed to *stdout*; an output
+ filename must be specified using the `-o/--output` option. If
+ `+lhs` is appended to `markdown`, `rst`, `latex`, `beamer`,
+ `html`, or `html5`, the output will be rendered as literate
+ Haskell source: see [Literate Haskell
+ support](#literate-haskell-support), below. Markdown syntax
+ extensions can be individually enabled or disabled by appending
+ `+EXTENSION` or `-EXTENSION` to the format name, as described
above under `-f`.
`-o` *FILE*, `--output=`*FILE*
diff --git a/pandoc.cabal b/pandoc.cabal
index 7bbdd9f0f..9b1001ace 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -342,6 +342,7 @@ Library
Text.Pandoc.Writers.Texinfo,
Text.Pandoc.Writers.Man,
Text.Pandoc.Writers.Markdown,
+ Text.Pandoc.Writers.CommonMark,
Text.Pandoc.Writers.Haddock,
Text.Pandoc.Writers.RST,
Text.Pandoc.Writers.Org,
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 3387a7d64..dd361f8d7 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -112,6 +112,7 @@ module Text.Pandoc
, writeOrg
, writeAsciiDoc
, writeHaddock
+ , writeCommonMark
, writeCustom
-- * Rendering templates and default templates
, module Text.Pandoc.Templates
@@ -165,6 +166,7 @@ import Text.Pandoc.Writers.Textile
import Text.Pandoc.Writers.Org
import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.Haddock
+import Text.Pandoc.Writers.CommonMark
import Text.Pandoc.Writers.Custom
import Text.Pandoc.Templates
import Text.Pandoc.Options
@@ -305,6 +307,7 @@ writers = [
,("org" , PureStringWriter writeOrg)
,("asciidoc" , PureStringWriter writeAsciiDoc)
,("haddock" , PureStringWriter writeHaddock)
+ ,("commonmark" , PureStringWriter writeCommonMark)
]
getDefaultExtensions :: String -> Set Extension
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
new file mode 100644
index 000000000..706b27175
--- /dev/null
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -0,0 +1,178 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to CommonMark.
+
+CommonMark: <http://commonmark.org>
+-}
+module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
+
+import Text.Pandoc.Writers.HTML (writeHtmlString)
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (isTightList)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import CMark
+import qualified Data.Text as T
+import Control.Monad.Identity (runIdentity, Identity)
+import Control.Monad.State (runState, State, modify, get)
+import Text.Pandoc.Walk (walkM)
+
+-- | Convert Pandoc to CommonMark.
+writeCommonMark :: WriterOptions -> Pandoc -> String
+writeCommonMark opts (Pandoc meta blocks) = rendered
+ where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes')
+ (blocks', notes) = runState (walkM processNotes blocks) []
+ notes' = if null notes
+ then []
+ else [OrderedList (1, Decimal, Period) $ reverse notes]
+ metadata = runIdentity $ metaToJSON opts
+ (blocksToCommonMark opts)
+ (inlinesToCommonMark opts)
+ meta
+ context = defField "body" main $ metadata
+ rendered = if writerStandalone opts
+ then renderTemplate' (writerTemplate opts) context
+ else main
+
+processNotes :: Inline -> State [[Block]] Inline
+processNotes (Note bs) = do
+ modify (bs :)
+ notes <- get
+ return $ Str $ "[" ++ show (length notes) ++ "]"
+processNotes x = return x
+
+node :: NodeType -> [Node] -> Node
+node = Node Nothing
+
+blocksToCommonMark :: WriterOptions -> [Block] -> Identity String
+blocksToCommonMark opts bs = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node DOCUMENT (blocksToNodes bs)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 0
+
+inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String
+inlinesToCommonMark opts ils = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node PARAGRAPH (inlinesToNodes ils)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 0
+
+blocksToNodes :: [Block] -> [Node]
+blocksToNodes = foldr blockToNodes []
+
+blockToNodes :: Block -> [Node] -> [Node]
+blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :)
+blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :)
+blockToNodes (CodeBlock (_,classes,_) xs) =
+ (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :)
+blockToNodes (RawBlock fmt xs)
+ | fmt == Format "html" = (node (HTML (T.pack xs)) [] :)
+ | otherwise = id
+blockToNodes (BlockQuote bs) =
+ (node BLOCK_QUOTE (blocksToNodes bs) :)
+blockToNodes (BulletList items) =
+ (node (LIST ListAttributes{
+ listType = BULLET_LIST,
+ listDelim = PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = 1 }) (map (node ITEM . blocksToNodes) items) :)
+blockToNodes (OrderedList (start, _sty, delim) items) =
+ (node (LIST ListAttributes{
+ listType = ORDERED_LIST,
+ listDelim = case delim of
+ OneParen -> PAREN_DELIM
+ TwoParens -> PAREN_DELIM
+ _ -> PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = start }) (map (node ITEM . blocksToNodes) items) :)
+blockToNodes HorizontalRule = (node HRULE [] :)
+blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :)
+blockToNodes (Div _ bs) = (blocksToNodes bs ++)
+blockToNodes (DefinitionList items) = blockToNodes (BulletList items')
+ where items' = map dlToBullet items
+ dlToBullet (term, ((Para xs : ys) : zs)) =
+ Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, ((Plain xs : ys) : zs)) =
+ Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, xs) =
+ Para term : concat xs
+blockToNodes t@(Table _ _ _ _ _) =
+ (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :)
+blockToNodes Null = id
+
+inlinesToNodes :: [Inline] -> [Node]
+inlinesToNodes = foldr inlineToNodes []
+
+inlineToNodes :: Inline -> [Node] -> [Node]
+inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :)
+inlineToNodes Space = (node (TEXT (T.pack " ")) [] :)
+inlineToNodes LineBreak = (node LINEBREAK [] :)
+inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :)
+inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :)
+inlineToNodes (Strikeout xs) =
+ ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</s>")) []]) ++ )
+inlineToNodes (Superscript xs) =
+ ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</sub>")) []]) ++ )
+inlineToNodes (Subscript xs) =
+ ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</sup>")) []]) ++ )
+inlineToNodes (SmallCaps xs) =
+ ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) []
+ : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</span>")) []]) ++ )
+inlineToNodes (Link ils (url,tit)) =
+ (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (Image ils (url,tit)) =
+ (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (RawInline fmt xs)
+ | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :)
+ | otherwise = id
+inlineToNodes (Quoted qt ils) =
+ ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++)
+ where (start, end) = case qt of
+ SingleQuote -> (T.pack "‘", T.pack "’")
+ DoubleQuote -> (T.pack "“", T.pack "”")
+inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :)
+inlineToNodes (Math mt str) =
+ case mt of
+ InlineMath ->
+ (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
+ DisplayMath ->
+ (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
+inlineToNodes (Span _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Note _) = id -- should not occur
+-- we remove Note elements in preprocessing