summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/CommonMark.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/CommonMark.hs')
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs228
1 files changed, 168 insertions, 60 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index d20d386e7..6fbc09c17 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2015-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.CommonMark
- Copyright : Copyright (C) 2015 John MacFarlane
+ Copyright : Copyright (C) 2015-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -32,48 +32,94 @@ CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
module Text.Pandoc.Readers.CommonMark (readCommonMark)
where
-import CMark
-import Data.Text (unpack, pack)
+import CMarkGFM
+import Control.Monad.State
+import Data.Char (isAlphaNum, isLetter, isSpace, toLower)
import Data.List (groupBy)
+import qualified Data.Map as Map
+import Data.Text (Text, unpack)
+import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
+import Text.Pandoc.Emoji (emojis)
import Text.Pandoc.Options
-import Text.Pandoc.Error
+import Text.Pandoc.Shared (stringify)
+import Text.Pandoc.Walk (walkM)
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc
-readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack
- where opts' = if readerSmart opts
- then [optNormalize, optSmart]
- else [optNormalize]
-
-nodeToPandoc :: Node -> Pandoc
-nodeToPandoc (Node _ DOCUMENT nodes) =
- Pandoc nullMeta $ foldr addBlock [] nodes
-nodeToPandoc n = -- shouldn't happen
- Pandoc nullMeta $ foldr addBlock [] [n]
-
-addBlocks :: [Node] -> [Block]
-addBlocks = foldr addBlock []
-
-addBlock :: Node -> [Block] -> [Block]
-addBlock (Node _ PARAGRAPH nodes) =
- (Para (addInlines nodes) :)
-addBlock (Node _ THEMATIC_BREAK _) =
+readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMark opts s = return $
+ (if isEnabled Ext_gfm_auto_identifiers opts
+ then addHeaderIdentifiers
+ else id) $
+ nodeToPandoc opts $ commonmarkToNode opts' exts s
+ where opts' = [ optSmart | isEnabled Ext_smart opts ]
+ exts = [ extStrikethrough | isEnabled Ext_strikeout opts ] ++
+ [ extTable | isEnabled Ext_pipe_tables opts ] ++
+ [ extAutolink | isEnabled Ext_autolink_bare_uris opts ]
+
+convertEmojis :: String -> String
+convertEmojis (':':xs) =
+ case break (==':') xs of
+ (ys,':':zs) ->
+ case Map.lookup ys emojis of
+ Just s -> s ++ convertEmojis zs
+ Nothing -> ':' : ys ++ convertEmojis (':':zs)
+ _ -> ':':xs
+convertEmojis (x:xs) = x : convertEmojis xs
+convertEmojis [] = []
+
+addHeaderIdentifiers :: Pandoc -> Pandoc
+addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty
+
+addHeaderId :: Block -> State (Map.Map String Int) Block
+addHeaderId (Header lev (_,classes,kvs) ils) = do
+ idmap <- get
+ let ident = toIdent ils
+ ident' <- case Map.lookup ident idmap of
+ Nothing -> do
+ put (Map.insert ident 1 idmap)
+ return ident
+ Just i -> do
+ put (Map.adjust (+ 1) ident idmap)
+ return (ident ++ "-" ++ show i)
+ return $ Header lev (ident',classes,kvs) ils
+addHeaderId x = return x
+
+toIdent :: [Inline] -> String
+toIdent = map (\c -> if isSpace c then '-' else c)
+ . filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
+ c == '_' || c == '-')
+ . map toLower . stringify
+
+nodeToPandoc :: ReaderOptions -> Node -> Pandoc
+nodeToPandoc opts (Node _ DOCUMENT nodes) =
+ Pandoc nullMeta $ foldr (addBlock opts) [] nodes
+nodeToPandoc opts n = -- shouldn't happen
+ Pandoc nullMeta $ foldr (addBlock opts) [] [n]
+
+addBlocks :: ReaderOptions -> [Node] -> [Block]
+addBlocks opts = foldr (addBlock opts) []
+
+addBlock :: ReaderOptions -> Node -> [Block] -> [Block]
+addBlock opts (Node _ PARAGRAPH nodes) =
+ (Para (addInlines opts nodes) :)
+addBlock _ (Node _ THEMATIC_BREAK _) =
(HorizontalRule :)
-addBlock (Node _ BLOCK_QUOTE nodes) =
- (BlockQuote (addBlocks nodes) :)
-addBlock (Node _ (HTML_BLOCK t) _) =
- (RawBlock (Format "html") (unpack t) :)
+addBlock opts (Node _ BLOCK_QUOTE nodes) =
+ (BlockQuote (addBlocks opts nodes) :)
+addBlock opts (Node _ (HTML_BLOCK t) _)
+ | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :)
+ | otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
-addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
+addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
id
-addBlock (Node _ (CODE_BLOCK info t) _) =
+addBlock _ (Node _ (CODE_BLOCK info t) _) =
(CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
-addBlock (Node _ (HEADING lev) nodes) =
- (Header lev ("",[],[]) (addInlines nodes) :)
-addBlock (Node _ (LIST listAttrs) nodes) =
- (constructor (map (setTightness . addBlocks . children) nodes) :)
+addBlock opts (Node _ (HEADING lev) nodes) =
+ (Header lev ("",[],[]) (addInlines opts nodes) :)
+addBlock opts (Node _ (LIST listAttrs) nodes) =
+ (constructor (map (setTightness . addBlocks opts . children) nodes) :)
where constructor = case listType listAttrs of
BULLET_LIST -> BulletList
ORDERED_LIST -> OrderedList
@@ -82,46 +128,108 @@ addBlock (Node _ (LIST listAttrs) nodes) =
setTightness = if listTight listAttrs
then map paraToPlain
else id
- paraToPlain (Para xs) = Plain (xs)
+ paraToPlain (Para xs) = Plain xs
paraToPlain x = x
delim = case listDelim listAttrs of
- PERIOD_DELIM -> Period
- PAREN_DELIM -> OneParen
-addBlock (Node _ ITEM _) = id -- handled in LIST
-addBlock _ = id
+ PERIOD_DELIM -> Period
+ PAREN_DELIM -> OneParen
+addBlock opts (Node _ (TABLE alignments) nodes) =
+ (Table [] aligns widths headers rows :)
+ where aligns = map fromTableCellAlignment alignments
+ fromTableCellAlignment NoAlignment = AlignDefault
+ fromTableCellAlignment LeftAligned = AlignLeft
+ fromTableCellAlignment RightAligned = AlignRight
+ fromTableCellAlignment CenterAligned = AlignCenter
+ widths = replicate numcols 0.0
+ numcols = if null rows'
+ then 0
+ else maximum $ map length rows'
+ rows' = map toRow $ filter isRow nodes
+ (headers, rows) = case rows' of
+ (h:rs) -> (h, rs)
+ [] -> ([], [])
+ isRow (Node _ TABLE_ROW _) = True
+ isRow _ = False
+ isCell (Node _ TABLE_CELL _) = True
+ isCell _ = False
+ toRow (Node _ TABLE_ROW ns) = map toCell $ filter isCell ns
+ toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t
+ toCell (Node _ TABLE_CELL []) = []
+ toCell (Node _ TABLE_CELL (n:ns))
+ | isBlockNode n = addBlocks opts (n:ns)
+ | otherwise = [Plain (addInlines opts (n:ns))]
+ toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t
+addBlock _ (Node _ TABLE_ROW _) = id -- handled in TABLE
+addBlock _ (Node _ TABLE_CELL _) = id -- handled in TABLE
+addBlock _ _ = id
+
+isBlockNode :: Node -> Bool
+isBlockNode (Node _ nodetype _) =
+ case nodetype of
+ DOCUMENT -> True
+ THEMATIC_BREAK -> True
+ PARAGRAPH -> True
+ BLOCK_QUOTE -> True
+ HTML_BLOCK _ -> True
+ CUSTOM_BLOCK _ _ -> True
+ CODE_BLOCK _ _ -> True
+ HEADING _ -> True
+ LIST _ -> True
+ ITEM -> True
+ TEXT _ -> False
+ SOFTBREAK -> False
+ LINEBREAK -> False
+ HTML_INLINE _ -> False
+ CUSTOM_INLINE _ _ -> False
+ CODE _ -> False
+ EMPH -> False
+ STRONG -> False
+ LINK _ _ -> False
+ IMAGE _ _ -> False
+ STRIKETHROUGH -> False
+ TABLE _ -> False
+ TABLE_ROW -> False
+ TABLE_CELL -> False
children :: Node -> [Node]
children (Node _ _ ns) = ns
-addInlines :: [Node] -> [Inline]
-addInlines = foldr addInline []
+addInlines :: ReaderOptions -> [Node] -> [Inline]
+addInlines opts = foldr (addInline opts) []
-addInline :: Node -> [Inline] -> [Inline]
-addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
+addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
+addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++)
where raw = unpack t
clumps = groupBy samekind raw
samekind ' ' ' ' = True
samekind ' ' _ = False
samekind _ ' ' = False
samekind _ _ = True
- toinl (' ':_) = Space
- toinl xs = Str xs
-addInline (Node _ LINEBREAK _) = (LineBreak :)
-addInline (Node _ SOFTBREAK _) = (SoftBreak :)
-addInline (Node _ (HTML_INLINE t) _) =
- (RawInline (Format "html") (unpack t) :)
+ toinl (' ':_) = Space
+ toinl xs = Str $ if isEnabled Ext_emoji opts
+ then convertEmojis xs
+ else xs
+addInline _ (Node _ LINEBREAK _) = (LineBreak :)
+addInline opts (Node _ SOFTBREAK _)
+ | isEnabled Ext_hard_line_breaks opts = (LineBreak :)
+ | otherwise = (SoftBreak :)
+addInline opts (Node _ (HTML_INLINE t) _)
+ | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :)
+ | otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
-addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
+addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
id
-addInline (Node _ (CODE t) _) =
+addInline _ (Node _ (CODE t) _) =
(Code ("",[],[]) (unpack t) :)
-addInline (Node _ EMPH nodes) =
- (Emph (addInlines nodes) :)
-addInline (Node _ STRONG nodes) =
- (Strong (addInlines nodes) :)
-addInline (Node _ (LINK url title) nodes) =
- (Link nullAttr (addInlines nodes) (unpack url, unpack title) :)
-addInline (Node _ (IMAGE url title) nodes) =
- (Image nullAttr (addInlines nodes) (unpack url, unpack title) :)
-addInline _ = id
+addInline opts (Node _ EMPH nodes) =
+ (Emph (addInlines opts nodes) :)
+addInline opts (Node _ STRONG nodes) =
+ (Strong (addInlines opts nodes) :)
+addInline opts (Node _ STRIKETHROUGH nodes) =
+ (Strikeout (addInlines opts nodes) :)
+addInline opts (Node _ (LINK url title) nodes) =
+ (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+addInline opts (Node _ (IMAGE url title) nodes) =
+ (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+addInline _ _ = id