summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Extensions.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Extensions.hs')
-rw-r--r--src/Text/Pandoc/Extensions.hs378
1 files changed, 378 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
new file mode 100644
index 000000000..968476930
--- /dev/null
+++ b/src/Text/Pandoc/Extensions.hs
@@ -0,0 +1,378 @@
+{-
+Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE TemplateHaskell #-}
+
+{- |
+ Module : Text.Pandoc.Extensions
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Data structures and functions for representing markup extensions.
+-}
+module Text.Pandoc.Extensions ( Extension(..)
+ , Extensions
+ , emptyExtensions
+ , extensionsFromList
+ , parseFormatSpec
+ , extensionEnabled
+ , enableExtension
+ , disableExtension
+ , getDefaultExtensions
+ , pandocExtensions
+ , plainExtensions
+ , strictExtensions
+ , phpMarkdownExtraExtensions
+ , githubMarkdownExtensions
+ , multimarkdownExtensions )
+where
+import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions)
+import Data.Aeson.TH (deriveJSON)
+import Data.Bits (clearBit, setBit, testBit, (.|.))
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import GHC.Generics (Generic)
+import Text.Pandoc.Shared (safeRead)
+import Text.Parsec
+
+newtype Extensions = Extensions Integer
+ deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, ToJSON, FromJSON)
+
+instance Monoid Extensions where
+ mempty = Extensions 0
+ mappend (Extensions a) (Extensions b) = Extensions (a .|. b)
+
+extensionsFromList :: [Extension] -> Extensions
+extensionsFromList = foldr enableExtension emptyExtensions
+
+emptyExtensions :: Extensions
+emptyExtensions = Extensions 0
+
+extensionEnabled :: Extension -> Extensions -> Bool
+extensionEnabled x (Extensions exts) = testBit exts (fromEnum x)
+
+enableExtension :: Extension -> Extensions -> Extensions
+enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x))
+
+disableExtension :: Extension -> Extensions -> Extensions
+disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x))
+
+-- | Individually selectable syntax extensions.
+data Extension =
+ Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
+ | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable
+ | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup
+ | Ext_angle_brackets_escapable -- ^ Make < and > escapable
+ | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers
+ | Ext_auto_identifiers -- ^ Automatic identifiers for headers
+ | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links
+ | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
+ | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote
+ | Ext_blank_before_header -- ^ Require blank line before a header
+ | Ext_bracketed_spans -- ^ Bracketed spans with attributes
+ | Ext_citations -- ^ Pandoc/citeproc citations
+ | Ext_compact_definition_lists -- ^ Definition lists without space between items,
+ -- and disallow laziness
+ | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php
+ | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
+ -- East Asian wide characters
+ | Ext_emoji -- ^ Support emoji like :smile:
+ | Ext_empty_paragraphs -- ^ Allow empty paragraphs
+ | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
+ | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
+ | Ext_example_lists -- ^ Markdown-style numbered examples
+ | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters
+ | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
+ | Ext_fenced_code_blocks -- ^ Parse fenced code blocks
+ | Ext_fenced_divs -- ^ Allow fenced div syntax :::
+ | Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes
+ | Ext_four_space_rule -- ^ Require 4-space indent for list contents
+ | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, using
+ -- GitHub's method for generating identifiers
+ | Ext_grid_tables -- ^ Grid tables (pandoc, reST)
+ | Ext_hard_line_breaks -- ^ All newlines become hard line breaks
+ | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v}
+ | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored
+ | Ext_implicit_figures -- ^ A paragraph with just an image is a figure
+ | Ext_implicit_header_references -- ^ Implicit reference links for headers
+ | Ext_inline_code_attributes -- ^ Allow attributes on inline code
+ | Ext_inline_notes -- ^ Pandoc-style inline notes
+ | Ext_intraword_underscores -- ^ Treat underscore inside word as literal
+ | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only)
+ | Ext_line_blocks -- ^ RST style line blocks
+ | Ext_link_attributes -- ^ link and image attributes
+ | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank
+ | Ext_literate_haskell -- ^ Enable literate Haskell conventions
+ | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown iff
+ -- container has attribute 'markdown'
+ | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
+ | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
+ | Ext_mmd_link_attributes -- ^ MMD style reference link attributes
+ | Ext_mmd_title_block -- ^ Multimarkdown metadata block
+ | Ext_multiline_tables -- ^ Pandoc-style multiline tables
+ | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
+ | Ext_native_spans -- ^ Use Span inlines for contents of <span>
+ | Ext_ntb -- ^ ConTeXt Natural Tables
+ | Ext_old_dashes -- ^ -- = em, - before number = en
+ | Ext_pandoc_title_block -- ^ Pandoc title block
+ | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra)
+ | Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines
+ | Ext_raw_html -- ^ Allow raw HTML
+ | Ext_raw_tex -- ^ Allow raw TeX (other than math)
+ | Ext_shortcut_reference_links -- ^ Shortcut reference links
+ | Ext_simple_tables -- ^ Pandoc-style simple tables
+ | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes
+ | Ext_space_in_atx_header -- ^ Require space between # and header text
+ | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link
+ | Ext_startnum -- ^ Make start number of ordered list significant
+ | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax
+ | Ext_subscript -- ^ Subscript using ~this~ syntax
+ | Ext_superscript -- ^ Superscript using ^this^ syntax
+ | Ext_styles -- ^ Read styles that pandoc doesn't know
+ | Ext_table_captions -- ^ Pandoc-style table captions
+ | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$
+ | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\]
+ | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\]
+ | Ext_yaml_metadata_block -- ^ YAML metadata block
+ deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
+
+-- | Extensions to be used with pandoc-flavored markdown.
+pandocExtensions :: Extensions
+pandocExtensions = extensionsFromList
+ [ Ext_footnotes
+ , Ext_inline_notes
+ , Ext_pandoc_title_block
+ , Ext_yaml_metadata_block
+ , Ext_table_captions
+ , Ext_implicit_figures
+ , Ext_simple_tables
+ , Ext_multiline_tables
+ , Ext_grid_tables
+ , Ext_pipe_tables
+ , Ext_citations
+ , Ext_raw_tex
+ , Ext_raw_html
+ , Ext_tex_math_dollars
+ , Ext_latex_macros
+ , Ext_fenced_code_blocks
+ , Ext_fenced_code_attributes
+ , Ext_backtick_code_blocks
+ , Ext_inline_code_attributes
+ , Ext_raw_attribute
+ , Ext_markdown_in_html_blocks
+ , Ext_native_divs
+ , Ext_fenced_divs
+ , Ext_native_spans
+ , Ext_bracketed_spans
+ , Ext_escaped_line_breaks
+ , Ext_fancy_lists
+ , Ext_startnum
+ , Ext_definition_lists
+ , Ext_example_lists
+ , Ext_all_symbols_escapable
+ , Ext_intraword_underscores
+ , Ext_blank_before_blockquote
+ , Ext_blank_before_header
+ , Ext_space_in_atx_header
+ , Ext_strikeout
+ , Ext_superscript
+ , Ext_subscript
+ , Ext_auto_identifiers
+ , Ext_header_attributes
+ , Ext_link_attributes
+ , Ext_implicit_header_references
+ , Ext_line_blocks
+ , Ext_shortcut_reference_links
+ , Ext_smart
+ ]
+
+-- | Extensions to be used with plain text output.
+plainExtensions :: Extensions
+plainExtensions = extensionsFromList
+ [ Ext_table_captions
+ , Ext_implicit_figures
+ , Ext_simple_tables
+ , Ext_multiline_tables
+ , Ext_grid_tables
+ , Ext_latex_macros
+ , Ext_fancy_lists
+ , Ext_startnum
+ , Ext_definition_lists
+ , Ext_example_lists
+ , Ext_intraword_underscores
+ , Ext_blank_before_blockquote
+ , Ext_blank_before_header
+ , Ext_strikeout
+ ]
+
+-- | Extensions to be used with github-flavored markdown.
+phpMarkdownExtraExtensions :: Extensions
+phpMarkdownExtraExtensions = extensionsFromList
+ [ Ext_footnotes
+ , Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_markdown_attribute
+ , Ext_fenced_code_blocks
+ , Ext_definition_lists
+ , Ext_intraword_underscores
+ , Ext_header_attributes
+ , Ext_link_attributes
+ , Ext_abbreviations
+ , Ext_shortcut_reference_links
+ , Ext_spaced_reference_links
+ ]
+
+-- | Extensions to be used with github-flavored markdown.
+githubMarkdownExtensions :: Extensions
+githubMarkdownExtensions = extensionsFromList
+ [ Ext_angle_brackets_escapable
+ , Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_fenced_code_blocks
+ , Ext_gfm_auto_identifiers
+ , Ext_ascii_identifiers
+ , Ext_backtick_code_blocks
+ , Ext_autolink_bare_uris
+ , Ext_space_in_atx_header
+ , Ext_intraword_underscores
+ , Ext_strikeout
+ , Ext_emoji
+ , Ext_lists_without_preceding_blankline
+ , Ext_shortcut_reference_links
+ ]
+
+-- | Extensions to be used with multimarkdown.
+multimarkdownExtensions :: Extensions
+multimarkdownExtensions = extensionsFromList
+ [ Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_markdown_attribute
+ , Ext_mmd_link_attributes
+ -- , Ext_raw_tex
+ -- Note: MMD's raw TeX syntax requires raw TeX to be
+ -- enclosed in HTML comment
+ , Ext_tex_math_double_backslash
+ , Ext_intraword_underscores
+ , Ext_mmd_title_block
+ , Ext_footnotes
+ , Ext_definition_lists
+ , Ext_all_symbols_escapable
+ , Ext_implicit_header_references
+ , Ext_shortcut_reference_links
+ , Ext_auto_identifiers
+ , Ext_mmd_header_identifiers
+ , Ext_implicit_figures
+ -- Note: MMD's syntax for superscripts and subscripts
+ -- is a bit more permissive than pandoc's, allowing
+ -- e^2 and a~1 instead of e^2^ and a~1~, so even with
+ -- these options we don't have full support for MMD
+ -- superscripts and subscripts, but there's no reason
+ -- not to include these:
+ , Ext_superscript
+ , Ext_subscript
+ , Ext_backtick_code_blocks
+ , Ext_spaced_reference_links
+ -- So far only in dev version of mmd:
+ , Ext_raw_attribute
+ ]
+
+-- | Language extensions to be used with strict markdown.
+strictExtensions :: Extensions
+strictExtensions = extensionsFromList
+ [ Ext_raw_html
+ , Ext_shortcut_reference_links
+ , Ext_spaced_reference_links
+ ]
+
+-- | Default extensions from format-describing string.
+getDefaultExtensions :: String -> Extensions
+getDefaultExtensions "markdown_strict" = strictExtensions
+getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions
+getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
+getDefaultExtensions "markdown_github" = githubMarkdownExtensions
+getDefaultExtensions "markdown" = pandocExtensions
+getDefaultExtensions "muse" = extensionsFromList
+ [Ext_amuse,
+ Ext_auto_identifiers]
+getDefaultExtensions "plain" = plainExtensions
+getDefaultExtensions "gfm" = githubMarkdownExtensions
+getDefaultExtensions "commonmark" = extensionsFromList
+ [Ext_raw_html]
+getDefaultExtensions "org" = extensionsFromList
+ [Ext_citations,
+ Ext_auto_identifiers]
+getDefaultExtensions "html" = extensionsFromList
+ [Ext_auto_identifiers,
+ Ext_native_divs,
+ Ext_line_blocks,
+ Ext_native_spans]
+getDefaultExtensions "html4" = getDefaultExtensions "html"
+getDefaultExtensions "html5" = getDefaultExtensions "html"
+getDefaultExtensions "epub" = extensionsFromList
+ [Ext_raw_html,
+ Ext_native_divs,
+ Ext_native_spans,
+ Ext_epub_html_exts]
+getDefaultExtensions "epub2" = getDefaultExtensions "epub"
+getDefaultExtensions "epub3" = getDefaultExtensions "epub"
+getDefaultExtensions "latex" = extensionsFromList
+ [Ext_smart,
+ Ext_latex_macros,
+ Ext_auto_identifiers]
+getDefaultExtensions "context" = extensionsFromList
+ [Ext_smart,
+ Ext_auto_identifiers]
+getDefaultExtensions "textile" = extensionsFromList
+ [Ext_old_dashes,
+ Ext_smart,
+ Ext_raw_html,
+ Ext_auto_identifiers]
+getDefaultExtensions "opml" = pandocExtensions -- affects notes
+getDefaultExtensions _ = extensionsFromList
+ [Ext_auto_identifiers]
+
+-- | Parse a format-specifying string into a markup format and a function that
+-- takes Extensions and enables and disables extensions as defined in the format
+-- spec.
+parseFormatSpec :: String
+ -> Either ParseError (String, Extensions -> Extensions)
+parseFormatSpec = parse formatSpec ""
+ where formatSpec = do
+ name <- formatName
+ extMods <- many extMod
+ return (name, \x -> foldl (flip ($)) x extMods)
+ formatName = many1 $ noneOf "-+"
+ extMod = do
+ polarity <- oneOf "-+"
+ name <- many $ noneOf "-+"
+ ext <- case safeRead ("Ext_" ++ name) of
+ Just n -> return n
+ Nothing
+ | name == "lhs" -> return Ext_literate_haskell
+ | otherwise -> fail $ "Unknown extension: " ++ name
+ return $ case polarity of
+ '-' -> disableExtension ext
+ _ -> enableExtension ext
+
+$(deriveJSON defaultOptions ''Extension)