diff options
Diffstat (limited to 'src/Text/Pandoc/Extensions.hs')
-rw-r--r-- | src/Text/Pandoc/Extensions.hs | 378 |
1 files changed, 378 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs new file mode 100644 index 000000000..968476930 --- /dev/null +++ b/src/Text/Pandoc/Extensions.hs @@ -0,0 +1,378 @@ +{- +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE TemplateHaskell #-} + +{- | + Module : Text.Pandoc.Extensions + Copyright : Copyright (C) 2012-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Data structures and functions for representing markup extensions. +-} +module Text.Pandoc.Extensions ( Extension(..) 
, Extensions
, emptyExtensions
, extensionsFromList
, parseFormatSpec
, extensionEnabled
, enableExtension
, disableExtension
, getDefaultExtensions
, pandocExtensions
, plainExtensions
, strictExtensions
, phpMarkdownExtraExtensions
, githubMarkdownExtensions
, multimarkdownExtensions )
where
import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions)
import Data.Aeson.TH (deriveJSON)
import Data.Bits (clearBit, setBit, testBit, (.|.))
import Data.Data (Data)
import Data.Semigroup (Semigroup (..))
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
import Text.Pandoc.Shared (safeRead)
import Text.Parsec

-- | A set of 'Extension's, stored as a bit field inside an 'Integer':
-- bit @fromEnum ext@ is set exactly when @ext@ is a member of the set.
newtype Extensions = Extensions Integer
  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, ToJSON, FromJSON)

-- | Combining two sets yields their union (bitwise or).
--
-- This instance is required since base 4.11 (GHC 8.4), where 'Semigroup'
-- became a superclass of 'Monoid'.
instance Semigroup Extensions where
  Extensions a <> Extensions b = Extensions (a .|. b)

-- | The empty set is the identity for union.
instance Monoid Extensions where
  mempty = Extensions 0
  -- Kept explicit so the instance also builds with base 4.9/4.10, where
  -- 'mappend' has no default in terms of '<>'.
  mappend = (<>)

-- | Build a set containing exactly the listed extensions.
extensionsFromList :: [Extension] -> Extensions
extensionsFromList = foldr enableExtension emptyExtensions

-- | The set with no extension enabled.
emptyExtensions :: Extensions
emptyExtensions = Extensions 0

-- | Test whether an extension is a member of the set.
extensionEnabled :: Extension -> Extensions -> Bool
extensionEnabled x (Extensions exts) = testBit exts (fromEnum x)

-- | Add an extension to the set (no effect if it is already present).
enableExtension :: Extension -> Extensions -> Extensions
enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x))

-- | Remove an extension from the set (no effect if it is absent).
disableExtension :: Extension -> Extensions -> Extensions
disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x))

-- | Individually selectable syntax extensions.
data Extension
  = Ext_abbreviations
    -- ^ PHP markdown extra abbreviation definitions
  | Ext_all_symbols_escapable
    -- ^ Make all non-alphanumerics escapable
  | Ext_amuse
    -- ^ Enable Text::Amuse extensions to Emacs Muse markup
  | Ext_angle_brackets_escapable
    -- ^ Make < and > escapable
  | Ext_ascii_identifiers
    -- ^ ascii-only identifiers for headers
  | Ext_auto_identifiers
    -- ^ Automatic identifiers for headers
  | Ext_autolink_bare_uris
    -- ^ Make all absolute URIs into links
  | Ext_backtick_code_blocks
    -- ^ GitHub style ``` code blocks
  | Ext_blank_before_blockquote
    -- ^ Require blank line before a blockquote
  | Ext_blank_before_header
    -- ^ Require blank line before a header
  | Ext_bracketed_spans
    -- ^ Bracketed spans with attributes
  | Ext_citations
    -- ^ Pandoc/citeproc citations
  | Ext_compact_definition_lists
    -- ^ Definition lists without space between items,
    --   and disallow laziness
  | Ext_definition_lists
    -- ^ Definition lists as in pandoc, mmd, php
  | Ext_east_asian_line_breaks
    -- ^ Newlines in paragraphs are ignored between
    --   East Asian wide characters
  | Ext_emoji
    -- ^ Support emoji like :smile:
  | Ext_empty_paragraphs
    -- ^ Allow empty paragraphs
  | Ext_epub_html_exts
    -- ^ Recognise the EPUB extended version of HTML
  | Ext_escaped_line_breaks
    -- ^ Treat a backslash at EOL as linebreak
  | Ext_example_lists
    -- ^ Markdown-style numbered examples
  | Ext_fancy_lists
    -- ^ Enable fancy list numbers and delimiters
  | Ext_fenced_code_attributes
    -- ^ Allow attributes on fenced code blocks
  | Ext_fenced_code_blocks
    -- ^ Parse fenced code blocks
  | Ext_fenced_divs
    -- ^ Allow fenced div syntax :::
  | Ext_footnotes
    -- ^ Pandoc/PHP/MMD style footnotes
  | Ext_four_space_rule
    -- ^ Require 4-space indent for list contents
  | Ext_gfm_auto_identifiers
    -- ^ Automatic identifiers for headers, using
    --   GitHub's method for generating identifiers
  | Ext_grid_tables
    -- ^ Grid tables (pandoc, reST)
  | Ext_hard_line_breaks
    -- ^ All newlines become hard line breaks
  | Ext_header_attributes
    -- ^ Explicit header attributes {#id .class k=v}
  | Ext_ignore_line_breaks
    -- ^ Newlines in paragraphs are ignored
  | Ext_implicit_figures
    -- ^ A paragraph with just an image is a figure
  | Ext_implicit_header_references
    -- ^ Implicit reference links for headers
  | Ext_inline_code_attributes
    -- ^ Allow attributes on inline code
  | Ext_inline_notes
    -- ^ Pandoc-style inline notes
  | Ext_intraword_underscores
    -- ^ Treat underscore inside word as literal
  | Ext_latex_macros
    -- ^ Parse LaTeX macro definitions (for math only)
  | Ext_line_blocks
    -- ^ RST style line blocks
  | Ext_link_attributes
    -- ^ link and image attributes
  | Ext_lists_without_preceding_blankline
    -- ^ Allow lists without preceding blank line
  | Ext_literate_haskell
    -- ^ Enable literate Haskell conventions
  | Ext_markdown_attribute
    -- ^ Interpret text inside HTML as markdown iff
    --   container has attribute 'markdown'
  | Ext_markdown_in_html_blocks
    -- ^ Interpret as markdown inside HTML blocks
  | Ext_mmd_header_identifiers
    -- ^ Multimarkdown style header identifiers [myid]
  | Ext_mmd_link_attributes
    -- ^ MMD style reference link attributes
  | Ext_mmd_title_block
    -- ^ Multimarkdown metadata block
  | Ext_multiline_tables
    -- ^ Pandoc-style multiline tables
  | Ext_native_divs
    -- ^ Use Div blocks for contents of <div> tags
  | Ext_native_spans
    -- ^ Use Span inlines for contents of <span>
  | Ext_ntb
    -- ^ ConTeXt Natural Tables
  | Ext_old_dashes
    -- ^ -- = em, - before number = en
  | Ext_pandoc_title_block
    -- ^ Pandoc title block
  | Ext_pipe_tables
    -- ^ Pipe tables (as in PHP markdown extra)
  | Ext_raw_attribute
    -- ^ Allow explicit raw blocks/inlines
  | Ext_raw_html
    -- ^ Allow raw HTML
  | Ext_raw_tex
    -- ^ Allow raw TeX (other than math)
  | Ext_shortcut_reference_links
    -- ^ Shortcut reference links
  | Ext_simple_tables
    -- ^ Pandoc-style simple tables
  | Ext_smart
    -- ^ "Smart" quotes, apostrophes, ellipses, dashes
  | Ext_space_in_atx_header
    -- ^ Require space between # and header text
  | Ext_spaced_reference_links
    -- ^ Allow space between two parts of ref link
  | Ext_startnum
    -- ^ Make start number of ordered list significant
  | Ext_strikeout
    -- ^ Strikeout using ~~this~~ syntax
  | Ext_subscript
    -- ^ Subscript using ~this~ syntax
  | Ext_superscript
    -- ^ Superscript using ^this^ syntax
  | Ext_styles
    -- ^ Read styles that pandoc doesn't know
  | Ext_table_captions
    -- ^ Pandoc-style table captions
  | Ext_tex_math_dollars
    -- ^ TeX math between $..$ or $$..$$
  | Ext_tex_math_double_backslash
    -- ^ TeX math btw \\(..\\) \\[..\\]
  | Ext_tex_math_single_backslash
    -- ^ TeX math btw \(..\) \[..\]
  | Ext_yaml_metadata_block
    -- ^ YAML metadata block

  -- NOTE: the derived 'Enum' instance assigns each constructor its position
  -- in the list above; code that calls 'fromEnum' on an 'Extension' therefore
  -- depends on this order, so do not reorder constructors casually.
  deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)

-- | Extensions to be used with pandoc-flavored markdown.
pandocExtensions :: Extensions
pandocExtensions =
  extensionsFromList
    [ -- notes and metadata
      Ext_footnotes
    , Ext_inline_notes
    , Ext_pandoc_title_block
    , Ext_yaml_metadata_block
      -- tables and figures
    , Ext_table_captions
    , Ext_implicit_figures
    , Ext_simple_tables
    , Ext_multiline_tables
    , Ext_grid_tables
    , Ext_pipe_tables
      -- citations, raw content and math
    , Ext_citations
    , Ext_raw_tex
    , Ext_raw_html
    , Ext_tex_math_dollars
    , Ext_latex_macros
      -- code
    , Ext_fenced_code_blocks
    , Ext_fenced_code_attributes
    , Ext_backtick_code_blocks
    , Ext_inline_code_attributes
    , Ext_raw_attribute
      -- HTML interplay, divs and spans
    , Ext_markdown_in_html_blocks
    , Ext_native_divs
    , Ext_fenced_divs
    , Ext_native_spans
    , Ext_bracketed_spans
      -- line breaks and lists
    , Ext_escaped_line_breaks
    , Ext_fancy_lists
    , Ext_startnum
    , Ext_definition_lists
    , Ext_example_lists
      -- escaping and block separation
    , Ext_all_symbols_escapable
    , Ext_intraword_underscores
    , Ext_blank_before_blockquote
    , Ext_blank_before_header
    , Ext_space_in_atx_header
      -- inline formatting
    , Ext_strikeout
    , Ext_superscript
    , Ext_subscript
      -- identifiers, attributes and links
    , Ext_auto_identifiers
    , Ext_header_attributes
    , Ext_link_attributes
    , Ext_implicit_header_references
    , Ext_line_blocks
    , Ext_shortcut_reference_links
    , Ext_smart
    ]
-- | Extensions to be used with plain text output.
plainExtensions :: Extensions
plainExtensions = extensionsFromList
  [ Ext_table_captions
  , Ext_implicit_figures
  , Ext_simple_tables
  , Ext_multiline_tables
  , Ext_grid_tables
  , Ext_latex_macros
  , Ext_fancy_lists
  , Ext_startnum
  , Ext_definition_lists
  , Ext_example_lists
  , Ext_intraword_underscores
  , Ext_blank_before_blockquote
  , Ext_blank_before_header
  , Ext_strikeout
  ]

-- | Extensions to be used with PHP Markdown Extra.
phpMarkdownExtraExtensions :: Extensions
phpMarkdownExtraExtensions = extensionsFromList
  [ Ext_footnotes
  , Ext_pipe_tables
  , Ext_raw_html
  , Ext_markdown_attribute
  , Ext_fenced_code_blocks
  , Ext_definition_lists
  , Ext_intraword_underscores
  , Ext_header_attributes
  , Ext_link_attributes
  , Ext_abbreviations
  , Ext_shortcut_reference_links
  , Ext_spaced_reference_links
  ]

-- | Extensions to be used with github-flavored markdown.
githubMarkdownExtensions :: Extensions
githubMarkdownExtensions = extensionsFromList
  [ Ext_angle_brackets_escapable
  , Ext_pipe_tables
  , Ext_raw_html
  , Ext_fenced_code_blocks
  , Ext_gfm_auto_identifiers
  , Ext_ascii_identifiers
  , Ext_backtick_code_blocks
  , Ext_autolink_bare_uris
  , Ext_space_in_atx_header
  , Ext_intraword_underscores
  , Ext_strikeout
  , Ext_emoji
  , Ext_lists_without_preceding_blankline
  , Ext_shortcut_reference_links
  ]

-- | Extensions to be used with multimarkdown.
multimarkdownExtensions :: Extensions
multimarkdownExtensions =
  extensionsFromList (baseExts ++ scriptExts ++ miscExts ++ devExts)
  where
    baseExts =
      [ Ext_pipe_tables
      , Ext_raw_html
      , Ext_markdown_attribute
      , Ext_mmd_link_attributes
        -- Ext_raw_tex is deliberately left out: MMD's raw TeX syntax
        -- requires raw TeX to be enclosed in an HTML comment.
      , Ext_tex_math_double_backslash
      , Ext_intraword_underscores
      , Ext_mmd_title_block
      , Ext_footnotes
      , Ext_definition_lists
      , Ext_all_symbols_escapable
      , Ext_implicit_header_references
      , Ext_shortcut_reference_links
      , Ext_auto_identifiers
      , Ext_mmd_header_identifiers
      , Ext_implicit_figures
      ]
    -- MMD's syntax for superscripts and subscripts is a bit more
    -- permissive than pandoc's (it allows e^2 and a~1 instead of e^2^
    -- and a~1~), so even with these enabled MMD superscripts and
    -- subscripts are not fully supported; there is still no reason
    -- not to include them.
    scriptExts =
      [ Ext_superscript
      , Ext_subscript
      ]
    miscExts =
      [ Ext_backtick_code_blocks
      , Ext_spaced_reference_links
      ]
    -- So far only in the dev version of mmd.
    devExts =
      [ Ext_raw_attribute
      ]

-- | Language extensions to be used with strict markdown.
strictExtensions :: Extensions
strictExtensions =
  extensionsFromList
    [ Ext_raw_html
    , Ext_shortcut_reference_links
    , Ext_spaced_reference_links
    ]

-- | Default extensions from format-describing string.
getDefaultExtensions :: String -> Extensions
getDefaultExtensions format =
  case format of
    -- Markdown flavours.
    "markdown_strict"   -> strictExtensions
    "markdown_phpextra" -> phpMarkdownExtraExtensions
    "markdown_mmd"      -> multimarkdownExtensions
    "markdown_github"   -> githubMarkdownExtensions
    "markdown"          -> pandocExtensions
    "muse"              -> extensionsFromList
                             [ Ext_amuse
                             , Ext_auto_identifiers ]
    "plain"             -> plainExtensions
    "gfm"               -> githubMarkdownExtensions
    "commonmark"        -> extensionsFromList [ Ext_raw_html ]
    "org"               -> extensionsFromList
                             [ Ext_citations
                             , Ext_auto_identifiers ]
    -- All HTML variants share one default set.
    "html"              -> htmlDefaults
    "html4"             -> htmlDefaults
    "html5"             -> htmlDefaults
    -- All EPUB variants share one default set.
    "epub"              -> epubDefaults
    "epub2"             -> epubDefaults
    "epub3"             -> epubDefaults
    "latex"             -> extensionsFromList
                             [ Ext_smart
                             , Ext_latex_macros
                             , Ext_auto_identifiers ]
    "context"           -> extensionsFromList
                             [ Ext_smart
                             , Ext_auto_identifiers ]
    "textile"           -> extensionsFromList
                             [ Ext_old_dashes
                             , Ext_smart
                             , Ext_raw_html
                             , Ext_auto_identifiers ]
    "opml"              -> pandocExtensions -- affects notes
    -- Any format not listed above.
    _                   -> extensionsFromList [ Ext_auto_identifiers ]
  where
    htmlDefaults = extensionsFromList
      [ Ext_auto_identifiers
      , Ext_native_divs
      , Ext_line_blocks
      , Ext_native_spans ]
    epubDefaults = extensionsFromList
      [ Ext_raw_html
      , Ext_native_divs
      , Ext_native_spans
      , Ext_epub_html_exts ]

-- | Parse a format-specifying string into a markup format and a function that
-- takes Extensions and enables and disables extensions as defined in the format
-- spec.
parseFormatSpec :: String
                -> Either ParseError (String, Extensions -> Extensions)
parseFormatSpec = parse formatSpec ""
  where
    -- A format name followed by zero or more @+ext@ / @-ext@ modifiers.
    formatSpec = do
      fmt <- many1 (noneOf "-+")
      modifiers <- many extensionModifier
      return (fmt, applyInOrder modifiers)

    -- Apply the modifiers left to right, so later ones take precedence.
    applyInOrder modifiers exts =
      foldl (\acc modify -> modify acc) exts modifiers

    -- One modifier: a sign, then an extension name without the @Ext_@ prefix.
    extensionModifier = do
      sign <- oneOf "-+"
      extName <- many (noneOf "-+")
      ext <- resolveExtension extName
      return $ if sign == '-'
                  then disableExtension ext
                  else enableExtension ext

    -- Look up the constructor @Ext_<name>@; @lhs@ is accepted as a short
    -- alias for 'Ext_literate_haskell'. Unknown names fail the parse.
    resolveExtension extName =
      case safeRead ("Ext_" ++ extName) of
        Just ext -> return ext
        Nothing
          | extName == "lhs" -> return Ext_literate_haskell
          | otherwise        -> fail $ "Unknown extension: " ++ extName

-- Derive the aeson JSON instances for 'Extension' with Template Haskell.
$(deriveJSON defaultOptions ''Extension)