From 512bf2eebf5d59916b1154ef2025d776b367c035 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 30 May 2016 15:07:50 +0200 Subject: Org reader: undo code duplication Some code was duplicated (copy-pasted) or placed in an inappropriate module during the modularization refactoring. Those functions are moved into a `Shared` module, as was originally intended but forgotten. Better documentation of the respective functions is a positive side-effect. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/Org/Blocks.hs | 24 ++--------- src/Text/Pandoc/Readers/Org/Inlines.hs | 38 ++--------------- src/Text/Pandoc/Readers/Org/Shared.hs | 76 ++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 55 deletions(-) create mode 100644 src/Text/Pandoc/Readers/Org/Shared.hs diff --git a/pandoc.cabal b/pandoc.cabal index 7d7250a21..0484b3293 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -398,6 +398,7 @@ Library Text.Pandoc.Readers.Org.Inlines, Text.Pandoc.Readers.Org.ParserState, Text.Pandoc.Readers.Org.Parsing, + Text.Pandoc.Readers.Org.Shared, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 36645a356..e26beffdc 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -35,6 +35,9 @@ import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.Shared + ( isImageFilename, rundocBlockClass, toRundocAttrib + , translateLang ) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines, Blocks ) @@ -43,7 +46,6 @@ import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Options import Text.Pandoc.Shared ( compactify', compactify'DL ) -import Control.Arrow ( first ) import Control.Monad ( foldM, guard, mzero ) import Data.Char ( isSpace, toLower, toUpper) import Data.List ( foldl', intersperse, isPrefixOf ) @@ -314,7 +316,6 @@ codeHeaderArgs = try $ do else ([ pandocLang ], parameters) where hasRundocParameters = not . null - toRundocAttrib = first ("rundoc-" ++) switch :: OrgParser (Char, Maybe String) switch = try $ simpleSwitch <|> lineNumbersSwitch @@ -323,25 +324,6 @@ switch = try $ simpleSwitch <|> lineNumbersSwitch lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> (string "-l \"" *> many1Till nonspaceChar (char '"')) -translateLang :: String -> String -translateLang "C" = "c" -translateLang "C++" = "cpp" -translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported -translateLang "js" = "javascript" -translateLang "lisp" = "commonlisp" -translateLang "R" = "r" -translateLang "sh" = "bash" -translateLang "sqlite" = "sql" -translateLang cs = cs - --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - blockOption :: OrgParser (String, String) blockOption = try $ do argKey <- orgArgKey diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 6971ca3c6..be7fc600a 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -30,13 +30,15 @@ module Text.Pandoc.Readers.Org.Inlines ( inline , inlines , addToNotesTable - , isImageFilename , linkTarget ) where import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.Shared + ( isImageFilename, rundocBlockClass, toRundocAttrib + , translateLang ) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines ) @@ -47,35 +49,12 @@ import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Arrow ( first ) import Control.Monad ( guard, mplus, mzero, when ) import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( isPrefixOf, isSuffixOf ) +import Data.List ( isPrefixOf ) import Data.Maybe ( fromMaybe ) import qualified Data.Map as M --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first ("rundoc-" ++) - -translateLang :: String -> String -translateLang "C" = "c" -translateLang "C++" = "cpp" -translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported -translateLang "js" = "javascript" -translateLang "lisp" = "commonlisp" -translateLang "R" = "r" -translateLang "sh" = "bash" -translateLang "sqlite" = "sql" -translateLang cs = cs - -- -- Functions acting on the parser state -- @@ -405,15 +384,6 @@ cleanLinkString s = in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme && not (null path) -isImageFilename :: String -> Bool -isImageFilename filename = - any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - (any (\x -> (x++":") `isPrefixOf` filename) protocols || - ':' `notElem` filename) - where - imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] - protocols = [ "file", "http", "https" ] - internalLink :: String -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs new file mode 100644 index 000000000..3ba46b9e4 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -0,0 +1,76 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014-2016 Albert Krewinkel + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org.Options + Copyright : Copyright (C) 2014-2016 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel + +Utility functions used in other Pandoc Org modules. +-} +module Text.Pandoc.Readers.Org.Shared + ( isImageFilename + , rundocBlockClass + , toRundocAttrib + , translateLang + ) where + +import Control.Arrow ( first ) +import Data.List ( isPrefixOf, isSuffixOf ) + + +-- | Check whether the given string looks like the path to of URL of an image. +isImageFilename :: String -> Bool +isImageFilename filename = + any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && + (any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename) + where + imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + protocols = [ "file", "http", "https" ] + +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = rundocPrefix ++ "block" + +-- | Prefix the name of a attribute, marking it as a code execution parameter. +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first (rundocPrefix ++) + +-- | Translate from Org-mode's programming language identifiers to those used +-- by Pandoc. This is useful to allow for proper syntax highlighting in +-- Pandoc output. +translateLang :: String -> String +translateLang cs = + case cs of + "C" -> "c" + "C++" -> "cpp" + "emacs-lisp" -> "commonlisp" -- emacs lisp is not supported + "js" -> "javascript" + "lisp" -> "commonlisp" + "R" -> "r" + "sh" -> "bash" + "sqlite" -> "sql" + _ -> cs -- cgit v1.2.3 From 729fca311fed275a8000d2ce3789d985133c34fc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 30 May 2016 17:30:04 +0200 Subject: Org reader: drop unused field from parser state The `OrgParserState` contained both an `orgStateMeta` and `orgStateMeta'` field, the former for plain meta information and the latter for F-monad wrapped meta info. The plain meta info is only used to make `OrgParserState` an instance of the `HasMeta` class, which in turn is never used in the reader. The (F Meta) version is hence renamed to the "un-primed" version while the other one is dropped. --- src/Text/Pandoc/Readers/Org/Blocks.hs | 11 +++++------ src/Text/Pandoc/Readers/Org/ParserState.hs | 15 +++------------ 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index e26beffdc..52db466a4 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -69,7 +69,7 @@ blockList = do meta :: OrgParser Meta meta = do st <- getState - return $ runF (orgStateMeta' st) st + return $ runF (orgStateMeta st) st blocks :: OrgParser (F Blocks) blocks = mconcat <$> manyTill block eof @@ -462,12 +462,11 @@ commentLine = commentLineStart *> anyLine *> pure mempty declarationLine :: OrgParser () declarationLine = try $ do - key <- metaKey - inlinesF <- metaInlines + key <- metaKey + value <- metaInlines updateState $ \st -> - let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta - in st { orgStateMeta' = orgStateMeta' st <> meta' } - return () + let meta' = B.setMeta key <$> value <*> pure nullMeta + in st { orgStateMeta = orgStateMeta st <> meta' } metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index e648a883e..cadc9bfb7 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -54,8 +54,7 @@ import Data.Default (Default(..)) import qualified Data.Map as M import qualified Data.Set as Set -import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), - trimInlines ) +import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) import Text.Pandoc.Definition ( Meta(..), nullMeta ) import Text.Pandoc.Options ( ReaderOptions(..) ) import Text.Pandoc.Parsing ( HasHeaderMap(..) @@ -96,8 +95,7 @@ data OrgParserState = OrgParserState , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMeta :: Meta - , orgStateMeta' :: F Meta + , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable , orgStateParserContext :: ParserContext , orgStateIdentifiers :: Set.Set String @@ -112,12 +110,6 @@ instance Default OrgParserLocal where instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions -instance HasMeta OrgParserState where - setMeta field val st = - st{ orgStateMeta = setMeta field val $ orgStateMeta st } - deleteMeta field st = - st{ orgStateMeta = deleteMeta field $ orgStateMeta st } - instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } @@ -151,8 +143,7 @@ defaultOrgParserState = OrgParserState , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty - , orgStateMeta = nullMeta - , orgStateMeta' = return nullMeta + , orgStateMeta = return nullMeta , orgStateNotes' = [] , orgStateParserContext = NullState , orgStateIdentifiers = Set.empty -- cgit v1.2.3 From 952a7dac589107cc19e606ff9cf29dfbb33002bc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 2 Jun 2016 22:17:46 +0200 Subject: Org reader: support smart quotes export option Reading of smart quotes can be toggled using the `'` option. --- src/Text/Pandoc/Readers/Org.hs | 5 +++-- src/Text/Pandoc/Readers/Org/Blocks.hs | 2 +- src/Text/Pandoc/Readers/Org/Inlines.hs | 2 ++ src/Text/Pandoc/Readers/Org/ParserState.hs | 31 +++++++++++++++++++++--------- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 1042b5a21..d593f856d 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -29,7 +29,7 @@ module Text.Pandoc.Readers.Org ( readOrg ) where import Text.Pandoc.Readers.Org.Blocks ( blockList, meta ) import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM ) -import Text.Pandoc.Readers.Org.ParserState ( OrgParserState (..) ) +import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState ) import Text.Pandoc.Definition import Text.Pandoc.Error @@ -42,7 +42,8 @@ import Control.Monad.Reader ( runReader ) readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Either PandocError Pandoc -readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") +readOrg opts s = flip runReader def $ + readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n") -- -- Parser diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 52db466a4..9019f44fb 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -500,7 +500,7 @@ addLinkFormat key formatter = updateState $ \s -> exportSetting :: OrgParser () exportSetting = choice [ booleanSetting "^" setExportSubSuperscripts - , ignoredSetting "'" + , booleanSetting "'" setExportSmartQuotes , ignoredSetting "*" , ignoredSetting "-" , ignoredSetting ":" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index be7fc600a..80a94f1b3 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -730,6 +730,7 @@ smart = do singleQuoted :: OrgParser (F Inlines) singleQuoted = try $ do + guard . exportSmartQuotes . orgStateExportSettings =<< getState singleQuoteStart updatePositions '\'' withQuoteContext InSingleQuote $ @@ -741,6 +742,7 @@ singleQuoted = try $ do -- in the same paragraph. doubleQuoted :: OrgParser (F Inlines) doubleQuoted = try $ do + guard . exportSmartQuotes . orgStateExportSettings =<< getState doubleQuoteStart updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index cadc9bfb7..d566346e8 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -42,9 +42,11 @@ module Text.Pandoc.Readers.Org.ParserState , returnF , ExportSettingSetter , ExportSettings (..) - , setExportSubSuperscripts , setExportDrawers + , setExportSmartQuotes + , setExportSubSuperscripts , modifyExportSettings + , optionsToParserState ) where import Control.Monad (liftM, liftM2) @@ -77,11 +79,12 @@ type OrgLinkFormatters = M.Map String (String -> String) -- | Export settings -- These settings can be changed via OPTIONS statements. data ExportSettings = ExportSettings - { exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts - , exportDrawers :: Either [String] [String] + { exportDrawers :: Either [String] [String] -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. + , exportSmartQuotes :: Bool -- ^ Parse quotes, ellipses, apostrophs smartly + , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts } -- | Org-mode parser state @@ -152,25 +155,35 @@ defaultOrgParserState = OrgParserState defaultExportSettings :: ExportSettings defaultExportSettings = ExportSettings - { exportSubSuperscripts = True - , exportDrawers = Left ["LOGBOOK"] + { exportDrawers = Left ["LOGBOOK"] + , exportSmartQuotes = True + , exportSubSuperscripts = True } +optionsToParserState :: ReaderOptions -> OrgParserState +optionsToParserState opts = + def { orgStateOptions = opts } + -- -- Setter for exporting options -- type ExportSettingSetter a = a -> ExportSettings -> ExportSettings +-- | Set export options for drawers. See the @exportDrawers@ in ADT +-- @ExportSettings@ for details. +setExportDrawers :: ExportSettingSetter (Either [String] [String]) +setExportDrawers val es = es { exportDrawers = val } + -- | Set export options for sub/superscript parsing. The short syntax will -- not be parsed if this is set set to @False@. setExportSubSuperscripts :: ExportSettingSetter Bool setExportSubSuperscripts val es = es { exportSubSuperscripts = val } --- | Set export options for drawers. See the @exportDrawers@ in ADT --- @ExportSettings@ for details. -setExportDrawers :: ExportSettingSetter (Either [String] [String]) -setExportDrawers val es = es { exportDrawers = val } +-- | Set export options for sub/superscript parsing. The short syntax will +-- not be parsed if this is set set to @False@. +setExportSmartQuotes :: ExportSettingSetter Bool +setExportSmartQuotes val es = es { exportSmartQuotes = val } -- | Modify a parser state modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParserState -- cgit v1.2.3 From d4de8451b9eae1681b0026057757444127d18d43 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 2 Jun 2016 23:38:22 +0200 Subject: Org reader: support emphasized text export option Parsing of emphasized text can be toggled using the `*` option. This influences parsing of text marked as emphasized, strong, strikeout, and underline. Parsing of inline math, code, and verbatim text is not affected by this option. --- src/Text/Pandoc/Readers/Org/Blocks.hs | 2 +- src/Text/Pandoc/Readers/Org/Inlines.hs | 15 +++++++++++---- src/Text/Pandoc/Readers/Org/ParserState.hs | 26 ++++++++++++++++---------- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 9019f44fb..896f1da1a 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -501,7 +501,7 @@ exportSetting :: OrgParser () exportSetting = choice [ booleanSetting "^" setExportSubSuperscripts , booleanSetting "'" setExportSmartQuotes - , ignoredSetting "*" + , booleanSetting "*" setExportEmphasizedText , ignoredSetting "-" , ignoredSetting ":" , ignoredSetting "<" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 80a94f1b3..fdefd2a3f 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -108,10 +108,7 @@ inline = , inlineCodeBlock , str , endline - , emph - , strong - , strikeout - , underline + , emphasizedText , code , math , displayMath @@ -443,6 +440,16 @@ inlineCodeBlock = try $ do <* skipSpaces +emphasizedText :: OrgParser (F Inlines) +emphasizedText = do + state <- getState + guard . exportEmphasizedText . orgStateExportSettings $ state + try $ choice + [ emph + , strong + , strikeout + , underline + ] enclosedByPair :: Char -- ^ opening char -> Char -- ^ closing char diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index d566346e8..2fa5d920b 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -43,6 +43,7 @@ module Text.Pandoc.Readers.Org.ParserState , ExportSettingSetter , ExportSettings (..) , setExportDrawers + , setExportEmphasizedText , setExportSmartQuotes , setExportSubSuperscripts , modifyExportSettings @@ -83,26 +84,27 @@ data ExportSettings = ExportSettings -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. + , exportEmphasizedText :: Bool -- ^ Parse emphasized text. , exportSmartQuotes :: Bool -- ^ Parse quotes, ellipses, apostrophs smartly , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts } -- | Org-mode parser state data OrgParserState = OrgParserState - { orgStateOptions :: ReaderOptions - , orgStateAnchorIds :: [String] + { orgStateAnchorIds :: [String] , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int , orgStateExportSettings :: ExportSettings + , orgStateHeaderMap :: M.Map Inlines String + , orgStateIdentifiers :: Set.Set String , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable + , orgStateOptions :: ReaderOptions , orgStateParserContext :: ParserContext - , orgStateIdentifiers :: Set.Set String - , orgStateHeaderMap :: M.Map Inlines String } data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } @@ -137,25 +139,26 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState - { orgStateOptions = def - , orgStateAnchorIds = [] + { orgStateAnchorIds = [] , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateExportSettings = def + , orgStateHeaderMap = M.empty + , orgStateIdentifiers = Set.empty , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty , orgStateMeta = return nullMeta , orgStateNotes' = [] + , orgStateOptions = def , orgStateParserContext = NullState - , orgStateIdentifiers = Set.empty - , orgStateHeaderMap = M.empty } defaultExportSettings :: ExportSettings defaultExportSettings = ExportSettings { exportDrawers = Left ["LOGBOOK"] + , exportEmphasizedText = True , exportSmartQuotes = True , exportSubSuperscripts = True } @@ -175,13 +178,16 @@ type ExportSettingSetter a = a -> ExportSettings -> ExportSettings setExportDrawers :: ExportSettingSetter (Either [String] [String]) setExportDrawers val es = es { exportDrawers = val } +-- | Set export options for emphasis parsing. +setExportEmphasizedText :: ExportSettingSetter Bool +setExportEmphasizedText val es = es { exportEmphasizedText = val } + -- | Set export options for sub/superscript parsing. The short syntax will -- not be parsed if this is set set to @False@. setExportSubSuperscripts :: ExportSettingSetter Bool setExportSubSuperscripts val es = es { exportSubSuperscripts = val } --- | Set export options for sub/superscript parsing. The short syntax will --- not be parsed if this is set set to @False@. +-- | Set export options for parsing of smart quotes. setExportSmartQuotes :: ExportSettingSetter Bool setExportSmartQuotes val es = es { exportSmartQuotes = val } -- cgit v1.2.3 From f56792927fbcebfd25593c488476fee69d078b48 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 3 Jun 2016 11:10:18 +0200 Subject: Org reader: support special strings export option Parsing of special strings (like '...' as ellipsis or '--' as en dash) can be toggled using the `-` option. --- src/Text/Pandoc/Readers/Org/Blocks.hs | 2 +- src/Text/Pandoc/Readers/Org/Inlines.hs | 12 ++++++++---- src/Text/Pandoc/Readers/Org/ParserState.hs | 20 ++++++++++++++------ src/Text/Pandoc/Readers/Org/Parsing.hs | 14 +++++++++++++- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 896f1da1a..75e564f2f 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -502,7 +502,7 @@ exportSetting = choice [ booleanSetting "^" setExportSubSuperscripts , booleanSetting "'" setExportSmartQuotes , booleanSetting "*" setExportEmphasizedText - , ignoredSetting "-" + , booleanSetting "-" setExportSpecialStrings , ignoredSetting ":" , ignoredSetting "<" , ignoredSetting "\\n" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index fdefd2a3f..001aeb569 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -728,8 +728,12 @@ smart = do doubleQuoted <|> singleQuoted <|> choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) where - orgDash = dash <* updatePositions '-' - orgEllipses = ellipses <* updatePositions '.' + orgDash = do + guard =<< getExportSetting exportSpecialStrings + dash <* updatePositions '-' + orgEllipses = do + guard =<< getExportSetting exportSpecialStrings + ellipses <* updatePositions '.' orgApostrophe = (char '\'' <|> char '\8217') <* updateLastPreCharPos <* updateLastForbiddenCharPos @@ -737,7 +741,7 @@ smart = do singleQuoted :: OrgParser (F Inlines) singleQuoted = try $ do - guard . exportSmartQuotes . orgStateExportSettings =<< getState + guard =<< getExportSetting exportSmartQuotes singleQuoteStart updatePositions '\'' withQuoteContext InSingleQuote $ @@ -749,7 +753,7 @@ singleQuoted = try $ do -- in the same paragraph. doubleQuoted :: OrgParser (F Inlines) doubleQuoted = try $ do - guard . exportSmartQuotes . orgStateExportSettings =<< getState + guard =<< getExportSetting exportSmartQuotes doubleQuoteStart updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 2fa5d920b..0c58183f9 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -45,6 +45,7 @@ module Text.Pandoc.Readers.Org.ParserState , setExportDrawers , setExportEmphasizedText , setExportSmartQuotes + , setExportSpecialStrings , setExportSubSuperscripts , modifyExportSettings , optionsToParserState @@ -84,8 +85,9 @@ data ExportSettings = ExportSettings -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. - , exportEmphasizedText :: Bool -- ^ Parse emphasized text. - , exportSmartQuotes :: Bool -- ^ Parse quotes, ellipses, apostrophs smartly + , exportEmphasizedText :: Bool -- ^ Parse emphasized text + , exportSmartQuotes :: Bool -- ^ Parse quotes smartly + , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts } @@ -160,6 +162,7 @@ defaultExportSettings = ExportSettings { exportDrawers = Left ["LOGBOOK"] , exportEmphasizedText = True , exportSmartQuotes = True + , exportSpecialStrings = True , exportSubSuperscripts = True } @@ -182,15 +185,20 @@ setExportDrawers val es = es { exportDrawers = val } setExportEmphasizedText :: ExportSettingSetter Bool setExportEmphasizedText val es = es { exportEmphasizedText = val } +-- | Set export options for parsing of smart quotes. +setExportSmartQuotes :: ExportSettingSetter Bool +setExportSmartQuotes val es = es { exportSmartQuotes = val } + +-- | Set export options for parsing of special strings (like em/en dashes or +-- ellipses). +setExportSpecialStrings :: ExportSettingSetter Bool +setExportSpecialStrings val es = es { exportSpecialStrings = val } + -- | Set export options for sub/superscript parsing. The short syntax will -- not be parsed if this is set set to @False@. setExportSubSuperscripts :: ExportSettingSetter Bool setExportSubSuperscripts val es = es { exportSubSuperscripts = val } --- | Set export options for parsing of smart quotes. -setExportSmartQuotes :: ExportSettingSetter Bool -setExportSmartQuotes val es = es { exportSmartQuotes = val } - -- | Modify a parser state modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParserState modifyExportSettings setter val state = diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 0b6b876d8..8cf0c696c 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -37,6 +37,7 @@ module Text.Pandoc.Readers.Org.Parsing , skipSpaces1 , inList , withContext + , getExportSetting , updateLastForbiddenCharPos , updateLastPreCharPos , orgArgKey @@ -174,9 +175,13 @@ withContext context parser = do return result -- --- Parser state update functions +-- Parser state functions -- +-- | Get an export setting. +getExportSetting :: (ExportSettings -> a) -> OrgParser a +getExportSetting s = s . orgStateExportSettings <$> getState + -- | Set the current position as the last position at which a forbidden char -- was found (i.e. a character which is not allowed at the inner border of -- markup). @@ -190,13 +195,20 @@ updateLastPreCharPos :: OrgParser () updateLastPreCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastPreCharPos = Just p} +-- +-- Org key-value parsing +-- + +-- | Read the key of a plist style key-value list. orgArgKey :: OrgParser String orgArgKey = try $ skipSpaces *> char ':' *> many1 orgArgWordChar +-- | Read the value of a plist style key-value list. orgArgWord :: OrgParser String orgArgWord = many1 orgArgWordChar +-- | Chars treated as part of a word in plists. orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" -- cgit v1.2.3