summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-06-03 09:39:32 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2016-06-03 09:39:32 -0700
commit2b2769975fea76337a5afd9e985a4de43c7276a0 (patch)
tree1657d1b7920d2854fe3623500bc448323319996d
parent9eb3dba243cf88e6e4d852161a308e815ef626d7 (diff)
parentf56792927fbcebfd25593c488476fee69d078b48 (diff)
Merge pull request #2962 from tarleb/org-more-export-settings
Org reader: support more export settings
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/Readers/Org.hs5
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs41
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs63
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs78
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs14
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs76
7 files changed, 175 insertions, 103 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 7d7250a21..0484b3293 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -398,6 +398,7 @@ Library
Text.Pandoc.Readers.Org.Inlines,
Text.Pandoc.Readers.Org.ParserState,
Text.Pandoc.Readers.Org.Parsing,
+ Text.Pandoc.Readers.Org.Shared,
Text.Pandoc.Writers.Shared,
Text.Pandoc.Asciify,
Text.Pandoc.MIME,
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 1042b5a21..d593f856d 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -29,7 +29,7 @@ module Text.Pandoc.Readers.Org ( readOrg ) where
import Text.Pandoc.Readers.Org.Blocks ( blockList, meta )
import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM )
-import Text.Pandoc.Readers.Org.ParserState ( OrgParserState (..) )
+import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState )
import Text.Pandoc.Definition
import Text.Pandoc.Error
@@ -42,7 +42,8 @@ import Control.Monad.Reader ( runReader )
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Either PandocError Pandoc
-readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
+readOrg opts s = flip runReader def $
+ readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n")
--
-- Parser
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 36645a356..75e564f2f 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -35,6 +35,9 @@ import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Readers.Org.Shared
+ ( isImageFilename, rundocBlockClass, toRundocAttrib
+ , translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines, Blocks )
@@ -43,7 +46,6 @@ import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Options
import Text.Pandoc.Shared ( compactify', compactify'DL )
-import Control.Arrow ( first )
import Control.Monad ( foldM, guard, mzero )
import Data.Char ( isSpace, toLower, toUpper)
import Data.List ( foldl', intersperse, isPrefixOf )
@@ -67,7 +69,7 @@ blockList = do
meta :: OrgParser Meta
meta = do
st <- getState
- return $ runF (orgStateMeta' st) st
+ return $ runF (orgStateMeta st) st
blocks :: OrgParser (F Blocks)
blocks = mconcat <$> manyTill block eof
@@ -314,7 +316,6 @@ codeHeaderArgs = try $ do
else ([ pandocLang ], parameters)
where
hasRundocParameters = not . null
- toRundocAttrib = first ("rundoc-" ++)
switch :: OrgParser (Char, Maybe String)
switch = try $ simpleSwitch <|> lineNumbersSwitch
@@ -323,25 +324,6 @@ switch = try $ simpleSwitch <|> lineNumbersSwitch
lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
(string "-l \"" *> many1Till nonspaceChar (char '"'))
-translateLang :: String -> String
-translateLang "C" = "c"
-translateLang "C++" = "cpp"
-translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
-translateLang "js" = "javascript"
-translateLang "lisp" = "commonlisp"
-translateLang "R" = "r"
-translateLang "sh" = "bash"
-translateLang "sqlite" = "sql"
-translateLang cs = cs
-
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
-
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
blockOption :: OrgParser (String, String)
blockOption = try $ do
argKey <- orgArgKey
@@ -480,12 +462,11 @@ commentLine = commentLineStart *> anyLine *> pure mempty
declarationLine :: OrgParser ()
declarationLine = try $ do
- key <- metaKey
- inlinesF <- metaInlines
+ key <- metaKey
+ value <- metaInlines
updateState $ \st ->
- let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
- in st { orgStateMeta' = orgStateMeta' st <> meta' }
- return ()
+ let meta' = B.setMeta key <$> value <*> pure nullMeta
+ in st { orgStateMeta = orgStateMeta st <> meta' }
metaInlines :: OrgParser (F MetaValue)
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
@@ -519,9 +500,9 @@ addLinkFormat key formatter = updateState $ \s ->
exportSetting :: OrgParser ()
exportSetting = choice
[ booleanSetting "^" setExportSubSuperscripts
- , ignoredSetting "'"
- , ignoredSetting "*"
- , ignoredSetting "-"
+ , booleanSetting "'" setExportSmartQuotes
+ , booleanSetting "*" setExportEmphasizedText
+ , booleanSetting "-" setExportSpecialStrings
, ignoredSetting ":"
, ignoredSetting "<"
, ignoredSetting "\\n"
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 6971ca3c6..001aeb569 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -30,13 +30,15 @@ module Text.Pandoc.Readers.Org.Inlines
( inline
, inlines
, addToNotesTable
- , isImageFilename
, linkTarget
) where
import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Readers.Org.Shared
+ ( isImageFilename, rundocBlockClass, toRundocAttrib
+ , translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines )
@@ -47,35 +49,12 @@ import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Control.Arrow ( first )
import Control.Monad ( guard, mplus, mzero, when )
import Data.Char ( isAlphaNum, isSpace )
-import Data.List ( isPrefixOf, isSuffixOf )
+import Data.List ( isPrefixOf )
import Data.Maybe ( fromMaybe )
import qualified Data.Map as M
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
-
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first ("rundoc-" ++)
-
-translateLang :: String -> String
-translateLang "C" = "c"
-translateLang "C++" = "cpp"
-translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
-translateLang "js" = "javascript"
-translateLang "lisp" = "commonlisp"
-translateLang "R" = "r"
-translateLang "sh" = "bash"
-translateLang "sqlite" = "sql"
-translateLang cs = cs
-
--
-- Functions acting on the parser state
--
@@ -129,10 +108,7 @@ inline =
, inlineCodeBlock
, str
, endline
- , emph
- , strong
- , strikeout
- , underline
+ , emphasizedText
, code
, math
, displayMath
@@ -405,15 +381,6 @@ cleanLinkString s =
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
&& not (null path)
-isImageFilename :: String -> Bool
-isImageFilename filename =
- any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
- (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
- ':' `notElem` filename)
- where
- imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
- protocols = [ "file", "http", "https" ]
-
internalLink :: String -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
@@ -473,6 +440,16 @@ inlineCodeBlock = try $ do
<* skipSpaces
+emphasizedText :: OrgParser (F Inlines)
+emphasizedText = do
+ state <- getState
+ guard . exportEmphasizedText . orgStateExportSettings $ state
+ try $ choice
+ [ emph
+ , strong
+ , strikeout
+ , underline
+ ]
enclosedByPair :: Char -- ^ opening char
-> Char -- ^ closing char
@@ -751,8 +728,12 @@ smart = do
doubleQuoted <|> singleQuoted <|>
choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
where
- orgDash = dash <* updatePositions '-'
- orgEllipses = ellipses <* updatePositions '.'
+ orgDash = do
+ guard =<< getExportSetting exportSpecialStrings
+ dash <* updatePositions '-'
+ orgEllipses = do
+ guard =<< getExportSetting exportSpecialStrings
+ ellipses <* updatePositions '.'
orgApostrophe =
(char '\'' <|> char '\8217') <* updateLastPreCharPos
<* updateLastForbiddenCharPos
@@ -760,6 +741,7 @@ smart = do
singleQuoted :: OrgParser (F Inlines)
singleQuoted = try $ do
+ guard =<< getExportSetting exportSmartQuotes
singleQuoteStart
updatePositions '\''
withQuoteContext InSingleQuote $
@@ -771,6 +753,7 @@ singleQuoted = try $ do
-- in the same paragraph.
doubleQuoted :: OrgParser (F Inlines)
doubleQuoted = try $ do
+ guard =<< getExportSetting exportSmartQuotes
doubleQuoteStart
updatePositions '"'
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index e648a883e..0c58183f9 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -42,9 +42,13 @@ module Text.Pandoc.Readers.Org.ParserState
, returnF
, ExportSettingSetter
, ExportSettings (..)
- , setExportSubSuperscripts
, setExportDrawers
+ , setExportEmphasizedText
+ , setExportSmartQuotes
+ , setExportSpecialStrings
+ , setExportSubSuperscripts
, modifyExportSettings
+ , optionsToParserState
) where
import Control.Monad (liftM, liftM2)
@@ -54,8 +58,7 @@ import Data.Default (Default(..))
import qualified Data.Map as M
import qualified Data.Set as Set
-import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
- trimInlines )
+import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
import Text.Pandoc.Definition ( Meta(..), nullMeta )
import Text.Pandoc.Options ( ReaderOptions(..) )
import Text.Pandoc.Parsing ( HasHeaderMap(..)
@@ -78,30 +81,32 @@ type OrgLinkFormatters = M.Map String (String -> String)
-- | Export settings <http://orgmode.org/manual/Export-settings.html>
-- These settings can be changed via OPTIONS statements.
data ExportSettings = ExportSettings
- { exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts
- , exportDrawers :: Either [String] [String]
+ { exportDrawers :: Either [String] [String]
-- ^ Specify drawer names which should be exported. @Left@ names are
-- explicitly excluded from the resulting output while @Right@ means that
-- only the listed drawer names should be included.
+ , exportEmphasizedText :: Bool -- ^ Parse emphasized text
+ , exportSmartQuotes :: Bool -- ^ Parse quotes smartly
+ , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly
+ , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts
}
-- | Org-mode parser state
data OrgParserState = OrgParserState
- { orgStateOptions :: ReaderOptions
- , orgStateAnchorIds :: [String]
+ { orgStateAnchorIds :: [String]
, orgStateEmphasisCharStack :: [Char]
, orgStateEmphasisNewlines :: Maybe Int
, orgStateExportSettings :: ExportSettings
+ , orgStateHeaderMap :: M.Map Inlines String
+ , orgStateIdentifiers :: Set.Set String
, orgStateLastForbiddenCharPos :: Maybe SourcePos
, orgStateLastPreCharPos :: Maybe SourcePos
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
- , orgStateMeta :: Meta
- , orgStateMeta' :: F Meta
+ , orgStateMeta :: F Meta
, orgStateNotes' :: OrgNoteTable
+ , orgStateOptions :: ReaderOptions
, orgStateParserContext :: ParserContext
- , orgStateIdentifiers :: Set.Set String
- , orgStateHeaderMap :: M.Map Inlines String
}
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
@@ -112,12 +117,6 @@ instance Default OrgParserLocal where
instance HasReaderOptions OrgParserState where
extractReaderOptions = orgStateOptions
-instance HasMeta OrgParserState where
- setMeta field val st =
- st{ orgStateMeta = setMeta field val $ orgStateMeta st }
- deleteMeta field st =
- st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
-
instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
@@ -142,45 +141,64 @@ instance Default OrgParserState where
defaultOrgParserState :: OrgParserState
defaultOrgParserState = OrgParserState
- { orgStateOptions = def
- , orgStateAnchorIds = []
+ { orgStateAnchorIds = []
, orgStateEmphasisCharStack = []
, orgStateEmphasisNewlines = Nothing
, orgStateExportSettings = def
+ , orgStateHeaderMap = M.empty
+ , orgStateIdentifiers = Set.empty
, orgStateLastForbiddenCharPos = Nothing
, orgStateLastPreCharPos = Nothing
, orgStateLastStrPos = Nothing
, orgStateLinkFormatters = M.empty
- , orgStateMeta = nullMeta
- , orgStateMeta' = return nullMeta
+ , orgStateMeta = return nullMeta
, orgStateNotes' = []
+ , orgStateOptions = def
, orgStateParserContext = NullState
- , orgStateIdentifiers = Set.empty
- , orgStateHeaderMap = M.empty
}
defaultExportSettings :: ExportSettings
defaultExportSettings = ExportSettings
- { exportSubSuperscripts = True
- , exportDrawers = Left ["LOGBOOK"]
+ { exportDrawers = Left ["LOGBOOK"]
+ , exportEmphasizedText = True
+ , exportSmartQuotes = True
+ , exportSpecialStrings = True
+ , exportSubSuperscripts = True
}
+optionsToParserState :: ReaderOptions -> OrgParserState
+optionsToParserState opts =
+ def { orgStateOptions = opts }
+
--
-- Setter for exporting options
--
type ExportSettingSetter a = a -> ExportSettings -> ExportSettings
--- | Set export options for sub/superscript parsing. The short syntax will
--- not be parsed if this is set set to @False@.
-setExportSubSuperscripts :: ExportSettingSetter Bool
-setExportSubSuperscripts val es = es { exportSubSuperscripts = val }
-
-- | Set export options for drawers. See the @exportDrawers@ in ADT
-- @ExportSettings@ for details.
setExportDrawers :: ExportSettingSetter (Either [String] [String])
setExportDrawers val es = es { exportDrawers = val }
+-- | Set export options for emphasis parsing.
+setExportEmphasizedText :: ExportSettingSetter Bool
+setExportEmphasizedText val es = es { exportEmphasizedText = val }
+
+-- | Set export options for parsing of smart quotes.
+setExportSmartQuotes :: ExportSettingSetter Bool
+setExportSmartQuotes val es = es { exportSmartQuotes = val }
+
+-- | Set export options for parsing of special strings (like em/en dashes or
+-- ellipses).
+setExportSpecialStrings :: ExportSettingSetter Bool
+setExportSpecialStrings val es = es { exportSpecialStrings = val }
+
+-- | Set export options for sub/superscript parsing. The short syntax will
+-- not be parsed if this is set set to @False@.
+setExportSubSuperscripts :: ExportSettingSetter Bool
+setExportSubSuperscripts val es = es { exportSubSuperscripts = val }
+
-- | Modify a parser state
modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParserState
modifyExportSettings setter val state =
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 0b6b876d8..8cf0c696c 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -37,6 +37,7 @@ module Text.Pandoc.Readers.Org.Parsing
, skipSpaces1
, inList
, withContext
+ , getExportSetting
, updateLastForbiddenCharPos
, updateLastPreCharPos
, orgArgKey
@@ -174,9 +175,13 @@ withContext context parser = do
return result
--
--- Parser state update functions
+-- Parser state functions
--
+-- | Get an export setting.
+getExportSetting :: (ExportSettings -> a) -> OrgParser a
+getExportSetting s = s . orgStateExportSettings <$> getState
+
-- | Set the current position as the last position at which a forbidden char
-- was found (i.e. a character which is not allowed at the inner border of
-- markup).
@@ -190,13 +195,20 @@ updateLastPreCharPos :: OrgParser ()
updateLastPreCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
+--
+-- Org key-value parsing
+--
+
+-- | Read the key of a plist style key-value list.
orgArgKey :: OrgParser String
orgArgKey = try $
skipSpaces *> char ':'
*> many1 orgArgWordChar
+-- | Read the value of a plist style key-value list.
orgArgWord :: OrgParser String
orgArgWord = many1 orgArgWordChar
+-- | Chars treated as part of a word in plists.
orgArgWordChar :: OrgParser Char
orgArgWordChar = alphaNum <|> oneOf "-_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
new file mode 100644
index 000000000..3ba46b9e4
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -0,0 +1,76 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Utility functions used in other Pandoc Org modules.
+-}
+module Text.Pandoc.Readers.Org.Shared
+ ( isImageFilename
+ , rundocBlockClass
+ , toRundocAttrib
+ , translateLang
+ ) where
+
+import Control.Arrow ( first )
+import Data.List ( isPrefixOf, isSuffixOf )
+
+
+-- | Check whether the given string looks like the path to of URL of an image.
+isImageFilename :: String -> Bool
+isImageFilename filename =
+ any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
+ (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+ ':' `notElem` filename)
+ where
+ imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+ protocols = [ "file", "http", "https" ]
+
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+-- | Prefix the name of a attribute, marking it as a code execution parameter.
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first (rundocPrefix ++)
+
+-- | Translate from Org-mode's programming language identifiers to those used
+-- by Pandoc. This is useful to allow for proper syntax highlighting in
+-- Pandoc output.
+translateLang :: String -> String
+translateLang cs =
+ case cs of
+ "C" -> "c"
+ "C++" -> "cpp"
+ "emacs-lisp" -> "commonlisp" -- emacs lisp is not supported
+ "js" -> "javascript"
+ "lisp" -> "commonlisp"
+ "R" -> "r"
+ "sh" -> "bash"
+ "sqlite" -> "sql"
+ _ -> cs