summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-07-01 19:31:43 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2017-07-07 12:36:00 +0200
commit0feb7504b1c68cef76b30ea9987e2eae3101714c (patch)
tree6d64b020b93416b6970b4a6d2e59595f82fa405e /src
parent1dd769e55897757812a1d8188b80c5df7fcb2971 (diff)
Rewrote LaTeX reader with proper tokenization.
This rewrite is primarily motivated by the need to get macros working properly. A side benefit is that the reader is significantly faster (27s -> 19s in one benchmark, and there is a lot of room for further optimization). We now tokenize the input text, then parse the token stream. Macros modify the token stream, so they should now be effective in any context, including math. Thus, we no longer need the clunky macro processing capacities of texmath. A custom state LaTeXState is used instead of ParserState. This, plus the tokenization, will require some rewriting of the exported functions rawLaTeXInline, inlineCommand, rawLaTeXBlock. * Added Text.Pandoc.Readers.LaTeX.Types (new exported module). Exports Macro, Tok, TokType, Line, Column. [API change] * Text.Pandoc.Parsing: adjusted type of `insertIncludedFile` so it can be used with token parser. * Removed old texmath macro stuff from Parsing. Use Macro from Text.Pandoc.Readers.LaTeX.Types instead. * Removed texmath macro material from Markdown reader. * Changed types for Text.Pandoc.Readers.LaTeX's rawLaTeXInline and rawLaTeXBlock. (Both now return a String, and they are polymorphic in state.) * Added orgMacros field to OrgState. [API change] * Removed readerApplyMacros from ReaderOptions. Now we just check the `latex_macros` reader extension. * Allow `\newcommand\foo{blah}` without braces. Fixes #1390. Fixes #2118. Fixes #3236. Fixes #3779. Fixes #934. Fixes #982.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/App.hs1
-rw-r--r--src/Text/Pandoc/Error.hs3
-rw-r--r--src/Text/Pandoc/Extensions.hs1
-rw-r--r--src/Text/Pandoc/Options.hs2
-rw-r--r--src/Text/Pandoc/Parsing.hs67
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs2725
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs48
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs19
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs2
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs5
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs10
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs2
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs2
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs2
14 files changed, 1746 insertions, 1143 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 6fdd2a44c..689c0a784 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -308,7 +308,6 @@ convertWithOpts opts = do
, readerColumns = optColumns opts
, readerTabStop = optTabStop opts
, readerIndentedCodeClasses = optIndentedCodeClasses opts
- , readerApplyMacros = not laTeXOutput
, readerDefaultImageExtension =
optDefaultImageExtension opts
, readerTrackChanges = optTrackChanges opts
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 60bc699ab..24186720c 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -64,6 +64,7 @@ data PandocError = PandocIOError String IOError
| PandocTemplateError String
| PandocAppError String
| PandocEpubSubdirectoryError String
+ | PandocMacroLoop String
deriving (Show, Typeable, Generic)
instance Exception PandocError
@@ -107,6 +108,8 @@ handleError (Left e) =
PandocAppError s -> err 1 s
PandocEpubSubdirectoryError s -> err 31 $
"EPUB subdirectory name '" ++ s ++ "' contains illegal characters"
+ PandocMacroLoop s -> err 91 $
+ "Loop encountered in expanding macro " ++ s
err :: Int -> String -> IO a
err exitCode msg = do
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index bd164635c..28459d4e6 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -318,6 +318,7 @@ getDefaultExtensions "epub2" = getDefaultExtensions "epub"
getDefaultExtensions "epub3" = getDefaultExtensions "epub"
getDefaultExtensions "latex" = extensionsFromList
[Ext_smart,
+ Ext_latex_macros,
Ext_auto_identifiers]
getDefaultExtensions "context" = extensionsFromList
[Ext_smart,
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 6519f807c..d7e77010e 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -61,7 +61,6 @@ data ReaderOptions = ReaderOptions{
, readerStandalone :: Bool -- ^ Standalone document with header
, readerColumns :: Int -- ^ Number of columns in terminal
, readerTabStop :: Int -- ^ Tab stop
- , readerApplyMacros :: Bool -- ^ Apply macros to TeX math
, readerIndentedCodeClasses :: [String] -- ^ Default classes for
-- indented code blocks
, readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
@@ -75,7 +74,6 @@ instance Default ReaderOptions
, readerStandalone = False
, readerColumns = 80
, readerTabStop = 4
- , readerApplyMacros = True
, readerIndentedCodeClasses = []
, readerAbbreviations = defaultAbbrevs
, readerDefaultImageExtension = ""
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index eb5b37f40..f6263c782 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -109,8 +109,6 @@ module Text.Pandoc.Parsing ( anyLine,
dash,
nested,
citeKey,
- macro,
- applyMacros',
Parser,
ParserT,
F,
@@ -130,6 +128,7 @@ module Text.Pandoc.Parsing ( anyLine,
runParser,
runParserT,
parse,
+ tokenPrim,
anyToken,
getInput,
setInput,
@@ -178,13 +177,16 @@ module Text.Pandoc.Parsing ( anyLine,
sourceLine,
setSourceColumn,
setSourceLine,
- newPos
+ newPos,
+ Line,
+ Column
)
where
+import Data.Text (Text)
import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..), trimInlines)
+import Text.Pandoc.Builder (Blocks, Inlines, HasMeta(..), trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.XML (fromEntities)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
@@ -195,7 +197,7 @@ import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
import Data.List ( intercalate, transpose, isSuffixOf )
import Text.Pandoc.Shared
import qualified Data.Map as M
-import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, pMacroDefinition)
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.HTML.TagSoup.Entity ( lookupEntity )
import Text.Pandoc.Asciify (toAsciiChar)
import Data.Monoid ((<>))
@@ -994,7 +996,7 @@ data ParserState = ParserState
stateIdentifiers :: Set.Set String, -- ^ Header identifiers used
stateNextExample :: Int, -- ^ Number of next example
stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
- stateMacros :: [Macro], -- ^ List of macros defined so far
+ stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far
stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
-- Triple represents: 1) Base role, 2) Optional format (only for :raw:
@@ -1057,8 +1059,8 @@ instance HasIdentifierList ParserState where
updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
class HasMacros st where
- extractMacros :: st -> [Macro]
- updateMacros :: ([Macro] -> [Macro]) -> st -> st
+ extractMacros :: st -> M.Map Text Macro
+ updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st
instance HasMacros ParserState where
extractMacros = stateMacros
@@ -1112,7 +1114,7 @@ defaultParserState =
stateIdentifiers = Set.empty,
stateNextExample = 1,
stateExamples = M.empty,
- stateMacros = [],
+ stateMacros = M.empty,
stateRstDefaultRole = "title-reference",
stateRstCustomRoles = M.empty,
stateCaption = Nothing,
@@ -1341,33 +1343,6 @@ token :: (Stream s m t)
-> ParsecT s st m a
token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
---
--- Macros
---
-
--- | Parse a \newcommand or \newenviroment macro definition.
-macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st)
- => ParserT [Char] st m Blocks
-macro = do
- apply <- getOption readerApplyMacros
- (m, def') <- withRaw pMacroDefinition
- if apply
- then do
- updateState $ \st -> updateMacros (m:) st
- return mempty
- else return $ rawBlock "latex" def'
-
--- | Apply current macros to string.
-applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
- => String
- -> ParserT [Char] st m String
-applyMacros' target = do
- apply <- getOption readerApplyMacros
- if apply
- then do macros <- extractMacros <$> getState
- return $ applyMacros macros target
- else return target
-
infixr 5 <+?>
(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
@@ -1385,10 +1360,11 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st,
Functor mf, Applicative mf, Monad mf)
- => ParserT String st m (mf Blocks)
+ => ParserT [a] st m (mf Blocks)
+ -> (String -> [a])
-> [FilePath] -> FilePath
- -> ParserT String st m (mf Blocks)
-insertIncludedFile' blocks dirs f = do
+ -> ParserT [a] st m (mf Blocks)
+insertIncludedFile' blocks totoks dirs f = do
oldPos <- getPosition
oldInput <- getInput
containers <- getIncludeFiles <$> getState
@@ -1402,7 +1378,7 @@ insertIncludedFile' blocks dirs f = do
report $ CouldNotLoadIncludeFile f oldPos
return ""
setPosition $ newPos f 1 1
- setInput contents
+ setInput $ totoks contents
bs <- blocks
setInput oldInput
setPosition oldPos
@@ -1412,11 +1388,12 @@ insertIncludedFile' blocks dirs f = do
-- | Parse content of include file as blocks. Circular includes result in an
-- @PandocParseError@.
insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
- => ParserT String st m Blocks
+ => ParserT [a] st m Blocks
+ -> (String -> [a])
-> [FilePath] -> FilePath
- -> ParserT String st m Blocks
-insertIncludedFile blocks dirs f =
- runIdentity <$> insertIncludedFile' (Identity <$> blocks) dirs f
+ -> ParserT [a] st m Blocks
+insertIncludedFile blocks totoks dirs f =
+ runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
-- | Parse content of include file as future blocks. Circular includes result in
-- an @PandocParseError@.
@@ -1424,4 +1401,4 @@ insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
=> ParserT String st m (Future st Blocks)
-> [FilePath] -> FilePath
-> ParserT String st m (Future st Blocks)
-insertIncludedFileF = insertIncludedFile'
+insertIncludedFileF p = insertIncludedFile' p id
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 090dc5fdb..d82e6a5dc 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
@@ -28,20 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Portability : portable
Conversion of LaTeX to 'Pandoc' document.
+
-}
module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+ applyMacros,
rawLaTeXInline,
rawLaTeXBlock,
- inlineCommand,
+ macro,
+ inlineCommand
) where
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Data.Char (chr, isAlphaNum, isLetter, ord)
-import Data.Text (Text, unpack)
+import Data.Char (chr, isAlphaNum, isLetter, ord, isDigit)
+import Data.Default
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Maybe (fromMaybe, maybeToList)
import Safe (minimumDef)
import System.FilePath (addExtension, replaceExtension, takeExtension)
@@ -52,10 +61,19 @@ import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (many, mathDisplay, mathInline, optional,
- space, (<|>))
+import Text.Pandoc.Parsing hiding (many, optional, withRaw,
+ mathInline, mathDisplay,
+ space, (<|>), spaces, blankline)
import Text.Pandoc.Shared
+import Text.Pandoc.Readers.LaTeX.Types (Macro(..), Tok(..),
+ TokType(..), Line, Column)
import Text.Pandoc.Walk
+import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop))
+
+-- for debugging:
+-- import Text.Pandoc.Extensions (getDefaultExtensions)
+-- import Text.Pandoc.Class (runIOorExplode, PandocIO)
+-- import Debug.Trace (traceShowId)
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: PandocMonad m
@@ -63,18 +81,18 @@ readLaTeX :: PandocMonad m
-> Text -- ^ String to parse (assumes @'\n'@ line endings)
-> m Pandoc
readLaTeX opts ltx = do
- parsed <- readWithM parseLaTeX def{ stateOptions = opts }
- (unpack (crFilter ltx))
+ parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
+ (tokenize (crFilter ltx))
case parsed of
Right result -> return result
- Left e -> throwError e
+ Left e -> throwError $ PandocParsecError (T.unpack ltx) e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
bs <- blocks
eof
st <- getState
- let meta = stateMeta st
+ let meta = sMeta st
let doc' = doc bs
let headerLevel (Header n _ _) = [n]
headerLevel _ = []
@@ -88,177 +106,476 @@ parseLaTeX = do
else id) doc'
return $ Pandoc meta bs'
-type LP m = ParserT String ParserState m
-
-anyControlSeq :: PandocMonad m => LP m String
-anyControlSeq = do
- char '\\'
- next <- option '\n' anyChar
- case next of
- '\n' -> return ""
- c | isLetter c -> (c:) <$> (many letter <* optional sp)
- | otherwise -> return [c]
-
-controlSeq :: PandocMonad m => String -> LP m String
-controlSeq name = try $ do
- char '\\'
- case name of
- "" -> mzero
- [c] | not (isLetter c) -> string [c]
- cs -> string cs <* notFollowedBy letter <* optional sp
- return name
-
-dimenarg :: PandocMonad m => LP m String
-dimenarg = try $ do
- ch <- option "" $ string "="
- num <- many1 digit
- dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
- return $ ch ++ num ++ dim
+-- testParser :: LP PandocIO a -> Text -> IO a
+-- testParser p t = do
+-- res <- runIOorExplode (runParserT p defaultLaTeXState{
+-- sOptions = def{ readerExtensions =
+-- enableExtension Ext_raw_tex $
+-- getDefaultExtensions "latex" }} "source" (tokenize t))
+-- case res of
+-- Left e -> error (show e)
+-- Right r -> return r
+
+data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
+ , sMeta :: Meta
+ , sQuoteContext :: QuoteContext
+ , sMacros :: M.Map Text Macro
+ , sContainers :: [String]
+ , sHeaders :: M.Map Inlines String
+ , sLogMessages :: [LogMessage]
+ , sIdentifiers :: Set.Set String
+ , sVerbatimMode :: Bool
+ , sCaption :: Maybe Inlines
+ , sInListItem :: Bool
+ , sInTableCell :: Bool
+ }
+ deriving Show
+
+defaultLaTeXState :: LaTeXState
+defaultLaTeXState = LaTeXState{ sOptions = def
+ , sMeta = nullMeta
+ , sQuoteContext = NoQuote
+ , sMacros = M.empty
+ , sContainers = []
+ , sHeaders = M.empty
+ , sLogMessages = []
+ , sIdentifiers = Set.empty
+ , sVerbatimMode = False
+ , sCaption = Nothing
+ , sInListItem = False
+ , sInTableCell = False
+ }
+
+instance PandocMonad m => HasQuoteContext LaTeXState m where
+ getQuoteContext = sQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = sQuoteContext oldState
+ setState oldState { sQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { sQuoteContext = oldQuoteContext }
+ return result
+
+instance HasLogMessages LaTeXState where
+ addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
+ getLogMessages st = reverse $ sLogMessages st
+
+instance HasIdentifierList LaTeXState where
+ extractIdentifierList = sIdentifiers
+ updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
+
+instance HasIncludeFiles LaTeXState where
+ getIncludeFiles = sContainers
+ addIncludeFile f s = s{ sContainers = f : sContainers s }
+ dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
+
+instance HasHeaderMap LaTeXState where
+ extractHeaderMap = sHeaders
+ updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
+
+instance HasMacros LaTeXState where
+ extractMacros st = sMacros st
+ updateMacros f st = st{ sMacros = f (sMacros st) }
+
+instance HasReaderOptions LaTeXState where
+ extractReaderOptions = sOptions
+
+instance HasMeta LaTeXState where
+ setMeta field val st =
+ st{ sMeta = setMeta field val $ sMeta st }
+ deleteMeta field st =
+ st{ sMeta = deleteMeta field $ sMeta st }
+
+instance Default LaTeXState where
+ def = defaultLaTeXState
+
+type LP m = ParserT [Tok] LaTeXState m
+
+withVerbatimMode :: PandocMonad m => LP m a -> LP m a
+withVerbatimMode parser = do
+ updateState $ \st -> st{ sVerbatimMode = True }
+ result <- parser
+ updateState $ \st -> st{ sVerbatimMode = False }
+ return result
+
+rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m String
+rawLaTeXBlock = do
+ lookAhead (try (char '\\' >> letter))
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawblock = do
+ (_, raw) <- try $
+ withRaw (environment <|> macroDef <|> blockCommand)
+ return raw
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate }
+ res <- runParserT rawblock lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right raw -> count (T.length (untokenize raw)) anyChar
+
+macro :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m Blocks
+macro = do
+ guardEnabled Ext_latex_macros
+ lookAhead (char '\\' *> oneOfStrings ["new", "renew", "provide"] *>
+ oneOfStrings ["command", "environment"])
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawblock = do
+ (_, raw) <- withRaw $ try macroDef
+ st <- getState
+ return (raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawblock lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (raw, st) -> do
+ updateState (updateMacros (const $ sMacros st))
+ mempty <$ count (T.length (untokenize raw)) anyChar
+
+applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => String -> ParserT String s m String
+applyMacros s = do
+ (guardEnabled Ext_latex_macros >>
+ do let retokenize = doMacros 0 *> (toksToString <$> getInput)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT retokenize lstate "math" (tokenize (T.pack s))
+ case res of
+ Left e -> fail (show e)
+ Right s' -> return s') <|> return s
+
+rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m String
+rawLaTeXInline = do
+ lookAhead (try (char '\\' >> letter) <|> char '$')
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawinline = do
+ (_, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand')
+ st <- getState
+ return (raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawinline lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (raw, s) -> do
+ updateState $ updateMacros (const $ sMacros s)
+ count (T.length (untokenize raw)) anyChar
+
+inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
+inlineCommand = do
+ lookAhead (try (char '\\' >> letter) <|> char '$')
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawinline = do
+ (il, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand')
+ st <- getState
+ return (il, raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawinline lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (il, raw, s) -> do
+ updateState $ updateMacros (const $ sMacros s)
+ count (T.length (untokenize raw)) anyChar
+ return il
+
+tokenize :: Text -> [Tok]
+tokenize = totoks (1, 1)
+
+totoks :: (Line, Column) -> Text -> [Tok]
+totoks (lin,col) t =
+ case T.uncons t of
+ Nothing -> []
+ Just (c, rest)
+ | c == '\n' ->
+ Tok (lin, col) Newline "\n"
+ : totoks (lin + 1,1) rest
+ | isSpaceOrTab c ->
+ let (sps, rest') = T.span isSpaceOrTab t
+ in Tok (lin, col) Spaces sps
+ : totoks (lin, col + T.length sps) rest'
+ | isAlphaNum c ->
+ let (ws, rest') = T.span isAlphaNum t
+ in Tok (lin, col) Word ws
+ : totoks (lin, col + T.length ws) rest'
+ | c == '%' ->
+ let (cs, rest') = T.break (== '\n') rest
+ in Tok (lin, col) Comment ("%" <> cs)
+ : totoks (lin, col + 1 + T.length cs) rest'
+ | c == '\\' ->
+ case T.uncons rest of
+ Nothing -> [Tok (lin, col) Symbol (T.singleton c)]
+ Just (d, rest')
+ | isLetter d ->
+ let (ws, rest'') = T.span isLetter rest
+ (ss, rest''') = T.span isSpaceOrTab rest''
+ in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss)
+ : totoks (lin,
+ col + 1 + T.length ws + T.length ss) rest'''
+ | d == '\t' || d == '\n' ->
+ Tok (lin, col) Symbol ("\\")
+ : totoks (lin, col + 1) rest
+ | otherwise ->
+ Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d])
+ : totoks (lin, col + 2) rest'
+ | c == '#' ->
+ let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
+ in case safeRead (T.unpack t1) of
+ Just i ->
+ Tok (lin, col) (Arg i) ("#" <> t1)
+ : totoks (lin, col + 1 + T.length t1) t2
+ Nothing ->
+ Tok (lin, col) Symbol ("#")
+ : totoks (lin, col + 1) t2
+ | c == '^' ->
+ case T.uncons rest of
+ Just ('^', rest') ->
+ case T.uncons rest' of
+ Just (d, rest'')
+ | isLowerHex d ->
+ case T.uncons rest'' of
+ Just (e, rest''') | isLowerHex e ->
+ Tok (lin, col) Esc2 (T.pack ['^','^',d,e])
+ : totoks (lin, col + 4) rest'''
+ _ ->
+ Tok (lin, col) Esc1 (T.pack ['^','^',d])
+ : totoks (lin, col + 3) rest''
+ | d < '\128' ->
+ Tok (lin, col) Esc1 (T.pack ['^','^',d])
+ : totoks (lin, col + 3) rest''
+ _ -> [Tok (lin, col) Symbol ("^"),
+ Tok (lin, col + 1) Symbol ("^")]
+ _ -> Tok (lin, col) Symbol ("^")
+ : totoks (lin, col + 1) rest
+ | otherwise ->
+ Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest
+
+ where isSpaceOrTab ' ' = True
+ isSpaceOrTab '\t' = True
+ isSpaceOrTab _ = False
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+untokenize :: [Tok] -> Text
+untokenize = mconcat . map untoken
+
+untoken :: Tok -> Text
+untoken (Tok _ _ t) = t
+
+satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
+satisfyTok f =
+ try $ do
+ res <- tokenPrim (T.unpack . untoken) updatePos matcher
+ doMacros 0 -- apply macros on remaining input stream
+ return res
+ where matcher t | f t = Just t
+ | otherwise = Nothing
+ updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
+ updatePos spos _ (Tok (lin,col) _ _ : _) =
+ setSourceColumn (setSourceLine spos lin) col
+ updatePos spos _ [] = spos
+
+doMacros :: PandocMonad m => Int -> LP m ()
+doMacros n = do
+ verbatimMode <- sVerbatimMode <$> getState
+ when (not verbatimMode) $ do
+ inp <- getInput
+ case inp of
+ Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos name ts
+ Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos ("end" <> name) ts
+ Tok spos (CtrlSeq name) _ : ts
+ -> handleMacros spos name ts
+ _ -> return ()
+ where handleMacros spos name ts = do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Nothing -> return ()
+ Just (Macro numargs optarg newtoks) -> do
+ setInput ts
+ let getarg = spaces >> braced
+ args <- case optarg of
+ Nothing -> count numargs getarg
+ Just o ->
+ (:) <$> option o bracketedToks
+ <*> count (numargs - 1) getarg
+ let addTok (Tok _ (Arg i) _) acc | i > 0
+ , i <= numargs =
+ map (setpos spos) (args !! (i - 1)) ++ acc
+ addTok t acc = setpos spos t : acc
+ ts' <- getInput
+ setInput $ foldr addTok ts' newtoks
+ if n > 20 -- detect macro expansion loops
+ then throwError $ PandocMacroLoop (T.unpack name)
+ else doMacros (n + 1)
+
+setpos :: (Line, Column) -> Tok -> Tok
+setpos spos (Tok _ tt txt) = Tok spos tt txt
+
+anyControlSeq :: PandocMonad m => LP m Tok
+anyControlSeq = satisfyTok isCtrlSeq
+ where isCtrlSeq (Tok _ (CtrlSeq _) _) = True
+ isCtrlSeq _ = False
+
+anySymbol :: PandocMonad m => LP m Tok
+anySymbol = satisfyTok isSym
+ where isSym (Tok _ Symbol _) = True
+ isSym _ = False
+
+spaces :: PandocMonad m => LP m ()
+spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+spaces1 :: PandocMonad m => LP m ()
+spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+tokTypeIn :: [TokType] -> Tok -> Bool
+tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
+
+controlSeq :: PandocMonad m => Text -> LP m Tok
+controlSeq name = satisfyTok isNamed
+ where isNamed (Tok _ (CtrlSeq n) _) = n == name
+ isNamed _ = False
+
+symbol :: PandocMonad m => Char -> LP m Tok
+symbol c = satisfyTok isc
+ where isc (Tok _ Symbol d) = case T.uncons d of
+ Just (c',_) -> c == c'
+ _ -> False
+ isc _ = False
+
+symbolIn :: PandocMonad m => [Char] -> LP m Tok
+symbolIn cs = satisfyTok isInCs
+ where isInCs (Tok _ Symbol d) = case T.uncons d of
+ Just (c,_) -> c `elem` cs
+ _ -> False
+ isInCs _ = False
sp :: PandocMonad m => LP m ()
sp = whitespace <|> endline
whitespace :: PandocMonad m => LP m ()
-whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
+whitespace = () <$ satisfyTok isSpaceTok
+ where isSpaceTok (Tok _ Spaces _) = True
+ isSpaceTok _ = False
-endline :: PandocMonad m => LP m ()
-endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)
+newlineTok :: PandocMonad m => LP m ()
+newlineTok = () <$ satisfyTok isNewlineTok
-isLowerHex :: Char -> Bool
-isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
-
-tildeEscape :: PandocMonad m => LP m Char
-tildeEscape = try $ do
- string "^^"
- c <- satisfy (\x -> x >= '\0' && x <= '\128')
- d <- if isLowerHex c
- then option "" $ count 1 (satisfy isLowerHex)
- else return ""
- if null d
- then case ord c of
- x | x >= 64 && x <= 127 -> return $ chr (x - 64)
- | otherwise -> return $ chr (x + 64)
- else return $ chr $ read ('0':'x':c:d)
+isNewlineTok :: Tok -> Bool
+isNewlineTok (Tok _ Newline _) = True
+isNewlineTok _ = False
comment :: PandocMonad m => LP m ()
-comment = do
- char '%'
- skipMany (satisfy (/='\n'))
- optional newline
- return ()
+comment = () <$ satisfyTok isCommentTok
+ where isCommentTok (Tok _ Comment _) = True
+ isCommentTok _ = False
-bgroup :: PandocMonad m => LP m ()
+anyTok :: PandocMonad m => LP m Tok
+anyTok = satisfyTok (const True)
+
+endline :: PandocMonad m => LP m ()
+endline = try $ do
+ newlineTok
+ lookAhead anyTok
+ notFollowedBy blankline
+
+blankline :: PandocMonad m => LP m ()
+blankline = try $ skipMany whitespace *> newlineTok
+
+primEscape :: PandocMonad m => LP m Char
+primEscape = do
+ Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
+ case toktype of
+ Esc1 -> case T.uncons (T.drop 2 t) of
+ Just (c, _)
+ | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
+ | otherwise -> return (chr (ord c + 64))
+ Nothing -> fail "Empty content of Esc1"
+ Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+ Just x -> return (chr x)
+ Nothing -> fail $ "Could not read: " ++ T.unpack t
+ _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
+
+bgroup :: PandocMonad m => LP m Tok
bgroup = try $ do
- skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
- () <$ char '{'
- <|> () <$ controlSeq "bgroup"
- <|> () <$ controlSeq "begingroup"
+ skipMany sp
+ symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
-egroup :: PandocMonad m => LP m ()
-egroup = () <$ char '}'
- <|> () <$ controlSeq "egroup"
- <|> () <$ controlSeq "endgroup"
+egroup :: PandocMonad m => LP m Tok
+egroup = (symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup")
-grouped :: PandocMonad m => Monoid a => LP m a -> LP m a
+grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
grouped parser = try $ do
bgroup
-- first we check for an inner 'grouped', because
-- {{a,b}} should be parsed the same as {a,b}
- try (grouped parser <* egroup)
- <|> (mconcat <$> manyTill parser egroup)
-
-braced :: PandocMonad m => LP m String
-braced = grouped chunk
- where chunk =
- many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
- <|> try (string "\\}")
- <|> try (string "\\{")
- <|> try (string "\\\\")
- <|> ((\x -> "{" ++ x ++ "}") <$> braced)
- <|> count 1 anyChar
+ try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
+
+braced :: PandocMonad m => LP m [Tok]
+braced = bgroup *> braced' 1
+ where braced' (n :: Int) =
+ handleEgroup n <|> handleBgroup n <|> handleOther n
+ handleEgroup n = do
+ t <- egroup
+ if n == 1
+ then return []
+ else (t:) <$> braced' (n - 1)
+ handleBgroup n = do
+ t <- bgroup
+ (t:) <$> braced' (n + 1)
+ handleOther n = do
+ t <- anyTok
+ (t:) <$> braced' n
bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
-bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
-
-mathDisplay :: PandocMonad m => LP m String -> LP m Inlines
-mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
-
-mathInline :: PandocMonad m => LP m String -> LP m Inlines
-mathInline p = math <$> (try p >>= applyMacros')
-
-mathChars :: PandocMonad m => LP m String
-mathChars =
- concat <$> many (escapedChar
- <|> (snd <$> withRaw braced)
- <|> many1 (satisfy isOrdChar))
- where escapedChar = try $ do char '\\'
- c <- anyChar
- return ['\\',c]
- isOrdChar '$' = False
- isOrdChar '{' = False
- isOrdChar '}' = False
- isOrdChar '\\' = False
- isOrdChar _ = True
-
-quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines
-quoted' f starter ender = do
- startchs <- starter
- smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if smart
- then do
- ils <- many (notFollowedBy ender >> inline)
- (ender >> return (f (mconcat ils))) <|>
- (<> mconcat ils) <$>
- lit (case startchs of
- "``" -> "“"
- "`" -> "‘"
- _ -> startchs)
- else lit startchs
+bracketed parser = try $ do
+ symbol '['
+ mconcat <$> manyTill parser (symbol ']')
-doubleQuote :: PandocMonad m => LP m Inlines
-doubleQuote = do
- quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
- <|> quoted' doubleQuoted (string "“") (void $ char '”')
- -- the following is used by babel for localized quotes:
- <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
- <|> quoted' doubleQuoted (string "\"") (void $ char '"')
+dimenarg :: PandocMonad m => LP m Text
+dimenarg = try $ do
+ ch <- option False $ True <$ symbol '='
+ Tok _ _ s <- satisfyTok isWordTok
+ guard $ (T.take 2 (T.reverse s)) `elem`
+ ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
+ let num = T.take (T.length s - 2) s
+ guard $ T.length num > 0
+ guard $ T.all isDigit num
+ return $ T.pack ['=' | ch] <> s
-singleQuote :: PandocMonad m => LP m Inlines
-singleQuote = do
- smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if smart
- then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
- <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
- else str <$> many1 (oneOf "`\'‘’")
+-- inline elements:
-inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
- <|> (space <$ whitespace)
- <|> (softbreak <$ endline)
- <|> inlineText
- <|> inlineCommand
- <|> inlineEnvironment
- <|> inlineGroup
- <|> (char '-' *> option (str "-")
- (char '-' *> option (str "–") (str "—" <$ char '-')))
- <|> doubleQuote
- <|> singleQuote
- <|> (str "”" <$ try (string "''"))
- <|> (str "”" <$ char '”')
- <|> (str "’" <$ char '\'')
- <|> (str "’" <$ char '’')
- <|> (str "\160" <$ char '~')
- <|> mathDisplay (string "$$" *> mathChars <* string "$$")
- <|> mathInline (char '$' *> mathChars <* char '$')
- <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
- <|> (str . (:[]) <$> tildeEscape)
- <|> (do res <- oneOf "#&~^'`\"[]"
- pos <- getPosition
- report $ ParsingUnescaped [res] pos
- return $ str [res])
+word :: PandocMonad m => LP m Inlines
+word = (str . T.unpack . untoken) <$> satisfyTok isWordTok
-inlines :: PandocMonad m => LP m Inlines
-inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+regularSymbol :: PandocMonad m => LP m Inlines
+regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
+ where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
+ isRegularSymbol _ = False
+ isSpecial c = c `Set.member` specialChars
+
+specialChars :: Set.Set Char
+specialChars = Set.fromList "#$%&~_^\\{}"
+
+isWordTok :: Tok -> Bool
+isWordTok (Tok _ Word _) = True
+isWordTok _ = False
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
@@ -269,443 +586,19 @@ inlineGroup = do
-- we need the span so we can detitlecase bibtex entries;
-- we need to know when something is {C}apitalized
-block :: PandocMonad m => LP m Blocks
-block = (mempty <$ comment)
- <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
- <|> environment
- <|> include
- <|> macro
- <|> blockCommand
- <|> paragraph
- <|> grouped block
-
-blocks :: PandocMonad m => LP m Blocks
-blocks = mconcat <$> many block
-
-getRawCommand :: PandocMonad m => String -> LP m String
-getRawCommand name' = do
- rawargs <- withRaw (many (try (optional sp *> opt)) *>
- option "" (try (optional sp *> dimenarg)) *>
- many braced)
- return $ '\\' : name' ++ snd rawargs
-
-lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
-lookupListDefault d = (fromMaybe d .) . lookupList
- where
- lookupList l m = msum $ map (`M.lookup` m) l
-
-blockCommand :: PandocMonad m => LP m Blocks
-blockCommand = try $ do
- name <- anyControlSeq
- guard $ name /= "begin" && name /= "end"
- star <- option "" (string "*" <* optional sp)
- let name' = name ++ star
- let raw = do
- rawcommand <- getRawCommand name'
- transformed <- applyMacros' rawcommand
- guard $ transformed /= rawcommand
- notFollowedBy $ parseFromString' inlines transformed
- parseFromString' blocks transformed
- lookupListDefault raw [name',name] blockCommands
-
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
-
--- eat an optional argument and one or more arguments in braces
-ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines)
-ignoreInlines name = (name, p)
- where
- p = do oa <- optargs
- let rawCommand = '\\':name ++ oa
- let doraw = guardRaw >> return (rawInline "latex" rawCommand)
- doraw <|> ignore rawCommand
-
-guardRaw :: PandocMonad m => LP m ()
-guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex
-
-optargs :: PandocMonad m => LP m String
-optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced))
-
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
-ignore raw = do
- pos <- getPosition
- report $ SkippedContent raw pos
- return mempty
-
-ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
-ignoreBlocks name = (name, p)
- where
- p = do oa <- optargs
- let rawCommand = '\\':name ++ oa
- let doraw = guardRaw >> return (rawBlock "latex" rawCommand)
- doraw <|> ignore rawCommand
-
-blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
-blockCommands = M.fromList $
- [ ("par", mempty <$ skipopts)
- , ("parbox", braced >> grouped blocks)
- , ("title", mempty <$ (skipopts *>
- (grouped inline >>= addMeta "title")
- <|> (grouped block >>= addMeta "title")))
- , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
- , ("author", mempty <$ (skipopts *> authors))
- -- -- in letter class, temp. store address & sig as title, author
- , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
- , ("signature", mempty <$ (skipopts *> authors))
- , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
- -- Koma-script metadata commands
- , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
- -- sectioning
- , ("part", section nullAttr (-1))
- , ("part*", section nullAttr (-1))
- , ("chapter", section nullAttr 0)
- , ("chapter*", section ("",["unnumbered"],[]) 0)
- , ("section", section nullAttr 1)
- , ("section*", section ("",["unnumbered"],[]) 1)
- , ("subsection", section nullAttr 2)
- , ("subsection*", section ("",["unnumbered"],[]) 2)
- , ("subsubsection", section nullAttr 3)
- , ("subsubsection*", section ("",["unnumbered"],[]) 3)
- , ("paragraph", section nullAttr 4)
- , ("paragraph*", section ("",["unnumbered"],[]) 4)
- , ("subparagraph", section nullAttr 5)
- , ("subparagraph*", section ("",["unnumbered"],[]) 5)
- -- beamer slides
- , ("frametitle", section nullAttr 3)
- , ("framesubtitle", section nullAttr 4)
- -- letters
- , ("opening", (para . trimInlines) <$> (skipopts *> tok))
- , ("closing", skipopts *> closing)
- --
- , ("hrule", pure horizontalRule)
- , ("strut", pure mempty)
- , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("item", skipopts *> looseItem)
- , ("documentclass", skipopts *> braced *> preamble)
- , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
- , ("caption", skipopts *> setCaption)
- , ("bibliography", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- , ("addbibresource", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- -- includes
- , ("lstinputlisting", inputListing)
- , ("graphicspath", graphicsPath)
- -- hyperlink
- , ("hypertarget", braced >> grouped block)
- ] ++ map ignoreBlocks
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks
- [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
- -- newcommand, etc. should be parsed by macro, but we need this
- -- here so these aren't parsed as inline commands to ignore
- , "special", "pdfannot", "pdfstringdef"
- , "bibliographystyle"
- , "maketitle", "makeindex", "makeglossary"
- , "addcontentsline", "addtocontents", "addtocounter"
- -- \ignore{} is used conventionally in literate haskell for definitions
- -- that are to be processed by the compiler but not printed.
- , "ignore"
- , "hyperdef"
- , "markboth", "markright", "markleft"
- , "hspace", "vspace"
- , "newpage"
- , "clearpage"
- , "pagebreak"
- ]
-
-graphicsPath :: PandocMonad m => LP m Blocks
-graphicsPath = do
- ps <- bgroup *> (manyTill braced egroup)
- getResourcePath >>= setResourcePath . (++ ps)
- return mempty
-
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
-addMeta field val = updateState $ \st ->
- st{ stateMeta = addMetaField field val $ stateMeta st }
-
-splitBibs :: String -> [Inlines]
-splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
-
-setCaption :: PandocMonad m => LP m Blocks
-setCaption = do
- ils <- tok
- mblabel <- option Nothing $
- try $ spaces' >> controlSeq "label" >> (Just <$> tok)
- let ils' = case mblabel of
- Just lab -> ils <> spanWith
- ("",[],[("data-label", stringify lab)]) mempty
- Nothing -> ils
- updateState $ \st -> st{ stateCaption = Just ils' }
- return mempty
-
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
-
-authors :: PandocMonad m => LP m ()
-authors = try $ do
- bgroup
- let oneAuthor = mconcat <$>
- many1 (notFollowedBy' (controlSeq "and") >>
- (inline <|> mempty <$ blockCommand))
- -- skip e.g. \vspace{10pt}
- auths <- sepBy oneAuthor (controlSeq "and")
- egroup
- addMeta "author" (map trimInlines auths)
-
-section :: PandocMonad m => Attr -> Int -> LP m Blocks
-section (ident, classes, kvs) lvl = do
- skipopts
- contents <- grouped inline
- lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
- attr' <- registerHeader (lab, classes, kvs) contents
- return $ headerWith attr' lvl contents
-
-inlineCommand :: PandocMonad m => LP m Inlines
-inlineCommand = try $ do
- (name, raw') <- withRaw anyControlSeq
- guard $ name /= "begin" && name /= "end"
- star <- option "" (string "*")
- let name' = name ++ star
- let raw = do
- guard $ not (isBlockCommand name)
- rawargs <- withRaw
- (skipangles *> skipopts *> option "" dimenarg *> many braced)
- let rawcommand = raw' ++ star ++ snd rawargs
- transformed <- applyMacros' rawcommand
- exts <- getOption readerExtensions
- if transformed /= rawcommand
- then parseFromString' inlines transformed
- else if extensionEnabled Ext_raw_tex exts
- then return $ rawInline "latex" rawcommand
- else ignore rawcommand
- (lookupListDefault raw [name',name] inlineCommands <*
- optional (try (string "{}")))
-
-rawInlineOr :: PandocMonad m => String -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
- parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
- if parseRaw
- then rawInline "latex" <$> getRawCommand name'
- else fallback
-
-isBlockCommand :: String -> Bool
-isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))
-
-
-inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines)
-inlineEnvironments = M.fromList
- [ ("displaymath", mathEnvWith id Nothing "displaymath")
- , ("math", math <$> mathEnv "math")
- , ("equation", mathEnvWith id Nothing "equation")
- , ("equation*", mathEnvWith id Nothing "equation*")
- , ("gather", mathEnvWith id (Just "gathered") "gather")
- , ("gather*", mathEnvWith id (Just "gathered") "gather*")
- , ("multline", mathEnvWith id (Just "gathered") "multline")
- , ("multline*", mathEnvWith id (Just "gathered") "multline*")
- , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
- , ("align", mathEnvWith id (Just "aligned") "align")
- , ("align*", mathEnvWith id (Just "aligned") "align*")
- , ("alignat", mathEnvWith id (Just "aligned") "alignat")
- , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
- ]
-
-inlineCommands :: PandocMonad m => M.Map String (LP m Inlines)
-inlineCommands = M.fromList $
- [ ("emph", extractSpaces emph <$> tok)
- , ("textit", extractSpaces emph <$> tok)
- , ("textsl", extractSpaces emph <$> tok)
- , ("textsc", extractSpaces smallcaps <$> tok)
- , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
- , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
- , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
- , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
- , ("texttt", ttfamily)
- , ("sout", extractSpaces strikeout <$> tok)
- , ("textsuperscript", extractSpaces superscript <$> tok)
- , ("textsubscript", extractSpaces subscript <$> tok)
- , ("textbackslash", lit "\\")
- , ("backslash", lit "\\")
- , ("slash", lit "/")
- , ("textbf", extractSpaces strong <$> tok)
- , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
- , ("ldots", lit "…")
- , ("vdots", lit "\8942")
- , ("dots", lit "…")
- , ("mdots", lit "…")
- , ("sim", lit "~")
- , ("label", rawInlineOr "label" (inBrackets <$> tok))
- , ("ref", rawInlineOr "ref" (inBrackets <$> tok))
- , ("textgreek", tok)
- , ("sep", lit ",")
- , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty
- , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
- , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
- , ("ensuremath", mathInline braced)
- , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
- , ("P", lit "¶")
- , ("S", lit "§")
- , ("$", lit "$")
- , ("%", lit "%")
- , ("&", lit "&")
- , ("#", lit "#")
- , ("_", lit "_")
- , ("{", lit "{")
- , ("}", lit "}")
- -- old TeX commands
- , ("em", extractSpaces emph <$> inlines)
- , ("it", extractSpaces emph <$> inlines)
- , ("sl", extractSpaces emph <$> inlines)
- , ("bf", extractSpaces strong <$> inlines)
- , ("rm", inlines)
- , ("itshape", extractSpaces emph <$> inlines)
- , ("slshape", extractSpaces emph <$> inlines)
- , ("scshape", extractSpaces smallcaps <$> inlines)
- , ("bfseries", extractSpaces strong <$> inlines)
- , ("/", pure mempty) -- italic correction
- , ("aa", lit "å")
- , ("AA", lit "Å")
- , ("ss", lit "ß")
- , ("o", lit "ø")
- , ("O", lit "Ø")
- , ("L", lit "Ł")
- , ("l", lit "ł")
- , ("ae", lit "æ")
- , ("AE", lit "Æ")
- , ("oe", lit "œ")
- , ("OE", lit "Œ")
- , ("pounds", lit "£")
- , ("euro", lit "€")
- , ("copyright", lit "©")
- , ("textasciicircum", lit "^")
- , ("textasciitilde", lit "~")
- , ("H", try $ tok >>= accent hungarumlaut)
- , ("`", option (str "`") $ try $ tok >>= accent grave)
- , ("'", option (str "'") $ try $ tok >>= accent acute)
- , ("^", option (str "^") $ try $ tok >>= accent circ)
- , ("~", option (str "~") $ try $ tok >>= accent tilde)
- , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
- , (".", option (str ".") $ try $ tok >>= accent dot)
- , ("=", option (str "=") $ try $ tok >>= accent macron)
- , ("c", option (str "c") $ try $ tok >>= accent cedilla)
- , ("v", option (str "v") $ try $ tok >>= accent hacek)
- , ("u", option (str "u") $ try $ tok >>= accent breve)
- , ("i", lit "i")
- , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
- , (",", lit "\8198")
- , ("@", pure mempty)
- , (" ", lit "\160")
- , ("ps", pure $ str "PS." <> space)
- , ("TeX", lit "TeX")
- , ("LaTeX", lit "LaTeX")
- , ("bar", lit "|")
- , ("textless", lit "<")
- , ("textgreater", lit ">")
- , ("thanks", note <$> grouped block)
- , ("footnote", note <$> grouped block)
- , ("verb", doverb)
- , ("lstinline", dolstinline)
- , ("Verb", doverb)
- , ("url", (unescapeURL <$> braced) >>= \url ->
- pure (link url "" (str url)))
- , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
- tok >>= \lab ->
- pure (link url "" lab))
- , ("includegraphics", do options <- option [] keyvals
- src <- unescapeURL . removeDoubleQuotes <$> braced
- mkImage options src)
- , ("enquote", enquote)
- , ("cite", citation "cite" NormalCitation False)
- , ("Cite", citation "Cite" NormalCitation False)
- , ("citep", citation "citep" NormalCitation False)
- , ("citep*", citation "citep*" NormalCitation False)
- , ("citeal", citation "citeal" NormalCitation False)
- , ("citealp", citation "citealp" NormalCitation False)
- , ("citealp*", citation "citealp*" NormalCitation False)
- , ("autocite", citation "autocite" NormalCitation False)
- , ("smartcite", citation "smartcite" NormalCitation False)
- , ("footcite", inNote <$> citation "footcite" NormalCitation False)
- , ("parencite", citation "parencite" NormalCitation False)
- , ("supercite", citation "supercite" NormalCitation False)
- , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
- , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
- , ("citeyear", citation "citeyear" SuppressAuthor False)
- , ("autocite*", citation "autocite*" SuppressAuthor False)
- , ("cite*", citation "cite*" SuppressAuthor False)
- , ("parencite*", citation "parencite*" SuppressAuthor False)
- , ("textcite", citation "textcite" AuthorInText False)
- , ("citet", citation "citet" AuthorInText False)
- , ("citet*", citation "citet*" AuthorInText False)
- , ("citealt", citation "citealt" AuthorInText False)
- , ("citealt*", citation "citealt*" AuthorInText False)
- , ("textcites", citation "textcites" AuthorInText True)
- , ("cites", citation "cites" NormalCitation True)
- , ("autocites", citation "autocites" NormalCitation True)
- , ("footcites", inNote <$> citation "footcites" NormalCitation True)
- , ("parencites", citation "parencites" NormalCitation True)
- , ("supercites", citation "supercites" NormalCitation True)
- , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
- , ("Autocite", citation "Autocite" NormalCitation False)
- , ("Smartcite", citation "Smartcite" NormalCitation False)
- , ("Footcite", citation "Footcite" NormalCitation False)
- , ("Parencite", citation "Parencite" NormalCitation False)
- , ("Supercite", citation "Supercite" NormalCitation False)
- , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
- , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
- , ("Citeyear", citation "Citeyear" SuppressAuthor False)
- , ("Autocite*", citation "Autocite*" SuppressAuthor False)
- , ("Cite*", citation "Cite*" SuppressAuthor False)
- , ("Parencite*", citation "Parencite*" SuppressAuthor False)
- , ("Textcite", citation "Textcite" AuthorInText False)
- , ("Textcites", citation "Textcites" AuthorInText True)
- , ("Cites", citation "Cites" NormalCitation True)
- , ("Autocites", citation "Autocites" NormalCitation True)
- , ("Footcites", citation "Footcites" NormalCitation True)
- , ("Parencites", citation "Parencites" NormalCitation True)
- , ("Supercites", citation "Supercites" NormalCitation True)
- , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
- , ("citetext", complexNatbibCitation NormalCitation)
- , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
- complexNatbibCitation AuthorInText)
- <|> citation "citeauthor" AuthorInText False)
- , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
- addMeta "nocite"))
- , ("hypertarget", braced >> tok)
- -- siuntix
- , ("SI", dosiunitx)
- -- hyphenat
- , ("bshyp", lit "\\\173")
- , ("fshyp", lit "/\173")
- , ("dothyp", lit ".\173")
- , ("colonhyp", lit ":\173")
- , ("hyp", lit "-")
- , ("nohyphens", tok)
- , ("textnhtt", ttfamily)
- , ("nhttfamily", ttfamily)
- -- fontawesome
- , ("faCheck", lit "\10003")
- , ("faClose", lit "\10007")
- ] ++ map ignoreInlines
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks:
- [ "index"
- , "hspace"
- , "vspace"
- , "newpage"
- , "clearpage"
- , "pagebreak"
- ]
-
-ttfamily :: PandocMonad m => LP m Inlines
-ttfamily = (code . stringify . toList) <$> tok
+doLHSverb :: PandocMonad m => LP m Inlines
+doLHSverb =
+ (codeWith ("",["haskell"],[]) . T.unpack . untokenize)
+ <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|')
mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
mkImage options src = do
- let replaceTextwidth (k,v) = case numUnit v of
- Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
- _ -> (k, v)
- let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
+ let replaceTextwidth (k,v) =
+ case numUnit v of
+ Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
+ _ -> (k, v)
+ let kvs = map replaceTextwidth
+ $ filter (\(k,_) -> k `elem` ["width", "height"]) options
let attr = ("",[], kvs)
let alt = str "image"
case takeExtension src of
@@ -714,56 +607,131 @@ mkImage options src = do
return $ imageWith attr (addExtension src defaultExt) "" alt
_ -> return $ imageWith attr src "" alt
-inNote :: Inlines -> Inlines
-inNote ils =
- note $ para $ ils <> str "."
+-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
+dosiunitx :: PandocMonad m => LP m Inlines
+dosiunitx = do
+ skipopts
+ value <- tok
+ valueprefix <- option "" $ bracketed tok
+ unit <- tok
+ let emptyOr160 "" = ""
+ emptyOr160 _ = "\160"
+ return . mconcat $ [valueprefix,
+ emptyOr160 valueprefix,
+ value,
+ emptyOr160 unit,
+ unit]
-unescapeURL :: String -> String
-unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
-unescapeURL (x:xs) = x:unescapeURL xs
-unescapeURL [] = ""
+lit :: String -> LP m Inlines
+lit = pure . str
+
+removeDoubleQuotes :: Text -> Text
+removeDoubleQuotes t =
+ maybe t id $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+
+doubleQuote :: PandocMonad m => LP m Inlines
+doubleQuote = do
+ quoted' doubleQuoted (try $ count 2 $ symbol '`')
+ (void $ try $ count 2 $ symbol '\'')
+ <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”')
+ -- the following is used by babel for localized quotes:
+ <|> quoted' doubleQuoted (try $ sequence [symbol '"', symbol '`'])
+ (void $ try $ sequence [symbol '"', symbol '\''])
+ <|> quoted' doubleQuoted ((:[]) <$> symbol '"')
+ (void $ symbol '"')
+
+singleQuote :: PandocMonad m => LP m Inlines
+singleQuote = do
+ quoted' singleQuoted ((:[]) <$> symbol '`')
+ (try $ symbol '\'' >>
+ notFollowedBy (satisfyTok startsWithLetter))
+ <|> quoted' singleQuoted ((:[]) <$> symbol '‘')
+ (try $ symbol '’' >>
+ notFollowedBy (satisfyTok startsWithLetter))
+ where startsWithLetter (Tok _ Word t) =
+ case T.uncons t of
+ Just (c, _) | isLetter c -> True
+ _ -> False
+ startsWithLetter _ = False
+
+quoted' :: PandocMonad m
+ => (Inlines -> Inlines)
+ -> LP m [Tok]
+ -> LP m ()
+ -> LP m Inlines
+quoted' f starter ender = do
+ startchs <- (T.unpack . untokenize) <$> starter
+ smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
+ if smart
+ then do
+ ils <- many (notFollowedBy ender >> inline)
+ (ender >> return (f (mconcat ils))) <|>
+ (<> mconcat ils) <$>
+ lit (case startchs of
+ "``" -> "“"
+ "`" -> "‘"
+ cs -> cs)
+ else lit startchs
enquote :: PandocMonad m => LP m Inlines
enquote = do
skipopts
- context <- stateQuoteContext <$> getState
- if context == InDoubleQuote
+ quoteContext <- sQuoteContext <$> getState
+ if quoteContext == InDoubleQuote
then singleQuoted <$> withQuoteContext InSingleQuote tok
else doubleQuoted <$> withQuoteContext InDoubleQuote tok
doverb :: PandocMonad m => LP m Inlines
doverb = do
- marker <- anyChar
- code <$> manyTill (satisfy (/='\n')) (char marker)
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ withVerbatimMode $
+ (code . T.unpack . untokenize) <$>
+ manyTill (verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+ t@(Tok (lin, col) toktype txt) <- satisfyTok (not . isNewlineTok)
+ case T.findIndex (== stopchar) txt of
+ Nothing -> return t
+ Just i -> do
+ let (t1, t2) = T.splitAt i txt
+ inp <- getInput
+ setInput $ Tok (lin, col + i) Symbol (T.singleton stopchar)
+ : (totoks (lin, col + i + 1) (T.drop 1 t2)) ++ inp
+ return $ Tok (lin, col) toktype t1
dolstinline :: PandocMonad m => LP m Inlines
dolstinline = do
options <- option [] keyvals
let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage
- marker <- char '{' <|> anyChar
- codeWith ("",classes,[]) <$> manyTill (satisfy (/='\n')) (char '}' <|> char marker)
-
-doLHSverb :: PandocMonad m => LP m Inlines
-doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ let stopchar = if marker == '{' then '}' else marker
+ withVerbatimMode $
+ (codeWith ("",classes,[]) . T.unpack . untokenize) <$>
+ manyTill (verbTok stopchar) (symbol stopchar)
--- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
-dosiunitx :: PandocMonad m => LP m Inlines
-dosiunitx = do
- skipopts
- value <- tok
- valueprefix <- option "" $ char '[' >> (mconcat <$> manyTill tok (char ']'))
- unit <- tok
- let emptyOr160 "" = ""
- emptyOr160 _ = "\160"
- return . mconcat $ [valueprefix,
- emptyOr160 valueprefix,
- value,
- emptyOr160 unit,
- unit]
+keyval :: PandocMonad m => LP m (String, String)
+keyval = try $ do
+ Tok _ Word key <- satisfyTok isWordTok
+ let isSpecSym (Tok _ Symbol t) = t `elem` [".",":","-","|","\\"]
+ isSpecSym _ = False
+ val <- option [] $ do
+ symbol '='
+ braced <|> (many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym
+ <|> anyControlSeq))
+ optional sp
+ optional (symbol ',')
+ optional sp
+ return (T.unpack key, T.unpack . untokenize $ val)
-lit :: String -> LP m Inlines
-lit = pure . str
+keyvals :: PandocMonad m => LP m [(String, String)]
+keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
accent :: (Char -> String) -> Inlines -> LP m Inlines
accent f ils =
@@ -971,18 +939,151 @@ breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c = [c]
+toksToString :: [Tok] -> String
+toksToString = T.unpack . untokenize
+
+mathDisplay :: String -> Inlines
+mathDisplay = displayMath . trim
+
+mathInline :: String -> Inlines
+mathInline = math . trim
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+ symbol '$'
+ display <- option False (True <$ symbol '$')
+ contents <- trim . toksToString <$>
+ many (notFollowedBy (symbol '$') >> anyTok)
+ if display
+ then do
+ mathDisplay contents <$ try (symbol '$' >> symbol '$')
+ <|> (guard (null contents) >> return (mathInline ""))
+ else mathInline contents <$ (symbol '$')
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: PandocMonad m => LP m [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe $ toList <$> opt
+ second <- optionMaybe $ toList <$> opt
+ keys <- try $ bgroup *> (manyTill citationLabel egroup)
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+
+citationLabel :: PandocMonad m => LP m String
+citationLabel = do
+ optional sp
+ toksToString <$>
+ (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+ <* optional sp
+ <* optional (symbol ',')
+ <* optional sp)
+ where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char]
+
+cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
+cites mode multi = try $ do
+ cits <- if multi
+ then many1 simpleCiteArgs
+ else count 1 simpleCiteArgs
+ let cs = concat cits
+ return $ case mode of
+ AuthorInText -> case cs of
+ (c:rest) -> c {citationMode = mode} : rest
+ [] -> []
+ _ -> map (\a -> a {citationMode = mode}) cs
+
+citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
+citation name mode multi = do
+ (c,raw) <- withRaw $ cites mode multi
+ return $ cite c (rawInline "latex" $ "\\" ++ name ++ (toksToString raw))
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+ let isCite Cite{} = True
+ isCite _ = False
+ (pref, rest) = break isCite (toList ils)
+ in case rest of
+ (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+ _ -> []
+
+complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
+complexNatbibCitation mode = try $ do
+ (cs, raw) <-
+ withRaw $ concat <$> do
+ bgroup
+ items <- mconcat <$>
+ many1 (notFollowedBy (symbol ';') >> inline)
+ `sepBy1` (symbol ';')
+ egroup
+ return $ map handleCitationPart items
+ case cs of
+ [] -> mzero
+ (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+ (rawInline "latex" $ "\\citetext" ++ toksToString raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+ note $ para $ ils <> str "."
+
+inlineCommand' :: PandocMonad m => LP m Inlines
+inlineCommand' = try $ do
+ Tok _ (CtrlSeq name) cmd <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ (star, rawstar) <- withRaw $ option "" ("*" <$ symbol '*' <* optional sp)
+ let name' = name <> star
+ let names = ordNub [name', name] -- check non-starred as fallback
+ let raw = do
+ guard $ isInlineCommand name || not (isBlockCommand name)
+ (_, rawargs) <- withRaw
+ (skipangles *> skipopts *> option "" dimenarg *> many braced)
+ let rawcommand = T.unpack $ cmd <> untokenize (rawstar ++ rawargs)
+ (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand))
+ <|> ignore rawcommand
+ lookupListDefault raw names inlineCommands
+
tok :: PandocMonad m => LP m Inlines
-tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
+tok = grouped inline <|> inlineCommand' <|> singleChar
+ where singleChar = try $ do
+ Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
+ guard $ not $ toktype == Symbol &&
+ T.any (`Set.member` specialChars) t
+ if T.length t > 1
+ then do
+ let (t1, t2) = (T.take 1 t, T.drop 1 t)
+ inp <- getInput
+ setInput $ (Tok (lin, col + 1) toktype t2) : inp
+ return $ str (T.unpack t1)
+ else return $ str (T.unpack t)
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline
-rawopt :: PandocMonad m => LP m String
+rawopt :: PandocMonad m => LP m Text
rawopt = do
- contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
- try (string "\\[") <|> rawopt)
+ symbol '['
+ inner <- untokenize <$> manyTill anyTok (symbol ']')
optional sp
- return $ "[" ++ contents ++ "]"
+ return $ "[" <> inner <> "]"
skipopts :: PandocMonad m => LP m ()
skipopts = skipMany rawopt
@@ -990,58 +1091,703 @@ skipopts = skipMany rawopt
-- opts in angle brackets are used in beamer
rawangle :: PandocMonad m => LP m ()
rawangle = try $ do
- char '<'
- skipMany (noneOf ">")
- char '>'
- return ()
+ symbol '<'
+ () <$ manyTill anyTok (symbol '>')
skipangles :: PandocMonad m => LP m ()
skipangles = skipMany rawangle
-inlineText :: PandocMonad m => LP m Inlines
-inlineText = str <$> many1 inlineChar
+ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore raw = do
+ pos <- getPosition
+ report $ SkippedContent raw pos
+ return mempty
+
+withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
+withRaw parser = do
+ inp <- getInput
+ result <- parser
+ nxt <- option (Tok (0,0) Word "") (lookAhead anyTok)
+ let raw = takeWhile (/= nxt) inp
+ return (result, raw)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+ where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
+
+mathEnvWith :: PandocMonad m
+ => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++
+ "\\end{" ++ T.unpack y ++ "}"
+
+mathEnv :: PandocMonad m => Text -> LP m String
+mathEnv name = do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ T.unpack $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+ controlSeq "begin"
+ name <- untokenize <$> braced
+ M.findWithDefault mzero name inlineEnvironments
-inlineChar :: PandocMonad m => LP m Char
-inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+ ("displaymath", mathEnvWith id Nothing "displaymath")
+ , ("math", math <$> mathEnv "math")
+ , ("equation", mathEnvWith id Nothing "equation")
+ , ("equation*", mathEnvWith id Nothing "equation*")
+ , ("gather", mathEnvWith id (Just "gathered") "gather")
+ , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+ , ("multline", mathEnvWith id (Just "gathered") "multline")
+ , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith id (Just "aligned") "align")
+ , ("align*", mathEnvWith id (Just "aligned") "align*")
+ , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+ ]
+
+inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineCommands = M.fromList $
+ [ ("emph", extractSpaces emph <$> tok)
+ , ("textit", extractSpaces emph <$> tok)
+ , ("textsl", extractSpaces emph <$> tok)
+ , ("textsc", extractSpaces smallcaps <$> tok)
+ , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+ , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+ , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+ , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+ , ("texttt", ttfamily)
+ , ("sout", extractSpaces strikeout <$> tok)
+ , ("textsuperscript", extractSpaces superscript <$> tok)
+ , ("textsubscript", extractSpaces subscript <$> tok)
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("slash", lit "/")
+ , ("textbf", extractSpaces strong <$> tok)
+ , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+ , ("ldots", lit "…")
+ , ("vdots", lit "\8942")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("label", rawInlineOr "label" (inBrackets <$> tok))
+ , ("ref", rawInlineOr "ref" (inBrackets <$> tok))
+ , ("textgreek", tok)
+ , ("sep", lit ",")
+ , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty
+ , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
+ , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
+ , ("ensuremath", mathInline . toksToString <$> braced)
+ , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
+ , ("P", lit "¶")
+ , ("S", lit "§")
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ -- old TeX commands
+ , ("em", extractSpaces emph <$> inlines)
+ , ("it", extractSpaces emph <$> inlines)
+ , ("sl", extractSpaces emph <$> inlines)
+ , ("bf", extractSpaces strong <$> inlines)
+ , ("rm", inlines)
+ , ("itshape", extractSpaces emph <$> inlines)
+ , ("slshape", extractSpaces emph <$> inlines)
+ , ("scshape", extractSpaces smallcaps <$> inlines)
+ , ("bfseries", extractSpaces strong <$> inlines)
+ , ("/", pure mempty) -- italic correction
+ , ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("oe", lit "œ")
+ , ("OE", lit "Œ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("textasciicircum", lit "^")
+ , ("textasciitilde", lit "~")
+ , ("H", try $ tok >>= accent hungarumlaut)
+ , ("`", option (str "`") $ try $ tok >>= accent grave)
+ , ("'", option (str "'") $ try $ tok >>= accent acute)
+ , ("^", option (str "^") $ try $ tok >>= accent circ)
+ , ("~", option (str "~") $ try $ tok >>= accent tilde)
+ , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
+ , (".", option (str ".") $ try $ tok >>= accent dot)
+ , ("=", option (str "=") $ try $ tok >>= accent macron)
+ , ("c", option (str "c") $ try $ tok >>= accent cedilla)
+ , ("v", option (str "v") $ try $ tok >>= accent hacek)
+ , ("u", option (str "u") $ try $ tok >>= accent breve)
+ , ("i", lit "i")
+ , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+ guard $ not inTableCell
+ optional (bracketed inline)
+ spaces))
+ , (",", lit "\8198")
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("ps", pure $ str "PS." <> space)
+ , ("TeX", lit "TeX")
+ , ("LaTeX", lit "LaTeX")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("thanks", note <$> grouped block)
+ , ("footnote", note <$> grouped block)
+ , ("verb", doverb)
+ , ("lstinline", dolstinline)
+ , ("Verb", doverb)
+ , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url ->
+ pure (link url "" (str url)))
+ , ("href", (unescapeURL . toksToString <$>
+ braced <* optional sp) >>= \url ->
+ tok >>= \lab -> pure (link url "" lab))
+ , ("includegraphics", do options <- option [] keyvals
+ src <- unescapeURL . T.unpack .
+ removeDoubleQuotes . untokenize <$> braced
+ mkImage options src)
+ , ("enquote", enquote)
+ , ("cite", citation "cite" NormalCitation False)
+ , ("Cite", citation "Cite" NormalCitation False)
+ , ("citep", citation "citep" NormalCitation False)
+ , ("citep*", citation "citep*" NormalCitation False)
+ , ("citeal", citation "citeal" NormalCitation False)
+ , ("citealp", citation "citealp" NormalCitation False)
+ , ("citealp*", citation "citealp*" NormalCitation False)
+ , ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
+ , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+ , ("parencite", citation "parencite" NormalCitation False)
+ , ("supercite", citation "supercite" NormalCitation False)
+ , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+ , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+ , ("citeyear", citation "citeyear" SuppressAuthor False)
+ , ("autocite*", citation "autocite*" SuppressAuthor False)
+ , ("cite*", citation "cite*" SuppressAuthor False)
+ , ("parencite*", citation "parencite*" SuppressAuthor False)
+ , ("textcite", citation "textcite" AuthorInText False)
+ , ("citet", citation "citet" AuthorInText False)
+ , ("citet*", citation "citet*" AuthorInText False)
+ , ("citealt", citation "citealt" AuthorInText False)
+ , ("citealt*", citation "citealt*" AuthorInText False)
+ , ("textcites", citation "textcites" AuthorInText True)
+ , ("cites", citation "cites" NormalCitation True)
+ , ("autocites", citation "autocites" NormalCitation True)
+ , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+ , ("parencites", citation "parencites" NormalCitation True)
+ , ("supercites", citation "supercites" NormalCitation True)
+ , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+ , ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
+ , ("Footcite", citation "Footcite" NormalCitation False)
+ , ("Parencite", citation "Parencite" NormalCitation False)
+ , ("Supercite", citation "Supercite" NormalCitation False)
+ , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+ , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+ , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+ , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+ , ("Cite*", citation "Cite*" SuppressAuthor False)
+ , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+ , ("Textcite", citation "Textcite" AuthorInText False)
+ , ("Textcites", citation "Textcites" AuthorInText True)
+ , ("Cites", citation "Cites" NormalCitation True)
+ , ("Autocites", citation "Autocites" NormalCitation True)
+ , ("Footcites", citation "Footcites" NormalCitation True)
+ , ("Parencites", citation "Parencites" NormalCitation True)
+ , ("Supercites", citation "Supercites" NormalCitation True)
+ , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+ , ("citetext", complexNatbibCitation NormalCitation)
+ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+ complexNatbibCitation AuthorInText)
+ <|> citation "citeauthor" AuthorInText False)
+ , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+ addMeta "nocite"))
+ , ("hypertarget", braced >> tok)
+ -- siuntix
+ , ("SI", dosiunitx)
+ -- hyphenat
+ , ("bshyp", lit "\\\173")
+ , ("fshyp", lit "/\173")
+ , ("dothyp", lit ".\173")
+ , ("colonhyp", lit ":\173")
+ , ("hyp", lit "-")
+ , ("nohyphens", tok)
+ , ("textnhtt", ttfamily)
+ , ("nhttfamily", ttfamily)
+ -- fontawesome
+ , ("faCheck", lit "\10003")
+ , ("faClose", lit "\10007")
+ ]
+
+ttfamily :: PandocMonad m => LP m Inlines
+ttfamily = (code . stringify . toList) <$> tok
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+ parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+ if parseRaw
+ then rawInline "latex" <$> getRawCommand name'
+ else fallback
+
+getRawCommand :: PandocMonad m => Text -> LP m String
+getRawCommand txt = do
+ (_, rawargs) <- withRaw
+ (many (try (optional sp *> opt)) *>
+ option "" (try (optional sp *> dimenarg)) *>
+ many braced)
+ return $ T.unpack (txt <> untokenize rawargs)
+
+isBlockCommand :: Text -> Bool
+isBlockCommand s =
+ s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks))
+ || s `Set.member` treatAsBlock
+
+treatAsBlock :: Set.Set Text
+treatAsBlock = Set.fromList
+ [ "newcommand", "renewcommand"
+ , "newenvironment", "renewenvironment"
+ , "providecommand", "provideenvironment"
+ -- newcommand, etc. should be parsed by macroDef, but we need this
+ -- here so these aren't parsed as inline commands to ignore
+ , "special", "pdfannot", "pdfstringdef"
+ , "bibliographystyle"
+ , "maketitle", "makeindex", "makeglossary"
+ , "addcontentsline", "addtocontents", "addtocounter"
+ -- \ignore{} is used conventionally in literate haskell for definitions
+ -- that are to be processed by the compiler but not printed.
+ , "ignore"
+ , "hyperdef"
+ , "markboth", "markright", "markleft"
+ , "hspace", "vspace"
+ , "newpage"
+ , "clearpage"
+ , "pagebreak"
+ ]
+
+isInlineCommand :: Text -> Bool
+isInlineCommand s =
+ s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines))
+ || s `Set.member` treatAsInline
+
+treatAsInline :: Set.Set Text
+treatAsInline = Set.fromList
+ [ "index"
+ , "hspace"
+ , "vspace"
+ , "noindent"
+ , "newpage"
+ , "clearpage"
+ , "pagebreak"
+ ]
+
+lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v
+lookupListDefault d = (fromMaybe d .) . lookupList
+ where lookupList l m = msum $ map (`M.lookup` m) l
+
+inline :: PandocMonad m => LP m Inlines
+inline = (mempty <$ comment)
+ <|> (space <$ whitespace)
+ <|> (softbreak <$ endline)
+ <|> word
+ <|> inlineCommand'
+ <|> inlineEnvironment
+ <|> inlineGroup
+ <|> (symbol '-' *>
+ option (str "-") (symbol '-' *>
+ option (str "–") (str "—" <$ symbol '-')))
+ <|> doubleQuote
+ <|> singleQuote
+ <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
+ <|> (str "”" <$ symbol '”')
+ <|> (str "’" <$ symbol '\'')
+ <|> (str "’" <$ symbol '’')
+ <|> (str "\160" <$ symbol '~')
+ <|> dollarsMath
+ <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
+ <|> (str . (:[]) <$> primEscape)
+ <|> regularSymbol
+ <|> (do res <- symbolIn "#^'`\"[]"
+ pos <- getPosition
+ let s = T.unpack (untoken res)
+ report $ ParsingUnescaped s pos
+ return $ str s)
+
+inlines :: PandocMonad m => LP m Inlines
+inlines = mconcat <$> many inline
+
+-- block elements:
+
+begin_ :: PandocMonad m => Text -> LP m ()
+begin_ t = (try $ do
+ controlSeq "begin"
+ spaces
+ symbol '{'
+ spaces
+ Tok _ Word txt <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ guard (t == txt)) <?> ("\\begin{" ++ T.unpack t ++ "}")
+
+end_ :: PandocMonad m => Text -> LP m ()
+end_ t = (try $ do
+ controlSeq "end"
+ spaces
+ symbol '{'
+ spaces
+ Tok _ Word txt <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ guard $ t == txt) <?> ("\\end{" ++ T.unpack t ++ "}")
+
+preamble :: PandocMonad m => LP m Blocks
+preamble = mempty <$ many preambleBlock
+ where preambleBlock = spaces1
+ <|> void include
+ <|> void macroDef
+ <|> void blockCommand
+ <|> void braced
+ <|> (notFollowedBy (begin_ "document") >> void anyTok)
+
+paragraph :: PandocMonad m => LP m Blocks
+paragraph = do
+ x <- trimInlines . mconcat <$> many1 inline
+ if x == mempty
+ then return mempty
+ else return $ para x
+
+include :: PandocMonad m => LP m Blocks
+include = do
+ (Tok _ (CtrlSeq name) _) <-
+ controlSeq "include" <|> controlSeq "input" <|>
+ controlSeq "subfile" <|> controlSeq "usepackage"
+ skipMany $ bracketed inline -- skip options
+ fs <- (map trim . splitBy (==',') . T.unpack . untokenize) <$> braced
+ let fs' = if name == "usepackage"
+ then map (maybeAddExtension ".sty") fs
+ else map (maybeAddExtension ".tex") fs
+ dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mconcat <$> mapM (insertIncludedFile blocks (tokenize . T.pack) dirs) fs'
+
+maybeAddExtension :: String -> FilePath -> FilePath
+maybeAddExtension ext fp =
+ if null (takeExtension fp)
+ then addExtension fp ext
+ else fp
+
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+ st{ sMeta = addMetaField field val $ sMeta st }
+
+authors :: PandocMonad m => LP m ()
+authors = try $ do
+ bgroup
+ let oneAuthor = mconcat <$>
+ many1 (notFollowedBy' (controlSeq "and") >>
+ (inline <|> mempty <$ blockCommand))
+ -- skip e.g. \vspace{10pt}
+ auths <- sepBy oneAuthor (controlSeq "and")
+ egroup
+ addMeta "author" (map trimInlines auths)
+
+macroDef :: PandocMonad m => LP m Blocks
+macroDef = do
+ guardEnabled Ext_latex_macros
+ mempty <$ ((commandDef <|> environmentDef) <* doMacros 0)
+ where commandDef = do
+ (name, macro') <- newcommand
+ updateState $ \s -> s{ sMacros = M.insert name macro' (sMacros s) }
+ environmentDef = do
+ (name, macro1, macro2) <- newenvironment
+ updateState $ \s -> s{ sMacros =
+ M.insert name macro1 (sMacros s) }
+ updateState $ \s -> s{ sMacros =
+ M.insert ("end" <> name) macro2 (sMacros s) }
+ -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+ -- is equivalent to
+ -- @\newcommand{\envname}[n-args][default]{begin}@
+ -- @\newcommand{\endenvname}@
+
+newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+ controlSeq "renewcommand" <|>
+ controlSeq "providecommand"
+ optional $ symbol '*'
+ Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|>
+ (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ contents <- braced
+ when (mtype == "newcommand") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
+ Nothing -> return ()
+ return (name, Macro numargs optarg contents)
+
+newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)
+newenvironment = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+ controlSeq "renewenvironment" <|>
+ controlSeq "provideenvironment"
+ optional $ symbol '*'
+ symbol '{'
+ spaces
+ Tok _ Word name <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ startcontents <- braced
+ spaces
+ endcontents <- braced
+ when (mtype == "newenvironment") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos
+ Nothing -> return ()
+ return (name, Macro numargs optarg startcontents,
+ Macro 0 Nothing endcontents)
+
+bracketedToks :: PandocMonad m => LP m [Tok]
+bracketedToks = do
+ symbol '['
+ manyTill anyTok (symbol ']')
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+ ds <- untokenize <$> bracketedToks
+ case safeRead (T.unpack ds) of
+ Just i -> return i
+ _ -> return 0
+
+setCaption :: PandocMonad m => LP m Blocks
+setCaption = do
+ ils <- tok
+ mblabel <- option Nothing $
+ try $ spaces >> controlSeq "label" >> (Just <$> tok)
+ let ils' = case mblabel of
+ Just lab -> ils <> spanWith
+ ("",[],[("data-label", stringify lab)]) mempty
+ Nothing -> ils
+ updateState $ \st -> st{ sCaption = Just ils' }
+ return mempty
+
+looseItem :: PandocMonad m => LP m Blocks
+looseItem = do
+ inListItem <- sInListItem <$> getState
+ guard $ not inListItem
+ skipopts
+ return mempty
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption = Nothing }
+
+section :: PandocMonad m => Attr -> Int -> LP m Blocks
+section (ident, classes, kvs) lvl = do
+ skipopts
+ contents <- grouped inline
+ lab <- option ident $
+ try (spaces >> controlSeq "label"
+ >> spaces >> toksToString <$> braced)
+ attr' <- registerHeader (lab, classes, kvs) contents
+ return $ headerWith attr' lvl contents
+
+blockCommand :: PandocMonad m => LP m Blocks
+blockCommand = try $ do
+ Tok _ (CtrlSeq name) txt <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ star <- option "" ("*" <$ symbol '*' <* optional sp)
+ let name' = name <> star
+ let names = ordNub [name', name]
+ let raw = do
+ guard $ isBlockCommand name || not (isInlineCommand name)
+ rawBlock "latex" <$> getRawCommand txt
+ lookupListDefault raw names blockCommands
+
+closing :: PandocMonad m => LP m Blocks
+closing = do
+ contents <- tok
+ st <- getState
+ let extractInlines (MetaBlocks [Plain ys]) = ys
+ extractInlines (MetaBlocks [Para ys ]) = ys
+ extractInlines _ = []
+ let sigs = case lookupMeta "author" (sMeta st) of
+ Just (MetaList xs) ->
+ para $ trimInlines $ fromList $
+ intercalate [LineBreak] $ map extractInlines xs
+ _ -> mempty
+ return $ para (trimInlines contents) <> sigs
+
+blockCommands :: PandocMonad m => M.Map Text (LP m Blocks)
+blockCommands = M.fromList $
+ [ ("par", mempty <$ skipopts)
+ , ("parbox", braced >> grouped blocks)
+ , ("title", mempty <$ (skipopts *>
+ (grouped inline >>= addMeta "title")
+ <|> (grouped block >>= addMeta "title")))
+ , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
+ , ("author", mempty <$ (skipopts *> authors))
+ -- -- in letter class, temp. store address & sig as title, author
+ , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
+ , ("signature", mempty <$ (skipopts *> authors))
+ , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
+ -- Koma-script metadata commands
+ , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
+ -- sectioning
+ , ("part", section nullAttr (-1))
+ , ("part*", section nullAttr (-1))
+ , ("chapter", section nullAttr 0)
+ , ("chapter*", section ("",["unnumbered"],[]) 0)
+ , ("section", section nullAttr 1)
+ , ("section*", section ("",["unnumbered"],[]) 1)
+ , ("subsection", section nullAttr 2)
+ , ("subsection*", section ("",["unnumbered"],[]) 2)
+ , ("subsubsection", section nullAttr 3)
+ , ("subsubsection*", section ("",["unnumbered"],[]) 3)
+ , ("paragraph", section nullAttr 4)
+ , ("paragraph*", section ("",["unnumbered"],[]) 4)
+ , ("subparagraph", section nullAttr 5)
+ , ("subparagraph*", section ("",["unnumbered"],[]) 5)
+ -- beamer slides
+ , ("frametitle", section nullAttr 3)
+ , ("framesubtitle", section nullAttr 4)
+ -- letters
+ , ("opening", (para . trimInlines) <$> (skipopts *> tok))
+ , ("closing", skipopts *> closing)
+ --
+ , ("hrule", pure horizontalRule)
+ , ("strut", pure mempty)
+ , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
+ , ("item", looseItem)
+ , ("documentclass", skipopts *> braced *> preamble)
+ , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
+ , ("caption", skipopts *> setCaption)
+ , ("bibliography", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs . toksToString))
+ , ("addbibresource", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs . toksToString))
+ -- includes
+ , ("lstinputlisting", inputListing)
+ , ("graphicspath", graphicsPath)
+ -- hyperlink
+ , ("hypertarget", try $ braced >> grouped block)
+ ]
+
+
+environments :: PandocMonad m => M.Map Text (LP m Blocks)
+environments = M.fromList
+ [ ("document", env "document" blocks)
+ , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
+ , ("letter", env "letter" letterContents)
+ , ("minipage", env "minipage" $
+ skipopts *> spaces *> optional braced *> spaces *> blocks)
+ , ("figure", env "figure" $ skipopts *> figure)
+ , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
+ , ("center", env "center" blocks)
+ , ("longtable", env "longtable" $
+ resetCaption *> simpTable "longtable" False >>= addTableCaption)
+ , ("table", env "table" $
+ resetCaption *> skipopts *> blocks >>= addTableCaption)
+ , ("tabular*", env "tabular" $ simpTable "tabular*" True)
+ , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
+ , ("tabular", env "tabular" $ simpTable "tabular" False)
+ , ("quote", blockQuote <$> env "quote" blocks)
+ , ("quotation", blockQuote <$> env "quotation" blocks)
+ , ("verse", blockQuote <$> env "verse" blocks)
+ , ("itemize", bulletList <$> listenv "itemize" (many item))
+ , ("description", definitionList <$> listenv "description" (many descItem))
+ , ("enumerate", orderedList')
+ , ("alltt", alltt <$> env "alltt" blocks)
+ , ("code", guardEnabled Ext_literate_haskell *>
+ (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ verbEnv "code"))
+ , ("comment", mempty <$ verbEnv "comment")
+ , ("verbatim", codeBlock <$> verbEnv "verbatim")
+ , ("Verbatim", fancyverbEnv "Verbatim")
+ , ("BVerbatim", fancyverbEnv "BVerbatim")
+ , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
+ codeBlockWith attr <$> verbEnv "lstlisting")
+ , ("minted", minted)
+ , ("obeylines", obeylines)
+ , ("displaymath", mathEnvWith para Nothing "displaymath")
+ , ("equation", mathEnvWith para Nothing "equation")
+ , ("equation*", mathEnvWith para Nothing "equation*")
+ , ("gather", mathEnvWith para (Just "gathered") "gather")
+ , ("gather*", mathEnvWith para (Just "gathered") "gather*")
+ , ("multline", mathEnvWith para (Just "gathered") "multline")
+ , ("multline*", mathEnvWith para (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith para (Just "aligned") "align")
+ , ("align*", mathEnvWith para (Just "aligned") "align*")
+ , ("alignat", mathEnvWith para (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith para (Just "aligned") "alignat*")
+ , ("tikzpicture", rawVerbEnv "tikzpicture")
+ ]
environment :: PandocMonad m => LP m Blocks
environment = do
controlSeq "begin"
- name <- braced
+ name <- untokenize <$> braced
M.findWithDefault mzero name environments
<|> rawEnv name
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
- controlSeq "begin"
- name <- braced
- M.findWithDefault mzero name inlineEnvironments
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
-rawEnv :: PandocMonad m => String -> LP m Blocks
+rawEnv :: PandocMonad m => Text -> LP m Blocks
rawEnv name = do
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
rawOptions <- mconcat <$> many rawopt
- let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions
+ let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
pos1 <- getPosition
(bs, raw) <- withRaw $ env name blocks
- raw' <- applyMacros' $ beginCommand ++ raw
- if raw' /= beginCommand ++ raw
- then parseFromString' blocks raw'
- else if parseRaw
- then return $ rawBlock "latex" $ beginCommand ++ raw'
- else do
- unless parseRaw $ do
- report $ SkippedContent beginCommand pos1
- pos2 <- getPosition
- report $ SkippedContent ("\\end{" ++ name ++ "}") pos2
- return bs
-
-rawVerbEnv :: PandocMonad m => String -> LP m Blocks
+ if parseRaw
+ then return $ rawBlock "latex"
+ $ T.unpack $ beginCommand <> untokenize raw
+ else do
+ unless parseRaw $ do
+ report $ SkippedContent (T.unpack beginCommand) pos1
+ pos2 <- getPosition
+ report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2
+ return bs
+
+rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
rawVerbEnv name = do
pos <- getPosition
(_, raw) <- withRaw $ verbEnv name
- let raw' = "\\begin{tikzpicture}" ++ raw
+ let raw' = "\\begin{tikzpicture}" ++ toksToString raw
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
if parseRaw
@@ -1050,36 +1796,106 @@ rawVerbEnv name = do
report $ SkippedContent raw' pos
return mempty
-----
+verbEnv :: PandocMonad m => Text -> LP m String
+verbEnv name = withVerbatimMode $ do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ toksToString res
-maybeAddExtension :: String -> FilePath -> FilePath
-maybeAddExtension ext fp =
- if null (takeExtension fp)
- then addExtension fp ext
- else fp
+fancyverbEnv :: PandocMonad m => Text -> LP m Blocks
+fancyverbEnv name = do
+ options <- option [] keyvals
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv name
-include :: PandocMonad m => LP m Blocks
-include = do
- fs' <- try $ do
- char '\\'
- name <- try (string "include")
- <|> try (string "input")
- <|> try (string "subfile")
- <|> string "usepackage"
- -- skip options
- skipMany $ try $ char '[' *> manyTill anyChar (char ']')
- fs <- (map trim . splitBy (==',')) <$> braced
- return $ if name == "usepackage"
- then map (maybeAddExtension ".sty") fs
- else map (maybeAddExtension ".tex") fs
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mconcat <$> mapM (insertIncludedFile blocks dirs) fs'
+obeylines :: PandocMonad m => LP m Blocks
+obeylines = do
+ para . fromList . removeLeadingTrailingBreaks .
+ walk softBreakToHard . toList <$> env "obeylines" inlines
+ where softBreakToHard SoftBreak = LineBreak
+ softBreakToHard x = x
+ removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak .
+ reverse . dropWhile isLineBreak
+ isLineBreak LineBreak = True
+ isLineBreak _ = False
+
+minted :: PandocMonad m => LP m Blocks
+minted = do
+ options <- option [] keyvals
+ lang <- toksToString <$> braced
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ lang | not (null lang) ] ++
+ [ "numberLines" |
+ lookup "linenos" options == Just "true" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv "minted"
+
+letterContents :: PandocMonad m => LP m Blocks
+letterContents = do
+ bs <- blocks
+ st <- getState
+ -- add signature (author) and address (title)
+ let addr = case lookupMeta "address" (sMeta st) of
+ Just (MetaBlocks [Plain xs]) ->
+ para $ trimInlines $ fromList xs
+ _ -> mempty
+ return $ addr <> bs -- sig added by \closing
+
+figure :: PandocMonad m => LP m Blocks
+figure = try $ do
+ resetCaption
+ blocks >>= addImageCaption
+
+addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
+addImageCaption = walkM go
+ where go (Image attr alt (src,tit))
+ | not ("fig:" `isPrefixOf` tit) = do
+ mbcapt <- sCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
+ Nothing -> Image attr alt (src,tit)
+ go x = return x
+
+graphicsPath :: PandocMonad m => LP m Blocks
+graphicsPath = do
+ ps <- map toksToString <$> (bgroup *> manyTill braced egroup)
+ getResourcePath >>= setResourcePath . (++ ps)
+ return mempty
+
+splitBibs :: String -> [Inlines]
+splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
+
+alltt :: Blocks -> Blocks
+alltt = walk strToCode
+ where strToCode (Str s) = Code nullAttr s
+ strToCode Space = RawInline (Format "latex") "\\ "
+ strToCode SoftBreak = LineBreak
+ strToCode x = x
+
+parseListingsOptions :: [(String, String)] -> Attr
+parseListingsOptions options =
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ ++ maybeToList (lookup "language" options
+ >>= fromListingsLanguage)
+ in (fromMaybe "" (lookup "label" options), classes, kvs)
inputListing :: PandocMonad m => LP m Blocks
inputListing = do
pos <- getPosition
options <- option [] keyvals
- f <- filter (/='"') <$> braced
+ f <- filter (/='"') . toksToString <$> braced
dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
mbCode <- readFileFromDirs dirs f
codeLines <- case mbCode of
@@ -1098,169 +1914,10 @@ inputListing = do
drop (firstline - 1) codeLines
return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents
-parseListingsOptions :: [(String, String)] -> Attr
-parseListingsOptions options =
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- ++ maybeToList (lookup "language" options
- >>= fromListingsLanguage)
- in (fromMaybe "" (lookup "label" options), classes, kvs)
-
-----
-
-keyval :: PandocMonad m => LP m (String, String)
-keyval = try $ do
- key <- many1 alphaNum
- val <- option "" $ char '=' >> braced <|> (many1 (alphaNum <|> oneOf ".:-|\\"))
- skipMany spaceChar
- optional (char ',')
- skipMany spaceChar
- return (key, val)
-
-
-keyvals :: PandocMonad m => LP m [(String, String)]
-keyvals = try $ char '[' *> manyTill keyval (char ']')
-
-alltt :: PandocMonad m => String -> LP m Blocks
-alltt t = walk strToCode <$> parseFromString' blocks
- (substitute " " "\\ " $ substitute "%" "\\%" $
- intercalate "\\\\\n" $ lines t)
- where strToCode (Str s) = Code nullAttr s
- strToCode x = x
-
-rawLaTeXBlock :: PandocMonad m => LP m String
-rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
-
-rawLaTeXInline :: PandocMonad m => LP m Inline
-rawLaTeXInline = do
- raw <- (snd <$> withRaw inlineCommand)
- <|> (snd <$> withRaw inlineEnvironment)
- <|> (snd <$> withRaw blockCommand)
- RawInline "latex" <$> applyMacros' raw
-
-addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
-addImageCaption = walkM go
- where go (Image attr alt (src,tit))
- | not ("fig:" `isPrefixOf` tit) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
- Nothing -> Image attr alt (src,tit)
- go x = return x
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
- where go (Table c als ws hs rs) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Table (toList ils) als ws hs rs
- Nothing -> Table c als ws hs rs
- go x = return x
-
-environments :: PandocMonad m => M.Map String (LP m Blocks)
-environments = M.fromList
- [ ("document", env "document" blocks <* skipMany anyChar)
- , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
- , ("letter", env "letter" letterContents)
- , ("minipage", env "minipage" $
- skipopts *> spaces' *> optional braced *> spaces' *> blocks)
- , ("figure", env "figure" $ skipopts *> figure)
- , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
- , ("center", env "center" blocks)
- , ("longtable", env "longtable" $
- resetCaption *> simpTable "longtable" False >>= addTableCaption)
- , ("table", env "table" $
- resetCaption *> skipopts *> blocks >>= addTableCaption)
- , ("tabular*", env "tabular" $ simpTable "tabular*" True)
- , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
- , ("tabular", env "tabular" $ simpTable "tabular" False)
- , ("quote", blockQuote <$> env "quote" blocks)
- , ("quotation", blockQuote <$> env "quotation" blocks)
- , ("verse", blockQuote <$> env "verse" blocks)
- , ("itemize", bulletList <$> listenv "itemize" (many item))
- , ("description", definitionList <$> listenv "description" (many descItem))
- , ("enumerate", orderedList')
- , ("alltt", alltt =<< verbEnv "alltt")
- , ("code", guardEnabled Ext_literate_haskell *>
- (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
- verbEnv "code"))
- , ("comment", mempty <$ verbEnv "comment")
- , ("verbatim", codeBlock <$> verbEnv "verbatim")
- , ("Verbatim", fancyverbEnv "Verbatim")
- , ("BVerbatim", fancyverbEnv "BVerbatim")
- , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
- codeBlockWith attr <$> verbEnv "lstlisting")
- , ("minted", do options <- option [] keyvals
- lang <- grouped (many1 $ satisfy (/='}'))
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ lang | not (null lang) ] ++
- [ "numberLines" |
- lookup "linenos" options == Just "true" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv "minted")
- , ("obeylines", parseFromString
- (para . trimInlines . mconcat <$> many inline) =<<
- intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
- , ("displaymath", mathEnvWith para Nothing "displaymath")
- , ("equation", mathEnvWith para Nothing "equation")
- , ("equation*", mathEnvWith para Nothing "equation*")
- , ("gather", mathEnvWith para (Just "gathered") "gather")
- , ("gather*", mathEnvWith para (Just "gathered") "gather*")
- , ("multline", mathEnvWith para (Just "gathered") "multline")
- , ("multline*", mathEnvWith para (Just "gathered") "multline*")
- , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*")
- , ("align", mathEnvWith para (Just "aligned") "align")
- , ("align*", mathEnvWith para (Just "aligned") "align*")
- , ("alignat", mathEnvWith para (Just "aligned") "alignat")
- , ("alignat*", mathEnvWith para (Just "aligned") "alignat*")
- , ("tikzpicture", rawVerbEnv "tikzpicture")
- ]
-
-figure :: PandocMonad m => LP m Blocks
-figure = try $ do
- resetCaption
- blocks >>= addImageCaption
-
-letterContents :: PandocMonad m => LP m Blocks
-letterContents = do
- bs <- blocks
- st <- getState
- -- add signature (author) and address (title)
- let addr = case lookupMeta "address" (stateMeta st) of
- Just (MetaBlocks [Plain xs]) ->
- para $ trimInlines $ fromList xs
- _ -> mempty
- return $ addr <> bs -- sig added by \closing
-
-closing :: PandocMonad m => LP m Blocks
-closing = do
- contents <- tok
- st <- getState
- let extractInlines (MetaBlocks [Plain ys]) = ys
- extractInlines (MetaBlocks [Para ys ]) = ys
- extractInlines _ = []
- let sigs = case lookupMeta "author" (stateMeta st) of
- Just (MetaList xs) ->
- para $ trimInlines $ fromList $
- intercalate [LineBreak] $ map extractInlines xs
- _ -> mempty
- return $ para (trimInlines contents) <> sigs
+-- lists
item :: PandocMonad m => LP m Blocks
-item = blocks *> controlSeq "item" *> skipopts *> blocks
-
-looseItem :: PandocMonad m => LP m Blocks
-looseItem = do
- ctx <- stateParserContext `fmap` getState
- if ctx == ListItemState
- then mzero
- else return mempty
+item = void blocks *> controlSeq "item" *> skipopts *> blocks
descItem :: PandocMonad m => LP m (Inlines, [Blocks])
descItem = do
@@ -1271,302 +1928,210 @@ descItem = do
bs <- blocks
return (ils, [bs])
-env :: PandocMonad m => String -> LP m a -> LP m a
-env name p = p <*
- (try (controlSeq "end" *> braced >>= guard . (== name))
- <?> ("\\end{" ++ name ++ "}"))
-
-listenv :: PandocMonad m => String -> LP m a -> LP m a
+listenv :: PandocMonad m => Text -> LP m a -> LP m a
listenv name p = try $ do
- oldCtx <- stateParserContext `fmap` getState
- updateState $ \st -> st{ stateParserContext = ListItemState }
+ oldInListItem <- sInListItem `fmap` getState
+ updateState $ \st -> st{ sInListItem = True }
res <- env name p
- updateState $ \st -> st{ stateParserContext = oldCtx }
+ updateState $ \st -> st{ sInListItem = oldInListItem }
return res
-mathEnvWith :: PandocMonad m
- => (Inlines -> a) -> Maybe String -> String -> LP m a
-mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name)
- where inner x = case innerEnv of
- Nothing -> x
- Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
- "\\end{" ++ y ++ "}"
-
-mathEnv :: PandocMonad m => String -> LP m String
-mathEnv name = do
- skipopts
- optional blankline
- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
- charMuncher = skipMany comment *>
- (many1 (noneOf "\\%") <|> try (string "\\%")
- <|> try (string "\\\\") <|> count 1 anyChar)
- res <- concat <$> manyTill charMuncher endEnv
- return $ stripTrailingNewlines res
-
-verbEnv :: PandocMonad m => String -> LP m String
-verbEnv name = do
- skipopts
- optional blankline
- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
- charMuncher = anyChar
- res <- manyTill charMuncher endEnv
- return $ stripTrailingNewlines res
-
-fancyverbEnv :: PandocMonad m => String -> LP m Blocks
-fancyverbEnv name = do
- options <- option [] keyvals
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv name
-
orderedList' :: PandocMonad m => LP m Blocks
orderedList' = try $ do
- optional sp
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ char '[' *> anyOrderedListMarker <* char ']'
spaces
- optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
+ let markerSpec = do
+ symbol '['
+ ts <- toksToString <$> manyTill anyTok (symbol ']')
+ case runParser anyOrderedListMarker def "option" ts of
+ Right r -> return r
+ Left _ -> do
+ pos <- getPosition
+ report $ SkippedContent ("[" ++ ts ++ "]") pos
+ return (1, DefaultStyle, DefaultDelim)
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec
spaces
- start <- option 1 $ try $ do controlSeq "setcounter"
- grouped (string "enum" *> many1 (oneOf "iv"))
+ optional $ try $ controlSeq "setlength"
+ *> grouped (count 1 $ controlSeq "itemindent")
+ *> braced
+ spaces
+ start <- option 1 $ try $ do pos <- getPosition
+ controlSeq "setcounter"
+ ctr <- toksToString <$> braced
+ guard $ "enum" `isPrefixOf` ctr
+ guard $ all (`elem` ['i','v']) (drop 4 ctr)
optional sp
- num <- grouped (many1 digit)
- spaces
- return (read num + 1 :: Int)
+ num <- toksToString <$> braced
+ case safeRead num of
+ Just i -> return (i + 1 :: Int)
+ Nothing -> do
+ report $ SkippedContent
+ ("\\setcounter{" ++ ctr ++
+ "}{" ++ num ++ "}") pos
+ return 1
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
-paragraph :: PandocMonad m => LP m Blocks
-paragraph = do
- x <- trimInlines . mconcat <$> many1 inline
- if x == mempty
- then return mempty
- else return $ para x
-
-preamble :: PandocMonad m => LP m Blocks
-preamble = mempty <$> manyTill preambleBlock beginDoc
- where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
- preambleBlock = void comment
- <|> void sp
- <|> void blanklines
- <|> void include
- <|> void macro
- <|> void blockCommand
- <|> void anyControlSeq
- <|> void braced
- <|> void anyChar
-
--------
-
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
- let k = last ks
- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
- first <- optionMaybe $ toList <$> opt
- second <- optionMaybe $ toList <$> opt
- keys <- try $ bgroup *> (manyTill citationLabel egroup)
- let (pre, suf) = case (first , second ) of
- (Just s , Nothing) -> (mempty, s )
- (Just s , Just t ) -> (s , t )
- _ -> (mempty, mempty)
- conv k = Citation { citationId = k
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = NormalCitation
- , citationHash = 0
- , citationNoteNum = 0
- }
- return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m String
-citationLabel = optional sp *>
- (many1 (satisfy isBibtexKeyChar)
- <* optional sp
- <* optional (char ',')
- <* optional sp)
- where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)
+-- tables
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
- cits <- if multi
- then many1 simpleCiteArgs
- else count 1 simpleCiteArgs
- let cs = concat cits
- return $ case mode of
- AuthorInText -> case cs of
- (c:rest) -> c {citationMode = mode} : rest
- [] -> []
- _ -> map (\a -> a {citationMode = mode}) cs
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+ spaces
+ controlSeq "hline" <|>
+ -- booktabs rules:
+ controlSeq "toprule" <|>
+ controlSeq "bottomrule" <|>
+ controlSeq "midrule" <|>
+ controlSeq "endhead" <|>
+ controlSeq "endfirsthead"
+ spaces
+ optional $ bracketed inline
+ return ()
-citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
- (c,raw) <- withRaw $ cites mode multi
- return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
- let ils = (toList . trimInlines . mconcat) <$>
- many (notFollowedBy (oneOf "\\};") >> inline)
- let parseOne = try $ do
- skipSpaces
- pref <- ils
- cit' <- inline -- expect a citation
- let citlist = toList cit'
- cits' <- case citlist of
- [Cite cs _] -> return cs
- _ -> mzero
- suff <- ils
- skipSpaces
- optional $ char ';'
- return $ addPrefix pref $ addSuffix suff cits'
- (c:cits, raw) <- withRaw $ grouped parseOne
- return $ cite (c{ citationMode = mode }:cits)
- (rawInline "latex" $ "\\citetext" ++ raw)
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
--- tables
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+ inp <- getInput
+ case inp of
+ (Tok spos Word t : rest) -> do
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest
+ _ -> return ()
-parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))]
+parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))]
parseAligns = try $ do
- bgroup
- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
- maybeBar
- let cAlign = AlignCenter <$ char 'c'
- let lAlign = AlignLeft <$ char 'l'
- let rAlign = AlignRight <$ char 'r'
- let parAlign = AlignLeft <$ char 'p'
- -- algins from tabularx
- let xAlign = AlignLeft <$ char 'X'
- let mAlign = AlignLeft <$ char 'm'
- let bAlign = AlignLeft <$ char 'b'
- let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
- <|> xAlign <|> mAlign <|> bAlign
- let alignPrefix = char '>' >> braced
- let alignSuffix = char '<' >> braced
+ let maybeBar = skipMany $
+ sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced)
+ let cAlign = AlignCenter <$ symbol 'c'
+ let lAlign = AlignLeft <$ symbol 'l'
+ let rAlign = AlignRight <$ symbol 'r'
+ let parAlign = AlignLeft <$ symbol 'p'
+ -- aligns from tabularx
+ let xAlign = AlignLeft <$ symbol 'X'
+ let mAlign = AlignLeft <$ symbol 'm'
+ let bAlign = AlignLeft <$ symbol 'b'
+ let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
+ <|> xAlign <|> mAlign <|> bAlign )
+ let alignPrefix = symbol '>' >> braced
+ let alignSuffix = symbol '<' >> braced
let colWidth = try $ do
- char '{'
- ds <- many1 (oneOf "0123456789.")
+ symbol '{'
+ ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth")
spaces
- string "\\linewidth"
- char '}'
+ symbol '}'
case safeRead ds of
Just w -> return w
Nothing -> return 0.0
- let alignSpec = do
+ let alignSpec = try $ do
spaces
- pref <- option "" alignPrefix
+ pref <- option [] alignPrefix
spaces
al <- alignChar
- width <- colWidth <|> option 0.0 (do s <- braced
+ width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced
pos <- getPosition
report $ SkippedContent s pos
return 0.0)
spaces
- suff <- option "" alignSuffix
+ suff <- option [] alignSuffix
return (al, width, (pref, suff))
- aligns' <- sepEndBy alignSpec maybeBar
+ bgroup
+ spaces
+ maybeBar
+ aligns' <- many (alignSpec <* maybeBar)
spaces
egroup
spaces
- return $ aligns'
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
- spaces'
- controlSeq "hline" <|>
- -- booktabs rules:
- controlSeq "toprule" <|>
- controlSeq "bottomrule" <|>
- controlSeq "midrule" <|>
- controlSeq "endhead" <|>
- controlSeq "endfirsthead"
- spaces'
- optional $ bracketed (many1 (satisfy (/=']')))
- return ()
-
-lbreak :: PandocMonad m => LP m ()
-lbreak = () <$ try (spaces' *>
- (controlSeq "\\" <|> controlSeq "tabularnewline") <*
- spaces')
-
-amp :: PandocMonad m => LP m ()
-amp = () <$ try (spaces' *> char '&' <* spaces')
+ return aligns'
parseTableRow :: PandocMonad m
- => String -- ^ table environment name
- -> [(String, String)] -- ^ pref/suffixes
+ => Text -- ^ table environment name
+ -> [([Tok], [Tok])] -- ^ pref/suffixes
-> LP m [Blocks]
-parseTableRow envname prefsufs = try $ do
+parseTableRow envname prefsufs = do
+ notFollowedBy (spaces *> end_ envname)
let cols = length prefsufs
- let tableCellRaw = concat <$> many
- (do notFollowedBy amp
- notFollowedBy lbreak
- notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}"))
- many1 (noneOf "&%\n\r\\")
- <|> try (string "\\&")
- <|> count 1 anyChar)
- let plainify bs = case toList bs of
- [Para ils] -> plain (fromList ils)
- _ -> bs
- rawcells <- sepBy1 tableCellRaw amp
- guard $ length rawcells == cols
- let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs
- let tableCell = plainify <$> blocks
- cells' <- mapM (parseFromString' tableCell) rawcells'
- let numcells = length cells'
+ -- add prefixes and suffixes in token stream:
+ let celltoks (pref, suff) = do
+ prefpos <- getPosition
+ contents <- many (notFollowedBy
+ (() <$ amp <|> () <$ lbreak <|> end_ envname)
+ >> anyTok)
+ suffpos <- getPosition
+ option [] (count 1 amp)
+ return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref
+ ++ contents ++
+ map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff
+ rawcells <- sequence (map celltoks prefsufs)
+ oldInput <- getInput
+ cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells
+ setInput oldInput
+ spaces
+ let numcells = length cells
guard $ numcells <= cols && numcells >= 1
- guard $ cells' /= [mempty]
+ guard $ cells /= [mempty]
-- note: a & b in a three-column table leaves an empty 3rd cell:
- let cells'' = cells' ++ replicate (cols - numcells) mempty
- spaces'
- return cells''
+ return $ cells ++ replicate (cols - numcells) mempty
-spaces' :: PandocMonad m => LP m ()
-spaces' = spaces *> skipMany (comment *> spaces)
+parseTableCell :: PandocMonad m => LP m Blocks
+parseTableCell = do
+ let plainify bs = case toList bs of
+ [Para ils] -> plain (fromList ils)
+ _ -> bs
+ updateState $ \st -> st{ sInTableCell = True }
+ cells <- plainify <$> blocks
+ updateState $ \st -> st{ sInTableCell = False }
+ return cells
-simpTable :: PandocMonad m => String -> Bool -> LP m Blocks
+simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
simpTable envname hasWidthParameter = try $ do
- when hasWidthParameter $ () <$ (spaces' >> tok)
+ when hasWidthParameter $ () <$ (spaces >> tok)
skipopts
colspecs <- parseAligns
let (aligns, widths, prefsufs) = unzip3 colspecs
let cols = length colspecs
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
- spaces'
+ spaces
skipMany hline
- spaces'
+ spaces
header' <- option [] $ try (parseTableRow envname prefsufs <*
lbreak <* many1 hline)
- spaces'
+ spaces
rows <- sepEndBy (parseTableRow envname prefsufs)
(lbreak <* optional (skipMany hline))
- spaces'
+ spaces
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
- spaces'
+ spaces
let header'' = if null header'
then replicate cols mempty
else header'
lookAhead $ controlSeq "end" -- make sure we're at end
return $ table mempty (zip aligns widths) header'' rows
-removeDoubleQuotes :: String -> String
-removeDoubleQuotes ('"':xs) =
- case reverse xs of
- '"':ys -> reverse ys
- _ -> '"':xs
-removeDoubleQuotes xs = xs
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+ where go (Table c als ws hs rs) = do
+ mbcapt <- sCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Table (toList ils) als ws hs rs
+ Nothing -> Table c als ws hs rs
+ go x = return x
+
+
+block :: PandocMonad m => LP m Blocks
+block = (mempty <$ spaces1)
+ <|> environment
+ <|> include
+ <|> macroDef
+ <|> paragraph
+ <|> blockCommand
+ <|> grouped block
+
+blocks :: PandocMonad m => LP m Blocks
+blocks = mconcat <$> many block
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
new file mode 100644
index 000000000..6f84ae1f1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -0,0 +1,48 @@
+{-
+Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Types
+ Copyright : Copyright (C) 2017 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Types for LaTeX tokens and macros.
+-}
+module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
+ , TokType(..)
+ , Macro(..)
+ , Line
+ , Column )
+where
+import Data.Text (Text)
+import Text.Parsec.Pos (Line, Column)
+
+data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
+ Esc1 | Esc2 | Arg Int
+ deriving (Eq, Ord, Show)
+
+data Tok = Tok (Line, Column) TokType Text
+ deriving (Eq, Ord, Show)
+
+data Macro = Macro Int (Maybe [Tok]) [Tok]
+ deriving Show
+
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c2342b9f3..ab6a32b78 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -61,7 +61,8 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag)
-import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
+import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline, applyMacros,
+ macro)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
@@ -1105,10 +1106,11 @@ latexMacro = try $ do
rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- (B.rawBlock "latex" . concat <$>
- rawLaTeXBlock `sepEndBy1` blankline)
- <|> (B.rawBlock "context" . concat <$>
+ result <- (B.rawBlock "context" . concat <$>
rawConTeXtEnvironment `sepEndBy1` blankline)
+ <|> (B.rawBlock "latex" . concat <$>
+ rawLaTeXBlock `sepEndBy1` blankline)
+
spaces
return $ return result
@@ -1553,8 +1555,8 @@ code = try $ do
Right attr -> B.codeWith attr result
math :: PandocMonad m => MarkdownParser m (F Inlines)
-math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros))
+ <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?>
(guardEnabled Ext_smart *> (return <$> apostrophe)
<* notFollowedBy (space <|> satisfy isPunctuation))
@@ -1878,9 +1880,8 @@ rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
lookAhead (char '\\')
notFollowedBy' rawConTeXtEnvironment
- RawInline _ s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s
- -- "tex" because it might be context or latex
+ s <- rawLaTeXInline
+ return $ return $ B.rawInline "tex" s -- "tex" because it might be context
rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
rawConTeXtEnvironment = try $ do
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index a51306347..1ae73c148 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -58,7 +58,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter)
-import Text.Pandoc.Parsing hiding (macro, nested)
+import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag)
import Text.Pandoc.XML (fromEntities)
import System.FilePath (takeExtension)
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 66273e05d..42fdfd4dd 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -826,9 +826,10 @@ maybeRight = either (const Nothing) Just
inlineLaTeXCommand :: PandocMonad m => OrgParser m String
inlineLaTeXCommand = try $ do
rest <- getInput
- parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest
+ st <- getState
+ parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest
case parsed of
- Right (RawInline _ cs) -> do
+ Right cs -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
let cmdNoSpc = dropWhileEnd isSpace cs
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 92f868516..fc98213fb 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState
, OrgNoteRecord
, HasReaderOptions (..)
, HasQuoteContext (..)
+ , HasMacros (..)
, TodoMarker (..)
, TodoSequence
, TodoState (..)
@@ -57,14 +58,17 @@ import Control.Monad.Reader (ReaderT, asks, local)
import Data.Default (Default (..))
import qualified Data.Map as M
import qualified Data.Set as Set
+import Data.Text (Text)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Definition (Meta (..), nullMeta)
import Text.Pandoc.Logging
import Text.Pandoc.Options (ReaderOptions (..))
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..),
HasIncludeFiles (..), HasLastStrPosition (..),
HasLogMessages (..), HasQuoteContext (..),
+ HasMacros (..),
HasReaderOptions (..), ParserContext (..),
QuoteContext (..), SourcePos, askF, asksF, returnF,
runF, trimInlinesF)
@@ -118,6 +122,7 @@ data OrgParserState = OrgParserState
, orgStateParserContext :: ParserContext
, orgStateTodoSequences :: [TodoSequence]
, orgLogMessages :: [LogMessage]
+ , orgMacros :: M.Map Text Macro
}
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
@@ -148,6 +153,10 @@ instance HasLogMessages OrgParserState where
addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st }
getLogMessages st = reverse $ orgLogMessages st
+instance HasMacros OrgParserState where
+ extractMacros st = orgMacros st
+ updateMacros f st = st{ orgMacros = f (orgMacros st) }
+
instance HasIncludeFiles OrgParserState where
getIncludeFiles = orgStateIncludeFiles
addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st }
@@ -178,6 +187,7 @@ defaultOrgParserState = OrgParserState
, orgStateParserContext = NullState
, orgStateTodoSequences = []
, orgLogMessages = []
+ , orgMacros = M.empty
}
optionsToParserState :: ReaderOptions -> OrgParserState
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 210d3e5aa..d41152de5 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -45,7 +45,7 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
+import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Shared (crFilter)
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index a80d75340..853d2768f 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -573,7 +573,7 @@ rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
- B.singleton <$> rawLaTeXInline
+ B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 5708358f6..f000646c2 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -40,7 +40,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (macro, space, spaces, uri)
+import Text.Pandoc.Parsing hiding (space, spaces, uri)
import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter)
import Control.Monad (guard, void, when)
import Control.Monad.Reader (Reader, asks, runReader)