summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/App.hs1636
-rw-r--r--src/Text/Pandoc/Asciify.hs6
-rw-r--r--src/Text/Pandoc/BCP47.hs125
-rw-r--r--src/Text/Pandoc/CSS.hs8
-rw-r--r--src/Text/Pandoc/CSV.hs102
-rw-r--r--src/Text/Pandoc/Class.hs1082
-rw-r--r--src/Text/Pandoc/Data.hs (renamed from src/Text/Pandoc/Data.hsb)13
-rw-r--r--src/Text/Pandoc/Emoji.hs1
-rw-r--r--src/Text/Pandoc/Error.hs97
-rw-r--r--src/Text/Pandoc/Extensions.hs378
-rw-r--r--src/Text/Pandoc/Filter.hs60
-rw-r--r--src/Text/Pandoc/Filter/JSON.hs97
-rw-r--r--src/Text/Pandoc/Filter/Lua.hs53
-rw-r--r--src/Text/Pandoc/Filter/Path.hs53
-rw-r--r--src/Text/Pandoc/Highlighting.hs63
-rw-r--r--src/Text/Pandoc/ImageSize.hs232
-rw-r--r--src/Text/Pandoc/Logging.hs342
-rw-r--r--src/Text/Pandoc/Lua.hs83
-rw-r--r--src/Text/Pandoc/Lua/Filter.hs170
-rw-r--r--src/Text/Pandoc/Lua/Init.hs117
-rw-r--r--src/Text/Pandoc/Lua/Module/MediaBag.hs108
-rw-r--r--src/Text/Pandoc/Lua/Module/Pandoc.hs134
-rw-r--r--src/Text/Pandoc/Lua/Module/Utils.hs126
-rw-r--r--src/Text/Pandoc/Lua/Packages.hs115
-rw-r--r--src/Text/Pandoc/Lua/StackInstances.hs376
-rw-r--r--src/Text/Pandoc/Lua/Util.hs187
-rw-r--r--src/Text/Pandoc/MIME.hs15
-rw-r--r--src/Text/Pandoc/MediaBag.hs52
-rw-r--r--src/Text/Pandoc/Options.hs389
-rw-r--r--src/Text/Pandoc/PDF.hs351
-rw-r--r--src/Text/Pandoc/Parsing.hs714
-rw-r--r--src/Text/Pandoc/Pretty.hs120
-rw-r--r--src/Text/Pandoc/Process.hs34
-rw-r--r--src/Text/Pandoc/Readers.hs160
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs228
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs320
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs117
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs631
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs55
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fields.hs89
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs98
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs526
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs12
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs27
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs140
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs944
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs45
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs496
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs3403
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs51
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs1449
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs308
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs924
-rw-r--r--src/Text/Pandoc/Readers/Native.hs36
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs71
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs54
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/State.hs116
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs282
-rw-r--r--src/Text/Pandoc/Readers/Odt/Base.hs7
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs83
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs140
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs21
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs323
-rw-r--r--src/Text/Pandoc/Readers/Odt/Namespaces.hs10
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs187
-rw-r--r--src/Text/Pandoc/Readers/Org.hs43
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs84
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs732
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs304
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs53
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs443
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs145
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs156
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs54
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs38
-rw-r--r--src/Text/Pandoc/Readers/RST.hs1041
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs215
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs48
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs279
-rw-r--r--src/Text/Pandoc/Readers/TikiWiki.hs654
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs159
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs673
-rw-r--r--src/Text/Pandoc/SelfContained.hs270
-rw-r--r--src/Text/Pandoc/Shared.hs865
-rw-r--r--src/Text/Pandoc/Slides.hs10
-rw-r--r--src/Text/Pandoc/Templates.hs82
-rw-r--r--src/Text/Pandoc/Translations.hs112
-rw-r--r--src/Text/Pandoc/UTF8.hs109
-rw-r--r--src/Text/Pandoc/UUID.hs30
-rw-r--r--src/Text/Pandoc/Writers.hs195
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs159
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs366
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs375
-rw-r--r--src/Text/Pandoc/Writers/Custom.hs384
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs410
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs923
-rw-r--r--src/Text/Pandoc/Writers/DokuWiki.hs169
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs683
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs442
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs1134
-rw-r--r--src/Text/Pandoc/Writers/Haddock.hs170
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs189
-rw-r--r--src/Text/Pandoc/Writers/JATS.hs455
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs920
-rw-r--r--src/Text/Pandoc/Writers/Man.hs227
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs774
-rw-r--r--src/Text/Pandoc/Writers/Math.hs56
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs781
-rw-r--r--src/Text/Pandoc/Writers/Ms.hs639
-rw-r--r--src/Text/Pandoc/Writers/Muse.hs408
-rw-r--r--src/Text/Pandoc/Writers/Native.hs23
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs218
-rw-r--r--src/Text/Pandoc/Writers/OOXML.hs108
-rw-r--r--src/Text/Pandoc/Writers/OPML.hs88
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs313
-rw-r--r--src/Text/Pandoc/Writers/Org.hs242
-rw-r--r--src/Text/Pandoc/Writers/Powerpoint.hs63
-rw-r--r--src/Text/Pandoc/Writers/Powerpoint/Output.hs1834
-rw-r--r--src/Text/Pandoc/Writers/Powerpoint/Presentation.hs987
-rw-r--r--src/Text/Pandoc/Writers/RST.hs330
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs408
-rw-r--r--src/Text/Pandoc/Writers/Shared.hs226
-rw-r--r--src/Text/Pandoc/Writers/TEI.hs330
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs191
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs151
-rw-r--r--src/Text/Pandoc/Writers/ZimWiki.hs219
-rw-r--r--src/Text/Pandoc/XML.hs33
127 files changed, 29399 insertions, 11615 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
new file mode 100644
index 000000000..26c754cd6
--- /dev/null
+++ b/src/Text/Pandoc/App.hs
@@ -0,0 +1,1636 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE TupleSections #-}
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.App
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley@edu>
+ Stability : alpha
+ Portability : portable
+
+Does a pandoc conversion based on command-line options.
+-}
+module Text.Pandoc.App (
+ convertWithOpts
+ , Opt(..)
+ , LineEnding(..)
+ , Filter(..)
+ , defaultOpts
+ , parseOptions
+ , options
+ , applyFilters
+ ) where
+import qualified Control.Exception as E
+import Control.Monad
+import Control.Monad.Except (catchError, throwError)
+import Control.Monad.Trans
+import Data.Aeson (defaultOptions)
+import Data.Aeson.TH (deriveJSON)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as B
+import Data.Char (toLower, toUpper)
+import Data.List (find, intercalate, isPrefixOf, isSuffixOf, sort)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe, isJust, isNothing)
+import Data.Monoid
+import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TE
+import qualified Data.Text.Encoding.Error as TE
+import Data.Yaml (decode)
+import qualified Data.Yaml as Yaml
+import GHC.Generics
+import Network.URI (URI (..), parseURI)
+import Paths_pandoc (getDataDir)
+import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder,
+ defConfig, Indent(..), NumberFormat(..))
+import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme,
+ pygments)
+import Skylighting.Parser (addSyntaxDefinition, parseSyntaxDefinition)
+import System.Console.GetOpt
+import System.Directory (getAppUserDataDirectory)
+import System.Environment (getArgs, getProgName)
+import System.Exit (exitSuccess)
+import System.FilePath
+import System.IO (nativeNewline, stdout)
+import qualified System.IO as IO (Newline (..))
+import System.IO.Error (isDoesNotExistError)
+import Text.Pandoc
+import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import Text.Pandoc.Builder (setMeta, deleteMeta)
+import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
+import Text.Pandoc.Highlighting (highlightingStyles)
+import Text.Pandoc.PDF (makePDF)
+import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
+import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
+ headerShift, isURI, ordNub, safeRead, tabFilter)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL)
+import Text.Pandoc.XML (toEntities)
+import Text.Printf
+#ifndef _WINDOWS
+import System.Posix.IO (stdOutput)
+import System.Posix.Terminal (queryTerminal)
+#endif
+
+data LineEnding = LF | CRLF | Native deriving (Show, Generic)
+
+parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt
+parseOptions options' defaults = do
+ rawArgs <- map UTF8.decodeArg <$> getArgs
+ prg <- getProgName
+
+ let (actions, args, unrecognizedOpts, errors) =
+ getOpt' Permute options' rawArgs
+
+ let unknownOptionErrors =
+ foldr (handleUnrecognizedOption . takeWhile (/= '=')) []
+ unrecognizedOpts
+
+ unless (null errors && null unknownOptionErrors) $
+ E.throwIO $ PandocOptionError $
+ concat errors ++ unlines unknownOptionErrors ++
+ ("Try " ++ prg ++ " --help for more information.")
+
+ -- thread option data structure through all supplied option actions
+ opts <- foldl (>>=) (return defaults) actions
+ return (opts{ optInputFiles = args })
+
+latexEngines :: [String]
+latexEngines = ["pdflatex", "lualatex", "xelatex"]
+
+htmlEngines :: [String]
+htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"]
+
+engines :: [(String, String)]
+engines = map ("html",) htmlEngines ++
+ map ("html5",) htmlEngines ++
+ map ("latex",) latexEngines ++
+ map ("beamer",) latexEngines ++
+ [ ("ms", "pdfroff")
+ , ("context", "context")
+ ]
+
+pdfEngines :: [String]
+pdfEngines = ordNub $ map snd engines
+
+pdfWriterAndProg :: Maybe String -- ^ user-specified writer name
+ -> Maybe String -- ^ user-specified pdf-engine
+ -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg)
+pdfWriterAndProg mWriter mEngine = do
+ let panErr msg = liftIO $ E.throwIO $ PandocAppError msg
+ case go mWriter mEngine of
+ Right (writ, prog) -> return (writ, Just prog)
+ Left err -> panErr err
+ where
+ go Nothing Nothing = Right ("latex", "pdflatex")
+ go (Just writer) Nothing = (writer,) <$> engineForWriter writer
+ go Nothing (Just engine) = (,engine) <$> writerForEngine engine
+ go (Just writer) (Just engine) =
+ case find (== (baseWriterName writer, engine)) engines of
+ Just _ -> Right (writer, engine)
+ Nothing -> Left $ "pdf-engine " ++ engine ++
+ " is not compatible with output format " ++ writer
+
+ writerForEngine eng = case [f | (f,e) <- engines, e == eng] of
+ fmt : _ -> Right fmt
+ [] -> Left $
+ "pdf-engine " ++ eng ++ " not known"
+
+ engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of
+ eng : _ -> Right eng
+ [] -> Left $
+ "cannot produce pdf output from " ++ w
+
+convertWithOpts :: Opt -> IO ()
+convertWithOpts opts = do
+ let outputFile = fromMaybe "-" (optOutputFile opts)
+ let filters = optFilters opts
+ let verbosity = optVerbosity opts
+
+ when (optDumpArgs opts) $
+ do UTF8.hPutStrLn stdout outputFile
+ mapM_ (UTF8.hPutStrLn stdout) (optInputFiles opts)
+ exitSuccess
+
+ epubMetadata <- case optEpubMetadata opts of
+ Nothing -> return Nothing
+ Just fp -> Just <$> UTF8.readFile fp
+
+ let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc"
+ isPandocCiteproc _ = False
+ -- --bibliography implies -F pandoc-citeproc for backwards compatibility:
+ let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) &&
+ optCiteMethod opts `notElem` [Natbib, Biblatex] &&
+ all (not . isPandocCiteproc) filters
+ let filters' = if needsCiteproc then JSONFilter "pandoc-citeproc" : filters
+ else filters
+
+ let sources = case optInputFiles opts of
+ [] -> ["-"]
+ xs | optIgnoreArgs opts -> ["-"]
+ | otherwise -> xs
+
+ datadir <- case optDataDir opts of
+ Nothing -> E.catch
+ (Just <$> getAppUserDataDirectory "pandoc")
+ (\e -> let _ = (e :: E.SomeException)
+ in return Nothing)
+ Just _ -> return $ optDataDir opts
+
+ -- assign reader and writer based on options and filenames
+ let readerName = fromMaybe ( defaultReaderName
+ (if any isURI sources
+ then "html"
+ else "markdown") sources) (optReader opts)
+
+ let nonPdfWriterName Nothing = defaultWriterName outputFile
+ nonPdfWriterName (Just x) = x
+
+ let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
+ (writerName, maybePdfProg) <-
+ if pdfOutput
+ then pdfWriterAndProg (optWriter opts) (optPdfEngine opts)
+ else return (nonPdfWriterName $ optWriter opts, Nothing)
+
+ let format = baseWriterName
+ $ takeFileName writerName -- in case path to lua script
+
+ -- disabling the custom writer for now
+ (writer, writerExts) <-
+ if ".lua" `isSuffixOf` format
+ -- note: use non-lowercased version writerName
+ then return (TextWriter
+ (\o d -> writeCustom writerName o d)
+ :: Writer PandocIO, mempty)
+ else case getWriter writerName of
+ Left e -> E.throwIO $ PandocAppError $
+ if format == "pdf"
+ then e ++
+ "\nTo create a pdf using pandoc, use " ++
+ "-t latex|beamer|context|ms|html5" ++
+ "\nand specify an output file with " ++
+ ".pdf extension (-o filename.pdf)."
+ else e
+ Right (w, es) -> return (w :: Writer PandocIO, es)
+
+ -- TODO: we have to get the input and the output into the state for
+ -- the sake of the text2tags reader.
+ (reader, readerExts) <-
+ case getReader readerName of
+ Right (r, es) -> return (r :: Reader PandocIO, es)
+ Left e -> E.throwIO $ PandocAppError e'
+ where e' = case readerName of
+ "pdf" -> e ++
+ "\nPandoc can convert to PDF, but not from PDF."
+ "doc" -> e ++
+ "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc."
+ _ -> e
+
+ let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
+ let addStringAsVariable varname s vars = return $ (varname, s) : vars
+
+ highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts
+ let addSyntaxMap existingmap f = do
+ res <- parseSyntaxDefinition f
+ case res of
+ Left errstr -> E.throwIO $ PandocSyntaxMapError errstr
+ Right syn -> return $ addSyntaxDefinition syn existingmap
+
+ syntaxMap <- foldM addSyntaxMap defaultSyntaxMap
+ (optSyntaxDefinitions opts)
+
+ -- We don't want to send output to the terminal if the user
+ -- does 'pandoc -t docx input.txt'; though we allow them to
+ -- force this with '-o -'. On posix systems, we detect
+ -- when stdout is being piped and allow output to stdout
+ -- in that case, but on Windows we can't.
+#ifdef _WINDOWS
+ let istty = True
+#else
+ istty <- queryTerminal stdOutput
+#endif
+ when (not (isTextFormat format) && istty && isNothing ( optOutputFile opts)) $
+ E.throwIO $ PandocAppError $
+ "Cannot write " ++ format ++ " output to terminal.\n" ++
+ "Specify an output file using the -o option, or " ++
+ "use '-o -' to force output to stdout."
+
+ let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t"
+ then 0
+ else optTabStop opts)
+
+ readSources :: [FilePath] -> PandocIO Text
+ readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
+ mapM readSource srcs
+
+ let runIO' :: PandocIO a -> IO a
+ runIO' f = do
+ (res, reports) <- runIOorExplode $ do
+ setTrace (optTrace opts)
+ setVerbosity verbosity
+ x <- f
+ rs <- getLog
+ return (x, rs)
+ case optLogFile opts of
+ Nothing -> return ()
+ Just logfile -> B.writeFile logfile (encodeLogMessages reports)
+ let isWarning msg = messageVerbosity msg == WARNING
+ when (optFailIfWarnings opts && any isWarning reports) $
+ E.throwIO PandocFailOnWarningError
+ return res
+
+ let eol = case optEol opts of
+ CRLF -> IO.CRLF
+ LF -> IO.LF
+ Native -> nativeNewline
+
+ -- note: this reverses the list constructed in option parsing,
+ -- which in turn was reversed from the command-line order,
+ -- so we end up with the correct order in the variable list:
+ let withList _ [] vars = return vars
+ withList f (x:xs) vars = f x vars >>= withList f xs
+
+ let addContentsAsVariable varname fp vars = do
+ s <- UTF8.toString <$> readFileStrict fp
+ return $ (varname, s) : vars
+
+ runIO' $ do
+ setUserDataDir datadir
+ setInputFiles (optInputFiles opts)
+ setOutputFile (optOutputFile opts)
+
+ variables <-
+ withList (addStringAsVariable "sourcefile")
+ (reverse $ optInputFiles opts)
+ (("outputfile", fromMaybe "-" (optOutputFile opts))
+ : optVariables opts)
+ -- we reverse this list because, unlike
+ -- the other option lists here, it is
+ -- not reversed when parsed from CLI arguments.
+ -- See withList, above.
+ >>=
+ withList (addContentsAsVariable "include-before")
+ (optIncludeBeforeBody opts)
+ >>=
+ withList (addContentsAsVariable "include-after")
+ (optIncludeAfterBody opts)
+ >>=
+ withList (addContentsAsVariable "header-includes")
+ (optIncludeInHeader opts)
+ >>=
+ withList (addStringAsVariable "css") (optCss opts)
+ >>=
+ maybe return (addStringAsVariable "title-prefix")
+ (optTitlePrefix opts)
+ >>=
+ maybe return (addStringAsVariable "epub-cover-image")
+ (optEpubCoverImage opts)
+ >>=
+ (\vars -> case optHTMLMathMethod opts of
+ LaTeXMathML Nothing -> do
+ s <- UTF8.toString <$> readDataFile "LaTeXMathML.js"
+ return $ ("mathml-script", s) : vars
+ _ -> return vars)
+ >>=
+ (\vars -> if format == "dzslides"
+ then do
+ dztempl <- UTF8.toString <$> readDataFile
+ ("dzslides" </> "template.html")
+ let dzline = "<!-- {{{{ dzslides core"
+ let dzcore = unlines
+ $ dropWhile (not . (dzline `isPrefixOf`))
+ $ lines dztempl
+ return $ ("dzslides-core", dzcore) : vars
+ else return vars)
+
+ abbrevs <- (Set.fromList . filter (not . null) . lines) <$>
+ case optAbbreviations opts of
+ Nothing -> UTF8.toString <$> readDataFile "abbreviations"
+ Just f -> UTF8.toString <$> readFileStrict f
+
+ templ <- case optTemplate opts of
+ _ | not standalone -> return Nothing
+ Nothing -> Just <$> getDefaultTemplate format
+ Just tp -> do
+ -- strip off extensions
+ let tp' = case takeExtension tp of
+ "" -> tp <.> format
+ _ -> tp
+ Just . UTF8.toString <$>
+ (readFileStrict tp' `catchError`
+ (\e ->
+ case e of
+ PandocIOError _ e' |
+ isDoesNotExistError e' ->
+ readDataFile ("templates" </> tp')
+ _ -> throwError e))
+
+ metadata <- if format == "jats" &&
+ isNothing (lookup "csl" (optMetadata opts)) &&
+ isNothing (lookup "citation-style" (optMetadata opts))
+ then do
+ jatsCSL <- readDataFile "jats.csl"
+ let jatsEncoded = makeDataURI
+ ("application/xml", jatsCSL)
+ return $ ("csl", jatsEncoded) : optMetadata opts
+ else return $ optMetadata opts
+
+ case lookup "lang" (optMetadata opts) of
+ Just l -> case parseBCP47 l of
+ Left _ -> return ()
+ Right l' -> setTranslations l'
+ Nothing -> setTranslations $ Lang "en" "" "US" []
+
+ let writerOptions = def {
+ writerTemplate = templ
+ , writerVariables = variables
+ , writerTabStop = optTabStop opts
+ , writerTableOfContents = optTableOfContents opts
+ , writerHTMLMathMethod = optHTMLMathMethod opts
+ , writerIncremental = optIncremental opts
+ , writerCiteMethod = optCiteMethod opts
+ , writerNumberSections = optNumberSections opts
+ , writerNumberOffset = optNumberOffset opts
+ , writerSectionDivs = optSectionDivs opts
+ , writerExtensions = writerExts
+ , writerReferenceLinks = optReferenceLinks opts
+ , writerReferenceLocation = optReferenceLocation opts
+ , writerDpi = optDpi opts
+ , writerWrapText = optWrapText opts
+ , writerColumns = optColumns opts
+ , writerEmailObfuscation = optEmailObfuscation opts
+ , writerIdentifierPrefix = optIdentifierPrefix opts
+ , writerHtmlQTags = optHtmlQTags opts
+ , writerTopLevelDivision = optTopLevelDivision opts
+ , writerListings = optListings opts
+ , writerSlideLevel = optSlideLevel opts
+ , writerHighlightStyle = highlightStyle
+ , writerSetextHeaders = optSetextHeaders opts
+ , writerEpubSubdirectory = optEpubSubdirectory opts
+ , writerEpubMetadata = epubMetadata
+ , writerEpubFonts = optEpubFonts opts
+ , writerEpubChapterLevel = optEpubChapterLevel opts
+ , writerTOCDepth = optTOCDepth opts
+ , writerReferenceDoc = optReferenceDoc opts
+ , writerSyntaxMap = syntaxMap
+ }
+
+ let readerOpts = def{
+ readerStandalone = standalone
+ , readerColumns = optColumns opts
+ , readerTabStop = optTabStop opts
+ , readerIndentedCodeClasses = optIndentedCodeClasses opts
+ , readerDefaultImageExtension =
+ optDefaultImageExtension opts
+ , readerTrackChanges = optTrackChanges opts
+ , readerAbbreviations = abbrevs
+ , readerExtensions = readerExts
+ , readerStripComments = optStripComments opts
+ }
+
+ let transforms = (case optBaseHeaderLevel opts of
+ x | x > 1 -> (headerShift (x - 1) :)
+ | otherwise -> id) .
+ (if optStripEmptyParagraphs opts
+ then (stripEmptyParagraphs :)
+ else id) .
+ (if extensionEnabled Ext_east_asian_line_breaks
+ readerExts &&
+ not (extensionEnabled Ext_east_asian_line_breaks
+ writerExts &&
+ writerWrapText writerOptions == WrapPreserve)
+ then (eastAsianLineBreakFilter :)
+ else id) $
+ []
+
+ let sourceToDoc :: [FilePath] -> PandocIO Pandoc
+ sourceToDoc sources' =
+ case reader of
+ TextReader r
+ | optFileScope opts || readerName == "json" ->
+ mconcat <$> mapM (readSource >=> r readerOpts) sources
+ | otherwise ->
+ readSources sources' >>= r readerOpts
+ ByteStringReader r ->
+ mconcat <$> mapM (readFile' >=> r readerOpts) sources
+
+
+ when (readerName == "markdown_github" ||
+ writerName == "markdown_github") $
+ report $ Deprecated "markdown_github" "Use gfm instead."
+
+ setResourcePath (optResourcePath opts)
+ mapM_ (uncurry setRequestHeader) (optRequestHeaders opts)
+
+ doc <- sourceToDoc sources >>=
+ ( (if isJust (optExtractMedia opts)
+ then fillMediaBag
+ else return)
+ >=> return . addMetadata metadata
+ >=> applyTransforms transforms
+ >=> applyFilters readerOpts filters' [format]
+ >=> maybe return extractMedia (optExtractMedia opts)
+ )
+
+ case writer of
+ ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile
+ TextWriter f -> case maybePdfProg of
+ Just pdfProg -> do
+ res <- makePDF pdfProg (optPdfEngineArgs opts) f
+ writerOptions doc
+ case res of
+ Right pdf -> writeFnBinary outputFile pdf
+ Left err' -> liftIO $
+ E.throwIO $ PandocPDFError $
+ TL.unpack (TE.decodeUtf8With TE.lenientDecode err')
+
+ Nothing -> do
+ let htmlFormat = format `elem`
+ ["html","html4","html5","s5","slidy",
+ "slideous","dzslides","revealjs"]
+ handleEntities = if (htmlFormat ||
+ format == "docbook4" ||
+ format == "docbook5" ||
+ format == "docbook") && optAscii opts
+ then toEntities
+ else id
+ addNl = if standalone
+ then id
+ else (<> T.singleton '\n')
+ output <- (addNl . handleEntities) <$> f writerOptions doc
+ writerFn eol outputFile =<<
+ if optSelfContained opts && htmlFormat
+ -- TODO not maximally efficient; change type
+ -- of makeSelfContained so it works w/ Text
+ then T.pack <$> makeSelfContained (T.unpack output)
+ else return output
+
+type Transform = Pandoc -> Pandoc
+
+isTextFormat :: String -> Bool
+isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"]
+
+-- | Data structure for command line options.
+data Opt = Opt
+ { optTabStop :: Int -- ^ Number of spaces per tab
+ , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces
+ , optStandalone :: Bool -- ^ Include header, footer
+ , optReader :: Maybe String -- ^ Reader format
+ , optWriter :: Maybe String -- ^ Writer format
+ , optTableOfContents :: Bool -- ^ Include table of contents
+ , optBaseHeaderLevel :: Int -- ^ Base header level
+ , optTemplate :: Maybe FilePath -- ^ Custom template
+ , optVariables :: [(String,String)] -- ^ Template variables to set
+ , optMetadata :: [(String, String)] -- ^ Metadata fields to set
+ , optOutputFile :: Maybe FilePath -- ^ Name of output file
+ , optInputFiles :: [FilePath] -- ^ Names of input files
+ , optNumberSections :: Bool -- ^ Number sections in LaTeX
+ , optNumberOffset :: [Int] -- ^ Starting number for sections
+ , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML
+ , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5
+ , optSelfContained :: Bool -- ^ Make HTML accessible offline
+ , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML
+ , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code
+ , optSyntaxDefinitions :: [FilePath] -- ^ xml syntax defs to load
+ , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions
+ , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
+ , optAbbreviations :: Maybe FilePath -- ^ Path to abbrevs file
+ , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc
+ , optEpubSubdirectory :: String -- ^ EPUB subdir in OCF container
+ , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata
+ , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed
+ , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters
+ , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub
+ , optTOCDepth :: Int -- ^ Number of levels to include in TOC
+ , optDumpArgs :: Bool -- ^ Output command-line arguments
+ , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
+ , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output
+ , optTrace :: Bool -- ^ Enable tracing
+ , optLogFile :: Maybe FilePath -- ^ File to write JSON log output
+ , optFailIfWarnings :: Bool -- ^ Fail on warnings
+ , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output
+ , optDpi :: Int -- ^ Dpi
+ , optWrapText :: WrapOption -- ^ Options for wrapping text
+ , optColumns :: Int -- ^ Line length in characters
+ , optFilters :: [Filter] -- ^ Filters to apply
+ , optEmailObfuscation :: ObfuscationMethod
+ , optIdentifierPrefix :: String
+ , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs
+ , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
+ , optDataDir :: Maybe FilePath
+ , optCiteMethod :: CiteMethod -- ^ Method to output cites
+ , optListings :: Bool -- ^ Use listings package for code blocks
+ , optPdfEngine :: Maybe String -- ^ Program to use for latex/html -> pdf
+ , optPdfEngineArgs :: [String] -- ^ Flags to pass to the engine
+ , optSlideLevel :: Maybe Int -- ^ Header level that creates slides
+ , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2
+ , optAscii :: Bool -- ^ Use ascii characters only in html
+ , optDefaultImageExtension :: String -- ^ Default image extension
+ , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media
+ , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes.
+ , optFileScope :: Bool -- ^ Parse input files before combining
+ , optTitlePrefix :: Maybe String -- ^ Prefix for title
+ , optCss :: [FilePath] -- ^ CSS files to link to
+ , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before
+ , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body
+ , optIncludeInHeader :: [FilePath] -- ^ Files to include in header
+ , optResourcePath :: [FilePath] -- ^ Path to search for images etc
+ , optRequestHeaders :: [(String, String)] -- ^ Headers for HTTP requests
+ , optEol :: LineEnding -- ^ Style of line-endings to use
+ , optStripComments :: Bool -- ^ Skip HTML comments
+ } deriving (Generic, Show)
+
+-- | Defaults for command-line options.
+defaultOpts :: Opt
+defaultOpts = Opt
+ { optTabStop = 4
+ , optPreserveTabs = False
+ , optStandalone = False
+ , optReader = Nothing
+ , optWriter = Nothing
+ , optTableOfContents = False
+ , optBaseHeaderLevel = 1
+ , optTemplate = Nothing
+ , optVariables = []
+ , optMetadata = []
+ , optOutputFile = Nothing
+ , optInputFiles = []
+ , optNumberSections = False
+ , optNumberOffset = [0,0,0,0,0,0]
+ , optSectionDivs = False
+ , optIncremental = False
+ , optSelfContained = False
+ , optHtmlQTags = False
+ , optHighlightStyle = Just "pygments"
+ , optSyntaxDefinitions = []
+ , optTopLevelDivision = TopLevelDefault
+ , optHTMLMathMethod = PlainMath
+ , optAbbreviations = Nothing
+ , optReferenceDoc = Nothing
+ , optEpubSubdirectory = "EPUB"
+ , optEpubMetadata = Nothing
+ , optEpubFonts = []
+ , optEpubChapterLevel = 1
+ , optEpubCoverImage = Nothing
+ , optTOCDepth = 3
+ , optDumpArgs = False
+ , optIgnoreArgs = False
+ , optVerbosity = WARNING
+ , optTrace = False
+ , optLogFile = Nothing
+ , optFailIfWarnings = False
+ , optReferenceLinks = False
+ , optReferenceLocation = EndOfDocument
+ , optDpi = 96
+ , optWrapText = WrapAuto
+ , optColumns = 72
+ , optFilters = []
+ , optEmailObfuscation = NoObfuscation
+ , optIdentifierPrefix = ""
+ , optStripEmptyParagraphs = False
+ , optIndentedCodeClasses = []
+ , optDataDir = Nothing
+ , optCiteMethod = Citeproc
+ , optListings = False
+ , optPdfEngine = Nothing
+ , optPdfEngineArgs = []
+ , optSlideLevel = Nothing
+ , optSetextHeaders = True
+ , optAscii = False
+ , optDefaultImageExtension = ""
+ , optExtractMedia = Nothing
+ , optTrackChanges = AcceptChanges
+ , optFileScope = False
+ , optTitlePrefix = Nothing
+ , optCss = []
+ , optIncludeBeforeBody = []
+ , optIncludeAfterBody = []
+ , optIncludeInHeader = []
+ , optResourcePath = ["."]
+ , optRequestHeaders = []
+ , optEol = Native
+ , optStripComments = False
+ }
+
+addMetadata :: [(String, String)] -> Pandoc -> Pandoc
+addMetadata kvs pdc = foldr addMeta (removeMetaKeys kvs pdc) kvs
+
+addMeta :: (String, String) -> Pandoc -> Pandoc
+addMeta (k, v) (Pandoc meta bs) = Pandoc meta' bs
+ where meta' = case lookupMeta k meta of
+ Nothing -> setMeta k v' meta
+ Just (MetaList xs) ->
+ setMeta k (MetaList (xs ++ [v'])) meta
+ Just x -> setMeta k (MetaList [x, v']) meta
+ v' = readMetaValue v
+
+removeMetaKeys :: [(String,String)] -> Pandoc -> Pandoc
+removeMetaKeys kvs pdc = foldr (deleteMeta . fst) pdc kvs
+
+readMetaValue :: String -> MetaValue
+readMetaValue s = case decode (UTF8.fromString s) of
+ Just (Yaml.String t) -> MetaString $ T.unpack t
+ Just (Yaml.Bool b) -> MetaBool b
+ _ -> MetaString s
+
+-- Determine default reader based on source file extensions
+defaultReaderName :: String -> [FilePath] -> String
+defaultReaderName fallback [] = fallback
+defaultReaderName fallback (x:xs) =
+ case takeExtension (map toLower x) of
+ ".xhtml" -> "html"
+ ".html" -> "html"
+ ".htm" -> "html"
+ ".md" -> "markdown"
+ ".markdown" -> "markdown"
+ ".muse" -> "muse"
+ ".tex" -> "latex"
+ ".latex" -> "latex"
+ ".ltx" -> "latex"
+ ".rst" -> "rst"
+ ".org" -> "org"
+ ".lhs" -> "markdown+lhs"
+ ".db" -> "docbook"
+ ".opml" -> "opml"
+ ".wiki" -> "mediawiki"
+ ".dokuwiki" -> "dokuwiki"
+ ".textile" -> "textile"
+ ".native" -> "native"
+ ".json" -> "json"
+ ".docx" -> "docx"
+ ".t2t" -> "t2t"
+ ".epub" -> "epub"
+ ".odt" -> "odt"
+ ".pdf" -> "pdf" -- so we get an "unknown reader" error
+ ".doc" -> "doc" -- so we get an "unknown reader" error
+ _ -> defaultReaderName fallback xs
+
+-- Determine default writer based on output file extension
+defaultWriterName :: FilePath -> String
+defaultWriterName "-" = "html" -- no output file
+defaultWriterName x =
+ case takeExtension (map toLower x) of
+ "" -> "markdown" -- empty extension
+ ".tex" -> "latex"
+ ".latex" -> "latex"
+ ".ltx" -> "latex"
+ ".context" -> "context"
+ ".ctx" -> "context"
+ ".rtf" -> "rtf"
+ ".rst" -> "rst"
+ ".s5" -> "s5"
+ ".native" -> "native"
+ ".json" -> "json"
+ ".txt" -> "markdown"
+ ".text" -> "markdown"
+ ".md" -> "markdown"
+ ".muse" -> "muse"
+ ".markdown" -> "markdown"
+ ".textile" -> "textile"
+ ".lhs" -> "markdown+lhs"
+ ".texi" -> "texinfo"
+ ".texinfo" -> "texinfo"
+ ".db" -> "docbook"
+ ".odt" -> "odt"
+ ".docx" -> "docx"
+ ".epub" -> "epub"
+ ".org" -> "org"
+ ".asciidoc" -> "asciidoc"
+ ".adoc" -> "asciidoc"
+ ".fb2" -> "fb2"
+ ".opml" -> "opml"
+ ".icml" -> "icml"
+ ".tei.xml" -> "tei"
+ ".tei" -> "tei"
+ ".ms" -> "ms"
+ ".roff" -> "ms"
+ ".pptx" -> "pptx"
+ ['.',y] | y `elem` ['1'..'9'] -> "man"
+ _ -> "html"
+
+-- Transformations of a Pandoc document post-parsing:
+
+applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
+applyTransforms transforms d = return $ foldr ($) d transforms
+
+readSource :: FilePath -> PandocIO Text
+readSource "-" = liftIO (UTF8.toText <$> BS.getContents)
+readSource src = case parseURI src of
+ Just u | uriScheme u `elem` ["http:","https:"] ->
+ readURI src
+ | uriScheme u == "file:" ->
+ liftIO $ UTF8.toText <$>
+ BS.readFile (uriPath u)
+ _ -> liftIO $ UTF8.toText <$>
+ BS.readFile src
+
+readURI :: FilePath -> PandocIO Text
+readURI src = UTF8.toText . fst <$> openURL src
+
+readFile' :: MonadIO m => FilePath -> m B.ByteString
+readFile' "-" = liftIO B.getContents
+readFile' f = liftIO $ B.readFile f
+
+writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m ()
+writeFnBinary "-" = liftIO . B.putStr
+writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f)
+
+writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
+-- TODO this implementation isn't maximally efficient:
+writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack
+writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack
+
+lookupHighlightStyle :: Maybe String -> IO (Maybe Style)
+lookupHighlightStyle Nothing = return Nothing
+lookupHighlightStyle (Just s)
+ | takeExtension s == ".theme" = -- attempt to load KDE theme
+ do contents <- B.readFile s
+ case parseTheme contents of
+ Left _ -> E.throwIO $ PandocOptionError $
+ "Could not read highlighting theme " ++ s
+ Right sty -> return (Just sty)
+ | otherwise =
+ case lookup (map toLower s) highlightingStyles of
+ Just sty -> return (Just sty)
+ Nothing -> E.throwIO $ PandocOptionError $
+ "Unknown highlight-style " ++ s
+
+-- | A list of functions, each transforming the options data structure
+-- in response to a command-line option.
+options :: [OptDescr (Opt -> IO Opt)]
+options =
+ [ Option "fr" ["from","read"]
+ (ReqArg
+ (\arg opt -> return opt { optReader =
+ Just (map toLower arg) })
+ "FORMAT")
+ ""
+
+ , Option "tw" ["to","write"]
+ (ReqArg
+ (\arg opt -> return opt { optWriter =
+ Just (map toLower arg) })
+ "FORMAT")
+ ""
+
+ , Option "o" ["output"]
+ (ReqArg
+ (\arg opt -> return opt { optOutputFile = Just arg })
+ "FILE")
+ "" -- "Name of output file"
+
+ , Option "" ["data-dir"]
+ (ReqArg
+ (\arg opt -> return opt { optDataDir = Just arg })
+ "DIRECTORY") -- "Directory containing pandoc data files."
+ ""
+
+ , Option "" ["base-header-level"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t > 0 && t < 6 ->
+ return opt{ optBaseHeaderLevel = t }
+ _ -> E.throwIO $ PandocOptionError
+ "base-header-level must be 1-5")
+ "NUMBER")
+ "" -- "Headers base level"
+
+ , Option "" ["strip-empty-paragraphs"]
+ (NoArg
+ (\opt -> do
+ deprecatedOption "--stripEmptyParagraphs"
+ "Use +empty_paragraphs extension."
+ return opt{ optStripEmptyParagraphs = True }))
+ "" -- "Strip empty paragraphs"
+
+ , Option "" ["indented-code-classes"]
+ (ReqArg
+ (\arg opt -> return opt { optIndentedCodeClasses = words $
+ map (\c -> if c == ',' then ' ' else c) arg })
+ "STRING")
+ "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks"
+
+ , Option "F" ["filter"]
+ (ReqArg
+ (\arg opt -> return opt { optFilters =
+ JSONFilter arg : optFilters opt })
+ "PROGRAM")
+ "" -- "External JSON filter"
+
+ , Option "" ["lua-filter"]
+ (ReqArg
+ (\arg opt -> return opt { optFilters =
+ LuaFilter arg : optFilters opt })
+ "SCRIPTPATH")
+ "" -- "Lua filter"
+
+ , Option "p" ["preserve-tabs"]
+ (NoArg
+ (\opt -> return opt { optPreserveTabs = True }))
+ "" -- "Preserve tabs instead of converting to spaces"
+
+ , Option "" ["tab-stop"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t > 0 -> return opt { optTabStop = t }
+ _ -> E.throwIO $ PandocOptionError
+ "tab-stop must be a number greater than 0")
+ "NUMBER")
+ "" -- "Tab stop (default 4)"
+
+ , Option "" ["track-changes"]
+ (ReqArg
+ (\arg opt -> do
+ action <- case arg of
+ "accept" -> return AcceptChanges
+ "reject" -> return RejectChanges
+ "all" -> return AllChanges
+ _ -> E.throwIO $ PandocOptionError
+ ("Unknown option for track-changes: " ++ arg)
+ return opt { optTrackChanges = action })
+ "accept|reject|all")
+ "" -- "Accepting or reject MS Word track-changes.""
+
+ , Option "" ["file-scope"]
+ (NoArg
+ (\opt -> return opt { optFileScope = True }))
+ "" -- "Parse input files before combining"
+
+ , Option "" ["extract-media"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optExtractMedia = Just arg })
+ "PATH")
+ "" -- "Directory to which to extract embedded media"
+
+ , Option "s" ["standalone"]
+ (NoArg
+ (\opt -> return opt { optStandalone = True }))
+ "" -- "Include needed header and footer on output"
+
+ , Option "" ["template"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optTemplate = Just arg,
+ optStandalone = True })
+ "FILE")
+ "" -- "Use custom template"
+
+ , Option "M" ["metadata"]
+ (ReqArg
+ (\arg opt -> do
+ let (key, val) = splitField arg
+ return opt{ optMetadata = (key, val) : optMetadata opt })
+ "KEY[:VALUE]")
+ ""
+
+ , Option "V" ["variable"]
+ (ReqArg
+ (\arg opt -> do
+ let (key, val) = splitField arg
+ return opt{ optVariables = (key, val) : optVariables opt })
+ "KEY[:VALUE]")
+ ""
+
+ , Option "D" ["print-default-template"]
+ (ReqArg
+ (\arg _ -> do
+ templ <- runIO $ do
+ setUserDataDir Nothing
+ getDefaultTemplate arg
+ case templ of
+ Right t -> UTF8.hPutStr stdout t
+ Left e -> E.throwIO e
+ exitSuccess)
+ "FORMAT")
+ "" -- "Print default template for FORMAT"
+
+ , Option "" ["print-default-data-file"]
+ (ReqArg
+ (\arg _ -> do
+ runIOorExplode $
+ readDefaultDataFile arg >>= liftIO . BS.hPutStr stdout
+ exitSuccess)
+ "FILE")
+ "" -- "Print default data file"
+
+ , Option "" ["print-highlight-style"]
+ (ReqArg
+ (\arg _ -> do
+ sty <- fromMaybe pygments <$>
+ lookupHighlightStyle (Just arg)
+ B.putStr $ encodePretty'
+ defConfig{confIndent = Spaces 4
+ ,confCompare = keyOrder
+ (map T.pack
+ ["text-color"
+ ,"background-color"
+ ,"line-number-color"
+ ,"line-number-background-color"
+ ,"bold"
+ ,"italic"
+ ,"underline"
+ ,"text-styles"])
+ ,confNumFormat = Generic
+ ,confTrailingNewline = True} sty
+ exitSuccess)
+ "STYLE|FILE")
+ "" -- "Print default template for FORMAT"
+
+ , Option "" ["dpi"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t > 0 -> return opt { optDpi = t }
+ _ -> E.throwIO $ PandocOptionError
+ "dpi must be a number greater than 0")
+ "NUMBER")
+ "" -- "Dpi (default 96)"
+
+ , Option "" ["eol"]
+ (ReqArg
+ (\arg opt ->
+ case toLower <$> arg of
+ "crlf" -> return opt { optEol = CRLF }
+ "lf" -> return opt { optEol = LF }
+ "native" -> return opt { optEol = Native }
+ -- mac-syntax (cr) is not supported in ghc-base.
+ _ -> E.throwIO $ PandocOptionError
+ "--eol must be crlf, lf, or native")
+ "crlf|lf|native")
+ "" -- "EOL (default OS-dependent)"
+
+ , Option "" ["wrap"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead ("Wrap" ++ uppercaseFirstLetter arg) of
+ Just o -> return opt { optWrapText = o }
+ Nothing -> E.throwIO $ PandocOptionError
+ "--wrap must be auto, none, or preserve")
+ "auto|none|preserve")
+ "" -- "Option for wrapping text in output"
+
+ , Option "" ["columns"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t > 0 -> return opt { optColumns = t }
+ _ -> E.throwIO $ PandocOptionError
+ "columns must be a number greater than 0")
+ "NUMBER")
+ "" -- "Length of line in characters"
+
+ , Option "" ["strip-comments"]
+ (NoArg
+ (\opt -> return opt { optStripComments = True }))
+ "" -- "Strip HTML comments"
+
+ , Option "" ["toc", "table-of-contents"]
+ (NoArg
+ (\opt -> return opt { optTableOfContents = True }))
+ "" -- "Include table of contents"
+
+ , Option "" ["toc-depth"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t >= 1 && t <= 6 ->
+ return opt { optTOCDepth = t }
+ _ -> E.throwIO $ PandocOptionError
+ "TOC level must be a number between 1 and 6")
+ "NUMBER")
+ "" -- "Number of levels to include in TOC"
+
+ , Option "" ["no-highlight"]
+ (NoArg
+ (\opt -> return opt { optHighlightStyle = Nothing }))
+ "" -- "Don't highlight source code"
+
+ , Option "" ["highlight-style"]
+ (ReqArg
+ (\arg opt -> return opt{ optHighlightStyle = Just arg })
+ "STYLE|FILE")
+ "" -- "Style for highlighted code"
+
+ , Option "" ["syntax-definition"]
+ (ReqArg
+ (\arg opt -> return opt{ optSyntaxDefinitions = arg :
+ optSyntaxDefinitions opt })
+ "FILE")
+ "" -- "Syntax definition (xml) file"
+
+ , Option "H" ["include-in-header"]
+ (ReqArg
+ (\arg opt -> return opt{ optIncludeInHeader =
+ arg : optIncludeInHeader opt,
+ optStandalone = True })
+ "FILE")
+ "" -- "File to include at end of header (implies -s)"
+
+ , Option "B" ["include-before-body"]
+ (ReqArg
+ (\arg opt -> return opt{ optIncludeBeforeBody =
+ arg : optIncludeBeforeBody opt,
+ optStandalone = True })
+ "FILE")
+ "" -- "File to include before document body"
+
+ , Option "A" ["include-after-body"]
+ (ReqArg
+ (\arg opt -> return opt{ optIncludeAfterBody =
+ arg : optIncludeAfterBody opt,
+ optStandalone = True })
+ "FILE")
+ "" -- "File to include after document body"
+
+ , Option "" ["resource-path"]
+ (ReqArg
+ (\arg opt -> return opt { optResourcePath =
+ splitSearchPath arg })
+ "SEARCHPATH")
+ "" -- "Paths to search for images and other resources"
+
+ , Option "" ["request-header"]
+ (ReqArg
+ (\arg opt -> do
+ let (key, val) = splitField arg
+ return opt{ optRequestHeaders =
+ (key, val) : optRequestHeaders opt })
+ "NAME:VALUE")
+ ""
+
+ , Option "" ["self-contained"]
+ (NoArg
+ (\opt -> return opt { optSelfContained = True,
+ optStandalone = True }))
+ "" -- "Make slide shows include all the needed js and css"
+
+ , Option "" ["html-q-tags"]
+ (NoArg
+ (\opt ->
+ return opt { optHtmlQTags = True }))
+ "" -- "Use <q> tags for quotes in HTML"
+
+ , Option "" ["ascii"]
+ (NoArg
+ (\opt -> return opt { optAscii = True }))
+ "" -- "Use ascii characters only in HTML output"
+
+ , Option "" ["reference-links"]
+ (NoArg
+ (\opt -> return opt { optReferenceLinks = True } ))
+ "" -- "Use reference links in parsing HTML"
+
+ , Option "" ["reference-location"]
+ (ReqArg
+ (\arg opt -> do
+ action <- case arg of
+ "block" -> return EndOfBlock
+ "section" -> return EndOfSection
+ "document" -> return EndOfDocument
+ _ -> E.throwIO $ PandocOptionError
+ ("Unknown option for reference-location: " ++ arg)
+ return opt { optReferenceLocation = action })
+ "block|section|document")
+ "" -- "Accepting or reject MS Word track-changes.""
+
+ , Option "" ["atx-headers"]
+ (NoArg
+ (\opt -> return opt { optSetextHeaders = False } ))
+ "" -- "Use atx-style headers for markdown"
+
+ , Option "" ["top-level-division"]
+ (ReqArg
+ (\arg opt -> do
+ let tldName = "TopLevel" ++ uppercaseFirstLetter arg
+ case safeRead tldName of
+ Just tlDiv -> return opt { optTopLevelDivision = tlDiv }
+ _ -> E.throwIO $ PandocOptionError
+ ("Top-level division must be " ++
+ "section, chapter, part, or default"))
+ "section|chapter|part")
+ "" -- "Use top-level division type in LaTeX, ConTeXt, DocBook"
+
+ , Option "N" ["number-sections"]
+ (NoArg
+ (\opt -> return opt { optNumberSections = True }))
+ "" -- "Number sections in LaTeX"
+
+ , Option "" ["number-offset"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead ('[':arg ++ "]") of
+ Just ns -> return opt { optNumberOffset = ns,
+ optNumberSections = True }
+ _ -> E.throwIO $ PandocOptionError
+ "could not parse number-offset")
+ "NUMBERS")
+ "" -- "Starting number for sections, subsections, etc."
+
+ , Option "" ["listings"]
+ (NoArg
+ (\opt -> return opt { optListings = True }))
+ "" -- "Use listings package for LaTeX code blocks"
+
+ , Option "i" ["incremental"]
+ (NoArg
+ (\opt -> return opt { optIncremental = True }))
+ "" -- "Make list items display incrementally in Slidy/Slideous/S5"
+
+ , Option "" ["slide-level"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t >= 1 && t <= 6 ->
+ return opt { optSlideLevel = Just t }
+ _ -> E.throwIO $ PandocOptionError
+ "slide level must be a number between 1 and 6")
+ "NUMBER")
+ "" -- "Force header level for slides"
+
+ , Option "" ["section-divs"]
+ (NoArg
+ (\opt -> return opt { optSectionDivs = True }))
+ "" -- "Put sections in div tags in HTML"
+
+ , Option "" ["default-image-extension"]
+ (ReqArg
+ (\arg opt -> return opt { optDefaultImageExtension = arg })
+ "extension")
+ "" -- "Default extension for extensionless images"
+
+ , Option "" ["email-obfuscation"]
+ (ReqArg
+ (\arg opt -> do
+ method <- case arg of
+ "references" -> return ReferenceObfuscation
+ "javascript" -> return JavascriptObfuscation
+ "none" -> return NoObfuscation
+ _ -> E.throwIO $ PandocOptionError
+ ("Unknown obfuscation method: " ++ arg)
+ return opt { optEmailObfuscation = method })
+ "none|javascript|references")
+ "" -- "Method for obfuscating email in HTML"
+
+ , Option "" ["id-prefix"]
+ (ReqArg
+ (\arg opt -> return opt { optIdentifierPrefix = arg })
+ "STRING")
+ "" -- "Prefix to add to automatically generated HTML identifiers"
+
+ , Option "T" ["title-prefix"]
+ (ReqArg
+ (\arg opt -> do
+ let newvars = ("title-prefix", arg) : optVariables opt
+ return opt { optVariables = newvars,
+ optStandalone = True })
+ "STRING")
+ "" -- "String to prefix to HTML window title"
+
+ , Option "c" ["css"]
+ (ReqArg
+ (\arg opt -> return opt{ optCss = arg : optCss opt })
+ -- add new link to end, so it is included in proper order
+ "URL")
+ "" -- "Link to CSS style sheet"
+
+ , Option "" ["reference-doc"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optReferenceDoc = Just arg })
+ "FILE")
+ "" -- "Path of custom reference doc"
+
+ , Option "" ["epub-subdirectory"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optEpubSubdirectory = arg })
+ "DIRNAME")
+ "" -- "Name of subdirectory for epub content in OCF container"
+
+ , Option "" ["epub-cover-image"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optVariables =
+ ("epub-cover-image", arg) : optVariables opt })
+ "FILE")
+ "" -- "Path of epub cover image"
+
+ , Option "" ["epub-metadata"]
+ (ReqArg
+ (\arg opt -> return opt { optEpubMetadata = Just arg })
+ "FILE")
+ "" -- "Path of epub metadata file"
+
+ , Option "" ["epub-embed-font"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optEpubFonts = arg : optEpubFonts opt })
+ "FILE")
+ "" -- "Directory of fonts to embed"
+
+ , Option "" ["epub-chapter-level"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t >= 1 && t <= 6 ->
+ return opt { optEpubChapterLevel = t }
+ _ -> E.throwIO $ PandocOptionError
+ "chapter level must be a number between 1 and 6")
+ "NUMBER")
+ "" -- "Header level at which to split chapters in EPUB"
+
+ , Option "" ["pdf-engine"]
+ (ReqArg
+ (\arg opt -> do
+ let b = takeBaseName arg
+ if b `elem` pdfEngines
+ then return opt { optPdfEngine = Just arg }
+ else E.throwIO $ PandocOptionError $ "pdf-engine must be one of "
+ ++ intercalate ", " pdfEngines)
+ "PROGRAM")
+ "" -- "Name of program to use in generating PDF"
+
+ , Option "" ["pdf-engine-opt"]
+ (ReqArg
+ (\arg opt -> do
+ let oldArgs = optPdfEngineArgs opt
+ return opt { optPdfEngineArgs = oldArgs ++ [arg]})
+ "STRING")
+ "" -- "Flags to pass to the PDF-engine, all instances of this option are accumulated and used"
+
+ , Option "" ["bibliography"]
+ (ReqArg
+ (\arg opt -> return opt{ optMetadata =
+ ("bibliography", arg) : optMetadata opt })
+ "FILE")
+ ""
+
+ , Option "" ["csl"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optMetadata =
+ ("csl", arg) : optMetadata opt })
+ "FILE")
+ ""
+
+ , Option "" ["citation-abbreviations"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optMetadata =
+ ("citation-abbreviations", arg): optMetadata opt })
+ "FILE")
+ ""
+
+ , Option "" ["natbib"]
+ (NoArg
+ (\opt -> return opt { optCiteMethod = Natbib }))
+ "" -- "Use natbib cite commands in LaTeX output"
+
+ , Option "" ["biblatex"]
+ (NoArg
+ (\opt -> return opt { optCiteMethod = Biblatex }))
+ "" -- "Use biblatex cite commands in LaTeX output"
+
+ , Option "" ["mathml"]
+ (NoArg
+ (\opt ->
+ return opt { optHTMLMathMethod = MathML }))
+ "" -- "Use mathml for HTML math"
+
+ , Option "" ["webtex"]
+ (OptArg
+ (\arg opt -> do
+ let url' = fromMaybe "https://latex.codecogs.com/png.latex?" arg
+ return opt { optHTMLMathMethod = WebTeX url' })
+ "URL")
+ "" -- "Use web service for HTML math"
+
+ , Option "" ["mathjax"]
+ (OptArg
+ (\arg opt -> do
+ let url' = fromMaybe (defaultMathJaxURL ++
+ "MathJax.js?config=TeX-AMS_CHTML-full") arg
+ return opt { optHTMLMathMethod = MathJax url'})
+ "URL")
+ "" -- "Use MathJax for HTML math"
+
+ , Option "" ["katex"]
+ (OptArg
+ (\arg opt ->
+ return opt
+ { optHTMLMathMethod = KaTeX $
+ fromMaybe defaultKaTeXURL arg })
+ "URL")
+ "" -- Use KaTeX for HTML Math
+
+ , Option "m" ["latexmathml", "asciimathml"]
+ (OptArg
+ (\arg opt -> do
+ deprecatedOption "--latexmathml, --asciimathml, -m" ""
+ return opt { optHTMLMathMethod = LaTeXMathML arg })
+ "URL")
+ "" -- "Use LaTeXMathML script in html output"
+
+ , Option "" ["mimetex"]
+ (OptArg
+ (\arg opt -> do
+ deprecatedOption "--mimetex" ""
+ let url' = case arg of
+ Just u -> u ++ "?"
+ Nothing -> "/cgi-bin/mimetex.cgi?"
+ return opt { optHTMLMathMethod = WebTeX url' })
+ "URL")
+ "" -- "Use mimetex for HTML math"
+
+ , Option "" ["jsmath"]
+ (OptArg
+ (\arg opt -> do
+ deprecatedOption "--jsmath" ""
+ return opt { optHTMLMathMethod = JsMath arg})
+ "URL")
+ "" -- "Use jsMath for HTML math"
+
+ , Option "" ["gladtex"]
+ (NoArg
+ (\opt -> do
+ deprecatedOption "--gladtex" ""
+ return opt { optHTMLMathMethod = GladTeX }))
+ "" -- "Use gladtex for HTML math"
+
+ , Option "" ["abbreviations"]
+ (ReqArg
+ (\arg opt -> return opt { optAbbreviations = Just arg })
+ "FILE")
+ "" -- "Specify file for custom abbreviations"
+
+ , Option "" ["trace"]
+ (NoArg
+ (\opt -> return opt { optTrace = True }))
+ "" -- "Turn on diagnostic tracing in readers."
+
+ , Option "" ["dump-args"]
+ (NoArg
+ (\opt -> return opt { optDumpArgs = True }))
+ "" -- "Print output filename and arguments to stdout."
+
+ , Option "" ["ignore-args"]
+ (NoArg
+ (\opt -> return opt { optIgnoreArgs = True }))
+ "" -- "Ignore command-line arguments."
+
+ , Option "" ["verbose"]
+ (NoArg
+ (\opt -> return opt { optVerbosity = INFO }))
+ "" -- "Verbose diagnostic output."
+
+ , Option "" ["quiet"]
+ (NoArg
+ (\opt -> return opt { optVerbosity = ERROR }))
+ "" -- "Suppress warnings."
+
+ , Option "" ["fail-if-warnings"]
+ (NoArg
+ (\opt -> return opt { optFailIfWarnings = True }))
+ "" -- "Exit with error status if there were warnings."
+
+ , Option "" ["log"]
+ (ReqArg
+ (\arg opt -> return opt{ optLogFile = Just arg })
+ "FILE")
+ "" -- "Log messages in JSON format to this file."
+
+ , Option "" ["bash-completion"]
+ (NoArg
+ (\_ -> do
+ ddir <- getDataDir
+ tpl <- runIOorExplode $
+ UTF8.toString <$>
+ readDefaultDataFile "bash_completion.tpl"
+ let optnames (Option shorts longs _ _) =
+ map (\c -> ['-',c]) shorts ++
+ map ("--" ++) longs
+ let allopts = unwords (concatMap optnames options)
+ UTF8.hPutStrLn stdout $ printf tpl allopts
+ (unwords readersNames)
+ (unwords writersNames)
+ (unwords $ map fst highlightingStyles)
+ ddir
+ exitSuccess ))
+ "" -- "Print bash completion script"
+
+ , Option "" ["list-input-formats"]
+ (NoArg
+ (\_ -> do
+ mapM_ (UTF8.hPutStrLn stdout) readersNames
+ exitSuccess ))
+ ""
+
+ , Option "" ["list-output-formats"]
+ (NoArg
+ (\_ -> do
+ mapM_ (UTF8.hPutStrLn stdout) writersNames
+ exitSuccess ))
+ ""
+
+ , Option "" ["list-extensions"]
+ (OptArg
+ (\arg _ -> do
+ let exts = getDefaultExtensions (fromMaybe "markdown" arg)
+ let showExt x = (if extensionEnabled x exts
+ then '+'
+ else '-') : drop 4 (show x)
+ mapM_ (UTF8.hPutStrLn stdout . showExt)
+ ([minBound..maxBound] :: [Extension])
+ exitSuccess )
+ "FORMAT")
+ ""
+
+ , Option "" ["list-highlight-languages"]
+ (NoArg
+ (\_ -> do
+ let langs = [ T.unpack (T.toLower (sShortname s))
+ | s <- M.elems defaultSyntaxMap
+ , sShortname s `notElem`
+ [T.pack "Alert", T.pack "Alert_indent"]
+ ]
+ mapM_ (UTF8.hPutStrLn stdout) langs
+ exitSuccess ))
+ ""
+
+ , Option "" ["list-highlight-styles"]
+ (NoArg
+ (\_ -> do
+ mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles
+ exitSuccess ))
+ ""
+
+ , Option "v" ["version"]
+ (NoArg
+ (\_ -> do
+ prg <- getProgName
+ defaultDatadir <- E.catch
+ (getAppUserDataDirectory "pandoc")
+ (\e -> let _ = (e :: E.SomeException)
+ in return "")
+ UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++
+ compileInfo ++ "\nDefault user data directory: " ++
+ defaultDatadir ++ copyrightMessage)
+ exitSuccess ))
+ "" -- "Print version"
+
+ , Option "h" ["help"]
+ (NoArg
+ (\_ -> do
+ prg <- getProgName
+ UTF8.hPutStr stdout (usageMessage prg options)
+ exitSuccess ))
+ "" -- "Show help"
+
+ ]
+
+-- Returns usage message
+usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
+usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]")
+
+copyrightMessage :: String
+copyrightMessage = intercalate "\n" [
+ "",
+ "Copyright (C) 2006-2018 John MacFarlane",
+ "Web: http://pandoc.org",
+ "This is free software; see the source for copying conditions.",
+ "There is no warranty, not even for merchantability or fitness",
+ "for a particular purpose." ]
+
+compileInfo :: String
+compileInfo =
+ "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++
+ VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting
+
+handleUnrecognizedOption :: String -> [String] -> [String]
+handleUnrecognizedOption "--smart" =
+ (("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++
+ "For example: pandoc -f markdown+smart -t markdown-smart.") :)
+handleUnrecognizedOption "--normalize" =
+ ("--normalize has been removed. Normalization is now automatic." :)
+handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart"
+handleUnrecognizedOption "--old-dashes" =
+ ("--old-dashes has been removed. Use +old_dashes extension instead." :)
+handleUnrecognizedOption "--no-wrap" =
+ ("--no-wrap has been removed. Use --wrap=none instead." :)
+handleUnrecognizedOption "--latex-engine" =
+ ("--latex-engine has been removed. Use --pdf-engine instead." :)
+handleUnrecognizedOption "--latex-engine-opt" =
+ ("--latex-engine-opt has been removed. Use --pdf-engine-opt instead." :)
+handleUnrecognizedOption "--chapters" =
+ ("--chapters has been removed. Use --top-level-division=chapter instead." :)
+handleUnrecognizedOption "--reference-docx" =
+ ("--reference-docx has been removed. Use --reference-doc instead." :)
+handleUnrecognizedOption "--reference-odt" =
+ ("--reference-odt has been removed. Use --reference-doc instead." :)
+handleUnrecognizedOption "--parse-raw" =
+ ("--parse-raw/-R has been removed. Use +raw_html or +raw_tex extension.\n" :)
+handleUnrecognizedOption "--epub-stylesheet" =
+ ("--epub-stylesheet has been removed. Use --css instead.\n" :)
+handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
+handleUnrecognizedOption x =
+ (("Unknown option " ++ x ++ ".") :)
+
+uppercaseFirstLetter :: String -> String
+uppercaseFirstLetter (c:cs) = toUpper c : cs
+uppercaseFirstLetter [] = []
+
+readersNames :: [String]
+readersNames = sort (map fst (readers :: [(String, Reader PandocIO)]))
+
+writersNames :: [String]
+writersNames = sort (map fst (writers :: [(String, Writer PandocIO)]))
+
+splitField :: String -> (String, String)
+splitField s =
+ case break (`elem` ":=") s of
+ (k,_:v) -> (k,v)
+ (k,[]) -> (k,"true")
+
+baseWriterName :: String -> String
+baseWriterName = takeWhile (\c -> c /= '+' && c /= '-')
+
+deprecatedOption :: String -> String -> IO ()
+deprecatedOption o msg =
+ runIO (report $ Deprecated o msg) >>=
+ \r -> case r of
+ Right () -> return ()
+ Left e -> E.throwIO e
+
+-- see https://github.com/jgm/pandoc/pull/4083
+-- using generic deriving caused long compilation times
+$(deriveJSON defaultOptions ''LineEnding)
+$(deriveJSON defaultOptions ''Opt)
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 8eb1ba663..11d3eddac 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Asciify
- Copyright : Copyright (C) 2013-2016 John MacFarlane
+ Copyright : Copyright (C) 2013-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -30,8 +30,8 @@ ascii equivalents (used in constructing HTML identifiers).
-}
module Text.Pandoc.Asciify (toAsciiChar)
where
-import qualified Data.Map as M
import Data.Char (isAscii)
+import qualified Data.Map as M
toAsciiChar :: Char -> Maybe Char
toAsciiChar c | isAscii c = Just c
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
new file mode 100644
index 000000000..2dd825142
--- /dev/null
+++ b/src/Text/Pandoc/BCP47.hs
@@ -0,0 +1,125 @@
+{-
+Copyright (C) 2017–2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.BCP47
+ Copyright : Copyright (C) 2017–2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for parsing and rendering BCP47 language identifiers.
+-}
+module Text.Pandoc.BCP47 (
+ getLang
+ , parseBCP47
+ , Lang(..)
+ , renderLang
+ )
+where
+import Control.Monad (guard)
+import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper, toLower,
+ toUpper)
+import Data.List (intercalate)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import qualified Text.Parsec as P
+
+-- | Represents BCP 47 language/country code.
+data Lang = Lang{ langLanguage :: String
+ , langScript :: String
+ , langRegion :: String
+ , langVariants :: [String] }
+ deriving (Eq, Ord, Show)
+
+-- | Render a Lang as BCP 47.
+renderLang :: Lang -> String
+renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
+ ([langScript lang, langRegion lang] ++ langVariants lang))
+
+-- | Get the contents of the `lang` metadata field or variable.
+getLang :: WriterOptions -> Meta -> Maybe String
+getLang opts meta =
+ case lookup "lang" (writerVariables opts) of
+ Just s -> Just s
+ _ ->
+ case lookupMeta "lang" meta of
+ Just (MetaInlines [Str s]) -> Just s
+ Just (MetaString s) -> Just s
+ _ -> Nothing
+
+-- | Parse a BCP 47 string as a Lang. Currently we parse
+-- extensions and private-use fields as "variants," even
+-- though officially they aren't.
+parseBCP47 :: String -> Either String Lang
+parseBCP47 lang =
+ case P.parse bcp47 "lang" lang of
+ Right r -> Right r
+ Left e -> Left $ show e
+ where bcp47 = do
+ language <- pLanguage
+ script <- P.option "" pScript
+ region <- P.option "" pRegion
+ variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
+ P.eof
+ return Lang{ langLanguage = language
+ , langScript = script
+ , langRegion = region
+ , langVariants = variants }
+ asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
+ pLanguage = do
+ cs <- P.many1 asciiLetter
+ let lcs = length cs
+ guard $ lcs == 2 || lcs == 3
+ return $ map toLower cs
+ pScript = P.try $ do
+ P.char '-'
+ x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
+ xs <- P.count 3
+ (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
+ return $ map toLower (x:xs)
+ pRegion = P.try $ do
+ P.char '-'
+ cs <- P.many1 asciiLetter
+ let lcs = length cs
+ guard $ lcs == 2 || lcs == 3
+ return $ map toUpper cs
+ pVariant = P.try $ do
+ P.char '-'
+ ds <- P.option "" (P.count 1 P.digit)
+ cs <- P.many1 asciiLetter
+ let var = ds ++ cs
+ guard $ if null ds
+ then length var >= 5 && length var <= 8
+ else length var == 4
+ return $ map toLower var
+ pExtension = P.try $ do
+ P.char '-'
+ cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
+ guard $ length cs >= 2 && length cs <= 8
+ return $ map toLower cs
+ pPrivateUse = P.try $ do
+ P.char '-'
+ P.char 'x'
+ P.char '-'
+ cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
+ guard $ not (null cs) && length cs <= 8
+ let var = "x-" ++ cs
+ return $ map toLower var
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
index f479ed9d0..d44b5e1e2 100644
--- a/src/Text/Pandoc/CSS.hs
+++ b/src/Text/Pandoc/CSS.hs
@@ -11,7 +11,7 @@ import Text.Parsec.String
ruleParser :: Parser (String, String)
ruleParser = do
p <- many1 (noneOf ":") <* char ':'
- v <- many1 (noneOf ":;") <* (optional $ char ';') <* spaces
+ v <- many1 (noneOf ":;") <* optional (char ';') <* spaces
return (trim p, trim v)
styleAttrParser :: Parser [(String, String)]
@@ -25,14 +25,14 @@ foldOrElse v xs = foldr (orElse v) v xs
eitherToMaybe :: Either a b -> Maybe b
eitherToMaybe (Right x) = Just x
-eitherToMaybe _ = Nothing
+eitherToMaybe _ = Nothing
-- | takes a list of keys/properties and a CSS string and
-- returns the corresponding key-value-pairs.
pickStylesToKVs :: [String] -> String -> [(String, String)]
pickStylesToKVs props styleAttr =
case parse styleAttrParser "" styleAttr of
- Left _ -> []
+ Left _ -> []
Right styles -> filter (\s -> fst s `elem` props) styles
-- | takes a list of key/property synonyms and a CSS string and maybe
@@ -40,4 +40,4 @@ pickStylesToKVs props styleAttr =
pickStyleAttrProps :: [String] -> String -> Maybe String
pickStyleAttrProps lookupProps styleAttr = do
styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr
- foldOrElse Nothing $ map (flip lookup styles) lookupProps
+ foldOrElse Nothing $ map (`lookup` styles) lookupProps
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
new file mode 100644
index 000000000..3415ae88f
--- /dev/null
+++ b/src/Text/Pandoc/CSV.hs
@@ -0,0 +1,102 @@
+{-
+Copyright (C) 2017–2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.CSV
+ Copyright : Copyright (C) 2017–2018 John MacFarlane <jgm@berkeley.edu>
+ License : GNU GPL, version 2 or above
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Simple CSV parser.
+-}
+
+module Text.Pandoc.CSV (
+ CSVOptions(..),
+ defaultCSVOptions,
+ parseCSV,
+ ParseError
+) where
+
+import Control.Monad (void)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Parsec
+import Text.Parsec.Text (Parser)
+
+data CSVOptions = CSVOptions{
+ csvDelim :: Char
+ , csvQuote :: Char
+ , csvKeepSpace :: Bool -- treat whitespace following delim as significant
+ , csvEscape :: Maybe Char -- default is to double up quote
+} deriving (Read, Show)
+
+defaultCSVOptions :: CSVOptions
+defaultCSVOptions = CSVOptions{
+ csvDelim = ','
+ , csvQuote = '"'
+ , csvKeepSpace = False
+ , csvEscape = Nothing }
+
+parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
+parseCSV opts t = parse (pCSV opts) "csv" t
+
+pCSV :: CSVOptions -> Parser [[Text]]
+pCSV opts =
+ (pCSVRow opts `sepEndBy` endline) <* (spaces *> eof)
+
+pCSVRow :: CSVOptions -> Parser [Text]
+pCSVRow opts = notFollowedBy blank >> pCSVCell opts `sepBy` pCSVDelim opts
+
+blank :: Parser ()
+blank = try $ spaces >> (() <$ endline <|> eof)
+
+pCSVCell :: CSVOptions -> Parser Text
+pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
+
+pCSVQuotedCell :: CSVOptions -> Parser Text
+pCSVQuotedCell opts = do
+ char (csvQuote opts)
+ res <- many (satisfy (\c -> c /= csvQuote opts &&
+ Just c /= csvEscape opts) <|> escaped opts)
+ char (csvQuote opts)
+ return $ T.pack res
+
+escaped :: CSVOptions -> Parser Char
+escaped opts =
+ case csvEscape opts of
+ Nothing -> try $ char (csvQuote opts) >> char (csvQuote opts)
+ Just c -> try $ char c >> noneOf "\r\n"
+
+pCSVUnquotedCell :: CSVOptions -> Parser Text
+pCSVUnquotedCell opts = T.pack <$>
+ many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n'
+ && c /= csvQuote opts))
+
+pCSVDelim :: CSVOptions -> Parser ()
+pCSVDelim opts = do
+ char (csvDelim opts)
+ if csvKeepSpace opts
+ then return ()
+ else skipMany (oneOf " \t")
+
+endline :: Parser ()
+endline = do
+ optional (void $ char '\r')
+ void $ char '\n'
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
new file mode 100644
index 000000000..aa0379942
--- /dev/null
+++ b/src/Text/Pandoc/Class.hs
@@ -0,0 +1,1082 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+#if MIN_VERSION_base(4,8,0)
+#else
+{-# LANGUAGE OverlappingInstances #-}
+#endif
+
+{-
+Copyright (C) 2016-17 Jesse Rosenthal <jrosenthal@jhu.edu>
+and John MacFarlane.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Class
+ Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+This module defines a type class, 'PandocMonad', for pandoc readers
+and writers. A pure instance 'PandocPure' and an impure instance
+'PandocIO' are provided. This allows users of the library to choose
+whether they want conversions to perform IO operations (such as
+reading include files or images).
+-}
+
+module Text.Pandoc.Class ( PandocMonad(..)
+ , CommonState(..)
+ , PureState(..)
+ , getPureState
+ , getsPureState
+ , putPureState
+ , modifyPureState
+ , getPOSIXTime
+ , getZonedTime
+ , readFileFromDirs
+ , report
+ , setTrace
+ , setRequestHeader
+ , getLog
+ , setVerbosity
+ , getVerbosity
+ , getMediaBag
+ , setMediaBag
+ , insertMedia
+ , setUserDataDir
+ , getUserDataDir
+ , fetchItem
+ , getInputFiles
+ , setInputFiles
+ , getOutputFile
+ , setOutputFile
+ , setResourcePath
+ , getResourcePath
+ , PandocIO(..)
+ , PandocPure(..)
+ , FileTree
+ , FileInfo(..)
+ , addToFileTree
+ , insertInFileTree
+ , runIO
+ , runIOorExplode
+ , runPure
+ , readDefaultDataFile
+ , readDataFile
+ , fetchMediaResource
+ , fillMediaBag
+ , extractMedia
+ , toLang
+ , setTranslations
+ , translateTerm
+ , Translations
+ ) where
+
+import Prelude hiding (readFile)
+import System.Random (StdGen, next, mkStdGen)
+import qualified System.Random as IO (newStdGen)
+import Codec.Archive.Zip
+import qualified Data.CaseInsensitive as CI
+import Data.Unique (hashUnique)
+import Data.List (stripPrefix)
+import qualified Data.Unique as IO (newUnique)
+import qualified Text.Pandoc.UTF8 as UTF8
+import qualified System.Directory as Directory
+import Text.Pandoc.Compat.Time (UTCTime)
+import Text.Pandoc.Logging
+import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
+import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
+import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
+import Text.Pandoc.Definition
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Data.Maybe (fromMaybe)
+import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
+ , posixSecondsToUTCTime
+ , POSIXTime )
+import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
+import Data.ByteString.Base64 (decodeLenient)
+import Network.URI ( escapeURIString, nonStrictRelativeTo,
+ unEscapeString, parseURIReference, isAllowedInURI,
+ parseURI, URI(..) )
+import Network.HTTP.Client
+ (httpLbs, responseBody, responseHeaders,
+ Request(port, host, requestHeaders), parseRequest, newManager)
+import Network.HTTP.Client.Internal (addProxy)
+import Network.HTTP.Client.TLS (tlsManagerSettings)
+import System.Environment (getEnv)
+import Network.HTTP.Types.Header ( hContentType )
+import Network (withSocketsDo)
+import Data.ByteString.Lazy (toChunks)
+import qualified Control.Exception as E
+import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.Walk (walkM, walk)
+import qualified Text.Pandoc.MediaBag as MB
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified System.Environment as IO (lookupEnv)
+import System.FilePath.Glob (match, compile)
+import System.Directory (createDirectoryIfMissing, getDirectoryContents,
+ doesDirectoryExist)
+import System.FilePath
+ ((</>), (<.>), takeDirectory, takeExtension, dropExtension,
+ isRelative, normalise, splitDirectories)
+import qualified System.FilePath.Glob as IO (glob)
+import qualified System.FilePath.Posix as Posix
+import qualified System.Directory as IO (getModificationTime)
+import Control.Monad as M (fail)
+import Control.Monad.State.Strict
+import Control.Monad.Except
+import Data.Word (Word8)
+import Data.Default
+import System.IO.Error
+import System.IO (stderr)
+import qualified Data.Map as M
+import Text.Pandoc.Error
+import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
+ readTranslations)
+import qualified Debug.Trace
+#ifdef EMBED_DATA_FILES
+import Text.Pandoc.Data (dataFiles)
+#else
+import qualified Paths_pandoc as Paths
+#endif
+
+-- | The PandocMonad typeclass contains all the potentially
+-- IO-related functions used in pandoc's readers and writers.
+-- Instances of this typeclass may implement these functions
+-- in IO (as in 'PandocIO') or using an internal state that
+-- represents a file system, time, and so on (as in 'PandocPure').
+class (Functor m, Applicative m, Monad m, MonadError PandocError m)
+ => PandocMonad m where
+ -- | Lookup an environment variable.
+ lookupEnv :: String -> m (Maybe String)
+ -- | Get the current (UTC) time.
+ getCurrentTime :: m UTCTime
+ -- | Get the locale's time zone.
+ getCurrentTimeZone :: m TimeZone
+ -- | Return a new generator for random numbers.
+ newStdGen :: m StdGen
+ -- | Return a new unique integer.
+ newUniqueHash :: m Int
+ -- | Retrieve contents and mime type from a URL, raising
+ -- an error on failure.
+ openURL :: String -> m (B.ByteString, Maybe MimeType)
+ -- | Read the lazy ByteString contents from a file path,
+ -- raising an error on failure.
+ readFileLazy :: FilePath -> m BL.ByteString
+ -- | Read the strict ByteString contents from a file path,
+ -- raising an error on failure.
+ readFileStrict :: FilePath -> m B.ByteString
+ -- | Return a list of paths that match a glob, relative to
+ -- the working directory. See 'System.FilePath.Glob' for
+ -- the glob syntax.
+ glob :: String -> m [FilePath]
+ -- | Returns True if file exists.
+ fileExists :: FilePath -> m Bool
+ -- | Returns the path of data file.
+ getDataFileName :: FilePath -> m FilePath
+ -- | Return the modification time of a file.
+ getModificationTime :: FilePath -> m UTCTime
+ -- | Get the value of the 'CommonState' used by all instances
+ -- of 'PandocMonad'.
+ getCommonState :: m CommonState
+ -- | Set the value of the 'CommonState' used by all instances
+ -- of 'PandocMonad'.
+ -- | Get the value of a specific field of 'CommonState'.
+ putCommonState :: CommonState -> m ()
+ -- | Get the value of a specific field of 'CommonState'.
+ getsCommonState :: (CommonState -> a) -> m a
+ getsCommonState f = f <$> getCommonState
+ -- | Modify the 'CommonState'.
+ modifyCommonState :: (CommonState -> CommonState) -> m ()
+ modifyCommonState f = getCommonState >>= putCommonState . f
+ -- Output a log message.
+ logOutput :: LogMessage -> m ()
+ -- Output a debug message to sterr, using 'Debug.Trace.trace',
+ -- if tracing is enabled. Note: this writes to stderr even in
+ -- pure instances.
+ trace :: String -> m ()
+ trace msg = do
+ tracing <- getsCommonState stTrace
+ when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ())
+
+-- * Functions defined for all PandocMonad instances
+
+-- | Set the verbosity level.
+setVerbosity :: PandocMonad m => Verbosity -> m ()
+setVerbosity verbosity =
+ modifyCommonState $ \st -> st{ stVerbosity = verbosity }
+
+-- | Get the verbosity level.
+getVerbosity :: PandocMonad m => m Verbosity
+getVerbosity = getsCommonState stVerbosity
+
+-- Get the accomulated log messages (in temporal order).
+getLog :: PandocMonad m => m [LogMessage]
+getLog = reverse <$> getsCommonState stLog
+
+-- | Log a message using 'logOutput'. Note that 'logOutput' is
+-- called only if the verbosity level exceeds the level of the
+-- message, but the message is added to the list of log messages
+-- that will be retrieved by 'getLog' regardless of its verbosity level.
+report :: PandocMonad m => LogMessage -> m ()
+report msg = do
+ verbosity <- getsCommonState stVerbosity
+ let level = messageVerbosity msg
+ when (level <= verbosity) $ logOutput msg
+ modifyCommonState $ \st -> st{ stLog = msg : stLog st }
+
+-- | Determine whether tracing is enabled. This affects
+-- the behavior of 'trace'. If tracing is not enabled,
+-- 'trace' does nothing.
+setTrace :: PandocMonad m => Bool -> m ()
+setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing}
+
+-- | Set request header to use in HTTP requests.
+setRequestHeader :: PandocMonad m
+ => String -- ^ Header name
+ -> String -- ^ Value
+ -> m ()
+setRequestHeader name val = modifyCommonState $ \st ->
+ st{ stRequestHeaders =
+ (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st) }
+
+-- | Initialize the media bag.
+setMediaBag :: PandocMonad m => MediaBag -> m ()
+setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
+
+-- Retrieve the media bag.
+getMediaBag :: PandocMonad m => m MediaBag
+getMediaBag = getsCommonState stMediaBag
+
+-- Insert an item into the media bag.
+insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
+insertMedia fp mime bs = do
+ mb <- getMediaBag
+ let mb' = MB.insertMedia fp mime bs mb
+ setMediaBag mb'
+
+-- Retrieve the input filenames.
+getInputFiles :: PandocMonad m => m [FilePath]
+getInputFiles = getsCommonState stInputFiles
+
+-- Set the input filenames.
+setInputFiles :: PandocMonad m => [FilePath] -> m ()
+setInputFiles fs = do
+ let sourceURL = case fs of
+ [] -> Nothing
+ (x:_) -> case parseURI x of
+ Just u
+ | uriScheme u `elem` ["http:","https:"] ->
+ Just $ show u{ uriQuery = "",
+ uriFragment = "" }
+ _ -> Nothing
+
+ modifyCommonState $ \st -> st{ stInputFiles = fs
+ , stSourceURL = sourceURL }
+
+-- Retrieve the output filename.
+getOutputFile :: PandocMonad m => m (Maybe FilePath)
+getOutputFile = getsCommonState stOutputFile
+
+-- Set the output filename.
+setOutputFile :: PandocMonad m => Maybe FilePath -> m ()
+setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf }
+
+-- Retrieve the resource path searched by 'fetchItem'.
+getResourcePath :: PandocMonad m => m [FilePath]
+getResourcePath = getsCommonState stResourcePath
+
+-- Set the resource path searched by 'fetchItem'.
+setResourcePath :: PandocMonad m => [FilePath] -> m ()
+setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps}
+
+-- Get the POSIX time.
+getPOSIXTime :: PandocMonad m => m POSIXTime
+getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
+
+-- Get the zoned time.
+getZonedTime :: PandocMonad m => m ZonedTime
+getZonedTime = do
+ t <- getCurrentTime
+ tz <- getCurrentTimeZone
+ return $ utcToZonedTime tz t
+
+-- | Read file, checking in any number of directories.
+readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String)
+readFileFromDirs [] _ = return Nothing
+readFileFromDirs (d:ds) f = catchError
+ ((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f))
+ (\_ -> readFileFromDirs ds f)
+
+--
+
+-- | 'CommonState' represents state that is used by all
+-- instances of 'PandocMonad'. Normally users should not
+-- need to interact with it directly; instead, auxiliary
+-- functions like 'setVerbosity' and 'withMediaBag' should be used.
+data CommonState = CommonState { stLog :: [LogMessage]
+ -- ^ A list of log messages in reverse order
+ , stUserDataDir :: Maybe FilePath
+ -- ^ Directory to search for data files
+ , stSourceURL :: Maybe String
+ -- ^ Absolute URL + dir of 1st source file
+ , stRequestHeaders :: [(String, String)]
+ -- ^ Headers to add for HTTP requests
+ , stMediaBag :: MediaBag
+ -- ^ Media parsed from binary containers
+ , stTranslations :: Maybe
+ (Lang, Maybe Translations)
+ -- ^ Translations for localization
+ , stInputFiles :: [FilePath]
+ -- ^ List of input files from command line
+ , stOutputFile :: Maybe FilePath
+ -- ^ Output file from command line
+ , stResourcePath :: [FilePath]
+ -- ^ Path to search for resources like
+ -- included images
+ , stVerbosity :: Verbosity
+ -- ^ Verbosity level
+ , stTrace :: Bool
+ -- ^ Controls whether tracing messages are
+ -- issued.
+ }
+
+instance Default CommonState where
+ def = CommonState { stLog = []
+ , stUserDataDir = Nothing
+ , stSourceURL = Nothing
+ , stRequestHeaders = []
+ , stMediaBag = mempty
+ , stTranslations = Nothing
+ , stInputFiles = []
+ , stOutputFile = Nothing
+ , stResourcePath = ["."]
+ , stVerbosity = WARNING
+ , stTrace = False
+ }
+
+-- | Convert BCP47 string to a Lang, issuing warning
+-- if there are problems.
+toLang :: PandocMonad m => Maybe String -> m (Maybe Lang)
+toLang Nothing = return Nothing
+toLang (Just s) =
+ case parseBCP47 s of
+ Left _ -> do
+ report $ InvalidLang s
+ return Nothing
+ Right l -> return (Just l)
+
+-- | Select the language to use with 'translateTerm'.
+-- Note that this does not read a translation file;
+-- that is only done the first time 'translateTerm' is
+-- used.
+setTranslations :: PandocMonad m => Lang -> m ()
+setTranslations lang =
+ modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
+
+-- | Load term map.
+getTranslations :: PandocMonad m => m Translations
+getTranslations = do
+ mbtrans <- getsCommonState stTranslations
+ case mbtrans of
+ Nothing -> return mempty -- no language defined
+ Just (_, Just t) -> return t
+ Just (lang, Nothing) -> do -- read from file
+ let translationFile = "translations/" ++ renderLang lang ++ ".yaml"
+ let fallbackFile = "translations/" ++ langLanguage lang ++ ".yaml"
+ let getTrans fp = do
+ bs <- readDataFile fp
+ case readTranslations (UTF8.toString bs) of
+ Left e -> do
+ report $ CouldNotLoadTranslations (renderLang lang)
+ (fp ++ ": " ++ e)
+ -- make sure we don't try again...
+ modifyCommonState $ \st ->
+ st{ stTranslations = Nothing }
+ return mempty
+ Right t -> do
+ modifyCommonState $ \st ->
+ st{ stTranslations = Just (lang, Just t) }
+ return t
+ catchError (getTrans translationFile)
+ (\_ ->
+ catchError (getTrans fallbackFile)
+ (\e -> do
+ report $ CouldNotLoadTranslations (renderLang lang)
+ $ case e of
+ PandocCouldNotFindDataFileError _ ->
+ "data file " ++ fallbackFile ++ " not found"
+ _ -> ""
+ -- make sure we don't try again...
+ modifyCommonState $ \st -> st{ stTranslations = Nothing }
+ return mempty))
+
+-- | Get a translation from the current term map.
+-- Issue a warning if the term is not defined.
+translateTerm :: PandocMonad m => Term -> m String
+translateTerm term = do
+ translations <- getTranslations
+ case lookupTerm term translations of
+ Just s -> return s
+ Nothing -> do
+ report $ NoTranslation (show term)
+ return ""
+
+-- | Evaluate a 'PandocIO' operation.
+runIO :: PandocIO a -> IO (Either PandocError a)
+runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
+
+-- | Evaluate a 'PandocIO' operation, handling any errors
+-- by exiting with an appropriate message and error status.
+runIOorExplode :: PandocIO a -> IO a
+runIOorExplode ma = runIO ma >>= handleError
+
+newtype PandocIO a = PandocIO {
+ unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
+ } deriving ( MonadIO
+ , Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+-- | Utility function to lift IO errors into 'PandocError's.
+liftIOError :: (String -> IO a) -> String -> PandocIO a
+liftIOError f u = do
+ res <- liftIO $ tryIOError $ f u
+ case res of
+ Left e -> throwError $ PandocIOError u e
+ Right r -> return r
+
+instance PandocMonad PandocIO where
+ lookupEnv = liftIO . IO.lookupEnv
+ getCurrentTime = liftIO IO.getCurrentTime
+ getCurrentTimeZone = liftIO IO.getCurrentTimeZone
+ newStdGen = liftIO IO.newStdGen
+ newUniqueHash = hashUnique <$> liftIO IO.newUnique
+
+ openURL u
+ | Just u'' <- stripPrefix "data:" u = do
+ let mime = takeWhile (/=',') u''
+ let contents = UTF8.fromString $
+ unEscapeString $ drop 1 $ dropWhile (/=',') u''
+ return (decodeLenient contents, Just mime)
+ | otherwise = do
+ let toReqHeader (n, v) = (CI.mk (UTF8.fromString n), UTF8.fromString v)
+ customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
+ report $ Fetching u
+ res <- liftIO $ E.try $ withSocketsDo $ do
+ let parseReq = parseRequest
+ proxy <- tryIOError (getEnv "http_proxy")
+ let addProxy' x = case proxy of
+ Left _ -> return x
+ Right pr -> parseReq pr >>= \r ->
+ return (addProxy (host r) (port r) x)
+ req <- parseReq u >>= addProxy'
+ let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
+ resp <- newManager tlsManagerSettings >>= httpLbs req'
+ return (B.concat $ toChunks $ responseBody resp,
+ UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
+
+ case res of
+ Right r -> return r
+ Left e -> throwError $ PandocHttpError u e
+
+ readFileLazy s = liftIOError BL.readFile s
+ readFileStrict s = liftIOError B.readFile s
+
+ glob = liftIOError IO.glob
+ fileExists = liftIOError Directory.doesFileExist
+#ifdef EMBED_DATA_FILES
+ getDataFileName = return
+#else
+ getDataFileName = liftIOError Paths.getDataFileName
+#endif
+ getModificationTime = liftIOError IO.getModificationTime
+ getCommonState = PandocIO $ lift get
+ putCommonState x = PandocIO $ lift $ put x
+ logOutput msg = liftIO $ do
+ UTF8.hPutStr stderr $
+ "[" ++ show (messageVerbosity msg) ++ "] "
+ alertIndent $ lines $ showLogMessage msg
+
+alertIndent :: [String] -> IO ()
+alertIndent [] = return ()
+alertIndent (l:ls) = do
+ UTF8.hPutStrLn stderr l
+ mapM_ go ls
+ where go l' = do UTF8.hPutStr stderr " "
+ UTF8.hPutStrLn stderr l'
+
+-- | Specialized version of parseURIReference that disallows
+-- single-letter schemes. Reason: these are usually windows absolute
+-- paths.
+parseURIReference' :: String -> Maybe URI
+parseURIReference' s =
+ case parseURIReference s of
+ Just u
+ | length (uriScheme u) > 2 -> Just u
+ | null (uriScheme u) -> Just u -- protocol-relative
+ _ -> Nothing
+
+-- | Set the user data directory in common state.
+setUserDataDir :: PandocMonad m
+ => Maybe FilePath
+ -> m ()
+setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp }
+
+-- | Get the user data directory from common state.
+getUserDataDir :: PandocMonad m
+ => m (Maybe FilePath)
+getUserDataDir = getsCommonState stUserDataDir
+
+-- | Fetch an image or other item from the local filesystem or the net.
+-- Returns raw content and maybe mime type.
+fetchItem :: PandocMonad m
+ => String
+ -> m (B.ByteString, Maybe MimeType)
+fetchItem s = do
+ mediabag <- getMediaBag
+ case lookupMedia s mediabag of
+ Just (mime, bs) -> return (BL.toStrict bs, Just mime)
+ Nothing -> downloadOrRead s
+
+downloadOrRead :: PandocMonad m
+ => String
+ -> m (B.ByteString, Maybe MimeType)
+downloadOrRead s = do
+ sourceURL <- getsCommonState stSourceURL
+ case (sourceURL >>= parseURIReference' .
+ ensureEscaped, ensureEscaped s) of
+ (Just u, s') -> -- try fetching from relative path at source
+ case parseURIReference' s' of
+ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
+ Nothing -> openURL s' -- will throw error
+ (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
+ case parseURIReference' s' of
+ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
+ Nothing -> openURL s' -- will throw error
+ (Nothing, s') ->
+ case parseURI s' of -- requires absolute URI
+ -- We don't want to treat C:/ as a scheme:
+ Just u' | length (uriScheme u') > 2 -> openURL (show u')
+ Just u' | uriScheme u' == "file:" ->
+ readLocalFile $ dropWhile (=='/') (uriPath u')
+ _ -> readLocalFile fp -- get from local file system
+ where readLocalFile f = do
+ resourcePath <- getResourcePath
+ cont <- if isRelative f
+ then withPaths resourcePath readFileStrict f
+ else readFileStrict f
+ return (cont, mime)
+ httpcolon = URI{ uriScheme = "http:",
+ uriAuthority = Nothing,
+ uriPath = "",
+ uriQuery = "",
+ uriFragment = "" }
+ dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
+ fp = unEscapeString $ dropFragmentAndQuery s
+ mime = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ ".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
+ x -> getMimeType x
+ ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
+ convertSlash '\\' = '/'
+ convertSlash x = x
+
+-- Retrieve default reference.docx.
+getDefaultReferenceDocx :: PandocMonad m => m Archive
+getDefaultReferenceDocx = do
+ let paths = ["[Content_Types].xml",
+ "_rels/.rels",
+ "docProps/app.xml",
+ "docProps/core.xml",
+ "word/document.xml",
+ "word/fontTable.xml",
+ "word/footnotes.xml",
+ "word/comments.xml",
+ "word/numbering.xml",
+ "word/settings.xml",
+ "word/webSettings.xml",
+ "word/styles.xml",
+ "word/_rels/document.xml.rels",
+ "word/_rels/footnotes.xml.rels",
+ "word/theme/theme1.xml"]
+ let toLazy = BL.fromChunks . (:[])
+ let pathToEntry path = do
+ epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+ contents <- toLazy <$> readDataFile ("docx/" ++ path)
+ return $ toEntry path epochtime contents
+ datadir <- getUserDataDir
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- fileExists (d </> "reference.docx")
+ if exists
+ then return (Just (d </> "reference.docx"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> readFileLazy arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+-- Retrieve default reference.odt.
+getDefaultReferenceODT :: PandocMonad m => m Archive
+getDefaultReferenceODT = do
+ let paths = ["mimetype",
+ "manifest.rdf",
+ "styles.xml",
+ "content.xml",
+ "meta.xml",
+ "settings.xml",
+ "Configurations2/accelerator/current.xml",
+ "Thumbnails/thumbnail.png",
+ "META-INF/manifest.xml"]
+ let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
+ contents <- (BL.fromChunks . (:[])) `fmap`
+ readDataFile ("odt/" ++ path)
+ return $ toEntry path epochtime contents
+ datadir <- getUserDataDir
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- fileExists (d </> "reference.odt")
+ if exists
+ then return (Just (d </> "reference.odt"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> readFileLazy arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+getDefaultReferencePptx :: PandocMonad m => m Archive
+getDefaultReferencePptx = do
+ -- We're going to narrow this down substantially once we get it
+ -- working.
+ let paths = [ "[Content_Types].xml"
+ , "_rels/.rels"
+ , "docProps/app.xml"
+ , "docProps/core.xml"
+ , "ppt/_rels/presentation.xml.rels"
+ , "ppt/presProps.xml"
+ , "ppt/presentation.xml"
+ , "ppt/slideLayouts/_rels/slideLayout1.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout2.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout3.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout4.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout5.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout6.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout7.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout8.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout9.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout10.xml.rels"
+ , "ppt/slideLayouts/_rels/slideLayout11.xml.rels"
+ , "ppt/slideLayouts/slideLayout1.xml"
+ , "ppt/slideLayouts/slideLayout10.xml"
+ , "ppt/slideLayouts/slideLayout11.xml"
+ , "ppt/slideLayouts/slideLayout2.xml"
+ , "ppt/slideLayouts/slideLayout3.xml"
+ , "ppt/slideLayouts/slideLayout4.xml"
+ , "ppt/slideLayouts/slideLayout5.xml"
+ , "ppt/slideLayouts/slideLayout6.xml"
+ , "ppt/slideLayouts/slideLayout7.xml"
+ , "ppt/slideLayouts/slideLayout8.xml"
+ , "ppt/slideLayouts/slideLayout9.xml"
+ , "ppt/slideMasters/_rels/slideMaster1.xml.rels"
+ , "ppt/slideMasters/slideMaster1.xml"
+ , "ppt/slides/_rels/slide1.xml.rels"
+ , "ppt/slides/slide1.xml"
+ , "ppt/slides/_rels/slide2.xml.rels"
+ , "ppt/slides/slide2.xml"
+ , "ppt/tableStyles.xml"
+ , "ppt/theme/theme1.xml"
+ , "ppt/viewProps.xml"
+ -- These relate to notes slides.
+ , "ppt/notesMasters/notesMaster1.xml"
+ , "ppt/notesMasters/_rels/notesMaster1.xml.rels"
+ , "ppt/notesSlides/notesSlide1.xml"
+ , "ppt/notesSlides/_rels/notesSlide1.xml.rels"
+ , "ppt/notesSlides/notesSlide2.xml"
+ , "ppt/notesSlides/_rels/notesSlide2.xml.rels"
+ , "ppt/theme/theme2.xml"
+ ]
+ let toLazy = BL.fromChunks . (:[])
+ let pathToEntry path = do
+ epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+ contents <- toLazy <$> readDataFile ("pptx/" ++ path)
+ return $ toEntry path epochtime contents
+ datadir <- getUserDataDir
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- fileExists (d </> "reference.pptx")
+ if exists
+ then return (Just (d </> "reference.pptx"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> readFileLazy arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+
+-- | Read file from user data directory or,
+-- if not found there, from Cabal data directory.
+readDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDataFile fname = do
+ datadir <- getUserDataDir
+ case datadir of
+ Nothing -> readDefaultDataFile fname
+ Just userDir -> do
+ exists <- fileExists (userDir </> fname)
+ if exists
+ then readFileStrict (userDir </> fname)
+ else readDefaultDataFile fname
+
+-- | Read file from from Cabal data directory.
+readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDefaultDataFile "reference.docx" =
+ (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
+readDefaultDataFile "reference.pptx" =
+ (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx
+readDefaultDataFile "reference.odt" =
+ (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
+readDefaultDataFile fname =
+#ifdef EMBED_DATA_FILES
+ case lookup (makeCanonical fname) dataFiles of
+ Nothing -> throwError $ PandocCouldNotFindDataFileError fname
+ Just contents -> return contents
+#else
+ getDataFileName fname' >>= checkExistence >>= readFileStrict
+ where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
+
+checkExistence :: PandocMonad m => FilePath -> m FilePath
+checkExistence fn = do
+ exists <- fileExists fn
+ if exists
+ then return fn
+ else throwError $ PandocCouldNotFindDataFileError fn
+#endif
+
+makeCanonical :: FilePath -> FilePath
+makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
+ where transformPathParts = reverse . foldl go []
+ go as "." = as
+ go (_:as) ".." = as
+ go as x = x : as
+
+withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
+withPaths [] _ fp = throwError $ PandocResourceNotFound fp
+withPaths (p:ps) action fp =
+ catchError (action (p </> fp))
+ (\_ -> withPaths ps action fp)
+
+-- | Fetch local or remote resource (like an image) and provide data suitable
+-- for adding it to the MediaBag.
+fetchMediaResource :: PandocMonad m
+ => String -> m (FilePath, Maybe MimeType, BL.ByteString)
+fetchMediaResource src = do
+ (bs, mt) <- downloadOrRead src
+ let ext = fromMaybe (takeExtension src)
+ (mt >>= extensionFromMimeType)
+ let bs' = BL.fromChunks [bs]
+ let basename = showDigest $ sha1 bs'
+ let fname = basename <.> ext
+ return (fname, mt, bs')
+
+-- | Traverse tree, filling media bag for any images that
+-- aren't already in the media bag.
+fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
+fillMediaBag d = walkM handleImage d
+ where handleImage :: PandocMonad m => Inline -> m Inline
+ handleImage (Image attr lab (src, tit)) = catchError
+ (do mediabag <- getMediaBag
+ case lookupMedia src mediabag of
+ Just (_, _) -> return $ Image attr lab (src, tit)
+ Nothing -> do
+ (fname, mt, bs) <- fetchMediaResource src
+ insertMedia fname mt bs
+ return $ Image attr lab (fname, tit))
+ (\e ->
+ case e of
+ PandocResourceNotFound _ -> do
+ report $ CouldNotFetchResource src
+ "replacing image with description"
+ -- emit alt text
+ return $ Span ("",["image"],[]) lab
+ PandocHttpError u er -> do
+ report $ CouldNotFetchResource u
+ (show er ++ "\rReplacing image with description.")
+ -- emit alt text
+ return $ Span ("",["image"],[]) lab
+ _ -> throwError e)
+ handleImage x = return x
+
+-- | Extract media from the mediabag into a directory.
+extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
+extractMedia dir d = do
+ media <- getMediaBag
+ case [fp | (fp, _, _) <- mediaDirectory media] of
+ [] -> return d
+ fps -> do
+ mapM_ (writeMedia dir media) fps
+ return $ walk (adjustImagePath dir fps) d
+
+-- Write the contents of a media bag to a path.
+writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
+writeMedia dir mediabag subpath = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> normalise subpath
+ let mbcontents = lookupMedia subpath mediabag
+ case mbcontents of
+ Nothing -> throwError $ PandocResourceNotFound subpath
+ Just (_, bs) -> do
+ report $ Extracting fullpath
+ liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+ liftIOError (\p -> BL.writeFile p bs) fullpath
+
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths (Image attr lab (src, tit))
+ | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
+adjustImagePath _ _ x = x
+
+-- | The 'PureState' contains ersatz representations
+-- of things that would normally be obtained through IO.
+data PureState = PureState { stStdGen :: StdGen
+ , stWord8Store :: [Word8] -- should be
+ -- inifinite,
+ -- i.e. [1..]
+ , stUniqStore :: [Int] -- should be
+ -- inifinite and
+ -- contain every
+ -- element at most
+ -- once, e.g. [1..]
+ , stEnv :: [(String, String)]
+ , stTime :: UTCTime
+ , stTimeZone :: TimeZone
+ , stReferenceDocx :: Archive
+ , stReferencePptx :: Archive
+ , stReferenceODT :: Archive
+ , stFiles :: FileTree
+ , stUserDataFiles :: FileTree
+ , stCabalDataFiles :: FileTree
+ }
+
+instance Default PureState where
+ def = PureState { stStdGen = mkStdGen 1848
+ , stWord8Store = [1..]
+ , stUniqStore = [1..]
+ , stEnv = [("USER", "pandoc-user")]
+ , stTime = posixSecondsToUTCTime 0
+ , stTimeZone = utc
+ , stReferenceDocx = emptyArchive
+ , stReferencePptx = emptyArchive
+ , stReferenceODT = emptyArchive
+ , stFiles = mempty
+ , stUserDataFiles = mempty
+ , stCabalDataFiles = mempty
+ }
+
+
+getPureState :: PandocPure PureState
+getPureState = PandocPure $ lift $ lift get
+
+getsPureState :: (PureState -> a) -> PandocPure a
+getsPureState f = f <$> getPureState
+
+putPureState :: PureState -> PandocPure ()
+putPureState ps= PandocPure $ lift $ lift $ put ps
+
+modifyPureState :: (PureState -> PureState) -> PandocPure ()
+modifyPureState f = PandocPure $ lift $ lift $ modify f
+
+
+data FileInfo = FileInfo { infoFileMTime :: UTCTime
+ , infoFileContents :: B.ByteString
+ }
+
+newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
+ deriving (Monoid)
+
+getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
+getFileInfo fp tree =
+ M.lookup (makeCanonical fp) (unFileTree tree)
+
+-- | Add the specified file to the FileTree. If file
+-- is a directory, add its contents recursively.
+addToFileTree :: FileTree -> FilePath -> IO FileTree
+addToFileTree tree fp = do
+ isdir <- doesDirectoryExist fp
+ if isdir
+ then do -- recursively add contents of directories
+ let isSpecial ".." = True
+ isSpecial "." = True
+ isSpecial _ = False
+ fs <- (map (fp </>) . filter (not . isSpecial)) <$> getDirectoryContents fp
+ foldM addToFileTree tree fs
+ else do
+ contents <- B.readFile fp
+ mtime <- IO.getModificationTime fp
+ return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime
+ , infoFileContents = contents } tree
+
+-- | Insert an ersatz file into the 'FileTree'.
+insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree
+insertInFileTree fp info (FileTree treemap) =
+ FileTree $ M.insert (makeCanonical fp) info treemap
+
+newtype PandocPure a = PandocPure {
+ unPandocPure :: ExceptT PandocError
+ (StateT CommonState (State PureState)) a
+ } deriving ( Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+-- Run a 'PandocPure' operation.
+runPure :: PandocPure a -> Either PandocError a
+runPure x = flip evalState def $
+ flip evalStateT def $
+ runExceptT $
+ unPandocPure x
+
+instance PandocMonad PandocPure where
+ lookupEnv s = do
+ env <- getsPureState stEnv
+ return (lookup s env)
+
+ getCurrentTime = getsPureState stTime
+
+ getCurrentTimeZone = getsPureState stTimeZone
+
+ newStdGen = do
+ g <- getsPureState stStdGen
+ let (_, nxtGen) = next g
+ modifyPureState $ \st -> st { stStdGen = nxtGen }
+ return g
+
+ newUniqueHash = do
+ uniqs <- getsPureState stUniqStore
+ case uniqs of
+ u : us -> do
+ modifyPureState $ \st -> st { stUniqStore = us }
+ return u
+ _ -> M.fail "uniq store ran out of elements"
+ openURL u = throwError $ PandocResourceNotFound u
+ readFileLazy fp = do
+ fps <- getsPureState stFiles
+ case infoFileContents <$> getFileInfo fp fps of
+ Just bs -> return (BL.fromStrict bs)
+ Nothing -> throwError $ PandocResourceNotFound fp
+ readFileStrict fp = do
+ fps <- getsPureState stFiles
+ case infoFileContents <$> getFileInfo fp fps of
+ Just bs -> return bs
+ Nothing -> throwError $ PandocResourceNotFound fp
+
+ glob s = do
+ FileTree ftmap <- getsPureState stFiles
+ return $ filter (match (compile s)) $ M.keys ftmap
+
+ fileExists fp = do
+ fps <- getsPureState stFiles
+ case getFileInfo fp fps of
+ Nothing -> return False
+ Just _ -> return True
+
+ getDataFileName fp = return $ "data/" ++ fp
+
+ getModificationTime fp = do
+ fps <- getsPureState stFiles
+ case infoFileMTime <$> getFileInfo fp fps of
+ Just tm -> return tm
+ Nothing -> throwError $ PandocIOError fp
+ (userError "Can't get modification time")
+
+ getCommonState = PandocPure $ lift get
+ putCommonState x = PandocPure $ lift $ put x
+
+ logOutput _msg = return ()
+
+-- This requires UndecidableInstances. We could avoid that
+-- by repeating the definitions below for every monad transformer
+-- we use: ReaderT, WriterT, StateT, RWST. But this seems to
+-- be harmless.
+instance (MonadTrans t, PandocMonad m, Functor (t m),
+ MonadError PandocError (t m), Monad (t m),
+ Applicative (t m)) => PandocMonad (t m) where
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ glob = lift . glob
+ fileExists = lift . fileExists
+ getDataFileName = lift . getDataFileName
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ logOutput = lift . logOutput
+
+#if MIN_VERSION_base(4,8,0)
+instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where
+#else
+instance PandocMonad m => PandocMonad (ParsecT s st m) where
+#endif
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ glob = lift . glob
+ fileExists = lift . fileExists
+ getDataFileName = lift . getDataFileName
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ trace msg = do
+ tracing <- getsCommonState stTrace
+ when tracing $ do
+ pos <- getPosition
+ Debug.Trace.trace
+ ("[trace] Parsed " ++ msg ++ " at line " ++
+ show (sourceLine pos) ++
+ if sourceName pos == "chunk"
+ then " of chunk"
+ else "")
+ (return ())
+ logOutput = lift . logOutput
diff --git a/src/Text/Pandoc/Data.hsb b/src/Text/Pandoc/Data.hs
index 8786647c5..af0e4504f 100644
--- a/src/Text/Pandoc/Data.hsb
+++ b/src/Text/Pandoc/Data.hs
@@ -1,7 +1,9 @@
-{-# LANGUAGE OverloadedStrings #-}
--- to be processed using hsb2hs
+{-# LANGUAGE TemplateHaskell #-}
+
module Text.Pandoc.Data (dataFiles) where
+
import qualified Data.ByteString as B
+import Data.FileEmbed
import System.FilePath (splitDirectories)
import qualified System.FilePath.Posix as Posix
@@ -12,4 +14,9 @@ dataFiles = map (\(fp, contents) ->
(Posix.joinPath (splitDirectories fp), contents)) dataFiles'
dataFiles' :: [(FilePath, B.ByteString)]
-dataFiles' = ("MANUAL.txt", %blob "MANUAL.txt") : %blobs "data"
+dataFiles' = ("MANUAL.txt", $(embedFile "MANUAL.txt")) :
+ -- handle the hidden file separately, since embedDir doesn't
+ -- include it:
+ ("docx/_rels/.rels", $(embedFile "data/docx/_rels/.rels")) :
+ ("pptx/_rels/.rels", $(embedFile "data/pptx/_rels/.rels")) :
+ $(embedDir "data")
diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs
index c9f368abc..3766960ea 100644
--- a/src/Text/Pandoc/Emoji.hs
+++ b/src/Text/Pandoc/Emoji.hs
@@ -903,4 +903,3 @@ emojis = M.fromList
,("zero","0\65039\8419")
,("zzz","\128164")
]
-
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 5e26771fe..f78a31481 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Error
- Copyright : Copyright (C) 2006-2016 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,40 +30,94 @@ This module provides a standard way to deal with possible errors encounted
during parsing.
-}
-module Text.Pandoc.Error (PandocError(..), handleError) where
+module Text.Pandoc.Error (
+ PandocError(..),
+ handleError) where
+import Control.Exception (Exception)
+import Data.Typeable (Typeable)
+import GHC.Generics (Generic)
+import Network.HTTP.Client (HttpException)
+import System.Exit (ExitCode (..), exitWith)
+import System.IO (stderr)
+import qualified Text.Pandoc.UTF8 as UTF8
import Text.Parsec.Error
import Text.Parsec.Pos hiding (Line)
-import GHC.Generics (Generic)
-import Data.Generics (Typeable)
-import Control.Exception (Exception)
type Input = String
-data PandocError = -- | Generic parse failure
- ParseFailure String
- -- | Error thrown by a Parsec parser
- | ParsecError Input ParseError
+data PandocError = PandocIOError String IOError
+ | PandocHttpError String HttpException
+ | PandocShouldNeverHappenError String
+ | PandocSomeError String
+ | PandocParseError String
+ | PandocParsecError Input ParseError
+ | PandocMakePDFError String
+ | PandocOptionError String
+ | PandocSyntaxMapError String
+ | PandocFailOnWarningError
+ | PandocPDFProgramNotFoundError String
+ | PandocPDFError String
+ | PandocFilterError String String
+ | PandocCouldNotFindDataFileError String
+ | PandocResourceNotFound String
+ | PandocTemplateError String
+ | PandocAppError String
+ | PandocEpubSubdirectoryError String
+ | PandocMacroLoop String
deriving (Show, Typeable, Generic)
instance Exception PandocError
--- | An unsafe method to handle `PandocError`s.
-handleError :: Either PandocError a -> a
-handleError (Right r) = r
-handleError (Left err) =
- case err of
- ParseFailure string -> error string
- ParsecError input err' ->
+-- | Handle PandocError by exiting with an error message.
+handleError :: Either PandocError a -> IO a
+handleError (Right r) = return r
+handleError (Left e) =
+ case e of
+ PandocIOError _ err' -> ioError err'
+ PandocHttpError u err' -> err 61 $
+ "Could not fetch " ++ u ++ "\n" ++ show err'
+ PandocShouldNeverHappenError s -> err 62 s
+ PandocSomeError s -> err 63 s
+ PandocParseError s -> err 64 s
+ PandocParsecError input err' ->
let errPos = errorPos err'
errLine = sourceLine errPos
errColumn = sourceColumn errPos
ls = lines input ++ [""]
errorInFile = if length ls > errLine - 1
- then concat ["\n", (ls !! (errLine - 1))
+ then concat ["\n", ls !! (errLine - 1)
,"\n", replicate (errColumn - 1) ' '
,"^"]
else ""
- in error $ "\nError at " ++ show err'
- ++ errorInFile
+ in err 65 $ "\nError at " ++ show err' ++
+ -- if error comes from a chunk or included file,
+ -- then we won't get the right text this way:
+ if sourceName errPos == "source"
+ then errorInFile
+ else ""
+ PandocMakePDFError s -> err 65 s
+ PandocOptionError s -> err 2 s
+ PandocSyntaxMapError s -> err 67 s
+ PandocFailOnWarningError -> err 3 "Failing because there were warnings."
+ PandocPDFProgramNotFoundError pdfprog -> err 47 $
+ pdfprog ++ " not found. Please select a different --pdf-engine or install " ++ pdfprog
+ PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" ++ logmsg
+ PandocFilterError filtername msg -> err 83 $ "Error running filter " ++
+ filtername ++ ":\n" ++ msg
+ PandocCouldNotFindDataFileError fn -> err 97 $
+ "Could not find data file " ++ fn
+ PandocResourceNotFound fn -> err 99 $
+ "File " ++ fn ++ " not found in resource path"
+ PandocTemplateError s -> err 5 s
+ PandocAppError s -> err 1 s
+ PandocEpubSubdirectoryError s -> err 31 $
+ "EPUB subdirectory name '" ++ s ++ "' contains illegal characters"
+ PandocMacroLoop s -> err 91 $
+ "Loop encountered in expanding macro " ++ s
+err :: Int -> String -> IO a
+err exitCode msg = do
+ UTF8.hPutStrLn stderr msg
+ exitWith $ ExitFailure exitCode
+ return undefined
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
new file mode 100644
index 000000000..968476930
--- /dev/null
+++ b/src/Text/Pandoc/Extensions.hs
@@ -0,0 +1,378 @@
+{-
+Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE TemplateHaskell #-}
+
+{- |
+ Module : Text.Pandoc.Extensions
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Data structures and functions for representing markup extensions.
+-}
+module Text.Pandoc.Extensions ( Extension(..)
+ , Extensions
+ , emptyExtensions
+ , extensionsFromList
+ , parseFormatSpec
+ , extensionEnabled
+ , enableExtension
+ , disableExtension
+ , getDefaultExtensions
+ , pandocExtensions
+ , plainExtensions
+ , strictExtensions
+ , phpMarkdownExtraExtensions
+ , githubMarkdownExtensions
+ , multimarkdownExtensions )
+where
+import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions)
+import Data.Aeson.TH (deriveJSON)
+import Data.Bits (clearBit, setBit, testBit, (.|.))
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import GHC.Generics (Generic)
+import Text.Pandoc.Shared (safeRead)
+import Text.Parsec
+
+newtype Extensions = Extensions Integer
+ deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, ToJSON, FromJSON)
+
+instance Monoid Extensions where
+ mempty = Extensions 0
+ mappend (Extensions a) (Extensions b) = Extensions (a .|. b)
+
+extensionsFromList :: [Extension] -> Extensions
+extensionsFromList = foldr enableExtension emptyExtensions
+
+emptyExtensions :: Extensions
+emptyExtensions = Extensions 0
+
+extensionEnabled :: Extension -> Extensions -> Bool
+extensionEnabled x (Extensions exts) = testBit exts (fromEnum x)
+
+enableExtension :: Extension -> Extensions -> Extensions
+enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x))
+
+disableExtension :: Extension -> Extensions -> Extensions
+disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x))
+
+-- | Individually selectable syntax extensions.
+data Extension =
+ Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
+ | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable
+ | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup
+ | Ext_angle_brackets_escapable -- ^ Make < and > escapable
+ | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers
+ | Ext_auto_identifiers -- ^ Automatic identifiers for headers
+ | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links
+ | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
+ | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote
+ | Ext_blank_before_header -- ^ Require blank line before a header
+ | Ext_bracketed_spans -- ^ Bracketed spans with attributes
+ | Ext_citations -- ^ Pandoc/citeproc citations
+ | Ext_compact_definition_lists -- ^ Definition lists without space between items,
+ -- and disallow laziness
+ | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php
+ | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
+ -- East Asian wide characters
+ | Ext_emoji -- ^ Support emoji like :smile:
+ | Ext_empty_paragraphs -- ^ Allow empty paragraphs
+ | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
+ | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
+ | Ext_example_lists -- ^ Markdown-style numbered examples
+ | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters
+ | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
+ | Ext_fenced_code_blocks -- ^ Parse fenced code blocks
+ | Ext_fenced_divs -- ^ Allow fenced div syntax :::
+ | Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes
+ | Ext_four_space_rule -- ^ Require 4-space indent for list contents
+ | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, using
+ -- GitHub's method for generating identifiers
+ | Ext_grid_tables -- ^ Grid tables (pandoc, reST)
+ | Ext_hard_line_breaks -- ^ All newlines become hard line breaks
+ | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v}
+ | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored
+ | Ext_implicit_figures -- ^ A paragraph with just an image is a figure
+ | Ext_implicit_header_references -- ^ Implicit reference links for headers
+ | Ext_inline_code_attributes -- ^ Allow attributes on inline code
+ | Ext_inline_notes -- ^ Pandoc-style inline notes
+ | Ext_intraword_underscores -- ^ Treat underscore inside word as literal
+ | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only)
+ | Ext_line_blocks -- ^ RST style line blocks
+ | Ext_link_attributes -- ^ link and image attributes
+ | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank
+ | Ext_literate_haskell -- ^ Enable literate Haskell conventions
+ | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown iff
+ -- container has attribute 'markdown'
+ | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
+ | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
+ | Ext_mmd_link_attributes -- ^ MMD style reference link attributes
+ | Ext_mmd_title_block -- ^ Multimarkdown metadata block
+ | Ext_multiline_tables -- ^ Pandoc-style multiline tables
+ | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
+ | Ext_native_spans -- ^ Use Span inlines for contents of <span>
+ | Ext_ntb -- ^ ConTeXt Natural Tables
+ | Ext_old_dashes -- ^ -- = em, - before number = en
+ | Ext_pandoc_title_block -- ^ Pandoc title block
+ | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra)
+ | Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines
+ | Ext_raw_html -- ^ Allow raw HTML
+ | Ext_raw_tex -- ^ Allow raw TeX (other than math)
+ | Ext_shortcut_reference_links -- ^ Shortcut reference links
+ | Ext_simple_tables -- ^ Pandoc-style simple tables
+ | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes
+ | Ext_space_in_atx_header -- ^ Require space between # and header text
+ | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link
+ | Ext_startnum -- ^ Make start number of ordered list significant
+ | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax
+ | Ext_subscript -- ^ Subscript using ~this~ syntax
+ | Ext_superscript -- ^ Superscript using ^this^ syntax
+ | Ext_styles -- ^ Read styles that pandoc doesn't know
+ | Ext_table_captions -- ^ Pandoc-style table captions
+ | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$
+ | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\]
+ | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\]
+ | Ext_yaml_metadata_block -- ^ YAML metadata block
+ deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
+
+-- | Extensions to be used with pandoc-flavored markdown.
+pandocExtensions :: Extensions
+pandocExtensions = extensionsFromList
+ [ Ext_footnotes
+ , Ext_inline_notes
+ , Ext_pandoc_title_block
+ , Ext_yaml_metadata_block
+ , Ext_table_captions
+ , Ext_implicit_figures
+ , Ext_simple_tables
+ , Ext_multiline_tables
+ , Ext_grid_tables
+ , Ext_pipe_tables
+ , Ext_citations
+ , Ext_raw_tex
+ , Ext_raw_html
+ , Ext_tex_math_dollars
+ , Ext_latex_macros
+ , Ext_fenced_code_blocks
+ , Ext_fenced_code_attributes
+ , Ext_backtick_code_blocks
+ , Ext_inline_code_attributes
+ , Ext_raw_attribute
+ , Ext_markdown_in_html_blocks
+ , Ext_native_divs
+ , Ext_fenced_divs
+ , Ext_native_spans
+ , Ext_bracketed_spans
+ , Ext_escaped_line_breaks
+ , Ext_fancy_lists
+ , Ext_startnum
+ , Ext_definition_lists
+ , Ext_example_lists
+ , Ext_all_symbols_escapable
+ , Ext_intraword_underscores
+ , Ext_blank_before_blockquote
+ , Ext_blank_before_header
+ , Ext_space_in_atx_header
+ , Ext_strikeout
+ , Ext_superscript
+ , Ext_subscript
+ , Ext_auto_identifiers
+ , Ext_header_attributes
+ , Ext_link_attributes
+ , Ext_implicit_header_references
+ , Ext_line_blocks
+ , Ext_shortcut_reference_links
+ , Ext_smart
+ ]
+
+-- | Extensions to be used with plain text output.
+plainExtensions :: Extensions
+plainExtensions = extensionsFromList
+ [ Ext_table_captions
+ , Ext_implicit_figures
+ , Ext_simple_tables
+ , Ext_multiline_tables
+ , Ext_grid_tables
+ , Ext_latex_macros
+ , Ext_fancy_lists
+ , Ext_startnum
+ , Ext_definition_lists
+ , Ext_example_lists
+ , Ext_intraword_underscores
+ , Ext_blank_before_blockquote
+ , Ext_blank_before_header
+ , Ext_strikeout
+ ]
+
+-- | Extensions to be used with github-flavored markdown.
+phpMarkdownExtraExtensions :: Extensions
+phpMarkdownExtraExtensions = extensionsFromList
+ [ Ext_footnotes
+ , Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_markdown_attribute
+ , Ext_fenced_code_blocks
+ , Ext_definition_lists
+ , Ext_intraword_underscores
+ , Ext_header_attributes
+ , Ext_link_attributes
+ , Ext_abbreviations
+ , Ext_shortcut_reference_links
+ , Ext_spaced_reference_links
+ ]
+
+-- | Extensions to be used with github-flavored markdown.
+githubMarkdownExtensions :: Extensions
+githubMarkdownExtensions = extensionsFromList
+ [ Ext_angle_brackets_escapable
+ , Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_fenced_code_blocks
+ , Ext_gfm_auto_identifiers
+ , Ext_ascii_identifiers
+ , Ext_backtick_code_blocks
+ , Ext_autolink_bare_uris
+ , Ext_space_in_atx_header
+ , Ext_intraword_underscores
+ , Ext_strikeout
+ , Ext_emoji
+ , Ext_lists_without_preceding_blankline
+ , Ext_shortcut_reference_links
+ ]
+
+-- | Extensions to be used with multimarkdown.
+multimarkdownExtensions :: Extensions
+multimarkdownExtensions = extensionsFromList
+ [ Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_markdown_attribute
+ , Ext_mmd_link_attributes
+ -- , Ext_raw_tex
+ -- Note: MMD's raw TeX syntax requires raw TeX to be
+ -- enclosed in HTML comment
+ , Ext_tex_math_double_backslash
+ , Ext_intraword_underscores
+ , Ext_mmd_title_block
+ , Ext_footnotes
+ , Ext_definition_lists
+ , Ext_all_symbols_escapable
+ , Ext_implicit_header_references
+ , Ext_shortcut_reference_links
+ , Ext_auto_identifiers
+ , Ext_mmd_header_identifiers
+ , Ext_implicit_figures
+ -- Note: MMD's syntax for superscripts and subscripts
+ -- is a bit more permissive than pandoc's, allowing
+ -- e^2 and a~1 instead of e^2^ and a~1~, so even with
+ -- these options we don't have full support for MMD
+ -- superscripts and subscripts, but there's no reason
+ -- not to include these:
+ , Ext_superscript
+ , Ext_subscript
+ , Ext_backtick_code_blocks
+ , Ext_spaced_reference_links
+ -- So far only in dev version of mmd:
+ , Ext_raw_attribute
+ ]
+
+-- | Language extensions to be used with strict markdown.
+strictExtensions :: Extensions
+strictExtensions = extensionsFromList
+ [ Ext_raw_html
+ , Ext_shortcut_reference_links
+ , Ext_spaced_reference_links
+ ]
+
+-- | Default extensions from format-describing string.
+getDefaultExtensions :: String -> Extensions
+getDefaultExtensions "markdown_strict" = strictExtensions
+getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions
+getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
+getDefaultExtensions "markdown_github" = githubMarkdownExtensions
+getDefaultExtensions "markdown" = pandocExtensions
+getDefaultExtensions "muse" = extensionsFromList
+ [Ext_amuse,
+ Ext_auto_identifiers]
+getDefaultExtensions "plain" = plainExtensions
+getDefaultExtensions "gfm" = githubMarkdownExtensions
+getDefaultExtensions "commonmark" = extensionsFromList
+ [Ext_raw_html]
+getDefaultExtensions "org" = extensionsFromList
+ [Ext_citations,
+ Ext_auto_identifiers]
+getDefaultExtensions "html" = extensionsFromList
+ [Ext_auto_identifiers,
+ Ext_native_divs,
+ Ext_line_blocks,
+ Ext_native_spans]
+getDefaultExtensions "html4" = getDefaultExtensions "html"
+getDefaultExtensions "html5" = getDefaultExtensions "html"
+getDefaultExtensions "epub" = extensionsFromList
+ [Ext_raw_html,
+ Ext_native_divs,
+ Ext_native_spans,
+ Ext_epub_html_exts]
+getDefaultExtensions "epub2" = getDefaultExtensions "epub"
+getDefaultExtensions "epub3" = getDefaultExtensions "epub"
+getDefaultExtensions "latex" = extensionsFromList
+ [Ext_smart,
+ Ext_latex_macros,
+ Ext_auto_identifiers]
+getDefaultExtensions "context" = extensionsFromList
+ [Ext_smart,
+ Ext_auto_identifiers]
+getDefaultExtensions "textile" = extensionsFromList
+ [Ext_old_dashes,
+ Ext_smart,
+ Ext_raw_html,
+ Ext_auto_identifiers]
+getDefaultExtensions "opml" = pandocExtensions -- affects notes
+getDefaultExtensions _ = extensionsFromList
+ [Ext_auto_identifiers]
+
+-- | Parse a format-specifying string into a markup format and a function that
+-- takes Extensions and enables and disables extensions as defined in the format
+-- spec.
+parseFormatSpec :: String
+ -> Either ParseError (String, Extensions -> Extensions)
+parseFormatSpec = parse formatSpec ""
+ where formatSpec = do
+ name <- formatName
+ extMods <- many extMod
+ return (name, \x -> foldl (flip ($)) x extMods)
+ formatName = many1 $ noneOf "-+"
+ extMod = do
+ polarity <- oneOf "-+"
+ name <- many $ noneOf "-+"
+ ext <- case safeRead ("Ext_" ++ name) of
+ Just n -> return n
+ Nothing
+ | name == "lhs" -> return Ext_literate_haskell
+ | otherwise -> fail $ "Unknown extension: " ++ name
+ return $ case polarity of
+ '-' -> disableExtension ext
+ _ -> enableExtension ext
+
+$(deriveJSON defaultOptions ''Extension)
diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs
new file mode 100644
index 000000000..e2a3c3e16
--- /dev/null
+++ b/src/Text/Pandoc/Filter.hs
@@ -0,0 +1,60 @@
+{-
+Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE TemplateHaskell #-}
+
+{- |
+ Module : Text.Pandoc.Filter
+ Copyright : Copyright (C) 2006-2017 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley@edu>
+ Stability : alpha
+ Portability : portable
+
+Programmatically modifications of pandoc documents.
+-}
+module Text.Pandoc.Filter
+ ( Filter (..)
+ , applyFilters
+ ) where
+
+import Data.Aeson (defaultOptions)
+import Data.Aeson.TH (deriveJSON)
+import Data.Foldable (foldrM)
+import Text.Pandoc.Class (PandocIO)
+import Text.Pandoc.Definition (Pandoc)
+import Text.Pandoc.Options (ReaderOptions)
+import qualified Text.Pandoc.Filter.JSON as JSONFilter
+import qualified Text.Pandoc.Filter.Lua as LuaFilter
+
+data Filter = LuaFilter FilePath
+ | JSONFilter FilePath
+ deriving (Show)
+
+applyFilters :: ReaderOptions
+ -> [Filter]
+ -> [String]
+ -> Pandoc
+ -> PandocIO Pandoc
+applyFilters ropts filters args d =
+ foldrM ($) d $ map applyFilter filters
+ where
+ applyFilter (JSONFilter f) = JSONFilter.apply ropts args f
+ applyFilter (LuaFilter f) = LuaFilter.apply ropts args f
+
+$(deriveJSON defaultOptions ''Filter)
diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs
new file mode 100644
index 000000000..5772c2c41
--- /dev/null
+++ b/src/Text/Pandoc/Filter/JSON.hs
@@ -0,0 +1,97 @@
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Filter
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley@edu>
+ Stability : alpha
+ Portability : portable
+
+Programmatically modifications of pandoc documents via JSON filters.
+-}
+module Text.Pandoc.Filter.JSON (apply) where
+
+import Control.Monad (unless, when)
+import Control.Monad.Trans (MonadIO (liftIO))
+import Data.Aeson (eitherDecode', encode)
+import Data.Char (toLower)
+import Data.Maybe (isNothing)
+import System.Directory (executable, doesFileExist, findExecutable,
+ getPermissions)
+import System.Environment (getEnvironment)
+import System.Exit (ExitCode (..))
+import System.FilePath ((</>), takeExtension)
+import Text.Pandoc.Class (PandocIO)
+import Text.Pandoc.Error (PandocError (PandocFilterError))
+import Text.Pandoc.Definition (Pandoc)
+import Text.Pandoc.Filter.Path (expandFilterPath)
+import Text.Pandoc.Options (ReaderOptions)
+import Text.Pandoc.Process (pipeProcess)
+import Text.Pandoc.Shared (pandocVersion)
+import qualified Control.Exception as E
+import qualified Text.Pandoc.UTF8 as UTF8
+
+apply :: ReaderOptions
+ -> [String]
+ -> FilePath
+ -> Pandoc
+ -> PandocIO Pandoc
+apply ropts args f d = do
+ f' <- expandFilterPath f
+ liftIO $ externalFilter ropts f' args d
+
+externalFilter :: MonadIO m
+ => ReaderOptions -> FilePath -> [String] -> Pandoc -> m Pandoc
+externalFilter ropts f args' d = liftIO $ do
+ exists <- doesFileExist f
+ isExecutable <- if exists
+ then executable <$> getPermissions f
+ else return True
+ let (f', args'') = if exists
+ then case map toLower (takeExtension f) of
+ _ | isExecutable -> ("." </> f, args')
+ ".py" -> ("python", f:args')
+ ".hs" -> ("runhaskell", f:args')
+ ".pl" -> ("perl", f:args')
+ ".rb" -> ("ruby", f:args')
+ ".php" -> ("php", f:args')
+ ".js" -> ("node", f:args')
+ ".r" -> ("Rscript", f:args')
+ _ -> (f, args')
+ else (f, args')
+ unless (exists && isExecutable) $ do
+ mbExe <- findExecutable f'
+ when (isNothing mbExe) $
+ E.throwIO $ PandocFilterError f ("Could not find executable " ++ f')
+ env <- getEnvironment
+ let env' = Just
+ ( ("PANDOC_VERSION", pandocVersion)
+ : ("PANDOC_READER_OPTIONS", UTF8.toStringLazy (encode ropts))
+ : env )
+ (exitcode, outbs) <- E.handle filterException $
+ pipeProcess env' f' args'' $ encode d
+ case exitcode of
+ ExitSuccess -> either (E.throwIO . PandocFilterError f)
+ return $ eitherDecode' outbs
+ ExitFailure ec -> E.throwIO $ PandocFilterError f
+ ("Filter returned error status " ++ show ec)
+ where filterException :: E.SomeException -> IO a
+ filterException e = E.throwIO $ PandocFilterError f (show e)
diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs
new file mode 100644
index 000000000..597a31cbc
--- /dev/null
+++ b/src/Text/Pandoc/Filter/Lua.hs
@@ -0,0 +1,53 @@
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Filter.Lua
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley@edu>
+ Stability : alpha
+ Portability : portable
+
+Apply Lua filters to modify a pandoc documents programmatically.
+-}
+module Text.Pandoc.Filter.Lua (apply) where
+
+import Control.Exception (throw)
+import Text.Pandoc.Class (PandocIO)
+import Text.Pandoc.Definition (Pandoc)
+import Text.Pandoc.Error (PandocError (PandocFilterError))
+import Text.Pandoc.Filter.Path (expandFilterPath)
+import Text.Pandoc.Lua (LuaException (..), runLuaFilter)
+import Text.Pandoc.Options (ReaderOptions)
+
+apply :: ReaderOptions
+ -> [String]
+ -> FilePath
+ -> Pandoc
+ -> PandocIO Pandoc
+apply ropts args f d = do
+ f' <- expandFilterPath f
+ let format = case args of
+ (x:_) -> x
+ _ -> error "Format not supplied for lua filter"
+ res <- runLuaFilter ropts f' format d
+ case res of
+ Right x -> return x
+ Left (LuaException s) -> throw (PandocFilterError f s)
diff --git a/src/Text/Pandoc/Filter/Path.hs b/src/Text/Pandoc/Filter/Path.hs
new file mode 100644
index 000000000..8074bcbb7
--- /dev/null
+++ b/src/Text/Pandoc/Filter/Path.hs
@@ -0,0 +1,53 @@
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Filter.Path
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley@edu>
+ Stability : alpha
+ Portability : portable
+
+Expand paths of filters, searching the data directory.
+-}
+module Text.Pandoc.Filter.Path
+ ( expandFilterPath
+ ) where
+
+import Text.Pandoc.Class (PandocMonad, fileExists, getUserDataDir)
+import System.FilePath ((</>), isRelative)
+
+ -- First we check to see if a filter is found. If not, and if it's
+ -- not an absolute path, we check to see whether it's in `userdir/filters`.
+ -- If not, we leave it unchanged.
+expandFilterPath :: PandocMonad m => FilePath -> m FilePath
+expandFilterPath fp = do
+ mbDatadir <- getUserDataDir
+ fpExists <- fileExists fp
+ if fpExists
+ then return fp
+ else case mbDatadir of
+ Just datadir | isRelative fp -> do
+ let filterPath = datadir </> "filters" </> fp
+ filterPathExists <- fileExists filterPath
+ if filterPathExists
+ then return filterPath
+ else return fp
+ _ -> return fp
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 896682389..113727750 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2008-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Highlighting
- Copyright : Copyright (C) 2008-2016 John MacFarlane
+ Copyright : Copyright (C) 2008-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -28,7 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Exports functions for syntax highlighting.
-}
-module Text.Pandoc.Highlighting ( languages
+module Text.Pandoc.Highlighting ( highlightingStyles
+ , languages
, languagesByExtension
, highlight
, formatLaTeXInline
@@ -48,14 +49,25 @@ module Text.Pandoc.Highlighting ( languages
, fromListingsLanguage
, toListingsLanguage
) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared (safeRead)
-import Skylighting
-import Data.Maybe (fromMaybe)
+import Control.Monad
import Data.Char (toLower)
import qualified Data.Map as M
-import Control.Monad
+import Data.Maybe (fromMaybe)
import qualified Data.Text as T
+import Skylighting
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (safeRead)
+
+highlightingStyles :: [(String, Style)]
+highlightingStyles =
+ [("pygments", pygments),
+ ("tango", tango),
+ ("espresso", espresso),
+ ("zenburn", zenburn),
+ ("kate", kate),
+ ("monochrome", monochrome),
+ ("breezedark", breezeDark),
+ ("haddock", haddock)]
languages :: [String]
languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap]
@@ -64,34 +76,39 @@ languagesByExtension :: String -> [String]
languagesByExtension ext =
[T.unpack (T.toLower (sName s)) | s <- syntaxesByExtension defaultSyntaxMap ext]
-highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
+highlight :: SyntaxMap
+ -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
-> Attr -- ^ Attributes of the CodeBlock
-> String -- ^ Raw contents of the CodeBlock
- -> Maybe a -- ^ Maybe the formatted result
-highlight formatter (_, classes, keyvals) rawCode =
+ -> Either String a
+highlight syntaxmap formatter (ident, classes, keyvals) rawCode =
let firstNum = fromMaybe 1 (safeRead (fromMaybe "1" $ lookup "startFrom" keyvals))
fmtOpts = defaultFormatOpts{
startNumber = firstNum,
+ lineAnchors = any (`elem`
+ ["line-anchors", "lineAnchors"]) classes,
numberLines = any (`elem`
- ["number","numberLines", "number-lines"]) classes }
- tokenizeOpts = TokenizerConfig{ syntaxMap = defaultSyntaxMap
+ ["number","numberLines", "number-lines"]) classes,
+ lineIdPrefix = if null ident
+ then mempty
+ else T.pack (ident ++ "-") }
+ tokenizeOpts = TokenizerConfig{ syntaxMap = syntaxmap
, traceOutput = False }
classes' = map T.pack classes
rawCode' = T.pack rawCode
- in case msum (map (\l -> lookupSyntax l defaultSyntaxMap) classes') of
+ in case msum (map (`lookupSyntax` syntaxmap) classes') of
Nothing
- | numberLines fmtOpts -> Just
+ | numberLines fmtOpts -> Right
$ formatter fmtOpts{ codeClasses = [],
containerClasses = classes' }
- $ map (\ln -> [(NormalTok, ln)]) $ T.lines rawCode'
- | otherwise -> Nothing
+ $ map (\ln -> [(NormalTok, ln)])
+ $ T.lines rawCode'
+ | otherwise -> Left ""
Just syntax ->
- case tokenize tokenizeOpts syntax rawCode' of
- Right slines -> Just $
- formatter fmtOpts{ codeClasses =
- [T.toLower (sShortname syntax)],
- containerClasses = classes' } slines
- Left _ -> Nothing
+ formatter fmtOpts{ codeClasses =
+ [T.toLower (sShortname syntax)],
+ containerClasses = classes' } <$>
+ tokenize tokenizeOpts syntax rawCode'
-- Functions for correlating latex listings package's language names
-- with skylighting language names:
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index e46c91eda..4c76aac13 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
{-# OPTIONS_GHC -fno-warn-type-defaults #-}
{-
- Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
+ Copyright (C) 2011-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@
{- |
Module : Text.Pandoc.ImageSize
-Copyright : Copyright (C) 2011-2016 John MacFarlane
+Copyright : Copyright (C) 2011-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -38,8 +38,12 @@ module Text.Pandoc.ImageSize ( ImageType(..)
, Dimension(..)
, Direction(..)
, dimension
+ , lengthToDim
+ , scaleDimension
, inInch
+ , inPixel
, inPoints
+ , inEm
, numUnit
, showInInch
, showInPixel
@@ -53,11 +57,13 @@ import Control.Monad
import Data.Bits
import Data.Binary
import Data.Binary.Get
-import Text.Pandoc.Shared (safeRead, hush)
+import Text.Pandoc.Shared (safeRead)
import Data.Default (Default)
import Numeric (showFFloat)
import Text.Pandoc.Definition
import Text.Pandoc.Options
+import qualified Text.Pandoc.UTF8 as UTF8
+import qualified Text.XML.Light as Xml
import qualified Data.Map as M
import Control.Monad.Except
import Data.Maybe (fromMaybe)
@@ -65,7 +71,7 @@ import Data.Maybe (fromMaybe)
-- quick and dirty functions to get image sizes
-- algorithms borrowed from wwwis.pl
-data ImageType = Png | Gif | Jpeg | Pdf | Eps deriving Show
+data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf deriving Show
data Direction = Width | Height
instance Show Direction where
show Width = "width"
@@ -73,13 +79,19 @@ instance Show Direction where
data Dimension = Pixel Integer
| Centimeter Double
+ | Millimeter Double
| Inch Double
| Percent Double
+ | Em Double
+ deriving Eq
+
instance Show Dimension where
show (Pixel a) = show a ++ "px"
show (Centimeter a) = showFl a ++ "cm"
+ show (Millimeter a) = showFl a ++ "mm"
show (Inch a) = showFl a ++ "in"
show (Percent a) = show a ++ "%"
+ show (Em a) = showFl a ++ "em"
data ImageSize = ImageSize{
pxX :: Integer
@@ -91,7 +103,13 @@ instance Default ImageSize where
def = ImageSize 300 200 72 72
showFl :: (RealFloat a) => a -> String
-showFl a = showFFloat (Just 5) a ""
+showFl a = removeExtra0s $ showFFloat (Just 5) a ""
+
+removeExtra0s :: String -> String
+removeExtra0s s =
+ case dropWhile (=='0') $ reverse s of
+ '.':xs -> reverse xs
+ xs -> reverse xs
imageType :: ByteString -> Maybe ImageType
imageType img = case B.take 4 img of
@@ -100,19 +118,31 @@ imageType img = case B.take 4 img of
"\xff\xd8\xff\xe0" -> return Jpeg -- JFIF
"\xff\xd8\xff\xe1" -> return Jpeg -- Exif
"%PDF" -> return Pdf
+ "<svg" -> return Svg
+ "<?xm"
+ | findSvgTag img
+ -> return Svg
"%!PS"
- | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
+ | B.take 4 (B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
-> return Eps
+ "\x01\x00\x00\x00"
+ | B.take 4 (B.drop 40 img) == " EMF"
+ -> return Emf
_ -> mzero
-imageSize :: ByteString -> Either String ImageSize
-imageSize img =
+findSvgTag :: ByteString -> Bool
+findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img
+
+imageSize :: WriterOptions -> ByteString -> Either String ImageSize
+imageSize opts img =
case imageType img of
Just Png -> mbToEither "could not determine PNG size" $ pngSize img
Just Gif -> mbToEither "could not determine GIF size" $ gifSize img
Just Jpeg -> jpegSize img
+ Just Svg -> mbToEither "could not determine SVG size" $ svgSize opts img
Just Eps -> mbToEither "could not determine EPS size" $ epsSize img
- Just Pdf -> Left "could not determine PDF size" -- TODO
+ Just Pdf -> mbToEither "could not determine PDF size" $ pdfSize img
+ Just Emf -> mbToEither "could not determine EMF size" $ emfSize img
Nothing -> Left "could not determine image type"
where mbToEither msg Nothing = Left msg
mbToEither _ (Just x) = Right x
@@ -145,7 +175,7 @@ desiredSizeInPoints opts attr s =
(Nothing, Nothing) -> sizeInPoints s
where
ratio = fromIntegral (pxX s) / fromIntegral (pxY s)
- getDim dir = case (dimension dir attr) of
+ getDim dir = case dimension dir attr of
Just (Percent _) -> Nothing
Just dim -> Just $ inPoints opts dim
Nothing -> Nothing
@@ -153,13 +183,30 @@ desiredSizeInPoints opts attr s =
inPoints :: WriterOptions -> Dimension -> Double
inPoints opts dim = 72 * inInch opts dim
+inEm :: WriterOptions -> Dimension -> Double
+inEm opts dim = (64/11) * inInch opts dim
+
inInch :: WriterOptions -> Dimension -> Double
inInch opts dim =
case dim of
- (Pixel a) -> fromIntegral a / (fromIntegral $ writerDpi opts)
+ (Pixel a) -> fromIntegral a / fromIntegral (writerDpi opts)
(Centimeter a) -> a * 0.3937007874
+ (Millimeter a) -> a * 0.03937007874
(Inch a) -> a
(Percent _) -> 0
+ (Em a) -> a * (11/64)
+
+inPixel :: WriterOptions -> Dimension -> Integer
+inPixel opts dim =
+ case dim of
+ (Pixel a) -> a
+ (Centimeter a) -> floor $ dpi * a * 0.3937007874 :: Integer
+ (Millimeter a) -> floor $ dpi * a * 0.03937007874 :: Integer
+ (Inch a) -> floor $ dpi * a :: Integer
+ (Percent _) -> 0
+ (Em a) -> floor $ dpi * a * (11/64) :: Integer
+ where
+ dpi = fromIntegral $ writerDpi opts
-- | Convert a Dimension to a String denoting its equivalent in inches, for example "2.00000".
-- Note: Dimensions in percentages are converted to the empty string.
@@ -170,14 +217,8 @@ showInInch opts dim = showFl $ inInch opts dim
-- | Convert a Dimension to a String denoting its equivalent in pixels, for example "600".
-- Note: Dimensions in percentages are converted to the empty string.
showInPixel :: WriterOptions -> Dimension -> String
-showInPixel opts dim =
- case dim of
- (Pixel a) -> show a
- (Centimeter a) -> show (floor $ dpi * a * 0.3937007874 :: Int)
- (Inch a) -> show (floor $ dpi * a :: Int)
- (Percent _) -> ""
- where
- dpi = fromIntegral $ writerDpi opts
+showInPixel _ (Percent _) = ""
+showInPixel opts dim = show $ inPixel opts dim
-- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm")
numUnit :: String -> Maybe (Double, String)
@@ -187,6 +228,17 @@ numUnit s =
Just n -> Just (n, unit)
Nothing -> Nothing
+-- | Scale a dimension by a factor.
+scaleDimension :: Double -> Dimension -> Dimension
+scaleDimension factor dim =
+ case dim of
+ Pixel x -> Pixel (round $ factor * fromIntegral x)
+ Centimeter x -> Centimeter (factor * x)
+ Millimeter x -> Millimeter (factor * x)
+ Inch x -> Inch (factor * x)
+ Percent x -> Percent (factor * x)
+ Em x -> Em (factor * x)
+
-- | Read a Dimension from an Attr attribute.
-- `dimension Width attr` might return `Just (Pixel 3)` or for example `Just (Centimeter 2.0)`, etc.
dimension :: Direction -> Attr -> Maybe Dimension
@@ -195,20 +247,21 @@ dimension dir (_, _, kvs) =
Width -> extractDim "width"
Height -> extractDim "height"
where
- extractDim key =
- case lookup key kvs of
- Just str ->
- case numUnit str of
- Just (num, unit) -> toDim num unit
- Nothing -> Nothing
- Nothing -> Nothing
+ extractDim key = lookup key kvs >>= lengthToDim
+
+lengthToDim :: String -> Maybe Dimension
+lengthToDim s = numUnit s >>= uncurry toDim
+ where
toDim a "cm" = Just $ Centimeter a
- toDim a "mm" = Just $ Centimeter (a / 10)
+ toDim a "mm" = Just $ Millimeter a
toDim a "in" = Just $ Inch a
toDim a "inch" = Just $ Inch a
toDim a "%" = Just $ Percent a
toDim a "px" = Just $ Pixel (floor a::Integer)
toDim a "" = Just $ Pixel (floor a::Integer)
+ toDim a "pt" = Just $ Inch (a / 72)
+ toDim a "pc" = Just $ Inch (a / 6)
+ toDim a "em" = Just $ Em a
toDim _ _ = Nothing
epsSize :: ByteString -> Maybe ImageSize
@@ -218,7 +271,7 @@ epsSize img = do
case ls' of
[] -> mzero
(x:_) -> case B.words x of
- (_:_:_:ux:uy:[]) -> do
+ [_, _, _, ux, uy] -> do
ux' <- safeRead $ B.unpack ux
uy' <- safeRead $ B.unpack uy
return ImageSize{
@@ -228,6 +281,29 @@ epsSize img = do
, dpiY = 72 }
_ -> mzero
+pdfSize :: ByteString -> Maybe ImageSize
+pdfSize img =
+ case dropWhile (\l -> not (l == "stream" ||
+ "/MediaBox" `B.isPrefixOf` l)) (B.lines img) of
+ (x:_)
+ | "/MediaBox" `B.isPrefixOf` x
+ -> case B.words . B.takeWhile (/=']')
+ . B.drop 1
+ . B.dropWhile (/='[')
+ $ x of
+ [x1, y1, x2, y2] -> do
+ x1' <- safeRead $ B.unpack x1
+ x2' <- safeRead $ B.unpack x2
+ y1' <- safeRead $ B.unpack y1
+ y2' <- safeRead $ B.unpack y2
+ return ImageSize{
+ pxX = x2' - x1'
+ , pxY = y2' - y1'
+ , dpiX = 72
+ , dpiY = 72 }
+ _ -> mzero
+ _ -> mzero
+
pngSize :: ByteString -> Maybe ImageSize
pngSize img = do
let (h, rest) = B.splitAt 8 img
@@ -236,27 +312,26 @@ pngSize img = do
let (i, rest') = B.splitAt 4 $ B.drop 4 rest
guard $ i == "MHDR" || i == "IHDR"
let (sizes, rest'') = B.splitAt 8 rest'
- (x,y) <- case map fromIntegral $ unpack $ sizes of
+ (x,y) <- case map fromIntegral $unpack sizes of
([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
- ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4,
- (shift h1 24) + (shift h2 16) + (shift h3 8) + h4)
- _ -> (hush . Left) "PNG parse error"
+ (shift w1 24 + shift w2 16 + shift w3 8 + w4,
+ shift h1 24 + shift h2 16 + shift h3 8 + h4)
+ _ -> Nothing -- "PNG parse error"
let (dpix, dpiy) = findpHYs rest''
- return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy }
+ return ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy }
findpHYs :: ByteString -> (Integer, Integer)
-findpHYs x =
- if B.null x || "IDAT" `B.isPrefixOf` x
- then (72,72) -- default, no pHYs
- else if "pHYs" `B.isPrefixOf` x
- then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral
- $ unpack $ B.take 9 $ B.drop 4 x
- factor = if u == 1 -- dots per meter
- then \z -> z * 254 `div` 10000
- else const 72
- in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4,
- factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 )
- else findpHYs $ B.drop 1 x -- read another byte
+findpHYs x
+ | B.null x || "IDAT" `B.isPrefixOf` x = (72,72)
+ | "pHYs" `B.isPrefixOf` x =
+ let [x1,x2,x3,x4,y1,y2,y3,y4,u] =
+ map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x
+ factor = if u == 1 -- dots per meter
+ then \z -> z * 254 `div` 10000
+ else const 72
+ in ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4,
+ factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 )
+ | otherwise = findpHYs $ B.drop 1 x -- read another byte
gifSize :: ByteString -> Maybe ImageSize
gifSize img = do
@@ -269,7 +344,55 @@ gifSize img = do
dpiX = 72,
dpiY = 72
}
- _ -> (hush . Left) "GIF parse error"
+ _ -> Nothing -- "GIF parse error"
+
+svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
+svgSize opts img = do
+ doc <- Xml.parseXMLDoc $ UTF8.toString img
+ let dpi = fromIntegral $ writerDpi opts
+ let dirToInt dir = do
+ dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim
+ return $ inPixel opts dim
+ w <- dirToInt "width"
+ h <- dirToInt "height"
+ return ImageSize {
+ pxX = w
+ , pxY = h
+ , dpiX = dpi
+ , dpiY = dpi
+ }
+
+emfSize :: ByteString -> Maybe ImageSize
+emfSize img =
+ let
+ parseheader = runGetOrFail $ do
+ skip 0x18 -- 0x00
+ frameL <- getWord32le -- 0x18 measured in 1/100 of a millimetre
+ frameT <- getWord32le -- 0x1C
+ frameR <- getWord32le -- 0x20
+ frameB <- getWord32le -- 0x24
+ skip 0x20 -- 0x28
+ deviceX <- getWord32le -- 0x48 pixels of reference device
+ deviceY <- getWord32le -- 0x4C
+ mmX <- getWord32le -- 0x50 real mm of reference device (always 320*240?)
+ mmY <- getWord32le -- 0x58
+ -- end of header
+ let
+ w = (deviceX * (frameR - frameL)) `quot` (mmX * 100)
+ h = (deviceY * (frameB - frameT)) `quot` (mmY * 100)
+ dpiW = (deviceX * 254) `quot` (mmX * 10)
+ dpiH = (deviceY * 254) `quot` (mmY * 10)
+ return $ ImageSize
+ { pxX = fromIntegral w
+ , pxY = fromIntegral h
+ , dpiX = fromIntegral dpiW
+ , dpiY = fromIntegral dpiH
+ }
+ in
+ case parseheader . BL.fromStrict $ img of
+ Left _ -> Nothing
+ Right (_, _, size) -> Just size
+
jpegSize :: ByteString -> Either String ImageSize
jpegSize img =
@@ -284,16 +407,16 @@ jpegSize img =
jfifSize :: ByteString -> Either String ImageSize
jfifSize rest =
let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
- $ unpack $ B.take 5 $ B.drop 9 $ rest
+ $ unpack $ B.take 5 $B.drop 9 rest
factor = case dpiDensity of
1 -> id
- 2 -> \x -> (x * 254 `div` 10)
+ 2 -> \x -> x * 254 `div` 10
_ -> const 72
dpix = factor (shift dpix1 8 + dpix2)
dpiy = factor (shift dpiy1 8 + dpiy2)
in case findJfifSize rest of
Left msg -> Left msg
- Right (w,h) -> Right $ ImageSize { pxX = w
+ Right (w,h) ->Right ImageSize { pxX = w
, pxY = h
, dpiX = dpix
, dpiY = dpiy }
@@ -327,7 +450,7 @@ runGet' p bl =
exifSize :: ByteString -> Either String ImageSize
-exifSize bs = runGet' header $ bl
+exifSize bs =runGet' header bl
where bl = BL.fromChunks [bs]
header = runExceptT $ exifHeader bl
-- NOTE: It would be nicer to do
@@ -397,14 +520,13 @@ exifHeader hdr = do
Left msg -> throwError msg
Right x -> return x
return (tag, payload)
- entries <- sequence $ replicate (fromIntegral numentries) ifdEntry
+ entries <- replicateM (fromIntegral numentries) ifdEntry
subentries <- case lookup ExifOffset entries of
Just (UnsignedLong offset') -> do
pos <- lift bytesRead
lift $ skip (fromIntegral offset' - (fromIntegral pos - 8))
numsubentries <- lift getWord16
- sequence $
- replicate (fromIntegral numsubentries) ifdEntry
+ replicateM (fromIntegral numsubentries) ifdEntry
_ -> return []
let allentries = entries ++ subentries
(wdth, hght) <- case (lookup ExifImageWidth allentries,
@@ -415,13 +537,13 @@ exifHeader hdr = do
-- we return a default width and height when
-- the exif header doesn't contain these
let resfactor = case lookup ResolutionUnit allentries of
- Just (UnsignedShort 1) -> (100 / 254)
+ Just (UnsignedShort 1) -> 100 / 254
_ -> 1
let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
$ lookup XResolution allentries
let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
$ lookup YResolution allentries
- return $ ImageSize{
+ return ImageSize{
pxX = wdth
, pxY = hght
, dpiX = xres
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
new file mode 100644
index 000000000..b22c08467
--- /dev/null
+++ b/src/Text/Pandoc/Logging.hs
@@ -0,0 +1,342 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2016-17 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Logging
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This module provides data types and functions for warnings
+and info messages.
+
+-}
+module Text.Pandoc.Logging (
+ Verbosity(..)
+ , LogMessage(..)
+ , encodeLogMessages
+ , showLogMessage
+ , messageVerbosity
+ ) where
+
+import Control.Monad (mzero)
+import Data.Aeson
+import Data.Aeson.Encode.Pretty (Config (..), defConfig, encodePretty',
+ keyOrder)
+import qualified Data.ByteString.Lazy as BL
+import Data.Data (Data, toConstr)
+import Data.List (isSuffixOf)
+import qualified Data.Text as Text
+import Data.Typeable (Typeable)
+import GHC.Generics (Generic)
+import Text.Pandoc.Definition
+import Text.Parsec.Pos
+
+-- | Verbosity level.
+data Verbosity = ERROR | WARNING | INFO
+ deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic)
+
+instance ToJSON Verbosity where
+ toJSON x = toJSON (show x)
+instance FromJSON Verbosity where
+ parseJSON (String t) =
+ case t of
+ "ERROR" -> return ERROR
+ "WARNING" -> return WARNING
+ "INFO" -> return INFO
+ _ -> mzero
+ parseJSON _ = mzero
+
+data LogMessage =
+ SkippedContent String SourcePos
+ | CouldNotParseYamlMetadata String SourcePos
+ | DuplicateLinkReference String SourcePos
+ | DuplicateNoteReference String SourcePos
+ | NoteDefinedButNotUsed String SourcePos
+ | DuplicateIdentifier String SourcePos
+ | ReferenceNotFound String SourcePos
+ | CircularReference String SourcePos
+ | UndefinedToggle String SourcePos
+ | ParsingUnescaped String SourcePos
+ | CouldNotLoadIncludeFile String SourcePos
+ | MacroAlreadyDefined String SourcePos
+ | InlineNotRendered Inline
+ | BlockNotRendered Block
+ | DocxParserWarning String
+ | CouldNotFetchResource String String
+ | CouldNotDetermineImageSize String String
+ | CouldNotConvertImage String String
+ | CouldNotDetermineMimeType String
+ | CouldNotConvertTeXMath String String
+ | CouldNotParseCSS String
+ | Fetching String
+ | Extracting String
+ | NoTitleElement String
+ | NoLangSpecified
+ | InvalidLang String
+ | CouldNotHighlight String
+ | MissingCharacter String
+ | Deprecated String String
+ | NoTranslation String
+ | CouldNotLoadTranslations String String
+ deriving (Show, Eq, Data, Ord, Typeable, Generic)
+
+instance ToJSON LogMessage where
+ toJSON x = object $
+ "verbosity" .= toJSON (messageVerbosity x) :
+ "type" .= toJSON (show $ toConstr x) :
+ case x of
+ SkippedContent s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= sourceLine pos,
+ "column" .= sourceColumn pos]
+ CouldNotParseYamlMetadata s pos ->
+ ["message" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ DuplicateLinkReference s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ NoteDefinedButNotUsed s pos ->
+ ["key" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ DuplicateNoteReference s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ DuplicateIdentifier s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ ReferenceNotFound s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ CircularReference s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ UndefinedToggle s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ ParsingUnescaped s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ CouldNotLoadIncludeFile fp pos ->
+ ["path" .= Text.pack fp,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ MacroAlreadyDefined name pos ->
+ ["name" .= Text.pack name,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ InlineNotRendered il ->
+ ["contents" .= toJSON il]
+ BlockNotRendered bl ->
+ ["contents" .= toJSON bl]
+ DocxParserWarning s ->
+ ["contents" .= Text.pack s]
+ CouldNotFetchResource fp s ->
+ ["path" .= Text.pack fp,
+ "message" .= Text.pack s]
+ CouldNotDetermineImageSize fp s ->
+ ["path" .= Text.pack fp,
+ "message" .= Text.pack s]
+ CouldNotConvertImage fp s ->
+ ["path" .= Text.pack fp,
+ "message" .= Text.pack s]
+ CouldNotDetermineMimeType fp ->
+ ["path" .= Text.pack fp]
+ CouldNotConvertTeXMath s msg ->
+ ["contents" .= Text.pack s,
+ "message" .= Text.pack msg]
+ CouldNotParseCSS msg ->
+ ["message" .= Text.pack msg]
+ Fetching fp ->
+ ["path" .= Text.pack fp]
+ Extracting fp ->
+ ["path" .= Text.pack fp]
+ NoTitleElement fallback ->
+ ["fallback" .= Text.pack fallback]
+ NoLangSpecified -> []
+ InvalidLang s ->
+ ["lang" .= Text.pack s]
+ CouldNotHighlight msg ->
+ ["message" .= Text.pack msg]
+ MissingCharacter msg ->
+ ["message" .= Text.pack msg]
+ Deprecated thing msg ->
+ ["thing" .= Text.pack thing,
+ "message" .= Text.pack msg]
+ NoTranslation term ->
+ ["term" .= Text.pack term]
+ CouldNotLoadTranslations lang msg ->
+ ["lang" .= Text.pack lang,
+ "message" .= Text.pack msg]
+
+
+showPos :: SourcePos -> String
+showPos pos = sn ++ "line " ++
+ show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
+ where sn = if sourceName pos == "source" || sourceName pos == ""
+ then ""
+ else sourceName pos ++ " "
+
+encodeLogMessages :: [LogMessage] -> BL.ByteString
+encodeLogMessages ms =
+ encodePretty' defConfig{ confCompare =
+ keyOrder [ "type", "verbosity", "contents", "message", "path",
+ "source", "line", "column" ] } ms
+
+showLogMessage :: LogMessage -> String
+showLogMessage msg =
+ case msg of
+ SkippedContent s pos ->
+ "Skipped '" ++ s ++ "' at " ++ showPos pos
+ CouldNotParseYamlMetadata s pos ->
+ "Could not parse YAML metadata at " ++ showPos pos ++
+ if null s then "" else ": " ++ s
+ DuplicateLinkReference s pos ->
+ "Duplicate link reference '" ++ s ++ "' at " ++ showPos pos
+ DuplicateNoteReference s pos ->
+ "Duplicate note reference '" ++ s ++ "' at " ++ showPos pos
+ NoteDefinedButNotUsed s pos ->
+ "Note with key '" ++ s ++ "' defined at " ++ showPos pos ++
+ " but not used."
+ DuplicateIdentifier s pos ->
+ "Duplicate identifier '" ++ s ++ "' at " ++ showPos pos
+ ReferenceNotFound s pos ->
+ "Reference not found for '" ++ s ++ "' at " ++ showPos pos
+ CircularReference s pos ->
+ "Circular reference '" ++ s ++ "' at " ++ showPos pos
+ UndefinedToggle s pos ->
+ "Undefined toggle '" ++ s ++ "' at " ++ showPos pos
+ ParsingUnescaped s pos ->
+ "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos
+ CouldNotLoadIncludeFile fp pos ->
+ "Could not load include file '" ++ fp ++ "' at " ++ showPos pos
+ MacroAlreadyDefined name pos ->
+ "Macro '" ++ name ++ "' already defined, ignoring at " ++ showPos pos
+ InlineNotRendered il ->
+ "Not rendering " ++ show il
+ BlockNotRendered bl ->
+ "Not rendering " ++ show bl
+ DocxParserWarning s ->
+ "Docx parser warning: " ++ s
+ CouldNotFetchResource fp s ->
+ "Could not fetch resource '" ++ fp ++ "'" ++
+ if null s then "" else ": " ++ s
+ CouldNotDetermineImageSize fp s ->
+ "Could not determine image size for '" ++ fp ++ "'" ++
+ if null s then "" else ": " ++ s
+ CouldNotConvertImage fp s ->
+ "Could not convert image '" ++ fp ++ "'" ++
+ if null s then "" else ": " ++ s
+ CouldNotDetermineMimeType fp ->
+ "Could not determine mime type for '" ++ fp ++ "'"
+ CouldNotConvertTeXMath s m ->
+ "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++
+ if null m then "" else ':' : '\n' : m
+ CouldNotParseCSS m ->
+ "Could not parse CSS" ++ if null m then "" else ':' : '\n' : m
+ Fetching fp ->
+ "Fetching " ++ fp ++ "..."
+ Extracting fp ->
+ "Extracting " ++ fp ++ "..."
+ NoTitleElement fallback ->
+ "This document format requires a nonempty <title> element.\n" ++
+ "Please specify either 'title' or 'pagetitle' in the metadata.\n" ++
+ "Falling back to '" ++ fallback ++ "'"
+ NoLangSpecified ->
+ "No value for 'lang' was specified in the metadata.\n" ++
+ "It is recommended that lang be specified for this format."
+ InvalidLang s ->
+ "Invalid 'lang' value '" ++ s ++ "'.\n" ++
+ "Use an IETF language tag like 'en-US'."
+ CouldNotHighlight m ->
+ "Could not highlight code block:\n" ++ m
+ MissingCharacter m ->
+ "Missing character: " ++ m
+ Deprecated t m ->
+ "Deprecated: " ++ t ++
+ if null m
+ then ""
+ else ". " ++ m
+ NoTranslation t ->
+ "The term " ++ t ++ " has no translation defined."
+ CouldNotLoadTranslations lang m ->
+ "Could not load translations for " ++ lang ++
+ if null m then "" else '\n' : m
+
+messageVerbosity:: LogMessage -> Verbosity
+messageVerbosity msg =
+ case msg of
+ SkippedContent{} -> INFO
+ CouldNotParseYamlMetadata{} -> WARNING
+ DuplicateLinkReference{} -> WARNING
+ DuplicateNoteReference{} -> WARNING
+ NoteDefinedButNotUsed{} -> WARNING
+ DuplicateIdentifier{} -> WARNING
+ ReferenceNotFound{} -> WARNING
+ CircularReference{} -> WARNING
+ UndefinedToggle{} -> WARNING
+ CouldNotLoadIncludeFile f _
+ | ".sty" `isSuffixOf` f -> INFO
+ | otherwise -> WARNING
+ MacroAlreadyDefined{} -> WARNING
+ ParsingUnescaped{} -> INFO
+ InlineNotRendered{} -> INFO
+ BlockNotRendered{} -> INFO
+ DocxParserWarning{} -> INFO
+ CouldNotFetchResource{} -> WARNING
+ CouldNotDetermineImageSize{} -> WARNING
+ CouldNotConvertImage{} -> WARNING
+ CouldNotDetermineMimeType{} -> WARNING
+ CouldNotConvertTeXMath{} -> WARNING
+ CouldNotParseCSS{} -> WARNING
+ Fetching{} -> INFO
+ Extracting{} -> INFO
+ NoTitleElement{} -> WARNING
+ NoLangSpecified -> INFO
+ InvalidLang{} -> WARNING
+ CouldNotHighlight{} -> WARNING
+ MissingCharacter{} -> WARNING
+ Deprecated{} -> WARNING
+ NoTranslation{} -> WARNING
+ CouldNotLoadTranslations{} -> WARNING
diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs
new file mode 100644
index 000000000..79955509d
--- /dev/null
+++ b/src/Text/Pandoc/Lua.hs
@@ -0,0 +1,83 @@
+{-
+Copyright © 2017–2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Lua
+ Copyright : Copyright © 2017–2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+ Stability : alpha
+
+Running pandoc Lua filters.
+-}
+module Text.Pandoc.Lua
+ ( LuaException (..)
+ , runLuaFilter
+ , runPandocLua
+ ) where
+
+import Control.Monad ((>=>))
+import Foreign.Lua (FromLuaStack (peek), Lua, LuaException (..),
+ Status (OK), ToLuaStack (push))
+import Text.Pandoc.Class (PandocIO)
+import Text.Pandoc.Definition (Pandoc)
+import Text.Pandoc.Lua.Filter (LuaFilter, walkMWithLuaFilter)
+import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath)
+import Text.Pandoc.Lua.Util (popValue)
+import Text.Pandoc.Options (ReaderOptions)
+import qualified Foreign.Lua as Lua
+
+-- | Run the Lua filter in @filterPath@ for a transformation to target
+-- format @format@. Pandoc uses Lua init files to setup the Lua
+-- interpreter.
+runLuaFilter :: ReaderOptions -> FilePath -> String
+ -> Pandoc -> PandocIO (Either LuaException Pandoc)
+runLuaFilter ropts filterPath format doc =
+ runPandocLua (runLuaFilter' ropts filterPath format doc)
+
+runLuaFilter' :: ReaderOptions -> FilePath -> String
+ -> Pandoc -> Lua Pandoc
+runLuaFilter' ropts filterPath format pd = do
+ registerFormat
+ registerReaderOptions
+ registerScriptPath filterPath
+ top <- Lua.gettop
+ stat <- Lua.dofile filterPath
+ if stat /= OK
+ then do
+ luaErrMsg <- popValue
+ Lua.throwLuaError luaErrMsg
+ else do
+ newtop <- Lua.gettop
+ -- Use the returned filters, or the implicitly defined global filter if
+ -- nothing was returned.
+ luaFilters <- if newtop - top >= 1
+ then peek (-1)
+ else Lua.getglobal "_G" *> fmap (:[]) popValue
+ runAll luaFilters pd
+ where
+ registerFormat = do
+ push format
+ Lua.setglobal "FORMAT"
+
+ registerReaderOptions = do
+ push ropts
+ Lua.setglobal "PANDOC_READER_OPTIONS"
+
+runAll :: [LuaFilter] -> Pandoc -> Lua Pandoc
+runAll = foldr ((>=>) . walkMWithLuaFilter) return
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
new file mode 100644
index 000000000..cc2b9d47e
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -0,0 +1,170 @@
+{-# LANGUAGE FlexibleContexts #-}
+
+module Text.Pandoc.Lua.Filter ( LuaFilterFunction
+ , LuaFilter
+ , tryFilter
+ , runFilterFunction
+ , walkMWithLuaFilter
+ , walkInlines
+ , walkBlocks
+ , blockElementNames
+ , inlineElementNames
+ ) where
+import Control.Monad (mplus, unless, when, (>=>))
+import Control.Monad.Catch (finally)
+import Text.Pandoc.Definition
+import Data.Foldable (foldrM)
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified Foreign.Lua as Lua
+import Foreign.Lua (FromLuaStack (peek), Lua, StackIndex,
+ Status (OK), ToLuaStack (push))
+import Text.Pandoc.Walk (walkM, Walkable)
+import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
+ showConstr, toConstr, tyconUQname)
+import Text.Pandoc.Lua.StackInstances()
+import Text.Pandoc.Lua.Util (typeCheck)
+
+type FunctionMap = Map String LuaFilterFunction
+
+newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int }
+
+instance ToLuaStack LuaFilterFunction where
+ push = pushFilterFunction
+
+instance FromLuaStack LuaFilterFunction where
+ peek = registerFilterFunction
+
+newtype LuaFilter = LuaFilter FunctionMap
+
+instance FromLuaStack LuaFilter where
+ peek idx =
+ let constrs = metaFilterName : pandocFilterNames
+ ++ blockElementNames
+ ++ inlineElementNames
+ fn c acc = do
+ Lua.getfield idx c
+ filterFn <- Lua.tryLua (peek (-1))
+ Lua.pop 1
+ return $ case filterFn of
+ Left _ -> acc
+ Right f -> (c, f) : acc
+ in LuaFilter . Map.fromList <$> foldrM fn [] constrs
+
+-- | Push the filter function to the top of the stack.
+pushFilterFunction :: LuaFilterFunction -> Lua ()
+pushFilterFunction lf =
+ -- The function is stored in a lua registry table, retrieve it from there.
+ Lua.rawgeti Lua.registryindex (functionIndex lf)
+
+registerFilterFunction :: StackIndex -> Lua LuaFilterFunction
+registerFilterFunction idx = do
+ isFn <- Lua.isfunction idx
+ unless isFn . Lua.throwLuaError $ "Not a function at index " ++ show idx
+ Lua.pushvalue idx
+ refIdx <- Lua.ref Lua.registryindex
+ return $ LuaFilterFunction refIdx
+
+elementOrList :: FromLuaStack a => a -> Lua [a]
+elementOrList x = do
+ let topOfStack = Lua.stackTop
+ elementUnchanged <- Lua.isnil topOfStack
+ if elementUnchanged
+ then [x] <$ Lua.pop 1
+ else do
+ mbres <- Lua.peekEither topOfStack
+ case mbres of
+ Right res -> [res] <$ Lua.pop 1
+ Left _ -> do
+ typeCheck Lua.stackTop Lua.TypeTable
+ Lua.toList topOfStack `finally` Lua.pop 1
+
+-- | Try running a filter for the given element
+tryFilter :: (Data a, FromLuaStack a, ToLuaStack a)
+ => LuaFilter -> a -> Lua [a]
+tryFilter (LuaFilter fnMap) x =
+ let filterFnName = showConstr (toConstr x)
+ catchAllName = tyconUQname $ dataTypeName (dataTypeOf x)
+ in
+ case Map.lookup filterFnName fnMap `mplus` Map.lookup catchAllName fnMap of
+ Just fn -> runFilterFunction fn x *> elementOrList x
+ Nothing -> return [x]
+
+-- | Push a value to the stack via a lua filter function. The filter function is
+-- called with given element as argument and is expected to return an element.
+-- Alternatively, the function can return nothing or nil, in which case the
+-- element is left unchanged.
+runFilterFunction :: ToLuaStack a => LuaFilterFunction -> a -> Lua ()
+runFilterFunction lf x = do
+ pushFilterFunction lf
+ push x
+ z <- Lua.pcall 1 1 Nothing
+ when (z /= OK) $ do
+ let addPrefix = ("Error while running filter function: " ++)
+ Lua.throwTopMessageAsError' addPrefix
+
+walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc
+walkMWithLuaFilter f =
+ walkInlines f >=> walkBlocks f >=> walkMeta f >=> walkPandoc f
+
+mconcatMapM :: (Monad m, Functor m) => (a -> m [a]) -> [a] -> m [a]
+mconcatMapM f = fmap mconcat . mapM f
+
+hasOneOf :: LuaFilter -> [String] -> Bool
+hasOneOf (LuaFilter fnMap) = any (\k -> Map.member k fnMap)
+
+walkInlines :: Walkable [Inline] a => LuaFilter -> a -> Lua a
+walkInlines f =
+ if f `hasOneOf` inlineElementNames
+ then walkM (mconcatMapM (tryFilter f :: Inline -> Lua [Inline]))
+ else return
+
+walkBlocks :: Walkable [Block] a => LuaFilter -> a -> Lua a
+walkBlocks f =
+ if f `hasOneOf` blockElementNames
+ then walkM (mconcatMapM (tryFilter f :: Block -> Lua [Block]))
+ else return
+
+walkMeta :: LuaFilter -> Pandoc -> Lua Pandoc
+walkMeta (LuaFilter fnMap) =
+ case Map.lookup "Meta" fnMap of
+ Just fn -> walkM (\(Pandoc meta blocks) -> do
+ meta' <- runFilterFunction fn meta *> singleElement meta
+ return $ Pandoc meta' blocks)
+ Nothing -> return
+
+walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc
+walkPandoc (LuaFilter fnMap) =
+ case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
+ Just fn -> \x -> runFilterFunction fn x *> singleElement x
+ Nothing -> return
+
+constructorsFor :: DataType -> [String]
+constructorsFor x = map show (dataTypeConstrs x)
+
+inlineElementNames :: [String]
+inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str []))
+
+blockElementNames :: [String]
+blockElementNames = "Block" : constructorsFor (dataTypeOf (Para []))
+
+metaFilterName :: String
+metaFilterName = "Meta"
+
+pandocFilterNames :: [String]
+pandocFilterNames = ["Pandoc", "Doc"]
+
+singleElement :: FromLuaStack a => a -> Lua a
+singleElement x = do
+ elementUnchanged <- Lua.isnil (-1)
+ if elementUnchanged
+ then x <$ Lua.pop 1
+ else do
+ mbres <- Lua.peekEither (-1)
+ case mbres of
+ Right res -> res <$ Lua.pop 1
+ Left err -> do
+ Lua.pop 1
+ Lua.throwLuaError $
+ "Error while trying to get a filter's return " ++
+ "value from lua stack.\n" ++ err
diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs
new file mode 100644
index 000000000..8fa228837
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Init.hs
@@ -0,0 +1,117 @@
+{-
+Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Lua
+ Copyright : Copyright © 2017-2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+ Stability : alpha
+
+Functions to initialize the Lua interpreter.
+-}
+module Text.Pandoc.Lua.Init
+ ( LuaException (..)
+ , LuaPackageParams (..)
+ , runPandocLua
+ , initLuaState
+ , luaPackageParams
+ , registerScriptPath
+ ) where
+
+import Control.Monad.Trans (MonadIO (..))
+import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr)
+import Data.IORef (newIORef, readIORef)
+import Data.Version (Version (versionBranch))
+import Foreign.Lua (Lua, LuaException (..))
+import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8)
+import Paths_pandoc (version)
+import Text.Pandoc.Class (PandocIO, getCommonState, getUserDataDir, getMediaBag,
+ setMediaBag)
+import Text.Pandoc.Definition (pandocTypesVersion)
+import Text.Pandoc.Lua.Packages (LuaPackageParams (..),
+ installPandocPackageSearcher)
+import Text.Pandoc.Lua.Util (loadScriptFromDataDir)
+
+import qualified Foreign.Lua as Lua
+import qualified Foreign.Lua.Module.Text as Lua
+import qualified Text.Pandoc.Definition as Pandoc
+
+-- | Run the lua interpreter, using pandoc's default way of environment
+-- initalization.
+runPandocLua :: Lua a -> PandocIO (Either LuaException a)
+runPandocLua luaOp = do
+ luaPkgParams <- luaPackageParams
+ enc <- liftIO $ getForeignEncoding <* setForeignEncoding utf8
+ res <- liftIO $ Lua.runLuaEither (initLuaState luaPkgParams *> luaOp)
+ liftIO $ setForeignEncoding enc
+ newMediaBag <- liftIO (readIORef (luaPkgMediaBag luaPkgParams))
+ setMediaBag newMediaBag
+ return res
+
+-- | Generate parameters required to setup pandoc's lua environment.
+luaPackageParams :: PandocIO LuaPackageParams
+luaPackageParams = do
+ commonState <- getCommonState
+ datadir <- getUserDataDir
+ mbRef <- liftIO . newIORef =<< getMediaBag
+ return LuaPackageParams
+ { luaPkgCommonState = commonState
+ , luaPkgDataDir = datadir
+ , luaPkgMediaBag = mbRef
+ }
+
+-- Initialize the lua state with all required values
+initLuaState :: LuaPackageParams -> Lua ()
+initLuaState luaPkgParams = do
+ Lua.openlibs
+ Lua.preloadTextModule "text"
+ Lua.push (versionBranch version)
+ Lua.setglobal "PANDOC_VERSION"
+ Lua.push (versionBranch pandocTypesVersion)
+ Lua.setglobal "PANDOC_API_VERSION"
+ installPandocPackageSearcher luaPkgParams
+ loadScriptFromDataDir (luaPkgDataDir luaPkgParams) "init.lua"
+ putConstructorsInRegistry
+
+registerScriptPath :: FilePath -> Lua ()
+registerScriptPath fp = do
+ Lua.push fp
+ Lua.setglobal "PANDOC_SCRIPT_FILE"
+
+putConstructorsInRegistry :: Lua ()
+putConstructorsInRegistry = do
+ Lua.getglobal "pandoc"
+ constrsToReg $ Pandoc.Pandoc mempty mempty
+ constrsToReg $ Pandoc.Str mempty
+ constrsToReg $ Pandoc.Para mempty
+ constrsToReg $ Pandoc.Meta mempty
+ constrsToReg $ Pandoc.MetaList mempty
+ constrsToReg $ Pandoc.Citation mempty mempty mempty Pandoc.AuthorInText 0 0
+ putInReg "Attr" -- used for Attr type alias
+ Lua.pop 1
+ where
+ constrsToReg :: Data a => a -> Lua ()
+ constrsToReg = mapM_ (putInReg . showConstr) . dataTypeConstrs . dataTypeOf
+
+ putInReg :: String -> Lua ()
+ putInReg name = do
+ Lua.push ("pandoc." ++ name) -- name in registry
+ Lua.push name -- in pandoc module
+ Lua.rawget (Lua.nthFromTop 3)
+ Lua.rawset Lua.registryindex
diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs
new file mode 100644
index 000000000..7d942a452
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs
@@ -0,0 +1,108 @@
+{-
+Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Lua.Module.MediaBag
+ Copyright : Copyright © 2017-2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+ Stability : alpha
+
+The lua module @pandoc.mediabag@.
+-}
+module Text.Pandoc.Lua.Module.MediaBag
+ ( pushModule
+ ) where
+
+import Control.Monad (zipWithM_)
+import Data.IORef (IORef, modifyIORef', readIORef)
+import Data.Maybe (fromMaybe)
+import Foreign.Lua (Lua, NumResults, Optional, liftIO)
+import Text.Pandoc.Class (CommonState (..), fetchItem, putCommonState,
+ runIOorExplode, setMediaBag)
+import Text.Pandoc.Lua.StackInstances ()
+import Text.Pandoc.Lua.Util (addFunction)
+import Text.Pandoc.MIME (MimeType)
+
+import qualified Data.ByteString.Lazy as BL
+import qualified Foreign.Lua as Lua
+import qualified Text.Pandoc.MediaBag as MB
+
+--
+-- MediaBag submodule
+--
+pushModule :: CommonState -> IORef MB.MediaBag -> Lua NumResults
+pushModule commonState mediaBagRef = do
+ Lua.newtable
+ addFunction "insert" (insertMediaFn mediaBagRef)
+ addFunction "lookup" (lookupMediaFn mediaBagRef)
+ addFunction "list" (mediaDirectoryFn mediaBagRef)
+ addFunction "fetch" (fetch commonState mediaBagRef)
+ return 1
+
+insertMediaFn :: IORef MB.MediaBag
+ -> FilePath
+ -> Optional MimeType
+ -> BL.ByteString
+ -> Lua NumResults
+insertMediaFn mbRef fp optionalMime contents = do
+ liftIO . modifyIORef' mbRef $
+ MB.insertMedia fp (Lua.fromOptional optionalMime) contents
+ return 0
+
+lookupMediaFn :: IORef MB.MediaBag
+ -> FilePath
+ -> Lua NumResults
+lookupMediaFn mbRef fp = do
+ res <- MB.lookupMedia fp <$> liftIO (readIORef mbRef)
+ case res of
+ Nothing -> Lua.pushnil *> return 1
+ Just (mimeType, contents) -> do
+ Lua.push mimeType
+ Lua.push contents
+ return 2
+
+mediaDirectoryFn :: IORef MB.MediaBag
+ -> Lua NumResults
+mediaDirectoryFn mbRef = do
+ dirContents <- MB.mediaDirectory <$> liftIO (readIORef mbRef)
+ Lua.newtable
+ zipWithM_ addEntry [1..] dirContents
+ return 1
+ where
+ addEntry :: Int -> (FilePath, MimeType, Int) -> Lua ()
+ addEntry idx (fp, mimeType, contentLength) = do
+ Lua.newtable
+ Lua.push "path" *> Lua.push fp *> Lua.rawset (-3)
+ Lua.push "type" *> Lua.push mimeType *> Lua.rawset (-3)
+ Lua.push "length" *> Lua.push contentLength *> Lua.rawset (-3)
+ Lua.rawseti (-2) idx
+
+fetch :: CommonState
+ -> IORef MB.MediaBag
+ -> String
+ -> Lua NumResults
+fetch commonState mbRef src = do
+ mediaBag <- liftIO $ readIORef mbRef
+ (bs, mimeType) <- liftIO . runIOorExplode $ do
+ putCommonState commonState
+ setMediaBag mediaBag
+ fetchItem src
+ Lua.push $ fromMaybe "" mimeType
+ Lua.push bs
+ return 2 -- returns 2 values: contents, mimetype
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
new file mode 100644
index 000000000..b9410a353
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -0,0 +1,134 @@
+{-
+Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE FlexibleContexts #-}
+{- |
+ Module : Text.Pandoc.Lua.Module.Pandoc
+ Copyright : Copyright © 2017-2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+ Stability : alpha
+
+Pandoc module for lua.
+-}
+module Text.Pandoc.Lua.Module.Pandoc
+ ( pushModule
+ ) where
+
+import Control.Monad (when)
+import Data.Default (Default (..))
+import Data.Maybe (fromMaybe)
+import Data.Text (pack)
+import Foreign.Lua (ToLuaStack, FromLuaStack, Lua, NumResults, Optional, liftIO)
+import System.Exit (ExitCode (..))
+import Text.Pandoc.Class (runIO)
+import Text.Pandoc.Definition (Block, Inline)
+import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter)
+import Text.Pandoc.Lua.StackInstances ()
+import Text.Pandoc.Lua.Util (addFunction, addValue, loadScriptFromDataDir)
+import Text.Pandoc.Walk (Walkable)
+import Text.Pandoc.Options (ReaderOptions (readerExtensions))
+import Text.Pandoc.Process (pipeProcess)
+import Text.Pandoc.Readers (Reader (..), getReader)
+
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Lazy.Char8 as BSL
+import qualified Foreign.Lua as Lua
+
+-- | Push the "pandoc" on the lua stack. Requires the `list` module to be
+-- loaded.
+pushModule :: Maybe FilePath -> Lua NumResults
+pushModule datadir = do
+ loadScriptFromDataDir datadir "pandoc.lua"
+ addFunction "read" readDoc
+ addFunction "pipe" pipeFn
+ addFunction "walk_block" walkBlock
+ addFunction "walk_inline" walkInline
+ return 1
+
+walkElement :: (ToLuaStack a, Walkable [Inline] a, Walkable [Block] a)
+ => a -> LuaFilter -> Lua a
+walkElement x f = walkInlines f x >>= walkBlocks f
+
+walkInline :: Inline -> LuaFilter -> Lua Inline
+walkInline = walkElement
+
+walkBlock :: Block -> LuaFilter -> Lua Block
+walkBlock = walkElement
+
+readDoc :: String -> Optional String -> Lua NumResults
+readDoc content formatSpecOrNil = do
+ let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil)
+ case getReader formatSpec of
+ Left s -> Lua.raiseError s -- Unknown reader
+ Right (reader, es) ->
+ case reader of
+ TextReader r -> do
+ res <- liftIO $ runIO $ r def{ readerExtensions = es } (pack content)
+ case res of
+ Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc
+ Left s -> Lua.raiseError (show s) -- error while reading
+ _ -> Lua.raiseError "Only string formats are supported at the moment."
+
+-- | Pipes input through a command.
+pipeFn :: String
+ -> [String]
+ -> BL.ByteString
+ -> Lua NumResults
+pipeFn command args input = do
+ (ec, output) <- liftIO $ pipeProcess Nothing command args input
+ case ec of
+ ExitSuccess -> 1 <$ Lua.push output
+ ExitFailure n -> Lua.raiseError (PipeError command n output)
+
+data PipeError = PipeError
+ { pipeErrorCommand :: String
+ , pipeErrorCode :: Int
+ , pipeErrorOutput :: BL.ByteString
+ }
+
+instance FromLuaStack PipeError where
+ peek idx =
+ PipeError
+ <$> (Lua.getfield idx "command" *> Lua.peek (-1) <* Lua.pop 1)
+ <*> (Lua.getfield idx "error_code" *> Lua.peek (-1) <* Lua.pop 1)
+ <*> (Lua.getfield idx "output" *> Lua.peek (-1) <* Lua.pop 1)
+
+instance ToLuaStack PipeError where
+ push pipeErr = do
+ Lua.newtable
+ addValue "command" (pipeErrorCommand pipeErr)
+ addValue "error_code" (pipeErrorCode pipeErr)
+ addValue "output" (pipeErrorOutput pipeErr)
+ pushPipeErrorMetaTable
+ Lua.setmetatable (-2)
+ where
+ pushPipeErrorMetaTable :: Lua ()
+ pushPipeErrorMetaTable = do
+ v <- Lua.newmetatable "pandoc pipe error"
+ when v $ addFunction "__tostring" pipeErrorMessage
+
+ pipeErrorMessage :: PipeError -> Lua BL.ByteString
+ pipeErrorMessage (PipeError cmd errorCode output) = return $ mconcat
+ [ BSL.pack "Error running "
+ , BSL.pack cmd
+ , BSL.pack " (error code "
+ , BSL.pack $ show errorCode
+ , BSL.pack "): "
+ , if output == mempty then BSL.pack "<no output>" else output
+ ]
diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs
new file mode 100644
index 000000000..f8eb96dc7
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Module/Utils.hs
@@ -0,0 +1,126 @@
+{-
+Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Lua.Module.Utils
+ Copyright : Copyright © 2017-2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+ Stability : alpha
+
+Utility module for lua, exposing internal helper functions.
+-}
+module Text.Pandoc.Lua.Module.Utils
+ ( pushModule
+ ) where
+
+import Control.Applicative ((<|>))
+import Data.Default (def)
+import Foreign.Lua (FromLuaStack, Lua, LuaInteger, NumResults)
+import Text.Pandoc.Class (runIO, setUserDataDir)
+import Text.Pandoc.Definition (Pandoc, Meta, MetaValue, Block, Inline)
+import Text.Pandoc.Lua.StackInstances ()
+import Text.Pandoc.Lua.Util (addFunction, popValue)
+
+import qualified Data.Digest.Pure.SHA as SHA
+import qualified Data.ByteString.Lazy as BSL
+import qualified Foreign.Lua as Lua
+import qualified Text.Pandoc.Filter.JSON as JSONFilter
+import qualified Text.Pandoc.Shared as Shared
+
+-- | Push the "pandoc.utils" module to the lua stack.
+pushModule :: Maybe FilePath -> Lua NumResults
+pushModule mbDatadir = do
+ Lua.newtable
+ addFunction "hierarchicalize" hierarchicalize
+ addFunction "normalize_date" normalizeDate
+ addFunction "run_json_filter" (runJSONFilter mbDatadir)
+ addFunction "sha1" sha1
+ addFunction "stringify" stringify
+ addFunction "to_roman_numeral" toRomanNumeral
+ return 1
+
+-- | Convert list of Pandoc blocks into (hierarchical) list of Elements.
+hierarchicalize :: [Block] -> Lua [Shared.Element]
+hierarchicalize = return . Shared.hierarchicalize
+
+-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
+-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
+-- or equal to 1583, but MS Word only accepts dates starting 1601).
+-- Returns nil instead of a string if the conversion failed.
+normalizeDate :: String -> Lua (Lua.Optional String)
+normalizeDate = return . Lua.Optional . Shared.normalizeDate
+
+-- | Run a JSON filter on the given document.
+runJSONFilter :: Maybe FilePath
+ -> Pandoc
+ -> FilePath
+ -> Lua.Optional [String]
+ -> Lua NumResults
+runJSONFilter mbDatadir doc filterFile optArgs = do
+ args <- case Lua.fromOptional optArgs of
+ Just x -> return x
+ Nothing -> do
+ Lua.getglobal "FORMAT"
+ (:[]) <$> popValue
+ filterRes <- Lua.liftIO . runIO $ do
+ setUserDataDir mbDatadir
+ JSONFilter.apply def args filterFile doc
+ case filterRes of
+ Left err -> Lua.raiseError (show err)
+ Right d -> (1 :: NumResults) <$ Lua.push d
+
+-- | Calculate the hash of the given contents.
+sha1 :: BSL.ByteString
+ -> Lua String
+sha1 = return . SHA.showDigest . SHA.sha1
+
+-- | Convert pandoc structure to a string with formatting removed.
+-- Footnotes are skipped (since we don't want their contents in link
+-- labels).
+stringify :: AstElement -> Lua String
+stringify el = return $ case el of
+ PandocElement pd -> Shared.stringify pd
+ InlineElement i -> Shared.stringify i
+ BlockElement b -> Shared.stringify b
+ MetaElement m -> Shared.stringify m
+ MetaValueElement m -> Shared.stringify m
+
+data AstElement
+ = PandocElement Pandoc
+ | MetaElement Meta
+ | BlockElement Block
+ | InlineElement Inline
+ | MetaValueElement MetaValue
+ deriving (Show)
+
+instance FromLuaStack AstElement where
+ peek idx = do
+ res <- Lua.tryLua $ (PandocElement <$> Lua.peek idx)
+ <|> (InlineElement <$> Lua.peek idx)
+ <|> (BlockElement <$> Lua.peek idx)
+ <|> (MetaElement <$> Lua.peek idx)
+ <|> (MetaValueElement <$> Lua.peek idx)
+ case res of
+ Right x -> return x
+ Left _ -> Lua.throwLuaError
+ "Expected an AST element, but could not parse value as such."
+
+-- | Convert a number < 4000 to uppercase roman numeral.
+toRomanNumeral :: LuaInteger -> Lua String
+toRomanNumeral = return . Shared.toRomanNumeral . fromIntegral
diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs
new file mode 100644
index 000000000..1e6ff22fe
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Packages.hs
@@ -0,0 +1,115 @@
+{-
+Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
+{- |
+ Module : Text.Pandoc.Lua.Packages
+ Copyright : Copyright © 2017-2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+ Stability : alpha
+
+Pandoc module for lua.
+-}
+module Text.Pandoc.Lua.Packages
+ ( LuaPackageParams (..)
+ , installPandocPackageSearcher
+ ) where
+
+import Control.Monad (forM_)
+import Data.ByteString.Char8 (unpack)
+import Data.IORef (IORef)
+import Foreign.Lua (Lua, NumResults, liftIO)
+import Text.Pandoc.Class (CommonState, readDataFile, runIO, setUserDataDir)
+import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.Lua.Util (dostring')
+
+import qualified Foreign.Lua as Lua
+import Text.Pandoc.Lua.Module.Pandoc as Pandoc
+import Text.Pandoc.Lua.Module.MediaBag as MediaBag
+import Text.Pandoc.Lua.Module.Utils as Utils
+
+-- | Parameters used to create lua packages/modules.
+data LuaPackageParams = LuaPackageParams
+ { luaPkgCommonState :: CommonState
+ , luaPkgDataDir :: Maybe FilePath
+ , luaPkgMediaBag :: IORef MediaBag
+ }
+
+-- | Insert pandoc's package loader as the first loader, making it the default.
+installPandocPackageSearcher :: LuaPackageParams -> Lua ()
+installPandocPackageSearcher luaPkgParams = do
+ luaVersion <- Lua.getglobal "_VERSION" *> Lua.peek (-1)
+ if luaVersion == "Lua 5.1"
+ then Lua.getglobal' "package.loaders"
+ else Lua.getglobal' "package.searchers"
+ shiftArray
+ Lua.pushHaskellFunction (pandocPackageSearcher luaPkgParams)
+ Lua.wrapHaskellFunction
+ Lua.rawseti (-2) 1
+ Lua.pop 1 -- remove 'package.searchers' from stack
+ where
+ shiftArray = forM_ [4, 3, 2, 1] $ \i -> do
+ Lua.rawgeti (-1) i
+ Lua.rawseti (-2) (i + 1)
+
+-- | Load a pandoc module.
+pandocPackageSearcher :: LuaPackageParams -> String -> Lua NumResults
+pandocPackageSearcher luaPkgParams pkgName =
+ case pkgName of
+ "pandoc" -> let datadir = luaPkgDataDir luaPkgParams
+ in pushWrappedHsFun (Pandoc.pushModule datadir)
+ "pandoc.mediabag" -> let st = luaPkgCommonState luaPkgParams
+ mbRef = luaPkgMediaBag luaPkgParams
+ in pushWrappedHsFun (MediaBag.pushModule st mbRef)
+ "pandoc.utils" -> let datadirMb = luaPkgDataDir luaPkgParams
+ in pushWrappedHsFun (Utils.pushModule datadirMb)
+ _ -> searchPureLuaLoader
+ where
+ pushWrappedHsFun f = do
+ Lua.pushHaskellFunction f
+ Lua.wrapHaskellFunction
+ return 1
+ searchPureLuaLoader = do
+ let filename = pkgName ++ ".lua"
+ modScript <- liftIO (dataDirScript (luaPkgDataDir luaPkgParams) filename)
+ case modScript of
+ Just script -> pushWrappedHsFun (loadStringAsPackage pkgName script)
+ Nothing -> do
+ Lua.push ("no file '" ++ filename ++ "' in pandoc's datadir")
+ return 1
+
+loadStringAsPackage :: String -> String -> Lua NumResults
+loadStringAsPackage pkgName script = do
+ status <- dostring' script
+ if status == Lua.OK
+ then return (1 :: NumResults)
+ else do
+ msg <- Lua.peek (-1) <* Lua.pop 1
+ Lua.push ("Error while loading ``" ++ pkgName ++ "`.\n" ++ msg)
+ Lua.lerror
+ return (2 :: NumResults)
+
+-- | Get the string representation of the pandoc module
+dataDirScript :: Maybe FilePath -> FilePath -> IO (Maybe String)
+dataDirScript datadir moduleFile = do
+ res <- runIO $ setUserDataDir datadir >> readDataFile moduleFile
+ return $ case res of
+ Left _ -> Nothing
+ Right s -> Just (unpack s)
diff --git a/src/Text/Pandoc/Lua/StackInstances.hs b/src/Text/Pandoc/Lua/StackInstances.hs
new file mode 100644
index 000000000..7e0dc20c4
--- /dev/null
+++ b/src/Text/Pandoc/Lua/StackInstances.hs
@@ -0,0 +1,376 @@
+{-
+Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu>
+ 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+{- |
+ Module : Text.Pandoc.Lua.StackInstances
+ Copyright : © 2012-2018 John MacFarlane
+ © 2017-2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+ Stability : alpha
+
+StackValue instances for pandoc types.
+-}
+module Text.Pandoc.Lua.StackInstances () where
+
+import Control.Applicative ((<|>))
+import Control.Monad (when)
+import Control.Monad.Catch (finally)
+import Data.Data (showConstr, toConstr)
+import Data.Foldable (forM_)
+import Foreign.Lua (FromLuaStack (peek), Lua, LuaInteger, LuaNumber, StackIndex,
+ ToLuaStack (push), Type (..), throwLuaError, tryLua)
+import Text.Pandoc.Definition
+import Text.Pandoc.Extensions (Extensions)
+import Text.Pandoc.Lua.Util (getTable, getTag, pushViaConstructor, typeCheck)
+import Text.Pandoc.Options (ReaderOptions (..), TrackChanges)
+import Text.Pandoc.Shared (Element (Blk, Sec), safeRead)
+
+import qualified Foreign.Lua as Lua
+import qualified Data.Set as Set
+import qualified Text.Pandoc.Lua.Util as LuaUtil
+
+defineHowTo :: String -> Lua a -> Lua a
+defineHowTo ctx op = op `Lua.modifyLuaError` (("Could not " ++ ctx ++ ": ") ++)
+
+instance ToLuaStack Pandoc where
+ push (Pandoc meta blocks) =
+ pushViaConstructor "Pandoc" blocks meta
+
+instance FromLuaStack Pandoc where
+ peek idx = defineHowTo "get Pandoc value" $ do
+ typeCheck idx Lua.TypeTable
+ blocks <- getTable idx "blocks"
+ meta <- Lua.getfield idx "meta" *> (Lua.peek Lua.stackTop `finally` Lua.pop 1)
+ return $ Pandoc meta blocks
+
+instance ToLuaStack Meta where
+ push (Meta mmap) =
+ pushViaConstructor "Meta" mmap
+instance FromLuaStack Meta where
+ peek idx = defineHowTo "get Meta value" $ do
+ typeCheck idx Lua.TypeTable
+ Meta <$> peek idx
+
+instance ToLuaStack MetaValue where
+ push = pushMetaValue
+instance FromLuaStack MetaValue where
+ peek = peekMetaValue
+
+instance ToLuaStack Block where
+ push = pushBlock
+
+instance FromLuaStack Block where
+ peek = peekBlock
+
+-- Inline
+instance ToLuaStack Inline where
+ push = pushInline
+
+instance FromLuaStack Inline where
+ peek = peekInline
+
+-- Citation
+instance ToLuaStack Citation where
+ push (Citation cid prefix suffix mode noteNum hash) =
+ pushViaConstructor "Citation" cid mode prefix suffix noteNum hash
+
+instance FromLuaStack Citation where
+ peek idx = do
+ id' <- getTable idx "id"
+ prefix <- getTable idx "prefix"
+ suffix <- getTable idx "suffix"
+ mode <- getTable idx "mode"
+ num <- getTable idx "note_num"
+ hash <- getTable idx "hash"
+ return $ Citation id' prefix suffix mode num hash
+
+instance ToLuaStack Alignment where
+ push = push . show
+instance FromLuaStack Alignment where
+ peek idx = safeRead' =<< peek idx
+
+instance ToLuaStack CitationMode where
+ push = push . show
+instance FromLuaStack CitationMode where
+ peek idx = safeRead' =<< peek idx
+
+instance ToLuaStack Format where
+ push (Format f) = push f
+instance FromLuaStack Format where
+ peek idx = Format <$> peek idx
+
+instance ToLuaStack ListNumberDelim where
+ push = push . show
+instance FromLuaStack ListNumberDelim where
+ peek idx = safeRead' =<< peek idx
+
+instance ToLuaStack ListNumberStyle where
+ push = push . show
+instance FromLuaStack ListNumberStyle where
+ peek idx = safeRead' =<< peek idx
+
+instance ToLuaStack MathType where
+ push = push . show
+instance FromLuaStack MathType where
+ peek idx = safeRead' =<< peek idx
+
+instance ToLuaStack QuoteType where
+ push = push . show
+instance FromLuaStack QuoteType where
+ peek idx = safeRead' =<< peek idx
+
+instance ToLuaStack Double where
+ push = push . (realToFrac :: Double -> LuaNumber)
+instance FromLuaStack Double where
+ peek = fmap (realToFrac :: LuaNumber -> Double) . peek
+
+instance ToLuaStack Int where
+ push = push . (fromIntegral :: Int -> LuaInteger)
+instance FromLuaStack Int where
+ peek = fmap (fromIntegral :: LuaInteger-> Int) . peek
+
+safeRead' :: Read a => String -> Lua a
+safeRead' s = case safeRead s of
+ Nothing -> throwLuaError ("Could not read: " ++ s)
+ Just x -> return x
+
+-- | Push an meta value element to the top of the lua stack.
+pushMetaValue :: MetaValue -> Lua ()
+pushMetaValue = \case
+ MetaBlocks blcks -> pushViaConstructor "MetaBlocks" blcks
+ MetaBool bool -> push bool
+ MetaInlines inlns -> pushViaConstructor "MetaInlines" inlns
+ MetaList metalist -> pushViaConstructor "MetaList" metalist
+ MetaMap metamap -> pushViaConstructor "MetaMap" metamap
+ MetaString str -> push str
+
+-- | Interpret the value at the given stack index as meta value.
+peekMetaValue :: StackIndex -> Lua MetaValue
+peekMetaValue idx = defineHowTo "get MetaValue" $ do
+ -- Get the contents of an AST element.
+ let elementContent :: FromLuaStack a => Lua a
+ elementContent = peek idx
+ luatype <- Lua.ltype idx
+ case luatype of
+ TypeBoolean -> MetaBool <$> peek idx
+ TypeString -> MetaString <$> peek idx
+ TypeTable -> do
+ tag <- tryLua $ getTag idx
+ case tag of
+ Right "MetaBlocks" -> MetaBlocks <$> elementContent
+ Right "MetaBool" -> MetaBool <$> elementContent
+ Right "MetaMap" -> MetaMap <$> elementContent
+ Right "MetaInlines" -> MetaInlines <$> elementContent
+ Right "MetaList" -> MetaList <$> elementContent
+ Right "MetaString" -> MetaString <$> elementContent
+ Right t -> throwLuaError ("Unknown meta tag: " ++ t)
+ Left _ -> do
+ -- no meta value tag given, try to guess.
+ len <- Lua.rawlen idx
+ if len <= 0
+ then MetaMap <$> peek idx
+ else (MetaInlines <$> peek idx)
+ <|> (MetaBlocks <$> peek idx)
+ <|> (MetaList <$> peek idx)
+ _ -> throwLuaError "could not get meta value"
+
+-- | Push an block element to the top of the lua stack.
+pushBlock :: Block -> Lua ()
+pushBlock = \case
+ BlockQuote blcks -> pushViaConstructor "BlockQuote" blcks
+ BulletList items -> pushViaConstructor "BulletList" items
+ CodeBlock attr code -> pushViaConstructor "CodeBlock" code (LuaAttr attr)
+ DefinitionList items -> pushViaConstructor "DefinitionList" items
+ Div attr blcks -> pushViaConstructor "Div" blcks (LuaAttr attr)
+ Header lvl attr inlns -> pushViaConstructor "Header" lvl inlns (LuaAttr attr)
+ HorizontalRule -> pushViaConstructor "HorizontalRule"
+ LineBlock blcks -> pushViaConstructor "LineBlock" blcks
+ OrderedList lstAttr list -> pushViaConstructor "OrderedList" list lstAttr
+ Null -> pushViaConstructor "Null"
+ Para blcks -> pushViaConstructor "Para" blcks
+ Plain blcks -> pushViaConstructor "Plain" blcks
+ RawBlock f cs -> pushViaConstructor "RawBlock" f cs
+ Table capt aligns widths headers rows ->
+ pushViaConstructor "Table" capt aligns widths headers rows
+
+-- | Return the value at the given index as block if possible.
+peekBlock :: StackIndex -> Lua Block
+peekBlock idx = defineHowTo "get Block value" $ do
+ typeCheck idx Lua.TypeTable
+ tag <- getTag idx
+ case tag of
+ "BlockQuote" -> BlockQuote <$> elementContent
+ "BulletList" -> BulletList <$> elementContent
+ "CodeBlock" -> withAttr CodeBlock <$> elementContent
+ "DefinitionList" -> DefinitionList <$> elementContent
+ "Div" -> withAttr Div <$> elementContent
+ "Header" -> (\(lvl, LuaAttr attr, lst) -> Header lvl attr lst)
+ <$> elementContent
+ "HorizontalRule" -> return HorizontalRule
+ "LineBlock" -> LineBlock <$> elementContent
+ "OrderedList" -> uncurry OrderedList <$> elementContent
+ "Null" -> return Null
+ "Para" -> Para <$> elementContent
+ "Plain" -> Plain <$> elementContent
+ "RawBlock" -> uncurry RawBlock <$> elementContent
+ "Table" -> (\(capt, aligns, widths, headers, body) ->
+ Table capt aligns widths headers body)
+ <$> elementContent
+ _ -> throwLuaError ("Unknown block type: " ++ tag)
+ where
+ -- Get the contents of an AST element.
+ elementContent :: FromLuaStack a => Lua a
+ elementContent = getTable idx "c"
+
+-- | Push an inline element to the top of the lua stack.
+pushInline :: Inline -> Lua ()
+pushInline = \case
+ Cite citations lst -> pushViaConstructor "Cite" lst citations
+ Code attr lst -> pushViaConstructor "Code" lst (LuaAttr attr)
+ Emph inlns -> pushViaConstructor "Emph" inlns
+ Image attr alt (src,tit) -> pushViaConstructor "Image" alt src tit (LuaAttr attr)
+ LineBreak -> pushViaConstructor "LineBreak"
+ Link attr lst (src,tit) -> pushViaConstructor "Link" lst src tit (LuaAttr attr)
+ Note blcks -> pushViaConstructor "Note" blcks
+ Math mty str -> pushViaConstructor "Math" mty str
+ Quoted qt inlns -> pushViaConstructor "Quoted" qt inlns
+ RawInline f cs -> pushViaConstructor "RawInline" f cs
+ SmallCaps inlns -> pushViaConstructor "SmallCaps" inlns
+ SoftBreak -> pushViaConstructor "SoftBreak"
+ Space -> pushViaConstructor "Space"
+ Span attr inlns -> pushViaConstructor "Span" inlns (LuaAttr attr)
+ Str str -> pushViaConstructor "Str" str
+ Strikeout inlns -> pushViaConstructor "Strikeout" inlns
+ Strong inlns -> pushViaConstructor "Strong" inlns
+ Subscript inlns -> pushViaConstructor "Subscript" inlns
+ Superscript inlns -> pushViaConstructor "Superscript" inlns
+
+-- | Return the value at the given index as inline if possible.
+peekInline :: StackIndex -> Lua Inline
+peekInline idx = defineHowTo "get Inline value" $ do
+ typeCheck idx Lua.TypeTable
+ tag <- getTag idx
+ case tag of
+ "Cite" -> uncurry Cite <$> elementContent
+ "Code" -> withAttr Code <$> elementContent
+ "Emph" -> Emph <$> elementContent
+ "Image" -> (\(LuaAttr attr, lst, tgt) -> Image attr lst tgt)
+ <$> elementContent
+ "Link" -> (\(LuaAttr attr, lst, tgt) -> Link attr lst tgt)
+ <$> elementContent
+ "LineBreak" -> return LineBreak
+ "Note" -> Note <$> elementContent
+ "Math" -> uncurry Math <$> elementContent
+ "Quoted" -> uncurry Quoted <$> elementContent
+ "RawInline" -> uncurry RawInline <$> elementContent
+ "SmallCaps" -> SmallCaps <$> elementContent
+ "SoftBreak" -> return SoftBreak
+ "Space" -> return Space
+ "Span" -> withAttr Span <$> elementContent
+ "Str" -> Str <$> elementContent
+ "Strikeout" -> Strikeout <$> elementContent
+ "Strong" -> Strong <$> elementContent
+ "Subscript" -> Subscript <$> elementContent
+ "Superscript"-> Superscript <$> elementContent
+ _ -> throwLuaError ("Unknown inline type: " ++ tag)
+ where
+ -- Get the contents of an AST element.
+ elementContent :: FromLuaStack a => Lua a
+ elementContent = getTable idx "c"
+
+withAttr :: (Attr -> a -> b) -> (LuaAttr, a) -> b
+withAttr f (attributes, x) = f (fromLuaAttr attributes) x
+
+-- | Wrapper for Attr
+newtype LuaAttr = LuaAttr { fromLuaAttr :: Attr }
+
+instance ToLuaStack LuaAttr where
+ push (LuaAttr (id', classes, kv)) =
+ pushViaConstructor "Attr" id' classes kv
+
+instance FromLuaStack LuaAttr where
+ peek idx = defineHowTo "get Attr value" (LuaAttr <$> peek idx)
+
+--
+-- Hierarchical elements
+--
+instance ToLuaStack Element where
+ push (Blk blk) = push blk
+ push (Sec lvl num attr label contents) = do
+ Lua.newtable
+ LuaUtil.addValue "level" lvl
+ LuaUtil.addValue "numbering" num
+ LuaUtil.addValue "attr" (LuaAttr attr)
+ LuaUtil.addValue "label" label
+ LuaUtil.addValue "contents" contents
+ pushSecMetaTable
+ Lua.setmetatable (-2)
+ where
+ pushSecMetaTable :: Lua ()
+ pushSecMetaTable = do
+ inexistant <- Lua.newmetatable "PandocElementSec"
+ when inexistant $ do
+ LuaUtil.addValue "t" "Sec"
+ Lua.push "__index"
+ Lua.pushvalue (-2)
+ Lua.rawset (-3)
+
+
+--
+-- Reader Options
+--
+instance ToLuaStack Extensions where
+ push exts = push (show exts)
+
+instance ToLuaStack TrackChanges where
+ push = push . showConstr . toConstr
+
+instance ToLuaStack a => ToLuaStack (Set.Set a) where
+ push set = do
+ Lua.newtable
+ forM_ set (`LuaUtil.addValue` True)
+
+instance ToLuaStack ReaderOptions where
+ push ro = do
+ let ReaderOptions
+ (extensions :: Extensions)
+ (standalone :: Bool)
+ (columns :: Int)
+ (tabStop :: Int)
+ (indentedCodeClasses :: [String])
+ (abbreviations :: Set.Set String)
+ (defaultImageExtension :: String)
+ (trackChanges :: TrackChanges)
+ (stripComments :: Bool)
+ = ro
+ Lua.newtable
+ LuaUtil.addValue "extensions" extensions
+ LuaUtil.addValue "standalone" standalone
+ LuaUtil.addValue "columns" columns
+ LuaUtil.addValue "tabStop" tabStop
+ LuaUtil.addValue "indentedCodeClasses" indentedCodeClasses
+ LuaUtil.addValue "abbreviations" abbreviations
+ LuaUtil.addValue "defaultImageExtension" defaultImageExtension
+ LuaUtil.addValue "trackChanges" trackChanges
+ LuaUtil.addValue "stripComments" stripComments
diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs
new file mode 100644
index 000000000..b7149af39
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Util.hs
@@ -0,0 +1,187 @@
+{-
+Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu>
+ 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE FlexibleInstances #-}
+{- |
+ Module : Text.Pandoc.Lua.Util
+ Copyright : © 2012–2018 John MacFarlane,
+ © 2017-2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+ Stability : alpha
+
+Lua utility functions.
+-}
+module Text.Pandoc.Lua.Util
+ ( getTag
+ , getTable
+ , addValue
+ , addFunction
+ , getRawInt
+ , setRawInt
+ , addRawInt
+ , typeCheck
+ , raiseError
+ , popValue
+ , PushViaCall
+ , pushViaCall
+ , pushViaConstructor
+ , loadScriptFromDataDir
+ , dostring'
+ ) where
+
+import Control.Monad (when)
+import Control.Monad.Catch (finally)
+import Data.ByteString.Char8 (unpack)
+import Foreign.Lua (FromLuaStack (..), NumResults, Lua, NumArgs, StackIndex,
+ ToLuaStack (..), ToHaskellFunction)
+import Foreign.Lua.Api (Status, call, pop, rawget, rawgeti, rawset, rawseti)
+import Text.Pandoc.Class (readDataFile, runIOorExplode, setUserDataDir)
+
+import qualified Foreign.Lua as Lua
+
+-- | Adjust the stack index, assuming that @n@ new elements have been pushed on
+-- the stack.
+adjustIndexBy :: StackIndex -> StackIndex -> StackIndex
+adjustIndexBy idx n =
+ if idx < 0
+ then idx - n
+ else idx
+
+-- | Get value behind key from table at given index.
+getTable :: (ToLuaStack a, FromLuaStack b) => StackIndex -> a -> Lua b
+getTable idx key = do
+ push key
+ rawget (idx `adjustIndexBy` 1)
+ popValue
+
+-- | Add a key-value pair to the table at the top of the stack.
+addValue :: (ToLuaStack a, ToLuaStack b) => a -> b -> Lua ()
+addValue key value = do
+ push key
+ push value
+ rawset (-3)
+
+-- | Add a function to the table at the top of the stack, using the given name.
+addFunction :: ToHaskellFunction a => String -> a -> Lua ()
+addFunction name fn = do
+ Lua.push name
+ Lua.pushHaskellFunction fn
+ Lua.wrapHaskellFunction
+ Lua.rawset (-3)
+
+-- | Get value behind key from table at given index.
+getRawInt :: FromLuaStack a => StackIndex -> Int -> Lua a
+getRawInt idx key = do
+ rawgeti idx key
+ popValue
+
+-- | Set numeric key/value in table at the given index
+setRawInt :: ToLuaStack a => StackIndex -> Int -> a -> Lua ()
+setRawInt idx key value = do
+ push value
+ rawseti (idx `adjustIndexBy` 1) key
+
+-- | Set numeric key/value in table at the top of the stack.
+addRawInt :: ToLuaStack a => Int -> a -> Lua ()
+addRawInt = setRawInt (-1)
+
+typeCheck :: StackIndex -> Lua.Type -> Lua ()
+typeCheck idx expected = do
+ actual <- Lua.ltype idx
+ when (actual /= expected) $ do
+ expName <- Lua.typename expected
+ actName <- Lua.typename actual
+ Lua.throwLuaError $ "expected " ++ expName ++ " but got " ++ actName ++ "."
+
+raiseError :: ToLuaStack a => a -> Lua NumResults
+raiseError e = do
+ Lua.push e
+ fromIntegral <$> Lua.lerror
+
+-- | Get, then pop the value at the top of the stack.
+popValue :: FromLuaStack a => Lua a
+popValue = do
+ resOrError <- Lua.peekEither (-1)
+ pop 1
+ case resOrError of
+ Left err -> Lua.throwLuaError err
+ Right x -> return x
+
+-- | Helper class for pushing a single value to the stack via a lua function.
+-- See @pushViaCall@.
+class PushViaCall a where
+ pushViaCall' :: String -> Lua () -> NumArgs -> a
+
+instance PushViaCall (Lua ()) where
+ pushViaCall' fn pushArgs num = do
+ Lua.push fn
+ Lua.rawget (Lua.registryindex)
+ pushArgs
+ call num 1
+
+instance (ToLuaStack a, PushViaCall b) => PushViaCall (a -> b) where
+ pushViaCall' fn pushArgs num x =
+ pushViaCall' fn (pushArgs *> push x) (num + 1)
+
+-- | Push an value to the stack via a lua function. The lua function is called
+-- with all arguments that are passed to this function and is expected to return
+-- a single value.
+pushViaCall :: PushViaCall a => String -> a
+pushViaCall fn = pushViaCall' fn (return ()) 0
+
+-- | Call a pandoc element constructor within lua, passing all given arguments.
+pushViaConstructor :: PushViaCall a => String -> a
+pushViaConstructor pandocFn = pushViaCall ("pandoc." ++ pandocFn)
+
+-- | Load a file from pandoc's data directory.
+loadScriptFromDataDir :: Maybe FilePath -> FilePath -> Lua ()
+loadScriptFromDataDir datadir scriptFile = do
+ script <- fmap unpack . Lua.liftIO . runIOorExplode $
+ setUserDataDir datadir >> readDataFile scriptFile
+ status <- dostring' script
+ when (status /= Lua.OK) .
+ Lua.throwTopMessageAsError' $ \msg ->
+ "Couldn't load '" ++ scriptFile ++ "'.\n" ++ msg
+
+-- | Load a string and immediately perform a full garbage collection. This is
+-- important to keep the program from hanging: If the program contained a call
+-- to @require@, the a new loader function was created which then become
+-- garbage. If that function is collected at an inopportune times, i.e. when the
+-- Lua API is called via a function that doesn't allow calling back into Haskell
+-- (getraw, setraw, …), then the function's finalizer, and the full program,
+-- will hang.
+dostring' :: String -> Lua Status
+dostring' script = do
+ loadRes <- Lua.loadstring script
+ if loadRes == Lua.OK
+ then Lua.pcall 0 1 Nothing <* Lua.gc Lua.GCCOLLECT 0
+ else return loadRes
+
+-- | Get the tag of a value. This is an optimized and specialized version of
+-- @Lua.getfield idx "tag"@. It only checks for the field on the table at index
+-- @idx@ and on its metatable, also ignoring any @__index@ value on the
+-- metatable.
+getTag :: StackIndex -> Lua String
+getTag idx = do
+ top <- Lua.gettop
+ hasMT <- Lua.getmetatable idx
+ push "tag"
+ if hasMT then Lua.rawget (-2) else Lua.rawget (idx `adjustIndexBy` 1)
+ peek Lua.stackTop `finally` Lua.settop top
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index a08091217..43abe9b2f 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2011-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.MIME
- Copyright : Copyright (C) 2011-2016 John MacFarlane
+ Copyright : Copyright (C) 2011-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,11 +29,11 @@ Mime type lookup for ODT writer.
-}
module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType )where
-import System.FilePath
-import Data.Char ( toLower )
+import Data.Char (toLower)
import Data.List (isPrefixOf, isSuffixOf)
-import Data.Maybe (fromMaybe)
import qualified Data.Map as M
+import Data.Maybe (fromMaybe)
+import System.FilePath
type MimeType = String
@@ -168,6 +168,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("emb","chemical/x-embl-dl-nucleotide")
,("embl","chemical/x-embl-dl-nucleotide")
,("emf","image/x-emf")
+ ,("emz","application/x-msmetafile")
,("eml","message/rfc822")
,("ent","chemical/x-ncbi-asn1-ascii")
,("eot","application/vnd.ms-fontobject")
@@ -243,7 +244,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("jpeg","image/jpeg")
,("jfif","image/jpeg")
,("jpg","image/jpeg")
- ,("js","application/x-javascript")
+ ,("js","application/javascript")
,("kar","audio/midi")
,("key","application/pgp-keys")
,("kil","application/x-killustrator")
@@ -324,6 +325,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("ogv","video/ogg")
,("ogx","application/ogg")
,("old","application/x-trash")
+ ,("opus","audio/ogg")
,("otg","application/vnd.oasis.opendocument.graphics-template")
,("oth","application/vnd.oasis.opendocument.text-web")
,("otp","application/vnd.oasis.opendocument.presentation-template")
@@ -524,4 +526,3 @@ mimeTypesList = -- List borrowed from happstack-server.
,("zip","application/zip")
,("zmt","chemical/x-mopac-input")
]
-
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index eea25fadf..0d060fe1a 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-
-Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2014-2015, 2017–2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.MediaBag
- Copyright : Copyright (C) 2014 John MacFarlane
+ Copyright : Copyright (C) 2014-2015, 2017–2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -34,20 +35,15 @@ module Text.Pandoc.MediaBag (
lookupMedia,
insertMedia,
mediaDirectory,
- extractMediaBag
) where
-import System.FilePath
-import qualified System.FilePath.Posix as Posix
-import System.Directory (createDirectoryIfMissing)
-import qualified Data.Map as M
import qualified Data.ByteString.Lazy as BL
-import Control.Monad (when)
-import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
-import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Maybe (fromMaybe)
-import System.IO (stderr)
import Data.Data (Data)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe)
import Data.Typeable (Typeable)
+import System.FilePath
+import qualified System.FilePath.Posix as Posix
+import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
-- | A container for a collection of binary resources, with names and
-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
@@ -70,8 +66,8 @@ insertMedia fp mbMime contents (MediaBag mediamap) =
MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap)
where mime = fromMaybe fallback mbMime
fallback = case takeExtension fp of
- ".gz" -> getMimeTypeDef $ dropExtension fp
- _ -> getMimeTypeDef fp
+ ".gz" -> getMimeTypeDef $ dropExtension fp
+ _ -> getMimeTypeDef fp
-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
lookupMedia :: FilePath
@@ -83,27 +79,5 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap
-- their corresponding mime types and the lengths in bytes of the contents.
mediaDirectory :: MediaBag -> [(String, MimeType, Int)]
mediaDirectory (MediaBag mediamap) =
- M.foldWithKey (\fp (mime,contents) ->
- (((Posix.joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap
-
--- | Extract contents of MediaBag to a given directory. Print informational
--- messages if 'verbose' is true.
-extractMediaBag :: Bool
- -> FilePath
- -> MediaBag
- -> IO ()
-extractMediaBag verbose dir (MediaBag mediamap) = do
- sequence_ $ M.foldWithKey
- (\fp (_ ,contents) ->
- ((writeMedia verbose dir (Posix.joinPath fp, contents)):)) [] mediamap
-
-writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
-writeMedia verbose dir (subpath, bs) = do
- -- we join and split to convert a/b/c to a\b\c on Windows;
- -- in zip containers all paths use /
- let fullpath = dir </> normalise subpath
- createDirectoryIfMissing True $ takeDirectory fullpath
- when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath
- BL.writeFile fullpath bs
-
-
+ M.foldrWithKey (\fp (mime,contents) ->
+ ((Posix.joinPath fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 48bc5f4eb..bd4ab252b 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -1,6 +1,8 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE TemplateHaskell #-}
{-
-Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Options
- Copyright : Copyright (C) 2012-2016 John MacFarlane
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,13 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Data structures and functions for representing parser and writer
options.
-}
-module Text.Pandoc.Options ( Extension(..)
- , pandocExtensions
- , plainExtensions
- , strictExtensions
- , phpMarkdownExtraExtensions
- , githubMarkdownExtensions
- , multimarkdownExtensions
+module Text.Pandoc.Options ( module Text.Pandoc.Extensions
, ReaderOptions(..)
, HTMLMathMethod (..)
, CiteMethod (..)
@@ -50,248 +46,57 @@ module Text.Pandoc.Options ( Extension(..)
, def
, isEnabled
) where
-import Data.Set (Set)
-import qualified Data.Set as Set
-import Data.Default
-import Text.Pandoc.Highlighting (Style, pygments)
-import Text.Pandoc.MediaBag (MediaBag)
+import Data.Aeson (defaultOptions)
+import Data.Aeson.TH (deriveJSON)
import Data.Data (Data)
+import Data.Default
+import qualified Data.Set as Set
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
+import Skylighting (SyntaxMap, defaultSyntaxMap)
+import Text.Pandoc.Extensions
+import Text.Pandoc.Highlighting (Style, pygments)
--- | Individually selectable syntax extensions.
-data Extension =
- Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes
- | Ext_inline_notes -- ^ Pandoc-style inline notes
- | Ext_pandoc_title_block -- ^ Pandoc title block
- | Ext_yaml_metadata_block -- ^ YAML metadata block
- | Ext_mmd_title_block -- ^ Multimarkdown metadata block
- | Ext_table_captions -- ^ Pandoc-style table captions
- | Ext_implicit_figures -- ^ A paragraph with just an image is a figure
- | Ext_simple_tables -- ^ Pandoc-style simple tables
- | Ext_multiline_tables -- ^ Pandoc-style multiline tables
- | Ext_grid_tables -- ^ Grid tables (pandoc, reST)
- | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra)
- | Ext_citations -- ^ Pandoc/citeproc citations
- | Ext_raw_tex -- ^ Allow raw TeX (other than math)
- | Ext_raw_html -- ^ Allow raw HTML
- | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$
- | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\]
- | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\]
- | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only)
- | Ext_fenced_code_blocks -- ^ Parse fenced code blocks
- | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
- | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
- | Ext_inline_code_attributes -- ^ Allow attributes on inline code
- | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
- | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
- | Ext_native_spans -- ^ Use Span inlines for contents of <span>
- | Ext_bracketed_spans -- ^ Bracketed spans with attributes
- | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown
- -- iff container has attribute 'markdown'
- | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
- | Ext_link_attributes -- ^ link and image attributes
- | Ext_mmd_link_attributes -- ^ MMD style reference link attributes
- | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links
- | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters
- | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank
- | Ext_startnum -- ^ Make start number of ordered list significant
- | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php
- | Ext_compact_definition_lists -- ^ Definition lists without
- -- space between items, and disallow laziness
- | Ext_example_lists -- ^ Markdown-style numbered examples
- | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable
- | Ext_angle_brackets_escapable -- ^ Make < and > escapable
- | Ext_intraword_underscores -- ^ Treat underscore inside word as literal
- | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote
- | Ext_blank_before_header -- ^ Require blank line before a header
- | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax
- | Ext_superscript -- ^ Superscript using ^this^ syntax
- | Ext_subscript -- ^ Subscript using ~this~ syntax
- | Ext_hard_line_breaks -- ^ All newlines become hard line breaks
- | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored
- | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
- -- East Asian wide characters
- | Ext_literate_haskell -- ^ Enable literate Haskell conventions
- | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
- | Ext_emoji -- ^ Support emoji like :smile:
- | Ext_auto_identifiers -- ^ Automatic identifiers for headers
- | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers
- | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v}
- | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
- | Ext_implicit_header_references -- ^ Implicit reference links for headers
- | Ext_line_blocks -- ^ RST style line blocks
- | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
- | Ext_shortcut_reference_links -- ^ Shortcut reference links
- deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
-
-pandocExtensions :: Set Extension
-pandocExtensions = Set.fromList
- [ Ext_footnotes
- , Ext_inline_notes
- , Ext_pandoc_title_block
- , Ext_yaml_metadata_block
- , Ext_table_captions
- , Ext_implicit_figures
- , Ext_simple_tables
- , Ext_multiline_tables
- , Ext_grid_tables
- , Ext_pipe_tables
- , Ext_citations
- , Ext_raw_tex
- , Ext_raw_html
- , Ext_tex_math_dollars
- , Ext_latex_macros
- , Ext_fenced_code_blocks
- , Ext_fenced_code_attributes
- , Ext_backtick_code_blocks
- , Ext_inline_code_attributes
- , Ext_markdown_in_html_blocks
- , Ext_native_divs
- , Ext_native_spans
- , Ext_bracketed_spans
- , Ext_escaped_line_breaks
- , Ext_fancy_lists
- , Ext_startnum
- , Ext_definition_lists
- , Ext_example_lists
- , Ext_all_symbols_escapable
- , Ext_intraword_underscores
- , Ext_blank_before_blockquote
- , Ext_blank_before_header
- , Ext_strikeout
- , Ext_superscript
- , Ext_subscript
- , Ext_auto_identifiers
- , Ext_header_attributes
- , Ext_link_attributes
- , Ext_implicit_header_references
- , Ext_line_blocks
- , Ext_shortcut_reference_links
- ]
-
-plainExtensions :: Set Extension
-plainExtensions = Set.fromList
- [ Ext_table_captions
- , Ext_implicit_figures
- , Ext_simple_tables
- , Ext_multiline_tables
- , Ext_grid_tables
- , Ext_latex_macros
- , Ext_fancy_lists
- , Ext_startnum
- , Ext_definition_lists
- , Ext_example_lists
- , Ext_intraword_underscores
- , Ext_blank_before_blockquote
- , Ext_blank_before_header
- , Ext_strikeout
- ]
-
-phpMarkdownExtraExtensions :: Set Extension
-phpMarkdownExtraExtensions = Set.fromList
- [ Ext_footnotes
- , Ext_pipe_tables
- , Ext_raw_html
- , Ext_markdown_attribute
- , Ext_fenced_code_blocks
- , Ext_definition_lists
- , Ext_intraword_underscores
- , Ext_header_attributes
- , Ext_link_attributes
- , Ext_abbreviations
- , Ext_shortcut_reference_links
- ]
-
-githubMarkdownExtensions :: Set Extension
-githubMarkdownExtensions = Set.fromList
- [ Ext_angle_brackets_escapable
- , Ext_pipe_tables
- , Ext_raw_html
- , Ext_fenced_code_blocks
- , Ext_auto_identifiers
- , Ext_ascii_identifiers
- , Ext_backtick_code_blocks
- , Ext_autolink_bare_uris
- , Ext_intraword_underscores
- , Ext_strikeout
- , Ext_hard_line_breaks
- , Ext_emoji
- , Ext_lists_without_preceding_blankline
- , Ext_shortcut_reference_links
- ]
-
-multimarkdownExtensions :: Set Extension
-multimarkdownExtensions = Set.fromList
- [ Ext_pipe_tables
- , Ext_raw_html
- , Ext_markdown_attribute
- , Ext_mmd_link_attributes
- -- , Ext_raw_tex
- -- Note: MMD's raw TeX syntax requires raw TeX to be
- -- enclosed in HTML comment
- , Ext_tex_math_double_backslash
- , Ext_intraword_underscores
- , Ext_mmd_title_block
- , Ext_footnotes
- , Ext_definition_lists
- , Ext_all_symbols_escapable
- , Ext_implicit_header_references
- , Ext_auto_identifiers
- , Ext_mmd_header_identifiers
- , Ext_implicit_figures
- -- Note: MMD's syntax for superscripts and subscripts
- -- is a bit more permissive than pandoc's, allowing
- -- e^2 and a~1 instead of e^2^ and a~1~, so even with
- -- these options we don't have full support for MMD
- -- superscripts and subscripts, but there's no reason
- -- not to include these:
- , Ext_superscript
- , Ext_subscript
- ]
-
-strictExtensions :: Set Extension
-strictExtensions = Set.fromList
- [ Ext_raw_html
- , Ext_shortcut_reference_links
- ]
+class HasSyntaxExtensions a where
+ getExtensions :: a -> Extensions
data ReaderOptions = ReaderOptions{
- readerExtensions :: Set Extension -- ^ Syntax extensions
- , readerSmart :: Bool -- ^ Smart punctuation
- , readerStandalone :: Bool -- ^ Standalone document with header
- , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX
- , readerColumns :: Int -- ^ Number of columns in terminal
- , readerTabStop :: Int -- ^ Tab stop
- , readerOldDashes :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior
- -- in parsing dashes; -- is em-dash;
- -- - before numerial is en-dash
- , readerApplyMacros :: Bool -- ^ Apply macros to TeX math
- , readerIndentedCodeClasses :: [String] -- ^ Default classes for
+ readerExtensions :: Extensions -- ^ Syntax extensions
+ , readerStandalone :: Bool -- ^ Standalone document with header
+ , readerColumns :: Int -- ^ Number of columns in terminal
+ , readerTabStop :: Int -- ^ Tab stop
+ , readerIndentedCodeClasses :: [String] -- ^ Default classes for
-- indented code blocks
+ , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
, readerDefaultImageExtension :: String -- ^ Default extension for images
- , readerTrace :: Bool -- ^ Print debugging info
- , readerTrackChanges :: TrackChanges
- , readerFileScope :: Bool -- ^ Parse before combining
+ , readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx
+ , readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML
} deriving (Show, Read, Data, Typeable, Generic)
+instance HasSyntaxExtensions ReaderOptions where
+ getExtensions opts = readerExtensions opts
+
instance Default ReaderOptions
where def = ReaderOptions{
- readerExtensions = pandocExtensions
- , readerSmart = False
+ readerExtensions = emptyExtensions
, readerStandalone = False
- , readerParseRaw = False
, readerColumns = 80
, readerTabStop = 4
- , readerOldDashes = False
- , readerApplyMacros = True
, readerIndentedCodeClasses = []
+ , readerAbbreviations = defaultAbbrevs
, readerDefaultImageExtension = ""
- , readerTrace = False
, readerTrackChanges = AcceptChanges
- , readerFileScope = False
+ , readerStripComments = False
}
+defaultAbbrevs :: Set.Set String
+defaultAbbrevs = Set.fromList
+ [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.",
+ "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.",
+ "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.",
+ "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.",
+ "ch.", "sec.", "cf.", "cp."]
+
--
-- Writer options
--
@@ -303,9 +108,9 @@ data HTMLMathMethod = PlainMath
| JsMath (Maybe String) -- url of jsMath load script
| GladTeX
| WebTeX String -- url of TeX->image script.
- | MathML (Maybe String) -- url of MathMLinHTML.js
+ | MathML
| MathJax String -- url of MathJax.js
- | KaTeX String String -- url of stylesheet and katex.js
+ | KaTeX String -- url of KaTeX files
deriving (Show, Read, Eq, Data, Typeable, Generic)
data CiteMethod = Citeproc -- use citeproc to render them
@@ -356,51 +161,39 @@ data ReferenceLocation = EndOfBlock -- ^ End of block
-- | Options for writers
data WriterOptions = WriterOptions
- { writerTemplate :: Maybe String -- ^ Template to use
- , writerVariables :: [(String, String)] -- ^ Variables to set in template
- , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
- , writerTableOfContents :: Bool -- ^ Include table of contents
- , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous?
- , writerIncremental :: Bool -- ^ True if lists should be incremental
- , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
- , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
- , writerNumberSections :: Bool -- ^ Number sections in LaTeX
- , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ...
- , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML
- , writerExtensions :: Set Extension -- ^ Markdown extensions that can be used
- , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
- , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions
- , writerWrapText :: WrapOption -- ^ Option for wrapping text
- , writerColumns :: Int -- ^ Characters in a line (for text wrapping)
- , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
- , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
+ { writerTemplate :: Maybe String -- ^ Template to use
+ , writerVariables :: [(String, String)] -- ^ Variables to set in template
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerIncremental :: Bool -- ^ True if lists should be incremental
+ , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ...
+ , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML
+ , writerExtensions :: Extensions -- ^ Markdown extensions that can be used
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions
+ , writerWrapText :: WrapOption -- ^ Option for wrapping text
+ , writerColumns :: Int -- ^ Characters in a line (for text wrapping)
+ , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
+ , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
-- and for footnote marks in markdown
- , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file
- , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory
- , writerCiteMethod :: CiteMethod -- ^ How to print cites
- , writerDocbook5 :: Bool -- ^ Produce DocBook5
- , writerHtml5 :: Bool -- ^ Produce HTML5
- , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML
- , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show
- , writerSlideLevel :: Maybe Int -- ^ Force header level of slides
- , writerTopLevelDivision :: TopLevelDivision -- ^ Type of top-level divisions
- , writerListings :: Bool -- ^ Use listings package for code
- , writerHighlight :: Bool -- ^ Highlight source code
- , writerHighlightStyle :: Style -- ^ Style to use for highlighting
- , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown
- , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex
- , writerEpubVersion :: Maybe EPUBVersion -- ^ Nothing or EPUB version
- , writerEpubMetadata :: String -- ^ Metadata to include in EPUB
- , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line
- , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed
- , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
- , writerTOCDepth :: Int -- ^ Number of levels to include in TOC
- , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified
- , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified
- , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader
- , writerVerbose :: Bool -- ^ Verbose debugging output
- , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine
+ , writerCiteMethod :: CiteMethod -- ^ How to print cites
+ , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML
+ , writerSlideLevel :: Maybe Int -- ^ Force header level of slides
+ , writerTopLevelDivision :: TopLevelDivision -- ^ Type of top-level divisions
+ , writerListings :: Bool -- ^ Use listings package for code
+ , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting
+ -- (Nothing = no highlighting)
+ , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown
+ , writerEpubSubdirectory :: String -- ^ Subdir for epub in OCF
+ , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB
+ , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed
+ , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
+ , writerTOCDepth :: Int -- ^ Number of levels to include in TOC
+ , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified
, writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown
+ , writerSyntaxMap :: SyntaxMap
} deriving (Show, Data, Typeable, Generic)
instance Default WriterOptions where
@@ -408,48 +201,48 @@ instance Default WriterOptions where
, writerVariables = []
, writerTabStop = 4
, writerTableOfContents = False
- , writerSlideVariant = NoSlides
, writerIncremental = False
, writerHTMLMathMethod = PlainMath
- , writerIgnoreNotes = False
, writerNumberSections = False
, writerNumberOffset = [0,0,0,0,0,0]
, writerSectionDivs = False
- , writerExtensions = pandocExtensions
+ , writerExtensions = emptyExtensions
, writerReferenceLinks = False
, writerDpi = 96
, writerWrapText = WrapAuto
, writerColumns = 72
, writerEmailObfuscation = NoObfuscation
, writerIdentifierPrefix = ""
- , writerSourceURL = Nothing
- , writerUserDataDir = Nothing
, writerCiteMethod = Citeproc
- , writerDocbook5 = False
- , writerHtml5 = False
, writerHtmlQTags = False
- , writerBeamer = False
, writerSlideLevel = Nothing
, writerTopLevelDivision = TopLevelDefault
, writerListings = False
- , writerHighlight = False
- , writerHighlightStyle = pygments
+ , writerHighlightStyle = Just pygments
, writerSetextHeaders = True
- , writerTeXLigatures = True
- , writerEpubVersion = Nothing
- , writerEpubMetadata = ""
- , writerEpubStylesheet = Nothing
+ , writerEpubSubdirectory = "EPUB"
+ , writerEpubMetadata = Nothing
, writerEpubFonts = []
, writerEpubChapterLevel = 1
, writerTOCDepth = 3
- , writerReferenceODT = Nothing
- , writerReferenceDocx = Nothing
- , writerMediaBag = mempty
- , writerVerbose = False
- , writerLaTeXArgs = []
+ , writerReferenceDoc = Nothing
, writerReferenceLocation = EndOfDocument
+ , writerSyntaxMap = defaultSyntaxMap
}
+instance HasSyntaxExtensions WriterOptions where
+ getExtensions opts = writerExtensions opts
+
-- | Returns True if the given extension is enabled.
-isEnabled :: Extension -> WriterOptions -> Bool
-isEnabled ext opts = ext `Set.member` (writerExtensions opts)
+isEnabled :: HasSyntaxExtensions a => Extension -> a -> Bool
+isEnabled ext opts = ext `extensionEnabled` getExtensions opts
+
+$(deriveJSON defaultOptions ''ReaderOptions)
+$(deriveJSON defaultOptions ''HTMLMathMethod)
+$(deriveJSON defaultOptions ''CiteMethod)
+$(deriveJSON defaultOptions ''ObfuscationMethod)
+$(deriveJSON defaultOptions ''HTMLSlideVariant)
+$(deriveJSON defaultOptions ''TrackChanges)
+$(deriveJSON defaultOptions ''WrapOption)
+$(deriveJSON defaultOptions ''TopLevelDivision)
+$(deriveJSON defaultOptions ''ReferenceLocation)
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 9faff1816..5f41d6c55 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -1,6 +1,8 @@
-{-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.PDF
- Copyright : Copyright (C) 2012-2016 John MacFarlane
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -30,49 +32,60 @@ Conversion of LaTeX documents to PDF.
-}
module Text.Pandoc.PDF ( makePDF ) where
+import qualified Codec.Picture as JP
+import qualified Control.Exception as E
+import Control.Monad (unless, when)
+import Control.Monad.Trans (MonadIO (..))
+import qualified Data.ByteString as BS
import Data.ByteString.Lazy (ByteString)
-import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BC
-import qualified Data.ByteString as BS
+import Data.Maybe (fromMaybe)
import Data.Monoid ((<>))
-import System.Exit (ExitCode (..))
-import System.FilePath
-import System.IO (stderr, stdout)
-import System.IO.Temp (withTempFile)
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.IO as TextIO
import System.Directory
-import Data.Digest.Pure.SHA (showDigest, sha1)
import System.Environment
-import Control.Monad (unless, when, (<=<))
-import qualified Control.Exception as E
-import Data.List (isInfixOf)
-import Data.Maybe (fromMaybe)
-import qualified Text.Pandoc.UTF8 as UTF8
+import System.Exit (ExitCode (..))
+import System.FilePath
+import System.IO (stdout)
+import System.IO.Temp (withTempDirectory, withTempFile)
+#if MIN_VERSION_base(4,8,3)
+import System.IO.Error (IOError, isDoesNotExistError)
+#else
+import System.IO.Error (isDoesNotExistError)
+#endif
import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError (PandocPDFProgramNotFoundError))
+import Text.Pandoc.MIME (getMimeType)
+import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..))
+import Text.Pandoc.Process (pipeProcess)
+import Text.Pandoc.Shared (inDirectory, stringify, withTempDir)
+import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Walk (walkM)
-import Text.Pandoc.Shared (fetchItem', warn, withTempDir, inDirectory,
- stringify)
import Text.Pandoc.Writers.Shared (getField, metaToJSON)
-import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
-import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
-import Text.Pandoc.Process (pipeProcess)
-import qualified Data.ByteString.Lazy as BL
-import qualified Codec.Picture as JP
#ifdef _WINDOWS
import Data.List (intercalate)
#endif
+import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getCommonState,
+ getVerbosity, putCommonState, report, runIO,
+ runIOorExplode, setVerbosity)
+import Text.Pandoc.Logging
#ifdef _WINDOWS
changePathSeparators :: FilePath -> FilePath
changePathSeparators = intercalate "/" . splitDirectories
#endif
-makePDF :: String -- ^ pdf creator (pdflatex, lualatex,
- -- xelatex, context, wkhtmltopdf)
- -> (WriterOptions -> Pandoc -> String) -- ^ writer
+makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex,
+ -- wkhtmltopdf, weasyprint, prince, context, pdfroff)
+ -> [String] -- ^ arguments to pass to pdf creator
+ -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer
-> WriterOptions -- ^ options
-> Pandoc -- ^ document
- -> IO (Either ByteString ByteString)
-makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do
+ -> PandocIO (Either ByteString ByteString)
+makePDF "wkhtmltopdf" pdfargs writer opts doc@(Pandoc meta _) = do
let mathArgs = case writerHTMLMathMethod opts of
-- with MathJax, wait til all math is rendered:
MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });",
@@ -80,8 +93,7 @@ makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do
_ -> []
meta' <- metaToJSON opts (return . stringify) (return . stringify) meta
let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd
- let args = mathArgs ++
- concatMap toArgs
+ let args = pdfargs ++ mathArgs ++ concatMap toArgs
[("page-size", getField "papersize" meta')
,("title", getField "title" meta')
,("margin-bottom", fromMaybe (Just "1.2in")
@@ -92,55 +104,61 @@ makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do
(getField "margin-right" meta'))
,("margin-left", fromMaybe (Just "1.25in")
(getField "margin-left" meta'))
+ ,("footer-html", fromMaybe Nothing
+ (getField "footer-html" meta'))
+ ,("header-html", fromMaybe Nothing
+ (getField "header-html" meta'))
]
- let source = writer opts doc
- html2pdf (writerVerbose opts) args source
-makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
- doc' <- handleImages opts tmpdir doc
- let source = writer opts doc'
- args = writerLaTeXArgs opts
- case takeBaseName program of
- "context" -> context2pdf (writerVerbose opts) tmpdir source
- prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
- -> tex2pdf' (writerVerbose opts) args tmpdir program source
- _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
+ source <- writer opts doc
+ verbosity <- getVerbosity
+ liftIO $ html2pdf verbosity "wkhtmltopdf" args source
+makePDF "weasyprint" pdfargs writer opts doc = do
+ source <- writer opts doc
+ verbosity <- getVerbosity
+ liftIO $ html2pdf verbosity "weasyprint" pdfargs source
+makePDF "prince" pdfargs writer opts doc = do
+ source <- writer opts doc
+ verbosity <- getVerbosity
+ liftIO $ html2pdf verbosity "prince" pdfargs source
+makePDF "pdfroff" pdfargs writer opts doc = do
+ source <- writer opts doc
+ let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i",
+ "--no-toc-relocation"] ++ pdfargs
+ verbosity <- getVerbosity
+ liftIO $ ms2pdf verbosity args source
+makePDF program pdfargs writer opts doc = do
+ let withTemp = if takeBaseName program == "context"
+ then withTempDirectory "."
+ else withTempDir
+ commonState <- getCommonState
+ verbosity <- getVerbosity
+ liftIO $ withTemp "tex2pdf." $ \tmpdir -> do
+ source <- runIOorExplode $ do
+ putCommonState commonState
+ doc' <- handleImages tmpdir doc
+ writer opts doc'
+ case takeBaseName program of
+ "context" -> context2pdf verbosity tmpdir source
+ prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
+ -> tex2pdf' verbosity pdfargs tmpdir program source
+ _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
-handleImages :: WriterOptions
- -> FilePath -- ^ temp dir to store images
+handleImages :: FilePath -- ^ temp dir to store images
-> Pandoc -- ^ document
- -> IO Pandoc
-handleImages opts tmpdir = walkM (convertImages tmpdir) <=< walkM (handleImage' opts tmpdir)
-
-handleImage' :: WriterOptions
- -> FilePath
- -> Inline
- -> IO Inline
-handleImage' opts tmpdir (Image attr ils (src,tit)) = do
- exists <- doesFileExist src
- if exists
- then return $ Image attr ils (src,tit)
- else do
- res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
- case res of
- Right (contents, Just mime) -> do
- let ext = fromMaybe (takeExtension src) $
- extensionFromMimeType mime
- let basename = showDigest $ sha1 $ BL.fromChunks [contents]
- let fname = tmpdir </> basename <.> ext
- BS.writeFile fname contents
- return $ Image attr ils (fname,tit)
- _ -> do
- warn $ "Could not find image `" ++ src ++ "', skipping..."
- -- return alt text
- return $ Emph ils
-handleImage' _ _ x = return x
+ -> PandocIO Pandoc
+handleImages tmpdir doc =
+ fillMediaBag doc >>=
+ extractMedia tmpdir >>=
+ walkM (convertImages tmpdir)
-convertImages :: FilePath -> Inline -> IO Inline
+convertImages :: FilePath -> Inline -> PandocIO Inline
convertImages tmpdir (Image attr ils (src, tit)) = do
- img <- convertImage tmpdir src
+ img <- liftIO $ convertImage tmpdir src
newPath <-
case img of
- Left e -> src <$ warn e
+ Left e -> do
+ report $ CouldNotConvertImage src e
+ return src
Right fp -> return fp
return (Image attr ils (newPath, tit))
convertImages _ x = return x
@@ -152,29 +170,43 @@ convertImage tmpdir fname =
Just "image/png" -> doNothing
Just "image/jpeg" -> doNothing
Just "application/pdf" -> doNothing
+ Just "image/svg+xml" -> E.catch (do
+ (exit, _) <- pipeProcess Nothing "rsvg-convert"
+ ["-f","pdf","-a","-o",pdfOut,fname] BL.empty
+ if exit == ExitSuccess
+ then return $ Right pdfOut
+ else return $ Left "conversion from SVG failed")
+ (\(e :: E.SomeException) -> return $ Left $
+ "check that rsvg2pdf is in path.\n" ++
+ show e)
_ -> JP.readImage fname >>= \res ->
case res of
- Left _ -> return $ Left $ "Unable to convert `" ++
- fname ++ "' for use with pdflatex."
+ Left e -> return $ Left e
Right img ->
- E.catch (Right fileOut <$ JP.savePngImage fileOut img) $
+ E.catch (Right pngOut <$ JP.savePngImage pngOut img) $
\(e :: E.SomeException) -> return (Left (show e))
where
- fileOut = replaceDirectory (replaceExtension fname ".png") tmpdir
+ pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir
+ pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir
mime = getMimeType fname
doNothing = return (Right fname)
-tex2pdf' :: Bool -- ^ Verbose output
+tex2pdf' :: Verbosity -- ^ Verbosity level
-> [String] -- ^ Arguments to the latex-engine
-> FilePath -- ^ temp directory for output
-> String -- ^ tex program
- -> String -- ^ tex source
+ -> Text -- ^ tex source
-> IO (Either ByteString ByteString)
-tex2pdf' verbose args tmpDir program source = do
- let numruns = if "\\tableofcontents" `isInfixOf` source
+tex2pdf' verbosity args tmpDir program source = do
+ let numruns = if "\\tableofcontents" `T.isInfixOf` source
then 3 -- to get page numbers
else 2 -- 1 run won't give you PDF bookmarks
- (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source
+ (exit, log', mbPdf) <- E.catch
+ (runTeXProgram verbosity program args 1 numruns tmpDir source)
+ (\(e :: IOError) -> if isDoesNotExistError e
+ then E.throwIO $
+ PandocPDFProgramNotFoundError program
+ else E.throwIO e)
case (exit, mbPdf) of
(ExitFailure _, _) -> do
let logmsg = extractMsg log'
@@ -182,11 +214,26 @@ tex2pdf' verbose args tmpDir program source = do
case logmsg of
x | "! Package inputenc Error" `BC.isPrefixOf` x
&& program /= "xelatex"
- -> "\nTry running pandoc with --latex-engine=xelatex."
+ -> "\nTry running pandoc with --pdf-engine=xelatex."
_ -> ""
return $ Left $ logmsg <> extramsg
(ExitSuccess, Nothing) -> return $ Left ""
- (ExitSuccess, Just pdf) -> return $ Right pdf
+ (ExitSuccess, Just pdf) -> do
+ missingCharacterWarnings verbosity log'
+ return $ Right pdf
+
+missingCharacterWarnings :: Verbosity -> ByteString -> IO ()
+missingCharacterWarnings verbosity log' = do
+ let ls = BC.lines log'
+ let isMissingCharacterWarning = BC.isPrefixOf "Missing character: "
+ let warnings = [ UTF8.toStringLazy (BC.drop 19 l)
+ | l <- ls
+ , isMissingCharacterWarning l
+ ]
+ runIO $ do
+ setVerbosity verbosity
+ mapM_ (report . MissingCharacter) warnings
+ return ()
-- parsing output
@@ -212,12 +259,12 @@ extractConTeXtMsg log' = do
-- Run a TeX program on an input bytestring and return (exit code,
-- contents of stdout, contents of produced PDF if any). Rerun
-- a fixed number of times to resolve references.
-runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String
- -> IO (ExitCode, ByteString, Maybe ByteString)
-runTeXProgram verbose program args runNumber numRuns tmpDir source = do
+runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath
+ -> Text -> IO (ExitCode, ByteString, Maybe ByteString)
+runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
let file = tmpDir </> "input.tex"
exists <- doesFileExist file
- unless exists $ UTF8.writeFile file source
+ unless exists $ BS.writeFile file $ UTF8.fromText source
#ifdef _WINDOWS
-- note: we want / even on Windows, for TexLive
let tmpDir' = changePathSeparators tmpDir
@@ -234,7 +281,7 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do
$ lookup "TEXINPUTS" env'
let env'' = ("TEXINPUTS", texinputs) :
[(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
- when (verbose && runNumber == 1) $ do
+ when (verbosity >= INFO && runNumber == 1) $ do
putStrLn "[makePDF] temp dir:"
putStrLn tmpDir'
putStrLn "[makePDF] Command line:"
@@ -244,16 +291,15 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do
mapM_ print env''
putStr "\n"
putStrLn $ "[makePDF] Contents of " ++ file' ++ ":"
- B.readFile file' >>= B.putStr
+ BL.readFile file' >>= BL.putStr
putStr "\n"
- (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty
- when verbose $ do
+ (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty
+ when (verbosity >= INFO) $ do
putStrLn $ "[makePDF] Run #" ++ show runNumber
- B.hPutStr stdout out
- B.hPutStr stderr err
+ BL.hPutStr stdout out
putStr "\n"
if runNumber <= numRuns
- then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source
+ then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source
else do
let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
pdfExists <- doesFileExist pdfFile
@@ -261,36 +307,75 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do
-- We read PDF as a strict bytestring to make sure that the
-- temp directory is removed on Windows.
-- See https://github.com/jgm/pandoc/issues/1192.
- then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile
else return Nothing
- return (exit, out <> err, pdf)
+ -- Note that some things like Missing character warnings
+ -- appear in the log but not on stderr, so we prefer the log:
+ let logFile = replaceExtension file ".log"
+ logExists <- doesFileExist logFile
+ log' <- if logExists
+ then BL.readFile logFile
+ else return out
+ return (exit, log', pdf)
+
+ms2pdf :: Verbosity
+ -> [String]
+ -> Text
+ -> IO (Either ByteString ByteString)
+ms2pdf verbosity args source = do
+ env' <- getEnvironment
+ when (verbosity >= INFO) $ do
+ putStrLn "[makePDF] Command line:"
+ putStrLn $ "pdfroff " ++ " " ++ unwords (map show args)
+ putStr "\n"
+ putStrLn "[makePDF] Environment:"
+ mapM_ print env'
+ putStr "\n"
+ putStrLn "[makePDF] Contents:\n"
+ putStr $ T.unpack source
+ putStr "\n"
+ (exit, out) <- E.catch
+ (pipeProcess (Just env') "pdfroff" args
+ (BL.fromStrict $ UTF8.fromText source))
+ (\(e :: IOError) -> if isDoesNotExistError e
+ then E.throwIO $
+ PandocPDFProgramNotFoundError "pdfroff"
+ else E.throwIO e)
+ when (verbosity >= INFO) $ do
+ BL.hPutStr stdout out
+ putStr "\n"
+ return $ case exit of
+ ExitFailure _ -> Left out
+ ExitSuccess -> Right out
-html2pdf :: Bool -- ^ Verbose output
- -> [String] -- ^ Args to wkhtmltopdf
- -> String -- ^ HTML5 source
+html2pdf :: Verbosity -- ^ Verbosity level
+ -> String -- ^ Program (wkhtmltopdf, weasyprint or prince)
+ -> [String] -- ^ Args to program
+ -> Text -- ^ HTML5 source
-> IO (Either ByteString ByteString)
-html2pdf verbose args source = do
- file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp
+html2pdf verbosity program args source = do
pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp
- UTF8.writeFile file source
- let programArgs = args ++ [file, pdfFile]
+ let pdfFileArgName = ["-o" | program == "prince"]
+ let programArgs = args ++ ["-"] ++ pdfFileArgName ++ [pdfFile]
env' <- getEnvironment
- when verbose $ do
+ when (verbosity >= INFO) $ do
putStrLn "[makePDF] Command line:"
- putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs)
+ putStrLn $ program ++ " " ++ unwords (map show programArgs)
putStr "\n"
putStrLn "[makePDF] Environment:"
mapM_ print env'
putStr "\n"
- putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
- B.readFile file >>= B.putStr
+ putStrLn "[makePDF] Contents of intermediate HTML:"
+ TextIO.putStr source
putStr "\n"
- (exit, out, err) <- pipeProcess (Just env') "wkhtmltopdf"
- programArgs BL.empty
- removeFile file
- when verbose $ do
- B.hPutStr stdout out
- B.hPutStr stderr err
+ (exit, out) <- E.catch
+ (pipeProcess (Just env') program programArgs $ BL.fromStrict $ UTF8.fromText source)
+ (\(e :: IOError) -> if isDoesNotExistError e
+ then E.throwIO $
+ PandocPDFProgramNotFoundError program
+ else E.throwIO e)
+ when (verbosity >= INFO) $ do
+ BL.hPutStr stdout out
putStr "\n"
pdfExists <- doesFileExist pdfFile
mbPdf <- if pdfExists
@@ -298,23 +383,22 @@ html2pdf verbose args source = do
-- temp directory is removed on Windows.
-- See https://github.com/jgm/pandoc/issues/1192.
then do
- res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ res <- (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile
removeFile pdfFile
return res
else return Nothing
- let log' = out <> err
return $ case (exit, mbPdf) of
- (ExitFailure _, _) -> Left log'
+ (ExitFailure _, _) -> Left out
(ExitSuccess, Nothing) -> Left ""
(ExitSuccess, Just pdf) -> Right pdf
-context2pdf :: Bool -- ^ Verbose output
+context2pdf :: Verbosity -- ^ Verbosity level
-> FilePath -- ^ temp directory for output
- -> String -- ^ ConTeXt source
+ -> Text -- ^ ConTeXt source
-> IO (Either ByteString ByteString)
-context2pdf verbose tmpDir source = inDirectory tmpDir $ do
+context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
let file = "input.tex"
- UTF8.writeFile file source
+ BS.writeFile file $ UTF8.fromText source
#ifdef _WINDOWS
-- note: we want / even on Windows, for TexLive
let tmpDir' = changePathSeparators tmpDir
@@ -323,27 +407,26 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do
#endif
let programArgs = "--batchmode" : [file]
env' <- getEnvironment
- let sep = [searchPathSeparator]
- let texinputs = maybe (".." ++ sep) ((".." ++ sep) ++)
- $ lookup "TEXINPUTS" env'
- let env'' = ("TEXINPUTS", texinputs) :
- [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
- when verbose $ do
+ when (verbosity >= INFO) $ do
putStrLn "[makePDF] temp dir:"
putStrLn tmpDir'
putStrLn "[makePDF] Command line:"
putStrLn $ "context" ++ " " ++ unwords (map show programArgs)
putStr "\n"
putStrLn "[makePDF] Environment:"
- mapM_ print env''
+ mapM_ print env'
putStr "\n"
putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
- B.readFile file >>= B.putStr
+ BL.readFile file >>= BL.putStr
putStr "\n"
- (exit, out, err) <- pipeProcess (Just env'') "context" programArgs BL.empty
- when verbose $ do
- B.hPutStr stdout out
- B.hPutStr stderr err
+ (exit, out) <- E.catch
+ (pipeProcess (Just env') "context" programArgs BL.empty)
+ (\(e :: IOError) -> if isDoesNotExistError e
+ then E.throwIO $
+ PandocPDFProgramNotFoundError "context"
+ else E.throwIO e)
+ when (verbosity >= INFO) $ do
+ BL.hPutStr stdout out
putStr "\n"
let pdfFile = replaceExtension file ".pdf"
pdfExists <- doesFileExist pdfFile
@@ -351,13 +434,11 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do
-- We read PDF as a strict bytestring to make sure that the
-- temp directory is removed on Windows.
-- See https://github.com/jgm/pandoc/issues/1192.
- then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile
else return Nothing
- let log' = out <> err
case (exit, mbPdf) of
(ExitFailure _, _) -> do
- let logmsg = extractConTeXtMsg log'
+ let logmsg = extractConTeXtMsg out
return $ Left logmsg
(ExitSuccess, Nothing) -> return $ Left ""
(ExitSuccess, Just pdf) -> return $ Right pdf
-
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 110e34c6a..82abcb440 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1,11 +1,12 @@
-{-# LANGUAGE
- FlexibleContexts
-, GeneralizedNewtypeDeriving
-, TypeSynonymInstances
-, MultiParamTypeClasses
-, FlexibleInstances #-}
+{-# LANGUAGE ExplicitForAll #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE IncoherentInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE TypeSynonymInstances #-}
{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Parsing
- Copyright : Copyright (C) 2006-2016 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -33,8 +34,14 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
A utility library with parsers used in pandoc readers.
-}
-module Text.Pandoc.Parsing ( anyLine,
+module Text.Pandoc.Parsing ( takeWhileP,
+ takeP,
+ anyLine,
+ anyLineNewline,
+ indentWith,
many1Till,
+ manyUntil,
+ sepBy1',
notFollowedBy',
oneOfStrings,
oneOfStringsCI,
@@ -43,9 +50,12 @@ module Text.Pandoc.Parsing ( anyLine,
skipSpaces,
blankline,
blanklines,
+ gobbleSpaces,
+ gobbleAtMostSpaces,
enclosed,
stringAnyCase,
parseFromString,
+ parseFromString',
lineClump,
charsInBalanced,
romanNumeral,
@@ -57,6 +67,11 @@ module Text.Pandoc.Parsing ( anyLine,
withRaw,
escaped,
characterReference,
+ upperRoman,
+ lowerRoman,
+ decimal,
+ lowerAlpha,
+ upperAlpha,
anyOrderedListMarker,
orderedListMarker,
charRef,
@@ -64,20 +79,24 @@ module Text.Pandoc.Parsing ( anyLine,
tableWith,
widthsFromIndices,
gridTableWith,
+ gridTableWith',
readWith,
- readWithWarnings,
readWithM,
testStringWith,
guardEnabled,
guardDisabled,
updateLastStrPos,
notAfterString,
+ logMessage,
+ reportLogMessages,
ParserState (..),
HasReaderOptions (..),
HasHeaderMap (..),
HasIdentifierList (..),
HasMacros (..),
+ HasLogMessages (..),
HasLastStrPosition (..),
+ HasIncludeFiles (..),
defaultParserState,
HeaderType (..),
ParserContext (..),
@@ -100,20 +119,26 @@ module Text.Pandoc.Parsing ( anyLine,
dash,
nested,
citeKey,
- macro,
- applyMacros',
Parser,
ParserT,
- F(..),
+ F,
+ Future(..),
runF,
askF,
asksF,
+ returnF,
+ trimInlinesF,
token,
+ (<+?>),
+ extractIdClass,
+ insertIncludedFile,
+ insertIncludedFileF,
-- * Re-exports from Text.Pandoc.Parsec
Stream,
runParser,
runParserT,
parse,
+ tokenPrim,
anyToken,
getInput,
setInput,
@@ -162,61 +187,106 @@ module Text.Pandoc.Parsing ( anyLine,
sourceLine,
setSourceColumn,
setSourceLine,
+ incSourceColumn,
newPos,
- addWarning,
- (<+?>),
- extractIdClass
+ Line,
+ Column
)
where
+import Control.Monad.Identity
+import Control.Monad.Reader
+import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isHexDigit,
+ isPunctuation, isSpace, ord, toLower, toUpper)
+import Data.Default
+import Data.List (intercalate, isSuffixOf, transpose)
+import qualified Data.Map as M
+import Data.Maybe (mapMaybe, fromMaybe)
+import Data.Monoid ((<>))
+import qualified Data.Set as Set
+import Data.Text (Text)
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Asciify (toAsciiChar)
+import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, readFileFromDirs, report)
import Text.Pandoc.Definition
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.XML (fromEntities)
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
+import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
+import Text.Pandoc.XML (fromEntities)
import Text.Parsec hiding (token)
-import Text.Parsec.Pos (newPos)
-import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
- isHexDigit, isSpace, isPunctuation )
-import Data.List ( intercalate, transpose, isSuffixOf )
-import Text.Pandoc.Shared
-import qualified Data.Map as M
-import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro,
- parseMacroDefinitions)
-import Text.HTML.TagSoup.Entity ( lookupEntity )
-import Text.Pandoc.Asciify (toAsciiChar)
-import Data.Monoid ((<>))
-import Data.Default
-import qualified Data.Set as Set
-import Control.Monad.Reader
-import Control.Monad.Identity
-import Data.Maybe (catMaybes)
+import Text.Parsec.Pos (initialPos, newPos, updatePosString)
+import Control.Monad.Except
import Text.Pandoc.Error
type Parser t s = Parsec t s
type ParserT = ParsecT
-newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
+-- | Reader monad wrapping the parser state. This is used to possibly delay
+-- evaluation until all relevant information has been parsed and made available
+-- in the parser state.
+newtype Future s a = Future { runDelayed :: Reader s a }
+ deriving (Monad, Applicative, Functor)
+
+type F = Future ParserState
-runF :: F a -> ParserState -> a
-runF = runReader . unF
+runF :: Future s a -> s -> a
+runF = runReader . runDelayed
-askF :: F ParserState
-askF = F ask
+askF :: Future s s
+askF = Future ask
-asksF :: (ParserState -> a) -> F a
-asksF f = F $ asks f
+asksF :: (s -> a) -> Future s a
+asksF f = Future $ asks f
-instance Monoid a => Monoid (F a) where
+returnF :: Monad m => a -> m (Future s a)
+returnF = return . return
+
+trimInlinesF :: Future s Inlines -> Future s Inlines
+trimInlinesF = liftM trimInlines
+
+instance Monoid a => Monoid (Future s a) where
mempty = return mempty
mappend = liftM2 mappend
mconcat = liftM mconcat . sequence
+-- | Parse characters while a predicate is true.
+takeWhileP :: Monad m
+ => (Char -> Bool)
+ -> ParserT [Char] st m [Char]
+takeWhileP f = do
+ -- faster than 'many (satisfy f)'
+ inp <- getInput
+ pos <- getPosition
+ let (xs, rest) = span f inp
+ -- needed to persuade parsec that this won't match an empty string:
+ anyChar
+ setInput rest
+ setPosition $ updatePosString pos xs
+ return xs
+
+-- Parse n characters of input (or the rest of the input if
+-- there aren't n characters).
+takeP :: Monad m => Int -> ParserT [Char] st m [Char]
+takeP n = do
+ guard (n > 0)
+ -- faster than 'count n anyChar'
+ inp <- getInput
+ pos <- getPosition
+ let (xs, rest) = splitAt n inp
+ -- needed to persuade parsec that this won't match an empty string:
+ anyChar
+ setInput rest
+ setPosition $ updatePosString pos xs
+ return xs
+
-- | Parse any line of text
-anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
+anyLine :: Monad m => ParserT [Char] st m [Char]
anyLine = do
-- This is much faster than:
-- manyTill anyChar newline
@@ -231,16 +301,54 @@ anyLine = do
return this
_ -> mzero
+-- | Parse any line, include the final newline in the output
+anyLineNewline :: Monad m => ParserT [Char] st m [Char]
+anyLineNewline = (++ "\n") <$> anyLine
+
+-- | Parse indent by specified number of spaces (or equiv. tabs)
+indentWith :: Stream s m Char
+ => HasReaderOptions st
+ => Int -> ParserT s st m [Char]
+indentWith num = do
+ tabStop <- getOption readerTabStop
+ if num < tabStop
+ then count num (char ' ')
+ else choice [ try (count num (char ' '))
+ , try (char '\t' >> indentWith (num - tabStop)) ]
+
-- | Like @manyTill@, but reads at least one item.
-many1Till :: Stream s m t
+many1Till :: (Show end, Stream s m t)
=> ParserT s st m a
-> ParserT s st m end
-> ParserT s st m [a]
many1Till p end = do
+ notFollowedBy' end
first <- p
rest <- manyTill p end
return (first:rest)
+-- | Like @manyTill@, but also returns the result of end parser.
+manyUntil :: (Stream s m t)
+ => ParserT s u m a
+ -> ParserT s u m b
+ -> ParserT s u m ([a], b)
+manyUntil p end = scan
+ where scan =
+ (do e <- end
+ return ([], e)
+ ) <|>
+ (do x <- p
+ (xs, e) <- scan
+ return (x:xs, e))
+
+-- | Like @sepBy1@ from Parsec,
+-- but does not fail if it @sep@ succeeds and @p@ fails.
+sepBy1' :: (Stream s m t)
+ => ParsecT s u m a
+ -> ParsecT s u m sep
+ -> ParsecT s u m [a]
+sepBy1' p sep = (:) <$> p <*> many (try $ sep >> p)
+
-- | A more general form of @notFollowedBy@. This one allows any
-- type of parser to be specified, and succeeds only if that parser fails.
-- It does not consume any input.
@@ -276,7 +384,7 @@ oneOfStringsCI = oneOfStrings' ciMatch
-- this optimizes toLower by checking common ASCII case
-- first, before calling the expensive unicode-aware
-- function:
- toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32)
+ toLower' c | isAsciiUpper c = chr (ord c + 32)
| isAscii c = c
| otherwise = toLower c
@@ -300,8 +408,38 @@ blankline = try $ skipSpaces >> newline
blanklines :: Stream s m Char => ParserT s st m [Char]
blanklines = many1 blankline
+-- | Gobble n spaces; if tabs are encountered, expand them
+-- and gobble some or all of their spaces, leaving the rest.
+gobbleSpaces :: (HasReaderOptions st, Monad m)
+ => Int -> ParserT [Char] st m ()
+gobbleSpaces 0 = return ()
+gobbleSpaces n
+ | n < 0 = error "gobbleSpaces called with negative number"
+ | otherwise = try $ do
+ char ' ' <|> eatOneSpaceOfTab
+ gobbleSpaces (n - 1)
+
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Char
+eatOneSpaceOfTab = do
+ char '\t'
+ tabstop <- getOption readerTabStop
+ inp <- getInput
+ setInput $ replicate (tabstop - 1) ' ' ++ inp
+ return ' '
+
+-- | Gobble up to n spaces; if tabs are encountered, expand them
+-- and gobble some or all of their spaces, leaving the rest.
+gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
+ => Int -> ParserT [Char] st m Int
+gobbleAtMostSpaces 0 = return 0
+gobbleAtMostSpaces n
+ | n < 0 = error "gobbleAtMostSpaces called with negative number"
+ | otherwise = option 0 $ do
+ char ' ' <|> eatOneSpaceOfTab
+ (+ 1) <$> gobbleAtMostSpaces (n - 1)
+
-- | Parses material enclosed between start and end parsers.
-enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser
+enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser
-> ParserT s st m end -- ^ end parser
-> ParserT s st m a -- ^ content parser (to be used repeatedly)
-> ParserT s st m [a]
@@ -317,9 +455,13 @@ stringAnyCase (x:xs) = do
return (firstChar:rest)
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
+parseFromString :: Monad m
+ => ParserT [Char] st m a
+ -> String
+ -> ParserT [Char] st m a
parseFromString parser str = do
oldPos <- getPosition
+ setPosition $ initialPos "chunk"
oldInput <- getInput
setInput str
result <- parser
@@ -329,10 +471,22 @@ parseFromString parser str = do
setPosition oldPos
return result
+-- | Like 'parseFromString' but specialized for 'ParserState'.
+-- This resets 'stateLastStrPos', which is almost always what we want.
+parseFromString' :: Monad m
+ => ParserT String ParserState m a
+ -> String
+ -> ParserT String ParserState m a
+parseFromString' parser str = do
+ oldStrPos <- stateLastStrPos <$> getState
+ res <- parseFromString parser str
+ updateState $ \st -> st{ stateLastStrPos = oldStrPos }
+ return res
+
-- | Parse raw line block up to and including blank lines.
-lineClump :: Stream [Char] m Char => ParserT [Char] st m String
+lineClump :: Monad m => ParserT [Char] st m String
lineClump = blanklines
- <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
+ <|> (unlines <$> many1 (notFollowedBy blankline >> anyLine))
-- | Parse a string of characters between an open character
-- and a close character, including text between balanced
@@ -373,19 +527,19 @@ romanNumeral upperCase = do
lookAhead $ oneOf romanDigits
let [one, five, ten, fifty, hundred, fivehundred, thousand] =
map char romanDigits
- thousands <- many thousand >>= (return . (1000 *) . length)
+ thousands <- ((1000 *) . length) <$> many thousand
ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
- fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fivehundreds <- ((500 *) . length) <$> many fivehundred
fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
- hundreds <- many hundred >>= (return . (100 *) . length)
+ hundreds <- ((100 *) . length) <$> many hundred
nineties <- option 0 $ try $ ten >> hundred >> return 90
- fifties <- many fifty >>= (return . (50 *) . length)
+ fifties <- ((50 *) . length) <$> many fifty
forties <- option 0 $ try $ ten >> fifty >> return 40
- tens <- many ten >>= (return . (10 *) . length)
+ tens <- ((10 *) . length) <$> many ten
nines <- option 0 $ try $ one >> ten >> return 9
- fives <- many five >>= (return . (5 *) . length)
+ fives <- ((5 *) . length) <$> many five
fours <- option 0 $ try $ one >> five >> return 4
- ones <- many one >>= (return . length)
+ ones <- length <$> many one
let total = thousands + ninehundreds + fivehundreds + fourhundreds +
hundreds + nineties + fifties + forties + tens + nines +
fives + fours + ones
@@ -401,8 +555,8 @@ emailAddress :: Stream s m Char => ParserT s st m (String, String)
emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom
in (full, escapeURI $ "mailto:" ++ full)
- mailbox = intercalate "." <$> (emailWord `sepby1` dot)
- domain = intercalate "." <$> (subdomain `sepby1` dot)
+ mailbox = intercalate "." <$> (emailWord `sepBy1'` dot)
+ domain = intercalate "." <$> (subdomain `sepBy1'` dot)
dot = char '.'
subdomain = many1 $ alphaNum <|> innerPunct
-- this excludes some valid email addresses, since an
@@ -419,59 +573,33 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
return (x:xs)
isEmailChar c = isAlphaNum c || isEmailPunct c
isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;"
- -- note: sepBy1 from parsec consumes input when sep
- -- succeeds and p fails, so we use this variant here.
- sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p))
-
-
--- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
--- the unofficial schemes coap, doi, javascript, isbn, pmid
-schemes :: [String]
-schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
- "crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
- "h323","http","https","iax","icap","im","imap","info","ipp","iris",
- "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid",
- "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp",
- "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve",
- "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet",
- "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon",
- "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s",
- "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin",
- "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee",
- "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb",
- "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject",
- "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms",
- "keyparc","lastfm","ldaps","magnet","maps","market","message","mms",
- "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi",
- "platform","proxy","psyc","query","res","resource","rmi","rsync",
- "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
- "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
- "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
- "ymsgr", "isbn", "pmid"]
+
uriScheme :: Stream s m Char => ParserT s st m String
-uriScheme = oneOfStringsCI schemes
+uriScheme = oneOfStringsCI (Set.toList schemes)
-- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
+uri :: Monad m => ParserT [Char] st m (String, String)
uri = try $ do
scheme <- uriScheme
char ':'
+ -- Avoid parsing e.g. "**Notes:**" as a raw URI:
+ notFollowedBy (oneOf "*_]")
-- We allow sentence punctuation except at the end, since
-- we don't want the trailing '.' in 'http://google.com.' We want to allow
-- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
-- as a URL, while NOT picking up the closing paren in
-- (http://wikipedia.org). So we include balanced parens in the URL.
- let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-"
+ let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-&="
let wordChar = satisfy isWordChar
let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
let entity = () <$ characterReference
let punct = skipMany1 (char ',')
- <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
+ <|> () <$ satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')
let uriChunk = skipMany1 wordChar
<|> percentEscaped
<|> entity
- <|> (try $ punct >>
+ <|> try (punct >>
lookAhead (void (satisfy isWordChar) <|> percentEscaped))
str <- snd <$> withRaw (skipMany1 ( () <$
(enclosed (char '(') (char ')') uriChunk
@@ -485,7 +613,7 @@ uri = try $ do
mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String
mathInlineWith op cl = try $ do
string op
- notFollowedBy space
+ when (op == "$") $ notFollowedBy space
words' <- many1Till (count 1 (noneOf " \t\n\\")
<|> (char '\\' >>
-- This next clause is needed because \text{..} can
@@ -499,7 +627,7 @@ mathInlineWith op cl = try $ do
return " "
) (try $ string cl)
notFollowedBy digit -- to prevent capture of $5
- return $ concat words'
+ return $ trim $ concat words'
where
inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
inBalancedBraces 0 "" = do
@@ -556,7 +684,9 @@ withHorizDisplacement parser = do
-- | Applies a parser and returns the raw string that was parsed,
-- along with the value produced by the parser.
-withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
+withRaw :: Monad m
+ => ParsecT [Char] st m a
+ -> ParsecT [Char] st m (a, [Char])
withRaw parser = do
pos1 <- getPosition
inp <- getInput
@@ -566,9 +696,9 @@ withRaw parser = do
let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
let inplines = take ((l2 - l1) + 1) $ lines inp
let raw = case inplines of
- [] -> ""
- [l] -> take (c2 - c1) l
- ls -> unlines (init ls) ++ take (c2 - 1) (last ls)
+ [] -> ""
+ [l] -> take (c2 - c1) l
+ ls -> unlines (init ls) ++ take (c2 - 1) (last ls)
return (result, raw)
-- | Parses backslash, then applies character parser.
@@ -584,11 +714,11 @@ characterReference = try $ do
ent <- many1Till nonspaceChar (char ';')
let ent' = case ent of
'#':'X':xs -> '#':'x':xs -- workaround tagsoup bug
- '#':_ -> ent
- _ -> ent ++ ";"
+ '#':_ -> ent
+ _ -> ent ++ ";"
case lookupEntity ent' of
- Just (c : _) -> return c
- _ -> fail "entity not found"
+ Just (c : _) -> return c
+ _ -> fail "entity not found"
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
@@ -651,7 +781,7 @@ romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
-- | Parses an ordered list marker and returns list attributes.
anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
-anyOrderedListMarker = choice $
+anyOrderedListMarker = choice
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
numParser <- [decimal, exampleNum, defaultNum, romanOne,
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
@@ -717,7 +847,7 @@ charRef = do
c <- characterReference
return $ Str [c]
-lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String
+lineBlockLine :: Monad m => ParserT [Char] st m String
lineBlockLine = try $ do
char '|'
char ' '
@@ -727,33 +857,48 @@ lineBlockLine = try $ do
continuations <- many (try $ char ' ' >> anyLine)
return $ white ++ unwords (line : continuations)
-blankLineBlockLine :: Stream [Char] m Char => ParserT [Char] st m Char
+blankLineBlockLine :: Stream s m Char => ParserT s st m Char
blankLineBlockLine = try (char '|' >> blankline)
-- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String]
+lineBlockLines :: Monad m => ParserT [Char] st m [String]
lineBlockLines = try $ do
lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine))
- skipMany1 $ blankline <|> blankLineBlockLine
+ skipMany blankline
return lines'
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'.
-tableWith :: Stream s m Char
- => ParserT s ParserState m ([[Block]], [Alignment], [Int])
- -> ([Int] -> ParserT s ParserState m [[Block]])
- -> ParserT s ParserState m sep
- -> ParserT s ParserState m end
- -> ParserT s ParserState m Block
+tableWith :: (Stream s m Char, HasReaderOptions st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT s st m (mf [Blocks], [Alignment], [Int])
+ -> ([Int] -> ParserT s st m (mf [Blocks]))
+ -> ParserT s st m sep
+ -> ParserT s st m end
+ -> ParserT s st m (mf Blocks)
tableWith headerParser rowParser lineParser footerParser = try $ do
+ (aligns, widths, heads, rows) <- tableWith' headerParser rowParser
+ lineParser footerParser
+ return $ B.table mempty (zip aligns widths) <$> heads <*> rows
+
+type TableComponents mf = ([Alignment], [Double], mf [Blocks], mf [[Blocks]])
+
+tableWith' :: (Stream s m Char, HasReaderOptions st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT s st m (mf [Blocks], [Alignment], [Int])
+ -> ([Int] -> ParserT s st m (mf [Blocks]))
+ -> ParserT s st m sep
+ -> ParserT s st m end
+ -> ParserT s st m (TableComponents mf)
+tableWith' headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy1` lineParser
+ lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
- let widths = if (indices == [])
+ let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
- return $ Table [] aligns widths heads lines'
+ return (aligns, widths, heads, lines')
-- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal
@@ -777,7 +922,7 @@ widthsFromIndices numColumns' indices =
quotient = if totLength > numColumns
then fromIntegral totLength
else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ fracs = map (\l -> fromIntegral l / quotient) lengths in
tail fracs
---
@@ -786,25 +931,44 @@ widthsFromIndices numColumns' indices =
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTableWith :: Stream [Char] m Char
- => ParserT [Char] ParserState m [Block] -- ^ Block list parser
- -> Bool -- ^ Headerless table
- -> ParserT [Char] ParserState m Block
+gridTableWith :: (Monad m, HasReaderOptions st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT [Char] st m (mf Blocks) -- ^ Block list parser
+ -> Bool -- ^ Headerless table
+ -> ParserT [Char] st m (mf Blocks)
gridTableWith blocks headless =
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
+gridTableWith' :: (Monad m, HasReaderOptions st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT [Char] st m (mf Blocks) -- ^ Block list parser
+ -> Bool -- ^ Headerless table
+ -> ParserT [Char] st m (TableComponents mf)
+gridTableWith' blocks headless =
+ tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
+ (gridTableSep '-') gridTableFooter
+
gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ trimr line
-gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int)
+gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
gridPart ch = do
+ leftColon <- option False (True <$ char ':')
dashes <- many1 (char ch)
+ rightColon <- option False (True <$ char ':')
char '+'
- return (length dashes, length dashes + 1)
-
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)]
+ let lengthDashes = length dashes + (if leftColon then 1 else 0) +
+ (if rightColon then 1 else 0)
+ let alignment = case (leftColon, rightColon) of
+ (True, True) -> AlignCenter
+ (True, False) -> AlignLeft
+ (False, True) -> AlignRight
+ (False, False) -> AlignDefault
+ return ((lengthDashes, lengthDashes + 1), alignment)
+
+gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
@@ -812,14 +976,14 @@ removeFinalBar =
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char
+gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
-gridTableHeader :: Stream [Char] m Char
+gridTableHeader :: (Monad m, Functor mf, Applicative mf, Monad mf)
=> Bool -- ^ Headerless table
- -> ParserT [Char] ParserState m [Block]
- -> ParserT [Char] ParserState m ([[Block]], [Alignment], [Int])
+ -> ParserT [Char] st m (mf Blocks)
+ -> ParserT [Char] st m (mf [Blocks], [Alignment], [Int])
gridTableHeader headless blocks = try $ do
optional blanklines
dashes <- gridDashedLines '-'
@@ -828,62 +992,63 @@ gridTableHeader headless blocks = try $ do
else many1
(notFollowedBy (gridTableSep '=') >> char '|' >>
many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
- let lines' = map snd dashes
+ underDashes <- if headless
+ then return dashes
+ else gridDashedLines '='
+ guard $ length dashes == length underDashes
+ let lines' = map (snd . fst) underDashes
let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault
- -- RST does not have a notion of alignments
+ let aligns = map snd underDashes
let rawHeads = if headless
- then replicate (length dashes) ""
- else map (intercalate " ") $ transpose
+ then replicate (length underDashes) ""
+ else map (unlines . map trim) $ transpose
$ map (gridTableSplitLine indices) rawContent
- heads <- mapM (parseFromString blocks) $ map trim rawHeads
+ heads <- sequence <$> mapM (parseFromString blocks . trim) rawHeads
return (heads, aligns, indices)
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String]
+gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
return (gridTableSplitLine indices line)
-- | Parse row of grid table.
-gridTableRow :: Stream [Char] m Char
- => ParserT [Char] ParserState m [Block]
+gridTableRow :: (Monad m, Functor mf, Applicative mf, Monad mf)
+ => ParserT [Char] st m (mf Blocks)
-> [Int]
- -> ParserT [Char] ParserState m [[Block]]
+ -> ParserT [Char] st m (mf [Blocks])
gridTableRow blocks indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
transpose colLines
- mapM (liftM compactifyCell . parseFromString blocks) cols
+ compactifyCell bs = case compactify [bs] of
+ [] -> mempty
+ x:_ -> x
+ cells <- sequence <$> mapM (parseFromString blocks) cols
+ return $ fmap (map compactifyCell) cells
removeOneLeadingSpace :: [String] -> [String]
removeOneLeadingSpace xs =
if all startsWithSpace xs
then map (drop 1) xs
else xs
- where startsWithSpace "" = True
+ where startsWithSpace "" = True
startsWithSpace (y:_) = y == ' '
-compactifyCell :: [Block] -> [Block]
-compactifyCell bs = head $ compactify [bs]
-
-- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char]
+gridTableFooter :: Stream s m Char => ParserT s st m [Char]
gridTableFooter = blanklines
---
-- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: (Monad m)
- => ParserT [Char] st m a -- ^ parser
+readWithM :: Monad m
+ => ParserT [Char] st m a -- ^ parser
-> st -- ^ initial state
-> String -- ^ input
-> m (Either PandocError a)
readWithM parser state input =
- mapLeft (ParsecError input) `liftM` runParserT parser state "source" input
+ mapLeft (PandocParsecError input) `liftM` runParserT parser state "source" input
-- | Parse a string with a given parser and state
@@ -893,17 +1058,8 @@ readWith :: Parser [Char] st a
-> Either PandocError a
readWith p t inp = runIdentity $ readWithM p t inp
-readWithWarnings :: Parser [Char] ParserState a
- -> ParserState
- -> String
- -> Either PandocError (a, [String])
-readWithWarnings p = readWith $ do
- doc <- p
- warnings <- stateWarnings <$> getState
- return (doc, warnings)
-
-- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a)
+testStringWith :: Show a
=> ParserT [Char] ParserState Identity a
-> [Char]
-> IO ()
@@ -912,34 +1068,37 @@ testStringWith parser str = UTF8.putStrLn $ show $
-- | Parsing options.
data ParserState = ParserState
- { stateOptions :: ReaderOptions, -- ^ User options
- stateParserContext :: ParserContext, -- ^ Inside list?
- stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateAllowLinks :: Bool, -- ^ Allow parsing of links
- stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
- stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
- stateKeys :: KeyTable, -- ^ List of reference keys
- stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys
- stateSubstitutions :: SubstTable, -- ^ List of substitution references
- stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
- stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
- stateMeta :: Meta, -- ^ Document metadata
- stateMeta' :: F Meta, -- ^ Document metadata
- stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
- stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
- stateIdentifiers :: Set.Set String, -- ^ Header identifiers used
- stateNextExample :: Int, -- ^ Number of next example
- stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
- stateHasChapters :: Bool, -- ^ True if \chapter encountered
- stateMacros :: [Macro], -- ^ List of macros defined so far
- stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
- stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
+ { stateOptions :: ReaderOptions, -- ^ User options
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateAllowLinks :: Bool, -- ^ Allow parsing of links
+ stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
+ stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
+ stateKeys :: KeyTable, -- ^ List of reference keys
+ stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys
+ stateSubstitutions :: SubstTable, -- ^ List of substitution references
+ stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
+ stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
+ stateNoteRefs :: Set.Set String, -- ^ List of note references used
+ stateMeta :: Meta, -- ^ Document metadata
+ stateMeta' :: F Meta, -- ^ Document metadata
+ stateCitations :: M.Map String String, -- ^ RST-style citations
+ stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
+ stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
+ stateIdentifiers :: Set.Set String, -- ^ Header identifiers used
+ stateNextExample :: Int, -- ^ Number of next example
+ stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
+ stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far
+ stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
+ stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
-- Triple represents: 1) Base role, 2) Optional format (only for :raw:
-- roles), 3) Additional classes (rest of Attr is unused)).
- stateCaption :: Maybe Inlines, -- ^ Caption in current environment
- stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
- stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context
- stateWarnings :: [String] -- ^ Warnings generated by the parser
+ stateCaption :: Maybe Inlines, -- ^ Caption in current environment
+ stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
+ stateFencedDivLevel :: Int, -- ^ Depth of fenced div
+ stateContainers :: [String], -- ^ parent include files
+ stateLogMessages :: [LogMessage], -- ^ log messages
+ stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context
}
instance Default ParserState where
@@ -957,6 +1116,9 @@ class HasReaderOptions st where
-- default
getOption f = (f . extractReaderOptions) <$> getState
+instance HasReaderOptions ParserState where
+ extractReaderOptions = stateOptions
+
class HasQuoteContext st m where
getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
@@ -972,9 +1134,6 @@ instance Monad m => HasQuoteContext ParserState m where
setState newState { stateQuoteContext = oldQuoteContext }
return result
-instance HasReaderOptions ParserState where
- extractReaderOptions = stateOptions
-
class HasHeaderMap st where
extractHeaderMap :: st -> M.Map Inlines String
updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) ->
@@ -993,8 +1152,8 @@ instance HasIdentifierList ParserState where
updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
class HasMacros st where
- extractMacros :: st -> [Macro]
- updateMacros :: ([Macro] -> [Macro]) -> st -> st
+ extractMacros :: st -> M.Map Text Macro
+ updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st
instance HasMacros ParserState where
extractMacros = stateMacros
@@ -1008,6 +1167,24 @@ instance HasLastStrPosition ParserState where
setLastStrPos pos st = st{ stateLastStrPos = Just pos }
getLastStrPos st = stateLastStrPos st
+class HasLogMessages st where
+ addLogMessage :: LogMessage -> st -> st
+ getLogMessages :: st -> [LogMessage]
+
+instance HasLogMessages ParserState where
+ addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
+ getLogMessages st = reverse $ stateLogMessages st
+
+class HasIncludeFiles st where
+ getIncludeFiles :: st -> [String]
+ addIncludeFile :: String -> st -> st
+ dropLatestIncludeFile :: st -> st
+
+instance HasIncludeFiles ParserState where
+ getIncludeFiles = stateContainers
+ addIncludeFile f s = s{ stateContainers = f : stateContainers s }
+ dropLatestIncludeFile s = s { stateContainers = drop 1 $ stateContainers s }
+
defaultParserState :: ParserState
defaultParserState =
ParserState { stateOptions = def,
@@ -1020,30 +1197,45 @@ defaultParserState =
stateHeaderKeys = M.empty,
stateSubstitutions = M.empty,
stateNotes = [],
- stateNotes' = [],
+ stateNotes' = M.empty,
+ stateNoteRefs = Set.empty,
stateMeta = nullMeta,
stateMeta' = return nullMeta,
+ stateCitations = M.empty,
stateHeaderTable = [],
stateHeaders = M.empty,
stateIdentifiers = Set.empty,
stateNextExample = 1,
stateExamples = M.empty,
- stateHasChapters = False,
- stateMacros = [],
+ stateMacros = M.empty,
stateRstDefaultRole = "title-reference",
stateRstCustomRoles = M.empty,
stateCaption = Nothing,
stateInHtmlBlock = Nothing,
- stateMarkdownAttribute = False,
- stateWarnings = []}
+ stateFencedDivLevel = 0,
+ stateContainers = [],
+ stateLogMessages = [],
+ stateMarkdownAttribute = False
+ }
+
+-- | Add a log message.
+logMessage :: (Stream s m a, HasLogMessages st)
+ => LogMessage -> ParserT s st m ()
+logMessage msg = updateState (addLogMessage msg)
+
+-- | Report all the accumulated log messages, according to verbosity level.
+reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m ()
+reportLogMessages = do
+ msgs <- getLogMessages <$> getState
+ mapM_ report msgs
-- | Succeed only if the extension is enabled.
guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
-guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
+guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext
-- | Succeed only if the extension is disabled.
guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
-guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
+guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext
-- | Update the position on which the last string ended.
updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
@@ -1074,7 +1266,8 @@ data QuoteContext
type NoteTable = [(String, String)]
-type NoteTable' = [(String, F Blocks)] -- used in markdown reader
+type NoteTable' = M.Map String (SourcePos, F Blocks)
+-- used in markdown reader
newtype Key = Key String deriving (Show, Read, Eq, Ord)
@@ -1091,37 +1284,40 @@ type SubstTable = M.Map Key Inlines
-- with its associated identifier. If the identifier is null
-- and the auto_identifers extension is set, generate a new
-- unique identifier, and update the list of identifiers
--- in state.
-registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
+-- in state. Issue a warning if an explicit identifier
+-- is encountered that duplicates an earlier identifier
+-- (explict or automatically generated).
+registerHeader :: (Stream s m a, HasReaderOptions st,
+ HasHeaderMap st, HasLogMessages st, HasIdentifierList st)
=> Attr -> Inlines -> ParserT s st m Attr
registerHeader (ident,classes,kvs) header' = do
ids <- extractIdentifierList <$> getState
exts <- getOption readerExtensions
let insert' = M.insertWith (\_new old -> old)
- if null ident && Ext_auto_identifiers `Set.member` exts
+ if null ident && Ext_auto_identifiers `extensionEnabled` exts
then do
let id' = uniqueIdent (B.toList header') ids
- let id'' = if Ext_ascii_identifiers `Set.member` exts
- then catMaybes $ map toAsciiChar id'
+ let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
+ then mapMaybe toAsciiChar id'
else id'
updateState $ updateIdentifierList $ Set.insert id'
updateState $ updateIdentifierList $ Set.insert id''
updateState $ updateHeaderMap $ insert' header' id'
return (id'',classes,kvs)
else do
- unless (null ident) $
+ unless (null ident) $ do
+ when (ident `Set.member` ids) $ do
+ pos <- getPosition
+ logMessage $ DuplicateIdentifier ident pos
+ updateState $ updateIdentifierList $ Set.insert ident
updateState $ updateHeaderMap $ insert' header' ident
return (ident,classes,kvs)
--- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m ()
-failUnlessSmart = getOption readerSmart >>= guard
-
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
smartPunctuation inlineParser = do
- failUnlessSmart
+ guardEnabled Ext_smart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
apostrophe :: Stream s m Char => ParserT s st m Inlines
@@ -1153,9 +1349,7 @@ failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
-> ParserT s st m ()
failIfInQuoteContext context = do
context' <- getQuoteContext
- if context' == context
- then fail "already inside quotes"
- else return ()
+ when (context' == context) $ fail "already inside quotes"
charOrRef :: Stream s m Char => String -> ParserT s st m Char
charOrRef cs =
@@ -1195,7 +1389,7 @@ ellipses = try (string "..." >> return (B.str "\8230"))
dash :: (HasReaderOptions st, Stream s m Char)
=> ParserT s st m Inlines
dash = try $ do
- oldDashes <- getOption readerOldDashes
+ oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
if oldDashes
then do
char '-'
@@ -1241,43 +1435,6 @@ token :: (Stream s m t)
-> ParsecT s st m a
token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
---
--- Macros
---
-
--- | Parse a \newcommand or \renewcommand macro definition.
-macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st)
- => ParserT [Char] st m Blocks
-macro = do
- apply <- getOption readerApplyMacros
- inp <- getInput
- case parseMacroDefinitions inp of
- ([], _) -> mzero
- (ms, rest) -> do def' <- count (length inp - length rest) anyChar
- if apply
- then do
- updateState $ \st ->
- updateMacros (ms ++) st
- return mempty
- else return $ rawBlock "latex" def'
-
--- | Apply current macros to string.
-applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
- => String
- -> ParserT [Char] st m String
-applyMacros' target = do
- apply <- getOption readerApplyMacros
- if apply
- then do macros <- extractMacros <$> getState
- return $ applyMacros macros target
- else return target
-
--- | Append a warning to the log.
-addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState ()
-addWarning mbpos msg =
- updateState $ \st -> st{
- stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
- stateWarnings st }
infixr 5 <+?>
(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
@@ -1285,10 +1442,53 @@ a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
extractIdClass :: Attr -> Attr
extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
where
- ident' = case (lookup "id" kvs) of
- Just v -> v
- Nothing -> ident
- cls' = case (lookup "class" kvs) of
+ ident' = fromMaybe ident (lookup "id" kvs)
+ cls' = case lookup "class" kvs of
Just cl -> words cl
Nothing -> cls
kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs
+
+insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT [a] st m (mf Blocks)
+ -> (String -> [a])
+ -> [FilePath] -> FilePath
+ -> ParserT [a] st m (mf Blocks)
+insertIncludedFile' blocks totoks dirs f = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ containers <- getIncludeFiles <$> getState
+ when (f `elem` containers) $
+ throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
+ updateState $ addIncludeFile f
+ mbcontents <- readFileFromDirs dirs f
+ contents <- case mbcontents of
+ Just s -> return s
+ Nothing -> do
+ report $ CouldNotLoadIncludeFile f oldPos
+ return ""
+ setPosition $ newPos f 1 1
+ setInput $ totoks contents
+ bs <- blocks
+ setInput oldInput
+ setPosition oldPos
+ updateState dropLatestIncludeFile
+ return bs
+
+-- | Parse content of include file as blocks. Circular includes result in an
+-- @PandocParseError@.
+insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
+ => ParserT [a] st m Blocks
+ -> (String -> [a])
+ -> [FilePath] -> FilePath
+ -> ParserT [a] st m Blocks
+insertIncludedFile blocks totoks dirs f =
+ runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
+
+-- | Parse content of include file as future blocks. Circular includes result in
+-- an @PandocParseError@.
+insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
+ => ParserT String st m (Future st Blocks)
+ -> [FilePath] -> FilePath
+ -> ParserT String st m (Future st Blocks)
+insertIncludedFileF p = insertIncludedFile' p id
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 256f38b0c..25c2373a6 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-
-Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA
{- |
Module : Text.Pandoc.Pretty
- Copyright : Copyright (C) 2010-2016 John MacFarlane
+ Copyright : Copyright (C) 2010-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -76,22 +77,24 @@ module Text.Pandoc.Pretty (
)
where
-import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..))
-import qualified Data.Sequence as Seq
+import Control.Monad
+import Control.Monad.State.Strict
+import Data.Char (isSpace)
import Data.Foldable (toList)
import Data.List (intersperse)
-import Data.String
-import Control.Monad.State
-import Data.Char (isSpace)
import Data.Monoid ((<>))
+import Data.Sequence (Seq, ViewL (..), fromList, mapWithIndex, singleton, viewl,
+ (<|))
+import qualified Data.Sequence as Seq
+import Data.String
data RenderState a = RenderState{
- output :: [a] -- ^ In reverse order
- , prefix :: String
- , usePrefix :: Bool
- , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping
- , column :: Int
- , newlines :: Int -- ^ Number of preceding newlines
+ output :: [a] -- ^ In reverse order
+ , prefix :: String
+ , usePrefix :: Bool
+ , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping
+ , column :: Int
+ , newlines :: Int -- ^ Number of preceding newlines
}
type DocState a = State (RenderState a) ()
@@ -106,10 +109,10 @@ data D = Text Int String
| CarriageReturn
| NewLine
| BlankLines Int -- number of blank lines
- deriving (Show)
+ deriving (Show, Eq)
newtype Doc = Doc { unDoc :: Seq D }
- deriving (Monoid, Show)
+ deriving (Monoid, Show, Eq)
instance IsString Doc where
fromString = text
@@ -142,11 +145,10 @@ hcat = mconcat
-- between them.
infixr 6 <+>
(<+>) :: Doc -> Doc -> Doc
-(<+>) x y = if isEmpty x
- then y
- else if isEmpty y
- then x
- else x <> space <> y
+(<+>) x y
+ | isEmpty x = y
+ | isEmpty y = x
+ | otherwise = x <> space <> y
-- | Same as 'cat', but putting breakable spaces between the
-- 'Doc's.
@@ -156,20 +158,18 @@ hsep = foldr (<+>) empty
infixr 5 $$
-- | @a $$ b@ puts @a@ above @b@.
($$) :: Doc -> Doc -> Doc
-($$) x y = if isEmpty x
- then y
- else if isEmpty y
- then x
- else x <> cr <> y
+($$) x y
+ | isEmpty x = y
+ | isEmpty y = x
+ | otherwise = x <> cr <> y
infixr 5 $+$
-- | @a $+$ b@ puts @a@ above @b@, with a blank line between.
($+$) :: Doc -> Doc -> Doc
-($+$) x y = if isEmpty x
- then y
- else if isEmpty y
- then x
- else x <> blankline <> y
+($+$) x y
+ | isEmpty x = y
+ | isEmpty y = x
+ | otherwise = x <> blankline <> y
-- | List version of '$$'.
vcat :: [Doc] -> Doc
@@ -184,21 +184,21 @@ nestle :: Doc -> Doc
nestle (Doc d) = Doc $ go d
where go x = case viewl x of
(BlankLines _ :< rest) -> go rest
- (NewLine :< rest) -> go rest
- _ -> x
+ (NewLine :< rest) -> go rest
+ _ -> x
-- | Chomps trailing blank space off of a 'Doc'.
chomp :: Doc -> Doc
chomp d = Doc (fromList dl')
where dl = toList (unDoc d)
dl' = reverse $ go $ reverse dl
- go [] = []
- go (BreakingSpace : xs) = go xs
+ go [] = []
+ go (BreakingSpace : xs) = go xs
go (CarriageReturn : xs) = go xs
- go (NewLine : xs) = go xs
- go (BlankLines _ : xs) = go xs
- go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs
- go xs = xs
+ go (NewLine : xs) = go xs
+ go (BlankLines _ : xs) = go xs
+ go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs
+ go xs = xs
outp :: (IsString a) => Int -> String -> DocState a
outp off s | off < 0 = do -- offset < 0 means newline characters
@@ -215,9 +215,9 @@ outp off s | off < 0 = do -- offset < 0 means newline characters
outp off s = do -- offset >= 0 (0 might be combining char)
st' <- get
let pref = prefix st'
- when (column st' == 0 && usePrefix st' && not (null pref)) $ do
+ when (column st' == 0 && usePrefix st' && not (null pref)) $
modify $ \st -> st{ output = fromString pref : output st
- , column = column st + realLength pref }
+ , column = column st + realLength pref }
modify $ \st -> st{ output = fromString s : output st
, column = column st + off
, newlines = 0 }
@@ -306,10 +306,10 @@ renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs)
renderList (BreakingSpace : BlankLines n : xs) = renderList (BlankLines n:xs)
renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs)
renderList (BreakingSpace : xs) = do
- let isText (Text _ _) = True
- isText (Block _ _) = True
- isText (AfterBreak _) = True
- isText _ = False
+ let isText (Text _ _) = True
+ isText (Block _ _) = True
+ isText (AfterBreak _) = True
+ isText _ = False
let isBreakingSpace BreakingSpace = True
isBreakingSpace _ = False
let xs' = dropWhile isBreakingSpace xs
@@ -326,9 +326,7 @@ renderList (BreakingSpace : xs) = do
renderList (AfterBreak s : xs) = do
st <- get
- if newlines st > 0
- then outp (realLength s) s
- else return ()
+ when (newlines st > 0) $ outp (realLength s) s
renderList xs
renderList (Block i1 s1 : Block i2 s2 : xs) =
@@ -342,7 +340,7 @@ renderList (Block _width lns : xs) = do
let oldPref = prefix st
case column st - realLength oldPref of
n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' }
- _ -> return ()
+ _ -> return ()
renderList $ intersperse CarriageReturn (map (Text 0) lns)
modify $ \s -> s{ prefix = oldPref }
renderList xs
@@ -359,13 +357,13 @@ mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) =
| otherwise -> (lns1, lns2)
pad n s = s ++ replicate (n - realLength s) ' '
sp "" = ""
- sp xs = if addSpace then (' ' : xs) else xs
+ sp xs = if addSpace then ' ' : xs else xs
offsetOf :: D -> Int
-offsetOf (Text o _) = o
-offsetOf (Block w _) = w
-offsetOf BreakingSpace = 1
-offsetOf _ = 0
+offsetOf (Text o _) = o
+offsetOf (Block w _) = w
+offsetOf BreakingSpace = 1
+offsetOf _ = 0
-- | A literal string.
text :: String -> Doc
@@ -396,8 +394,8 @@ cr = Doc $ singleton CarriageReturn
blankline :: Doc
blankline = Doc $ singleton (BlankLines 1)
--- | Inserts a blank lines unless they exists already.
--- (@blanklines m <> blanklines n@ has the same effect as @blankline (max m n)@.
+-- | Inserts blank lines unless they exist already.
+-- (@blanklines m <> blanklines n@ has the same effect as @blanklines (max m n)@.
blanklines :: Int -> Doc
blanklines n = Doc $ singleton (BlankLines n)
@@ -430,7 +428,7 @@ beforeNonBlank d = Doc $ singleton (BeforeNonBlank d)
nowrap :: Doc -> Doc
nowrap doc = Doc $ mapWithIndex replaceSpace $ unDoc doc
where replaceSpace _ BreakingSpace = Text 1 " "
- replaceSpace _ x = x
+ replaceSpace _ x = x
-- | Content to print only if it comes at the beginning of a line,
-- to be used e.g. for escaping line-initial `.` in groff man.
@@ -440,8 +438,8 @@ afterBreak s = Doc $ singleton (AfterBreak s)
-- | Returns the width of a 'Doc'.
offset :: Doc -> Int
offset d = case map realLength . lines . render Nothing $ d of
- [] -> 0
- os -> maximum os
+ [] -> 0
+ os -> maximum os
-- | Returns the minimal width of a 'Doc' when reflowed at breakable spaces.
minOffset :: Doc -> Int
@@ -466,7 +464,7 @@ height = length . lines . render Nothing
block :: (String -> String) -> Int -> Doc -> Doc
block filler width d
- | width < 1 && not (isEmpty d) = error "Text.Pandoc.Pretty.block: width < 1"
+ | width < 1 && not (isEmpty d) = block filler 1 d
| otherwise = Doc $ singleton $ Block width $ map filler
$ chop width $ render (Just width) d
@@ -554,4 +552,4 @@ charWidth c =
-- | Get real length of string, taking into account combining and double-wide
-- characters.
realLength :: String -> Int
-realLength = foldr (\a b -> charWidth a + b) 0
+realLength = sum . map charWidth
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
index bc71f1392..27807a8c8 100644
--- a/src/Text/Pandoc/Process.hs
+++ b/src/Text/Pandoc/Process.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Process
- Copyright : Copyright (C) 2013-2016 John MacFarlane
+ Copyright : Copyright (C) 2013-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,22 +29,22 @@ ByteString variant of 'readProcessWithExitCode'.
-}
module Text.Pandoc.Process (pipeProcess)
where
-import System.Process
-import System.Exit (ExitCode (..))
+import Control.Concurrent (forkIO, newEmptyMVar, putMVar, takeMVar)
import Control.Exception
-import System.IO (hClose, hFlush)
-import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
import Control.Monad (unless)
import qualified Data.ByteString.Lazy as BL
+import System.Exit (ExitCode (..))
+import System.IO (hClose, hFlush)
+import System.Process
{- |
Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings
instead of strings and allows setting environment variables.
@readProcessWithExitCode@ creates an external process, reads its
-standard output and standard error strictly, waits until the process
-terminates, and then returns the 'ExitCode' of the process,
-the standard output, and the standard error.
+standard output strictly, waits until the process
+terminates, and then returns the 'ExitCode' of the process
+and the standard output. stderr is inherited from the parent.
If an asynchronous exception is thrown to the thread executing
@readProcessWithExitCode@, the forked process will be terminated and
@@ -57,25 +57,21 @@ pipeProcess
-> FilePath -- ^ Filename of the executable (see 'proc' for details)
-> [String] -- ^ any arguments
-> BL.ByteString -- ^ standard input
- -> IO (ExitCode,BL.ByteString,BL.ByteString) -- ^ exitcode, stdout, stderr
+ -> IO (ExitCode,BL.ByteString) -- ^ exitcode, stdout
pipeProcess mbenv cmd args input =
mask $ \restore -> do
- (Just inh, Just outh, Just errh, pid) <- createProcess (proc cmd args)
+ (Just inh, Just outh, Nothing, pid) <- createProcess (proc cmd args)
{ env = mbenv,
std_in = CreatePipe,
std_out = CreatePipe,
- std_err = CreatePipe }
+ std_err = Inherit }
flip onException
- (do hClose inh; hClose outh; hClose errh;
+ (do hClose inh; hClose outh;
terminateProcess pid; waitForProcess pid) $ restore $ do
-- fork off a thread to start consuming stdout
out <- BL.hGetContents outh
waitOut <- forkWait $ evaluate $ BL.length out
- -- fork off a thread to start consuming stderr
- err <- BL.hGetContents errh
- waitErr <- forkWait $ evaluate $ BL.length err
-
-- now write and flush any input
let writeInput = do
unless (BL.null input) $ do
@@ -87,15 +83,13 @@ pipeProcess mbenv cmd args input =
-- wait on the output
waitOut
- waitErr
hClose outh
- hClose errh
-- wait on the process
ex <- waitForProcess pid
- return (ex, out, err)
+ return (ex, out)
forkWait :: IO a -> IO (IO a)
forkWait a = do
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
new file mode 100644
index 000000000..b9374ba06
--- /dev/null
+++ b/src/Text/Pandoc/Readers.hs
@@ -0,0 +1,160 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This helper module exports the readers.
+
+Note: all of the readers assume that the input text has @'\n'@
+line endings. So if you get your input text from a web form,
+you should remove @'\r'@ characters using @filter (/='\r')@.
+
+-}
+
+module Text.Pandoc.Readers
+ (
+ -- * Readers: converting /to/ Pandoc format
+ Reader (..)
+ , readers
+ , readDocx
+ , readOdt
+ , readMarkdown
+ , readCommonMark
+ , readCreole
+ , readMediaWiki
+ , readVimwiki
+ , readRST
+ , readOrg
+ , readLaTeX
+ , readHtml
+ , readJATS
+ , readTextile
+ , readDocBook
+ , readOPML
+ , readHaddock
+ , readNative
+ , readJSON
+ , readTWiki
+ , readTikiWiki
+ , readTxt2Tags
+ , readEPUB
+ , readMuse
+ -- * Miscellaneous
+ , getReader
+ , getDefaultExtensions
+ ) where
+
+import Control.Monad.Except (throwError)
+import Data.Aeson
+import qualified Data.ByteString.Lazy as BL
+import Data.List (intercalate)
+import Data.Text (Text)
+import Text.Pandoc.Class
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Extensions
+import Text.Pandoc.Options
+import Text.Pandoc.Readers.CommonMark
+import Text.Pandoc.Readers.Creole
+import Text.Pandoc.Readers.DocBook
+import Text.Pandoc.Readers.Docx
+import Text.Pandoc.Readers.EPUB
+import Text.Pandoc.Readers.Haddock
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Readers.JATS (readJATS)
+import Text.Pandoc.Readers.LaTeX
+import Text.Pandoc.Readers.Markdown
+import Text.Pandoc.Readers.MediaWiki
+import Text.Pandoc.Readers.Muse
+import Text.Pandoc.Readers.Native
+import Text.Pandoc.Readers.Odt
+import Text.Pandoc.Readers.OPML
+import Text.Pandoc.Readers.Org
+import Text.Pandoc.Readers.RST
+import Text.Pandoc.Readers.Textile
+import Text.Pandoc.Readers.TikiWiki
+import Text.Pandoc.Readers.TWiki
+import Text.Pandoc.Readers.Txt2Tags
+import Text.Pandoc.Readers.Vimwiki
+import Text.Pandoc.Shared (mapLeft)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Parsec.Error
+
+data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
+ | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
+
+-- | Association list of formats and readers.
+readers :: PandocMonad m => [(String, Reader m)]
+readers = [ ("native" , TextReader readNative)
+ ,("json" , TextReader $ \o s ->
+ case readJSON o s of
+ Right doc -> return doc
+ Left _ -> throwError $ PandocParseError "JSON parse error")
+ ,("markdown" , TextReader readMarkdown)
+ ,("markdown_strict" , TextReader readMarkdown)
+ ,("markdown_phpextra" , TextReader readMarkdown)
+ ,("markdown_github" , TextReader readMarkdown)
+ ,("markdown_mmd", TextReader readMarkdown)
+ ,("commonmark" , TextReader readCommonMark)
+ ,("creole" , TextReader readCreole)
+ ,("gfm" , TextReader readCommonMark)
+ ,("rst" , TextReader readRST)
+ ,("mediawiki" , TextReader readMediaWiki)
+ ,("vimwiki" , TextReader readVimwiki)
+ ,("docbook" , TextReader readDocBook)
+ ,("opml" , TextReader readOPML)
+ ,("org" , TextReader readOrg)
+ ,("textile" , TextReader readTextile) -- TODO : textile+lhs
+ ,("html" , TextReader readHtml)
+ ,("jats" , TextReader readJATS)
+ ,("latex" , TextReader readLaTeX)
+ ,("haddock" , TextReader readHaddock)
+ ,("twiki" , TextReader readTWiki)
+ ,("tikiwiki" , TextReader readTikiWiki)
+ ,("docx" , ByteStringReader readDocx)
+ ,("odt" , ByteStringReader readOdt)
+ ,("t2t" , TextReader readTxt2Tags)
+ ,("epub" , ByteStringReader readEPUB)
+ ,("muse" , TextReader readMuse)
+ ]
+
+-- | Retrieve reader, extensions based on formatSpec (format+extensions).
+getReader :: PandocMonad m => String -> Either String (Reader m, Extensions)
+getReader s =
+ case parseFormatSpec s of
+ Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]
+ Right (readerName, setExts) ->
+ case lookup readerName readers of
+ Nothing -> Left $ "Unknown reader: " ++ readerName
+ Just r -> Right (r, setExts $
+ getDefaultExtensions readerName)
+
+-- | Read pandoc document from JSON format.
+readJSON :: ReaderOptions -> Text -> Either PandocError Pandoc
+readJSON _ =
+ mapLeft PandocParseError . eitherDecode' . BL.fromStrict . UTF8.fromText
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index d20d386e7..6fbc09c17 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2015-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.CommonMark
- Copyright : Copyright (C) 2015 John MacFarlane
+ Copyright : Copyright (C) 2015-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -32,48 +32,94 @@ CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
module Text.Pandoc.Readers.CommonMark (readCommonMark)
where
-import CMark
-import Data.Text (unpack, pack)
+import CMarkGFM
+import Control.Monad.State
+import Data.Char (isAlphaNum, isLetter, isSpace, toLower)
import Data.List (groupBy)
+import qualified Data.Map as Map
+import Data.Text (Text, unpack)
+import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
+import Text.Pandoc.Emoji (emojis)
import Text.Pandoc.Options
-import Text.Pandoc.Error
+import Text.Pandoc.Shared (stringify)
+import Text.Pandoc.Walk (walkM)
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc
-readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack
- where opts' = if readerSmart opts
- then [optNormalize, optSmart]
- else [optNormalize]
-
-nodeToPandoc :: Node -> Pandoc
-nodeToPandoc (Node _ DOCUMENT nodes) =
- Pandoc nullMeta $ foldr addBlock [] nodes
-nodeToPandoc n = -- shouldn't happen
- Pandoc nullMeta $ foldr addBlock [] [n]
-
-addBlocks :: [Node] -> [Block]
-addBlocks = foldr addBlock []
-
-addBlock :: Node -> [Block] -> [Block]
-addBlock (Node _ PARAGRAPH nodes) =
- (Para (addInlines nodes) :)
-addBlock (Node _ THEMATIC_BREAK _) =
+readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMark opts s = return $
+ (if isEnabled Ext_gfm_auto_identifiers opts
+ then addHeaderIdentifiers
+ else id) $
+ nodeToPandoc opts $ commonmarkToNode opts' exts s
+ where opts' = [ optSmart | isEnabled Ext_smart opts ]
+ exts = [ extStrikethrough | isEnabled Ext_strikeout opts ] ++
+ [ extTable | isEnabled Ext_pipe_tables opts ] ++
+ [ extAutolink | isEnabled Ext_autolink_bare_uris opts ]
+
+convertEmojis :: String -> String
+convertEmojis (':':xs) =
+ case break (==':') xs of
+ (ys,':':zs) ->
+ case Map.lookup ys emojis of
+ Just s -> s ++ convertEmojis zs
+ Nothing -> ':' : ys ++ convertEmojis (':':zs)
+ _ -> ':':xs
+convertEmojis (x:xs) = x : convertEmojis xs
+convertEmojis [] = []
+
+addHeaderIdentifiers :: Pandoc -> Pandoc
+addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty
+
+addHeaderId :: Block -> State (Map.Map String Int) Block
+addHeaderId (Header lev (_,classes,kvs) ils) = do
+ idmap <- get
+ let ident = toIdent ils
+ ident' <- case Map.lookup ident idmap of
+ Nothing -> do
+ put (Map.insert ident 1 idmap)
+ return ident
+ Just i -> do
+ put (Map.adjust (+ 1) ident idmap)
+ return (ident ++ "-" ++ show i)
+ return $ Header lev (ident',classes,kvs) ils
+addHeaderId x = return x
+
+toIdent :: [Inline] -> String
+toIdent = map (\c -> if isSpace c then '-' else c)
+ . filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
+ c == '_' || c == '-')
+ . map toLower . stringify
+
+nodeToPandoc :: ReaderOptions -> Node -> Pandoc
+nodeToPandoc opts (Node _ DOCUMENT nodes) =
+ Pandoc nullMeta $ foldr (addBlock opts) [] nodes
+nodeToPandoc opts n = -- shouldn't happen
+ Pandoc nullMeta $ foldr (addBlock opts) [] [n]
+
+addBlocks :: ReaderOptions -> [Node] -> [Block]
+addBlocks opts = foldr (addBlock opts) []
+
+addBlock :: ReaderOptions -> Node -> [Block] -> [Block]
+addBlock opts (Node _ PARAGRAPH nodes) =
+ (Para (addInlines opts nodes) :)
+addBlock _ (Node _ THEMATIC_BREAK _) =
(HorizontalRule :)
-addBlock (Node _ BLOCK_QUOTE nodes) =
- (BlockQuote (addBlocks nodes) :)
-addBlock (Node _ (HTML_BLOCK t) _) =
- (RawBlock (Format "html") (unpack t) :)
+addBlock opts (Node _ BLOCK_QUOTE nodes) =
+ (BlockQuote (addBlocks opts nodes) :)
+addBlock opts (Node _ (HTML_BLOCK t) _)
+ | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :)
+ | otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
-addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
+addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
id
-addBlock (Node _ (CODE_BLOCK info t) _) =
+addBlock _ (Node _ (CODE_BLOCK info t) _) =
(CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
-addBlock (Node _ (HEADING lev) nodes) =
- (Header lev ("",[],[]) (addInlines nodes) :)
-addBlock (Node _ (LIST listAttrs) nodes) =
- (constructor (map (setTightness . addBlocks . children) nodes) :)
+addBlock opts (Node _ (HEADING lev) nodes) =
+ (Header lev ("",[],[]) (addInlines opts nodes) :)
+addBlock opts (Node _ (LIST listAttrs) nodes) =
+ (constructor (map (setTightness . addBlocks opts . children) nodes) :)
where constructor = case listType listAttrs of
BULLET_LIST -> BulletList
ORDERED_LIST -> OrderedList
@@ -82,46 +128,108 @@ addBlock (Node _ (LIST listAttrs) nodes) =
setTightness = if listTight listAttrs
then map paraToPlain
else id
- paraToPlain (Para xs) = Plain (xs)
+ paraToPlain (Para xs) = Plain xs
paraToPlain x = x
delim = case listDelim listAttrs of
- PERIOD_DELIM -> Period
- PAREN_DELIM -> OneParen
-addBlock (Node _ ITEM _) = id -- handled in LIST
-addBlock _ = id
+ PERIOD_DELIM -> Period
+ PAREN_DELIM -> OneParen
+addBlock opts (Node _ (TABLE alignments) nodes) =
+ (Table [] aligns widths headers rows :)
+ where aligns = map fromTableCellAlignment alignments
+ fromTableCellAlignment NoAlignment = AlignDefault
+ fromTableCellAlignment LeftAligned = AlignLeft
+ fromTableCellAlignment RightAligned = AlignRight
+ fromTableCellAlignment CenterAligned = AlignCenter
+ widths = replicate numcols 0.0
+ numcols = if null rows'
+ then 0
+ else maximum $ map length rows'
+ rows' = map toRow $ filter isRow nodes
+ (headers, rows) = case rows' of
+ (h:rs) -> (h, rs)
+ [] -> ([], [])
+ isRow (Node _ TABLE_ROW _) = True
+ isRow _ = False
+ isCell (Node _ TABLE_CELL _) = True
+ isCell _ = False
+ toRow (Node _ TABLE_ROW ns) = map toCell $ filter isCell ns
+ toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t
+ toCell (Node _ TABLE_CELL []) = []
+ toCell (Node _ TABLE_CELL (n:ns))
+ | isBlockNode n = addBlocks opts (n:ns)
+ | otherwise = [Plain (addInlines opts (n:ns))]
+ toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t
+addBlock _ (Node _ TABLE_ROW _) = id -- handled in TABLE
+addBlock _ (Node _ TABLE_CELL _) = id -- handled in TABLE
+addBlock _ _ = id
+
+isBlockNode :: Node -> Bool
+isBlockNode (Node _ nodetype _) =
+ case nodetype of
+ DOCUMENT -> True
+ THEMATIC_BREAK -> True
+ PARAGRAPH -> True
+ BLOCK_QUOTE -> True
+ HTML_BLOCK _ -> True
+ CUSTOM_BLOCK _ _ -> True
+ CODE_BLOCK _ _ -> True
+ HEADING _ -> True
+ LIST _ -> True
+ ITEM -> True
+ TEXT _ -> False
+ SOFTBREAK -> False
+ LINEBREAK -> False
+ HTML_INLINE _ -> False
+ CUSTOM_INLINE _ _ -> False
+ CODE _ -> False
+ EMPH -> False
+ STRONG -> False
+ LINK _ _ -> False
+ IMAGE _ _ -> False
+ STRIKETHROUGH -> False
+ TABLE _ -> False
+ TABLE_ROW -> False
+ TABLE_CELL -> False
children :: Node -> [Node]
children (Node _ _ ns) = ns
-addInlines :: [Node] -> [Inline]
-addInlines = foldr addInline []
+addInlines :: ReaderOptions -> [Node] -> [Inline]
+addInlines opts = foldr (addInline opts) []
-addInline :: Node -> [Inline] -> [Inline]
-addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
+addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
+addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++)
where raw = unpack t
clumps = groupBy samekind raw
samekind ' ' ' ' = True
samekind ' ' _ = False
samekind _ ' ' = False
samekind _ _ = True
- toinl (' ':_) = Space
- toinl xs = Str xs
-addInline (Node _ LINEBREAK _) = (LineBreak :)
-addInline (Node _ SOFTBREAK _) = (SoftBreak :)
-addInline (Node _ (HTML_INLINE t) _) =
- (RawInline (Format "html") (unpack t) :)
+ toinl (' ':_) = Space
+ toinl xs = Str $ if isEnabled Ext_emoji opts
+ then convertEmojis xs
+ else xs
+addInline _ (Node _ LINEBREAK _) = (LineBreak :)
+addInline opts (Node _ SOFTBREAK _)
+ | isEnabled Ext_hard_line_breaks opts = (LineBreak :)
+ | otherwise = (SoftBreak :)
+addInline opts (Node _ (HTML_INLINE t) _)
+ | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :)
+ | otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
-addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
+addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
id
-addInline (Node _ (CODE t) _) =
+addInline _ (Node _ (CODE t) _) =
(Code ("",[],[]) (unpack t) :)
-addInline (Node _ EMPH nodes) =
- (Emph (addInlines nodes) :)
-addInline (Node _ STRONG nodes) =
- (Strong (addInlines nodes) :)
-addInline (Node _ (LINK url title) nodes) =
- (Link nullAttr (addInlines nodes) (unpack url, unpack title) :)
-addInline (Node _ (IMAGE url title) nodes) =
- (Image nullAttr (addInlines nodes) (unpack url, unpack title) :)
-addInline _ = id
+addInline opts (Node _ EMPH nodes) =
+ (Emph (addInlines opts nodes) :)
+addInline opts (Node _ STRONG nodes) =
+ (Strong (addInlines opts nodes) :)
+addInline opts (Node _ STRIKETHROUGH nodes) =
+ (Strikeout (addInlines opts nodes) :)
+addInline opts (Node _ (LINK url title) nodes) =
+ (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+addInline opts (Node _ (IMAGE url title) nodes) =
+ (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+addInline _ _ = id
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
new file mode 100644
index 000000000..505d1686d
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -0,0 +1,320 @@
+{-
+ Copyright (C) 2017 Sascha Wilde <wilde@sha-bang.de>
+
+ partly based on all the other readers, especialy the work by
+ John MacFarlane <jgm@berkeley.edu> and
+ Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+ all bugs are solely created by me.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Creole
+ Copyright : Copyright (C) 2017 Sascha Wilde
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Sascha Wilde <wilde@sha-bang.de>
+ Stability : alpha
+ Portability : portable
+
+Conversion of creole text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Creole ( readCreole
+ ) where
+
+import Control.Monad.Except (guard, liftM2, throwError)
+import qualified Data.Foldable as F
+import Data.Maybe (fromMaybe)
+import Data.Monoid
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (enclosed)
+import Text.Pandoc.Shared (crFilter)
+
+
+-- | Read creole from an input string and return a Pandoc document.
+readCreole :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readCreole opts s = do
+ res <- readWithM parseCreole def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case res of
+ Left e -> throwError e
+ Right d -> return d
+
+type CRLParser = ParserT [Char] ParserState
+
+--
+-- Utility functions
+--
+
+(<+>) :: (Monad m, Monoid a) => m a -> m a -> m a
+(<+>) = liftM2 (<>)
+
+-- we have to redefine `enclosed' from Text.Pandoc.Parsing, because it
+-- assumes, that there can't be a space after the start parser, but
+-- with creole this is possible.
+enclosed :: (Show end, PandocMonad m) => CRLParser m start -- ^ start parser
+ -> CRLParser m end -- ^ end parser
+ -> CRLParser m a -- ^ content parser (to be used repeatedly)
+ -> CRLParser m [a]
+enclosed start end parser = try $ start >> many1Till parser end
+
+--
+-- main parser
+--
+
+specialChars :: [Char]
+specialChars = "*/~{}\\|[]()<>\"'"
+
+parseCreole :: PandocMonad m => CRLParser m Pandoc
+parseCreole = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ return $ B.doc bs
+
+
+--
+-- block parsers
+--
+
+block :: PandocMonad m => CRLParser m B.Blocks
+block = do
+ res <- mempty <$ skipMany1 blankline
+ <|> nowiki
+ <|> header
+ <|> horizontalRule
+ <|> anyList 1
+ <|> table
+ <|> para
+ skipMany blankline
+ return res
+
+nowiki :: PandocMonad m => CRLParser m B.Blocks
+nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart
+ >> manyTill content nowikiEnd)
+ where
+ content = brackets <|> line
+ brackets = try $ option "" ((:[]) <$> newline)
+ <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol)
+ line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol
+ eol = lookAhead $ try $ nowikiEnd <|> newline
+ nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline
+ nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline
+
+header :: PandocMonad m => CRLParser m B.Blocks
+header = try $ do
+ skipSpaces
+ level <-
+ fmap length (many1 (char '='))
+ guard $ level <= 6
+ skipSpaces
+ content <- B.str <$> manyTill (noneOf "\n") headerEnd
+ return $ B.header level content
+ where
+ headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline
+
+unorderedList :: PandocMonad m => Int -> CRLParser m B.Blocks
+unorderedList = list '*' B.bulletList
+
+orderedList :: PandocMonad m => Int -> CRLParser m B.Blocks
+orderedList = list '#' B.orderedList
+
+anyList :: PandocMonad m => Int -> CRLParser m B.Blocks
+anyList n = unorderedList n <|> orderedList n
+
+anyListItem :: PandocMonad m => Int -> CRLParser m B.Blocks
+anyListItem n = listItem '*' n <|> listItem '#' n
+
+list :: PandocMonad m => Char -> ([B.Blocks] -> B.Blocks) -> Int -> CRLParser m B.Blocks
+list c f n =
+ fmap f (many1 (itemPlusSublist <|> listItem c n))
+ where itemPlusSublist = try $ listItem c n <+> anyList (n+1)
+
+listItem :: PandocMonad m => Char -> Int -> CRLParser m B.Blocks
+listItem c n =
+ fmap (B.plain . B.trimInlines .mconcat) (listStart >> many1Till inline itemEnd)
+ where
+ listStart = try $ skipSpaces >> optional newline >> skipSpaces
+ >> count n (char c)
+ >> lookAhead (noneOf [c]) >> skipSpaces
+ itemEnd = endOfParaElement <|> nextItem n
+ <|> if n < 3 then nextItem (n+1)
+ else nextItem (n+1) <|> nextItem (n-1)
+ nextItem x = lookAhead $ try $ blankline >> anyListItem x >> return mempty
+
+table :: PandocMonad m => CRLParser m B.Blocks
+table = try $ do
+ headers <- optionMaybe headerRow
+ rows <- many1 row
+ return $ B.simpleTable (fromMaybe [mempty] headers) rows
+ where
+ headerRow = try $ skipSpaces >> many1Till headerCell rowEnd
+ headerCell = B.plain . B.trimInlines . mconcat
+ <$> (string "|=" >> many1Till inline cellEnd)
+ row = try $ skipSpaces >> many1Till cell rowEnd
+ cell = B.plain . B.trimInlines . mconcat
+ <$> (char '|' >> many1Till inline cellEnd)
+ rowEnd = try $ optional (char '|') >> skipSpaces >> newline
+ cellEnd = lookAhead $ try $ char '|' <|> rowEnd
+
+para :: PandocMonad m => CRLParser m B.Blocks
+para = fmap (result . mconcat) (many1Till inline endOfParaElement)
+ where
+ result content = if F.all (==Space) content
+ then mempty
+ else B.para $ B.trimInlines content
+
+endOfParaElement :: PandocMonad m => CRLParser m ()
+endOfParaElement = lookAhead $ endOfInput <|> endOfPara
+ <|> startOfList <|> startOfTable
+ <|> startOfHeader <|> hr <|> startOfNowiki
+ where
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ startOf :: PandocMonad m => CRLParser m a -> CRLParser m ()
+ startOf p = try $ blankline >> p >> return mempty
+ startOfList = startOf $ anyListItem 1
+ startOfTable = startOf table
+ startOfHeader = startOf header
+ startOfNowiki = startOf nowiki
+ hr = startOf horizontalRule
+
+horizontalRule :: PandocMonad m => CRLParser m B.Blocks
+horizontalRule = try $ skipSpaces >> string "----" >> skipSpaces >> newline
+ >> return B.horizontalRule
+
+--
+-- inline parsers
+--
+
+inline :: PandocMonad m => CRLParser m B.Inlines
+inline = choice [ whitespace
+ , escapedLink
+ , escapedChar
+ , link
+ , inlineNowiki
+ , placeholder
+ , image
+ , forcedLinebreak
+ , bold
+ , finalBold
+ , italics
+ , finalItalics
+ , str
+ , symbol
+ ] <?> "inline"
+
+escapedChar :: PandocMonad m => CRLParser m B.Inlines
+escapedChar =
+ fmap (B.str . (:[])) (try $ char '~' >> noneOf "\t\n ")
+
+escapedLink :: PandocMonad m => CRLParser m B.Inlines
+escapedLink = try $ do
+ char '~'
+ (orig, _) <- uri
+ return $ B.str orig
+
+image :: PandocMonad m => CRLParser m B.Inlines
+image = try $ do
+ (orig, src) <- wikiImg
+ return $ B.image src "" (B.str orig)
+ where
+ linkSrc = many $ noneOf "|}\n\r\t"
+ linkDsc = char '|' >> many (noneOf "}\n\r\t")
+ wikiImg = try $ do
+ string "{{"
+ src <- linkSrc
+ dsc <- option "" linkDsc
+ string "}}"
+ return (dsc, src)
+
+link :: PandocMonad m => CRLParser m B.Inlines
+link = try $ do
+ (orig, src) <- uriLink <|> wikiLink
+ return $ B.link src "" orig
+ where
+ linkSrc = many $ noneOf "|]\n\r\t"
+ linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines
+ linkDsc otxt = B.str
+ <$> try (option otxt
+ (char '|' >> many (noneOf "]\n\r\t")))
+ linkImg = try $ char '|' >> image
+ wikiLink = try $ do
+ string "[["
+ src <- linkSrc
+ dsc <- linkImg <|> linkDsc src
+ string "]]"
+ return (dsc, src)
+ uriLink = try $ do
+ (orig, src) <- uri
+ return (B.str orig, src)
+
+inlineNowiki :: PandocMonad m => CRLParser m B.Inlines
+inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end)
+ where
+ start = try $ string "{{{"
+ end = try $ string "}}}" >> lookAhead (noneOf "}")
+
+placeholder :: PandocMonad m => CRLParser m B.Inlines
+-- The semantics of the placeholder is basicallly implementation
+-- dependent, so there is no way to DTRT for all cases.
+-- So for now we just drop them.
+placeholder = B.text <$> try (string "<<<" >> manyTill anyChar (string ">>>")
+ >> return "")
+
+whitespace :: PandocMonad m => CRLParser m B.Inlines
+whitespace = lb <|> regsp
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+linebreak :: PandocMonad m => CRLParser m B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = eof >> return mempty
+ innerNewline = return B.space
+
+symbol :: PandocMonad m => CRLParser m B.Inlines
+symbol = fmap (B.str . (:[])) (oneOf specialChars)
+
+str :: PandocMonad m => CRLParser m B.Inlines
+str = let strChar = noneOf ("\t\n " ++ specialChars) in
+ fmap B.str (many1 strChar)
+
+bold :: PandocMonad m => CRLParser m B.Inlines
+bold = B.strong . mconcat <$>
+ enclosed (string "**") (try $ string "**") inline
+
+italics :: PandocMonad m => CRLParser m B.Inlines
+italics = B.emph . mconcat <$>
+ enclosed (string "//") (try $ string "//") inline
+
+finalBold :: PandocMonad m => CRLParser m B.Inlines
+finalBold = B.strong . mconcat <$>
+ try (string "**" >> many1Till inline endOfParaElement)
+
+finalItalics :: PandocMonad m => CRLParser m B.Inlines
+finalItalics = B.emph . mconcat <$>
+ try (string "//" >> many1Till inline endOfParaElement)
+
+forcedLinebreak :: PandocMonad m => CRLParser m B.Inlines
+forcedLinebreak = try $ string "\\\\" >> return B.linebreak
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 68552ccb3..728f77a05 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,22 +1,22 @@
+{-# LANGUAGE ExplicitForAll #-}
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
-import Data.Char (toUpper)
-import Text.Pandoc.Shared (safeRead)
-import Text.Pandoc.Options
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder
-import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Control.Monad.State.Strict
+import Data.Char (isSpace, toUpper)
+import Data.Default
import Data.Either (rights)
+import Data.Foldable (asum)
import Data.Generics
-import Data.Char (isSpace)
-import Control.Monad.State
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (crFilter, safeRead)
import Text.TeXMath (readMathML, writeTeX)
-import Text.Pandoc.Error (PandocError)
-import Control.Monad.Except
-import Data.Default
-import Data.Foldable (asum)
+import Text.XML.Light
{-
@@ -50,7 +50,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] author - The name of an individual author
[ ] authorblurb - A short description or note about an author
[x] authorgroup - Wrapper for author information when a document has
- multiple authors or collabarators
+ multiple authors or collaborators
[x] authorinitials - The initials or other short identifier for an author
[o] beginpage - The location of a page break in a print version of the document
[ ] bibliocoverage - The spatial or temporal coverage of a document
@@ -502,7 +502,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] ?asciidoc-br? - line break from asciidoc docbook output
-}
-type DB = ExceptT PandocError (State DBState)
+type DB m = StateT DBState m
data DBState = DBState{ dbSectionLevel :: Int
, dbQuoteType :: QuoteType
@@ -523,10 +523,12 @@ instance Default DBState where
, dbContent = [] }
-readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc
-readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs
- where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree
- tree = normalizeTree . parseXML . handleInstructions $ inp
+readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook _ inp = do
+ let tree = normalizeTree . parseXML . handleInstructions
+ $ T.unpack $ crFilter inp
+ (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
+ return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
-- to <br/>, since xml-light doesn't parse the instruction correctly.
@@ -536,12 +538,12 @@ handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':
handleInstructions xs = case break (=='<') xs of
(ys, []) -> ys
([], '<':zs) -> '<' : handleInstructions zs
- (ys, zs) -> ys ++ handleInstructions zs
+ (ys, zs) -> ys ++ handleInstructions zs
-getFigure :: Element -> DB Blocks
+getFigure :: PandocMonad m => Element -> DB m Blocks
getFigure e = do
tit <- case filterChild (named "title") e of
- Just t -> getInlines t
+ Just t -> getInlines t
Nothing -> return mempty
modify $ \st -> st{ dbFigureTitle = tit }
res <- getBlocks e
@@ -564,14 +566,12 @@ normalizeTree = everywhere (mkT go)
go xs = xs
convertEntity :: String -> String
-convertEntity e = maybe (map toUpper e) id (lookupEntity e)
+convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
attrValue :: String -> Element -> String
attrValue attr elt =
- case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
- Just z -> z
- Nothing -> ""
+ fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- convenience function
named :: String -> Element -> Bool
@@ -579,20 +579,20 @@ named s e = qName (elName e) == s
--
-acceptingMetadata :: DB a -> DB a
+acceptingMetadata :: PandocMonad m => DB m a -> DB m a
acceptingMetadata p = do
modify (\s -> s { dbAcceptsMeta = True } )
res <- p
modify (\s -> s { dbAcceptsMeta = False })
return res
-checkInMeta :: Monoid a => DB () -> DB a
+checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
checkInMeta p = do
accepts <- dbAcceptsMeta <$> get
when accepts p
return mempty
-addMeta :: ToMetaValue a => String -> a -> DB ()
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
addMeta field val = modify (setMeta field val)
instance HasMeta DBState where
@@ -631,7 +631,7 @@ addToStart toadd bs =
-- function that is used by both mediaobject (in parseBlock)
-- and inlinemediaobject (in parseInline)
-- A DocBook mediaobject is a wrapper around a set of alternative presentations
-getMediaobject :: Element -> DB Inlines
+getMediaobject :: PandocMonad m => Element -> DB m Inlines
getMediaobject e = do
(imageUrl, attr) <-
case filterChild (named "imageobject") e of
@@ -651,18 +651,20 @@ getMediaobject e = do
|| named "textobject" x
|| named "alt" x) el of
Nothing -> return mempty
- Just z -> mconcat <$> (mapM parseInline $ elContent z)
+ Just z -> mconcat <$>
+ mapM parseInline (elContent z)
figTitle <- gets dbFigureTitle
let (caption, title) = if isNull figTitle
then (getCaption e, "")
else (return figTitle, "fig:")
- liftM (imageWith attr imageUrl title) caption
+ fmap (imageWith attr imageUrl title) caption
-getBlocks :: Element -> DB Blocks
-getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
+getBlocks :: PandocMonad m => Element -> DB m Blocks
+getBlocks e = mconcat <$>
+ mapM parseBlock (elContent e)
-parseBlock :: Content -> DB Blocks
+parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
@@ -795,15 +797,16 @@ parseBlock (Elem e) =
return $ p <> b <> x
codeBlockWithLang = do
let classes' = case attrValue "language" e of
- "" -> []
- x -> [x]
+ "" -> []
+ x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
$ trimNl $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
Just z -> (para . (str "— " <>) . mconcat)
- <$> (mapM parseInline $ elContent z)
+ <$>
+ mapM parseInline (elContent z)
contents <- getBlocks e
return $ blockQuote (contents <> attrib)
listitems = mapM getBlocks $ filterChildren (named "listitem") e
@@ -868,11 +871,11 @@ parseBlock (Elem e) =
|| x == '.') w
Nothing -> 0 :: Double
let numrows = case bodyrows of
- [] -> 0
- xs -> maximum $ map length xs
+ [] -> 0
+ xs -> maximum $ map length xs
let aligns = case colspecs of
- [] -> replicate numrows AlignDefault
- cs -> map toAlignment cs
+ [] -> replicate numrows AlignDefault
+ cs -> map toAlignment cs
let widths = case colspecs of
[] -> replicate numrows 0
cs -> let ws = map toWidth cs
@@ -892,7 +895,7 @@ parseBlock (Elem e) =
headerText <- case filterChild (named "title") e `mplus`
(filterChild (named "info") e >>=
filterChild (named "title")) of
- Just t -> getInlines t
+ Just t -> getInlines t
Nothing -> return mempty
modify $ \st -> st{ dbSectionLevel = n }
b <- getBlocks e
@@ -902,8 +905,9 @@ parseBlock (Elem e) =
lineItems = mapM getInlines $ filterChildren (named "line") e
metaBlock = acceptingMetadata (getBlocks e) >> return mempty
-getInlines :: Element -> DB Inlines
-getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
+getInlines :: PandocMonad m => Element -> DB m Inlines
+getInlines e' = (trimInlines . mconcat) <$>
+ mapM parseInline (elContent e')
strContentRecursive :: Element -> String
strContentRecursive = strContent .
@@ -913,10 +917,10 @@ elementToStr :: Content -> Content
elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
-parseInline :: Content -> DB Inlines
+parseInline :: PandocMonad m => Content -> DB m Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
- return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref
+ return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
parseInline (Elem e) =
case qName (elName e) of
"equation" -> equation displayMath
@@ -957,8 +961,10 @@ parseInline (Elem e) =
"userinput" -> codeWithLang
"varargs" -> return $ code "(...)"
"keycap" -> return (str $ strContent e)
- "keycombo" -> keycombo <$> (mapM parseInline $ elContent e)
- "menuchoice" -> menuchoice <$> (mapM parseInline $
+ "keycombo" -> keycombo <$>
+ mapM parseInline (elContent e)
+ "menuchoice" -> menuchoice <$>
+ mapM parseInline (
filter isGuiMenu $ elContent e)
"xref" -> do
content <- dbContent <$> get
@@ -977,17 +983,18 @@ parseInline (Elem e) =
ils <- innerInlines
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just h -> h
- _ -> ('#' : attrValue "linkend" e)
+ _ -> '#' : attrValue "linkend" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, words $ attrValue "role" e, [])
return $ linkWith attr href "" ils'
"foreignphrase" -> emph <$> innerInlines
"emphasis" -> case attrValue "role" e of
- "bold" -> strong <$> innerInlines
- "strong" -> strong <$> innerInlines
+ "bold" -> strong <$> innerInlines
+ "strong" -> strong <$> innerInlines
"strikethrough" -> strikeout <$> innerInlines
- _ -> emph <$> innerInlines
- "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
+ _ -> emph <$> innerInlines
+ "footnote" -> (note . mconcat) <$>
+ mapM parseBlock (elContent e)
"title" -> return mempty
"affiliation" -> return mempty
-- Note: this isn't a real docbook tag; it's what we convert
@@ -996,7 +1003,7 @@ parseInline (Elem e) =
"br" -> return linebreak
_ -> innerInlines
where innerInlines = (trimInlines . mconcat) <$>
- (mapM parseInline $ elContent e)
+ mapM parseInline (elContent e)
equation constructor = return $ mconcat $
map (constructor . writeTeX)
$ rights
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 595c805bf..e58b0a905 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE PatternGuards, OverloadedStrings, CPP #-}
-
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
+Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Docx
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
+ Copyright : Copyright (C) 2014-2018 Jesse Rosenthal
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -50,12 +51,13 @@ implemented, [-] means partially implemented):
* Inlines
- [X] Str
- - [X] Emph (italics and underline both read as Emph)
+ - [X] Emph
- [X] Strong
- [X] Strikeout
- [X] Superscript
- [X] Subscript
- [X] SmallCaps
+ - [-] Underline (was previously converted to Emph)
- [ ] Quoted
- [ ] Cite
- [X] Code (styled with `VerbatimChar`)
@@ -64,88 +66,91 @@ implemented, [-] means partially implemented):
- [X] Math
- [X] Link (links to an arbitrary bookmark create a span with the target as
id and "anchor" class)
- - [X] Image
+ - [X] Image
- [X] Note (Footnotes and Endnotes are silently combined.)
-}
module Text.Pandoc.Readers.Docx
- ( readDocxWithWarnings
- , readDocx
+ ( readDocx
) where
import Codec.Archive.Zip
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import qualified Data.ByteString.Lazy as B
+import Data.Default (Default)
+import Data.List (delete, intersect)
+import qualified Data.Map as M
+import Data.Maybe (isJust, fromMaybe)
+import Data.Sequence (ViewL (..), viewl)
+import qualified Data.Sequence as Seq
+import qualified Data.Set as Set
import Text.Pandoc.Builder
-import Text.Pandoc.Walk
-import Text.Pandoc.Readers.Docx.Parse
-import Text.Pandoc.Readers.Docx.Lists
+-- import Text.Pandoc.Definition
+import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.Combine
+import Text.Pandoc.Readers.Docx.Lists
+import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Shared
-import Text.Pandoc.MediaBag (insertMedia, MediaBag)
-import Data.List (delete, intersect)
+import Text.Pandoc.Walk
import Text.TeXMath (writeTeX)
-import Data.Default (Default)
-import qualified Data.ByteString.Lazy as B
-import qualified Data.Map as M
-import qualified Data.Set as Set
-import Control.Monad.Reader
-import Control.Monad.State
-import Data.Sequence (ViewL(..), viewl)
-import qualified Data.Sequence as Seq (null)
#if !(MIN_VERSION_base(4,8,0))
import Data.Traversable (traverse)
#endif
-
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.Class as P
import Text.Pandoc.Error
-import Control.Monad.Except
+import Text.Pandoc.Logging
-readDocxWithWarnings :: ReaderOptions
- -> B.ByteString
- -> Either PandocError (Pandoc, MediaBag, [String])
-readDocxWithWarnings opts bytes
+readDocx :: PandocMonad m
+ => ReaderOptions
+ -> B.ByteString
+ -> m Pandoc
+readDocx opts bytes
| Right archive <- toArchiveOrFail bytes
, Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do
- (meta, blks, mediaBag, warnings) <- docxToOutput opts docx
- return (Pandoc meta blks, mediaBag, parserWarnings ++ warnings)
-readDocxWithWarnings _ _ =
- Left (ParseFailure "couldn't parse docx file")
-
-readDocx :: ReaderOptions
- -> B.ByteString
- -> Either PandocError (Pandoc, MediaBag)
-readDocx opts bytes = do
- (pandoc, mediaBag, _) <- readDocxWithWarnings opts bytes
- return (pandoc, mediaBag)
+ mapM_ (P.report . DocxParserWarning) parserWarnings
+ (meta, blks) <- docxToOutput opts docx
+ return $ Pandoc meta blks
+readDocx _ _ =
+ throwError $ PandocSomeError "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
- , docxMediaBag :: MediaBag
- , docxDropCap :: Inlines
- , docxWarnings :: [String]
+ , docxAnchorSet :: Set.Set String
+ , docxImmedPrevAnchor :: Maybe String
+ , docxMediaBag :: MediaBag
+ , docxDropCap :: Inlines
+ , docxWarnings :: [String]
+ -- keep track of (numId, lvl) values for
+ -- restarting
+ , docxListState :: M.Map (String, String) Integer
+ , docxPrevPara :: Inlines
}
instance Default DState where
def = DState { docxAnchorMap = M.empty
+ , docxAnchorSet = mempty
+ , docxImmedPrevAnchor = Nothing
, docxMediaBag = mempty
, docxDropCap = mempty
, docxWarnings = []
+ , docxListState = M.empty
+ , docxPrevPara = mempty
}
-data DEnv = DEnv { docxOptions :: ReaderOptions
- , docxInHeaderBlock :: Bool }
+data DEnv = DEnv { docxOptions :: ReaderOptions
+ , docxInHeaderBlock :: Bool
+ }
instance Default DEnv where
def = DEnv def False
-type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState))
-
-evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a
-evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx
+type DocxContext m = ReaderT DEnv (StateT DState m)
-addDocxWarning :: String -> DocxContext ()
-addDocxWarning msg = do
- warnings <- gets docxWarnings
- modify $ \s -> s {docxWarnings = msg : warnings}
+evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
+evalDocxContext ctx env st = flip evalStateT st $flip runReaderT env ctx
-- This is empty, but we put it in for future-proofing.
spansToKeep :: [String]
@@ -162,7 +167,7 @@ metaStyles = M.fromList [ ("Title", "title")
, ("Abstract", "abstract")]
sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
-sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp))
+sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp)
isMetaPar :: BodyPart -> Bool
isMetaPar (Paragraph pPr _) =
@@ -174,28 +179,28 @@ isEmptyPar (Paragraph _ parParts) =
all isEmptyParPart parParts
where
isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
- isEmptyParPart _ = False
+ isEmptyParPart _ = False
isEmptyElem (TextRun s) = trim s == ""
isEmptyElem _ = True
isEmptyPar _ = False
-bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue)
+bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
- , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
+ , (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles)
, (Just metaField) <- M.lookup c metaStyles = do
inlines <- smushInlines <$> mapM parPartToInlines parParts
remaining <- bodyPartsToMeta' bps
let
f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
- f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks)
+ f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks (Para ils : blks)
f m (MetaList mv) = MetaList (m : mv)
f m n = MetaList [m, n]
return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining
bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
-bodyPartsToMeta :: [BodyPart] -> DocxContext Meta
+bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta
bodyPartsToMeta bps = do
mp <- bodyPartsToMeta' bps
let mp' =
@@ -208,7 +213,7 @@ fixAuthors :: MetaValue -> MetaValue
fixAuthors (MetaBlocks blks) =
MetaList $ map g $ filter f blks
where f (Para _) = True
- f _ = False
+ f _ = False
g (Para ils) = MetaInlines ils
g _ = MetaInlines []
fixAuthors mv = mv
@@ -220,106 +225,122 @@ codeDivs :: [String]
codeDivs = ["SourceCode"]
runElemToInlines :: RunElem -> Inlines
-runElemToInlines (TextRun s) = text s
-runElemToInlines (LnBrk) = linebreak
-runElemToInlines (Tab) = space
-runElemToInlines (SoftHyphen) = text "\xad"
-runElemToInlines (NoBreakHyphen) = text "\x2011"
+runElemToInlines (TextRun s) = text s
+runElemToInlines LnBrk = linebreak
+runElemToInlines Tab = space
+runElemToInlines SoftHyphen = text "\xad"
+runElemToInlines NoBreakHyphen = text "\x2011"
runElemToString :: RunElem -> String
-runElemToString (TextRun s) = s
-runElemToString (LnBrk) = ['\n']
-runElemToString (Tab) = ['\t']
-runElemToString (SoftHyphen) = ['\xad']
-runElemToString (NoBreakHyphen) = ['\x2011']
+runElemToString (TextRun s) = s
+runElemToString LnBrk = ['\n']
+runElemToString Tab = ['\t']
+runElemToString SoftHyphen = ['\xad']
+runElemToString NoBreakHyphen = ['\x2011']
runToString :: Run -> String
runToString (Run _ runElems) = concatMap runElemToString runElems
-runToString _ = ""
+runToString _ = ""
parPartToString :: ParPart -> String
-parPartToString (PlainRun run) = runToString run
+parPartToString (PlainRun run) = runToString run
parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
-parPartToString _ = ""
+parPartToString _ = ""
blacklistedCharStyles :: [String]
blacklistedCharStyles = ["Hyperlink"]
-resolveDependentRunStyle :: RunStyle -> RunStyle
+resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle
resolveDependentRunStyle rPr
| Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles =
- rPr
- | Just (_, cs) <- rStyle rPr =
- let rPr' = resolveDependentRunStyle cs
- in
- RunStyle { isBold = case isBold rPr of
- Just bool -> Just bool
- Nothing -> isBold rPr'
- , isItalic = case isItalic rPr of
- Just bool -> Just bool
- Nothing -> isItalic rPr'
- , isSmallCaps = case isSmallCaps rPr of
- Just bool -> Just bool
- Nothing -> isSmallCaps rPr'
- , isStrike = case isStrike rPr of
- Just bool -> Just bool
- Nothing -> isStrike rPr'
- , rVertAlign = case rVertAlign rPr of
- Just valign -> Just valign
- Nothing -> rVertAlign rPr'
- , rUnderline = case rUnderline rPr of
- Just ulstyle -> Just ulstyle
- Nothing -> rUnderline rPr'
- , rStyle = rStyle rPr }
- | otherwise = rPr
-
-runStyleToTransform :: RunStyle -> (Inlines -> Inlines)
+ return rPr
+ | Just (_, cs) <- rStyle rPr = do
+ opts <- asks docxOptions
+ if isEnabled Ext_styles opts
+ then return rPr
+ else do rPr' <- resolveDependentRunStyle cs
+ return $
+ RunStyle { isBold = case isBold rPr of
+ Just bool -> Just bool
+ Nothing -> isBold rPr'
+ , isItalic = case isItalic rPr of
+ Just bool -> Just bool
+ Nothing -> isItalic rPr'
+ , isSmallCaps = case isSmallCaps rPr of
+ Just bool -> Just bool
+ Nothing -> isSmallCaps rPr'
+ , isStrike = case isStrike rPr of
+ Just bool -> Just bool
+ Nothing -> isStrike rPr'
+ , rVertAlign = case rVertAlign rPr of
+ Just valign -> Just valign
+ Nothing -> rVertAlign rPr'
+ , rUnderline = case rUnderline rPr of
+ Just ulstyle -> Just ulstyle
+ Nothing -> rUnderline rPr'
+ , rStyle = rStyle rPr }
+ | otherwise = return rPr
+
+runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
| Just (s, _) <- rStyle rPr
- , s `elem` spansToKeep =
- let rPr' = rPr{rStyle = Nothing}
- in
- (spanWith ("", [s], [])) . (runStyleToTransform rPr')
- | Just True <- isItalic rPr =
- emph . (runStyleToTransform rPr {isItalic = Nothing})
- | Just True <- isBold rPr =
- strong . (runStyleToTransform rPr {isBold = Nothing})
- | Just True <- isSmallCaps rPr =
- smallcaps . (runStyleToTransform rPr {isSmallCaps = Nothing})
- | Just True <- isStrike rPr =
- strikeout . (runStyleToTransform rPr {isStrike = Nothing})
- | Just SupScrpt <- rVertAlign rPr =
- superscript . (runStyleToTransform rPr {rVertAlign = Nothing})
- | Just SubScrpt <- rVertAlign rPr =
- subscript . (runStyleToTransform rPr {rVertAlign = Nothing})
- | Just "single" <- rUnderline rPr =
- emph . (runStyleToTransform rPr {rUnderline = Nothing})
- | otherwise = id
-
-runToInlines :: Run -> DocxContext Inlines
+ , s `elem` spansToKeep = do
+ transform <- runStyleToTransform rPr{rStyle = Nothing}
+ return $ spanWith ("", [s], []) . transform
+ | Just (s, _) <- rStyle rPr = do
+ opts <- asks docxOptions
+ let extraInfo = if isEnabled Ext_styles opts
+ then spanWith ("", [], [("custom-style", s)])
+ else id
+ transform <- runStyleToTransform rPr{rStyle = Nothing}
+ return $ extraInfo . transform
+ | Just True <- isItalic rPr = do
+ transform <- runStyleToTransform rPr{isItalic = Nothing}
+ return $ emph . transform
+ | Just True <- isBold rPr = do
+ transform <- runStyleToTransform rPr{isBold = Nothing}
+ return $ strong . transform
+ | Just True <- isSmallCaps rPr = do
+ transform <- runStyleToTransform rPr{isSmallCaps = Nothing}
+ return $ smallcaps . transform
+ | Just True <- isStrike rPr = do
+ transform <- runStyleToTransform rPr{isStrike = Nothing}
+ return $ strikeout . transform
+ | Just SupScrpt <- rVertAlign rPr = do
+ transform <- runStyleToTransform rPr{rVertAlign = Nothing}
+ return $ superscript . transform
+ | Just SubScrpt <- rVertAlign rPr = do
+ transform <- runStyleToTransform rPr{rVertAlign = Nothing}
+ return $ subscript . transform
+ | Just "single" <- rUnderline rPr = do
+ transform <- runStyleToTransform rPr{rUnderline = Nothing}
+ return $ underlineSpan . transform
+ | otherwise = return id
+
+runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
| Just (s, _) <- rStyle rs
- , s `elem` codeStyles =
- let rPr = resolveDependentRunStyle rs
- codeString = code $ concatMap runElemToString runElems
- in
- return $ case rVertAlign rPr of
- Just SupScrpt -> superscript codeString
- Just SubScrpt -> subscript codeString
- _ -> codeString
+ , s `elem` codeStyles = do
+ rPr <- resolveDependentRunStyle rs
+ let codeString = code $ concatMap runElemToString runElems
+ return $ case rVertAlign rPr of
+ Just SupScrpt -> superscript codeString
+ Just SubScrpt -> subscript codeString
+ _ -> codeString
| otherwise = do
- let ils = smushInlines (map runElemToInlines runElems)
- return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
+ rPr <- resolveDependentRunStyle rs
+ let ils = smushInlines (map runElemToInlines runElems)
+ transform <- runStyleToTransform rPr
+ return $ transform ils
runToInlines (Footnote bps) = do
- blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
+ blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ note blksList
runToInlines (Endnote bps) = do
- blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
+ blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ note blksList
runToInlines (InlineDrawing fp title alt bs ext) = do
- mediaBag <- gets docxMediaBag
- modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
+ (lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) fp title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
@@ -330,20 +351,39 @@ extentToAttr (Just (w, h)) =
showDim d = show (d / 914400) ++ "in"
extentToAttr _ = nullAttr
-blocksToInlinesWarn :: String -> Blocks -> DocxContext Inlines
+blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
let blkList = toList blks
notParaOrPlain :: Block -> Bool
- notParaOrPlain (Para _) = False
+ notParaOrPlain (Para _) = False
notParaOrPlain (Plain _) = False
- notParaOrPlain _ = True
- when (not $ null $ filter notParaOrPlain blkList)
- (addDocxWarning $ "Docx comment " ++ cmtId ++ " will not retain formatting")
- return $ fromList $ blocksToInlines blkList
-
-parPartToInlines :: ParPart -> DocxContext Inlines
-parPartToInlines (PlainRun r) = runToInlines r
-parPartToInlines (Insertion _ author date runs) = do
+ notParaOrPlain _ = True
+ unless ( not (any notParaOrPlain blkList)) $
+ lift $ P.report $ DocxParserWarning $
+ "Docx comment " ++ cmtId ++ " will not retain formatting"
+ return $ blocksToInlines' blkList
+
+-- The majority of work in this function is done in the primed
+-- subfunction `partPartToInlines'`. We make this wrapper so that we
+-- don't have to modify `docxImmedPrevAnchor` state after every function.
+parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
+parPartToInlines parPart =
+ case parPart of
+ (BookMark _ anchor) | notElem anchor dummyAnchors -> do
+ inHdrBool <- asks docxInHeaderBlock
+ ils <- parPartToInlines' parPart
+ immedPrevAnchor <- gets docxImmedPrevAnchor
+ unless (isJust immedPrevAnchor || inHdrBool)
+ (modify $ \s -> s{ docxImmedPrevAnchor = Just anchor})
+ return ils
+ _ -> do
+ ils <- parPartToInlines' parPart
+ modify $ \s -> s{ docxImmedPrevAnchor = Nothing}
+ return ils
+
+parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines
+parPartToInlines' (PlainRun r) = runToInlines r
+parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author date)) runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> smushInlines <$> mapM runToInlines runs
@@ -352,7 +392,7 @@ parPartToInlines (Insertion _ author date runs) = do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["insertion"], [("author", author), ("date", date)])
return $ spanWith attr ils
-parPartToInlines (Deletion _ author date runs) = do
+parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> return mempty
@@ -361,7 +401,7 @@ parPartToInlines (Deletion _ author date runs) = do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["deletion"], [("author", author), ("date", date)])
return $ spanWith attr ils
-parPartToInlines (CommentStart cmtId author date bodyParts) = do
+parPartToInlines' (CommentStart cmtId author date bodyParts) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
@@ -370,16 +410,16 @@ parPartToInlines (CommentStart cmtId author date bodyParts) = do
let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
return $ spanWith attr ils
_ -> return mempty
-parPartToInlines (CommentEnd cmtId) = do
+parPartToInlines' (CommentEnd cmtId) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
let attr = ("", ["comment-end"], [("id", cmtId)])
return $ spanWith attr mempty
_ -> return mempty
-parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
+parPartToInlines' (BookMark _ anchor) | anchor `elem` dummyAnchors =
return mempty
-parPartToInlines (BookMark _ anchor) =
+parPartToInlines' (BookMark _ anchor) =
-- We record these, so we can make sure not to overwrite
-- user-defined anchor links with header auto ids.
do
@@ -395,27 +435,40 @@ parPartToInlines (BookMark _ anchor) =
-- of rewriting user-defined anchor links. However, since these
-- are not defined in pandoc, it seems like a necessary evil to
-- avoid an extra pass.
- let newAnchor =
- if not inHdrBool && anchor `elem` (M.elems anchorMap)
- then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
- else anchor
- unless inHdrBool
- (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
- return $ spanWith (newAnchor, ["anchor"], []) mempty
-parPartToInlines (Drawing fp title alt bs ext) = do
- mediaBag <- gets docxMediaBag
- modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
+ immedPrevAnchor <- gets docxImmedPrevAnchor
+ case immedPrevAnchor of
+ Just prevAnchor -> do
+ unless inHdrBool
+ (modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap})
+ return mempty
+ Nothing -> do
+ let newAnchor =
+ if not inHdrBool && anchor `elem` M.elems anchorMap
+ then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
+ else anchor
+ unless inHdrBool
+ (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
+ return $ spanWith (newAnchor, ["anchor"], []) mempty
+parPartToInlines' (Drawing fp title alt bs ext) = do
+ (lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) fp title $ text alt
-parPartToInlines Chart = do
+parPartToInlines' Chart =
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
-parPartToInlines (InternalHyperLink anchor runs) = do
+parPartToInlines' (InternalHyperLink anchor runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link ('#' : anchor) "" ils
-parPartToInlines (ExternalHyperLink target runs) = do
+parPartToInlines' (ExternalHyperLink target runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
-parPartToInlines (PlainOMath exps) = do
+parPartToInlines' (PlainOMath exps) =
return $ math $ writeTeX exps
+parPartToInlines' (SmartTag runs) =
+ smushInlines <$> mapM runToInlines runs
+parPartToInlines' (Field info runs) =
+ case info of
+ HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs
+ UnknownField -> smushInlines <$> mapM runToInlines runs
+parPartToInlines' NullParPart = return mempty
isAnchorSpan :: Inline -> Bool
isAnchorSpan (Span (_, classes, kvs) _) =
@@ -426,10 +479,10 @@ isAnchorSpan _ = False
dummyAnchors :: [String]
dummyAnchors = ["_GoBack"]
-makeHeaderAnchor :: Blocks -> DocxContext Blocks
+makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
makeHeaderAnchor bs = traverse makeHeaderAnchor' bs
-makeHeaderAnchor' :: Block -> DocxContext Block
+makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block
-- If there is an anchor already there (an anchor span in the header,
-- to be exact), we rename and associate the new id with the old one.
makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
@@ -458,126 +511,184 @@ makeHeaderAnchor' blk = return blk
-- Rewrite a standalone paragraph block as a plain
singleParaToPlain :: Blocks -> Blocks
singleParaToPlain blks
- | (Para (ils) :< seeq) <- viewl $ unMany blks
+ | (Para ils :< seeq) <- viewl $ unMany blks
, Seq.null seeq =
singleton $ Plain ils
singleParaToPlain blks = blks
-cellToBlocks :: Cell -> DocxContext Blocks
+cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks
cellToBlocks (Cell bps) = do
blks <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
-rowToBlocksList :: Row -> DocxContext [Blocks]
+rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks]
rowToBlocksList (Row cells) = do
blksList <- mapM cellToBlocks cells
return $ map singleParaToPlain blksList
-trimLineBreaks :: [Inline] -> [Inline]
-trimLineBreaks [] = []
-trimLineBreaks (LineBreak : ils) = trimLineBreaks ils
-trimLineBreaks ils
- | (LineBreak : ils') <- reverse ils = trimLineBreaks (reverse ils')
-trimLineBreaks ils = ils
+-- like trimInlines, but also take out linebreaks
+trimSps :: Inlines -> Inlines
+trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
+ where isSp Space = True
+ isSp SoftBreak = True
+ isSp LineBreak = True
+ isSp _ = False
-parStyleToTransform :: ParagraphStyle -> (Blocks -> Blocks)
+parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr
| (c:cs) <- pStyle pPr
- , c `elem` divsToKeep =
- let pPr' = pPr { pStyle = cs }
- in
- (divWith ("", [c], [])) . (parStyleToTransform pPr')
+ , c `elem` divsToKeep = do
+ let pPr' = pPr { pStyle = cs }
+ transform <- parStyleToTransform pPr'
+ return $ divWith ("", [c], []) . transform
| (c:cs) <- pStyle pPr,
- c `elem` listParagraphDivs =
+ c `elem` listParagraphDivs = do
let pPr' = pPr { pStyle = cs, indentation = Nothing}
- in
- (divWith ("", [c], [])) . (parStyleToTransform pPr')
- | (_:cs) <- pStyle pPr
- , Just True <- pBlockQuote pPr =
- let pPr' = pPr { pStyle = cs }
- in
- blockQuote . (parStyleToTransform pPr')
- | (_:cs) <- pStyle pPr =
+ transform <- parStyleToTransform pPr'
+ return $ divWith ("", [c], []) . transform
+ | (c:cs) <- pStyle pPr
+ , Just True <- pBlockQuote pPr = do
+ opts <- asks docxOptions
+ let pPr' = pPr { pStyle = cs }
+ transform <- parStyleToTransform pPr'
+ let extraInfo = if isEnabled Ext_styles opts
+ then divWith ("", [], [("custom-style", c)])
+ else id
+ return $ extraInfo . blockQuote . transform
+ | (c:cs) <- pStyle pPr = do
+ opts <- asks docxOptions
let pPr' = pPr { pStyle = cs}
- in
- parStyleToTransform pPr'
+ transform <- parStyleToTransform pPr'
+ let extraInfo = if isEnabled Ext_styles opts
+ then divWith ("", [], [("custom-style", c)])
+ else id
+ return $ extraInfo . transform
| null (pStyle pPr)
, Just left <- indentation pPr >>= leftParIndent
- , Just hang <- indentation pPr >>= hangingParIndent =
+ , Just hang <- indentation pPr >>= hangingParIndent = do
let pPr' = pPr { indentation = Nothing }
- in
- case (left - hang) > 0 of
- True -> blockQuote . (parStyleToTransform pPr')
- False -> parStyleToTransform pPr'
+ transform <- parStyleToTransform pPr'
+ return $ case (left - hang) > 0 of
+ True -> blockQuote . transform
+ False -> transform
| null (pStyle pPr),
- Just left <- indentation pPr >>= leftParIndent =
+ Just left <- indentation pPr >>= leftParIndent = do
let pPr' = pPr { indentation = Nothing }
- in
- case left > 0 of
- True -> blockQuote . (parStyleToTransform pPr')
- False -> parStyleToTransform pPr'
-parStyleToTransform _ = id
+ transform <- parStyleToTransform pPr'
+ return $ case left > 0 of
+ True -> blockQuote . transform
+ False -> transform
+parStyleToTransform _ = return id
-bodyPartToBlocks :: BodyPart -> DocxContext Blocks
+bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
- | not $ null $ codeDivs `intersect` (pStyle pPr) =
- return
- $ parStyleToTransform pPr
- $ codeBlock
- $ concatMap parPartToString parparts
+ | not $ null $ codeDivs `intersect` (pStyle pPr) = do
+ transform <- parStyleToTransform pPr
+ return $
+ transform $
+ codeBlock $
+ concatMap parPartToString parparts
| Just (style, n) <- pHeading pPr = do
- ils <- local (\s-> s{docxInHeaderBlock=True}) $
+ ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
headerWith ("", delete style (pStyle pPr), []) n ils
| otherwise = do
- ils <- smushInlines <$> mapM parPartToInlines parparts >>=
- (return . fromList . trimLineBreaks . normalizeSpaces . toList)
+ ils <- (trimSps . smushInlines) <$> mapM parPartToInlines parparts
+ prevParaIls <- gets docxPrevPara
dropIls <- gets docxDropCap
let ils' = dropIls <> ils
if dropCap pPr
then do modify $ \s -> s { docxDropCap = ils' }
return mempty
else do modify $ \s -> s { docxDropCap = mempty }
- return $ case isNull ils' of
- True -> mempty
- _ -> parStyleToTransform pPr $ para ils'
+ let ils'' = prevParaIls <>
+ (if isNull prevParaIls then mempty else space) <>
+ ils'
+ opts <- asks docxOptions
+ case () of
+
+ _ | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) ->
+ return mempty
+ _ | Just (TrackedChange Insertion _) <- pChange pPr
+ , AcceptChanges <- readerTrackChanges opts -> do
+ modify $ \s -> s {docxPrevPara = mempty}
+ transform <- parStyleToTransform pPr
+ return $ transform $ para ils''
+ _ | Just (TrackedChange Insertion _) <- pChange pPr
+ , RejectChanges <- readerTrackChanges opts -> do
+ modify $ \s -> s {docxPrevPara = ils''}
+ return mempty
+ _ | Just (TrackedChange Insertion cInfo) <- pChange pPr
+ , AllChanges <- readerTrackChanges opts
+ , ChangeInfo _ cAuthor cDate <- cInfo -> do
+ let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
+ insertMark = spanWith attr mempty
+ transform <- parStyleToTransform pPr
+ return $ transform $
+ para $ ils'' <> insertMark
+ _ | Just (TrackedChange Deletion _) <- pChange pPr
+ , AcceptChanges <- readerTrackChanges opts -> do
+ modify $ \s -> s {docxPrevPara = ils''}
+ return mempty
+ _ | Just (TrackedChange Deletion _) <- pChange pPr
+ , RejectChanges <- readerTrackChanges opts -> do
+ modify $ \s -> s {docxPrevPara = mempty}
+ transform <- parStyleToTransform pPr
+ return $ transform $ para ils''
+ _ | Just (TrackedChange Deletion cInfo) <- pChange pPr
+ , AllChanges <- readerTrackChanges opts
+ , ChangeInfo _ cAuthor cDate <- cInfo -> do
+ let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
+ insertMark = spanWith attr mempty
+ transform <- parStyleToTransform pPr
+ return $ transform $
+ para $ ils'' <> insertMark
+ _ | otherwise -> do
+ modify $ \s -> s {docxPrevPara = mempty}
+ transform <- parStyleToTransform pPr
+ return $ transform $ para ils''
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
- let
- kvs = case levelInfo of
- (_, fmt, txt, Just start) -> [ ("level", lvl)
- , ("num-id", numId)
- , ("format", fmt)
- , ("text", txt)
- , ("start", (show start))
- ]
-
- (_, fmt, txt, Nothing) -> [ ("level", lvl)
- , ("num-id", numId)
- , ("format", fmt)
- , ("text", txt)
- ]
+ -- We check whether this current numId has previously been used,
+ -- since Docx expects us to pick up where we left off.
+ listState <- gets docxListState
+ let startFromState = M.lookup (numId, lvl) listState
+ (_, fmt,txt, startFromLevelInfo) = levelInfo
+ start = case startFromState of
+ Just n -> n + 1
+ Nothing -> fromMaybe 1 startFromLevelInfo
+ kvs = [ ("level", lvl)
+ , ("num-id", numId)
+ , ("format", fmt)
+ , ("text", txt)
+ , ("start", show start)
+ ]
+ modify $ \st -> st{ docxListState = M.insert (numId, lvl) start listState}
blks <- bodyPartToBlocks (Paragraph pPr parparts)
return $ divWith ("", ["list-item"], kvs) blks
-bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
- let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)}
+bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
+ let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr}
in
bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =
return $ para mempty
-bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
+bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
let caption = text cap
(hdr, rows) = case firstRowFormatting look of
True | null rs -> (Nothing, [r])
| otherwise -> (Just r, rs)
False -> (Nothing, r:rs)
- cells <- mapM rowToBlocksList rows
+ cells <- mapM rowToBlocksList rows
- let width = case cells of
- r':_ -> length r'
- -- shouldn't happen
- [] -> 0
+ let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
+ -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out
+ -- our own, see
+ -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155
+ nonEmpty [] = Nothing
+ nonEmpty l = Just l
+ rowLength :: Row -> Int
+ rowLength (Row c) = length c
hdrCells <- case hdr of
Just r' -> rowToBlocksList r'
@@ -592,36 +703,54 @@ bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
widths = replicate width 0 :: [Double]
return $ table caption (zip alignments widths) hdrCells cells
-bodyPartToBlocks (OMathPara e) = do
+bodyPartToBlocks (OMathPara e) =
return $ para $ displayMath (writeTeX e)
-- replace targets with generated anchors.
-rewriteLink' :: Inline -> DocxContext Inline
+rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
rewriteLink' l@(Link attr ils ('#':target, title)) = do
anchorMap <- gets docxAnchorMap
- return $ case M.lookup target anchorMap of
- Just newTarget -> (Link attr ils ('#':newTarget, title))
- Nothing -> l
+ case M.lookup target anchorMap of
+ Just newTarget -> do
+ modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
+ return $ Link attr ils ('#':newTarget, title)
+ Nothing -> do
+ modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
+ return l
rewriteLink' il = return il
-rewriteLinks :: [Block] -> DocxContext [Block]
+rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
rewriteLinks = mapM (walkM rewriteLink')
-bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag, [String])
+removeOrphanAnchors'' :: PandocMonad m => Inline -> DocxContext m [Inline]
+removeOrphanAnchors'' s@(Span (ident, classes, _) ils)
+ | "anchor" `elem` classes = do
+ anchorSet <- gets docxAnchorSet
+ return $ if ident `Set.member` anchorSet
+ then [s]
+ else ils
+removeOrphanAnchors'' il = return [il]
+
+removeOrphanAnchors' :: PandocMonad m => [Inline] -> DocxContext m [Inline]
+removeOrphanAnchors' ils = liftM concat $ mapM removeOrphanAnchors'' ils
+
+removeOrphanAnchors :: PandocMonad m => [Block] -> DocxContext m [Block]
+removeOrphanAnchors = mapM (walkM removeOrphanAnchors')
+
+bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
- mediaBag <- gets docxMediaBag
- warnings <- gets docxWarnings
- return $ (meta,
- blks',
- mediaBag,
- warnings)
-
-docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag, [String])
+ blks'' <- removeOrphanAnchors blks'
+ return (meta, blks'')
+
+docxToOutput :: PandocMonad m
+ => ReaderOptions
+ -> Docx
+ -> m (Meta, [Block])
docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 39e0df825..003265e6e 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -1,15 +1,16 @@
-{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
- PatternGuards #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE TypeSynonymInstances #-}
module Text.Pandoc.Readers.Docx.Combine ( smushInlines
, smushBlocks
)
where
-import Text.Pandoc.Builder
import Data.List
-import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>))
+import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr, (><), (|>))
import qualified Data.Sequence as Seq (null)
+import Text.Pandoc.Builder
data Modifier a = Modifier (a -> a)
| AttrModifier (Attr -> a -> a) Attr
@@ -35,16 +36,16 @@ spaceOutInlines ils =
right = case viewr contents of
(_ :> Space) -> space
_ -> mempty in
- (left, (stackInlines fs $ trimInlines . Many $ contents), right)
+ (left, stackInlines fs $ trimInlines . Many $ contents, right)
stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
stackInlines [] ms = ms
stackInlines (NullModifier : fs) ms = stackInlines fs ms
-stackInlines ((Modifier f) : fs) ms =
+stackInlines (Modifier f : fs) ms =
if isEmpty ms
then stackInlines fs ms
else f $ stackInlines fs ms
-stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms
+stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms
unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
unstackInlines ms = case ilModifier ms of
@@ -56,15 +57,15 @@ unstackInlines ms = case ilModifier ms of
ilModifier :: Inlines -> Modifier Inlines
ilModifier ils = case viewl (unMany ils) of
(x :< xs) | Seq.null xs -> case x of
- (Emph _) -> Modifier emph
- (Strong _) -> Modifier strong
- (SmallCaps _) -> Modifier smallcaps
- (Strikeout _) -> Modifier strikeout
- (Superscript _) -> Modifier superscript
- (Subscript _) -> Modifier subscript
+ (Emph _) -> Modifier emph
+ (Strong _) -> Modifier strong
+ (SmallCaps _) -> Modifier smallcaps
+ (Strikeout _) -> Modifier strikeout
+ (Superscript _) -> Modifier superscript
+ (Subscript _) -> Modifier subscript
(Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
- (Span attr _) -> AttrModifier spanWith attr
- _ -> NullModifier
+ (Span attr _) -> AttrModifier spanWith attr
+ _ -> NullModifier
_ -> NullModifier
ilInnards :: Inlines -> Inlines
@@ -78,25 +79,25 @@ ilInnards ils = case viewl (unMany ils) of
(Subscript lst) -> fromList lst
(Link _ lst _) -> fromList lst
(Span _ lst) -> fromList lst
- _ -> ils
+ _ -> ils
_ -> ils
inlinesL :: Inlines -> (Inlines, Inlines)
inlinesL ils = case viewl $ unMany ils of
(s :< sq) -> (singleton s, Many sq)
- _ -> (mempty, ils)
+ _ -> (mempty, ils)
inlinesR :: Inlines -> (Inlines, Inlines)
inlinesR ils = case viewr $ unMany ils of
(sq :> s) -> (Many sq, singleton s)
- _ -> (ils, mempty)
+ _ -> (ils, mempty)
combineInlines :: Inlines -> Inlines -> Inlines
combineInlines x y =
let (xs', x') = inlinesR x
(y', ys') = inlinesL y
in
- xs' <> (combineSingletonInlines x' y') <> ys'
+ xs' <> combineSingletonInlines x' y' <> ys'
combineSingletonInlines :: Inlines -> Inlines -> Inlines
combineSingletonInlines x y =
@@ -113,10 +114,10 @@ combineSingletonInlines x y =
stackInlines (x_rem_attr ++ y_rem_attr) mempty
| isEmpty xs ->
let (sp, y') = spaceOutInlinesL y in
- (stackInlines x_rem_attr mempty) <> sp <> y'
+ stackInlines x_rem_attr mempty <> sp <> y'
| isEmpty ys ->
let (x', sp) = spaceOutInlinesR x in
- x' <> sp <> (stackInlines y_rem_attr mempty)
+ x' <> sp <> stackInlines y_rem_attr mempty
| otherwise ->
let (x', xsp) = spaceOutInlinesR x
(ysp, y') = spaceOutInlinesL y
@@ -129,15 +130,15 @@ combineSingletonInlines x y =
combineBlocks :: Blocks -> Blocks -> Blocks
combineBlocks bs cs
- | bs' :> (BlockQuote bs'') <- viewr (unMany bs)
- , (BlockQuote cs'') :< cs' <- viewl (unMany cs) =
- Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs'
+ | bs' :> BlockQuote bs'' <- viewr (unMany bs)
+ , BlockQuote cs'' :< cs' <- viewl (unMany cs) =
+ Many $ (bs' |> BlockQuote (bs'' <> cs'')) >< cs'
combineBlocks bs cs = bs <> cs
instance (Monoid a, Eq a) => Eq (Modifier a) where
- (Modifier f) == (Modifier g) = (f mempty == g mempty)
- (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
- (NullModifier) == (NullModifier) = True
+ (Modifier f) == (Modifier g) = f mempty == g mempty
+ (AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty
+ NullModifier == NullModifier = True
_ == _ = False
isEmpty :: (Monoid a, Eq a) => a -> Bool
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
new file mode 100644
index 000000000..6eeb55d2f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -0,0 +1,89 @@
+{-
+Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Docx.Fields
+ Copyright : Copyright (C) 2014-2018 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+For parsing Field definitions in instText tags, as described in
+ECMA-376-1:2016, §17.16.5 -}
+
+module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
+ , parseFieldInfo
+ ) where
+
+import Text.Parsec
+import Text.Parsec.String (Parser)
+
+type URL = String
+
+data FieldInfo = HyperlinkField URL
+ | UnknownField
+ deriving (Show)
+
+parseFieldInfo :: String -> Either ParseError FieldInfo
+parseFieldInfo = parse fieldInfo ""
+
+fieldInfo :: Parser FieldInfo
+fieldInfo =
+ try (HyperlinkField <$> hyperlink)
+ <|>
+ return UnknownField
+
+escapedQuote :: Parser String
+escapedQuote = string "\\\""
+
+inQuotes :: Parser String
+inQuotes =
+ (try escapedQuote) <|> (anyChar >>= (\c -> return [c]))
+
+quotedString :: Parser String
+quotedString = do
+ char '"'
+ concat <$> manyTill inQuotes (try (char '"'))
+
+unquotedString :: Parser String
+unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof)
+
+fieldArgument :: Parser String
+fieldArgument = quotedString <|> unquotedString
+
+-- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25
+hyperlinkSwitch :: Parser (String, String)
+hyperlinkSwitch = do
+ sw <- string "\\l"
+ spaces
+ farg <- fieldArgument
+ return (sw, farg)
+
+hyperlink :: Parser URL
+hyperlink = do
+ many space
+ string "HYPERLINK"
+ spaces
+ farg <- fieldArgument
+ switches <- spaces *> many hyperlinkSwitch
+ let url = case switches of
+ ("\\l", s) : _ -> farg ++ ('#': s)
+ _ -> farg
+ return url
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index 395a53907..c0f05094a 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
+Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Docx.Lists
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
+ Copyright : Copyright (C) 2014-2018 Jesse Rosenthal
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -33,38 +33,33 @@ module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
, listParagraphDivs
) where
-import Text.Pandoc.JSON
-import Text.Pandoc.Generic (bottomUp)
-import Text.Pandoc.Shared (trim)
-import Control.Monad
import Data.List
import Data.Maybe
+import Text.Pandoc.Generic (bottomUp)
+import Text.Pandoc.JSON
+import Text.Pandoc.Shared (trim)
isListItem :: Block -> Bool
isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
-isListItem _ = False
+isListItem _ = False
getLevel :: Block -> Maybe Integer
-getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs
-getLevel _ = Nothing
+getLevel (Div (_, _, kvs) _) = read <$> lookup "level" kvs
+getLevel _ = Nothing
getLevelN :: Block -> Integer
-getLevelN b = case getLevel b of
- Just n -> n
- Nothing -> -1
+getLevelN b = fromMaybe (-1) (getLevel b)
getNumId :: Block -> Maybe Integer
-getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs
-getNumId _ = Nothing
+getNumId (Div (_, _, kvs) _) = read <$> lookup "num-id" kvs
+getNumId _ = Nothing
getNumIdN :: Block -> Integer
-getNumIdN b = case getNumId b of
- Just n -> n
- Nothing -> -1
+getNumIdN b = fromMaybe (-1) (getNumId b)
getText :: Block -> Maybe String
getText (Div (_, _, kvs) _) = lookup "text" kvs
-getText _ = Nothing
+getText _ = Nothing
data ListType = Itemized | Enumerated ListAttributes
@@ -109,27 +104,27 @@ listParagraphDivs = ["ListParagraph"]
handleListParagraphs :: [Block] -> [Block]
handleListParagraphs [] = []
handleListParagraphs (
- (Div attr1@(_, classes1, _) blks1) :
- (Div (ident2, classes2, kvs2) blks2) :
+ Div attr1@(_, classes1, _) blks1 :
+ Div (ident2, classes2, kvs2) blks2 :
blks
) | "list-item" `elem` classes1 &&
- not ("list-item" `elem` classes2) &&
+ notElem "list-item" classes2 &&
(not . null) (listParagraphDivs `intersect` classes2) =
-- We don't want to keep this indent.
let newDiv2 =
- (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2)
+ Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2
in
- handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
-handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
+ handleListParagraphs (Div attr1 (blks1 ++ [newDiv2]) : blks)
+handleListParagraphs (blk:blks) = blk : handleListParagraphs blks
separateBlocks' :: Block -> [[Block]] -> [[Block]]
-separateBlocks' blk ([] : []) = [[blk]]
-separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
-separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' blk [[]] = [[blk]]
+separateBlocks' b@(BulletList _) acc = init acc ++ [last acc ++ [b]]
+separateBlocks' b@(OrderedList _ _) acc = init acc ++ [last acc ++ [b]]
-- The following is for the invisible bullet lists. This is how
-- pandoc-generated ooxml does multiparagraph item lists.
-separateBlocks' b acc | liftM trim (getText b) == Just "" =
- (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b acc | fmap trim (getText b) == Just "" =
+ init acc ++ [last acc ++ [b]]
separateBlocks' b acc = acc ++ [[b]]
separateBlocks :: [Block] -> [[Block]]
@@ -138,63 +133,60 @@ separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
flatToBullets' :: Integer -> [Block] -> [Block]
flatToBullets' _ [] = []
flatToBullets' num xs@(b : elems)
- | getLevelN b == num = b : (flatToBullets' num elems)
+ | getLevelN b == num = b : flatToBullets' num elems
| otherwise =
let bNumId = getNumIdN b
bLevel = getLevelN b
(children, remaining) =
span
(\b' ->
- ((getLevelN b') > bLevel ||
- ((getLevelN b') == bLevel && (getNumIdN b') == bNumId)))
+ getLevelN b' > bLevel ||
+ (getLevelN b' == bLevel && getNumIdN b' == bNumId))
xs
in
case getListType b of
Just (Enumerated attr) ->
- (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) :
- (flatToBullets' num remaining)
+ OrderedList attr (separateBlocks $ flatToBullets' bLevel children) :
+ flatToBullets' num remaining
_ ->
- (BulletList (separateBlocks $ flatToBullets' bLevel children)) :
- (flatToBullets' num remaining)
+ BulletList (separateBlocks $ flatToBullets' bLevel children) :
+ flatToBullets' num remaining
flatToBullets :: [Block] -> [Block]
flatToBullets elems = flatToBullets' (-1) elems
singleItemHeaderToHeader :: Block -> Block
-singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h
-singleItemHeaderToHeader blk = blk
+singleItemHeaderToHeader (OrderedList _ [[h@Header{}]]) = h
+singleItemHeaderToHeader blk = blk
blocksToBullets :: [Block] -> [Block]
blocksToBullets blks =
map singleItemHeaderToHeader $
- bottomUp removeListDivs $
- flatToBullets $ (handleListParagraphs blks)
+ bottomUp removeListDivs $flatToBullets (handleListParagraphs blks)
plainParaInlines :: Block -> [Inline]
plainParaInlines (Plain ils) = ils
-plainParaInlines (Para ils) = ils
-plainParaInlines _ = []
+plainParaInlines (Para ils) = ils
+plainParaInlines _ = []
blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
blocksToDefinitions' [] acc [] = reverse acc
blocksToDefinitions' defAcc acc [] =
- reverse $ (DefinitionList (reverse defAcc)) : acc
+ reverse $ DefinitionList (reverse defAcc) : acc
blocksToDefinitions' defAcc acc
- ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks)
+ (Div (_, classes1, _) blks1 : Div (ident2, classes2, kvs2) blks2 : blks)
| "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 =
let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
- pair = case remainingAttr2 == ("", [], []) of
- True -> (concatMap plainParaInlines blks1, [blks2])
- False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
+ pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
in
blocksToDefinitions' (pair : defAcc) acc blks
blocksToDefinitions' defAcc acc
- ((Div (ident2, classes2, kvs2) blks2) : blks)
+ (Div (ident2, classes2, kvs2) blks2 : blks)
| (not . null) defAcc && "Definition" `elem` classes2 =
let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
defItems2 = case remainingAttr2 == ("", [], []) of
- True -> blks2
+ True -> blks2
False -> [Div remainingAttr2 blks2]
((defTerm, defItems):defs) = defAcc
defAcc' = case null defItems of
@@ -205,18 +197,18 @@ blocksToDefinitions' defAcc acc
blocksToDefinitions' [] acc (b:blks) =
blocksToDefinitions' [] (b:acc) blks
blocksToDefinitions' defAcc acc (b:blks) =
- blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
+ blocksToDefinitions' [] (b : DefinitionList (reverse defAcc) : acc) blks
removeListDivs' :: Block -> [Block]
removeListDivs' (Div (ident, classes, kvs) blks)
| "list-item" `elem` classes =
case delete "list-item" classes of
- [] -> blks
- classes' -> [Div (ident, classes', kvs) $ blks]
+ [] -> blks
+ classes' -> [Div (ident, classes', kvs) blks]
removeListDivs' (Div (ident, classes, kvs) blks)
| not $ null $ listParagraphDivs `intersect` classes =
case classes \\ listParagraphDivs of
- [] -> blks
+ [] -> blks
classes' -> [Div (ident, classes', kvs) blks]
removeListDivs' blk = [blk]
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index deb2caccf..1f7f07e36 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,7 +1,9 @@
-{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ViewPatterns #-}
{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
+Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Docx.Parse
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
+ Copyright : Copyright (C) 2014-2018 Jesse Rosenthal
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -49,28 +51,34 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, ParagraphStyle(..)
, Row(..)
, Cell(..)
+ , TrackedChange(..)
+ , ChangeType(..)
+ , ChangeInfo(..)
+ , FieldInfo(..)
, archiveToDocx
, archiveToDocxWithWarnings
) where
import Codec.Archive.Zip
-import Text.XML.Light
-import Data.Maybe
-import Data.List
-import System.FilePath
+import Control.Applicative ((<|>))
+import Control.Monad.Except
+import Control.Monad.Reader
+import Control.Monad.State.Strict
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad.Reader
-import Control.Monad.State
-import Control.Applicative ((<|>))
+import Data.Char (chr, ord, readLitChar)
+import Data.List
import qualified Data.Map as M
-import Control.Monad.Except
-import Text.Pandoc.Shared (safeRead, filteredFilesFromArchive)
-import Text.TeXMath.Readers.OMML (readOMML)
-import Text.TeXMath.Unicode.Fonts (getUnicode, stringToFont, Font(..))
-import Text.TeXMath (Exp)
+import Data.Maybe
+import System.FilePath
import Text.Pandoc.Readers.Docx.Util
-import Data.Char (readLitChar, ord, chr, isDigit)
+import Text.Pandoc.Readers.Docx.Fields
+import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.TeXMath (Exp)
+import Text.TeXMath.Readers.OMML (readOMML)
+import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont)
+import Text.XML.Light
+import qualified Text.XML.Light.Cursor as XMLC
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envComments :: Comments
@@ -84,10 +92,19 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
-data ReaderState = ReaderState { stateWarnings :: [String] }
+data ReaderState = ReaderState { stateWarnings :: [String]
+ , stateFldCharState :: FldCharState
+ }
deriving Show
-data DocxError = DocxError | WrongElem
+data FldCharState = FldCharOpen
+ | FldCharFieldInfo FieldInfo
+ | FldCharContent FieldInfo [Run]
+ | FldCharClosed
+ deriving (Show)
+
+data DocxError = DocxError
+ | WrongElem
deriving Show
type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
@@ -97,7 +114,7 @@ runD dx re rs = runState (runReaderT (runExceptT dx) re) rs
maybeToD :: Maybe a -> D a
maybeToD (Just a) = return a
-maybeToD Nothing = throwError DocxError
+maybeToD Nothing = throwError DocxError
eitherToD :: Either a b -> D b
eitherToD (Right b) = return b
@@ -115,6 +132,36 @@ mapD f xs =
in
concatMapM handler xs
+unwrapSDT :: NameSpaces -> Content -> [Content]
+unwrapSDT ns (Elem element)
+ | isElem ns "w" "sdt" element
+ , Just sdtContent <- findChildByName ns "w" "sdtContent" element
+ = concatMap (unwrapSDT ns) $ map Elem $ elChildren sdtContent
+unwrapSDT _ content = [content]
+
+unwrapSDTchild :: NameSpaces -> Content -> Content
+unwrapSDTchild ns (Elem element) =
+ Elem $ element { elContent = concatMap (unwrapSDT ns) (elContent element) }
+unwrapSDTchild _ content = content
+
+walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
+walkDocument' ns cur =
+ let modifiedCur = XMLC.modifyContent (unwrapSDTchild ns) cur
+ in
+ case XMLC.nextDF modifiedCur of
+ Just cur' -> walkDocument' ns cur'
+ Nothing -> XMLC.root modifiedCur
+
+walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument ns element =
+ let cur = XMLC.fromContent (Elem element)
+ cur' = walkDocument' ns cur
+ in
+ case XMLC.toTree cur' of
+ Elem element' -> Just element'
+ _ -> Nothing
+
+
data Docx = Docx Document
deriving Show
@@ -160,17 +207,27 @@ data Notes = Notes NameSpaces
data Comments = Comments NameSpaces (M.Map String Element)
deriving Show
-data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
- , rightParIndent :: Maybe Integer
+data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
+ , rightParIndent :: Maybe Integer
, hangingParIndent :: Maybe Integer}
deriving Show
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+data ChangeType = Insertion | Deletion
+ deriving Show
+
+data ChangeInfo = ChangeInfo ChangeId Author ChangeDate
+ deriving Show
+
+data TrackedChange = TrackedChange ChangeType ChangeInfo
+ deriving Show
+
+data ParagraphStyle = ParagraphStyle { pStyle :: [String]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
, pHeading :: Maybe (String, Int)
, pNumInfo :: Maybe (String, String)
, pBlockQuote :: Maybe Bool
+ , pChange :: Maybe TrackedChange
}
deriving Show
@@ -181,6 +238,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
, pHeading = Nothing
, pNumInfo = Nothing
, pBlockQuote = Nothing
+ , pChange = Nothing
}
@@ -208,8 +266,7 @@ data Cell = Cell [BodyPart]
type Extent = Maybe (Double, Double)
data ParPart = PlainRun Run
- | Insertion ChangeId Author ChangeDate [Run]
- | Deletion ChangeId Author ChangeDate [Run]
+ | ChangedRuns TrackedChange [Run]
| CommentStart CommentId Author CommentDate [BodyPart]
| CommentEnd CommentId
| BookMark BookMarkId Anchor
@@ -218,6 +275,10 @@ data ParPart = PlainRun Run
| Drawing FilePath String String B.ByteString Extent -- title, alt
| Chart -- placeholder for now
| PlainOMath [Exp]
+ | SmartTag [Run]
+ | Field FieldInfo [Run]
+ | NullParPart -- when we need to return nothing, but
+ -- not because of an error.
deriving Show
data Run = Run RunStyle [RunElem]
@@ -233,19 +294,19 @@ data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
-data RunStyle = RunStyle { isBold :: Maybe Bool
- , isItalic :: Maybe Bool
+data RunStyle = RunStyle { isBold :: Maybe Bool
+ , isItalic :: Maybe Bool
, isSmallCaps :: Maybe Bool
- , isStrike :: Maybe Bool
- , rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
- , rStyle :: Maybe CharStyle}
+ , isStrike :: Maybe Bool
+ , rVertAlign :: Maybe VertAlign
+ , rUnderline :: Maybe String
+ , rStyle :: Maybe CharStyle}
deriving Show
-data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
+data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
, isBlockQuote :: Maybe Bool
- , numInfo :: Maybe (String, String)
- , psStyle :: Maybe ParStyle}
+ , numInfo :: Maybe (String, String)
+ , psStyle :: Maybe ParStyle}
deriving Show
defaultRunStyle :: RunStyle
@@ -281,7 +342,9 @@ archiveToDocxWithWarnings archive = do
(styles, parstyles) = archiveToStyles archive
rEnv =
ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
- rState = ReaderState { stateWarnings = [] }
+ rState = ReaderState { stateWarnings = []
+ , stateFldCharState = FldCharClosed
+ }
(eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
case eitherDoc of
Right doc -> Right (Docx doc, stateWarnings st)
@@ -294,14 +357,14 @@ archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = elemToNameSpaces docElem
- bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
- body <- elemToBody namespaces bodyElem
+ bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
+ let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+ body <- elemToBody namespaces bodyElem'
return $ Document namespaces body
elemToBody :: NameSpaces -> Element -> D Body
elemToBody ns element | isElem ns "w" "body" element =
- mapD (elemToBodyPart ns) (elChildren element) >>=
- (\bps -> return $ Body bps)
+ fmap Body (mapD (elemToBodyPart ns) (elChildren element))
elemToBody _ _ = throwError WrongElem
archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
@@ -322,15 +385,15 @@ archiveToStyles zf =
isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just styleType <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
- , Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>=
- findAttr (elemName ns "w" "val")
- , Just ps <- parentStyle = (basedOnVal == getStyleId ps)
+ , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
+ findAttrByName ns "w" "val"
+ , Just ps <- parentStyle = basedOnVal == getStyleId ps
| isElem ns "w" "style" element
- , Just styleType <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
- , Nothing <- findChild (elemName ns "w" "basedOn") element
+ , Nothing <- findChildByName ns "w" "basedOn" element
, Nothing <- parentStyle = True
| otherwise = False
@@ -343,8 +406,8 @@ instance ElemToStyle CharStyle where
cStyleType _ = "character"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
- , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ , Just "character" <- findAttrByName ns "w" "type" element
+ , Just styleId <- findAttrByName ns "w" "styleId" element =
Just (styleId, elemToRunStyle ns element parentStyle)
| otherwise = Nothing
getStyleId s = fst s
@@ -353,8 +416,8 @@ instance ElemToStyle ParStyle where
cStyleType _ = "paragraph"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "paragraph" <- findAttr (elemName ns "w" "type") element
- , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ , Just "paragraph" <- findAttrByName ns "w" "type" element
+ , Just styleId <- findAttrByName ns "w" "styleId" element =
Just (styleId, elemToParStyleData ns element parentStyle)
| otherwise = Nothing
getStyleId s = fst s
@@ -368,10 +431,10 @@ getStyleChildren ns element parentStyle
buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
buildBasedOnList ns element rootStyle =
- case (getStyleChildren ns element rootStyle) of
+ case getStyleChildren ns element rootStyle of
[] -> []
stys -> stys ++
- (concatMap (\s -> buildBasedOnList ns element (Just s)) stys)
+ concatMap (buildBasedOnList ns element . Just) stys
archiveToNotes :: Archive -> Notes
archiveToNotes zf =
@@ -380,14 +443,14 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of
- Just e -> elemToNameSpaces e
+ Just e -> elemToNameSpaces e
Nothing -> []
en_namespaces = case enElem of
- Just e -> elemToNameSpaces e
+ Just e -> elemToNameSpaces e
Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
- fn = fnElem >>= (elemToNotes ns "footnote")
- en = enElem >>= (elemToNotes ns "endnote")
+ fn = fnElem >>= elemToNotes ns "footnote"
+ en = enElem >>= elemToNotes ns "endnote"
in
Notes ns fn en
@@ -396,19 +459,19 @@ archiveToComments zf =
let cmtsElem = findEntryByPath "word/comments.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
cmts_namespaces = case cmtsElem of
- Just e -> elemToNameSpaces e
+ Just e -> elemToNameSpaces e
Nothing -> []
- cmts = (elemToComments cmts_namespaces) <$> cmtsElem
+ cmts = elemToComments cmts_namespaces <$> cmtsElem
in
case cmts of
- Just c -> Comments cmts_namespaces c
+ Just c -> Comments cmts_namespaces c
Nothing -> Comments cmts_namespaces M.empty
filePathToRelType :: FilePath -> Maybe DocumentLocation
-filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
+filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
-filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
-filePathToRelType _ = Nothing
+filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
+filePathToRelType _ = Nothing
relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
relElemToRelationship relType element | qName (elName element) == "Relationship" =
@@ -439,24 +502,23 @@ lookupLevel :: String -> String -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
- lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
- return lvl
+ lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element
| isElem ns "w" "num" element = do
- numId <- findAttr (elemName ns "w" "numId") element
- absNumId <- findChild (elemName ns "w" "abstractNumId") element
- >>= findAttr (elemName ns "w" "val")
+ numId <- findAttrByName ns "w" "numId" element
+ absNumId <- findChildByName ns "w" "abstractNumId" element
+ >>= findAttrByName ns "w" "val"
return $ Numb numId absNumId
numElemToNum _ _ = Nothing
absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
absNumElemToAbsNum ns element
| isElem ns "w" "abstractNum" element = do
- absNumId <- findAttr (elemName ns "w" "abstractNumId") element
- let levelElems = findChildren (elemName ns "w" "lvl") element
+ absNumId <- findAttrByName ns "w" "abstractNumId" element
+ let levelElems = findChildrenByName ns "w" "lvl" element
levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
absNumElemToAbsNum _ _ = Nothing
@@ -464,26 +526,26 @@ absNumElemToAbsNum _ _ = Nothing
levelElemToLevel :: NameSpaces -> Element -> Maybe Level
levelElemToLevel ns element
| isElem ns "w" "lvl" element = do
- ilvl <- findAttr (elemName ns "w" "ilvl") element
- fmt <- findChild (elemName ns "w" "numFmt") element
- >>= findAttr (elemName ns "w" "val")
- txt <- findChild (elemName ns "w" "lvlText") element
- >>= findAttr (elemName ns "w" "val")
- let start = findChild (elemName ns "w" "start") element
- >>= findAttr (elemName ns "w" "val")
+ ilvl <- findAttrByName ns "w" "ilvl" element
+ fmt <- findChildByName ns "w" "numFmt" element
+ >>= findAttrByName ns "w" "val"
+ txt <- findChildByName ns "w" "lvlText" element
+ >>= findAttrByName ns "w" "val"
+ let start = findChildByName ns "w" "start" element
+ >>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
return (ilvl, fmt, txt, start)
levelElemToLevel _ _ = Nothing
archiveToNumbering' :: Archive -> Maybe Numbering
-archiveToNumbering' zf = do
+archiveToNumbering' zf =
case findEntryByPath "word/numbering.xml" zf of
Nothing -> Just $ Numbering [] [] []
Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = elemToNameSpaces numberingElem
- numElems = findChildren (elemName namespaces "w" "num") numberingElem
- absNumElems = findChildren (elemName namespaces "w" "abstractNum") numberingElem
+ numElems = findChildrenByName namespaces "w" "num" numberingElem
+ absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
nums = mapMaybe (numElemToNum namespaces) numElems
absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems
return $ Numbering namespaces nums absNums
@@ -496,22 +558,23 @@ elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
elemToNotes ns notetype element
| isElem ns "w" (notetype ++ "s") element =
let pairs = mapMaybe
- (\e -> findAttr (elemName ns "w" "id") e >>=
+ (\e -> findAttrByName ns "w" "id" e >>=
(\a -> Just (a, e)))
- (findChildren (elemName ns "w" notetype) element)
+ (findChildrenByName ns "w" notetype element)
in
- Just $ M.fromList $ pairs
+ Just $
+ M.fromList pairs
elemToNotes _ _ _ = Nothing
elemToComments :: NameSpaces -> Element -> M.Map String Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
- (\e -> findAttr (elemName ns "w" "id") e >>=
+ (\e -> findAttrByName ns "w" "id" e >>=
(\a -> Just (a, e)))
- (findChildren (elemName ns "w" "comment") element)
+ (findChildrenByName ns "w" "comment" element)
in
- M.fromList $ pairs
+ M.fromList pairs
elemToComments _ _ = M.empty
@@ -520,16 +583,16 @@ elemToComments _ _ = M.empty
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
- let cols = findChildren (elemName ns "w" "gridCol") element
+ let cols = findChildrenByName ns "w" "gridCol" element
in
- mapD (\e -> maybeToD (findAttr (elemName ns "w" "val") e >>= stringToInteger))
+ mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
cols
elemToTblGrid _ _ = throwError WrongElem
elemToTblLook :: NameSpaces -> Element -> D TblLook
elemToTblLook ns element | isElem ns "w" "tblLook" element =
- let firstRow = findAttr (elemName ns "w" "firstRow") element
- val = findAttr (elemName ns "w" "val") element
+ let firstRow = findAttrByName ns "w" "firstRow" element
+ val = findAttrByName ns "w" "val" element
firstRowFmt =
case firstRow of
Just "1" -> True
@@ -538,13 +601,13 @@ elemToTblLook ns element | isElem ns "w" "tblLook" element =
Just bitMask -> testBitMask bitMask 0x020
Nothing -> False
in
- return $ TblLook{firstRowFormatting = firstRowFmt}
+ return TblLook{firstRowFormatting = firstRowFmt}
elemToTblLook _ _ = throwError WrongElem
elemToRow :: NameSpaces -> Element -> D Row
elemToRow ns element | isElem ns "w" "tr" element =
do
- let cellElems = findChildren (elemName ns "w" "tc") element
+ let cellElems = findChildrenByName ns "w" "tc" element
cells <- mapD (elemToCell ns) cellElems
return $ Row cells
elemToRow _ _ = throwError WrongElem
@@ -558,15 +621,15 @@ elemToCell _ _ = throwError WrongElem
elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
elemToParIndentation ns element | isElem ns "w" "ind" element =
- Just $ ParIndentation {
+ Just ParIndentation {
leftParIndent =
- findAttr (elemName ns "w" "left") element >>=
+ findAttrByName ns "w" "left" element >>=
stringToInteger
, rightParIndent =
- findAttr (elemName ns "w" "right") element >>=
+ findAttrByName ns "w" "right" element >>=
stringToInteger
, hangingParIndent =
- findAttr (elemName ns "w" "hanging") element >>=
+ findAttrByName ns "w" "hanging" element >>=
stringToInteger}
elemToParIndentation _ _ = Nothing
@@ -574,7 +637,7 @@ testBitMask :: String -> Int -> Bool
testBitMask bitMaskS n =
case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
[] -> False
- ((n', _) : _) -> ((n' .|. n) /= 0)
+ ((n', _) : _) -> (n' .|. n) /= 0
stringToInteger :: String -> Maybe Integer
stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
@@ -582,7 +645,7 @@ stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
elemToBodyPart ns element
| isElem ns "w" "p" element
- , (c:_) <- findChildren (elemName ns "m" "oMathPara") element =
+ , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
do
expsLst <- eitherToD $ readOMML $ showElement c
return $ OMathPara expsLst
@@ -610,17 +673,17 @@ elemToBodyPart ns element
_ -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
- let caption' = findChild (elemName ns "w" "tblPr") element
- >>= findChild (elemName ns "w" "tblCaption")
- >>= findAttr (elemName ns "w" "val")
+ let caption' = findChildByName ns "w" "tblPr" element
+ >>= findChildByName ns "w" "tblCaption"
+ >>= findAttrByName ns "w" "val"
caption = (fromMaybe "" caption')
- grid' = case findChild (elemName ns "w" "tblGrid") element of
- Just g -> elemToTblGrid ns g
+ grid' = case findChildByName ns "w" "tblGrid" element of
+ Just g -> elemToTblGrid ns g
Nothing -> return []
- tblLook' = case findChild (elemName ns "w" "tblPr") element >>=
- findChild (elemName ns "w" "tblLook")
+ tblLook' = case findChildByName ns "w" "tblPr" element >>=
+ findChildByName ns "w" "tblLook"
of
- Just l -> elemToTblLook ns l
+ Just l -> elemToTblLook ns l
Nothing -> return defaultTblLook
grid <- grid'
@@ -649,26 +712,22 @@ expandDrawingId s = do
getTitleAndAlt :: NameSpaces -> Element -> (String, String)
getTitleAndAlt ns element =
- let mbDocPr = findChild (elemName ns "wp" "inline") element >>=
- findChild (elemName ns "wp" "docPr")
- title = case mbDocPr >>= findAttr (elemName ns "" "title") of
- Just title' -> title'
- Nothing -> ""
- alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of
- Just alt' -> alt'
- Nothing -> ""
+ let mbDocPr = findChildByName ns "wp" "inline" element >>=
+ findChildByName ns "wp" "docPr"
+ title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+ alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
elemToParPart ns element
| isElem ns "w" "r" element
- , Just drawingElem <- findChild (elemName ns "w" "drawing") element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
, pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
, Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
= let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (elemName ns "r" "embed")
+ >>= findAttrByName ns "r" "embed"
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -676,9 +735,9 @@ elemToParPart ns element
-- The below is an attempt to deal with images in deprecated vml format.
elemToParPart ns element
| isElem ns "w" "r" element
- , Just _ <- findChild (elemName ns "w" "pict") element =
+ , Just _ <- findChildByName ns "w" "pict" element =
let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttr (elemName ns "r" "id")
+ >>= findAttrByName ns "r" "id"
in
case drawing of
-- Todo: check out title and attr for deprecated format.
@@ -687,86 +746,148 @@ elemToParPart ns element
-- Chart
elemToParPart ns element
| isElem ns "w" "r" element
- , Just drawingElem <- findChild (elemName ns "w" "drawing") element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
- = return Chart
+ = return Chart
+{-
+The next one is a bit complicated. fldChar fields work by first
+having a <w:fldChar fldCharType="begin"> in a run, then a run with
+<w:instrText>, then a <w:fldChar fldCharType="separate"> run, then the
+content runs, and finally a <w:fldChar fldCharType="end"> run. For
+example (omissions and my comments in brackets):
+
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="begin"/>
+ </w:r>
+ <w:r>
+ [...]
+ <w:instrText xml:space="preserve"> HYPERLINK [hyperlink url] </w:instrText>
+ </w:r>
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="separate"/>
+ </w:r>
+ <w:r w:rsidRPr=[...]>
+ [...]
+ <w:t>Foundations of Analysis, 2nd Edition</w:t>
+ </w:r>
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="end"/>
+ </w:r>
+
+So we do this in a number of steps. If we encounter the fldchar begin
+tag, we start open a fldchar state variable (see state above). We add
+the instrtext to it as FieldInfo. Then we close that and start adding
+the runs when we get to separate. Then when we get to end, we produce
+the Field type with approriate FieldInfo and Runs.
+-}
elemToParPart ns element
- | isElem ns "w" "r" element =
- elemToRun ns element >>= (\r -> return $ PlainRun r)
+ | isElem ns "w" "r" element
+ , Just fldChar <- findChildByName ns "w" "fldChar" element
+ , Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharClosed | fldCharType == "begin" -> do
+ modify $ \st -> st {stateFldCharState = FldCharOpen}
+ return NullParPart
+ FldCharFieldInfo info | fldCharType == "separate" -> do
+ modify $ \st -> st {stateFldCharState = FldCharContent info []}
+ return NullParPart
+ FldCharContent info runs | fldCharType == "end" -> do
+ modify $ \st -> st {stateFldCharState = FldCharClosed}
+ return $ Field info $ reverse runs
+ _ -> throwError WrongElem
elemToParPart ns element
- | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttr (elemName ns "w" "id") element
- , Just cAuthor <- findAttr (elemName ns "w" "author") element
- , Just cDate <- findAttr (elemName ns "w" "date") element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ Insertion cId cAuthor cDate runs
+ | isElem ns "w" "r" element
+ , Just instrText <- findChildByName ns "w" "instrText" element = do
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharOpen -> do
+ info <- eitherToD $ parseFieldInfo $ strContent instrText
+ modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
+ return NullParPart
+ _ -> return NullParPart
elemToParPart ns element
- | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttr (elemName ns "w" "id") element
- , Just cAuthor <- findAttr (elemName ns "w" "author") element
- , Just cDate <- findAttr (elemName ns "w" "date") element = do
+ | isElem ns "w" "r" element = do
+ run <- elemToRun ns element
+ -- we check to see if we have an open FldChar in state that we're
+ -- recording.
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharContent info runs -> do
+ modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)}
+ return NullParPart
+ _ -> return $ PlainRun run
+elemToParPart ns element
+ | Just change <- getTrackedChange ns element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ ChangedRuns change runs
+elemToParPart ns element
+ | isElem ns "w" "smartTag" element = do
runs <- mapD (elemToRun ns) (elChildren element)
- return $ Deletion cId cAuthor cDate runs
+ return $ SmartTag runs
elemToParPart ns element
| isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttr (elemName ns "w" "id") element
- , Just bmName <- findAttr (elemName ns "w" "name") element =
+ , Just bmId <- findAttrByName ns "w" "id" element
+ , Just bmName <- findAttrByName ns "w" "name" element =
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttr (elemName ns "r" "id") element = do
+ , Just relId <- findAttrByName ns "r" "id" element = do
location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
- Just target -> do
- case findAttr (elemName ns "w" "anchor") element of
+ Just target ->
+ case findAttrByName ns "w" "anchor" element of
Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
Nothing -> return $ ExternalHyperLink target runs
Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
+ , Just anchor <- findAttrByName ns "w" "anchor" element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttr (elemName ns "w" "id") element = do
+ , Just cmtId <- findAttrByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
- Nothing -> throwError WrongElem
+ Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttr (elemName ns "w" "id") element =
+ , Just cmtId <- findAttrByName ns "w" "id" element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
- (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
+ fmap PlainOMath (eitherToD $ readOMML $ showElement element)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
- , Just cmtId <- findAttr (elemName ns "w" "id") element
- , Just cmtAuthor <- findAttr (elemName ns "w" "author") element
- , Just cmtDate <- findAttr (elemName ns "w" "date") element = do
+ , Just cmtId <- findAttrByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrByName ns "w" "author" element
+ , Just cmtDate <- findAttrByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
lookupFootnote :: String -> Notes -> Maybe Element
-lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
+lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s
lookupEndnote :: String -> Notes -> Maybe Element
-lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s)
+lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s
elemToExtent :: Element -> Extent
elemToExtent drawingElem =
case (getDim "cx", getDim "cy") of
(Just w, Just h) -> Just (w, h)
- _ -> Nothing
+ _ -> Nothing
where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
@@ -794,7 +915,7 @@ childElemToRun ns element
= return InlineChart
childElemToRun ns element
| isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttr (elemName ns "w" "id") element = do
+ , Just fnId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -802,7 +923,7 @@ childElemToRun ns element
Nothing -> return $ Footnote []
childElemToRun ns element
| isElem ns "w" "endnoteReference" element
- , Just enId <- findAttr (elemName ns "w" "id") element = do
+ , Just enId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -813,8 +934,8 @@ childElemToRun _ _ = throwError WrongElem
elemToRun :: NameSpaces -> Element -> D Run
elemToRun ns element
| isElem ns "w" "r" element
- , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element =
- do let choices = findChildren (elemName ns "mc" "Choice") altCont
+ , Just altCont <- findChildByName ns "mc" "AlternateContent" element =
+ do let choices = findChildrenByName ns "mc" "Choice" altCont
choiceChildren = map head $ filter (not . null) $ map elChildren choices
outputs <- mapD (childElemToRun ns) choiceChildren
case outputs of
@@ -822,15 +943,15 @@ elemToRun ns element
[] -> throwError WrongElem
elemToRun ns element
| isElem ns "w" "r" element
- , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
+ , Just drawingElem <- findChildByName ns "w" "drawing" element =
childElemToRun ns drawingElem
elemToRun ns element
| isElem ns "w" "r" element
- , Just ref <- findChild (elemName ns "w" "footnoteReference") element =
+ , Just ref <- findChildByName ns "w" "footnoteReference" element =
childElemToRun ns ref
elemToRun ns element
| isElem ns "w" "r" element
- , Just ref <- findChild (elemName ns "w" "endnoteReference") element =
+ , Just ref <- findChildByName ns "w" "endnoteReference" element =
childElemToRun ns ref
elemToRun ns element
| isElem ns "w" "r" element = do
@@ -854,22 +975,37 @@ getParStyleField field stylemap styles
= Just y
getParStyleField _ _ _ = Nothing
+getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
+getTrackedChange ns element
+ | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , Just cDate <- findAttrByName ns "w" "date" element =
+ Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate)
+getTrackedChange ns element
+ | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , Just cDate <- findAttrByName ns "w" "date" element =
+ Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate)
+getTrackedChange _ _ = Nothing
+
elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
elemToParagraphStyle ns element sty
- | Just pPr <- findChild (elemName ns "w" "pPr") element =
+ | Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (findAttr (elemName ns "w" "val"))
- (findChildren (elemName ns "w" "pStyle") pPr)
+ (findAttrByName ns "w" "val")
+ (findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
{pStyle = style
, indentation =
- findChild (elemName ns "w" "ind") pPr >>=
+ findChildByName ns "w" "ind" pPr >>=
elemToParIndentation ns
, dropCap =
case
- findChild (elemName ns "w" "framePr") pPr >>=
- findAttr (elemName ns "w" "dropCap")
+ findChildByName ns "w" "framePr" pPr >>=
+ findAttrByName ns "w" "dropCap"
of
Just "none" -> False
Just _ -> True
@@ -877,13 +1013,20 @@ elemToParagraphStyle ns element sty
, pHeading = getParStyleField headingLev sty style
, pNumInfo = getParStyleField numInfo sty style
, pBlockQuote = getParStyleField isBlockQuote sty style
+ , pChange = findChildByName ns "w" "rPr" pPr >>=
+ filterChild (\e -> isElem ns "w" "ins" e ||
+ isElem ns "w" "moveTo" e ||
+ isElem ns "w" "del" e ||
+ isElem ns "w" "moveFrom" e
+ ) >>=
+ getTrackedChange ns
}
elemToParagraphStyle _ _ _ = defaultParagraphStyle
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
| Just t <- findChild tag rPr
- , Just val <- findAttr (elemName ns "w" "val") t =
+ , Just val <- findAttrByName ns "w" "val" t =
Just $ case val of
"true" -> True
"false" -> False
@@ -897,11 +1040,11 @@ checkOnOff _ _ _ = Nothing
elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
elemToRunStyleD ns element
- | Just rPr <- findChild (elemName ns "w" "rPr") element = do
+ | Just rPr <- findChildByName ns "w" "rPr" element = do
charStyles <- asks envCharStyles
let parentSty = case
- findChild (elemName ns "w" "rStyle") rPr >>=
- findAttr (elemName ns "w" "val")
+ findChildByName ns "w" "rStyle" rPr >>=
+ findAttrByName ns "w" "val"
of
Just styName | Just style <- M.lookup styName charStyles ->
Just (styName, style)
@@ -911,7 +1054,7 @@ elemToRunStyleD _ _ = return defaultRunStyle
elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
elemToRunStyle ns element parentStyle
- | Just rPr <- findChild (elemName ns "w" "rPr") element =
+ | Just rPr <- findChildByName ns "w" "rPr" element =
RunStyle
{
isBold = checkOnOff ns rPr (elemName ns "w" "b")
@@ -919,32 +1062,31 @@ elemToRunStyle ns element parentStyle
, isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
, isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
, rVertAlign =
- findChild (elemName ns "w" "vertAlign") rPr >>=
- findAttr (elemName ns "w" "val") >>=
+ findChildByName ns "w" "vertAlign" rPr >>=
+ findAttrByName ns "w" "val" >>=
\v -> Just $ case v of
"superscript" -> SupScrpt
"subscript" -> SubScrpt
_ -> BaseLn
, rUnderline =
- findChild (elemName ns "w" "u") rPr >>=
- findAttr (elemName ns "w" "val")
+ findChildByName ns "w" "u" rPr >>=
+ findAttrByName ns "w" "val"
, rStyle = parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
-isNumericNotNull :: String -> Bool
-isNumericNotNull str = (str /= []) && (all isDigit str)
-
getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
getHeaderLevel ns element
- | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ | Just styleId <- findAttrByName ns "w" "styleId" element
, Just index <- stripPrefix "Heading" styleId
- , isNumericNotNull index = Just (styleId, read index)
- | Just styleId <- findAttr (elemName ns "w" "styleId") element
- , Just index <- findChild (elemName ns "w" "name") element >>=
- findAttr (elemName ns "w" "val") >>=
+ , Just n <- stringToInteger index
+ , n > 0 = Just (styleId, fromInteger n)
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , Just index <- findChildByName ns "w" "name" element >>=
+ findAttrByName ns "w" "val" >>=
stripPrefix "heading "
- , isNumericNotNull index = Just (styleId, read index)
+ , Just n <- stringToInteger index
+ , n > 0 = Just (styleId, fromInteger n)
getHeaderLevel _ _ = Nothing
blockQuoteStyleIds :: [String]
@@ -955,23 +1097,23 @@ blockQuoteStyleNames = ["Quote", "Block Text"]
getBlockQuote :: NameSpaces -> Element -> Maybe Bool
getBlockQuote ns element
- | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ | Just styleId <- findAttrByName ns "w" "styleId" element
, styleId `elem` blockQuoteStyleIds = Just True
- | Just styleName <- findChild (elemName ns "w" "name") element >>=
- findAttr (elemName ns "w" "val")
+ | Just styleName <- findChildByName ns "w" "name" element >>=
+ findAttrByName ns "w" "val"
, styleName `elem` blockQuoteStyleNames = Just True
getBlockQuote _ _ = Nothing
getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
getNumInfo ns element = do
- let numPr = findChild (elemName ns "w" "pPr") element >>=
- findChild (elemName ns "w" "numPr")
+ let numPr = findChildByName ns "w" "pPr" element >>=
+ findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
- findChild (elemName ns "w" "ilvl") >>=
- findAttr (elemName ns "w" "val"))
+ findChildByName ns "w" "ilvl" >>=
+ findAttrByName ns "w" "val")
numId <- numPr >>=
- findChild (elemName ns "w" "numId") >>=
- findAttr (elemName ns "w" "val")
+ findChildByName ns "w" "numId" >>=
+ findAttrByName ns "w" "val"
return (numId, lvl)
@@ -1015,10 +1157,10 @@ getSymChar ns element
let [(char, _)] = readLitChar ("\\x" ++ s) in
TextRun . maybe "" (:[]) $ getUnicode font char
where
- getCodepoint = findAttr (elemName ns "w" "char") element
- getFont = stringToFont =<< findAttr (elemName ns "w" "font") element
+ getCodepoint = findAttrByName ns "w" "char" element
+ getFont = stringToFont =<< findAttrByName ns "w" "font" element
lowerFromPrivate ('F':xs) = '0':xs
- lowerFromPrivate xs = xs
+ lowerFromPrivate xs = xs
getSymChar _ _ = TextRun ""
elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1029,11 +1171,9 @@ elemToRunElems ns element
let font = do
fontElem <- findElement (qualName "rFonts") element
stringToFont =<<
- (foldr (<|>) Nothing $
- map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
+ foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
setFont f s = s{envFont = f}
-
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
index 00906cf07..b32a73770 100644
--- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -7,11 +7,11 @@ module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
, hasStyleName
) where
-import Text.XML.Light
-import Text.Pandoc.Readers.Docx.Util
-import Control.Monad.State
-import Data.Char (toLower)
-import qualified Data.Map as M
+import Control.Monad.State.Strict
+import Data.Char (toLower)
+import qualified Data.Map as M
+import Text.Pandoc.Readers.Docx.Util
+import Text.XML.Light
newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
newtype CharStyleMap = CharStyleMap ( M.Map String String )
@@ -30,7 +30,7 @@ instance StyleMap CharStyleMap where
insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
insert (Just k) (Just v) m = alterMap (M.insert k v) m
-insert _ _ m = m
+insert _ _ m = m
getStyleId :: (StyleMap a) => String -> a -> String
getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index 33d69ccf3..d9d65bc07 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -3,10 +3,13 @@ module Text.Pandoc.Readers.Docx.Util (
, elemName
, isElem
, elemToNameSpaces
+ , findChildByName
+ , findChildrenByName
+ , findAttrByName
) where
-import Text.XML.Light
import Data.Maybe (mapMaybe)
+import Text.XML.Light
type NameSpaces = [(String, String)]
@@ -15,7 +18,7 @@ elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
attrToNSPair :: Attr -> Maybe (String, String)
attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
+attrToNSPair _ = Nothing
elemName :: NameSpaces -> String -> String -> QName
elemName ns prefix name =
@@ -23,5 +26,21 @@ elemName ns prefix name =
isElem :: NameSpaces -> String -> String -> Element -> Bool
isElem ns prefix name element =
- qName (elName element) == name &&
- qURI (elName element) == lookup prefix ns
+ let ns' = ns ++ elemToNameSpaces element
+ in qName (elName element) == name &&
+ qURI (elName element) == lookup prefix ns'
+
+findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName ns pref name el =
+ let ns' = ns ++ elemToNameSpaces el
+ in findChild (elemName ns' pref name) el
+
+findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName ns pref name el =
+ let ns' = ns ++ elemToNameSpaces el
+ in findChildren (elemName ns' pref name) el
+
+findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName ns pref name el =
+ let ns' = ns ++ elemToNameSpaces el
+ in findAttr (elemName ns' pref name) el
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 4c31bf1ae..3b13bbe13 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -1,55 +1,54 @@
-{-# LANGUAGE
- ViewPatterns
- , StandaloneDeriving
- , TupleSections
- , FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
+
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
module Text.Pandoc.Readers.EPUB
(readEPUB)
where
-import Text.XML.Light
+import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
+ toArchiveOrFail)
+import Control.DeepSeq (NFData, deepseq)
+import Control.Monad (guard, liftM)
+import Control.Monad.Except (throwError)
+import qualified Data.ByteString.Lazy as BL (ByteString)
+import Data.List (isInfixOf, isPrefixOf)
+import qualified Data.Map as M (Map, elems, fromList, lookup)
+import Data.Maybe (fromMaybe, mapMaybe)
+import Data.Monoid ((<>))
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TL
+import Network.URI (unEscapeString)
+import System.FilePath (dropFileName, dropFileName, normalise, splitFileName,
+ takeFileName, (</>))
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, insertMedia)
import Text.Pandoc.Definition hiding (Attr)
-import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Error
-import Text.Pandoc.Walk (walk, query)
-import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
-import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
-import Network.URI (unEscapeString)
-import Text.Pandoc.MediaBag (MediaBag, insertMedia)
-import Control.Monad.Except (MonadError, throwError, runExcept, Except)
+import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
import Text.Pandoc.MIME (MimeType)
-import qualified Text.Pandoc.Builder as B
-import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry
- , findEntryByPath, Entry)
-import qualified Data.ByteString.Lazy as BL (ByteString)
-import System.FilePath ( takeFileName, (</>), dropFileName, normalise
- , dropFileName
- , splitFileName )
+import Text.Pandoc.Options (ReaderOptions (..))
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
-import Control.Monad (guard, liftM, when)
-import Data.List (isPrefixOf, isInfixOf)
-import Data.Maybe (mapMaybe, fromMaybe)
-import qualified Data.Map as M (Map, lookup, fromList, elems)
-import Data.Monoid ((<>))
-import Control.DeepSeq (deepseq, NFData)
-
-import Debug.Trace (trace)
+import Text.Pandoc.Walk (query, walk)
+import Text.XML.Light
type Items = M.Map String (FilePath, MimeType)
-readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)
+readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
readEPUB opts bytes = case toArchiveOrFail bytes of
- Right archive -> runEPUB $ archiveToEPUB opts $ archive
- Left _ -> Left $ ParseFailure "Couldn't extract ePub file"
+ Right archive -> archiveToEPUB opts archive
+ Left _ -> throwError $ PandocParseError "Couldn't extract ePub file"
-runEPUB :: Except PandocError a -> Either PandocError a
-runEPUB = runExcept
+-- runEPUB :: Except PandocError a -> Either PandocError a
+-- runEPUB = runExcept
-- Note that internal reference are aggresively normalised so that all ids
-- are of the form "filename#id"
--
-archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
+archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
archiveToEPUB os archive = do
-- root is path to folder with manifest file in
(root, content) <- getManifest archive
@@ -62,40 +61,36 @@ archiveToEPUB os archive = do
Pandoc _ bs <-
foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine))
`liftM` parseSpineElem root b) mempty spine
- let ast = coverDoc <> (Pandoc meta bs)
- let mediaBag = fetchImages (M.elems items) root archive ast
- return $ (ast, mediaBag)
+ let ast = coverDoc <> Pandoc meta bs
+ fetchImages (M.elems items) root archive ast
+ return ast
where
- os' = os {readerParseRaw = True}
- parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc
+ os' = os {readerExtensions = enableExtension Ext_raw_html (readerExtensions os)}
+ parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
- when (readerTrace os) (traceM path)
doc <- mimeToReader mime r path
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
return $ docSpan <> doc
- mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc
+ mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader "application/xhtml+xml" (unEscapeString -> root)
(unEscapeString -> path) = do
fname <- findEntryByPathE (root </> path) archive
- html <- either throwError return .
- readHtml os' .
- UTF8.toStringLazy $
- fromEntry fname
+ html <- readHtml os' . TL.toStrict . TL.decodeUtf8 $ fromEntry fname
return $ fixInternalReferences path html
mimeToReader s _ (unEscapeString -> path)
| s `elem` imageMimes = return $ imageToPandoc path
- | otherwise = return $ mempty
+ | otherwise = return mempty
-- paths should be absolute when this function is called
-- renameImages should do this
-fetchImages :: [(FilePath, MimeType)]
+fetchImages :: PandocMonad m
+ => [(FilePath, MimeType)]
-> FilePath -- ^ Root
-> Archive
-> Pandoc
- -> MediaBag
+ -> m ()
fetchImages mimes root arc (query iq -> links) =
- foldr (uncurry3 insertMedia) mempty
- (mapMaybe getEntry links)
+ mapM_ (uncurry3 insertMedia) (mapMaybe getEntry links)
where
getEntry link =
let abslink = normalise (root </> link) in
@@ -104,7 +99,7 @@ fetchImages mimes root arc (query iq -> links) =
iq :: Inline -> [FilePath]
iq (Image _ _ (url, _)) = [url]
-iq _ = []
+iq _ = []
-- Remove relative paths
renameImages :: FilePath -> Inline -> Inline
@@ -121,13 +116,13 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"]
type CoverImage = FilePath
-parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items)
+parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items)
parseManifest content = do
manifest <- findElementE (dfName "manifest") content
let items = findChildren (dfName "item") manifest
r <- mapM parseItem items
let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
- return (cover, (M.fromList r))
+ return (cover, M.fromList r)
where
findCover e = maybe False (isInfixOf "cover-image")
(findAttr (emptyName "properties") e)
@@ -137,18 +132,18 @@ parseManifest content = do
mime <- findAttrE (emptyName "media-type") e
return (uid, (href, mime))
-parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)]
+parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
spine <- findElementE (dfName "spine") e
let itemRefs = findChildren (dfName "itemref") spine
- mapM (mkE "parseSpine" . (flip M.lookup is)) $ mapMaybe parseItemRef itemRefs
+ mapM (mkE "parseSpine" . flip M.lookup is) $ mapMaybe parseItemRef itemRefs
where
parseItemRef ref = do
let linear = maybe True (== "yes") (findAttr (emptyName "linear") ref)
guard linear
findAttr (emptyName "idref") ref
-parseMeta :: MonadError PandocError m => Element -> m Meta
+parseMeta :: PandocMonad m => Element -> m Meta
parseMeta content = do
meta <- findElementE (dfName "metadata") content
let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
@@ -164,29 +159,29 @@ parseMetaItem e@(stripNamespace . elName -> field) meta =
renameMeta :: String -> String
renameMeta "creator" = "author"
-renameMeta s = s
+renameMeta s = s
-getManifest :: MonadError PandocError m => Archive -> m (String, Element)
+getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest archive = do
metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
- as <- liftM ((map attrToPair) . elAttribs)
+ as <- fmap (map attrToPair . elAttribs)
(findElementE (QName "rootfile" (Just ns) Nothing) docElem)
manifestFile <- mkE "Root not found" (lookup "full-path" as)
let rootdir = dropFileName manifestFile
--mime <- lookup "media-type" as
manifest <- findEntryByPathE manifestFile archive
- liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+ fmap ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
-- Fixup
fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences pathToFile =
- (walk $ renameImages root)
- . (walk $ fixBlockIRs filename)
- . (walk $ fixInlineIRs filename)
+ walk (renameImages root)
+ . walk (fixBlockIRs filename)
+ . walk (fixInlineIRs filename)
where
(root, escapeURI -> filename) = splitFileName pathToFile
@@ -221,7 +216,7 @@ fixAttrs :: FilePath -> B.Attr -> B.Attr
fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs)
addHash :: String -> String -> String
-addHash _ "" = ""
+addHash _ "" = ""
addHash s ident = takeFileName s ++ "#" ++ ident
removeEPUBAttrs :: [(String, String)] -> [(String, String)]
@@ -242,9 +237,6 @@ foldM' f z (x:xs) = do
uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 f (a, b, c) = f a b c
-traceM :: Monad m => String -> m ()
-traceM = flip trace (return ())
-
-- Utility
stripNamespace :: QName -> String
@@ -252,7 +244,7 @@ stripNamespace (QName v _ _) = v
attrToNSPair :: Attr -> Maybe (String, String)
attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
-attrToNSPair _ = Nothing
+attrToNSPair _ = Nothing
attrToPair :: Attr -> (String, String)
attrToPair (Attr (QName name _ _) val) = (name, val)
@@ -268,18 +260,18 @@ emptyName s = QName s Nothing Nothing
-- Convert Maybe interface to Either
-findAttrE :: MonadError PandocError m => QName -> Element -> m String
+findAttrE :: PandocMonad m => QName -> Element -> m String
findAttrE q e = mkE "findAttr" $ findAttr q e
-findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry
+findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
findEntryByPathE (normalise -> path) a =
mkE ("No entry on path: " ++ path) $ findEntryByPath path a
-parseXMLDocE :: MonadError PandocError m => String -> m Element
+parseXMLDocE :: PandocMonad m => String -> m Element
parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
-findElementE :: MonadError PandocError m => QName -> Element -> m Element
+findElementE :: PandocMonad m => QName -> Element -> m Element
findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
-mkE :: MonadError PandocError m => String -> Maybe a -> m a
-mkE s = maybe (throwError . ParseFailure $ s) return
+mkE :: PandocMonad m => String -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index abe5f66ce..0e79f9ec3 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,7 +1,10 @@
-{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses,
-ViewPatterns#-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +23,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.HTML
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -34,66 +37,79 @@ module Text.Pandoc.Readers.HTML ( readHtml
, htmlInBalanced
, isInlineTag
, isBlockTag
+ , NamedTag(..)
, isTextTag
, isCommentTag
) where
+import Control.Applicative ((<|>))
+import Control.Arrow (first)
+import Control.Monad (guard, mplus, msum, mzero, unless, void)
+import Control.Monad.Except (throwError)
+import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT)
+import Data.Char (isAlphaNum, isDigit, isLetter)
+import Data.Default (Default (..), def)
+import Data.Foldable (for_)
+import Data.List (isPrefixOf)
+import Data.List.Split (wordsBy, splitWhen)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe, isJust, isNothing)
+import Data.Monoid (First (..), (<>))
+import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.URI (URI, nonStrictRelativeTo, parseURIReference)
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
-import Text.Pandoc.Definition
+import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
-import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField
- , escapeURI, safeRead, mapLeft )
-import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
- , Extension (Ext_epub_html_exts,
- Ext_native_divs, Ext_native_spans))
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.Options (
+ Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
+ Ext_native_spans, Ext_raw_html, Ext_line_blocks),
+ ReaderOptions (readerExtensions, readerStripComments),
+ extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
+import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
+ extractSpaces, safeRead, underlineSpan)
import Text.Pandoc.Walk
-import qualified Data.Map as M
-import Data.Maybe ( fromMaybe, isJust)
-import Data.List ( intercalate, isInfixOf, isPrefixOf )
-import Data.Char ( isDigit )
-import Control.Monad ( guard, when, mzero, void, unless )
-import Control.Arrow ((***))
-import Control.Applicative ( (<|>) )
-import Data.Monoid (First (..))
-import Text.Printf (printf)
-import Debug.Trace (trace)
-import Text.TeXMath (readMathML, writeTeX)
-import Data.Default (Default (..), def)
-import Control.Monad.Reader (Reader,ask, asks, local, runReader)
-import Network.URI (URI, parseURIReference, nonStrictRelativeTo)
-import Text.Pandoc.Error
-import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
-import Data.Monoid ((<>))
import Text.Parsec.Error
-import qualified Data.Set as Set
+import Text.TeXMath (readMathML, writeTeX)
-- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Either PandocError Pandoc
-readHtml opts inp =
- mapLeft (ParseFailure . getError) . flip runReader def $
- runParserT parseDoc
- (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
- "source" tags
- where tags = stripPrefixes . canonicalizeTags $
- parseTagsOptions parseOptions{ optTagPosition = True } inp
- parseDoc = do
- blocks <- (fixPlains False) . mconcat <$> manyTill block eof
- meta <- stateMeta . parserState <$> getState
- bs' <- replaceNotes (B.toList blocks)
- return $ Pandoc meta bs'
- getError (errorMessages -> ms) = case ms of
- [] -> ""
- (m:_) -> messageString m
-
-replaceNotes :: [Block] -> TagParser [Block]
+readHtml :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> m Pandoc
+readHtml opts inp = do
+ let tags = stripPrefixes . canonicalizeTags $
+ parseTagsOptions parseOptions{ optTagPosition = True }
+ (crFilter inp)
+ parseDoc = do
+ blocks <- fixPlains False . mconcat <$> manyTill block eof
+ meta <- stateMeta . parserState <$> getState
+ bs' <- replaceNotes (B.toList blocks)
+ reportLogMessages
+ return $ Pandoc meta bs'
+ getError (errorMessages -> ms) = case ms of
+ [] -> ""
+ (m:_) -> messageString m
+ result <- flip runReaderT def $
+ runParserT parseDoc
+ (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty [])
+ "source" tags
+ case result of
+ Right doc -> return doc
+ Left err -> throwError $ PandocParseError $ getError err
+
+replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
replaceNotes = walkM replaceNotes'
-replaceNotes' :: Inline -> TagParser Inline
+replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline
replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes
where
getNotes = noteTable <$> getState
@@ -105,38 +121,46 @@ data HTMLState =
noteTable :: [(String, Blocks)],
baseHref :: Maybe URI,
identifiers :: Set.Set String,
- headerMap :: M.Map Inlines String
+ headerMap :: M.Map Inlines String,
+ logMessages :: [LogMessage]
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
- , inChapter :: Bool -- ^ Set if in chapter section
- , inPlain :: Bool -- ^ Set if in pPlain
+ , inChapter :: Bool -- ^ Set if in chapter section
+ , inPlain :: Bool -- ^ Set if in pPlain
}
-setInChapter :: HTMLParser s a -> HTMLParser s a
+setInChapter :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
setInChapter = local (\s -> s {inChapter = True})
-setInPlain :: HTMLParser s a -> HTMLParser s a
+setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
setInPlain = local (\s -> s {inPlain = True})
-type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
+type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m)
-type TagParser = HTMLParser [Tag String]
+type TagParser m = HTMLParser m [Tag Text]
-pBody :: TagParser Blocks
+pHtml :: PandocMonad m => TagParser m Blocks
+pHtml = try $ do
+ (TagOpen "html" attr) <- lookAhead pAnyTag
+ for_ (lookup "lang" attr) $
+ updateState . B.setMeta "lang" . B.text . T.unpack
+ pInTags "html" block
+
+pBody :: PandocMonad m => TagParser m Blocks
pBody = pInTags "body" block
-pHead :: TagParser Blocks
+pHead :: PandocMonad m => TagParser m Blocks
pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag)
where pTitle = pInTags "title" inline >>= setTitle . trimInlines
- setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
+ setTitle t = mempty <$ updateState (B.setMeta "title" t)
pMetaTag = do
- mt <- pSatisfy (~== TagOpen "meta" [])
- let name = fromAttrib "name" mt
+ mt <- pSatisfy (matchTagOpen "meta" [])
+ let name = T.unpack $ fromAttrib "name" mt
if null name
then return mempty
else do
- let content = fromAttrib "content" mt
+ let content = T.unpack $ fromAttrib "content" mt
updateState $ \s ->
let ps = parserState s in
s{ parserState = ps{
@@ -144,15 +168,13 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag
(stateMeta ps) } }
return mempty
pBaseTag = do
- bt <- pSatisfy (~== TagOpen "base" [])
+ bt <- pSatisfy (matchTagOpen "base" [])
updateState $ \st -> st{ baseHref =
- parseURIReference $ fromAttrib "href" bt }
+ parseURIReference $ T.unpack $ fromAttrib "href" bt }
return mempty
-block :: TagParser Blocks
+block :: PandocMonad m => TagParser m Blocks
block = do
- tr <- getOption readerTrace
- pos <- getPosition
res <- choice
[ eSection
, eSwitch B.para block
@@ -166,94 +188,107 @@ block = do
, pList
, pHrule
, pTable
+ , pHtml
, pHead
, pBody
+ , pLineBlock
, pDiv
, pPlain
+ , pFigure
, pRawHtmlBlock
]
- when tr $ trace (printf "line %d: %s" (sourceLine pos)
- (take 60 $ show $ B.toList res)) (return ())
+ trace (take 60 $ show $ B.toList res)
return res
-namespaces :: [(String, TagParser Inlines)]
+namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
namespaces = [(mathMLNamespace, pMath True)]
mathMLNamespace :: String
mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
-eSwitch :: Monoid a => (Inlines -> a) -> TagParser a -> TagParser a
+eSwitch :: (PandocMonad m, Monoid a)
+ => (Inlines -> a)
+ -> TagParser m a
+ -> TagParser m a
eSwitch constructor parser = try $ do
guardEnabled Ext_epub_html_exts
- pSatisfy (~== TagOpen "switch" [])
+ pSatisfy (matchTagOpen "switch" [])
cases <- getFirst . mconcat <$>
manyTill (First <$> (eCase <* skipMany pBlank) )
- (lookAhead $ try $ pSatisfy (~== TagOpen "default" []))
+ (lookAhead $ try $ pSatisfy (matchTagOpen "default" []))
skipMany pBlank
fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank)
skipMany pBlank
- pSatisfy (~== TagClose "switch")
+ pSatisfy (matchTagClose "switch")
return $ maybe fallback constructor cases
-eCase :: TagParser (Maybe Inlines)
+eCase :: PandocMonad m => TagParser m (Maybe Inlines)
eCase = do
skipMany pBlank
- TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" [])
- case (flip lookup namespaces) =<< lookup "required-namespace" attr of
- Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank))
- Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case"))
+ TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "case" [])
+ let attr = toStringAttr attr'
+ case flip lookup namespaces =<< lookup "required-namespace" attr of
+ Just p -> Just <$> pInTags "case" (skipMany pBlank *> p <* skipMany pBlank)
+ Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (matchTagClose "case"))
-eFootnote :: TagParser ()
+eFootnote :: PandocMonad m => TagParser m ()
eFootnote = try $ do
let notes = ["footnote", "rearnote"]
guardEnabled Ext_epub_html_exts
- (TagOpen tag attr) <- lookAhead $ pAnyTag
- guard (maybe False (flip elem notes) (lookup "type" attr))
+ (TagOpen tag attr') <- lookAhead pAnyTag
+ let attr = toStringAttr attr'
+ guard $ maybe False (`elem` notes) (lookup "type" attr)
let ident = fromMaybe "" (lookup "id" attr)
content <- pInTags tag block
addNote ident content
-addNote :: String -> Blocks -> TagParser ()
-addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)})
+addNote :: PandocMonad m => String -> Blocks -> TagParser m ()
+addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s})
-eNoteref :: TagParser Inlines
+eNoteref :: PandocMonad m => TagParser m Inlines
eNoteref = try $ do
guardEnabled Ext_epub_html_exts
- TagOpen tag attr <- lookAhead $ pAnyTag
- guard (maybe False (== "noteref") (lookup "type" attr))
+ TagOpen tag attr' <- lookAhead pAnyTag
+ let attr = toStringAttr attr'
+ guard $ lookup "type" attr == Just "noteref"
let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr)
guard (not (null ident))
pInTags tag block
return $ B.rawInline "noteref" ident
-- Strip TOC if there is one, better to generate again
-eTOC :: TagParser ()
+eTOC :: PandocMonad m => TagParser m ()
eTOC = try $ do
guardEnabled Ext_epub_html_exts
- (TagOpen tag attr) <- lookAhead $ pAnyTag
- guard (maybe False (== "toc") (lookup "type" attr))
+ (TagOpen tag attr) <- lookAhead pAnyTag
+ guard $ lookup "type" attr == Just "toc"
void (pInTags tag block)
-pList :: TagParser Blocks
+pList :: PandocMonad m => TagParser m Blocks
pList = pBulletList <|> pOrderedList <|> pDefinitionList
-pBulletList :: TagParser Blocks
+pBulletList :: PandocMonad m => TagParser m Blocks
pBulletList = try $ do
- pSatisfy (~== TagOpen "ul" [])
+ pSatisfy (matchTagOpen "ul" [])
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
- not (t ~== TagClose "ul"))
+ not (matchTagClose "ul" t))
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
items <- manyTill (pListItem nonItem) (pCloses "ul")
return $ B.bulletList $ map (fixPlains True) items
-pListItem :: TagParser a -> TagParser Blocks
+pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks
pListItem nonItem = do
- TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" [])
- let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
- (liDiv <>) <$> pInTags "li" block <* skipMany nonItem
+ TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" [])
+ let attr = toStringAttr attr'
+ let addId ident bs = case B.toList bs of
+ (Plain ils:xs) -> B.fromList (Plain
+ [Span (ident, [], []) ils] : xs)
+ _ -> B.divWith (ident, [], []) bs
+ maybe id addId (lookup "id" attr) <$>
+ pInTags "li" block <* skipMany nonItem
parseListStyleType :: String -> ListNumberStyle
parseListStyleType "lower-roman" = LowerRoman
@@ -271,9 +306,10 @@ parseTypeAttr "A" = UpperAlpha
parseTypeAttr "1" = Decimal
parseTypeAttr _ = DefaultStyle
-pOrderedList :: TagParser Blocks
+pOrderedList :: PandocMonad m => TagParser m Blocks
pOrderedList = try $ do
- TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
+ TagOpen _ attribs' <- pSatisfy (matchTagOpen "ol" [])
+ let attribs = toStringAttr attribs'
let (start, style) = (sta', sty')
where sta = fromMaybe "1" $
lookup "start" attribs
@@ -295,23 +331,23 @@ pOrderedList = try $ do
]
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
- not (t ~== TagClose "ol"))
+ not (matchTagClose "ol" t))
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
items <- manyTill (pListItem nonItem) (pCloses "ol")
return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
-pDefinitionList :: TagParser Blocks
+pDefinitionList :: PandocMonad m => TagParser m Blocks
pDefinitionList = try $ do
- pSatisfy (~== TagOpen "dl" [])
+ pSatisfy (matchTagOpen "dl" [])
items <- manyTill pDefListItem (pCloses "dl")
return $ B.definitionList items
-pDefListItem :: TagParser (Inlines, [Blocks])
+pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks])
pDefListItem = try $ do
- let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) &&
- not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl"))
+ let nonItem = pSatisfy (\t -> not (matchTagOpen "dt" [] t) &&
+ not (matchTagOpen "dd" [] t) && not (matchTagClose "dl" t))
terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
skipMany nonItem
@@ -322,133 +358,165 @@ fixPlains :: Bool -> Blocks -> Blocks
fixPlains inList bs = if any isParaish bs'
then B.fromList $ map plainToPara bs'
else bs
- where isParaish (Para _) = True
- isParaish (CodeBlock _ _) = True
- isParaish (Header _ _ _) = True
- isParaish (BlockQuote _) = True
- isParaish (BulletList _) = not inList
- isParaish (OrderedList _ _) = not inList
- isParaish (DefinitionList _) = not inList
- isParaish _ = False
+ where isParaish Para{} = True
+ isParaish CodeBlock{} = True
+ isParaish Header{} = True
+ isParaish BlockQuote{} = True
+ isParaish BulletList{} = not inList
+ isParaish OrderedList{} = not inList
+ isParaish DefinitionList{} = not inList
+ isParaish _ = False
plainToPara (Plain xs) = Para xs
- plainToPara x = x
+ plainToPara x = x
bs' = B.toList bs
-pRawTag :: TagParser String
+pRawTag :: PandocMonad m => TagParser m Text
pRawTag = do
tag <- pAnyTag
let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
if tagOpen ignorable (const True) tag || tagClose ignorable tag
- then return []
+ then return mempty
else return $ renderTags' [tag]
-pDiv :: TagParser Blocks
+pLineBlock :: PandocMonad m => TagParser m Blocks
+pLineBlock = try $ do
+ guardEnabled Ext_line_blocks
+ _ <- pSatisfy $ tagOpen (=="div") (== [("class","line-block")])
+ ils <- trimInlines . mconcat <$> manyTill inline (pSatisfy (tagClose (=="div")))
+ let lns = map B.fromList $
+ splitWhen (== LineBreak) $ filter (/= SoftBreak) $
+ B.toList ils
+ return $ B.lineBlock lns
+
+pDiv :: PandocMonad m => TagParser m Blocks
pDiv = try $ do
guardEnabled Ext_native_divs
- let isDivLike "div" = True
+ let isDivLike "div" = True
isDivLike "section" = True
- isDivLike _ = False
- TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
+ isDivLike "main" = True
+ isDivLike _ = False
+ TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
+ let attr = toStringAttr attr'
contents <- pInTags tag block
let (ident, classes, kvs) = mkAttr attr
let classes' = if tag == "section"
then "section":classes
else classes
- return $ B.divWith (ident, classes', kvs) contents
+ kvs' = if tag == "main" && isNothing (lookup "role" kvs)
+ then ("role", "main"):kvs
+ else kvs
+ return $ B.divWith (ident, classes', kvs') contents
-pRawHtmlBlock :: TagParser Blocks
+pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
- raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
- parseRaw <- getOption readerParseRaw
- if parseRaw && not (null raw)
+ raw <- T.unpack <$> (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag)
+ exts <- getOption readerExtensions
+ if extensionEnabled Ext_raw_html exts && not (null raw)
then return $ B.rawBlock "html" raw
- else return mempty
+ else ignore raw
+
+ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a
+ignore raw = do
+ pos <- getPosition
+ -- raw can be null for tags like <!DOCTYPE>; see paRawTag
+ -- in this case we don't want a warning:
+ unless (null raw) $
+ logMessage $ SkippedContent raw pos
+ return mempty
-pHtmlBlock :: String -> TagParser String
+pHtmlBlock :: PandocMonad m => Text -> TagParser m Text
pHtmlBlock t = try $ do
- open <- pSatisfy (~== TagOpen t [])
- contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
- return $ renderTags' $ [open] ++ contents ++ [TagClose t]
+ open <- pSatisfy (matchTagOpen t [])
+ contents <- manyTill pAnyTag (pSatisfy (matchTagClose t))
+ return $ renderTags' $ [open] <> contents <> [TagClose t]
-- Sets chapter context
-eSection :: TagParser Blocks
+eSection :: PandocMonad m => TagParser m Blocks
eSection = try $ do
- let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as)
+ let matchChapter as = maybe False (T.isInfixOf "chapter") (lookup "type" as)
let sectTag = tagOpen (`elem` sectioningContent) matchChapter
TagOpen tag _ <- lookAhead $ pSatisfy sectTag
setInChapter (pInTags tag block)
-headerLevel :: String -> TagParser Int
-headerLevel tagtype = do
- let level = read (drop 1 tagtype)
- (try $ do
- guardEnabled Ext_epub_html_exts
- asks inChapter >>= guard
- return (level - 1))
- <|>
- return level
-
-eTitlePage :: TagParser ()
+headerLevel :: PandocMonad m => Text -> TagParser m Int
+headerLevel tagtype =
+ case safeRead (T.unpack (T.drop 1 tagtype)) of
+ Just level ->
+-- try (do
+-- guardEnabled Ext_epub_html_exts
+-- asks inChapter >>= guard
+-- return (level - 1))
+-- <|>
+ return level
+ Nothing -> fail "Could not retrieve header level"
+
+eTitlePage :: PandocMonad m => TagParser m ()
eTitlePage = try $ do
- let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as)
+ let isTitlePage as = maybe False (T.isInfixOf "titlepage") (lookup "type" as)
let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section")
isTitlePage
TagOpen tag _ <- lookAhead $ pSatisfy groupTag
() <$ pInTags tag block
-pHeader :: TagParser Blocks
+pHeader :: PandocMonad m => TagParser m Blocks
pHeader = try $ do
- TagOpen tagtype attr <- pSatisfy $
+ TagOpen tagtype attr' <- pSatisfy $
tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
(const True)
- let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
+ let attr = toStringAttr attr'
+ let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text)
+ [("class","title")]
level <- headerLevel tagtype
contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
- attr' <- registerHeader (ident, classes, keyvals) contents
+ attr'' <- registerHeader (ident, classes, keyvals) contents
return $ if bodyTitle
then mempty -- skip a representation of the title in the body
- else B.headerWith attr' level contents
+ else B.headerWith attr'' level contents
-pHrule :: TagParser Blocks
+pHrule :: PandocMonad m => TagParser m Blocks
pHrule = do
pSelfClosing (=="hr") (const True)
return B.horizontalRule
-pTable :: TagParser Blocks
+pTable :: PandocMonad m => TagParser m Blocks
pTable = try $ do
- TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
+ TagOpen _ _ <- pSatisfy (matchTagOpen "table" [])
skipMany pBlank
caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
widths' <- (mconcat <$> many1 pColgroup) <|> many pCol
let pTh = option [] $ pInTags "tr" (pCell "th")
- pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th")
- pTBody = do pOptInTag "tbody" $ many1 pTr
+ pTr = try $ skipMany pBlank >>
+ pInTags "tr" (pCell "td" <|> pCell "th")
+ pTBody = pOptInTag "tbody" $ many1 pTr
head'' <- pOptInTag "thead" pTh
- head' <- pOptInTag "tbody" $ do
- if null head''
- then pTh
- else return head''
+ head' <- map snd <$>
+ pOptInTag "tbody"
+ (if null head'' then pTh else return head'')
rowsLs <- many pTBody
rows' <- pOptInTag "tfoot" $ many pTr
- TagClose _ <- pSatisfy (~== TagClose "table")
- let rows'' = (concat rowsLs) ++ rows'
+ TagClose _ <- pSatisfy (matchTagClose "table")
+ let rows'' = concat rowsLs <> rows'
+ let rows''' = map (map snd) rows''
+ -- let rows''' = map (map snd) rows''
-- fail on empty table
- guard $ not $ null head' && null rows''
+ guard $ not $ null head' && null rows'''
let isSinglePlain x = case B.toList x of
[] -> True
[Plain _] -> True
_ -> False
- let isSimple = all isSinglePlain $ concat (head':rows'')
- let cols = length $ if null head' then head rows'' else head'
+ let isSimple = all isSinglePlain $ concat (head':rows''')
+ let cols = length $ if null head' then head rows''' else head'
-- add empty cells to short rows
let addEmpties r = case cols - length r of
- n | n > 0 -> r ++ replicate n mempty
+ n | n > 0 -> r <> replicate n mempty
| otherwise -> r
- let rows = map addEmpties rows''
- let aligns = replicate cols AlignDefault
+ let rows = map addEmpties rows'''
+ let aligns = case rows'' of
+ (cs:_) -> map fst cs
+ _ -> replicate cols AlignDefault
let widths = if null widths'
then if isSimple
then replicate cols 0
@@ -456,80 +524,120 @@ pTable = try $ do
else widths'
return $ B.table caption (zip aligns widths) head' rows
-pCol :: TagParser Double
+pCol :: PandocMonad m => TagParser m Double
pCol = try $ do
- TagOpen _ attribs <- pSatisfy (~== TagOpen "col" [])
+ TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
+ let attribs = toStringAttr attribs'
skipMany pBlank
- optional $ pSatisfy (~== TagClose "col")
+ optional $ pSatisfy (matchTagClose "col")
skipMany pBlank
- return $ case lookup "width" attribs of
+ let width = case lookup "width" attribs of
Nothing -> case lookup "style" attribs of
Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs ->
- fromMaybe 0.0 $ safeRead ('0':'.':filter
- (`notElem` " \t\r\n%'\";") xs)
+ fromMaybe 0.0 $ safeRead (filter
+ (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
_ -> 0.0
Just x | not (null x) && last x == '%' ->
- fromMaybe 0.0 $ safeRead ('0':'.':init x)
+ fromMaybe 0.0 $ safeRead (init x)
_ -> 0.0
+ if width > 0.0
+ then return $ width / 100.0
+ else return 0.0
-pColgroup :: TagParser [Double]
+pColgroup :: PandocMonad m => TagParser m [Double]
pColgroup = try $ do
- pSatisfy (~== TagOpen "colgroup" [])
+ pSatisfy (matchTagOpen "colgroup" [])
skipMany pBlank
manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
-noColOrRowSpans :: Tag String -> Bool
+noColOrRowSpans :: Tag Text -> Bool
noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan"
where isNullOrOne x = case fromAttrib x t of
"" -> True
"1" -> True
_ -> False
-pCell :: String -> TagParser [Blocks]
+pCell :: PandocMonad m => Text -> TagParser m [(Alignment, Blocks)]
pCell celltype = try $ do
skipMany pBlank
+ tag <- lookAhead $
+ pSatisfy (\t -> t ~== TagOpen celltype [] && noColOrRowSpans t)
+ let extractAlign' [] = ""
+ extractAlign' ("text-align":x:_) = x
+ extractAlign' (_:xs) = extractAlign' xs
+ let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':'])
+ let align = case maybeFromAttrib "align" tag `mplus`
+ (extractAlign <$> maybeFromAttrib "style" tag) of
+ Just "left" -> AlignLeft
+ Just "right" -> AlignRight
+ Just "center" -> AlignCenter
+ _ -> AlignDefault
res <- pInTags' celltype noColOrRowSpans block
skipMany pBlank
- return [res]
+ return [(align, res)]
-pBlockQuote :: TagParser Blocks
+pBlockQuote :: PandocMonad m => TagParser m Blocks
pBlockQuote = do
contents <- pInTags "blockquote" block
return $ B.blockQuote $ fixPlains False contents
-pPlain :: TagParser Blocks
+pPlain :: PandocMonad m => TagParser m Blocks
pPlain = do
contents <- setInPlain $ trimInlines . mconcat <$> many1 inline
if B.isNull contents
then return mempty
else return $ B.plain contents
-pPara :: TagParser Blocks
+pPara :: PandocMonad m => TagParser m Blocks
pPara = do
contents <- trimInlines <$> pInTags "p" inline
- return $ B.para contents
+ (do guardDisabled Ext_empty_paragraphs
+ guard (B.isNull contents)
+ return mempty)
+ <|> return (B.para contents)
+
+pFigure :: PandocMonad m => TagParser m Blocks
+pFigure = try $ do
+ TagOpen _ _ <- pSatisfy (matchTagOpen "figure" [])
+ skipMany pBlank
+ let pImg = (\x -> (Just x, Nothing)) <$>
+ (pOptInTag "p" pImage <* skipMany pBlank)
+ pCapt = (\x -> (Nothing, Just x)) <$> do
+ bs <- pInTags "figcaption" block
+ return $ blocksToInlines' $ B.toList bs
+ pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure")
+ res <- many (pImg <|> pCapt <|> pSkip)
+ let mbimg = msum $ map fst res
+ let mbcap = msum $ map snd res
+ TagClose _ <- pSatisfy (matchTagClose "figure")
+ let caption = fromMaybe mempty mbcap
+ case B.toList <$> mbimg of
+ Just [Image attr _ (url, tit)] ->
+ return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption
+ _ -> mzero
-pCodeBlock :: TagParser Blocks
+pCodeBlock :: PandocMonad m => TagParser m Blocks
pCodeBlock = try $ do
- TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
+ TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
+ let attr = toStringAttr attr'
contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
let rawText = concatMap tagToString contents
-- drop leading newline if any
let result' = case rawText of
- '\n':xs -> xs
- _ -> rawText
+ '\n':xs -> xs
+ _ -> rawText
-- drop trailing newline if any
let result = case reverse result' of
- '\n':_ -> init result'
- _ -> result'
+ '\n':_ -> init result'
+ _ -> result'
return $ B.codeBlockWith (mkAttr attr) result
-tagToString :: Tag String -> String
-tagToString (TagText s) = s
+tagToString :: Tag Text -> String
+tagToString (TagText s) = T.unpack s
tagToString (TagOpen "br" _) = "\n"
-tagToString _ = ""
+tagToString _ = ""
-inline :: TagParser Inlines
+inline :: PandocMonad m => TagParser m Inlines
inline = choice
[ eNoteref
, eSwitch id inline
@@ -540,6 +648,7 @@ inline = choice
, pSuperscript
, pSubscript
, pStrikeout
+ , pUnderline
, pLineBreak
, pLink
, pImage
@@ -549,30 +658,31 @@ inline = choice
, pRawHtmlInline
]
-pLocation :: TagParser ()
+pLocation :: PandocMonad m => TagParser m ()
pLocation = do
(TagPosition r c) <- pSat isTagPosition
setPosition $ newPos "input" r c
-pSat :: (Tag String -> Bool) -> TagParser (Tag String)
+pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSat f = do
pos <- getPosition
token show (const pos) (\x -> if f x then Just x else Nothing)
-pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String)
+pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy f = try $ optional pLocation >> pSat f
-pAnyTag :: TagParser (Tag String)
+pAnyTag :: PandocMonad m => TagParser m (Tag Text)
pAnyTag = pSatisfy (const True)
-pSelfClosing :: (String -> Bool) -> ([Attribute String] -> Bool)
- -> TagParser (Tag String)
+pSelfClosing :: PandocMonad m
+ => (Text -> Bool) -> ([Attribute Text] -> Bool)
+ -> TagParser m (Tag Text)
pSelfClosing f g = do
open <- pSatisfy (tagOpen f g)
optional $ pSatisfy (tagClose f)
return open
-pQ :: TagParser Inlines
+pQ :: PandocMonad m => TagParser m Inlines
pQ = do
context <- asks quoteContext
let quoteType = case context of
@@ -587,45 +697,50 @@ pQ = do
withQuoteContext innerQuoteContext $
pInlinesInTags "q" constructor
-pEmph :: TagParser Inlines
+pEmph :: PandocMonad m => TagParser m Inlines
pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
-pStrong :: TagParser Inlines
+pStrong :: PandocMonad m => TagParser m Inlines
pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong
-pSuperscript :: TagParser Inlines
+pSuperscript :: PandocMonad m => TagParser m Inlines
pSuperscript = pInlinesInTags "sup" B.superscript
-pSubscript :: TagParser Inlines
+pSubscript :: PandocMonad m => TagParser m Inlines
pSubscript = pInlinesInTags "sub" B.subscript
-pStrikeout :: TagParser Inlines
-pStrikeout = do
+pStrikeout :: PandocMonad m => TagParser m Inlines
+pStrikeout =
pInlinesInTags "s" B.strikeout <|>
pInlinesInTags "strike" B.strikeout <|>
pInlinesInTags "del" B.strikeout <|>
- try (do pSatisfy (~== TagOpen "span" [("class","strikeout")])
+ try (do pSatisfy (matchTagOpen "span" [("class","strikeout")])
contents <- mconcat <$> manyTill inline (pCloses "span")
return $ B.strikeout contents)
-pLineBreak :: TagParser Inlines
+pUnderline :: PandocMonad m => TagParser m Inlines
+pUnderline = pInlinesInTags "u" underlineSpan <|> pInlinesInTags "ins" underlineSpan
+
+pLineBreak :: PandocMonad m => TagParser m Inlines
pLineBreak = do
pSelfClosing (=="br") (const True)
return B.linebreak
-- Unlike fromAttrib from tagsoup, this distinguishes
-- between a missing attribute and an attribute with empty content.
-maybeFromAttrib :: String -> Tag String -> Maybe String
-maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs
+maybeFromAttrib :: String -> Tag Text -> Maybe String
+maybeFromAttrib name (TagOpen _ attrs) =
+ T.unpack <$> lookup (T.pack name) attrs
maybeFromAttrib _ _ = Nothing
-pLink :: TagParser Inlines
+pLink :: PandocMonad m => TagParser m Inlines
pLink = try $ do
tag <- pSatisfy $ tagOpenLit "a" (const True)
- let title = fromAttrib "title" tag
+ let title = T.unpack $ fromAttrib "title" tag
-- take id from id attribute if present, otherwise name
- let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag
- let cls = words $ fromAttrib "class" tag
+ let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
+ maybeFromAttrib "id" tag
+ let cls = words $ T.unpack $ fromAttrib "class" tag
lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
-- check for href; if href, then a link, otherwise a span
case maybeFromAttrib "href" tag of
@@ -639,128 +754,149 @@ pLink = try $ do
_ -> url'
return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
-pImage :: TagParser Inlines
+pImage :: PandocMonad m => TagParser m Inlines
pImage = do
tag <- pSelfClosing (=="img") (isJust . lookup "src")
mbBaseHref <- baseHref <$> getState
- let url' = fromAttrib "src" tag
+ let url' = T.unpack $ fromAttrib "src" tag
let url = case (parseURIReference url', mbBaseHref) of
(Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
_ -> url'
- let title = fromAttrib "title" tag
- let alt = fromAttrib "alt" tag
- let uid = fromAttrib "id" tag
- let cls = words $ fromAttrib "class" tag
+ let title = T.unpack $ fromAttrib "title" tag
+ let alt = T.unpack $ fromAttrib "alt" tag
+ let uid = T.unpack $ fromAttrib "id" tag
+ let cls = words $ T.unpack $ fromAttrib "class" tag
let getAtt k = case fromAttrib k tag of
"" -> []
- v -> [(k, v)]
- let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"]
+ v -> [(T.unpack k, T.unpack v)]
+ let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
-pCode :: TagParser Inlines
+pCode :: PandocMonad m => TagParser m Inlines
pCode = try $ do
- (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
+ (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
+ let attr = toStringAttr attr'
result <- manyTill pAnyTag (pCloses open)
- return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result
+ return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $
+ innerText result
-pSpan :: TagParser Inlines
+pSpan :: PandocMonad m => TagParser m Inlines
pSpan = try $ do
guardEnabled Ext_native_spans
- TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
+ TagOpen _ attr' <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
+ let attr = toStringAttr attr'
contents <- pInTags "span" inline
- let isSmallCaps = fontVariant == "small-caps"
+ let isSmallCaps = fontVariant == "small-caps" || "smallcaps" `elem` classes
where styleAttr = fromMaybe "" $ lookup "style" attr
fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
+ classes = fromMaybe [] $
+ words <$> lookup "class" attr
let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
return $ tag contents
-pRawHtmlInline :: TagParser Inlines
+pRawHtmlInline :: PandocMonad m => TagParser m Inlines
pRawHtmlInline = do
inplain <- asks inPlain
result <- pSatisfy (tagComment (const True))
<|> if inplain
then pSatisfy (not . isBlockTag)
else pSatisfy isInlineTag
- parseRaw <- getOption readerParseRaw
- if parseRaw
- then return $ B.rawInline "html" $ renderTags' [result]
- else return mempty
+ exts <- getOption readerExtensions
+ let raw = T.unpack $ renderTags' [result]
+ if extensionEnabled Ext_raw_html exts
+ then return $ B.rawInline "html" raw
+ else ignore raw
mathMLToTeXMath :: String -> Either String String
mathMLToTeXMath s = writeTeX <$> readMathML s
-pMath :: Bool -> TagParser Inlines
+toStringAttr :: [(Text, Text)] -> [(String, String)]
+toStringAttr = map go
+ where go (x,y) = (T.unpack x, T.unpack y)
+
+pMath :: PandocMonad m => Bool -> TagParser m Inlines
pMath inCase = try $ do
- open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True)
+ open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True)
-- we'll assume math tags are MathML unless specially marked
-- otherwise...
+ let attr = toStringAttr attr'
unless inCase $
guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr))
- contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math"))
- case mathMLToTeXMath (renderTags $ [open] ++ contents ++ [TagClose "math"]) of
+ contents <- manyTill pAnyTag (pSatisfy (matchTagClose "math"))
+ case mathMLToTeXMath (T.unpack $ renderTags $
+ [open] <> contents <> [TagClose "math"]) of
Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $
- innerText contents
+ T.unpack $ innerText contents
Right [] -> return mempty
Right x -> return $ case lookup "display" attr of
Just "block" -> B.displayMath x
_ -> B.math x
-pInlinesInTags :: String -> (Inlines -> Inlines)
- -> TagParser Inlines
+pInlinesInTags :: PandocMonad m => Text -> (Inlines -> Inlines)
+ -> TagParser m Inlines
pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
-pInTags :: (Monoid a) => String -> TagParser a -> TagParser a
+pInTags :: (PandocMonad m, Monoid a) => Text -> TagParser m a -> TagParser m a
pInTags tagtype parser = pInTags' tagtype (const True) parser
-pInTags' :: (Monoid a) => String -> (Tag String -> Bool) -> TagParser a
- -> TagParser a
+pInTags' :: (PandocMonad m, Monoid a)
+ => Text
+ -> (Tag Text -> Bool)
+ -> TagParser m a
+ -> TagParser m a
pInTags' tagtype tagtest parser = try $ do
pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
-- parses p, preceeded by an optional opening tag
-- and followed by an optional closing tags
-pOptInTag :: String -> TagParser a -> TagParser a
+pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a
pOptInTag tagtype p = try $ do
skipMany pBlank
- optional $ pSatisfy (~== TagOpen tagtype [])
+ optional $ pSatisfy (matchTagOpen tagtype [])
skipMany pBlank
x <- p
skipMany pBlank
- optional $ pSatisfy (~== TagClose tagtype)
+ optional $ pSatisfy (matchTagClose tagtype)
skipMany pBlank
return x
-pCloses :: String -> TagParser ()
+pCloses :: PandocMonad m => Text -> TagParser m ()
pCloses tagtype = try $ do
t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag
case t of
- (TagClose t') | t' == tagtype -> pAnyTag >> return ()
+ (TagClose t') | t' == tagtype -> void pAnyTag
(TagOpen t' _) | t' `closes` tagtype -> return ()
(TagClose "ul") | tagtype == "li" -> return ()
(TagClose "ol") | tagtype == "li" -> return ()
(TagClose "dl") | tagtype == "dd" -> return ()
(TagClose "table") | tagtype == "td" -> return ()
(TagClose "table") | tagtype == "tr" -> return ()
+ (TagClose "td") | tagtype `Set.member` blockHtmlTags -> return ()
+ (TagClose "th") | tagtype `Set.member` blockHtmlTags -> return ()
+ (TagClose t') | tagtype == "p" && t' `Set.member` blockHtmlTags
+ -> return () -- see #3794
_ -> mzero
-pTagText :: TagParser Inlines
+pTagText :: PandocMonad m => TagParser m Inlines
pTagText = try $ do
(TagText str) <- pSatisfy isTagText
st <- getState
qu <- ask
- case flip runReader qu $ runParserT (many pTagContents) st "text" str of
- Left _ -> fail $ "Could not parse `" ++ str ++ "'"
+ parsed <- lift $ lift $
+ flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+ case parsed of
+ Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'"
Right result -> return $ mconcat result
-pBlank :: TagParser ()
+pBlank :: PandocMonad m => TagParser m ()
pBlank = try $ do
(TagText str) <- pSatisfy isTagText
- guard $ all isSpace str
+ guard $ T.all isSpace str
-type InlinesParser = HTMLParser String
+type InlinesParser m = HTMLParser m Text
-pTagContents :: InlinesParser Inlines
+pTagContents :: PandocMonad m => InlinesParser m Inlines
pTagContents =
B.displayMath <$> mathDisplay
<|> B.math <$> mathInline
@@ -770,7 +906,7 @@ pTagContents =
<|> pSymbol
<|> pBad
-pStr :: InlinesParser Inlines
+pStr :: PandocMonad m => InlinesParser m Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
@@ -778,24 +914,24 @@ pStr = do
return $ B.str result
isSpecial :: Char -> Bool
-isSpecial '"' = True
-isSpecial '\'' = True
-isSpecial '.' = True
-isSpecial '-' = True
-isSpecial '$' = True
+isSpecial '"' = True
+isSpecial '\'' = True
+isSpecial '.' = True
+isSpecial '-' = True
+isSpecial '$' = True
isSpecial '\8216' = True
isSpecial '\8217' = True
isSpecial '\8220' = True
isSpecial '\8221' = True
-isSpecial _ = False
+isSpecial _ = False
-pSymbol :: InlinesParser Inlines
+pSymbol :: PandocMonad m => InlinesParser m Inlines
pSymbol = satisfy isSpecial >>= return . B.str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
-pBad :: InlinesParser Inlines
+pBad :: PandocMonad m => InlinesParser m Inlines
pBad = do
c <- satisfy isBad
let c' = case c of
@@ -829,7 +965,7 @@ pBad = do
_ -> '?'
return $ B.str [c']
-pSpace :: InlinesParser Inlines
+pSpace :: PandocMonad m => InlinesParser m Inlines
pSpace = many1 (satisfy isSpace) >>= \xs ->
if '\n' `elem` xs
then return B.softbreak
@@ -839,86 +975,96 @@ pSpace = many1 (satisfy isSpace) >>= \xs ->
-- Constants
--
-eitherBlockOrInline :: [String]
-eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed",
- "del", "ins",
- "progress", "map", "area", "noscript", "script",
- "object", "svg", "video", "source"]
-
-{-
-inlineHtmlTags :: [[Char]]
-inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
- "br", "cite", "code", "dfn", "em", "font", "i", "img",
- "input", "kbd", "label", "q", "s", "samp", "select",
- "small", "span", "strike", "strong", "sub", "sup",
- "textarea", "tt", "u", "var"]
--}
-
-blockHtmlTags :: [String]
-blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
- "blockquote", "body", "button", "canvas",
- "caption", "center", "col", "colgroup", "dd", "dir", "div",
- "dl", "dt", "fieldset", "figcaption", "figure",
- "footer", "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "head", "header", "hgroup", "hr", "html",
- "isindex", "menu", "noframes", "ol", "output", "p", "pre",
- "section", "table", "tbody", "textarea",
- "thead", "tfoot", "ul", "dd",
- "dt", "frameset", "li", "tbody", "td", "tfoot",
- "th", "thead", "tr", "script", "style"]
+eitherBlockOrInline :: Set.Set Text
+eitherBlockOrInline = Set.fromList
+ ["audio", "applet", "button", "iframe", "embed",
+ "del", "ins", "progress", "map", "area", "noscript", "script",
+ "object", "svg", "video", "source"]
+
+blockHtmlTags :: Set.Set Text
+blockHtmlTags = Set.fromList
+ ["?xml", "!DOCTYPE", "address", "article", "aside",
+ "blockquote", "body", "canvas",
+ "caption", "center", "col", "colgroup", "dd", "details",
+ "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure",
+ "footer", "form", "h1", "h2", "h3", "h4",
+ "h5", "h6", "head", "header", "hgroup", "hr", "html",
+ "isindex", "main", "menu", "meta", "noframes", "ol", "output", "p", "pre",
+ "section", "table", "tbody", "textarea",
+ "thead", "tfoot", "ul", "dd",
+ "dt", "frameset", "li", "tbody", "td", "tfoot",
+ "th", "thead", "tr", "script", "style"]
-- We want to allow raw docbook in markdown documents, so we
-- include docbook block tags here too.
-blockDocBookTags :: [String]
-blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
- "orderedlist", "segmentedlist", "simplelist",
- "variablelist", "caution", "important", "note", "tip",
- "warning", "address", "literallayout", "programlisting",
- "programlistingco", "screen", "screenco", "screenshot",
- "synopsis", "example", "informalexample", "figure",
- "informalfigure", "table", "informaltable", "para",
- "simpara", "formalpara", "equation", "informalequation",
- "figure", "screenshot", "mediaobject", "qandaset",
- "procedure", "task", "cmdsynopsis", "funcsynopsis",
- "classsynopsis", "blockquote", "epigraph", "msgset",
- "sidebar", "title"]
-
-epubTags :: [String]
-epubTags = ["case", "switch", "default"]
-
-blockTags :: [String]
-blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags
-
-isInlineTag :: Tag String -> Bool
-isInlineTag t = tagOpen isInlineTagName (const True) t ||
- tagClose isInlineTagName t ||
- tagComment (const True) t
- where isInlineTagName x = x `notElem` blockTags
-
-isBlockTag :: Tag String -> Bool
-isBlockTag t = tagOpen isBlockTagName (const True) t ||
- tagClose isBlockTagName t ||
- tagComment (const True) t
- where isBlockTagName ('?':_) = True
- isBlockTagName ('!':_) = True
- isBlockTagName x = x `elem` blockTags
- || x `elem` eitherBlockOrInline
-
-isTextTag :: Tag String -> Bool
+blockDocBookTags :: Set.Set Text
+blockDocBookTags = Set.fromList
+ ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
+ "orderedlist", "segmentedlist", "simplelist",
+ "variablelist", "caution", "important", "note", "tip",
+ "warning", "address", "literallayout", "programlisting",
+ "programlistingco", "screen", "screenco", "screenshot",
+ "synopsis", "example", "informalexample", "figure",
+ "informalfigure", "table", "informaltable", "para",
+ "simpara", "formalpara", "equation", "informalequation",
+ "figure", "screenshot", "mediaobject", "qandaset",
+ "procedure", "task", "cmdsynopsis", "funcsynopsis",
+ "classsynopsis", "blockquote", "epigraph", "msgset",
+ "sidebar", "title"]
+
+epubTags :: Set.Set Text
+epubTags = Set.fromList ["case", "switch", "default"]
+
+blockTags :: Set.Set Text
+blockTags = Set.unions [blockHtmlTags, blockDocBookTags, epubTags]
+
+class NamedTag a where
+ getTagName :: a -> Maybe Text
+
+instance NamedTag (Tag Text) where
+ getTagName (TagOpen t _) = Just t
+ getTagName (TagClose t) = Just t
+ getTagName _ = Nothing
+
+instance NamedTag (Tag String) where
+ getTagName (TagOpen t _) = Just (T.pack t)
+ getTagName (TagClose t) = Just (T.pack t)
+ getTagName _ = Nothing
+
+isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
+isInlineTag t = isInlineTagName || isCommentTag t
+ where isInlineTagName = case getTagName t of
+ Just x -> x
+ `Set.notMember` blockTags
+ Nothing -> False
+
+isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
+isBlockTag t = isBlockTagName || isTagComment t
+ where isBlockTagName =
+ case getTagName t of
+ Just x
+ | "?" `T.isPrefixOf` x -> True
+ | "!" `T.isPrefixOf` x -> True
+ | otherwise -> x `Set.member` blockTags
+ || x `Set.member` eitherBlockOrInline
+ Nothing -> False
+
+isTextTag :: Tag a -> Bool
isTextTag = tagText (const True)
-isCommentTag :: Tag String -> Bool
+isCommentTag :: Tag a -> Bool
isCommentTag = tagComment (const True)
-- taken from HXT and extended
-- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags
-closes :: String -> String -> Bool
+closes :: Text -> Text -> Bool
_ `closes` "body" = False
_ `closes` "html" = False
"body" `closes` "head" = True
"a" `closes` "a" = True
"li" `closes` "li" = True
"th" `closes` t | t `elem` ["th","td"] = True
+"td" `closes` t | t `elem` ["th","td"] = True
"tr" `closes` t | t `elem` ["th","td","tr"] = True
"dd" `closes` t | t `elem` ["dt", "dd"] = True
"dt" `closes` t | t `elem` ["dt","dd"] = True
@@ -926,12 +1072,12 @@ _ `closes` "html" = False
"optgroup" `closes` "optgroup" = True
"optgroup" `closes` "option" = True
"option" `closes` "option" = True
--- http://www.w3.org/TR/html-markup/p.html
+-- https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote",
"dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section",
+ "h5", "h6", "header", "hr", "main", "menu", "nav", "ol", "p", "pre", "section",
"table", "ul"] = True
-"meta" `closes` "meta" = True
+_ `closes` "meta" = True
"form" `closes` "form" = True
"label" `closes` "label" = True
"map" `closes` "map" = True
@@ -942,17 +1088,18 @@ t `closes` "select" | t /= "option" = True
"tfoot" `closes` t | t `elem` ["thead","colgroup"] = True
"tbody" `closes` t | t `elem` ["tbody","tfoot","thead","colgroup"] = True
t `closes` t2 |
- t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
- t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
+ t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","main","p"] &&
+ t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" or "main"
t1 `closes` t2 |
- t1 `elem` blockTags &&
- t2 `notElem` (blockTags ++ eitherBlockOrInline) = True
+ t1 `Set.member` blockTags &&
+ t2 `Set.notMember` blockTags &&
+ t2 `Set.notMember` eitherBlockOrInline = True
_ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Monad m)
+htmlInBalanced :: (HasReaderOptions st, Monad m)
=> (Tag String -> Bool)
-> ParserT String st m String
htmlInBalanced f = try $ do
@@ -973,8 +1120,11 @@ htmlInBalanced f = try $ do
let cs = ec - sc
lscontents <- unlines <$> count ls anyLine
cscontents <- count cs anyChar
- (_,closetag) <- htmlTag (~== TagClose tn)
- return (lscontents ++ cscontents ++ closetag)
+ closetag <- do
+ x <- many (satisfy (/='>'))
+ char '>'
+ return (x <> ">")
+ return (lscontents <> cscontents <> closetag)
_ -> mzero
_ -> mzero
@@ -992,64 +1142,99 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
go n (t:ts') = (t :) <$> go n ts'
go _ [] = mzero
-hasTagWarning :: [Tag String] -> Bool
+hasTagWarning :: [Tag a] -> Bool
hasTagWarning (TagWarning _:_) = True
-hasTagWarning _ = False
+hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
-htmlTag :: Monad m
+htmlTag :: (HasReaderOptions st, Monad m)
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
lookAhead (char '<')
+ startpos <- getPosition
inp <- getInput
- let (next : _) = canonicalizeTags $ parseTagsOptions
- parseOptions{ optTagWarning = False } inp
- guard $ f next
+ let ts = canonicalizeTags $ parseTagsOptions
+ parseOptions{ optTagWarning = False
+ , optTagPosition = True }
+ (inp ++ " ") -- add space to ensure that
+ -- we get a TagPosition after the tag
+ (next, ln, col) <- case ts of
+ (TagPosition{} : next : TagPosition ln col : _)
+ | f next -> return (next, ln, col)
+ _ -> mzero
+
+ -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
+ -- should NOT be parsed as an HTML tag, see #2277,
+ -- so we exclude . even though it's a valid character
+ -- in XML element names
+ let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_'
+ let isName s = case s of
+ [] -> False
+ ('?':_) -> True -- processing instruction
+ (c:cs) -> isLetter c && all isNameChar cs
+
+ let endpos = if ln == 1
+ then setSourceColumn startpos
+ (sourceColumn startpos + (col - 1))
+ else setSourceColumn (setSourceLine startpos
+ (sourceLine startpos + (ln - 1)))
+ col
+ let endAngle = try $
+ do char '>'
+ pos <- getPosition
+ guard $ pos >= endpos
+
let handleTag tagname = do
- -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
- -- should NOT be parsed as an HTML tag, see #2277
- guard $ not ('.' `elem` tagname)
+ -- basic sanity check, since the parser is very forgiving
+ -- and finds tags in stuff like x<y)
+ guard $ isName tagname
+ guard $ not $ null tagname
-- <https://example.org> should NOT be a tag either.
-- tagsoup will parse it as TagOpen "https:" [("example.org","")]
- guard $ not (null tagname)
guard $ last tagname /= ':'
- rendered <- manyTill anyChar (char '>')
- return (next, rendered ++ ">")
+ char '<'
+ rendered <- manyTill anyChar endAngle
+ return (next, "<" ++ rendered ++ ">")
case next of
TagComment s
| "<!--" `isPrefixOf` inp -> do
- count (length s + 4) anyChar
- skipMany (satisfy (/='>'))
- char '>'
- return (next, "<!--" ++ s ++ "-->")
+ string "<!--"
+ count (length s) anyChar
+ string "-->"
+ stripComments <- getOption readerStripComments
+ if stripComments
+ then return (next, "")
+ else return (next, "<!--" <> s <> "-->")
| otherwise -> fail "bogus comment mode, HTML5 parse error"
- TagOpen tagname _attr -> handleTag tagname
- TagClose tagname -> handleTag tagname
+ TagOpen tagname attr -> do
+ guard $ all (isName . fst) attr
+ handleTag tagname
+ TagClose tagname ->
+ handleTag tagname
_ -> mzero
mkAttr :: [(String, String)] -> Attr
mkAttr attr = (attribsId, attribsClasses, attribsKV)
where attribsId = fromMaybe "" $ lookup "id" attr
- attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes
+ attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr
-- Strip namespace prefixes
-stripPrefixes :: [Tag String] -> [Tag String]
+stripPrefixes :: [Tag Text] -> [Tag Text]
stripPrefixes = map stripPrefix
-stripPrefix :: Tag String -> Tag String
+stripPrefix :: Tag Text -> Tag Text
stripPrefix (TagOpen s as) =
- TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as)
+ TagOpen (stripPrefix' s) (map (first stripPrefix') as)
stripPrefix (TagClose s) = TagClose (stripPrefix' s)
stripPrefix x = x
-stripPrefix' :: String -> String
+stripPrefix' :: Text -> Text
stripPrefix' s =
- case span (/= ':') s of
- (_, "") -> s
- (_, (_:ts)) -> ts
+ if T.null t then s else T.drop 1 t
+ where (_, t) = T.span (/= ':') s
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -1068,9 +1253,13 @@ instance HasHeaderMap HTMLState where
extractHeaderMap = headerMap
updateHeaderMap f s = s{ headerMap = f (headerMap s) }
+instance HasLogMessages HTMLState where
+ addLogMessage m s = s{ logMessages = m : logMessages s }
+ getLogMessages = reverse . logMessages
+
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
-instance HasQuoteContext st (Reader HTMLLocal) where
+instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where
getQuoteContext = asks quoteContext
withQuoteContext q = local (\s -> s{quoteContext = q})
@@ -1088,19 +1277,32 @@ instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
getLastStrPos = getLastStrPos . parserState
+-- For now we need a special verison here; the one in Shared has String type
+renderTags' :: [Tag Text] -> Text
+renderTags' = renderTagsOptions
+ renderOptions{ optMinimize = matchTags ["hr", "br", "img",
+ "meta", "link"]
+ , optRawTag = matchTags ["script", "style"] }
+ where matchTags tags = flip elem tags . T.toLower
+
-- EPUB Specific
--
--
-sectioningContent :: [String]
+sectioningContent :: [Text]
sectioningContent = ["article", "aside", "nav", "section"]
-groupingContent :: [String]
+groupingContent :: [Text]
groupingContent = ["p", "hr", "pre", "blockquote", "ol"
, "ul", "li", "dl", "dt", "dt", "dd"
, "figure", "figcaption", "div", "main"]
+matchTagClose :: Text -> (Tag Text -> Bool)
+matchTagClose t = (~== TagClose t)
+
+matchTagOpen :: Text -> [(Text, Text)] -> (Tag Text -> Bool)
+matchTagOpen t as = (~== TagOpen t as)
{-
@@ -1108,7 +1310,7 @@ types :: [(String, ([String], Int))]
types = -- Document divisions
map (\s -> (s, (["section", "body"], 0)))
["volume", "part", "chapter", "division"]
- ++ -- Document section and components
+ <> -- Document section and components
[
("abstract", ([], 0))]
-}
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 12953bb72..e98c79ed8 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -14,33 +14,40 @@ module Text.Pandoc.Readers.Haddock
( readHaddock
) where
-import Text.Pandoc.Builder (Blocks, Inlines)
-import qualified Text.Pandoc.Builder as B
-import Data.Monoid ((<>))
-import Text.Pandoc.Shared (trim, splitBy)
+import Control.Monad.Except (throwError)
import Data.List (intersperse, stripPrefix)
import Data.Maybe (fromMaybe)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
+import Data.Monoid ((<>))
+import Data.Text (Text, unpack)
import Documentation.Haddock.Parser
import Documentation.Haddock.Types
-import Debug.Trace (trace)
-
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
import Text.Pandoc.Error
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (crFilter, splitBy, trim)
+
-- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse
- -> Either PandocError Pandoc
-readHaddock opts =
+readHaddock :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+ Right result -> return result
+ Left e -> throwError e
+
+readHaddockEither :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse
+ -> Either PandocError Pandoc
+readHaddockEither _opts =
#if MIN_VERSION_haddock_library(1,2,0)
- Right . B.doc . docHToBlocks . trace' . _doc . parseParas
+ Right . B.doc . docHToBlocks . _doc . parseParas
#else
- Right . B.doc . docHToBlocks . trace' . parseParas
+ Right . B.doc . docHToBlocks . parseParas
#endif
- where trace' x = if readerTrace opts
- then trace (show x) x
- else x
docHToBlocks :: DocH String Identifier -> Blocks
docHToBlocks d' =
@@ -90,7 +97,7 @@ docHToBlocks d' =
isPlain (Plain _) = True
isPlain _ = False
extractContents (Plain xs) = xs
- extractContents _ = []
+ extractContents _ = []
docHToInlines :: Bool -> DocH String Identifier -> Inlines
docHToInlines isCode d' =
@@ -135,7 +142,7 @@ makeExample prompt expression result =
<> B.space
<> B.codeWith ([], ["haskell","expr"], []) (trim expression)
<> B.linebreak
- <> (mconcat $ intersperse B.linebreak $ map coder result')
+ <> mconcat (intersperse B.linebreak $ map coder result')
where
-- 1. drop trailing whitespace from the prompt, remember the prefix
prefix = takeWhile (`elem` " \t") prompt
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
new file mode 100644
index 000000000..8158a4511
--- /dev/null
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -0,0 +1,496 @@
+{-# LANGUAGE ExplicitForAll, TupleSections #-}
+module Text.Pandoc.Readers.JATS ( readJATS ) where
+import Control.Monad.State.Strict
+import Data.Char (isDigit, isSpace, toUpper)
+import Data.Default
+import Data.Generics
+import Data.List (intersperse)
+import qualified Data.Map as Map
+import Data.Maybe (maybeToList, fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (underlineSpan, crFilter, safeRead)
+import Text.TeXMath (readMathML, writeTeX)
+import Text.XML.Light
+import qualified Data.Set as S (fromList, member)
+import Data.Set ((\\))
+
+type JATS m = StateT JATSState m
+
+data JATSState = JATSState{ jatsSectionLevel :: Int
+ , jatsQuoteType :: QuoteType
+ , jatsMeta :: Meta
+ , jatsBook :: Bool
+ , jatsFigureTitle :: Inlines
+ , jatsContent :: [Content]
+ } deriving Show
+
+instance Default JATSState where
+ def = JATSState{ jatsSectionLevel = 0
+ , jatsQuoteType = DoubleQuote
+ , jatsMeta = mempty
+ , jatsBook = False
+ , jatsFigureTitle = mempty
+ , jatsContent = [] }
+
+
+readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS _ inp = do
+ let tree = normalizeTree . parseXML
+ $ T.unpack $ crFilter inp
+ (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
+ return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
+
+-- normalize input, consolidating adjacent Text and CRef elements
+normalizeTree :: [Content] -> [Content]
+normalizeTree = everywhere (mkT go)
+ where go :: [Content] -> [Content]
+ go (Text (CData CDataRaw _ _):xs) = xs
+ go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
+ Text (CData CDataText (s1 ++ s2) z):xs
+ go (Text (CData CDataText s1 z):CRef r:xs) =
+ Text (CData CDataText (s1 ++ convertEntity r) z):xs
+ go (CRef r:Text (CData CDataText s1 z):xs) =
+ Text (CData CDataText (convertEntity r ++ s1) z):xs
+ go (CRef r1:CRef r2:xs) =
+ Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+ go xs = xs
+
+convertEntity :: String -> String
+convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+
+-- convenience function to get an attribute value, defaulting to ""
+attrValue :: String -> Element -> String
+attrValue attr =
+ fromMaybe "" . maybeAttrValue attr
+
+maybeAttrValue :: String -> Element -> Maybe String
+maybeAttrValue attr elt =
+ lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+
+-- convenience function
+named :: String -> Element -> Bool
+named s e = qName (elName e) == s
+
+--
+
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m ()
+addMeta field val = modify (setMeta field val)
+
+instance HasMeta JATSState where
+ setMeta field v s = s {jatsMeta = setMeta field v (jatsMeta s)}
+ deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)}
+
+isBlockElement :: Content -> Bool
+isBlockElement (Elem e) = qName (elName e) `S.member` blocktags
+ where blocktags = S.fromList (paragraphLevel ++ lists ++ mathML ++ other) \\ S.fromList inlinetags
+ paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap",
+ "code", "fig", "fig-group", "graphic", "media", "preformat",
+ "supplementary-material", "table-wrap", "table-wrap-group",
+ "alternatives", "disp-formula", "disp-formula-group"]
+ lists = ["def-list", "list"]
+ mathML = ["tex-math", "mml:math"]
+ other = ["p", "related-article", "related-object", "ack", "disp-quote",
+ "speech", "statement", "verse-group", "x"]
+ inlinetags = ["email", "ext-link", "uri", "inline-supplementary-material",
+ "related-article", "related-object", "hr", "bold", "fixed-case",
+ "italic", "monospace", "overline", "overline-start", "overline-end",
+ "roman", "sans-serif", "sc", "strike", "underline", "underline-start",
+ "underline-end", "ruby", "alternatives", "inline-graphic", "private-char",
+ "chem-struct", "inline-formula", "tex-math", "mml:math", "abbrev",
+ "milestone-end", "milestone-start", "named-content", "styled-content",
+ "fn", "target", "xref", "sub", "sup", "x", "address", "array",
+ "boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic",
+ "media", "preformat", "supplementary-material", "table-wrap",
+ "table-wrap-group", "disp-formula", "disp-formula-group",
+ "citation-alternatives", "element-citation", "mixed-citation",
+ "nlm-citation", "award-id", "funding-source", "open-access",
+ "def-list", "list", "ack", "disp-quote", "speech", "statement",
+ "verse-group"]
+isBlockElement _ = False
+
+-- Trim leading and trailing newline characters
+trimNl :: String -> String
+trimNl = reverse . go . reverse . go
+ where go ('\n':xs) = xs
+ go xs = xs
+
+-- function that is used by both graphic (in parseBlock)
+-- and inline-graphic (in parseInline)
+getGraphic :: PandocMonad m => Element -> JATS m Inlines
+getGraphic e = do
+ let atVal a = attrValue a e
+ attr = (atVal "id", words $ atVal "role", [])
+ imageUrl = atVal "href"
+ captionOrLabel = case filterChild (\x -> named "caption" x
+ || named "label" x) e of
+ Nothing -> return mempty
+ Just z -> mconcat <$>
+ mapM parseInline (elContent z)
+ figTitle <- gets jatsFigureTitle
+ let (caption, title) = if isNull figTitle
+ then (captionOrLabel, atVal "title")
+ else (return figTitle, "fig:")
+ fmap (imageWith attr imageUrl title) caption
+
+getBlocks :: PandocMonad m => Element -> JATS m Blocks
+getBlocks e = mconcat <$>
+ mapM parseBlock (elContent e)
+
+
+parseBlock :: PandocMonad m => Content -> JATS m Blocks
+parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
+parseBlock (Text (CData _ s _)) = if all isSpace s
+ then return mempty
+ else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ map toUpper x
+parseBlock (Elem e) =
+ case qName (elName e) of
+ "p" -> parseMixed para (elContent e)
+ "code" -> codeBlockWithLang
+ "preformat" -> codeBlockWithLang
+ "disp-quote" -> parseBlockquote
+ "list" -> case attrValue "list-type" e of
+ "bullet" -> bulletList <$> listitems
+ listType -> do
+ let start = fromMaybe 1 $
+ (strContent <$> (filterElement (named "list-item") e
+ >>= filterElement (named "lable")))
+ >>= safeRead
+ orderedListWith (start, parseListStyleType listType, DefaultDelim)
+ <$> listitems
+ "def-list" -> definitionList <$> deflistitems
+ "sec" -> gets jatsSectionLevel >>= sect . (+1)
+ "graphic" -> para <$> getGraphic e
+ "journal-meta" -> parseMetadata e
+ "article-meta" -> parseMetadata e
+ "custom-meta" -> parseMetadata e
+ "title" -> return mempty -- processed by header
+ "table" -> parseTable
+ "fig" -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e
+ "table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], []) <$> getBlocks e
+ "caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6
+ "ref-list" -> parseRefList e
+ "?xml" -> return mempty
+ _ -> getBlocks e
+ where parseMixed container conts = do
+ let (ils,rest) = break isBlockElement conts
+ ils' <- (trimInlines . mconcat) <$> mapM parseInline ils
+ let p = if ils' == mempty then mempty else container ils'
+ case rest of
+ [] -> return p
+ (r:rs) -> do
+ b <- parseBlock r
+ x <- parseMixed container rs
+ return $ p <> b <> x
+ codeBlockWithLang = do
+ let classes' = case attrValue "language" e of
+ "" -> []
+ x -> [x]
+ return $ codeBlockWith (attrValue "id" e, classes', [])
+ $ trimNl $ strContentRecursive e
+ parseBlockquote = do
+ attrib <- case filterChild (named "attribution") e of
+ Nothing -> return mempty
+ Just z -> (para . (str "— " <>) . mconcat)
+ <$>
+ mapM parseInline (elContent z)
+ contents <- getBlocks e
+ return $ blockQuote (contents <> attrib)
+ parseListStyleType "roman-lower" = LowerRoman
+ parseListStyleType "roman-upper" = UpperRoman
+ parseListStyleType "alpha-lower" = LowerAlpha
+ parseListStyleType "alpha-upper" = UpperAlpha
+ parseListStyleType _ = DefaultStyle
+ listitems = mapM getBlocks $ filterChildren (named "list-item") e
+ deflistitems = mapM parseVarListEntry $ filterChildren
+ (named "def-item") e
+ parseVarListEntry e' = do
+ let terms = filterChildren (named "term") e'
+ let items = filterChildren (named "def") e'
+ terms' <- mapM getInlines terms
+ items' <- mapM getBlocks items
+ return (mconcat $ intersperse (str "; ") terms', items')
+ parseTable = do
+ let isCaption x = named "title" x || named "caption" x
+ caption <- case filterChild isCaption e of
+ Just t -> getInlines t
+ Nothing -> return mempty
+ let e' = fromMaybe e $ filterChild (named "tgroup") e
+ let isColspec x = named "colspec" x || named "col" x
+ let colspecs = case filterChild (named "colgroup") e' of
+ Just c -> filterChildren isColspec c
+ _ -> filterChildren isColspec e'
+ let isRow x = named "row" x || named "tr" x
+ headrows <- case filterChild (named "thead") e' of
+ Just h -> case filterChild isRow h of
+ Just x -> parseRow x
+ Nothing -> return []
+ Nothing -> return []
+ bodyrows <- case filterChild (named "tbody") e' of
+ Just b -> mapM parseRow
+ $ filterChildren isRow b
+ Nothing -> mapM parseRow
+ $ filterChildren isRow e'
+ let toAlignment c = case findAttr (unqual "align") c of
+ Just "left" -> AlignLeft
+ Just "right" -> AlignRight
+ Just "center" -> AlignCenter
+ _ -> AlignDefault
+ let toWidth c = case findAttr (unqual "colwidth") c of
+ Just w -> fromMaybe 0
+ $ safeRead $ '0': filter (\x ->
+ isDigit x || x == '.') w
+ Nothing -> 0 :: Double
+ let numrows = case bodyrows of
+ [] -> 0
+ xs -> maximum $ map length xs
+ let aligns = case colspecs of
+ [] -> replicate numrows AlignDefault
+ cs -> map toAlignment cs
+ let widths = case colspecs of
+ [] -> replicate numrows 0
+ cs -> let ws = map toWidth cs
+ tot = sum ws
+ in if all (> 0) ws
+ then map (/ tot) ws
+ else replicate numrows 0
+ let headrows' = if null headrows
+ then replicate numrows mempty
+ else headrows
+ return $ table caption (zip aligns widths)
+ headrows' bodyrows
+ isEntry x = named "entry" x || named "td" x || named "th" x
+ parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
+ sect n = do isbook <- gets jatsBook
+ let n' = if isbook || n == 0 then n + 1 else n
+ headerText <- case filterChild (named "title") e `mplus`
+ (filterChild (named "info") e >>=
+ filterChild (named "title")) of
+ Just t -> getInlines t
+ Nothing -> return mempty
+ oldN <- gets jatsSectionLevel
+ modify $ \st -> st{ jatsSectionLevel = n }
+ b <- getBlocks e
+ let ident = attrValue "id" e
+ modify $ \st -> st{ jatsSectionLevel = oldN }
+ return $ headerWith (ident,[],[]) n' headerText <> b
+
+getInlines :: PandocMonad m => Element -> JATS m Inlines
+getInlines e' = (trimInlines . mconcat) <$>
+ mapM parseInline (elContent e')
+
+parseMetadata :: PandocMonad m => Element -> JATS m Blocks
+parseMetadata e = do
+ getTitle e
+ getAuthors e
+ getAffiliations e
+ return mempty
+
+getTitle :: PandocMonad m => Element -> JATS m ()
+getTitle e = do
+ tit <- case filterElement (named "article-title") e of
+ Just s -> getInlines s
+ Nothing -> return mempty
+ subtit <- case filterElement (named "subtitle") e of
+ Just s -> (text ": " <>) <$>
+ getInlines s
+ Nothing -> return mempty
+ when (tit /= mempty) $ addMeta "title" tit
+ when (subtit /= mempty) $ addMeta "subtitle" subtit
+
+getAuthors :: PandocMonad m => Element -> JATS m ()
+getAuthors e = do
+ authors <- mapM getContrib $ filterElements
+ (\x -> named "contrib" x &&
+ attrValue "contrib-type" x == "author") e
+ authorNotes <- mapM getInlines $ filterElements (named "author-notes") e
+ let authors' = case (reverse authors, authorNotes) of
+ ([], _) -> []
+ (_, []) -> authors
+ (a:as, ns) -> reverse as ++ [a <> mconcat ns]
+ unless (null authors) $ addMeta "author" authors'
+
+getAffiliations :: PandocMonad m => Element -> JATS m ()
+getAffiliations x = do
+ affs <- mapM getInlines $ filterChildren (named "aff") x
+ unless (null affs) $ addMeta "institute" affs
+
+getContrib :: PandocMonad m => Element -> JATS m Inlines
+getContrib x = do
+ given <- maybe (return mempty) getInlines
+ $ filterElement (named "given-names") x
+ family <- maybe (return mempty) getInlines
+ $ filterElement (named "surname") x
+ if given == mempty && family == mempty
+ then return mempty
+ else if given == mempty || family == mempty
+ then return $ given <> family
+ else return $ given <> space <> family
+
+parseRefList :: PandocMonad m => Element -> JATS m Blocks
+parseRefList e = do
+ refs <- mapM parseRef $ filterChildren (named "ref") e
+ addMeta "references" refs
+ return mempty
+
+parseRef :: PandocMonad m
+ => Element -> JATS m (Map.Map String MetaValue)
+parseRef e = do
+ let refId = text $ attrValue "id" e
+ let getInlineText n = maybe (return mempty) getInlines . filterChild (named n)
+ case filterChild (named "element-citation") e of
+ Just c -> do
+ let refType = text $
+ case attrValue "publication-type" c of
+ "journal" -> "article-journal"
+ x -> x
+ (refTitle, refContainerTitle) <- do
+ t <- getInlineText "article-title" c
+ ct <- getInlineText "source" c
+ if t == mempty
+ then return (ct, mempty)
+ else return (t, ct)
+ refLabel <- getInlineText "label" c
+ refYear <- getInlineText "year" c
+ refVolume <- getInlineText "volume" c
+ refFirstPage <- getInlineText "fpage" c
+ refLastPage <- getInlineText "lpage" c
+ refPublisher <- getInlineText "publisher-name" c
+ refPublisherPlace <- getInlineText "publisher-loc" c
+ let refPages = refFirstPage <> (if refLastPage == mempty
+ then mempty
+ else text "\x2013" <> refLastPage)
+ let personGroups' = filterChildren (named "person-group") c
+ let getName nm = do
+ given <- maybe (return mempty) getInlines
+ $ filterChild (named "given-names") nm
+ family <- maybe (return mempty) getInlines
+ $ filterChild (named "surname") nm
+ return $ toMetaValue $ Map.fromList [
+ ("given", given)
+ , ("family", family)
+ ]
+ personGroups <- mapM (\pg ->
+ do names <- mapM getName
+ (filterChildren (named "name") pg)
+ return (attrValue "person-group-type" pg,
+ toMetaValue names))
+ personGroups'
+ return $ Map.fromList $
+ [ ("id", toMetaValue refId)
+ , ("type", toMetaValue refType)
+ , ("title", toMetaValue refTitle)
+ , ("container-title", toMetaValue refContainerTitle)
+ , ("publisher", toMetaValue refPublisher)
+ , ("publisher-place", toMetaValue refPublisherPlace)
+ , ("title", toMetaValue refTitle)
+ , ("issued", toMetaValue
+ $ Map.fromList [
+ ("year", refYear)
+ ])
+ , ("volume", toMetaValue refVolume)
+ , ("page", toMetaValue refPages)
+ , ("citation-label", toMetaValue refLabel)
+ ] ++ personGroups
+ Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
+ -- TODO handle mixed-citation
+
+strContentRecursive :: Element -> String
+strContentRecursive = strContent .
+ (\e' -> e'{ elContent = map elementToStr $ elContent e' })
+
+elementToStr :: Content -> Content
+elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
+elementToStr x = x
+
+parseInline :: PandocMonad m => Content -> JATS m Inlines
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) =
+ return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
+parseInline (Elem e) =
+ case qName (elName e) of
+ "italic" -> emph <$> innerInlines
+ "bold" -> strong <$> innerInlines
+ "strike" -> strikeout <$> innerInlines
+ "sub" -> subscript <$> innerInlines
+ "sup" -> superscript <$> innerInlines
+ "underline" -> underlineSpan <$> innerInlines
+ "break" -> return linebreak
+ "sc" -> smallcaps <$> innerInlines
+
+ "code" -> codeWithLang
+ "monospace" -> codeWithLang
+
+ "inline-graphic" -> getGraphic e
+ "disp-quote" -> do
+ qt <- gets jatsQuoteType
+ let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
+ modify $ \st -> st{ jatsQuoteType = qt' }
+ contents <- innerInlines
+ modify $ \st -> st{ jatsQuoteType = qt }
+ return $ if qt == SingleQuote
+ then singleQuoted contents
+ else doubleQuoted contents
+
+ "xref" -> do
+ ils <- innerInlines
+ let rid = attrValue "rid" e
+ let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e
+ let attr = (attrValue "id" e, [], maybeToList refType)
+ return $ if refType == Just ("ref-type","bibr")
+ then cite [Citation{
+ citationId = rid
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0}] ils
+ else linkWith attr ('#' : rid) "" ils
+ "ext-link" -> do
+ ils <- innerInlines
+ let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+ Just h -> h
+ _ -> '#' : attrValue "rid" e
+ let ils' = if ils == mempty then str href else ils
+ let attr = (attrValue "id" e, [], [])
+ return $ linkWith attr href title ils'
+
+ "disp-formula" -> formula displayMath
+ "inline-formula" -> formula math
+ "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
+ "tex-math" -> return . math $ strContent e
+
+ "email" -> return $ link ("mailto:" ++ strContent e) ""
+ $ str $ strContent e
+ "uri" -> return $ link (strContent e) "" $ str $ strContent e
+ "fn" -> (note . mconcat) <$>
+ mapM parseBlock (elContent e)
+ _ -> innerInlines
+ where innerInlines = (trimInlines . mconcat) <$>
+ mapM parseInline (elContent e)
+ mathML x =
+ case readMathML . showElement $ everywhere (mkT removePrefix) x of
+ Left _ -> mempty
+ Right m -> writeTeX m
+ formula constructor = do
+ let whereToLook = fromMaybe e $ filterElement (named "alternatives") e
+ texMaths = map strContent $
+ filterChildren (named "tex-math") whereToLook
+ mathMLs = map mathML $
+ filterChildren isMathML whereToLook
+ return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs
+
+ isMathML x = qName (elName x) == "math" &&
+ qPrefix (elName x) == Just "mml"
+ removePrefix elname = elname { qPrefix = Nothing }
+ codeWithLang = do
+ let classes' = case attrValue "language" e of
+ "" -> []
+ l -> [l]
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index edcf35e51..57d2803ba 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,6 +1,10 @@
-{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +23,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.LaTeX
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -27,218 +31,596 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Portability : portable
Conversion of LaTeX to 'Pandoc' document.
+
-}
module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+ applyMacros,
rawLaTeXInline,
rawLaTeXBlock,
inlineCommand,
- handleIncludes
+ tokenize,
+ untokenize
) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
- mathDisplay, mathInline)
-import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Char ( chr, ord, isLetter, isAlphaNum )
-import Control.Monad.Trans (lift)
+import Control.Applicative (many, optional, (<|>))
import Control.Monad
-import Text.Pandoc.Builder
-import Control.Applicative ((<|>), many, optional)
-import Data.Maybe (fromMaybe, maybeToList)
-import System.Environment (getEnv)
-import System.FilePath (replaceExtension, (</>), takeExtension, addExtension)
-import Data.List (intercalate)
+import Control.Monad.Except (throwError)
+import Control.Monad.Trans (lift)
+import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower)
+import Data.Default
+import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
-import qualified Control.Exception as E
-import Text.Pandoc.Highlighting (fromListingsLanguage)
+import Data.Maybe (fromMaybe, maybeToList)
+import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
+import Safe (minimumDef)
+import System.FilePath (addExtension, replaceExtension, takeExtension)
+import Text.Pandoc.BCP47 (Lang (..), renderLang)
+import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv,
+ readFileFromDirs, report, setResourcePath,
+ setTranslations, translateTerm, trace)
+import Text.Pandoc.Error (PandocError (PandocMacroLoop, PandocParseError, PandocParsecError))
+import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
-import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
+ Tok (..), TokType (..))
+import Text.Pandoc.Shared
+import qualified Text.Pandoc.Translations as Translations
+import Text.Pandoc.Walk
+import Text.Parsec.Pos
+import qualified Text.Pandoc.Builder as B
--- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Either PandocError Pandoc
-readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts }
+-- for debugging:
+-- import Text.Pandoc.Extensions (getDefaultExtensions)
+-- import Text.Pandoc.Class (runIOorExplode, PandocIO)
+-- import Debug.Trace (traceShowId)
-parseLaTeX :: LP Pandoc
+-- | Parse LaTeX from string and return 'Pandoc' document.
+readLaTeX :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> m Pandoc
+readLaTeX opts ltx = do
+ parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
+ (tokenize "source" (crFilter ltx))
+ case parsed of
+ Right result -> return result
+ Left e -> throwError $ PandocParsecError (T.unpack ltx) e
+
+parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
bs <- blocks
eof
st <- getState
- let meta = stateMeta st
- let (Pandoc _ bs') = doc bs
+ let meta = sMeta st
+ let doc' = doc bs
+ let headerLevel (Header n _ _) = [n]
+ headerLevel _ = []
+ let bottomLevel = minimumDef 1 $ query headerLevel doc'
+ let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
+ adjustHeaders _ x = x
+ let (Pandoc _ bs') =
+ -- handle the case where you have \part or \chapter
+ (if bottomLevel < 1
+ then walk (adjustHeaders (1 - bottomLevel))
+ else id) $
+ walk (resolveRefs (sLabels st)) doc'
return $ Pandoc meta bs'
-type LP = Parser String ParserState
-
-anyControlSeq :: LP String
-anyControlSeq = do
- char '\\'
- next <- option '\n' anyChar
- case next of
- '\n' -> return ""
- c | isLetter c -> (c:) <$> (many letter <* optional sp)
- | otherwise -> return [c]
-
-controlSeq :: String -> LP String
-controlSeq name = try $ do
- char '\\'
- case name of
- "" -> mzero
- [c] | not (isLetter c) -> string [c]
- cs -> string cs <* notFollowedBy letter <* optional sp
- return name
-
-dimenarg :: LP String
-dimenarg = try $ do
- ch <- option "" $ string "="
- num <- many1 digit
- dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
- return $ ch ++ num ++ dim
-
-sp :: LP ()
-sp = whitespace <|> endline
-
-whitespace :: LP ()
-whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
-
-endline :: LP ()
-endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)
+resolveRefs :: M.Map String [Inline] -> Inline -> Inline
+resolveRefs labels x@(Link (ident,classes,kvs) _ _) =
+ case (lookup "reference-type" kvs,
+ lookup "reference" kvs) of
+ (Just "ref", Just lab) ->
+ case M.lookup lab labels of
+ Just txt -> Link (ident,classes,kvs) txt ('#':lab, "")
+ Nothing -> x
+ _ -> x
+resolveRefs _ x = x
+
+
+-- testParser :: LP PandocIO a -> Text -> IO a
+-- testParser p t = do
+-- res <- runIOorExplode (runParserT p defaultLaTeXState{
+-- sOptions = def{ readerExtensions =
+-- enableExtension Ext_raw_tex $
+-- getDefaultExtensions "latex" }} "source" (tokenize "source" t))
+-- case res of
+-- Left e -> error (show e)
+-- Right r -> return r
+
+newtype HeaderNum = HeaderNum [Int]
+ deriving (Show)
+
+renderHeaderNum :: HeaderNum -> String
+renderHeaderNum (HeaderNum xs) =
+ intercalate "." (map show xs)
+
+incrementHeaderNum :: Int -> HeaderNum -> HeaderNum
+incrementHeaderNum level (HeaderNum ns) = HeaderNum $
+ case reverse (take level (ns ++ repeat 0)) of
+ (x:xs) -> reverse (x+1 : xs)
+ [] -> [] -- shouldn't happen
+
+data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
+ , sMeta :: Meta
+ , sQuoteContext :: QuoteContext
+ , sMacros :: M.Map Text Macro
+ , sContainers :: [String]
+ , sHeaders :: M.Map Inlines String
+ , sLogMessages :: [LogMessage]
+ , sIdentifiers :: Set.Set String
+ , sVerbatimMode :: Bool
+ , sCaption :: Maybe Inlines
+ , sInListItem :: Bool
+ , sInTableCell :: Bool
+ , sLastHeaderNum :: HeaderNum
+ , sLabels :: M.Map String [Inline]
+ , sToggles :: M.Map String Bool
+ }
+ deriving Show
+
+defaultLaTeXState :: LaTeXState
+defaultLaTeXState = LaTeXState{ sOptions = def
+ , sMeta = nullMeta
+ , sQuoteContext = NoQuote
+ , sMacros = M.empty
+ , sContainers = []
+ , sHeaders = M.empty
+ , sLogMessages = []
+ , sIdentifiers = Set.empty
+ , sVerbatimMode = False
+ , sCaption = Nothing
+ , sInListItem = False
+ , sInTableCell = False
+ , sLastHeaderNum = HeaderNum []
+ , sLabels = M.empty
+ , sToggles = M.empty
+ }
+
+instance PandocMonad m => HasQuoteContext LaTeXState m where
+ getQuoteContext = sQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = sQuoteContext oldState
+ setState oldState { sQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { sQuoteContext = oldQuoteContext }
+ return result
+
+instance HasLogMessages LaTeXState where
+ addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
+ getLogMessages st = reverse $ sLogMessages st
+
+instance HasIdentifierList LaTeXState where
+ extractIdentifierList = sIdentifiers
+ updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
+
+instance HasIncludeFiles LaTeXState where
+ getIncludeFiles = sContainers
+ addIncludeFile f s = s{ sContainers = f : sContainers s }
+ dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
+
+instance HasHeaderMap LaTeXState where
+ extractHeaderMap = sHeaders
+ updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
+
+instance HasMacros LaTeXState where
+ extractMacros st = sMacros st
+ updateMacros f st = st{ sMacros = f (sMacros st) }
+
+instance HasReaderOptions LaTeXState where
+ extractReaderOptions = sOptions
+
+instance HasMeta LaTeXState where
+ setMeta field val st =
+ st{ sMeta = setMeta field val $ sMeta st }
+ deleteMeta field st =
+ st{ sMeta = deleteMeta field $ sMeta st }
+
+instance Default LaTeXState where
+ def = defaultLaTeXState
+
+type LP m = ParserT [Tok] LaTeXState m
+
+withVerbatimMode :: PandocMonad m => LP m a -> LP m a
+withVerbatimMode parser = do
+ updateState $ \st -> st{ sVerbatimMode = True }
+ result <- parser
+ updateState $ \st -> st{ sVerbatimMode = False }
+ return result
+
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => LP m a -> ParserT String s m (a, String)
+rawLaTeXParser parser = do
+ inp <- getInput
+ let toks = tokenize "source" $ T.pack inp
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ let rawparser = (,) <$> withRaw parser <*> getState
+ res <- lift $ runParserT rawparser lstate "chunk" toks
+ case res of
+ Left _ -> mzero
+ Right ((val, raw), st) -> do
+ updateState (updateMacros (sMacros st <>))
+ rawstring <- takeP (T.length (untokenize raw))
+ return (val, rawstring)
+
+applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => String -> ParserT String s m String
+applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
+ do let retokenize = doMacros 0 *>
+ (toksToString <$> many (satisfyTok (const True)))
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
+ case res of
+ Left e -> fail (show e)
+ Right s' -> return s'
+
+rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m String
+rawLaTeXBlock = do
+ lookAhead (try (char '\\' >> letter))
+ -- we don't want to apply newly defined latex macros to their own
+ -- definitions:
+ snd <$> rawLaTeXParser macroDef
+ <|> ((snd <$> rawLaTeXParser (environment <|> blockCommand)) >>= applyMacros)
+
+rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m String
+rawLaTeXInline = do
+ lookAhead (try (char '\\' >> letter))
+ rawLaTeXParser (inlineEnvironment <|> inlineCommand') >>= applyMacros . snd
+
+inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
+inlineCommand = do
+ lookAhead (try (char '\\' >> letter))
+ fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand')
+
+tokenize :: SourceName -> Text -> [Tok]
+tokenize sourcename = totoks (initialPos sourcename)
+
+totoks :: SourcePos -> Text -> [Tok]
+totoks pos t =
+ case T.uncons t of
+ Nothing -> []
+ Just (c, rest)
+ | c == '\n' ->
+ Tok pos Newline "\n"
+ : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
+ | isSpaceOrTab c ->
+ let (sps, rest') = T.span isSpaceOrTab t
+ in Tok pos Spaces sps
+ : totoks (incSourceColumn pos (T.length sps))
+ rest'
+ | isAlphaNum c ->
+ let (ws, rest') = T.span isAlphaNum t
+ in Tok pos Word ws
+ : totoks (incSourceColumn pos (T.length ws)) rest'
+ | c == '%' ->
+ let (cs, rest') = T.break (== '\n') rest
+ in Tok pos Comment ("%" <> cs)
+ : totoks (incSourceColumn pos (1 + T.length cs)) rest'
+ | c == '\\' ->
+ case T.uncons rest of
+ Nothing -> [Tok pos (CtrlSeq " ") "\\"]
+ Just (d, rest')
+ | isLetterOrAt d ->
+ -- \makeatletter is common in macro defs;
+ -- ideally we should make tokenization sensitive
+ -- to \makeatletter and \makeatother, but this is
+ -- probably best for now
+ let (ws, rest'') = T.span isLetterOrAt rest
+ (ss, rest''') = T.span isSpaceOrTab rest''
+ in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
+ : totoks (incSourceColumn pos
+ (1 + T.length ws + T.length ss)) rest'''
+ | isSpaceOrTab d || d == '\n' ->
+ let (w1, r1) = T.span isSpaceOrTab rest
+ (w2, (w3, r3)) = case T.uncons r1 of
+ Just ('\n', r2)
+ -> (T.pack "\n",
+ T.span isSpaceOrTab r2)
+ _ -> (mempty, (mempty, r1))
+ ws = "\\" <> w1 <> w2 <> w3
+ in case T.uncons r3 of
+ Just ('\n', _) ->
+ Tok pos (CtrlSeq " ") ("\\" <> w1)
+ : totoks (incSourceColumn pos (T.length ws))
+ r1
+ _ ->
+ Tok pos (CtrlSeq " ") ws
+ : totoks (incSourceColumn pos (T.length ws))
+ r3
+ | otherwise ->
+ Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
+ : totoks (incSourceColumn pos 2) rest'
+ | c == '#' ->
+ let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
+ in case safeRead (T.unpack t1) of
+ Just i ->
+ Tok pos (Arg i) ("#" <> t1)
+ : totoks (incSourceColumn pos (1 + T.length t1)) t2
+ Nothing ->
+ Tok pos Symbol "#"
+ : totoks (incSourceColumn pos 1) t2
+ | c == '^' ->
+ case T.uncons rest of
+ Just ('^', rest') ->
+ case T.uncons rest' of
+ Just (d, rest'')
+ | isLowerHex d ->
+ case T.uncons rest'' of
+ Just (e, rest''') | isLowerHex e ->
+ Tok pos Esc2 (T.pack ['^','^',d,e])
+ : totoks (incSourceColumn pos 4) rest'''
+ _ ->
+ Tok pos Esc1 (T.pack ['^','^',d])
+ : totoks (incSourceColumn pos 3) rest''
+ | d < '\128' ->
+ Tok pos Esc1 (T.pack ['^','^',d])
+ : totoks (incSourceColumn pos 3) rest''
+ _ -> Tok pos Symbol "^" :
+ Tok (incSourceColumn pos 1) Symbol "^" :
+ totoks (incSourceColumn pos 2) rest'
+ _ -> Tok pos Symbol "^"
+ : totoks (incSourceColumn pos 1) rest
+ | otherwise ->
+ Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
+
+isSpaceOrTab :: Char -> Bool
+isSpaceOrTab ' ' = True
+isSpaceOrTab '\t' = True
+isSpaceOrTab _ = False
+
+isLetterOrAt :: Char -> Bool
+isLetterOrAt '@' = True
+isLetterOrAt c = isLetter c
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
-tildeEscape :: LP Char
-tildeEscape = try $ do
- string "^^"
- c <- satisfy (\x -> x >= '\0' && x <= '\128')
- d <- if isLowerHex c
- then option "" $ count 1 (satisfy isLowerHex)
- else return ""
- if null d
- then case ord c of
- x | x >= 64 && x <= 127 -> return $ chr (x - 64)
- | otherwise -> return $ chr (x + 64)
- else return $ chr $ read ('0':'x':c:d)
-
-comment :: LP ()
-comment = do
- char '%'
- skipMany (satisfy (/='\n'))
- optional newline
- return ()
+untokenize :: [Tok] -> Text
+untokenize = mconcat . map untoken
+
+untoken :: Tok -> Text
+untoken (Tok _ _ t) = t
+
+satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
+satisfyTok f =
+ try $ do
+ res <- tokenPrim (T.unpack . untoken) updatePos matcher
+ doMacros 0 -- apply macros on remaining input stream
+ return res
+ where matcher t | f t = Just t
+ | otherwise = Nothing
+ updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
+ updatePos _spos _ (Tok pos _ _ : _) = pos
+ updatePos spos _ [] = incSourceColumn spos 1
+
+doMacros :: PandocMonad m => Int -> LP m ()
+doMacros n = do
+ verbatimMode <- sVerbatimMode <$> getState
+ unless verbatimMode $ do
+ inp <- getInput
+ case inp of
+ Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos name ts
+ Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos ("end" <> name) ts
+ Tok _ (CtrlSeq "expandafter") _ : t : ts
+ -> do setInput ts
+ doMacros n
+ getInput >>= setInput . combineTok t
+ Tok spos (CtrlSeq name) _ : ts
+ -> handleMacros spos name ts
+ _ -> return ()
+ where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
+ | T.all isLetterOrAt w =
+ Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
+ where (x1, x2) = T.break isSpaceOrTab x
+ combineTok t ts = t:ts
+ handleMacros spos name ts = do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Nothing -> return ()
+ Just (Macro expansionPoint numargs optarg newtoks) -> do
+ setInput ts
+ let getarg = try $ spaces >> bracedOrToken
+ args <- case optarg of
+ Nothing -> count numargs getarg
+ Just o ->
+ (:) <$> option o bracketedToks
+ <*> count (numargs - 1) getarg
+ -- first boolean param is true if we're tokenizing
+ -- an argument (in which case we don't want to
+ -- expand #1 etc.)
+ let addTok False (Tok _ (Arg i) _) acc | i > 0
+ , i <= numargs =
+ foldr (addTok True) acc (args !! (i - 1))
+ -- add space if needed after control sequence
+ -- see #4007
+ addTok _ (Tok _ (CtrlSeq x) txt)
+ acc@(Tok _ Word _ : _)
+ | not (T.null txt) &&
+ isLetter (T.last txt) =
+ Tok spos (CtrlSeq x) (txt <> " ") : acc
+ addTok _ t acc = setpos spos t : acc
+ ts' <- getInput
+ setInput $ foldr (addTok False) ts' newtoks
+ case expansionPoint of
+ ExpandWhenUsed ->
+ if n > 20 -- detect macro expansion loops
+ then throwError $ PandocMacroLoop (T.unpack name)
+ else doMacros (n + 1)
+ ExpandWhenDefined -> return ()
+
+
+setpos :: SourcePos -> Tok -> Tok
+setpos spos (Tok _ tt txt) = Tok spos tt txt
+
+anyControlSeq :: PandocMonad m => LP m Tok
+anyControlSeq = satisfyTok isCtrlSeq
+ where isCtrlSeq (Tok _ (CtrlSeq _) _) = True
+ isCtrlSeq _ = False
+
+anySymbol :: PandocMonad m => LP m Tok
+anySymbol = satisfyTok isSym
+ where isSym (Tok _ Symbol _) = True
+ isSym _ = False
+
+spaces :: PandocMonad m => LP m ()
+spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+spaces1 :: PandocMonad m => LP m ()
+spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+tokTypeIn :: [TokType] -> Tok -> Bool
+tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
+
+controlSeq :: PandocMonad m => Text -> LP m Tok
+controlSeq name = satisfyTok isNamed
+ where isNamed (Tok _ (CtrlSeq n) _) = n == name
+ isNamed _ = False
+
+symbol :: PandocMonad m => Char -> LP m Tok
+symbol c = satisfyTok isc
+ where isc (Tok _ Symbol d) = case T.uncons d of
+ Just (c',_) -> c == c'
+ _ -> False
+ isc _ = False
+
+symbolIn :: PandocMonad m => [Char] -> LP m Tok
+symbolIn cs = satisfyTok isInCs
+ where isInCs (Tok _ Symbol d) = case T.uncons d of
+ Just (c,_) -> c `elem` cs
+ _ -> False
+ isInCs _ = False
+
+sp :: PandocMonad m => LP m ()
+sp = whitespace <|> endline
-bgroup :: LP ()
+whitespace :: PandocMonad m => LP m ()
+whitespace = () <$ satisfyTok isSpaceTok
+ where isSpaceTok (Tok _ Spaces _) = True
+ isSpaceTok _ = False
+
+newlineTok :: PandocMonad m => LP m ()
+newlineTok = () <$ satisfyTok isNewlineTok
+
+isNewlineTok :: Tok -> Bool
+isNewlineTok (Tok _ Newline _) = True
+isNewlineTok _ = False
+
+comment :: PandocMonad m => LP m ()
+comment = () <$ satisfyTok isCommentTok
+ where isCommentTok (Tok _ Comment _) = True
+ isCommentTok _ = False
+
+anyTok :: PandocMonad m => LP m Tok
+anyTok = satisfyTok (const True)
+
+endline :: PandocMonad m => LP m ()
+endline = try $ do
+ newlineTok
+ lookAhead anyTok
+ notFollowedBy blankline
+
+blankline :: PandocMonad m => LP m ()
+blankline = try $ skipMany whitespace *> newlineTok
+
+primEscape :: PandocMonad m => LP m Char
+primEscape = do
+ Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
+ case toktype of
+ Esc1 -> case T.uncons (T.drop 2 t) of
+ Just (c, _)
+ | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
+ | otherwise -> return (chr (ord c + 64))
+ Nothing -> fail "Empty content of Esc1"
+ Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+ Just x -> return (chr x)
+ Nothing -> fail $ "Could not read: " ++ T.unpack t
+ _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
+
+bgroup :: PandocMonad m => LP m Tok
bgroup = try $ do
- skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
- () <$ char '{'
- <|> () <$ controlSeq "bgroup"
- <|> () <$ controlSeq "begingroup"
-
-egroup :: LP ()
-egroup = () <$ char '}'
- <|> () <$ controlSeq "egroup"
- <|> () <$ controlSeq "endgroup"
-
-grouped :: Monoid a => LP a -> LP a
-grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup)
-
-braced :: LP String
-braced = bgroup *> (concat <$> manyTill
- ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
- <|> try (string "\\}")
- <|> try (string "\\{")
- <|> try (string "\\\\")
- <|> ((\x -> "{" ++ x ++ "}") <$> braced)
- <|> count 1 anyChar
- ) egroup)
-
-bracketed :: Monoid a => LP a -> LP a
-bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
-
-mathDisplay :: LP String -> LP Inlines
-mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
-
-mathInline :: LP String -> LP Inlines
-mathInline p = math <$> (try p >>= applyMacros')
-
-mathChars :: LP String
-mathChars =
- concat <$> many (escapedChar
- <|> (snd <$> withRaw braced)
- <|> many1 (satisfy isOrdChar))
- where escapedChar = try $ do char '\\'
- c <- anyChar
- return ['\\',c]
- isOrdChar '$' = False
- isOrdChar '{' = False
- isOrdChar '}' = False
- isOrdChar '\\' = False
- isOrdChar _ = True
-
-quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
-quoted' f starter ender = do
- startchs <- starter
- smart <- getOption readerSmart
- if smart
- then do
- ils <- many (notFollowedBy ender >> inline)
- (ender >> return (f (mconcat ils))) <|>
- (<> mconcat ils) <$>
- lit (case startchs of
- "``" -> "“"
- "`" -> "‘"
- _ -> startchs)
- else lit startchs
-
-doubleQuote :: LP Inlines
-doubleQuote = do
- quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
- <|> quoted' doubleQuoted (string "“") (void $ char '”')
- -- the following is used by babel for localized quotes:
- <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
- <|> quoted' doubleQuoted (string "\"") (void $ char '"')
-
-singleQuote :: LP Inlines
-singleQuote = do
- smart <- getOption readerSmart
- if smart
- then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
- <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
- else str <$> many1 (oneOf "`\'‘’")
-
-inline :: LP Inlines
-inline = (mempty <$ comment)
- <|> (space <$ whitespace)
- <|> (softbreak <$ endline)
- <|> inlineText
- <|> inlineCommand
- <|> inlineEnvironment
- <|> inlineGroup
- <|> (char '-' *> option (str "-")
- (char '-' *> option (str "–") (str "—" <$ char '-')))
- <|> doubleQuote
- <|> singleQuote
- <|> (str "”" <$ try (string "''"))
- <|> (str "”" <$ char '”')
- <|> (str "’" <$ char '\'')
- <|> (str "’" <$ char '’')
- <|> (str "\160" <$ char '~')
- <|> mathDisplay (string "$$" *> mathChars <* string "$$")
- <|> mathInline (char '$' *> mathChars <* char '$')
- <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
- <|> (str . (:[]) <$> tildeEscape)
- <|> (str . (:[]) <$> oneOf "[]")
- <|> (str . (:[]) <$> oneOf "#&~^'`\"[]") -- TODO print warning?
- -- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters
-
-inlines :: LP Inlines
-inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
-
-inlineGroup :: LP Inlines
+ skipMany sp
+ symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+
+egroup :: PandocMonad m => LP m Tok
+egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+
+grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
+grouped parser = try $ do
+ bgroup
+ -- first we check for an inner 'grouped', because
+ -- {{a,b}} should be parsed the same as {a,b}
+ try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
+
+braced :: PandocMonad m => LP m [Tok]
+braced = bgroup *> braced' 1
+ where braced' (n :: Int) =
+ handleEgroup n <|> handleBgroup n <|> handleOther n
+ handleEgroup n = do
+ t <- egroup
+ if n == 1
+ then return []
+ else (t:) <$> braced' (n - 1)
+ handleBgroup n = do
+ t <- bgroup
+ (t:) <$> braced' (n + 1)
+ handleOther n = do
+ t <- anyTok
+ (t:) <$> braced' n
+
+bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
+bracketed parser = try $ do
+ symbol '['
+ mconcat <$> manyTill parser (symbol ']')
+
+dimenarg :: PandocMonad m => LP m Text
+dimenarg = try $ do
+ ch <- option False $ True <$ symbol '='
+ Tok _ _ s <- satisfyTok isWordTok
+ guard $ T.take 2 (T.reverse s) `elem`
+ ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
+ let num = T.take (T.length s - 2) s
+ guard $ T.length num > 0
+ guard $ T.all isDigit num
+ return $ T.pack ['=' | ch] <> s
+
+-- inline elements:
+
+word :: PandocMonad m => LP m Inlines
+word = (str . T.unpack . untoken) <$> satisfyTok isWordTok
+
+regularSymbol :: PandocMonad m => LP m Inlines
+regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
+ where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
+ isRegularSymbol _ = False
+ isSpecial c = c `Set.member` specialChars
+
+specialChars :: Set.Set Char
+specialChars = Set.fromList "#$%&~_^\\{}"
+
+isWordTok :: Tok -> Bool
+isWordTok (Tok _ Word _) = True
+isWordTok _ = False
+
+inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
ils <- grouped inline
if isNull ils
@@ -247,386 +629,19 @@ inlineGroup = do
-- we need the span so we can detitlecase bibtex entries;
-- we need to know when something is {C}apitalized
-block :: LP Blocks
-block = (mempty <$ comment)
- <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
- <|> environment
- <|> macro
- <|> blockCommand
- <|> paragraph
- <|> grouped block
- <|> (mempty <$ char '&') -- loose & in table environment
-
-
-blocks :: LP Blocks
-blocks = mconcat <$> many block
-
-getRawCommand :: String -> LP String
-getRawCommand name' = do
- rawargs <- withRaw (many (try (optional sp *> opt)) *>
- option "" (try (optional sp *> dimenarg)) *>
- many braced)
- return $ '\\' : name' ++ snd rawargs
-
-lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
-lookupListDefault d = (fromMaybe d .) . lookupList
- where
- lookupList l m = msum $ map (`M.lookup` m) l
-
-blockCommand :: LP Blocks
-blockCommand = try $ do
- name <- anyControlSeq
- guard $ name /= "begin" && name /= "end"
- star <- option "" (string "*" <* optional sp)
- let name' = name ++ star
- let raw = do
- rawcommand <- getRawCommand name'
- transformed <- applyMacros' rawcommand
- guard $ transformed /= rawcommand
- notFollowedBy $ parseFromString inlines transformed
- parseFromString blocks transformed
- lookupListDefault raw [name',name] blockCommands
-
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
-
--- eat an optional argument and one or more arguments in braces
-ignoreInlines :: String -> (String, LP Inlines)
-ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
- where optargs = skipopts *> skipMany (try $ optional sp *> braced)
- contseq = '\\':name
- doraw = (rawInline "latex" . (contseq ++) . snd) <$>
- (getOption readerParseRaw >>= guard >> withRaw optargs)
-
-ignoreBlocks :: String -> (String, LP Blocks)
-ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
- where optargs = skipopts *> skipMany (try $ optional sp *> braced)
- contseq = '\\':name
- doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
- (getOption readerParseRaw >>= guard >> withRaw optargs)
-
-blockCommands :: M.Map String (LP Blocks)
-blockCommands = M.fromList $
- [ ("par", mempty <$ skipopts)
- , ("title", mempty <$ (skipopts *>
- (grouped inline >>= addMeta "title")
- <|> (grouped block >>= addMeta "title")))
- , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
- , ("author", mempty <$ (skipopts *> authors))
- -- -- in letter class, temp. store address & sig as title, author
- , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
- , ("signature", mempty <$ (skipopts *> authors))
- , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
- -- sectioning
- , ("chapter", updateState (\s -> s{ stateHasChapters = True })
- *> section nullAttr 0)
- , ("chapter*", updateState (\s -> s{ stateHasChapters = True })
- *> section ("",["unnumbered"],[]) 0)
- , ("section", section nullAttr 1)
- , ("section*", section ("",["unnumbered"],[]) 1)
- , ("subsection", section nullAttr 2)
- , ("subsection*", section ("",["unnumbered"],[]) 2)
- , ("subsubsection", section nullAttr 3)
- , ("subsubsection*", section ("",["unnumbered"],[]) 3)
- , ("paragraph", section nullAttr 4)
- , ("paragraph*", section ("",["unnumbered"],[]) 4)
- , ("subparagraph", section nullAttr 5)
- , ("subparagraph*", section ("",["unnumbered"],[]) 5)
- -- beamer slides
- , ("frametitle", section nullAttr 3)
- , ("framesubtitle", section nullAttr 4)
- -- letters
- , ("opening", (para . trimInlines) <$> (skipopts *> tok))
- , ("closing", skipopts *> closing)
- --
- , ("hrule", pure horizontalRule)
- , ("strut", pure mempty)
- , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("item", skipopts *> looseItem)
- , ("documentclass", skipopts *> braced *> preamble)
- , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
- , ("caption", skipopts *> setCaption)
- , ("PandocStartInclude", startInclude)
- , ("PandocEndInclude", endInclude)
- , ("bibliography", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- , ("addbibresource", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- ] ++ map ignoreBlocks
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks
- [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
- -- newcommand, etc. should be parsed by macro, but we need this
- -- here so these aren't parsed as inline commands to ignore
- , "special", "pdfannot", "pdfstringdef"
- , "bibliographystyle"
- , "maketitle", "makeindex", "makeglossary"
- , "addcontentsline", "addtocontents", "addtocounter"
- -- \ignore{} is used conventionally in literate haskell for definitions
- -- that are to be processed by the compiler but not printed.
- , "ignore"
- , "hyperdef"
- , "markboth", "markright", "markleft"
- , "newpage"
- ]
-
-addMeta :: ToMetaValue a => String -> a -> LP ()
-addMeta field val = updateState $ \st ->
- st{ stateMeta = addMetaField field val $ stateMeta st }
-
-splitBibs :: String -> [Inlines]
-splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
-
-setCaption :: LP Blocks
-setCaption = do
- ils <- tok
- mblabel <- option Nothing $
- try $ spaces' >> controlSeq "label" >> (Just <$> tok)
- let ils' = case mblabel of
- Just lab -> ils <> spanWith
- ("",[],[("data-label", stringify lab)]) mempty
- Nothing -> ils
- updateState $ \st -> st{ stateCaption = Just ils' }
- return mempty
-
-resetCaption :: LP ()
-resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
-
-authors :: LP ()
-authors = try $ do
- char '{'
- let oneAuthor = mconcat <$>
- many1 (notFollowedBy' (controlSeq "and") >>
- (inline <|> mempty <$ blockCommand))
- -- skip e.g. \vspace{10pt}
- auths <- sepBy oneAuthor (controlSeq "and")
- char '}'
- addMeta "author" (map trimInlines auths)
-
-section :: Attr -> Int -> LP Blocks
-section (ident, classes, kvs) lvl = do
- hasChapters <- stateHasChapters `fmap` getState
- let lvl' = if hasChapters then lvl + 1 else lvl
- skipopts
- contents <- grouped inline
- lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
- attr' <- registerHeader (lab, classes, kvs) contents
- return $ headerWith attr' lvl' contents
-
-inlineCommand :: LP Inlines
-inlineCommand = try $ do
- name <- anyControlSeq
- guard $ name /= "begin" && name /= "end"
- guard $ not $ isBlockCommand name
- parseRaw <- getOption readerParseRaw
- star <- option "" (string "*")
- let name' = name ++ star
- let raw = do
- rawargs <- withRaw
- (skipangles *> skipopts *> option "" dimenarg *> many braced)
- let rawcommand = '\\' : name ++ star ++ snd rawargs
- transformed <- applyMacros' rawcommand
- if transformed /= rawcommand
- then parseFromString inlines transformed
- else if parseRaw
- then return $ rawInline "latex" rawcommand
- else return mempty
- (lookupListDefault mzero [name',name] inlineCommands <*
- optional (try (string "{}")))
- <|> raw
-
-unlessParseRaw :: LP ()
-unlessParseRaw = getOption readerParseRaw >>= guard . not
-
-isBlockCommand :: String -> Bool
-isBlockCommand s = s `M.member` blockCommands
-
-
-inlineEnvironments :: M.Map String (LP Inlines)
-inlineEnvironments = M.fromList
- [ ("displaymath", mathEnv id Nothing "displaymath")
- , ("math", math <$> verbEnv "math")
- , ("equation", mathEnv id Nothing "equation")
- , ("equation*", mathEnv id Nothing "equation*")
- , ("gather", mathEnv id (Just "gathered") "gather")
- , ("gather*", mathEnv id (Just "gathered") "gather*")
- , ("multline", mathEnv id (Just "gathered") "multline")
- , ("multline*", mathEnv id (Just "gathered") "multline*")
- , ("eqnarray", mathEnv id (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*")
- , ("align", mathEnv id (Just "aligned") "align")
- , ("align*", mathEnv id (Just "aligned") "align*")
- , ("alignat", mathEnv id (Just "aligned") "alignat")
- , ("alignat*", mathEnv id (Just "aligned") "alignat*")
- ]
-
-inlineCommands :: M.Map String (LP Inlines)
-inlineCommands = M.fromList $
- [ ("emph", extractSpaces emph <$> tok)
- , ("textit", extractSpaces emph <$> tok)
- , ("textsl", extractSpaces emph <$> tok)
- , ("textsc", extractSpaces smallcaps <$> tok)
- , ("sout", extractSpaces strikeout <$> tok)
- , ("textsuperscript", extractSpaces superscript <$> tok)
- , ("textsubscript", extractSpaces subscript <$> tok)
- , ("textbackslash", lit "\\")
- , ("backslash", lit "\\")
- , ("slash", lit "/")
- , ("textbf", extractSpaces strong <$> tok)
- , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
- , ("ldots", lit "…")
- , ("dots", lit "…")
- , ("mdots", lit "…")
- , ("sim", lit "~")
- , ("label", unlessParseRaw >> (inBrackets <$> tok))
- , ("ref", unlessParseRaw >> (inBrackets <$> tok))
- , ("noindent", unlessParseRaw >> return mempty)
- , ("textgreek", tok)
- , ("sep", lit ",")
- , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty
- , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
- , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
- , ("ensuremath", mathInline braced)
- , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
- , ("P", lit "¶")
- , ("S", lit "§")
- , ("$", lit "$")
- , ("%", lit "%")
- , ("&", lit "&")
- , ("#", lit "#")
- , ("_", lit "_")
- , ("{", lit "{")
- , ("}", lit "}")
- -- old TeX commands
- , ("em", extractSpaces emph <$> inlines)
- , ("it", extractSpaces emph <$> inlines)
- , ("sl", extractSpaces emph <$> inlines)
- , ("bf", extractSpaces strong <$> inlines)
- , ("rm", inlines)
- , ("itshape", extractSpaces emph <$> inlines)
- , ("slshape", extractSpaces emph <$> inlines)
- , ("scshape", extractSpaces smallcaps <$> inlines)
- , ("bfseries", extractSpaces strong <$> inlines)
- , ("/", pure mempty) -- italic correction
- , ("aa", lit "å")
- , ("AA", lit "Å")
- , ("ss", lit "ß")
- , ("o", lit "ø")
- , ("O", lit "Ø")
- , ("L", lit "Ł")
- , ("l", lit "ł")
- , ("ae", lit "æ")
- , ("AE", lit "Æ")
- , ("oe", lit "œ")
- , ("OE", lit "Œ")
- , ("pounds", lit "£")
- , ("euro", lit "€")
- , ("copyright", lit "©")
- , ("textasciicircum", lit "^")
- , ("textasciitilde", lit "~")
- , ("H", try $ tok >>= accent hungarumlaut)
- , ("`", option (str "`") $ try $ tok >>= accent grave)
- , ("'", option (str "'") $ try $ tok >>= accent acute)
- , ("^", option (str "^") $ try $ tok >>= accent circ)
- , ("~", option (str "~") $ try $ tok >>= accent tilde)
- , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
- , (".", option (str ".") $ try $ tok >>= accent dot)
- , ("=", option (str "=") $ try $ tok >>= accent macron)
- , ("c", option (str "c") $ try $ tok >>= accent cedilla)
- , ("v", option (str "v") $ try $ tok >>= accent hacek)
- , ("u", option (str "u") $ try $ tok >>= accent breve)
- , ("i", lit "i")
- , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
- , (",", pure mempty)
- , ("@", pure mempty)
- , (" ", lit "\160")
- , ("ps", pure $ str "PS." <> space)
- , ("TeX", lit "TeX")
- , ("LaTeX", lit "LaTeX")
- , ("bar", lit "|")
- , ("textless", lit "<")
- , ("textgreater", lit ">")
- , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
- , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
- , ("verb", doverb)
- , ("lstinline", skipopts *> doverb)
- , ("Verb", doverb)
- , ("texttt", (code . stringify . toList) <$> tok)
- , ("url", (unescapeURL <$> braced) >>= \url ->
- pure (link url "" (str url)))
- , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
- tok >>= \lab ->
- pure (link url "" lab))
- , ("includegraphics", do options <- option [] keyvals
- src <- unescapeURL . removeDoubleQuotes <$> braced
- mkImage options src)
- , ("enquote", enquote)
- , ("cite", citation "cite" NormalCitation False)
- , ("Cite", citation "Cite" NormalCitation False)
- , ("citep", citation "citep" NormalCitation False)
- , ("citep*", citation "citep*" NormalCitation False)
- , ("citeal", citation "citeal" NormalCitation False)
- , ("citealp", citation "citealp" NormalCitation False)
- , ("citealp*", citation "citealp*" NormalCitation False)
- , ("autocite", citation "autocite" NormalCitation False)
- , ("smartcite", citation "smartcite" NormalCitation False)
- , ("footcite", inNote <$> citation "footcite" NormalCitation False)
- , ("parencite", citation "parencite" NormalCitation False)
- , ("supercite", citation "supercite" NormalCitation False)
- , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
- , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
- , ("citeyear", citation "citeyear" SuppressAuthor False)
- , ("autocite*", citation "autocite*" SuppressAuthor False)
- , ("cite*", citation "cite*" SuppressAuthor False)
- , ("parencite*", citation "parencite*" SuppressAuthor False)
- , ("textcite", citation "textcite" AuthorInText False)
- , ("citet", citation "citet" AuthorInText False)
- , ("citet*", citation "citet*" AuthorInText False)
- , ("citealt", citation "citealt" AuthorInText False)
- , ("citealt*", citation "citealt*" AuthorInText False)
- , ("textcites", citation "textcites" AuthorInText True)
- , ("cites", citation "cites" NormalCitation True)
- , ("autocites", citation "autocites" NormalCitation True)
- , ("footcites", inNote <$> citation "footcites" NormalCitation True)
- , ("parencites", citation "parencites" NormalCitation True)
- , ("supercites", citation "supercites" NormalCitation True)
- , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
- , ("Autocite", citation "Autocite" NormalCitation False)
- , ("Smartcite", citation "Smartcite" NormalCitation False)
- , ("Footcite", citation "Footcite" NormalCitation False)
- , ("Parencite", citation "Parencite" NormalCitation False)
- , ("Supercite", citation "Supercite" NormalCitation False)
- , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
- , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
- , ("Citeyear", citation "Citeyear" SuppressAuthor False)
- , ("Autocite*", citation "Autocite*" SuppressAuthor False)
- , ("Cite*", citation "Cite*" SuppressAuthor False)
- , ("Parencite*", citation "Parencite*" SuppressAuthor False)
- , ("Textcite", citation "Textcite" AuthorInText False)
- , ("Textcites", citation "Textcites" AuthorInText True)
- , ("Cites", citation "Cites" NormalCitation True)
- , ("Autocites", citation "Autocites" NormalCitation True)
- , ("Footcites", citation "Footcites" NormalCitation True)
- , ("Parencites", citation "Parencites" NormalCitation True)
- , ("Supercites", citation "Supercites" NormalCitation True)
- , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
- , ("citetext", complexNatbibCitation NormalCitation)
- , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
- complexNatbibCitation AuthorInText)
- <|> citation "citeauthor" AuthorInText False)
- , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
- addMeta "nocite"))
- ] ++ map ignoreInlines
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks:
- [ "index" ]
+doLHSverb :: PandocMonad m => LP m Inlines
+doLHSverb =
+ (codeWith ("",["haskell"],[]) . T.unpack . untokenize)
+ <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|')
-mkImage :: [(String, String)] -> String -> LP Inlines
+mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
mkImage options src = do
- let replaceTextwidth (k,v) = case numUnit v of
- Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
- _ -> (k, v)
- let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
+ let replaceTextwidth (k,v) =
+ case numUnit v of
+ Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
+ _ -> (k, v)
+ let kvs = map replaceTextwidth
+ $ filter (\(k,_) -> k `elem` ["width", "height"]) options
let attr = ("",[], kvs)
let alt = str "image"
case takeExtension src of
@@ -635,40 +650,164 @@ mkImage options src = do
return $ imageWith attr (addExtension src defaultExt) "" alt
_ -> return $ imageWith attr src "" alt
-inNote :: Inlines -> Inlines
-inNote ils =
- note $ para $ ils <> str "."
+doxspace :: PandocMonad m => LP m Inlines
+doxspace =
+ (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
+ where startsWithLetter (Tok _ Word t) =
+ case T.uncons t of
+ Just (c, _) | isLetter c -> True
+ _ -> False
+ startsWithLetter _ = False
-unescapeURL :: String -> String
-unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
-unescapeURL (x:xs) = x:unescapeURL xs
-unescapeURL [] = ""
-enquote :: LP Inlines
+-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
+dosiunitx :: PandocMonad m => LP m Inlines
+dosiunitx = do
+ skipopts
+ value <- tok
+ valueprefix <- option "" $ bracketed tok
+ unit <- tok
+ let emptyOr160 "" = ""
+ emptyOr160 _ = "\160"
+ return . mconcat $ [valueprefix,
+ emptyOr160 valueprefix,
+ value,
+ emptyOr160 unit,
+ unit]
+
+lit :: String -> LP m Inlines
+lit = pure . str
+
+removeDoubleQuotes :: Text -> Text
+removeDoubleQuotes t =
+ Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+
+doubleQuote :: PandocMonad m => LP m Inlines
+doubleQuote =
+ quoted' doubleQuoted (try $ count 2 $ symbol '`')
+ (void $ try $ count 2 $ symbol '\'')
+ <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”')
+ -- the following is used by babel for localized quotes:
+ <|> quoted' doubleQuoted (try $ sequence [symbol '"', symbol '`'])
+ (void $ try $ sequence [symbol '"', symbol '\''])
+
+singleQuote :: PandocMonad m => LP m Inlines
+singleQuote =
+ quoted' singleQuoted ((:[]) <$> symbol '`')
+ (try $ symbol '\'' >>
+ notFollowedBy (satisfyTok startsWithLetter))
+ <|> quoted' singleQuoted ((:[]) <$> symbol '‘')
+ (try $ symbol '’' >>
+ notFollowedBy (satisfyTok startsWithLetter))
+ where startsWithLetter (Tok _ Word t) =
+ case T.uncons t of
+ Just (c, _) | isLetter c -> True
+ _ -> False
+ startsWithLetter _ = False
+
+quoted' :: PandocMonad m
+ => (Inlines -> Inlines)
+ -> LP m [Tok]
+ -> LP m ()
+ -> LP m Inlines
+quoted' f starter ender = do
+ startchs <- (T.unpack . untokenize) <$> starter
+ smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
+ if smart
+ then do
+ ils <- many (notFollowedBy ender >> inline)
+ (ender >> return (f (mconcat ils))) <|>
+ (<> mconcat ils) <$>
+ lit (case startchs of
+ "``" -> "“"
+ "`" -> "‘"
+ cs -> cs)
+ else lit startchs
+
+enquote :: PandocMonad m => LP m Inlines
enquote = do
skipopts
- context <- stateQuoteContext <$> getState
- if context == InDoubleQuote
+ quoteContext <- sQuoteContext <$> getState
+ if quoteContext == InDoubleQuote
then singleQuoted <$> withQuoteContext InSingleQuote tok
else doubleQuoted <$> withQuoteContext InDoubleQuote tok
-doverb :: LP Inlines
+doAcronym :: PandocMonad m => String -> LP m Inlines
+doAcronym form = do
+ acro <- braced
+ return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro),
+ ("acronym-form", "singular+" ++ form)])
+ $ str $ toksToString acro]
+
+doAcronymPlural :: PandocMonad m => String -> LP m Inlines
+doAcronymPlural form = do
+ acro <- braced
+ plural <- lit "s"
+ return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro),
+ ("acronym-form", "plural+" ++ form)]) $
+ mconcat [str $ toksToString acro, plural]]
+
+doverb :: PandocMonad m => LP m Inlines
doverb = do
- marker <- anyChar
- code <$> manyTill (satisfy (/='\n')) (char marker)
-
-doLHSverb :: LP Inlines
-doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ withVerbatimMode $
+ (code . T.unpack . untokenize) <$>
+ manyTill (verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+ t@(Tok pos toktype txt) <- satisfyTok (not . isNewlineTok)
+ case T.findIndex (== stopchar) txt of
+ Nothing -> return t
+ Just i -> do
+ let (t1, t2) = T.splitAt i txt
+ inp <- getInput
+ setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+ : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
+ return $ Tok pos toktype t1
+
+dolstinline :: PandocMonad m => LP m Inlines
+dolstinline = do
+ options <- option [] keyvals
+ let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ let stopchar = if marker == '{' then '}' else marker
+ withVerbatimMode $
+ (codeWith ("",classes,[]) . T.unpack . untokenize) <$>
+ manyTill (verbTok stopchar) (symbol stopchar)
+
+keyval :: PandocMonad m => LP m (String, String)
+keyval = try $ do
+ Tok _ Word key <- satisfyTok isWordTok
+ let isSpecSym (Tok _ Symbol t) = t /= "]" && t /= ","
+ isSpecSym _ = False
+ optional sp
+ val <- option [] $ do
+ symbol '='
+ optional sp
+ braced <|> many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym
+ <|> anyControlSeq)
+ optional sp
+ optional (symbol ',')
+ optional sp
+ return (T.unpack key, T.unpack . untokenize $ val)
-lit :: String -> LP Inlines
-lit = pure . str
+keyvals :: PandocMonad m => LP m [(String, String)]
+keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
-accent :: (Char -> String) -> Inlines -> LP Inlines
-accent f ils =
+accent :: PandocMonad m => Char -> (Char -> String) -> LP m Inlines
+accent c f = try $ do
+ ils <- tok
case toList ils of
(Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
- [] -> mzero
+ [Space] -> return $ str [c]
+ [] -> return $ str [c]
_ -> return ils
grave :: Char -> String
@@ -855,6 +994,19 @@ hacek 'Z' = "Ž"
hacek 'z' = "ž"
hacek c = [c]
+ogonek :: Char -> String
+ogonek 'a' = "ą"
+ogonek 'e' = "ę"
+ogonek 'o' = "ǫ"
+ogonek 'i' = "į"
+ogonek 'u' = "ų"
+ogonek 'A' = "Ą"
+ogonek 'E' = "Ę"
+ogonek 'I' = "Į"
+ogonek 'O' = "Ǫ"
+ogonek 'U' = "Ų"
+ogonek c = [c]
+
breve :: Char -> String
breve 'A' = "Ă"
breve 'a' = "ă"
@@ -870,368 +1022,1181 @@ breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c = [c]
-tok :: LP Inlines
-tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
+toksToString :: [Tok] -> String
+toksToString = T.unpack . untokenize
+
+mathDisplay :: String -> Inlines
+mathDisplay = displayMath . trim
-opt :: LP Inlines
-opt = bracketed inline
+mathInline :: String -> Inlines
+mathInline = math . trim
-rawopt :: LP String
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+ symbol '$'
+ display <- option False (True <$ symbol '$')
+ contents <- trim . toksToString <$>
+ many (notFollowedBy (symbol '$') >> anyTok)
+ if display
+ then
+ mathDisplay contents <$ try (symbol '$' >> symbol '$')
+ <|> (guard (null contents) >> return (mathInline ""))
+ else mathInline contents <$ symbol '$'
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: PandocMonad m => LP m [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe $ toList <$> opt
+ second <- optionMaybe $ toList <$> opt
+ keys <- try $ bgroup *> manyTill citationLabel egroup
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+
+citationLabel :: PandocMonad m => LP m String
+citationLabel = do
+ optional spaces
+ toksToString <$>
+ (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+ <* optional spaces
+ <* optional (symbol ',')
+ <* optional spaces)
+ where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char]
+
+cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
+cites mode multi = try $ do
+ cits <- if multi
+ then many1 simpleCiteArgs
+ else count 1 simpleCiteArgs
+ let cs = concat cits
+ return $ case mode of
+ AuthorInText -> case cs of
+ (c:rest) -> c {citationMode = mode} : rest
+ [] -> []
+ _ -> map (\a -> a {citationMode = mode}) cs
+
+citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
+citation name mode multi = do
+ (c,raw) <- withRaw $ cites mode multi
+ return $ cite c (rawInline "latex" $ "\\" ++ name ++ toksToString raw)
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+ let isCite Cite{} = True
+ isCite _ = False
+ (pref, rest) = break isCite (toList ils)
+ in case rest of
+ (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+ _ -> []
+
+complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
+complexNatbibCitation mode = try $ do
+ (cs, raw) <-
+ withRaw $ concat <$> do
+ bgroup
+ items <- mconcat <$>
+ many1 (notFollowedBy (symbol ';') >> inline)
+ `sepBy1` (symbol ';')
+ egroup
+ return $ map handleCitationPart items
+ case cs of
+ [] -> mzero
+ (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+ (rawInline "latex" $ "\\citetext" ++ toksToString raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+ note $ para $ ils <> str "."
+
+inlineCommand' :: PandocMonad m => LP m Inlines
+inlineCommand' = try $ do
+ Tok _ (CtrlSeq name) cmd <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ star <- option "" ("*" <$ symbol '*' <* optional sp)
+ let name' = name <> star
+ let names = ordNub [name', name] -- check non-starred as fallback
+ let raw = do
+ guard $ isInlineCommand name || not (isBlockCommand name)
+ rawcommand <- getRawCommand name (cmd <> star)
+ (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand))
+ <|> ignore rawcommand
+ lookupListDefault raw names inlineCommands
+
+tok :: PandocMonad m => LP m Inlines
+tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
+ where singleChar' = do
+ Tok _ _ t <- singleChar
+ return (str (T.unpack t))
+
+singleChar :: PandocMonad m => LP m Tok
+singleChar = try $ do
+ Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
+ guard $ not $ toktype == Symbol &&
+ T.any (`Set.member` specialChars) t
+ if T.length t > 1
+ then do
+ let (t1, t2) = (T.take 1 t, T.drop 1 t)
+ inp <- getInput
+ setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
+ return $ Tok pos toktype t1
+ else return $ Tok pos toktype t
+
+opt :: PandocMonad m => LP m Inlines
+opt = bracketed inline <|> (str . T.unpack <$> rawopt)
+
+rawopt :: PandocMonad m => LP m Text
rawopt = do
- contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
- try (string "\\[") <|> rawopt)
+ inner <- untokenize <$> bracketedToks
optional sp
- return $ "[" ++ contents ++ "]"
+ return $ "[" <> inner <> "]"
-skipopts :: LP ()
+skipopts :: PandocMonad m => LP m ()
skipopts = skipMany rawopt
-- opts in angle brackets are used in beamer
-rawangle :: LP ()
+rawangle :: PandocMonad m => LP m ()
rawangle = try $ do
- char '<'
- skipMany (noneOf ">")
- char '>'
- return ()
+ symbol '<'
+ () <$ manyTill anyTok (symbol '>')
-skipangles :: LP ()
+skipangles :: PandocMonad m => LP m ()
skipangles = skipMany rawangle
-inlineText :: LP Inlines
-inlineText = str <$> many1 inlineChar
+ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore raw = do
+ pos <- getPosition
+ report $ SkippedContent raw pos
+ return mempty
-inlineChar :: LP Char
-inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
+withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
+withRaw parser = do
+ inp <- getInput
+ result <- parser
+ nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
+ let raw = takeWhile (/= nxt) inp
+ return (result, raw)
-environment :: LP Blocks
-environment = do
- controlSeq "begin"
- name <- braced
- M.findWithDefault mzero name environments
- <|> rawEnv name
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+ where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
-inlineEnvironment :: LP Inlines
+mathEnvWith :: PandocMonad m
+ => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++
+ "\\end{" ++ T.unpack y ++ "}"
+
+mathEnv :: PandocMonad m => Text -> LP m String
+mathEnv name = do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ T.unpack $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
inlineEnvironment = try $ do
controlSeq "begin"
- name <- braced
+ name <- untokenize <$> braced
M.findWithDefault mzero name inlineEnvironments
-rawEnv :: String -> LP Blocks
-rawEnv name = do
- parseRaw <- getOption readerParseRaw
- rawOptions <- mconcat <$> many rawopt
- let addBegin x = "\\begin{" ++ name ++ "}" ++ rawOptions ++ x
- if parseRaw
- then (rawBlock "latex" . addBegin) <$>
- (withRaw (env name blocks) >>= applyMacros' . snd)
- else env name blocks
-
-----
-
-type IncludeParser = ParserT String [String] IO String
-
--- | Replace "include" commands with file contents.
-handleIncludes :: String -> IO (Either PandocError String)
-handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s
-
-includeParser' :: IncludeParser
-includeParser' =
- concat <$> many (comment' <|> escaped' <|> blob' <|> include'
- <|> startMarker' <|> endMarker'
- <|> verbCmd' <|> verbatimEnv' <|> backslash')
-
-comment' :: IncludeParser
-comment' = do
- char '%'
- xs <- manyTill anyChar newline
- return ('%':xs ++ "\n")
-
-escaped' :: IncludeParser
-escaped' = try $ string "\\%" <|> string "\\\\"
-
-verbCmd' :: IncludeParser
-verbCmd' = fmap snd <$>
- withRaw $ try $ do
- string "\\verb"
- c <- anyChar
- manyTill anyChar (char c)
-
-verbatimEnv' :: IncludeParser
-verbatimEnv' = fmap snd <$>
- withRaw $ try $ do
- string "\\begin"
- name <- braced'
- guard $ name `elem` ["verbatim", "Verbatim", "BVerbatim",
- "lstlisting", "minted", "alltt", "comment"]
- manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}")
-
-blob' :: IncludeParser
-blob' = try $ many1 (noneOf "\\%")
-
-backslash' :: IncludeParser
-backslash' = string "\\"
-
-braced' :: IncludeParser
-braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+ ("displaymath", mathEnvWith id Nothing "displaymath")
+ , ("math", math <$> mathEnv "math")
+ , ("equation", mathEnvWith id Nothing "equation")
+ , ("equation*", mathEnvWith id Nothing "equation*")
+ , ("gather", mathEnvWith id (Just "gathered") "gather")
+ , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+ , ("multline", mathEnvWith id (Just "gathered") "multline")
+ , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith id (Just "aligned") "align")
+ , ("align*", mathEnvWith id (Just "aligned") "align*")
+ , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+ ]
-maybeAddExtension :: String -> FilePath -> FilePath
-maybeAddExtension ext fp =
- if null (takeExtension fp)
- then addExtension fp ext
- else fp
+inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineCommands = M.union inlineLanguageCommands $ M.fromList
+ [ ("emph", extractSpaces emph <$> tok)
+ , ("textit", extractSpaces emph <$> tok)
+ , ("textsl", extractSpaces emph <$> tok)
+ , ("textsc", extractSpaces smallcaps <$> tok)
+ , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+ , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+ , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+ , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+ , ("texttt", ttfamily)
+ , ("sout", extractSpaces strikeout <$> tok)
+ , ("alert", skipangles >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+ , ("lq", return (str "‘"))
+ , ("rq", return (str "’"))
+ , ("textquoteleft", return (str "‘"))
+ , ("textquoteright", return (str "’"))
+ , ("textquotedblleft", return (str "“"))
+ , ("textquotedblright", return (str "”"))
+ , ("textsuperscript", extractSpaces superscript <$> tok)
+ , ("textsubscript", extractSpaces subscript <$> tok)
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("slash", lit "/")
+ , ("textbf", extractSpaces strong <$> tok)
+ , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+ , ("ldots", lit "…")
+ , ("vdots", lit "\8942")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("sep", lit ",")
+ , ("label", rawInlineOr "label" dolabel)
+ , ("ref", rawInlineOr "ref" $ doref "ref")
+ , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty
+ , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty
+ , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty
+ , ("lettrine", optional opt >> extractSpaces (spanWith ("",["lettrine"],[])) <$> tok)
+ , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
+ , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
+ , ("ensuremath", mathInline . toksToString <$> braced)
+ , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
+ , ("P", lit "¶")
+ , ("S", lit "§")
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ -- old TeX commands
+ , ("em", extractSpaces emph <$> inlines)
+ , ("it", extractSpaces emph <$> inlines)
+ , ("sl", extractSpaces emph <$> inlines)
+ , ("bf", extractSpaces strong <$> inlines)
+ , ("rm", inlines)
+ , ("itshape", extractSpaces emph <$> inlines)
+ , ("slshape", extractSpaces emph <$> inlines)
+ , ("scshape", extractSpaces smallcaps <$> inlines)
+ , ("bfseries", extractSpaces strong <$> inlines)
+ , ("/", pure mempty) -- italic correction
+ , ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("oe", lit "œ")
+ , ("OE", lit "Œ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("textasciicircum", lit "^")
+ , ("textasciitilde", lit "~")
+ , ("H", accent '\779' hungarumlaut)
+ , ("`", accent '`' grave)
+ , ("'", accent '\'' acute)
+ , ("^", accent '^' circ)
+ , ("~", accent '~' tilde)
+ , ("\"", accent '\776' umlaut)
+ , (".", accent '\775' dot)
+ , ("=", accent '\772' macron)
+ , ("c", accent '\807' cedilla)
+ , ("v", accent 'ˇ' hacek)
+ , ("u", accent '\774' breve)
+ , ("k", accent '\808' ogonek)
+ , ("textogonekcentered", accent '\808' ogonek)
+ , ("i", lit "i")
+ , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+ guard $ not inTableCell
+ optional opt
+ spaces))
+ , (",", lit "\8198")
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("ps", pure $ str "PS." <> space)
+ , ("TeX", lit "TeX")
+ , ("LaTeX", lit "LaTeX")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("thanks", skipopts >> note <$> grouped block)
+ , ("footnote", skipopts >> note <$> grouped block)
+ , ("verb", doverb)
+ , ("lstinline", dolstinline)
+ , ("Verb", doverb)
+ , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url ->
+ pure (link url "" (str url)))
+ , ("href", (unescapeURL . toksToString <$>
+ braced <* optional sp) >>= \url ->
+ tok >>= \lab -> pure (link url "" lab))
+ , ("includegraphics", do options <- option [] keyvals
+ src <- unescapeURL . T.unpack .
+ removeDoubleQuotes . untokenize <$> braced
+ mkImage options src)
+ , ("enquote", enquote)
+ , ("figurename", doTerm Translations.Figure)
+ , ("prefacename", doTerm Translations.Preface)
+ , ("refname", doTerm Translations.References)
+ , ("bibname", doTerm Translations.Bibliography)
+ , ("chaptername", doTerm Translations.Chapter)
+ , ("partname", doTerm Translations.Part)
+ , ("contentsname", doTerm Translations.Contents)
+ , ("listfigurename", doTerm Translations.ListOfFigures)
+ , ("listtablename", doTerm Translations.ListOfTables)
+ , ("indexname", doTerm Translations.Index)
+ , ("abstractname", doTerm Translations.Abstract)
+ , ("tablename", doTerm Translations.Table)
+ , ("enclname", doTerm Translations.Encl)
+ , ("ccname", doTerm Translations.Cc)
+ , ("headtoname", doTerm Translations.To)
+ , ("pagename", doTerm Translations.Page)
+ , ("seename", doTerm Translations.See)
+ , ("seealsoname", doTerm Translations.SeeAlso)
+ , ("proofname", doTerm Translations.Proof)
+ , ("glossaryname", doTerm Translations.Glossary)
+ , ("lstlistingname", doTerm Translations.Listing)
+ , ("cite", citation "cite" NormalCitation False)
+ , ("Cite", citation "Cite" NormalCitation False)
+ , ("citep", citation "citep" NormalCitation False)
+ , ("citep*", citation "citep*" NormalCitation False)
+ , ("citeal", citation "citeal" NormalCitation False)
+ , ("citealp", citation "citealp" NormalCitation False)
+ , ("citealp*", citation "citealp*" NormalCitation False)
+ , ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
+ , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+ , ("parencite", citation "parencite" NormalCitation False)
+ , ("supercite", citation "supercite" NormalCitation False)
+ , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+ , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+ , ("citeyear", citation "citeyear" SuppressAuthor False)
+ , ("autocite*", citation "autocite*" SuppressAuthor False)
+ , ("cite*", citation "cite*" SuppressAuthor False)
+ , ("parencite*", citation "parencite*" SuppressAuthor False)
+ , ("textcite", citation "textcite" AuthorInText False)
+ , ("citet", citation "citet" AuthorInText False)
+ , ("citet*", citation "citet*" AuthorInText False)
+ , ("citealt", citation "citealt" AuthorInText False)
+ , ("citealt*", citation "citealt*" AuthorInText False)
+ , ("textcites", citation "textcites" AuthorInText True)
+ , ("cites", citation "cites" NormalCitation True)
+ , ("autocites", citation "autocites" NormalCitation True)
+ , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+ , ("parencites", citation "parencites" NormalCitation True)
+ , ("supercites", citation "supercites" NormalCitation True)
+ , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+ , ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
+ , ("Footcite", citation "Footcite" NormalCitation False)
+ , ("Parencite", citation "Parencite" NormalCitation False)
+ , ("Supercite", citation "Supercite" NormalCitation False)
+ , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+ , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+ , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+ , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+ , ("Cite*", citation "Cite*" SuppressAuthor False)
+ , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+ , ("Textcite", citation "Textcite" AuthorInText False)
+ , ("Textcites", citation "Textcites" AuthorInText True)
+ , ("Cites", citation "Cites" NormalCitation True)
+ , ("Autocites", citation "Autocites" NormalCitation True)
+ , ("Footcites", citation "Footcites" NormalCitation True)
+ , ("Parencites", citation "Parencites" NormalCitation True)
+ , ("Supercites", citation "Supercites" NormalCitation True)
+ , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+ , ("citetext", complexNatbibCitation NormalCitation)
+ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+ complexNatbibCitation AuthorInText)
+ <|> citation "citeauthor" AuthorInText False)
+ , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+ addMeta "nocite"))
+ , ("hyperlink", hyperlink)
+ , ("hypertarget", hypertargetInline)
+ -- glossaries package
+ , ("gls", doAcronym "short")
+ , ("Gls", doAcronym "short")
+ , ("glsdesc", doAcronym "long")
+ , ("Glsdesc", doAcronym "long")
+ , ("GLSdesc", doAcronym "long")
+ , ("acrlong", doAcronym "long")
+ , ("Acrlong", doAcronym "long")
+ , ("acrfull", doAcronym "full")
+ , ("Acrfull", doAcronym "full")
+ , ("acrshort", doAcronym "abbrv")
+ , ("Acrshort", doAcronym "abbrv")
+ , ("glspl", doAcronymPlural "short")
+ , ("Glspl", doAcronymPlural "short")
+ , ("glsdescplural", doAcronymPlural "long")
+ , ("Glsdescplural", doAcronymPlural "long")
+ , ("GLSdescplural", doAcronymPlural "long")
+ -- acronyms package
+ , ("ac", doAcronym "short")
+ , ("acf", doAcronym "full")
+ , ("acs", doAcronym "abbrv")
+ , ("acp", doAcronymPlural "short")
+ , ("acfp", doAcronymPlural "full")
+ , ("acsp", doAcronymPlural "abbrv")
+ -- siuntix
+ , ("SI", dosiunitx)
+ -- hyphenat
+ , ("bshyp", lit "\\\173")
+ , ("fshyp", lit "/\173")
+ , ("dothyp", lit ".\173")
+ , ("colonhyp", lit ":\173")
+ , ("hyp", lit "-")
+ , ("nohyphens", tok)
+ , ("textnhtt", ttfamily)
+ , ("nhttfamily", ttfamily)
+ -- LaTeX colors
+ , ("textcolor", coloredInline "color")
+ , ("colorbox", coloredInline "background-color")
+ -- fontawesome
+ , ("faCheck", lit "\10003")
+ , ("faClose", lit "\10007")
+ -- xspace
+ , ("xspace", doxspace)
+ -- etoolbox
+ , ("ifstrequal", ifstrequal)
+ , ("newtoggle", braced >>= newToggle)
+ , ("toggletrue", braced >>= setToggle True)
+ , ("togglefalse", braced >>= setToggle False)
+ , ("iftoggle", try $ ifToggle >> inline)
+ -- biblatex misc
+ , ("RN", romanNumeralUpper)
+ , ("Rn", romanNumeralLower)
+ -- babel
+ , ("foreignlanguage", foreignlanguage)
+ ]
-include' :: IncludeParser
-include' = do
- fs' <- try $ do
- char '\\'
- name <- try (string "include")
- <|> try (string "input")
- <|> string "usepackage"
- -- skip options
- skipMany $ try $ char '[' *> manyTill anyChar (char ']')
- fs <- (map trim . splitBy (==',')) <$> braced'
- return $ if name == "usepackage"
- then map (maybeAddExtension ".sty") fs
- else map (maybeAddExtension ".tex") fs
- pos <- getPosition
- containers <- getState
- let fn = case containers of
- (f':_) -> f'
- [] -> "input"
- -- now process each include file in order...
- rest <- getInput
- results' <- forM fs' (\f -> do
- when (f `elem` containers) $
- fail "Include file loop!"
- contents <- lift $ readTeXFile f
- return $ "\\PandocStartInclude{" ++ f ++ "}" ++
- contents ++ "\\PandocEndInclude{" ++
- fn ++ "}{" ++ show (sourceLine pos) ++ "}{"
- ++ show (sourceColumn pos) ++ "}")
- setInput $ concat results' ++ rest
- return ""
-
-startMarker' :: IncludeParser
-startMarker' = try $ do
- string "\\PandocStartInclude"
- fn <- braced'
- updateState (fn:)
- setPosition $ newPos fn 1 1
- return $ "\\PandocStartInclude{" ++ fn ++ "}"
-
-endMarker' :: IncludeParser
-endMarker' = try $ do
- string "\\PandocEndInclude"
- fn <- braced'
- ln <- braced'
- co <- braced'
- updateState tail
- setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
- return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++
- co ++ "}"
-
-readTeXFile :: FilePath -> IO String
-readTeXFile f = do
- texinputs <- E.catch (getEnv "TEXINPUTS") $ \(_ :: E.SomeException) ->
- return "."
- let ds = splitBy (==':') texinputs
- readFileFromDirs ds f
-
-readFileFromDirs :: [FilePath] -> FilePath -> IO String
-readFileFromDirs [] _ = return ""
-readFileFromDirs (d:ds) f =
- E.catch (UTF8.readFile $ d </> f) $ \(_ :: E.SomeException) ->
- readFileFromDirs ds f
-
-----
-
-keyval :: LP (String, String)
-keyval = try $ do
- key <- many1 alphaNum
- val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\')
- skipMany spaceChar
- optional (char ',')
- skipMany spaceChar
- return (key, val)
+foreignlanguage :: PandocMonad m => LP m Inlines
+foreignlanguage = do
+ babelLang <- T.unpack . untokenize <$> braced
+ case babelLangToBCP47 babelLang of
+ Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok
+ _ -> tok
+inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
+ where
+ mk (polyglossia, bcp47Func) =
+ ("text" <> T.pack polyglossia, inlineLanguage bcp47Func)
+
+inlineLanguage :: PandocMonad m => (String -> Lang) -> LP m Inlines
+inlineLanguage bcp47Func = do
+ o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']'))
+ <$> rawopt
+ let lang = renderLang $ bcp47Func o
+ extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
+
+hyperlink :: PandocMonad m => LP m Inlines
+hyperlink = try $ do
+ src <- toksToString <$> braced
+ lab <- tok
+ return $ link ('#':src) "" lab
+
+hypertargetBlock :: PandocMonad m => LP m Blocks
+hypertargetBlock = try $ do
+ ref <- toksToString <$> braced
+ bs <- grouped block
+ case toList bs of
+ [Header 1 (ident,_,_) _] | ident == ref -> return bs
+ _ -> return $ divWith (ref, [], []) bs
+
+hypertargetInline :: PandocMonad m => LP m Inlines
+hypertargetInline = try $ do
+ ref <- toksToString <$> braced
+ ils <- grouped inline
+ return $ spanWith (ref, [], []) ils
-keyvals :: LP [(String, String)]
-keyvals = try $ char '[' *> manyTill keyval (char ']')
+romanNumeralUpper :: (PandocMonad m) => LP m Inlines
+romanNumeralUpper =
+ str . toRomanNumeral <$> romanNumeralArg
-alltt :: String -> LP Blocks
-alltt t = walk strToCode <$> parseFromString blocks
- (substitute " " "\\ " $ substitute "%" "\\%" $
- intercalate "\\\\\n" $ lines t)
- where strToCode (Str s) = Code nullAttr s
- strToCode x = x
+romanNumeralLower :: (PandocMonad m) => LP m Inlines
+romanNumeralLower =
+ str . map toLower . toRomanNumeral <$> romanNumeralArg
-rawLaTeXBlock :: LP String
-rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
+romanNumeralArg :: (PandocMonad m) => LP m Int
+romanNumeralArg = spaces *> (parser <|> inBraces)
+ where
+ inBraces = do
+ symbol '{'
+ spaces
+ res <- parser
+ spaces
+ symbol '}'
+ return res
+ parser = do
+ Tok _ Word s <- satisfyTok isWordTok
+ let (digits, rest) = T.span isDigit s
+ unless (T.null rest) $
+ fail "Non-digits in argument to \\Rn or \\RN"
+ safeRead $ T.unpack digits
+
+newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
+newToggle name = do
+ updateState $ \st ->
+ st{ sToggles = M.insert (toksToString name) False (sToggles st) }
+ return mempty
-rawLaTeXInline :: LP Inline
-rawLaTeXInline = do
- raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
- RawInline "latex" <$> applyMacros' raw
+setToggle :: (Monoid a, PandocMonad m) => Bool -> [Tok] -> LP m a
+setToggle on name = do
+ updateState $ \st ->
+ st{ sToggles = M.adjust (const on) (toksToString name) (sToggles st) }
+ return mempty
-addImageCaption :: Blocks -> LP Blocks
-addImageCaption = walkM go
- where go (Image attr alt (src,tit)) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Image attr (toList ils) (src, "fig:")
- Nothing -> Image attr alt (src,tit)
- go x = return x
+ifToggle :: PandocMonad m => LP m ()
+ifToggle = do
+ name <- braced
+ spaces
+ yes <- braced
+ spaces
+ no <- braced
+ toggles <- sToggles <$> getState
+ inp <- getInput
+ let name' = toksToString name
+ case M.lookup name' toggles of
+ Just True -> setInput (yes ++ inp)
+ Just False -> setInput (no ++ inp)
+ Nothing -> do
+ pos <- getPosition
+ report $ UndefinedToggle name' pos
+ return ()
-addTableCaption :: Blocks -> LP Blocks
-addTableCaption = walkM go
- where go (Table c als ws hs rs) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Table (toList ils) als ws hs rs
- Nothing -> Table c als ws hs rs
- go x = return x
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = str <$> translateTerm term
+
+ifstrequal :: (PandocMonad m, Monoid a) => LP m a
+ifstrequal = do
+ str1 <- tok
+ str2 <- tok
+ ifequal <- braced
+ ifnotequal <- braced
+ if str1 == str2
+ then getInput >>= setInput . (ifequal ++)
+ else getInput >>= setInput . (ifnotequal ++)
+ return mempty
-environments :: M.Map String (LP Blocks)
-environments = M.fromList
- [ ("document", env "document" blocks <* skipMany anyChar)
- , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
- , ("letter", env "letter" letterContents)
- , ("minipage", env "minipage" $
- skipopts *> spaces' *> optional braced *> spaces' *> blocks)
- , ("figure", env "figure" $
- resetCaption *> skipopts *> blocks >>= addImageCaption)
- , ("center", env "center" blocks)
- , ("longtable", env "longtable" $
- resetCaption *> simpTable False >>= addTableCaption)
- , ("table", env "table" $
- resetCaption *> skipopts *> blocks >>= addTableCaption)
- , ("tabular*", env "tabular" $ simpTable True)
- , ("tabular", env "tabular" $ simpTable False)
- , ("quote", blockQuote <$> env "quote" blocks)
- , ("quotation", blockQuote <$> env "quotation" blocks)
- , ("verse", blockQuote <$> env "verse" blocks)
- , ("itemize", bulletList <$> listenv "itemize" (many item))
- , ("description", definitionList <$> listenv "description" (many descItem))
- , ("enumerate", orderedList')
- , ("alltt", alltt =<< verbEnv "alltt")
- , ("code", guardEnabled Ext_literate_haskell *>
- (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
- verbEnv "code"))
- , ("comment", mempty <$ verbEnv "comment")
- , ("verbatim", codeBlock <$> verbEnv "verbatim")
- , ("Verbatim", fancyverbEnv "Verbatim")
- , ("BVerbatim", fancyverbEnv "BVerbatim")
- , ("lstlisting", do options <- option [] keyvals
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- ++ maybeToList (lookup "language" options
- >>= fromListingsLanguage)
- let attr = (fromMaybe "" (lookup "label" options),classes,kvs)
- codeBlockWith attr <$> verbEnv "lstlisting")
- , ("minted", do options <- option [] keyvals
- lang <- grouped (many1 $ satisfy (/='}'))
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ lang | not (null lang) ] ++
- [ "numberLines" |
- lookup "linenos" options == Just "true" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv "minted")
- , ("obeylines", parseFromString
- (para . trimInlines . mconcat <$> many inline) =<<
- intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
- , ("displaymath", mathEnv para Nothing "displaymath")
- , ("equation", mathEnv para Nothing "equation")
- , ("equation*", mathEnv para Nothing "equation*")
- , ("gather", mathEnv para (Just "gathered") "gather")
- , ("gather*", mathEnv para (Just "gathered") "gather*")
- , ("multline", mathEnv para (Just "gathered") "multline")
- , ("multline*", mathEnv para (Just "gathered") "multline*")
- , ("eqnarray", mathEnv para (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*")
- , ("align", mathEnv para (Just "aligned") "align")
- , ("align*", mathEnv para (Just "aligned") "align*")
- , ("alignat", mathEnv para (Just "aligned") "alignat")
- , ("alignat*", mathEnv para (Just "aligned") "alignat*")
+coloredInline :: PandocMonad m => String -> LP m Inlines
+coloredInline stylename = do
+ skipopts
+ color <- braced
+ spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok
+
+ttfamily :: PandocMonad m => LP m Inlines
+ttfamily = (code . stringify . toList) <$> tok
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+ parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+ if parseRaw
+ then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
+ else fallback
+
+getRawCommand :: PandocMonad m => Text -> Text -> LP m String
+getRawCommand name txt = do
+ (_, rawargs) <- withRaw $
+ case name of
+ "write" -> do
+ void $ satisfyTok isWordTok -- digits
+ void braced
+ "titleformat" -> do
+ void braced
+ skipopts
+ void $ count 4 braced
+ "def" ->
+ void $ manyTill anyTok braced
+ _ -> do
+ skipangles
+ skipopts
+ option "" (try (optional sp *> dimenarg))
+ void $ many braced
+ return $ T.unpack (txt <> untokenize rawargs)
+
+isBlockCommand :: Text -> Bool
+isBlockCommand s =
+ s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks))
+ || s `Set.member` treatAsBlock
+
+treatAsBlock :: Set.Set Text
+treatAsBlock = Set.fromList
+ [ "let", "def", "DeclareRobustCommand"
+ , "newcommand", "renewcommand"
+ , "newenvironment", "renewenvironment"
+ , "providecommand", "provideenvironment"
+ -- newcommand, etc. should be parsed by macroDef, but we need this
+ -- here so these aren't parsed as inline commands to ignore
+ , "special", "pdfannot", "pdfstringdef"
+ , "bibliographystyle"
+ , "maketitle", "makeindex", "makeglossary"
+ , "addcontentsline", "addtocontents", "addtocounter"
+ -- \ignore{} is used conventionally in literate haskell for definitions
+ -- that are to be processed by the compiler but not printed.
+ , "ignore"
+ , "hyperdef"
+ , "markboth", "markright", "markleft"
+ , "hspace", "vspace"
+ , "newpage"
+ , "clearpage"
+ , "pagebreak"
+ , "titleformat"
+ ]
+
+isInlineCommand :: Text -> Bool
+isInlineCommand s =
+ s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines))
+ || s `Set.member` treatAsInline
+
+treatAsInline :: Set.Set Text
+treatAsInline = Set.fromList
+ [ "index"
+ , "hspace"
+ , "vspace"
+ , "noindent"
+ , "newpage"
+ , "clearpage"
+ , "pagebreak"
]
-letterContents :: LP Blocks
-letterContents = do
- bs <- blocks
- st <- getState
- -- add signature (author) and address (title)
- let addr = case lookupMeta "address" (stateMeta st) of
- Just (MetaBlocks [Plain xs]) ->
- para $ trimInlines $ fromList xs
- _ -> mempty
- return $ addr <> bs -- sig added by \closing
+dolabel :: PandocMonad m => LP m Inlines
+dolabel = do
+ v <- braced
+ let refstr = toksToString v
+ return $ spanWith (refstr,[],[("label", refstr)])
+ $ inBrackets $ str $ toksToString v
+
+doref :: PandocMonad m => String -> LP m Inlines
+doref cls = do
+ v <- braced
+ let refstr = toksToString v
+ return $ linkWith ("",[],[ ("reference-type", cls)
+ , ("reference", refstr)])
+ ('#':refstr)
+ ""
+ (inBrackets $ str refstr)
+
+lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v
+lookupListDefault d = (fromMaybe d .) . lookupList
+ where lookupList l m = msum $ map (`M.lookup` m) l
+
+inline :: PandocMonad m => LP m Inlines
+inline = (mempty <$ comment)
+ <|> (space <$ whitespace)
+ <|> (softbreak <$ endline)
+ <|> word
+ <|> inlineCommand'
+ <|> inlineEnvironment
+ <|> inlineGroup
+ <|> (symbol '-' *>
+ option (str "-") (symbol '-' *>
+ option (str "–") (str "—" <$ symbol '-')))
+ <|> doubleQuote
+ <|> singleQuote
+ <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
+ <|> (str "”" <$ symbol '”')
+ <|> (str "’" <$ symbol '\'')
+ <|> (str "’" <$ symbol '’')
+ <|> (str "\160" <$ symbol '~')
+ <|> dollarsMath
+ <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
+ <|> (str . (:[]) <$> primEscape)
+ <|> regularSymbol
+ <|> (do res <- symbolIn "#^'`\"[]&"
+ pos <- getPosition
+ let s = T.unpack (untoken res)
+ report $ ParsingUnescaped s pos
+ return $ str s)
+
+inlines :: PandocMonad m => LP m Inlines
+inlines = mconcat <$> many inline
+
+-- block elements:
+
+begin_ :: PandocMonad m => Text -> LP m ()
+begin_ t = try (do
+ controlSeq "begin"
+ spaces
+ txt <- untokenize <$> braced
+ guard (t == txt)) <?> ("\\begin{" ++ T.unpack t ++ "}")
+
+end_ :: PandocMonad m => Text -> LP m ()
+end_ t = try (do
+ controlSeq "end"
+ spaces
+ txt <- untokenize <$> braced
+ guard $ t == txt) <?> ("\\end{" ++ T.unpack t ++ "}")
+
+preamble :: PandocMonad m => LP m Blocks
+preamble = mempty <$ many preambleBlock
+ where preambleBlock = spaces1
+ <|> void include
+ <|> void macroDef
+ <|> void blockCommand
+ <|> void braced
+ <|> (notFollowedBy (begin_ "document") >> void anyTok)
+
+paragraph :: PandocMonad m => LP m Blocks
+paragraph = do
+ x <- trimInlines . mconcat <$> many1 inline
+ if x == mempty
+ then return mempty
+ else return $ para x
+
+include :: PandocMonad m => LP m Blocks
+include = do
+ (Tok _ (CtrlSeq name) _) <-
+ controlSeq "include" <|> controlSeq "input" <|>
+ controlSeq "subfile" <|> controlSeq "usepackage"
+ skipMany opt
+ fs <- (map (T.unpack . removeDoubleQuotes . T.strip) . T.splitOn "," .
+ untokenize) <$> braced
+ let fs' = if name == "usepackage"
+ then map (maybeAddExtension ".sty") fs
+ else map (maybeAddExtension ".tex") fs
+ dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mapM_ (insertIncluded dirs) fs'
+ return mempty
+
+insertIncluded :: PandocMonad m
+ => [FilePath]
+ -> FilePath
+ -> LP m ()
+insertIncluded dirs f = do
+ pos <- getPosition
+ containers <- getIncludeFiles <$> getState
+ when (f `elem` containers) $
+ throwError $ PandocParseError $ "Include file loop at " ++ show pos
+ updateState $ addIncludeFile f
+ mbcontents <- readFileFromDirs dirs f
+ contents <- case mbcontents of
+ Just s -> return s
+ Nothing -> do
+ report $ CouldNotLoadIncludeFile f pos
+ return ""
+ getInput >>= setInput . (tokenize f (T.pack contents) ++)
+ updateState dropLatestIncludeFile
+
+maybeAddExtension :: String -> FilePath -> FilePath
+maybeAddExtension ext fp =
+ if null (takeExtension fp)
+ then addExtension fp ext
+ else fp
+
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+ st{ sMeta = addMetaField field val $ sMeta st }
+
+authors :: PandocMonad m => LP m ()
+authors = try $ do
+ bgroup
+ let oneAuthor = mconcat <$>
+ many1 (notFollowedBy' (controlSeq "and") >>
+ (inline <|> mempty <$ blockCommand))
+ -- skip e.g. \vspace{10pt}
+ auths <- sepBy oneAuthor (controlSeq "and")
+ egroup
+ addMeta "author" (map trimInlines auths)
+
+macroDef :: PandocMonad m => LP m Blocks
+macroDef =
+ mempty <$ ((commandDef <|> environmentDef) <* doMacros 0)
+ where commandDef = do
+ (name, macro') <- newcommand <|> letmacro <|> defmacro
+ guardDisabled Ext_latex_macros <|>
+ updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
+ environmentDef = do
+ (name, macro1, macro2) <- newenvironment
+ guardDisabled Ext_latex_macros <|>
+ do updateState $ \s -> s{ sMacros =
+ M.insert name macro1 (sMacros s) }
+ updateState $ \s -> s{ sMacros =
+ M.insert ("end" <> name) macro2 (sMacros s) }
+ -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+ -- is equivalent to
+ -- @\newcommand{\envname}[n-args][default]{begin}@
+ -- @\newcommand{\endenvname}@
+
+letmacro :: PandocMonad m => LP m (Text, Macro)
+letmacro = do
+ controlSeq "let"
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ optional $ symbol '='
+ spaces
+ contents <- bracedOrToken
+ return (name, Macro ExpandWhenDefined 0 Nothing contents)
+
+defmacro :: PandocMonad m => LP m (Text, Macro)
+defmacro = try $ do
+ controlSeq "def"
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ numargs <- option 0 $ argSeq 1
+ -- we use withVerbatimMode, because macros are to be expanded
+ -- at point of use, not point of definition
+ contents <- withVerbatimMode bracedOrToken
+ return (name, Macro ExpandWhenUsed numargs Nothing contents)
+
+-- Note: we don't yet support fancy things like #1.#2
+argSeq :: PandocMonad m => Int -> LP m Int
+argSeq n = do
+ Tok _ (Arg i) _ <- satisfyTok isArgTok
+ guard $ i == n
+ argSeq (n+1) <|> return n
+
+isArgTok :: Tok -> Bool
+isArgTok (Tok _ (Arg _) _) = True
+isArgTok _ = False
+
+bracedOrToken :: PandocMonad m => LP m [Tok]
+bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
+
+newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+ controlSeq "renewcommand" <|>
+ controlSeq "providecommand" <|>
+ controlSeq "DeclareRobustCommand"
+ optional $ symbol '*'
+ Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|>
+ (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ contents <- withVerbatimMode bracedOrToken
+ when (mtype == "newcommand") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
+ Nothing -> return ()
+ return (name, Macro ExpandWhenUsed numargs optarg contents)
+
+newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)
+newenvironment = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+ controlSeq "renewenvironment" <|>
+ controlSeq "provideenvironment"
+ optional $ symbol '*'
+ spaces
+ name <- untokenize <$> braced
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ startcontents <- withVerbatimMode bracedOrToken
+ spaces
+ endcontents <- withVerbatimMode bracedOrToken
+ when (mtype == "newenvironment") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos
+ Nothing -> return ()
+ return (name, Macro ExpandWhenUsed numargs optarg startcontents,
+ Macro ExpandWhenUsed 0 Nothing endcontents)
+
+bracketedToks :: PandocMonad m => LP m [Tok]
+bracketedToks = do
+ symbol '['
+ mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+ ds <- untokenize <$> bracketedToks
+ case safeRead (T.unpack ds) of
+ Just i -> return i
+ _ -> return 0
+
+setCaption :: PandocMonad m => LP m Blocks
+setCaption = do
+ ils <- tok
+ mblabel <- option Nothing $
+ try $ spaces >> controlSeq "label" >> (Just <$> tok)
+ let ils' = case mblabel of
+ Just lab -> ils <> spanWith
+ ("",[],[("label", stringify lab)]) mempty
+ Nothing -> ils
+ updateState $ \st -> st{ sCaption = Just ils' }
+ return mempty
-closing :: LP Blocks
+looseItem :: PandocMonad m => LP m Blocks
+looseItem = do
+ inListItem <- sInListItem <$> getState
+ guard $ not inListItem
+ skipopts
+ return mempty
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption = Nothing }
+
+section :: PandocMonad m => Bool -> Attr -> Int -> LP m Blocks
+section starred (ident, classes, kvs) lvl = do
+ skipopts
+ contents <- grouped inline
+ lab <- option ident $
+ try (spaces >> controlSeq "label"
+ >> spaces >> toksToString <$> braced)
+ let classes' = if starred then "unnumbered" : classes else classes
+ unless starred $ do
+ hn <- sLastHeaderNum <$> getState
+ let num = incrementHeaderNum lvl hn
+ updateState $ \st -> st{ sLastHeaderNum = num }
+ updateState $ \st -> st{ sLabels = M.insert lab
+ [Str (renderHeaderNum num)]
+ (sLabels st) }
+ attr' <- registerHeader (lab, classes', kvs) contents
+ return $ headerWith attr' lvl contents
+
+blockCommand :: PandocMonad m => LP m Blocks
+blockCommand = try $ do
+ Tok _ (CtrlSeq name) txt <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ star <- option "" ("*" <$ symbol '*' <* optional sp)
+ let name' = name <> star
+ let names = ordNub [name', name]
+ let rawDefiniteBlock = do
+ guard $ isBlockCommand name
+ rawBlock "latex" <$> getRawCommand name (txt <> star)
+ -- heuristic: if it could be either block or inline, we
+ -- treat it if block if we have a sequence of block
+ -- commands followed by a newline. But we stop if we
+ -- hit a \startXXX, since this might start a raw ConTeXt
+ -- environment (this is important because this parser is
+ -- used by the Markdown reader).
+ let startCommand = try $ do
+ Tok _ (CtrlSeq n) _ <- anyControlSeq
+ guard $ "start" `T.isPrefixOf` n
+ let rawMaybeBlock = try $ do
+ guard $ not $ isInlineCommand name
+ curr <- rawBlock "latex" <$> getRawCommand name (txt <> star)
+ rest <- many $ notFollowedBy startCommand *> blockCommand
+ lookAhead $ blankline <|> startCommand
+ return $ curr <> mconcat rest
+ let raw = rawDefiniteBlock <|> rawMaybeBlock
+ lookupListDefault raw names blockCommands
+
+closing :: PandocMonad m => LP m Blocks
closing = do
contents <- tok
st <- getState
let extractInlines (MetaBlocks [Plain ys]) = ys
extractInlines (MetaBlocks [Para ys ]) = ys
- extractInlines _ = []
- let sigs = case lookupMeta "author" (stateMeta st) of
+ extractInlines _ = []
+ let sigs = case lookupMeta "author" (sMeta st) of
Just (MetaList xs) ->
para $ trimInlines $ fromList $
intercalate [LineBreak] $ map extractInlines xs
_ -> mempty
return $ para (trimInlines contents) <> sigs
-item :: LP Blocks
-item = blocks *> controlSeq "item" *> skipopts *> blocks
-
-looseItem :: LP Blocks
-looseItem = do
- ctx <- stateParserContext `fmap` getState
- if ctx == ListItemState
- then mzero
- else return mempty
-
-descItem :: LP (Inlines, [Blocks])
-descItem = do
- blocks -- skip blocks before item
- controlSeq "item"
- optional sp
- ils <- opt
- bs <- blocks
- return (ils, [bs])
-
-env :: String -> LP a -> LP a
-env name p = p <*
- (try (controlSeq "end" *> braced >>= guard . (== name))
- <?> ("\\end{" ++ name ++ "}"))
+blockCommands :: PandocMonad m => M.Map Text (LP m Blocks)
+blockCommands = M.fromList
+ [ ("par", mempty <$ skipopts)
+ , ("parbox", skipopts >> braced >> grouped blocks)
+ , ("title", mempty <$ (skipopts *>
+ (grouped inline >>= addMeta "title")
+ <|> (grouped block >>= addMeta "title")))
+ , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
+ , ("author", mempty <$ (skipopts *> authors))
+ -- -- in letter class, temp. store address & sig as title, author
+ , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
+ , ("signature", mempty <$ (skipopts *> authors))
+ , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
+ -- Koma-script metadata commands
+ , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
+ -- sectioning
+ , ("part", section False nullAttr (-1))
+ , ("part*", section True nullAttr (-1))
+ , ("chapter", section False nullAttr 0)
+ , ("chapter*", section True ("",["unnumbered"],[]) 0)
+ , ("section", section False nullAttr 1)
+ , ("section*", section True ("",["unnumbered"],[]) 1)
+ , ("subsection", section False nullAttr 2)
+ , ("subsection*", section True ("",["unnumbered"],[]) 2)
+ , ("subsubsection", section False nullAttr 3)
+ , ("subsubsection*", section True ("",["unnumbered"],[]) 3)
+ , ("paragraph", section False nullAttr 4)
+ , ("paragraph*", section True ("",["unnumbered"],[]) 4)
+ , ("subparagraph", section False nullAttr 5)
+ , ("subparagraph*", section True ("",["unnumbered"],[]) 5)
+ -- beamer slides
+ , ("frametitle", section False nullAttr 3)
+ , ("framesubtitle", section False nullAttr 4)
+ -- letters
+ , ("opening", (para . trimInlines) <$> (skipopts *> tok))
+ , ("closing", skipopts *> closing)
+ -- memoir
+ , ("plainbreak", braced >> pure horizontalRule)
+ , ("plainbreak*", braced >> pure horizontalRule)
+ , ("fancybreak", braced >> pure horizontalRule)
+ , ("fancybreak*", braced >> pure horizontalRule)
+ , ("plainfancybreak", braced >> braced >> braced >> pure horizontalRule)
+ , ("plainfancybreak*", braced >> braced >> braced >> pure horizontalRule)
+ , ("pfbreak", pure horizontalRule)
+ , ("pfbreak*", pure horizontalRule)
+ --
+ , ("hrule", pure horizontalRule)
+ , ("strut", pure mempty)
+ , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
+ , ("item", looseItem)
+ , ("documentclass", skipopts *> braced *> preamble)
+ , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
+ , ("caption", skipopts *> setCaption)
+ , ("bibliography", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs . toksToString))
+ , ("addbibresource", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs . toksToString))
+ -- includes
+ , ("lstinputlisting", inputListing)
+ , ("graphicspath", graphicsPath)
+ -- polyglossia
+ , ("setdefaultlanguage", setDefaultLanguage)
+ , ("setmainlanguage", setDefaultLanguage)
+ -- hyperlink
+ , ("hypertarget", hypertargetBlock)
+ -- LaTeX colors
+ , ("textcolor", coloredBlock "color")
+ , ("colorbox", coloredBlock "background-color")
+ ]
+
+
+environments :: PandocMonad m => M.Map Text (LP m Blocks)
+environments = M.fromList
+ [ ("document", env "document" blocks)
+ , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
+ , ("letter", env "letter" letterContents)
+ , ("minipage", env "minipage" $
+ skipopts *> spaces *> optional braced *> spaces *> blocks)
+ , ("figure", env "figure" $ skipopts *> figure)
+ , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
+ , ("center", env "center" blocks)
+ , ("longtable", env "longtable" $
+ resetCaption *> simpTable "longtable" False >>= addTableCaption)
+ , ("table", env "table" $
+ resetCaption *> skipopts *> blocks >>= addTableCaption)
+ , ("tabular*", env "tabular*" $ simpTable "tabular*" True)
+ , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
+ , ("tabular", env "tabular" $ simpTable "tabular" False)
+ , ("quote", blockQuote <$> env "quote" blocks)
+ , ("quotation", blockQuote <$> env "quotation" blocks)
+ , ("verse", blockQuote <$> env "verse" blocks)
+ , ("itemize", bulletList <$> listenv "itemize" (many item))
+ , ("description", definitionList <$> listenv "description" (many descItem))
+ , ("enumerate", orderedList')
+ , ("alltt", alltt <$> env "alltt" blocks)
+ , ("code", guardEnabled Ext_literate_haskell *>
+ (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ verbEnv "code"))
+ , ("comment", mempty <$ verbEnv "comment")
+ , ("verbatim", codeBlock <$> verbEnv "verbatim")
+ , ("Verbatim", fancyverbEnv "Verbatim")
+ , ("BVerbatim", fancyverbEnv "BVerbatim")
+ , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
+ codeBlockWith attr <$> verbEnv "lstlisting")
+ , ("minted", minted)
+ , ("obeylines", obeylines)
+ , ("displaymath", mathEnvWith para Nothing "displaymath")
+ , ("equation", mathEnvWith para Nothing "equation")
+ , ("equation*", mathEnvWith para Nothing "equation*")
+ , ("gather", mathEnvWith para (Just "gathered") "gather")
+ , ("gather*", mathEnvWith para (Just "gathered") "gather*")
+ , ("multline", mathEnvWith para (Just "gathered") "multline")
+ , ("multline*", mathEnvWith para (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith para (Just "aligned") "align")
+ , ("align*", mathEnvWith para (Just "aligned") "align*")
+ , ("alignat", mathEnvWith para (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith para (Just "aligned") "alignat*")
+ , ("tikzpicture", rawVerbEnv "tikzpicture")
+ -- etoolbox
+ , ("ifstrequal", ifstrequal)
+ , ("newtoggle", braced >>= newToggle)
+ , ("toggletrue", braced >>= setToggle True)
+ , ("togglefalse", braced >>= setToggle False)
+ , ("iftoggle", try $ ifToggle >> block)
+ ]
+
+environment :: PandocMonad m => LP m Blocks
+environment = do
+ controlSeq "begin"
+ name <- untokenize <$> braced
+ M.findWithDefault mzero name environments
+ <|> rawEnv name
-listenv :: String -> LP a -> LP a
-listenv name p = try $ do
- oldCtx <- stateParserContext `fmap` getState
- updateState $ \st -> st{ stateParserContext = ListItemState }
- res <- env name p
- updateState $ \st -> st{ stateParserContext = oldCtx }
- return res
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
-mathEnv :: (Inlines -> a) -> Maybe String -> String -> LP a
-mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name)
- where inner x = case innerEnv of
- Nothing -> x
- Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
- "\\end{" ++ y ++ "}"
+rawEnv :: PandocMonad m => Text -> LP m Blocks
+rawEnv name = do
+ exts <- getOption readerExtensions
+ let parseRaw = extensionEnabled Ext_raw_tex exts
+ rawOptions <- mconcat <$> many rawopt
+ let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
+ pos1 <- getPosition
+ (bs, raw) <- withRaw $ env name blocks
+ if parseRaw
+ then return $ rawBlock "latex"
+ $ T.unpack $ beginCommand <> untokenize raw
+ else do
+ report $ SkippedContent (T.unpack beginCommand) pos1
+ pos2 <- getPosition
+ report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2
+ return bs
+
+rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
+rawVerbEnv name = do
+ pos <- getPosition
+ (_, raw) <- withRaw $ verbEnv name
+ let raw' = "\\begin{tikzpicture}" ++ toksToString raw
+ exts <- getOption readerExtensions
+ let parseRaw = extensionEnabled Ext_raw_tex exts
+ if parseRaw
+ then return $ rawBlock "latex" raw'
+ else do
+ report $ SkippedContent raw' pos
+ return mempty
-verbEnv :: String -> LP String
-verbEnv name = do
+verbEnv :: PandocMonad m => Text -> LP m String
+verbEnv name = withVerbatimMode $ do
skipopts
optional blankline
- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
- res <- manyTill anyChar endEnv
- return $ stripTrailingNewlines res
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ toksToString res
-fancyverbEnv :: String -> LP Blocks
+fancyverbEnv :: PandocMonad m => Text -> LP m Blocks
fancyverbEnv name = do
options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
@@ -1242,142 +2207,176 @@ fancyverbEnv name = do
let attr = ("",classes,kvs)
codeBlockWith attr <$> verbEnv name
-orderedList' :: LP Blocks
-orderedList' = do
- optional sp
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ char '[' *> anyOrderedListMarker <* char ']'
- spaces
- optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
- spaces
- start <- option 1 $ try $ do controlSeq "setcounter"
- grouped (string "enum" *> many1 (oneOf "iv"))
- optional sp
- num <- grouped (many1 digit)
- spaces
- return (read num + 1 :: Int)
- bs <- listenv "enumerate" (many item)
- return $ orderedListWith (start, style, delim) bs
-
-paragraph :: LP Blocks
-paragraph = do
- x <- trimInlines . mconcat <$> many1 inline
- if x == mempty
- then return mempty
- else return $ para x
-
-preamble :: LP Blocks
-preamble = mempty <$> manyTill preambleBlock beginDoc
- where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
- preambleBlock = void comment
- <|> void sp
- <|> void blanklines
- <|> void macro
- <|> void blockCommand
- <|> void anyControlSeq
- <|> void braced
- <|> void anyChar
-
--------
+obeylines :: PandocMonad m => LP m Blocks
+obeylines =
+ para . fromList . removeLeadingTrailingBreaks .
+ walk softBreakToHard . toList <$> env "obeylines" inlines
+ where softBreakToHard SoftBreak = LineBreak
+ softBreakToHard x = x
+ removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak .
+ reverse . dropWhile isLineBreak
+ isLineBreak LineBreak = True
+ isLineBreak _ = False
+
+minted :: PandocMonad m => LP m Blocks
+minted = do
+ options <- option [] keyvals
+ lang <- toksToString <$> braced
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ lang | not (null lang) ] ++
+ [ "numberLines" |
+ lookup "linenos" options == Just "true" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv "minted"
--- citations
+letterContents :: PandocMonad m => LP m Blocks
+letterContents = do
+ bs <- blocks
+ st <- getState
+ -- add signature (author) and address (title)
+ let addr = case lookupMeta "address" (sMeta st) of
+ Just (MetaBlocks [Plain xs]) ->
+ para $ trimInlines $ fromList xs
+ _ -> mempty
+ return $ addr <> bs -- sig added by \closing
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
+figure :: PandocMonad m => LP m Blocks
+figure = try $ do
+ resetCaption
+ blocks >>= addImageCaption
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
- let k = last ks
- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
+addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
+addImageCaption = walkM go
+ where go (Image attr alt (src,tit))
+ | not ("fig:" `isPrefixOf` tit) = do
+ mbcapt <- sCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
+ Nothing -> Image attr alt (src,tit)
+ go x = return x
-simpleCiteArgs :: LP [Citation]
-simpleCiteArgs = try $ do
- first <- optionMaybe $ toList <$> opt
- second <- optionMaybe $ toList <$> opt
- char '{'
- optional sp
- keys <- manyTill citationLabel (char '}')
- let (pre, suf) = case (first , second ) of
- (Just s , Nothing) -> (mempty, s )
- (Just s , Just t ) -> (s , t )
- _ -> (mempty, mempty)
- conv k = Citation { citationId = k
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = NormalCitation
- , citationHash = 0
- , citationNoteNum = 0
- }
- return $ addPrefix pre $ addSuffix suf $ map conv keys
+coloredBlock :: PandocMonad m => String -> LP m Blocks
+coloredBlock stylename = try $ do
+ skipopts
+ color <- braced
+ notFollowedBy (grouped inline)
+ let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)])
+ constructor <$> grouped block
+
+graphicsPath :: PandocMonad m => LP m Blocks
+graphicsPath = do
+ ps <- map toksToString <$> (bgroup *> manyTill braced egroup)
+ getResourcePath >>= setResourcePath . (++ ps)
+ return mempty
-citationLabel :: LP String
-citationLabel = optional sp *>
- (many1 (satisfy isBibtexKeyChar)
- <* optional sp
- <* optional (char ',')
- <* optional sp)
- where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)
+splitBibs :: String -> [Inlines]
+splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
-cites :: CitationMode -> Bool -> LP [Citation]
-cites mode multi = try $ do
- cits <- if multi
- then many1 simpleCiteArgs
- else count 1 simpleCiteArgs
- let cs = concat cits
- return $ case mode of
- AuthorInText -> case cs of
- (c:rest) -> c {citationMode = mode} : rest
- [] -> []
- _ -> map (\a -> a {citationMode = mode}) cs
+alltt :: Blocks -> Blocks
+alltt = walk strToCode
+ where strToCode (Str s) = Code nullAttr s
+ strToCode Space = RawInline (Format "latex") "\\ "
+ strToCode SoftBreak = LineBreak
+ strToCode x = x
-citation :: String -> CitationMode -> Bool -> LP Inlines
-citation name mode multi = do
- (c,raw) <- withRaw $ cites mode multi
- return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
+parseListingsOptions :: [(String, String)] -> Attr
+parseListingsOptions options =
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ ++ maybeToList (lookup "language" options
+ >>= fromListingsLanguage)
+ in (fromMaybe "" (lookup "label" options), classes, kvs)
-complexNatbibCitation :: CitationMode -> LP Inlines
-complexNatbibCitation mode = try $ do
- let ils = (toList . trimInlines . mconcat) <$>
- many (notFollowedBy (oneOf "\\};") >> inline)
- let parseOne = try $ do
- skipSpaces
- pref <- ils
- cit' <- inline -- expect a citation
- let citlist = toList cit'
- cits' <- case citlist of
- [Cite cs _] -> return cs
- _ -> mzero
- suff <- ils
- skipSpaces
- optional $ char ';'
- return $ addPrefix pref $ addSuffix suff cits'
- (c:cits, raw) <- withRaw $ grouped parseOne
- return $ cite (c{ citationMode = mode }:cits)
- (rawInline "latex" $ "\\citetext" ++ raw)
+inputListing :: PandocMonad m => LP m Blocks
+inputListing = do
+ pos <- getPosition
+ options <- option [] keyvals
+ f <- filter (/='"') . toksToString <$> braced
+ dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mbCode <- readFileFromDirs dirs f
+ codeLines <- case mbCode of
+ Just s -> return $ lines s
+ Nothing -> do
+ report $ CouldNotLoadIncludeFile f pos
+ return []
+ let (ident,classes,kvs) = parseListingsOptions options
+ let language = case lookup "language" options >>= fromListingsLanguage of
+ Just l -> [l]
+ Nothing -> take 1 $ languagesByExtension (takeExtension f)
+ let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
+ let lastline = fromMaybe (length codeLines) $
+ lookup "lastline" options >>= safeRead
+ let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $
+ drop (firstline - 1) codeLines
+ return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents
+
+-- lists
+
+item :: PandocMonad m => LP m Blocks
+item = void blocks *> controlSeq "item" *> skipopts *> blocks
+
+descItem :: PandocMonad m => LP m (Inlines, [Blocks])
+descItem = do
+ blocks -- skip blocks before item
+ controlSeq "item"
+ optional sp
+ ils <- opt
+ bs <- blocks
+ return (ils, [bs])
--- tables
+listenv :: PandocMonad m => Text -> LP m a -> LP m a
+listenv name p = try $ do
+ oldInListItem <- sInListItem `fmap` getState
+ updateState $ \st -> st{ sInListItem = True }
+ res <- env name p
+ updateState $ \st -> st{ sInListItem = oldInListItem }
+ return res
-parseAligns :: LP [Alignment]
-parseAligns = try $ do
- char '{'
- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
- maybeBar
- let cAlign = AlignCenter <$ char 'c'
- let lAlign = AlignLeft <$ char 'l'
- let rAlign = AlignRight <$ char 'r'
- let parAlign = AlignLeft <$ (char 'p' >> braced)
- let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
- aligns' <- sepEndBy alignChar maybeBar
+orderedList' :: PandocMonad m => LP m Blocks
+orderedList' = try $ do
spaces
- char '}'
+ let markerSpec = do
+ symbol '['
+ ts <- toksToString <$> manyTill anyTok (symbol ']')
+ case runParser anyOrderedListMarker def "option" ts of
+ Right r -> return r
+ Left _ -> do
+ pos <- getPosition
+ report $ SkippedContent ("[" ++ ts ++ "]") pos
+ return (1, DefaultStyle, DefaultDelim)
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec
spaces
- return aligns'
+ optional $ try $ controlSeq "setlength"
+ *> grouped (count 1 $ controlSeq "itemindent")
+ *> braced
+ spaces
+ start <- option 1 $ try $ do pos <- getPosition
+ controlSeq "setcounter"
+ ctr <- toksToString <$> braced
+ guard $ "enum" `isPrefixOf` ctr
+ guard $ all (`elem` ['i','v']) (drop 4 ctr)
+ optional sp
+ num <- toksToString <$> braced
+ case safeRead num of
+ Just i -> return (i + 1 :: Int)
+ Nothing -> do
+ report $ SkippedContent
+ ("\\setcounter{" ++ ctr ++
+ "}{" ++ num ++ "}") pos
+ return 1
+ bs <- listenv "enumerate" (many item)
+ return $ orderedListWith (start, style, delim) bs
+
+-- tables
-hline :: LP ()
+hline :: PandocMonad m => LP m ()
hline = try $ do
- spaces'
+ spaces
controlSeq "hline" <|>
-- booktabs rules:
controlSeq "toprule" <|>
@@ -1385,80 +2384,312 @@ hline = try $ do
controlSeq "midrule" <|>
controlSeq "endhead" <|>
controlSeq "endfirsthead"
- spaces'
- optional $ bracketed (many1 (satisfy (/=']')))
+ spaces
+ optional opt
return ()
-lbreak :: LP ()
-lbreak = () <$ try (spaces' *>
- (controlSeq "\\" <|> controlSeq "tabularnewline") <*
- spaces')
-
-amp :: LP ()
-amp = () <$ try (spaces' *> char '&' <* spaces')
-
-parseTableRow :: Int -- ^ number of columns
- -> LP [Blocks]
-parseTableRow cols = try $ do
- let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline
- let minipage = try $ controlSeq "begin" *> string "{minipage}" *>
- env "minipage"
- (skipopts *> spaces' *> optional braced *> spaces' *> blocks)
- let tableCell = minipage <|>
- ((plain . trimInlines . mconcat) <$> many tableCellInline)
- cells' <- sepBy1 tableCell amp
- let numcells = length cells'
- guard $ numcells <= cols && numcells >= 1
- guard $ cells' /= [mempty]
- -- note: a & b in a three-column table leaves an empty 3rd cell:
- let cells'' = cells' ++ replicate (cols - numcells) mempty
- spaces'
- return cells''
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces
+
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
-spaces' :: LP ()
-spaces' = spaces *> skipMany (comment *> spaces)
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+ inp <- getInput
+ case inp of
+ (Tok spos Word t : rest) ->
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest
+ _ -> return ()
-simpTable :: Bool -> LP Blocks
-simpTable hasWidthParameter = try $ do
- when hasWidthParameter $ () <$ (spaces' >> tok)
+parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))]
+parseAligns = try $ do
+ let maybeBar = skipMany $
+ sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced)
+ let cAlign = AlignCenter <$ symbol 'c'
+ let lAlign = AlignLeft <$ symbol 'l'
+ let rAlign = AlignRight <$ symbol 'r'
+ let parAlign = AlignLeft <$ symbol 'p'
+ -- aligns from tabularx
+ let xAlign = AlignLeft <$ symbol 'X'
+ let mAlign = AlignLeft <$ symbol 'm'
+ let bAlign = AlignLeft <$ symbol 'b'
+ let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
+ <|> xAlign <|> mAlign <|> bAlign )
+ let alignPrefix = symbol '>' >> braced
+ let alignSuffix = symbol '<' >> braced
+ let colWidth = try $ do
+ symbol '{'
+ ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth")
+ spaces
+ symbol '}'
+ case safeRead ds of
+ Just w -> return w
+ Nothing -> return 0.0
+ let alignSpec = do
+ pref <- option [] alignPrefix
+ spaces
+ al <- alignChar
+ width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced
+ pos <- getPosition
+ report $ SkippedContent s pos
+ return 0.0)
+ spaces
+ suff <- option [] alignSuffix
+ return (al, width, (pref, suff))
+ let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+ symbol '*'
+ spaces
+ ds <- trim . toksToString <$> braced
+ spaces
+ spec <- braced
+ case safeRead ds of
+ Just n ->
+ getInput >>= setInput . (mconcat (replicate n spec) ++)
+ Nothing -> fail $ "Could not parse " ++ ds ++ " as number"
+ bgroup
+ spaces
+ maybeBar
+ aligns' <- many $ try $ spaces >> optional starAlign >>
+ (alignSpec <* maybeBar)
+ spaces
+ egroup
+ spaces
+ return aligns'
+
+parseTableRow :: PandocMonad m
+ => Text -- ^ table environment name
+ -> [([Tok], [Tok])] -- ^ pref/suffixes
+ -> LP m [Blocks]
+parseTableRow envname prefsufs = do
+ notFollowedBy (spaces *> end_ envname)
+ let cols = length prefsufs
+ -- add prefixes and suffixes in token stream:
+ let celltoks (pref, suff) = do
+ prefpos <- getPosition
+ contents <- many (notFollowedBy
+ (() <$ amp <|> () <$ lbreak <|> end_ envname)
+ >> anyTok)
+ suffpos <- getPosition
+ option [] (count 1 amp)
+ return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+ rawcells <- mapM celltoks prefsufs
+ oldInput <- getInput
+ cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
+ setInput oldInput
+ spaces
+ let numcells = length cells
+ guard $ numcells <= cols && numcells >= 1
+ guard $ cells /= [mempty]
+ -- note: a & b in a three-column table leaves an empty 3rd cell:
+ return $ cells ++ replicate (cols - numcells) mempty
+
+parseTableCell :: PandocMonad m => LP m Blocks
+parseTableCell = do
+ let plainify bs = case toList bs of
+ [Para ils] -> plain (fromList ils)
+ _ -> bs
+ updateState $ \st -> st{ sInTableCell = True }
+ cells <- plainify <$> blocks
+ updateState $ \st -> st{ sInTableCell = False }
+ return cells
+
+simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
+simpTable envname hasWidthParameter = try $ do
+ when hasWidthParameter $ () <$ (spaces >> tok)
skipopts
- aligns <- parseAligns
- let cols = length aligns
+ colspecs <- parseAligns
+ let (aligns, widths, prefsufs) = unzip3 colspecs
+ let cols = length colspecs
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
- spaces'
+ spaces
skipMany hline
- spaces'
- header' <- option [] $ try (parseTableRow cols <* lbreak <* many1 hline)
- spaces'
- rows <- sepEndBy (parseTableRow cols) (lbreak <* optional (skipMany hline))
- spaces'
+ spaces
+ header' <- option [] $ try (parseTableRow envname prefsufs <*
+ lbreak <* many1 hline)
+ spaces
+ rows <- sepEndBy (parseTableRow envname prefsufs)
+ (lbreak <* optional (skipMany hline))
+ spaces
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
- spaces'
+ spaces
let header'' = if null header'
then replicate cols mempty
else header'
lookAhead $ controlSeq "end" -- make sure we're at end
- return $ table mempty (zip aligns (repeat 0)) header'' rows
+ return $ table mempty (zip aligns widths) header'' rows
-startInclude :: LP Blocks
-startInclude = do
- fn <- braced
- setPosition $ newPos fn 1 1
- return mempty
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+ where go (Table c als ws hs rs) = do
+ mbcapt <- sCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Table (toList ils) als ws hs rs
+ Nothing -> Table c als ws hs rs
+ go x = return x
-endInclude :: LP Blocks
-endInclude = do
- fn <- braced
- ln <- braced
- co <- braced
- setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
- return mempty
-removeDoubleQuotes :: String -> String
-removeDoubleQuotes ('"':xs) =
- case reverse xs of
- '"':ys -> reverse ys
- _ -> '"':xs
-removeDoubleQuotes xs = xs
+block :: PandocMonad m => LP m Blocks
+block = do
+ res <- (mempty <$ spaces1)
+ <|> environment
+ <|> include
+ <|> macroDef
+ <|> blockCommand
+ <|> paragraph
+ <|> grouped block
+ trace (take 60 $ show $ B.toList res)
+ return res
+
+blocks :: PandocMonad m => LP m Blocks
+blocks = mconcat <$> many block
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+ o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']'))
+ <$> rawopt
+ polylang <- toksToString <$> braced
+ case M.lookup polylang polyglossiaLangToBCP47 of
+ Nothing -> return mempty -- TODO mzero? warning?
+ Just langFunc -> do
+ let l = langFunc o
+ setTranslations l
+ updateState $ setMeta "lang" $ str (renderLang l)
+ return mempty
+
+polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
+polyglossiaLangToBCP47 = M.fromList
+ [ ("arabic", \o -> case filter (/=' ') o of
+ "locale=algeria" -> Lang "ar" "" "DZ" []
+ "locale=mashriq" -> Lang "ar" "" "SY" []
+ "locale=libya" -> Lang "ar" "" "LY" []
+ "locale=morocco" -> Lang "ar" "" "MA" []
+ "locale=mauritania" -> Lang "ar" "" "MR" []
+ "locale=tunisia" -> Lang "ar" "" "TN" []
+ _ -> Lang "ar" "" "" [])
+ , ("german", \o -> case filter (/=' ') o of
+ "spelling=old" -> Lang "de" "" "DE" ["1901"]
+ "variant=austrian,spelling=old"
+ -> Lang "de" "" "AT" ["1901"]
+ "variant=austrian" -> Lang "de" "" "AT" []
+ "variant=swiss,spelling=old"
+ -> Lang "de" "" "CH" ["1901"]
+ "variant=swiss" -> Lang "de" "" "CH" []
+ _ -> Lang "de" "" "" [])
+ , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+ , ("greek", \o -> case filter (/=' ') o of
+ "variant=poly" -> Lang "el" "" "polyton" []
+ "variant=ancient" -> Lang "grc" "" "" []
+ _ -> Lang "el" "" "" [])
+ , ("english", \o -> case filter (/=' ') o of
+ "variant=australian" -> Lang "en" "" "AU" []
+ "variant=canadian" -> Lang "en" "" "CA" []
+ "variant=british" -> Lang "en" "" "GB" []
+ "variant=newzealand" -> Lang "en" "" "NZ" []
+ "variant=american" -> Lang "en" "" "US" []
+ _ -> Lang "en" "" "" [])
+ , ("usorbian", \_ -> Lang "hsb" "" "" [])
+ , ("latin", \o -> case filter (/=' ') o of
+ "variant=classic" -> Lang "la" "" "" ["x-classic"]
+ _ -> Lang "la" "" "" [])
+ , ("slovenian", \_ -> Lang "sl" "" "" [])
+ , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
+ , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
+ , ("afrikaans", \_ -> Lang "af" "" "" [])
+ , ("amharic", \_ -> Lang "am" "" "" [])
+ , ("assamese", \_ -> Lang "as" "" "" [])
+ , ("asturian", \_ -> Lang "ast" "" "" [])
+ , ("bulgarian", \_ -> Lang "bg" "" "" [])
+ , ("bengali", \_ -> Lang "bn" "" "" [])
+ , ("tibetan", \_ -> Lang "bo" "" "" [])
+ , ("breton", \_ -> Lang "br" "" "" [])
+ , ("catalan", \_ -> Lang "ca" "" "" [])
+ , ("welsh", \_ -> Lang "cy" "" "" [])
+ , ("czech", \_ -> Lang "cs" "" "" [])
+ , ("coptic", \_ -> Lang "cop" "" "" [])
+ , ("danish", \_ -> Lang "da" "" "" [])
+ , ("divehi", \_ -> Lang "dv" "" "" [])
+ , ("esperanto", \_ -> Lang "eo" "" "" [])
+ , ("spanish", \_ -> Lang "es" "" "" [])
+ , ("estonian", \_ -> Lang "et" "" "" [])
+ , ("basque", \_ -> Lang "eu" "" "" [])
+ , ("farsi", \_ -> Lang "fa" "" "" [])
+ , ("finnish", \_ -> Lang "fi" "" "" [])
+ , ("french", \_ -> Lang "fr" "" "" [])
+ , ("friulan", \_ -> Lang "fur" "" "" [])
+ , ("irish", \_ -> Lang "ga" "" "" [])
+ , ("scottish", \_ -> Lang "gd" "" "" [])
+ , ("ethiopic", \_ -> Lang "gez" "" "" [])
+ , ("galician", \_ -> Lang "gl" "" "" [])
+ , ("hebrew", \_ -> Lang "he" "" "" [])
+ , ("hindi", \_ -> Lang "hi" "" "" [])
+ , ("croatian", \_ -> Lang "hr" "" "" [])
+ , ("magyar", \_ -> Lang "hu" "" "" [])
+ , ("armenian", \_ -> Lang "hy" "" "" [])
+ , ("interlingua", \_ -> Lang "ia" "" "" [])
+ , ("indonesian", \_ -> Lang "id" "" "" [])
+ , ("icelandic", \_ -> Lang "is" "" "" [])
+ , ("italian", \_ -> Lang "it" "" "" [])
+ , ("japanese", \_ -> Lang "jp" "" "" [])
+ , ("khmer", \_ -> Lang "km" "" "" [])
+ , ("kurmanji", \_ -> Lang "kmr" "" "" [])
+ , ("kannada", \_ -> Lang "kn" "" "" [])
+ , ("korean", \_ -> Lang "ko" "" "" [])
+ , ("lao", \_ -> Lang "lo" "" "" [])
+ , ("lithuanian", \_ -> Lang "lt" "" "" [])
+ , ("latvian", \_ -> Lang "lv" "" "" [])
+ , ("malayalam", \_ -> Lang "ml" "" "" [])
+ , ("mongolian", \_ -> Lang "mn" "" "" [])
+ , ("marathi", \_ -> Lang "mr" "" "" [])
+ , ("dutch", \_ -> Lang "nl" "" "" [])
+ , ("nynorsk", \_ -> Lang "nn" "" "" [])
+ , ("norsk", \_ -> Lang "no" "" "" [])
+ , ("nko", \_ -> Lang "nqo" "" "" [])
+ , ("occitan", \_ -> Lang "oc" "" "" [])
+ , ("panjabi", \_ -> Lang "pa" "" "" [])
+ , ("polish", \_ -> Lang "pl" "" "" [])
+ , ("piedmontese", \_ -> Lang "pms" "" "" [])
+ , ("portuguese", \_ -> Lang "pt" "" "" [])
+ , ("romansh", \_ -> Lang "rm" "" "" [])
+ , ("romanian", \_ -> Lang "ro" "" "" [])
+ , ("russian", \_ -> Lang "ru" "" "" [])
+ , ("sanskrit", \_ -> Lang "sa" "" "" [])
+ , ("samin", \_ -> Lang "se" "" "" [])
+ , ("slovak", \_ -> Lang "sk" "" "" [])
+ , ("albanian", \_ -> Lang "sq" "" "" [])
+ , ("serbian", \_ -> Lang "sr" "" "" [])
+ , ("swedish", \_ -> Lang "sv" "" "" [])
+ , ("syriac", \_ -> Lang "syr" "" "" [])
+ , ("tamil", \_ -> Lang "ta" "" "" [])
+ , ("telugu", \_ -> Lang "te" "" "" [])
+ , ("thai", \_ -> Lang "th" "" "" [])
+ , ("turkmen", \_ -> Lang "tk" "" "" [])
+ , ("turkish", \_ -> Lang "tr" "" "" [])
+ , ("ukrainian", \_ -> Lang "uk" "" "" [])
+ , ("urdu", \_ -> Lang "ur" "" "" [])
+ , ("vietnamese", \_ -> Lang "vi" "" "" [])
+ ]
+
+babelLangToBCP47 :: String -> Maybe Lang
+babelLangToBCP47 s =
+ case s of
+ "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
+ "naustrian" -> Just $ Lang "de" "" "AT" []
+ "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
+ "nswissgerman" -> Just $ Lang "de" "" "CH" []
+ "german" -> Just $ Lang "de" "" "DE" ["1901"]
+ "ngerman" -> Just $ Lang "de" "" "DE" []
+ "lowersorbian" -> Just $ Lang "dsb" "" "" []
+ "uppersorbian" -> Just $ Lang "hsb" "" "" []
+ "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
+ "slovene" -> Just $ Lang "sl" "" "" []
+ "australian" -> Just $ Lang "en" "" "AU" []
+ "canadian" -> Just $ Lang "en" "" "CA" []
+ "british" -> Just $ Lang "en" "" "GB" []
+ "newzealand" -> Just $ Lang "en" "" "NZ" []
+ "american" -> Just $ Lang "en" "" "US" []
+ "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+ _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
new file mode 100644
index 000000000..c9cbaa9b9
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -0,0 +1,51 @@
+{-
+Copyright (C) 2017-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Types
+ Copyright : Copyright (C) 2017-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Types for LaTeX tokens and macros.
+-}
+module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
+ , TokType(..)
+ , Macro(..)
+ , ExpansionPoint(..)
+ , SourcePos
+ )
+where
+import Data.Text (Text)
+import Text.Parsec.Pos (SourcePos)
+
+data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
+ Esc1 | Esc2 | Arg Int
+ deriving (Eq, Ord, Show)
+
+data Tok = Tok SourcePos TokType Text
+ deriving (Eq, Ord, Show)
+
+data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
+ deriving (Eq, Ord, Show)
+
+data Macro = Macro ExpansionPoint Int (Maybe [Tok]) [Tok]
+ deriving Show
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index cd35a8738..14cf73de4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
+
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Markdown
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,63 +30,55 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.Markdown ( readMarkdown,
- readMarkdownWithWarnings ) where
+module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
-import Data.List ( transpose, sortBy, findIndex, intercalate )
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
+import qualified Data.HashMap.Strict as H
+import Data.List (intercalate, sortBy, transpose, elemIndex)
import qualified Data.Map as M
-import Data.Scientific (coefficient, base10Exponent)
-import Data.Ord ( comparing )
-import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation )
import Data.Maybe
-import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
-import Text.Pandoc.Generic (bottomUp)
-import qualified Data.Text as T
+import Data.Monoid ((<>))
+import Data.Ord (comparing)
+import Data.Scientific (base10Exponent, coefficient)
+import qualified Data.Set as Set
import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Vector as V
+import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..))
import qualified Data.Yaml as Yaml
-import Data.Yaml (ParseException(..), YamlException(..), YamlMark(..))
-import qualified Data.HashMap.Strict as H
+import System.FilePath (addExtension, takeExtension)
+import Text.HTML.TagSoup
+import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import qualified Data.Vector as V
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
+import Text.Pandoc.Class (PandocMonad (..), report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (tableWith)
+import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
+ isCommentTag, isInlineTag, isTextTag)
+import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
-import Text.Pandoc.Pretty (charWidth)
+import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Parsing hiding (tableWith)
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
- isTextTag, isCommentTag )
-import Control.Monad
-import System.FilePath (takeExtension, addExtension)
-import Text.HTML.TagSoup
-import qualified Data.Set as Set
-import Text.Printf (printf)
-import Debug.Trace (trace)
-import Data.Monoid ((<>))
-import Text.Pandoc.Error
-type MarkdownParser = Parser [Char] ParserState
+type MarkdownParser m = ParserT [Char] ParserState m
-- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readMarkdown opts s =
- (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
-
--- | Read markdown from an input string and return a pair of a Pandoc document
--- and a list of warnings.
-readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError (Pandoc, [String])
-readMarkdownWithWarnings opts s =
- (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
+readMarkdown :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readMarkdown opts s = do
+ parsed <- readWithM parseMarkdown def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
--
-- Constants and data structure definitions
@@ -117,44 +110,43 @@ isBlank _ = False
--
-- | Succeeds when we're in list context.
-inList :: MarkdownParser ()
+inList :: PandocMonad m => MarkdownParser m ()
inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
-spnl :: Parser [Char] st ()
+spnl :: PandocMonad m => ParserT [Char] st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
-indentSpaces :: MarkdownParser String
+spnl' :: PandocMonad m => ParserT [Char] st m String
+spnl' = try $ do
+ xs <- many spaceChar
+ ys <- option "" $ try $ (:) <$> newline
+ <*> (many spaceChar <* notFollowedBy (char '\n'))
+ return (xs ++ ys)
+
+indentSpaces :: PandocMonad m => MarkdownParser m String
indentSpaces = try $ do
tabStop <- getOption readerTabStop
count tabStop (char ' ') <|>
string "\t" <?> "indentation"
-nonindentSpaces :: MarkdownParser String
+nonindentSpaces :: PandocMonad m => MarkdownParser m String
nonindentSpaces = do
- tabStop <- getOption readerTabStop
- sps <- many (char ' ')
- if length sps < tabStop
- then return sps
- else unexpected "indented line"
+ n <- skipNonindentSpaces
+ return $ replicate n ' '
-- returns number of spaces parsed
-skipNonindentSpaces :: MarkdownParser Int
+skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
skipNonindentSpaces = do
tabStop <- getOption readerTabStop
- atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ')
+ gobbleAtMostSpaces (tabStop - 1) <* notFollowedBy spaceChar
-atMostSpaces :: Int -> MarkdownParser Int
-atMostSpaces n
- | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0
- | otherwise = return 0
-
-litChar :: MarkdownParser Char
+litChar :: PandocMonad m => MarkdownParser m Char
litChar = escapedChar'
<|> characterReference
<|> noneOf "\n"
@@ -162,30 +154,32 @@ litChar = escapedChar'
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: MarkdownParser (F Inlines)
-inlinesInBalancedBrackets = do
- char '['
- (_, raw) <- withRaw $ charsInBalancedBrackets 1
- guard $ not $ null raw
- parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
-
-charsInBalancedBrackets :: Int -> MarkdownParser ()
-charsInBalancedBrackets 0 = return ()
-charsInBalancedBrackets openBrackets =
- (char '[' >> charsInBalancedBrackets (openBrackets + 1))
- <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1))
- <|> (( (() <$ code)
- <|> (() <$ (escapedChar'))
- <|> (newline >> notFollowedBy blankline)
- <|> skipMany1 (noneOf "[]`\n\\")
- <|> (() <$ count 1 (oneOf "`\\"))
- ) >> charsInBalancedBrackets openBrackets)
+inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
+inlinesInBalancedBrackets =
+ try $ char '[' >> withRaw (go 1) >>=
+ parseFromString inlines . stripBracket . snd
+ where stripBracket [] = []
+ stripBracket xs = if last xs == ']' then init xs else xs
+ go :: PandocMonad m => Int -> MarkdownParser m ()
+ go 0 = return ()
+ go openBrackets =
+ (() <$ (escapedChar <|>
+ code <|>
+ rawHtmlInline <|>
+ rawLaTeXInline') >> go openBrackets)
+ <|>
+ (do char ']'
+ Control.Monad.when (openBrackets > 1) $ go (openBrackets - 1))
+ <|>
+ (char '[' >> go (openBrackets + 1))
+ <|>
+ (anyChar >> go openBrackets)
--
-- document structure
--
-rawTitleBlockLine :: MarkdownParser String
+rawTitleBlockLine :: PandocMonad m => MarkdownParser m String
rawTitleBlockLine = do
char '%'
skipSpaces
@@ -196,13 +190,13 @@ rawTitleBlockLine = do
anyLine
return $ trim $ unlines (first:rest)
-titleLine :: MarkdownParser (F Inlines)
+titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
titleLine = try $ do
raw <- rawTitleBlockLine
- res <- parseFromString (many inline) raw
- return $ trimInlinesF $ mconcat res
+ res <- parseFromString' inlines raw
+ return $ trimInlinesF res
-authorsLine :: MarkdownParser (F [Inlines])
+authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines])
authorsLine = try $ do
raw <- rawTitleBlockLine
let sep = (char ';' <* spaces) <|> newline
@@ -210,18 +204,18 @@ authorsLine = try $ do
(trimInlinesF . mconcat <$> many
(try $ notFollowedBy sep >> inline))
sep
- sequence <$> parseFromString pAuthors raw
+ sequence <$> parseFromString' pAuthors raw
-dateLine :: MarkdownParser (F Inlines)
+dateLine :: PandocMonad m => MarkdownParser m (F Inlines)
dateLine = try $ do
raw <- rawTitleBlockLine
- res <- parseFromString (many inline) raw
- return $ trimInlinesF $ mconcat res
+ res <- parseFromString' inlines raw
+ return $ trimInlinesF res
-titleBlock :: MarkdownParser ()
+titleBlock :: PandocMonad m => MarkdownParser m ()
titleBlock = pandocTitleBlock <|> mmdTitleBlock
-pandocTitleBlock :: MarkdownParser ()
+pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
pandocTitleBlock = try $ do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
@@ -239,7 +233,8 @@ pandocTitleBlock = try $ do
$ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-yamlMetaBlock :: MarkdownParser (F Blocks)
+
+yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
yamlMetaBlock = try $ do
guardEnabled Ext_yaml_metadata_block
pos <- getPosition
@@ -250,87 +245,105 @@ yamlMetaBlock = try $ do
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- opts <- stateOptions <$> getState
- meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
- Right (Yaml.Object hashmap) -> return $ return $
- H.foldrWithKey (\k v m ->
- if ignorable k
- then m
- else case yamlToMeta opts v of
- Left _ -> m
- Right v' -> B.setMeta (T.unpack k) v' m)
- nullMeta hashmap
- Right Yaml.Null -> return $ return nullMeta
- Right _ -> do
- addWarning (Just pos) "YAML header is not an object"
- return $ return nullMeta
- Left err' -> do
- case err' of
- InvalidYaml (Just YamlParseException{
- yamlProblem = problem
- , yamlContext = _ctxt
- , yamlProblemMark = Yaml.YamlMark {
- yamlLine = yline
- , yamlColumn = ycol
- }}) ->
- addWarning (Just $ setSourceLine
- (setSourceColumn pos
- (sourceColumn pos + ycol))
- (sourceLine pos + 1 + yline))
- $ "Could not parse YAML header: " ++
- problem
- _ -> addWarning (Just pos)
- $ "Could not parse YAML header: " ++
- show err'
- return $ return nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+ case Yaml.decodeEither' $ UTF8.fromString rawYaml of
+ Right (Yaml.Object hashmap) -> do
+ let alist = H.toList hashmap
+ mapM_ (\(k, v) ->
+ if ignorable k
+ then return ()
+ else do
+ v' <- yamlToMeta v
+ let k' = T.unpack k
+ updateState $ \st -> st{ stateMeta' =
+ do m <- stateMeta' st
+ -- if there's already a value, leave it unchanged
+ case lookupMeta k' m of
+ Just _ -> return m
+ Nothing -> do
+ v'' <- v'
+ return $ B.setMeta (T.unpack k) v'' m}
+ ) alist
+ Right Yaml.Null -> return ()
+ Right _ -> do
+ logMessage $
+ CouldNotParseYamlMetadata "not an object"
+ pos
+ return ()
+ Left err' -> do
+ case err' of
+ InvalidYaml (Just YamlParseException{
+ yamlProblem = problem
+ , yamlContext = _ctxt
+ , yamlProblemMark = Yaml.YamlMark {
+ yamlLine = yline
+ , yamlColumn = ycol
+ }}) ->
+ logMessage $ CouldNotParseYamlMetadata
+ problem (setSourceLine
+ (setSourceColumn pos
+ (sourceColumn pos + ycol))
+ (sourceLine pos + 1 + yline))
+ _ -> logMessage $ CouldNotParseYamlMetadata
+ (show err') pos
+ return ()
return mempty
-- ignore fields ending with _
ignorable :: Text -> Bool
ignorable t = (T.pack "_") `T.isSuffixOf` t
-toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue
-toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
- where
- toMeta p =
- case p of
- Pandoc _ [Plain xs] -> MetaInlines xs
- Pandoc _ [Para xs]
- | endsWithNewline x -> MetaBlocks [Para xs]
- | otherwise -> MetaInlines xs
- Pandoc _ bs -> MetaBlocks bs
- endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
- opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts}
- meta_exts = Set.fromList [ Ext_pandoc_title_block
- , Ext_mmd_title_block
- , Ext_yaml_metadata_block
- ]
-
-yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue
-yamlToMeta opts (Yaml.String t) = toMetaValue opts t
-yamlToMeta _ (Yaml.Number n)
+toMetaValue :: PandocMonad m
+ => Text -> MarkdownParser m (F MetaValue)
+toMetaValue x =
+ parseFromString' parser' (T.unpack x)
+ where parser' = (asInlines <$> ((trimInlinesF . mconcat)
+ <$> try (guard (not endsWithNewline)
+ *> manyTill inline eof)))
+ <|> (asBlocks <$> parseBlocks)
+ asBlocks p = do
+ p' <- p
+ return $ MetaBlocks (B.toList p')
+ asInlines p = do
+ p' <- p
+ return $ MetaInlines (B.toList p')
+ endsWithNewline = T.pack "\n" `T.isSuffixOf` x
+ -- Note: a standard quoted or unquoted YAML value will
+ -- not end in a newline, but a "block" set off with
+ -- `|` or `>` will.
+
+yamlToMeta :: PandocMonad m
+ => Yaml.Value -> MarkdownParser m (F MetaValue)
+yamlToMeta (Yaml.String t) = toMetaValue t
+yamlToMeta (Yaml.Number n)
-- avoid decimal points for numbers that don't need them:
- | base10Exponent n >= 0 = return $ MetaString $ show
+ | base10Exponent n >= 0 = return $ return $ MetaString $ show
$ coefficient n * (10 ^ base10Exponent n)
- | otherwise = return $ MetaString $ show n
-yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b
-yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts)
- (V.toList xs)
-yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m ->
- if ignorable k
- then m
- else (do
- v' <- yamlToMeta opts v
- m' <- m
- return (M.insert (T.unpack k) v' m')))
- (return M.empty) o
-yamlToMeta _ _ = return $ MetaString ""
-
-stopLine :: MarkdownParser ()
+ | otherwise = return $ return $ MetaString $ show n
+yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b
+yamlToMeta (Yaml.Array xs) = do
+ xs' <- mapM yamlToMeta (V.toList xs)
+ return $ do
+ xs'' <- sequence xs'
+ return $ B.toMetaValue xs''
+yamlToMeta (Yaml.Object o) = do
+ let alist = H.toList o
+ foldM (\m (k,v) ->
+ if ignorable k
+ then return m
+ else do
+ v' <- yamlToMeta v
+ return $ do
+ MetaMap m' <- m
+ v'' <- v'
+ return (MetaMap $ M.insert (T.unpack k) v'' m'))
+ (return $ MetaMap M.empty)
+ alist
+yamlToMeta _ = return $ return $ MetaString ""
+
+stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-mmdTitleBlock :: MarkdownParser ()
+mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
firstPair <- kvPair False
@@ -340,49 +353,44 @@ mmdTitleBlock = try $ do
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
return (Meta $ M.fromList kvPairs) }
-kvPair :: Bool -> MarkdownParser (String, MetaValue)
+kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue)
kvPair allowEmpty = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
val <- trim <$> manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
guard $ allowEmpty || not (null val)
let key' = concat $ words $ map toLower key
- let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
+ let val' = MetaBlocks $ B.toList $ B.plain $B.text val
return (key',val')
-parseMarkdown :: MarkdownParser Pandoc
+parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
parseMarkdown = do
- -- markdown allows raw HTML
- updateState $ \state -> state { stateOptions =
- let oldOpts = stateOptions state in
- oldOpts{ readerParseRaw = True } }
optional titleBlock
blocks <- parseBlocks
st <- getState
- let meta = runF (stateMeta' st) st
- let Pandoc _ bs = B.doc $ runF blocks st
- eastAsianLineBreaks <- option False $
- True <$ guardEnabled Ext_east_asian_line_breaks
- return $ (if eastAsianLineBreaks
- then bottomUp softBreakFilter
- else id) $ Pandoc meta bs
-
-softBreakFilter :: [Inline] -> [Inline]
-softBreakFilter (x:SoftBreak:y:zs) =
- case (stringify x, stringify y) of
- (xs@(_:_), (c:_))
- | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
- _ -> x:SoftBreak:y:zs
-softBreakFilter xs = xs
-
-referenceKey :: MarkdownParser (F Blocks)
+ -- check for notes with no corresponding note references
+ let notesUsed = stateNoteRefs st
+ let notesDefined = M.keys (stateNotes' st)
+ mapM_ (\n -> unless (n `Set.member` notesUsed) $
+ case M.lookup n (stateNotes' st) of
+ Just (pos, _) -> report (NoteDefinedButNotUsed n pos)
+ Nothing -> throwError $
+ PandocShouldNeverHappenError "note not found")
+ notesDefined
+ let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
+ meta <- stateMeta' st
+ return $ Pandoc meta bs) st
+ reportLogMessages
+ return doc
+
+referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
pos <- getPosition
skipNonindentSpaces
(_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let sourceURL = liftM unwords $ many $ try $ do
+ let sourceURL = fmap unwords $ many $ try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
@@ -392,7 +400,9 @@ referenceKey = try $ do
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
attr <- option nullAttr $ try $
- guardEnabled Ext_link_attributes >> skipSpaces >> attributes
+ do guardEnabled Ext_link_attributes
+ skipSpaces >> optional newline >> skipSpaces
+ attributes
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
>> many (try $ spnl >> keyValAttr)
blanklines
@@ -402,18 +412,22 @@ referenceKey = try $ do
let oldkeys = stateKeys st
let key = toKey raw
case M.lookup key oldkeys of
- Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'"
- Nothing -> return ()
+ Just (t,a) | not (t == target && a == attr') ->
+ -- We don't warn on two duplicate keys if the targets are also
+ -- the same. This can happen naturally with --reference-location=block
+ -- or section. See #3701.
+ logMessage $ DuplicateLinkReference raw pos
+ _ -> return ()
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
-referenceTitle :: MarkdownParser String
+referenceTitle :: PandocMonad m => MarkdownParser m String
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
-- A link title in quotes
-quotedTitle :: Char -> MarkdownParser String
+quotedTitle :: PandocMonad m => Char -> MarkdownParser m String
quotedTitle c = try $ do
char c
notFollowedBy spaces
@@ -425,7 +439,7 @@ quotedTitle c = try $ do
-- | PHP Markdown Extra style abbreviation key. Currently
-- we just skip them, since Pandoc doesn't have an element for
-- an abbreviation.
-abbrevKey :: MarkdownParser (F Blocks)
+abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks)
abbrevKey = do
guardEnabled Ext_abbreviations
try $ do
@@ -436,23 +450,23 @@ abbrevKey = do
blanklines
return $ return mempty
-noteMarker :: MarkdownParser String
+noteMarker :: PandocMonad m => MarkdownParser m String
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
-rawLine :: MarkdownParser String
+rawLine :: PandocMonad m => MarkdownParser m String
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
-rawLines :: MarkdownParser String
+rawLines :: PandocMonad m => MarkdownParser m String
rawLines = do
first <- anyLine
rest <- many rawLine
return $ unlines (first:rest)
-noteBlock :: MarkdownParser (F Blocks)
+noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
noteBlock = try $ do
pos <- getPosition
skipNonindentSpaces
@@ -464,26 +478,23 @@ noteBlock = try $ do
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
let raw = unlines (first:rest) ++ "\n"
optional blanklines
- parsed <- parseFromString parseBlocks raw
- let newnote = (ref, parsed)
+ parsed <- parseFromString' parseBlocks raw
oldnotes <- stateNotes' <$> getState
- case lookup ref oldnotes of
- Just _ -> addWarning (Just pos) $ "Duplicate note reference `" ++ ref ++ "'"
+ case M.lookup ref oldnotes of
+ Just _ -> logMessage $ DuplicateNoteReference ref pos
Nothing -> return ()
- updateState $ \s -> s { stateNotes' = newnote : oldnotes }
+ updateState $ \s -> s { stateNotes' = M.insert ref (pos, parsed) oldnotes }
return mempty
--
-- parsing blocks
--
-parseBlocks :: MarkdownParser (F Blocks)
+parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
parseBlocks = mconcat <$> manyTill block eof
-block :: MarkdownParser (F Blocks)
+block :: PandocMonad m => MarkdownParser m (F Blocks)
block = do
- tr <- getOption readerTrace
- pos <- getPosition
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
@@ -493,10 +504,10 @@ block = do
, header
, lhsCodeBlock
, divHtml
+ , divFenced
, htmlBlock
, table
, codeBlockIndented
- , guardEnabled Ext_latex_macros *> (macro >>= return . return)
, rawTeXBlock
, lineBlock
, blockQuote
@@ -509,30 +520,29 @@ block = do
, para
, plain
] <?> "block"
- when tr $ do
- st <- getState
- trace (printf "line %d: %s" (sourceLine pos)
- (take 60 $ show $ B.toList $ runF res st)) (return ())
+ trace (take 60 $ show $ B.toList $ runF res defaultParserState)
return res
--
-- header blocks
--
-header :: MarkdownParser (F Blocks)
+header :: PandocMonad m => MarkdownParser m (F Blocks)
header = setextHeader <|> atxHeader <?> "header"
-atxChar :: MarkdownParser Char
+atxChar :: PandocMonad m => MarkdownParser m Char
atxChar = do
exts <- getOption readerExtensions
- return $ if Set.member Ext_literate_haskell exts
- then '=' else '#'
+ return $ if extensionEnabled Ext_literate_haskell exts
+ then '='
+ else '#'
-atxHeader :: MarkdownParser (F Blocks)
+atxHeader :: PandocMonad m => MarkdownParser m (F Blocks)
atxHeader = try $ do
- level <- atxChar >>= many1 . char >>= return . length
+ level <- fmap length (atxChar >>= many1 . char)
notFollowedBy $ guardEnabled Ext_fancy_lists >>
(char '.' <|> char ')') -- this would be a list
+ guardDisabled Ext_space_in_atx_header <|> notFollowedBy nonspaceChar
skipSpaces
(text, raw) <- withRaw $
trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
@@ -542,7 +552,7 @@ atxHeader = try $ do
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
-atxClosing :: MarkdownParser Attr
+atxClosing :: PandocMonad m => MarkdownParser m Attr
atxClosing = try $ do
attr' <- option nullAttr
(guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
@@ -553,7 +563,7 @@ atxClosing = try $ do
blanklines
return attr
-setextHeaderEnd :: MarkdownParser Attr
+setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr
setextHeaderEnd = try $ do
attr <- option nullAttr
$ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
@@ -561,13 +571,18 @@ setextHeaderEnd = try $ do
blanklines
return attr
-mmdHeaderIdentifier :: MarkdownParser Attr
+mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
mmdHeaderIdentifier = do
- ident <- stripFirstAndLast . snd <$> reference
+ (_, raw) <- reference
+ let raw' = trim $ stripFirstAndLast raw
+ let ident = concat $ words $ map toLower raw'
+ let attr = (ident, [], [])
+ guardDisabled Ext_implicit_header_references
+ <|> registerImplicitHeader raw' attr
skipSpaces
- return (ident,[],[])
+ return attr
-setextHeader :: MarkdownParser (F Blocks)
+setextHeader :: PandocMonad m => MarkdownParser m (F Blocks)
setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
@@ -579,13 +594,13 @@ setextHeader = try $ do
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
- let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
+ let level = fromMaybe 0 (elemIndex underlineChar setextHChars) + 1
attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
-registerImplicitHeader :: String -> Attr -> MarkdownParser ()
+registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m ()
registerImplicitHeader raw attr@(ident, _, _) = do
let key = toKey $ "[" ++ raw ++ "]"
updateState (\s -> s { stateHeaderKeys =
@@ -595,7 +610,7 @@ registerImplicitHeader raw attr@(ident, _, _) = do
-- hrule block
--
-hrule :: Parser [Char] st (F Blocks)
+hrule :: PandocMonad m => ParserT [Char] st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -609,20 +624,21 @@ hrule = try $ do
-- code blocks
--
-indentedLine :: MarkdownParser String
-indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
+indentedLine :: PandocMonad m => MarkdownParser m String
+indentedLine = indentSpaces >> anyLineNewline
-blockDelimiter :: (Char -> Bool)
+blockDelimiter :: PandocMonad m
+ => (Char -> Bool)
-> Maybe Int
- -> Parser [Char] st Int
+ -> ParserT [Char] ParserState m Int
blockDelimiter f len = try $ do
+ skipNonindentSpaces
c <- lookAhead (satisfy f)
case len of
Just l -> count l (char c) >> many (char c) >> return l
- Nothing -> count 3 (char c) >> many (char c) >>=
- return . (+ 3) . length
+ Nothing -> fmap ((+ 3) . length) (count 3 (char c) >> many (char c))
-attributes :: MarkdownParser Attr
+attributes :: PandocMonad m => MarkdownParser m Attr
attributes = try $ do
char '{'
spnl
@@ -630,28 +646,28 @@ attributes = try $ do
char '}'
return $ foldl (\x f -> f x) nullAttr attrs
-attribute :: MarkdownParser (Attr -> Attr)
+attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
-identifier :: MarkdownParser String
+identifier :: PandocMonad m => MarkdownParser m String
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
return (first:rest)
-identifierAttr :: MarkdownParser (Attr -> Attr)
+identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
identifierAttr = try $ do
char '#'
result <- identifier
return $ \(_,cs,kvs) -> (result,cs,kvs)
-classAttr :: MarkdownParser (Attr -> Attr)
+classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
classAttr = try $ do
char '.'
result <- identifier
return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs)
-keyValAttr :: MarkdownParser (Attr -> Attr)
+keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
keyValAttr = try $ do
key <- identifier
char '='
@@ -664,33 +680,53 @@ keyValAttr = try $ do
"class" -> (id',cs ++ words val,kvs)
_ -> (id',cs,kvs ++ [(key,val)])
-specialAttr :: MarkdownParser (Attr -> Attr)
+specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
-codeBlockFenced :: MarkdownParser (F Blocks)
+rawAttribute :: PandocMonad m => MarkdownParser m String
+rawAttribute = do
+ char '{'
+ skipMany spaceChar
+ char '='
+ format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_")
+ skipMany spaceChar
+ char '}'
+ return format
+
+codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
+ indentchars <- nonindentSpaces
+ let indentLevel = length indentchars
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
skipMany spaceChar
- attr <- option ([],[],[]) $
- try (guardEnabled Ext_fenced_code_attributes >> attributes)
- <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
+ rawattr <-
+ (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ <|>
+ (Right <$> option ("",[],[])
+ (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+ <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)))
blankline
- contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
+ contents <- intercalate "\n" <$>
+ manyTill (gobbleAtMostSpaces indentLevel >> anyLine)
+ (blockDelimiter (== c) (Just size))
blanklines
- return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+ return $ return $
+ case rawattr of
+ Left syn -> B.rawBlock syn contents
+ Right attr -> B.codeBlockWith attr contents
-- correctly handle github language identifiers
toLanguageId :: String -> String
toLanguageId = map toLower . go
- where go "c++" = "cpp"
+ where go "c++" = "cpp"
go "objective-c" = "objectivec"
- go x = x
+ go x = x
-codeBlockIndented :: MarkdownParser (F Blocks)
+codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
@@ -701,7 +737,7 @@ codeBlockIndented = do
return $ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ concat contents
-lhsCodeBlock :: MarkdownParser (F Blocks)
+lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lhsCodeBlock = do
guardEnabled Ext_literate_haskell
(return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
@@ -709,7 +745,7 @@ lhsCodeBlock = do
<|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
lhsCodeBlockInverseBird)
-lhsCodeBlockLaTeX :: MarkdownParser String
+lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String
lhsCodeBlockLaTeX = try $ do
string "\\begin{code}"
manyTill spaceChar newline
@@ -717,13 +753,13 @@ lhsCodeBlockLaTeX = try $ do
blanklines
return $ stripTrailingNewlines contents
-lhsCodeBlockBird :: MarkdownParser String
+lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
-lhsCodeBlockInverseBird :: MarkdownParser String
+lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
-lhsCodeBlockBirdWith :: Char -> MarkdownParser String
+lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String
lhsCodeBlockBirdWith c = try $ do
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
@@ -735,7 +771,7 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ intercalate "\n" lns'
-birdTrackLine :: Char -> Parser [Char] st String
+birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -746,10 +782,10 @@ birdTrackLine c = try $ do
-- block quotes
--
-emailBlockQuoteStart :: MarkdownParser Char
+emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
-emailBlockQuote :: MarkdownParser [String]
+emailBlockQuote :: PandocMonad m => MarkdownParser m [String]
emailBlockQuote = try $ do
emailBlockQuoteStart
let emailLine = many $ nonEndline <|> try
@@ -763,113 +799,136 @@ emailBlockQuote = try $ do
optional blanklines
return raw
-blockQuote :: MarkdownParser (F Blocks)
+blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n"
return $ B.blockQuote <$> contents
--
-- list blocks
--
-bulletListStart :: MarkdownParser ()
+bulletListStart :: PandocMonad m => MarkdownParser m ()
bulletListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
- startpos <- sourceColumn <$> getPosition
skipNonindentSpaces
notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
satisfy isBulletListMarker
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- () <$ atMostSpaces (tabStop - (endpos - startpos))
+ gobbleSpaces 1 <|> () <$ lookAhead newline
+ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) <|> return ()
-anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim)
-anyOrderedListStart = try $ do
+orderedListStart :: PandocMonad m
+ => Maybe (ListNumberStyle, ListNumberDelim)
+ -> MarkdownParser m (Int, ListNumberStyle, ListNumberDelim)
+orderedListStart mbstydelim = try $ do
optional newline -- if preceded by a Plain block in a list context
- startpos <- sourceColumn <$> getPosition
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
- res <- do guardDisabled Ext_fancy_lists
- start <- many1 digit >>= safeRead
- char '.'
- return (start, DefaultStyle, DefaultDelim)
- <|> do (num, style, delim) <- anyOrderedListMarker
- -- if it could be an abbreviated first name,
- -- insist on more than one space
- when (delim == Period && (style == UpperAlpha ||
- (style == UpperRoman &&
- num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
- () <$ spaceChar
- return (num, style, delim)
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- atMostSpaces (tabStop - (endpos - startpos))
- return res
-
-listStart :: MarkdownParser ()
-listStart = bulletListStart <|> (anyOrderedListStart >> return ())
-
-listLine :: MarkdownParser String
-listLine = try $ do
- notFollowedBy' (do indentSpaces
- many spaceChar
+ (do guardDisabled Ext_fancy_lists
+ start <- many1 digit >>= safeRead
+ char '.'
+ gobbleSpaces 1 <|> () <$ lookAhead newline
+ optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar)
+ return (start, DefaultStyle, DefaultDelim))
+ <|>
+ (do (num, style, delim) <- maybe
+ anyOrderedListMarker
+ (\(sty,delim) -> (\start -> (start,sty,delim)) <$>
+ orderedListMarker sty delim)
+ mbstydelim
+ gobbleSpaces 1 <|> () <$ lookAhead newline
+ -- if it could be an abbreviated first name,
+ -- insist on more than one space
+ when (delim == Period && (style == UpperAlpha ||
+ (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
+ () <$ lookAhead (newline <|> spaceChar)
+ optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar)
+ return (num, style, delim))
+
+listStart :: PandocMonad m => MarkdownParser m ()
+listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing)
+
+listLine :: PandocMonad m => Int -> MarkdownParser m String
+listLine continuationIndent = try $ do
+ notFollowedBy' (do gobbleSpaces continuationIndent
+ skipMany spaceChar
listStart)
notFollowedByHtmlCloser
- optional (() <$ indentSpaces)
+ notFollowedByDivCloser
+ optional (() <$ gobbleSpaces continuationIndent)
listLineCommon
-listLineCommon :: MarkdownParser String
+listLineCommon :: PandocMonad m => MarkdownParser m String
listLineCommon = concat <$> manyTill
( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
- <|> liftM snd (htmlTag isCommentTag)
+ <|> fmap snd (htmlTag isCommentTag)
<|> count 1 anyChar
) newline
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: MarkdownParser a
- -> MarkdownParser String
-rawListItem start = try $ do
+rawListItem :: PandocMonad m
+ => Bool -- four space rule
+ -> MarkdownParser m a
+ -> MarkdownParser m (String, Int)
+rawListItem fourSpaceRule start = try $ do
+ pos1 <- getPosition
start
+ pos2 <- getPosition
+ let continuationIndent = if fourSpaceRule
+ then 4
+ else sourceColumn pos2 - sourceColumn pos1
first <- listLineCommon
- rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
+ rest <- many (do notFollowedBy listStart
+ notFollowedBy (() <$ codeBlockFenced)
+ notFollowedBy blankline
+ listLine continuationIndent)
blanks <- many blankline
- return $ unlines (first:rest) ++ blanks
+ let result = unlines (first:rest) ++ blanks
+ return (result, continuationIndent)
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation :: MarkdownParser String
-listContinuation = try $ do
- lookAhead indentSpaces
- result <- many1 listContinuationLine
+listContinuation :: PandocMonad m => Int -> MarkdownParser m String
+listContinuation continuationIndent = try $ do
+ x <- try $ do
+ notFollowedBy blankline
+ notFollowedByHtmlCloser
+ notFollowedByDivCloser
+ gobbleSpaces continuationIndent
+ anyLineNewline
+ xs <- many $ try $ do
+ notFollowedBy blankline
+ notFollowedByHtmlCloser
+ notFollowedByDivCloser
+ gobbleSpaces continuationIndent <|> notFollowedBy' listStart
+ anyLineNewline
blanks <- many blankline
- return $ concat result ++ blanks
+ return $ concat (x:xs) ++ blanks
+
+notFollowedByDivCloser :: PandocMonad m => MarkdownParser m ()
+notFollowedByDivCloser =
+ guardDisabled Ext_fenced_divs <|>
+ do divLevel <- stateFencedDivLevel <$> getState
+ guard (divLevel < 1) <|> notFollowedBy divFenceEnd
-notFollowedByHtmlCloser :: MarkdownParser ()
+notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m ()
notFollowedByHtmlCloser = do
inHtmlBlock <- stateInHtmlBlock <$> getState
case inHtmlBlock of
Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
Nothing -> return ()
-listContinuationLine :: MarkdownParser String
-listContinuationLine = try $ do
- notFollowedBy blankline
- notFollowedBy' listStart
- notFollowedByHtmlCloser
- optional indentSpaces
- result <- anyLine
- return $ result ++ "\n"
-
-listItem :: MarkdownParser a
- -> MarkdownParser (F Blocks)
-listItem start = try $ do
- first <- rawListItem start
- continuations <- many listContinuation
+listItem :: PandocMonad m
+ => Bool -- four-space rule
+ -> MarkdownParser m a
+ -> MarkdownParser m (F Blocks)
+listItem fourSpaceRule start = try $ do
+ (first, continuationIndent) <- rawListItem fourSpaceRule start
+ continuations <- many (listContinuation continuationIndent)
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
@@ -878,39 +937,34 @@ listItem start = try $ do
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may contain various block elements:
let raw = concat (first:continuations)
- contents <- parseFromString parseBlocks raw
+ contents <- parseFromString' parseBlocks raw
updateState (\st -> st {stateParserContext = oldContext})
return contents
-orderedList :: MarkdownParser (F Blocks)
+orderedList :: PandocMonad m => MarkdownParser m (F Blocks)
orderedList = try $ do
- (start, style, delim) <- lookAhead anyOrderedListStart
+ (start, style, delim) <- lookAhead (orderedListStart Nothing)
unless (style `elem` [DefaultStyle, Decimal, Example] &&
delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
- items <- fmap sequence $ many1 $ listItem
- ( try $ do
- optional newline -- if preceded by Plain block in a list
- startpos <- sourceColumn <$> getPosition
- skipNonindentSpaces
- res <- orderedListMarker style delim
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- atMostSpaces (tabStop - (endpos - startpos))
- return res )
- start' <- option 1 $ guardEnabled Ext_startnum >> return start
- return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items
-
-bulletList :: MarkdownParser (F Blocks)
+ fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule)
+ <|> return (style == Example)
+ items <- fmap sequence $ many1 $ listItem fourSpaceRule
+ (orderedListStart (Just (style, delim)))
+ start' <- (start <$ guardEnabled Ext_startnum) <|> return 1
+ return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
+
+bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
bulletList = do
- items <- fmap sequence $ many1 $ listItem bulletListStart
- return $ B.bulletList <$> fmap compactify' items
+ fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule)
+ <|> return False
+ items <- fmap sequence $ many1 $ listItem fourSpaceRule bulletListStart
+ return $ B.bulletList <$> fmap compactify items
-- definition lists
-defListMarker :: MarkdownParser ()
+defListMarker :: PandocMonad m => MarkdownParser m ()
defListMarker = do
sps <- nonindentSpaces
char ':' <|> char '~'
@@ -921,52 +975,53 @@ defListMarker = do
else mzero
return ()
-definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks]))
+definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks]))
definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
- term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
- contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
+ term <- parseFromString' (trimInlinesF <$> inlines) rawLine'
+ contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw
optional blanklines
return $ liftM2 (,) term (sequence contents)
-defRawBlock :: Bool -> MarkdownParser String
+defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
defRawBlock compact = try $ do
hasBlank <- option False $ blankline >> return True
defListMarker
- firstline <- anyLine
+ firstline <- anyLineNewline
let dline = try
( do notFollowedBy blankline
notFollowedByHtmlCloser
+ notFollowedByDivCloser
if compact -- laziness not compatible with compact
then () <$ indentSpaces
else (() <$ indentSpaces)
<|> notFollowedBy defListMarker
anyLine )
rawlines <- many dline
- cont <- liftM concat $ many $ try $ do
+ cont <- fmap concat $ many $ try $ do
trailing <- option "" blanklines
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
lns <- many dline
return $ trailing ++ unlines (ln:lns)
- return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
+ return $ trimr (firstline ++ unlines rawlines ++ cont) ++
if hasBlank || not (null cont) then "\n\n" else ""
-definitionList :: MarkdownParser (F Blocks)
+definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
definitionList = try $ do
lookAhead (anyLine >>
- optional (blankline >> notFollowedBy (table >> return ())) >>
+ optional (blankline >> notFollowedBy (Control.Monad.void table)) >>
-- don't capture table caption as def list!
defListMarker)
compactDefinitionList <|> normalDefinitionList
-compactDefinitionList :: MarkdownParser (F Blocks)
+compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
compactDefinitionList = do
guardEnabled Ext_compact_definition_lists
items <- fmap sequence $ many1 $ definitionListItem True
- return $ B.definitionList <$> fmap compactify'DL items
+ return $ B.definitionList <$> fmap compactifyDL items
-normalDefinitionList :: MarkdownParser (F Blocks)
+normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
normalDefinitionList = do
guardEnabled Ext_definition_lists
items <- fmap sequence $ many1 $ definitionListItem False
@@ -976,10 +1031,10 @@ normalDefinitionList = do
-- paragraph block
--
-para :: MarkdownParser (F Blocks)
+para :: PandocMonad m => MarkdownParser m (F Blocks)
para = try $ do
exts <- getOption readerExtensions
- result <- trimInlinesF . mconcat <$> many1 inline
+ result <- trimInlinesF <$> inlines1
option (B.plain <$> result)
$ try $ do
newline
@@ -997,29 +1052,35 @@ para = try $ do
Just "div" -> () <$
lookAhead (htmlTag (~== TagClose "div"))
_ -> mzero
+ <|> do guardEnabled Ext_fenced_divs
+ divLevel <- stateFencedDivLevel <$> getState
+ if divLevel > 0
+ then lookAhead divFenceEnd
+ else mzero
return $ do
result' <- result
case B.toList result' of
[Image attr alt (src,tit)]
- | Ext_implicit_figures `Set.member` exts ->
+ | not (null alt) &&
+ Ext_implicit_figures `extensionEnabled` exts ->
-- the fig: at beginning of title indicates a figure
return $ B.para $ B.singleton
$ Image attr alt (src,'f':'i':'g':':':tit)
_ -> return $ B.para result'
-plain :: MarkdownParser (F Blocks)
-plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
+plain :: PandocMonad m => MarkdownParser m (F Blocks)
+plain = fmap B.plain . trimInlinesF <$> inlines1
--
-- raw html
--
-htmlElement :: MarkdownParser String
+htmlElement :: PandocMonad m => MarkdownParser m String
htmlElement = rawVerbatimBlock
<|> strictHtmlBlock
- <|> liftM snd (htmlTag isBlockTag)
+ <|> fmap snd (htmlTag isBlockTag)
-htmlBlock :: MarkdownParser (F Blocks)
+htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks)
htmlBlock = do
guardEnabled Ext_raw_html
try (do
@@ -1044,43 +1105,59 @@ htmlBlock = do
<|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
<|> htmlBlock'
-htmlBlock' :: MarkdownParser (F Blocks)
+htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
- return $ return $ B.rawBlock "html" first
+ return $ if null first
+ then mempty
+ else return $ B.rawBlock "html" first
-strictHtmlBlock :: MarkdownParser String
+strictHtmlBlock :: PandocMonad m => MarkdownParser m String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
-rawVerbatimBlock :: MarkdownParser String
+rawVerbatimBlock :: PandocMonad m => MarkdownParser m String
rawVerbatimBlock = htmlInBalanced isVerbTag
where isVerbTag (TagOpen "pre" _) = True
isVerbTag (TagOpen "style" _) = True
isVerbTag (TagOpen "script" _) = True
isVerbTag _ = False
-rawTeXBlock :: MarkdownParser (F Blocks)
+rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- (B.rawBlock "latex" . concat <$>
- rawLaTeXBlock `sepEndBy1` blankline)
- <|> (B.rawBlock "context" . concat <$>
- rawConTeXtEnvironment `sepEndBy1` blankline)
- spaces
- return $ return result
-
-rawHtmlBlocks :: MarkdownParser (F Blocks)
+ lookAhead $ try $ char '\\' >> letter
+ result <- (B.rawBlock "context" . trim . concat <$>
+ many1 ((++) <$> (rawConTeXtEnvironment <|> conTeXtCommand)
+ <*> spnl'))
+ <|> (B.rawBlock "latex" . trim . concat <$>
+ many1 ((++) <$> rawLaTeXBlock <*> spnl'))
+ return $ case B.toList result of
+ [RawBlock _ cs]
+ | all (`elem` [' ','\t','\n']) cs -> return mempty
+ -- don't create a raw block for suppressed macro defs
+ _ -> return result
+
+conTeXtCommand :: PandocMonad m => MarkdownParser m String
+conTeXtCommand = oneOfStrings ["\\placeformula"]
+
+rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
+ -- we don't want '<td> text' to be a code block:
+ skipMany spaceChar
+ indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
-- try to find closing tag
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
let closer = htmlTag (\x -> x ~== TagClose tagtype)
- contents <- mconcat <$> many (notFollowedBy' closer >> block)
+ let block' = do notFollowedBy' closer
+ gobbleAtMostSpaces indentlevel
+ block
+ contents <- mconcat <$> many block'
result <-
(closer >>= \(_, rawcloser) -> return (
return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
@@ -1101,11 +1178,11 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
-- line block
--
-lineBlock :: MarkdownParser (F Blocks)
+lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lineBlock = try $ do
guardEnabled Ext_line_blocks
lines' <- lineBlockLines >>=
- mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
+ mapM (parseFromString' (trimInlinesF <$> inlines))
return $ B.lineBlock <$> sequence lines'
--
@@ -1114,8 +1191,9 @@ lineBlock = try $ do
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
-dashedLine :: Char
- -> Parser [Char] st (Int, Int)
+dashedLine :: PandocMonad m
+ => Char
+ -> ParserT [Char] st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
@@ -1125,8 +1203,9 @@ dashedLine ch = do
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
-simpleTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+simpleTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m (F [Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -1137,17 +1216,17 @@ simpleTableHeader headless = try $ do
let (lengths, lines') = unzip dashes
let indices = scanl (+) (length initSp) lines'
-- If no header, calculate alignment on basis of first row of text
- rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
+ rawHeads <- fmap (tail . splitStringByIndices (init indices)) $
if headless
then lookAhead anyLine
else return rawContent
- let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ let aligns = zipWith alignType (map (: []) rawHeads) lengths
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
heads <- fmap sequence
- $ mapM (parseFromString (mconcat <$> many plain))
- $ map trim rawHeads'
+ $
+ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads'
return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
@@ -1161,25 +1240,26 @@ alignType strLst len =
let nonempties = filter (not . null) $ map trimr strLst
(leftSpace, rightSpace) =
case sortBy (comparing length) nonempties of
- (x:_) -> (head x `elem` " \t", length x < len)
- [] -> (False, False)
+ (x:_) -> (head x `elem` " \t", length x < len)
+ [] -> (False, False)
in case (leftSpace, rightSpace) of
- (True, False) -> AlignRight
- (False, True) -> AlignLeft
- (True, True) -> AlignCenter
- (False, False) -> AlignDefault
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: MarkdownParser String
+tableFooter :: PandocMonad m => MarkdownParser m String
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
-- Parse a table separator - dashed line.
-tableSep :: MarkdownParser Char
+tableSep :: PandocMonad m => MarkdownParser m Char
tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
-- Parse a raw line and split it into chunks by indices.
-rawTableLine :: [Int]
- -> MarkdownParser [String]
+rawTableLine :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m [String]
rawTableLine indices = do
notFollowedBy' (blanklines <|> tableFooter)
line <- many1Till anyChar newline
@@ -1187,31 +1267,34 @@ rawTableLine indices = do
splitStringByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
-tableLine :: [Int]
- -> MarkdownParser (F [Blocks])
+tableLine :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m (F [Blocks])
tableLine indices = rawTableLine indices >>=
- fmap sequence . mapM (parseFromString (mconcat <$> many plain))
+ fmap sequence . mapM (parseFromString' (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
-multilineRow :: [Int]
- -> MarkdownParser (F [Blocks])
+multilineRow :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
+ fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
-tableCaption :: MarkdownParser (F Inlines)
+tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
tableCaption = try $ do
guardEnabled Ext_table_captions
skipNonindentSpaces
- string ":" <|> string "Table:"
- trimInlinesF . mconcat <$> many1 inline <* blanklines
+ (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
+ trimInlinesF <$> inlines1 <* blanklines
-- Parse a simple table with '---' header and one line per row.
-simpleTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+simpleTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
simpleTable headless = do
(aligns, _widths, heads', lines') <-
tableWith (simpleTableHeader headless) tableLine
@@ -1224,13 +1307,15 @@ simpleTable headless = do
-- (which may be multiline), then the rows,
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-multilineTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+multilineTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
-multilineTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+multilineTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m (F [Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
unless headless $
tableSep >> notFollowedBy blankline
@@ -1243,7 +1328,7 @@ multilineTableHeader headless = try $ do
let (lengths, lines') = unzip dashes
let indices = scanl (+) (length initSp) lines'
rawHeadsList <- if headless
- then liftM (map (:[]) . tail .
+ then fmap (map (:[]) . tail .
splitStringByIndices (init indices)) $ lookAhead anyLine
else return $ transpose $ map
(tail . splitStringByIndices (init indices))
@@ -1253,101 +1338,18 @@ multilineTableHeader headless = try $ do
then replicate (length dashes) ""
else map (unlines . map trim) rawHeadsList
heads <- fmap sequence $
- mapM (parseFromString (mconcat <$> many plain)) $
- map trim rawHeads
+ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads
return (heads, aligns, indices)
-- Parse a grid table: starts with row of '-' on top, then header
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
-gridTable headless =
- tableWith (gridTableHeader headless) gridTableRow
- (gridTableSep '-') gridTableFooter
-
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line = map removeFinalBar $ tail $
- splitStringByIndices (init indices) $ trimr line
-
-gridPart :: Char -> Parser [Char] st ((Int, Int), Alignment)
-gridPart ch = do
- leftColon <- option False (True <$ char ':')
- dashes <- many1 (char ch)
- rightColon <- option False (True <$ char ':')
- char '+'
- let lengthDashes = length dashes + (if leftColon then 1 else 0) +
- (if rightColon then 1 else 0)
- let alignment = case (leftColon, rightColon) of
- (True, True) -> AlignCenter
- (True, False) -> AlignLeft
- (False, True) -> AlignRight
- (False, False) -> AlignDefault
- return ((lengthDashes, lengthDashes + 1), alignment)
-
-gridDashedLines :: Char -> Parser [Char] st [((Int, Int), Alignment)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
-
-removeFinalBar :: String -> String
-removeFinalBar =
- reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-
--- | Separator between rows of grid table.
-gridTableSep :: Char -> MarkdownParser Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
--- | Parse header for a grid table.
-gridTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
-gridTableHeader headless = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return []
- else many1 (try (char '|' >> anyLine))
- underDashes <- if headless
- then return dashes
- else gridDashedLines '='
- guard $ length dashes == length underDashes
- let lines' = map (snd . fst) underDashes
- let indices = scanl (+) 0 lines'
- let aligns = map snd underDashes
- let rawHeads = if headless
- then replicate (length underDashes) ""
- else map (unlines . map trim) $ transpose
- $ map (gridTableSplitLine indices) rawContent
- heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
- return (heads, aligns, indices)
+gridTable :: PandocMonad m => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
+gridTable headless = gridTableWith' parseBlocks headless
-gridTableRawLine :: [Int] -> MarkdownParser [String]
-gridTableRawLine indices = do
- char '|'
- line <- anyLine
- return (gridTableSplitLine indices line)
-
--- | Parse row of grid table.
-gridTableRow :: [Int]
- -> MarkdownParser (F [Blocks])
-gridTableRow indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
--- | Parse footer for a grid table.
-gridTableFooter :: MarkdownParser [Char]
-gridTableFooter = blanklines
-
-pipeBreak :: MarkdownParser ([Alignment], [Int])
+pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
pipeBreak = try $ do
nonindentSpaces
openPipe <- (True <$ char '|') <|> return False
@@ -1359,7 +1361,7 @@ pipeBreak = try $ do
blankline
return $ unzip (first:rest)
-pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
pipeTable = try $ do
nonindentSpaces
lookAhead nonspaceChar
@@ -1372,41 +1374,40 @@ pipeTable = try $ do
numColumns <- getOption readerColumns
let widths = if maxlength > numColumns
then map (\len ->
- fromIntegral (len + 1) / fromIntegral numColumns)
- seplengths
+ fromIntegral len / fromIntegral (sum seplengths))
+ seplengths
else replicate (length aligns) 0.0
- return $ (aligns, widths, heads', sequence lines'')
+ return (aligns, widths, heads', sequence lines'')
-sepPipe :: MarkdownParser ()
+sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do
char '|' <|> char '+'
notFollowedBy blankline
-- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: MarkdownParser (F [Blocks])
+pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
pipeTableRow = try $ do
scanForPipe
skipMany spaceChar
openPipe <- (True <$ char '|') <|> return False
-- split into cells
- let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
+ let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
<|> void (noneOf "|\n\r")
let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
- parseFromString pipeTableCell
- cells <- cellContents `sepEndBy1` (char '|')
+ parseFromString' pipeTableCell
+ cells <- cellContents `sepEndBy1` char '|'
-- surrounding pipes needed for a one-column table:
guard $ not (length cells == 1 && not openPipe)
blankline
return $ sequence cells
-pipeTableCell :: MarkdownParser (F Blocks)
-pipeTableCell = do
- result <- many inline
- if null result
- then return mempty
- else return $ B.plain . mconcat <$> sequence result
+pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
+pipeTableCell =
+ (do result <- inlines1
+ return $ B.plain <$> result)
+ <|> return mempty
-pipeTableHeaderPart :: Parser [Char] st (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1414,15 +1415,15 @@ pipeTableHeaderPart = try $ do
right <- optionMaybe (char ':')
skipMany spaceChar
let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
- return $
- ((case (left,right) of
- (Nothing,Nothing) -> AlignDefault
- (Just _,Nothing) -> AlignLeft
- (Nothing,Just _) -> AlignRight
- (Just _,Just _) -> AlignCenter), len)
+ return
+ (case (left,right) of
+ (Nothing,Nothing) -> AlignDefault
+ (Just _,Nothing) -> AlignLeft
+ (Nothing,Just _) -> AlignRight
+ (Just _,Just _) -> AlignCenter, len)
-- Succeed only if current line contains a pipe.
-scanForPipe :: Parser [Char] st ()
+scanForPipe :: PandocMonad m => ParserT [Char] st m ()
scanForPipe = do
inp <- getInput
case break (\c -> c == '\n' || c == '|') inp of
@@ -1432,22 +1433,23 @@ scanForPipe = do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
-- Text.Pandoc.Parsing.
-tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int])
- -> ([Int] -> MarkdownParser (F [Blocks]))
- -> MarkdownParser sep
- -> MarkdownParser end
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+tableWith :: PandocMonad m
+ => MarkdownParser m (F [Blocks], [Alignment], [Int])
+ -> ([Int] -> MarkdownParser m (F [Blocks]))
+ -> MarkdownParser m sep
+ -> MarkdownParser m end
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
- let widths = if (indices == [])
+ let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
- return $ (aligns, widths, heads, lines')
+ return (aligns, widths, heads, lines')
-table :: MarkdownParser (F Blocks)
+table :: PandocMonad m => MarkdownParser m (F Blocks)
table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <-
@@ -1462,8 +1464,8 @@ table = try $ do
(gridTable False <|> gridTable True)) <?> "table"
optional blanklines
caption <- case frontCaption of
- Nothing -> option (return mempty) tableCaption
- Just c -> return c
+ Nothing -> option (return mempty) tableCaption
+ Just c -> return c
-- renormalize widths if greater than 100%:
let totalWidth = sum widths
let widths' = if totalWidth < 1
@@ -1479,7 +1481,13 @@ table = try $ do
-- inline
--
-inline :: MarkdownParser (F Inlines)
+inlines :: PandocMonad m => MarkdownParser m (F Inlines)
+inlines = mconcat <$> many inline
+
+inlines1 :: PandocMonad m => MarkdownParser m (F Inlines)
+inlines1 = mconcat <$> many1 inline
+
+inline :: PandocMonad m => MarkdownParser m (F Inlines)
inline = choice [ whitespace
, bareURL
, str
@@ -1499,6 +1507,7 @@ inline = choice [ whitespace
, autoLink
, spanHtml
, rawHtmlInline
+ , escapedNewline
, escapedChar
, rawLaTeXInline'
, exampleRef
@@ -1509,32 +1518,36 @@ inline = choice [ whitespace
, ltSign
] <?> "inline"
-escapedChar' :: MarkdownParser Char
+escapedChar' :: PandocMonad m => MarkdownParser m Char
escapedChar' = try $ do
char '\\'
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
<|> (guardEnabled Ext_angle_brackets_escapable >>
oneOf "\\`*_{}[]()>#+-.!~\"<>")
- <|> (guardEnabled Ext_escaped_line_breaks >> char '\n')
<|> oneOf "\\`*_{}[]()>#+-.!~\""
-escapedChar :: MarkdownParser (F Inlines)
+escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
+escapedNewline = try $ do
+ guardEnabled Ext_escaped_line_breaks
+ char '\\'
+ lookAhead (char '\n') -- don't consume the newline (see #3730)
+ return $ return B.linebreak
+
+escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
escapedChar = do
result <- escapedChar'
case result of
- ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
- '\n' -> guardEnabled Ext_escaped_line_breaks >>
- return (return B.linebreak) -- "\[newline]" is a linebreak
- _ -> return $ return $ B.str [result]
+ ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
+ _ -> return $ return $ B.str [result]
-ltSign :: MarkdownParser (F Inlines)
+ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
ltSign = do
guardDisabled Ext_raw_html
<|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
char '<'
return $ return $ B.str "<"
-exampleRef :: MarkdownParser (F Inlines)
+exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
@@ -1542,10 +1555,10 @@ exampleRef = try $ do
return $ do
st <- askF
return $ case M.lookup lab (stateExamples st) of
- Just n -> B.str (show n)
- Nothing -> B.str ('@':lab)
+ Just n -> B.str (show n)
+ Nothing -> B.str ('@':lab)
-symbol :: MarkdownParser (F Inlines)
+symbol :: PandocMonad m => MarkdownParser m (F Inlines)
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
@@ -1554,28 +1567,36 @@ symbol = do
return $ return $ B.str [result]
-- parses inline code, between n `s and n `s
-code :: MarkdownParser (F Inlines)
+code :: PandocMonad m => MarkdownParser m (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
- result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ result <- (trim . concat) <$>
+ many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
- attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
- >> attributes)
- return $ return $ B.codeWith attr $ trim $ concat result
-
-math :: MarkdownParser (F Inlines)
-math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
- ((getOption readerSmart >>= guard) *> (return <$> apostrophe)
+ rawattr <-
+ (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ <|>
+ (Right <$> option ("",[],[])
+ (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+ return $ return $
+ case rawattr of
+ Left syn -> B.rawInline syn result
+ Right attr -> B.codeWith attr result
+
+math :: PandocMonad m => MarkdownParser m (F Inlines)
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros))
+ <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?>
+ (guardEnabled Ext_smart *> (return <$> apostrophe)
<* notFollowedBy (space <|> satisfy isPunctuation))
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
-enclosure :: Char
- -> MarkdownParser (F Inlines)
+enclosure :: PandocMonad m
+ => Char
+ -> MarkdownParser m (F Inlines)
enclosure c = do
-- we can't start an enclosure with _ if after a string and
-- the intraword_underscores extension is enabled:
@@ -1584,14 +1605,14 @@ enclosure c = do
<|> (guard =<< notAfterString)
cs <- many1 (char c)
(return (B.str cs) <>) <$> whitespace
- <|> do
+ <|>
case length cs of
- 3 -> three c
- 2 -> two c mempty
- 1 -> one c mempty
- _ -> return (return $ B.str cs)
+ 3 -> three c
+ 2 -> two c mempty
+ 1 -> one c mempty
+ _ -> return (return $ B.str cs)
-ender :: Char -> Int -> MarkdownParser ()
+ender :: PandocMonad m => Char -> Int -> MarkdownParser m ()
ender c n = try $ do
count n (char c)
guard (c == '*')
@@ -1602,104 +1623,96 @@ ender c n = try $ do
-- If one c, emit emph and then parse two.
-- If two cs, emit strong and then parse one.
-- Otherwise, emit ccc then the results.
-three :: Char -> MarkdownParser (F Inlines)
+three :: PandocMonad m => Char -> MarkdownParser m (F Inlines)
three c = do
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
- (ender c 3 >> return ((B.strong . B.emph) <$> contents))
- <|> (ender c 2 >> one c (B.strong <$> contents))
- <|> (ender c 1 >> two c (B.emph <$> contents))
+ (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents))
+ <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents))
+ <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents))
<|> return (return (B.str [c,c,c]) <> contents)
-- Parse inlines til you hit two c's, and emit strong.
-- If you never do hit two cs, emit ** plus inlines parsed.
-two :: Char -> F Inlines -> MarkdownParser (F Inlines)
+two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
- (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
+ (ender c 2 >> updateLastStrPos >>
+ return (B.strong <$> (prefix' <> contents)))
<|> return (return (B.str [c,c]) <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
-- If you never hit a c, emit * plus inlines parsed.
-one :: Char -> F Inlines -> MarkdownParser (F Inlines)
+one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
one c prefix' = do
contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
<|> try (string [c,c] >>
notFollowedBy (ender c 1) >>
two c mempty) )
- (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
+ (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents)))
<|> return (return (B.str [c]) <> (prefix' <> contents))
-strongOrEmph :: MarkdownParser (F Inlines)
+strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
strongOrEmph = enclosure '*' <|> enclosure '_'
-- | Parses a list of inlines between start and end delimiters.
-inlinesBetween :: (Show b)
- => MarkdownParser a
- -> MarkdownParser b
- -> MarkdownParser (F Inlines)
+inlinesBetween :: PandocMonad m
+ => (Show b)
+ => MarkdownParser m a
+ -> MarkdownParser m b
+ -> MarkdownParser m (F Inlines)
inlinesBetween start end =
(trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
-strikeout :: MarkdownParser (F Inlines)
+strikeout :: PandocMonad m => MarkdownParser m (F Inlines)
strikeout = fmap B.strikeout <$>
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
-superscript :: MarkdownParser (F Inlines)
+superscript :: PandocMonad m => MarkdownParser m (F Inlines)
superscript = fmap B.superscript <$> try (do
guardEnabled Ext_superscript
char '^'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
-subscript :: MarkdownParser (F Inlines)
+subscript :: PandocMonad m => MarkdownParser m (F Inlines)
subscript = fmap B.subscript <$> try (do
guardEnabled Ext_subscript
char '~'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
-whitespace :: MarkdownParser (F Inlines)
+whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: Parser [Char] st Char
+nonEndline :: PandocMonad m => ParserT [Char] st m Char
nonEndline = satisfy (/='\n')
-str :: MarkdownParser (F Inlines)
+str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
- result <- many1 alphaNum
+ result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
updateLastStrPos
- let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
- isSmart <- getOption readerSmart
- if isSmart
- then case likelyAbbrev result of
- [] -> return $ return $ B.str result
- xs -> choice (map (\x ->
- try (string x >> oneOf " \n" >>
- lookAhead alphaNum >>
- return (return $ B.str
- $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ return $ B.str result)
- else return $ return $ B.str result
-
--- | if the string matches the beginning of an abbreviation (before
--- the first period, return strings that would finish the abbreviation.
-likelyAbbrev :: String -> [String]
-likelyAbbrev x =
- let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.",
- "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.",
- "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.",
- "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.",
- "ch.", "sec.", "cf.", "cp."]
- abbrPairs = map (break (=='.')) abbrevs
- in map snd $ filter (\(y,_) -> y == x) abbrPairs
+ (do guardEnabled Ext_smart
+ abbrevs <- getOption readerAbbreviations
+ if not (null result) && last result == '.' && result `Set.member` abbrevs
+ then try (do ils <- whitespace <|> endline
+ lookAhead alphaNum
+ return $ do
+ ils' <- ils
+ if ils' == B.space
+ then return (B.str result <> B.str "\160")
+ else -- linebreak or softbreak
+ return (ils' <> B.str result <> B.str "\160"))
+ <|> return (return (B.str result))
+ else return (return (B.str result)))
+ <|> return (return (B.str result))
-- an endline character that can be treated as a space, not a structural break
-endline :: MarkdownParser (F Inlines)
+endline :: PandocMonad m => MarkdownParser m (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
@@ -1711,6 +1724,7 @@ endline = try $ do
guardDisabled Ext_backtick_code_blocks <|>
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
notFollowedByHtmlCloser
+ notFollowedByDivCloser
(eof >> return mempty)
<|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
@@ -1721,23 +1735,25 @@ endline = try $ do
--
-- a reference label for a link
-reference :: MarkdownParser (F Inlines, String)
-reference = do notFollowedBy' (string "[^") -- footnote reference
- withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
+reference :: PandocMonad m => MarkdownParser m (F Inlines, String)
+reference = do
+ guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
+ guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
+ withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
-parenthesizedChars :: MarkdownParser [Char]
+parenthesizedChars :: PandocMonad m => MarkdownParser m [Char]
parenthesizedChars = do
result <- charsInBalanced '(' ')' litChar
return $ '(' : result ++ ")"
-- source for a link, with optional title
-source :: MarkdownParser (String, String)
+source :: PandocMonad m => MarkdownParser m (String, String)
source = do
char '('
skipSpaces
let urlChunk =
try parenthesizedChars
- <|> (notFollowedBy (oneOf " )") >> (count 1 litChar))
+ <|> (notFollowedBy (oneOf " )") >> count 1 litChar)
<|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
let sourceURL = (unwords . words . concat) <$> many urlChunk
let betweenAngles = try $
@@ -1748,10 +1764,10 @@ source = do
char ')'
return (escapeURI $ trimr src, tit)
-linkTitle :: MarkdownParser String
+linkTitle :: PandocMonad m => MarkdownParser m String
linkTitle = quotedTitle '"' <|> quotedTitle '\''
-link :: MarkdownParser (F Inlines)
+link :: PandocMonad m => MarkdownParser m (F Inlines)
link = try $ do
st <- getState
guard $ stateAllowLinks st
@@ -1760,21 +1776,31 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
-bracketedSpan :: MarkdownParser (F Inlines)
+bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
bracketedSpan = try $ do
guardEnabled Ext_bracketed_spans
(lab,_) <- reference
attr <- attributes
- let (ident,classes,keyvals) = attr
- case lookup "style" keyvals of
- Just s | null ident && null classes &&
- map toLower (filter (`notElem` " \t;") s) ==
- "font-variant:small-caps"
- -> return $ B.smallcaps <$> lab
- _ -> return $ B.spanWith attr <$> lab
-
-regLink :: (Attr -> String -> String -> Inlines -> Inlines)
- -> F Inlines -> MarkdownParser (F Inlines)
+ return $ if isSmallCaps attr
+ then B.smallcaps <$> lab
+ else B.spanWith attr <$> lab
+
+-- | We treat a span as SmallCaps if class is "smallcaps" (and
+-- no other attributes are set or if style is "font-variant:small-caps"
+-- (and no other attributes are set).
+isSmallCaps :: Attr -> Bool
+isSmallCaps ("",["smallcaps"],[]) = True
+isSmallCaps ("",[],kvs) =
+ case lookup "style" kvs of
+ Just s -> map toLower (filter (`notElem` " \t;") s) ==
+ "font-variant:small-caps"
+ Nothing -> False
+isSmallCaps _ = False
+
+regLink :: PandocMonad m
+ => (Attr -> String -> String -> Inlines -> Inlines)
+ -> F Inlines
+ -> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
attr <- option nullAttr $
@@ -1782,20 +1808,24 @@ regLink constructor lab = try $ do
return $ constructor attr src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
-referenceLink :: (Attr -> String -> String -> Inlines -> Inlines)
- -> (F Inlines, String) -> MarkdownParser (F Inlines)
+referenceLink :: PandocMonad m
+ => (Attr -> String -> String -> Inlines -> Inlines)
+ -> (F Inlines, String)
+ -> MarkdownParser m (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
(_,raw') <- option (mempty, "") $
- lookAhead (try (guardEnabled Ext_citations >>
- spnl >> normalCite >> return (mempty, "")))
+ lookAhead (try (do guardEnabled Ext_citations
+ guardDisabled Ext_spaced_reference_links <|> spnl
+ normalCite
+ return (mempty, "")))
<|>
- try (spnl >> reference)
+ try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
- parsedRaw <- parseFromString (mconcat <$> many inline) raw'
- fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ parsedRaw <- parseFromString' inlines raw'
+ fallback <- parseFromString' inlines $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
let makeFallback = do
@@ -1820,11 +1850,11 @@ referenceLink constructor (lab, raw) = do
dropBrackets :: String -> String
dropBrackets = reverse . dropRB . reverse . dropLB
where dropRB (']':xs) = xs
- dropRB xs = xs
+ dropRB xs = xs
dropLB ('[':xs) = xs
- dropLB xs = xs
+ dropLB xs = xs
-bareURL :: MarkdownParser (F Inlines)
+bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
@@ -1832,7 +1862,7 @@ bareURL = try $ do
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
return $ return $ B.link src "" (B.str orig)
-autoLink :: MarkdownParser (F Inlines)
+autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do
getState >>= guard . stateAllowLinks
char '<'
@@ -1846,7 +1876,7 @@ autoLink = try $ do
guardEnabled Ext_link_attributes >> attributes
return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
-image :: MarkdownParser (F Inlines)
+image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
@@ -1856,54 +1886,55 @@ image = try $ do
_ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
-note :: MarkdownParser (F Inlines)
+note :: PandocMonad m => MarkdownParser m (F Inlines)
note = try $ do
guardEnabled Ext_footnotes
ref <- noteMarker
+ updateState $ \st -> st{ stateNoteRefs = Set.insert ref (stateNoteRefs st) }
return $ do
notes <- asksF stateNotes'
- case lookup ref notes of
+ case M.lookup ref notes of
Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
- Just contents -> do
+ Just (_pos, contents) -> do
st <- askF
-- process the note in a context that doesn't resolve
-- notes, to avoid infinite looping with notes inside
-- notes:
- let contents' = runF contents st{ stateNotes' = [] }
+ let contents' = runF contents st{ stateNotes' = M.empty }
return $ B.note contents'
-inlineNote :: MarkdownParser (F Inlines)
+inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
inlineNote = try $ do
guardEnabled Ext_inline_notes
char '^'
contents <- inlinesInBalancedBrackets
return $ B.note . B.para <$> contents
-rawLaTeXInline' :: MarkdownParser (F Inlines)
+rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
- lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
- RawInline _ s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s
- -- "tex" because it might be context or latex
+ lookAhead $ try $ char '\\' >> letter
+ notFollowedBy' rawConTeXtEnvironment
+ s <- rawLaTeXInline
+ return $ return $ B.rawInline "tex" s -- "tex" because it might be context
-rawConTeXtEnvironment :: Parser [Char] st String
+rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
- <|> (many1 letter)
- contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
+ <|> many1 letter
+ contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar)
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
-inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String
+inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String
inBrackets parser = do
char '['
contents <- many parser
char ']'
return $ "[" ++ contents ++ "]"
-spanHtml :: MarkdownParser (F Inlines)
+spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
spanHtml = try $ do
guardEnabled Ext_native_spans
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
@@ -1911,14 +1942,11 @@ spanHtml = try $ do
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- case lookup "style" keyvals of
- Just s | null ident && null classes &&
- map toLower (filter (`notElem` " \t;") s) ==
- "font-variant:small-caps"
- -> return $ B.smallcaps <$> contents
- _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
-
-divHtml :: MarkdownParser (F Blocks)
+ return $ if isSmallCaps (ident, classes, keyvals)
+ then B.smallcaps <$> contents
+ else B.spanWith (ident, classes, keyvals) <$> contents
+
+divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
divHtml = try $ do
guardEnabled Ext_native_divs
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
@@ -1940,7 +1968,29 @@ divHtml = try $ do
else -- avoid backtracing
return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
-rawHtmlInline :: MarkdownParser (F Inlines)
+divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
+divFenced = try $ do
+ guardEnabled Ext_fenced_divs
+ string ":::"
+ skipMany (char ':')
+ skipMany spaceChar
+ attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar)
+ skipMany spaceChar
+ skipMany (char ':')
+ blankline
+ updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
+ bs <- mconcat <$> manyTill block divFenceEnd
+ updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
+ return $ B.divWith attribs <$> bs
+
+divFenceEnd :: PandocMonad m => MarkdownParser m ()
+divFenceEnd = try $ do
+ string ":::"
+ skipMany (char ':')
+ blanklines
+ return ()
+
+rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines)
rawHtmlInline = do
guardEnabled Ext_raw_html
inHtmlBlock <- stateInHtmlBlock <$> getState
@@ -1962,7 +2012,7 @@ rawHtmlInline = do
emojiChars :: [Char]
emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
-emoji :: MarkdownParser (F Inlines)
+emoji :: PandocMonad m => MarkdownParser m (F Inlines)
emoji = try $ do
guardEnabled Ext_emoji
char ':'
@@ -1974,21 +2024,22 @@ emoji = try $ do
-- Citations
-cite :: MarkdownParser (F Inlines)
+cite :: PandocMonad m => MarkdownParser m (F Inlines)
cite = do
guardEnabled Ext_citations
- citations <- textualCite
+ textualCite
<|> do (cs, raw) <- withRaw normalCite
- return $ (flip B.cite (B.text raw)) <$> cs
- return citations
+ return $ flip B.cite (B.text raw) <$> cs
-textualCite :: MarkdownParser (F Inlines)
+textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
textualCite = try $ do
- (_, key) <- citeKey
+ (suppressAuthor, key) <- citeKey
let first = Citation{ citationId = key
, citationPrefix = []
, citationSuffix = []
- , citationMode = AuthorInText
+ , citationMode = if suppressAuthor
+ then SuppressAuthor
+ else AuthorInText
, citationNoteNum = 0
, citationHash = 0
}
@@ -2003,7 +2054,7 @@ textualCite = try $ do
let (spaces',raw') = span isSpace raw
spc | null spaces' = mempty
| otherwise = B.space
- lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
+ lab <- parseFromString' inlines $ dropBrackets raw'
fallback <- referenceLink B.linkWith (lab,raw')
return $ do
fallback' <- fallback
@@ -2017,7 +2068,7 @@ textualCite = try $ do
Just n -> B.str (show n)
_ -> B.cite [first] $ B.str $ '@':key)
-bareloc :: Citation -> MarkdownParser (F [Citation])
+bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
bareloc c = try $ do
spnl
char '['
@@ -2032,7 +2083,7 @@ bareloc c = try $ do
rest' <- rest
return $ c{ citationSuffix = B.toList suff' } : rest'
-normalCite :: MarkdownParser (F [Citation])
+normalCite :: PandocMonad m => MarkdownParser m (F [Citation])
normalCite = try $ do
char '['
spnl
@@ -2041,7 +2092,7 @@ normalCite = try $ do
char ']'
return citations
-suffix :: MarkdownParser (F Inlines)
+suffix :: PandocMonad m => MarkdownParser m (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
@@ -2050,14 +2101,14 @@ suffix = try $ do
then (B.space <>) <$> rest
else rest
-prefix :: MarkdownParser (F Inlines)
+prefix :: PandocMonad m => MarkdownParser m (F Inlines)
prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
+ manyTill inline (char ']' <|> fmap (const ']') (lookAhead citeKey))
-citeList :: MarkdownParser (F [Citation])
+citeList :: PandocMonad m => MarkdownParser m (F [Citation])
citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
-citation :: MarkdownParser (F Citation)
+citation :: PandocMonad m => MarkdownParser m (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
@@ -2065,23 +2116,23 @@ citation = try $ do
return $ do
x <- pref
y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
-
-smart :: MarkdownParser (F Inlines)
+ return Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+smart :: PandocMonad m => MarkdownParser m (F Inlines)
smart = do
- getOption readerSmart >>= guard
+ guardEnabled Ext_smart
doubleQuoted <|> singleQuoted <|>
choice (map (return <$>) [apostrophe, dash, ellipses])
-singleQuoted :: MarkdownParser (F Inlines)
+singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
@@ -2091,10 +2142,10 @@ singleQuoted = try $ do
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
-doubleQuoted :: MarkdownParser (F Inlines)
+doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
+ withQuoteContext InDoubleQuote (doubleQuoteEnd >> return
(fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
+ <|> return (return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 0dea22c53..c19ef2f46 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,9 @@
-{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE RelaxedPolyRec #-}
+{-# LANGUAGE TypeSynonymInstances #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{-
- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
+ Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.MediaWiki
- Copyright : Copyright (C) 2012-2015 John MacFarlane
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -36,43 +38,50 @@ _ parse templates?
-}
module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
-import Text.Pandoc.Definition
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
-import Data.Monoid ((<>))
-import Text.Pandoc.Options
-import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
-import Text.Pandoc.XML ( fromEntities )
-import Text.Pandoc.Parsing hiding ( nested )
-import Text.Pandoc.Walk ( walk )
-import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim )
import Control.Monad
-import Data.List (intersperse, intercalate, isPrefixOf )
-import Text.HTML.TagSoup
-import Data.Sequence (viewl, ViewL(..), (<|))
+import Control.Monad.Except (throwError)
+import Data.Char (isDigit, isSpace)
import qualified Data.Foldable as F
+import Data.List (intercalate, intersperse, isPrefixOf)
import qualified Data.Map as M
+import Data.Maybe (fromMaybe, maybeToList)
+import Data.Monoid ((<>))
+import Data.Sequence (ViewL (..), viewl, (<|))
import qualified Data.Set as Set
-import Data.Char (isDigit, isSpace)
-import Data.Maybe (fromMaybe)
-import Text.Printf (printf)
-import Debug.Trace (trace)
-
-import Text.Pandoc.Error
+import Data.Text (Text, unpack)
+import Text.HTML.TagSoup
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (nested)
+import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
+import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+ trim)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.XML (fromEntities)
-- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readMediaWiki opts s =
- readWith parseMediaWiki MWState{ mwOptions = opts
- , mwMaxNestingLevel = 4
- , mwNextLinkNumber = 1
- , mwCategoryLinks = []
- , mwHeaderMap = M.empty
- , mwIdentifierList = Set.empty
- }
- (s ++ "\n")
+readMediaWiki :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readMediaWiki opts s = do
+ parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
+ , mwMaxNestingLevel = 4
+ , mwNextLinkNumber = 1
+ , mwCategoryLinks = []
+ , mwHeaderMap = M.empty
+ , mwIdentifierList = Set.empty
+ , mwLogMessages = []
+ , mwInTT = False
+ }
+ (unpack (crFilter s) ++ "\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
@@ -80,9 +89,11 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwCategoryLinks :: [Inlines]
, mwHeaderMap :: M.Map Inlines String
, mwIdentifierList :: Set.Set String
+ , mwLogMessages :: [LogMessage]
+ , mwInTT :: Bool
}
-type MWParser = Parser [Char] MWState
+type MWParser m = ParserT [Char] MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions
@@ -95,13 +106,17 @@ instance HasIdentifierList MWState where
extractIdentifierList = mwIdentifierList
updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st }
+instance HasLogMessages MWState where
+ addLogMessage m s = s{ mwLogMessages = m : mwLogMessages s }
+ getLogMessages = reverse . mwLogMessages
+
--
-- auxiliary functions
--
-- This is used to prevent exponential blowups for things like:
-- ''a'''a''a'''a''a'''a''a'''a
-nested :: MWParser a -> MWParser a
+nested :: PandocMonad m => MWParser m a -> MWParser m a
nested p = do
nestlevel <- mwMaxNestingLevel `fmap` getState
guard $ nestlevel > 0
@@ -116,7 +131,7 @@ specialChars = "'[]<=&*{}|\":\\"
spaceChars :: [Char]
spaceChars = " \n\t"
-sym :: String -> MWParser ()
+sym :: PandocMonad m => String -> MWParser m ()
sym s = () <$ try (string s)
newBlockTags :: [String]
@@ -131,16 +146,16 @@ isBlockTag' tag = isBlockTag tag
isInlineTag' :: Tag String -> Bool
isInlineTag' (TagComment _) = True
-isInlineTag' t = not (isBlockTag' t)
+isInlineTag' t = not (isBlockTag' t)
eitherBlockOrInline :: [String]
eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
"map", "area", "object"]
-htmlComment :: MWParser ()
+htmlComment :: PandocMonad m => MWParser m ()
htmlComment = () <$ htmlTag isCommentTag
-inlinesInTags :: String -> MWParser Inlines
+inlinesInTags :: PandocMonad m => String -> MWParser m Inlines
inlinesInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
if '/' `elem` raw -- self-closing tag
@@ -148,7 +163,7 @@ inlinesInTags tag = try $ do
else trimInlines . mconcat <$>
manyTill inline (htmlTag (~== TagClose tag))
-blocksInTags :: String -> MWParser Blocks
+blocksInTags :: PandocMonad m => String -> MWParser m Blocks
blocksInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
let closer = if tag == "li"
@@ -162,7 +177,7 @@ blocksInTags tag = try $ do
then return mempty
else mconcat <$> manyTill block closer
-charsInTags :: String -> MWParser [Char]
+charsInTags :: PandocMonad m => String -> MWParser m [Char]
charsInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
if '/' `elem` raw -- self-closing tag
@@ -173,7 +188,7 @@ charsInTags tag = try $ do
-- main parser
--
-parseMediaWiki :: MWParser Pandoc
+parseMediaWiki :: PandocMonad m => MWParser m Pandoc
parseMediaWiki = do
bs <- mconcat <$> many block
spaces
@@ -182,16 +197,15 @@ parseMediaWiki = do
let categories = if null categoryLinks
then mempty
else B.para $ mconcat $ intersperse B.space categoryLinks
+ reportLogMessages
return $ B.doc $ bs <> categories
--
-- block parsers
--
-block :: MWParser Blocks
+block :: PandocMonad m => MWParser m Blocks
block = do
- tr <- getOption readerTrace
- pos <- getPosition
res <- mempty <$ skipMany1 blankline
<|> table
<|> header
@@ -204,28 +218,28 @@ block = do
<|> blockTag
<|> (B.rawBlock "mediawiki" <$> template)
<|> para
- when tr $
- trace (printf "line %d: %s" (sourceLine pos)
- (take 60 $ show $ B.toList res)) (return ())
+ trace (take 60 $ show $ B.toList res)
return res
-para :: MWParser Blocks
+para :: PandocMonad m => MWParser m Blocks
para = do
contents <- trimInlines . mconcat <$> many1 inline
if F.all (==Space) contents
then return mempty
else return $ B.para contents
-table :: MWParser Blocks
+table :: PandocMonad m => MWParser m Blocks
table = do
tableStart
- styles <- option [] parseAttrs <* blankline
+ styles <- option [] parseAttrs
+ skipMany spaceChar
+ optional blanklines
let tableWidth = case lookup "width" styles of
Just w -> fromMaybe 1.0 $ parseWidth w
Nothing -> 1.0
caption <- option mempty tableCaption
optional rowsep
- hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!'))
+ hasheader <- option False $ True <$ lookAhead (skipSpaces *> char '!')
(cellspecs',hdr) <- unzip <$> tableRow
let widths = map ((tableWidth *) . snd) cellspecs'
let restwidth = tableWidth - sum widths
@@ -244,10 +258,10 @@ table = do
else (replicate cols mempty, hdr:rows')
return $ B.table caption cellspecs headers rows
-parseAttrs :: MWParser [(String,String)]
+parseAttrs :: PandocMonad m => MWParser m [(String,String)]
parseAttrs = many1 parseAttr
-parseAttr :: MWParser (String, String)
+parseAttr :: PandocMonad m => MWParser m (String, String)
parseAttr = try $ do
skipMany spaceChar
k <- many1 letter
@@ -256,27 +270,23 @@ parseAttr = try $ do
<|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
return (k,v)
-tableStart :: MWParser ()
+tableStart :: PandocMonad m => MWParser m ()
tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|"
-tableEnd :: MWParser ()
+tableEnd :: PandocMonad m => MWParser m ()
tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}"
-rowsep :: MWParser ()
+rowsep :: PandocMonad m => MWParser m ()
rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <*
- optional parseAttr <* blanklines
-
-cellsep :: MWParser ()
-cellsep = try $
- (guardColumnOne *> skipSpaces <*
- ( (char '|' <* notFollowedBy (oneOf "-}+"))
- <|> (char '!')
- )
- )
- <|> (() <$ try (string "||"))
- <|> (() <$ try (string "!!"))
-
-tableCaption :: MWParser Inlines
+ many (char '-') <* optional parseAttr <* blanklines
+
+cellsep :: PandocMonad m => MWParser m ()
+cellsep = try $ do
+ skipSpaces
+ (char '|' *> notFollowedBy (oneOf "-}+") *> optional (char '|'))
+ <|> (char '!' *> optional (char '!'))
+
+tableCaption :: PandocMonad m => MWParser m Inlines
tableCaption = try $ do
guardColumnOne
skipSpaces
@@ -284,19 +294,21 @@ tableCaption = try $ do
optional (try $ parseAttr *> skipSpaces *> char '|' *> skipSpaces)
(trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline)
-tableRow :: MWParser [((Alignment, Double), Blocks)]
+tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)]
tableRow = try $ skipMany htmlComment *> many tableCell
-tableCell :: MWParser ((Alignment, Double), Blocks)
+tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks)
tableCell = try $ do
cellsep
skipMany spaceChar
attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <*
notFollowedBy (char '|')
skipMany spaceChar
+ pos' <- getPosition
ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
((snd <$> withRaw table) <|> count 1 anyChar))
- bs <- parseFromString (mconcat <$> many block) ls
+ bs <- parseFromString (do setPosition pos'
+ mconcat <$> many block) ls
let align = case lookup "align" attrs of
Just "left" -> AlignLeft
Just "right" -> AlignRight
@@ -311,9 +323,9 @@ parseWidth :: String -> Maybe Double
parseWidth s =
case reverse s of
('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds)
- _ -> Nothing
+ _ -> Nothing
-template :: MWParser String
+template :: PandocMonad m => MWParser m String
template = try $ do
string "{{"
notFollowedBy (char '{')
@@ -322,7 +334,7 @@ template = try $ do
contents <- manyTill chunk (try $ string "}}")
return $ "{{" ++ concat contents ++ "}}"
-blockTag :: MWParser Blocks
+blockTag :: PandocMonad m => MWParser m Blocks
blockTag = do
(tag, _) <- lookAhead $ htmlTag isBlockTag'
case tag of
@@ -341,27 +353,27 @@ trimCode :: String -> String
trimCode ('\n':xs) = stripTrailingNewlines xs
trimCode xs = stripTrailingNewlines xs
-syntaxhighlight :: String -> [Attribute String] -> MWParser Blocks
+syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks
syntaxhighlight tag attrs = try $ do
let mblang = lookup "lang" attrs
let mbstart = lookup "start" attrs
let mbline = lookup "line" attrs
- let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline
+ let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline
let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart
contents <- charsInTags tag
return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents
-hrule :: MWParser Blocks
+hrule :: PandocMonad m => MWParser m Blocks
hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)
-guardColumnOne :: MWParser ()
+guardColumnOne :: PandocMonad m => MWParser m ()
guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
-preformatted :: MWParser Blocks
+preformatted :: PandocMonad m => MWParser m Blocks
preformatted = try $ do
guardColumnOne
char ' '
- let endline' = B.linebreak <$ (try $ newline <* char ' ')
+ let endline' = B.linebreak <$ try (newline <* char ' ')
let whitespace' = B.str <$> many1 ('\160' <$ spaceChar)
let spToNbsp ' ' = '\160'
spToNbsp x = x
@@ -370,7 +382,7 @@ preformatted = try $ do
(htmlTag (~== TagOpen "nowiki" []) *>
manyTill anyChar (htmlTag (~== TagClose "nowiki")))
let inline' = whitespace' <|> endline' <|> nowiki'
- <|> (try $ notFollowedBy newline *> inline)
+ <|> try (notFollowedBy newline *> inline)
contents <- mconcat <$> many1 inline'
let spacesStr (Str xs) = all isSpace xs
spacesStr _ = False
@@ -385,10 +397,10 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode
strToCode x = x
normalizeCode [] = []
normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 =
- normalizeCode $ (Code a1 (x ++ y)) : zs
+ normalizeCode $ Code a1 (x ++ y) : zs
normalizeCode (x:xs) = x : normalizeCode xs
-header :: MWParser Blocks
+header :: PandocMonad m => MWParser m Blocks
header = try $ do
guardColumnOne
eqs <- many1 (char '=')
@@ -398,13 +410,13 @@ header = try $ do
attr <- registerHeader nullAttr contents
return $ B.headerWith attr lev contents
-bulletList :: MWParser Blocks
+bulletList :: PandocMonad m => MWParser m Blocks
bulletList = B.bulletList <$>
( many1 (listItem '*')
<|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
optional (htmlTag (~== TagClose "ul"))) )
-orderedList :: MWParser Blocks
+orderedList :: PandocMonad m => MWParser m Blocks
orderedList =
(B.orderedList <$> many1 (listItem '#'))
<|> try
@@ -415,10 +427,10 @@ orderedList =
let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
-definitionList :: MWParser Blocks
+definitionList :: PandocMonad m => MWParser m Blocks
definitionList = B.definitionList <$> many1 defListItem
-defListItem :: MWParser (Inlines, [Blocks])
+defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks])
defListItem = try $ do
terms <- mconcat . intersperse B.linebreak <$> many defListTerm
-- we allow dd with no dt, or dt with no dd
@@ -429,44 +441,49 @@ defListItem = try $ do
else many (listItem ':')
return (terms, defs)
-defListTerm :: MWParser Inlines
-defListTerm = char ';' >> skipMany spaceChar >> anyLine >>=
- parseFromString (trimInlines . mconcat <$> many inline)
+defListTerm :: PandocMonad m => MWParser m Inlines
+defListTerm = do
+ guardColumnOne
+ char ';'
+ skipMany spaceChar
+ pos' <- getPosition
+ anyLine >>= parseFromString (do setPosition pos'
+ trimInlines . mconcat <$> many inline)
-listStart :: Char -> MWParser ()
+listStart :: PandocMonad m => Char -> MWParser m ()
listStart c = char c *> notFollowedBy listStartChar
-listStartChar :: MWParser Char
+listStartChar :: PandocMonad m => MWParser m Char
listStartChar = oneOf "*#;:"
-anyListStart :: MWParser Char
-anyListStart = char '*'
- <|> char '#'
- <|> char ':'
- <|> char ';'
+anyListStart :: PandocMonad m => MWParser m Char
+anyListStart = guardColumnOne >> oneOf "*#:;"
-li :: MWParser Blocks
+li :: PandocMonad m => MWParser m Blocks
li = lookAhead (htmlTag (~== TagOpen "li" [])) *>
(firstParaToPlain <$> blocksInTags "li") <* spaces
-listItem :: Char -> MWParser Blocks
+listItem :: PandocMonad m => Char -> MWParser m Blocks
listItem c = try $ do
+ guardColumnOne
extras <- many (try $ char c <* lookAhead listStartChar)
if null extras
then listItem' c
else do
skipMany spaceChar
+ pos' <- getPosition
first <- concat <$> manyTill listChunk newline
rest <- many
(try $ string extras *> lookAhead listStartChar *>
(concat <$> manyTill listChunk newline))
- contents <- parseFromString (many1 $ listItem' c)
+ contents <- parseFromString (do setPosition pos'
+ many1 $ listItem' c)
(unlines (first : rest))
case c of
- '*' -> return $ B.bulletList contents
- '#' -> return $ B.orderedList contents
- ':' -> return $ B.definitionList [(mempty, contents)]
- _ -> mzero
+ '*' -> return $ B.bulletList contents
+ '#' -> return $ B.orderedList contents
+ ':' -> return $ B.definitionList [(mempty, contents)]
+ _ -> mzero
-- The point of this is to handle stuff like
-- * {{cite book
@@ -475,30 +492,32 @@ listItem c = try $ do
-- }}
-- * next list item
-- which seems to be valid mediawiki.
-listChunk :: MWParser String
+listChunk :: PandocMonad m => MWParser m String
listChunk = template <|> count 1 anyChar
-listItem' :: Char -> MWParser Blocks
+listItem' :: PandocMonad m => Char -> MWParser m Blocks
listItem' c = try $ do
listStart c
skipMany spaceChar
+ pos' <- getPosition
first <- concat <$> manyTill listChunk newline
rest <- many (try $ char c *> lookAhead listStartChar *>
(concat <$> manyTill listChunk newline))
- parseFromString (firstParaToPlain . mconcat <$> many1 block)
+ parseFromString (do setPosition pos'
+ firstParaToPlain . mconcat <$> many1 block)
$ unlines $ first : rest
firstParaToPlain :: Blocks -> Blocks
firstParaToPlain contents =
case viewl (B.unMany contents) of
- (Para xs) :< ys -> B.Many $ (Plain xs) <| ys
- _ -> contents
+ Para xs :< ys -> B.Many $ Plain xs <| ys
+ _ -> contents
--
-- inline parsers
--
-inline :: MWParser Inlines
+inline :: PandocMonad m => MWParser m Inlines
inline = whitespace
<|> url
<|> str
@@ -516,10 +535,10 @@ inline = whitespace
<|> (B.rawInline "mediawiki" <$> template)
<|> special
-str :: MWParser Inlines
+str :: PandocMonad m => MWParser m Inlines
str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
-math :: MWParser Inlines
+math :: PandocMonad m => MWParser m Inlines
math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
<|> (B.math . trim <$> charsInTags "math")
<|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
@@ -529,13 +548,13 @@ math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
mStart = string "\\("
mEnd = try (string "\\)")
-variable :: MWParser String
+variable :: PandocMonad m => MWParser m String
variable = try $ do
string "{{{"
contents <- manyTill anyChar (try $ string "}}}")
return $ "{{{" ++ contents ++ "}}}"
-inlineTag :: MWParser Inlines
+inlineTag :: PandocMonad m => MWParser m Inlines
inlineTag = do
(tag, _) <- lookAhead $ htmlTag isInlineTag'
case tag of
@@ -553,22 +572,27 @@ inlineTag = do
TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
TagOpen "code" _ -> encode <$> inlinesInTags "code"
- TagOpen "tt" _ -> encode <$> inlinesInTags "tt"
+ TagOpen "tt" _ -> do
+ inTT <- mwInTT <$> getState
+ updateState $ \st -> st{ mwInTT = True }
+ result <- encode <$> inlinesInTags "tt"
+ updateState $ \st -> st{ mwInTT = inTT }
+ return result
TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
_ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
-special :: MWParser Inlines
+special :: PandocMonad m => MWParser m Inlines
special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
oneOf specialChars)
-inlineHtml :: MWParser Inlines
+inlineHtml :: PandocMonad m => MWParser m Inlines
inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
-whitespace :: MWParser Inlines
+whitespace :: PandocMonad m => MWParser m Inlines
whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment)
<|> B.softbreak <$ endline
-endline :: MWParser ()
+endline :: PandocMonad m => MWParser m ()
endline = () <$ try (newline <*
notFollowedBy spaceChar <*
notFollowedBy newline <*
@@ -577,30 +601,30 @@ endline = () <$ try (newline <*
notFollowedBy' header <*
notFollowedBy anyListStart)
-imageIdentifiers :: [MWParser ()]
+imageIdentifiers :: PandocMonad m => [MWParser m ()]
imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
"Bild"]
-image :: MWParser Inlines
+image :: PandocMonad m => MWParser m Inlines
image = try $ do
sym "[["
choice imageIdentifiers
fname <- addUnderscores <$> many1 (noneOf "|]")
_ <- many imageOption
- dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px")
+ dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px")
<|> return []
_ <- many imageOption
let kvs = case dims of
- w:[] -> [("width", w)]
- w:(h:[]) -> [("width", w), ("height", h)]
- _ -> []
+ [w] -> [("width", w)]
+ [w, h] -> [("width", w), ("height", h)]
+ _ -> []
let attr = ("", [], kvs)
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
-imageOption :: MWParser String
+imageOption :: PandocMonad m => MWParser m String
imageOption = try $ char '|' *> opt
where
opt = try (oneOfStrings [ "border", "thumbnail", "frameless"
@@ -612,14 +636,14 @@ imageOption = try $ char '|' *> opt
<|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
collapseUnderscores :: String -> String
-collapseUnderscores [] = []
+collapseUnderscores [] = []
collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
-collapseUnderscores (x:xs) = x : collapseUnderscores xs
+collapseUnderscores (x:xs) = x : collapseUnderscores xs
addUnderscores :: String -> String
addUnderscores = collapseUnderscores . intercalate "_" . words
-internalLink :: MWParser Inlines
+internalLink :: PandocMonad m => MWParser m Inlines
internalLink = try $ do
sym "[["
pagename <- unwords . words <$> many (noneOf "|]")
@@ -627,7 +651,7 @@ internalLink = try $ do
( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
-- the "pipe trick"
-- [[Help:Contents|] -> "Contents"
- <|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) )
+ <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) )
sym "]]"
linktrail <- B.text <$> many letter
let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
@@ -637,7 +661,7 @@ internalLink = try $ do
return mempty
else return link
-externalLink :: MWParser Inlines
+externalLink :: PandocMonad m => MWParser m Inlines
externalLink = try $ do
char '['
(_, src) <- uri
@@ -649,29 +673,33 @@ externalLink = try $ do
return $ B.str $ show num
return $ B.link src "" lab
-url :: MWParser Inlines
+url :: PandocMonad m => MWParser m Inlines
url = do
(orig, src) <- uri
return $ B.link src "" (B.str orig)
-- | Parses a list of inlines between start and end delimiters.
-inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines
+inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
inlinesBetween start end =
(trimInlines . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
-emph :: MWParser Inlines
+emph :: PandocMonad m => MWParser m Inlines
emph = B.emph <$> nested (inlinesBetween start end)
where start = sym "''" >> lookAhead nonspaceChar
end = try $ notFollowedBy' (() <$ strong) >> sym "''"
-strong :: MWParser Inlines
+strong :: PandocMonad m => MWParser m Inlines
strong = B.strong <$> nested (inlinesBetween start end)
where start = sym "'''" >> lookAhead nonspaceChar
end = try $ sym "'''"
-doubleQuotes :: MWParser Inlines
-doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote)
+doubleQuotes :: PandocMonad m => MWParser m Inlines
+doubleQuotes = do
+ guardEnabled Ext_smart
+ inTT <- mwInTT <$> getState
+ guard (not inTT)
+ B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote)
where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar
closeDoubleQuote = try $ sym "\""
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
new file mode 100644
index 000000000..4a9523e84
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -0,0 +1,924 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-
+ Copyright (C) 2017-2018 Alexander Krotov <ilabdsf@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Muse
+ Copyright : Copyright (C) 2017-2018 Alexander Krotov
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Krotov <ilabdsf@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of Muse text to 'Pandoc' document.
+-}
+{-
+TODO:
+- Page breaks (five "*")
+- Org tables
+- table.el tables
+- Images with attributes (floating and width)
+- Citations and <biblio>
+- <play> environment
+-}
+module Text.Pandoc.Readers.Muse (readMuse) where
+
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.Char (isLetter)
+import Data.Default
+import Data.List (stripPrefix, intercalate)
+import Data.List.Split (splitOn)
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Maybe (fromMaybe, isNothing)
+import Data.Text (Text, unpack)
+import System.FilePath (takeExtension)
+import Text.HTML.TagSoup
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (F)
+import Text.Pandoc.Readers.HTML (htmlTag)
+import Text.Pandoc.Shared (crFilter, underlineSpan)
+
+-- | Read Muse from an input string and return a Pandoc document.
+readMuse :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readMuse opts s = do
+ res <- readWithM parseMuse def{ museOptions = opts } (unpack (crFilter s))
+ case res of
+ Left e -> throwError e
+ Right d -> return d
+
+type F = Future MuseState
+
+data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
+ , museOptions :: ReaderOptions
+ , museHeaders :: M.Map Inlines String -- ^ List of headers and ids (used for implicit ref links)
+ , museIdentifierList :: Set.Set String
+ , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
+ , museLogMessages :: [LogMessage]
+ , museNotes :: M.Map String (SourcePos, F Blocks)
+ , museInLink :: Bool
+ , museInPara :: Bool
+ }
+
+instance Default MuseState where
+ def = defaultMuseState
+
+defaultMuseState :: MuseState
+defaultMuseState = MuseState { museMeta = return nullMeta
+ , museOptions = def
+ , museHeaders = M.empty
+ , museIdentifierList = Set.empty
+ , museLastStrPos = Nothing
+ , museLogMessages = []
+ , museNotes = M.empty
+ , museInLink = False
+ , museInPara = False
+ }
+
+type MuseParser = ParserT String MuseState
+
+instance HasReaderOptions MuseState where
+ extractReaderOptions = museOptions
+
+instance HasHeaderMap MuseState where
+ extractHeaderMap = museHeaders
+ updateHeaderMap f st = st{ museHeaders = f $ museHeaders st }
+
+instance HasIdentifierList MuseState where
+ extractIdentifierList = museIdentifierList
+ updateIdentifierList f st = st{ museIdentifierList = f $ museIdentifierList st }
+
+instance HasLastStrPosition MuseState where
+ setLastStrPos pos st = st{ museLastStrPos = Just pos }
+ getLastStrPos st = museLastStrPos st
+
+instance HasLogMessages MuseState where
+ addLogMessage m s = s{ museLogMessages = m : museLogMessages s }
+ getLogMessages = reverse . museLogMessages
+
+--
+-- main parser
+--
+
+parseMuse :: PandocMonad m => MuseParser m Pandoc
+parseMuse = do
+ many directive
+ blocks <- parseBlocks
+ st <- getState
+ let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
+ meta <- museMeta st
+ return $ Pandoc meta bs) st
+ reportLogMessages
+ return doc
+
+--
+-- utility functions
+--
+
+eol :: Stream s m Char => ParserT s st m ()
+eol = void newline <|> eof
+
+htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String)
+htmlElement tag = try $ do
+ (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ content <- manyTill anyChar endtag
+ return (htmlAttrToPandoc attr, content)
+ where
+ endtag = void $ htmlTag (~== TagClose tag)
+
+htmlBlock :: PandocMonad m => String -> MuseParser m (Attr, String)
+htmlBlock tag = try $ do
+ many spaceChar
+ res <- htmlElement tag
+ manyTill spaceChar eol
+ return res
+
+htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc attrs = (ident, classes, keyvals)
+ where
+ ident = fromMaybe "" $ lookup "id" attrs
+ classes = maybe [] words $ lookup "class" attrs
+ keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+
+parseHtmlContent :: PandocMonad m
+ => String -> MuseParser m (Attr, F Blocks)
+parseHtmlContent tag = try $ do
+ many spaceChar
+ (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ manyTill spaceChar eol
+ content <- parseBlocksTill (try $ manyTill spaceChar endtag)
+ manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline
+ return (htmlAttrToPandoc attr, content)
+ where
+ endtag = void $ htmlTag (~== TagClose tag)
+
+commonPrefix :: String -> String -> String
+commonPrefix _ [] = []
+commonPrefix [] _ = []
+commonPrefix (x:xs) (y:ys)
+ | x == y = x : commonPrefix xs ys
+ | otherwise = []
+
+atStart :: PandocMonad m => MuseParser m a -> MuseParser m a
+atStart p = do
+ pos <- getPosition
+ st <- getState
+ guard $ museLastStrPos st /= Just pos
+ p
+
+someUntil :: (Stream s m t)
+ => ParserT s u m a
+ -> ParserT s u m b
+ -> ParserT s u m ([a], b)
+someUntil p end = do
+ first <- p
+ (rest, e) <- manyUntil p end
+ return (first:rest, e)
+
+--
+-- directive parsers
+--
+
+-- While not documented, Emacs Muse allows "-" in directive name
+parseDirectiveKey :: PandocMonad m => MuseParser m String
+parseDirectiveKey = do
+ char '#'
+ many (letter <|> char '-')
+
+parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseEmacsDirective = do
+ key <- parseDirectiveKey
+ spaceChar
+ value <- trimInlinesF . mconcat <$> manyTill (choice inlineList) eol
+ return (key, value)
+
+parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseAmuseDirective = do
+ key <- parseDirectiveKey
+ many1 spaceChar
+ value <- trimInlinesF . mconcat <$> many1Till inline endOfDirective
+ many blankline
+ return (key, value)
+ where
+ endOfDirective = lookAhead $ eof <|> try (newline >> (void blankline <|> void parseDirectiveKey))
+
+directive :: PandocMonad m => MuseParser m ()
+directive = do
+ ext <- getOption readerExtensions
+ (key, value) <- if extensionEnabled Ext_amuse ext then parseAmuseDirective else parseEmacsDirective
+ updateState $ \st -> st { museMeta = B.setMeta (translateKey key) <$> value <*> museMeta st }
+ where translateKey "cover" = "cover-image"
+ translateKey x = x
+
+--
+-- block parsers
+--
+
+parseBlocks :: PandocMonad m
+ => MuseParser m (F Blocks)
+parseBlocks =
+ try parseEnd <|>
+ try blockStart <|>
+ try listStart <|>
+ try paraStart
+ where
+ parseEnd = mempty <$ eof
+ blockStart = do first <- header <|> blockElements <|> emacsNoteBlock
+ rest <- parseBlocks
+ return $ first B.<> rest
+ listStart = do
+ updateState (\st -> st { museInPara = False })
+ (first, rest) <- anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks
+ return $ first B.<> rest
+ paraStart = do
+ indent <- length <$> many spaceChar
+ (first, rest) <- paraUntil parseBlocks
+ let first' = if indent >= 2 && indent < 6 then B.blockQuote <$> first else first
+ return $ first' B.<> rest
+
+parseBlocksTill :: PandocMonad m
+ => MuseParser m a
+ -> MuseParser m (F Blocks)
+parseBlocksTill end =
+ try parseEnd <|>
+ try blockStart <|>
+ try listStart <|>
+ try paraStart
+ where
+ parseEnd = mempty <$ end
+ blockStart = do first <- blockElements
+ rest <- continuation
+ return $ first B.<> rest
+ listStart = do
+ updateState (\st -> st { museInPara = False })
+ (first, e) <- anyListUntil ((Left <$> end) <|> (Right <$> continuation))
+ case e of
+ Left _ -> return first
+ Right rest -> return $ first B.<> rest
+ paraStart = do (first, e) <- paraUntil ((Left <$> end) <|> (Right <$> continuation))
+ case e of
+ Left _ -> return first
+ Right rest -> return $ first B.<> rest
+ continuation = parseBlocksTill end
+
+listItemContentsUntil :: PandocMonad m
+ => Int
+ -> MuseParser m a
+ -> MuseParser m a
+ -> MuseParser m (F Blocks, a)
+listItemContentsUntil col pre end =
+ try blockStart <|>
+ try listStart <|>
+ try paraStart
+ where
+ parsePre = do e <- pre
+ return (mempty, e)
+ parseEnd = do e <- end
+ return (mempty, e)
+ paraStart = do
+ (first, e) <- paraUntil ((Left <$> pre) <|> (Right <$> continuation) <|> (Left <$> end))
+ case e of
+ Left ee -> return (first, ee)
+ Right (rest, ee) -> return (first B.<> rest, ee)
+ blockStart = do first <- blockElements
+ (rest, e) <- parsePre <|> continuation <|> parseEnd
+ return (first B.<> rest, e)
+ listStart = do
+ updateState (\st -> st { museInPara = False })
+ (first, e) <- anyListUntil ((Left <$> pre) <|> (Right <$> continuation) <|> (Left <$> end))
+ case e of
+ Left ee -> return (first, ee)
+ Right (rest, ee) -> return (first B.<> rest, ee)
+ continuation = try $ do blank <- optionMaybe blankline
+ skipMany blankline
+ indentWith col
+ updateState (\st -> st { museInPara = museInPara st && isNothing blank })
+ listItemContentsUntil col pre end
+
+parseBlock :: PandocMonad m => MuseParser m (F Blocks)
+parseBlock = do
+ res <- blockElements <|> para
+ trace (take 60 $ show $ B.toList $ runF res def)
+ return res
+ where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements)))
+
+blockElements :: PandocMonad m => MuseParser m (F Blocks)
+blockElements = do
+ updateState (\st -> st { museInPara = False })
+ choice [ mempty <$ blankline
+ , comment
+ , separator
+ , example
+ , exampleTag
+ , literalTag
+ , centerTag
+ , rightTag
+ , quoteTag
+ , divTag
+ , verseTag
+ , lineBlock
+ , table
+ , commentTag
+ ]
+
+comment :: PandocMonad m => MuseParser m (F Blocks)
+comment = try $ do
+ char ';'
+ optional (spaceChar >> many (noneOf "\n"))
+ eol
+ return mempty
+
+separator :: PandocMonad m => MuseParser m (F Blocks)
+separator = try $ do
+ string "----"
+ many $ char '-'
+ many spaceChar
+ eol
+ return $ return B.horizontalRule
+
+header :: PandocMonad m => MuseParser m (F Blocks)
+header = try $ do
+ getPosition >>= \pos -> guard (sourceColumn pos == 1)
+ level <- fmap length $ many1 $ char '*'
+ guard $ level <= 5
+ spaceChar
+ content <- trimInlinesF . mconcat <$> manyTill inline eol
+ anchorId <- option "" parseAnchor
+ attr <- registerHeader (anchorId, [], []) (runF content def)
+ return $ B.headerWith attr level <$> content
+
+example :: PandocMonad m => MuseParser m (F Blocks)
+example = try $ do
+ string "{{{"
+ optional blankline
+ contents <- manyTill anyChar $ try (optional blankline >> string "}}}")
+ return $ return $ B.codeBlock contents
+
+-- Trim up to one newline from the beginning of the string.
+lchop :: String -> String
+lchop s = case s of
+ '\n':ss -> ss
+ _ -> s
+
+-- Trim up to one newline from the end of the string.
+rchop :: String -> String
+rchop = reverse . lchop . reverse
+
+dropSpacePrefix :: [String] -> [String]
+dropSpacePrefix lns =
+ map (drop maxIndent) lns
+ where flns = filter (not . all (== ' ')) lns
+ maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns
+
+exampleTag :: PandocMonad m => MuseParser m (F Blocks)
+exampleTag = try $ do
+ (attr, contents) <- htmlBlock "example"
+ return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents
+
+literalTag :: PandocMonad m => MuseParser m (F Blocks)
+literalTag =
+ (return . rawBlock) <$> htmlBlock "literal"
+ where
+ -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
+ format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs
+ rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content
+
+-- <center> tag is ignored
+centerTag :: PandocMonad m => MuseParser m (F Blocks)
+centerTag = snd <$> parseHtmlContent "center"
+
+-- <right> tag is ignored
+rightTag :: PandocMonad m => MuseParser m (F Blocks)
+rightTag = snd <$> parseHtmlContent "right"
+
+quoteTag :: PandocMonad m => MuseParser m (F Blocks)
+quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote"
+
+-- <div> tag is supported by Emacs Muse, but not Amusewiki 2.025
+divTag :: PandocMonad m => MuseParser m (F Blocks)
+divTag = do
+ (attrs, content) <- parseHtmlContent "div"
+ return $ B.divWith attrs <$> content
+
+verseLine :: PandocMonad m => MuseParser m (F Inlines)
+verseLine = do
+ indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty
+ rest <- manyTill (choice inlineList) newline
+ return $ trimInlinesF $ mconcat (pure indent : rest)
+
+verseLines :: PandocMonad m => MuseParser m (F Blocks)
+verseLines = do
+ lns <- many verseLine
+ return $ B.lineBlock <$> sequence lns
+
+verseTag :: PandocMonad m => MuseParser m (F Blocks)
+verseTag = do
+ (_, content) <- htmlBlock "verse"
+ parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content)
+
+commentTag :: PandocMonad m => MuseParser m (F Blocks)
+commentTag = htmlBlock "comment" >> return mempty
+
+-- Indented paragraph is either center, right or quote
+paraUntil :: PandocMonad m
+ => MuseParser m a
+ -> MuseParser m (F Blocks, a)
+paraUntil end = do
+ state <- getState
+ guard $ not $ museInPara state
+ setState $ state{ museInPara = True }
+ (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end)
+ updateState (\st -> st { museInPara = False })
+ return (fmap B.para $ trimInlinesF $ mconcat l, e)
+
+noteMarker :: PandocMonad m => MuseParser m String
+noteMarker = try $ do
+ char '['
+ first <- oneOf "123456789"
+ rest <- manyTill digit (char ']')
+ return $ first:rest
+
+-- Amusewiki version of note
+-- Parsing is similar to list item, except that note marker is used instead of list marker
+amuseNoteBlockUntil :: PandocMonad m
+ => MuseParser m a
+ -> MuseParser m (F Blocks, a)
+amuseNoteBlockUntil end = try $ do
+ guardEnabled Ext_amuse
+ ref <- noteMarker <* spaceChar
+ pos <- getPosition
+ updateState (\st -> st { museInPara = False })
+ (content, e) <- listItemContentsUntil (sourceColumn pos - 1) (fail "x") end
+ oldnotes <- museNotes <$> getState
+ case M.lookup ref oldnotes of
+ Just _ -> logMessage $ DuplicateNoteReference ref pos
+ Nothing -> return ()
+ updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes }
+ return (mempty, e)
+
+-- Emacs version of note
+-- Notes are allowed only at the end of text, no indentation is required.
+emacsNoteBlock :: PandocMonad m => MuseParser m (F Blocks)
+emacsNoteBlock = try $ do
+ guardDisabled Ext_amuse
+ pos <- getPosition
+ ref <- noteMarker <* skipSpaces
+ content <- mconcat <$> blocksTillNote
+ oldnotes <- museNotes <$> getState
+ case M.lookup ref oldnotes of
+ Just _ -> logMessage $ DuplicateNoteReference ref pos
+ Nothing -> return ()
+ updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes }
+ return mempty
+ where
+ blocksTillNote =
+ many1Till parseBlock (eof <|> () <$ lookAhead noteMarker)
+
+--
+-- Verse markup
+--
+
+lineVerseLine :: PandocMonad m => MuseParser m (F Inlines)
+lineVerseLine = try $ do
+ string "> "
+ indent <- B.str <$> many (char ' ' >> pure '\160')
+ rest <- manyTill (choice inlineList) eol
+ return $ trimInlinesF $ mconcat (pure indent : rest)
+
+blanklineVerseLine :: PandocMonad m => MuseParser m (F Inlines)
+blanklineVerseLine = try $ do
+ char '>'
+ blankline
+ pure mempty
+
+lineBlock :: PandocMonad m => MuseParser m (F Blocks)
+lineBlock = try $ do
+ col <- sourceColumn <$> getPosition
+ lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1))
+ return $ B.lineBlock <$> sequence lns
+
+--
+-- lists
+--
+
+bulletListItemsUntil :: PandocMonad m
+ => Int
+ -> MuseParser m a
+ -> MuseParser m ([F Blocks], a)
+bulletListItemsUntil indent end = try $ do
+ char '-'
+ void spaceChar <|> lookAhead eol
+ updateState (\st -> st { museInPara = False })
+ (x, e) <- listItemContentsUntil (indent + 2) (Right <$> try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (Left <$> end)
+ case e of
+ Left ee -> return ([x], ee)
+ Right (xs, ee) -> return (x:xs, ee)
+
+bulletListUntil :: PandocMonad m
+ => MuseParser m a
+ -> MuseParser m (F Blocks, a)
+bulletListUntil end = try $ do
+ many spaceChar
+ pos <- getPosition
+ let indent = sourceColumn pos - 1
+ guard $ indent /= 0
+ (items, e) <- bulletListItemsUntil indent end
+ return (B.bulletList <$> sequence items, e)
+
+-- | Parses an ordered list marker and returns list attributes.
+anyMuseOrderedListMarker :: PandocMonad m => MuseParser m ListAttributes
+anyMuseOrderedListMarker = do
+ (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha
+ char '.'
+ return (start, style, Period)
+
+museOrderedListMarker :: PandocMonad m
+ => ListNumberStyle
+ -> MuseParser m Int
+museOrderedListMarker style = do
+ (_, start) <- case style of
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ _ -> fail "Unhandled case"
+ char '.'
+ return start
+
+orderedListItemsUntil :: PandocMonad m
+ => Int
+ -> ListNumberStyle
+ -> MuseParser m a
+ -> MuseParser m ([F Blocks], a)
+orderedListItemsUntil indent style end =
+ continuation
+ where
+ continuation = try $ do
+ pos <- getPosition
+ void spaceChar <|> lookAhead eol
+ updateState (\st -> st { museInPara = False })
+ (x, e) <- listItemContentsUntil (sourceColumn pos) (Right <$> try (optionMaybe blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (Left <$> end)
+ case e of
+ Left ee -> return ([x], ee)
+ Right (xs, ee) -> return (x:xs, ee)
+
+orderedListUntil :: PandocMonad m
+ => MuseParser m a
+ -> MuseParser m (F Blocks, a)
+orderedListUntil end = try $ do
+ many spaceChar
+ pos <- getPosition
+ let indent = sourceColumn pos - 1
+ guard $ indent /= 0
+ p@(_, style, _) <- anyMuseOrderedListMarker
+ guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman]
+ (items, e) <- orderedListItemsUntil indent style end
+ return (B.orderedListWith p <$> sequence items, e)
+
+descriptionsUntil :: PandocMonad m
+ => Int
+ -> MuseParser m a
+ -> MuseParser m ([F Blocks], a)
+descriptionsUntil indent end = do
+ void spaceChar <|> lookAhead eol
+ updateState (\st -> st { museInPara = False })
+ (x, e) <- listItemContentsUntil indent (Right <$> try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (Left <$> end)
+ case e of
+ Right (xs, ee) -> return (x:xs, ee)
+ Left ee -> return ([x], ee)
+
+definitionListItemsUntil :: PandocMonad m
+ => Int
+ -> MuseParser m a
+ -> MuseParser m ([F (Inlines, [Blocks])], a)
+definitionListItemsUntil indent end =
+ continuation
+ where
+ continuation = try $ do
+ pos <- getPosition
+ term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::")
+ (x, e) <- descriptionsUntil (sourceColumn pos) ((Right <$> try (optional blankline >> indentWith indent >> continuation)) <|> (Left <$> end))
+ let xx = do
+ term' <- term
+ x' <- sequence x
+ return (term', x')
+ case e of
+ Left ee -> return ([xx], ee)
+ Right (xs, ee) -> return (xx:xs, ee)
+
+definitionListUntil :: PandocMonad m
+ => MuseParser m a
+ -> MuseParser m (F Blocks, a)
+definitionListUntil end = try $ do
+ many spaceChar
+ pos <- getPosition
+ let indent = sourceColumn pos - 1
+ guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse
+ (items, e) <- definitionListItemsUntil indent end
+ return (B.definitionList <$> sequence items, e)
+
+anyListUntil :: PandocMonad m
+ => MuseParser m a
+ -> MuseParser m (F Blocks, a)
+anyListUntil end =
+ bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end
+
+--
+-- tables
+--
+
+data MuseTable = MuseTable
+ { museTableCaption :: Inlines
+ , museTableHeaders :: [[Blocks]]
+ , museTableRows :: [[Blocks]]
+ , museTableFooters :: [[Blocks]]
+ }
+
+data MuseTableElement = MuseHeaderRow (F [Blocks])
+ | MuseBodyRow (F [Blocks])
+ | MuseFooterRow (F [Blocks])
+ | MuseCaption (F Inlines)
+
+museToPandocTable :: MuseTable -> Blocks
+museToPandocTable (MuseTable caption headers body footers) =
+ B.table caption attrs headRow rows
+ where ncol = maximum (0 : map length (headers ++ body ++ footers))
+ attrs = replicate ncol (AlignDefault, 0.0)
+ headRow = if null headers then [] else head headers
+ rows = (if null headers then [] else tail headers) ++ body ++ footers
+
+museAppendElement :: MuseTable
+ -> MuseTableElement
+ -> F MuseTable
+museAppendElement tbl element =
+ case element of
+ MuseHeaderRow row -> do
+ row' <- row
+ return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] }
+ MuseBodyRow row -> do
+ row' <- row
+ return tbl{ museTableRows = museTableRows tbl ++ [row'] }
+ MuseFooterRow row-> do
+ row' <- row
+ return tbl{ museTableFooters = museTableFooters tbl ++ [row'] }
+ MuseCaption inlines -> do
+ inlines' <- inlines
+ return tbl{ museTableCaption = inlines' }
+
+tableCell :: PandocMonad m => MuseParser m (F Blocks)
+tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
+ where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol
+
+tableElements :: PandocMonad m => MuseParser m [MuseTableElement]
+tableElements = tableParseElement `sepEndBy1` eol
+
+elementsToTable :: [MuseTableElement] -> F MuseTable
+elementsToTable = foldM museAppendElement emptyTable
+ where emptyTable = MuseTable mempty mempty mempty mempty
+
+table :: PandocMonad m => MuseParser m (F Blocks)
+table = try $ do
+ rows <- tableElements
+ let tbl = elementsToTable rows
+ let pandocTbl = museToPandocTable <$> tbl :: F Blocks
+ return pandocTbl
+
+tableParseElement :: PandocMonad m => MuseParser m MuseTableElement
+tableParseElement = tableParseHeader
+ <|> tableParseBody
+ <|> tableParseFooter
+ <|> tableParseCaption
+
+tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks])
+tableParseRow n = try $ do
+ fields <- tableCell `sepBy2` fieldSep
+ return $ sequence fields
+ where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
+ fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
+
+tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement
+tableParseHeader = MuseHeaderRow <$> tableParseRow 2
+
+tableParseBody :: PandocMonad m => MuseParser m MuseTableElement
+tableParseBody = MuseBodyRow <$> tableParseRow 1
+
+tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement
+tableParseFooter = MuseFooterRow <$> tableParseRow 3
+
+tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement
+tableParseCaption = try $ do
+ many spaceChar
+ string "|+"
+ MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|"))
+
+--
+-- inline parsers
+--
+
+inlineList :: PandocMonad m => [MuseParser m (F Inlines)]
+inlineList = [ whitespace
+ , br
+ , anchor
+ , footnote
+ , strong
+ , strongTag
+ , emph
+ , emphTag
+ , underlined
+ , superscriptTag
+ , subscriptTag
+ , strikeoutTag
+ , verbatimTag
+ , nbsp
+ , link
+ , code
+ , codeTag
+ , inlineLiteralTag
+ , str
+ , symbol
+ ]
+
+inline :: PandocMonad m => MuseParser m (F Inlines)
+inline = endline <|> choice inlineList <?> "inline"
+
+endline :: PandocMonad m => MuseParser m (F Inlines)
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ returnF B.softbreak
+
+parseAnchor :: PandocMonad m => MuseParser m String
+parseAnchor = try $ do
+ getPosition >>= \pos -> guard (sourceColumn pos == 1)
+ char '#'
+ first <- letter
+ rest <- many (letter <|> digit)
+ skipMany spaceChar <|> void newline
+ return $ first:rest
+
+anchor :: PandocMonad m => MuseParser m (F Inlines)
+anchor = try $ do
+ anchorId <- parseAnchor
+ return $ return $ B.spanWith (anchorId, [], []) mempty
+
+footnote :: PandocMonad m => MuseParser m (F Inlines)
+footnote = try $ do
+ ref <- noteMarker
+ return $ do
+ notes <- asksF museNotes
+ case M.lookup ref notes of
+ Nothing -> return $ B.str $ "[" ++ ref ++ "]"
+ Just (_pos, contents) -> do
+ st <- askF
+ let contents' = runF contents st { museNotes = M.empty }
+ return $ B.note contents'
+
+whitespace :: PandocMonad m => MuseParser m (F Inlines)
+whitespace = try $ do
+ skipMany1 spaceChar
+ return $ return B.space
+
+br :: PandocMonad m => MuseParser m (F Inlines)
+br = try $ do
+ string "<br>"
+ return $ return B.linebreak
+
+emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines)
+emphasisBetween c = try $ enclosedInlines c c
+
+enclosedInlines :: (PandocMonad m, Show a, Show b)
+ => MuseParser m a
+ -> MuseParser m b
+ -> MuseParser m (F Inlines)
+enclosedInlines start end = try $
+ trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter))
+
+inlineTag :: PandocMonad m
+ => (Inlines -> Inlines)
+ -> String
+ -> MuseParser m (F Inlines)
+inlineTag f tag = try $ do
+ htmlTag (~== TagOpen tag [])
+ res <- manyTill inline (void $ htmlTag (~== TagClose tag))
+ return $ f <$> mconcat res
+
+strongTag :: PandocMonad m => MuseParser m (F Inlines)
+strongTag = inlineTag B.strong "strong"
+
+strong :: PandocMonad m => MuseParser m (F Inlines)
+strong = fmap B.strong <$> emphasisBetween (string "**")
+
+emph :: PandocMonad m => MuseParser m (F Inlines)
+emph = fmap B.emph <$> emphasisBetween (char '*')
+
+underlined :: PandocMonad m => MuseParser m (F Inlines)
+underlined = do
+ guardDisabled Ext_amuse -- Supported only by Emacs Muse
+ fmap underlineSpan <$> emphasisBetween (char '_')
+
+emphTag :: PandocMonad m => MuseParser m (F Inlines)
+emphTag = inlineTag B.emph "em"
+
+superscriptTag :: PandocMonad m => MuseParser m (F Inlines)
+superscriptTag = inlineTag B.superscript "sup"
+
+subscriptTag :: PandocMonad m => MuseParser m (F Inlines)
+subscriptTag = inlineTag B.subscript "sub"
+
+strikeoutTag :: PandocMonad m => MuseParser m (F Inlines)
+strikeoutTag = inlineTag B.strikeout "del"
+
+verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
+verbatimTag = return . B.text . snd <$> htmlElement "verbatim"
+
+nbsp :: PandocMonad m => MuseParser m (F Inlines)
+nbsp = try $ do
+ string "~~"
+ return $ return $ B.str "\160"
+
+code :: PandocMonad m => MuseParser m (F Inlines)
+code = try $ do
+ atStart $ char '='
+ contents <- many1Till (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) $ char '='
+ guard $ not $ null contents
+ guard $ head contents `notElem` " \t\n"
+ guard $ last contents `notElem` " \t\n"
+ notFollowedBy $ satisfy isLetter
+ return $ return $ B.code contents
+
+codeTag :: PandocMonad m => MuseParser m (F Inlines)
+codeTag = do
+ (attrs, content) <- htmlElement "code"
+ return $ return $ B.codeWith attrs content
+
+inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
+inlineLiteralTag =
+ (return . rawInline) <$> htmlElement "literal"
+ where
+ -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
+ format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs
+ rawInline (attrs, content) = B.rawInline (format attrs) content
+
+str :: PandocMonad m => MuseParser m (F Inlines)
+str = do
+ result <- many1 alphaNum
+ updateLastStrPos
+ return $ return $ B.str result
+
+symbol :: PandocMonad m => MuseParser m (F Inlines)
+symbol = return . B.str <$> count 1 nonspaceChar
+
+link :: PandocMonad m => MuseParser m (F Inlines)
+link = try $ do
+ st <- getState
+ guard $ not $ museInLink st
+ setState $ st{ museInLink = True }
+ (url, title, content) <- linkText
+ updateState (\state -> state { museInLink = False })
+ return $ case stripPrefix "URL:" url of
+ Nothing -> if isImageUrl url
+ then B.image url title <$> fromMaybe (return mempty) content
+ else B.link url title <$> fromMaybe (return $ B.str url) content
+ Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content
+ where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
+ imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
+ isImageUrl = (`elem` imageExtensions) . takeExtension
+
+linkContent :: PandocMonad m => MuseParser m (F Inlines)
+linkContent = do
+ char '['
+ trimInlinesF . mconcat <$> many1Till inline (string "]")
+
+linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines))
+linkText = do
+ string "[["
+ url <- many1Till anyChar $ char ']'
+ content <- optionMaybe linkContent
+ char ']'
+ return (url, "", content)
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 4ec164e19..88f6bfe8f 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2011-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Native
- Copyright : Copyright (C) 2011-2015 John MacFarlane
+ Copyright : Copyright (C) 2011-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -31,8 +31,12 @@ Conversion of a string representation of a pandoc type (@Pandoc@,
module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
+import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Shared (safeRead)
+import Control.Monad.Except (throwError)
+import Data.Text (Text, unpack)
+import Text.Pandoc.Class
import Text.Pandoc.Error
-- | Read native formatted text and return a Pandoc document.
@@ -45,19 +49,23 @@ import Text.Pandoc.Error
--
-- > Pandoc nullMeta [Plain [Str "hi"]]
--
-readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s)
+readNative :: PandocMonad m
+ => ReaderOptions
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readNative _ s =
+ case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of
+ Right doc -> return doc
+ Left _ -> throwError $ PandocParseError "couldn't read native"
-readBlocks :: String -> Either PandocError [Block]
-readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
+readBlocks :: Text -> Either PandocError [Block]
+readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s))
-readBlock :: String -> Either PandocError Block
-readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
+readBlock :: Text -> Either PandocError Block
+readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s))
-readInlines :: String -> Either PandocError [Inline]
-readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
-
-readInline :: String -> Either PandocError Inline
-readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s)
+readInlines :: Text -> Either PandocError [Inline]
+readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s))
+readInline :: Text -> Either PandocError Inline
+readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s))
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 4dcf5e5a0..82266748f 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,26 +1,28 @@
{-# LANGUAGE FlexibleContexts #-}
module Text.Pandoc.Readers.OPML ( readOPML ) where
+import Control.Monad.State.Strict
import Data.Char (toUpper)
-import Text.Pandoc.Options
-import Text.Pandoc.Definition
+import Data.Default
+import Data.Generics
+import Data.Maybe (fromMaybe)
+import Data.Text (Text, pack, unpack)
+import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Options
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
+import Text.Pandoc.Shared (crFilter, blocksToInlines')
import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
-import Data.Generics
-import Control.Monad.State
-import Data.Default
-import Control.Monad.Except
-import Text.Pandoc.Error
-type OPML = ExceptT PandocError (State OPMLState)
+type OPML m = StateT OPMLState m
data OPMLState = OPMLState{
opmlSectionLevel :: Int
, opmlDocTitle :: Inlines
, opmlDocAuthors :: [Inlines]
, opmlDocDate :: Inlines
+ , opmlOptions :: ReaderOptions
} deriving Show
instance Default OPMLState where
@@ -28,14 +30,19 @@ instance Default OPMLState where
, opmlDocTitle = mempty
, opmlDocAuthors = []
, opmlDocDate = mempty
- }
+ , opmlOptions = def
+ }
-readOPML :: ReaderOptions -> String -> Either PandocError Pandoc
-readOPML _ inp = setTitle (opmlDocTitle st')
- . setAuthors (opmlDocAuthors st')
- . setDate (opmlDocDate st')
- . doc . mconcat <$> bs
- where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp)
+readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML opts inp = do
+ (bs, st') <- runStateT
+ (mapM parseBlock $ normalizeTree $
+ parseXML (unpack (crFilter inp))) def{ opmlOptions = opts }
+ return $
+ setTitle (opmlDocTitle st') $
+ setAuthors (opmlDocAuthors st') $
+ setDate (opmlDocDate st') $
+ doc $ mconcat bs
-- normalize input, consolidating adjacent Text and CRef elements
normalizeTree :: [Content] -> [Content]
@@ -53,30 +60,32 @@ normalizeTree = everywhere (mkT go)
go xs = xs
convertEntity :: String -> String
-convertEntity e = maybe (map toUpper e) id (lookupEntity e)
+convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
attrValue :: String -> Element -> String
attrValue attr elt =
- case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
- Just z -> z
- Nothing -> ""
+ fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-exceptT :: Either PandocError a -> OPML a
-exceptT = either throwError return
+-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
+-- exceptT = either throwError return
-asHtml :: String -> OPML Inlines
-asHtml s = (\(Pandoc _ bs) -> case bs of
- [Plain ils] -> fromList ils
- _ -> mempty) <$> exceptT (readHtml def s)
+asHtml :: PandocMonad m => String -> OPML m Inlines
+asHtml s = do
+ opts <- gets opmlOptions
+ Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } (pack s)
+ return $ blocksToInlines' bs
-asMarkdown :: String -> OPML Blocks
-asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s)
+asMarkdown :: PandocMonad m => String -> OPML m Blocks
+asMarkdown s = do
+ opts <- gets opmlOptions
+ Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } (pack s)
+ return $ fromList bs
-getBlocks :: Element -> OPML Blocks
-getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
+getBlocks :: PandocMonad m => Element -> OPML m Blocks
+getBlocks e = mconcat <$> mapM parseBlock (elContent e)
-parseBlock :: Content -> OPML Blocks
+parseBlock :: PandocMonad m => Content -> OPML m Blocks
parseBlock (Elem e) =
case qName (elName e) of
"ownerName" -> mempty <$ modify (\st ->
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 046fb4d6d..875c18a85 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -32,31 +32,45 @@ Entry point to the odt reader.
module Text.Pandoc.Readers.Odt ( readOdt ) where
-import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
+import Codec.Archive.Zip
+import qualified Text.XML.Light as XML
-import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Lazy as B
-import System.FilePath
+import System.FilePath
-import Text.Pandoc.Definition
-import Text.Pandoc.Error
-import Text.Pandoc.Options
-import Text.Pandoc.MediaBag
-import qualified Text.Pandoc.UTF8 as UTF8
+import Control.Monad.Except (throwError)
-import Text.Pandoc.Readers.Odt.ContentReader
-import Text.Pandoc.Readers.Odt.StyleReader
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.MediaBag
+import Text.Pandoc.Options
+import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Pandoc.Readers.Odt.Generic.XMLConverter
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-import Text.Pandoc.Shared (filteredFilesFromArchive)
+import Text.Pandoc.Readers.Odt.ContentReader
+import Text.Pandoc.Readers.Odt.StyleReader
---
-readOdt :: ReaderOptions
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Shared (filteredFilesFromArchive)
+
+readOdt :: PandocMonad m
+ => ReaderOptions
-> B.ByteString
- -> Either PandocError (Pandoc, MediaBag)
-readOdt _ bytes = bytesToOdt bytes-- of
+ -> m Pandoc
+readOdt opts bytes = case readOdt' opts bytes of
+ Right (doc, mb) -> do
+ P.setMediaBag mb
+ return doc
+ Left e -> throwError e
+
+--
+readOdt' :: ReaderOptions
+ -> B.ByteString
+ -> Either PandocError (Pandoc, MediaBag)
+readOdt' _ bytes = bytesToOdt bytes-- of
-- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag)
-- Left err -> Left err
@@ -64,7 +78,7 @@ readOdt _ bytes = bytesToOdt bytes-- of
bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
bytesToOdt bytes = case toArchiveOrFail bytes of
Right archive -> archiveToOdt archive
- Left _ -> Left $ ParseFailure "Couldn't parse odt file."
+ Left _ -> Left $ PandocParseError "Couldn't parse odt file."
--
archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
@@ -85,7 +99,7 @@ archiveToOdt archive
| otherwise
-- Not very detailed, but I don't think more information would be helpful
- = Left $ ParseFailure "Couldn't parse odt file."
+ = Left $ PandocParseError "Couldn't parse odt file."
where
filePathIsOdtMedia :: FilePath -> Bool
filePathIsOdtMedia fp =
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index b056f1ecc..73bed545e 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -1,6 +1,6 @@
-{-# LANGUAGE Arrows #-}
-{-# LANGUAGE TupleSections #-}
+
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE TupleSections #-}
{-
Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
@@ -38,17 +38,17 @@ faster and easier to implement this way.
module Text.Pandoc.Readers.Odt.Arrows.State where
-import Prelude hiding ( foldr, foldl )
+import Prelude hiding (foldl, foldr)
-import qualified Control.Category as Cat
-import Control.Arrow
-import Control.Monad
+import Control.Arrow
+import qualified Control.Category as Cat
+import Control.Monad
-import Data.Foldable
-import Data.Monoid
+import Data.Foldable
+import Data.Monoid
-import Text.Pandoc.Readers.Odt.Arrows.Utils
-import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+import Text.Pandoc.Readers.Odt.Generic.Fallible
newtype ArrowState state a b = ArrowState
@@ -59,10 +59,6 @@ withState :: (state -> a -> (state, b)) -> ArrowState state a b
withState = ArrowState . uncurry
-- | Constructor
-withState' :: ((state, a) -> (state, b)) -> ArrowState state a b
-withState' = ArrowState
-
--- | Constructor
modifyState :: (state -> state ) -> ArrowState state a a
modifyState = ArrowState . first
@@ -79,10 +75,6 @@ extractFromState :: (state -> b ) -> ArrowState state x b
extractFromState f = ArrowState $ \(state,_) -> (state, f state)
-- | Constructor
-withUnchangedState :: (state -> a -> b ) -> ArrowState state a b
-withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a)
-
--- | Constructor
tryModifyState :: (state -> Either f state)
-> ArrowState state a (Either f a)
tryModifyState f = ArrowState $ \(state,a)
@@ -90,7 +82,7 @@ tryModifyState f = ArrowState $ \(state,a)
instance Cat.Category (ArrowState s) where
id = ArrowState id
- arrow2 . arrow1 = ArrowState $ (runArrowState arrow2).(runArrowState arrow1)
+ arrow2 . arrow1 = ArrowState $ runArrowState arrow2 . runArrowState arrow1
instance Arrow (ArrowState state) where
arr = ignoringState
@@ -107,43 +99,9 @@ instance ArrowChoice (ArrowState state) where
Left l -> (s, Left l)
Right r -> second Right $ runArrowState a (s,r)
-instance ArrowLoop (ArrowState state) where
- loop a = ArrowState $ \(s, x)
- -> let (s', (x', _d)) = runArrowState a (s, (x, _d))
- in (s', x')
-
instance ArrowApply (ArrowState state) where
app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b)
-
--- | Embedding of a state arrow in a state arrow with a different state type.
-switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y
-switchState there back a = ArrowState $ first there
- >>> runArrowState a
- >>> first back
-
--- | Lift a state arrow to modify the state of an arrow
--- with a different state type.
-liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x
-liftToState unlift a = modifyState $ unlift &&& id
- >>> runArrowState a
- >>> snd
-
--- | Switches the type of the state temporarily.
--- Drops the intermediate result state, behaving like the identity arrow,
--- save for side effects in the state.
-withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x
-withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst
-
--- | Switches the type of the state temporarily.
--- Returns the resulting sub-state.
-withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s'
-withSubState' unlift a = ArrowState $ runArrowState unlift
- >>> switch
- >>> runArrowState a
- >>> switch
- where switch (x,y) = (y,x)
-
-- | Switches the type of the state temporarily.
-- Drops the intermediate result state, behaving like a fallible
-- identity arrow, save for side effects in the state.
@@ -175,49 +133,13 @@ foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m
foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f
where a' x (s',m) = second (m <>) $ runArrowState a (s',x)
--- | Fold a state arrow through something 'Foldable'. Collect the results
--- in a 'Monoid'.
--- Intermediate form of a fold between one with "only" a 'Monoid'
--- and one with any function.
-foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m
-foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f
- where a' (s',m) x = second (m <>) $ runArrowState a (s',x)
-
--- | Fold a fallible state arrow through something 'Foldable'. Collect the
--- results in a 'Monoid'.
--- Intermediate form of a fold between one with "only" a 'Monoid'
--- and one with any function.
--- If the iteration fails, the state will be reset to the initial one.
-foldS' :: (Foldable f, Monoid m)
- => ArrowState s x (Either e m)
- -> ArrowState s (f x) (Either e m)
-foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f
- where a' s x (s',Right m) = case runArrowState a (s',x) of
- (s'',Right m') -> (s'', Right (m <> m'))
- (_ ,Left e ) -> (s , Left e)
- a' _ _ e = e
-
--- | Fold a fallible state arrow through something 'Foldable'. Collect the
--- results in a 'Monoid'.
--- Intermediate form of a fold between one with "only" a 'Monoid'
--- and one with any function.
--- If the iteration fails, the state will be reset to the initial one.
-foldSL' :: (Foldable f, Monoid m)
- => ArrowState s x (Either e m)
- -> ArrowState s (f x) (Either e m)
-foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f
- where a' s (s',Right m) x = case runArrowState a (s',x) of
- (s'',Right m') -> (s'', Right (m <> m'))
- (_ ,Left e ) -> (s , Left e)
- a' _ e _ = e
-
-- | Fold a state arrow through something 'Foldable'. Collect the results in a
-- 'MonadPlus'.
iterateS :: (Foldable f, MonadPlus m)
=> ArrowState s x y
-> ArrowState s (f x) (m y)
iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
- where a' x (s',m) = second ((mplus m).return) $ runArrowState a (s',x)
+ where a' x (s',m) = second (mplus m.return) $ runArrowState a (s',x)
-- | Fold a state arrow through something 'Foldable'. Collect the results in a
-- 'MonadPlus'.
@@ -225,7 +147,7 @@ iterateSL :: (Foldable f, MonadPlus m)
=> ArrowState s x y
-> ArrowState s (f x) (m y)
iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
- where a' (s',m) x = second ((mplus m).return) $ runArrowState a (s',x)
+ where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x)
-- | Fold a fallible state arrow through something 'Foldable'.
@@ -239,15 +161,3 @@ iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f
(s'',Right m') -> (s'',Right $ mplus m $ return m')
(_ ,Left e ) -> (s ,Left e )
a' _ _ e = e
-
--- | Fold a fallible state arrow through something 'Foldable'.
--- Collect the results in a 'MonadPlus'.
--- If the iteration fails, the state will be reset to the initial one.
-iterateSL' :: (Foldable f, MonadPlus m)
- => ArrowState s x (Either e y )
- -> ArrowState s (f x) (Either e (m y))
-iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f
- where a' s (s',Right m) x = case runArrowState a (s',x) of
- (s'',Right m') -> (s'',Right $ mplus m $ return m')
- (_ ,Left e ) -> (s ,Left e )
- a' _ e _ = e
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
index 218a85661..ef8b2d18a 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
@@ -39,14 +39,11 @@ with an equivalent return value.
-- We export everything
module Text.Pandoc.Readers.Odt.Arrows.Utils where
-import Control.Arrow
-import Control.Monad ( join, MonadPlus(..) )
+import Control.Arrow
+import Control.Monad (join)
-import qualified Data.Foldable as F
-import Data.Monoid
-
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-import Text.Pandoc.Readers.Odt.Generic.Utils
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.Utils
and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c')
and2 = (&&&)
@@ -63,12 +60,6 @@ and5 :: (Arrow a)
and6 :: (Arrow a)
=> a b c0->a b c1->a b c2->a b c3->a b c4->a b c5
-> a b (c0,c1,c2,c3,c4,c5 )
-and7 :: (Arrow a)
- => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6
- -> a b (c0,c1,c2,c3,c4,c5,c6 )
-and8 :: (Arrow a)
- => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7
- -> a b (c0,c1,c2,c3,c4,c5,c6,c7)
and3 a b c = (and2 a b ) &&& c
>>^ \((z,y ) , x) -> (z,y,x )
@@ -78,10 +69,6 @@ and5 a b c d e = (and4 a b c d ) &&& e
>>^ \((z,y,x,w ) , v) -> (z,y,x,w,v )
and6 a b c d e f = (and5 a b c d e ) &&& f
>>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u )
-and7 a b c d e f g = (and6 a b c d e f ) &&& g
- >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t )
-and8 a b c d e f g h = (and7 a b c d e f g) &&& h
- >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s)
liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z
liftA2 f a b = a &&& b >>^ uncurry f
@@ -98,19 +85,11 @@ liftA5 :: (Arrow a) => (z->y->x->w->v -> r)
liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r)
-> a b z->a b y->a b x->a b w->a b v->a b u
-> a b r
-liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r)
- -> a b z->a b y->a b x->a b w->a b v->a b u->a b t
- -> a b r
-liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r)
- -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s
- -> a b r
liftA3 fun a b c = and3 a b c >>^ uncurry3 fun
liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun
liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun
liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun
-liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun
-liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun
liftA :: (Arrow a) => (y -> z) -> a b y -> a b z
liftA fun a = a >>^ fun
@@ -124,28 +103,12 @@ liftA fun a = a >>^ fun
duplicate :: (Arrow a) => a b (b,b)
duplicate = arr $ join (,)
--- | Lifts the combination of two values into an arrow.
-joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z
-joinOn = arr.uncurry
-
-- | Applies a function to the uncurried result-pair of an arrow-application.
-- (The %-symbol was chosen to evoke an association with pairs.)
(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d
a >>% f = a >>^ uncurry f
--- | '(>>%)' with its arguments flipped
-(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d
-(%<<) = flip (>>%)
-
--- | Precomposition with an uncurried function
-(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r
-f %>> a = uncurry f ^>> a
-
--- | Precomposition with an uncurried function (right to left variant)
-(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r
-(<<%) = flip (%>>)
-
-infixr 2 >>%, %<<, %>>, <<%
+infixr 2 >>%
-- | Duplicate a value and apply an arrow to the second instance.
@@ -156,56 +119,6 @@ infixr 2 >>%, %<<, %>>, <<%
keepingTheValue :: (Arrow a) => a b c -> a b (b,c)
keepingTheValue a = returnA &&& a
--- | Duplicate a value and apply an arrow to the first instance.
--- Aequivalent to
--- > \a -> duplicate >>> first a
--- or
--- > \a -> a &&& returnA
-keepingTheValue' :: (Arrow a) => a b c -> a b (c,b)
-keepingTheValue' a = a &&& returnA
-
--- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'.
--- Actually, it's the more complex '(>=>)', because 'bind' alone does not
--- combine as nicely in arrow form.
--- The current implementation is not the most efficient one, because it can
--- not return directly if a 'Nothing' is encountered. That in turn follows
--- from the type system, as 'Nothing' has an "invisible" type parameter that
--- can not be dropped early.
---
--- Also, there probably is a way to generalize this to other monads
--- or applicatives, but I'm leaving that as an exercise to the reader.
--- I have a feeling there is a new Arrow-typeclass to be found that is less
--- restrictive than 'ArrowApply'. If it is already out there,
--- I have not seen it yet. ('ArrowPlus' for example is not general enough.)
-(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c)
-a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join
-
-infixr 2 >>>=
-
--- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required.
--- (But still different from a true bind)
-(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b)
-(>++<) = liftA2 mplus
-
--- | Left-compose with a pure function
-leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r)
-leftLift = left.arr
-
--- | Right-compose with a pure function
-rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r')
-rightLift = right.arr
-
-
-( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c')
-( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c')
-( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c')
-
-l ^+++ r = leftLift l >>> right r
-l +++^ r = left l >>> rightLift r
-l ^+++^ r = leftLift l >>> rightLift r
-
-infixr 2 ^+++, +++^, ^+++^
-
( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d
( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d
( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d
@@ -218,33 +131,12 @@ infixr 2 ^||| , |||^, ^|||^
( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c')
( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c')
-( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c')
l ^&&& r = arr l &&& r
l &&&^ r = l &&& arr r
-l ^&&&^ r = arr l &&& arr r
-
-infixr 3 ^&&&, &&&^, ^&&&^
-( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c')
-( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c')
-( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c')
+infixr 3 ^&&&, &&&^
-l ^*** r = arr l *** r
-l ***^ r = l *** arr r
-l ^***^ r = arr l *** arr r
-
-infixr 3 ^***, ***^, ^***^
-
--- | A version of
---
--- >>> \p -> arr (\x -> if p x the Right x else Left x)
---
--- but with p being an arrow
-choose :: (ArrowChoice a) => a b Bool -> a b (Either b b)
-choose checkValue = keepingTheValue checkValue >>^ select
- where select (x,True ) = Right x
- select (x,False ) = Left x
-- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@.
choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r)
@@ -258,130 +150,15 @@ maybeToChoice = arr maybeToEither
returnV :: (Arrow a) => c -> a x c
returnV = arr.const
--- | 'returnA' dropping everything
-returnA_ :: (Arrow a) => a _b ()
-returnA_ = returnV ()
-
--- | Wrapper for an arrow that can be evaluated im parallel. All
--- Arrows can be evaluated in parallel, as long as they return a
--- monoid.
-newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c }
- deriving (Eq, Ord, Show)
-
-instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where
- mempty = CoEval $ returnV mempty
- (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend
-
--- | Evaluates a collection of arrows in a parallel fashion.
---
--- This is in essence a fold of '(&&&)' over the collection,
--- so the actual execution order and parallelity depends on the
--- implementation of '(&&&)' in the arrow in question.
--- The default implementation of '(&&&)' for example keeps the
--- order as given in the collection.
---
--- This function can be seen as a generalization of
--- 'Control.Applicative.sequenceA' to arrows or as an alternative to
--- a fold with 'Control.Applicative.WrappedArrow', which
--- substitutes the monoid with function application.
---
-coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m
-coEval = evalParallelArrow . (F.foldMap CoEval)
-
-- | Defines Left as failure, Right as success
type FallibleArrow a input failure success = a input (Either failure success)
-type ReFallibleArrow a failure success success'
- = FallibleArrow a (Either failure success) failure success'
-
--- | Wrapper for fallible arrows. Fallible arrows are all arrows that return
--- an Either value where left is a faliure and right is a success value.
-newtype AlternativeArrow a input failure success
- = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success }
-
-
-instance (ArrowChoice a, Monoid failure)
- => Monoid (AlternativeArrow a input failure success) where
- mempty = TryArrow $ returnV $ Left mempty
- (TryArrow a) `mappend` (TryArrow b)
- = TryArrow $ a &&& b
- >>^ \(a',~b')
- -> ( (\a'' -> left (mappend a'') b') ||| Right )
- a'
-
--- | Evaluates a collection of fallible arrows, trying each one in succession.
--- Left values are interpreted as failures, right values as successes.
---
--- The evaluation is stopped once an arrow succeeds.
--- Up to that point, all failures are collected in the failure-monoid.
--- Note that '()' is a monoid, and thus can serve as a failure-collector if
--- you are uninterested in the exact failures.
---
--- This is in essence a fold of '(&&&)' over the collection, enhanced with a
--- little bit of repackaging, so the actual execution order depends on the
--- implementation of '(&&&)' in the arrow in question.
--- The default implementation of '(&&&)' for example keeps the
--- order as given in the collection.
---
-tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure)
- => f (FallibleArrow a b failure success)
- -> FallibleArrow a b failure success
-tryArrows = evalAlternativeArrow . (F.foldMap TryArrow)
-
---
-liftSuccess :: (ArrowChoice a)
- => (success -> success')
- -> ReFallibleArrow a failure success success'
-liftSuccess = rightLift
-
--
liftAsSuccess :: (ArrowChoice a)
=> a x success
-> FallibleArrow a x failure success
liftAsSuccess a = a >>^ Right
---
-asFallibleArrow :: (ArrowChoice a)
- => a x success
- -> FallibleArrow a x failure success
-asFallibleArrow a = a >>^ Right
-
--- | Raises an error into a 'ReFallibleArrow' if the arrow is already in
--- "error mode"
-liftError :: (ArrowChoice a, Monoid failure)
- => failure
- -> ReFallibleArrow a failure success success
-liftError e = leftLift (e <>)
-
--- | Raises an error into a 'FallibleArrow', droping both the arrow input
--- and any previously stored error value.
-_raiseA :: (ArrowChoice a)
- => failure
- -> FallibleArrow a x failure success
-_raiseA e = returnV (Left e)
-
--- | Raises an empty error into a 'FallibleArrow', droping both the arrow input
--- and any previously stored error value.
-_raiseAEmpty :: (ArrowChoice a, Monoid failure)
- => FallibleArrow a x failure success
-_raiseAEmpty = _raiseA mempty
-
--- | Raises an error into a 'ReFallibleArrow', possibly appending the new error
--- to an existing one
-raiseA :: (ArrowChoice a, Monoid failure)
- => failure
- -> ReFallibleArrow a failure success success
-raiseA e = arr $ Left.(either (<> e) (const e))
-
--- | Raises an empty error into a 'ReFallibleArrow'. If there already is an
--- error, nothing changes.
--- (Note that this function is only aequivalent to @raiseA mempty@ iff the
--- failure monoid follows the monoid laws.)
-raiseAEmpty :: (ArrowChoice a, Monoid failure)
- => ReFallibleArrow a failure success success
-raiseAEmpty = arr (fromRight (const mempty) >>> Left)
-
-
-- | Execute the second arrow if the first succeeds
(>>?) :: (ArrowChoice a)
=> FallibleArrow a x failure success
@@ -410,20 +187,6 @@ a >>?^? b = a >>> Left ^|||^ b
-> FallibleArrow a x failure success'
a ^>>? b = a ^>> Left ^||| b
--- | Execute the lifted second arrow if the lifted first arrow succeeds
-(^>>?^) :: (ArrowChoice a)
- => (x -> Either failure success)
- -> (success -> success')
- -> FallibleArrow a x failure success'
-a ^>>?^ f = arr $ a >>> right f
-
--- | Execute the lifted second arrow if the lifted first arrow succeeds
-(^>>?^?) :: (ArrowChoice a)
- => (x -> Either failure success)
- -> (success -> Either failure success')
- -> FallibleArrow a x failure success'
-a ^>>?^? f = a ^>> Left ^|||^ f
-
-- | Execute the second, non-fallible arrow if the first arrow succeeds
(>>?!) :: (ArrowChoice a)
=> FallibleArrow a x failure success
@@ -448,38 +211,14 @@ a ^>>?% f = arr a >>?^ (uncurry f)
---
(>>?%?) :: (ArrowChoice a)
=> FallibleArrow a x f (b,b')
- -> (b -> b' -> (Either f c))
+ -> (b -> b' -> Either f c)
-> FallibleArrow a x f c
-a >>?%? f = a >>?^? (uncurry f)
+a >>?%? f = a >>?^? uncurry f
infixr 1 >>?, >>?^, >>?^?
-infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?!
+infixr 1 ^>>?, >>?!
infixr 1 >>?%, ^>>?%, >>?%?
--- | Keep values that are Right, replace Left values by a constant.
-ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v
-ifFailedUse v = arr $ either (const v) id
-
--- | '(&&)' lifted into an arrow
-(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool
-(<&&>) = liftA2 (&&)
-
--- | '(||)' lifted into an arrow
-(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool
-(<||>) = liftA2 (||)
-
--- | An equivalent of '(&&)' in a fallible arrow
-(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s
- -> FallibleArrow a x f s'
- -> FallibleArrow a x f (s,s')
-(>&&<) = liftA2 chooseMin
-
--- | An equivalent of '(||)' in some forms of fallible arrows
-(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s
- -> FallibleArrow a x f s
- -> FallibleArrow a x f s
-(>||<) = liftA2 chooseMax
-
-- | An arrow version of a short-circuit (<|>)
ifFailedDo :: (ArrowChoice a)
=> FallibleArrow a x f y
@@ -489,7 +228,4 @@ ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right)
where repackage (x , Left _) = Left x
repackage (_ , Right y) = Right y
-infixr 4 <&&>, <||>, >&&<, >||<
infixr 1 `ifFailedDo`
-
-
diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs
index 1f095bade..51c2da788 100644
--- a/src/Text/Pandoc/Readers/Odt/Base.hs
+++ b/src/Text/Pandoc/Readers/Odt/Base.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards #-}
+
{-
Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
@@ -32,12 +32,11 @@ Core types of the odt reader.
module Text.Pandoc.Readers.Odt.Base where
-import Text.Pandoc.Readers.Odt.Generic.XMLConverter
-import Text.Pandoc.Readers.Odt.Namespaces
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Namespaces
type OdtConverterState s = XMLConverterState Namespace s
type XMLReader s a b = FallibleXMLConverter Namespace s a b
type XMLReaderSafe s a b = XMLConverter Namespace s a b
-
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 2672b01ef..380f16c66 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,8 +1,8 @@
{-# LANGUAGE Arrows #-}
-{-# LANGUAGE TupleSections #-}
{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
{-
Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
@@ -39,29 +39,28 @@ module Text.Pandoc.Readers.Odt.ContentReader
, read_body
) where
-import Control.Arrow
-import Control.Applicative hiding ( liftA, liftA2, liftA3 )
+import Control.Applicative hiding (liftA, liftA2, liftA3)
+import Control.Arrow
-import qualified Data.ByteString.Lazy as B
-import qualified Data.Map as M
-import Data.List ( find, intercalate )
-import Data.Maybe
+import qualified Data.ByteString.Lazy as B
+import Data.List (find, intercalate)
+import qualified Data.Map as M
+import Data.Maybe
-import qualified Text.XML.Light as XML
+import qualified Text.XML.Light as XML
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder
-import Text.Pandoc.MediaBag (insertMedia, MediaBag)
-import Text.Pandoc.Shared
+import Text.Pandoc.Builder
+import Text.Pandoc.MediaBag (MediaBag, insertMedia)
+import Text.Pandoc.Shared
-import Text.Pandoc.Readers.Odt.Base
-import Text.Pandoc.Readers.Odt.Namespaces
-import Text.Pandoc.Readers.Odt.StyleReader
+import Text.Pandoc.Readers.Odt.Base
+import Text.Pandoc.Readers.Odt.Namespaces
+import Text.Pandoc.Readers.Odt.StyleReader
-import Text.Pandoc.Readers.Odt.Arrows.Utils
-import Text.Pandoc.Readers.Odt.Generic.XMLConverter
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-import Text.Pandoc.Readers.Odt.Generic.Utils
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.Utils
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
import qualified Data.Set as Set
@@ -94,8 +93,6 @@ data ReaderState
, envMedia :: Media
-- | Hold binary resources used in the document
, odtMediaBag :: MediaBag
--- , sequences
--- , trackedChangeIDs
}
deriving ( Show )
@@ -250,7 +247,7 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do
modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty
-- | Input: basis for a new header anchor
--- Ouput: saved new anchor
+-- Output: saved new anchor
getHeaderAnchor :: OdtReaderSafe Inlines Anchor
getHeaderAnchor = proc title -> do
state <- getExtraState -< ()
@@ -296,7 +293,7 @@ withNewStyle a = proc x -> do
modifier <- arr modifierFromStyleDiff -< triple
fShouldTrace <- isStyleToTrace -< style
case fShouldTrace of
- Right shouldTrace -> do
+ Right shouldTrace ->
if shouldTrace
then do
pushStyle -< style
@@ -325,7 +322,7 @@ type InlineModifier = Inlines -> Inlines
modifierFromStyleDiff :: PropertyTriple -> InlineModifier
modifierFromStyleDiff propertyTriple =
composition $
- (getVPosModifier propertyTriple)
+ getVPosModifier propertyTriple
: map (first ($ propertyTriple) >>> ifThen_else ignore)
[ (hasEmphChanged , emph )
, (hasChanged isStrong , strong )
@@ -344,9 +341,9 @@ modifierFromStyleDiff propertyTriple =
Just oldVPos -> getVPosModifier' (oldVPos, verticalPosition textProps)
getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore
- getVPosModifier' ( _ , VPosSub ) = subscript
- getVPosModifier' ( _ , VPosSuper ) = superscript
- getVPosModifier' ( _ , _ ) = ignore
+ getVPosModifier' ( _ , VPosSub ) = subscript
+ getVPosModifier' ( _ , VPosSuper ) = superscript
+ getVPosModifier' ( _ , _ ) = ignore
hasEmphChanged :: PropertyTriple -> Bool
hasEmphChanged = swing any [ hasChanged isEmphasised
@@ -355,17 +352,17 @@ modifierFromStyleDiff propertyTriple =
]
hasChanged property triple@(_, property -> newProperty, _) =
- maybe True (/=newProperty) (lookupPreviousValue property triple)
+ (/= Just newProperty) (lookupPreviousValue property triple)
hasChangedM property triple@(_, textProps,_) =
fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple
- lookupPreviousValue f = lookupPreviousStyleValue ((fmap f).textProperties)
+ lookupPreviousValue f = lookupPreviousStyleValue (fmap f . textProperties)
lookupPreviousValueM f = lookupPreviousStyleValue ((f =<<).textProperties)
lookupPreviousStyleValue f (ReaderState{..},_,mFamily)
- = ( findBy f $ extendedStylePropertyChain styleTrace styleSet )
+ = findBy f (extendedStylePropertyChain styleTrace styleSet)
<|> ( f =<< fmap (lookupDefaultStyle' styleSet) mFamily )
@@ -569,7 +566,7 @@ read_text_seq = matchingElement NsText "sequence"
-- specifically. I honor that, although the current implementation of '(<>)'
--- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein.
+-- for 'Inlines' in "Text.Pandoc.Builder" will collapse them again.
-- The rational is to be prepared for future modifications.
read_spaces :: InlineMatcher
read_spaces = matchingElement NsText "s" (
@@ -663,7 +660,7 @@ read_list = matchingElement NsText "list"
--
read_list_item :: ElementMatcher [Blocks]
read_list_item = matchingElement NsText "list-item"
- $ liftA (compactify'.(:[]))
+ $ liftA (compactify.(:[]))
( matchChildContent' [ read_paragraph
, read_header
, read_list
@@ -749,7 +746,7 @@ read_table_row = matchingElement NsTable "table-row"
--
read_table_cell :: ElementMatcher [Blocks]
read_table_cell = matchingElement NsTable "table-cell"
- $ liftA (compactify'.(:[]))
+ $ liftA (compactify.(:[]))
$ matchChildContent' [ read_paragraph
]
@@ -796,8 +793,7 @@ read_image_src = matchingElement NsDraw "image"
Left _ -> returnV "" -< ()
read_frame_title :: InlineMatcher
-read_frame_title = matchingElement NsSVG "title"
- $ (matchChildContent [] read_plain_text)
+read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
read_frame_text_box :: InlineMatcher
read_frame_text_box = matchingElement NsDraw "text-box"
@@ -806,12 +802,12 @@ read_frame_text_box = matchingElement NsDraw "text-box"
arr read_img_with_caption -< toList paragraphs
read_img_with_caption :: [Block] -> Inlines
-read_img_with_caption ((Para ((Image attr alt (src,title)) : [])) : _) =
+read_img_with_caption (Para [Image attr alt (src,title)] : _) =
singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption
-read_img_with_caption ((Para ((Image attr _ (src,title)) : txt)) : _) =
+read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) =
singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows
-read_img_with_caption ( (Para (_ : xs)) : ys) =
- read_img_with_caption ((Para xs) : ys)
+read_img_with_caption ( Para (_ : xs) : ys) =
+ read_img_with_caption (Para xs : ys)
read_img_with_caption _ =
mempty
@@ -899,9 +895,6 @@ read_reference_ref = matchingElement NsText "reference-ref"
-- Entry point
----------------------
---read_plain_content :: OdtReaderSafe _x Inlines
---read_plain_content = strContent >>^ text
-
read_text :: OdtReaderSafe _x Pandoc
read_text = matchChildContent' [ read_header
, read_paragraph
@@ -915,8 +908,8 @@ post_process (Pandoc m blocks) =
Pandoc m (post_process' blocks)
post_process' :: [Block] -> [Block]
-post_process' ((Table _ a w h r) : (Div ("", ["caption"], _) [Para inlines] ) : xs) =
- (Table inlines a w h r) : ( post_process' xs )
+post_process' (Table _ a w h r : Div ("", ["caption"], _) [Para inlines] : xs) =
+ Table inlines a w h r : post_process' xs
post_process' bs = bs
read_body :: OdtReader _x (Pandoc, MediaBag)
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
index 877443543..f8ea5c605 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
{-
Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
@@ -39,11 +39,7 @@ compatible instances of "ArrowChoice".
-- We export everything
module Text.Pandoc.Readers.Odt.Generic.Fallible where
-import Control.Applicative
-import Control.Monad
-
-import qualified Data.Foldable as F
-import Data.Monoid ((<>))
+import Data.Monoid ((<>))
-- | Default for now. Will probably become a class at some point.
type Failure = ()
@@ -51,16 +47,6 @@ type Failure = ()
type Fallible a = Either Failure a
--- | False -> Left (), True -> Right ()
-boolToEither :: Bool -> Fallible ()
-boolToEither False = Left ()
-boolToEither True = Right ()
-
--- | False -> Left (), True -> Right ()
-boolToChoice :: Bool -> Fallible ()
-boolToChoice False = Left ()
-boolToChoice True = Right ()
-
--
maybeToEither :: Maybe a -> Fallible a
maybeToEither (Just a) = Right a
@@ -71,21 +57,11 @@ eitherToMaybe :: Either _l a -> Maybe a
eitherToMaybe (Left _) = Nothing
eitherToMaybe (Right a) = Just a
--- | > untagEither === either id id
-untagEither :: Either a a -> a
-untagEither (Left a) = a
-untagEither (Right a) = a
-
-- | > fromLeft f === either f id
fromLeft :: (a -> b) -> Either a b -> b
fromLeft f (Left a) = f a
fromLeft _ (Right b) = b
--- | > fromRight f === either id f
-fromRight :: (a -> b) -> Either b a -> b
-fromRight _ (Left b) = b
-fromRight f (Right a) = f a
-
-- | > recover a === fromLeft (const a) === either (const a) id
recover :: a -> Either _f a -> a
recover a (Left _) = a
@@ -110,24 +86,6 @@ collapseEither (Left f ) = Left f
collapseEither (Right (Left f)) = Left f
collapseEither (Right (Right x)) = Right x
--- | If either of the values represents an error, the result is a
--- (possibly combined) error. If both values represent a success,
--- both are returned.
-chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b')
-chooseMin = chooseMinWith (,)
-
--- | If either of the values represents an error, the result is a
--- (possibly combined) error. If both values represent a success,
--- a combination is returned.
-chooseMinWith :: (Monoid a) => (b -> b' -> c)
- -> Either a b
- -> Either a b'
- -> Either a c
-chooseMinWith (><) (Right a) (Right b) = Right $ a >< b
-chooseMinWith _ (Left a) (Left b) = Left $ a <> b
-chooseMinWith _ (Left a) _ = Left a
-chooseMinWith _ _ (Left b) = Left b
-
-- | If either of the values represents a non-error, the result is a
-- (possibly combined) non-error. If both values represent an error, an error
-- is returned.
@@ -152,109 +110,17 @@ chooseMaxWith _ _ (Right b) = Right b
class ChoiceVector v where
spreadChoice :: v (Either f a) -> Either f (v a)
--- Let's do a few examples first
-
-instance ChoiceVector Maybe where
- spreadChoice (Just (Left f)) = Left f
- spreadChoice (Just (Right x)) = Right (Just x)
- spreadChoice Nothing = Right Nothing
-
-instance ChoiceVector (Either l) where
- spreadChoice (Right (Left f)) = Left f
- spreadChoice (Right (Right x)) = Right (Right x)
- spreadChoice (Left x ) = Right (Left x)
-
instance ChoiceVector ((,) a) where
spreadChoice (_, Left f) = Left f
spreadChoice (x, Right y) = Right (x,y)
-- Wasn't there a newtype somewhere with the elements flipped?
---
--- More instances later, first some discussion.
---
--- I'll have to freshen up on type system details to see how (or if) to do
--- something like
---
--- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where
--- > :
---
--- But maybe it would be even better to use something like
---
--- > class ChoiceVector v v' f | v -> v' f where
--- > spreadChoice :: v -> Either f v'
---
--- That way, more places in @v@ could spread the cheer, e.g.:
---
--- As before:
--- -- ( a , Either f b) (a , b) f
--- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where
--- > spreadChoice (_, Left f) = Left f
--- > spreadChoice (a, Right b) = Right (a,b)
---
--- But also:
--- -- ( Either f a , b) (a , b) f
--- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where
--- > spreadChoice (Right a,b) = Right (a,b)
--- > spreadChoice (Left f,_) = Left f
---
--- And maybe even:
--- -- ( Either f a , Either f b) (a , b) f
--- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where
--- > spreadChoice (Right a , Right b) = Right (a,b)
--- > spreadChoice (Left f , _ ) = Left f
--- > spreadChoice ( _ , Left f) = Left f
---
--- Of course that would lead to a lot of overlapping instances...
--- But I can't think of a different way. A selector function might help,
--- but not even a "Data.Traversable" is powerful enough for that.
--- But maybe someone has already solved all this with a lens library.
---
--- Well, it's an interesting academic question. But for practical purposes,
--- I have more than enough right now.
-
-instance ChoiceVector ((,,) a b) where
- spreadChoice (_,_, Left f) = Left f
- spreadChoice (a,b, Right x) = Right (a,b,x)
-
-instance ChoiceVector ((,,,) a b c) where
- spreadChoice (_,_,_, Left f) = Left f
- spreadChoice (a,b,c, Right x) = Right (a,b,c,x)
-
-instance ChoiceVector ((,,,,) a b c d) where
- spreadChoice (_,_,_,_, Left f) = Left f
- spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x)
-
-instance ChoiceVector (Const a) where
- spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types
-
--- | Fails on the first error
-instance ChoiceVector [] where
- spreadChoice = sequence -- using the monad instance of Either.
- -- Could be generalized to "Data.Traversable" - but why play
- -- with UndecidableInstances unless this is really needed.
-
-- | Wrapper for a list. While the normal list instance of 'ChoiceVector'
-- fails whenever it can, this type will never fail.
newtype SuccessList a = SuccessList { collectNonFailing :: [a] }
deriving ( Eq, Ord, Show )
instance ChoiceVector SuccessList where
- spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing
+ spreadChoice = Right . SuccessList . foldr unTagRight [] . collectNonFailing
where unTagRight (Right x) = (x:)
unTagRight _ = id
-
--- | Like 'catMaybes', but for 'Either'.
-collectRights :: [Either _l r] -> [r]
-collectRights = collectNonFailing . untag . spreadChoice . SuccessList
- where untag = fromLeft (error "Unexpected Left")
-
--- | A version of 'collectRights' generalized to other containers. The
--- container must be both "reducible" and "buildable". Most general containers
--- should fullfill these requirements, but there is no single typeclass
--- (that I know of) for that.
--- Therefore, they are split between 'Foldable' and 'MonadPlus'.
--- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.)
-collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r
-collectRightsF = F.foldr unTagRight mzero
- where unTagRight (Right x) = mplus $ return x
- unTagRight _ = id
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6c10ed61d..556517259 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -1,6 +1,6 @@
+
+
{-# LANGUAGE TypeOperators #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ViewPatterns #-}
{-
@@ -38,8 +38,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
, uncurry4
, uncurry5
, uncurry6
-, uncurry7
-, uncurry8
, swap
, reverseComposition
, bool
@@ -53,12 +51,12 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
, composition
) where
-import Control.Category ( Category, (>>>), (<<<) )
-import qualified Control.Category as Cat ( id )
-import Control.Monad ( msum )
+import Control.Category (Category, (<<<), (>>>))
+import qualified Control.Category as Cat (id)
+import Control.Monad (msum)
-import qualified Data.Foldable as F ( Foldable, foldr )
-import Data.Maybe
+import qualified Data.Foldable as F (Foldable, foldr)
+import Data.Maybe
-- | Aequivalent to
@@ -148,15 +146,11 @@ uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z
uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z
uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z
uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z
-uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z
-uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z
uncurry3 fun (a,b,c ) = fun a b c
uncurry4 fun (a,b,c,d ) = fun a b c d
uncurry5 fun (a,b,c,d,e ) = fun a b c d e
uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f
-uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g
-uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h
swap :: (a,b) -> (b,a)
swap (a,b) = (b,a)
@@ -168,4 +162,3 @@ findBy :: (a -> Maybe b) -> [a] -> Maybe b
findBy _ [] = Nothing
findBy f ((f -> Just x):_ ) = Just x
findBy f ( _:xs) = findBy f xs
-
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 8c03d1a09..428048427 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -41,50 +41,17 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter
, XMLConverterState
, XMLConverter
, FallibleXMLConverter
-, swapPosition
-, runConverter
-, runConverter''
, runConverter'
-, runConverterF'
-, runConverterF
-, getCurrentElement
, getExtraState
, setExtraState
, modifyExtraState
-, convertingExtraState
, producingExtraState
-, lookupNSiri
-, lookupNSprefix
-, readNSattributes
-, elemName
-, elemNameIs
-, strContent
-, elContent
-, currentElem
-, currentElemIs
-, expectElement
-, elChildren
-, findChildren
-, filterChildren
-, filterChildrenName
, findChild'
-, findChild
-, filterChild'
-, filterChild
-, filterChildName'
-, filterChildName
-, isSet
, isSet'
, isSetWithDefault
-, hasAttrValueOf'
-, failIfNotAttrValueOf
-, isThatTheAttrValue
-, searchAttrIn
-, searchAttrWith
, searchAttr
, lookupAttr
, lookupAttr'
-, lookupAttrWithDefault
, lookupDefaultingAttr
, findAttr'
, findAttr
@@ -93,25 +60,9 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter
, readAttr'
, readAttrWithDefault
, getAttr
--- , (>/<)
--- , (?>/<)
, executeIn
-, collectEvery
, withEveryL
-, withEvery
, tryAll
-, tryAll'
-, IdXMLConverter
-, MaybeEConverter
-, ElementMatchConverter
-, MaybeCConverter
-, ContentMatchConverter
-, makeMatcherE
-, makeMatcherC
-, prepareMatchersE
-, prepareMatchersC
-, matchChildren
-, matchContent''
, matchContent'
, matchContent
) where
@@ -120,8 +71,8 @@ import Control.Applicative hiding ( liftA, liftA2 )
import Control.Monad ( MonadPlus )
import Control.Arrow
+import Data.Either ( rights )
import qualified Data.Map as M
-import qualified Data.Foldable as F
import Data.Default
import Data.Maybe
@@ -210,17 +161,6 @@ currentElement state = head (parentElements state)
-- | Replace the current position by another, modifying the extra state
-- in the process
-swapPosition :: (extraState -> extraState')
- -> [XML.Element]
- -> XMLConverterState nsID extraState
- -> XMLConverterState nsID extraState'
-swapPosition f stack state
- = state { parentElements = stack
- , moreState = f (moreState state)
- }
-
--- | Replace the current position by another, modifying the extra state
--- in the process
swapStack' :: XMLConverterState nsID extraState
-> [XML.Element]
-> ( XMLConverterState nsID extraState , [XML.Element] )
@@ -264,14 +204,6 @@ runConverter :: XMLConverter nsID extraState input output
-> output
runConverter converter state input = snd $ runArrowState converter (state,input)
---
-runConverter'' :: (NameSpaceID nsID)
- => XMLConverter nsID extraState (Fallible ()) output
- -> extraState
- -> XML.Element
- -> output
-runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) ()
-
runConverter' :: (NameSpaceID nsID)
=> FallibleXMLConverter nsID extraState () success
-> extraState
@@ -280,20 +212,6 @@ runConverter' :: (NameSpaceID nsID)
runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) ()
--
-runConverterF' :: FallibleXMLConverter nsID extraState x y
- -> XMLConverterState nsID extraState
- -> Fallible x -> Fallible y
-runConverterF' a s e = runConverter (returnV e >>? a) s e
-
---
-runConverterF :: (NameSpaceID nsID)
- => FallibleXMLConverter nsID extraState XML.Element x
- -> extraState
- -> Fallible XML.Element -> Fallible x
-runConverterF a s = either failWith
- (\e -> runConverter a (createStartState e s) e)
-
---
getCurrentElement :: XMLConverter nsID extraState x XML.Element
getCurrentElement = extractFromState currentElement
@@ -430,57 +348,15 @@ elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName
--------------------------------------------------------------------------------
--
-strContent :: XMLConverter nsID extraState x String
-strContent = getCurrentElement
- >>^ XML.strContent
-
---
elContent :: XMLConverter nsID extraState x [XML.Content]
elContent = getCurrentElement
>>^ XML.elContent
--------------------------------------------------------------------------------
--- Current element
---------------------------------------------------------------------------------
-
---
-currentElem :: XMLConverter nsID extraState x (XML.QName)
-currentElem = getCurrentElement
- >>^ XML.elName
-
-currentElemIs :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> XMLConverter nsID extraState x Bool
-currentElemIs nsID name = getCurrentElement
- >>> elemNameIs nsID name
-
-
-
-{-
-currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>>
- (XML.qName >>^ (&&).(== name) )
- ^&&&^
- (XML.qIRI >>^ (==) )
- ) >>% (.)
- ) &&& lookupNSiri nsID >>% ($)
--}
-
---
-expectElement :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState x ()
-expectElement nsID name = currentElemIs nsID name
- >>^ boolToChoice
-
---------------------------------------------------------------------------------
-- Chilren
--------------------------------------------------------------------------------
--
-elChildren :: XMLConverter nsID extraState x [XML.Element]
-elChildren = getCurrentElement
- >>^ XML.elChildren
-
--
findChildren :: (NameSpaceID nsID)
=> nsID -> ElementName
@@ -490,18 +366,6 @@ findChildren nsID name = elemName nsID name
>>% XML.findChildren
--
-filterChildren :: (XML.Element -> Bool)
- -> XMLConverter nsID extraState x [XML.Element]
-filterChildren p = getCurrentElement
- >>^ XML.filterChildren p
-
---
-filterChildrenName :: (XML.QName -> Bool)
- -> XMLConverter nsID extraState x [XML.Element]
-filterChildrenName p = getCurrentElement
- >>^ XML.filterChildrenName p
-
---
findChild' :: (NameSpaceID nsID)
=> nsID
-> ElementName
@@ -517,45 +381,12 @@ findChild :: (NameSpaceID nsID)
findChild nsID name = findChild' nsID name
>>> maybeToChoice
---
-filterChild' :: (XML.Element -> Bool)
- -> XMLConverter nsID extraState x (Maybe XML.Element)
-filterChild' p = getCurrentElement
- >>^ XML.filterChild p
-
---
-filterChild :: (XML.Element -> Bool)
- -> FallibleXMLConverter nsID extraState x XML.Element
-filterChild p = filterChild' p
- >>> maybeToChoice
-
---
-filterChildName' :: (XML.QName -> Bool)
- -> XMLConverter nsID extraState x (Maybe XML.Element)
-filterChildName' p = getCurrentElement
- >>^ XML.filterChildName p
-
---
-filterChildName :: (XML.QName -> Bool)
- -> FallibleXMLConverter nsID extraState x XML.Element
-filterChildName p = filterChildName' p
- >>> maybeToChoice
-
--------------------------------------------------------------------------------
-- Attributes
--------------------------------------------------------------------------------
--
-isSet :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> (Either Failure Bool)
- -> FallibleXMLConverter nsID extraState x Bool
-isSet nsID attrName deflt
- = findAttr' nsID attrName
- >>^ maybe deflt stringToBool
-
---
isSet' :: (NameSpaceID nsID)
=> nsID -> AttributeName
-> XMLConverter nsID extraState x (Maybe Bool)
@@ -570,34 +401,6 @@ isSetWithDefault nsID attrName def'
= isSet' nsID attrName
>>^ fromMaybe def'
---
-hasAttrValueOf' :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> AttributeValue
- -> XMLConverter nsID extraState x Bool
-hasAttrValueOf' nsID attrName attrValue
- = findAttr nsID attrName
- >>> ( const False ^|||^ (==attrValue))
-
---
-failIfNotAttrValueOf :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> AttributeValue
- -> FallibleXMLConverter nsID extraState x ()
-failIfNotAttrValueOf nsID attrName attrValue
- = hasAttrValueOf' nsID attrName attrValue
- >>^ boolToChoice
-
--- | Is the value that is currently transported in the arrow the value of
--- the specified attribute?
-isThatTheAttrValue :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> FallibleXMLConverter nsID extraState AttributeValue Bool
-isThatTheAttrValue nsID attrName
- = keepingTheValue
- (findAttr nsID attrName)
- >>% right.(==)
-
-- | Lookup value in a dictionary, fail if no attribute found or value
-- not in dictionary
searchAttrIn :: (NameSpaceID nsID)
@@ -608,18 +411,6 @@ searchAttrIn nsID attrName dict
= findAttr nsID attrName
>>?^? maybeToChoice.(`lookup` dict )
-
--- | Lookup value in a dictionary. Fail if no attribute found. If value not in
--- dictionary, return default value
-searchAttrWith :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> a
- -> [(AttributeValue,a)]
- -> FallibleXMLConverter nsID extraState x a
-searchAttrWith nsID attrName defV dict
- = findAttr nsID attrName
- >>?^ (fromMaybe defV).(`lookup` dict )
-
-- | Lookup value in a dictionary. If attribute or value not found,
-- return default value
searchAttr :: (NameSpaceID nsID)
@@ -789,16 +580,6 @@ prepareIteration nsID name = keepingTheValue
(findChildren nsID name)
>>% distributeValue
--- | Applies a converter to every child element of a specific type.
--- Collects results in a 'Monoid'.
--- Fails completely if any conversion fails.
-collectEvery :: (NameSpaceID nsID, Monoid m)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState a m
- -> FallibleXMLConverter nsID extraState a m
-collectEvery nsID name a = prepareIteration nsID name
- >>> foldS' (switchingTheStack a)
-
--
withEveryL :: (NameSpaceID nsID)
=> nsID -> ElementName
@@ -824,17 +605,7 @@ tryAll :: (NameSpaceID nsID)
-> XMLConverter nsID extraState b [a]
tryAll nsID name a = prepareIteration nsID name
>>> iterateS (switchingTheStack a)
- >>^ collectRights
-
--- | Applies a converter to every child element of a specific type.
--- Collects all successful results.
-tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState b a
- -> XMLConverter nsID extraState b (c a)
-tryAll' nsID name a = prepareIteration nsID name
- >>> iterateS (switchingTheStack a)
- >>^ collectRightsF
+ >>^ rights
--------------------------------------------------------------------------------
-- Matching children
@@ -843,15 +614,6 @@ tryAll' nsID name a = prepareIteration nsID name
type IdXMLConverter nsID moreState x
= XMLConverter nsID moreState x x
-type MaybeEConverter nsID moreState x
- = Maybe (IdXMLConverter nsID moreState (x, XML.Element))
-
--- Chainable converter that helps deciding which converter to actually use.
-type ElementMatchConverter nsID extraState x
- = IdXMLConverter nsID
- extraState
- (MaybeEConverter nsID extraState x, XML.Element)
-
type MaybeCConverter nsID moreState x
= Maybe (IdXMLConverter nsID moreState (x, XML.Content))
@@ -862,26 +624,6 @@ type ContentMatchConverter nsID extraState x
(MaybeCConverter nsID extraState x, XML.Content)
-- Helper function: The @c@ is actually a converter that is to be selected by
--- matching XML elements to the first two parameters.
--- The fold used to match elements however is very simple, so to use it,
--- this function wraps the converter in another converter that unifies
--- the accumulator. Think of a lot of converters with the resulting type
--- chained together. The accumulator not only transports the element
--- unchanged to the next matcher, it also does the actual selecting by
--- combining the intermediate results with '(<|>)'.
-makeMatcherE :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState a a
- -> ElementMatchConverter nsID extraState a
-makeMatcherE nsID name c = ( second (
- elemNameIs nsID name
- >>^ bool Nothing (Just tryC)
- )
- >>% (<|>)
- ) &&&^ snd
- where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd
-
--- Helper function: The @c@ is actually a converter that is to be selected by
-- matching XML content to the first two parameters.
-- The fold used to match elements however is very simple, so to use it,
-- this function wraps the converter in another converter that unifies
@@ -914,13 +656,6 @@ makeMatcherC nsID name c = ( second ( contentToElem
_ -> failEmpty
-- Creates and chains a bunch of matchers
-prepareMatchersE :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)]
- -> ElementMatchConverter nsID extraState x
---prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE)
-prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE)
-
--- Creates and chains a bunch of matchers
prepareMatchersC :: (NameSpaceID nsID)
=> [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)]
-> ContentMatchConverter nsID extraState x
@@ -928,52 +663,6 @@ prepareMatchersC :: (NameSpaceID nsID)
prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC)
-- | Takes a list of element-data - converter groups and
--- * Finds all children of the current element
--- * Matches each group to each child in order (at most one group per child)
--- * Filters non-matched children
--- * Chains all found converters in child-order
--- * Applies the chain to the input element
-matchChildren :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
- -> XMLConverter nsID extraState a a
-matchChildren lookups = let matcher = prepareMatchersE lookups
- in keepingTheValue (
- elChildren
- >>> map (Nothing,)
- ^>> iterateSL matcher
- >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m)
- -- >>> foldSs
- >>> reverseComposition
- )
- >>> swap
- ^>> app
- where
- -- let the converter swallow the element and drop the element
- -- in the return value
- swallowElem element converter = (,element) ^>> converter >>^ fst
-
---
-matchContent'' :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
- -> XMLConverter nsID extraState a a
-matchContent'' lookups = let matcher = prepareMatchersC lookups
- in keepingTheValue (
- elContent
- >>> map (Nothing,)
- ^>> iterateSL matcher
- >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m)
- -- >>> foldSs
- >>> reverseComposition
- )
- >>> swap
- ^>> app
- where
- -- let the converter swallow the content and drop the content
- -- in the return value
- swallowContent content converter = (,content) ^>> converter >>^ fst
-
-
--- | Takes a list of element-data - converter groups and
-- * Finds all content of the current element
-- * Matches each group to each piece of content in order
-- (at most one group per piece of content)
@@ -1018,14 +707,6 @@ matchContent lookups fallback
-- Internals
--------------------------------------------------------------------------------
-stringToBool :: (Monoid failure) => String -> Either failure Bool
-stringToBool val -- stringToBool' val >>> maybeToChoice
- | val `elem` trueValues = succeedWith True
- | val `elem` falseValues = succeedWith False
- | otherwise = failEmpty
- where trueValues = ["true" ,"on" ,"1"]
- falseValues = ["false","off","0"]
-
stringToBool' :: String -> Maybe Bool
stringToBool' val | val `elem` trueValues = Just True
| val `elem` falseValues = Just False
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index deb009998..92e12931d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -31,11 +31,11 @@ Namespaces used in odt files.
module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
) where
-import Data.List ( isPrefixOf )
-import Data.Maybe ( fromMaybe, listToMaybe )
-import qualified Data.Map as M ( empty, insert )
+import Data.List (isPrefixOf)
+import qualified Data.Map as M (empty, insert)
+import Data.Maybe (fromMaybe, listToMaybe)
-import Text.Pandoc.Readers.Odt.Generic.Namespaces
+import Text.Pandoc.Readers.Odt.Generic.Namespaces
instance NameSpaceID Namespace where
@@ -48,7 +48,7 @@ instance NameSpaceID Namespace where
findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',~nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
nsIDmap :: NameSpaceIRIs Namespace
nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 26ba6df82..58be8e4a3 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -1,9 +1,8 @@
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE ViewPatterns #-}
-{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE Arrows #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+
{-
Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
@@ -50,49 +49,36 @@ module Text.Pandoc.Readers.Odt.StyleReader
, ListLevelType (..)
, LengthOrPercent (..)
, lookupStyle
-, getTextProperty
-, getTextProperty'
-, getParaProperty
-, getListStyle
, getListLevelStyle
, getStyleFamily
-, lookupDefaultStyle
, lookupDefaultStyle'
, lookupListStyleByName
-, getPropertyChain
-, textPropertyChain
-, stylePropertyChain
-, stylePropertyChain'
-, getStylePropertyChain
, extendedStylePropertyChain
-, extendedStylePropertyChain'
-, liftStyles
, readStylesAt
) where
-import Control.Arrow
-import Control.Applicative hiding ( liftA, liftA2, liftA3 )
+import Control.Applicative hiding (liftA, liftA2, liftA3)
+import Control.Arrow
-import qualified Data.Foldable as F
-import qualified Data.Map as M
-import qualified Data.Set as S
-import Data.Char ( isDigit )
-import Data.Default
-import Data.List ( unfoldr )
-import Data.Maybe
+import Data.Char (isDigit)
+import Data.Default
+import qualified Data.Foldable as F
+import Data.List (unfoldr)
+import qualified Data.Map as M
+import Data.Maybe
+import qualified Data.Set as S
-import qualified Text.XML.Light as XML
+import qualified Text.XML.Light as XML
-import Text.Pandoc.Readers.Odt.Arrows.State
-import Text.Pandoc.Readers.Odt.Arrows.Utils
+import Text.Pandoc.Readers.Odt.Arrows.Utils
-import Text.Pandoc.Readers.Odt.Generic.Utils
-import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM
+import Text.Pandoc.Readers.Odt.Generic.Utils
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
-import Text.Pandoc.Readers.Odt.Namespaces
-import Text.Pandoc.Readers.Odt.Base
+import Text.Pandoc.Readers.Odt.Base
+import Text.Pandoc.Readers.Odt.Namespaces
readStylesAt :: XML.Element -> Fallible Styles
@@ -118,7 +104,7 @@ instance Lookupable FontPitch where
instance Default FontPitch where
def = PitchVariable
--- The font pitch can be specifed in a style directly. Normally, however,
+-- The font pitch can be specified in a style directly. Normally, however,
-- it is defined in the font. That is also the specs' recommendation.
--
-- Thus, we want
@@ -145,13 +131,12 @@ type StyleReaderSafe a b = XMLReaderSafe FontPitches a b
-- | A reader for font pitches
fontPitchReader :: XMLReader _s _x FontPitches
fontPitchReader = executeIn NsOffice "font-face-decls" (
- ( withEveryL NsStyle "font-face" $ liftAsSuccess (
+ withEveryL NsStyle "font-face" (liftAsSuccess (
findAttr' NsStyle "name"
&&&
lookupDefaultingAttr NsStyle "font-pitch"
- )
- )
- >>?^ ( M.fromList . (foldl accumLegalPitches []) )
+ ))
+ >>?^ ( M.fromList . foldl accumLegalPitches [] )
)
where accumLegalPitches ls (Nothing,_) = ls
accumLegalPitches ls (Just n,p) = (n,p):ls
@@ -230,15 +215,15 @@ instance Lookupable StyleFamily where
]
-- | A named style
-data Style = Style { styleFamily :: Maybe StyleFamily
- , styleParentName :: Maybe StyleName
- , listStyle :: Maybe StyleName
- , styleProperties :: StyleProperties
+data Style = Style { styleFamily :: Maybe StyleFamily
+ , styleParentName :: Maybe StyleName
+ , listStyle :: Maybe StyleName
+ , styleProperties :: StyleProperties
}
deriving ( Eq, Show )
-data StyleProperties = SProps { textProperties :: Maybe TextProperties
- , paraProperties :: Maybe ParaProperties
+data StyleProperties = SProps { textProperties :: Maybe TextProperties
+ , paraProperties :: Maybe ParaProperties
-- , tableColProperties :: Maybe TColProperties
-- , tableRowProperties :: Maybe TRowProperties
-- , tableCellProperties :: Maybe TCellProperties
@@ -354,8 +339,8 @@ instance Read XslUnit where
readsPrec _ "em" = [(XslUnitEM , "")]
readsPrec _ _ = []
--- | Rough conversion of measures into millimeters.
--- Pixels and em's are actually implemetation dependant/relative measures,
+-- | Rough conversion of measures into millimetres.
+-- Pixels and em's are actually implementation dependant/relative measures,
-- so I could not really easily calculate anything exact here even if I wanted.
-- But I do not care about exactness right now, as I only use measures
-- to determine if a paragraph is "indented" or not.
@@ -397,11 +382,11 @@ data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
instance Show ListLevelStyle where
show ListLevelStyle{..} = "<LLS|"
- ++ (show listLevelType)
+ ++ show listLevelType
++ "|"
- ++ (maybeToString listItemPrefix)
- ++ (show listItemFormat)
- ++ (maybeToString listItemSuffix)
+ ++ maybeToString listItemPrefix
+ ++ show listItemFormat
+ ++ maybeToString listItemSuffix
++ ">"
where maybeToString = fromMaybe ""
@@ -439,7 +424,7 @@ instance Read ListItemNumberFormat where
--------------------------------------------------------------------------------
-- Readers
--
--- ...it seems like a whole lot of this should be automatically deriveable
+-- ...it seems like a whole lot of this should be automatically derivable
-- or at least moveable into a class. Most of this is data concealed in
-- code.
--------------------------------------------------------------------------------
@@ -497,14 +482,14 @@ readTextProperties =
( liftA6 PropT
( searchAttr NsXSL_FO "font-style" False isFontEmphasised )
( searchAttr NsXSL_FO "font-weight" False isFontBold )
- ( findPitch )
+ findPitch
( getAttr NsStyle "text-position" )
- ( readUnderlineMode )
- ( readStrikeThroughMode )
+ readUnderlineMode
+ readStrikeThroughMode
)
where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
isFontBold = ("normal",False):("bold",True)
- :(map ((,True).show) ([100,200..900]::[Int]))
+ :map ((,True).show) ([100,200..900]::[Int])
readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode)
readUnderlineMode = readLineMode "text-underline-mode"
@@ -524,7 +509,7 @@ readLineMode modeAttr styleAttr = proc x -> do
Nothing -> returnA -< Just UnderlineModeNormal
else returnA -< Nothing
where
- isLinePresent = [("none",False)] ++ map (,True)
+ isLinePresent = ("none",False) : map (,True)
[ "dash" , "dot-dash" , "dot-dot-dash" , "dotted"
, "long-dash" , "solid" , "wave"
]
@@ -561,20 +546,18 @@ readListStyle =
findAttr NsStyle "name"
>>?! keepingTheValue
( liftA ListStyle
- $ ( liftA3 SM.union3
+ $ liftA3 SM.union3
( readListLevelStyles NsText "list-level-style-number" LltNumbered )
( readListLevelStyles NsText "list-level-style-bullet" LltBullet )
- ( readListLevelStyles NsText "list-level-style-image" LltImage )
- ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle
+ ( readListLevelStyles NsText "list-level-style-image" LltImage ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle
)
--
readListLevelStyles :: Namespace -> ElementName
-> ListLevelType
-> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle)
readListLevelStyles namespace elementName levelType =
- ( tryAll namespace elementName (readListLevelStyle levelType)
+ tryAll namespace elementName (readListLevelStyle levelType)
>>^ SM.fromList
- )
--
readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
@@ -624,20 +607,11 @@ lookupStyle :: StyleName -> Styles -> Maybe Style
lookupStyle name Styles{..} = M.lookup name stylesByName
--
-lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties
-lookupDefaultStyle family Styles{..} = fromMaybe def
- (M.lookup family defaultStyleMap)
-
---
lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties
lookupDefaultStyle' Styles{..} family = fromMaybe def
(M.lookup family defaultStyleMap)
--
-getListStyle :: Style -> Styles -> Maybe ListStyle
-getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles)
-
---
lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle
lookupListStyleByName name Styles{..} = M.lookup name listStylesByName
@@ -655,7 +629,7 @@ parents style styles = unfoldr findNextParent style -- Ha!
getStyleFamily :: Style -> Styles -> Maybe StyleFamily
getStyleFamily style@Style{..} styles
= styleFamily
- <|> (F.asum $ map (`getStyleFamily` styles) $ parents style styles)
+ <|> F.asum (map (`getStyleFamily` styles) $ parents style styles)
-- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property
-- values are specified. Instead, a value might be inherited from a
@@ -677,68 +651,7 @@ stylePropertyChain style styles
--
extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties]
extendedStylePropertyChain [] _ = []
-extendedStylePropertyChain [style] styles = (stylePropertyChain style styles)
- ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)))
-extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles)
- ++ (extendedStylePropertyChain trace styles)
--- Optimizable with Data.Sequence
-
---
-extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties]
-extendedStylePropertyChain' [] _ = Nothing
-extendedStylePropertyChain' [style] styles = Just (
- (stylePropertyChain style styles)
- ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)))
- )
-extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++)
- (extendedStylePropertyChain' trace styles)
-
---
-stylePropertyChain' :: Styles -> Style -> [StyleProperties]
-stylePropertyChain' = flip stylePropertyChain
-
---
-getStylePropertyChain :: StyleName -> Styles -> [StyleProperties]
-getStylePropertyChain name styles = maybe []
- (`stylePropertyChain` styles)
- (lookupStyle name styles)
-
---
-getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a]
-getPropertyChain extract style styles = catMaybes
- $ map extract
- $ stylePropertyChain style styles
-
---
-textPropertyChain :: Style -> Styles -> [TextProperties]
-textPropertyChain = getPropertyChain textProperties
-
---
-paraPropertyChain :: Style -> Styles -> [ParaProperties]
-paraPropertyChain = getPropertyChain paraProperties
-
---
-getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a
-getTextProperty extract style styles = fmap extract
- $ listToMaybe
- $ textPropertyChain style styles
-
---
-getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a
-getTextProperty' extract style styles = F.asum
- $ map extract
- $ textPropertyChain style styles
-
---
-getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a
-getParaProperty extract style styles = fmap extract
- $ listToMaybe
- $ paraPropertyChain style styles
-
--- | Lifts the reader into another readers' state.
-liftStyles :: (OdtConverterState s -> OdtConverterState Styles)
- -> (OdtConverterState Styles -> OdtConverterState s )
- -> XMLReader s x x
-liftStyles extract inject = switchState extract inject
- $ convertingExtraState M.empty readAllStyles
-
+extendedStylePropertyChain [style] styles = stylePropertyChain style styles
+ ++ maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))
+extendedStylePropertyChain (style:trace) styles = stylePropertyChain style styles
+ ++ extendedStylePropertyChain trace styles
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 4e1c926da..292830bd2 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Org
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -27,29 +27,42 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Org ( readOrg ) where
-import Text.Pandoc.Readers.Org.Blocks ( blockList, meta )
-import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM )
-import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState )
+import Text.Pandoc.Readers.Org.Blocks (blockList, meta)
+import Text.Pandoc.Readers.Org.ParserState (optionsToParserState)
+import Text.Pandoc.Readers.Org.Parsing (OrgParser, readWithM)
-import Text.Pandoc.Definition
-import Text.Pandoc.Error
-import Text.Pandoc.Options
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing (reportLogMessages)
+import Text.Pandoc.Shared (crFilter)
-import Control.Monad.Reader ( runReader )
+import Control.Monad.Except (throwError)
+import Control.Monad.Reader (runReaderT)
+import Data.Text (Text)
+import qualified Data.Text as T
-- | Parse org-mode string and return a Pandoc document.
-readOrg :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readOrg opts s = flip runReader def $
- readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n")
+readOrg :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readOrg opts s = do
+ parsed <- flip runReaderT def $
+ readWithM parseOrg (optionsToParserState opts)
+ (T.unpack (crFilter s) ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left _ -> throwError $ PandocParseError "problem parsing org"
--
-- Parser
--
-parseOrg :: OrgParser Pandoc
+parseOrg :: PandocMonad m => OrgParser m Pandoc
parseOrg = do
blocks' <- blockList
meta' <- meta
+ reportLogMessages
return $ Pandoc meta' blocks'
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index b1004dda6..424102cb0 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ Module : Text.Pandoc.Readers.Org.BlockStarts
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -40,11 +40,11 @@ module Text.Pandoc.Readers.Org.BlockStarts
, endOfBlock
) where
-import Control.Monad ( void )
+import Control.Monad (void)
import Text.Pandoc.Readers.Org.Parsing
-- | Horizontal Line (five -- dashes or more)
-hline :: OrgParser ()
+hline :: Monad m => OrgParser m ()
hline = try $ do
skipSpaces
string "-----"
@@ -54,58 +54,66 @@ hline = try $ do
return ()
-- | Read the start of a header line, return the header level
-headerStart :: OrgParser Int
+headerStart :: Monad m => OrgParser m Int
headerStart = try $
(length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
-tableStart :: OrgParser Char
+tableStart :: Monad m => OrgParser m Char
tableStart = try $ skipSpaces *> char '|'
-latexEnvStart :: OrgParser String
-latexEnvStart = try $ do
+gridTableStart :: Monad m => OrgParser m ()
+gridTableStart = try $ skipSpaces <* char '+' <* char '-'
+
+
+latexEnvStart :: Monad m => OrgParser m String
+latexEnvStart = try $
skipSpaces *> string "\\begin{"
*> latexEnvName
<* string "}"
<* blankline
where
- latexEnvName :: OrgParser String
+ latexEnvName :: Monad m => OrgParser m String
latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
-
--- | Parses bullet list marker.
-bulletListStart :: OrgParser ()
-bulletListStart = try $
- choice
- [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1
- , () <$ skipSpaces1 <* char '*' <* skipSpaces1
- ]
-
-genericListStart :: OrgParser String
- -> OrgParser Int
-genericListStart listMarker = try $
- (+) <$> (length <$> many spaceChar)
- <*> (length <$> listMarker <* many1 spaceChar)
-
-orderedListStart :: OrgParser Int
+bulletListStart :: Monad m => OrgParser m Int
+bulletListStart = try $ do
+ ind <- length <$> many spaceChar
+ -- Unindented lists cannot use '*' bullets.
+ oneOf (if ind == 0 then "+-" else "*+-")
+ skipSpaces1 <|> lookAhead eol
+ return (ind + 1)
+
+genericListStart :: Monad m
+ => OrgParser m String
+ -> OrgParser m Int
+genericListStart listMarker = try $ do
+ ind <- length <$> many spaceChar
+ void listMarker
+ skipSpaces1 <|> lookAhead eol
+ return (ind + 1)
+
+eol :: Monad m => OrgParser m ()
+eol = void (char '\n')
+
+orderedListStart :: Monad m => OrgParser m Int
orderedListStart = genericListStart orderedListMarker
-- Ordered list markers allowed in org-mode
where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
-drawerStart :: OrgParser String
-drawerStart = try $
- skipSpaces *> drawerName <* skipSpaces <* newline
+drawerStart :: Monad m => OrgParser m String
+drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline
where drawerName = char ':' *> manyTill nonspaceChar (char ':')
-metaLineStart :: OrgParser ()
+metaLineStart :: Monad m => OrgParser m ()
metaLineStart = try $ skipSpaces <* string "#+"
-commentLineStart :: OrgParser ()
+commentLineStart :: Monad m => OrgParser m ()
commentLineStart = try $ skipSpaces <* string "# "
-exampleLineStart :: OrgParser ()
+exampleLineStart :: Monad m => OrgParser m ()
exampleLineStart = () <$ try (skipSpaces *> string ": ")
-noteMarker :: OrgParser String
+noteMarker :: Monad m => OrgParser m String
noteMarker = try $ do
char '['
choice [ many1Till digit (char ']')
@@ -114,17 +122,18 @@ noteMarker = try $ do
]
-- | Succeeds if the parser is at the end of a block.
-endOfBlock :: OrgParser ()
-endOfBlock = lookAhead . try $ do
- void blankline <|> anyBlockStart
+endOfBlock :: Monad m => OrgParser m ()
+endOfBlock = lookAhead . try $
+ void blankline <|> anyBlockStart
where
-- Succeeds if there is a new block starting at this position.
- anyBlockStart :: OrgParser ()
+ anyBlockStart :: Monad m => OrgParser m ()
anyBlockStart = try . choice $
[ exampleLineStart
, hline
, metaLineStart
, commentLineStart
+ , gridTableStart
, void noteMarker
, void tableStart
, void drawerStart
@@ -133,4 +142,3 @@ endOfBlock = lookAhead . try $ do
, void bulletListStart
, void orderedListStart
]
-
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 484d97482..fa016283c 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,8 +1,5 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE ViewPatterns #-}
{-
-Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,10 +15,11 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
-
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RecordWildCards #-}
{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2017 Albert Krewinkel
+ Module : Text.Pandoc.Readers.Org.Blocks
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -33,247 +31,61 @@ module Text.Pandoc.Readers.Org.Blocks
, meta
) where
-import Text.Pandoc.Readers.Org.BlockStarts
-import Text.Pandoc.Readers.Org.Inlines
-import Text.Pandoc.Readers.Org.Meta ( metaExport, metaKey, metaLine )
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString, isImageFilename, rundocBlockClass
- , toRundocAttrib, translateLang )
+import Text.Pandoc.Readers.Org.BlockStarts
+import Text.Pandoc.Readers.Org.DocumentTree (documentTree, headlineToBlocks)
+import Text.Pandoc.Readers.Org.Inlines
+import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+ originalLang, translateLang)
+
+import Text.Pandoc.Builder (Blocks, Inlines)
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (compactify, compactifyDL, safeRead)
+
+import Control.Monad (foldM, guard, mzero, void)
+import Data.Char (isSpace, toLower, toUpper)
+import Data.Default (Default)
+import Data.List (foldl', isPrefixOf)
+import Data.Maybe (fromMaybe, isJust, isNothing)
+import Data.Monoid ((<>))
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks )
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared ( compactify', compactify'DL, safeRead )
-
-import Control.Monad ( foldM, guard, mzero, void )
-import Data.Char ( isSpace, toLower, toUpper)
-import Data.Default ( Default )
-import Data.List ( foldl', isPrefixOf )
-import Data.Maybe ( fromMaybe, isNothing )
-import Data.Monoid ((<>))
-
---
--- Org headers
---
-newtype Tag = Tag { fromTag :: String }
- deriving (Show, Eq)
-
--- | Create a tag containing the given string.
-toTag :: String -> Tag
-toTag = Tag
-
--- | The key (also called name or type) of a property.
-newtype PropertyKey = PropertyKey { fromKey :: String }
- deriving (Show, Eq, Ord)
-
--- | Create a property key containing the given string. Org mode keys are
--- case insensitive and are hence converted to lower case.
-toPropertyKey :: String -> PropertyKey
-toPropertyKey = PropertyKey . map toLower
-
--- | The value assigned to a property.
-newtype PropertyValue = PropertyValue { fromValue :: String }
-
--- | Create a property value containing the given string.
-toPropertyValue :: String -> PropertyValue
-toPropertyValue = PropertyValue
-
--- | Check whether the property value is non-nil (i.e. truish).
-isNonNil :: PropertyValue -> Bool
-isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
-
--- | Key/value pairs from a PROPERTIES drawer
-type Properties = [(PropertyKey, PropertyValue)]
-
--- | Org mode headline (i.e. a document subtree).
-data Headline = Headline
- { headlineLevel :: Int
- , headlineTodoMarker :: Maybe TodoMarker
- , headlineText :: Inlines
- , headlineTags :: [Tag]
- , headlineProperties :: Properties
- , headlineContents :: Blocks
- , headlineChildren :: [Headline]
- }
-
---
--- Parsing headlines and subtrees
---
-
--- | Read an Org mode headline and its contents (i.e. a document subtree).
--- @lvl@ gives the minimum acceptable level of the tree.
-headline :: Int -> OrgParser (F Headline)
-headline lvl = try $ do
- level <- headerStart
- guard (lvl <= level)
- todoKw <- optionMaybe todoKeyword
- title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
- tags <- option [] headerTags
- newline
- properties <- option mempty propertiesDrawer
- contents <- blocks
- children <- many (headline (level + 1))
- return $ do
- title' <- title
- contents' <- contents
- children' <- sequence children
- return $ Headline
- { headlineLevel = level
- , headlineTodoMarker = todoKw
- , headlineText = title'
- , headlineTags = tags
- , headlineProperties = properties
- , headlineContents = contents'
- , headlineChildren = children'
- }
- where
- endOfTitle :: OrgParser ()
- endOfTitle = void . lookAhead $ optional headerTags *> newline
-
- headerTags :: OrgParser [Tag]
- headerTags = try $
- let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
- in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
-
--- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
-headlineToBlocks :: Headline -> OrgParser Blocks
-headlineToBlocks hdln@(Headline {..}) = do
- maxHeadlineLevels <- getExportSetting exportHeadlineLevels
- case () of
- _ | any isNoExportTag headlineTags -> return mempty
- _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln
- _ | isCommentTitle headlineText -> return mempty
- _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln
- _ | otherwise -> headlineToHeaderWithContents hdln
-
-isNoExportTag :: Tag -> Bool
-isNoExportTag = (== toTag "noexport")
-
-isArchiveTag :: Tag -> Bool
-isArchiveTag = (== toTag "ARCHIVE")
-
--- | Check if the title starts with COMMENT.
--- FIXME: This accesses builder internals not intended for use in situations
--- like these. Replace once keyword parsing is supported.
-isCommentTitle :: Inlines -> Bool
-isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
-isCommentTitle _ = False
-
-archivedHeadlineToBlocks :: Headline -> OrgParser Blocks
-archivedHeadlineToBlocks hdln = do
- archivedTreesOption <- getExportSetting exportArchivedTrees
- case archivedTreesOption of
- ArchivedTreesNoExport -> return mempty
- ArchivedTreesExport -> headlineToHeaderWithContents hdln
- ArchivedTreesHeadlineOnly -> headlineToHeader hdln
-
-headlineToHeaderWithList :: Headline -> OrgParser Blocks
-headlineToHeaderWithList hdln@(Headline {..}) = do
- maxHeadlineLevels <- getExportSetting exportHeadlineLevels
- header <- headlineToHeader hdln
- listElements <- sequence (map headlineToBlocks headlineChildren)
- let listBlock = if null listElements
- then mempty
- else B.orderedList listElements
- let headerText = if maxHeadlineLevels == headlineLevel
- then header
- else flattenHeader header
- return $ headerText <> headlineContents <> listBlock
- where
- flattenHeader :: Blocks -> Blocks
- flattenHeader blks =
- case B.toList blks of
- (Header _ _ inlns:_) -> B.para (B.fromList inlns)
- _ -> mempty
-
-headlineToHeaderWithContents :: Headline -> OrgParser Blocks
-headlineToHeaderWithContents hdln@(Headline {..}) = do
- header <- headlineToHeader hdln
- childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren)
- return $ header <> headlineContents <> childrenBlocks
-
-headlineToHeader :: Headline -> OrgParser Blocks
-headlineToHeader (Headline {..}) = do
- exportTodoKeyword <- getExportSetting exportWithTodoKeywords
- let todoText = if exportTodoKeyword
- then case headlineTodoMarker of
- Just kw -> todoKeywordToInlines kw <> B.space
- Nothing -> mempty
- else mempty
- let text = tagTitle (todoText <> headlineText) headlineTags
- let propAttr = propertiesToAttr headlineProperties
- attr <- registerHeader propAttr headlineText
- return $ B.headerWith attr headlineLevel text
-
-todoKeyword :: OrgParser TodoMarker
-todoKeyword = try $ do
- taskStates <- activeTodoMarkers <$> getState
- let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar)
- choice (map kwParser taskStates)
-
-todoKeywordToInlines :: TodoMarker -> Inlines
-todoKeywordToInlines tdm =
- let todoText = todoMarkerName tdm
- todoState = map toLower . show $ todoMarkerState tdm
- classes = [todoState, todoText]
- in B.spanWith (mempty, classes, mempty) (B.str todoText)
-
-propertiesToAttr :: Properties -> Attr
-propertiesToAttr properties =
- let
- toStringPair prop = (fromKey (fst prop), fromValue (snd prop))
- customIdKey = toPropertyKey "custom_id"
- classKey = toPropertyKey "class"
- unnumberedKey = toPropertyKey "unnumbered"
- specialProperties = [customIdKey, classKey, unnumberedKey]
- id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties
- cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties
- kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
- $ properties
- isUnnumbered =
- fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties
- in
- (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs')
-
-tagTitle :: Inlines -> [Tag] -> Inlines
-tagTitle title tags = title <> (mconcat $ map tagToInline tags)
-
-tagToInline :: Tag -> Inlines
-tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty
-
+import qualified Text.Pandoc.Walk as Walk
--
-- parsing blocks
--
-- | Get a list of blocks.
-blockList :: OrgParser [Block]
+blockList :: PandocMonad m => OrgParser m [Block]
blockList = do
- initialBlocks <- blocks
- headlines <- sequence <$> manyTill (headline 1) eof
+ headlines <- documentTree blocks inline
st <- getState
- headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st
- return . B.toList $ (runF initialBlocks st) <> headlineBlocks
+ headlineBlocks <- headlineToBlocks $ runF headlines st
+ -- ignore first headline, it's the document's title
+ return . drop 1 . B.toList $ headlineBlocks
--- | Get the meta information safed in the state.
-meta :: OrgParser Meta
+-- | Get the meta information saved in the state.
+meta :: Monad m => OrgParser m Meta
meta = do
meta' <- metaExport
runF meta' <$> getState
-blocks :: OrgParser (F Blocks)
+blocks :: PandocMonad m => OrgParser m (F Blocks)
blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
-block :: OrgParser (F Blocks)
+block :: PandocMonad m => OrgParser m (F Blocks)
block = choice [ mempty <$ blanklines
, table
, orgBlock
, figure
, example
, genericDrawer
+ , include
, specialLine
, horizontalRule
, list
@@ -283,6 +95,11 @@ block = choice [ mempty <$ blanklines
] <?> "block"
+-- | Parse a horizontal rule into a block element
+horizontalRule :: Monad m => OrgParser m (F Blocks)
+horizontalRule = return B.horizontalRule <$ try hline
+
+
--
-- Block Attributes
--
@@ -297,7 +114,7 @@ data BlockAttributes = BlockAttributes
-- | Convert BlockAttributes into pandoc Attr
attrFromBlockAttributes :: BlockAttributes -> Attr
-attrFromBlockAttributes (BlockAttributes{..}) =
+attrFromBlockAttributes BlockAttributes{..} =
let
ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
classes = case lookup "class" blockAttrKeyValues of
@@ -306,18 +123,18 @@ attrFromBlockAttributes (BlockAttributes{..}) =
kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
in (ident, classes, kv)
-stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String)
-stringyMetaAttribute attrCheck = try $ do
+stringyMetaAttribute :: Monad m => OrgParser m (String, String)
+stringyMetaAttribute = try $ do
metaLineStart
attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
- guard $ attrCheck attrName
skipSpaces
- attrValue <- anyLine
+ attrValue <- anyLine <|> ("" <$ newline)
return (attrName, attrValue)
-blockAttributes :: OrgParser BlockAttributes
+blockAttributes :: PandocMonad m => OrgParser m BlockAttributes
blockAttributes = try $ do
- kv <- many (stringyMetaAttribute attrCheck)
+ kv <- many stringyMetaAttribute
+ guard $ all (attrCheck . fst) kv
let caption = foldl' (appendValues "CAPTION") Nothing kv
let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
let name = lookup "NAME" kv
@@ -326,7 +143,7 @@ blockAttributes = try $ do
Nothing -> return Nothing
Just s -> Just <$> parseFromString inlines (s ++ "\n")
kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
- return $ BlockAttributes
+ return BlockAttributes
{ blockAttrName = name
, blockAttrLabel = label
, blockAttrCaption = caption'
@@ -334,13 +151,7 @@ blockAttributes = try $ do
}
where
attrCheck :: String -> Bool
- attrCheck attr =
- case attr of
- "NAME" -> True
- "LABEL" -> True
- "CAPTION" -> True
- "ATTR_HTML" -> True
- _ -> False
+ attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"]
appendValues :: String -> Maybe String -> (String, String) -> Maybe String
appendValues attrName accValue (key, value) =
@@ -350,17 +161,18 @@ blockAttributes = try $ do
Just acc -> Just $ acc ++ ' ':value
Nothing -> Just value
-keyValues :: OrgParser [(String, String)]
+-- | Parse key-value pairs for HTML attributes
+keyValues :: Monad m => OrgParser m [(String, String)]
keyValues = try $
manyTill ((,) <$> key <*> value) newline
where
- key :: OrgParser String
+ key :: Monad m => OrgParser m String
key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
- value :: OrgParser String
+ value :: Monad m => OrgParser m String
value = skipSpaces *> manyTill anyChar endOfValue
- endOfValue :: OrgParser ()
+ endOfValue :: Monad m => OrgParser m ()
endOfValue =
lookAhead $ (() <$ try (many1 spaceChar <* key))
<|> () <$ newline
@@ -371,12 +183,12 @@ keyValues = try $
--
-- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.
-orgBlock :: OrgParser (F Blocks)
+orgBlock :: PandocMonad m => OrgParser m (F Blocks)
orgBlock = try $ do
blockAttrs <- blockAttributes
blkType <- blockHeaderStart
($ blkType) $
- case (map toLower blkType) of
+ case map toLower blkType of
"export" -> exportBlock
"comment" -> rawBlockLines (const mempty)
"html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
@@ -390,25 +202,25 @@ orgBlock = try $ do
let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
in fmap $ B.divWith (ident, classes ++ [blkType], kv)
where
- blockHeaderStart :: OrgParser String
+ blockHeaderStart :: Monad m => OrgParser m String
blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
lowercase :: String -> String
lowercase = map toLower
-rawBlockLines :: (String -> F Blocks) -> String -> OrgParser (F Blocks)
-rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType))
+rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
+rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType)
-parseBlockLines :: (F Blocks -> F Blocks) -> String -> OrgParser (F Blocks)
-parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent))
+parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
+parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent)
where
- parsedBlockContent :: OrgParser (F Blocks)
+ parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
parsedBlockContent = try $ do
raw <- rawBlockContent blockType
parseFromString blocks (raw ++ "\n")
-- | Read the raw string content of a block
-rawBlockContent :: String -> OrgParser String
+rawBlockContent :: Monad m => String -> OrgParser m String
rawBlockContent blockType = try $ do
blkLines <- manyTill rawLine blockEnder
tabLen <- getOption readerTabStop
@@ -418,18 +230,17 @@ rawBlockContent blockType = try $ do
. map (tabsToSpaces tabLen . commaEscaped)
$ blkLines
where
- rawLine :: OrgParser String
+ rawLine :: Monad m => OrgParser m String
rawLine = try $ ("" <$ blankline) <|> anyLine
- blockEnder :: OrgParser ()
+ blockEnder :: Monad m => OrgParser m ()
blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
stripIndent :: [String] -> [String]
stripIndent strs = map (drop (shortestIndent strs)) strs
shortestIndent :: [String] -> Int
- shortestIndent = foldr min maxBound
- . map (length . takeWhile isSpace)
+ shortestIndent = foldr (min . length . takeWhile isSpace) maxBound
. filter (not . null)
tabsToSpaces :: Int -> String -> String
@@ -437,7 +248,7 @@ rawBlockContent blockType = try $ do
tabsToSpaces tabLen cs'@(c:cs) =
case c of
' ' -> ' ':tabsToSpaces tabLen cs
- '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs
+ '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs
_ -> cs'
commaEscaped :: String -> String
@@ -448,18 +259,18 @@ rawBlockContent blockType = try $ do
commaEscaped cs = cs
-- | Read but ignore all remaining block headers.
-ignHeaders :: OrgParser ()
+ignHeaders :: Monad m => OrgParser m ()
ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-- | Read a block containing code intended for export in specific backends
-- only.
-exportBlock :: String -> OrgParser (F Blocks)
+exportBlock :: Monad m => String -> OrgParser m (F Blocks)
exportBlock blockType = try $ do
exportType <- skipSpaces *> orgArgWord <* ignHeaders
contents <- rawBlockContent blockType
returnF (B.rawBlock (map toLower exportType) contents)
-verseBlock :: String -> OrgParser (F Blocks)
+verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
verseBlock blockType = try $ do
ignHeaders
content <- rawBlockContent blockType
@@ -468,7 +279,7 @@ verseBlock blockType = try $ do
where
-- replace initial spaces with nonbreaking spaces to preserve
-- indentation, parse the rest as normal inline
- parseVerseLine :: String -> OrgParser (F Inlines)
+ parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
parseVerseLine cs = do
let (initialSpaces, indentedLine) = span isSpace cs
let nbspIndent = if null initialSpaces
@@ -480,23 +291,20 @@ verseBlock blockType = try $ do
-- | Read a code block and the associated results block if present. Which of
-- boths blocks is included in the output is determined using the "exports"
-- argument in the block header.
-codeBlock :: BlockAttributes -> String -> OrgParser (F Blocks)
+codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
codeBlock blockAttrs blockType = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
content <- rawBlockContent blockType
- resultsContent <- trailingResultsBlock
+ resultsContent <- option mempty babelResultsBlock
let id' = fromMaybe mempty $ blockAttrName blockAttrs
- let includeCode = exportsCode kv
- let includeResults = exportsResults kv
let codeBlck = B.codeBlockWith ( id', classes, kv ) content
let labelledBlck = maybe (pure codeBlck)
(labelDiv codeBlck)
(blockAttrCaption blockAttrs)
- let resultBlck = fromMaybe mempty resultsContent
return $
- (if includeCode then labelledBlck else mempty) <>
- (if includeResults then resultBlck else mempty)
+ (if exportsCode kv then labelledBlck else mempty) <>
+ (if exportsResults kv then resultsContent else mempty)
where
labelDiv :: Blocks -> F Inlines -> F Blocks
labelDiv blk value =
@@ -505,60 +313,97 @@ codeBlock blockAttrs blockType = do
labelledBlock :: F Inlines -> F Blocks
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
-exportsCode :: [(String, String)] -> Bool
-exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
- || ("rundoc-exports", "results") `elem` attrs)
+ exportsCode :: [(String, String)] -> Bool
+ exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
-exportsResults :: [(String, String)] -> Bool
-exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
- || ("rundoc-exports", "both") `elem` attrs
+ exportsResults :: [(String, String)] -> Bool
+ exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
-trailingResultsBlock :: OrgParser (Maybe (F Blocks))
-trailingResultsBlock = optionMaybe . try $ do
+-- | Parse the result of an evaluated babel code block.
+babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks)
+babelResultsBlock = try $ do
blanklines
- stringAnyCase "#+RESULTS:"
- blankline
+ resultsMarker <|>
+ (lookAhead . void . try $
+ manyTill (metaLineStart *> anyLineNewline) resultsMarker)
block
+ where
+ resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline
-- | Parse code block arguments
--- TODO: We currently don't handle switches.
-codeHeaderArgs :: OrgParser ([String], [(String, String)])
+codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
codeHeaderArgs = try $ do
language <- skipSpaces *> orgArgWord
- _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
+ (switchClasses, switchKv) <- switchesAsAttributes
parameters <- manyTill blockOption newline
- let pandocLang = translateLang language
- return $
- if hasRundocParameters parameters
- then ( [ pandocLang, rundocBlockClass ]
- , map toRundocAttrib (("language", language) : parameters)
+ return ( translateLang language : switchClasses
+ , originalLang language <> switchKv <> parameters
)
- else ([ pandocLang ], parameters)
- where
- hasRundocParameters = not . null
-switch :: OrgParser (Char, Maybe String)
-switch = try $ simpleSwitch <|> lineNumbersSwitch
+switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)])
+switchesAsAttributes = try $ do
+ switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar)
+ return $ foldr addToAttr ([], []) switches
where
- simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
- lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
- (string "-l \"" *> many1Till nonspaceChar (char '"'))
+ addToAttr :: (Char, Maybe String, SwitchPolarity)
+ -> ([String], [(String, String)])
+ -> ([String], [(String, String)])
+ addToAttr ('n', lineNum, pol) (cls, kv) =
+ let kv' = case lineNum of
+ Just num -> ("startFrom", num):kv
+ Nothing -> kv
+ cls' = case pol of
+ SwitchPlus -> "continuedSourceBlock":cls
+ SwitchMinus -> cls
+ in ("numberLines":cls', kv')
+ addToAttr _ x = x
+
+-- | Whether a switch flag is specified with @+@ or @-@.
+data SwitchPolarity = SwitchPlus | SwitchMinus
+ deriving (Show, Eq)
+
+-- | Parses a switch's polarity.
+switchPolarity :: Monad m => OrgParser m SwitchPolarity
+switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+')
-blockOption :: OrgParser (String, String)
+-- | Parses a source block switch option.
+switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+switch = try $ lineNumberSwitch <|> labelSwitch <|> simpleSwitch
+ where
+ simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter
+ labelSwitch = genericSwitch 'l' $
+ char '"' *> many1Till nonspaceChar (char '"')
+
+-- | Generic source block switch-option parser.
+genericSwitch :: Monad m
+ => Char
+ -> OrgParser m String
+ -> OrgParser m (Char, Maybe String, SwitchPolarity)
+genericSwitch c p = try $ do
+ polarity <- switchPolarity <* char c <* skipSpaces
+ arg <- optionMaybe p
+ return (c, arg, polarity)
+
+-- | Reads a line number switch option. The line number switch can be used with
+-- example and source blocks.
+lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+lineNumberSwitch = genericSwitch 'n' (many digit)
+
+blockOption :: Monad m => OrgParser m (String, String)
blockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgParamValue
return (argKey, paramValue)
-orgParamValue :: OrgParser String
+orgParamValue :: Monad m => OrgParser m String
orgParamValue = try $
skipSpaces
- *> notFollowedBy (char ':' )
- *> many1 nonspaceChar
+ *> notFollowedBy orgArgKey
+ *> noneOf "\n\r" `many1Till` endOfValue
<* skipSpaces
-
-horizontalRule :: OrgParser (F Blocks)
-horizontalRule = return B.horizontalRule <$ try hline
+ where
+ endOfValue = lookAhead $ try (skipSpaces <* oneOf "\n\r")
+ <|> try (skipSpaces1 <* orgArgKey)
--
@@ -568,7 +413,7 @@ horizontalRule = return B.horizontalRule <$ try hline
-- | A generic drawer which has no special meaning for org-mode.
-- Whether or not this drawer is included in the output depends on the drawers
-- export setting.
-genericDrawer :: OrgParser (F Blocks)
+genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
genericDrawer = try $ do
name <- map toUpper <$> drawerStart
content <- manyTill drawerLine (try drawerEnd)
@@ -576,44 +421,25 @@ genericDrawer = try $ do
-- Include drawer if it is explicitly included in or not explicitly excluded
-- from the list of drawers that should be exported. PROPERTIES drawers are
-- never exported.
- case (exportDrawers . orgStateExportSettings $ state) of
+ case exportDrawers . orgStateExportSettings $ state of
_ | name == "PROPERTIES" -> return mempty
Left names | name `elem` names -> return mempty
Right names | name `notElem` names -> return mempty
- _ -> drawerDiv name <$> parseLines content
+ _ -> drawerDiv name <$> parseLines content
where
- parseLines :: [String] -> OrgParser (F Blocks)
+ parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
parseLines = parseFromString blocks . (++ "\n") . unlines
drawerDiv :: String -> F Blocks -> F Blocks
drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-drawerLine :: OrgParser String
+drawerLine :: Monad m => OrgParser m String
drawerLine = anyLine
-drawerEnd :: OrgParser String
+drawerEnd :: Monad m => OrgParser m String
drawerEnd = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
--- | Read a :PROPERTIES: drawer and return the key/value pairs contained
--- within.
-propertiesDrawer :: OrgParser Properties
-propertiesDrawer = try $ do
- drawerType <- drawerStart
- guard $ map toUpper drawerType == "PROPERTIES"
- manyTill property (try drawerEnd)
- where
- property :: OrgParser (PropertyKey, PropertyValue)
- property = try $ (,) <$> key <*> value
-
- key :: OrgParser PropertyKey
- key = fmap toPropertyKey . try $
- skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
-
- value :: OrgParser PropertyValue
- value = fmap toPropertyValue . try $
- skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
-
--
-- Figures
@@ -621,7 +447,7 @@ propertiesDrawer = try $ do
-- | Figures or an image paragraph (i.e. an image on a line by itself). Only
-- images with a caption attribute are interpreted as figures.
-figure :: OrgParser (F Blocks)
+figure :: PandocMonad m => OrgParser m (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
@@ -629,10 +455,10 @@ figure = try $ do
Nothing -> mzero
Just imgSrc -> do
guard (isImageFilename imgSrc)
- let isFigure = not . isNothing $ blockAttrCaption figAttrs
+ let isFigure = isJust $ blockAttrCaption figAttrs
return $ imageBlock isFigure figAttrs imgSrc
where
- selfTarget :: OrgParser String
+ selfTarget :: PandocMonad m => OrgParser m String
selfTarget = try $ char '[' *> linkTarget <* char ']'
imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
@@ -654,7 +480,7 @@ figure = try $ do
else "fig:" ++ cs
-- | Succeeds if looking at the end of the current paragraph
-endOfParagraph :: OrgParser ()
+endOfParagraph :: Monad m => OrgParser m ()
endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
@@ -663,11 +489,10 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
--
-- | Example code marked up by a leading colon.
-example :: OrgParser (F Blocks)
-example = try $ do
- return . return . exampleCode =<< unlines <$> many1 exampleLine
+example :: Monad m => OrgParser m (F Blocks)
+example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine
where
- exampleLine :: OrgParser String
+ exampleLine :: Monad m => OrgParser m String
exampleLine = try $ exampleLineStart *> anyLine
exampleCode :: String -> Blocks
@@ -678,10 +503,59 @@ exampleCode = B.codeBlockWith ("", ["example"], [])
-- Comments, Options and Metadata
--
-specialLine :: OrgParser (F Blocks)
+specialLine :: PandocMonad m => OrgParser m (F Blocks)
specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine
-rawExportLine :: OrgParser Blocks
+-- | Include the content of a file.
+include :: PandocMonad m => OrgParser m (F Blocks)
+include = try $ do
+ metaLineStart <* stringAnyCase "include:" <* skipSpaces
+ filename <- includeTarget
+ includeArgs <- many (try $ skipSpaces *> many1 alphaNum)
+ params <- keyValues
+ blocksParser <- case includeArgs of
+ ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw
+ ["export"] -> return . returnF $ B.fromList []
+ ["export", format] -> return $ pure . B.rawBlock format <$> parseRaw
+ ("src" : rest) -> do
+ let attr = case rest of
+ [lang] -> (mempty, [lang], mempty)
+ _ -> nullAttr
+ return $ pure . B.codeBlockWith attr <$> parseRaw
+ _ -> return $ return . B.fromList . blockFilter params <$> blockList
+ insertIncludedFileF blocksParser ["."] filename
+ where
+ includeTarget :: PandocMonad m => OrgParser m FilePath
+ includeTarget = do
+ char '"'
+ manyTill (noneOf "\n\r\t") (char '"')
+
+ parseRaw :: PandocMonad m => OrgParser m String
+ parseRaw = many anyChar
+
+ blockFilter :: [(String, String)] -> [Block] -> [Block]
+ blockFilter params blks =
+ let minlvl = lookup "minlevel" params
+ in case (minlvl >>= safeRead :: Maybe Int) of
+ Nothing -> blks
+ Just lvl -> let levels = Walk.query headerLevel blks
+ -- CAVE: partial function in else
+ curMin = if null levels then 0 else minimum levels
+ in Walk.walk (shiftHeader (curMin - lvl)) blks
+
+ headerLevel :: Block -> [Int]
+ headerLevel (Header lvl _attr _content) = [lvl]
+ headerLevel _ = []
+
+ shiftHeader :: Int -> Block -> Block
+ shiftHeader shift blk =
+ if shift <= 0
+ then blk
+ else case blk of
+ (Header lvl attr content) -> Header (lvl - shift) attr content
+ _ -> blk
+
+rawExportLine :: PandocMonad m => OrgParser m Blocks
rawExportLine = try $ do
metaLineStart
key <- metaKey
@@ -689,7 +563,7 @@ rawExportLine = try $ do
then B.rawBlock key <$> anyLine
else mzero
-commentLine :: OrgParser Blocks
+commentLine :: Monad m => OrgParser m Blocks
commentLine = commentLineStart *> anyLine *> pure mempty
@@ -714,12 +588,21 @@ data OrgTableRow = OrgContentRow (F [Blocks])
-- should be generated using a builder function.
data OrgTable = OrgTable
{ orgTableColumnProperties :: [ColumnProperty]
- , orgTableHeader :: [Blocks]
- , orgTableRows :: [[Blocks]]
+ , orgTableHeader :: [Blocks]
+ , orgTableRows :: [[Blocks]]
}
-table :: OrgParser (F Blocks)
-table = try $ do
+table :: PandocMonad m => OrgParser m (F Blocks)
+table = gridTableWith blocks True <|> orgTable
+
+-- | A normal org table
+orgTable :: PandocMonad m => OrgParser m (F Blocks)
+orgTable = try $ do
+ -- don't allow a table on the first line of a list item; org requires that
+ -- tables start at first non-space character on the line
+ let isFirstInListItem st = orgStateParserContext st == ListItemState &&
+ isNothing (orgStateLastPreCharPos st)
+ guard =<< not . isFirstInListItem <$> getState
blockAttrs <- blockAttributes
lookAhead tableStart
do
@@ -731,7 +614,7 @@ orgToPandocTable :: OrgTable
-> Inlines
-> Blocks
orgToPandocTable (OrgTable colProps heads lns) caption =
- let totalWidth = if any (not . isNothing) (map columnRelWidth colProps)
+ let totalWidth = if any isJust (map columnRelWidth colProps)
then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps
else Nothing
in B.table caption (map (convertColProp totalWidth) colProps) heads lns
@@ -741,22 +624,22 @@ orgToPandocTable (OrgTable colProps heads lns) caption =
let
align' = fromMaybe AlignDefault $ columnAlignment colProp
width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t))
- <$> (columnRelWidth colProp)
+ <$> columnRelWidth colProp
<*> totalWidth
in (align', width')
-tableRows :: OrgParser [OrgTableRow]
+tableRows :: PandocMonad m => OrgParser m [OrgTableRow]
tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
-tableContentRow :: OrgParser OrgTableRow
+tableContentRow :: PandocMonad m => OrgParser m OrgTableRow
tableContentRow = try $
OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
-tableContentCell :: OrgParser (F Blocks)
+tableContentCell :: PandocMonad m => OrgParser m (F Blocks)
tableContentCell = try $
fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
-tableAlignRow :: OrgParser OrgTableRow
+tableAlignRow :: Monad m => OrgParser m OrgTableRow
tableAlignRow = try $ do
tableStart
colProps <- many1Till columnPropertyCell newline
@@ -764,10 +647,10 @@ tableAlignRow = try $ do
guard $ any (/= def) colProps
return $ OrgAlignRow colProps
-columnPropertyCell :: OrgParser ColumnProperty
+columnPropertyCell :: Monad m => OrgParser m ColumnProperty
columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
where
- emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell)
+ emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell)
propCell = try $ ColumnProperty
<$> (skipSpaces
*> char '<'
@@ -776,18 +659,18 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
<* char '>'
<* emptyCell)
-tableAlignFromChar :: OrgParser Alignment
+tableAlignFromChar :: Monad m => OrgParser m Alignment
tableAlignFromChar = try $
choice [ char 'l' *> return AlignLeft
, char 'c' *> return AlignCenter
, char 'r' *> return AlignRight
]
-tableHline :: OrgParser OrgTableRow
+tableHline :: Monad m => OrgParser m OrgTableRow
tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
-endOfCell :: OrgParser Char
+endOfCell :: Monad m => OrgParser m Char
endOfCell = try $ char '|' <|> lookAhead newline
rowsToTable :: [OrgTableRow]
@@ -813,45 +696,45 @@ normalizeTable (OrgTable colProps heads rows) =
rowToContent :: OrgTable
-> OrgTableRow
-> F OrgTable
-rowToContent orgTable row =
+rowToContent tbl row =
case row of
OrgHlineRow -> return singleRowPromotedToHeader
OrgAlignRow props -> return . setProperties $ props
OrgContentRow cs -> appendToBody cs
where
singleRowPromotedToHeader :: OrgTable
- singleRowPromotedToHeader = case orgTable of
- OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
- orgTable{ orgTableHeader = b , orgTableRows = [] }
- _ -> orgTable
+ singleRowPromotedToHeader = case tbl of
+ OrgTable{ orgTableHeader = [], orgTableRows = [b] } ->
+ tbl{ orgTableHeader = b , orgTableRows = [] }
+ _ -> tbl
setProperties :: [ColumnProperty] -> OrgTable
- setProperties ps = orgTable{ orgTableColumnProperties = ps }
+ setProperties ps = tbl{ orgTableColumnProperties = ps }
appendToBody :: F [Blocks] -> F OrgTable
appendToBody frow = do
newRow <- frow
- let oldRows = orgTableRows orgTable
+ let oldRows = orgTableRows tbl
-- NOTE: This is an inefficient O(n) operation. This should be changed
-- if performance ever becomes a problem.
- return orgTable{ orgTableRows = oldRows ++ [newRow] }
+ return tbl{ orgTableRows = oldRows ++ [newRow] }
--
-- LaTeX fragments
--
-latexFragment :: OrgParser (F Blocks)
+latexFragment :: Monad m => OrgParser m (F Blocks)
latexFragment = try $ do
envName <- latexEnvStart
content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
- return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
+ returnF $ B.rawBlock "latex" (content `inLatexEnv` envName)
where
c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
, c
, "\\end{", e, "}\n"
]
-latexEnd :: String -> OrgParser ()
+latexEnd :: Monad m => String -> OrgParser m ()
latexEnd envName = try $
() <$ skipSpaces
<* string ("\\end{" ++ envName ++ "}")
@@ -861,74 +744,70 @@ latexEnd envName = try $
--
-- Footnote defintions
--
-noteBlock :: OrgParser (F Blocks)
+noteBlock :: PandocMonad m => OrgParser m (F Blocks)
noteBlock = try $ do
- ref <- noteMarker <* skipSpaces
- content <- mconcat <$> blocksTillHeaderOrNote
+ ref <- noteMarker <* skipSpaces <* updateLastPreCharPos
+ content <- mconcat <$> many1Till block endOfFootnote
addToNotesTable (ref, content)
return mempty
where
- blocksTillHeaderOrNote =
- many1Till block (eof <|> () <$ lookAhead noteMarker
- <|> () <$ lookAhead headerStart)
+ endOfFootnote = eof
+ <|> () <$ lookAhead noteMarker
+ <|> () <$ lookAhead headerStart
+ <|> () <$ lookAhead (try $ blankline *> blankline)
-- Paragraphs or Plain text
-paraOrPlain :: OrgParser (F Blocks)
+paraOrPlain :: PandocMonad m => OrgParser m (F Blocks)
paraOrPlain = try $ do
-- Make sure we are not looking at a headline
- notFollowedBy' (char '*' *> (oneOf " *"))
+ notFollowedBy' headerStart
ils <- inlines
nl <- option False (newline *> return True)
-- Read block as paragraph, except if we are in a list context and the block
-- is directly followed by a list item, in which case the block is read as
-- plain text.
try (guard nl
- *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
+ *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart))
*> return (B.para <$> ils))
- <|> (return (B.plain <$> ils))
+ <|> return (B.plain <$> ils)
--
-- list blocks
--
-list :: OrgParser (F Blocks)
+list :: PandocMonad m => OrgParser m (F Blocks)
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
-definitionList :: OrgParser (F Blocks)
-definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.definitionList . fmap compactify'DL . sequence
- <$> many1 (definitionListItem $ bulletListStart' (Just n))
-
-bulletList :: OrgParser (F Blocks)
-bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.bulletList . fmap compactify' . sequence
- <$> many1 (listItem (bulletListStart' $ Just n))
-
-orderedList :: OrgParser (F Blocks)
-orderedList = fmap B.orderedList . fmap compactify' . sequence
- <$> many1 (listItem orderedListStart)
-
-bulletListStart' :: Maybe Int -> OrgParser Int
--- returns length of bulletList prefix, inclusive of marker
-bulletListStart' Nothing = do ind <- length <$> many spaceChar
- oneOf (bullets $ ind == 0)
- skipSpaces1
- return (ind + 1)
-bulletListStart' (Just n) = do count (n-1) spaceChar
- oneOf (bullets $ n == 1)
- many1 spaceChar
- return n
-
--- Unindented lists are legal, but they can't use '*' bullets.
--- We return n to maintain compatibility with the generic listItem.
-bullets :: Bool -> String
-bullets unindented = if unindented then "+-" else "*+-"
-
-definitionListItem :: OrgParser Int
- -> OrgParser (F (Inlines, [Blocks]))
-definitionListItem parseMarkerGetLength = try $ do
- markerLength <- parseMarkerGetLength
+definitionList :: PandocMonad m => OrgParser m (F Blocks)
+definitionList = try $ do
+ indent <- lookAhead bulletListStart
+ fmap (B.definitionList . compactifyDL) . sequence
+ <$> many1 (definitionListItem (bulletListStart `indented` indent))
+
+bulletList :: PandocMonad m => OrgParser m (F Blocks)
+bulletList = try $ do
+ indent <- lookAhead bulletListStart
+ fmap (B.bulletList . compactify) . sequence
+ <$> many1 (listItem (bulletListStart `indented` indent))
+
+indented :: Monad m => OrgParser m Int -> Int -> OrgParser m Int
+indented indentedMarker minIndent = try $ do
+ n <- indentedMarker
+ guard (minIndent <= n)
+ return n
+
+orderedList :: PandocMonad m => OrgParser m (F Blocks)
+orderedList = try $ do
+ indent <- lookAhead orderedListStart
+ fmap (B.orderedList . compactify) . sequence
+ <$> many1 (listItem (orderedListStart `indented` indent))
+
+definitionListItem :: PandocMonad m
+ => OrgParser m Int
+ -> OrgParser m (F (Inlines, [Blocks]))
+definitionListItem parseIndentedMarker = try $ do
+ markerLength <- parseIndentedMarker
term <- manyTill (noneOf "\n\r") (try definitionMarker)
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
@@ -940,12 +819,12 @@ definitionListItem parseMarkerGetLength = try $ do
definitionMarker =
spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
-
--- parse raw text for one list item, excluding start marker and continuations
-listItem :: OrgParser Int
- -> OrgParser (F Blocks)
-listItem start = try . withContext ListItemState $ do
- markerLength <- try start
+-- | parse raw text for one list item
+listItem :: PandocMonad m
+ => OrgParser m Int
+ -> OrgParser m (F Blocks)
+listItem parseIndentedMarker = try . withContext ListItemState $ do
+ markerLength <- try parseIndentedMarker
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
rest <- concat <$> many (listContinuation markerLength)
@@ -953,24 +832,11 @@ listItem start = try . withContext ListItemState $ do
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: Int
- -> OrgParser String
-listContinuation markerLength = try $
+listContinuation :: Monad m => Int
+ -> OrgParser m String
+listContinuation markerLength = try $ do
notFollowedBy' blankline
- *> (mappend <$> (concat <$> many1 listLine)
- <*> many blankline)
+ mappend <$> (concat <$> many1 listLine)
+ <*> many blankline
where
listLine = try $ indentWith markerLength *> anyLineNewline
-
- -- indent by specified number of spaces (or equiv. tabs)
- indentWith :: Int -> OrgParser String
- indentWith num = do
- tabStop <- getOption readerTabStop
- if num < tabStop
- then count num (char ' ')
- else choice [ try (count num (char ' '))
- , try (char '\t' >> count (num - tabStop) (char ' ')) ]
-
--- | Parse any line, include the final newline in the output.
-anyLineNewline :: OrgParser String
-anyLineNewline = (++ "\n") <$> anyLine
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
new file mode 100644
index 000000000..f77778ec9
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -0,0 +1,304 @@
+{-
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE ViewPatterns #-}
+{- |
+ Module : Text.Pandoc.Readers.Org.DocumentTree
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for org-mode headlines and document subtrees
+-}
+module Text.Pandoc.Readers.Org.DocumentTree
+ ( documentTree
+ , headlineToBlocks
+ ) where
+
+import Control.Arrow ((***))
+import Control.Monad (guard, void)
+import Data.Char (toLower, toUpper)
+import Data.List (intersperse)
+import Data.Monoid ((<>))
+import Text.Pandoc.Builder (Blocks, Inlines)
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Readers.Org.BlockStarts
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+
+import qualified Data.Map as Map
+import qualified Text.Pandoc.Builder as B
+
+--
+-- Org headers
+--
+
+-- | Parse input as org document tree.
+documentTree :: PandocMonad m
+ => OrgParser m (F Blocks)
+ -> OrgParser m (F Inlines)
+ -> OrgParser m (F Headline)
+documentTree blocks inline = do
+ initialBlocks <- blocks
+ headlines <- sequence <$> manyTill (headline blocks inline 1) eof
+ title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState
+ return $ do
+ headlines' <- headlines
+ initialBlocks' <- initialBlocks
+ title' <- title
+ return Headline
+ { headlineLevel = 0
+ , headlineTodoMarker = Nothing
+ , headlineText = B.fromList title'
+ , headlineTags = mempty
+ , headlineProperties = mempty
+ , headlineContents = initialBlocks'
+ , headlineChildren = headlines'
+ }
+ where
+ getTitle :: Map.Map String MetaValue -> [Inline]
+ getTitle metamap =
+ case Map.lookup "title" metamap of
+ Just (MetaInlines inlns) -> inlns
+ _ -> []
+
+newtype Tag = Tag { fromTag :: String }
+ deriving (Show, Eq)
+
+-- | Create a tag containing the given string.
+toTag :: String -> Tag
+toTag = Tag
+
+-- | The key (also called name or type) of a property.
+newtype PropertyKey = PropertyKey { fromKey :: String }
+ deriving (Show, Eq, Ord)
+
+-- | Create a property key containing the given string. Org mode keys are
+-- case insensitive and are hence converted to lower case.
+toPropertyKey :: String -> PropertyKey
+toPropertyKey = PropertyKey . map toLower
+
+-- | The value assigned to a property.
+newtype PropertyValue = PropertyValue { fromValue :: String }
+
+-- | Create a property value containing the given string.
+toPropertyValue :: String -> PropertyValue
+toPropertyValue = PropertyValue
+
+-- | Check whether the property value is non-nil (i.e. truish).
+isNonNil :: PropertyValue -> Bool
+isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
+
+-- | Key/value pairs from a PROPERTIES drawer
+type Properties = [(PropertyKey, PropertyValue)]
+
+-- | Org mode headline (i.e. a document subtree).
+data Headline = Headline
+ { headlineLevel :: Int
+ , headlineTodoMarker :: Maybe TodoMarker
+ , headlineText :: Inlines
+ , headlineTags :: [Tag]
+ , headlineProperties :: Properties
+ , headlineContents :: Blocks
+ , headlineChildren :: [Headline]
+ }
+
+-- | Read an Org mode headline and its contents (i.e. a document subtree).
+-- @lvl@ gives the minimum acceptable level of the tree.
+headline :: PandocMonad m
+ => OrgParser m (F Blocks)
+ -> OrgParser m (F Inlines)
+ -> Int
+ -> OrgParser m (F Headline)
+headline blocks inline lvl = try $ do
+ level <- headerStart
+ guard (lvl <= level)
+ todoKw <- optionMaybe todoKeyword
+ title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
+ tags <- option [] headerTags
+ newline
+ properties <- option mempty propertiesDrawer
+ contents <- blocks
+ children <- many (headline blocks inline (level + 1))
+ return $ do
+ title' <- title
+ contents' <- contents
+ children' <- sequence children
+ return Headline
+ { headlineLevel = level
+ , headlineTodoMarker = todoKw
+ , headlineText = title'
+ , headlineTags = tags
+ , headlineProperties = properties
+ , headlineContents = contents'
+ , headlineChildren = children'
+ }
+ where
+ endOfTitle :: Monad m => OrgParser m ()
+ endOfTitle = void . lookAhead $ optional headerTags *> newline
+
+ headerTags :: Monad m => OrgParser m [Tag]
+ headerTags = try $
+ let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+ in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
+
+-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
+headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
+headlineToBlocks hdln@Headline {..} = do
+ maxHeadlineLevels <- getExportSetting exportHeadlineLevels
+ case () of
+ _ | any isNoExportTag headlineTags -> return mempty
+ _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln
+ _ | isCommentTitle headlineText -> return mempty
+ _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln
+ _ | otherwise -> headlineToHeaderWithContents hdln
+
+isNoExportTag :: Tag -> Bool
+isNoExportTag = (== toTag "noexport")
+
+isArchiveTag :: Tag -> Bool
+isArchiveTag = (== toTag "ARCHIVE")
+
+-- | Check if the title starts with COMMENT.
+-- FIXME: This accesses builder internals not intended for use in situations
+-- like these. Replace once keyword parsing is supported.
+isCommentTitle :: Inlines -> Bool
+isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
+isCommentTitle _ = False
+
+archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
+archivedHeadlineToBlocks hdln = do
+ archivedTreesOption <- getExportSetting exportArchivedTrees
+ case archivedTreesOption of
+ ArchivedTreesNoExport -> return mempty
+ ArchivedTreesExport -> headlineToHeaderWithContents hdln
+ ArchivedTreesHeadlineOnly -> headlineToHeader hdln
+
+headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks
+headlineToHeaderWithList hdln@Headline {..} = do
+ maxHeadlineLevels <- getExportSetting exportHeadlineLevels
+ header <- headlineToHeader hdln
+ listElements <- mapM headlineToBlocks headlineChildren
+ let listBlock = if null listElements
+ then mempty
+ else B.orderedList listElements
+ let headerText = if maxHeadlineLevels == headlineLevel
+ then header
+ else flattenHeader header
+ return $ headerText <> headlineContents <> listBlock
+ where
+ flattenHeader :: Blocks -> Blocks
+ flattenHeader blks =
+ case B.toList blks of
+ (Header _ _ inlns:_) -> B.para (B.fromList inlns)
+ _ -> mempty
+
+headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks
+headlineToHeaderWithContents hdln@Headline {..} = do
+ header <- headlineToHeader hdln
+ childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren
+ return $ header <> headlineContents <> childrenBlocks
+
+headlineToHeader :: Monad m => Headline -> OrgParser m Blocks
+headlineToHeader Headline {..} = do
+ exportTodoKeyword <- getExportSetting exportWithTodoKeywords
+ exportTags <- getExportSetting exportWithTags
+ let todoText = if exportTodoKeyword
+ then case headlineTodoMarker of
+ Just kw -> todoKeywordToInlines kw <> B.space
+ Nothing -> mempty
+ else mempty
+ let text = todoText <> headlineText <>
+ if exportTags
+ then tagsToInlines headlineTags
+ else mempty
+ let propAttr = propertiesToAttr headlineProperties
+ attr <- registerHeader propAttr headlineText
+ return $ B.headerWith attr headlineLevel text
+
+todoKeyword :: Monad m => OrgParser m TodoMarker
+todoKeyword = try $ do
+ taskStates <- activeTodoMarkers <$> getState
+ let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) <* spaceChar)
+ choice (map kwParser taskStates)
+
+todoKeywordToInlines :: TodoMarker -> Inlines
+todoKeywordToInlines tdm =
+ let todoText = todoMarkerName tdm
+ todoState = map toLower . show $ todoMarkerState tdm
+ classes = [todoState, todoText]
+ in B.spanWith (mempty, classes, mempty) (B.str todoText)
+
+propertiesToAttr :: Properties -> Attr
+propertiesToAttr properties =
+ let
+ toStringPair = fromKey *** fromValue
+ customIdKey = toPropertyKey "custom_id"
+ classKey = toPropertyKey "class"
+ unnumberedKey = toPropertyKey "unnumbered"
+ specialProperties = [customIdKey, classKey, unnumberedKey]
+ id' = maybe mempty fromValue . lookup customIdKey $ properties
+ cls = maybe mempty fromValue . lookup classKey $ properties
+ kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
+ $ properties
+ isUnnumbered =
+ maybe False isNonNil . lookup unnumberedKey $ properties
+ in
+ (id', words cls ++ ["unnumbered" | isUnnumbered], kvs')
+
+tagsToInlines :: [Tag] -> Inlines
+tagsToInlines [] = mempty
+tagsToInlines tags =
+ (B.space <>) . mconcat . intersperse (B.str "\160") . map tagToInline $ tags
+ where
+ tagToInline :: Tag -> Inlines
+ tagToInline t = tagSpan t . B.smallcaps . B.str $ fromTag t
+
+-- | Wrap the given inline in a span, marking it as a tag.
+tagSpan :: Tag -> Inlines -> Inlines
+tagSpan t = B.spanWith ("", ["tag"], [("tag-name", fromTag t)])
+
+
+
+
+
+-- | Read a :PROPERTIES: drawer and return the key/value pairs contained
+-- within.
+propertiesDrawer :: Monad m => OrgParser m Properties
+propertiesDrawer = try $ do
+ drawerType <- drawerStart
+ guard $ map toUpper drawerType == "PROPERTIES"
+ manyTill property (try endOfDrawer)
+ where
+ property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
+ property = try $ (,) <$> key <*> value
+
+ key :: Monad m => OrgParser m PropertyKey
+ key = fmap toPropertyKey . try $
+ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
+
+ value :: Monad m => OrgParser m PropertyValue
+ value = fmap toPropertyValue . try $
+ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
+
+ endOfDrawer :: Monad m => OrgParser m String
+ endOfDrawer = try $
+ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index 764e5b0d5..6a70c50b9 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2016-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2016 Albert Krewinkel
+ Module : Text.Pandoc.Readers.Org.ExportSettings
+ Copyright : © 2016–2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -29,22 +29,22 @@ module Text.Pandoc.Readers.Org.ExportSettings
( exportSettings
) where
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
-import Control.Monad ( mzero, void )
-import Data.Char ( toLower )
-import Data.Maybe ( listToMaybe )
+import Control.Monad (mzero, void)
+import Data.Char (toLower)
+import Data.Maybe (listToMaybe)
-- | Read and handle space separated org-mode export settings.
-exportSettings :: OrgParser ()
+exportSettings :: Monad m => OrgParser m ()
exportSettings = void $ sepBy spaces exportSetting
-- | Setter function for export settings.
type ExportSettingSetter a = a -> ExportSettings -> ExportSettings
-- | Read and process a single org-mode export option.
-exportSetting :: OrgParser ()
+exportSetting :: Monad m => OrgParser m ()
exportSetting = choice
[ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val })
, booleanSetting "'" (\val es -> es { exportSmartQuotes = val })
@@ -52,7 +52,7 @@ exportSetting = choice
, booleanSetting "-" (\val es -> es { exportSpecialStrings = val })
, ignoredSetting ":"
, ignoredSetting "<"
- , ignoredSetting "\\n"
+ , booleanSetting "\\n" (\val es -> es { exportPreserveBreaks = val })
, archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val })
, booleanSetting "author" (\val es -> es { exportWithAuthor = val })
, ignoredSetting "c"
@@ -71,7 +71,7 @@ exportSetting = choice
, ignoredSetting "pri"
, ignoredSetting "prop"
, ignoredSetting "stat"
- , ignoredSetting "tags"
+ , booleanSetting "tags" (\val es -> es { exportWithTags = val })
, ignoredSetting "tasks"
, ignoredSetting "tex"
, ignoredSetting "timestamp"
@@ -81,10 +81,11 @@ exportSetting = choice
, ignoredSetting "|"
] <?> "export setting"
-genericExportSetting :: OrgParser a
+genericExportSetting :: Monad m
+ => OrgParser m a
-> String
-> ExportSettingSetter a
- -> OrgParser ()
+ -> OrgParser m ()
genericExportSetting optionParser settingIdentifier setter = try $ do
_ <- string settingIdentifier *> char ':'
value <- optionParser
@@ -94,11 +95,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do
st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
-- | A boolean option, either nil (False) or non-nil (True).
-booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser ()
+booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
booleanSetting = genericExportSetting elispBoolean
-- | An integer-valued option.
-integerSetting :: String -> ExportSettingSetter Int -> OrgParser ()
+integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
integerSetting = genericExportSetting parseInt
where
parseInt = try $
@@ -106,9 +107,10 @@ integerSetting = genericExportSetting parseInt
-- | Either the string "headline" or an elisp boolean and treated as an
-- @ArchivedTreesOption@.
-archivedTreeSetting :: String
+archivedTreeSetting :: Monad m
+ => String
-> ExportSettingSetter ArchivedTreesOption
- -> OrgParser ()
+ -> OrgParser m ()
archivedTreeSetting =
genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean
where
@@ -125,9 +127,10 @@ archivedTreeSetting =
else ArchivedTreesNoExport
-- | A list or a complement list (i.e. a list starting with `not`).
-complementableListSetting :: String
+complementableListSetting :: Monad m
+ => String
-> ExportSettingSetter (Either [String] [String])
- -> OrgParser ()
+ -> OrgParser m ()
complementableListSetting = genericExportSetting $ choice
[ Left <$> complementStringList
, Right <$> stringList
@@ -135,31 +138,31 @@ complementableListSetting = genericExportSetting $ choice
]
where
-- Read a plain list of strings.
- stringList :: OrgParser [String]
+ stringList :: Monad m => OrgParser m [String]
stringList = try $
char '('
*> sepBy elispString spaces
<* char ')'
-- Read an emacs lisp list specifying a complement set.
- complementStringList :: OrgParser [String]
+ complementStringList :: Monad m => OrgParser m [String]
complementStringList = try $
string "(not "
*> sepBy elispString spaces
<* char ')'
- elispString :: OrgParser String
+ elispString :: Monad m => OrgParser m String
elispString = try $
char '"'
*> manyTill alphaNum (char '"')
-- | Read but ignore the export setting.
-ignoredSetting :: String -> OrgParser ()
+ignoredSetting :: Monad m => String -> OrgParser m ()
ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
-- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are
-- interpreted as true.
-elispBoolean :: OrgParser Bool
+elispBoolean :: Monad m => OrgParser m Bool
elispBoolean = try $ do
value <- many1 nonspaceChar
return $ case map toLower value of
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 7e1bb61c2..3a12f38d0 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ Module : Text.Pandoc.Readers.Org.Inlines
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -33,73 +33,75 @@ module Text.Pandoc.Readers.Org.Inlines
, linkTarget
) where
-import Text.Pandoc.Readers.Org.BlockStarts ( endOfBlock, noteMarker )
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString, isImageFilename, rundocBlockClass
- , toRundocAttrib, translateLang )
+import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker)
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+ originalLang, translateLang)
+import Text.Pandoc.Builder (Inlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines )
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
-import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
+import Text.Pandoc.Shared (underlineSpan)
+import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Prelude hiding (sequence)
-import Control.Monad ( guard, mplus, mzero, when, void )
-import Data.Char ( isAlphaNum, isSpace )
-import Data.List ( intersperse )
-import Data.Maybe ( fromMaybe )
+import Control.Monad (guard, mplus, mzero, unless, void, when)
+import Control.Monad.Trans (lift)
+import Data.Char (isAlphaNum, isSpace)
+import Data.List (intersperse)
import qualified Data.Map as M
-import Data.Monoid ( (<>) )
-import Data.Traversable (sequence)
+import Data.Maybe (fromMaybe)
+import Data.Monoid ((<>))
+import Data.Traversable (sequence)
+import Prelude hiding (sequence)
--
-- Functions acting on the parser state
--
-recordAnchorId :: String -> OrgParser ()
+recordAnchorId :: PandocMonad m => String -> OrgParser m ()
recordAnchorId i = updateState $ \s ->
- s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+ s{ orgStateAnchorIds = i : orgStateAnchorIds s }
-pushToInlineCharStack :: Char -> OrgParser ()
+pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m ()
pushToInlineCharStack c = updateState $ \s ->
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
-popInlineCharStack :: OrgParser ()
+popInlineCharStack :: PandocMonad m => OrgParser m ()
popInlineCharStack = updateState $ \s ->
s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
-surroundingEmphasisChar :: OrgParser [Char]
+surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char]
surroundingEmphasisChar =
take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
-startEmphasisNewlinesCounting :: Int -> OrgParser ()
+startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m ()
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
s{ orgStateEmphasisNewlines = Just maxNewlines }
-decEmphasisNewlinesCount :: OrgParser ()
+decEmphasisNewlinesCount :: PandocMonad m => OrgParser m ()
decEmphasisNewlinesCount = updateState $ \s ->
s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
-newlinesCountWithinLimits :: OrgParser Bool
+newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool
newlinesCountWithinLimits = do
st <- getState
return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
-resetEmphasisNewlines :: OrgParser ()
+resetEmphasisNewlines :: PandocMonad m => OrgParser m ()
resetEmphasisNewlines = updateState $ \s ->
s{ orgStateEmphasisNewlines = Nothing }
-addToNotesTable :: OrgNoteRecord -> OrgParser ()
+addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m ()
addToNotesTable note = do
oldnotes <- orgStateNotes' <$> getState
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
-- | Parse a single Org-mode inline element
-inline :: OrgParser (F Inlines)
+inline :: PandocMonad m => OrgParser m (F Inlines)
inline =
choice [ whitespace
, linebreak
@@ -119,13 +121,14 @@ inline =
, superscript
, inlineLaTeX
, exportSnippet
+ , macro
, smart
, symbol
] <* (guard =<< newlinesCountWithinLimits)
<?> "inline"
-- | Read the rest of the input as inlines.
-inlines :: OrgParser (F Inlines)
+inlines :: PandocMonad m => OrgParser m (F Inlines)
inlines = trimInlinesF . mconcat <$> many1 inline
-- treat these as potentially non-text when parsing inline:
@@ -133,30 +136,31 @@ specialChars :: [Char]
specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~"
-whitespace :: OrgParser (F Inlines)
+whitespace :: PandocMonad m => OrgParser m (F Inlines)
whitespace = pure B.space <$ skipMany1 spaceChar
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
<?> "whitespace"
-linebreak :: OrgParser (F Inlines)
+linebreak :: PandocMonad m => OrgParser m (F Inlines)
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
-str :: OrgParser (F Inlines)
+str :: PandocMonad m => OrgParser m (F Inlines)
str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
<* updateLastStrPos
-- | An endline character that can be treated as a space, not a structural
-- break. This should reflect the values of the Emacs variable
-- @org-element-pagaraph-separate@.
-endline :: OrgParser (F Inlines)
+endline :: PandocMonad m => OrgParser m (F Inlines)
endline = try $ do
newline
notFollowedBy' endOfBlock
decEmphasisNewlinesCount
guard =<< newlinesCountWithinLimits
updateLastPreCharPos
- return . return $ B.softbreak
+ useHardBreaks <- exportPreserveBreaks . orgStateExportSettings <$> getState
+ returnF (if useHardBreaks then B.linebreak else B.softbreak)
--
@@ -174,7 +178,7 @@ endline = try $ do
-- contributors. All this should be consolidated once an official Org-mode
-- citation syntax has emerged.
-cite :: OrgParser (F Inlines)
+cite :: PandocMonad m => OrgParser m (F Inlines)
cite = try $ berkeleyCite <|> do
guardEnabled Ext_citations
(cs, raw) <- withRaw $ choice
@@ -182,43 +186,44 @@ cite = try $ berkeleyCite <|> do
, orgRefCite
, berkeleyTextualCite
]
- return $ (flip B.cite (B.text raw)) <$> cs
+ return $ flip B.cite (B.text raw) <$> cs
-- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
-pandocOrgCite :: OrgParser (F [Citation])
+pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
pandocOrgCite = try $
char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
-orgRefCite :: OrgParser (F [Citation])
+orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
orgRefCite = try $ choice
[ normalOrgRefCite
, fmap (:[]) <$> linkLikeOrgRefCite
]
-normalOrgRefCite :: OrgParser (F [Citation])
+normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation])
normalOrgRefCite = try $ do
mode <- orgRefCiteMode
- -- org-ref style citation key, parsed into a citation of the given mode
- let orgRefCiteItem :: OrgParser (F Citation)
- orgRefCiteItem = try $ do
- key <- orgRefCiteKey
- returnF $ Citation
- { citationId = key
- , citationPrefix = mempty
- , citationSuffix = mempty
- , citationMode = mode
- , citationNoteNum = 0
- , citationHash = 0
- }
- firstCitation <- orgRefCiteItem
- moreCitations <- many (try $ char ',' *> orgRefCiteItem)
+ firstCitation <- orgRefCiteList mode
+ moreCitations <- many (try $ char ',' *> orgRefCiteList mode)
return . sequence $ firstCitation : moreCitations
- where
+ where
+ -- | A list of org-ref style citation keys, parsed as citation of the given
+ -- citation mode.
+ orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation)
+ orgRefCiteList citeMode = try $ do
+ key <- orgRefCiteKey
+ returnF Citation
+ { citationId = key
+ , citationPrefix = mempty
+ , citationSuffix = mempty
+ , citationMode = citeMode
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
-- | Read an Berkeley-style Org-mode citation. Berkeley citation style was
-- develop and adjusted to Org-mode style by John MacFarlane and Richard
-- Lawrence, respectively, both philosophers at UC Berkeley.
-berkeleyCite :: OrgParser (F Inlines)
+berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
berkeleyCite = try $ do
bcl <- berkeleyCitationList
return $ do
@@ -229,11 +234,11 @@ berkeleyCite = try $ do
return $
if parens
then toCite
- . maybe id (\p -> alterFirst (prependPrefix p)) prefix
- . maybe id (\s -> alterLast (appendSuffix s)) suffix
+ . maybe id (alterFirst . prependPrefix) prefix
+ . maybe id (alterLast . appendSuffix) suffix
$ citationList
else maybe mempty (<> " ") prefix
- <> (toListOfCites $ map toInTextMode citationList)
+ <> toListOfCites (map toInTextMode citationList)
<> maybe mempty (", " <>) suffix
where
toCite :: [Citation] -> Inlines
@@ -247,7 +252,7 @@ berkeleyCite = try $ do
alterFirst, alterLast :: (a -> a) -> [a] -> [a]
alterFirst _ [] = []
- alterFirst f (c:cs) = (f c):cs
+ alterFirst f (c:cs) = f c : cs
alterLast f = reverse . alterFirst f . reverse
prependPrefix, appendSuffix :: Inlines -> Citation -> Citation
@@ -255,12 +260,12 @@ berkeleyCite = try $ do
appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf }
data BerkeleyCitationList = BerkeleyCitationList
- { berkeleyCiteParens :: Bool
+ { berkeleyCiteParens :: Bool
, berkeleyCiteCommonPrefix :: Maybe Inlines
, berkeleyCiteCommonSuffix :: Maybe Inlines
- , berkeleyCiteCitations :: [Citation]
+ , berkeleyCiteCitations :: [Citation]
}
-berkeleyCitationList :: OrgParser (F BerkeleyCitationList)
+berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
berkeleyCitationList = try $ do
char '['
parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
@@ -268,29 +273,29 @@ berkeleyCitationList = try $ do
skipSpaces
commonPrefix <- optionMaybe (try $ citationListPart <* char ';')
citations <- citeList
- commonSuffix <- optionMaybe (try $ citationListPart)
+ commonSuffix <- optionMaybe (try citationListPart)
char ']'
return (BerkeleyCitationList parens
<$> sequence commonPrefix
<*> sequence commonSuffix
<*> citations)
where
- citationListPart :: OrgParser (F Inlines)
+ citationListPart :: PandocMonad m => OrgParser m (F Inlines)
citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
notFollowedBy' citeKey
notFollowedBy (oneOf ";]")
inline
-berkeleyBareTag :: OrgParser ()
+berkeleyBareTag :: PandocMonad m => OrgParser m ()
berkeleyBareTag = try $ void berkeleyBareTag'
-berkeleyParensTag :: OrgParser ()
+berkeleyParensTag :: PandocMonad m => OrgParser m ()
berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag'
-berkeleyBareTag' :: OrgParser ()
+berkeleyBareTag' :: PandocMonad m => OrgParser m ()
berkeleyBareTag' = try $ void (string "cite")
-berkeleyTextualCite :: OrgParser (F [Citation])
+berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
berkeleyTextualCite = try $ do
(suppressAuthor, key) <- citeKey
returnF . return $ Citation
@@ -305,14 +310,14 @@ berkeleyTextualCite = try $ do
-- The following is what a Berkeley-style bracketed textual citation parser
-- would look like. However, as these citations are a subset of Pandoc's Org
-- citation style, this isn't used.
--- berkeleyBracketedTextualCite :: OrgParser (F [Citation])
+-- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
-- berkeleyBracketedTextualCite = try . (fmap head) $
-- enclosedByPair '[' ']' berkeleyTextualCite
-- | Read a link-like org-ref style citation. The citation includes pre and
-- post text. However, multiple citations are not possible due to limitations
-- in the syntax.
-linkLikeOrgRefCite :: OrgParser (F Citation)
+linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation)
linkLikeOrgRefCite = try $ do
_ <- string "[["
mode <- orgRefCiteMode
@@ -335,13 +340,20 @@ linkLikeOrgRefCite = try $ do
-- | Read a citation key. The characters allowed in citation keys are taken
-- from the `org-ref-cite-re` variable in `org-ref.el`.
-orgRefCiteKey :: OrgParser String
-orgRefCiteKey = try . many1 . satisfy $ \c ->
- isAlphaNum c || c `elem` ("-_:\\./"::String)
+orgRefCiteKey :: PandocMonad m => OrgParser m String
+orgRefCiteKey =
+ let citeKeySpecialChars = "-_:\\./," :: String
+ isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
+ isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c
+ endOfCitation = try $ do
+ many $ satisfy isCiteKeySpecialChar
+ satisfy $ not . isCiteKeyChar
+ in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation
+
-- | Supported citation types. Only a small subset of org-ref types is
-- supported for now. TODO: rewrite this, use LaTeX reader as template.
-orgRefCiteMode :: OrgParser CitationMode
+orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode
orgRefCiteMode =
choice $ map (\(s, mode) -> mode <$ try (string s <* char ':'))
[ ("cite", AuthorInText)
@@ -352,10 +364,10 @@ orgRefCiteMode =
, ("citeyear", SuppressAuthor)
]
-citeList :: OrgParser (F [Citation])
+citeList :: PandocMonad m => OrgParser m (F [Citation])
citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
-citation :: OrgParser (F Citation)
+citation :: PandocMonad m => OrgParser m (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
@@ -363,15 +375,16 @@ citation = try $ do
return $ do
x <- pref
y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
+ return Citation
+ { citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
where
prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
@@ -384,39 +397,39 @@ citation = try $ do
then (B.space <>) <$> rest
else rest
-footnote :: OrgParser (F Inlines)
+footnote :: PandocMonad m => OrgParser m (F Inlines)
footnote = try $ inlineNote <|> referencedNote
-inlineNote :: OrgParser (F Inlines)
+inlineNote :: PandocMonad m => OrgParser m (F Inlines)
inlineNote = try $ do
string "[fn:"
ref <- many alphaNum
char ':'
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
- when (not $ null ref) $
+ unless (null ref) $
addToNotesTable ("fn:" ++ ref, note)
return $ B.note <$> note
-referencedNote :: OrgParser (F Inlines)
+referencedNote :: PandocMonad m => OrgParser m (F Inlines)
referencedNote = try $ do
ref <- noteMarker
return $ do
notes <- asksF orgStateNotes'
case lookup ref notes of
- Nothing -> return $ B.str $ "[" ++ ref ++ "]"
+ Nothing -> return . B.str $ "[" ++ ref ++ "]"
Just contents -> do
st <- askF
let contents' = runF contents st{ orgStateNotes' = [] }
return $ B.note contents'
-linkOrImage :: OrgParser (F Inlines)
+linkOrImage :: PandocMonad m => OrgParser m (F Inlines)
linkOrImage = explicitOrImageLink
<|> selflinkOrImage
<|> angleLink
<|> plainLink
<?> "link or image"
-explicitOrImageLink :: OrgParser (F Inlines)
+explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines)
explicitOrImageLink = try $ do
char '['
srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
@@ -427,34 +440,34 @@ explicitOrImageLink = try $ do
src <- srcF
case cleanLinkString title of
Just imgSrc | isImageFilename imgSrc ->
- pure $ B.link src "" $ B.image imgSrc mempty mempty
+ pure . B.link src "" $ B.image imgSrc mempty mempty
_ ->
linkToInlinesF src =<< title'
-selflinkOrImage :: OrgParser (F Inlines)
+selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
selflinkOrImage = try $ do
src <- char '[' *> linkTarget <* char ']'
return $ linkToInlinesF src (B.str src)
-plainLink :: OrgParser (F Inlines)
+plainLink :: PandocMonad m => OrgParser m (F Inlines)
plainLink = try $ do
(orig, src) <- uri
returnF $ B.link src "" (B.str orig)
-angleLink :: OrgParser (F Inlines)
+angleLink :: PandocMonad m => OrgParser m (F Inlines)
angleLink = try $ do
char '<'
link <- plainLink
char '>'
return link
-linkTarget :: OrgParser String
+linkTarget :: PandocMonad m => OrgParser m String
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
-possiblyEmptyLinkTarget :: OrgParser String
+possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-applyCustomLinkFormat :: String -> OrgParser (F String)
+applyCustomLinkFormat :: String -> OrgParser m (F String)
applyCustomLinkFormat link = do
let (linkType, rest) = break (== ':') link
return $ do
@@ -487,7 +500,7 @@ internalLink link title = do
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
-- an anchor.
-anchor :: OrgParser (F Inlines)
+anchor :: PandocMonad m => OrgParser m (F Inlines)
anchor = try $ do
anchorId <- parseAnchor
recordAnchorId anchorId
@@ -509,23 +522,23 @@ solidify = map replaceSpecialChar
| otherwise = '-'
-- | Parses an inline code block and marks it as an babel block.
-inlineCodeBlock :: OrgParser (F Inlines)
+inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
inlineCodeBlock = try $ do
string "src_"
lang <- many1 orgArgWordChar
opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
- let attrClasses = [translateLang lang, rundocBlockClass]
- let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
+ let attrClasses = [translateLang lang]
+ let attrKeyVal = originalLang lang <> opts
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
where
- inlineBlockOption :: OrgParser (String, String)
+ inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
inlineBlockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgInlineParamValue
return (argKey, paramValue)
- orgInlineParamValue :: OrgParser String
+ orgInlineParamValue :: PandocMonad m => OrgParser m String
orgInlineParamValue = try $
skipSpaces
*> notFollowedBy (char ':')
@@ -533,7 +546,7 @@ inlineCodeBlock = try $ do
<* skipSpaces
-emphasizedText :: OrgParser (F Inlines)
+emphasizedText :: PandocMonad m => OrgParser m (F Inlines)
emphasizedText = do
state <- getState
guard . exportEmphasizedText . orgStateExportSettings $ state
@@ -544,60 +557,64 @@ emphasizedText = do
, underline
]
-enclosedByPair :: Char -- ^ opening char
+enclosedByPair :: PandocMonad m
+ => Char -- ^ opening char
-> Char -- ^ closing char
- -> OrgParser a -- ^ parser
- -> OrgParser [a]
+ -> OrgParser m a -- ^ parser
+ -> OrgParser m [a]
enclosedByPair s e p = char s *> many1Till p (char e)
-emph :: OrgParser (F Inlines)
+emph :: PandocMonad m => OrgParser m (F Inlines)
emph = fmap B.emph <$> emphasisBetween '/'
-strong :: OrgParser (F Inlines)
+strong :: PandocMonad m => OrgParser m (F Inlines)
strong = fmap B.strong <$> emphasisBetween '*'
-strikeout :: OrgParser (F Inlines)
+strikeout :: PandocMonad m => OrgParser m (F Inlines)
strikeout = fmap B.strikeout <$> emphasisBetween '+'
--- There is no underline, so we use strong instead.
-underline :: OrgParser (F Inlines)
-underline = fmap B.strong <$> emphasisBetween '_'
+underline :: PandocMonad m => OrgParser m (F Inlines)
+underline = fmap underlineSpan <$> emphasisBetween '_'
-verbatim :: OrgParser (F Inlines)
+verbatim :: PandocMonad m => OrgParser m (F Inlines)
verbatim = return . B.code <$> verbatimBetween '='
-code :: OrgParser (F Inlines)
+code :: PandocMonad m => OrgParser m (F Inlines)
code = return . B.code <$> verbatimBetween '~'
-subscript :: OrgParser (F Inlines)
+subscript :: PandocMonad m => OrgParser m (F Inlines)
subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
-superscript :: OrgParser (F Inlines)
+superscript :: PandocMonad m => OrgParser m (F Inlines)
superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
-math :: OrgParser (F Inlines)
+math :: PandocMonad m => OrgParser m (F Inlines)
math = return . B.math <$> choice [ math1CharBetween '$'
, mathStringBetween '$'
, rawMathBetween "\\(" "\\)"
]
-displayMath :: OrgParser (F Inlines)
+displayMath :: PandocMonad m => OrgParser m (F Inlines)
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
, rawMathBetween "$$" "$$"
]
-updatePositions :: Char
- -> OrgParser Char
+updatePositions :: PandocMonad m
+ => Char
+ -> OrgParser m Char
updatePositions c = do
+ st <- getState
+ let emphasisPreChars = orgStateEmphasisPreChars st
when (c `elem` emphasisPreChars) updateLastPreCharPos
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
return c
-symbol :: OrgParser (F Inlines)
+symbol :: PandocMonad m => OrgParser m (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
-emphasisBetween :: Char
- -> OrgParser (F Inlines)
+emphasisBetween :: PandocMonad m
+ => Char
+ -> OrgParser m (F Inlines)
emphasisBetween c = try $ do
startEmphasisNewlinesCounting emphasisAllowedNewlines
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
@@ -606,8 +623,9 @@ emphasisBetween c = try $ do
resetEmphasisNewlines
return res
-verbatimBetween :: Char
- -> OrgParser String
+verbatimBetween :: PandocMonad m
+ => Char
+ -> OrgParser m String
verbatimBetween c = try $
emphasisStart c *>
many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
@@ -615,8 +633,9 @@ verbatimBetween c = try $
verbatimChar = noneOf "\n\r" >>= updatePositions
-- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: Char
- -> OrgParser String
+mathStringBetween :: PandocMonad m
+ => Char
+ -> OrgParser m String
mathStringBetween c = try $ do
mathStart c
body <- many1TillNOrLessNewlines mathAllowedNewlines
@@ -626,8 +645,9 @@ mathStringBetween c = try $ do
return $ body ++ [final]
-- | Parse a single character between @c@ using math rules
-math1CharBetween :: Char
- -> OrgParser String
+math1CharBetween :: PandocMonad m
+ => Char
+ -> OrgParser m String
math1CharBetween c = try $ do
char c
res <- noneOf $ c:mathForbiddenBorderChars
@@ -635,13 +655,14 @@ math1CharBetween c = try $ do
eof <|> () <$ lookAhead (oneOf mathPostChars)
return [res]
-rawMathBetween :: String
+rawMathBetween :: PandocMonad m
+ => String
-> String
- -> OrgParser String
+ -> OrgParser m String
rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
-- | Parses the start (opening character) of emphasis
-emphasisStart :: Char -> OrgParser Char
+emphasisStart :: PandocMonad m => Char -> OrgParser m Char
emphasisStart c = try $ do
guard =<< afterEmphasisPreChar
guard =<< notAfterString
@@ -654,7 +675,7 @@ emphasisStart c = try $ do
return c
-- | Parses the closing character of emphasis
-emphasisEnd :: Char -> OrgParser Char
+emphasisEnd :: PandocMonad m => Char -> OrgParser m Char
emphasisEnd c = try $ do
guard =<< notAfterForbiddenBorderChar
char c
@@ -662,14 +683,16 @@ emphasisEnd c = try $ do
updateLastStrPos
popInlineCharStack
return c
- where acceptablePostChars =
- surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
+ where
+ acceptablePostChars = do
+ emphasisPostChars <- orgStateEmphasisPostChars <$> getState
+ surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
-mathStart :: Char -> OrgParser Char
+mathStart :: PandocMonad m => Char -> OrgParser m Char
mathStart c = try $
char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
-mathEnd :: Char -> OrgParser Char
+mathEnd :: PandocMonad m => Char -> OrgParser m Char
mathEnd c = try $ do
res <- noneOf (c:mathForbiddenBorderChars)
char c
@@ -677,15 +700,15 @@ mathEnd c = try $ do
return res
-enclosedInlines :: OrgParser a
- -> OrgParser b
- -> OrgParser (F Inlines)
+enclosedInlines :: (PandocMonad m, Show b) => OrgParser m a
+ -> OrgParser m b
+ -> OrgParser m (F Inlines)
enclosedInlines start end = try $
trimInlinesF . mconcat <$> enclosed start end inline
-enclosedRaw :: OrgParser a
- -> OrgParser b
- -> OrgParser String
+enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a
+ -> OrgParser m b
+ -> OrgParser m String
enclosedRaw start end = try $
start *> (onSingleLine <|> spanningTwoLines)
where onSingleLine = try $ many1Till (noneOf "\n\r") end
@@ -694,10 +717,10 @@ enclosedRaw start end = try $
-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
-- newlines.
-many1TillNOrLessNewlines :: Int
- -> OrgParser Char
- -> OrgParser a
- -> OrgParser String
+many1TillNOrLessNewlines :: PandocMonad m => Int
+ -> OrgParser m Char
+ -> OrgParser m a
+ -> OrgParser m String
many1TillNOrLessNewlines n p end = try $
nMoreLines (Just n) mempty >>= oneOrMore
where
@@ -715,17 +738,9 @@ many1TillNOrLessNewlines n p end = try $
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
-- for details).
--- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
-emphasisPreChars :: [Char]
-emphasisPreChars = "\t \"'({"
-
--- | Chars allowed at after emphasis
-emphasisPostChars :: [Char]
-emphasisPostChars = "\t\n !\"'),-.:;?\\}"
-
-- | Chars not allowed at the (inner) border of emphasis
emphasisForbiddenBorderChars :: [Char]
-emphasisForbiddenBorderChars = "\t\n\r \"',"
+emphasisForbiddenBorderChars = "\t\n\r "
-- | The maximum number of newlines within
emphasisAllowedNewlines :: Int
@@ -746,29 +761,29 @@ mathAllowedNewlines :: Int
mathAllowedNewlines = 2
-- | Whether we are right behind a char allowed before emphasis
-afterEmphasisPreChar :: OrgParser Bool
+afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool
afterEmphasisPreChar = do
pos <- getPosition
lastPrePos <- orgStateLastPreCharPos <$> getState
return . fromMaybe True $ (== pos) <$> lastPrePos
-- | Whether the parser is right after a forbidden border char
-notAfterForbiddenBorderChar :: OrgParser Bool
+notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool
notAfterForbiddenBorderChar = do
pos <- getPosition
lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
return $ lastFBCPos /= Just pos
-- | Read a sub- or superscript expression
-subOrSuperExpr :: OrgParser (F Inlines)
+subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
subOrSuperExpr = try $
- choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
+ choice [ charsInBalanced '{' '}' (noneOf "\n\r")
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
, simpleSubOrSuperString
] >>= parseFromString (mconcat <$> many inline)
where enclosing (left, right) s = left : s ++ [right]
-simpleSubOrSuperString :: OrgParser String
+simpleSubOrSuperString :: PandocMonad m => OrgParser m String
simpleSubOrSuperString = try $ do
state <- getState
guard . exportSubSuperscripts . orgStateExportSettings $ state
@@ -777,17 +792,18 @@ simpleSubOrSuperString = try $ do
<*> many1 alphaNum
]
-inlineLaTeX :: OrgParser (F Inlines)
+inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
inlineLaTeX = try $ do
cmd <- inlineLaTeXCommand
+ ils <- (lift . lift) $ parseAsInlineLaTeX cmd
maybe mzero returnF $
- parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
+ parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils
where
parseAsMath :: String -> Maybe Inlines
parseAsMath cs = B.fromList <$> texMathToPandoc cs
- parseAsInlineLaTeX :: String -> Maybe Inlines
- parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
+ parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
+ parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
parseAsMathMLSym :: String -> Maybe Inlines
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
@@ -795,19 +811,22 @@ inlineLaTeX = try $ do
where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
state :: ParserState
- state = def{ stateOptions = def{ readerParseRaw = True }}
+ state = def{ stateOptions = def{ readerExtensions =
+ enableExtension Ext_raw_tex (readerExtensions def) } }
texMathToPandoc :: String -> Maybe [Inline]
- texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
+ texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline
maybeRight :: Either a b -> Maybe b
maybeRight = either (const Nothing) Just
-inlineLaTeXCommand :: OrgParser String
+inlineLaTeXCommand :: PandocMonad m => OrgParser m String
inlineLaTeXCommand = try $ do
rest <- getInput
- case runParser rawLaTeXInline def "source" rest of
- Right (RawInline _ cs) -> do
+ st <- getState
+ parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest
+ case parsed of
+ Right cs -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
let cmdNoSpc = dropWhileEnd isSpace cs
@@ -820,33 +839,56 @@ inlineLaTeXCommand = try $ do
dropWhileEnd :: (a -> Bool) -> [a] -> [a]
dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-exportSnippet :: OrgParser (F Inlines)
+exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
exportSnippet = try $ do
string "@@"
format <- many1Till (alphaNum <|> char '-') (char ':')
snippet <- manyTill anyChar (try $ string "@@")
returnF $ B.rawInline format snippet
-smart :: OrgParser (F Inlines)
-smart = do
- getOption readerSmart >>= guard
- doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
+macro :: PandocMonad m => OrgParser m (F Inlines)
+macro = try $ do
+ recursionDepth <- orgStateMacroDepth <$> getState
+ guard $ recursionDepth < 15
+ string "{{{"
+ name <- many alphaNum
+ args <- ([] <$ string "}}}")
+ <|> char '(' *> argument `sepBy` char ',' <* eoa
+ expander <- lookupMacro name <$> getState
+ case expander of
+ Nothing -> mzero
+ Just fn -> do
+ updateState $ \s -> s { orgStateMacroDepth = recursionDepth + 1 }
+ res <- parseFromString (mconcat <$> many inline) $ fn args
+ updateState $ \s -> s { orgStateMacroDepth = recursionDepth }
+ return res
+ where
+ argument = many $ notFollowedBy eoa *> noneOf ","
+ eoa = string ")}}}"
+
+smart :: PandocMonad m => OrgParser m (F Inlines)
+smart = choice [doubleQuoted, singleQuoted, orgApostrophe, orgDash, orgEllipses]
where
orgDash = do
- guard =<< getExportSetting exportSpecialStrings
- dash <* updatePositions '-'
+ guardOrSmartEnabled =<< getExportSetting exportSpecialStrings
+ pure <$> dash <* updatePositions '-'
orgEllipses = do
- guard =<< getExportSetting exportSpecialStrings
- ellipses <* updatePositions '.'
- orgApostrophe =
- (char '\'' <|> char '\8217') <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
- *> return (B.str "\x2019")
-
-singleQuoted :: OrgParser (F Inlines)
+ guardOrSmartEnabled =<< getExportSetting exportSpecialStrings
+ pure <$> ellipses <* updatePositions '.'
+ orgApostrophe = do
+ guardEnabled Ext_smart
+ (char '\'' <|> char '\8217') <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ returnF (B.str "\x2019")
+
+guardOrSmartEnabled :: PandocMonad m => Bool -> OrgParser m ()
+guardOrSmartEnabled b = do
+ smartExtension <- extensionEnabled Ext_smart <$> getOption readerExtensions
+ guard (b || smartExtension)
+
+singleQuoted :: PandocMonad m => OrgParser m (F Inlines)
singleQuoted = try $ do
- guard =<< getExportSetting exportSmartQuotes
+ guardOrSmartEnabled =<< getExportSetting exportSmartQuotes
singleQuoteStart
updatePositions '\''
withQuoteContext InSingleQuote $
@@ -856,12 +898,15 @@ singleQuoted = try $ do
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
-doubleQuoted :: OrgParser (F Inlines)
+doubleQuoted :: PandocMonad m => OrgParser m (F Inlines)
doubleQuoted = try $ do
- guard =<< getExportSetting exportSmartQuotes
+ guardOrSmartEnabled =<< getExportSetting exportSmartQuotes
doubleQuoteStart
updatePositions '"'
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
- (fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
+ let doubleQuotedContent = withQuoteContext InDoubleQuote $ do
+ doubleQuoteEnd
+ updateLastForbiddenCharPos
+ return . fmap B.doubleQuoted . trimInlinesF $ contents
+ let leftQuoteAndContent = return $ pure (B.str "\8220") <> contents
+ doubleQuotedContent <|> leftQuoteAndContent
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 1fea3e890..6ad403fd8 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TupleSections #-}
{-
-Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Org.Meta
- Copyright : Copyright (C) 2014-2017 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -33,25 +33,26 @@ module Text.Pandoc.Readers.Org.Meta
, metaLine
) where
-import Text.Pandoc.Readers.Org.BlockStarts
-import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings )
-import Text.Pandoc.Readers.Org.Inlines
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Readers.Org.BlockStarts
+import Text.Pandoc.Readers.Org.ExportSettings (exportSettings)
+import Text.Pandoc.Readers.Org.Inlines
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Blocks, Inlines )
-import Text.Pandoc.Definition
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (safeRead)
-import Control.Monad ( mzero, void )
-import Data.Char ( toLower )
-import Data.List ( intersperse )
+import Control.Monad (mzero, void, when)
+import Data.Char (toLower)
+import Data.List (intersperse)
import qualified Data.Map as M
-import Data.Monoid ( (<>) )
-import Network.HTTP ( urlEncode )
+import Network.HTTP (urlEncode)
-- | Returns the current meta, respecting export options.
-metaExport :: OrgParser (F Meta)
+metaExport :: Monad m => OrgParser m (F Meta)
metaExport = do
st <- getState
let settings = orgStateExportSettings st
@@ -68,29 +69,32 @@ removeMeta key meta' =
-- | Parse and handle a single line containing meta information
-- The order, in which blocks are tried, makes sure that we're not looking at
-- the beginning of a block, so we don't need to check for it
-metaLine :: OrgParser Blocks
+metaLine :: PandocMonad m => OrgParser m Blocks
metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
-declarationLine :: OrgParser ()
+declarationLine :: PandocMonad m => OrgParser m ()
declarationLine = try $ do
key <- map toLower <$> metaKey
(key', value) <- metaValue key
- updateState $ \st ->
- let meta' = B.setMeta key' <$> value <*> pure nullMeta
- in st { orgStateMeta = meta' <> orgStateMeta st }
+ let addMetaValue st =
+ st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st }
+ when (key' /= "results") $ updateState addMetaValue
-metaKey :: OrgParser String
+metaKey :: Monad m => OrgParser m String
metaKey = map toLower <$> many1 (noneOf ": \n\r")
<* char ':'
<* skipSpaces
-metaValue :: String -> OrgParser (String, (F MetaValue))
+metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue)
metaValue key =
let inclKey = "header-includes"
in case key of
"author" -> (key,) <$> metaInlinesCommaSeparated
+ "keywords" -> (key,) <$> metaInlinesCommaSeparated
"title" -> (key,) <$> metaInlines
+ "subtitle" -> (key,) <$> metaInlines
"date" -> (key,) <$> metaInlines
+ "nocite" -> (key,) <$> accumulatingList key metaInlines
"header-includes" -> (key,) <$> accumulatingList key metaInlines
"latex_header" -> (inclKey,) <$>
accumulatingList inclKey (metaExportSnippet "latex")
@@ -103,32 +107,32 @@ metaValue key =
accumulatingList inclKey (metaExportSnippet "html")
_ -> (key,) <$> metaString
-metaInlines :: OrgParser (F MetaValue)
+metaInlines :: PandocMonad m => OrgParser m (F MetaValue)
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
-metaInlinesCommaSeparated :: OrgParser (F MetaValue)
+metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
metaInlinesCommaSeparated = do
- authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',')
+ itemStrs <- many1 (noneOf ",\n") `sepBy1` char ','
newline
- authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs
+ items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs
let toMetaInlines = MetaInlines . B.toList
- return $ MetaList . map toMetaInlines <$> sequence authors
+ return $ MetaList . map toMetaInlines <$> sequence items
-metaString :: OrgParser (F MetaValue)
+metaString :: Monad m => OrgParser m (F MetaValue)
metaString = metaModifiedString id
-metaModifiedString :: (String -> String) -> OrgParser (F MetaValue)
+metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
metaModifiedString f = return . MetaString . f <$> anyLine
-- | Read an format specific meta definition
-metaExportSnippet :: String -> OrgParser (F MetaValue)
+metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
metaExportSnippet format =
return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-- | Accumulate the result of the @parser@ in a list under @key@.
-accumulatingList :: String
- -> OrgParser (F MetaValue)
- -> OrgParser (F MetaValue)
+accumulatingList :: Monad m => String
+ -> OrgParser m (F MetaValue)
+ -> OrgParser m (F MetaValue)
accumulatingList key p = do
value <- p
meta' <- orgStateMeta <$> getState
@@ -141,7 +145,7 @@ accumulatingList key p = do
--
-- export options
--
-optionLine :: OrgParser ()
+optionLine :: Monad m => OrgParser m ()
optionLine = try $ do
key <- metaKey
case key of
@@ -150,16 +154,19 @@ optionLine = try $ do
"todo" -> todoSequence >>= updateState . registerTodoSequence
"seq_todo" -> todoSequence >>= updateState . registerTodoSequence
"typ_todo" -> todoSequence >>= updateState . registerTodoSequence
- _ -> mzero
+ "macro" -> macroDefinition >>= updateState . registerMacro
+ "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
+ "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
+ _ -> mzero
-addLinkFormat :: String
+addLinkFormat :: Monad m => String
-> (String -> String)
- -> OrgParser ()
+ -> OrgParser m ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
-parseLinkFormat :: OrgParser ((String, String -> String))
+parseLinkFormat :: Monad m => OrgParser m (String, String -> String)
parseLinkFormat = try $ do
linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
@@ -167,9 +174,8 @@ parseLinkFormat = try $ do
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
-parseFormat :: OrgParser (String -> String)
-parseFormat = try $ do
- replacePlain <|> replaceUrl <|> justAppend
+parseFormat :: Monad m => OrgParser m (String -> String)
+parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
replacePlain = try $ (\x -> concat . flip intersperse x)
@@ -181,13 +187,34 @@ parseFormat = try $ do
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
-inlinesTillNewline :: OrgParser (F Inlines)
-inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
+setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState
+setEmphasisPreChar csMb st =
+ let preChars = case csMb of
+ Nothing -> orgStateEmphasisPreChars defaultOrgParserState
+ Just cs -> cs
+ in st { orgStateEmphasisPreChars = preChars }
+
+setEmphasisPostChar :: Maybe [Char] -> OrgParserState -> OrgParserState
+setEmphasisPostChar csMb st =
+ let postChars = case csMb of
+ Nothing -> orgStateEmphasisPostChars defaultOrgParserState
+ Just cs -> cs
+ in st { orgStateEmphasisPostChars = postChars }
+
+emphChars :: Monad m => OrgParser m (Maybe [Char])
+emphChars = do
+ skipSpaces
+ safeRead <$> anyLine
+
+inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines)
+inlinesTillNewline = do
+ updateLastPreCharPos
+ trimInlinesF . mconcat <$> manyTill inline newline
--
-- ToDo Sequences and Keywords
--
-todoSequence :: OrgParser TodoSequence
+todoSequence :: Monad m => OrgParser m TodoSequence
todoSequence = try $ do
todoKws <- todoKeywords
doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
@@ -201,13 +228,13 @@ todoSequence = try $ do
(x:xs) -> return $ keywordsToSequence (reverse xs) [x]
where
- todoKeywords :: OrgParser [String]
+ todoKeywords :: Monad m => OrgParser m [String]
todoKeywords = try $
let keyword = many1 nonspaceChar <* skipSpaces
endOfKeywords = todoDoneSep <|> void newline
in manyTill keyword (lookAhead endOfKeywords)
- todoDoneSep :: OrgParser ()
+ todoDoneSep :: Monad m => OrgParser m ()
todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
keywordsToSequence :: [String] -> [String] -> TodoSequence
@@ -215,3 +242,27 @@ todoSequence = try $ do
let todoMarkers = map (TodoMarker Todo) todo
doneMarkers = map (TodoMarker Done) done
in todoMarkers ++ doneMarkers
+
+macroDefinition :: Monad m => OrgParser m (String, [String] -> String)
+macroDefinition = try $ do
+ macroName <- many1 nonspaceChar <* skipSpaces
+ firstPart <- expansionPart
+ (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart)
+ let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder
+ return (macroName, expander)
+ where
+ placeholder :: Monad m => OrgParser m Int
+ placeholder = try . fmap read $ char '$' *> many1 digit
+
+ expansionPart :: Monad m => OrgParser m String
+ expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r")
+
+ alternate :: [a] -> [a] -> [a]
+ alternate [] ys = ys
+ alternate xs [] = xs
+ alternate (x:xs) (y:ys) = x : y : alternate xs ys
+
+ reorder :: [Int] -> [String] -> [String]
+ reorder perm xs =
+ let element n = take 1 $ drop (n - 1) xs
+ in concatMap element perm
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 38f95ca95..6316766fa 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -1,8 +1,7 @@
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,8 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ Module : Text.Pandoc.Readers.Org.ParserState
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -30,16 +29,21 @@ Define the Org-mode parser state.
-}
module Text.Pandoc.Readers.Org.ParserState
( OrgParserState (..)
+ , defaultOrgParserState
, OrgParserLocal (..)
, OrgNoteRecord
, HasReaderOptions (..)
, HasQuoteContext (..)
+ , HasMacros (..)
, TodoMarker (..)
, TodoSequence
, TodoState (..)
, activeTodoMarkers
, registerTodoSequence
- , F(..)
+ , MacroExpander
+ , lookupMacro
+ , registerMacro
+ , F
, askF
, asksF
, trimInlinesF
@@ -50,24 +54,28 @@ module Text.Pandoc.Readers.Org.ParserState
, optionsToParserState
) where
-import Control.Monad (liftM, liftM2)
-import Control.Monad.Reader (Reader, runReader, ask, asks, local)
+import Control.Monad.Reader (ReaderT, asks, local)
-import Data.Default (Default(..))
+import Data.Default (Default (..))
import qualified Data.Map as M
import qualified Data.Set as Set
-
-import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
-import Text.Pandoc.Definition ( Meta(..), nullMeta )
-import Text.Pandoc.Options ( ReaderOptions(..) )
-import Text.Pandoc.Parsing ( HasHeaderMap(..)
- , HasIdentifierList(..)
- , HasLastStrPosition(..)
- , HasQuoteContext(..)
- , HasReaderOptions(..)
- , ParserContext(..)
- , QuoteContext(..)
- , SourcePos )
+import Data.Text (Text)
+
+import Text.Pandoc.Builder (Blocks, Inlines)
+import Text.Pandoc.Definition (Meta (..), nullMeta)
+import Text.Pandoc.Logging
+import Text.Pandoc.Options (ReaderOptions (..))
+import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..),
+ HasIncludeFiles (..), HasLastStrPosition (..),
+ HasLogMessages (..), HasMacros (..),
+ HasQuoteContext (..), HasReaderOptions (..),
+ ParserContext (..), QuoteContext (..), SourcePos,
+ askF, asksF, returnF, runF, trimInlinesF)
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
+
+-- | This is used to delay evaluation until all relevant information has been
+-- parsed and made available in the parser state.
+type F = Future OrgParserState
-- | An inline note / footnote containing the note key and its (inline) value.
type OrgNoteRecord = (String, F Blocks)
@@ -76,6 +84,8 @@ type OrgNoteTable = [OrgNoteRecord]
-- | Map of functions for link transformations. The map key is refers to the
-- link-type, the corresponding function transforms the given link string.
type OrgLinkFormatters = M.Map String (String -> String)
+-- | Macro expander function
+type MacroExpander = [String] -> String
-- | The states in which a todo item can be
data TodoState = Todo | Done
@@ -95,22 +105,34 @@ type TodoSequence = [TodoMarker]
data OrgParserState = OrgParserState
{ orgStateAnchorIds :: [String]
, orgStateEmphasisCharStack :: [Char]
+ , orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before
+ -- emphasis; spaces and newlines are
+ -- always ok in addition to what is
+ -- specified here.
+ , orgStateEmphasisPostChars :: [Char] -- ^ Chars allowed at after emphasis
, orgStateEmphasisNewlines :: Maybe Int
, orgStateExportSettings :: ExportSettings
, orgStateHeaderMap :: M.Map Inlines String
, orgStateIdentifiers :: Set.Set String
+ , orgStateIncludeFiles :: [String]
, orgStateLastForbiddenCharPos :: Maybe SourcePos
, orgStateLastPreCharPos :: Maybe SourcePos
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
+ , orgStateMacros :: M.Map String MacroExpander
+ , orgStateMacroDepth :: Int
, orgStateMeta :: F Meta
, orgStateNotes' :: OrgNoteTable
, orgStateOptions :: ReaderOptions
, orgStateParserContext :: ParserContext
, orgStateTodoSequences :: [TodoSequence]
+ , orgLogMessages :: [LogMessage]
+ , orgMacros :: M.Map Text Macro
}
-data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
+data OrgParserLocal = OrgParserLocal
+ { orgLocalQuoteContext :: QuoteContext
+ }
instance Default OrgParserLocal where
def = OrgParserLocal NoQuote
@@ -122,7 +144,7 @@ instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
-instance HasQuoteContext st (Reader OrgParserLocal) where
+instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where
getQuoteContext = asks orgLocalQuoteContext
withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
@@ -134,26 +156,47 @@ instance HasHeaderMap OrgParserState where
extractHeaderMap = orgStateHeaderMap
updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+instance HasLogMessages OrgParserState where
+ addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st }
+ getLogMessages st = reverse $ orgLogMessages st
+
+instance HasMacros OrgParserState where
+ extractMacros st = orgMacros st
+ updateMacros f st = st{ orgMacros = f (orgMacros st) }
+
+instance HasIncludeFiles OrgParserState where
+ getIncludeFiles = orgStateIncludeFiles
+ addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st }
+ dropLatestIncludeFile st =
+ st { orgStateIncludeFiles = drop 1 $ orgStateIncludeFiles st }
+
instance Default OrgParserState where
def = defaultOrgParserState
defaultOrgParserState :: OrgParserState
defaultOrgParserState = OrgParserState
{ orgStateAnchorIds = []
+ , orgStateEmphasisPreChars = "-\t ('\"{"
+ , orgStateEmphasisPostChars = "-\t\n .,:!?;'\")}["
, orgStateEmphasisCharStack = []
, orgStateEmphasisNewlines = Nothing
, orgStateExportSettings = def
, orgStateHeaderMap = M.empty
, orgStateIdentifiers = Set.empty
+ , orgStateIncludeFiles = []
, orgStateLastForbiddenCharPos = Nothing
, orgStateLastPreCharPos = Nothing
, orgStateLastStrPos = Nothing
, orgStateLinkFormatters = M.empty
+ , orgStateMacros = M.empty
+ , orgStateMacroDepth = 0
, orgStateMeta = return nullMeta
, orgStateNotes' = []
, orgStateOptions = def
, orgStateParserContext = NullState
, orgStateTodoSequences = []
+ , orgLogMessages = []
+ , orgMacros = M.empty
}
optionsToParserState :: ReaderOptions -> OrgParserState
@@ -177,6 +220,15 @@ activeTodoSequences st =
activeTodoMarkers :: OrgParserState -> TodoSequence
activeTodoMarkers = concat . activeTodoSequences
+lookupMacro :: String -> OrgParserState -> Maybe MacroExpander
+lookupMacro macroName = M.lookup macroName . orgStateMacros
+
+registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState
+registerMacro (name, expander) st =
+ let curMacros = orgStateMacros st
+ in st{ orgStateMacros = M.insert name expander curMacros }
+
+
--
-- Export Settings
@@ -191,20 +243,22 @@ data ArchivedTreesOption =
-- | Export settings <http://orgmode.org/manual/Export-settings.html>
-- These settings can be changed via OPTIONS statements.
data ExportSettings = ExportSettings
- { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
- , exportDrawers :: Either [String] [String]
+ { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
+ , exportDrawers :: Either [String] [String]
-- ^ Specify drawer names which should be exported. @Left@ names are
-- explicitly excluded from the resulting output while @Right@ means that
-- only the listed drawer names should be included.
- , exportEmphasizedText :: Bool -- ^ Parse emphasized text
- , exportHeadlineLevels :: Int
+ , exportEmphasizedText :: Bool -- ^ Parse emphasized text
+ , exportHeadlineLevels :: Int
-- ^ Maximum depth of headlines, deeper headlines are convert to list
- , exportSmartQuotes :: Bool -- ^ Parse quotes smartly
- , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly
- , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts
- , exportWithAuthor :: Bool -- ^ Include author in final meta-data
- , exportWithCreator :: Bool -- ^ Include creator in final meta-data
- , exportWithEmail :: Bool -- ^ Include email in final meta-data
+ , exportPreserveBreaks :: Bool -- ^ Whether to preserve linebreaks
+ , exportSmartQuotes :: Bool -- ^ Parse quotes smartly
+ , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly
+ , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts
+ , exportWithAuthor :: Bool -- ^ Include author in final meta-data
+ , exportWithCreator :: Bool -- ^ Include creator in final meta-data
+ , exportWithEmail :: Bool -- ^ Include email in final meta-data
+ , exportWithTags :: Bool -- ^ Keep tags as part of headlines
, exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers
}
@@ -217,43 +271,13 @@ defaultExportSettings = ExportSettings
, exportDrawers = Left ["LOGBOOK"]
, exportEmphasizedText = True
, exportHeadlineLevels = 3
- , exportSmartQuotes = True
+ , exportPreserveBreaks = False
+ , exportSmartQuotes = False
, exportSpecialStrings = True
, exportSubSuperscripts = True
, exportWithAuthor = True
, exportWithCreator = True
, exportWithEmail = True
+ , exportWithTags = True
, exportWithTodoKeywords = True
}
-
-
---
--- Parser state reader
---
-
--- | Reader monad wrapping the parser state. This is used to delay evaluation
--- until all relevant information has been parsed and made available in the
--- parser state. See also the newtype of the same name in
--- Text.Pandoc.Parsing.
-newtype F a = F { unF :: Reader OrgParserState a
- } deriving (Functor, Applicative, Monad)
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = fmap mconcat . sequence
-
-runF :: F a -> OrgParserState -> a
-runF = runReader . unF
-
-askF :: F OrgParserState
-askF = F ask
-
-asksF :: (OrgParserState -> a) -> F a
-asksF f = F $ asks f
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
-
-returnF :: Monad m => a -> m (F a)
-returnF = return . return
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 95415f823..36420478b 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ Module : Text.Pandoc.Readers.Org.Parsing
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -31,6 +31,8 @@ functions are adapted to Org-mode specific functionality.
module Text.Pandoc.Readers.Org.Parsing
( OrgParser
, anyLine
+ , anyLineNewline
+ , indentWith
, blanklines
, newline
, parseFromString
@@ -70,8 +72,11 @@ module Text.Pandoc.Readers.Org.Parsing
, dash
, ellipses
, citeKey
+ , gridTableWith
+ , insertIncludedFileF
-- * Re-exports from Text.Pandoc.Parsec
, runParser
+ , runParserT
, getInput
, char
, letter
@@ -107,24 +112,24 @@ module Text.Pandoc.Readers.Org.Parsing
, getPosition
) where
-import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
+ parseFromString)
import qualified Text.Pandoc.Parsing as P
-import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
- , parseFromString )
-import Control.Monad ( guard )
-import Control.Monad.Reader ( Reader )
+import Control.Monad (guard)
+import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
-type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
--
-- | Parse any line of text
-anyLine :: OrgParser String
+anyLine :: Monad m => OrgParser m String
anyLine =
P.anyLine
<* updateLastPreCharPos
@@ -132,7 +137,7 @@ anyLine =
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
-- of the state saved and restored.
-parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
parseFromString parser str' = do
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
@@ -141,33 +146,34 @@ parseFromString parser str' = do
return result
-- | Skip one or more tab or space characters.
-skipSpaces1 :: OrgParser ()
+skipSpaces1 :: Monad m => OrgParser m ()
skipSpaces1 = skipMany1 spaceChar
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
-newline :: OrgParser Char
+newline :: Monad m => OrgParser m Char
newline =
P.newline
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: OrgParser [Char]
+blanklines :: Monad m => OrgParser m [Char]
blanklines =
P.blanklines
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
-- | Succeeds when we're in list context.
-inList :: OrgParser ()
+inList :: Monad m => OrgParser m ()
inList = do
ctx <- orgStateParserContext <$> getState
guard (ctx == ListItemState)
-- | Parse in different context
-withContext :: ParserContext -- ^ New parser context
- -> OrgParser a -- ^ Parser to run in that context
- -> OrgParser a
+withContext :: Monad m
+ => ParserContext -- ^ New parser context
+ -> OrgParser m a -- ^ Parser to run in that context
+ -> OrgParser m a
withContext context parser = do
oldContext <- orgStateParserContext <$> getState
updateState $ \s -> s{ orgStateParserContext = context }
@@ -180,19 +186,19 @@ withContext context parser = do
--
-- | Get an export setting.
-getExportSetting :: (ExportSettings -> a) -> OrgParser a
+getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a
getExportSetting s = s . orgStateExportSettings <$> getState
-- | Set the current position as the last position at which a forbidden char
-- was found (i.e. a character which is not allowed at the inner border of
-- markup).
-updateLastForbiddenCharPos :: OrgParser ()
+updateLastForbiddenCharPos :: Monad m => OrgParser m ()
updateLastForbiddenCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-- | Set the current parser position as the position at which a character was
-- seen which allows inline markup to follow.
-updateLastPreCharPos :: OrgParser ()
+updateLastPreCharPos :: Monad m => OrgParser m ()
updateLastPreCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
@@ -201,15 +207,15 @@ updateLastPreCharPos = getPosition >>= \p ->
--
-- | Read the key of a plist style key-value list.
-orgArgKey :: OrgParser String
+orgArgKey :: Monad m => OrgParser m String
orgArgKey = try $
skipSpaces *> char ':'
*> many1 orgArgWordChar
-- | Read the value of a plist style key-value list.
-orgArgWord :: OrgParser String
+orgArgWord :: Monad m => OrgParser m String
orgArgWord = many1 orgArgWordChar
-- | Chars treated as part of a word in plists.
-orgArgWordChar :: OrgParser Char
+orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 8c87cfa25..cba72cc07 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ Module : Text.Pandoc.Readers.Org.Shared
+ Copyright : Copyright (C) 2014-2018 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -29,14 +29,12 @@ Utility functions used in other Pandoc Org modules.
module Text.Pandoc.Readers.Org.Shared
( cleanLinkString
, isImageFilename
- , rundocBlockClass
- , toRundocAttrib
+ , originalLang
, translateLang
) where
-import Control.Arrow ( first )
-import Data.Char ( isAlphaNum )
-import Data.List ( isPrefixOf, isSuffixOf )
+import Data.Char (isAlphaNum)
+import Data.List (isPrefixOf, isSuffixOf)
-- | Check whether the given string looks like the path to of URL of an image.
@@ -58,8 +56,8 @@ cleanLinkString s =
'.':'/':_ -> Just s -- relative path
'.':'.':'/':_ -> Just s -- relative path
-- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
- _ | isUrl s -> Just s -- URL
+ 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s'
+ _ | isUrl s -> Just s -- URL
_ -> Nothing
where
isUrl :: String -> Bool
@@ -68,17 +66,17 @@ cleanLinkString s =
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
&& not (null path)
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
+-- | Creates an key-value pair marking the original language name specified for
+-- a piece of source code.
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
--- | Prefix the name of a attribute, marking it as a code execution parameter.
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first (rundocPrefix ++)
+-- | Creates an key-value attributes marking the original language name
+-- specified for a piece of source code.
+originalLang :: String -> [(String, String)]
+originalLang lang =
+ let transLang = translateLang lang
+ in if transLang == lang
+ then []
+ else [("org-language", lang)]
-- | Translate from Org-mode's programming language identifiers to those used
-- by Pandoc. This is useful to allow for proper syntax highlighting in
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index e05b6cba2..e88d997f0 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,38 +30,50 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.RST (
- readRST,
- readRSTWithWarnings
- ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder (setMeta, fromList)
-import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
-import Text.Pandoc.Options
-import Control.Monad ( when, liftM, guard, mzero )
-import Data.List ( findIndex, intercalate,
- transpose, sort, deleteFirstsBy, isSuffixOf , nub, union)
-import Data.Maybe (fromMaybe)
+module Text.Pandoc.Readers.RST ( readRST ) where
+import Control.Arrow (second)
+import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
+import Control.Monad.Except (throwError)
+import Control.Monad.Identity (Identity (..))
+import Data.Char (isHexDigit, isSpace, toLower, toUpper)
+import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
+ nub, sort, transpose, union)
import qualified Data.Map as M
-import Text.Printf ( printf )
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
-import qualified Text.Pandoc.Builder as B
-import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower, isHexDigit, isSpace)
+import Data.Maybe (fromMaybe, isJust)
import Data.Monoid ((<>))
+import Data.Sequence (ViewR (..), viewr)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs)
+import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
+import Text.Pandoc.Definition
import Text.Pandoc.Error
+import Text.Pandoc.ImageSize (lengthToDim, scaleDimension)
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Printf (printf)
--- | Parse reStructuredText string and return Pandoc document.
-readRST :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
-
-readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String])
-readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+-- TODO:
+-- [ ] .. parsed-literal
-type RSTParser = Parser [Char] ParserState
+-- | Parse reStructuredText string and return Pandoc document.
+readRST :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readRST opts s = do
+ parsed <- readWithM parseRST def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+type RSTParser m = ParserT [Char] ParserState m
--
-- Constants and data structure definitions
@@ -87,9 +100,9 @@ isHeader _ _ = False
-- | Promote all headers in a list of blocks. (Part of
-- title transformation for RST.)
promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level attr text):rest) =
- (Header (level - num) attr text):(promoteHeaders num rest)
-promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
+promoteHeaders num (Header level attr text:rest) =
+ Header (level - num) attr text:promoteHeaders num rest
+promoteHeaders num (other:rest) = other:promoteHeaders num rest
promoteHeaders _ [] = []
-- | If list of blocks starts with a header (or a header and subheader)
@@ -101,11 +114,11 @@ titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata
titleTransform (bs, meta) =
let (bs', meta') =
case bs of
- ((Header 1 _ head1):(Header 2 _ head2):rest)
+ (Header 1 _ head1:Header 2 _ head2:rest)
| not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub
(promoteHeaders 2 rest, setMeta "title" (fromList head1) $
setMeta "subtitle" (fromList head2) meta)
- ((Header 1 _ head1):rest)
+ (Header 1 _ head1:rest)
| not (any (isHeader 1) rest) -> -- title only
(promoteHeaders 1 rest,
setMeta "title" (fromList head1) meta)
@@ -121,8 +134,10 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author"
$ M.adjust toPlain "date"
$ M.adjust toPlain "title"
- $ M.mapKeys (\k -> if k == "authors" then "author" else k)
- $ metamap
+ $ M.mapKeys (\k ->
+ if k == "authors"
+ then "author"
+ else k) metamap
toPlain (MetaBlocks [Para xs]) = MetaInlines xs
toPlain x = x
splitAuthors (MetaBlocks [Para xs])
@@ -131,6 +146,12 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
splitAuthors x = x
splitAuthors' = map normalizeSpaces .
splitOnSemi . concatMap factorSemi
+ normalizeSpaces = reverse . dropWhile isSp . reverse .
+ dropWhile isSp
+ isSp Space = True
+ isSp SoftBreak = True
+ isSp LineBreak = True
+ isSp _ = False
splitOnSemi = splitBy (==Str ";")
factorSemi (Str []) = []
factorSemi (Str s) = case break (==';') s of
@@ -141,41 +162,61 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
factorSemi (Str ys)
factorSemi x = [x]
-parseRST :: RSTParser Pandoc
+parseRST :: PandocMonad m => RSTParser m Pandoc
parseRST = do
optional blanklines -- skip blank lines at beginning of file
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
docMinusKeys <- concat <$>
- manyTill (referenceKey <|> noteBlock <|> lineClump) eof
+ manyTill (referenceKey <|> anchorDef <|>
+ noteBlock <|> citationBlock <|>
+ headerBlock <|> lineClump) eof
setInput docMinusKeys
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
+ updateState $ \s -> s { stateNotes = reverse reversedNotes
+ , stateHeaders = mempty
+ , stateIdentifiers = mempty }
-- now parse it for real...
blocks <- B.toList <$> parseBlocks
+ citations <- (sort . M.toList . stateCitations) <$> getState
+ citationItems <- mapM parseCitation citations
+ let refBlock = if null citationItems
+ then []
+ else [Div ("citations",[],[]) $
+ B.toList $ B.definitionList citationItems]
standalone <- getOption readerStandalone
state <- getState
let meta = stateMeta state
let (blocks', meta') = if standalone
then titleTransform (blocks, meta)
else (blocks, meta)
- return $ Pandoc meta' blocks'
+ reportLogMessages
+ return $ Pandoc meta' (blocks' ++ refBlock)
+
+parseCitation :: PandocMonad m
+ => (String, String) -> RSTParser m (Inlines, [Blocks])
+parseCitation (ref, raw) = do
+ contents <- parseFromString' parseBlocks raw
+ return (B.spanWith (ref, ["citation-label"], []) (B.str ref),
+ [contents])
+
--
-- parsing blocks
--
-parseBlocks :: RSTParser Blocks
+parseBlocks :: PandocMonad m => RSTParser m Blocks
parseBlocks = mconcat <$> manyTill block eof
-block :: RSTParser Blocks
+block :: PandocMonad m => RSTParser m Blocks
block = choice [ codeBlock
, blockQuote
, fieldList
, directive
+ , anchor
, comment
, header
, hrule
@@ -191,7 +232,7 @@ block = choice [ codeBlock
-- field list
--
-rawFieldListItem :: Int -> RSTParser (String, String)
+rawFieldListItem :: Monad m => Int -> RSTParser m (String, String)
rawFieldListItem minIndent = try $ do
indent <- length <$> many (char ' ')
guard $ indent >= minIndent
@@ -204,15 +245,15 @@ rawFieldListItem minIndent = try $ do
let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
return (name, raw)
-fieldListItem :: Int -> RSTParser (Inlines, [Blocks])
+fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
fieldListItem minIndent = try $ do
(name, raw) <- rawFieldListItem minIndent
term <- parseInlineFromString name
- contents <- parseFromString parseBlocks raw
+ contents <- parseFromString' parseBlocks raw
optional blanklines
return (term, [contents])
-fieldList :: RSTParser Blocks
+fieldList :: PandocMonad m => RSTParser m Blocks
fieldList = try $ do
indent <- length <$> lookAhead (many spaceChar)
items <- many1 $ fieldListItem indent
@@ -224,43 +265,62 @@ fieldList = try $ do
-- line block
--
-lineBlock :: RSTParser Blocks
+lineBlock :: PandocMonad m => RSTParser m Blocks
lineBlock = try $ do
lines' <- lineBlockLines
lines'' <- mapM parseInlineFromString lines'
return $ B.lineBlock lines''
+lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks
+lineBlockDirective body = do
+ lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body
+ return $ B.lineBlock lines'
+
--
-- paragraph block
--
-- note: paragraph can end in a :: starting a code block
-para :: RSTParser Blocks
+para :: PandocMonad m => RSTParser m Blocks
para = try $ do
result <- trimInlines . mconcat <$> many1 inline
option (B.plain result) $ try $ do
newline
blanklines
case viewr (B.unMany result) of
- ys :> (Str xs) | "::" `isSuffixOf` xs -> do
+ ys :> Str xs | "::" `isSuffixOf` xs -> do
raw <- option mempty codeBlockBody
return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs))
<> raw
_ -> return (B.para result)
-plain :: RSTParser Blocks
+plain :: PandocMonad m => RSTParser m Blocks
plain = B.plain . trimInlines . mconcat <$> many1 inline
--
-- header blocks
--
-header :: RSTParser Blocks
+header :: PandocMonad m => RSTParser m Blocks
header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
-doubleHeader :: RSTParser Blocks
-doubleHeader = try $ do
+doubleHeader :: PandocMonad m => RSTParser m Blocks
+doubleHeader = do
+ (txt, c) <- doubleHeader'
+ -- check to see if we've had this kind of header before.
+ -- if so, get appropriate level. if not, add to list.
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case elemIndex (DoubleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [DoubleHeader c], length headerTable + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ attr <- registerHeader nullAttr txt
+ return $ B.headerWith attr level txt
+
+doubleHeader' :: PandocMonad m => RSTParser m (Inlines, Char)
+doubleHeader' = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
let lenTop = length (c:rest)
@@ -268,48 +328,45 @@ doubleHeader = try $ do
newline
txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline)
pos <- getPosition
- let len = (sourceColumn pos) - 1
- if (len > lenTop) then fail "title longer than border" else return ()
+ let len = sourceColumn pos - 1
+ when (len > lenTop) $ fail "title longer than border"
blankline -- spaces and newline
count lenTop (char c) -- the bottom line
blanklines
- -- check to see if we've had this kind of header before.
- -- if so, get appropriate level. if not, add to list.
+ return (txt, c)
+
+-- a header with line on the bottom only
+singleHeader :: PandocMonad m => RSTParser m Blocks
+singleHeader = do
+ (txt, c) <- singleHeader'
state <- getState
let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
+ let (headerTable',level) = case elemIndex (SingleHeader c) headerTable of
Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+ Nothing -> (headerTable ++ [SingleHeader c], length headerTable + 1)
setState (state { stateHeaderTable = headerTable' })
attr <- registerHeader nullAttr txt
return $ B.headerWith attr level txt
--- a header with line on the bottom only
-singleHeader :: RSTParser Blocks
-singleHeader = try $ do
+singleHeader' :: PandocMonad m => RSTParser m (Inlines, Char)
+singleHeader' = try $ do
notFollowedBy' whitespace
- txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline})
+ lookAhead $ anyLine >> oneOf underlineChars
+ txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline)
pos <- getPosition
- let len = (sourceColumn pos) - 1
+ let len = sourceColumn pos - 1
blankline
c <- oneOf underlineChars
count (len - 1) (char c)
many (char c)
blanklines
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- attr <- registerHeader nullAttr txt
- return $ B.headerWith attr level txt
+ return (txt, c)
--
-- hrule block
--
-hrule :: Parser [Char] st Blocks
+hrule :: Monad m => ParserT [Char] st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -323,14 +380,14 @@ hrule = try $ do
--
-- read a line indented by a given string
-indentedLine :: String -> Parser [Char] st [Char]
+indentedLine :: Monad m => String -> ParserT [Char] st m [Char]
indentedLine indents = try $ do
string indents
anyLine
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
-indentedBlock :: Parser [Char] st [Char]
+indentedBlock :: Monad m => ParserT [Char] st m [Char]
indentedBlock = try $ do
indents <- lookAhead $ many1 spaceChar
lns <- many1 $ try $ do b <- option "" blanklines
@@ -339,24 +396,24 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
-quotedBlock :: Parser [Char] st [Char]
+quotedBlock :: Monad m => ParserT [Char] st m [Char]
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
return $ unlines lns
-codeBlockStart :: Parser [Char] st Char
+codeBlockStart :: Monad m => ParserT [Char] st m Char
codeBlockStart = string "::" >> blankline >> blankline
-codeBlock :: Parser [Char] st Blocks
+codeBlock :: Monad m => ParserT [Char] st m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
-codeBlockBody :: Parser [Char] st Blocks
+codeBlockBody :: Monad m => ParserT [Char] st m Blocks
codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
(indentedBlock <|> quotedBlock)
-lhsCodeBlock :: RSTParser Blocks
+lhsCodeBlock :: Monad m => RSTParser m Blocks
lhsCodeBlock = try $ do
getPosition >>= guard . (==1) . sourceColumn
guardEnabled Ext_literate_haskell
@@ -366,14 +423,14 @@ lhsCodeBlock = try $ do
return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], [])
$ intercalate "\n" lns
-latexCodeBlock :: Parser [Char] st [[Char]]
+latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
-birdCodeBlock :: Parser [Char] st [[Char]]
+birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
@@ -381,99 +438,159 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
then map (drop 1) lns
else lns
-birdTrackLine :: Parser [Char] st [Char]
+birdTrackLine :: Monad m => ParserT [Char] st m [Char]
birdTrackLine = char '>' >> anyLine
--
-- block quotes
--
-blockQuote :: RSTParser Blocks
+blockQuote :: PandocMonad m => RSTParser m Blocks
blockQuote = do
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ raw ++ "\n\n"
return $ B.blockQuote contents
+{-
+Unsupported options for include:
+tab-width
+encoding
+-}
+
+includeDirective :: PandocMonad m
+ => String -> [(String, String)] -> String
+ -> RSTParser m Blocks
+includeDirective top fields body = do
+ let f = trim top
+ guard $ not (null f)
+ guard $ null (trim body)
+ -- options
+ let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
+ let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
+ oldPos <- getPosition
+ oldInput <- getInput
+ containers <- stateContainers <$> getState
+ when (f `elem` containers) $
+ throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
+ updateState $ \s -> s{ stateContainers = f : stateContainers s }
+ mbContents <- readFileFromDirs ["."] f
+ contentLines <- case mbContents of
+ Just s -> return $ lines s
+ Nothing -> do
+ logMessage $ CouldNotLoadIncludeFile f oldPos
+ return []
+ let numLines = length contentLines
+ let startLine' = case startLine of
+ Nothing -> 1
+ Just x | x >= 0 -> x
+ | otherwise -> numLines + x -- negative from end
+ let endLine' = case endLine of
+ Nothing -> numLines + 1
+ Just x | x >= 0 -> x
+ | otherwise -> numLines + x -- negative from end
+ let contentLines' = drop (startLine' - 1)
+ $ take (endLine' - 1) contentLines
+ let contentLines'' = (case trim <$> lookup "end-before" fields of
+ Just patt -> takeWhile (not . (patt `isInfixOf`))
+ Nothing -> id) .
+ (case trim <$> lookup "start-after" fields of
+ Just patt -> drop 1 .
+ dropWhile (not . (patt `isInfixOf`))
+ Nothing -> id) $ contentLines'
+ let contents' = unlines contentLines'' ++ "\n"
+ case lookup "code" fields of
+ Just lang -> do
+ let numberLines = lookup "number-lines" fields
+ let classes = trimr lang : ["numberLines" | isJust numberLines] ++
+ maybe [] words (lookup "class" fields)
+ let kvs = maybe [] (\n -> [("startFrom", trimr n)]) numberLines
+ let ident = maybe "" trimr $ lookup "name" fields
+ let attribs = (ident, classes, kvs)
+ return $ B.codeBlockWith attribs contents'
+ Nothing -> case lookup "literal" fields of
+ Just _ -> return $ B.rawBlock "rst" contents'
+ Nothing -> do
+ setPosition $ newPos f 1 1
+ setInput contents'
+ bs <- optional blanklines >>
+ (mconcat <$> many block)
+ setInput oldInput
+ setPosition oldPos
+ updateState $ \s -> s{ stateContainers =
+ tail $ stateContainers s }
+ return bs
+
+
--
-- list blocks
--
-list :: RSTParser Blocks
+list :: PandocMonad m => RSTParser m Blocks
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-definitionListItem :: RSTParser (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks])
definitionListItem = try $ do
-- avoid capturing a directive or comment
notFollowedBy (try $ char '.' >> char '.')
term <- trimInlines . mconcat <$> many1Till inline endline
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n"
+ contents <- parseFromString' parseBlocks $ raw ++ "\n"
return (term, [contents])
-definitionList :: RSTParser Blocks
+definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Parser [Char] st Int
+bulletListStart :: Monad m => ParserT [Char] st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
- white <- many1 spaceChar
+ white <- many1 spaceChar <|> "" <$ lookAhead (char '\n')
return $ length (marker:white)
-- parses ordered list start and returns its length (inc following whitespace)
-orderedListStart :: ListNumberStyle
+orderedListStart :: Monad m => ListNumberStyle
-> ListNumberDelim
- -> RSTParser Int
+ -> RSTParser m Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
- white <- many1 spaceChar
+ white <- many1 spaceChar <|> "" <$ lookAhead (char '\n')
return $ markerLen + length white
-- parse a line of a list item
-listLine :: Int -> RSTParser [Char]
+listLine :: Monad m => Int -> RSTParser m [Char]
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
- line <- anyLine
- return $ line ++ "\n"
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Int -> RSTParser [Char]
-indentWith num = do
- tabStop <- getOption readerTabStop
- if (num < tabStop)
- then count num (char ' ')
- else choice [ try (count num (char ' ')),
- (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+ anyLineNewline
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: RSTParser Int
- -> RSTParser (Int, [Char])
+rawListItem :: Monad m => RSTParser m Int
+ -> RSTParser m (Int, [Char])
rawListItem start = try $ do
markerLength <- start
- firstLine <- anyLine
+ firstLine <- anyLineNewline
restLines <- many (listLine markerLength)
- return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+ return (markerLength, firstLine ++ concat restLines)
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: Int -> RSTParser [Char]
+listContinuation :: Monad m => Int -> RSTParser m [Char]
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return $ blanks ++ concat result
-listItem :: RSTParser Int
- -> RSTParser Blocks
+listItem :: PandocMonad m
+ => RSTParser m Int
+ -> RSTParser m Blocks
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
- blanks <- choice [ try (many blankline <* lookAhead start),
- many1 blankline ] -- whole list must end with blank.
+ skipMany1 blankline <|> () <$ lookAhead start
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
@@ -481,52 +598,52 @@ listItem start = try $ do
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may itself contain block elements
- parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
+ parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n"
updateState (\st -> st {stateParserContext = oldContext})
return $ case B.toList parsed of
- [Para xs] -> B.singleton $ Plain xs
- [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys]
- [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys]
- [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys]
+ [Para xs] ->
+ B.singleton $ Plain xs
+ [Para xs, BulletList ys] ->
+ B.fromList [Plain xs, BulletList ys]
+ [Para xs, OrderedList s ys] ->
+ B.fromList [Plain xs, OrderedList s ys]
+ [Para xs, DefinitionList ys] ->
+ B.fromList [Plain xs, DefinitionList ys]
_ -> parsed
-orderedList :: RSTParser Blocks
+orderedList :: PandocMonad m => RSTParser m Blocks
orderedList = try $ do
(start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar)
items <- many1 (listItem (orderedListStart style delim))
- let items' = compactify' items
+ let items' = compactify items
return $ B.orderedListWith (start, style, delim) items'
-bulletList :: RSTParser Blocks
-bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
+bulletList :: PandocMonad m => RSTParser m Blocks
+bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart)
--
-- directive (e.g. comment, container, compound-paragraph)
--
-comment :: RSTParser Blocks
+comment :: Monad m => RSTParser m Blocks
comment = try $ do
string ".."
skipMany1 spaceChar <|> (() <$ lookAhead newline)
- notFollowedBy' directiveLabel
+ -- notFollowedBy' directiveLabel -- comment comes after directive so unnec.
manyTill anyChar blanklines
optional indentedBlock
return mempty
-directiveLabel :: RSTParser String
+directiveLabel :: Monad m => RSTParser m String
directiveLabel = map toLower
<$> many1Till (letter <|> char '-') (try $ string "::")
-directive :: RSTParser Blocks
+directive :: PandocMonad m => RSTParser m Blocks
directive = try $ do
string ".."
directive'
--- TODO: line-block, parsed-literal, table, csv-table, list-table
--- date
--- include
--- title
-directive' :: RSTParser Blocks
+directive' :: PandocMonad m => RSTParser m Blocks
directive' = do
skipMany1 spaceChar
label <- directiveLabel
@@ -541,43 +658,71 @@ directive' = do
body <- option "" $ try $ blanklines >> indentedBlock
optional blanklines
let body' = body ++ "\n\n"
- imgAttr cl = ("", classes, getAtt "width" ++ getAtt "height")
+ name = trim $ fromMaybe "" (lookup "name" fields)
+ imgAttr cl = ("", classes, widthAttr ++ heightAttr)
where
- classes = words $ maybe "" trim $ lookup cl fields
- getAtt k = case lookup k fields of
- Just v -> [(k, filter (not . isSpace) v)]
- Nothing -> []
+ classes = words $ maybe "" trim (lookup cl fields) ++
+ maybe "" (\x -> "align-" ++ trim x)
+ (lookup "align" fields)
+ scale = case trim <$> lookup "scale" fields of
+ Just v -> case reverse v of
+ '%':vv ->
+ case safeRead (reverse vv) of
+ Just (percent :: Double)
+ -> percent / 100.0
+ Nothing -> 1.0
+ _ ->
+ case safeRead v of
+ Just (s :: Double) -> s
+ Nothing -> 1.0
+ Nothing -> 1.0
+ widthAttr = maybe [] (\x -> [("width",
+ show $ scaleDimension scale x)])
+ $ lookup "width" fields >>=
+ (lengthToDim . filter (not . isSpace))
+ heightAttr = maybe [] (\x -> [("height",
+ show $ scaleDimension scale x)])
+ $ lookup "height" fields >>=
+ (lengthToDim . filter (not . isSpace))
case label of
+ "include" -> includeDirective top fields body'
+ "table" -> tableDirective top fields body'
+ "list-table" -> listTableDirective top fields body'
+ "csv-table" -> csvTableDirective top fields body'
+ "line-block" -> lineBlockDirective body'
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
- "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
- "container" -> parseFromString parseBlocks body'
+ "role" -> addNewRole top $ map (second trim) fields
+ "container" -> B.divWith (name, "container" : words top, []) <$>
+ parseFromString' parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
parseInlineFromString (trim top)
"unicode" -> B.para <$> -- consumed by substKey
parseInlineFromString (trim $ unicodeTransform top)
- "compound" -> parseFromString parseBlocks body'
- "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body'
- "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body'
- "highlights" -> B.blockQuote <$> parseFromString parseBlocks body'
+ "compound" -> parseFromString' parseBlocks body'
+ "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body'
+ "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body'
+ "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body'
"rubric" -> B.para . B.strong <$> parseInlineFromString top
_ | label `elem` ["attention","caution","danger","error","hint",
- "important","note","tip","warning"] ->
- do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
- return $ B.divWith ("",["admonition", label],[]) bod
- "admonition" ->
- do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
- return $ B.divWith ("",["admonition"],[]) bod
+ "important","note","tip","warning","admonition"] ->
+ do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
+ let lab = case label of
+ "admonition" -> mempty
+ (l:ls) -> B.divWith ("",["admonition-title"],[])
+ (B.para (B.str (toUpper l : ls)))
+ [] -> mempty
+ return $ B.divWith ("",[label],[]) (lab <> bod)
"sidebar" ->
do let subtit = maybe "" trim $ lookup "subtitle" fields
tit <- B.para . B.strong <$> parseInlineFromString
(trim top ++ if null subtit
then ""
else (": " ++ subtit))
- bod <- parseFromString parseBlocks body'
+ bod <- parseFromString' parseBlocks body'
return $ B.divWith ("",["sidebar"],[]) $ tit <> bod
"topic" ->
do tit <- B.para . B.strong <$> parseInlineFromString top
- bod <- parseFromString parseBlocks body'
+ bod <- parseFromString' parseBlocks body'
return $ B.divWith ("",["topic"],[]) $ tit <> bod
"default-role" -> mempty <$ updateState (\s ->
s { stateRstDefaultRole =
@@ -588,14 +733,15 @@ directive' = do
codeblock (words $ fromMaybe [] $ lookup "class" fields)
(lookup "number-lines" fields) (trim top) body
"aafig" -> do
- let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields)
+ let attribs = ("", ["aafig"], map (second trimr) fields)
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
"math" -> return $ B.para $ mconcat $ map B.displayMath
$ toChunks $ top ++ "\n\n" ++ body
"figure" -> do
- (caption, legend) <- parseFromString extractCaption body'
+ (caption, legend) <- parseFromString' extractCaption body'
let src = escapeURI $ trim top
- return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend
+ return $ B.para (B.imageWith (imgAttr "figclass") src "fig:"
+ caption) <> legend
"image" -> do
let src = escapeURI $ trim top
let alt = B.str $ maybe "image" trim $ lookup "alt" fields
@@ -606,28 +752,150 @@ directive' = do
$ B.imageWith attr src "" alt
Nothing -> B.imageWith attr src "" alt
"class" -> do
- let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
+ let attrs = ("", splitBy isSpace $ trim top,
+ map (second trimr) fields)
-- directive content or the first immediately following element
children <- case body of
"" -> block
- _ -> parseFromString parseBlocks body'
+ _ -> parseFromString' parseBlocks body'
return $ B.divWith attrs children
other -> do
pos <- getPosition
- addWarning (Just pos) $ "ignoring unknown directive: " ++ other
- return mempty
+ logMessage $ SkippedContent (".. " ++ other) pos
+ bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
+ return $ B.divWith ("",[other],[]) bod
+
+tableDirective :: PandocMonad m
+ => String -> [(String, String)] -> String -> RSTParser m Blocks
+tableDirective top fields body = do
+ bs <- parseFromString' parseBlocks body
+ case B.toList bs of
+ [Table _ aligns' widths' header' rows'] -> do
+ title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+ columns <- getOption readerColumns
+ let numOfCols = length header'
+ let normWidths ws =
+ map (/ max 1.0 (fromIntegral (columns - numOfCols))) ws
+ let widths = case trim <$> lookup "widths" fields of
+ Just "auto" -> replicate numOfCols 0.0
+ Just "grid" -> widths'
+ Just specs -> normWidths
+ $ map (fromMaybe (0 :: Double) . safeRead)
+ $ splitBy (`elem` (" ," :: String)) specs
+ Nothing -> widths'
+ -- align is not applicable since we can't represent whole table align
+ return $ B.singleton $ Table (B.toList title)
+ aligns' widths header' rows'
+ _ -> return mempty
+
+
+-- TODO: :stub-columns:.
+-- Only the first row becomes the header even if header-rows: > 1,
+-- since Pandoc doesn't support a table with multiple header rows.
+-- We don't need to parse :align: as it represents the whole table align.
+listTableDirective :: PandocMonad m
+ => String -> [(String, String)] -> String
+ -> RSTParser m Blocks
+listTableDirective top fields body = do
+ bs <- parseFromString' parseBlocks body
+ title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+ let rows = takeRows $ B.toList bs
+ headerRowsNum = fromMaybe (0 :: Int) $
+ lookup "header-rows" fields >>= safeRead
+ (headerRow,bodyRows,numOfCols) = case rows of
+ x:xs -> if headerRowsNum > 0
+ then (x, xs, length x)
+ else ([], rows, length x)
+ _ -> ([],[],0)
+ widths = case trim <$> lookup "widths" fields of
+ Just "auto" -> replicate numOfCols 0
+ Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
+ splitBy (`elem` (" ," :: String)) specs
+ _ -> replicate numOfCols 0
+ return $ B.table title
+ (zip (replicate numOfCols AlignDefault) widths)
+ headerRow
+ bodyRows
+ where takeRows [BulletList rows] = map takeCells rows
+ takeRows _ = []
+ takeCells [BulletList cells] = map B.fromList cells
+ takeCells _ = []
+ normWidths ws = map (/ max 1 (sum ws)) ws
+
+csvTableDirective :: PandocMonad m
+ => String -> [(String, String)] -> String
+ -> RSTParser m Blocks
+csvTableDirective top fields rawcsv = do
+ let explicitHeader = trim <$> lookup "header" fields
+ let opts = defaultCSVOptions{
+ csvDelim = case trim <$> lookup "delim" fields of
+ Just "tab" -> '\t'
+ Just "space" -> ' '
+ Just [c] -> c
+ _ -> ','
+ , csvQuote = case trim <$> lookup "quote" fields of
+ Just [c] -> c
+ _ -> '"'
+ , csvEscape = case trim <$> lookup "escape" fields of
+ Just [c] -> Just c
+ _ -> Nothing
+ , csvKeepSpace = case trim <$> lookup "keepspace" fields of
+ Just "true" -> True
+ _ -> False
+ }
+ let headerRowsNum = fromMaybe (case explicitHeader of
+ Just _ -> 1 :: Int
+ Nothing -> 0 :: Int) $
+ lookup "header-rows" fields >>= safeRead
+ rawcsv' <- case trim <$>
+ lookup "file" fields `mplus` lookup "url" fields of
+ Just u -> do
+ (bs, _) <- fetchItem u
+ return $ UTF8.toString bs
+ Nothing -> return rawcsv
+ let res = parseCSV opts (T.pack $ case explicitHeader of
+ Just h -> h ++ "\n" ++ rawcsv'
+ Nothing -> rawcsv')
+ case res of
+ Left e ->
+ throwError $ PandocParsecError "csv table" e
+ Right rawrows -> do
+ let parseCell = parseFromString' (plain <|> return mempty) . T.unpack
+ let parseRow = mapM parseCell
+ rows <- mapM parseRow rawrows
+ let (headerRow,bodyRows,numOfCols) =
+ case rows of
+ x:xs -> if headerRowsNum > 0
+ then (x, xs, length x)
+ else ([], rows, length x)
+ _ -> ([],[],0)
+ title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+ let normWidths ws = map (/ max 1 (sum ws)) ws
+ let widths =
+ case trim <$> lookup "widths" fields of
+ Just "auto" -> replicate numOfCols 0
+ Just specs -> normWidths
+ $ map (fromMaybe (0 :: Double) . safeRead)
+ $ splitBy (`elem` (" ," :: String)) specs
+ _ -> replicate numOfCols 0
+ return $ B.table title
+ (zip (replicate numOfCols AlignDefault) widths)
+ headerRow
+ bodyRows
-- TODO:
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
-addNewRole :: String -> [(String, String)] -> RSTParser Blocks
+addNewRole :: PandocMonad m
+ => String -> [(String, String)] -> RSTParser m Blocks
addNewRole roleString fields = do
- (role, parentRole) <- parseFromString inheritedRole roleString
+ pos <- getPosition
+ (role, parentRole) <- parseFromString' inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
let getBaseRole (r, f, a) roles =
case M.lookup r roles of
Just (r', f', a') -> getBaseRole (r', f', a') roles
- Nothing -> (r, f, a)
+ Nothing -> (r, f, a)
(baseRole, baseFmt, baseAttr) =
getBaseRole (parentRole, Nothing, nullAttr) customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
@@ -641,34 +909,32 @@ addNewRole roleString fields = do
in (ident, nub . (role :) . annotate $ classes, keyValues)
-- warn about syntax we ignore
- flip mapM_ fields $ \(key, _) -> case key of
- "language" -> when (baseRole /= "code") $ addWarning Nothing $
- "ignoring :language: field because the parent of role :" ++
- role ++ ": is :" ++ baseRole ++ ": not :code:"
- "format" -> when (baseRole /= "raw") $ addWarning Nothing $
- "ignoring :format: field because the parent of role :" ++
- role ++ ": is :" ++ baseRole ++ ": not :raw:"
- _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
- ": in definition of role :" ++ role ++ ": in"
+ forM_ fields $ \(key, _) -> case key of
+ "language" -> when (baseRole /= "code") $ logMessage $
+ SkippedContent ":language: [because parent of role is not :code:]"
+ pos
+ "format" -> when (baseRole /= "raw") $ logMessage $
+ SkippedContent ":format: [because parent of role is not :raw:]" pos
+ _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos
when (parentRole == "raw" && countKeys "format" > 1) $
- addWarning Nothing $
- "ignoring :format: fields after the first in the definition of role :"
- ++ role ++": in"
+ logMessage $ SkippedContent
+ ":format: [after first in definition of role]"
+ pos
when (parentRole == "code" && countKeys "language" > 1) $
- addWarning Nothing $
- "ignoring :language: fields after the first in the definition of role :"
- ++ role ++": in"
+ logMessage $ SkippedContent
+ ":language: [after first in definition of role]" pos
updateState $ \s -> s {
stateRstCustomRoles =
M.insert role (baseRole, fmt, attr) customRoles
}
- return $ B.singleton Null
+ return mempty
where
countKeys k = length . filter (== k) . map fst $ fields
inheritedRole =
- (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
+ (,) <$> roleName <*> ((char '(' *> roleName <* char ')')
+ <|> pure "span")
-- Can contain character codes as decimal numbers or
@@ -700,24 +966,29 @@ extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
where (ds,rest) = span isHexDigit s
mbc = safeRead ('\'':'\\':'x':ds ++ "'")
-extractCaption :: RSTParser (Inlines, Blocks)
+extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks)
extractCaption = do
capt <- trimInlines . mconcat <$> many inline
legend <- optional blanklines >> (mconcat <$> many block)
return (capt,legend)
--- divide string by blanklines
+-- divide string by blanklines, and surround with
+-- \begin{aligned}...\end{aligned} if needed.
toChunks :: String -> [String]
toChunks = dropWhile null
- . map (trim . unlines)
+ . map (addAligned . trim . unlines)
. splitBy (all (`elem` (" \t" :: String))) . lines
+ -- we put this in an aligned environment if it contains \\, see #4254
+ where addAligned s = if "\\\\" `isInfixOf` s
+ then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}"
+ else s
-codeblock :: [String] -> Maybe String -> String -> String -> RSTParser Blocks
+codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks
codeblock classes numberLines lang body =
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
where attribs = ("", classes', kvs)
classes' = "sourceCode" : lang
- : maybe [] (\_ -> ["numberLines"]) numberLines
+ : maybe [] (const ["numberLines"]) numberLines
++ classes
kvs = case numberLines of
Just "" -> []
@@ -728,30 +999,52 @@ codeblock classes numberLines lang body =
--- note block
---
-noteBlock :: RSTParser [Char]
+noteBlock :: Monad m => RSTParser m [Char]
noteBlock = try $ do
+ (ref, raw, replacement) <- noteBlock' noteMarker
+ updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s }
+ -- return blanks so line count isn't affected
+ return replacement
+
+citationBlock :: Monad m => RSTParser m [Char]
+citationBlock = try $ do
+ (ref, raw, replacement) <- noteBlock' citationMarker
+ updateState $ \s ->
+ s { stateCitations = M.insert ref raw (stateCitations s),
+ stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[]))
+ (stateKeys s) }
+ -- return blanks so line count isn't affected
+ return replacement
+
+noteBlock' :: Monad m
+ => RSTParser m String -> RSTParser m (String, String, String)
+noteBlock' marker = try $ do
startPos <- getPosition
string ".."
spaceChar >> skipMany spaceChar
- ref <- noteMarker
+ ref <- marker
first <- (spaceChar >> skipMany spaceChar >> anyLine)
<|> (newline >> return "")
blanks <- option "" blanklines
rest <- option "" indentedBlock
endPos <- getPosition
let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
- let newnote = (ref, raw)
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \s -> s { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n'
+ return (ref, raw, replacement)
+
+citationMarker :: Monad m => RSTParser m [Char]
+citationMarker = do
+ char '['
+ res <- simpleReferenceName
+ char ']'
+ return res
-noteMarker :: RSTParser [Char]
+noteMarker :: Monad m => RSTParser m [Char]
noteMarker = do
char '['
res <- many1 digit
- <|> (try $ char '#' >> liftM ('#':) simpleReferenceName')
+ <|>
+ try (char '#' >> liftM ('#':) simpleReferenceName)
<|> count 1 (oneOf "#*")
char ']'
return res
@@ -760,39 +1053,26 @@ noteMarker = do
-- reference key
--
-quotedReferenceName :: RSTParser Inlines
+quotedReferenceName :: PandocMonad m => RSTParser m String
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
- label' <- trimInlines . mconcat <$> many1Till inline (char '`')
- return label'
-
-unquotedReferenceName :: RSTParser Inlines
-unquotedReferenceName = try $ do
- label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
- return label'
+ manyTill anyChar (char '`')
-- Simple reference names are single words consisting of alphanumerics
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName' :: Parser [Char] st String
-simpleReferenceName' = do
+simpleReferenceName :: Monad m => ParserT [Char] st m String
+simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
- <|> (try $ oneOf "-_:+." <* lookAhead alphaNum)
+ <|> try (oneOf "-_:+." <* lookAhead alphaNum)
return (x:xs)
-simpleReferenceName :: Parser [Char] st Inlines
-simpleReferenceName = do
- raw <- simpleReferenceName'
- return $ B.str raw
+referenceName :: PandocMonad m => RSTParser m String
+referenceName = quotedReferenceName <|> simpleReferenceName
-referenceName :: RSTParser Inlines
-referenceName = quotedReferenceName <|>
- (try $ simpleReferenceName <* lookAhead (char ':')) <|>
- unquotedReferenceName
-
-referenceKey :: RSTParser [Char]
+referenceKey :: PandocMonad m => RSTParser m [Char]
referenceKey = do
startPos <- getPosition
choice [substKey, anonymousKey, regularKey]
@@ -801,16 +1081,16 @@ referenceKey = do
-- return enough blanks to replace key
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-targetURI :: Parser [Char] st [Char]
+targetURI :: Monad m => ParserT [Char] st m [Char]
targetURI = do
skipSpaces
optional newline
contents <- many1 (try (many spaceChar >> newline >>
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
- return $ escapeURI $ trim $ contents
+ return $ escapeURI $ trim contents
-substKey :: RSTParser ()
+substKey :: PandocMonad m => RSTParser m ()
substKey = try $ do
string ".."
skipMany1 spaceChar
@@ -826,31 +1106,85 @@ substKey = try $ do
[Para ils] -> return $ B.fromList ils
_ -> mzero
let key = toKey $ stripFirstAndLast ref
- updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s }
+ updateState $ \s -> s{ stateSubstitutions =
+ M.insert key il $ stateSubstitutions s }
-anonymousKey :: RSTParser ()
+anonymousKey :: Monad m => RSTParser m ()
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
let key = toKey $ "_" ++ printf "%09d" (sourceLine pos)
- --TODO: parse width, height, class and name attributes
- updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
-
-stripTicks :: String -> String
-stripTicks = reverse . stripTick . reverse . stripTick
- where stripTick ('`':xs) = xs
- stripTick xs = xs
-
-regularKey :: RSTParser ()
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
+ stateKeys s }
+
+referenceNames :: PandocMonad m => RSTParser m [String]
+referenceNames = do
+ let rn = try $ do
+ string ".. _"
+ ref <- quotedReferenceName
+ <|> many ( noneOf ":\n"
+ <|> try (char '\n' <*
+ string " " <*
+ notFollowedBy blankline)
+ <|> try (char ':' <* lookAhead alphaNum)
+ )
+ char ':'
+ return ref
+ first <- rn
+ rest <- many (try (blanklines *> rn))
+ return (first:rest)
+
+regularKey :: PandocMonad m => RSTParser m ()
regularKey = try $ do
- string ".. _"
- (_,ref) <- withRaw referenceName
- char ':'
+ -- we allow several references to the same URL, e.g.
+ -- .. _hello:
+ -- .. _goodbye: url.com
+ refs <- referenceNames
src <- targetURI
- let key = toKey $ stripTicks ref
- --TODO: parse width, height, class and name attributes
- updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
+ guard $ not (null src)
+ let keys = map toKey refs
+ forM_ keys $ \key ->
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
+ stateKeys s }
+
+anchorDef :: PandocMonad m => RSTParser m [Char]
+anchorDef = try $ do
+ (refs, raw) <- withRaw $ try (referenceNames <* blanklines)
+ forM_ refs $ \rawkey ->
+ updateState $ \s -> s { stateKeys =
+ M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
+ -- keep this for 2nd round of parsing, where we'll add the divs (anchor)
+ return raw
+
+anchor :: PandocMonad m => RSTParser m Blocks
+anchor = try $ do
+ refs <- referenceNames
+ blanklines
+ b <- block
+ let addDiv ref = B.divWith (ref, [], [])
+ let emptySpanWithId id' = Span (id',[],[]) []
+ -- put identifier on next block:
+ case B.toList b of
+ [Header lev (_,classes,kvs) txt] ->
+ case reverse refs of
+ [] -> return b
+ (r:rs) -> return $ B.singleton $
+ Header lev (r,classes,kvs)
+ (txt ++ map emptySpanWithId rs)
+ -- we avoid generating divs for headers,
+ -- because it hides them from promoteHeader, see #4240
+ _ -> return $ foldr addDiv b refs
+
+headerBlock :: PandocMonad m => RSTParser m [Char]
+headerBlock = do
+ ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader')
+ (ident,_,_) <- registerHeader nullAttr txt
+ let key = toKey (stringify txt)
+ updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr)
+ $ stateKeys s }
+ return raw
+
--
-- tables
@@ -869,45 +1203,53 @@ regularKey = try $ do
-- Grid tables TODO:
-- - column spans
-dashedLine :: Char -> Parser [Char] st (Int, Int)
+dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
-simpleTableSep :: Char -> RSTParser Char
+simpleTableSep :: Monad m => Char -> RSTParser m Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-- Parse a table footer
-simpleTableFooter :: RSTParser [Char]
+simpleTableFooter :: Monad m => RSTParser m [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: [Int] -> RSTParser [String]
-simpleTableRawLine indices = do
- line <- many1Till anyChar newline
- return (simpleTableSplitLine indices line)
+simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine
+
+simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLineWithEmptyCell indices = try $ do
+ cs <- simpleTableRawLine indices
+ let isEmptyCell = all (\c -> c == ' ' || c == '\t')
+ guard $ any isEmptyCell cs
+ return cs
-- Parse a table row and return a list of blocks (columns).
-simpleTableRow :: [Int] -> RSTParser [[Block]]
+simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks]
simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
- colLines <- return [] -- TODO
- let cols = map unlines . transpose $ firstLine : colLines
- mapM (parseFromString (B.toList . mconcat <$> many plain)) cols
+ conLines <- many $ simpleTableRawLineWithEmptyCell indices
+ let cols = map unlines . transpose $ firstLine : conLines ++
+ [replicate (length indices) ""
+ | not (null conLines)]
+ mapM (parseFromString' parseBlocks) cols
simpleTableSplitLine :: [Int] -> String -> [String]
simpleTableSplitLine indices line =
- map trim
+ map trimr
$ tail $ splitByIndices (init indices) line
-simpleTableHeader :: Bool -- ^ Headerless table
- -> RSTParser ([[Block]], [Alignment], [Int])
+simpleTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m ([Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@@ -921,26 +1263,37 @@ simpleTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else simpleTableSplitLine indices rawContent
- heads <- mapM (parseFromString (B.toList . mconcat <$> many plain)) $
- map trim rawHeads
+ heads <- mapM ( parseFromString' (mconcat <$> many plain) . trim) rawHeads
return (heads, aligns, indices)
-- Parse a simple table.
-simpleTable :: Bool -- ^ Headerless table
- -> RSTParser Blocks
+simpleTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m Blocks
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
+ let wrapIdFst (a, b, c) = (Identity a, b, c)
+ wrapId = fmap Identity
+ tbl <- runIdentity <$> tableWith
+ (wrapIdFst <$> simpleTableHeader headless)
+ (wrapId <$> simpleTableRow)
+ sep simpleTableFooter
-- Simple tables get 0s for relative column widths (i.e., use default)
- return $ B.singleton $ Table c a (replicate (length a) 0) h l
+ case B.toList tbl of
+ [Table c a _w h l] -> return $ B.singleton $
+ Table c a (replicate (length a) 0) h l
+ _ ->
+ throwError $ PandocShouldNeverHappenError
+ "tableWith returned something unexpected"
where
sep = return () -- optional (simpleTableSep '-')
-gridTable :: Bool -- ^ Headerless table
- -> RSTParser Blocks
-gridTable headerless = B.singleton
- <$> gridTableWith (B.toList <$> parseBlocks) headerless
+gridTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m Blocks
+gridTable headerless = runIdentity <$>
+ gridTableWith (Identity <$> parseBlocks) headerless
-table :: RSTParser Blocks
+table :: PandocMonad m => RSTParser m Blocks
table = gridTable False <|> simpleTable False <|>
gridTable True <|> simpleTable True <?> "table"
@@ -948,7 +1301,7 @@ table = gridTable False <|> simpleTable False <|>
-- inline
--
-inline :: RSTParser Inlines
+inline :: PandocMonad m => RSTParser m Inlines
inline = choice [ note -- can start with whitespace, so try before ws
, whitespace
, link
@@ -964,29 +1317,30 @@ inline = choice [ note -- can start with whitespace, so try before ws
, escapedChar
, symbol ] <?> "inline"
-parseInlineFromString :: String -> RSTParser Inlines
-parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline)
+parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
+parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
-hyphens :: RSTParser Inlines
+hyphens :: Monad m => RSTParser m Inlines
hyphens = do
result <- many1 (char '-')
optional endline
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
-escapedChar :: Parser [Char] st Inlines
+escapedChar :: Monad m => ParserT [Char] st m Inlines
escapedChar = do c <- escaped anyChar
- return $ if c == ' ' -- '\ ' is null in RST
+ return $ if c == ' ' || c == '\n' || c == '\r'
+ -- '\ ' is null in RST
then mempty
else B.str [c]
-symbol :: RSTParser Inlines
+symbol :: Monad m => RSTParser m Inlines
symbol = do
result <- oneOf specialChars
return $ B.str [result]
-- parses inline code, between codeStart and codeEnd
-code :: RSTParser Inlines
+code :: Monad m => RSTParser m Inlines
code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
@@ -994,7 +1348,7 @@ code = try $ do
$ trim $ unwords $ lines result
-- succeeds only if we're not right after a str (ie. in middle of word)
-atStart :: RSTParser a -> RSTParser a
+atStart :: Monad m => RSTParser m a -> RSTParser m a
atStart p = do
pos <- getPosition
st <- getState
@@ -1002,11 +1356,11 @@ atStart p = do
guard $ stateLastStrPos st /= Just pos
p
-emph :: RSTParser Inlines
+emph :: PandocMonad m => RSTParser m Inlines
emph = B.emph . trimInlines . mconcat <$>
enclosed (atStart $ char '*') (char '*') inline
-strong :: RSTParser Inlines
+strong :: PandocMonad m => RSTParser m Inlines
strong = B.strong . trimInlines . mconcat <$>
enclosed (atStart $ string "**") (try $ string "**") inline
@@ -1018,12 +1372,13 @@ strong = B.strong . trimInlines . mconcat <$>
-- - Classes are silently discarded in addNewRole
-- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
-interpretedRole :: RSTParser Inlines
+interpretedRole :: PandocMonad m => RSTParser m Inlines
interpretedRole = try $ do
(role, contents) <- roleBefore <|> roleAfter
renderRole contents Nothing role nullAttr
-renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines
+renderRole :: PandocMonad m
+ => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
"sup" -> return $ B.superscript $ B.str contents
"superscript" -> return $ B.superscript $ B.str contents
@@ -1048,10 +1403,8 @@ renderRole contents fmt role attr = case role of
case M.lookup custom customRoles of
Just (newRole, newFmt, newAttr) ->
renderRole contents newFmt newRole newAttr
- Nothing -> do
- pos <- getPosition
- addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in"
- return $ B.str contents -- Undefined role
+ Nothing -> -- undefined role
+ return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
@@ -1061,33 +1414,33 @@ renderRole contents fmt role attr = case role of
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
addClass :: String -> Attr -> Attr
-addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
+addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues)
-roleName :: RSTParser String
+roleName :: PandocMonad m => RSTParser m String
roleName = many1 (letter <|> char '-')
-roleMarker :: RSTParser String
+roleMarker :: PandocMonad m => RSTParser m String
roleMarker = char ':' *> roleName <* char ':'
-roleBefore :: RSTParser (String,String)
+roleBefore :: PandocMonad m => RSTParser m (String,String)
roleBefore = try $ do
role <- roleMarker
contents <- unmarkedInterpretedText
return (role,contents)
-roleAfter :: RSTParser (String,String)
+roleAfter :: PandocMonad m => RSTParser m (String,String)
roleAfter = try $ do
contents <- unmarkedInterpretedText
role <- roleMarker <|> (stateRstDefaultRole <$> getState)
return (role,contents)
-unmarkedInterpretedText :: RSTParser [Char]
+unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
-whitespace :: RSTParser Inlines
+whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
-str :: RSTParser Inlines
+str :: Monad m => RSTParser m Inlines
str = do
let strChar = noneOf ("\t\n " ++ specialChars)
result <- many1 strChar
@@ -1095,26 +1448,24 @@ str = do
return $ B.str result
-- an endline character that can be treated as a space, not a structural break
-endline :: RSTParser Inlines
+endline :: Monad m => RSTParser m Inlines
endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts at beginning of line differently in a list:
st <- getState
- if (stateParserContext st) == ListItemState
- then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
+ when (stateParserContext st == ListItemState) $ notFollowedBy (anyOrderedListMarker >> spaceChar) >>
notFollowedBy' bulletListStart
- else return ()
return B.softbreak
--
-- links
--
-link :: RSTParser Inlines
+link :: PandocMonad m => RSTParser m Inlines
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-explicitLink :: RSTParser Inlines
+explicitLink :: PandocMonad m => RSTParser m Inlines
explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` marks start of inline code
@@ -1128,61 +1479,73 @@ explicitLink = try $ do
then B.str src
else label'
-- `link <google_>` is a reference link to _google!
- (src',tit,attr) <- case reverse src of
- '_':xs -> do
- keyTable <- stateKeys <$> getState
- let key = toKey $ reverse xs
- case M.lookup key keyTable of
- Nothing -> do
- pos <- getPosition
- addWarning (Just pos) $
- "Could not find reference for " ++
- show key
- return ("","",nullAttr)
- Just ((s,t),a) -> return (s,t,a)
- _ -> return (src, "", nullAttr)
+ ((src',tit),attr) <- case reverse src of
+ '_':xs -> lookupKey [] (toKey (reverse xs))
+ _ -> return ((src, ""), nullAttr)
return $ B.linkWith attr (escapeURI src') tit label''
-referenceLink :: RSTParser Inlines
+citationName :: PandocMonad m => RSTParser m String
+citationName = do
+ raw <- citationMarker
+ return $ "[" ++ raw ++ "]"
+
+referenceLink :: PandocMonad m => RSTParser m Inlines
referenceLink = try $ do
- (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <*
- char '_'
- state <- getState
- let keyTable = stateKeys state
+ ref <- (referenceName <|> citationName) <* char '_'
+ let label' = B.text ref
let isAnonKey (Key ('_':_)) = True
isAnonKey _ = False
- key <- option (toKey $ stripTicks ref) $
+ state <- getState
+ let keyTable = stateKeys state
+ key <- option (toKey ref) $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
- if null anonKeys
- then mzero
- else return (head anonKeys)
- ((src,tit), attr) <- case M.lookup key keyTable of
- Nothing -> do
- pos <- getPosition
- addWarning (Just pos) $
- "Could not find reference for " ++
- show key
- return (("",""),nullAttr)
- Just val -> return val
+ case anonKeys of
+ [] -> mzero
+ (k:_) -> return k
+ ((src,tit), attr) <- lookupKey [] key
-- if anonymous link, remove key so it won't be used again
- when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
+ when (isAnonKey key) $ updateState $ \s ->
+ s{ stateKeys = M.delete key keyTable }
return $ B.linkWith attr src tit label'
-autoURI :: RSTParser Inlines
+-- We keep a list of oldkeys so we can detect lookup loops.
+lookupKey :: PandocMonad m
+ => [Key] -> Key -> RSTParser m ((String, String), Attr)
+lookupKey oldkeys key = do
+ pos <- getPosition
+ state <- getState
+ let keyTable = stateKeys state
+ case M.lookup key keyTable of
+ Nothing -> do
+ let Key key' = key
+ logMessage $ ReferenceNotFound key' pos
+ return (("",""),nullAttr)
+ -- check for keys of the form link_, which need to be resolved:
+ Just ((u@(_:_),""),_) | last u == '_' -> do
+ let rawkey = init u
+ let newkey = toKey rawkey
+ if newkey `elem` oldkeys
+ then do
+ logMessage $ CircularReference rawkey pos
+ return (("",""),nullAttr)
+ else lookupKey (key:oldkeys) newkey
+ Just val -> return val
+
+autoURI :: Monad m => RSTParser m Inlines
autoURI = do
(orig, src) <- uri
return $ B.link src "" $ B.str orig
-autoEmail :: RSTParser Inlines
+autoEmail :: Monad m => RSTParser m Inlines
autoEmail = do
(orig, src) <- emailAddress
return $ B.link src "" $ B.str orig
-autoLink :: RSTParser Inlines
+autoLink :: PandocMonad m => RSTParser m Inlines
autoLink = autoURI <|> autoEmail
-subst :: RSTParser Inlines
+subst :: PandocMonad m => RSTParser m Inlines
subst = try $ do
(_,ref) <- withRaw $ enclosed (char '|') (char '|') inline
state <- getState
@@ -1191,12 +1554,11 @@ subst = try $ do
case M.lookup key substTable of
Nothing -> do
pos <- getPosition
- addWarning (Just pos) $
- "Could not find reference for " ++ show key
+ logMessage $ ReferenceNotFound (show key) pos
return mempty
Just target -> return target
-note :: RSTParser Inlines
+note :: PandocMonad m => RSTParser m Inlines
note = try $ do
optional whitespace
ref <- noteMarker
@@ -1206,8 +1568,7 @@ note = try $ do
case lookup ref notes of
Nothing -> do
pos <- getPosition
- addWarning (Just pos) $
- "Could not find note for " ++ show ref
+ logMessage $ ReferenceNotFound ref pos
return mempty
Just raw -> do
-- We temporarily empty the note list while parsing the note,
@@ -1215,8 +1576,8 @@ note = try $ do
-- Note references inside other notes are allowed in reST, but
-- not yet in this implementation.
updateState $ \st -> st{ stateNotes = [] }
- contents <- parseFromString parseBlocks raw
- let newnotes = if (ref == "*" || ref == "#") -- auto-numbered
+ contents <- parseFromString' parseBlocks raw
+ let newnotes = if ref == "*" || ref == "#" -- auto-numbered
-- delete the note so the next auto-numbered note
-- doesn't get the same contents:
then deleteFirstsBy (==) notes [(ref,raw)]
@@ -1224,20 +1585,20 @@ note = try $ do
updateState $ \st -> st{ stateNotes = newnotes }
return $ B.note contents
-smart :: RSTParser Inlines
+smart :: PandocMonad m => RSTParser m Inlines
smart = do
- getOption readerSmart >>= guard
+ guardEnabled Ext_smart
doubleQuoted <|> singleQuoted <|>
choice [apostrophe, dash, ellipses]
-singleQuoted :: RSTParser Inlines
+singleQuoted :: PandocMonad m => RSTParser m Inlines
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
B.singleQuoted . trimInlines . mconcat <$>
many1Till inline singleQuoteEnd
-doubleQuoted :: RSTParser Inlines
+doubleQuoted :: PandocMonad m => RSTParser m Inlines
doubleQuoted = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 76a25ad82..75e3f89eb 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -1,4 +1,7 @@
-{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE RelaxedPolyRec #-}
+{-# LANGUAGE TypeSynonymInstances #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{-
Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
@@ -30,54 +33,50 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of twiki text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.TWiki ( readTWiki
- , readTWikiWithWarnings
) where
-import Text.Pandoc.Definition
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.Char (isAlphaNum)
+import qualified Data.Foldable as F
+import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup
import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
+import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Control.Monad
-import Text.Printf (printf)
-import Debug.Trace (trace)
+import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.XML (fromEntities)
-import Data.Maybe (fromMaybe)
-import Text.HTML.TagSoup
-import Data.Char (isAlphaNum)
-import qualified Data.Foldable as F
-import Text.Pandoc.Error
-- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readTWiki opts s =
- (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n")
-
-readTWikiWithWarnings :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError (Pandoc, [String])
-readTWikiWithWarnings opts s =
- (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
- where parseTWikiWithWarnings = do
- doc <- parseTWiki
- warnings <- stateWarnings <$> getState
- return (doc, warnings)
-
-type TWParser = Parser [Char] ParserState
+readTWiki :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readTWiki opts s = do
+ res <- readWithM parseTWiki def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case res of
+ Left e -> throwError e
+ Right d -> return d
+
+type TWParser = ParserT [Char] ParserState
--
-- utility functions
--
-tryMsg :: String -> TWParser a -> TWParser a
+tryMsg :: String -> TWParser m a -> TWParser m a
tryMsg msg p = try p <?> msg
-skip :: TWParser a -> TWParser ()
+skip :: TWParser m a -> TWParser m ()
skip parser = parser >> return ()
-nested :: TWParser a -> TWParser a
+nested :: PandocMonad m => TWParser m a -> TWParser m a
nested p = do
nestlevel <- stateMaxNestingLevel <$> getState
guard $ nestlevel > 0
@@ -86,7 +85,7 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-htmlElement :: String -> TWParser (Attr, String)
+htmlElement :: PandocMonad m => String -> TWParser m (Attr, String)
htmlElement tag = tryMsg tag $ do
(TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
content <- manyTill anyChar (endtag <|> endofinput)
@@ -103,23 +102,24 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals)
classes = maybe [] words $ lookup "class" attrs
keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a])
+parseHtmlContentWithAttrs :: PandocMonad m
+ => String -> TWParser m a -> TWParser m (Attr, [a])
parseHtmlContentWithAttrs tag parser = do
(attr, content) <- htmlElement tag
parsedContent <- try $ parseContent content
return (attr, parsedContent)
where
- parseContent = parseFromString $ nested $ manyTill parser endOfContent
+ parseContent = parseFromString' $ nested $ manyTill parser endOfContent
endOfContent = try $ skipMany blankline >> skipSpaces >> eof
-parseHtmlContent :: String -> TWParser a -> TWParser [a]
+parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
--
-- main parser
--
-parseTWiki :: TWParser Pandoc
+parseTWiki :: PandocMonad m => TWParser m Pandoc
parseTWiki = do
bs <- mconcat <$> many block
spaces
@@ -131,20 +131,16 @@ parseTWiki = do
-- block parsers
--
-block :: TWParser B.Blocks
+block :: PandocMonad m => TWParser m B.Blocks
block = do
- tr <- getOption readerTrace
- pos <- getPosition
res <- mempty <$ skipMany1 blankline
<|> blockElements
<|> para
skipMany blankline
- when tr $
- trace (printf "line %d: %s" (sourceLine pos)
- (take 60 $ show $ B.toList res)) (return ())
+ trace (take 60 $ show $ B.toList res)
return res
-blockElements :: TWParser B.Blocks
+blockElements :: PandocMonad m => TWParser m B.Blocks
blockElements = choice [ separator
, header
, verbatim
@@ -155,10 +151,10 @@ blockElements = choice [ separator
, noautolink
]
-separator :: TWParser B.Blocks
+separator :: PandocMonad m => TWParser m B.Blocks
separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule
-header :: TWParser B.Blocks
+header :: PandocMonad m => TWParser m B.Blocks
header = tryMsg "header" $ do
string "---"
level <- many1 (char '+') >>= return . length
@@ -167,45 +163,47 @@ header = tryMsg "header" $ do
skipSpaces
content <- B.trimInlines . mconcat <$> manyTill inline newline
attr <- registerHeader ("", classes, []) content
- return $ B.headerWith attr level $ content
+ return $ B.headerWith attr level content
-verbatim :: TWParser B.Blocks
+verbatim :: PandocMonad m => TWParser m B.Blocks
verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
>>= return . (uncurry B.codeBlockWith)
-literal :: TWParser B.Blocks
+literal :: PandocMonad m => TWParser m B.Blocks
literal = htmlElement "literal" >>= return . rawBlock
where
format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
rawBlock (attrs, content) = B.rawBlock (format attrs) content
-list :: String -> TWParser B.Blocks
+list :: PandocMonad m => String -> TWParser m B.Blocks
list prefix = choice [ bulletList prefix
, orderedList prefix
, definitionList prefix]
-definitionList :: String -> TWParser B.Blocks
+definitionList :: PandocMonad m => String -> TWParser m B.Blocks
definitionList prefix = tryMsg "definitionList" $ do
indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
return $ B.definitionList elements
where
- parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks])
+ parseDefinitionListItem :: PandocMonad m
+ => String -> TWParser m (B.Inlines, [B.Blocks])
parseDefinitionListItem indent = do
string (indent ++ "$ ") >> skipSpaces
term <- many1Till inline $ string ": "
line <- listItemLine indent $ string "$ "
return $ (mconcat term, [line])
-bulletList :: String -> TWParser B.Blocks
+bulletList :: PandocMonad m => String -> TWParser m B.Blocks
bulletList prefix = tryMsg "bulletList" $
parseList prefix (char '*') (char ' ')
-orderedList :: String -> TWParser B.Blocks
+orderedList :: PandocMonad m => String -> TWParser m B.Blocks
orderedList prefix = tryMsg "orderedList" $
parseList prefix (oneOf "1iIaA") (string ". ")
-parseList :: String -> TWParser Char -> TWParser a -> TWParser B.Blocks
+parseList :: PandocMonad m
+ => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks
parseList prefix marker delim = do
(indent, style) <- lookAhead $ string prefix *> listStyle <* delim
blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
@@ -222,10 +220,12 @@ parseList prefix marker delim = do
style <- marker
return (concat indent, style)
-parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks
+parseListItem :: (PandocMonad m, Show a)
+ => String -> TWParser m a -> TWParser m B.Blocks
parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
-listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks
+listItemLine :: (PandocMonad m, Show a)
+ => String -> TWParser m a -> TWParser m B.Blocks
listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
where
lineContent = do
@@ -235,14 +235,14 @@ listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
filterSpaces = reverse . dropWhile (== ' ') . reverse
listContinuation = notFollowedBy (string prefix >> marker) >>
string " " >> lineContent
- parseContent = parseFromString $ many1 $ nestedList <|> parseInline
+ parseContent = parseFromString' $ many1 $ nestedList <|> parseInline
parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
return . B.plain . mconcat
nestedList = list prefix
lastNewline = try $ char '\n' <* eof
newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
-table :: TWParser B.Blocks
+table :: PandocMonad m => TWParser m B.Blocks
table = try $ do
tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
rows <- many1 tableParseRow
@@ -254,7 +254,7 @@ table = try $ do
columns rows = replicate (columCount rows) mempty
columCount rows = length $ head rows
-tableParseHeader :: TWParser ((Alignment, Double), B.Blocks)
+tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks)
tableParseHeader = try $ do
char '|'
leftSpaces <- many spaceChar >>= return . length
@@ -270,27 +270,27 @@ tableParseHeader = try $ do
| left > right = (AlignRight, 0)
| otherwise = (AlignLeft, 0)
-tableParseRow :: TWParser [B.Blocks]
+tableParseRow :: PandocMonad m => TWParser m [B.Blocks]
tableParseRow = many1Till tableParseColumn newline
-tableParseColumn :: TWParser B.Blocks
+tableParseColumn :: PandocMonad m => TWParser m B.Blocks
tableParseColumn = char '|' *> skipSpaces *>
tableColumnContent (skipSpaces >> char '|')
<* skipSpaces <* optional tableEndOfRow
-tableEndOfRow :: TWParser Char
+tableEndOfRow :: PandocMonad m => TWParser m Char
tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
-tableColumnContent :: TWParser a -> TWParser B.Blocks
+tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks
tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
where
content = continuation <|> inline
continuation = try $ char '\\' >> newline >> return mempty
-blockQuote :: TWParser B.Blocks
+blockQuote :: PandocMonad m => TWParser m B.Blocks
blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
-noautolink :: TWParser B.Blocks
+noautolink :: PandocMonad m => TWParser m B.Blocks
noautolink = do
(_, content) <- htmlElement "noautolink"
st <- getState
@@ -299,9 +299,9 @@ noautolink = do
setState $ st{ stateAllowLinks = True }
return $ mconcat blocks
where
- parseContent = parseFromString $ many $ block
+ parseContent = parseFromString' $ many $ block
-para :: TWParser B.Blocks
+para :: PandocMonad m => TWParser m B.Blocks
para = many1Till inline endOfParaElement >>= return . result . mconcat
where
endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
@@ -317,7 +317,7 @@ para = many1Till inline endOfParaElement >>= return . result . mconcat
-- inline parsers
--
-inline :: TWParser B.Inlines
+inline :: PandocMonad m => TWParser m B.Inlines
inline = choice [ whitespace
, br
, macro
@@ -338,36 +338,39 @@ inline = choice [ whitespace
, symbol
] <?> "inline"
-whitespace :: TWParser B.Inlines
+whitespace :: PandocMonad m => TWParser m B.Inlines
whitespace = (lb <|> regsp) >>= return
where lb = try $ skipMany spaceChar >> linebreak >> return B.space
regsp = try $ skipMany1 spaceChar >> return B.space
-br :: TWParser B.Inlines
+br :: PandocMonad m => TWParser m B.Inlines
br = try $ string "%BR%" >> return B.linebreak
-linebreak :: TWParser B.Inlines
+linebreak :: PandocMonad m => TWParser m B.Inlines
linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
where lastNewline = eof >> return mempty
innerNewline = return B.space
-between :: (Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c
+between :: (Monoid c, PandocMonad m, Show b)
+ => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c)
+ -> TWParser m c
between start end p =
mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
-enclosed :: (Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b
+enclosed :: (Monoid b, PandocMonad m, Show a)
+ => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b
enclosed sep p = between sep (try $ sep <* endMarker) p
where
endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
endSpace = (spaceChar <|> newline) >> return B.space
-macro :: TWParser B.Inlines
+macro :: PandocMonad m => TWParser m B.Inlines
macro = macroWithParameters <|> withoutParameters
where
withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
emptySpan name = buildSpan name [] mempty
-macroWithParameters :: TWParser B.Inlines
+macroWithParameters :: PandocMonad m => TWParser m B.Inlines
macroWithParameters = try $ do
char '%'
name <- macroName
@@ -382,22 +385,22 @@ buildSpan className kvs = B.spanWith attrs
additionalClasses = maybe [] words $ lookup "class" kvs
kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
-macroName :: TWParser String
+macroName :: PandocMonad m => TWParser m String
macroName = do
first <- letter
rest <- many $ alphaNum <|> char '_'
return (first:rest)
-attributes :: TWParser (String, [(String, String)])
+attributes :: PandocMonad m => TWParser m (String, [(String, String)])
attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
return . foldr (either mkContent mkKvs) ([], [])
where
spnl = skipMany (spaceChar <|> newline)
- mkContent c ([], kvs) = (c, kvs)
- mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
+ mkContent c ([], kvs) = (c, kvs)
+ mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
mkKvs kv (cont, rest) = (cont, (kv : rest))
-attribute :: TWParser (Either String (String, String))
+attribute :: PandocMonad m => TWParser m (Either String (String, String))
attribute = withKey <|> withoutKey
where
withKey = try $ do
@@ -411,49 +414,51 @@ attribute = withKey <|> withoutKey
| allowSpaces == True = many1 $ noneOf "}"
| otherwise = many1 $ noneOf " }"
-nestedInlines :: Show a => TWParser a -> TWParser B.Inlines
+nestedInlines :: (Show a, PandocMonad m)
+ => TWParser m a -> TWParser m B.Inlines
nestedInlines end = innerSpace <|> nestedInline
where
innerSpace = try $ whitespace <* (notFollowedBy end)
nestedInline = notFollowedBy whitespace >> nested inline
-strong :: TWParser B.Inlines
+strong :: PandocMonad m => TWParser m B.Inlines
strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
-strongHtml :: TWParser B.Inlines
+strongHtml :: PandocMonad m => TWParser m B.Inlines
strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
>>= return . B.strong . mconcat
-strongAndEmph :: TWParser B.Inlines
+strongAndEmph :: PandocMonad m => TWParser m B.Inlines
strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
-emph :: TWParser B.Inlines
+emph :: PandocMonad m => TWParser m B.Inlines
emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
-emphHtml :: TWParser B.Inlines
+emphHtml :: PandocMonad m => TWParser m B.Inlines
emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
>>= return . B.emph . mconcat
-nestedString :: Show a => TWParser a -> TWParser String
+nestedString :: (Show a, PandocMonad m)
+ => TWParser m a -> TWParser m String
nestedString end = innerSpace <|> (count 1 nonspaceChar)
where
innerSpace = try $ many1 spaceChar <* notFollowedBy end
-boldCode :: TWParser B.Inlines
+boldCode :: PandocMonad m => TWParser m B.Inlines
boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
-htmlComment :: TWParser B.Inlines
+htmlComment :: PandocMonad m => TWParser m B.Inlines
htmlComment = htmlTag isCommentTag >> return mempty
-code :: TWParser B.Inlines
+code :: PandocMonad m => TWParser m B.Inlines
code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
-codeHtml :: TWParser B.Inlines
+codeHtml :: PandocMonad m => TWParser m B.Inlines
codeHtml = do
(attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
return $ B.codeWith attrs $ fromEntities content
-autoLink :: TWParser B.Inlines
+autoLink :: PandocMonad m => TWParser m B.Inlines
autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
@@ -467,36 +472,36 @@ autoLink = try $ do
| c == '/' = True
| otherwise = isAlphaNum c
-str :: TWParser B.Inlines
+str :: PandocMonad m => TWParser m B.Inlines
str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
-nop :: TWParser B.Inlines
+nop :: PandocMonad m => TWParser m B.Inlines
nop = try $ (skip exclamation <|> skip nopTag) >> followContent
where
exclamation = char '!'
nopTag = stringAnyCase "<nop>"
followContent = many1 nonspaceChar >>= return . B.str . fromEntities
-symbol :: TWParser B.Inlines
+symbol :: PandocMonad m => TWParser m B.Inlines
symbol = count 1 nonspaceChar >>= return . B.str
-smart :: TWParser B.Inlines
+smart :: PandocMonad m => TWParser m B.Inlines
smart = do
- getOption readerSmart >>= guard
+ guardEnabled Ext_smart
doubleQuoted <|> singleQuoted <|>
choice [ apostrophe
, dash
, ellipses
]
-singleQuoted :: TWParser B.Inlines
+singleQuoted :: PandocMonad m => TWParser m B.Inlines
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
many1Till inline singleQuoteEnd >>=
(return . B.singleQuoted . B.trimInlines . mconcat)
-doubleQuoted :: TWParser B.Inlines
+doubleQuoted :: PandocMonad m => TWParser m B.Inlines
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
@@ -504,7 +509,7 @@ doubleQuoted = try $ do
return (B.doubleQuoted $ B.trimInlines contents))
<|> (return $ (B.str "\8220") B.<> contents)
-link :: TWParser B.Inlines
+link :: PandocMonad m => TWParser m B.Inlines
link = try $ do
st <- getState
guard $ stateAllowLinks st
@@ -513,13 +518,13 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
return $ B.link url title content
-linkText :: TWParser (String, String, B.Inlines)
+linkText :: PandocMonad m => TWParser m (String, String, B.Inlines)
linkText = do
string "[["
url <- many1Till anyChar (char ']')
- content <- option [B.str url] linkContent
+ content <- option (B.str url) (mconcat <$> linkContent)
char ']'
- return (url, "", mconcat content)
+ return (url, "", content)
where
linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
- parseLinkContent = parseFromString $ many1 inline
+ parseLinkContent = parseFromString' $ many1 inline
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
deleted file mode 100644
index e5778b123..000000000
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ /dev/null
@@ -1,48 +0,0 @@
-{-
-Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.TeXMath
- Copyright : Copyright (C) 2007-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of TeX math to a list of 'Pandoc' inline elements.
--}
-module Text.Pandoc.Readers.TeXMath ( texMathToInlines ) where
-
-import Text.Pandoc.Definition
-import Text.TeXMath
-
--- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
--- Defaults to raw formula between @$@ or @$$@ characters if entire formula
--- can't be converted.
-texMathToInlines :: MathType
- -> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> [Inline]
-texMathToInlines mt inp =
- case writePandoc dt `fmap` readTeX inp of
- Right (Just ils) -> ils
- _ -> [Str (delim ++ inp ++ delim)]
- where (dt, delim) = case mt of
- DisplayMath -> (DisplayBlock, "$$")
- InlineMath -> (DisplayInline, "$")
-
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 8dbbf7be2..30bb6a715 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -1,6 +1,6 @@
{-
-Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
- and John MacFarlane
+Copyright (C) 2010-2012 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
+ 2010-2018 John MacFarlane
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Textile
- Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane
+ Copyright : Copyright (C) 2010-2012 Paul Rivier
+ 2010-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : Paul Rivier <paul*rivier#demotera*com>
@@ -51,44 +52,42 @@ TODO : refactor common patterns across readers :
module Text.Pandoc.Readers.Textile ( readTextile) where
+import Control.Monad (guard, liftM)
+import Control.Monad.Except (throwError)
+import Data.Char (digitToInt, isUpper)
+import Data.List (intercalate, intersperse, transpose)
+import Data.Monoid ((<>))
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup (Tag (..), fromAttrib)
+import Text.HTML.TagSoup.Match
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.CSS
import Text.Pandoc.Definition
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
-import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
-import Text.Pandoc.Shared (trim)
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.HTML.TagSoup (fromAttrib, Tag(..))
-import Text.HTML.TagSoup.Match
-import Data.List ( intercalate, transpose, intersperse )
-import Data.Char ( digitToInt, isUpper )
-import Control.Monad ( guard, liftM, when )
-import Data.Monoid ((<>))
-import Text.Printf
-import Debug.Trace (trace)
-import Text.Pandoc.Error
+import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
+import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
+import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
-- | Parse a Textile text and return a Pandoc document.
-readTextile :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Either PandocError Pandoc
-readTextile opts s =
- (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
+readTextile :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readTextile opts s = do
+ parsed <- readWithM parseTextile def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
-- | Generate a Pandoc ADT from a textile document
-parseTextile :: Parser [Char] ParserState Pandoc
+parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc
parseTextile = do
- -- textile allows raw HTML and does smart punctuation by default,
- -- but we do not enable smart punctuation unless it is explicitly
- -- asked for, for better conversion to other light markup formats
- oldOpts <- stateOptions `fmap` getState
- updateState $ \state -> state{ stateOptions =
- oldOpts{ readerParseRaw = True
- , readerOldDashes = True
- } }
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
@@ -103,15 +102,15 @@ parseTextile = do
blocks <- parseBlocks
return $ Pandoc nullMeta (B.toList blocks) -- FIXME
-noteMarker :: Parser [Char] ParserState [Char]
+noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
-noteBlock :: Parser [Char] ParserState [Char]
+noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char]
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
optional blankline
- contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
+ contents <- unlines <$> many1Till anyLine (blanklines <|> noteBlock)
endPos <- getPosition
let newnote = (ref, contents ++ "\n")
st <- getState
@@ -121,11 +120,11 @@ noteBlock = try $ do
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-- | Parse document blocks
-parseBlocks :: Parser [Char] ParserState Blocks
+parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks
parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: [Parser [Char] ParserState Blocks]
+blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -140,26 +139,22 @@ blockParsers = [ codeBlock
]
-- | Any block in the order of definition of blockParsers
-block :: Parser [Char] ParserState Blocks
+block :: PandocMonad m => ParserT [Char] ParserState m Blocks
block = do
res <- choice blockParsers <?> "block"
- pos <- getPosition
- tr <- getOption readerTrace
- when tr $
- trace (printf "line %d: %s" (sourceLine pos)
- (take 60 $ show $ B.toList res)) (return ())
+ trace (take 60 $ show $ B.toList res)
return res
-commentBlock :: Parser [Char] ParserState Blocks
+commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
-codeBlock :: Parser [Char] ParserState Blocks
+codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
codeBlock = codeBlockBc <|> codeBlockPre
-codeBlockBc :: Parser [Char] ParserState Blocks
+codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks
codeBlockBc = try $ do
string "bc."
extended <- option False (True <$ char '.')
@@ -179,11 +174,10 @@ trimTrailingNewlines :: String -> String
trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: Parser [Char] ParserState Blocks
+codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks
codeBlockPre = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
result' <- manyTill anyChar (htmlTag (tagClose (=="pre")))
- optional blanklines
-- drop leading newline if any
let result'' = case result' of
'\n':xs -> xs
@@ -198,7 +192,7 @@ codeBlockPre = try $ do
return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: Parser [Char] ParserState Blocks
+header :: PandocMonad m => ParserT [Char] ParserState m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@@ -210,14 +204,14 @@ header = try $ do
return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
-blockQuote :: Parser [Char] ParserState Blocks
+blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
-- Horizontal rule
-hrule :: Parser [Char] st Blocks
+hrule :: PandocMonad m => ParserT [Char] st m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -232,66 +226,67 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: Parser [Char] ParserState Blocks
+anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: Int -> Parser [Char] ParserState Blocks
+anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks
+bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks
+orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
- p <- mconcat <$> many listInline
+ contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
+ try (newline >> codeBlockPre))
newline
sublist <- option mempty (anyListAtDepth (depth + 1))
- return $ (B.plain p) <> sublist
+ return $ contents <> sublist
-- | A definition list is a set of consecutive definition items
-definitionList :: Parser [Char] ParserState Blocks
+definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
-listStart :: Parser [Char] ParserState ()
+listStart :: PandocMonad m => ParserT [Char] ParserState m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
-genericListStart :: Char -> Parser [Char] st ()
+genericListStart :: PandocMonad m => Char -> ParserT [Char] st m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
-basicDLStart :: Parser [Char] ParserState ()
+basicDLStart :: PandocMonad m => ParserT [Char] ParserState m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
-definitionListStart :: Parser [Char] ParserState Inlines
+definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
@@ -300,34 +295,30 @@ definitionListStart = try $ do
<|> try (lookAhead (() <$ string ":="))
)
-listInline :: Parser [Char] ParserState Inlines
-listInline = try (notFollowedBy newline >> inline)
- <|> try (endline <* notFollowedBy listStart)
-
-- | A definition list item in textile begins with '- ', followed by
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks])
definitionListItem = try $ do
term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
- where inlineDef :: Parser [Char] ParserState [Blocks]
+ where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
- $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
- multilineDef :: Parser [Char] ParserState [Blocks]
+ $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
+ multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
s <- many1Till anyChar (try (string "=:" >> newline))
-- this ++ "\n\n" does not look very good
- ds <- parseFromString parseBlocks (s ++ "\n\n")
+ ds <- parseFromString' parseBlocks (s ++ "\n\n")
return [ds]
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: Parser [Char] ParserState Blocks
+rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
@@ -335,14 +326,14 @@ rawHtmlBlock = try $ do
return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: Parser [Char] ParserState Blocks
+rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: Parser [Char] ParserState Blocks
+para :: PandocMonad m => ParserT [Char] ParserState m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@@ -353,7 +344,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
-cellAttributes :: Parser [Char] ParserState (Bool, Alignment)
+cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
-- we just ignore colspan and rowspan markers:
@@ -366,18 +357,18 @@ cellAttributes = try $ do
return (isHeader, alignment)
-- | A table cell spans until a pipe |
-tableCell :: Parser [Char] ParserState ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
- (isHeader, alignment) <- option (False, AlignDefault) $ cellAttributes
+ (isHeader, alignment) <- option (False, AlignDefault) cellAttributes
notFollowedBy blankline
raw <- trim <$>
many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
- content <- mconcat <$> parseFromString (many inline) raw
+ content <- mconcat <$> parseFromString' (many inline) raw
return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
-tableRow :: Parser [Char] ParserState [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)]
tableRow = try $ do
-- skip optional row attributes
optional $ try $ do
@@ -387,7 +378,7 @@ tableRow = try $ do
many1 tableCell <* char '|' <* blankline
-- | A table with an optional header.
-table :: Parser [Char] ParserState Blocks
+table :: PandocMonad m => ParserT [Char] ParserState m Blocks
table = try $ do
-- ignore table attributes
caption <- option mempty $ try $ do
@@ -395,8 +386,8 @@ table = try $ do
_ <- attributes
char '.'
rawcapt <- trim <$> anyLine
- parseFromString (mconcat <$> many inline) rawcapt
- rawrows <- many1 $ (skipMany ignorableRow) >> tableRow
+ parseFromString' (mconcat <$> many inline) rawcapt
+ rawrows <- many1 $ skipMany ignorableRow >> tableRow
skipMany ignorableRow
blanklines
let (headers, rows) = case rawrows of
@@ -411,7 +402,7 @@ table = try $ do
(map (map snd) rows)
-- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: Parser [Char] ParserState ()
+ignorableRow :: PandocMonad m => ParserT [Char] ParserState m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
@@ -420,7 +411,7 @@ ignorableRow = try $ do
_ <- anyLine
return ()
-explicitBlockStart :: String -> Parser [Char] ParserState ()
+explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m ()
explicitBlockStart name = try $ do
string name
attributes
@@ -430,9 +421,10 @@ explicitBlockStart name = try $ do
-- | Blocks like 'p' and 'table' do not need explicit block tag.
-- However, they can be used to set HTML/CSS attributes when needed.
-maybeExplicitBlock :: String -- ^ block tag name
- -> Parser [Char] ParserState Blocks -- ^ implicit block
- -> Parser [Char] ParserState Blocks
+maybeExplicitBlock :: PandocMonad m
+ => String -- ^ block tag name
+ -> ParserT [Char] ParserState m Blocks -- ^ implicit block
+ -> ParserT [Char] ParserState m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
@@ -445,12 +437,11 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: Parser [Char] ParserState Inlines
-inline = do
- choice inlineParsers <?> "inline"
+inline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+inline = choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
-inlineParsers :: [Parser [Char] ParserState Inlines]
+inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines]
inlineParsers = [ str
, whitespace
, endline
@@ -470,13 +461,13 @@ inlineParsers = [ str
]
-- | Inline markups
-inlineMarkup :: Parser [Char] ParserState Inlines
+inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
, simpleInline (char '*') B.strong
, simpleInline (char '_') B.emph
- , simpleInline (char '+') B.emph -- approximates underline
+ , simpleInline (char '+') underlineSpan
, simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
, simpleInline (char '^') B.superscript
, simpleInline (char '~') B.subscript
@@ -484,35 +475,35 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
]
-- | Trademark, registered, copyright
-mark :: Parser [Char] st Inlines
+mark :: PandocMonad m => ParserT [Char] st m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: Parser [Char] st Inlines
+reg :: PandocMonad m => ParserT [Char] st m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
-tm :: Parser [Char] st Inlines
+tm :: PandocMonad m => ParserT [Char] st m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
-copy :: Parser [Char] st Inlines
+copy :: PandocMonad m => ParserT [Char] st m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
-note :: Parser [Char] ParserState Inlines
+note :: PandocMonad m => ParserT [Char] ParserState m Inlines
note = try $ do
- ref <- (char '[' *> many1 digit <* char ']')
+ ref <- char '[' *> many1 digit <* char ']'
notes <- stateNotes <$> getState
case lookup ref notes of
- Nothing -> fail "note not found"
- Just raw -> B.note <$> parseFromString parseBlocks raw
+ Nothing -> fail "note not found"
+ Just raw -> B.note <$> parseFromString' parseBlocks raw
-- | Special chars
markupChars :: [Char]
@@ -530,22 +521,22 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars ++ stringBreakers
-- | Parse a hyphened sequence of words
-hyphenedWords :: Parser [Char] ParserState String
+hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
return $ intercalate "-" (x:xs)
-wordChunk :: Parser [Char] ParserState String
+wordChunk :: PandocMonad m => ParserT [Char] ParserState m String
wordChunk = try $ do
hd <- noneOf wordBoundaries
- tl <- many ( (noneOf wordBoundaries) <|>
+ tl <- many ( noneOf wordBoundaries <|>
try (notFollowedBy' note *> oneOf markupChars
<* lookAhead (noneOf wordBoundaries) ) )
return $ hd:tl
-- | Any string
-str :: Parser [Char] ParserState Inlines
+str :: PandocMonad m => ParserT [Char] ParserState m Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediatly
@@ -558,11 +549,11 @@ str = do
return $ B.str fullStr
-- | Some number of space chars
-whitespace :: Parser [Char] st Inlines
+whitespace :: PandocMonad m => ParserT [Char] st m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: Parser [Char] ParserState Inlines
+endline :: PandocMonad m => ParserT [Char] ParserState m Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -570,18 +561,18 @@ endline = try $ do
notFollowedBy rawHtmlBlock
return B.linebreak
-rawHtmlInline :: Parser [Char] ParserState Inlines
+rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
-rawLaTeXInline' :: Parser [Char] ParserState Inlines
+rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
- B.singleton <$> rawLaTeXInline
+ B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
-link :: Parser [Char] ParserState Inlines
+link :: PandocMonad m => ParserT [Char] ParserState m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -591,8 +582,9 @@ link = try $ do
char ':'
let stop = if bracketed
then char ']'
- else lookAhead $ space <|>
- try (oneOf "!.,;:" *> (space <|> newline))
+ else lookAhead $ space <|> eof' <|>
+ try (oneOf "!.,;:" *>
+ (space <|> newline <|> eof'))
url <- many1Till nonspaceChar stop
let name' = if B.toList name == [Str "$"] then B.str url else name
return $ if attr == nullAttr
@@ -600,7 +592,7 @@ link = try $ do
else B.spanWith attr $ B.link url "" name'
-- | image embedding
-image :: Parser [Char] ParserState Inlines
+image :: PandocMonad m => ParserT [Char] ParserState m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
@@ -612,50 +604,51 @@ image = try $ do
char '!'
return $ B.imageWith attr src alt (B.str alt)
-escapedInline :: Parser [Char] ParserState Inlines
+escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: Parser [Char] ParserState Inlines
+escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines
escapedEqs = B.str <$>
- (try $ string "==" *> manyTill anyChar' (try $ string "=="))
+ try (string "==" *> manyTill anyChar' (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: Parser [Char] ParserState Inlines
+escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines
escapedTag = B.str <$>
- (try $ string "<notextile>" *>
+ try (string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: Parser [Char] ParserState Inlines
+symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines
symbol = B.str . singleton <$> (notFollowedBy newline *>
notFollowedBy rawHtmlBlock *>
oneOf wordBoundaries)
-- | Inline code
-code :: Parser [Char] ParserState Inlines
+code :: PandocMonad m => ParserT [Char] ParserState m Inlines
code = code1 <|> code2
-- any character except a newline before a blank line
-anyChar' :: Parser [Char] ParserState Char
+anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char
anyChar' =
- satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline)
+ satisfy (/='\n') <|>
+ try (char '\n' <* notFollowedBy blankline)
-code1 :: Parser [Char] ParserState Inlines
+code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines
code1 = B.code <$> surrounded (char '@') anyChar'
-code2 :: Parser [Char] ParserState Inlines
+code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
-attributes :: Parser [Char] ParserState Attr
-attributes = (foldl (flip ($)) ("",[],[])) <$>
+attributes :: PandocMonad m => ParserT [Char] ParserState m Attr
+attributes = foldl (flip ($)) ("",[],[]) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
-specialAttribute :: Parser [Char] ParserState (Attr -> Attr)
+specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
@@ -664,11 +657,11 @@ specialAttribute = do
notFollowedBy spaceChar
return $ addStyle ("text-align:" ++ alignStr)
-attribute :: Parser [Char] ParserState (Attr -> Attr)
+attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
-classIdAttr :: Parser [Char] ParserState (Attr -> Attr)
+classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
char '('
ws <- words `fmap` manyTill anyChar' (char ')')
@@ -679,7 +672,7 @@ classIdAttr = try $ do -- (class class #id)
classes' -> return $ \(_,_,keyvals) ->
("",classes',keyvals)
-styleAttr :: Parser [Char] ParserState (Attr -> Attr)
+styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
return $ addStyle style
@@ -690,21 +683,23 @@ addStyle style (id',classes,keyvals) =
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
-langAttr :: Parser [Char] ParserState (Attr -> Attr)
+langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
-- | Parses material surrounded by a parser.
-surrounded :: Parser [Char] st t -- ^ surrounding parser
- -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
- -> Parser [Char] st [a]
+surrounded :: (PandocMonad m, Show t)
+ => ParserT [Char] st m t -- ^ surrounding parser
+ -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT [Char] st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
-simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
- -> (Inlines -> Inlines) -- ^ Inline constructor
- -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly)
+simpleInline :: PandocMonad m
+ => ParserT [Char] ParserState m t -- ^ surrounding parser
+ -> (Inlines -> Inlines) -- ^ Inline constructor
+ -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
@@ -718,7 +713,7 @@ simpleInline border construct = try $ do
then body
else B.spanWith attr body
-groupedInlineMarkup :: Parser [Char] ParserState Inlines
+groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace
@@ -731,3 +726,5 @@ groupedInlineMarkup = try $ do
singleton :: a -> [a]
singleton x = [x]
+eof' :: Monad m => ParserT [Char] s m Char
+eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
new file mode 100644
index 000000000..a92f7bed2
--- /dev/null
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -0,0 +1,654 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RelaxedPolyRec #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+
+{- |
+ Module : Text.Pandoc.Readers.TikiWiki
+ Copyright : Copyright (C) 2017 Robin Lee Powell
+ License : GPLv2
+
+ Maintainer : Robin Lee Powell <robinleepowell@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of TikiWiki text to 'Pandoc' document.
+-}
+
+module Text.Pandoc.Readers.TikiWiki ( readTikiWiki
+ ) where
+
+import Control.Monad
+import Control.Monad.Except (throwError)
+import qualified Data.Foldable as F
+import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (CommonState (..), PandocMonad (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging (Verbosity (..))
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (enclosed, nested)
+import Text.Pandoc.Shared (crFilter)
+import Text.Pandoc.XML (fromEntities)
+import Text.Printf (printf)
+
+-- | Read TikiWiki from an input string and return a Pandoc document.
+readTikiWiki :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readTikiWiki opts s = do
+ res <- readWithM parseTikiWiki def{ stateOptions = opts }
+ (T.unpack (crFilter s) ++ "\n\n")
+ case res of
+ Left e -> throwError e
+ Right d -> return d
+
+type TikiWikiParser = ParserT [Char] ParserState
+
+--
+-- utility functions
+--
+
+tryMsg :: PandocMonad m => String -> TikiWikiParser m a -> TikiWikiParser m a
+tryMsg msg p = try p <?> msg
+
+skip :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m ()
+skip parser = Control.Monad.void parser
+
+nested :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+--
+-- main parser
+--
+
+parseTikiWiki :: PandocMonad m => TikiWikiParser m Pandoc
+parseTikiWiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ return $ B.doc bs
+
+block :: PandocMonad m => TikiWikiParser m B.Blocks
+block = do
+ verbosity <- getsCommonState stVerbosity
+ pos <- getPosition
+ res <- mempty <$ skipMany1 blankline
+ <|> blockElements
+ <|> para
+ skipMany blankline
+ when (verbosity >= INFO) $
+ trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res))
+ return res
+
+blockElements :: PandocMonad m => TikiWikiParser m B.Blocks
+blockElements = choice [ table
+ , hr
+ , header
+ , mixedList
+ , definitionList
+ , codeMacro
+ ]
+
+-- top
+-- ----
+-- bottom
+--
+-- ----
+--
+hr :: PandocMonad m => TikiWikiParser m B.Blocks
+hr = try $ do
+ string "----"
+ many (char '-')
+ newline
+ return B.horizontalRule
+
+-- ! header
+--
+-- !! header level two
+--
+-- !!! header level 3
+--
+header :: PandocMonad m => TikiWikiParser m B.Blocks
+header = tryMsg "header" $ do
+ level <- fmap length (many1 (char '!'))
+ guard $ level <= 6
+ skipSpaces
+ content <- B.trimInlines . mconcat <$> manyTill inline newline
+ attr <- registerHeader nullAttr content
+ return $B.headerWith attr level content
+
+tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks]
+tableRow = try $ do
+-- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n"))
+-- return $ map (B.plain . mconcat) row
+ row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n"))
+ return $ map B.plain row
+ where
+ parseColumn x = do
+ parsed <- parseFromString (many1 inline) x
+ return $ mconcat parsed
+
+
+
+-- Tables:
+--
+-- ||foo||
+--
+-- ||row1-column1|row1-column2||row2-column1|row2-column2||
+--
+-- ||row1-column1|row1-column2
+-- row2-column1|row2-column2||
+--
+-- ||row1-column1|row1-column2
+-- row2-column1|row2-column2||row3-column1|row3-column2||
+--
+-- || Orange | Apple | more
+-- Bread | Pie | more
+-- Butter | Ice cream | and more ||
+--
+table :: PandocMonad m => TikiWikiParser m B.Blocks
+table = try $ do
+ string "||"
+ rows <- sepBy1 tableRow (try $ string "\n" <|> (string "||" <* notFollowedBy (string "\n")))
+ string "||"
+ newline
+ -- return $ B.simpleTable (headers rows) $ trace ("rows: " ++ (show rows)) rows
+ return $B.simpleTable (headers rows) rows
+ where
+ -- The headers are as many empty srings as the number of columns
+ -- in the first row
+ headers rows = map (B.plain . B.str) $replicate (length $ head rows) ""
+
+para :: PandocMonad m => TikiWikiParser m B.Blocks
+para = fmap (result . mconcat) ( many1Till inline endOfParaElement)
+ where
+ endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ newBlockElement = try $ blankline >> skip blockElements
+ result content = if F.all (==Space) content
+ then mempty
+ else B.para $ B.trimInlines content
+
+-- ;item 1: definition 1
+-- ;item 2: definition 2-1
+-- + definition 2-2
+-- ;item ''3'': definition ''3''
+--
+definitionList :: PandocMonad m => TikiWikiParser m B.Blocks
+definitionList = tryMsg "definitionList" $ do
+ elements <-many1 parseDefinitionListItem
+ return $ B.definitionList elements
+ where
+ parseDefinitionListItem :: PandocMonad m => TikiWikiParser m (B.Inlines, [B.Blocks])
+ parseDefinitionListItem = do
+ skipSpaces >> char ';' <* skipSpaces
+ term <- many1Till inline $ char ':' <* skipSpaces
+ line <- listItemLine 1
+ return (mconcat term, [B.plain line])
+
+data ListType = None | Numbered | Bullet deriving (Ord, Eq, Show)
+
+data ListNesting = LN { lntype :: ListType, lnnest :: Int } deriving (Ord, Eq, Show)
+
+-- The first argument is a stack (most recent == head) of our list
+-- nesting status; the list type and the nesting level; if we're in
+-- a number list in a bullet list it'd be
+-- [LN Numbered 2, LN Bullet 1]
+--
+-- Mixed list example:
+--
+-- # one
+-- # two
+-- ** two point one
+-- ** two point two
+-- # three
+-- # four
+--
+mixedList :: PandocMonad m => TikiWikiParser m B.Blocks
+mixedList = try $ do
+ items <- try $ many1 listItem
+ return $ mconcat $ fixListNesting $ spanFoldUpList (LN None 0) items
+
+-- See the "Handling Lists" section of DESIGN-CODE for why this
+-- function exists. It's to post-process the lists and do some
+-- mappends.
+--
+-- We need to walk the tree two items at a time, so we can see what
+-- we're going to join *to* before we get there.
+--
+-- Because of that, it seemed easier to do it by hand than to try to
+-- figre out a fold or something.
+fixListNesting :: [B.Blocks] -> [B.Blocks]
+fixListNesting [] = []
+fixListNesting [first] = [recurseOnList first]
+-- fixListNesting nestall | trace ("\n\nfixListNesting: " ++ (show nestall)) False = undefined
+-- fixListNesting nestall@(first:second:rest) =
+fixListNesting (first:second:rest) =
+ let secondBlock = head $ B.toList second in
+ case secondBlock of
+ BulletList _ -> fixListNesting $ mappend (recurseOnList first) (recurseOnList second) : rest
+ OrderedList _ _ -> fixListNesting $ mappend (recurseOnList first) (recurseOnList second) : rest
+ _ -> recurseOnList first : fixListNesting (second:rest)
+
+-- This function walks the Block structure for fixListNesting,
+-- because it's a bit complicated, what with converting to and from
+-- lists and so on.
+recurseOnList :: B.Blocks -> B.Blocks
+-- recurseOnList item | trace ("rOL: " ++ (show $ length $ B.toList item) ++ ", " ++ (show $ B.toList item)) False = undefined
+recurseOnList items
+ | length (B.toList items) == 1 =
+ let itemBlock = head $ B.toList items in
+ case itemBlock of
+ BulletList listItems -> B.bulletList $ fixListNesting $ map B.fromList listItems
+ OrderedList _ listItems -> B.orderedList $ fixListNesting $ map B.fromList listItems
+ _ -> items
+
+ -- The otherwise works because we constructed the blocks, and we
+ -- know for a fact that no mappends have been run on them; each
+ -- Blocks consists of exactly one Block.
+ --
+ -- Anything that's not like that has already been processed by
+ -- fixListNesting; don't bother to process it again.
+ | otherwise = items
+
+
+-- Turn the list if list items into a tree by breaking off the first
+-- item, splitting the remainder of the list into items that are in
+-- the tree of the first item and those that aren't, wrapping the
+-- tree of the first item in its list time, and recursing on both
+-- sections.
+spanFoldUpList :: ListNesting -> [(ListNesting, B.Blocks)] -> [B.Blocks]
+spanFoldUpList _ [] = []
+spanFoldUpList ln [first] =
+ listWrap ln (fst first) [snd first]
+spanFoldUpList ln (first:rest) =
+ let (span1, span2) = span (splitListNesting (fst first)) rest
+ newTree1 = listWrap ln (fst first) $ snd first : spanFoldUpList (fst first) span1
+ newTree2 = spanFoldUpList ln span2
+ in
+ newTree1 ++ newTree2
+
+-- Decide if the second item should be in the tree of the first
+-- item, which is true if the second item is at a deeper nesting
+-- level and of the same type.
+splitListNesting :: ListNesting -> (ListNesting, B.Blocks) -> Bool
+splitListNesting ln1 (ln2, _)
+ | lnnest ln1 < lnnest ln2 =
+ True
+ | ln1 == ln2 =
+ True
+ | otherwise =
+ False
+
+-- If we've moved to a deeper nesting level, wrap the new level in
+-- the appropriate type of list.
+listWrap :: ListNesting -> ListNesting -> [B.Blocks] -> [B.Blocks]
+listWrap upperLN curLN retTree =
+ if upperLN == curLN then
+ retTree
+ else
+ case lntype curLN of
+ None -> []
+ Bullet -> [B.bulletList retTree]
+ Numbered -> [B.orderedList retTree]
+
+listItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
+listItem = choice [
+ bulletItem
+ , numberedItem
+ ]
+
+
+-- * Start each line
+-- * with an asterisk (*).
+-- ** More asterisks gives deeper
+-- *** and deeper levels.
+--
+bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
+bulletItem = try $ do
+ prefix <- many1 $ char '*'
+ many1 $ char ' '
+ content <- listItemLine (length prefix)
+ return (LN Bullet (length prefix), B.plain content)
+
+-- # Start each line
+-- # with a number (1.).
+-- ## More number signs gives deeper
+-- ### and deeper
+--
+numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
+numberedItem = try $ do
+ prefix <- many1 $ char '#'
+ many1 $ char ' '
+ content <- listItemLine (length prefix)
+ return (LN Numbered (length prefix), B.plain content)
+
+listItemLine :: PandocMonad m => Int -> TikiWikiParser m B.Inlines
+listItemLine nest = lineContent >>= parseContent
+ where
+ lineContent = do
+ content <- anyLine
+ continuation <- optionMaybe listContinuation
+ return $ filterSpaces content ++ "\n" ++ Data.Maybe.fromMaybe "" continuation
+ filterSpaces = reverse . dropWhile (== ' ') . reverse
+ listContinuation = string (replicate nest '+') >> lineContent
+ parseContent x = do
+ parsed <- parseFromString (many1 inline) x
+ return $ mconcat parsed
+
+-- Turn the CODE macro attributes into Pandoc code block attributes.
+mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)])
+mungeAttrs rawAttrs = ("", classes, rawAttrs)
+ where
+ -- "colors" is TikiWiki CODE macro for "name of language to do
+ -- highlighting for"; turn the value into a class
+ color = fromMaybe "" $ lookup "colors" rawAttrs
+ -- ln = 1 means line numbering. It's also the default. So we
+ -- emit numberLines as a class unless ln = 0
+ lnRaw = fromMaybe "1" $ lookup "ln" rawAttrs
+ ln = if lnRaw == "0" then
+ ""
+ else
+ "numberLines"
+ classes = filter (/= "") [color, ln]
+
+codeMacro :: PandocMonad m => TikiWikiParser m B.Blocks
+codeMacro = try $ do
+ string "{CODE("
+ rawAttrs <- macroAttrs
+ string ")}"
+ body <- manyTill anyChar (try (string "{CODE}"))
+ newline
+ if not (null rawAttrs)
+ then
+ return $ B.codeBlockWith (mungeAttrs rawAttrs) body
+ else
+ return $ B.codeBlock body
+
+
+--
+-- inline parsers
+--
+
+inline :: PandocMonad m => TikiWikiParser m B.Inlines
+inline = choice [ whitespace
+ , noparse
+ , strong
+ , emph
+ , nbsp
+ , image
+ , htmlComment
+ , strikeout
+ , code
+ , wikiLink
+ , notExternalLink
+ , externalLink
+ , superTag
+ , superMacro
+ , subTag
+ , subMacro
+ , escapedChar
+ , colored
+ , centered
+ , underlined
+ , boxed
+ , breakChars
+ , str
+ , symbol
+ ] <?> "inline"
+
+whitespace :: PandocMonad m => TikiWikiParser m B.Inlines
+whitespace = lb <|> regsp
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
+-- for this
+nbsp :: PandocMonad m => TikiWikiParser m B.Inlines
+nbsp = try $ do
+ string "~hs~"
+ return $ B.str " NOT SUPPORTED BEGIN: ~hs~ (non-breaking space) :END "
+
+-- UNSUPPORTED, as the desired behaviour (that the data be
+-- *retained* and stored as a comment) doesn't exist in calibre, and
+-- silently throwing data out seemed bad.
+htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines
+htmlComment = try $ do
+ string "~hc~"
+ inner <- many1 $ noneOf "~"
+ string "~/hc~"
+ return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END "
+
+linebreak :: PandocMonad m => TikiWikiParser m B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = eof >> return mempty
+ innerNewline = return B.space
+
+between :: (Monoid c, PandocMonad m, Show b) => TikiWikiParser m a -> TikiWikiParser m b -> (TikiWikiParser m b -> TikiWikiParser m c) -> TikiWikiParser m c
+between start end p =
+ mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
+
+enclosed :: (Monoid b, PandocMonad m, Show a) => TikiWikiParser m a -> (TikiWikiParser m a -> TikiWikiParser m b) -> TikiWikiParser m b
+enclosed sep p = between sep (try $ sep <* endMarker) p
+ where
+ endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|'_") <|> eof
+ endSpace = (spaceChar <|> newline) >> return B.space
+
+
+nestedInlines :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m B.Inlines
+nestedInlines end = innerSpace <|> nestedInline
+ where
+ innerSpace = try $ whitespace <* notFollowedBy end
+ nestedInline = notFollowedBy whitespace >> nested inline
+
+-- {img attId="39" imalign="right" link="http://info.tikiwiki.org" alt="Panama Hat"}
+--
+-- {img attId="37", thumb="mouseover", styleimage="border", desc="150"}
+--
+-- {img src="img/wiki_up/393px-Pears.jpg" thumb="y" imalign="center" stylebox="border" button="y" desc="Pretty pears" max="200" rel="box"}
+--
+image :: PandocMonad m => TikiWikiParser m B.Inlines
+image = try $ do
+ string "{img "
+ rawAttrs <- sepEndBy1 imageAttr spaces
+ string "}"
+ let src = fromMaybe "" $ lookup "src" rawAttrs
+ let title = fromMaybe src $ lookup "desc" rawAttrs
+ let alt = fromMaybe title $ lookup "alt" rawAttrs
+ let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs
+ if not (null src)
+ then
+ return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt)
+ else
+ return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END "
+ where
+ printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs
+
+imageAttr :: PandocMonad m => TikiWikiParser m (String, String)
+imageAttr = try $ do
+ key <- many1 (noneOf "=} \t\n")
+ char '='
+ optional $ char '"'
+ value <- many1 (noneOf "}\"\n")
+ optional $ char '"'
+ optional $ char ','
+ return (key, value)
+
+
+-- __strong__
+strong :: PandocMonad m => TikiWikiParser m B.Inlines
+strong = try $ fmap B.strong (enclosed (string "__") nestedInlines)
+
+-- ''emph''
+emph :: PandocMonad m => TikiWikiParser m B.Inlines
+emph = try $ fmap B.emph (enclosed (string "''") nestedInlines)
+
+-- ~246~
+escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines
+escapedChar = try $ do
+ string "~"
+ inner <- many1 $ oneOf "0123456789"
+ string "~"
+ return $B.str [toEnum (read inner :: Int) :: Char]
+
+-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
+-- for this
+centered :: PandocMonad m => TikiWikiParser m B.Inlines
+centered = try $ do
+ string "::"
+ inner <- many1 $ noneOf ":\n"
+ string "::"
+ return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END "
+
+-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
+-- for this
+colored :: PandocMonad m => TikiWikiParser m B.Inlines
+colored = try $ do
+ string "~~"
+ inner <- many1 $ noneOf "~\n"
+ string "~~"
+ return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END "
+
+-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
+-- for this
+underlined :: PandocMonad m => TikiWikiParser m B.Inlines
+underlined = try $ do
+ string "==="
+ inner <- many1 $ noneOf "=\n"
+ string "==="
+ return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END "
+
+-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
+-- for this
+boxed :: PandocMonad m => TikiWikiParser m B.Inlines
+boxed = try $ do
+ string "^"
+ inner <- many1 $ noneOf "^\n"
+ string "^"
+ return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END "
+
+-- --text--
+strikeout :: PandocMonad m => TikiWikiParser m B.Inlines
+strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines)
+
+nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String
+nestedString end = innerSpace <|> count 1 nonspaceChar
+ where
+ innerSpace = try $ many1 spaceChar <* notFollowedBy end
+
+breakChars :: PandocMonad m => TikiWikiParser m B.Inlines
+breakChars = try $ string "%%%" >> return B.linebreak
+
+-- superscript: foo{TAG(tag=>sup)}super{TAG}foo / bar{SUP()}super2{SUP}bar
+superTag :: PandocMonad m => TikiWikiParser m B.Inlines
+superTag = try $ fmap (B.superscript . B.text . fromEntities) ( between (string "{TAG(tag=>sup)}") (string "{TAG}") nestedString)
+
+superMacro :: PandocMonad m => TikiWikiParser m B.Inlines
+superMacro = try $ do
+ string "{SUP("
+ manyTill anyChar (string ")}")
+ body <- manyTill anyChar (string "{SUP}")
+ return $ B.superscript $ B.text body
+
+-- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux
+subTag :: PandocMonad m => TikiWikiParser m B.Inlines
+subTag = try $ fmap (B.subscript . B.text . fromEntities) ( between (string "{TAG(tag=>sub)}") (string "{TAG}") nestedString)
+
+subMacro :: PandocMonad m => TikiWikiParser m B.Inlines
+subMacro = try $ do
+ string "{SUB("
+ manyTill anyChar (string ")}")
+ body <- manyTill anyChar (string "{SUB}")
+ return $ B.subscript $ B.text body
+
+-- -+text+-
+code :: PandocMonad m => TikiWikiParser m B.Inlines
+code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString)
+
+macroAttr :: PandocMonad m => TikiWikiParser m (String, String)
+macroAttr = try $ do
+ key <- many1 (noneOf "=)")
+ char '='
+ optional $ char '"'
+ value <- many1 (noneOf " )\"")
+ optional $ char '"'
+ return (key, value)
+
+macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)]
+macroAttrs = try $ sepEndBy macroAttr spaces
+
+-- ~np~ __not bold__ ~/np~
+noparse :: PandocMonad m => TikiWikiParser m B.Inlines
+noparse = try $ do
+ string "~np~"
+ body <- manyTill anyChar (string "~/np~")
+ return $ B.str body
+
+str :: PandocMonad m => TikiWikiParser m B.Inlines
+str = fmap B.str (many1 alphaNum <|> count 1 characterReference)
+
+symbol :: PandocMonad m => TikiWikiParser m B.Inlines
+symbol = fmap B.str (count 1 nonspaceChar)
+
+-- [[not a link]
+notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines
+notExternalLink = try $ do
+ start <- string "[["
+ body <- many (noneOf "\n[]")
+ end <- string "]"
+ return $ B.text (start ++ body ++ end)
+
+-- [http://www.somesite.org url|Some Site title]
+-- ((internal link))
+--
+-- The ((...)) wiki links and [...] external links are handled
+-- exactly the same; this abstracts that out
+makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines
+makeLink start middle end = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (url, title, anchor) <- wikiLinkText start middle end
+ parsedTitle <- parseFromString (many1 inline) title
+ setState $ st{ stateAllowLinks = True }
+ return $ B.link (url++anchor) "" $mconcat parsedTitle
+
+wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String)
+wikiLinkText start middle end = do
+ string start
+ url <- many1 (noneOf $ middle ++ "\n")
+ seg1 <- option url linkContent
+ seg2 <- option "" linkContent
+ string end
+ if seg2 /= ""
+ then
+ return (url, seg2, seg1)
+ else
+ return (url, seg1, "")
+ where
+ linkContent = do
+ char '|'
+ many (noneOf middle)
+
+externalLink :: PandocMonad m => TikiWikiParser m B.Inlines
+externalLink = makeLink "[" "]|" "]"
+
+-- NB: this wiki linking is unlikely to work for anyone besides me
+-- (rlpowell); it happens to work for me because my Hakyll code has
+-- post-processing that treats pandoc .md titles as valid link
+-- targets, so something like
+-- [see also this other post](My Other Page) is perfectly valid.
+wikiLink :: PandocMonad m => TikiWikiParser m B.Inlines
+wikiLink = makeLink "((" ")|" "))"
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 0aafc83c7..f4dda7a11 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE ViewPatterns #-}
{-
Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
@@ -29,39 +28,39 @@ Conversion of txt2tags formatted plain text to 'Pandoc' document.
module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags
, getT2TMeta
, T2TMeta (..)
- , readTxt2TagsNoMacros)
+ )
where
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
-import Data.Monoid ((<>))
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL)
-import Text.Pandoc.Parsing hiding (space, spaces, uri, macro)
+import Control.Monad (guard, void, when)
+import Control.Monad.Except (catchError, throwError)
+import Control.Monad.Reader (Reader, asks, runReader)
import Data.Char (toLower)
-import Data.List (transpose, intersperse, intercalate)
-import Data.Maybe (fromMaybe)
---import Network.URI (isURI) -- Not sure whether to use this function
-import Control.Monad (void, guard, when)
import Data.Default
-import Control.Monad.Reader (Reader, runReader, asks)
-import Text.Pandoc.Error
-
-import Data.Time.LocalTime (getZonedTime)
-import System.Directory(getModificationTime)
+import Data.List (intercalate, transpose)
+import Data.Maybe (fromMaybe)
+import Data.Monoid ((<>))
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.Time.Format (formatTime)
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.Class as P
import Text.Pandoc.Compat.Time (defaultTimeLocale)
-import System.IO.Error (catchIOError)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (space, spaces, uri)
+import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI,
+ underlineSpan)
type T2T = ParserT String ParserState (Reader T2TMeta)
-- | An object for the T2T macros meta information
-- the contents of each field is simply substituted verbatim into the file
data T2TMeta = T2TMeta {
- date :: String -- ^ Current date
- , mtime :: String -- ^ Last modification time of infile
- , infile :: FilePath -- ^ Input file
+ date :: String -- ^ Current date
+ , mtime :: String -- ^ Last modification time of infile
+ , infile :: FilePath -- ^ Input file
, outfile :: FilePath -- ^ Output file
} deriving Show
@@ -69,26 +68,38 @@ instance Default T2TMeta where
def = T2TMeta "" "" "" ""
-- | Get the meta information required by Txt2Tags macros
-getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta
-getT2TMeta inps out = do
- curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime
+getT2TMeta :: PandocMonad m => m T2TMeta
+getT2TMeta = do
+ inps <- P.getInputFiles
+ outp <- fromMaybe "" <$> P.getOutputFile
+ curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
let getModTime = fmap (formatTime defaultTimeLocale "%T") .
- getModificationTime
+ P.getModificationTime
curMtime <- case inps of
- [] -> formatTime defaultTimeLocale "%T" <$> getZonedTime
- _ -> catchIOError
+ [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
+ _ -> catchError
(maximum <$> mapM getModTime inps)
(const (return ""))
- return $ T2TMeta curDate curMtime (intercalate ", " inps) out
+ return $ T2TMeta curDate curMtime (intercalate ", " inps) outp
-- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc
-readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
+readTxt2Tags :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readTxt2Tags opts s = do
+ meta <- getT2TMeta
+ let parsed = flip runReader meta $
+ readWithM parseT2T (def {stateOptions = opts}) $
+ T.unpack (crFilter s) ++ "\n\n"
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
-- | Read Txt2Tags (ignoring all macros) from an input string returning
-- a Pandoc document
-readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc
-readTxt2TagsNoMacros = readTxt2Tags def
+-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+-- readTxt2TagsNoMacros = readTxt2Tags
parseT2T :: T2T Pandoc
parseT2T = do
@@ -137,7 +148,7 @@ setting = do
string "%!"
keyword <- ignoreSpacesCap (many1 alphaNum)
char ':'
- value <- ignoreSpacesCap (manyTill anyChar (newline))
+ value <- ignoreSpacesCap (manyTill anyChar newline)
return (keyword, value)
-- Blocks
@@ -146,7 +157,7 @@ parseBlocks :: T2T Blocks
parseBlocks = mconcat <$> manyTill block eof
block :: T2T Blocks
-block = do
+block =
choice
[ mempty <$ blanklines
, quote
@@ -184,7 +195,7 @@ para = try $ do
listStart = try bulletListStart <|> orderedListStart
commentBlock :: T2T Blocks
-commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment
+commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment
-- Seperator and Strong line treated the same
hrule :: T2T Blocks
@@ -198,7 +209,7 @@ quote :: T2T Blocks
quote = try $ do
lookAhead tab
rawQuote <- many1 (tab *> optional spaces *> anyLine)
- contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
+ contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
return $ B.blockQuote contents
commentLine :: T2T Inlines
@@ -210,16 +221,16 @@ list :: T2T Blocks
list = choice [bulletList, orderedList, definitionList]
bulletList :: T2T Blocks
-bulletList = B.bulletList . compactify'
+bulletList = B.bulletList . compactify
<$> many1 (listItem bulletListStart parseBlocks)
orderedList :: T2T Blocks
-orderedList = B.orderedList . compactify'
+orderedList = B.orderedList . compactify
<$> many1 (listItem orderedListStart parseBlocks)
definitionList :: T2T Blocks
-definitionList = try $ do
- B.definitionList . compactify'DL <$>
+definitionList = try $
+ B.definitionList . compactifyDL <$>
many1 (listItem definitionListStart definitionListEnd)
definitionListEnd :: T2T (Inlines, [Blocks])
@@ -250,7 +261,7 @@ listItem start end = try $ do
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
rest <- concat <$> many (listContinuation markerLength)
- parseFromString end $ firstLine ++ blank ++ rest
+ parseFromString' end $ firstLine ++ blank ++ rest
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
@@ -262,12 +273,6 @@ listContinuation markerLength = try $
<*> many blankline)
where listLine = try $ indentWith markerLength *> anyLineNewline
-anyLineNewline :: T2T String
-anyLineNewline = (++ "\n") <$> anyLine
-
-indentWith :: Int -> T2T String
-indentWith n = count n space
-
-- Table
table :: T2T Blocks
@@ -276,17 +281,17 @@ table = try $ do
rows <- many1 (many commentLine *> tableRow)
let columns = transpose rows
let ncolumns = length columns
- let aligns = map (foldr1 findAlign) (map (map fst) columns)
+ let aligns = map (foldr1 findAlign . map fst) columns
let rows' = map (map snd) rows
let size = maximum (map length rows')
let rowsPadded = map (pad size) rows'
- let headerPadded = if (not (null tableHeader)) then pad size tableHeader else mempty
+ let headerPadded = if null tableHeader then mempty else pad size tableHeader
return $ B.table mempty
(zip aligns (replicate ncolumns 0.0))
headerPadded rowsPadded
pad :: (Monoid a) => Int -> [a] -> [a]
-pad n xs = xs ++ (replicate (n - length xs) mempty)
+pad n xs = xs ++ replicate (n - length xs) mempty
findAlign :: Alignment -> Alignment -> Alignment
@@ -309,7 +314,7 @@ genericRow start = try $ do
tableCell :: T2T (Alignment, Blocks)
tableCell = try $ do
leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead
- content <- (manyTill inline (try $ lookAhead (cellEnd)))
+ content <- manyTill inline (try $ lookAhead cellEnd)
rightSpaces <- length <$> many space
let align =
case compare leftSpaces rightSpaces of
@@ -317,9 +322,9 @@ tableCell = try $ do
EQ -> AlignCenter
GT -> AlignRight
endOfCell
- return $ (align, B.plain (B.trimInlines $ mconcat content))
+ return (align, B.plain (B.trimInlines $ mconcat content))
where
- cellEnd = (void newline <|> (many1 space *> endOfCell))
+ cellEnd = void newline <|> (many1 space *> endOfCell)
endOfCell :: T2T ()
endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline)
@@ -342,10 +347,10 @@ taggedBlock = do
genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
-blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks
-blockMarkupArea p f s = try $ (do
+blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupArea p f s = try (do
string s *> blankline
- f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline))))
+ f . mconcat <$> manyTill p (eof <|> void (string s *> blankline)))
blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
blockMarkupLine p f s = try (f <$> (string s *> space *> p))
@@ -363,7 +368,7 @@ parseInlines :: T2T Inlines
parseInlines = trimInlines . mconcat <$> many1 inline
inline :: T2T Inlines
-inline = do
+inline =
choice
[ endline
, macro
@@ -385,16 +390,16 @@ inline = do
]
bold :: T2T Inlines
-bold = inlineMarkup inline B.strong '*' (B.str)
+bold = inlineMarkup inline B.strong '*' B.str
underline :: T2T Inlines
-underline = inlineMarkup inline B.emph '_' (B.str)
+underline = inlineMarkup inline underlineSpan '_' B.str
strike :: T2T Inlines
-strike = inlineMarkup inline B.strikeout '-' (B.str)
+strike = inlineMarkup inline B.strikeout '-' B.str
italic :: T2T Inlines
-italic = inlineMarkup inline B.emph '/' (B.str)
+italic = inlineMarkup inline B.emph '/' B.str
code :: T2T Inlines
code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
@@ -413,7 +418,7 @@ tagged = do
-- Glued meaning that markup must be tight to content
-- Markup can't pass newlines
inlineMarkup :: Monoid a
- => (T2T a) -- Content parser
+ => T2T a -- Content parser
-> (a -> Inlines) -- Constructor
-> Char -- Fence
-> (String -> a) -- Special Case to handle ******
@@ -425,20 +430,24 @@ inlineMarkup p f c special = try $ do
when (l == 2) (void $ notFollowedBy space)
-- We must make sure that there is no space before the start of the
-- closing tags
- body <- optionMaybe (try $ manyTill (noneOf "\n\r") $
+ body <- optionMaybe (try $ manyTill (noneOf "\n\r")
(try $ lookAhead (noneOf " " >> string [c,c] )))
case body of
Just middle -> do
lastChar <- anyChar
end <- many1 (char c)
- let parser inp = parseFromString (mconcat <$> many p) inp
- let start' = special (drop 2 start)
+ let parser inp = parseFromString' (mconcat <$> many p) inp
+ let start' = case drop 2 start of
+ "" -> mempty
+ xs -> special xs
body' <- parser (middle ++ [lastChar])
- let end' = special (drop 2 end)
+ let end' = case drop 2 end of
+ "" -> mempty
+ xs -> special xs
return $ f (start' <> body' <> end')
Nothing -> do -- Either bad or case such as *****
guard (l >= 5)
- let body' = (replicate (l - 4) c)
+ let body' = replicate (l - 4) c
return $ f (special body')
link :: T2T Inlines
@@ -453,8 +462,8 @@ titleLink = try $ do
guard (length tokens >= 2)
char ']'
let link' = last tokens
- guard (length link' > 0)
- let tit = concat (intersperse " " (init tokens))
+ guard $ not $ null link'
+ let tit = unwords (init tokens)
return $ B.link link' "" (B.text tit)
-- Link with image
@@ -479,7 +488,7 @@ macro = try $ do
-- raw URLs in text are automatically linked
url :: T2T Inlines
url = try $ do
- (rawUrl, escapedUrl) <- (try uri <|> emailAddress)
+ (rawUrl, escapedUrl) <- try uri <|> emailAddress
return $ B.link rawUrl "" (B.str escapedUrl)
uri :: T2T (String, String)
@@ -520,8 +529,7 @@ image = try $ do
-- List taken from txt2tags source
let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
char '['
- path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions))
- ext <- oneOfStrings extensions
+ (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions)
char ']'
return $ B.image (path ++ ext) "" mempty
@@ -550,11 +558,10 @@ endline = try $ do
notFollowedBy quote
notFollowedBy list
notFollowedBy table
- return $ B.softbreak
+ return B.softbreak
str :: T2T Inlines
-str = try $ do
- B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
whitespace :: T2T Inlines
whitespace = try $ B.space <$ spaceChar
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
new file mode 100644
index 000000000..d717a1ba8
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -0,0 +1,673 @@
+{-
+ Copyright (C) 2017-2018 Yuchen Pei <me@ypei.me>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Vimwiki
+ Copyright : Copyright (C) 2017-2018 Yuchen Pei
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Yuchen Pei <me@ypei.me>
+ Stability : alpha
+ Portability : portable
+
+Conversion of vimwiki text to 'Pandoc' document.
+-}
+{--
+[X]: implemented
+[O]: not implemented
+* block parsers:
+ * [X] header
+ * [X] hrule
+ * [X] comment
+ * [X] blockquote
+ * [X] preformatted -- using codeblock
+ * [X] displaymath
+ * [X] bulletlist / orderedlist
+ * [X] todo lists -- using span.
+ * [X] table
+ * [X] centered table -- using div
+ * [O] colspan and rowspan -- see issue #1024
+ * [X] paragraph
+ * [X] definition list
+* inline parsers:
+ * [X] bareURL
+ * [X] strong
+ * [X] emph
+ * [X] strikeout
+ * [X] code
+ * [X] link
+ * [X] image
+ * [X] inline math
+ * [X] tag
+ * [X] sub- and super-scripts
+* misc:
+ * [X] `TODO:` mark
+ * [X] metadata placeholders: %title and %date
+ * [O] control placeholders: %template and %nohtml -- ignored
+--}
+
+module Text.Pandoc.Readers.Vimwiki ( readVimwiki
+ ) where
+import Control.Monad (guard)
+import Control.Monad.Except (throwError)
+import Data.Default
+import Data.List (isInfixOf, isPrefixOf)
+import Data.Maybe
+import Data.Monoid ((<>))
+import Data.Text (Text, unpack)
+import Text.Pandoc.Builder (Blocks, Inlines, fromList, toList, trimInlines)
+import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code,
+ codeBlockWith, definitionList,
+ displayMath, divWith, emph,
+ headerWith, horizontalRule, image,
+ imageWith, link, math, orderedList,
+ para, plain, setMeta, simpleTable,
+ softbreak, space, spanWith, str,
+ strikeout, strong, subscript,
+ superscript)
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.Definition (Attr, Block (BulletList, OrderedList),
+ Inline (Space), ListNumberDelim (..),
+ ListNumberStyle (..), Meta, Pandoc (..),
+ nullMeta)
+import Text.Pandoc.Options (ReaderOptions)
+import Text.Pandoc.Parsing (F, ParserState, ParserT, blanklines, emailAddress,
+ many1Till, orderedListMarker, readWithM,
+ registerHeader, runF, spaceChar, stateMeta',
+ stateOptions, uri)
+import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast)
+import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
+ spaces, string)
+import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1,
+ manyTill, notFollowedBy, option, skipMany1)
+import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
+
+readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki opts s = do
+ res <- readWithM parseVimwiki def{ stateOptions = opts }
+ (unpack (crFilter s))
+ case res of
+ Left e -> throwError e
+ Right result -> return result
+
+type VwParser = ParserT [Char] ParserState
+
+
+-- constants
+
+specialChars :: [Char]
+specialChars = "=*-#[]_~{}`$|:%^,"
+
+spaceChars :: [Char]
+spaceChars = " \t\n"
+
+-- main parser
+
+parseVimwiki :: PandocMonad m => VwParser m Pandoc
+parseVimwiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ st <- getState
+ let meta = runF (stateMeta' st) st
+ return $ Pandoc meta (toList bs)
+
+-- block parser
+
+block :: PandocMonad m => VwParser m Blocks
+block = do
+ res <- choice [ mempty <$ blanklines
+ , header
+ , hrule
+ , mempty <$ comment
+ , mixedList
+ , preformatted
+ , displayMath
+ , table
+ , mempty <$ placeholder
+ , blockQuote
+ , definitionList
+ , para
+ ]
+ trace (take 60 $ show $ toList res)
+ return res
+
+blockML :: PandocMonad m => VwParser m Blocks
+blockML = choice [preformatted, displayMath, table]
+
+header :: PandocMonad m => VwParser m Blocks
+header = try $ do
+ sp <- many spaceChar
+ eqs <- many1 (char '=')
+ spaceChar
+ let lev = length eqs
+ guard $ lev <= 6
+ contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar
+ >> string eqs >> many spaceChar >> newline)
+ attr <- registerHeader (makeId contents,
+ if sp == "" then [] else ["justcenter"], []) contents
+ return $ B.headerWith attr lev contents
+
+para :: PandocMonad m => VwParser m Blocks
+para = try $ do
+ contents <- trimInlines . mconcat <$> many1 inline
+ if all (==Space) (toList contents)
+ then return mempty
+ else return $ B.para contents
+
+hrule :: PandocMonad m => VwParser m Blocks
+hrule = try $ B.horizontalRule <$ (string "----" >> many (char '-') >> newline)
+
+comment :: PandocMonad m => VwParser m ()
+comment = try $ do
+ many spaceChar >> string "%%" >> many (noneOf "\n")
+ return ()
+
+blockQuote :: PandocMonad m => VwParser m Blocks
+blockQuote = try $ do
+ string " "
+ contents <- trimInlines . mconcat <$> many1 inlineBQ
+ if all (==Space) (toList contents)
+ then return mempty
+ else return $ B.blockQuote $ B.plain contents
+
+definitionList :: PandocMonad m => VwParser m Blocks
+definitionList = try $
+ B.definitionList <$> many1 (dlItemWithDT <|> dlItemWithoutDT)
+
+dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks])
+dlItemWithDT = do
+ dt <- definitionTerm
+ dds <- many definitionDef
+ return (dt, dds)
+
+dlItemWithoutDT :: PandocMonad m => VwParser m (Inlines, [Blocks])
+dlItemWithoutDT = do
+ dds <- many1 definitionDef
+ return (mempty, dds)
+
+definitionDef :: PandocMonad m => VwParser m Blocks
+definitionDef = try $
+ notFollowedBy definitionTerm >> many spaceChar
+ >> (definitionDef1 <|> definitionDef2)
+
+definitionDef1 :: PandocMonad m => VwParser m Blocks
+definitionDef1 = try $ mempty <$ defMarkerE
+
+definitionDef2 :: PandocMonad m => VwParser m Blocks
+definitionDef2 = try $ B.plain <$>
+ (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline)
+
+
+definitionTerm :: PandocMonad m => VwParser m Inlines
+definitionTerm = try $ do
+ x <- definitionTerm1 <|> definitionTerm2
+ guard (stringify x /= "")
+ return x
+
+definitionTerm1 :: PandocMonad m => VwParser m Inlines
+definitionTerm1 = try $
+ trimInlines . mconcat <$> manyTill inline' (try defMarkerE)
+
+definitionTerm2 :: PandocMonad m => VwParser m Inlines
+definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline'
+ (try $lookAhead (defMarkerM >> notFollowedBy hasDefMarkerM))
+
+defMarkerM :: PandocMonad m => VwParser m Char
+defMarkerM = string "::" >> spaceChar
+
+defMarkerE :: PandocMonad m => VwParser m Char
+defMarkerE = string "::" >> newline
+
+hasDefMarkerM :: PandocMonad m => VwParser m String
+hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM)
+
+preformatted :: PandocMonad m => VwParser m Blocks
+preformatted = try $ do
+ many spaceChar >> string "{{{"
+ attrText <- many (noneOf "\n")
+ lookAhead newline
+ contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}"
+ >> many spaceChar >> newline))
+ if (contents /= "") && (head contents == '\n')
+ then return $ B.codeBlockWith (makeAttr attrText) (tail contents)
+ else return $ B.codeBlockWith (makeAttr attrText) contents
+
+makeAttr :: String -> Attr
+makeAttr s =
+ let xs = splitBy (`elem` " \t") s in
+ ("", [], mapMaybe nameValue xs)
+
+nameValue :: String -> Maybe (String, String)
+nameValue s =
+ let t = splitBy (== '=') s in
+ if length t /= 2
+ then Nothing
+ else let (a, b) = (head t, last t) in
+ if (length b < 2) || ((head b, last b) /= ('"', '"'))
+ then Nothing
+ else Just (a, stripFirstAndLast b)
+
+
+displayMath :: PandocMonad m => VwParser m Blocks
+displayMath = try $ do
+ many spaceChar >> string "{{$"
+ mathTag <- option "" mathTagParser
+ many space
+ contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$"
+ >> many spaceChar >> newline))
+ let contentsWithTags
+ | mathTag == "" = contents
+ | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents
+ ++ "\n\\end{" ++ mathTag ++ "}"
+ return $ B.para $ B.displayMath contentsWithTags
+
+
+mathTagLaTeX :: String -> String
+mathTagLaTeX s = case s of
+ "equation" -> ""
+ "equation*" -> ""
+ "gather" -> "gathered"
+ "gather*" -> "gathered"
+ "multline" -> "gathered"
+ "multline*" -> "gathered"
+ "eqnarray" -> "aligned"
+ "eqnarray*" -> "aligned"
+ "align" -> "aligned"
+ "align*" -> "aligned"
+ "alignat" -> "aligned"
+ "alignat*" -> "aligned"
+ _ -> s
+
+
+mixedList :: PandocMonad m => VwParser m Blocks
+mixedList = try $ do
+ (bl, _) <- mixedList' (-1)
+ return $ head bl
+
+mixedList' :: PandocMonad m => Int -> VwParser m ([Blocks], Int)
+mixedList' prevInd = do
+ (curInd, builder) <- option (-1, "na") (lookAhead listStart)
+ if curInd < prevInd
+ then return ([], curInd)
+ else do
+ listStart
+ curLine <- listItemContent
+ let listBuilder =
+ if builder == "ul" then B.bulletList else B.orderedList
+ (subList, lowInd) <- mixedList' curInd
+ if lowInd >= curInd
+ then do
+ (sameIndList, endInd) <- mixedList' lowInd
+ let curList = combineList curLine subList ++ sameIndList
+ if curInd > prevInd
+ then return ([listBuilder curList], endInd)
+ else return (curList, endInd)
+ else do
+ let (curList, endInd) = (combineList curLine subList,
+ lowInd)
+ if curInd > prevInd
+ then return ([listBuilder curList], endInd)
+ else return (curList, endInd)
+
+plainInlineML' :: PandocMonad m => Inlines -> VwParser m Blocks
+plainInlineML' w = do
+ xs <- many inlineML
+ newline
+ return $ B.plain $ trimInlines $ mconcat $ w:xs
+
+plainInlineML :: PandocMonad m => VwParser m Blocks
+plainInlineML = notFollowedBy listStart >> spaceChar >> plainInlineML' mempty
+
+
+listItemContent :: PandocMonad m => VwParser m Blocks
+listItemContent = try $ do
+ w <- option mempty listTodoMarker
+ x <- plainInlineML' w
+ y <- many blocksThenInline
+ z <- many blockML
+ return $ mconcat $ x:y ++ z
+
+blocksThenInline :: PandocMonad m => VwParser m Blocks
+blocksThenInline = try $ do
+ y <- many1 blockML
+ x <- plainInlineML
+ return $ mconcat $ y ++ [x]
+
+listTodoMarker :: PandocMonad m => VwParser m Inlines
+listTodoMarker = try $ do
+ x <- between (many spaceChar >> char '[') (char ']' >> spaceChar)
+ (oneOf " .oOX")
+ return $ makeListMarkerSpan x
+
+makeListMarkerSpan :: Char -> Inlines
+makeListMarkerSpan x =
+ let cl = case x of
+ ' ' -> "done0"
+ '.' -> "done1"
+ 'o' -> "done2"
+ 'O' -> "done3"
+ 'X' -> "done4"
+ _ -> ""
+ in
+ B.spanWith ("", [cl], []) mempty
+
+combineList :: Blocks -> [Blocks] -> [Blocks]
+combineList x [y] = case toList y of
+ [BulletList z] -> [fromList $ toList x
+ ++ [BulletList z]]
+ [OrderedList attr z] -> [fromList $ toList x
+ ++ [OrderedList attr z]]
+ _ -> x:[y]
+combineList x xs = x:xs
+
+listStart :: PandocMonad m => VwParser m (Int, String)
+listStart = try $ do
+ s <- many spaceChar
+ listType <- bulletListMarkers <|> orderedListMarkers
+ spaceChar
+ return (length s, listType)
+
+bulletListMarkers :: PandocMonad m => VwParser m String
+bulletListMarkers = "ul" <$ (char '*' <|> char '-')
+
+orderedListMarkers :: PandocMonad m => VwParser m String
+orderedListMarkers =
+ ("ol" <$choice (orderedListMarker Decimal Period:(($OneParen) . orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha])))
+ <|> ("ol" <$ char '#')
+
+--many need trimInlines
+table :: PandocMonad m => VwParser m Blocks
+table = try $ do
+ indent <- lookAhead (many spaceChar)
+ (th, trs) <- table1 <|> table2
+ let tab = B.simpleTable th trs
+ if indent == ""
+ then return tab
+ else return $ B.divWith ("", ["center"], []) tab
+
+-- table with header
+table1 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]])
+table1 = try $ do
+ th <- tableRow
+ many1 tableHeaderSeparator
+ trs <- many tableRow
+ return (th, trs)
+
+-- headerless table
+table2 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]])
+table2 = try $ do
+ trs <- many1 tableRow
+ return (replicate (length $ head trs) mempty, trs)
+
+tableHeaderSeparator :: PandocMonad m => VwParser m ()
+tableHeaderSeparator = try $ do
+ many spaceChar >> char '|' >> many1 (many1 (char '-') >> char '|')
+ >> many spaceChar >> newline
+ return ()
+
+tableRow :: PandocMonad m => VwParser m [Blocks]
+tableRow = try $ do
+ many spaceChar >> char '|'
+ s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar
+ >> newline))
+ guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|")
+ tr <- many tableCell
+ many spaceChar >> char '\n'
+ return tr
+
+tableCell :: PandocMonad m => VwParser m Blocks
+tableCell = try $
+ B.plain . trimInlines . mconcat <$> manyTill inline' (char '|')
+
+placeholder :: PandocMonad m => VwParser m ()
+placeholder = try $
+ choice (ph <$> ["title", "date"]) <|> noHtmlPh <|> templatePh
+
+ph :: PandocMonad m => String -> VwParser m ()
+ph s = try $ do
+ many spaceChar >>string ('%':s) >> spaceChar
+ contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline)
+ --use lookAhead because of placeholder in the whitespace parser
+ let meta' = return $ B.setMeta s contents nullMeta :: F Meta
+ updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' }
+
+noHtmlPh :: PandocMonad m => VwParser m ()
+noHtmlPh = try $
+ () <$ (many spaceChar >> string "%nohtml" >> many spaceChar
+ >> lookAhead newline)
+
+templatePh :: PandocMonad m => VwParser m ()
+templatePh = try $
+ () <$ (many spaceChar >> string "%template" >>many (noneOf "\n")
+ >> lookAhead newline)
+
+-- inline parser
+
+inline :: PandocMonad m => VwParser m Inlines
+inline = choice $ whitespace endlineP:inlineList
+
+inlineList :: PandocMonad m => [VwParser m Inlines]
+inlineList = [ bareURL
+ , todoMark
+ , str
+ , strong
+ , emph
+ , strikeout
+ , code
+ , link
+ , image
+ , inlineMath
+ , tag
+ , superscript
+ , subscript
+ , special
+ ]
+
+-- inline parser without softbreaks or comment breaks
+inline' :: PandocMonad m => VwParser m Inlines
+inline' = choice $ whitespace':inlineList
+
+-- inline parser for blockquotes
+inlineBQ :: PandocMonad m => VwParser m Inlines
+inlineBQ = choice $ whitespace endlineBQ:inlineList
+
+-- inline parser for mixedlists
+inlineML :: PandocMonad m => VwParser m Inlines
+inlineML = choice $ whitespace endlineML:inlineList
+
+str :: PandocMonad m => VwParser m Inlines
+str = B.str <$>many1 (noneOf $ spaceChars ++ specialChars)
+
+whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines
+whitespace endline = B.space <$ (skipMany1 spaceChar <|>
+ try (newline >> (comment <|> placeholder)))
+ <|> B.softbreak <$ endline
+
+whitespace' :: PandocMonad m => VwParser m Inlines
+whitespace' = B.space <$ skipMany1 spaceChar
+
+special :: PandocMonad m => VwParser m Inlines
+special = B.str <$> count 1 (oneOf specialChars)
+
+bareURL :: PandocMonad m => VwParser m Inlines
+bareURL = try $ do
+ (orig, src) <- uri <|> emailAddress
+ return $ B.link src "" (B.str orig)
+
+strong :: PandocMonad m => VwParser m Inlines
+strong = try $ do
+ s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*")
+ guard $ (head s `notElem` spaceChars)
+ && (last s `notElem` spaceChars)
+ char '*'
+ contents <- mconcat <$>manyTill inline' (char '*'
+ >> notFollowedBy alphaNum)
+ return $ B.spanWith (makeId contents, [], []) mempty
+ <> B.strong contents
+
+makeId :: Inlines -> String
+makeId i = concat (stringify <$> toList i)
+
+emph :: PandocMonad m => VwParser m Inlines
+emph = try $ do
+ s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_")
+ guard $ (head s `notElem` spaceChars)
+ && (last s `notElem` spaceChars)
+ char '_'
+ contents <- mconcat <$>manyTill inline' (char '_'
+ >> notFollowedBy alphaNum)
+ return $ B.emph contents
+
+strikeout :: PandocMonad m => VwParser m Inlines
+strikeout = try $ do
+ string "~~"
+ contents <- mconcat <$>many1Till inline' (string "~~")
+ return $ B.strikeout contents
+
+code :: PandocMonad m => VwParser m Inlines
+code = try $ do
+ char '`'
+ contents <- many1Till (noneOf "\n") (char '`')
+ return $ B.code contents
+
+superscript :: PandocMonad m => VwParser m Inlines
+superscript = try $
+ B.superscript . mconcat <$> (char '^' >> many1Till inline' (char '^'))
+
+subscript :: PandocMonad m => VwParser m Inlines
+subscript = try $
+ B.subscript . mconcat <$> (string ",,"
+ >> many1Till inline' (try $ string ",,"))
+
+link :: PandocMonad m => VwParser m Inlines
+link = try $ do
+ string "[["
+ contents <- lookAhead $ manyTill anyChar (string "]]")
+ case '|' `elem` contents of
+ False -> do
+ manyTill anyChar (string "]]")
+-- not using try here because [[hell]o]] is not rendered as a link in vimwiki
+ return $ B.link (procLink contents) "" (B.str contents)
+ True -> do
+ url <- manyTill anyChar $ char '|'
+ lab <- mconcat <$> manyTill inline (string "]]")
+ return $ B.link (procLink url) "" lab
+
+image :: PandocMonad m => VwParser m Inlines
+image = try $ do
+ string "{{"
+ contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}")
+ images $ length $ filter (== '|') contentText
+
+images :: PandocMonad m => Int -> VwParser m Inlines
+images k
+ | k == 0 = do
+ imgurl <- manyTill anyChar (try $ string "}}")
+ return $ B.image (procImgurl imgurl) "" (B.str "")
+ | k == 1 = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$> manyTill inline (try $ string "}}")
+ return $ B.image (procImgurl imgurl) "" alt
+ | k == 2 = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$>manyTill inline (char '|')
+ attrText <- manyTill anyChar (try $ string "}}")
+ return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
+ | otherwise = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$>manyTill inline (char '|')
+ attrText <- manyTill anyChar (char '|')
+ manyTill anyChar (try $ string "}}")
+ return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
+
+procLink' :: String -> String
+procLink' s
+ | take 6 s == "local:" = "file" ++ drop 5 s
+ | take 6 s == "diary:" = "diary/" ++ drop 6 s ++ ".html"
+ | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:",
+ "news:", "telnet:" ])
+ = s
+ | s == "" = ""
+ | last s == '/' = s
+ | otherwise = s ++ ".html"
+
+procLink :: String -> String
+procLink s = procLink' x ++ y
+ where (x, y) = break (=='#') s
+
+procImgurl :: String -> String
+procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s
+
+inlineMath :: PandocMonad m => VwParser m Inlines
+inlineMath = try $ do
+ char '$'
+ contents <- many1Till (noneOf "\n") (char '$')
+ return $ B.math contents
+
+tag :: PandocMonad m => VwParser m Inlines
+tag = try $ do
+ char ':'
+ s <- manyTill (noneOf spaceChars) (try (char ':' >> lookAhead space))
+ guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":")
+ let ss = splitBy (==':') s
+ return $ mconcat $ makeTagSpan' (head ss):(makeTagSpan <$> tail ss)
+
+todoMark :: PandocMonad m => VwParser m Inlines
+todoMark = try $ do
+ string "TODO:"
+ return $ B.spanWith ("", ["todo"], []) (B.str "TODO:")
+
+-- helper functions and parsers
+endlineP :: PandocMonad m => VwParser m ()
+endlineP = () <$ try (newline <* nFBTTBSB <* notFollowedBy blockQuote)
+
+endlineBQ :: PandocMonad m => VwParser m ()
+endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ")
+
+endlineML :: PandocMonad m => VwParser m ()
+endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar)
+
+--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks
+nFBTTBSB :: PandocMonad m => VwParser m ()
+nFBTTBSB =
+ notFollowedBy newline <*
+ notFollowedBy hrule <*
+ notFollowedBy tableRow <*
+ notFollowedBy header <*
+ notFollowedBy listStart <*
+ notFollowedBy preformatted <*
+ notFollowedBy displayMath <*
+ notFollowedBy hasDefMarker
+
+hasDefMarker :: PandocMonad m => VwParser m ()
+hasDefMarker = () <$ manyTill (noneOf "\n") (string "::" >> oneOf spaceChars)
+
+makeTagSpan' :: String -> Inlines
+makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <>
+ B.spanWith (s, ["tag"], []) (B.str s)
+
+makeTagSpan :: String -> Inlines
+makeTagSpan s = B.space <> makeTagSpan' s
+
+mathTagParser :: PandocMonad m => VwParser m String
+mathTagParser = do
+ s <- try $ lookAhead (char '%' >> manyTill (noneOf spaceChars)
+ (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))
+ char '%' >> string s >> char '%'
+ return $ mathTagLaTeX s
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index d08d636df..a1c5c919e 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2011-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.SelfContained
- Copyright : Copyright (C) 2011-2016 John MacFarlane
+ Copyright : Copyright (C) 2011-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -30,32 +30,36 @@ Functions for converting an HTML file into one that can be viewed
offline, by incorporating linked images, CSS, and scripts into
the HTML using data URIs.
-}
-module Text.Pandoc.SelfContained ( makeSelfContained ) where
-import Text.HTML.TagSoup
-import Network.URI (isURI, escapeURIString, URI(..), parseURI)
+module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where
+import Codec.Compression.GZip as Gzip
+import Control.Applicative ((<|>))
+import Control.Monad.Except (throwError)
+import Control.Monad.Trans (lift)
+import Data.ByteString (ByteString)
import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B
-import Data.ByteString (ByteString)
-import System.FilePath (takeExtension, takeDirectory, (</>))
-import Data.Char (toLower, isAscii, isAlphaNum)
-import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', err, fetchItem', warn, trim)
-import Text.Pandoc.MediaBag (MediaBag)
+import Data.Char (isAlphaNum, isAscii, toLower)
+import Data.List (isPrefixOf)
+import Data.Monoid ((<>))
+import Network.URI (escapeURIString)
+import System.FilePath (takeDirectory, takeExtension, (</>))
+import Text.HTML.TagSoup
+import Text.Pandoc.Class (PandocMonad (..), fetchItem, getInputFiles, report,
+ setInputFiles)
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
import Text.Pandoc.MIME (MimeType)
+import Text.Pandoc.Shared (isURI, renderTags', trim)
import Text.Pandoc.UTF8 (toString)
-import Text.Pandoc.Options (WriterOptions(..))
-import Data.List (isPrefixOf)
-import Control.Applicative ((<|>))
-import Text.Parsec (runParserT, ParsecT)
+import Text.Parsec (ParsecT, runParserT)
import qualified Text.Parsec as P
-import Control.Monad.Trans (lift)
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
-makeDataURI :: String -> ByteString -> String
-makeDataURI mime raw =
+makeDataURI :: (MimeType, ByteString) -> String
+makeDataURI (mime, raw) =
if textual
then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw)
else "data:" ++ mime' ++ ";base64," ++ toString (encode raw)
@@ -64,64 +68,143 @@ makeDataURI mime raw =
then mime ++ ";charset=utf-8"
else mime -- mime type already has charset
-convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
-convertTag media sourceURL t@(TagOpen tagname as)
+convertTags :: PandocMonad m => [Tag String] -> m [Tag String]
+convertTags [] = return []
+convertTags (t@TagOpen{}:ts)
+ | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts
+convertTags (t@(TagOpen tagname as):ts)
| tagname `elem`
- ["img", "embed", "video", "input", "audio", "source", "track"] = do
+ ["img", "embed", "video", "input", "audio", "source", "track",
+ "section"] = do
as' <- mapM processAttribute as
- return $ TagOpen tagname as'
+ rest <- convertTags ts
+ return $ TagOpen tagname as' : rest
where processAttribute (x,y) =
- if x == "src" || x == "href" || x == "poster"
+ if x `elem` ["src", "data-src", "href", "poster", "data-background-image"]
then do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) y
+ enc <- getDataURI (fromAttrib "type" t) y
return (x, enc)
else return (x,y)
-convertTag media sourceURL t@(TagOpen "script" as) =
+convertTags (t@(TagOpen "script" as):TagClose "script":ts) =
case fromAttrib "src" t of
- [] -> return t
+ [] -> (t:) <$> convertTags ts
src -> do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) src
- return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag media sourceURL t@(TagOpen "link" as) =
+ let typeAttr = fromAttrib "type" t
+ res <- getData typeAttr src
+ rest <- convertTags ts
+ case res of
+ Left dataUri -> return $ TagOpen "script"
+ (("src",dataUri) : [(x,y) | (x,y) <- as, x /= "src"]) :
+ TagClose "script" : rest
+ Right (mime, bs)
+ | ("text/javascript" `isPrefixOf` mime ||
+ "application/javascript" `isPrefixOf` mime ||
+ "application/x-javascript" `isPrefixOf` mime) &&
+ not ("</script" `B.isInfixOf` bs) ->
+ return $
+ TagOpen "script" [("type", typeAttr)|not (null typeAttr)]
+ : TagText (toString bs)
+ : TagClose "script"
+ : rest
+ | otherwise ->
+ return $ TagOpen "script"
+ (("src",makeDataURI (mime, bs)) :
+ [(x,y) | (x,y) <- as, x /= "src"]) :
+ TagClose "script" : rest
+convertTags (t@(TagOpen "link" as):ts) =
case fromAttrib "href" t of
- [] -> return t
+ [] -> (t:) <$> convertTags ts
src -> do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) src
- return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
-convertTag _ _ t = return t
-
-cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
- -> IO ByteString
-cssURLs media sourceURL d orig = do
- res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig
+ res <- getData (fromAttrib "type" t) src
+ case res of
+ Left dataUri -> do
+ rest <- convertTags ts
+ return $ TagOpen "link"
+ (("href",dataUri) : [(x,y) | (x,y) <- as, x /= "href"]) :
+ rest
+ Right (mime, bs)
+ | "text/css" `isPrefixOf` mime
+ && null (fromAttrib "media" t)
+ && not ("</" `B.isInfixOf` bs) -> do
+ rest <- convertTags $
+ dropWhile (==TagClose "link") ts
+ return $
+ TagOpen "style" [("type", mime)]
+ : TagText (toString bs)
+ : TagClose "style"
+ : rest
+ | otherwise -> do
+ rest <- convertTags ts
+ return $ TagOpen "link"
+ (("href",makeDataURI (mime, bs)) :
+ [(x,y) | (x,y) <- as, x /= "href"]) : rest
+convertTags (t:ts) = (t:) <$> convertTags ts
+
+cssURLs :: PandocMonad m
+ => FilePath -> ByteString -> m ByteString
+cssURLs d orig = do
+ res <- runParserT (parseCSSUrls d) () "css" orig
case res of
- Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig
+ Left e -> do
+ report $ CouldNotParseCSS (show e)
+ return orig
Right bs -> return bs
-parseCSSUrls :: MediaBag -> Maybe String -> FilePath
- -> ParsecT ByteString () IO ByteString
-parseCSSUrls media sourceURL d = B.concat <$> P.many
- (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther)
+parseCSSUrls :: PandocMonad m
+ => FilePath -> ParsecT ByteString () m ByteString
+parseCSSUrls d = B.concat <$> P.many
+ (pCSSWhite <|> pCSSComment <|> pCSSImport d <|> pCSSUrl d <|> pCSSOther)
+
+pCSSImport :: PandocMonad m
+ => FilePath -> ParsecT ByteString () m ByteString
+pCSSImport d = P.try $ do
+ P.string "@import"
+ P.spaces
+ res <- (pQuoted <|> pUrl) >>= handleCSSUrl d
+ P.spaces
+ P.char ';'
+ P.spaces
+ case res of
+ Left b -> return $ B.pack "@import " <> b
+ Right (_, b) -> return b
-- Note: some whitespace in CSS is significant, so we can't collapse it!
-pCSSWhite :: ParsecT ByteString () IO ByteString
+pCSSWhite :: PandocMonad m => ParsecT ByteString () m ByteString
pCSSWhite = B.singleton <$> P.space <* P.spaces
-pCSSComment :: ParsecT ByteString () IO ByteString
+pCSSComment :: PandocMonad m => ParsecT ByteString () m ByteString
pCSSComment = P.try $ do
P.string "/*"
P.manyTill P.anyChar (P.try (P.string "*/"))
return B.empty
-pCSSOther :: ParsecT ByteString () IO ByteString
-pCSSOther = do
+pCSSOther :: PandocMonad m => ParsecT ByteString () m ByteString
+pCSSOther =
(B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|>
- (B.singleton <$> P.char 'u') <|>
- (B.singleton <$> P.char '/')
+ (B.singleton <$> P.char 'u') <|>
+ (B.singleton <$> P.char '/')
+
+pCSSUrl :: PandocMonad m
+ => FilePath -> ParsecT ByteString () m ByteString
+pCSSUrl d = P.try $ do
+ res <- pUrl >>= handleCSSUrl d
+ case res of
+ Left b -> return b
+ Right (mt,b) -> do
+ let enc = makeDataURI (mt, b)
+ return (B.pack $ "url(" ++ enc ++ ")")
+
+pQuoted :: PandocMonad m
+ => ParsecT ByteString () m (String, ByteString)
+pQuoted = P.try $ do
+ quote <- P.oneOf "\"'"
+ url <- P.manyTill P.anyChar (P.char quote)
+ let fallback = B.pack ([quote] ++ trim url ++ [quote])
+ return (url, fallback)
-pCSSUrl :: MediaBag -> Maybe String -> FilePath
- -> ParsecT ByteString () IO ByteString
-pCSSUrl media sourceURL d = P.try $ do
+pUrl :: PandocMonad m
+ => ParsecT ByteString () m (String, ByteString)
+pUrl = P.try $ do
P.string "url("
P.spaces
quote <- P.option Nothing (Just <$> P.oneOf "\"'")
@@ -130,49 +213,66 @@ pCSSUrl media sourceURL d = P.try $ do
P.char ')'
let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++
maybe "" (:[]) quote ++ ")")
- case trim url of
- '#':_ -> return fallback
- 'd':'a':'t':'a':':':_ -> return fallback
+ return (url, fallback)
+
+handleCSSUrl :: PandocMonad m
+ => FilePath -> (String, ByteString)
+ -> ParsecT ByteString () m
+ (Either ByteString (MimeType, ByteString))
+handleCSSUrl d (url, fallback) =
+ case escapeURIString (/='|') (trim url) of
+ '#':_ -> return $ Left fallback
+ 'd':'a':'t':'a':':':_ -> return $ Left fallback
u -> do let url' = if isURI u then u else d </> u
- enc <- lift $ getDataURI media sourceURL "" url'
- return (B.pack $ "url(" ++ enc ++ ")")
+ res <- lift $ getData "" url'
+ case res of
+ Left uri -> return $ Left (B.pack $ "url(" ++ uri ++ ")")
+ Right (mt, raw) -> do
+ -- note that the downloaded CSS may
+ -- itself contain url(...).
+ b <- if "text/css" `isPrefixOf` mt
+ then cssURLs d raw
+ else return raw
+ return $ Right (mt, b)
+getDataURI :: PandocMonad m => MimeType -> String -> m String
+getDataURI mimetype src = do
+ res <- getData mimetype src
+ case res of
+ Left uri -> return uri
+ Right x -> return $ makeDataURI x
-getDataURI :: MediaBag -> Maybe String -> MimeType -> String
- -> IO String
-getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri
-getDataURI media sourceURL mimetype src = do
+getData :: PandocMonad m
+ => MimeType -> String
+ -> m (Either String (MimeType, ByteString))
+getData _ src@('d':'a':'t':'a':':':_) = return $ Left src-- already data: uri
+getData mimetype src = do
let ext = map toLower $ takeExtension src
- fetchResult <- fetchItem' media sourceURL src
- (raw, respMime) <- case fetchResult of
- Left msg -> err 67 $ "Could not fetch " ++ src ++
- "\n" ++ show msg
- Right x -> return x
+ (raw, respMime) <- fetchItem src
let raw' = if ext == ".gz"
- then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
- $ [raw]
+ then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw]
else raw
- let mime = case (mimetype, respMime) of
- ("",Nothing) -> error
+ mime <- case (mimetype, respMime) of
+ ("",Nothing) -> throwError $ PandocSomeError
$ "Could not determine mime type for `" ++ src ++ "'"
- (x, Nothing) -> x
- (_, Just x ) -> x
- let cssSourceURL = case parseURI src of
- Just u
- | uriScheme u `elem` ["http:","https:"] ->
- Just $ show u{ uriPath = "",
- uriQuery = "",
- uriFragment = "" }
- _ -> Nothing
- result <- if mime == "text/css"
- then cssURLs media cssSourceURL (takeDirectory src) raw'
+ (x, Nothing) -> return x
+ (_, Just x ) -> return x
+ result <- if "text/css" `isPrefixOf` mime
+ then do
+ oldInputs <- getInputFiles
+ setInputFiles [src]
+ res <- cssURLs (takeDirectory src) raw'
+ setInputFiles oldInputs
+ return res
else return raw'
- return $ makeDataURI mime result
+ return $ Right (mime, result)
+
+
-- | Convert HTML into self-contained HTML, incorporating images,
-- scripts, and CSS using data: URIs.
-makeSelfContained :: WriterOptions -> String -> IO String
-makeSelfContained opts inp = do
+makeSelfContained :: PandocMonad m => String -> m String
+makeSelfContained inp = do
let tags = parseTags inp
- out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags
+ out' <- convertTags tags
return $ renderTags' out'
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index bd2da945e..52e1447db 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,8 +1,12 @@
-{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
- FlexibleContexts, ScopedTypeVariables, PatternGuards,
- ViewPatterns #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}
{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -21,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Shared
- Copyright : Copyright (C) 2006-2016 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -49,21 +53,18 @@ module Text.Pandoc.Shared (
toRomanNumeral,
escapeURI,
tabFilter,
+ crFilter,
-- * Date/time
normalizeDate,
-- * Pandoc block and inline list processing
orderedListMarkers,
- normalizeSpaces,
extractSpaces,
- normalize,
- normalizeInlines,
- normalizeBlocks,
removeFormatting,
+ deNote,
stringify,
capitalize,
compactify,
- compactify',
- compactify'DL,
+ compactifyDL,
linesToPara,
Element (..),
hierarchicalize,
@@ -71,29 +72,26 @@ module Text.Pandoc.Shared (
inlineListToIdentifier,
isHeaderBlock,
headerShift,
+ stripEmptyParagraphs,
isTightList,
addMetaField,
makeMeta,
+ eastAsianLineBreakFilter,
+ underlineSpan,
-- * TagSoup HTML handling
renderTags',
-- * File handling
inDirectory,
- getDefaultReferenceDocx,
- getDefaultReferenceODT,
- readDataFile,
- readDataFileUTF8,
- fetchItem,
- fetchItem',
- openURL,
collapseFilePath,
filteredFilesFromArchive,
+ -- * URI handling
+ schemes,
+ isURI,
-- * Error handling
- err,
- warn,
mapLeft,
- hush,
-- * for squashing blocks
blocksToInlines,
+ blocksToInlines',
-- * Safe read
safeRead,
-- * Temp directory
@@ -102,74 +100,37 @@ module Text.Pandoc.Shared (
pandocVersion
) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
-import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
-import qualified Text.Pandoc.Builder as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import System.Environment (getProgName)
-import System.Exit (exitWith, ExitCode(..))
-import Data.Char ( toLower, isLower, isUpper, isAlpha,
- isLetter, isDigit, isSpace )
-import Data.List ( find, stripPrefix, intercalate )
-import Data.Maybe (mapMaybe)
-import Data.Version ( showVersion )
+import Codec.Archive.Zip
+import qualified Control.Exception as E
+import Control.Monad (MonadPlus (..), msum, unless)
+import qualified Control.Monad.State.Strict as S
+import qualified Data.ByteString.Lazy as BL
+import Data.Char (isAlpha, isDigit, isLetter, isLower, isSpace, isUpper,
+ toLower)
+import Data.Data (Data, Typeable)
+import Data.List (find, intercalate, intersperse, stripPrefix)
import qualified Data.Map as M
-import Network.URI ( escapeURIString, nonStrictRelativeTo,
- unEscapeString, parseURIReference, isAllowedInURI,
- parseURI, URI(..) )
+import Data.Maybe (mapMaybe)
+import Data.Monoid ((<>))
+import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
import qualified Data.Set as Set
+import qualified Data.Text as T
+import Data.Version (showVersion)
+import Network.URI (URI (uriScheme), escapeURIString, parseURI)
+import Paths_pandoc (version)
import System.Directory
-import System.FilePath (splitDirectories, isPathSeparator)
+import System.FilePath (isPathSeparator, splitDirectories)
import qualified System.FilePath.Posix as Posix
-import Text.Pandoc.MIME (MimeType, getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension)
-import Data.Generics (Typeable, Data)
-import qualified Control.Monad.State as S
-import Control.Monad.Trans (MonadIO (..))
-import qualified Control.Exception as E
-import Control.Monad (msum, unless, MonadPlus(..))
-import Text.Pandoc.Pretty (charWidth)
-import Text.Pandoc.Compat.Time
-import Data.Time.Clock.POSIX
-import System.IO (stderr)
import System.IO.Temp
-import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
- renderOptions)
-import Data.Monoid ((<>))
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.Char8 as B8
-import Data.ByteString.Base64 (decodeLenient)
-import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
-import qualified Data.Text as T (toUpper, pack, unpack)
-import Data.ByteString.Lazy (toChunks, fromChunks)
-import qualified Data.ByteString.Lazy as BL
-import Paths_pandoc (version)
-
-import Codec.Archive.Zip
-
-#ifdef EMBED_DATA_FILES
-import Text.Pandoc.Data (dataFiles)
-#else
-import Paths_pandoc (getDataFileName)
-#endif
-#ifdef HTTP_CLIENT
-import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
- Request(port,host))
-import Network.HTTP.Client (parseRequest)
-import Network.HTTP.Client (newManager)
-import Network.HTTP.Client.Internal (addProxy)
-import Network.HTTP.Client.TLS (tlsManagerSettings)
-import System.Environment (getEnv)
-import Network.HTTP.Types.Header ( hContentType)
-import Network (withSocketsDo)
-#else
-import Network.URI (parseURI)
-import Network.HTTP (findHeader, rspBody,
- RequestMethod(..), HeaderName(..), mkRequest)
-import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
-#endif
+import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
+ renderTagsOptions)
+import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Compat.Time
+import Text.Pandoc.Definition
+import Text.Pandoc.Generic (bottomUp)
+import Text.Pandoc.Pretty (charWidth)
+import Text.Pandoc.Walk
-- | Version number of pandoc library.
pandocVersion :: String
@@ -185,11 +146,11 @@ splitBy _ [] = []
splitBy isSep lst =
let (first, rest) = break isSep lst
rest' = dropWhile isSep rest
- in first:(splitBy isSep rest')
+ in first:splitBy isSep rest'
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
+splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x) xs) rest
where (first, rest) = splitAt x lst
-- | Split string into chunks divided at specified indices.
@@ -197,7 +158,7 @@ splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] lst = [lst]
splitStringByIndices (x:xs) lst =
let (first, rest) = splitAt' x lst in
- first : (splitStringByIndices (map (\y -> y - x) xs) rest)
+ first : splitStringByIndices (map (\y -> y - x) xs) rest
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ [] = ([],[])
@@ -236,9 +197,9 @@ backslashEscapes = map (\ch -> (ch, ['\\',ch]))
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
escapeStringUsing escapeTable (x:xs) =
- case (lookup x escapeTable) of
- Just str -> str ++ rest
- Nothing -> x:rest
+ case lookup x escapeTable of
+ Just str -> str ++ rest
+ Nothing -> x:rest
where rest = escapeStringUsing escapeTable xs
-- | Strip trailing newlines from string.
@@ -260,35 +221,33 @@ trimr = reverse . triml . reverse
-- | Strip leading and trailing characters from string
stripFirstAndLast :: String -> String
stripFirstAndLast str =
- drop 1 $ take ((length str) - 1) str
+ drop 1 $ take (length str - 1) str
-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
- a:'-':(toLower b):(camelCaseToHyphenated rest)
-camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
+ a:'-':toLower b:camelCaseToHyphenated rest
+camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest
-- | Convert number < 4000 to uppercase roman numeral.
toRomanNumeral :: Int -> String
-toRomanNumeral x =
- if x >= 4000 || x < 0
- then "?"
- else case x of
- _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
- _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
- _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
- _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
- _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
- _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
- _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
- _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
- _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
- _ | x == 9 -> "IX"
- _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
- _ | x == 4 -> "IV"
- _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
- _ -> ""
+toRomanNumeral x
+ | x >= 4000 || x < 0 = "?"
+ | x >= 1000 = "M" ++ toRomanNumeral (x - 1000)
+ | x >= 900 = "CM" ++ toRomanNumeral (x - 900)
+ | x >= 500 = "D" ++ toRomanNumeral (x - 500)
+ | x >= 400 = "CD" ++ toRomanNumeral (x - 400)
+ | x >= 100 = "C" ++ toRomanNumeral (x - 100)
+ | x >= 90 = "XC" ++ toRomanNumeral (x - 90)
+ | x >= 50 = "L" ++ toRomanNumeral (x - 50)
+ | x >= 40 = "XL" ++ toRomanNumeral (x - 40)
+ | x >= 10 = "X" ++ toRomanNumeral (x - 10)
+ | x == 9 = "IX"
+ | x >= 5 = "V" ++ toRomanNumeral (x - 5)
+ | x == 4 = "IV"
+ | x >= 1 = "I" ++ toRomanNumeral (x - 1)
+ | otherwise = ""
-- | Escape whitespace and some punctuation characters in URI.
escapeURI :: String -> String
@@ -296,26 +255,23 @@ escapeURI = escapeURIString (not . needsEscaping)
where needsEscaping c = isSpace c || c `elem`
['<','>','|','"','{','}','[',']','^', '`']
-
--- | Convert tabs to spaces and filter out DOS line endings.
--- Tabs will be preserved if tab stop is set to 0.
+-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
tabFilter :: Int -- ^ Tab stop
- -> String -- ^ Input
- -> String
-tabFilter tabStop =
- let go _ [] = ""
- go _ ('\n':xs) = '\n' : go tabStop xs
- go _ ('\r':'\n':xs) = '\n' : go tabStop xs
- go _ ('\r':xs) = '\n' : go tabStop xs
- go spsToNextStop ('\t':xs) =
- if tabStop == 0
- then '\t' : go tabStop xs
- else replicate spsToNextStop ' ' ++ go tabStop xs
- go 1 (x:xs) =
- x : go tabStop xs
- go spsToNextStop (x:xs) =
- x : go (spsToNextStop - 1) xs
- in go tabStop
+ -> T.Text -- ^ Input
+ -> T.Text
+tabFilter 0 = id
+tabFilter tabStop = T.unlines . map go . T.lines
+ where go s =
+ let (s1, s2) = T.break (== '\t') s
+ in if T.null s2
+ then s1
+ else s1 <> T.replicate
+ (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
+ <> go (T.drop 1 s2)
+
+-- | Strip out DOS line endings.
+crFilter :: T.Text -> T.Text
+crFilter = T.filter (/= '\r')
--
-- Date/time
@@ -329,7 +285,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
where rejectBadYear day = case toGregorian day of
(y, _, _) | y >= 1601 && y <= 9999 -> Just day
- _ -> Nothing
+ _ -> Nothing
parsetimeWith =
#if MIN_VERSION_time(1,5,0)
parseTimeM True defaultTimeLocale
@@ -337,7 +293,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
parseTime defaultTimeLocale
#endif
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
- "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
+ "%e %B %Y", "%b. %e, %Y", "%B %e, %Y",
"%Y%m%d", "%Y%m", "%Y"]
--
@@ -366,23 +322,6 @@ orderedListMarkers (start, numstyle, numdelim) =
TwoParens -> "(" ++ str ++ ")"
in map inDelim nums
--- | Normalize a list of inline elements: remove leading and trailing
--- @Space@ elements, collapse double @Space@s into singles, and
--- remove empty Str elements.
-normalizeSpaces :: [Inline] -> [Inline]
-normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
- where cleanup [] = []
- cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
- [] -> []
- (x:xs) -> Space : x : cleanup xs
- cleanup ((Str ""):rest) = cleanup rest
- cleanup (x:rest) = x : cleanup rest
-
-isSpaceOrEmpty :: Inline -> Bool
-isSpaceOrEmpty Space = True
-isSpaceOrEmpty (Str "") = True
-isSpaceOrEmpty _ = False
-
-- | Extract the leading and trailing spaces from inside an inline element
-- and place them outside the element. SoftBreaks count as Spaces for
-- these purposes.
@@ -399,183 +338,43 @@ extractSpaces f is =
_ -> mempty in
(left <> f (B.trimInlines . B.Many $ contents) <> right)
--- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
--- combining adjacent 'Str's and 'Emph's, remove 'Null's and
--- empty elements, etc.
-normalize :: Pandoc -> Pandoc
-normalize (Pandoc (Meta meta) blocks) =
- Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
- where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
- go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs
- go (MetaList ms) = MetaList $ map go ms
- go (MetaMap m) = MetaMap $ M.map go m
- go x = x
-
-normalizeBlocks :: [Block] -> [Block]
-normalizeBlocks (Null : xs) = normalizeBlocks xs
-normalizeBlocks (Div attr bs : xs) =
- Div attr (normalizeBlocks bs) : normalizeBlocks xs
-normalizeBlocks (BlockQuote bs : xs) =
- case normalizeBlocks bs of
- [] -> normalizeBlocks xs
- bs' -> BlockQuote bs' : normalizeBlocks xs
-normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
-normalizeBlocks (BulletList items : xs) =
- BulletList (map normalizeBlocks items) : normalizeBlocks xs
-normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
-normalizeBlocks (OrderedList attr items : xs) =
- OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
-normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
-normalizeBlocks (DefinitionList items : xs) =
- DefinitionList (map go items) : normalizeBlocks xs
- where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
-normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
-normalizeBlocks (RawBlock f x : xs) =
- case normalizeBlocks xs of
- (RawBlock f' x' : rest) | f' == f ->
- RawBlock f (x ++ ('\n':x')) : rest
- rest -> RawBlock f x : rest
-normalizeBlocks (Para ils : xs) =
- case normalizeInlines ils of
- [] -> normalizeBlocks xs
- ils' -> Para ils' : normalizeBlocks xs
-normalizeBlocks (Plain ils : xs) =
- case normalizeInlines ils of
- [] -> normalizeBlocks xs
- ils' -> Plain ils' : normalizeBlocks xs
-normalizeBlocks (Header lev attr ils : xs) =
- Header lev attr (normalizeInlines ils) : normalizeBlocks xs
-normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
- Table (normalizeInlines capt) aligns widths
- (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
- : normalizeBlocks xs
-normalizeBlocks (x:xs) = x : normalizeBlocks xs
-normalizeBlocks [] = []
-
-normalizeInlines :: [Inline] -> [Inline]
-normalizeInlines (Str x : ys) =
- case concat (x : map fromStr strs) of
- "" -> rest
- n -> Str n : rest
- where
- (strs, rest) = span isStr $ normalizeInlines ys
- isStr (Str _) = True
- isStr _ = False
- fromStr (Str z) = z
- fromStr _ = error "normalizeInlines - fromStr - not a Str"
-normalizeInlines (Space : SoftBreak : ys) =
- SoftBreak : normalizeInlines ys
-normalizeInlines (Space : ys) =
- if null rest
- then []
- else Space : rest
- where isSp Space = True
- isSp _ = False
- rest = dropWhile isSp $ normalizeInlines ys
-normalizeInlines (Emph xs : zs) =
- case normalizeInlines zs of
- (Emph ys : rest) -> normalizeInlines $
- Emph (normalizeInlines $ xs ++ ys) : rest
- rest -> case normalizeInlines xs of
- [] -> rest
- xs' -> Emph xs' : rest
-normalizeInlines (Strong xs : zs) =
- case normalizeInlines zs of
- (Strong ys : rest) -> normalizeInlines $
- Strong (normalizeInlines $ xs ++ ys) : rest
- rest -> case normalizeInlines xs of
- [] -> rest
- xs' -> Strong xs' : rest
-normalizeInlines (Subscript xs : zs) =
- case normalizeInlines zs of
- (Subscript ys : rest) -> normalizeInlines $
- Subscript (normalizeInlines $ xs ++ ys) : rest
- rest -> case normalizeInlines xs of
- [] -> rest
- xs' -> Subscript xs' : rest
-normalizeInlines (Superscript xs : zs) =
- case normalizeInlines zs of
- (Superscript ys : rest) -> normalizeInlines $
- Superscript (normalizeInlines $ xs ++ ys) : rest
- rest -> case normalizeInlines xs of
- [] -> rest
- xs' -> Superscript xs' : rest
-normalizeInlines (SmallCaps xs : zs) =
- case normalizeInlines zs of
- (SmallCaps ys : rest) -> normalizeInlines $
- SmallCaps (normalizeInlines $ xs ++ ys) : rest
- rest -> case normalizeInlines xs of
- [] -> rest
- xs' -> SmallCaps xs' : rest
-normalizeInlines (Strikeout xs : zs) =
- case normalizeInlines zs of
- (Strikeout ys : rest) -> normalizeInlines $
- Strikeout (normalizeInlines $ xs ++ ys) : rest
- rest -> case normalizeInlines xs of
- [] -> rest
- xs' -> Strikeout xs' : rest
-normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
-normalizeInlines (RawInline f xs : zs) =
- case normalizeInlines zs of
- (RawInline f' ys : rest) | f == f' -> normalizeInlines $
- RawInline f (xs ++ ys) : rest
- rest -> RawInline f xs : rest
-normalizeInlines (Code _ "" : ys) = normalizeInlines ys
-normalizeInlines (Code attr xs : zs) =
- case normalizeInlines zs of
- (Code attr' ys : rest) | attr == attr' -> normalizeInlines $
- Code attr (xs ++ ys) : rest
- rest -> Code attr xs : rest
--- allow empty spans, they may carry identifiers etc.
--- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
-normalizeInlines (Span attr xs : zs) =
- case normalizeInlines zs of
- (Span attr' ys : rest) | attr == attr' -> normalizeInlines $
- Span attr (normalizeInlines $ xs ++ ys) : rest
- rest -> Span attr (normalizeInlines xs) : rest
-normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
- normalizeInlines ys
-normalizeInlines (Quoted qt ils : ys) =
- Quoted qt (normalizeInlines ils) : normalizeInlines ys
-normalizeInlines (Link attr ils t : ys) =
- Link attr (normalizeInlines ils) t : normalizeInlines ys
-normalizeInlines (Image attr ils t : ys) =
- Image attr (normalizeInlines ils) t : normalizeInlines ys
-normalizeInlines (Cite cs ils : ys) =
- Cite cs (normalizeInlines ils) : normalizeInlines ys
-normalizeInlines (x : xs) = x : normalizeInlines xs
-normalizeInlines [] = []
-
-- | Extract inlines, removing formatting.
removeFormatting :: Walkable Inline a => a -> [Inline]
-removeFormatting = query go . walk deNote
+removeFormatting = query go . walk (deNote . deQuote)
where go :: Inline -> [Inline]
- go (Str xs) = [Str xs]
- go Space = [Space]
- go SoftBreak = [SoftBreak]
- go (Code _ x) = [Str x]
- go (Math _ x) = [Str x]
- go LineBreak = [Space]
- go _ = []
- deNote (Note _) = Str ""
- deNote x = x
+ go (Str xs) = [Str xs]
+ go Space = [Space]
+ go SoftBreak = [SoftBreak]
+ go (Code _ x) = [Str x]
+ go (Math _ x) = [Str x]
+ go LineBreak = [Space]
+ go _ = []
+
+deNote :: Inline -> Inline
+deNote (Note _) = Str ""
+deNote x = x
+
+deQuote :: Inline -> Inline
+deQuote (Quoted SingleQuote xs) =
+ Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
+deQuote (Quoted DoubleQuote xs) =
+ Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"])
+deQuote x = x
-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).
stringify :: Walkable Inline a => a -> String
-stringify = query go . walk deNote
+stringify = query go . walk (deNote . deQuote)
where go :: Inline -> [Char]
- go Space = " "
- go SoftBreak = " "
- go (Str x) = x
- go (Code _ x) = x
- go (Math _ x) = x
+ go Space = " "
+ go SoftBreak = " "
+ go (Str x) = x
+ go (Code _ x) = x
+ go (Math _ x) = x
go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
- go LineBreak = " "
- go _ = ""
- deNote (Note _) = Str ""
- deNote x = x
+ go LineBreak = " "
+ go _ = ""
-- | Bring all regular text in a pandoc structure to uppercase.
--
@@ -589,28 +388,12 @@ capitalize = walk go
go x = x
-- | Change final list item from @Para@ to @Plain@ if the list contains
--- no other @Para@ blocks.
-compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
- -> [[Block]]
-compactify [] = []
-compactify items =
- case (init items, last items) of
- (_,[]) -> items
- (others, final) ->
- case last final of
- Para a -> case (filter isPara $ concat items) of
- -- if this is only Para, change to Plain
- [_] -> others ++ [init final ++ [Plain a]]
- _ -> items
- _ -> items
-
--- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather
-- than @[Block]@.
-compactify' :: [Blocks] -- ^ List of list items (each a list of blocks)
+compactify :: [Blocks] -- ^ List of list items (each a list of blocks)
-> [Blocks]
-compactify' [] = []
-compactify' items =
+compactify [] = []
+compactify items =
let (others, final) = (init items, last items)
in case reverse (B.toList final) of
(Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
@@ -619,9 +402,9 @@ compactify' items =
_ -> items
_ -> items
--- | Like @compactify'@, but acts on items of definition lists.
-compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
-compactify'DL items =
+-- | Like @compactify@, but acts on items of definition lists.
+compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
+compactifyDL items =
let defs = concatMap snd items
in case reverse (concatMap B.toList defs) of
(Para x:xs)
@@ -663,7 +446,7 @@ instance Walkable Inline Element where
ils' <- walkM f ils
elts' <- walkM f elts
return $ Sec lev nums attr ils' elts'
- query f (Blk x) = query f x
+ query f (Blk x) = query f x
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
instance Walkable Block Element where
@@ -674,7 +457,7 @@ instance Walkable Block Element where
ils' <- walkM f ils
elts' <- walkM f elts
return $ Sec lev nums attr ils' elts'
- query f (Blk x) = query f x
+ query f (Blk x) = query f x
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
@@ -687,8 +470,8 @@ inlineListToIdentifier =
map (nbspToSp . toLower) .
filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
stringify
- where nbspToSp '\160' = ' '
- nbspToSp x = x
+ where nbspToSp '\160' = ' '
+ nbspToSp x = x
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
@@ -696,7 +479,7 @@ hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
-hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
+hierarchicalizeWithIds (Header level attr@(_,classes,_) title':xs) = do
lastnum <- S.get
let lastnum' = take level lastnum
let newnum = case length lastnum' of
@@ -709,26 +492,26 @@ hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
sectionContents' <- hierarchicalizeWithIds sectionContents
rest' <- hierarchicalizeWithIds rest
return $ Sec level newnum attr title' sectionContents' : rest'
-hierarchicalizeWithIds ((Div ("",["references"],[])
- (Header level (ident,classes,kvs) title' : xs)):ys) =
- hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
- title') : (xs ++ ys))
+hierarchicalizeWithIds (Div ("",["references"],[])
+ (Header level (ident,classes,kvs) title' : xs):ys) =
+ hierarchicalizeWithIds (Header level (ident,"references":classes,kvs)
+ title' : (xs ++ ys))
hierarchicalizeWithIds (x:rest) = do
rest' <- hierarchicalizeWithIds rest
- return $ (Blk x) : rest'
+ return $ Blk x : rest'
headerLtEq :: Int -> Block -> Bool
-headerLtEq level (Header l _ _) = l <= level
-headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
-headerLtEq _ _ = False
+headerLtEq level (Header l _ _) = l <= level
+headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
+headerLtEq _ _ = False
-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
uniqueIdent :: [Inline] -> Set.Set String -> String
uniqueIdent title' usedIdents
= let baseIdent = case inlineListToIdentifier title' of
- "" -> "section"
- x -> x
+ "" -> "section"
+ x -> x
numIdent n = baseIdent ++ "-" ++ show n
in if baseIdent `Set.member` usedIdents
then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
@@ -738,8 +521,8 @@ uniqueIdent title' usedIdents
-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
-isHeaderBlock (Header _ _ _) = True
-isHeaderBlock _ = False
+isHeaderBlock Header{} = True
+isHeaderBlock _ = False
-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
@@ -748,6 +531,14 @@ headerShift n = walk shift
shift (Header level attr inner) = Header (level + n) attr inner
shift x = x
+-- | Remove empty paragraphs.
+stripEmptyParagraphs :: Pandoc -> Pandoc
+stripEmptyParagraphs = walk go
+ where go :: [Block] -> [Block]
+ go = filter (not . isEmptyParagraph)
+ isEmptyParagraph (Para []) = True
+ isEmptyParagraph _ = False
+
-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = all firstIsPlain
@@ -765,8 +556,8 @@ addMetaField key val (Meta meta) =
Meta $ M.insertWith combine key (toMetaValue val) meta
where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
combine newval x = MetaList [x, newval]
- tolist (MetaList ys) = ys
- tolist y = [y]
+ tolist (MetaList ys) = ys
+ tolist y = [y]
-- | Create 'Meta' from old-style title, authors, date. This is
-- provided to ease the transition from the old API.
@@ -774,8 +565,24 @@ makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date =
addMetaField "title" (B.fromList title)
$ addMetaField "author" (map B.fromList authors)
- $ addMetaField "date" (B.fromList date)
- $ nullMeta
+ $ addMetaField "date" (B.fromList date) nullMeta
+
+-- | Remove soft breaks between East Asian characters.
+eastAsianLineBreakFilter :: Pandoc -> Pandoc
+eastAsianLineBreakFilter = bottomUp go
+ where go (x:SoftBreak:y:zs) =
+ case (stringify x, stringify y) of
+ (xs@(_:_), c:_)
+ | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
+ _ -> x:SoftBreak:y:zs
+ go xs = xs
+
+-- | Builder for underline.
+-- This probably belongs in Builder.hs in pandoc-types.
+-- Will be replaced once Underline is an element.
+underlineSpan :: Inlines -> Inlines
+underlineSpan = B.spanWith ("", ["underline"], [])
+
--
-- TagSoup HTML handling
@@ -787,7 +594,7 @@ renderTags' = renderTagsOptions
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
"meta", "link"]
, optRawTag = matchTags ["script", "style"] }
- where matchTags = \tags -> flip elem tags . map toLower
+ where matchTags tags = flip elem tags . map toLower
--
-- File handling
@@ -800,226 +607,14 @@ inDirectory path action = E.bracket
setCurrentDirectory
(const $ setCurrentDirectory path >> action)
-getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
-getDefaultReferenceDocx datadir = do
- let paths = ["[Content_Types].xml",
- "_rels/.rels",
- "docProps/app.xml",
- "docProps/core.xml",
- "word/document.xml",
- "word/fontTable.xml",
- "word/footnotes.xml",
- "word/numbering.xml",
- "word/settings.xml",
- "word/webSettings.xml",
- "word/styles.xml",
- "word/_rels/document.xml.rels",
- "word/_rels/footnotes.xml.rels",
- "word/theme/theme1.xml"]
- let toLazy = fromChunks . (:[])
- let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
- getCurrentTime
- contents <- toLazy <$> readDataFile datadir
- ("docx/" ++ path)
- return $ toEntry path epochtime contents
- mbArchive <- case datadir of
- Nothing -> return Nothing
- Just d -> do
- exists <- doesFileExist (d </> "reference.docx")
- if exists
- then return (Just (d </> "reference.docx"))
- else return Nothing
- case mbArchive of
- Just arch -> toArchive <$> BL.readFile arch
- Nothing -> foldr addEntryToArchive emptyArchive <$>
- mapM pathToEntry paths
-
-getDefaultReferenceODT :: Maybe FilePath -> IO Archive
-getDefaultReferenceODT datadir = do
- let paths = ["mimetype",
- "manifest.rdf",
- "styles.xml",
- "content.xml",
- "meta.xml",
- "settings.xml",
- "Configurations2/accelerator/current.xml",
- "Thumbnails/thumbnail.png",
- "META-INF/manifest.xml"]
- let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
- contents <- (fromChunks . (:[])) `fmap`
- readDataFile datadir ("odt/" ++ path)
- return $ toEntry path epochtime contents
- mbArchive <- case datadir of
- Nothing -> return Nothing
- Just d -> do
- exists <- doesFileExist (d </> "reference.odt")
- if exists
- then return (Just (d </> "reference.odt"))
- else return Nothing
- case mbArchive of
- Just arch -> toArchive <$> BL.readFile arch
- Nothing -> foldr addEntryToArchive emptyArchive <$>
- mapM pathToEntry paths
-
-
-readDefaultDataFile :: FilePath -> IO BS.ByteString
-readDefaultDataFile "reference.docx" =
- (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
-readDefaultDataFile "reference.odt" =
- (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
-readDefaultDataFile fname =
-#ifdef EMBED_DATA_FILES
- case lookup (makeCanonical fname) dataFiles of
- Nothing -> err 97 $ "Could not find data file " ++ fname
- Just contents -> return contents
- where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
- transformPathParts = reverse . foldl go []
- go as "." = as
- go (_:as) ".." = as
- go as x = x : as
-#else
- getDataFileName fname' >>= checkExistence >>= BS.readFile
- where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
-
-checkExistence :: FilePath -> IO FilePath
-checkExistence fn = do
- exists <- doesFileExist fn
- if exists
- then return fn
- else err 97 ("Could not find data file " ++ fn)
-#endif
-
--- | Read file from specified user data directory or, if not found there, from
--- Cabal data directory.
-readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
-readDataFile Nothing fname = readDefaultDataFile fname
-readDataFile (Just userDir) fname = do
- exists <- doesFileExist (userDir </> fname)
- if exists
- then BS.readFile (userDir </> fname)
- else readDefaultDataFile fname
-
--- | Same as 'readDataFile' but returns a String instead of a ByteString.
-readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
-readDataFileUTF8 userDir fname =
- UTF8.toString `fmap` readDataFile userDir fname
-
--- | Specialized version of parseURIReference that disallows
--- single-letter schemes. Reason: these are usually windows absolute
--- paths.
-parseURIReference' :: String -> Maybe URI
-parseURIReference' s =
- case parseURIReference s of
- Just u
- | length (uriScheme u) > 2 -> Just u
- | null (uriScheme u) -> Just u -- protocol-relative
- _ -> Nothing
-
--- | Fetch an image or other item from the local filesystem or the net.
--- Returns raw content and maybe mime type.
-fetchItem :: Maybe String -> String
- -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
-fetchItem sourceURL s =
- case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of
- (Just u, s') -> -- try fetching from relative path at source
- case parseURIReference' s' of
- Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
- Nothing -> openURL s' -- will throw error
- (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
- case parseURIReference' s' of
- Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
- Nothing -> openURL s' -- will throw error
- (Nothing, s') ->
- case parseURI s' of -- requires absolute URI
- -- We don't want to treat C:/ as a scheme:
- Just u' | length (uriScheme u') > 2 -> openURL (show u')
- Just u' | uriScheme u' == "file:" ->
- E.try $ readLocalFile $ dropWhile (=='/') (uriPath u')
- _ -> E.try $ readLocalFile fp -- get from local file system
- where readLocalFile f = do
- cont <- BS.readFile f
- return (cont, mime)
- httpcolon = URI{ uriScheme = "http:",
- uriAuthority = Nothing,
- uriPath = "",
- uriQuery = "",
- uriFragment = "" }
- dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
- fp = unEscapeString $ dropFragmentAndQuery s
- mime = case takeExtension fp of
- ".gz" -> getMimeType $ dropExtension fp
- ".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
- x -> getMimeType x
- ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
- convertSlash '\\' = '/'
- convertSlash x = x
-
--- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
-fetchItem' :: MediaBag -> Maybe String -> String
- -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
-fetchItem' media sourceURL s = do
- case lookupMedia s media of
- Nothing -> fetchItem sourceURL s
- Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
-
--- | Read from a URL and return raw data and maybe mime type.
-openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
-openURL u
- | Just u'' <- stripPrefix "data:" u =
- let mime = takeWhile (/=',') u''
- contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
- in return $ Right (decodeLenient contents, Just mime)
-#ifdef HTTP_CLIENT
- | otherwise = withSocketsDo $ E.try $ do
- let parseReq = parseRequest
- (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
- req <- parseReq u
- req' <- case proxy of
- Left _ -> return req
- Right pr -> (parseReq pr >>= \r ->
- return $ addProxy (host r) (port r) req)
- `mplus` return req
- resp <- newManager tlsManagerSettings >>= httpLbs req'
- return (BS.concat $ toChunks $ responseBody resp,
- UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
-#else
- | otherwise = E.try $ getBodyAndMimeType `fmap` browse
- (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
- setOutHandler $ const (return ())
- setAllowRedirects True
- request (getRequest' u'))
- where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
- getRequest' uriString = case parseURI uriString of
- Nothing -> error ("Not a valid URL: " ++
- uriString)
- Just v -> mkRequest GET v
- u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI
-#endif
-
--
-- Error reporting
--
-err :: Int -> String -> IO a
-err exitCode msg = do
- name <- getProgName
- UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
- exitWith $ ExitFailure exitCode
- return undefined
-
-warn :: MonadIO m => String -> m ()
-warn msg = liftIO $ do
- name <- getProgName
- UTF8.hPutStrLn stderr $ "[" ++ name ++ " warning] " ++ msg
-
mapLeft :: (a -> b) -> Either a c -> Either b c
-mapLeft f (Left x) = Left (f x)
+mapLeft f (Left x) = Left (f x)
mapLeft _ (Right x) = Right x
-hush :: Either a b -> Maybe b
-hush (Left _) = Nothing
-hush (Right x) = Just x
-
-- | Remove intermediate "." and ".." directories from a path.
--
-- > collapseFilePath "./foo" == "foo"
@@ -1034,14 +629,14 @@ collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
where
go rs "." = rs
go r@(p:rs) ".." = case p of
- ".." -> ("..":r)
- (checkPathSeperator -> Just True) -> ("..":r)
- _ -> rs
+ ".." -> "..":r
+ (checkPathSeperator -> Just True) -> "..":r
+ _ -> rs
go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
go rs x = x:rs
- isSingleton [] = Nothing
+ isSingleton [] = Nothing
isSingleton [x] = Just x
- isSingleton _ = Nothing
+ isSingleton _ = Nothing
checkPathSeperator = fmap isPathSeparator . isSingleton
--
@@ -1054,41 +649,109 @@ filteredFilesFromArchive zf f =
fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
+
+--
+-- IANA URIs
+--
+
+-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
+-- the unofficial schemes doi, javascript, isbn, pmid.
+schemes :: Set.Set String
+schemes = Set.fromList
+ -- Official IANA schemes
+ [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
+ , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin"
+ , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension"
+ , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs"
+ , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle"
+ , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed"
+ , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg"
+ , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham"
+ , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon"
+ , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6"
+ , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs"
+ , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap"
+ , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market"
+ , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access"
+ , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel"
+ , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath"
+ , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint"
+ , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller"
+ , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode"
+ , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular"
+ , "ms-settings-cloudstorage", "ms-settings-connectabledevices"
+ , "ms-settings-displays-topology", "ms-settings-emailandaccounts"
+ , "ms-settings-language", "ms-settings-location", "ms-settings-lock"
+ , "ms-settings-nfctransactions", "ms-settings-notifications"
+ , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity"
+ , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace"
+ , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad"
+ , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word"
+ , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs"
+ , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd"
+ , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop"
+ , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis"
+ , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp"
+ , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn"
+ , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews"
+ , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam"
+ , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid"
+ , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn"
+ , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi"
+ , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid"
+ , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
+ , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
+ , "z39.50s"
+ -- Inofficial schemes
+ , "doi", "isbn", "javascript", "pmid"
+ ]
+
+-- | Check if the string is a valid URL with a IANA or frequently used but
+-- unofficial scheme (see @schemes@).
+isURI :: String -> Bool
+isURI = maybe False hasKnownScheme . parseURI
+ where
+ hasKnownScheme = (`Set.member` schemes) . map toLower .
+ filter (/= ':') . uriScheme
+
---
--- Squash blocks into inlines
---
-blockToInlines :: Block -> [Inline]
-blockToInlines (Plain ils) = ils
-blockToInlines (Para ils) = ils
-blockToInlines (LineBlock lns) = combineLines lns
-blockToInlines (CodeBlock attr str) = [Code attr str]
-blockToInlines (RawBlock fmt str) = [RawInline fmt str]
-blockToInlines (BlockQuote blks) = blocksToInlines blks
+blockToInlines :: Block -> Inlines
+blockToInlines (Plain ils) = B.fromList ils
+blockToInlines (Para ils) = B.fromList ils
+blockToInlines (LineBlock lns) = B.fromList $ combineLines lns
+blockToInlines (CodeBlock attr str) = B.codeWith attr str
+blockToInlines (RawBlock (Format fmt) str) = B.rawInline fmt str
+blockToInlines (BlockQuote blks) = blocksToInlines' blks
blockToInlines (OrderedList _ blkslst) =
- concatMap blocksToInlines blkslst
+ mconcat $ map blocksToInlines' blkslst
blockToInlines (BulletList blkslst) =
- concatMap blocksToInlines blkslst
+ mconcat $ map blocksToInlines' blkslst
blockToInlines (DefinitionList pairslst) =
- concatMap f pairslst
+ mconcat $ map f pairslst
where
- f (ils, blkslst) = ils ++
- [Str ":", Space] ++
- (concatMap blocksToInlines blkslst)
-blockToInlines (Header _ _ ils) = ils
-blockToInlines (HorizontalRule) = []
+ f (ils, blkslst) = B.fromList ils <> B.str ":" <> B.space <>
+ mconcat (map blocksToInlines' blkslst)
+blockToInlines (Header _ _ ils) = B.fromList ils
+blockToInlines HorizontalRule = mempty
blockToInlines (Table _ _ _ headers rows) =
- intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
- where
- tbl = headers : rows
-blockToInlines (Div _ blks) = blocksToInlines blks
-blockToInlines Null = []
+ mconcat $ intersperse B.linebreak $
+ map (mconcat . map blocksToInlines') (headers:rows)
+blockToInlines (Div _ blks) = blocksToInlines' blks
+blockToInlines Null = mempty
+
+blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines
+blocksToInlinesWithSep sep =
+ mconcat . intersperse sep . map blockToInlines
-blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
-blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks
+blocksToInlines' :: [Block] -> Inlines
+blocksToInlines' = blocksToInlinesWithSep parSep
+ where parSep = B.space <> B.str "¶" <> B.space
blocksToInlines :: [Block] -> [Inline]
-blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space]
+blocksToInlines = B.toList . blocksToInlines'
--
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
index e19dba3e2..9d63555c2 100644
--- a/src/Text/Pandoc/Slides.hs
+++ b/src/Text/Pandoc/Slides.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Slides
- Copyright : Copyright (C) 2012-2016 John MacFarlane
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -40,9 +40,9 @@ getSlideLevel = go 6
| otherwise = go least (x:xs)
go least (_ : xs) = go least xs
go least [] = least
- nonHOrHR (Header{}) = False
- nonHOrHR (HorizontalRule) = False
- nonHOrHR _ = True
+ nonHOrHR Header{} = False
+ nonHOrHR HorizontalRule = False
+ nonHOrHR _ = True
-- | Prepare a block list to be passed to hierarchicalize.
prepSlides :: Int -> [Block] -> [Block]
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index d15d27438..4be0d081c 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -1,7 +1,9 @@
-{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
- OverloadedStrings, GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleInstances #-}
+
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeSynonymInstances #-}
{-
-Copyright (C) 2009-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2009-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Templates
- Copyright : Copyright (C) 2009-2016 John MacFarlane
+ Copyright : Copyright (C) 2009-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -31,44 +33,52 @@ A simple templating system with variable substitution and conditionals.
-}
-module Text.Pandoc.Templates ( renderTemplate
+module Text.Pandoc.Templates ( module Text.DocTemplates
, renderTemplate'
- , TemplateTarget
- , varListToJSON
- , compileTemplate
- , Template
- , getDefaultTemplate ) where
+ , getDefaultTemplate
+ ) where
-import Text.DocTemplates (Template, TemplateTarget, compileTemplate,
- renderTemplate, applyTemplate,
- varListToJSON)
-import Data.Aeson (ToJSON(..))
+import Control.Monad.Except (throwError)
+import Data.Aeson (ToJSON (..))
import qualified Data.Text as T
-import System.FilePath ((</>), (<.>))
-import qualified Control.Exception.Extensible as E (try, IOException)
-import Text.Pandoc.Shared (readDataFileUTF8)
+import System.FilePath ((<.>), (</>))
+import Text.DocTemplates (Template, TemplateTarget, applyTemplate,
+ compileTemplate, renderTemplate, varListToJSON)
+import Text.Pandoc.Class (PandocMonad, readDataFile)
+import Text.Pandoc.Error
+import qualified Text.Pandoc.UTF8 as UTF8
-- | Get default template for the specified writer.
-getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
- -> String -- ^ Name of writer
- -> IO (Either E.IOException String)
-getDefaultTemplate user writer = do
+getDefaultTemplate :: PandocMonad m
+ => String -- ^ Name of writer
+ -> m String
+getDefaultTemplate writer = do
let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions
case format of
- "native" -> return $ Right ""
- "json" -> return $ Right ""
- "docx" -> return $ Right ""
- "fb2" -> return $ Right ""
- "odt" -> getDefaultTemplate user "opendocument"
- "markdown_strict" -> getDefaultTemplate user "markdown"
- "multimarkdown" -> getDefaultTemplate user "markdown"
- "markdown_github" -> getDefaultTemplate user "markdown"
- "markdown_mmd" -> getDefaultTemplate user "markdown"
- "markdown_phpextra" -> getDefaultTemplate user "markdown"
+ "native" -> return ""
+ "json" -> return ""
+ "docx" -> return ""
+ "fb2" -> return ""
+ "pptx" -> return ""
+ "odt" -> getDefaultTemplate "opendocument"
+ "html" -> getDefaultTemplate "html5"
+ "docbook" -> getDefaultTemplate "docbook5"
+ "epub" -> getDefaultTemplate "epub3"
+ "beamer" -> getDefaultTemplate "latex"
+ "markdown_strict" -> getDefaultTemplate "markdown"
+ "multimarkdown" -> getDefaultTemplate "markdown"
+ "markdown_github" -> getDefaultTemplate "markdown"
+ "markdown_mmd" -> getDefaultTemplate "markdown"
+ "markdown_phpextra" -> getDefaultTemplate "markdown"
+ "gfm" -> getDefaultTemplate "commonmark"
_ -> let fname = "templates" </> "default" <.> format
- in E.try $ readDataFileUTF8 user fname
-
--- | Like 'applyTemplate', but raising an error if compilation fails.
-renderTemplate' :: (ToJSON a, TemplateTarget b) => String -> a -> b
-renderTemplate' template = either error id . applyTemplate (T.pack template)
+ in UTF8.toString <$> readDataFile fname
+-- | Like 'applyTemplate', but runs in PandocMonad and
+-- raises an error if compilation fails.
+renderTemplate' :: (PandocMonad m, ToJSON a, TemplateTarget b)
+ => String -> a -> m b
+renderTemplate' template context =
+ case applyTemplate (T.pack template) context of
+ Left e -> throwError (PandocTemplateError e)
+ Right r -> return r
diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs
new file mode 100644
index 000000000..949618178
--- /dev/null
+++ b/src/Text/Pandoc/Translations.hs
@@ -0,0 +1,112 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2017-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Translations
+ Copyright : Copyright (C) 2017-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Data types for localization.
+
+Translations are stored in @data/translations/langname.trans@,
+where langname can be the full BCP47 language specifier, or
+just the language part. File format is:
+
+> # A comment, ignored
+> Figure: Figura
+> Index: Indeksi
+
+-}
+module Text.Pandoc.Translations (
+ Term(..)
+ , Translations
+ , lookupTerm
+ , readTranslations
+ )
+where
+import Data.Aeson.Types (typeMismatch)
+import qualified Data.HashMap.Strict as HM
+import qualified Data.Map as M
+import Data.Text as T
+import Data.Yaml as Yaml
+import GHC.Generics (Generic)
+import Text.Pandoc.Shared (safeRead)
+import qualified Text.Pandoc.UTF8 as UTF8
+
+data Term =
+ Preface
+ | References
+ | Abstract
+ | Bibliography
+ | Chapter
+ | Appendix
+ | Contents
+ | ListOfFigures
+ | ListOfTables
+ | Index
+ | Figure
+ | Table
+ | Part
+ | Page
+ | See
+ | SeeAlso
+ | Encl
+ | Cc
+ | To
+ | Proof
+ | Glossary
+ | Listing
+ deriving (Show, Eq, Ord, Generic, Enum, Read)
+
+newtype Translations = Translations (M.Map Term String)
+ deriving (Show, Generic, Monoid)
+
+instance FromJSON Term where
+ parseJSON (String t) = case safeRead (T.unpack t) of
+ Just t' -> pure t'
+ Nothing -> fail $ "Invalid Term name " ++
+ show t
+ parseJSON invalid = typeMismatch "Term" invalid
+
+instance FromJSON Translations where
+ parseJSON (Object hm) = do
+ xs <- mapM addItem (HM.toList hm)
+ return $ Translations (M.fromList xs)
+ where addItem (k,v) =
+ case safeRead (T.unpack k) of
+ Nothing -> fail $ "Invalid Term name " ++ show k
+ Just t ->
+ case v of
+ (String s) -> return (t, T.unpack $ T.strip s)
+ inv -> typeMismatch "String" inv
+ parseJSON invalid = typeMismatch "Translations" invalid
+
+lookupTerm :: Term -> Translations -> Maybe String
+lookupTerm t (Translations tm) = M.lookup t tm
+
+readTranslations :: String -> Either String Translations
+readTranslations s =
+ case Yaml.decodeEither' $ UTF8.fromString s of
+ Left err' -> Left $ prettyPrintParseException err'
+ Right t -> Right t
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 62a662029..3f759958f 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -1,5 +1,6 @@
+{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.UTF8
- Copyright : Copyright (C) 2010-2016 John MacFarlane
+ Copyright : Copyright (C) 2010-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -28,16 +29,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
-}
module Text.Pandoc.UTF8 ( readFile
- , writeFile
, getContents
+ , writeFileWith
+ , writeFile
+ , putStrWith
, putStr
+ , putStrLnWith
, putStrLn
+ , hPutStrWith
, hPutStr
+ , hPutStrLnWith
, hPutStrLn
, hGetContents
, toString
+ , toText
, fromString
+ , fromText
, toStringLazy
+ , fromTextLazy
+ , toTextLazy
, fromStringLazy
, encodePath
, decodeArg
@@ -45,39 +55,59 @@ module Text.Pandoc.UTF8 ( readFile
where
-import System.IO hiding (readFile, writeFile, getContents,
- putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
-import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
-import qualified System.IO as IO
import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.Text.Encoding as T
+import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
+import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
+import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
+ putStrLn, readFile, writeFile)
+import qualified System.IO as IO
readFile :: FilePath -> IO String
readFile f = do
h <- openFile (encodePath f) ReadMode
hGetContents h
-writeFile :: FilePath -> String -> IO ()
-writeFile f s = withFile (encodePath f) WriteMode $ \h -> hPutStr h s
-
getContents :: IO String
getContents = hGetContents stdin
+writeFileWith :: Newline -> FilePath -> String -> IO ()
+writeFileWith eol f s =
+ withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s
+
+writeFile :: FilePath -> String -> IO ()
+writeFile = writeFileWith nativeNewline
+
+putStrWith :: Newline -> String -> IO ()
+putStrWith eol s = hPutStrWith eol stdout s
+
putStr :: String -> IO ()
-putStr s = hPutStr stdout s
+putStr = putStrWith nativeNewline
+
+putStrLnWith :: Newline -> String -> IO ()
+putStrLnWith eol s = hPutStrLnWith eol stdout s
putStrLn :: String -> IO ()
-putStrLn s = hPutStrLn stdout s
+putStrLn = putStrLnWith nativeNewline
+
+hPutStrWith :: Newline -> Handle -> String -> IO ()
+hPutStrWith eol h s =
+ hSetNewlineMode h (NewlineMode eol eol) >>
+ hSetEncoding h utf8 >> IO.hPutStr h s
hPutStr :: Handle -> String -> IO ()
-hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s
+hPutStr = hPutStrWith nativeNewline
+
+hPutStrLnWith :: Newline -> Handle -> String -> IO ()
+hPutStrLnWith eol h s =
+ hSetNewlineMode h (NewlineMode eol eol) >>
+ hSetEncoding h utf8 >> IO.hPutStrLn h s
hPutStrLn :: Handle -> String -> IO ()
-hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
+hPutStrLn = hPutStrLnWith nativeNewline
hGetContents :: Handle -> IO String
hGetContents = fmap toString . B.hGetContents
@@ -85,34 +115,47 @@ hGetContents = fmap toString . B.hGetContents
-- >> hSetNewlineMode h universalNewlineMode
-- >> IO.hGetContents h
--- | Drop BOM (byte order marker) if present at beginning of string.
--- Note that Data.Text converts the BOM to code point FEFF, zero-width
--- no-break space, so if the string begins with this we strip it off.
-dropBOM :: String -> String
-dropBOM ('\xFEFF':xs) = xs
-dropBOM xs = xs
-
-filterCRs :: String -> String
-filterCRs ('\r':'\n':xs) = '\n': filterCRs xs
-filterCRs ('\r':xs) = '\n' : filterCRs xs
-filterCRs (x:xs) = x : filterCRs xs
-filterCRs [] = []
+-- | Convert UTF8-encoded ByteString to Text, also
+-- removing '\r' characters.
+toText :: B.ByteString -> T.Text
+toText = T.decodeUtf8 . filterCRs . dropBOM
+ where dropBOM bs =
+ if "\xEF\xBB\xBF" `B.isPrefixOf` bs
+ then B.drop 3 bs
+ else bs
+ filterCRs = B.filter (/='\r')
-- | Convert UTF8-encoded ByteString to String, also
-- removing '\r' characters.
toString :: B.ByteString -> String
-toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8
+toString = T.unpack . toText
-fromString :: String -> B.ByteString
-fromString = T.encodeUtf8 . T.pack
+-- | Convert UTF8-encoded ByteString to Text, also
+-- removing '\r' characters.
+toTextLazy :: BL.ByteString -> TL.Text
+toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM
+ where dropBOM bs =
+ if "\xEF\xBB\xBF" `BL.isPrefixOf` bs
+ then BL.drop 3 bs
+ else bs
+ filterCRs = BL.filter (/='\r')
-- | Convert UTF8-encoded ByteString to String, also
-- removing '\r' characters.
toStringLazy :: BL.ByteString -> String
-toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8
+toStringLazy = TL.unpack . toTextLazy
+
+fromText :: T.Text -> B.ByteString
+fromText = T.encodeUtf8
+
+fromTextLazy :: TL.Text -> BL.ByteString
+fromTextLazy = TL.encodeUtf8
+
+fromString :: String -> B.ByteString
+fromString = fromText . T.pack
fromStringLazy :: String -> BL.ByteString
-fromStringLazy = TL.encodeUtf8 . TL.pack
+fromStringLazy = fromTextLazy . TL.pack
encodePath :: FilePath -> FilePath
encodePath = id
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
index 5d05fa303..4d99324db 100644
--- a/src/Text/Pandoc/UUID.hs
+++ b/src/Text/Pandoc/UUID.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.UUID
- Copyright : Copyright (C) 2010-2016 John MacFarlane
+ Copyright : Copyright (C) 2010-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,13 +29,12 @@ UUID generation using Version 4 (random method) described
in RFC4122. See http://tools.ietf.org/html/rfc4122
-}
-module Text.Pandoc.UUID ( UUID, getRandomUUID ) where
+module Text.Pandoc.UUID ( UUID(..), getRandomUUID, getUUID ) where
-import Text.Printf ( printf )
-import System.Random ( randomIO )
+import Data.Bits (clearBit, setBit)
import Data.Word
-import Data.Bits ( setBit, clearBit )
-import Control.Monad ( liftM )
+import System.Random (RandomGen, getStdGen, randoms)
+import Text.Printf (printf)
data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
@@ -64,14 +63,15 @@ instance Show UUID where
printf "%02x" o ++
printf "%02x" p
-getRandomUUID :: IO UUID
-getRandomUUID = do
- let getRN :: a -> IO Word8
- getRN _ = liftM fromIntegral (randomIO :: IO Int)
- [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] <- mapM getRN ([1..16] :: [Int])
+getUUID :: RandomGen g => g -> UUID
+getUUID gen =
+ let [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] = take 16 $ randoms gen :: [Word8]
-- set variant
- let i' = i `setBit` 7 `clearBit` 6
+ i' = i `setBit` 7 `clearBit` 6
-- set version (0100 for random)
- let g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4
- return $ UUID a b c d e f g' h i' j k l m n o p
+ g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4
+ in
+ UUID a b c d e f g' h i' j k l m n o p
+getRandomUUID :: IO UUID
+getRandomUUID = getUUID <$> getStdGen
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
new file mode 100644
index 000000000..596a8680e
--- /dev/null
+++ b/src/Text/Pandoc/Writers.hs
@@ -0,0 +1,195 @@
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+{- |
+ Module : Text.Pandoc
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This helper module exports all writers functions.
+-}
+module Text.Pandoc.Writers
+ (
+ -- * Writers: converting /from/ Pandoc format
+ Writer(..)
+ , writers
+ , writeAsciiDoc
+ , writeBeamer
+ , writeCommonMark
+ , writeConTeXt
+ , writeCustom
+ , writeDZSlides
+ , writeDocbook4
+ , writeDocbook5
+ , writeDocx
+ , writeDokuWiki
+ , writeEPUB2
+ , writeEPUB3
+ , writeFB2
+ , writeHaddock
+ , writeHtml4
+ , writeHtml4String
+ , writeHtml5
+ , writeHtml5String
+ , writeICML
+ , writeJATS
+ , writeJSON
+ , writeLaTeX
+ , writeMan
+ , writeMarkdown
+ , writeMediaWiki
+ , writeMs
+ , writeMuse
+ , writeNative
+ , writeODT
+ , writeOPML
+ , writeOpenDocument
+ , writeOrg
+ , writePlain
+ , writePowerpoint
+ , writeRST
+ , writeRTF
+ , writeRevealJs
+ , writeS5
+ , writeSlideous
+ , writeSlidy
+ , writeTEI
+ , writeTexinfo
+ , writeTextile
+ , writeZimWiki
+ , getWriter
+ ) where
+
+import Data.Aeson
+import qualified Data.ByteString.Lazy as BL
+import Data.List (intercalate)
+import Data.Text (Text)
+import Text.Pandoc.Class
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Writers.AsciiDoc
+import Text.Pandoc.Writers.CommonMark
+import Text.Pandoc.Writers.ConTeXt
+import Text.Pandoc.Writers.Custom
+import Text.Pandoc.Writers.Docbook
+import Text.Pandoc.Writers.Docx
+import Text.Pandoc.Writers.DokuWiki
+import Text.Pandoc.Writers.EPUB
+import Text.Pandoc.Writers.FB2
+import Text.Pandoc.Writers.Haddock
+import Text.Pandoc.Writers.HTML
+import Text.Pandoc.Writers.ICML
+import Text.Pandoc.Writers.JATS
+import Text.Pandoc.Writers.LaTeX
+import Text.Pandoc.Writers.Man
+import Text.Pandoc.Writers.Markdown
+import Text.Pandoc.Writers.MediaWiki
+import Text.Pandoc.Writers.Ms
+import Text.Pandoc.Writers.Muse
+import Text.Pandoc.Writers.Native
+import Text.Pandoc.Writers.ODT
+import Text.Pandoc.Writers.OpenDocument
+import Text.Pandoc.Writers.OPML
+import Text.Pandoc.Writers.Org
+import Text.Pandoc.Writers.Powerpoint
+import Text.Pandoc.Writers.RST
+import Text.Pandoc.Writers.RTF
+import Text.Pandoc.Writers.TEI
+import Text.Pandoc.Writers.Texinfo
+import Text.Pandoc.Writers.Textile
+import Text.Pandoc.Writers.ZimWiki
+import Text.Parsec.Error
+
+data Writer m = TextWriter (WriterOptions -> Pandoc -> m Text)
+ | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString)
+
+-- | Association list of formats and writers.
+writers :: PandocMonad m => [ ( String, Writer m) ]
+writers = [
+ ("native" , TextWriter writeNative)
+ ,("json" , TextWriter $ \o d -> return $ writeJSON o d)
+ ,("docx" , ByteStringWriter writeDocx)
+ ,("odt" , ByteStringWriter writeODT)
+ ,("pptx" , ByteStringWriter writePowerpoint)
+ ,("epub" , ByteStringWriter writeEPUB3)
+ ,("epub2" , ByteStringWriter writeEPUB2)
+ ,("epub3" , ByteStringWriter writeEPUB3)
+ ,("fb2" , TextWriter writeFB2)
+ ,("html" , TextWriter writeHtml5String)
+ ,("html4" , TextWriter writeHtml4String)
+ ,("html5" , TextWriter writeHtml5String)
+ ,("icml" , TextWriter writeICML)
+ ,("s5" , TextWriter writeS5)
+ ,("slidy" , TextWriter writeSlidy)
+ ,("slideous" , TextWriter writeSlideous)
+ ,("dzslides" , TextWriter writeDZSlides)
+ ,("revealjs" , TextWriter writeRevealJs)
+ ,("docbook" , TextWriter writeDocbook5)
+ ,("docbook4" , TextWriter writeDocbook4)
+ ,("docbook5" , TextWriter writeDocbook5)
+ ,("jats" , TextWriter writeJATS)
+ ,("opml" , TextWriter writeOPML)
+ ,("opendocument" , TextWriter writeOpenDocument)
+ ,("latex" , TextWriter writeLaTeX)
+ ,("beamer" , TextWriter writeBeamer)
+ ,("context" , TextWriter writeConTeXt)
+ ,("texinfo" , TextWriter writeTexinfo)
+ ,("man" , TextWriter writeMan)
+ ,("ms" , TextWriter writeMs)
+ ,("markdown" , TextWriter writeMarkdown)
+ ,("markdown_strict" , TextWriter writeMarkdown)
+ ,("markdown_phpextra" , TextWriter writeMarkdown)
+ ,("markdown_github" , TextWriter writeMarkdown)
+ ,("markdown_mmd" , TextWriter writeMarkdown)
+ ,("plain" , TextWriter writePlain)
+ ,("rst" , TextWriter writeRST)
+ ,("mediawiki" , TextWriter writeMediaWiki)
+ ,("dokuwiki" , TextWriter writeDokuWiki)
+ ,("zimwiki" , TextWriter writeZimWiki)
+ ,("textile" , TextWriter writeTextile)
+ ,("rtf" , TextWriter writeRTF)
+ ,("org" , TextWriter writeOrg)
+ ,("asciidoc" , TextWriter writeAsciiDoc)
+ ,("haddock" , TextWriter writeHaddock)
+ ,("commonmark" , TextWriter writeCommonMark)
+ ,("gfm" , TextWriter writeCommonMark)
+ ,("tei" , TextWriter writeTEI)
+ ,("muse" , TextWriter writeMuse)
+ ]
+
+-- | Retrieve writer, extensions based on formatSpec (format+extensions).
+getWriter :: PandocMonad m => String -> Either String (Writer m, Extensions)
+getWriter s
+ = case parseFormatSpec s of
+ Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]
+ Right (writerName, setExts) ->
+ case lookup writerName writers of
+ Nothing -> Left $ "Unknown writer: " ++ writerName
+ Just r -> Right (r, setExts $
+ getDefaultExtensions writerName)
+
+writeJSON :: WriterOptions -> Pandoc -> Text
+writeJSON _ = UTF8.toText . BL.toStrict . encode
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index e9d3dccf1..f91fa8fa0 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.AsciiDoc
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -37,49 +37,59 @@ that it has omitted the construct.
AsciiDoc: <http://www.methods.co.nz/asciidoc/>
-}
module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where
+import Control.Monad.State.Strict
+import Data.Aeson (Result (..), Value (String), fromJSON, toJSON)
+import Data.Char (isPunctuation, isSpace)
+import Data.List (intercalate, intersperse, stripPrefix)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe, isJust)
+import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, space)
-import Data.Maybe (fromMaybe)
-import Data.List ( stripPrefix, intersperse, intercalate )
import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Control.Monad.State
-import qualified Data.Map as M
-import Data.Aeson (Value(String), fromJSON, toJSON, Result(..))
-import qualified Data.Text as T
-import Data.Char (isSpace, isPunctuation)
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared
-data WriterState = WriterState { defListMarker :: String
+data WriterState = WriterState { defListMarker :: String
, orderedListLevel :: Int
, bulletListLevel :: Int
, intraword :: Bool
+ , autoIds :: Set.Set String
}
-- | Convert Pandoc to AsciiDoc.
-writeAsciiDoc :: WriterOptions -> Pandoc -> String
+writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeAsciiDoc opts document =
- evalState (pandocToAsciiDoc opts document) WriterState{
+ evalStateT (pandocToAsciiDoc opts document) WriterState{
defListMarker = "::"
, orderedListLevel = 1
, bulletListLevel = 1
, intraword = False
+ , autoIds = Set.empty
}
+type ADW = StateT WriterState
+
-- | Return asciidoc representation of document.
-pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String
+pandocToAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> ADW m Text
pandocToAsciiDoc opts (Pandoc meta blocks) = do
let titleblock = not $ null (docTitle meta) && null (docAuthors meta) &&
null (docDate meta)
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
+ let render' :: Doc -> Text
+ render' = render colwidth
metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToAsciiDoc opts)
- (fmap (render colwidth) . inlineListToAsciiDoc opts)
+ (fmap render' . blockListToAsciiDoc opts)
+ (fmap render' . inlineListToAsciiDoc opts)
meta
let addTitleLine (String t) = String $
t <> "\n" <> T.replicate (T.length t) "="
@@ -93,12 +103,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
let context = defField "body" main
$ defField "toc"
(writerTableOfContents opts &&
- writerTemplate opts /= Nothing)
- $ defField "titleblock" titleblock
- $ metadata'
+ isJust (writerTemplate opts))
+ $defField "titleblock" titleblock metadata'
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Escape special characters for AsciiDoc.
escapeString :: String -> String
@@ -118,18 +127,19 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker
beginsWithOrderedListMarker :: String -> Bool
beginsWithOrderedListMarker str =
case runParser olMarker defaultParserState "para start" (take 10 str) of
- Left _ -> False
- Right _ -> True
+ Left _ -> False
+ Right _ -> True
-- | Convert Pandoc block element to asciidoc.
-blockToAsciiDoc :: WriterOptions -- ^ Options
+blockToAsciiDoc :: PandocMonad m
+ => WriterOptions -- ^ Options
-> Block -- ^ Block element
- -> State WriterState Doc
+ -> ADW m Doc
blockToAsciiDoc _ Null = return empty
blockToAsciiDoc opts (Plain inlines) = do
contents <- inlineListToAsciiDoc opts inlines
return $ contents <> blankline
-blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
+blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) =
blockToAsciiDoc opts (Para [Image attr alt (src,tit)])
blockToAsciiDoc opts (Para inlines) = do
contents <- inlineListToAsciiDoc opts inlines
@@ -145,9 +155,11 @@ blockToAsciiDoc opts (LineBlock lns) = do
let joinWithLinefeeds = nowrap . mconcat . intersperse cr
contents <- joinWithLinefeeds <$> mapM docify lns
return $ "[verse]" $$ text "--" $$ contents $$ text "--" $$ blankline
-blockToAsciiDoc _ (RawBlock f s)
+blockToAsciiDoc _ b@(RawBlock f s)
| f == "asciidoc" = return $ text s
- | otherwise = return empty
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
blockToAsciiDoc _ HorizontalRule =
return $ blankline <> text "'''''" <> blankline
blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
@@ -155,21 +167,25 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
let len = offset contents
-- ident seem to be empty most of the time and asciidoc will generate them automatically
-- so lets make them not show up when null
- let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
+ ids <- gets autoIds
+ let autoId = uniqueIdent inlines ids
+ modify $ \st -> st{ autoIds = Set.insert autoId ids }
+ let identifier = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId)
+ then empty else "[[" <> text ident <> "]]"
let setext = writerSetextHeaders opts
- return $
+ return
(if setext
then
identifier $$ contents $$
(case level of
- 1 -> text $ replicate len '-'
- 2 -> text $ replicate len '~'
- 3 -> text $ replicate len '^'
- 4 -> text $ replicate len '+'
- _ -> empty) <> blankline
+ 1 -> text $ replicate len '-'
+ 2 -> text $ replicate len '~'
+ 3 -> text $ replicate len '^'
+ 4 -> text $ replicate len '+'
+ _ -> empty) <> blankline
else
identifier $$ text (replicate level '=') <> space <> contents <> blankline)
-blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ (flush $
+blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush (
if null classes
then "...." $$ text str $$ "...."
else attrs $$ "----" $$ text str $$ "----")
@@ -194,7 +210,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do
let isSimple = all (== 0) widths
let relativePercentWidths = if isSimple
then widths
- else map (/ (sum widths)) widths
+ else map (/ sum widths) widths
let widths'' :: [Integer]
widths'' = map (floor . (* 100)) relativePercentWidths
-- ensure that the widths sum to 100
@@ -210,7 +226,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do
AlignCenter -> "^"
AlignRight -> ">"
AlignDefault -> "") ++
- if wi == 0 then "" else (show wi ++ "%")
+ if wi == 0 then "" else show wi ++ "%"
let headerspec = if all null headers
then empty
else text "options=\"header\","
@@ -256,21 +272,21 @@ blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do
let markers' = map (\m -> if length m < 3
then m ++ replicate (3 - length m) ' '
else m) markers
- contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $
- zip markers' items
+ contents <- zipWithM (orderedListItemToAsciiDoc opts) markers' items
return $ cat contents <> blankline
blockToAsciiDoc opts (DefinitionList items) = do
contents <- mapM (definitionListItemToAsciiDoc opts) items
return $ cat contents <> blankline
blockToAsciiDoc opts (Div (ident,_,_) bs) = do
- let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
+ let identifier = if null ident then empty else ("[[" <> text ident <> "]]")
contents <- blockListToAsciiDoc opts bs
return $ identifier $$ contents
-- | Convert bullet list item (list of blocks) to asciidoc.
-bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToAsciiDoc :: PandocMonad m
+ => WriterOptions -> [Block] -> ADW m Doc
bulletListItemToAsciiDoc opts blocks = do
- let addBlock :: Doc -> Block -> State WriterState Doc
+ let addBlock :: PandocMonad m => Doc -> Block -> ADW m Doc
addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
return $ d <> cr <> chomp x
@@ -278,7 +294,7 @@ bulletListItemToAsciiDoc opts blocks = do
return $ d <> cr <> chomp x
addBlock d b = do x <- blockToAsciiDoc opts b
return $ d <> cr <> text "+" <> cr <> chomp x
- lev <- bulletListLevel `fmap` get
+ lev <- gets bulletListLevel
modify $ \s -> s{ bulletListLevel = lev + 1 }
contents <- foldM addBlock empty blocks
modify $ \s -> s{ bulletListLevel = lev }
@@ -286,37 +302,38 @@ bulletListItemToAsciiDoc opts blocks = do
return $ marker <> text " " <> contents <> cr
-- | Convert ordered list item (a list of blocks) to asciidoc.
-orderedListItemToAsciiDoc :: WriterOptions -- ^ options
+orderedListItemToAsciiDoc :: PandocMonad m
+ => WriterOptions -- ^ options
-> String -- ^ list item marker
-> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
+ -> ADW m Doc
orderedListItemToAsciiDoc opts marker blocks = do
- let addBlock :: Doc -> Block -> State WriterState Doc
- addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
+ let addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
return $ d <> cr <> chomp x
addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
return $ d <> cr <> chomp x
addBlock d b = do x <- blockToAsciiDoc opts b
return $ d <> cr <> text "+" <> cr <> chomp x
- lev <- orderedListLevel `fmap` get
+ lev <- gets orderedListLevel
modify $ \s -> s{ orderedListLevel = lev + 1 }
contents <- foldM addBlock empty blocks
modify $ \s -> s{ orderedListLevel = lev }
return $ text marker <> text " " <> contents <> cr
-- | Convert definition list item (label, list of blocks) to asciidoc.
-definitionListItemToAsciiDoc :: WriterOptions
+definitionListItemToAsciiDoc :: PandocMonad m
+ => WriterOptions
-> ([Inline],[[Block]])
- -> State WriterState Doc
+ -> ADW m Doc
definitionListItemToAsciiDoc opts (label, defs) = do
labelText <- inlineListToAsciiDoc opts label
- marker <- defListMarker `fmap` get
+ marker <- gets defListMarker
if marker == "::"
then modify (\st -> st{ defListMarker = ";;"})
else modify (\st -> st{ defListMarker = "::"})
let divider = cr <> text "+" <> cr
- let defsToAsciiDoc :: [Block] -> State WriterState Doc
+ let defsToAsciiDoc :: PandocMonad m => [Block] -> ADW m Doc
defsToAsciiDoc ds = (vcat . intersperse divider . map chomp)
`fmap` mapM (blockToAsciiDoc opts) ds
defs' <- mapM defsToAsciiDoc defs
@@ -325,15 +342,16 @@ definitionListItemToAsciiDoc opts (label, defs) = do
return $ labelText <> text marker <> cr <> contents <> cr
-- | Convert list of Pandoc block elements to asciidoc.
-blockListToAsciiDoc :: WriterOptions -- ^ Options
+blockListToAsciiDoc :: PandocMonad m
+ => WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
- -> State WriterState Doc
+ -> ADW m Doc
blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks
data SpacyLocation = End | Start
-- | Convert list of Pandoc inline elements to asciidoc.
-inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToAsciiDoc :: PandocMonad m => WriterOptions -> [Inline] -> ADW m Doc
inlineListToAsciiDoc opts lst = do
oldIntraword <- gets intraword
setIntraword False
@@ -369,14 +387,14 @@ inlineListToAsciiDoc opts lst = do
isSpacy Start (Str (c:_)) = isPunctuation c || isSpace c
isSpacy _ _ = False
-setIntraword :: Bool -> State WriterState ()
+setIntraword :: PandocMonad m => Bool -> ADW m ()
setIntraword b = modify $ \st -> st{ intraword = b }
-withIntraword :: State WriterState a -> State WriterState a
+withIntraword :: PandocMonad m => ADW m a -> ADW m a
withIntraword p = setIntraword True *> p <* setIntraword False
-- | Convert Pandoc inline element to asciidoc.
-inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc
+inlineToAsciiDoc :: PandocMonad m => WriterOptions -> Inline -> ADW m Doc
inlineToAsciiDoc opts (Emph lst) = do
contents <- inlineListToAsciiDoc opts lst
isIntraword <- gets intraword
@@ -408,16 +426,19 @@ inlineToAsciiDoc _ (Math InlineMath str) =
return $ "latexmath:[$" <> text str <> "$]"
inlineToAsciiDoc _ (Math DisplayMath str) =
return $ "latexmath:[\\[" <> text str <> "\\]]"
-inlineToAsciiDoc _ (RawInline f s)
+inlineToAsciiDoc _ il@(RawInline f s)
| f == "asciidoc" = return $ text s
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
| otherwise = return empty
-inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
+inlineToAsciiDoc _ LineBreak = return $ " +" <> cr
inlineToAsciiDoc _ Space = return space
inlineToAsciiDoc opts SoftBreak =
case writerWrapText opts of
- WrapAuto -> return space
+ WrapAuto -> return space
WrapPreserve -> return cr
- WrapNone -> return space
+ WrapNone -> return space
inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do
-- relative: link:downloads/foo.zip[download foo.zip]
@@ -431,20 +452,20 @@ inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do
let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
let useAuto = case txt of
[Str s] | escapeURI s == srcSuffix -> True
- _ -> False
+ _ -> False
return $ if useAuto
then text srcSuffix
else prefix <> text src <> "[" <> linktext <> "]"
inlineToAsciiDoc opts (Image attr alternate (src, tit)) = do
-- image:images/logo.png[Company logo, title="blah"]
- let txt = if (null alternate) || (alternate == [Str ""])
+ let txt = if null alternate || (alternate == [Str ""])
then [Str "image"]
else alternate
linktext <- inlineListToAsciiDoc opts txt
let linktitle = if null tit
then empty
else ",title=\"" <> text tit <> "\""
- showDim dir = case (dimension dir attr) of
+ showDim dir = case dimension dir attr of
Just (Percent a) ->
["scaledwidth=" <> text (show (Percent a))]
Just dim ->
@@ -464,6 +485,6 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do
-- asciidoc can't handle blank lines in notes
inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]"
inlineToAsciiDoc opts (Span (ident,_,_) ils) = do
- let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
+ let identifier = if null ident then empty else ("[[" <> text ident <> "]]")
contents <- inlineListToAsciiDoc opts ils
return $ identifier <> contents
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 88a92eb47..7a6eb2948 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -1,5 +1,6 @@
+{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2015-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.CommonMark
- Copyright : Copyright (C) 2015 John MacFarlane
+ Copyright : Copyright (C) 2015-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -31,34 +32,43 @@ CommonMark: <http://commonmark.org>
-}
module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
-import Text.Pandoc.Writers.HTML (writeHtmlString)
+import CMarkGFM
+import Control.Monad.State.Strict (State, get, modify, runState)
+import Data.Foldable (foldrM)
+import Data.List (transpose)
+import Data.Monoid (Any (..), (<>))
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.HTTP (urlEncode)
+import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
-import Text.Pandoc.Shared (isTightList, linesToPara)
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (isTightList, linesToPara, substitute)
import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Walk (query, walk, walkM)
+import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import CMark
-import qualified Data.Text as T
-import Control.Monad.Identity (runIdentity, Identity)
-import Control.Monad.State (runState, State, modify, get)
-import Text.Pandoc.Walk (walkM)
-- | Convert Pandoc to CommonMark.
-writeCommonMark :: WriterOptions -> Pandoc -> String
-writeCommonMark opts (Pandoc meta blocks) = rendered
- where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes')
- (blocks', notes) = runState (walkM processNotes blocks) []
- notes' = if null notes
- then []
- else [OrderedList (1, Decimal, Period) $ reverse notes]
- metadata = runIdentity $ metaToJSON opts
- (blocksToCommonMark opts)
- (inlinesToCommonMark opts)
- meta
- context = defField "body" main $ metadata
- rendered = case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeCommonMark opts (Pandoc meta blocks) = do
+ let (blocks', notes) = runState (walkM processNotes blocks) []
+ notes' = if null notes
+ then []
+ else [OrderedList (1, Decimal, Period) $ reverse notes]
+ main <- blocksToCommonMark opts (blocks' ++ notes')
+ metadata <- metaToJSON opts
+ (blocksToCommonMark opts)
+ (inlinesToCommonMark opts)
+ meta
+ let context = defField "body" main metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
+
+softBreakToSpace :: Inline -> Inline
+softBreakToSpace SoftBreak = Space
+softBreakToSpace x = x
processNotes :: Inline -> State [[Block]] Inline
processNotes (Note bs) = do
@@ -70,111 +80,235 @@ processNotes x = return x
node :: NodeType -> [Node] -> Node
node = Node Nothing
-blocksToCommonMark :: WriterOptions -> [Block] -> Identity String
-blocksToCommonMark opts bs = return $
- T.unpack $ nodeToCommonmark cmarkOpts colwidth
- $ node DOCUMENT (blocksToNodes bs)
- where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
- colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
+blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m Text
+blocksToCommonMark opts bs = do
+ let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ nodes <- blocksToNodes opts bs
+ return $ T.stripEnd $
+ nodeToCommonmark cmarkOpts colwidth $
+ node DOCUMENT nodes
-inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String
+inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m Text
inlinesToCommonMark opts ils = return $
- T.unpack $ nodeToCommonmark cmarkOpts colwidth
- $ node PARAGRAPH (inlinesToNodes ils)
+ nodeToCommonmark cmarkOpts colwidth $
+ node PARAGRAPH (inlinesToNodes opts ils)
where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
-blocksToNodes :: [Block] -> [Node]
-blocksToNodes = foldr blockToNodes []
-
-blockToNodes :: Block -> [Node] -> [Node]
-blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :)
-blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :)
-blockToNodes (LineBlock lns) = blockToNodes $ linesToPara lns
-blockToNodes (CodeBlock (_,classes,_) xs) =
- (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :)
-blockToNodes (RawBlock fmt xs)
- | fmt == Format "html" = (node (HTML_BLOCK (T.pack xs)) [] :)
- | otherwise = (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] :)
-blockToNodes (BlockQuote bs) =
- (node BLOCK_QUOTE (blocksToNodes bs) :)
-blockToNodes (BulletList items) =
- (node (LIST ListAttributes{
- listType = BULLET_LIST,
- listDelim = PERIOD_DELIM,
- listTight = isTightList items,
- listStart = 1 }) (map (node ITEM . blocksToNodes) items) :)
-blockToNodes (OrderedList (start, _sty, delim) items) =
- (node (LIST ListAttributes{
- listType = ORDERED_LIST,
- listDelim = case delim of
- OneParen -> PAREN_DELIM
- TwoParens -> PAREN_DELIM
- _ -> PERIOD_DELIM,
- listTight = isTightList items,
- listStart = start }) (map (node ITEM . blocksToNodes) items) :)
-blockToNodes HorizontalRule = (node THEMATIC_BREAK [] :)
-blockToNodes (Header lev _ ils) = (node (HEADING lev) (inlinesToNodes ils) :)
-blockToNodes (Div _ bs) = (blocksToNodes bs ++)
-blockToNodes (DefinitionList items) = blockToNodes (BulletList items')
+blocksToNodes :: PandocMonad m => WriterOptions -> [Block] -> m [Node]
+blocksToNodes opts = foldrM (blockToNodes opts) []
+
+blockToNodes :: PandocMonad m => WriterOptions -> Block -> [Node] -> m [Node]
+blockToNodes opts (Plain xs) ns =
+ return (node PARAGRAPH (inlinesToNodes opts xs) : ns)
+blockToNodes opts (Para xs) ns =
+ return (node PARAGRAPH (inlinesToNodes opts xs) : ns)
+blockToNodes opts (LineBlock lns) ns = blockToNodes opts (linesToPara lns) ns
+blockToNodes _ (CodeBlock (_,classes,_) xs) ns = return
+ (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns)
+blockToNodes opts (RawBlock fmt xs) ns
+ | fmt == Format "html" && isEnabled Ext_raw_html opts
+ = return (node (HTML_BLOCK (T.pack xs)) [] : ns)
+ | fmt == Format "latex" || fmt == Format "tex" && isEnabled Ext_raw_tex opts
+ = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns)
+ | otherwise = return ns
+blockToNodes opts (BlockQuote bs) ns = do
+ nodes <- blocksToNodes opts bs
+ return (node BLOCK_QUOTE nodes : ns)
+blockToNodes opts (BulletList items) ns = do
+ nodes <- mapM (blocksToNodes opts) items
+ return (node (LIST ListAttributes{
+ listType = BULLET_LIST,
+ listDelim = PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = 1 }) (map (node ITEM) nodes) : ns)
+blockToNodes opts (OrderedList (start, _sty, delim) items) ns = do
+ nodes <- mapM (blocksToNodes opts) items
+ return (node (LIST ListAttributes{
+ listType = ORDERED_LIST,
+ listDelim = case delim of
+ OneParen -> PAREN_DELIM
+ TwoParens -> PAREN_DELIM
+ _ -> PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = start }) (map (node ITEM) nodes) : ns)
+blockToNodes _ HorizontalRule ns = return (node THEMATIC_BREAK [] : ns)
+blockToNodes opts (Header lev _ ils) ns =
+ return (node (HEADING lev) (inlinesToNodes opts ils) : ns)
+blockToNodes opts (Div attr bs) ns = do
+ nodes <- blocksToNodes opts bs
+ let op = tagWithAttributes opts True False "div" attr
+ if isEnabled Ext_raw_html opts
+ then return (node (HTML_BLOCK op) [] : nodes ++
+ [node (HTML_BLOCK (T.pack "</div>")) []] ++ ns)
+ else return (nodes ++ ns)
+blockToNodes opts (DefinitionList items) ns =
+ blockToNodes opts (BulletList items') ns
where items' = map dlToBullet items
- dlToBullet (term, ((Para xs : ys) : zs)) =
+ dlToBullet (term, (Para xs : ys) : zs) =
Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs
- dlToBullet (term, ((Plain xs : ys) : zs)) =
+ dlToBullet (term, (Plain xs : ys) : zs) =
Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs
dlToBullet (term, xs) =
Para term : concat xs
-blockToNodes t@(Table _ _ _ _ _) =
- (node (HTML_BLOCK (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :)
-blockToNodes Null = id
-
-inlinesToNodes :: [Inline] -> [Node]
-inlinesToNodes = foldr inlineToNodes []
-
-inlineToNodes :: Inline -> [Node] -> [Node]
-inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :)
-inlineToNodes Space = (node (TEXT (T.pack " ")) [] :)
-inlineToNodes LineBreak = (node LINEBREAK [] :)
-inlineToNodes SoftBreak = (node SOFTBREAK [] :)
-inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :)
-inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :)
-inlineToNodes (Strikeout xs) =
- ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes xs ++
- [node (HTML_INLINE (T.pack "</s>")) []]) ++ )
-inlineToNodes (Superscript xs) =
- ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes xs ++
+blockToNodes opts t@(Table capt aligns _widths headers rows) ns = do
+ let allcells = concat (headers:rows)
+ let isLineBreak LineBreak = Any True
+ isLineBreak _ = Any False
+ let isPlainOrPara [Para _] = True
+ isPlainOrPara [Plain _] = True
+ isPlainOrPara [] = True
+ isPlainOrPara _ = False
+ let isSimple = all isPlainOrPara allcells &&
+ not ( getAny (query isLineBreak allcells) )
+ if isEnabled Ext_pipe_tables opts && isSimple
+ then do
+ -- We construct a table manually as a CUSTOM_BLOCK, for
+ -- two reasons: (1) cmark-gfm currently doesn't support
+ -- rendering TABLE nodes; (2) we can align the column sides;
+ -- (3) we can render the caption as a regular paragraph.
+ let capt' = node PARAGRAPH (inlinesToNodes opts capt)
+ -- backslash | in code and raw:
+ let fixPipe (Code attr xs) =
+ Code attr (substitute "|" "\\|" xs)
+ fixPipe (RawInline format xs) =
+ RawInline format (substitute "|" "\\|" xs)
+ fixPipe x = x
+ let toCell [Plain ils] = T.strip
+ $ nodeToCommonmark [] Nothing
+ $ node (CUSTOM_INLINE mempty mempty)
+ $ inlinesToNodes opts
+ $ walk (fixPipe . softBreakToSpace) ils
+ toCell [Para ils] = T.strip
+ $ nodeToCommonmark [] Nothing
+ $ node (CUSTOM_INLINE mempty mempty)
+ $ inlinesToNodes opts
+ $ walk (fixPipe . softBreakToSpace) ils
+ toCell [] = ""
+ toCell xs = error $ "toCell encountered " ++ show xs
+ let separator = " | "
+ let starter = "| "
+ let ender = " |"
+ let rawheaders = map toCell headers
+ let rawrows = map (map toCell) rows
+ let maximum' [] = 0
+ maximum' xs = maximum xs
+ let colwidths = map (maximum' . map T.length) $
+ transpose (rawheaders:rawrows)
+ let toHeaderLine len AlignDefault = T.replicate len "-"
+ toHeaderLine len AlignLeft = ":" <>
+ T.replicate (max (len - 1) 1) "-"
+ toHeaderLine len AlignRight =
+ T.replicate (max (len - 1) 1) "-" <> ":"
+ toHeaderLine len AlignCenter = ":" <>
+ T.replicate (max (len - 2) 1) (T.pack "-") <> ":"
+ let rawheaderlines = zipWith toHeaderLine colwidths aligns
+ let headerlines = starter <> T.intercalate separator rawheaderlines <>
+ ender
+ let padContent (align, w) t' =
+ let padding = w - T.length t'
+ halfpadding = padding `div` 2
+ in case align of
+ AlignRight -> T.replicate padding " " <> t'
+ AlignCenter -> T.replicate halfpadding " " <> t' <>
+ T.replicate (padding - halfpadding) " "
+ _ -> t' <> T.replicate padding " "
+ let toRow xs = starter <> T.intercalate separator
+ (zipWith padContent (zip aligns colwidths) xs) <>
+ ender
+ let table' = toRow rawheaders <> "\n" <> headerlines <> "\n" <>
+ T.intercalate "\n" (map toRow rawrows)
+ return (node (CUSTOM_BLOCK table' mempty) [] :
+ if null capt
+ then ns
+ else capt' : ns)
+ else do -- fall back to raw HTML
+ s <- writeHtml5String def $! Pandoc nullMeta [t]
+ return (node (HTML_BLOCK s) [] : ns)
+blockToNodes _ Null ns = return ns
+
+inlinesToNodes :: WriterOptions -> [Inline] -> [Node]
+inlinesToNodes opts = foldr (inlineToNodes opts) []
+
+inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node]
+inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :)
+ where s' = if isEnabled Ext_smart opts
+ then unsmartify opts s
+ else s
+inlineToNodes _ Space = (node (TEXT (T.pack " ")) [] :)
+inlineToNodes _ LineBreak = (node LINEBREAK [] :)
+inlineToNodes opts SoftBreak
+ | isEnabled Ext_hard_line_breaks opts = (node LINEBREAK [] :)
+ | writerWrapText opts == WrapNone = (node (TEXT " ") [] :)
+ | otherwise = (node SOFTBREAK [] :)
+inlineToNodes opts (Emph xs) = (node EMPH (inlinesToNodes opts xs) :)
+inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :)
+inlineToNodes opts (Strikeout xs) =
+ if isEnabled Ext_strikeout opts
+ then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :)
+ else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</s>")) []]) ++ )
+inlineToNodes opts (Superscript xs) =
+ ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
[node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
-inlineToNodes (Subscript xs) =
- ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes xs ++
+inlineToNodes opts (Subscript xs) =
+ ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
[node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
-inlineToNodes (SmallCaps xs) =
- ((node (HTML_INLINE (T.pack "<span style=\"font-variant:small-caps;\">")) []
- : inlinesToNodes xs ++
+inlineToNodes opts (SmallCaps xs) =
+ ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
+ : inlinesToNodes opts xs ++
[node (HTML_INLINE (T.pack "</span>")) []]) ++ )
-inlineToNodes (Link _ ils (url,tit)) =
- (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
-inlineToNodes (Image _ ils (url,tit)) =
- (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
-inlineToNodes (RawInline fmt xs)
- | fmt == Format "html" = (node (HTML_INLINE (T.pack xs)) [] :)
- | otherwise = (node (CUSTOM_INLINE (T.pack xs) (T.empty)) [] :)
-inlineToNodes (Quoted qt ils) =
- ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++)
+inlineToNodes opts (Link _ ils (url,tit)) =
+ (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)
+-- title beginning with fig: indicates implicit figure
+inlineToNodes opts (Image alt ils (url,'f':'i':'g':':':tit)) =
+ inlineToNodes opts (Image alt ils (url,tit))
+inlineToNodes opts (Image _ ils (url,tit)) =
+ (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)
+inlineToNodes opts (RawInline fmt xs)
+ | fmt == Format "html" && isEnabled Ext_raw_html opts
+ = (node (HTML_INLINE (T.pack xs)) [] :)
+ | (fmt == Format "latex" || fmt == Format "tex") && isEnabled Ext_raw_tex opts
+ = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :)
+ | otherwise = id
+inlineToNodes opts (Quoted qt ils) =
+ ((node (TEXT start) [] :
+ inlinesToNodes opts ils ++ [node (TEXT end) []]) ++)
where (start, end) = case qt of
- SingleQuote -> (T.pack "‘", T.pack "’")
- DoubleQuote -> (T.pack "“", T.pack "”")
-inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :)
-inlineToNodes (Math mt str) =
- case mt of
- InlineMath ->
- (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
- DisplayMath ->
- (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
-inlineToNodes (Span _ ils) = (inlinesToNodes ils ++)
-inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++)
-inlineToNodes (Note _) = id -- should not occur
+ SingleQuote
+ | isEnabled Ext_smart opts -> ("'","'")
+ | otherwise -> ("‘", "’")
+ DoubleQuote
+ | isEnabled Ext_smart opts -> ("\"", "\"")
+ | otherwise -> ("“", "”")
+inlineToNodes _ (Code _ str) = (node (CODE (T.pack str)) [] :)
+inlineToNodes opts (Math mt str) =
+ case writerHTMLMathMethod opts of
+ WebTeX url ->
+ let core = inlineToNodes opts
+ (Image nullAttr [Str str] (url ++ urlEncode str, str))
+ sep = if mt == DisplayMath
+ then (node LINEBREAK [] :)
+ else id
+ in (sep . core . sep)
+ _ ->
+ case mt of
+ InlineMath ->
+ (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
+ DisplayMath ->
+ (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
+inlineToNodes opts (Span attr ils) =
+ let nodes = inlinesToNodes opts ils
+ op = tagWithAttributes opts True False "span" attr
+ in if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE op) [] : nodes ++
+ [node (HTML_INLINE (T.pack "</span>")) []]) ++)
+ else (nodes ++)
+inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
+inlineToNodes _ (Note _) = id -- should not occur
-- we remove Note elements in preprocessing
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index c663c75ce..f94c12d89 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.ConTeXt
- Copyright : Copyright (C) 2007-2015 John MacFarlane
+ Copyright : Copyright (C) 2007-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,20 +30,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' format into ConTeXt.
-}
module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
+import Control.Monad.State.Strict
+import Data.Char (ord, isDigit)
+import Data.List (intercalate, intersperse)
+import Data.Maybe (mapMaybe)
+import Data.Text (Text)
+import Network.URI (unEscapeString)
+import Text.Pandoc.BCP47
+import Text.Pandoc.Class (PandocMonad, report, toLang)
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Walk (query)
-import Text.Printf ( printf )
-import Data.List ( intercalate, intersperse )
-import Data.Char ( ord )
-import Data.Maybe ( catMaybes )
-import Control.Monad.State
import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates ( renderTemplate' )
-import Network.URI ( isURI, unEscapeString )
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Walk (query)
+import Text.Pandoc.Writers.Shared
+import Text.Printf (printf)
data WriterState =
WriterState { stNextRef :: Int -- number of next URL reference
@@ -50,37 +55,43 @@ data WriterState =
, stOptions :: WriterOptions -- writer options
}
+data Tabl = Xtb | Ntb deriving (Show, Eq)
+
orderedListStyles :: [Char]
orderedListStyles = cycle "narg"
-- | Convert Pandoc to ConTeXt.
-writeConTeXt :: WriterOptions -> Pandoc -> String
+writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeConTeXt options document =
let defaultWriterState = WriterState { stNextRef = 1
, stOrderedListLevel = 0
, stOptions = options
}
- in evalState (pandocToConTeXt options document) defaultWriterState
+ in evalStateT (pandocToConTeXt options document) defaultWriterState
-pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
+type WM = StateT WriterState
+
+pandocToConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> WM m Text
pandocToConTeXt options (Pandoc meta blocks) = do
let colwidth = if writerWrapText options == WrapAuto
then Just $ writerColumns options
else Nothing
+ let render' :: Doc -> Text
+ render' = render colwidth
metadata <- metaToJSON options
- (fmap (render colwidth) . blockListToConTeXt)
- (fmap (render colwidth) . inlineListToConTeXt)
+ (fmap render' . blockListToConTeXt)
+ (fmap render' . inlineListToConTeXt)
meta
body <- mapM (elementToConTeXt options) $ hierarchicalize blocks
- let main = (render colwidth . vcat) body
- let layoutFromMargins = intercalate [','] $ catMaybes $
- map (\(x,y) ->
+ let main = (render' . vcat) body
+ let layoutFromMargins = intercalate [','] $ mapMaybe (\(x,y) ->
((x ++ "=") ++) <$> getField y metadata)
[("leftmargin","margin-left")
,("rightmargin","margin-right")
,("top","margin-top")
,("bottom","margin-bottom")
]
+ mblang <- fromBCP47 (getLang options meta)
let context = defField "toc" (writerTableOfContents options)
$ defField "placelist" (intercalate ("," :: String) $
take (writerTOCDepth options +
@@ -93,14 +104,17 @@ pandocToConTeXt options (Pandoc meta blocks) = do
$ defField "body" main
$ defField "layout" layoutFromMargins
$ defField "number-sections" (writerNumberSections options)
- $ metadata
- let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $
- getField "lang" context)
- $ defField "context-dir" (toContextDir $ getField "dir" context)
- $ context
- return $ case writerTemplate options of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context'
+ $ maybe id (defField "context-lang") mblang
+ $ (case getField "papersize" metadata of
+ Just (('a':d:ds) :: String)
+ | all isDigit (d:ds) -> resetField "papersize"
+ (('A':d:ds) :: String)
+ _ -> id) metadata
+ let context' = defField "context-dir" (toContextDir
+ $ getField "dir" context) context
+ case writerTemplate options of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context'
toContextDir :: Maybe String -> String
toContextDir (Just "rtl") = "r2l"
@@ -110,24 +124,24 @@ toContextDir _ = ""
-- | escape things as needed for ConTeXt
escapeCharForConTeXt :: WriterOptions -> Char -> String
escapeCharForConTeXt opts ch =
- let ligatures = writerTeXLigatures opts in
+ let ligatures = isEnabled Ext_smart opts in
case ch of
- '{' -> "\\{"
- '}' -> "\\}"
- '\\' -> "\\letterbackslash{}"
- '$' -> "\\$"
- '|' -> "\\letterbar{}"
- '%' -> "\\letterpercent{}"
- '~' -> "\\lettertilde{}"
- '#' -> "\\#"
- '[' -> "{[}"
- ']' -> "{]}"
- '\160' -> "~"
+ '{' -> "\\{"
+ '}' -> "\\}"
+ '\\' -> "\\letterbackslash{}"
+ '$' -> "\\$"
+ '|' -> "\\letterbar{}"
+ '%' -> "\\letterpercent{}"
+ '~' -> "\\lettertilde{}"
+ '#' -> "\\#"
+ '[' -> "{[}"
+ ']' -> "{]}"
+ '\160' -> "~"
'\x2014' | ligatures -> "---"
'\x2013' | ligatures -> "--"
'\x2019' | ligatures -> "'"
'\x2026' -> "\\ldots{}"
- x -> [x]
+ x -> [x]
-- | Escape string for ConTeXt
stringToConTeXt :: WriterOptions -> String -> String
@@ -137,20 +151,20 @@ stringToConTeXt opts = concatMap (escapeCharForConTeXt opts)
toLabel :: String -> String
toLabel z = concatMap go z
where go x
- | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x)
+ | x `elem` ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x)
| otherwise = [x]
-- | Convert Elements to ConTeXt
-elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc
+elementToConTeXt :: PandocMonad m => WriterOptions -> Element -> WM m Doc
elementToConTeXt _ (Blk block) = blockToConTeXt block
elementToConTeXt opts (Sec level _ attr title' elements) = do
header' <- sectionHeader attr level title'
+ footer' <- sectionFooter attr level
innerContents <- mapM (elementToConTeXt opts) elements
- return $ vcat (header' : innerContents)
+ return $ header' $$ vcat innerContents $$ footer'
-- | Convert Pandoc block element to ConTeXt.
-blockToConTeXt :: Block
- -> State WriterState Doc
+blockToConTeXt :: PandocMonad m => Block -> WM m Doc
blockToConTeXt Null = return empty
blockToConTeXt (Plain lst) = inlineListToConTeXt lst
-- title beginning with fig: indicates that the image is a figure
@@ -175,9 +189,12 @@ blockToConTeXt (CodeBlock _ str) =
return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline
-- blankline because \stoptyping can't have anything after it, inc. '}'
blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
-blockToConTeXt (RawBlock _ _ ) = return empty
+blockToConTeXt b@(RawBlock _ _ ) = do
+ report $ BlockNotRendered b
+ return empty
blockToConTeXt (Div (ident,_,kvs) bs) = do
let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
+ mblang <- fromBCP47 (lookup "lang" kvs)
let wrapRef txt = if null ident
then txt
else ("\\reference" <> brackets (text $ toLabel ident) <>
@@ -186,12 +203,12 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do
Just "rtl" -> align "righttoleft"
Just "ltr" -> align "lefttoright"
_ -> id
- wrapLang txt = case lookup "lang" kvs of
+ wrapLang txt = case mblang of
Just lng -> "\\start\\language["
- <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
+ <> text lng <> "]" $$ txt $$ "\\stop"
Nothing -> txt
wrapBlank txt = blankline <> txt <> blankline
- fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
+ (wrapBlank . wrapLang . wrapDir . wrapRef) <$> blockListToConTeXt bs
blockToConTeXt (BulletList lst) = do
contents <- mapM listItemToConTeXt lst
return $ ("\\startitemize" <> if isTightList lst
@@ -201,9 +218,9 @@ blockToConTeXt (BulletList lst) = do
blockToConTeXt (OrderedList (start, style', delim) lst) = do
st <- get
let level = stOrderedListLevel st
- put $ st {stOrderedListLevel = level + 1}
+ put st {stOrderedListLevel = level + 1}
contents <- mapM listItemToConTeXt lst
- put $ st {stOrderedListLevel = level}
+ put st {stOrderedListLevel = level}
let start' = if start == 1 then "" else "start=" ++ show start
let delim' = case delim of
DefaultDelim -> ""
@@ -238,39 +255,83 @@ blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
-- If this is ever executed, provide a default for the reference identifier.
blockToConTeXt (Header level attr lst) = sectionHeader attr level lst
blockToConTeXt (Table caption aligns widths heads rows) = do
- let colDescriptor colWidth alignment = (case alignment of
- AlignLeft -> 'l'
- AlignRight -> 'r'
- AlignCenter -> 'c'
- AlignDefault -> 'l'):
- if colWidth == 0
- then "|"
- else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|")
- let colDescriptors = "|" ++ (concat $
- zipWith colDescriptor widths aligns)
- headers <- if all null heads
- then return empty
- else liftM ($$ "\\HL") $ tableRowToConTeXt heads
+ opts <- gets stOptions
+ let tabl = if isEnabled Ext_ntb opts
+ then Ntb
+ else Xtb
captionText <- inlineListToConTeXt caption
- rows' <- mapM tableRowToConTeXt rows
- return $ "\\placetable" <> (if null caption
- then brackets "none"
- else empty)
- <> braces captionText $$
- "\\starttable" <> brackets (text colDescriptors) $$
- "\\HL" $$ headers $$
- vcat rows' $$ "\\HL" $$ "\\stoptable" <> blankline
-
-tableRowToConTeXt :: [[Block]] -> State WriterState Doc
-tableRowToConTeXt cols = do
- cols' <- mapM blockListToConTeXt cols
- return $ (vcat (map ("\\NC " <>) cols')) $$ "\\NC\\AR"
-
-listItemToConTeXt :: [Block] -> State WriterState Doc
+ headers <- if all null heads
+ then return empty
+ else tableRowToConTeXt tabl aligns widths heads
+ rows' <- mapM (tableRowToConTeXt tabl aligns widths) rows
+ body <- tableToConTeXt tabl headers rows'
+ return $ "\\startplacetable" <> brackets (
+ if null caption
+ then "location=none"
+ else "title=" <> braces captionText
+ ) $$ body $$ "\\stopplacetable" <> blankline
+
+tableToConTeXt :: PandocMonad m => Tabl -> Doc -> [Doc] -> WM m Doc
+tableToConTeXt Xtb heads rows =
+ return $ "\\startxtable" $$
+ (if isEmpty heads
+ then empty
+ else "\\startxtablehead[head]" $$ heads $$ "\\stopxtablehead") $$
+ (if null rows
+ then empty
+ else "\\startxtablebody[body]" $$ vcat (init rows) $$ "\\stopxtablebody" $$
+ "\\startxtablefoot[foot]" $$ last rows $$ "\\stopxtablefoot") $$
+ "\\stopxtable"
+tableToConTeXt Ntb heads rows =
+ return $ "\\startTABLE" $$
+ (if isEmpty heads
+ then empty
+ else "\\startTABLEhead" $$ heads $$ "\\stopTABLEhead") $$
+ (if null rows
+ then empty
+ else "\\startTABLEbody" $$ vcat (init rows) $$ "\\stopTABLEbody" $$
+ "\\startTABLEfoot" $$ last rows $$ "\\stopTABLEfoot") $$
+ "\\stopTABLE"
+
+tableRowToConTeXt :: PandocMonad m => Tabl -> [Alignment] -> [Double] -> [[Block]] -> WM m Doc
+tableRowToConTeXt Xtb aligns widths cols = do
+ cells <- mapM (tableColToConTeXt Xtb) $ zip3 aligns widths cols
+ return $ "\\startxrow" $$ vcat cells $$ "\\stopxrow"
+tableRowToConTeXt Ntb aligns widths cols = do
+ cells <- mapM (tableColToConTeXt Ntb) $ zip3 aligns widths cols
+ return $ vcat cells $$ "\\NC\\NR"
+
+tableColToConTeXt :: PandocMonad m => Tabl -> (Alignment, Double, [Block]) -> WM m Doc
+tableColToConTeXt tabl (align, width, blocks) = do
+ cellContents <- blockListToConTeXt blocks
+ let colwidth = if width == 0
+ then empty
+ else "width=" <> braces (text (printf "%.2f\\textwidth" width))
+ let halign = alignToConTeXt align
+ let options = (if keys == empty
+ then empty
+ else brackets keys) <> space
+ where keys = hcat $ intersperse "," $ filter (empty /=) [halign, colwidth]
+ tableCellToConTeXt tabl options cellContents
+
+tableCellToConTeXt :: PandocMonad m => Tabl -> Doc -> Doc -> WM m Doc
+tableCellToConTeXt Xtb options cellContents =
+ return $ "\\startxcell" <> options <> cellContents <> " \\stopxcell"
+tableCellToConTeXt Ntb options cellContents =
+ return $ "\\NC" <> options <> cellContents
+
+alignToConTeXt :: Alignment -> Doc
+alignToConTeXt align = case align of
+ AlignLeft -> "align=right"
+ AlignRight -> "align=left"
+ AlignCenter -> "align=middle"
+ AlignDefault -> empty
+
+listItemToConTeXt :: PandocMonad m => [Block] -> WM m Doc
listItemToConTeXt list = blockListToConTeXt list >>=
- return . ("\\item" $$) . (nest 2)
+ return . ("\\item" $$) . nest 2
-defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc
+defListItemToConTeXt :: PandocMonad m => ([Inline], [[Block]]) -> WM m Doc
defListItemToConTeXt (term, defs) = do
term' <- inlineListToConTeXt term
def' <- liftM vsep $ mapM blockListToConTeXt defs
@@ -278,12 +339,13 @@ defListItemToConTeXt (term, defs) = do
"\\stopdescription" <> blankline
-- | Convert list of block elements to ConTeXt.
-blockListToConTeXt :: [Block] -> State WriterState Doc
+blockListToConTeXt :: PandocMonad m => [Block] -> WM m Doc
blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst
-- | Convert list of inline elements to ConTeXt.
-inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
- -> State WriterState Doc
+inlineListToConTeXt :: PandocMonad m
+ => [Inline] -- ^ Inlines to convert
+ -> WM m Doc
inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst
-- We add a \strut after a line break that precedes a space,
-- or the space gets swallowed
@@ -292,13 +354,14 @@ inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst
addStruts xs
addStruts (x:xs) = x : addStruts xs
addStruts [] = []
- isSpacey Space = True
+ isSpacey Space = True
isSpacey (Str ('\160':_)) = True
- isSpacey _ = False
+ isSpacey _ = False
-- | Convert inline element to ConTeXt
-inlineToConTeXt :: Inline -- ^ Inline to convert
- -> State WriterState Doc
+inlineToConTeXt :: PandocMonad m
+ => Inline -- ^ Inline to convert
+ -> WM m Doc
inlineToConTeXt (Emph lst) = do
contents <- inlineListToConTeXt lst
return $ braces $ "\\em " <> contents
@@ -338,8 +401,10 @@ inlineToConTeXt (Math DisplayMath str) =
return $ text "\\startformula " <> text str <> text " \\stopformula" <> space
inlineToConTeXt (RawInline "context" str) = return $ text str
inlineToConTeXt (RawInline "tex" str) = return $ text str
-inlineToConTeXt (RawInline _ _) = return empty
-inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr
+inlineToConTeXt il@(RawInline _ _) = do
+ report $ InlineNotRendered il
+ return empty
+inlineToConTeXt LineBreak = return $ text "\\crlf" <> cr
inlineToConTeXt SoftBreak = do
wrapText <- gets (writerWrapText . stOptions)
return $ case wrapText of
@@ -348,7 +413,7 @@ inlineToConTeXt SoftBreak = do
WrapPreserve -> cr
inlineToConTeXt Space = return space
-- Handle HTML-like internal document references to sections
-inlineToConTeXt (Link _ txt (('#' : ref), _)) = do
+inlineToConTeXt (Link _ txt ('#' : ref, _)) = do
opts <- gets stOptions
contents <- inlineListToConTeXt txt
let ref' = toLabel $ stringToConTeXt opts ref
@@ -374,7 +439,7 @@ inlineToConTeXt (Link _ txt (src, _)) = do
inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do
opts <- gets stOptions
let showDim dir = let d = text (show dir) <> "="
- in case (dimension dir attr) of
+ in case dimension dir attr of
Just (Pixel a) ->
[d <> text (showInInch opts (Pixel a)) <> "in"]
Just (Percent a) ->
@@ -397,84 +462,104 @@ inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do
inlineToConTeXt (Note contents) = do
contents' <- blockListToConTeXt contents
let codeBlock x@(CodeBlock _ _) = [x]
- codeBlock _ = []
+ codeBlock _ = []
let codeBlocks = query codeBlock contents
return $ if null codeBlocks
then text "\\footnote{" <> nest 2 contents' <> char '}'
else text "\\startbuffer " <> nest 2 contents' <>
text "\\stopbuffer\\footnote{\\getbuffer}"
inlineToConTeXt (Span (_,_,kvs) ils) = do
+ mblang <- fromBCP47 (lookup "lang" kvs)
let wrapDir txt = case lookup "dir" kvs of
Just "rtl" -> braces $ "\\righttoleft " <> txt
Just "ltr" -> braces $ "\\lefttoright " <> txt
_ -> txt
- wrapLang txt = case lookup "lang" kvs of
- Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
+ wrapLang txt = case mblang of
+ Just lng -> "\\start\\language[" <> text lng
<> "]" <> txt <> "\\stop "
Nothing -> txt
- fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
+ (wrapLang . wrapDir) <$> inlineListToConTeXt ils
-- | Craft the section header, inserting the section reference, if supplied.
-sectionHeader :: Attr
+sectionHeader :: PandocMonad m
+ => Attr
-> Int
-> [Inline]
- -> State WriterState Doc
-sectionHeader (ident,classes,_) hdrLevel lst = do
+ -> WM m Doc
+sectionHeader (ident,classes,kvs) hdrLevel lst = do
+ opts <- gets stOptions
contents <- inlineListToConTeXt lst
- st <- get
- let opts = stOptions st
+ levelText <- sectionLevelToText opts (ident,classes,kvs) hdrLevel
+ let ident' = if null ident
+ then empty
+ else "reference=" <> braces (text (toLabel ident))
+ let contents' = if contents == empty
+ then empty
+ else "title=" <> braces contents
+ let options = if keys == empty || levelText == empty
+ then empty
+ else brackets keys
+ where keys = hcat $ intersperse "," $ filter (empty /=) [contents', ident']
+ let starter = if writerSectionDivs opts
+ then "\\start"
+ else "\\"
+ return $ starter <> levelText <> options <> blankline
+
+-- | Craft the section footer
+sectionFooter :: PandocMonad m => Attr -> Int -> WM m Doc
+sectionFooter attr hdrLevel = do
+ opts <- gets stOptions
+ levelText <- sectionLevelToText opts attr hdrLevel
+ return $ if writerSectionDivs opts
+ then "\\stop" <> levelText <> blankline
+ else empty
+
+-- | Generate a textual representation of the section level
+sectionLevelToText :: PandocMonad m => WriterOptions -> Attr -> Int -> WM m Doc
+sectionLevelToText opts (_,classes,_) hdrLevel = do
let level' = case writerTopLevelDivision opts of
TopLevelPart -> hdrLevel - 2
TopLevelChapter -> hdrLevel - 1
TopLevelSection -> hdrLevel
TopLevelDefault -> hdrLevel
- let ident' = toLabel ident
let (section, chapter) = if "unnumbered" `elem` classes
then (text "subject", text "title")
else (text "section", text "chapter")
return $ case level' of
- -1 -> text "\\part" <> braces contents
- 0 -> char '\\' <> chapter <> braces contents
- n | n >= 1 && n <= 5 -> char '\\'
- <> text (concat (replicate (n - 1) "sub"))
- <> section
- <> (if (not . null) ident'
- then brackets (text ident')
- else empty)
- <> braces contents
- <> blankline
- _ -> contents <> blankline
-
-fromBcp47' :: String -> String
-fromBcp47' = fromBcp47 . splitBy (=='-')
+ -1 -> text "part"
+ 0 -> chapter
+ n | n >= 1 -> text (concat (replicate (n - 1) "sub"))
+ <> section
+ _ -> empty -- cannot happen
+
+fromBCP47 :: PandocMonad m => Maybe String -> WM m (Maybe String)
+fromBCP47 mbs = fromBCP47' <$> toLang mbs
-- Takes a list of the constituents of a BCP 47 language code
-- and irons out ConTeXt's exceptions
-- https://tools.ietf.org/html/bcp47#section-2.1
-- http://wiki.contextgarden.net/Language_Codes
-fromBcp47 :: [String] -> String
-fromBcp47 [] = ""
-fromBcp47 ("ar":"SY":_) = "ar-sy"
-fromBcp47 ("ar":"IQ":_) = "ar-iq"
-fromBcp47 ("ar":"JO":_) = "ar-jo"
-fromBcp47 ("ar":"LB":_) = "ar-lb"
-fromBcp47 ("ar":"DZ":_) = "ar-dz"
-fromBcp47 ("ar":"MA":_) = "ar-ma"
-fromBcp47 ("de":"1901":_) = "deo"
-fromBcp47 ("de":"DE":_) = "de-de"
-fromBcp47 ("de":"AT":_) = "de-at"
-fromBcp47 ("de":"CH":_) = "de-ch"
-fromBcp47 ("el":"poly":_) = "agr"
-fromBcp47 ("en":"US":_) = "en-us"
-fromBcp47 ("en":"GB":_) = "en-gb"
-fromBcp47 ("grc":_) = "agr"
-fromBcp47 x = fromIso $ head x
- where
- fromIso "el" = "gr"
- fromIso "eu" = "ba"
- fromIso "he" = "il"
- fromIso "jp" = "ja"
- fromIso "uk" = "ua"
- fromIso "vi" = "vn"
- fromIso "zh" = "cn"
- fromIso l = l
+fromBCP47' :: Maybe Lang -> Maybe String
+fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy"
+fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq"
+fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo"
+fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb"
+fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz"
+fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma"
+fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo"
+fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de"
+fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at"
+fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch"
+fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr"
+fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us"
+fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb"
+fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr"
+fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr"
+fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba"
+fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il"
+fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja"
+fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua"
+fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn"
+fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn"
+fromBCP47' (Just (Lang l _ _ _) ) = Just l
+fromBCP47' Nothing = Nothing
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index cf641dcd6..3daa8d0cf 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -1,11 +1,6 @@
-{-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE FlexibleInstances, OverloadedStrings,
- ScopedTypeVariables, DeriveDataTypeable, CPP #-}
-#if MIN_VERSION_base(4,8,0)
-#else
-{-# LANGUAGE OverlappingInstances #-}
-#endif
-{- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE FlexibleInstances #-}
+{- Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Custom
- Copyright : Copyright (C) 2012-2015 John MacFarlane
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -35,20 +30,27 @@ Conversion of 'Pandoc' documents to custom markup using
a lua writer.
-}
module Text.Pandoc.Writers.Custom ( writeCustom ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Data.List ( intersperse )
-import Data.Char ( toLower )
-import Data.Typeable
-import Scripting.Lua (LuaState, StackValue, callfunc)
-import Text.Pandoc.Writers.Shared
-import qualified Scripting.Lua as Lua
-import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad (when)
+import Control.Arrow ((***))
import Control.Exception
+import Control.Monad (when)
+import Control.Monad.Trans (MonadIO (liftIO))
+import Data.Char (toLower)
+import Data.List (intersperse)
import qualified Data.Map as M
+import Data.Text (Text, pack)
+import Data.Typeable
+import Foreign.Lua (Lua, ToLuaStack (..), callFunc)
+import Foreign.Lua.Api
+import Text.Pandoc.Class (PandocIO)
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath)
+import Text.Pandoc.Lua.StackInstances ()
+import Text.Pandoc.Lua.Util (addValue, dostring')
+import Text.Pandoc.Options
import Text.Pandoc.Templates
-import GHC.IO.Encoding (getForeignEncoding,setForeignEncoding, utf8)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Writers.Shared
attrToMap :: Attr -> M.Map String String
attrToMap (id',classes,keyvals) = M.fromList
@@ -56,119 +58,43 @@ attrToMap (id',classes,keyvals) = M.fromList
: ("class", unwords classes)
: keyvals
-#if MIN_VERSION_hslua(0,4,0)
-#if MIN_VERSION_base(4,8,0)
-instance {-# OVERLAPS #-} StackValue [Char] where
-#else
-instance StackValue [Char] where
-#endif
- push lua cs = Lua.push lua (UTF8.fromString cs)
- peek lua i = do
- res <- Lua.peek lua i
- return $ UTF8.toString `fmap` res
- valuetype _ = Lua.TSTRING
-#else
-#if MIN_VERSION_base(4,8,0)
-instance {-# OVERLAPS #-} StackValue a => StackValue [a] where
-#else
-instance StackValue a => StackValue [a] where
-#endif
- push lua xs = do
- Lua.createtable lua (length xs + 1) 0
- let addValue (i, x) = Lua.push lua x >> Lua.rawseti lua (-2) i
- mapM_ addValue $ zip [1..] xs
- peek lua i = do
- top <- Lua.gettop lua
- let i' = if i < 0 then top + i + 1 else i
- Lua.pushnil lua
- lst <- getList lua i'
- Lua.pop lua 1
- return (Just lst)
- valuetype _ = Lua.TTABLE
-
-getList :: StackValue a => LuaState -> Int -> IO [a]
-getList lua i' = do
- continue <- Lua.next lua i'
- if continue
- then do
- next <- Lua.peek lua (-1)
- Lua.pop lua 1
- x <- maybe (fail "peek returned Nothing") return next
- rest <- getList lua i'
- return (x : rest)
- else return []
-#endif
-
-instance StackValue Format where
- push lua (Format f) = Lua.push lua (map toLower f)
- peek l n = fmap Format `fmap` Lua.peek l n
- valuetype _ = Lua.TSTRING
-
-instance (StackValue a, StackValue b) => StackValue (M.Map a b) where
- push lua m = do
- let xs = M.toList m
- Lua.createtable lua (length xs + 1) 0
- let addValue (k, v) = Lua.push lua k >> Lua.push lua v >>
- Lua.rawset lua (-3)
- mapM_ addValue xs
- peek _ _ = undefined -- not needed for our purposes
- valuetype _ = Lua.TTABLE
-
-instance (StackValue a, StackValue b) => StackValue (a,b) where
- push lua (k,v) = do
- Lua.createtable lua 2 0
- Lua.push lua k
- Lua.push lua v
- Lua.rawset lua (-3)
- peek _ _ = undefined -- not needed for our purposes
- valuetype _ = Lua.TTABLE
-
-#if MIN_VERSION_base(4,8,0)
-instance {-# OVERLAPS #-} StackValue [Inline] where
-#else
-instance StackValue [Inline] where
-#endif
- push l ils = Lua.push l =<< inlineListToCustom l ils
- peek _ _ = undefined
- valuetype _ = Lua.TSTRING
-
-#if MIN_VERSION_base(4,8,0)
-instance {-# OVERLAPS #-} StackValue [Block] where
-#else
-instance StackValue [Block] where
-#endif
- push l ils = Lua.push l =<< blockListToCustom l ils
- peek _ _ = undefined
- valuetype _ = Lua.TSTRING
-
-instance StackValue MetaValue where
- push l (MetaMap m) = Lua.push l m
- push l (MetaList xs) = Lua.push l xs
- push l (MetaBool x) = Lua.push l x
- push l (MetaString s) = Lua.push l s
- push l (MetaInlines ils) = Lua.push l ils
- push l (MetaBlocks bs) = Lua.push l bs
- peek _ _ = undefined
- valuetype (MetaMap _) = Lua.TTABLE
- valuetype (MetaList _) = Lua.TTABLE
- valuetype (MetaBool _) = Lua.TBOOLEAN
- valuetype (MetaString _) = Lua.TSTRING
- valuetype (MetaInlines _) = Lua.TSTRING
- valuetype (MetaBlocks _) = Lua.TSTRING
-
-instance StackValue Citation where
- push lua cit = do
- Lua.createtable lua 6 0
- let addValue (k :: String, v) = Lua.push lua k >> Lua.push lua v >>
- Lua.rawset lua (-3)
- addValue ("citationId", citationId cit)
- addValue ("citationPrefix", citationPrefix cit)
- addValue ("citationSuffix", citationSuffix cit)
- addValue ("citationMode", show (citationMode cit))
- addValue ("citationNoteNum", citationNoteNum cit)
- addValue ("citationHash", citationHash cit)
- peek = undefined
- valuetype _ = Lua.TTABLE
+newtype Stringify a = Stringify a
+
+instance ToLuaStack (Stringify Format) where
+ push (Stringify (Format f)) = push (map toLower f)
+
+instance ToLuaStack (Stringify [Inline]) where
+ push (Stringify ils) = push =<< inlineListToCustom ils
+
+instance ToLuaStack (Stringify [Block]) where
+ push (Stringify blks) = push =<< blockListToCustom blks
+
+instance ToLuaStack (Stringify MetaValue) where
+ push (Stringify (MetaMap m)) = push (fmap Stringify m)
+ push (Stringify (MetaList xs)) = push (map Stringify xs)
+ push (Stringify (MetaBool x)) = push x
+ push (Stringify (MetaString s)) = push s
+ push (Stringify (MetaInlines ils)) = push (Stringify ils)
+ push (Stringify (MetaBlocks bs)) = push (Stringify bs)
+
+instance ToLuaStack (Stringify Citation) where
+ push (Stringify cit) = do
+ createtable 6 0
+ addValue "citationId" $ citationId cit
+ addValue "citationPrefix" . Stringify $ citationPrefix cit
+ addValue "citationSuffix" . Stringify $ citationSuffix cit
+ addValue "citationMode" $ show (citationMode cit)
+ addValue "citationNoteNum" $ citationNoteNum cit
+ addValue "citationHash" $ citationHash cit
+
+-- | Key-value pair, pushed as a table with @a@ as the only key and @v@ as the
+-- associated value.
+newtype KeyValue a b = KeyValue (a, b)
+
+instance (ToLuaStack a, ToLuaStack b) => ToLuaStack (KeyValue a b) where
+ push (KeyValue (k, v)) = do
+ newtable
+ addValue k v
data PandocLuaException = PandocLuaException String
deriving (Show, Typeable)
@@ -176,147 +102,147 @@ data PandocLuaException = PandocLuaException String
instance Exception PandocLuaException
-- | Convert Pandoc to custom markup.
-writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String
+writeCustom :: FilePath -> WriterOptions -> Pandoc -> PandocIO Text
writeCustom luaFile opts doc@(Pandoc meta _) = do
- luaScript <- UTF8.readFile luaFile
- enc <- getForeignEncoding
- setForeignEncoding utf8
- lua <- Lua.newstate
- Lua.openlibs lua
- status <- Lua.loadstring lua luaScript luaFile
- -- check for error in lua script (later we'll change the return type
- -- to handle this more gracefully):
- when (status /= 0) $
-#if MIN_VERSION_hslua(0,4,0)
- Lua.tostring lua 1 >>= throw . PandocLuaException . UTF8.toString
-#else
- Lua.tostring lua 1 >>= throw . PandocLuaException
-#endif
- Lua.call lua 0 0
- -- TODO - call hierarchicalize, so we have that info
- rendered <- docToCustom lua opts doc
- context <- metaToJSON opts
- (blockListToCustom lua)
- (inlineListToCustom lua)
- meta
- Lua.close lua
- setForeignEncoding enc
- let body = rendered
+ luaScript <- liftIO $ UTF8.readFile luaFile
+ res <- runPandocLua $ do
+ registerScriptPath luaFile
+ stat <- dostring' luaScript
+ -- check for error in lua script (later we'll change the return type
+ -- to handle this more gracefully):
+ when (stat /= OK) $
+ tostring 1 >>= throw . PandocLuaException . UTF8.toString
+ -- TODO - call hierarchicalize, so we have that info
+ rendered <- docToCustom opts doc
+ context <- metaToJSON opts
+ blockListToCustom
+ inlineListToCustom
+ meta
+ return (rendered, context)
+ let (body, context) = case res of
+ Left e -> throw (PandocLuaException (show e))
+ Right x -> x
case writerTemplate opts of
- Nothing -> return body
- Just tpl -> return $ renderTemplate' tpl $ setField "body" body context
+ Nothing -> return $ pack body
+ Just tpl ->
+ case applyTemplate (pack tpl) $ setField "body" body context of
+ Left e -> throw (PandocTemplateError e)
+ Right r -> return (pack r)
-docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String
-docToCustom lua opts (Pandoc (Meta metamap) blocks) = do
- body <- blockListToCustom lua blocks
- callfunc lua "Doc" body metamap (writerVariables opts)
+docToCustom :: WriterOptions -> Pandoc -> Lua String
+docToCustom opts (Pandoc (Meta metamap) blocks) = do
+ body <- blockListToCustom blocks
+ callFunc "Doc" body (fmap Stringify metamap) (writerVariables opts)
-- | Convert Pandoc block element to Custom.
-blockToCustom :: LuaState -- ^ Lua state
- -> Block -- ^ Block element
- -> IO String
+blockToCustom :: Block -- ^ Block element
+ -> Lua String
-blockToCustom _ Null = return ""
+blockToCustom Null = return ""
-blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines
+blockToCustom (Plain inlines) = callFunc "Plain" (Stringify inlines)
-blockToCustom lua (Para [Image attr txt (src,tit)]) =
- callfunc lua "CaptionedImage" src tit txt (attrToMap attr)
+blockToCustom (Para [Image attr txt (src,tit)]) =
+ callFunc "CaptionedImage" src tit (Stringify txt) (attrToMap attr)
-blockToCustom lua (Para inlines) = callfunc lua "Para" inlines
+blockToCustom (Para inlines) = callFunc "Para" (Stringify inlines)
-blockToCustom lua (LineBlock linesList) = callfunc lua "LineBlock" linesList
+blockToCustom (LineBlock linesList) = callFunc "LineBlock" (map Stringify linesList)
-blockToCustom lua (RawBlock format str) =
- callfunc lua "RawBlock" format str
+blockToCustom (RawBlock format str) =
+ callFunc "RawBlock" (Stringify format) str
-blockToCustom lua HorizontalRule = callfunc lua "HorizontalRule"
+blockToCustom HorizontalRule = callFunc "HorizontalRule"
-blockToCustom lua (Header level attr inlines) =
- callfunc lua "Header" level inlines (attrToMap attr)
+blockToCustom (Header level attr inlines) =
+ callFunc "Header" level (Stringify inlines) (attrToMap attr)
-blockToCustom lua (CodeBlock attr str) =
- callfunc lua "CodeBlock" str (attrToMap attr)
+blockToCustom (CodeBlock attr str) =
+ callFunc "CodeBlock" str (attrToMap attr)
-blockToCustom lua (BlockQuote blocks) = callfunc lua "BlockQuote" blocks
+blockToCustom (BlockQuote blocks) = callFunc "BlockQuote" (Stringify blocks)
-blockToCustom lua (Table capt aligns widths headers rows') =
- callfunc lua "Table" capt (map show aligns) widths headers rows'
+blockToCustom (Table capt aligns widths headers rows) =
+ let aligns' = map show aligns
+ capt' = Stringify capt
+ headers' = map Stringify headers
+ rows' = map (map Stringify) rows
+ in callFunc "Table" capt' aligns' widths headers' rows'
-blockToCustom lua (BulletList items) = callfunc lua "BulletList" items
+blockToCustom (BulletList items) = callFunc "BulletList" (map Stringify items)
-blockToCustom lua (OrderedList (num,sty,delim) items) =
- callfunc lua "OrderedList" items num (show sty) (show delim)
+blockToCustom (OrderedList (num,sty,delim) items) =
+ callFunc "OrderedList" (map Stringify items) num (show sty) (show delim)
-blockToCustom lua (DefinitionList items) =
- callfunc lua "DefinitionList" items
+blockToCustom (DefinitionList items) =
+ callFunc "DefinitionList"
+ (map (KeyValue . (Stringify *** map Stringify)) items)
-blockToCustom lua (Div attr items) =
- callfunc lua "Div" items (attrToMap attr)
+blockToCustom (Div attr items) =
+ callFunc "Div" (Stringify items) (attrToMap attr)
-- | Convert list of Pandoc block elements to Custom.
-blockListToCustom :: LuaState -- ^ Options
- -> [Block] -- ^ List of block elements
- -> IO String
-blockListToCustom lua xs = do
- blocksep <- callfunc lua "Blocksep"
- bs <- mapM (blockToCustom lua) xs
+blockListToCustom :: [Block] -- ^ List of block elements
+ -> Lua String
+blockListToCustom xs = do
+ blocksep <- callFunc "Blocksep"
+ bs <- mapM blockToCustom xs
return $ mconcat $ intersperse blocksep bs
-- | Convert list of Pandoc inline elements to Custom.
-inlineListToCustom :: LuaState -> [Inline] -> IO String
-inlineListToCustom lua lst = do
- xs <- mapM (inlineToCustom lua) lst
- return $ concat xs
+inlineListToCustom :: [Inline] -> Lua String
+inlineListToCustom lst = do
+ xs <- mapM inlineToCustom lst
+ return $ mconcat xs
-- | Convert Pandoc inline element to Custom.
-inlineToCustom :: LuaState -> Inline -> IO String
+inlineToCustom :: Inline -> Lua String
-inlineToCustom lua (Str str) = callfunc lua "Str" str
+inlineToCustom (Str str) = callFunc "Str" str
-inlineToCustom lua Space = callfunc lua "Space"
+inlineToCustom Space = callFunc "Space"
-inlineToCustom lua SoftBreak = callfunc lua "SoftBreak"
+inlineToCustom SoftBreak = callFunc "SoftBreak"
-inlineToCustom lua (Emph lst) = callfunc lua "Emph" lst
+inlineToCustom (Emph lst) = callFunc "Emph" (Stringify lst)
-inlineToCustom lua (Strong lst) = callfunc lua "Strong" lst
+inlineToCustom (Strong lst) = callFunc "Strong" (Stringify lst)
-inlineToCustom lua (Strikeout lst) = callfunc lua "Strikeout" lst
+inlineToCustom (Strikeout lst) = callFunc "Strikeout" (Stringify lst)
-inlineToCustom lua (Superscript lst) = callfunc lua "Superscript" lst
+inlineToCustom (Superscript lst) = callFunc "Superscript" (Stringify lst)
-inlineToCustom lua (Subscript lst) = callfunc lua "Subscript" lst
+inlineToCustom (Subscript lst) = callFunc "Subscript" (Stringify lst)
-inlineToCustom lua (SmallCaps lst) = callfunc lua "SmallCaps" lst
+inlineToCustom (SmallCaps lst) = callFunc "SmallCaps" (Stringify lst)
-inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst
+inlineToCustom (Quoted SingleQuote lst) = callFunc "SingleQuoted" (Stringify lst)
-inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst
+inlineToCustom (Quoted DoubleQuote lst) = callFunc "DoubleQuoted" (Stringify lst)
-inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs
+inlineToCustom (Cite cs lst) = callFunc "Cite" (Stringify lst) (map Stringify cs)
-inlineToCustom lua (Code attr str) =
- callfunc lua "Code" str (attrToMap attr)
+inlineToCustom (Code attr str) =
+ callFunc "Code" str (attrToMap attr)
-inlineToCustom lua (Math DisplayMath str) =
- callfunc lua "DisplayMath" str
+inlineToCustom (Math DisplayMath str) =
+ callFunc "DisplayMath" str
-inlineToCustom lua (Math InlineMath str) =
- callfunc lua "InlineMath" str
+inlineToCustom (Math InlineMath str) =
+ callFunc "InlineMath" str
-inlineToCustom lua (RawInline format str) =
- callfunc lua "RawInline" format str
+inlineToCustom (RawInline format str) =
+ callFunc "RawInline" (Stringify format) str
-inlineToCustom lua (LineBreak) = callfunc lua "LineBreak"
+inlineToCustom LineBreak = callFunc "LineBreak"
-inlineToCustom lua (Link attr txt (src,tit)) =
- callfunc lua "Link" txt src tit (attrToMap attr)
+inlineToCustom (Link attr txt (src,tit)) =
+ callFunc "Link" (Stringify txt) src tit (attrToMap attr)
-inlineToCustom lua (Image attr alt (src,tit)) =
- callfunc lua "Image" alt src tit (attrToMap attr)
+inlineToCustom (Image attr alt (src,tit)) =
+ callFunc "Image" (Stringify alt) src tit (attrToMap attr)
-inlineToCustom lua (Note contents) = callfunc lua "Note" contents
+inlineToCustom (Note contents) = callFunc "Note" (Stringify contents)
-inlineToCustom lua (Span attr items) =
- callfunc lua "Span" items (attrToMap attr)
+inlineToCustom (Span attr items) =
+ callFunc "Span" (Stringify items) (attrToMap attr)
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 44f96d700..3034fade5 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE OverloadedStrings, PatternGuards #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Docbook
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -28,34 +29,43 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to Docbook XML.
-}
-module Text.Pandoc.Writers.Docbook ( writeDocbook) where
+module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where
+import Control.Monad.Reader
+import Data.Char (toLower)
+import Data.Generics (everywhere, mkT)
+import Data.List (isPrefixOf, isSuffixOf, stripPrefix)
+import Data.Monoid (Any (..))
+import Data.Text (Text)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
-import Text.Pandoc.XML
+import Text.Pandoc.Highlighting (languages, languagesByExtension)
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Pretty
import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Readers.TeXMath
-import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf )
-import Data.Char ( toLower )
-import Data.Monoid ( Any(..) )
-import Text.Pandoc.Highlighting ( languages, languagesByExtension )
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.XML
import Text.TeXMath
import qualified Text.XML.Light as Xml
-import Data.Generics (everywhere, mkT)
+
+data DocBookVersion = DocBook4 | DocBook5
+ deriving (Eq, Show)
+
+type DB = ReaderT DocBookVersion
-- | Convert list of authors to a docbook <author> section
-authorToDocbook :: WriterOptions -> [Inline] -> B.Inlines
-authorToDocbook opts name' =
- let name = render Nothing $ inlinesToDocbook opts name'
- colwidth = if writerWrapText opts == WrapAuto
+authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines
+authorToDocbook opts name' = do
+ name <- render Nothing <$> inlinesToDocbook opts name'
+ let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
- in B.rawInline "docbook" $ render colwidth $
+ return $ B.rawInline "docbook" $ render colwidth $
if ',' `elem` name
then -- last name first
let (lastname, rest) = break (==',') name
@@ -66,52 +76,63 @@ authorToDocbook opts name' =
let namewords = words name
lengthname = length namewords
(firstname, lastname) = case lengthname of
- 0 -> ("","")
- 1 -> ("", name)
- n -> (intercalate " " (take (n-1) namewords), last namewords)
+ 0 -> ("","")
+ 1 -> ("", name)
+ n -> (unwords (take (n-1) namewords), last namewords)
in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
inTagsSimple "surname" (text $ escapeStringForXML lastname)
+writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeDocbook4 opts d =
+ runReaderT (writeDocbook opts d) DocBook4
+
+writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeDocbook5 opts d =
+ runReaderT (writeDocbook opts d) DocBook5
+
-- | Convert Pandoc document to string in Docbook format.
-writeDocbook :: WriterOptions -> Pandoc -> String
-writeDocbook opts (Pandoc meta blocks) =
+writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m Text
+writeDocbook opts (Pandoc meta blocks) = do
let elements = hierarchicalize blocks
- colwidth = if writerWrapText opts == WrapAuto
+ let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
- render' = render colwidth
- opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr)
+ let render' :: Doc -> Text
+ render' = render colwidth
+ let opts' = if maybe False (("/book>" `isSuffixOf`) . trimr)
(writerTemplate opts) &&
- TopLevelDefault == writerTopLevelDivision opts)
+ TopLevelDefault == writerTopLevelDivision opts
then opts{ writerTopLevelDivision = TopLevelChapter }
else opts
- -- The numbering here follows LaTeX's internal numbering
- startLvl = case writerTopLevelDivision opts' of
+ -- The numbering here follows LaTeX's internal numbering
+ let startLvl = case writerTopLevelDivision opts' of
TopLevelPart -> -1
TopLevelChapter -> 0
TopLevelSection -> 1
TopLevelDefault -> 1
- auths' = map (authorToDocbook opts) $ docAuthors meta
- meta' = B.setMeta "author" auths' meta
- Just metadata = metaToJSON opts
- (Just . render colwidth . (vcat .
- (map (elementToDocbook opts' startLvl)) . hierarchicalize))
- (Just . render colwidth . inlinesToDocbook opts')
+ auths' <- mapM (authorToDocbook opts) $ docAuthors meta
+ let meta' = B.setMeta "author" auths' meta
+ metadata <- metaToJSON opts
+ (fmap (render' . vcat) .
+ mapM (elementToDocbook opts' startLvl) .
+ hierarchicalize)
+ (fmap render' . inlinesToDocbook opts')
meta'
- main = render' $ vcat (map (elementToDocbook opts' startLvl) elements)
- context = defField "body" main
- $ defField "mathml" (case writerHTMLMathMethod opts of
- MathML _ -> True
- _ -> False)
- $ metadata
- in case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ main <- (render' . vcat) <$> mapM (elementToDocbook opts' startLvl) elements
+ let context = defField "body" main
+ $
+ defField "mathml" (case writerHTMLMathMethod opts of
+ MathML -> True
+ _ -> False) metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-- | Convert an Element to Docbook.
-elementToDocbook :: WriterOptions -> Int -> Element -> Doc
+elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc
elementToDocbook opts _ (Blk block) = blockToDocbook opts block
-elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) =
+elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = do
+ version <- ask
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]
@@ -119,24 +140,25 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) =
tag = case lvl of
-1 -> "part"
0 -> "chapter"
- n | n >= 1 && n <= 5 -> if writerDocbook5 opts
+ n | n >= 1 && n <= 5 -> if version == DocBook5
then "section"
else "sect" ++ show n
_ -> "simplesect"
- idName = if writerDocbook5 opts
+ idName = if version == DocBook5
then "xml:id"
else "id"
idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')]
- nsAttr = if writerDocbook5 opts && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
+ nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
else []
attribs = nsAttr ++ idAttr
- in inTags True tag attribs $
- inTagsSimple "title" (inlinesToDocbook opts title) $$
- vcat (map (elementToDocbook opts (lvl + 1)) elements')
+ contents <- mapM (elementToDocbook opts (lvl + 1)) elements'
+ title' <- inlinesToDocbook opts title
+ return $ inTags True tag attribs $
+ inTagsSimple "title" title' $$ vcat contents
-- | Convert a list of Pandoc blocks to Docbook.
-blocksToDocbook :: WriterOptions -> [Block] -> Doc
-blocksToDocbook opts = vcat . map (blockToDocbook opts)
+blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
+blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts)
-- | Auxiliary function to convert Plain block to Para.
plainToPara :: Block -> Block
@@ -145,74 +167,82 @@ plainToPara x = x
-- | Convert a list of pairs of terms and definitions into a list of
-- Docbook varlistentrys.
-deflistItemsToDocbook :: WriterOptions -> [([Inline],[[Block]])] -> Doc
+deflistItemsToDocbook :: PandocMonad m
+ => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc
deflistItemsToDocbook opts items =
- vcat $ map (\(term, defs) -> deflistItemToDocbook opts term defs) items
+ vcat <$> mapM (uncurry (deflistItemToDocbook opts)) items
-- | Convert a term and a list of blocks into a Docbook varlistentry.
-deflistItemToDocbook :: WriterOptions -> [Inline] -> [[Block]] -> Doc
-deflistItemToDocbook opts term defs =
- let def' = concatMap (map plainToPara) defs
- in inTagsIndented "varlistentry" $
- inTagsIndented "term" (inlinesToDocbook opts term) $$
- inTagsIndented "listitem" (blocksToDocbook opts def')
+deflistItemToDocbook :: PandocMonad m
+ => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc
+deflistItemToDocbook opts term defs = do
+ term' <- inlinesToDocbook opts term
+ def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs
+ return $ inTagsIndented "varlistentry" $
+ inTagsIndented "term" term' $$
+ inTagsIndented "listitem" def'
-- | Convert a list of lists of blocks to a list of Docbook list items.
-listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
-listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items
+listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m Doc
+listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items
-- | Convert a list of blocks into a Docbook list item.
-listItemToDocbook :: WriterOptions -> [Block] -> Doc
+listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
listItemToDocbook opts item =
- inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item
+ inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item)
imageToDocbook :: WriterOptions -> Attr -> String -> Doc
imageToDocbook _ attr src = selfClosingTag "imagedata" $
("fileref", src) : idAndRole attr ++ dims
where
dims = go Width "width" ++ go Height "depth"
- go dir dstr = case (dimension dir attr) of
+ go dir dstr = case dimension dir attr of
Just a -> [(dstr, show a)]
Nothing -> []
-- | Convert a Pandoc block element to Docbook.
-blockToDocbook :: WriterOptions -> Block -> Doc
-blockToDocbook _ Null = empty
+blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m Doc
+blockToDocbook _ Null = return empty
-- Add ids to paragraphs in divs with ids - this is needed for
-- pandoc-citeproc to get link anchors in bibliographies:
blockToDocbook opts (Div (ident,_,_) [Para lst]) =
let attribs = [("id", ident) | not (null ident)] in
if hasLineBreaks lst
- then flush $ nowrap $ inTags False "literallayout" attribs
- $ inlinesToDocbook opts lst
- else inTags True "para" attribs $ inlinesToDocbook opts lst
-blockToDocbook opts (Div (ident,_,_) bs) =
- (if null ident
- then mempty
- else selfClosingTag "anchor" [("id", ident)]) $$
- blocksToDocbook opts (map plainToPara bs)
-blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize
+ then (flush . nowrap . inTags False "literallayout" attribs)
+ <$> inlinesToDocbook opts lst
+ else inTags True "para" attribs <$> inlinesToDocbook opts lst
+blockToDocbook opts (Div (ident,_,_) bs) = do
+ contents <- blocksToDocbook opts (map plainToPara bs)
+ return $
+ (if null ident
+ then mempty
+ else selfClosingTag "anchor" [("id", ident)]) $$ contents
+blockToDocbook _ h@Header{} = do
+ -- should not occur after hierarchicalize, except inside lists/blockquotes
+ report $ BlockNotRendered h
+ return empty
blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
-- title beginning with fig: indicates that the image is a figure
-blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) =
- let alt = inlinesToDocbook opts txt
- capt = if null txt
+blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do
+ alt <- inlinesToDocbook opts txt
+ let capt = if null txt
then empty
else inTagsSimple "title" alt
- in inTagsIndented "figure" $
+ return $ inTagsIndented "figure" $
capt $$
- (inTagsIndented "mediaobject" $
- (inTagsIndented "imageobject"
- (imageToDocbook opts attr src)) $$
+ inTagsIndented "mediaobject" (
+ inTagsIndented "imageobject"
+ (imageToDocbook opts attr src) $$
inTagsSimple "textobject" (inTagsSimple "phrase" alt))
blockToDocbook opts (Para lst)
- | hasLineBreaks lst = flush $ nowrap $ inTagsSimple "literallayout" $ inlinesToDocbook opts lst
- | otherwise = inTagsIndented "para" $ inlinesToDocbook opts lst
+ | hasLineBreaks lst = (flush . nowrap . inTagsSimple "literallayout")
+ <$> inlinesToDocbook opts lst
+ | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst
blockToDocbook opts (LineBlock lns) =
blockToDocbook opts $ linesToPara lns
blockToDocbook opts (BlockQuote blocks) =
- inTagsIndented "blockquote" $ blocksToDocbook opts blocks
-blockToDocbook _ (CodeBlock (_,classes,_) str) =
+ inTagsIndented "blockquote" <$> blocksToDocbook opts blocks
+blockToDocbook _ (CodeBlock (_,classes,_) str) = return $
text ("<programlisting" ++ lang ++ ">") <> cr <>
flush (text (escapeStringForXML str) <> cr <> text "</programlisting>")
where lang = if null langs
@@ -224,11 +254,11 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) =
then [s]
else languagesByExtension . map toLower $ s
langs = concatMap langsFrom classes
-blockToDocbook opts (BulletList lst) =
+blockToDocbook opts (BulletList lst) = do
let attribs = [("spacing", "compact") | isTightList lst]
- in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst
-blockToDocbook _ (OrderedList _ []) = empty
-blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
+ inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst
+blockToDocbook _ (OrderedList _ []) = return empty
+blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do
let numeration = case numstyle of
DefaultStyle -> []
Decimal -> [("numeration", "arabic")]
@@ -239,40 +269,46 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
LowerRoman -> [("numeration", "lowerroman")]
spacing = [("spacing", "compact") | isTightList (first:rest)]
attribs = numeration ++ spacing
- items = if start == 1
- then listItemsToDocbook opts (first:rest)
- else (inTags True "listitem" [("override",show start)]
- (blocksToDocbook opts $ map plainToPara first)) $$
- listItemsToDocbook opts rest
- in inTags True "orderedlist" attribs items
-blockToDocbook opts (DefinitionList lst) =
+ items <- if start == 1
+ then listItemsToDocbook opts (first:rest)
+ else do
+ first' <- blocksToDocbook opts (map plainToPara first)
+ rest' <- listItemsToDocbook opts rest
+ return $
+ inTags True "listitem" [("override",show start)] first' $$
+ rest'
+ return $ inTags True "orderedlist" attribs items
+blockToDocbook opts (DefinitionList lst) = do
let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst]
- in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst
-blockToDocbook opts (RawBlock f str)
- | f == "docbook" = text str -- raw XML block
- | f == "html" = if writerDocbook5 opts
- then empty -- No html in Docbook5
- else text str -- allow html for backwards compatibility
- | otherwise = empty
-blockToDocbook _ HorizontalRule = empty -- not semantic
-blockToDocbook opts (Table caption aligns widths headers rows) =
- let captionDoc = if null caption
- then empty
- else inTagsIndented "title"
- (inlinesToDocbook opts caption)
- tableType = if isEmpty captionDoc then "informaltable" else "table"
+ inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst
+blockToDocbook _ b@(RawBlock f str)
+ | f == "docbook" = return $ text str -- raw XML block
+ | f == "html" = do
+ version <- ask
+ if version == DocBook5
+ then return empty -- No html in Docbook5
+ else return $ text str -- allow html for backwards compatibility
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToDocbook _ HorizontalRule = return empty -- not semantic
+blockToDocbook opts (Table caption aligns widths headers rows) = do
+ captionDoc <- if null caption
+ then return empty
+ else inTagsIndented "title" <$>
+ inlinesToDocbook opts caption
+ let tableType = if isEmpty captionDoc then "informaltable" else "table"
percent w = show (truncate (100*w) :: Integer) ++ "*"
coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec"
([("colwidth", percent w) | w > 0] ++
[("align", alignmentToString al)])) widths aligns
- head' = if all null headers
- then empty
- else inTagsIndented "thead" $
- tableRowToDocbook opts headers
- body' = inTagsIndented "tbody" $
- vcat $ map (tableRowToDocbook opts) rows
- in inTagsIndented tableType $ captionDoc $$
- (inTags True "tgroup" [("cols", show (length headers))] $
+ head' <- if all null headers
+ then return empty
+ else inTagsIndented "thead" <$> tableRowToDocbook opts headers
+ body' <- (inTagsIndented "tbody" . vcat) <$>
+ mapM (tableRowToDocbook opts) rows
+ return $ inTagsIndented tableType $ captionDoc $$
+ inTags True "tgroup" [("cols", show (length headers))] (
coltags $$ head' $$ body')
hasLineBreaks :: [Inline] -> Bool
@@ -280,101 +316,111 @@ hasLineBreaks = getAny . query isLineBreak . walk removeNote
where
removeNote :: Inline -> Inline
removeNote (Note _) = Str ""
- removeNote x = x
+ removeNote x = x
isLineBreak :: Inline -> Any
isLineBreak LineBreak = Any True
- isLineBreak _ = Any False
+ isLineBreak _ = Any False
alignmentToString :: Alignment -> [Char]
alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
AlignDefault -> "left"
-tableRowToDocbook :: WriterOptions
+tableRowToDocbook :: PandocMonad m
+ => WriterOptions
-> [[Block]]
- -> Doc
+ -> DB m Doc
tableRowToDocbook opts cols =
- inTagsIndented "row" $ vcat $ map (tableItemToDocbook opts) cols
+ (inTagsIndented "row" . vcat) <$> mapM (tableItemToDocbook opts) cols
-tableItemToDocbook :: WriterOptions
+tableItemToDocbook :: PandocMonad m
+ => WriterOptions
-> [Block]
- -> Doc
+ -> DB m Doc
tableItemToDocbook opts item =
- inTags True "entry" [] $ vcat $ map (blockToDocbook opts) item
+ (inTags True "entry" [] . vcat) <$> mapM (blockToDocbook opts) item
-- | Convert a list of inline elements to Docbook.
-inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
-inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
+inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc
+inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst
-- | Convert an inline element to Docbook.
-inlineToDocbook :: WriterOptions -> Inline -> Doc
-inlineToDocbook _ (Str str) = text $ escapeStringForXML str
+inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m Doc
+inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str
inlineToDocbook opts (Emph lst) =
- inTagsSimple "emphasis" $ inlinesToDocbook opts lst
+ inTagsSimple "emphasis" <$> inlinesToDocbook opts lst
inlineToDocbook opts (Strong lst) =
- inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
+ inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst
inlineToDocbook opts (Strikeout lst) =
- inTags False "emphasis" [("role", "strikethrough")] $
+ inTags False "emphasis" [("role", "strikethrough")] <$>
inlinesToDocbook opts lst
inlineToDocbook opts (Superscript lst) =
- inTagsSimple "superscript" $ inlinesToDocbook opts lst
+ inTagsSimple "superscript" <$> inlinesToDocbook opts lst
inlineToDocbook opts (Subscript lst) =
- inTagsSimple "subscript" $ inlinesToDocbook opts lst
+ inTagsSimple "subscript" <$> inlinesToDocbook opts lst
inlineToDocbook opts (SmallCaps lst) =
- inTags False "emphasis" [("role", "smallcaps")] $
+ inTags False "emphasis" [("role", "smallcaps")] <$>
inlinesToDocbook opts lst
inlineToDocbook opts (Quoted _ lst) =
- inTagsSimple "quote" $ inlinesToDocbook opts lst
+ inTagsSimple "quote" <$> inlinesToDocbook opts lst
inlineToDocbook opts (Cite _ lst) =
inlinesToDocbook opts lst
inlineToDocbook opts (Span (ident,_,_) ils) =
- (if null ident
- then mempty
- else selfClosingTag "anchor" [("id", ident)]) <>
+ ((if null ident
+ then mempty
+ else selfClosingTag "anchor" [("id", ident)]) <>) <$>
inlinesToDocbook opts ils
inlineToDocbook _ (Code _ str) =
- inTagsSimple "literal" $ text (escapeStringForXML str)
+ return $ inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (Math t str)
- | isMathML (writerHTMLMathMethod opts) =
- case writeMathML dt <$> readTeX str of
- Right r -> inTagsSimple tagtype
- $ text $ Xml.ppcElement conf
- $ fixNS
- $ removeAttr r
- Left _ -> inlinesToDocbook opts
- $ texMathToInlines t str
- | otherwise = inlinesToDocbook opts $ texMathToInlines t str
- where (dt, tagtype) = case t of
- InlineMath -> (DisplayInline,"inlineequation")
- DisplayMath -> (DisplayBlock,"informalequation")
+ | isMathML (writerHTMLMathMethod opts) = do
+ res <- convertMath writeMathML t str
+ case res of
+ Right r -> return $ inTagsSimple tagtype
+ $ text $ Xml.ppcElement conf
+ $ fixNS
+ $ removeAttr r
+ Left il -> inlineToDocbook opts il
+ | otherwise =
+ texMathToInlines t str >>= inlinesToDocbook opts
+ where tagtype = case t of
+ InlineMath -> "inlineequation"
+ DisplayMath -> "informalequation"
conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP
removeAttr e = e{ Xml.elAttribs = [] }
fixNS' qname = qname{ Xml.qPrefix = Just "mml" }
fixNS = everywhere (mkT fixNS')
-inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x
- | otherwise = empty
-inlineToDocbook _ LineBreak = text "\n"
-inlineToDocbook _ Space = space
+inlineToDocbook _ il@(RawInline f x)
+ | f == "html" || f == "docbook" = return $ text x
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToDocbook _ LineBreak = return $ text "\n"
+-- currently ignore, would require the option to add custom
+-- styles to the document
+inlineToDocbook _ Space = return space
-- because we use \n for LineBreak, we can't do soft breaks:
-inlineToDocbook _ SoftBreak = space
+inlineToDocbook _ SoftBreak = return space
inlineToDocbook opts (Link attr txt (src, _))
| Just email <- stripPrefix "mailto:" src =
let emailLink = inTagsSimple "email" $ text $
- escapeStringForXML $ email
+ escapeStringForXML email
in case txt of
- [Str s] | escapeURI s == email -> emailLink
- _ -> inlinesToDocbook opts txt <+>
- char '(' <> emailLink <> char ')'
- | otherwise =
- (if isPrefixOf "#" src
- then inTags False "link" $ ("linkend", drop 1 src) : idAndRole attr
- else if writerDocbook5 opts
+ [Str s] | escapeURI s == email -> return emailLink
+ _ -> do contents <- inlinesToDocbook opts txt
+ return $ contents <+>
+ char '(' <> emailLink <> char ')'
+ | otherwise = do
+ version <- ask
+ (if "#" `isPrefixOf` src
+ then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr
+ else if version == DocBook5
then inTags False "link" $ ("xlink:href", src) : idAndRole attr
- else inTags False "ulink" $ ("url", src) : idAndRole attr ) $
- inlinesToDocbook opts txt
-inlineToDocbook opts (Image attr _ (src, tit)) =
+ else inTags False "ulink" $ ("url", src) : idAndRole attr )
+ <$> inlinesToDocbook opts txt
+inlineToDocbook opts (Image attr _ (src, tit)) = return $
let titleDoc = if null tit
then empty
else inTagsIndented "objectinfo" $
@@ -382,11 +428,11 @@ inlineToDocbook opts (Image attr _ (src, tit)) =
in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
titleDoc $$ imageToDocbook opts attr src
inlineToDocbook opts (Note contents) =
- inTagsIndented "footnote" $ blocksToDocbook opts contents
+ inTagsIndented "footnote" <$> blocksToDocbook opts contents
isMathML :: HTMLMathMethod -> Bool
-isMathML (MathML _) = True
-isMathML _ = False
+isMathML MathML = True
+isMathML _ = False
idAndRole :: Attr -> [(String, String)]
idAndRole (id',cls,_) = ident ++ role
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 3fc5d22a2..4542389a2 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1,6 +1,9 @@
-{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}
{-
-Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Docx
- Copyright : Copyright (C) 2012-2015 John MacFarlane
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,44 +32,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to docx.
-}
module Text.Pandoc.Writers.Docx ( writeDocx ) where
-import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import Codec.Archive.Zip
+import Control.Applicative ((<|>))
+import Control.Monad.Except (catchError)
+import Control.Monad.Reader
+import Control.Monad.State.Strict
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Char8 as BL8
+import Data.Char (isSpace, ord, toLower)
+import Data.List (intercalate, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
+import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
import qualified Data.Set as Set
-import qualified Text.Pandoc.UTF8 as UTF8
-import Codec.Archive.Zip
+import qualified Data.Text as T
import Data.Time.Clock.POSIX
-import System.Environment
+import Skylighting
+import System.Random (randomR, StdGen, mkStdGen)
+import Text.Pandoc.BCP47 (getLang, renderLang)
+import Text.Pandoc.Class (PandocMonad, report, toLang)
+import qualified Text.Pandoc.Class as P
import Text.Pandoc.Compat.Time
import Text.Pandoc.Definition
import Text.Pandoc.Generic
+import Text.Pandoc.Highlighting (highlight)
import Text.Pandoc.ImageSize
-import Text.Pandoc.Shared hiding (Element)
-import Text.Pandoc.Writers.Shared (fixDisplayMath)
+import Text.Pandoc.Logging
+import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType,
+ getMimeTypeDef)
import Text.Pandoc.Options
-import Text.Pandoc.Readers.TeXMath
-import Text.Pandoc.Highlighting ( highlight )
-import Text.Pandoc.Walk
-import Text.XML.Light as XML
-import Text.TeXMath
import Text.Pandoc.Readers.Docx.StyleMap
-import Text.Pandoc.Readers.Docx.Util (elemName)
-import Control.Monad.Reader
-import Control.Monad.State
-import Skylighting
-import Data.Unique (hashUnique, newUnique)
-import System.Random (randomRIO)
+import Text.Pandoc.Shared hiding (Element)
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Writers.Shared (fixDisplayMath, metaValueToInlines)
import Text.Printf (printf)
-import qualified Control.Exception as E
-import Data.Monoid ((<>))
-import qualified Data.Text as T
-import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
- extensionFromMimeType)
-import Control.Applicative ((<|>))
-import Data.Maybe (fromMaybe, mapMaybe, maybeToList, isNothing)
-import Data.Char (ord, isSpace, toLower)
+import Text.TeXMath
+import Text.XML.Light as XML
+import Text.XML.Light.Cursor as XMLC
+import Text.Pandoc.Writers.OOXML
data ListMarker = NoMarker
| BulletMarker
@@ -79,28 +82,28 @@ listMarkerToId BulletMarker = "991"
listMarkerToId (NumberMarker sty delim n) =
'9' : '9' : styNum : delimNum : show n
where styNum = case sty of
- DefaultStyle -> '2'
- Example -> '3'
- Decimal -> '4'
- LowerRoman -> '5'
- UpperRoman -> '6'
- LowerAlpha -> '7'
- UpperAlpha -> '8'
+ DefaultStyle -> '2'
+ Example -> '3'
+ Decimal -> '4'
+ LowerRoman -> '5'
+ UpperRoman -> '6'
+ LowerAlpha -> '7'
+ UpperAlpha -> '8'
delimNum = case delim of
- DefaultDelim -> '0'
- Period -> '1'
- OneParen -> '2'
- TwoParens -> '3'
+ DefaultDelim -> '0'
+ Period -> '1'
+ OneParen -> '2'
+ TwoParens -> '3'
data WriterEnv = WriterEnv{ envTextProperties :: [Element]
, envParaProperties :: [Element]
- , envRTL :: Bool
- , envListLevel :: Int
- , envListNumId :: Int
- , envInDel :: Bool
- , envChangesAuthor :: String
- , envChangesDate :: String
- , envPrintWidth :: Integer
+ , envRTL :: Bool
+ , envListLevel :: Int
+ , envListNumId :: Int
+ , envInDel :: Bool
+ , envChangesAuthor :: String
+ , envChangesDate :: String
+ , envPrintWidth :: Integer
}
defaultWriterEnv :: WriterEnv
@@ -117,22 +120,25 @@ defaultWriterEnv = WriterEnv{ envTextProperties = []
data WriterState = WriterState{
stFootnotes :: [Element]
+ , stComments :: [([(String,String)], [Inline])]
, stSectionIds :: Set.Set String
, stExternalLinks :: M.Map String String
- , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString)
+ , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
, stLists :: [ListMarker]
, stInsId :: Int
, stDelId :: Int
, stStyleMaps :: StyleMaps
, stFirstPara :: Bool
, stTocTitle :: [Inline]
- , stDynamicParaProps :: [String]
- , stDynamicTextProps :: [String]
+ , stDynamicParaProps :: Set.Set String
+ , stDynamicTextProps :: Set.Set String
+ , stCurId :: Int
}
defaultWriterState :: WriterState
defaultWriterState = WriterState{
stFootnotes = defaultFootnotes
+ , stComments = []
, stSectionIds = Set.empty
, stExternalLinks = M.empty
, stImages = M.empty
@@ -141,44 +147,29 @@ defaultWriterState = WriterState{
, stDelId = 1
, stStyleMaps = defaultStyleMaps
, stFirstPara = False
- , stTocTitle = normalizeInlines [Str "Table of Contents"]
- , stDynamicParaProps = []
- , stDynamicTextProps = []
+ , stTocTitle = [Str "Table of Contents"]
+ , stDynamicParaProps = Set.empty
+ , stDynamicTextProps = Set.empty
+ , stCurId = 20
}
-type WS = ReaderT WriterEnv (StateT WriterState IO)
-
-mknode :: Node t => String -> [(String,String)] -> t -> Element
-mknode s attrs =
- add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s)
+type WS m = ReaderT WriterEnv (StateT WriterState m)
-nodename :: String -> QName
-nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
- where (name, prefix) = case break (==':') s of
- (xs,[]) -> (xs, Nothing)
- (ys, _:zs) -> (zs, Just ys)
-
-toLazy :: B.ByteString -> BL.ByteString
-toLazy = BL.fromChunks . (:[])
-
-renderXml :: Element -> BL.ByteString
-renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
- UTF8.fromStringLazy (showElement elt)
renumIdMap :: Int -> [Element] -> M.Map String String
renumIdMap _ [] = M.empty
renumIdMap n (e:es)
| Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
- M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es)
+ M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es)
| otherwise = renumIdMap n es
replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
replaceAttr _ _ [] = []
replaceAttr f val (a:as) | f (attrKey a) =
- (XML.Attr (attrKey a) val) : (replaceAttr f val as)
- | otherwise = a : (replaceAttr f val as)
+ XML.Attr (attrKey a) val : replaceAttr f val as
+ | otherwise = a : replaceAttr f val as
-renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element
+renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element
renumId f renumMap e
| Just oldId <- findAttrBy f e
, Just newId <- M.lookup oldId renumMap =
@@ -187,7 +178,7 @@ renumId f renumMap e
e { elAttribs = attrs' }
| otherwise = e
-renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]
+renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element]
renumIds f renumMap = map (renumId f renumMap)
-- | Certain characters are invalid in XML even if escaped.
@@ -206,38 +197,36 @@ isValidChar (ord -> c)
| 0x10000 <= c && c <= 0x10FFFF = True
| otherwise = False
-metaValueToInlines :: MetaValue -> [Inline]
-metaValueToInlines (MetaString s) = normalizeInlines [Str s]
-metaValueToInlines (MetaInlines ils) = ils
-metaValueToInlines (MetaBlocks bs) = query return bs
-metaValueToInlines (MetaBool b) = [Str $ show b]
-metaValueToInlines _ = []
-
--- | Produce an Docx file from a Pandoc document.
-writeDocx :: WriterOptions -- ^ Writer options
+writeDocx :: (PandocMonad m)
+ => WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
- -> IO BL.ByteString
+ -> m BL.ByteString
writeDocx opts doc@(Pandoc meta _) = do
- let datadir = writerUserDataDir opts
- let doc' = walk fixDisplayMath $ doc
- username <- lookup "USERNAME" <$> getEnvironment
- utctime <- getCurrentTime
- distArchive <- getDefaultReferenceDocx datadir
- refArchive <- case writerReferenceDocx opts of
- Just f -> liftM (toArchive . toLazy) $ B.readFile f
- Nothing -> getDefaultReferenceDocx datadir
+ let doc' = walk fixDisplayMath doc
+ username <- P.lookupEnv "USERNAME"
+ utctime <- P.getCurrentTime
+ distArchive <- (toArchive . BL.fromStrict) <$> do
+ oldUserDataDir <- P.getUserDataDir
+ P.setUserDataDir Nothing
+ res <- P.readDefaultDataFile "reference.docx"
+ P.setUserDataDir oldUserDataDir
+ return res
+ refArchive <- case writerReferenceDoc opts of
+ Just f -> toArchive <$> P.readFileLazy f
+ Nothing -> (toArchive . BL.fromStrict) <$>
+ P.readDataFile "reference.docx"
parsedDoc <- parseXml refArchive distArchive "word/document.xml"
let wname f qn = qPrefix qn == Just "w" && f (qName qn)
let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc
-- Gets the template size
- let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz")))
- let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName))
+ let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz"))
+ let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= lookupAttrBy ((=="w") . qName)
- let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar")))
- let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName))
- let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName))
+ let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar"))
+ let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="left") . qName)
+ let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="right") . qName)
-- Get the avaible area (converting the size and the margins to int and
-- doing the difference
@@ -248,8 +237,29 @@ writeDocx opts doc@(Pandoc meta _) = do
)
-- styles
+ mblang <- toLang $ getLang opts meta
+ let addLang :: Element -> Element
+ addLang e = case mblang >>= \l ->
+ (return . XMLC.toTree . go (renderLang l)
+ . XMLC.fromElement) e of
+ Just (Elem e') -> e'
+ _ -> e -- return original
+ where go :: String -> Cursor -> Cursor
+ go l cursor = case XMLC.findRec (isLangElt . current) cursor of
+ Nothing -> cursor
+ Just t -> XMLC.modifyContent (setval l) t
+ setval :: String -> Content -> Content
+ setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $
+ elAttribs e' }
+ setval _ x = x
+ setvalattr :: String -> XML.Attr -> XML.Attr
+ setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l
+ setvalattr _ x = x
+ isLangElt (Elem e') = qName (elName e') == "lang"
+ isLangElt _ = False
+
let stylepath = "word/styles.xml"
- styledoc <- parseXml refArchive distArchive stylepath
+ styledoc <- addLang <$> parseXml refArchive distArchive stylepath
-- parse styledoc for heading styles
let styleMaps = getStyleMaps styledoc
@@ -271,20 +281,20 @@ writeDocx opts doc@(Pandoc meta _) = do
envRTL = isRTLmeta
, envChangesAuthor = fromMaybe "unknown" username
, envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
- , envPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
+ , envPrintWidth = maybe 420 (\x -> quot x 20) pgContentWidth
}
- ((contents, footnotes), st) <- runStateT
- (runReaderT
- (writeOpenXML opts{writerWrapText = WrapNone} doc')
- env)
- initialSt
+ ((contents, footnotes, comments), st) <- runStateT
+ (runReaderT
+ (writeOpenXML opts{writerWrapText = WrapNone} doc')
+ env)
+ initialSt
let epochtime = floor $ utcTimeToPOSIXSeconds utctime
let imgs = M.elems $ stImages st
-- create entries for images in word/media/...
- let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
+ let toImageEntry (_,path,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
let imageEntries = map toImageEntry imgs
let stdAttributes =
@@ -316,7 +326,7 @@ writeDocx opts doc@(Pandoc meta _) = do
-- [Content_Types].xml
let mkOverrideNode (part', contentType') = mknode "Override"
[("PartName",part'),("ContentType",contentType')] ()
- let mkImageOverride (_, imgpath, mbMimeType, _, _) =
+ let mkImageOverride (_, imgpath, mbMimeType, _) =
mkOverrideNode ("/word/" ++ imgpath,
fromMaybe "application/octet-stream" mbMimeType)
let mkMediaOverride imgpath =
@@ -340,6 +350,8 @@ writeDocx opts doc@(Pandoc meta _) = do
"application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml")
,("/word/document.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
+ ,("/word/comments.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml")
,("/word/footnotes.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
] ++
@@ -386,13 +398,16 @@ writeDocx opts doc@(Pandoc meta _) = do
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
"rId7",
"footnotes.xml")
+ ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
+ "rId8",
+ "comments.xml")
]
let idMap = renumIdMap (length baserels' + 1) (headers ++ footers)
let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
let baserels = baserels' ++ renumHeaders ++ renumFooters
- let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
+ let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
let imgrels = map toImgRel imgs
let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
@@ -409,7 +424,7 @@ writeDocx opts doc@(Pandoc meta _) = do
(elChildren sectpr')
in
add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs
- Nothing -> (mknode "w:sectPr" [] ())
+ Nothing -> mknode "w:sectPr" [] ()
-- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr'
let contents' = contents ++ [sectpr]
@@ -431,6 +446,10 @@ writeDocx opts doc@(Pandoc meta _) = do
$ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")]
linkrels
+ -- comments
+ let commentsEntry = toEntry "word/comments.xml" epochtime
+ $ renderXml $ mknode "w:comments" stdAttributes comments
+
-- styles
-- We only want to inject paragraph and text properties that
@@ -438,26 +457,19 @@ writeDocx opts doc@(Pandoc meta _) = do
-- are normalized as lowercase.
let newDynamicParaProps = filter
(\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sParaStyleMap styleMaps)
- (stDynamicParaProps st)
+ (Set.toList $ stDynamicParaProps st)
newDynamicTextProps = filter
(\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sCharStyleMap styleMaps)
- (stDynamicTextProps st)
+ (Set.toList $ stDynamicTextProps st)
let newstyles = map newParaPropToOpenXml newDynamicParaProps ++
map newTextPropToOpenXml newDynamicTextProps ++
- (styleToOpenXml styleMaps $ writerHighlightStyle opts)
- let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) }
- where
- modifyContent
- | writerHighlight opts = (++ map Elem newstyles)
- | otherwise = filter notTokStyle
- notTokStyle (Elem el) = notStyle el || notTokId el
- notTokStyle _ = True
- notStyle = (/= elemName' "style") . elName
- notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId")
- tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok)
- elemName' = elemName (sNameSpaces styleMaps) "w"
+ (case writerHighlightStyle opts of
+ Nothing -> []
+ Just sty -> styleToOpenXml styleMaps sty)
+ let styledoc' = styledoc{ elContent = elContent styledoc ++
+ map Elem newstyles }
let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-- construct word/numbering.xml
@@ -472,6 +484,11 @@ writeDocx opts doc@(Pandoc meta _) = do
, qName (elName e) == "abstractNum" ] ++
[Elem e | e <- allElts
, qName (elName e) == "num" ] }
+
+ let keywords = case lookupMeta "keywords" meta of
+ Just (MetaList xs) -> map stringify xs
+ _ -> []
+
let docPropsPath = "docProps/core.xml"
let docProps = mknode "cp:coreProperties"
[("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
@@ -481,6 +498,7 @@ writeDocx opts doc@(Pandoc meta _) = do
,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
$ mknode "dc:title" [] (stringify $ docTitle meta)
: mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta))
+ : mknode "cp:keywords" [] (intercalate ", " keywords)
: (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
, mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
@@ -509,6 +527,7 @@ writeDocx opts doc@(Pandoc meta _) = do
, "w:consecutiveHyphenLimit"
, "w:hyphenationZone"
, "w:doNotHyphenateCap"
+ , "w:evenAndOddHeaders"
]
settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
@@ -535,6 +554,7 @@ writeDocx opts doc@(Pandoc meta _) = do
let archive = foldr addEntryToArchive emptyArchive $
contentTypesEntry : relsEntry : contentEntry : relEntry :
footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
+ commentsEntry :
docPropsEntry : docPropsAppEntry : themeEntry :
fontTableEntry : settingsEntry : webSettingsEntry :
imageEntries ++ headerFooterEntries ++
@@ -583,12 +603,12 @@ styleToOpenXml sm style =
[ mknode "w:u" [] () | tokFeature tokenUnderline toktype ]
]
tokStyles = tokenStyles style
- tokFeature f toktype = maybe False f $ lookup toktype tokStyles
+ tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles
tokCol toktype = maybe "auto" (drop 1 . fromColor)
- $ (tokenColor =<< lookup toktype tokStyles)
+ $ (tokenColor =<< M.lookup toktype tokStyles)
`mplus` defaultColor style
tokBg toktype = maybe "auto" (drop 1 . fromColor)
- $ (tokenBackground =<< lookup toktype tokStyles)
+ $ (tokenBackground =<< M.lookup toktype tokStyles)
`mplus` backgroundColor style
parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
| otherwise = Just $
@@ -599,11 +619,11 @@ styleToOpenXml sm style =
, mknode "w:link" [("w:val","VerbatimChar")] ()
, mknode "w:pPr" []
$ mknode "w:wordWrap" [("w:val","off")] ()
- : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
- $ backgroundColor style )
+ :
+ maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style)
]
-copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry
+copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry
copyChildren refArchive distArchive path timestamp elNames = do
ref <- parseXml refArchive distArchive path
dist <- parseXml distArchive distArchive path
@@ -622,11 +642,14 @@ copyChildren refArchive distArchive path timestamp elNames = do
baseListId :: Int
baseListId = 1000
-mkNumbering :: [ListMarker] -> IO [Element]
+mkNumbering :: (PandocMonad m) => [ListMarker] -> m [Element]
mkNumbering lists = do
- elts <- mapM mkAbstractNum (ordNub lists)
+ elts <- evalStateT (mapM mkAbstractNum (ordNub lists)) (mkStdGen 1848)
return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
+maxListLevel :: Int
+maxListLevel = 8
+
mkNum :: ListMarker -> Int -> Element
mkNum marker numid =
mknode "w:num" [("w:numId",show numid)]
@@ -636,15 +659,19 @@ mkNum marker numid =
BulletMarker -> []
NumberMarker _ _ start ->
map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
- $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
+ $ mknode "w:startOverride" [("w:val",show start)] ())
+ [0..maxListLevel]
-mkAbstractNum :: ListMarker -> IO Element
+mkAbstractNum :: (PandocMonad m) => ListMarker -> StateT StdGen m Element
mkAbstractNum marker = do
- nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer)
+ gen <- get
+ let (nsid, gen') = randomR (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) gen
+ put gen'
return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
$ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
: mknode "w:multiLevelType" [("w:val","multilevel")] ()
- : map (mkLvl marker) [0..6]
+ : map (mkLvl marker)
+ [0..maxListLevel]
mkLvl :: ListMarker -> Int -> Element
mkLvl marker lvl =
@@ -675,33 +702,35 @@ mkLvl marker lvl =
bulletFor 3 = "\x2013"
bulletFor 4 = "\x2022"
bulletFor 5 = "\x2013"
- bulletFor _ = "\x2022"
- styleFor UpperAlpha _ = "upperLetter"
- styleFor LowerAlpha _ = "lowerLetter"
- styleFor UpperRoman _ = "upperRoman"
- styleFor LowerRoman _ = "lowerRoman"
- styleFor Decimal _ = "decimal"
+ bulletFor x = bulletFor (x `mod` 6)
+ styleFor UpperAlpha _ = "upperLetter"
+ styleFor LowerAlpha _ = "lowerLetter"
+ styleFor UpperRoman _ = "upperRoman"
+ styleFor LowerRoman _ = "lowerRoman"
+ styleFor Decimal _ = "decimal"
styleFor DefaultStyle 1 = "decimal"
styleFor DefaultStyle 2 = "lowerLetter"
styleFor DefaultStyle 3 = "lowerRoman"
styleFor DefaultStyle 4 = "decimal"
styleFor DefaultStyle 5 = "lowerLetter"
- styleFor DefaultStyle 6 = "lowerRoman"
- styleFor _ _ = "decimal"
- patternFor OneParen s = s ++ ")"
+ styleFor DefaultStyle 0 = "lowerRoman"
+ styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6)
+ styleFor _ _ = "decimal"
+ patternFor OneParen s = s ++ ")"
patternFor TwoParens s = "(" ++ s ++ ")"
- patternFor _ s = s ++ "."
+ patternFor _ s = s ++ "."
-getNumId :: WS Int
+getNumId :: (PandocMonad m) => WS m Int
getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
-makeTOC :: WriterOptions -> WS [Element]
+
+makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element]
makeTOC opts | writerTableOfContents opts = do
- let depth = "1-"++(show (writerTOCDepth opts))
+ let depth = "1-"++show (writerTOCDepth opts)
let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
tocTitle <- gets stTocTitle
title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
- return $
+ return
[mknode "w:sdt" [] ([
mknode "w:sdtPr" [] (
mknode "w:docPartObj" [] (
@@ -725,22 +754,20 @@ makeTOC _ = return []
-- | Convert Pandoc document to two lists of
-- OpenXML elements (the main document and footnotes).
-writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element])
+writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element],[Element])
writeOpenXML opts (Pandoc meta blocks) = do
- let tit = docTitle meta ++ case lookupMeta "subtitle" meta of
- Just (MetaBlocks [Plain xs]) -> LineBreak : xs
- _ -> []
+ let tit = docTitle meta
let auths = docAuthors meta
let dat = docDate meta
let abstract' = case lookupMeta "abstract" meta of
- Just (MetaBlocks bs) -> bs
+ Just (MetaBlocks bs) -> bs
Just (MetaInlines ils) -> [Plain ils]
- _ -> []
+ _ -> []
let subtitle' = case lookupMeta "subtitle" meta of
Just (MetaBlocks [Plain xs]) -> xs
Just (MetaBlocks [Para xs]) -> xs
Just (MetaInlines xs) -> xs
- _ -> []
+ _ -> []
title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $
@@ -750,23 +777,40 @@ writeOpenXML opts (Pandoc meta blocks) = do
then return []
else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
- convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
- convertSpace xs = xs
+ convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
+ convertSpace xs = xs
let blocks' = bottomUp convertSpace blocks
- doc' <- (setFirstPara >> blocksToOpenXML opts blocks')
- notes' <- reverse `fmap` gets stFootnotes
+ doc' <- setFirstPara >> blocksToOpenXML opts blocks'
+ notes' <- reverse <$> gets stFootnotes
+ comments <- reverse <$> gets stComments
+ let toComment (kvs, ils) = do
+ annotation <- inlinesToOpenXML opts ils
+ return $
+ mknode "w:comment" [('w':':':k,v) | (k,v) <- kvs]
+ [ mknode "w:p" [] $
+ [ mknode "w:pPr" []
+ [ mknode "w:pStyle" [("w:val", "CommentText")] () ]
+ , mknode "w:r" []
+ [ mknode "w:rPr" []
+ [ mknode "w:rStyle" [("w:val", "CommentReference")] ()
+ , mknode "w:annotationRef" [] ()
+ ]
+ ]
+ ] ++ annotation
+ ]
+ comments' <- mapM toComment comments
toc <- makeTOC opts
let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
- return (meta' ++ doc', notes')
+ return (meta' ++ doc', notes', comments')
-- | Convert a list of Pandoc blocks to OpenXML.
-blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
+blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
pCustomStyle :: String -> Element
pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
-pStyleM :: String -> WS XML.Element
+pStyleM :: (PandocMonad m) => String -> WS m XML.Element
pStyleM styleName = do
styleMaps <- gets stStyleMaps
let sty' = getStyleId styleName $ sParaStyleMap styleMaps
@@ -775,61 +819,75 @@ pStyleM styleName = do
rCustomStyle :: String -> Element
rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
-rStyleM :: String -> WS XML.Element
+rStyleM :: (PandocMonad m) => String -> WS m XML.Element
rStyleM styleName = do
styleMaps <- gets stStyleMaps
let sty' = getStyleId styleName $ sCharStyleMap styleMaps
return $ mknode "w:rStyle" [("w:val",sty')] ()
-getUniqueId :: MonadIO m => m String
+getUniqueId :: (PandocMonad m) => WS m String
-- the + 20 is to ensure that there are no clashes with the rIds
-- already in word/document.xml.rel
-getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique
+getUniqueId = do
+ n <- gets stCurId
+ modify $ \st -> st{stCurId = n + 1}
+ return $ show n
-- | Key for specifying user-defined docx styles.
dynamicStyleKey :: String
dynamicStyleKey = "custom-style"
-- | Convert a Pandoc block element to OpenXML.
-blockToOpenXML :: WriterOptions -> Block -> WS [Element]
+blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk
-blockToOpenXML' :: WriterOptions -> Block -> WS [Element]
+blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
blockToOpenXML' _ Null = return []
-blockToOpenXML' opts (Div (ident,classes,kvs) bs)
- | Just sty <- lookup dynamicStyleKey kvs = do
- modify $ \s -> s{stDynamicParaProps = sty : (stDynamicParaProps s)}
- withParaPropM (pStyleM sty) $ blocksToOpenXML opts bs
- | Just "rtl" <- lookup "dir" kvs = do
- let kvs' = filter (("dir", "rtl")/=) kvs
- local (\env -> env { envRTL = True }) $
- blockToOpenXML opts (Div (ident,classes,kvs') bs)
- | Just "ltr" <- lookup "dir" kvs = do
- let kvs' = filter (("dir", "ltr")/=) kvs
- local (\env -> env { envRTL = False }) $
- blockToOpenXML opts (Div (ident,classes,kvs') bs)
-blockToOpenXML' opts (Div (_,["references"],_) bs) = do
- let (hs, bs') = span isHeaderBlock bs
- header <- blocksToOpenXML opts hs
- -- We put the Bibliography style on paragraphs after the header
- rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs'
- return (header ++ rest)
-blockToOpenXML' opts (Div _ bs) = blocksToOpenXML opts bs
+blockToOpenXML' opts (Div (ident,classes,kvs) bs) = do
+ stylemod <- case lookup dynamicStyleKey kvs of
+ Just sty -> do
+ modify $ \s ->
+ s{stDynamicParaProps = Set.insert sty
+ (stDynamicParaProps s)}
+ return $ withParaPropM (pStyleM sty)
+ _ -> return id
+ dirmod <- case lookup "dir" kvs of
+ Just "rtl" -> return $ local (\env -> env { envRTL = True })
+ Just "ltr" -> return $ local (\env -> env { envRTL = False })
+ _ -> return id
+ let (hs, bs') = if "references" `elem` classes
+ then span isHeaderBlock bs
+ else ([], bs)
+ let bibmod = if "references" `elem` classes
+ then withParaPropM (pStyleM "Bibliography")
+ else id
+ header <- dirmod $ stylemod $ blocksToOpenXML opts hs
+ contents <- dirmod $ bibmod $ stylemod $ blocksToOpenXML opts bs'
+ if null ident
+ then return $ header ++ contents
+ else do
+ id' <- getUniqueId
+ let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
+ ,("w:name",ident)] ()
+ let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
+ return $ bookmarkStart : header ++ contents ++ [bookmarkEnd]
blockToOpenXML' opts (Header lev (ident,_,_) lst) = do
setFirstPara
paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $
getParaProps False
contents <- inlinesToOpenXML opts lst
- usedIdents <- gets stSectionIds
- let bookmarkName = if null ident
- then uniqueIdent lst usedIdents
- else ident
- modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s }
- id' <- getUniqueId
- let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
+ if null ident
+ then return [mknode "w:p" [] (paraProps ++contents)]
+ else do
+ let bookmarkName = ident
+ modify $ \s -> s{ stSectionIds = Set.insert bookmarkName
+ $ stSectionIds s }
+ id' <- getUniqueId
+ let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
,("w:name",bookmarkName)] ()
- let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
- return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
+ let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
+ return [mknode "w:p" [] (paraProps ++
+ [bookmarkStart] ++ contents ++ [bookmarkEnd])]
blockToOpenXML' opts (Plain lst) = withParaProp (pCustomStyle "Compact")
$ blockToOpenXML opts (Para lst)
-- title beginning with fig: indicates that the image is a figure
@@ -838,31 +896,34 @@ blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
let prop = pCustomStyle $
if null alt
then "Figure"
- else "FigureWithCaption"
+ else "CaptionedFigure"
paraProps <- local (\env -> env { envParaProperties = prop : envParaProperties env }) (getParaProps False)
contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
captionNode <- withParaProp (pCustomStyle "ImageCaption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
--- fixDisplayMath sometimes produces a Para [] as artifact
-blockToOpenXML' _ (Para []) = return []
-blockToOpenXML' opts (Para lst) = do
- isFirstPara <- gets stFirstPara
- paraProps <- getParaProps $ case lst of
- [Math DisplayMath _] -> True
- _ -> False
- bodyTextStyle <- pStyleM "Body Text"
- let paraProps' = case paraProps of
- [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
- [] -> [mknode "w:pPr" [] [bodyTextStyle]]
- ps -> ps
- modify $ \s -> s { stFirstPara = False }
- contents <- inlinesToOpenXML opts lst
- return [mknode "w:p" [] (paraProps' ++ contents)]
+blockToOpenXML' opts (Para lst)
+ | null lst && not (isEnabled Ext_empty_paragraphs opts) = return []
+ | otherwise = do
+ isFirstPara <- gets stFirstPara
+ paraProps <- getParaProps $ case lst of
+ [Math DisplayMath _] -> True
+ _ -> False
+ bodyTextStyle <- pStyleM "Body Text"
+ let paraProps' = case paraProps of
+ [] | isFirstPara -> [mknode "w:pPr" []
+ [pCustomStyle "FirstParagraph"]]
+ [] -> [mknode "w:pPr" [] [bodyTextStyle]]
+ ps -> ps
+ modify $ \s -> s { stFirstPara = False }
+ contents <- inlinesToOpenXML opts lst
+ return [mknode "w:p" [] (paraProps' ++ contents)]
blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
-blockToOpenXML' _ (RawBlock format str)
+blockToOpenXML' _ b@(RawBlock format str)
| format == Format "openxml" = return [ x | Elem x <- parseXML str ]
- | otherwise = return []
+ | otherwise = do
+ report $ BlockNotRendered b
+ return []
blockToOpenXML' opts (BlockQuote blocks) = do
p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks
setFirstPara
@@ -914,9 +975,9 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do
caption' ++
[mknode "w:tbl" []
( mknode "w:tblPr" []
- ( mknode "w:tblStyle" [("w:val","TableNormal")] () :
+ ( mknode "w:tblStyle" [("w:val","Table")] () :
mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
- mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () :
+ mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ] () :
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
| not (null caption) ] )
: mknode "w:tblGrid" []
@@ -945,7 +1006,7 @@ blockToOpenXML' opts (DefinitionList items) = do
setFirstPara
return l
-definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
+definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element]
definitionListItemToOpenXML opts (term,defs) = do
term' <- withParaProp (pCustomStyle "DefinitionTerm")
$ blockToOpenXML opts (Para term)
@@ -953,12 +1014,12 @@ definitionListItemToOpenXML opts (term,defs) = do
$ concat `fmap` mapM (blocksToOpenXML opts) defs
return $ term' ++ defs'
-addList :: ListMarker -> WS ()
+addList :: (PandocMonad m) => ListMarker -> WS m ()
addList marker = do
lists <- gets stLists
modify $ \st -> st{ stLists = lists ++ [marker] }
-listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element]
+listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element]
listItemToOpenXML _ _ [] = return []
listItemToOpenXML opts numid (first:rest) = do
first' <- withNumId numid $ blockToOpenXML opts first
@@ -968,121 +1029,162 @@ listItemToOpenXML opts numid (first:rest) = do
alignmentToString :: Alignment -> [Char]
alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
AlignDefault -> "left"
-- | Convert a list of inline elements to OpenXML.
-inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element]
+inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element]
inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
-withNumId :: Int -> WS a -> WS a
+withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a
withNumId numid = local $ \env -> env{ envListNumId = numid }
-asList :: WS a -> WS a
+asList :: (PandocMonad m) => WS m a -> WS m a
asList = local $ \env -> env{ envListLevel = envListLevel env + 1 }
-getTextProps :: WS [Element]
+getTextProps :: (PandocMonad m) => WS m [Element]
getTextProps = do
props <- asks envTextProperties
return $ if null props
then []
else [mknode "w:rPr" [] props]
-withTextProp :: Element -> WS a -> WS a
+withTextProp :: PandocMonad m => Element -> WS m a -> WS m a
withTextProp d p =
local (\env -> env {envTextProperties = d : envTextProperties env}) p
-withTextPropM :: WS Element -> WS a -> WS a
+withTextPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
withTextPropM = (. flip withTextProp) . (>>=)
-getParaProps :: Bool -> WS [Element]
+getParaProps :: PandocMonad m => Bool -> WS m [Element]
getParaProps displayMathPara = do
props <- asks envParaProperties
listLevel <- asks envListLevel
numid <- asks envListNumId
- let listPr = if listLevel >= 0 && not displayMathPara
- then [ mknode "w:numPr" []
- [ mknode "w:numId" [("w:val",show numid)] ()
- , mknode "w:ilvl" [("w:val",show listLevel)] () ]
- ]
- else []
+ let listPr = [mknode "w:numPr" []
+ [ mknode "w:numId" [("w:val",show numid)] ()
+ , mknode "w:ilvl" [("w:val",show listLevel)] () ] | listLevel >= 0 && not displayMathPara]
return $ case props ++ listPr of
[] -> []
ps -> [mknode "w:pPr" [] ps]
-withParaProp :: Element -> WS a -> WS a
+withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
withParaProp d p =
local (\env -> env {envParaProperties = d : envParaProperties env}) p
-withParaPropM :: WS Element -> WS a -> WS a
+withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
withParaPropM = (. flip withParaProp) . (>>=)
-formattedString :: String -> WS [Element]
-formattedString str = do
- props <- getTextProps
+formattedString :: PandocMonad m => String -> WS m [Element]
+formattedString str =
+ -- properly handle soft hyphens
+ case splitBy (=='\173') str of
+ [w] -> formattedString' w
+ ws -> do
+ sh <- formattedRun [mknode "w:softHyphen" [] ()]
+ intercalate sh <$> mapM formattedString' ws
+
+formattedString' :: PandocMonad m => String -> WS m [Element]
+formattedString' str = do
inDel <- asks envInDel
- return [ mknode "w:r" [] $
- props ++
- [ mknode (if inDel then "w:delText" else "w:t")
- [("xml:space","preserve")] (stripInvalidChars str) ] ]
+ formattedRun [ mknode (if inDel then "w:delText" else "w:t")
+ [("xml:space","preserve")] (stripInvalidChars str) ]
-setFirstPara :: WS ()
+formattedRun :: PandocMonad m => [Element] -> WS m [Element]
+formattedRun els = do
+ props <- getTextProps
+ return [ mknode "w:r" [] $ props ++ els ]
+
+setFirstPara :: PandocMonad m => WS m ()
setFirstPara = modify $ \s -> s { stFirstPara = True }
-- | Convert an inline element to OpenXML.
-inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
+inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il
-inlineToOpenXML' :: WriterOptions -> Inline -> WS [Element]
-inlineToOpenXML' _ (Str str) = formattedString str
+inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
+inlineToOpenXML' _ (Str str) =
+ formattedString str
inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
-inlineToOpenXML' opts (Span (ident,classes,kvs) ils)
- | Just sty <- lookup dynamicStyleKey kvs = do
- let kvs' = filter ((dynamicStyleKey, sty)/=) kvs
- modify $ \s -> s{stDynamicTextProps = sty : (stDynamicTextProps s)}
- withTextProp (rCustomStyle sty) $
- inlineToOpenXML opts (Span (ident,classes,kvs') ils)
- | Just "rtl" <- lookup "dir" kvs = do
- let kvs' = filter (("dir", "rtl")/=) kvs
- local (\env -> env { envRTL = True }) $
- inlineToOpenXML opts (Span (ident,classes,kvs') ils)
- | Just "ltr" <- lookup "dir" kvs = do
- let kvs' = filter (("dir", "ltr")/=) kvs
- local (\env -> env { envRTL = False }) $
- inlineToOpenXML opts (Span (ident,classes,kvs') ils)
- | "insertion" `elem` classes = do
- defaultAuthor <- asks envChangesAuthor
- defaultDate <- asks envChangesDate
- let author = fromMaybe defaultAuthor (lookup "author" kvs)
- date = fromMaybe defaultDate (lookup "date" kvs)
- insId <- gets stInsId
- modify $ \s -> s{stInsId = (insId + 1)}
- x <- inlinesToOpenXML opts ils
- return [ mknode "w:ins" [("w:id", (show insId)),
- ("w:author", author),
- ("w:date", date)]
- x ]
- | "deletion" `elem` classes = do
- defaultAuthor <- asks envChangesAuthor
- defaultDate <- asks envChangesDate
- let author = fromMaybe defaultAuthor (lookup "author" kvs)
- date = fromMaybe defaultDate (lookup "date" kvs)
- delId <- gets stDelId
- modify $ \s -> s{stDelId = (delId + 1)}
- x <- local (\env -> env {envInDel = True}) (inlinesToOpenXML opts ils)
- return [ mknode "w:del" [("w:id", (show delId)),
- ("w:author", author),
- ("w:date", date)]
- x ]
- | otherwise = do
- let off x = withTextProp (mknode x [("w:val","0")] ())
- ((if "csl-no-emph" `elem` classes then off "w:i" else id) .
- (if "csl-no-strong" `elem` classes then off "w:b" else id) .
- (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id))
- $ inlinesToOpenXML opts ils
+inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
+ -- prefer the "id" in kvs, since that is the one produced by the docx
+ -- reader.
+ let ident' = fromMaybe ident (lookup "id" kvs)
+ kvs' = filter (("id" /=) . fst) kvs
+ modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st }
+ return [ mknode "w:commentRangeStart" [("w:id", ident')] () ]
+inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) =
+ -- prefer the "id" in kvs, since that is the one produced by the docx
+ -- reader.
+ let ident' = fromMaybe ident (lookup "id" kvs)
+ in
+ return [ mknode "w:commentRangeEnd" [("w:id", ident')] ()
+ , mknode "w:r" []
+ [ mknode "w:rPr" []
+ [ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
+ , mknode "w:commentReference" [("w:id", ident')] () ]
+ ]
+inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
+ stylemod <- case lookup dynamicStyleKey kvs of
+ Just sty -> do
+ modify $ \s ->
+ s{stDynamicTextProps = Set.insert sty
+ (stDynamicTextProps s)}
+ return $ withTextPropM (rStyleM sty)
+ _ -> return id
+ let dirmod = case lookup "dir" kvs of
+ Just "rtl" -> local (\env -> env { envRTL = True })
+ Just "ltr" -> local (\env -> env { envRTL = False })
+ _ -> id
+ let off x = withTextProp (mknode x [("w:val","0")] ())
+ let pmod = (if "csl-no-emph" `elem` classes then off "w:i" else id) .
+ (if "csl-no-strong" `elem` classes then off "w:b" else id) .
+ (if "csl-no-smallcaps" `elem` classes
+ then off "w:smallCaps"
+ else id)
+ insmod <- if "insertion" `elem` classes
+ then do
+ defaultAuthor <- asks envChangesAuthor
+ defaultDate <- asks envChangesDate
+ let author = fromMaybe defaultAuthor (lookup "author" kvs)
+ date = fromMaybe defaultDate (lookup "date" kvs)
+ insId <- gets stInsId
+ modify $ \s -> s{stInsId = insId + 1}
+ return $ \f -> do
+ x <- f
+ return [ mknode "w:ins"
+ [("w:id", show insId),
+ ("w:author", author),
+ ("w:date", date)] x ]
+ else return id
+ delmod <- if "deletion" `elem` classes
+ then do
+ defaultAuthor <- asks envChangesAuthor
+ defaultDate <- asks envChangesDate
+ let author = fromMaybe defaultAuthor (lookup "author" kvs)
+ date = fromMaybe defaultDate (lookup "date" kvs)
+ delId <- gets stDelId
+ modify $ \s -> s{stDelId = delId + 1}
+ return $ \f -> local (\env->env{envInDel=True}) $ do
+ x <- f
+ return [mknode "w:del"
+ [("w:id", show delId),
+ ("w:author", author),
+ ("w:date", date)] x]
+ else return id
+ contents <- insmod $ delmod $ dirmod $ stylemod $ pmod
+ $ inlinesToOpenXML opts ils
+ if null ident
+ then return contents
+ else do
+ id' <- getUniqueId
+ let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
+ ,("w:name",ident)] ()
+ let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
+ return $ bookmarkStart : contents ++ [bookmarkEnd]
inlineToOpenXML' opts (Strong lst) =
withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
inlineToOpenXML' opts (Emph lst) =
@@ -1100,40 +1202,40 @@ inlineToOpenXML' opts (Strikeout lst) =
withTextProp (mknode "w:strike" [] ())
$ inlinesToOpenXML opts lst
inlineToOpenXML' _ LineBreak = return [br]
-inlineToOpenXML' _ (RawInline f str)
+inlineToOpenXML' _ il@(RawInline f str)
| f == Format "openxml" = return [ x | Elem x <- parseXML str ]
- | otherwise = return []
+ | otherwise = do
+ report $ InlineNotRendered il
+ return []
inlineToOpenXML' opts (Quoted quoteType lst) =
inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close]
where (open, close) = case quoteType of
SingleQuote -> ("\x2018", "\x2019")
DoubleQuote -> ("\x201C", "\x201D")
inlineToOpenXML' opts (Math mathType str) = do
- let displayType = if mathType == DisplayMath
- then DisplayBlock
- else DisplayInline
- when (displayType == DisplayBlock) setFirstPara
- case writeOMML displayType <$> readTeX str of
- Right r -> return [r]
- Left e -> do
- warn $ "Cannot convert the following TeX math, skipping:\n" ++ str ++
- "\n" ++ e
- inlinesToOpenXML opts (texMathToInlines mathType str)
+ when (mathType == DisplayMath) setFirstPara
+ res <- (lift . lift) (convertMath writeOMML mathType str)
+ case res of
+ Right r -> return [r]
+ Left il -> inlineToOpenXML' opts il
inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML' opts (Code attrs str) = do
let unhighlighted = intercalate [br] `fmap`
- (mapM formattedString $ lines str)
+ mapM formattedString (lines str)
formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
toHlTok (toktype,tok) = mknode "w:r" []
[ mknode "w:rPr" []
[ rCustomStyle (show toktype) ]
, mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
withTextProp (rCustomStyle "VerbatimChar")
- $ if writerHighlight opts
- then case highlight formatOpenXML attrs str of
- Nothing -> unhighlighted
- Just h -> return h
- else unhighlighted
+ $ if isNothing (writerHighlightStyle opts)
+ then unhighlighted
+ else case highlight (writerSyntaxMap opts)
+ formatOpenXML attrs str of
+ Right h -> return h
+ Left msg -> do
+ unless (null msg) $ report $ CouldNotHighlight msg
+ unhighlighted
inlineToOpenXML' opts (Note bs) = do
notes <- gets stFootnotes
notenum <- getUniqueId
@@ -1151,7 +1253,7 @@ inlineToOpenXML' opts (Note bs) = do
, envTextProperties = [] })
(withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts
$ insertNoteRef bs)
- let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents
+ let newnote = mknode "w:footnote" [("w:id", notenum)] contents
modify $ \s -> s{ stFootnotes = newnote : notes }
return [ mknode "w:r" []
[ mknode "w:rPr" [] footnoteStyle
@@ -1173,81 +1275,109 @@ inlineToOpenXML' opts (Link _ txt (src,_)) = do
return i
return [ mknode "w:hyperlink" [("r:id",id')] contents ]
inlineToOpenXML' opts (Image attr alt (src, title)) = do
- -- first, check to see if we've already done this image
pageWidth <- asks envPrintWidth
imgs <- gets stImages
- case M.lookup src imgs of
- Just (_,_,_,elt,_) -> return [elt]
- Nothing -> do
- res <- liftIO $
- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
- case res of
- Left (_ :: E.SomeException) -> do
- warn $ "Could not find image `" ++ src ++ "', skipping..."
- -- emit alt text
- inlinesToOpenXML opts alt
- Right (img, mt) -> do
- ident <- ("rId"++) `fmap` getUniqueId
- let (xpt,ypt) = desiredSizeInPoints opts attr
- (either (const def) id (imageSize img))
- -- 12700 emu = 1 pt
- let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700)
- let cNvPicPr = mknode "pic:cNvPicPr" [] $
- mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
- let nvPicPr = mknode "pic:nvPicPr" []
- [ mknode "pic:cNvPr"
- [("descr",src),("id","0"),("name","Picture")] ()
- , cNvPicPr ]
- let blipFill = mknode "pic:blipFill" []
- [ mknode "a:blip" [("r:embed",ident)] ()
- , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ]
- let xfrm = mknode "a:xfrm" []
- [ mknode "a:off" [("x","0"),("y","0")] ()
- , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ]
- let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
- mknode "a:avLst" [] ()
- let ln = mknode "a:ln" [("w","9525")]
- [ mknode "a:noFill" [] ()
- , mknode "a:headEnd" [] ()
- , mknode "a:tailEnd" [] () ]
- let spPr = mknode "pic:spPr" [("bwMode","auto")]
- [xfrm, prstGeom, mknode "a:noFill" [] (), ln]
- let graphic = mknode "a:graphic" [] $
- mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")]
- [ mknode "pic:pic" []
- [ nvPicPr
- , blipFill
- , spPr ] ]
- let imgElt = mknode "w:r" [] $
- mknode "w:drawing" [] $
- mknode "wp:inline" []
- [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
- , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
- , mknode "wp:docPr" [("descr",stringify alt), ("title", title), ("id","1"),("name","Picture")] ()
- , graphic ]
- let imgext = case mt >>= extensionFromMimeType of
- Just x -> '.':x
- Nothing -> case imageType img of
- Just Png -> ".png"
- Just Jpeg -> ".jpeg"
- Just Gif -> ".gif"
- Just Pdf -> ".pdf"
- Just Eps -> ".eps"
- Nothing -> ""
- if null imgext
- then -- without an extension there is no rule for content type
- inlinesToOpenXML opts alt -- return alt to avoid corrupted docx
- else do
- let imgpath = "media/" ++ ident ++ imgext
- let mbMimeType = mt <|> getMimeType imgpath
- -- insert mime type to use in constructing [Content_Types].xml
- modify $ \st -> st{ stImages =
- M.insert src (ident, imgpath, mbMimeType, imgElt, img)
- $ stImages st }
- return [imgElt]
+ let
+ stImage = M.lookup src imgs
+ generateImgElt (ident, _, _, img) =
+ let
+ (xpt,ypt) = desiredSizeInPoints opts attr
+ (either (const def) id (imageSize opts img))
+ -- 12700 emu = 1 pt
+ (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700)
+ (pageWidth * 12700)
+ cNvPicPr = mknode "pic:cNvPicPr" [] $
+ mknode "a:picLocks" [("noChangeArrowheads","1")
+ ,("noChangeAspect","1")] ()
+ nvPicPr = mknode "pic:nvPicPr" []
+ [ mknode "pic:cNvPr"
+ [("descr",src),("id","0"),("name","Picture")] ()
+ , cNvPicPr ]
+ blipFill = mknode "pic:blipFill" []
+ [ mknode "a:blip" [("r:embed",ident)] ()
+ , mknode "a:stretch" [] $
+ mknode "a:fillRect" [] ()
+ ]
+ xfrm = mknode "a:xfrm" []
+ [ mknode "a:off" [("x","0"),("y","0")] ()
+ , mknode "a:ext" [("cx",show xemu)
+ ,("cy",show yemu)] () ]
+ prstGeom = mknode "a:prstGeom" [("prst","rect")] $
+ mknode "a:avLst" [] ()
+ ln = mknode "a:ln" [("w","9525")]
+ [ mknode "a:noFill" [] ()
+ , mknode "a:headEnd" [] ()
+ , mknode "a:tailEnd" [] () ]
+ spPr = mknode "pic:spPr" [("bwMode","auto")]
+ [xfrm, prstGeom, mknode "a:noFill" [] (), ln]
+ graphic = mknode "a:graphic" [] $
+ mknode "a:graphicData"
+ [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")]
+ [ mknode "pic:pic" []
+ [ nvPicPr
+ , blipFill
+ , spPr
+ ]
+ ]
+ imgElt = mknode "w:r" [] $
+ mknode "w:drawing" [] $
+ mknode "wp:inline" []
+ [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
+ , mknode "wp:effectExtent"
+ [("b","0"),("l","0"),("r","0"),("t","0")] ()
+ , mknode "wp:docPr"
+ [ ("descr", stringify alt)
+ , ("title", title)
+ , ("id","1")
+ , ("name","Picture")
+ ] ()
+ , graphic
+ ]
+ in
+ imgElt
+
+ case stImage of
+ Just imgData -> return $ [generateImgElt imgData]
+ Nothing -> ( do --try
+ (img, mt) <- P.fetchItem src
+ ident <- ("rId"++) `fmap` getUniqueId
+
+ let
+ imgext = case mt >>= extensionFromMimeType of
+ Just x -> '.':x
+ Nothing -> case imageType img of
+ Just Png -> ".png"
+ Just Jpeg -> ".jpeg"
+ Just Gif -> ".gif"
+ Just Pdf -> ".pdf"
+ Just Eps -> ".eps"
+ Just Svg -> ".svg"
+ Just Emf -> ".emf"
+ Nothing -> ""
+ imgpath = "media/" ++ ident ++ imgext
+ mbMimeType = mt <|> getMimeType imgpath
+
+ imgData = (ident, imgpath, mbMimeType, img)
+
+ if null imgext
+ then -- without an extension there is no rule for content type
+ inlinesToOpenXML opts alt -- return alt to avoid corrupted docx
+ else do
+ -- insert mime type to use in constructing [Content_Types].xml
+ modify $ \st -> st { stImages = M.insert src imgData $ stImages st }
+ return [generateImgElt imgData]
+ )
+ `catchError` ( \e -> do
+ report $ CouldNotFetchResource src (show e)
+ -- emit alt text
+ inlinesToOpenXML opts alt
+ )
br :: Element
-br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
+br = breakElement "textWrapping"
+
+breakElement :: String -> Element
+breakElement kind = mknode "w:r" [] [mknode "w:br" [("w:type",kind)] () ]
-- Word will insert these footnotes into the settings.xml file
-- (whether or not they're visible in the document). If they're in the
@@ -1255,35 +1385,18 @@ br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
-- problems. So we want to make sure we insert them into our document.
defaultFootnotes :: [Element]
defaultFootnotes = [ mknode "w:footnote"
- [("w:type", "separator"), ("w:id", "-1")] $
+ [("w:type", "separator"), ("w:id", "-1")]
[ mknode "w:p" [] $
[mknode "w:r" [] $
[ mknode "w:separator" [] ()]]]
, mknode "w:footnote"
- [("w:type", "continuationSeparator"), ("w:id", "0")] $
+ [("w:type", "continuationSeparator"), ("w:id", "0")]
[ mknode "w:p" [] $
[ mknode "w:r" [] $
[ mknode "w:continuationSeparator" [] ()]]]]
-parseXml :: Archive -> Archive -> String -> IO Element
-parseXml refArchive distArchive relpath =
- case findEntryByPath relpath refArchive `mplus`
- findEntryByPath relpath distArchive of
- Nothing -> fail $ relpath ++ " missing in reference docx"
- Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
- Nothing -> fail $ relpath ++ " corrupt in reference docx"
- Just d -> return d
-
--- | Scales the image to fit the page
--- sizes are passed in emu
-fitToPage :: (Double, Double) -> Integer -> (Integer, Integer)
-fitToPage (x, y) pageWidth
- -- Fixes width to the page width and scales the height
- | x > fromIntegral pageWidth =
- (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y)
- | otherwise = (floor x, floor y)
-
-withDirection :: WS a -> WS a
+
+withDirection :: PandocMonad m => WS m a -> WS m a
withDirection x = do
isRTL <- asks envRTL
paraProps <- asks envParaProperties
@@ -1296,8 +1409,8 @@ withDirection x = do
if isRTL
-- if we are going right-to-left, we (re?)add the properties.
then flip local x $
- \env -> env { envParaProperties = (mknode "w:bidi" [] ()) : paraProps'
- , envTextProperties = (mknode "w:rtl" [] ()) : textProps'
+ \env -> env { envParaProperties = mknode "w:bidi" [] () : paraProps'
+ , envTextProperties = mknode "w:rtl" [] () : textProps'
}
else flip local x $ \env -> env { envParaProperties = paraProps'
, envTextProperties = textProps'
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 7459f1b42..dda21d23d 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.DokuWiki
- Copyright : Copyright (C) 2008-2015 John MacFarlane
+ Copyright : Copyright (C) 2008-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : Clare Macrae <clare.macrae@googlemail.com>
@@ -39,71 +39,64 @@ DokuWiki: <https://www.dokuwiki.org/dokuwiki>
-}
module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where
+import Control.Monad (zipWithM)
+import Control.Monad.Reader (ReaderT, ask, local, runReaderT)
+import Control.Monad.State.Strict (StateT, evalStateT)
+import Data.Default (Default (..))
+import Data.List (intercalate, intersect, isPrefixOf, transpose)
+import Data.Text (Text, pack)
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
-import Text.Pandoc.Options ( WriterOptions(
- writerTableOfContents
- , writerTemplate
- , writerWrapText), WrapOption(..) )
-import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting
- , camelCaseToHyphenated, trimr, normalize, substitute )
-import Text.Pandoc.Writers.Shared ( defField, metaToJSON )
import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates ( renderTemplate' )
-import Data.List ( intersect, intercalate, isPrefixOf, transpose )
-import Data.Default (Default(..))
-import Network.URI ( isURI )
-import Control.Monad ( zipWithM )
-import Control.Monad.State ( modify, State, get, evalState )
-import Control.Monad.Reader ( ReaderT, runReaderT, ask, local )
+import Text.Pandoc.Logging
+import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText))
+import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara,
+ removeFormatting, substitute, trimr)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared (defField, metaToJSON)
data WriterState = WriterState {
- stNotes :: Bool -- True if there are notes
}
data WriterEnvironment = WriterEnvironment {
- stIndent :: String -- Indent after the marker at the beginning of list items
- , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ stIndent :: String -- Indent after the marker at the beginning of list items
+ , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
, stBackSlashLB :: Bool -- True if we should produce formatted strings with newlines (as in a table cell)
}
instance Default WriterState where
- def = WriterState { stNotes = False }
+ def = WriterState {}
instance Default WriterEnvironment where
def = WriterEnvironment { stIndent = ""
, stUseTags = False
, stBackSlashLB = False }
-type DokuWiki = ReaderT WriterEnvironment (State WriterState)
+type DokuWiki m = ReaderT WriterEnvironment (StateT WriterState m)
-- | Convert Pandoc to DokuWiki.
-writeDokuWiki :: WriterOptions -> Pandoc -> String
+writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeDokuWiki opts document =
- runDokuWiki (pandocToDokuWiki opts $ normalize document)
+ runDokuWiki (pandocToDokuWiki opts document)
-runDokuWiki :: DokuWiki a -> a
-runDokuWiki = flip evalState def . flip runReaderT def
+runDokuWiki :: PandocMonad m => DokuWiki m a -> m a
+runDokuWiki = flip evalStateT def . flip runReaderT def
-- | Return DokuWiki representation of document.
-pandocToDokuWiki :: WriterOptions -> Pandoc -> DokuWiki String
+pandocToDokuWiki :: PandocMonad m
+ => WriterOptions -> Pandoc -> DokuWiki m Text
pandocToDokuWiki opts (Pandoc meta blocks) = do
metadata <- metaToJSON opts
(fmap trimr . blockListToDokuWiki opts)
(inlineListToDokuWiki opts)
meta
body <- blockListToDokuWiki opts blocks
- notesExist <- stNotes <$> get
- let notes = if notesExist
- then "" -- TODO Was "\n<references />" Check whether I can really remove this:
- -- if it is definitely to do with footnotes, can remove this whole bit
- else ""
- let main = body ++ notes
+ let main = pack body
let context = defField "body" main
- $ defField "toc" (writerTableOfContents opts)
- $ metadata
+ $ defField "toc" (writerTableOfContents opts) metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Escape special characters for DokuWiki.
escapeString :: String -> String
@@ -112,9 +105,10 @@ escapeString = substitute "__" "%%__%%" .
substitute "//" "%%//%%"
-- | Convert Pandoc block element to DokuWiki.
-blockToDokuWiki :: WriterOptions -- ^ Options
+blockToDokuWiki :: PandocMonad m
+ => WriterOptions -- ^ Options
-> Block -- ^ Block element
- -> DokuWiki String
+ -> DokuWiki m String
blockToDokuWiki _ Null = return ""
@@ -149,12 +143,13 @@ blockToDokuWiki opts (Para inlines) = do
blockToDokuWiki opts (LineBlock lns) =
blockToDokuWiki opts $ linesToPara lns
-blockToDokuWiki _ (RawBlock f str)
+blockToDokuWiki _ b@(RawBlock f str)
| f == Format "dokuwiki" = return str
-- See https://www.dokuwiki.org/wiki:syntax
-- use uppercase HTML tag for block-level content:
| f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>"
- | otherwise = return ""
+ | otherwise = "" <$
+ report (BlockNotRendered b)
blockToDokuWiki _ HorizontalRule = return "\n----\n"
@@ -177,7 +172,7 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do
"visualfoxpro", "winbatch", "xml", "xpp", "z80"]
return $ "<code" ++
(case at of
- [] -> ">\n"
+ [] -> ">\n"
(x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>"
blockToDokuWiki opts (BlockQuote blocks) = do
@@ -198,7 +193,7 @@ blockToDokuWiki opts (Table capt aligns _ headers rows) = do
rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows
let widths = map (maximum . map length) $ transpose (headers':rows')
let padTo (width, al) s =
- case (width - length s) of
+ case width - length s of
x | x > 0 ->
if al == AlignLeft || al == AlignDefault
then s ++ replicate x ' '
@@ -278,21 +273,27 @@ listAttribsToString (startnum, numstyle, _) =
else "")
-- | Convert bullet list item (list of blocks) to DokuWiki.
-listItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String
+listItemToDokuWiki :: PandocMonad m
+ => WriterOptions -> [Block] -> DokuWiki m String
listItemToDokuWiki opts items = do
- contents <- blockListToDokuWiki opts items
useTags <- stUseTags <$> ask
if useTags
- then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>"
+ then do
+ contents <- blockListToDokuWiki opts items
+ return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>"
else do
+ bs <- mapM (blockToDokuWiki opts) items
+ let contents = case items of
+ [_, CodeBlock _ _] -> concat bs
+ _ -> vcat bs
indent <- stIndent <$> ask
backSlash <- stBackSlashLB <$> ask
- let indent' = if backSlash then (drop 2 indent) else indent
+ let indent' = if backSlash then drop 2 indent else indent
return $ indent' ++ "* " ++ contents
-- | Convert ordered list item (list of blocks) to DokuWiki.
-- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki
-orderedListItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String
+orderedListItemToDokuWiki :: PandocMonad m => WriterOptions -> [Block] -> DokuWiki m String
orderedListItemToDokuWiki opts items = do
contents <- blockListToDokuWiki opts items
useTags <- stUseTags <$> ask
@@ -301,24 +302,25 @@ orderedListItemToDokuWiki opts items = do
else do
indent <- stIndent <$> ask
backSlash <- stBackSlashLB <$> ask
- let indent' = if backSlash then (drop 2 indent) else indent
+ let indent' = if backSlash then drop 2 indent else indent
return $ indent' ++ "- " ++ contents
-- | Convert definition list item (label, list of blocks) to DokuWiki.
-definitionListItemToDokuWiki :: WriterOptions
+definitionListItemToDokuWiki :: PandocMonad m
+ => WriterOptions
-> ([Inline],[[Block]])
- -> DokuWiki String
+ -> DokuWiki m String
definitionListItemToDokuWiki opts (label, items) = do
labelText <- inlineListToDokuWiki opts label
contents <- mapM (blockListToDokuWiki opts) items
useTags <- stUseTags <$> ask
if useTags
then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++
- (intercalate "\n" $ map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents)
+ intercalate "\n" (map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents)
else do
indent <- stIndent <$> ask
backSlash <- stBackSlashLB <$> ask
- let indent' = if backSlash then (drop 2 indent) else indent
+ let indent' = if backSlash then drop 2 indent else indent
return $ indent' ++ "* **" ++ labelText ++ "** " ++ concat contents
-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
@@ -337,18 +339,19 @@ isSimpleListItem :: [Block] -> Bool
isSimpleListItem [] = True
isSimpleListItem [x] =
case x of
- Plain _ -> True
- Para _ -> True
- BulletList _ -> isSimpleList x
- OrderedList _ _ -> isSimpleList x
- DefinitionList _ -> isSimpleList x
- _ -> False
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ DefinitionList _ -> isSimpleList x
+ _ -> False
isSimpleListItem [x, y] | isPlainOrPara x =
case y of
- BulletList _ -> isSimpleList y
- OrderedList _ _ -> isSimpleList y
- DefinitionList _ -> isSimpleList y
- _ -> False
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ DefinitionList _ -> isSimpleList y
+ CodeBlock _ _ -> True
+ _ -> False
isSimpleListItem _ = False
isPlainOrPara :: Block -> Bool
@@ -368,14 +371,15 @@ backSlashLineBreaks cs = reverse $ g $ reverse $ concatMap f cs
where f '\n' = "\\\\ "
f c = [c]
g (' ' : '\\':'\\': xs) = xs
- g s = s
+ g s = s
-- Auxiliary functions for tables:
-tableItemToDokuWiki :: WriterOptions
- -> Alignment
- -> [Block]
- -> DokuWiki String
+tableItemToDokuWiki :: PandocMonad m
+ => WriterOptions
+ -> Alignment
+ -> [Block]
+ -> DokuWiki m String
tableItemToDokuWiki opts align' item = do
let mkcell x = (if align' == AlignRight || align' == AlignCenter
then " "
@@ -388,22 +392,32 @@ tableItemToDokuWiki opts align' item = do
return $ mkcell contents
-- | Convert list of Pandoc block elements to DokuWiki.
-blockListToDokuWiki :: WriterOptions -- ^ Options
+blockListToDokuWiki :: PandocMonad m
+ => WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
- -> DokuWiki String
+ -> DokuWiki m String
blockListToDokuWiki opts blocks = do
backSlash <- stBackSlashLB <$> ask
+ let blocks' = consolidateRawBlocks blocks
if backSlash
- then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks
- else vcat <$> mapM (blockToDokuWiki opts) blocks
+ then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks'
+ else vcat <$> mapM (blockToDokuWiki opts) blocks'
+
+consolidateRawBlocks :: [Block] -> [Block]
+consolidateRawBlocks [] = []
+consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs)
+ | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs)
+consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs
-- | Convert list of Pandoc inline elements to DokuWiki.
-inlineListToDokuWiki :: WriterOptions -> [Inline] -> DokuWiki String
+inlineListToDokuWiki :: PandocMonad m
+ => WriterOptions -> [Inline] -> DokuWiki m String
inlineListToDokuWiki opts lst =
- concat <$> (mapM (inlineToDokuWiki opts) lst)
+ concat <$> mapM (inlineToDokuWiki opts) lst
-- | Convert Pandoc inline element to DokuWiki.
-inlineToDokuWiki :: WriterOptions -> Inline -> DokuWiki String
+inlineToDokuWiki :: PandocMonad m
+ => WriterOptions -> Inline -> DokuWiki m String
inlineToDokuWiki opts (Span _attrs ils) =
inlineListToDokuWiki opts ils
@@ -460,12 +474,12 @@ inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim
DisplayMath -> "$$"
InlineMath -> "$"
-inlineToDokuWiki _ (RawInline f str)
+inlineToDokuWiki _ il@(RawInline f str)
| f == Format "dokuwiki" = return str
| f == Format "html" = return $ "<html>" ++ str ++ "</html>"
- | otherwise = return ""
+ | otherwise = "" <$ report (InlineNotRendered il)
-inlineToDokuWiki _ (LineBreak) = return "\\\\\n"
+inlineToDokuWiki _ LineBreak = return "\\\\\n"
inlineToDokuWiki opts SoftBreak =
case writerWrapText opts of
@@ -498,7 +512,6 @@ inlineToDokuWiki opts (Image attr alt (source, tit)) = do
inlineToDokuWiki opts (Note contents) = do
contents' <- blockListToDokuWiki opts contents
- modify (\s -> s { stNotes = True })
return $ "((" ++ contents' ++ "))"
-- note - may not work for notes with multiple blocks
@@ -507,7 +520,7 @@ imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height
where
toPx = fmap (showInPixel opts) . checkPct
checkPct (Just (Percent _)) = Nothing
- checkPct maybeDim = maybeDim
+ checkPct maybeDim = maybeDim
go (Just w) Nothing = "?" ++ w
go (Just w) (Just h) = "?" ++ w ++ "x" ++ h
go Nothing (Just h) = "?0x" ++ h
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 00bf4a81c..7b4853a24 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,6 +1,9 @@
-{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.EPUB
- Copyright : Copyright (C) 2010-2015 John MacFarlane
+ Copyright : Copyright (C) 2010-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -28,47 +31,47 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to EPUB.
-}
-module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
-import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
-import qualified Data.Map as M
-import qualified Data.Set as Set
-import Data.Maybe ( fromMaybe, catMaybes )
-import Data.List ( isPrefixOf, isInfixOf, intercalate )
-import System.Environment ( getEnv )
-import Text.Printf (printf)
-import System.FilePath ( takeExtension, takeFileName )
-import System.FilePath.Glob ( namesMatching )
-import Network.HTTP ( urlEncode )
+module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where
+import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive,
+ fromArchive, fromEntry, toEntry)
+import Control.Monad (mplus, unless, when, zipWithM)
+import Control.Monad.Except (catchError, throwError)
+import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get,
+ gets, lift, modify, put)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
-import qualified Text.Pandoc.UTF8 as UTF8
-import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive)
-import Data.Time.Clock.POSIX ( getPOSIXTime )
-import Text.Pandoc.Compat.Time
-import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim
- , normalizeDate, readDataFile, stringify, warn
- , hierarchicalize, fetchItem' )
-import qualified Text.Pandoc.Shared as S (Element(..))
+import Data.Char (isAlphaNum, isAscii, isDigit, toLower)
+import Data.List (intercalate, isInfixOf, isPrefixOf)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust)
+import qualified Data.Set as Set
+import qualified Data.Text as TS
+import qualified Data.Text.Lazy as TL
+import Network.HTTP (urlEncode)
+import System.FilePath (takeExtension, takeFileName, makeRelative)
+import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags)
import Text.Pandoc.Builder (fromList, setMeta)
-import Text.Pandoc.Options ( WriterOptions(..)
- , WrapOption(..)
- , HTMLMathMethod(..)
- , EPUBVersion(..)
- , ObfuscationMethod(NoObfuscation) )
+import Text.Pandoc.Class (PandocMonad, report)
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.Compat.Time
import Text.Pandoc.Definition
-import Text.Pandoc.Walk (walk, walkM, query)
-import Control.Monad.State (modify, get, State, put, evalState)
-import Control.Monad (mplus, liftM, when)
-import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
- , strContent, lookupAttr, Node(..), QName(..), parseXML
- , onlyElems, node, ppElement)
-import Text.Pandoc.UUID (getRandomUUID)
-import Text.Pandoc.Writers.HTML ( writeHtml )
-import Data.Char ( toLower, isDigit, isAlphaNum )
-import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
-import qualified Control.Exception as E
-import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
-import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType)
+import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..),
+ ObfuscationMethod (NoObfuscation), WrapOption (..),
+ WriterOptions (..))
+import Text.Pandoc.Shared (hierarchicalize, normalizeDate, renderTags',
+ safeRead, stringify, trim, uniqueIdent)
+import qualified Text.Pandoc.Shared as S (Element (..))
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.UUID (getUUID)
+import Text.Pandoc.Walk (query, walk, walkM)
+import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
+import Text.Printf (printf)
+import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
+ add_attrs, lookupAttr, node, onlyElems, parseXML,
+ ppElement, showElement, strContent, unode, unqual)
-- A Chapter includes a list of blocks and maybe a section
-- number offset. Note, some chapters are unnumbered. The section
@@ -76,51 +79,55 @@ import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
-- in filenames, chapter0003.xhtml.
data Chapter = Chapter (Maybe [Int]) [Block]
+data EPUBState = EPUBState {
+ stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))]
+ , stEpubSubdir :: String
+ }
+
+type E m = StateT EPUBState m
+
data EPUBMetadata = EPUBMetadata{
- epubIdentifier :: [Identifier]
- , epubTitle :: [Title]
- , epubDate :: [Date]
- , epubLanguage :: String
- , epubCreator :: [Creator]
- , epubContributor :: [Creator]
- , epubSubject :: [String]
- , epubDescription :: Maybe String
- , epubType :: Maybe String
- , epubFormat :: Maybe String
- , epubPublisher :: Maybe String
- , epubSource :: Maybe String
- , epubRelation :: Maybe String
- , epubCoverage :: Maybe String
- , epubRights :: Maybe String
- , epubCoverImage :: Maybe String
- , epubStylesheet :: Maybe Stylesheet
- , epubPageDirection :: Maybe ProgressionDirection
+ epubIdentifier :: [Identifier]
+ , epubTitle :: [Title]
+ , epubDate :: [Date]
+ , epubLanguage :: String
+ , epubCreator :: [Creator]
+ , epubContributor :: [Creator]
+ , epubSubject :: [String]
+ , epubDescription :: Maybe String
+ , epubType :: Maybe String
+ , epubFormat :: Maybe String
+ , epubPublisher :: Maybe String
+ , epubSource :: Maybe String
+ , epubRelation :: Maybe String
+ , epubCoverage :: Maybe String
+ , epubRights :: Maybe String
+ , epubCoverImage :: Maybe String
+ , epubStylesheets :: [FilePath]
+ , epubPageDirection :: Maybe ProgressionDirection
+ , epubIbooksFields :: [(String, String)]
} deriving Show
-data Stylesheet = StylesheetPath FilePath
- | StylesheetContents String
- deriving Show
-
data Date = Date{
- dateText :: String
- , dateEvent :: Maybe String
+ dateText :: String
+ , dateEvent :: Maybe String
} deriving Show
data Creator = Creator{
- creatorText :: String
- , creatorRole :: Maybe String
- , creatorFileAs :: Maybe String
+ creatorText :: String
+ , creatorRole :: Maybe String
+ , creatorFileAs :: Maybe String
} deriving Show
data Identifier = Identifier{
- identifierText :: String
- , identifierScheme :: Maybe String
+ identifierText :: String
+ , identifierScheme :: Maybe String
} deriving Show
data Title = Title{
- titleText :: String
- , titleFileAs :: Maybe String
- , titleType :: Maybe String
+ titleText :: String
+ , titleFileAs :: Maybe String
+ , titleType :: Maybe String
} deriving Show
data ProgressionDirection = LTR | RTL deriving Show
@@ -143,15 +150,29 @@ removeNote :: Inline -> Inline
removeNote (Note _) = Str ""
removeNote x = x
-getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata
+mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry
+mkEntry path content = do
+ epubSubdir <- gets stEpubSubdir
+ let addEpubSubdir :: Entry -> Entry
+ addEpubSubdir e = e{ eRelativePath =
+ (if null epubSubdir
+ then ""
+ else epubSubdir ++ "/") ++ eRelativePath e }
+ epochtime <- floor <$> lift P.getPOSIXTime
+ return $
+ (if path == "mimetype" || "META-INF" `isPrefixOf` path
+ then id
+ else addEpubSubdir) $ toEntry path epochtime content
+
+getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
getEPUBMetadata opts meta = do
let md = metadataFromMeta opts meta
- let elts = onlyElems $ parseXML $ writerEpubMetadata opts
+ let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
let md' = foldr addMetadataFromXML md elts
let addIdentifier m =
if null (epubIdentifier m)
then do
- randomId <- fmap show getRandomUUID
+ randomId <- (show . getUUID) <$> lift P.newStdGen
return $ m{ epubIdentifier = [Identifier randomId Nothing] }
else return m
let addLanguage m =
@@ -159,16 +180,19 @@ getEPUBMetadata opts meta = do
then case lookup "lang" (writerVariables opts) of
Just x -> return m{ epubLanguage = x }
Nothing -> do
- localeLang <- E.catch (liftM
- (map (\c -> if c == '_' then '-' else c) .
- takeWhile (/='.')) $ getEnv "LANG")
- (\e -> let _ = (e :: E.SomeException) in return "en-US")
+ mLang <- lift $ P.lookupEnv "LANG"
+ let localeLang =
+ case mLang of
+ Just lang ->
+ map (\c -> if c == '_' then '-' else c) $
+ takeWhile (/='.') lang
+ Nothing -> "en-US"
return m{ epubLanguage = localeLang }
else return m
let fixDate m =
if null (epubDate m)
then do
- currentTime <- getCurrentTime
+ currentTime <- lift P.getCurrentTime
return $ m{ epubDate = [ Date{
dateText = showDateTimeISO8601 currentTime
, dateEvent = Nothing } ] }
@@ -225,12 +249,16 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
addMetadataFromXML _ md = md
metaValueToString :: MetaValue -> String
-metaValueToString (MetaString s) = s
+metaValueToString (MetaString s) = s
metaValueToString (MetaInlines ils) = stringify ils
-metaValueToString (MetaBlocks bs) = stringify bs
-metaValueToString (MetaBool True) = "true"
-metaValueToString (MetaBool False) = "false"
-metaValueToString _ = ""
+metaValueToString (MetaBlocks bs) = stringify bs
+metaValueToString (MetaBool True) = "true"
+metaValueToString (MetaBool False) = "false"
+metaValueToString _ = ""
+
+metaValueToPaths:: MetaValue -> [FilePath]
+metaValueToPaths (MetaList xs) = map metaValueToString xs
+metaValueToPaths x = [metaValueToString x]
getList :: String -> Meta -> (MetaValue -> a) -> [a]
getList s meta handleMetaValue =
@@ -267,19 +295,18 @@ getCreator s meta = getList s meta handleMetaValue
getDate :: String -> Meta -> [Date]
getDate s meta = getList s meta handleMetaValue
where handleMetaValue (MetaMap m) =
- Date{ dateText = maybe "" id $
+ Date{ dateText = fromMaybe "" $
M.lookup "text" m >>= normalizeDate' . metaValueToString
, dateEvent = metaValueToString <$> M.lookup "event" m }
- handleMetaValue mv = Date { dateText = maybe ""
- id $ normalizeDate' $ metaValueToString mv
+ handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv
, dateEvent = Nothing }
simpleList :: String -> Meta -> [String]
simpleList s meta =
case lookupMeta s meta of
Just (MetaList xs) -> map metaValueToString xs
- Just x -> [metaValueToString x]
- Nothing -> []
+ Just x -> [metaValueToString x]
+ Nothing -> []
metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
metadataFromMeta opts meta = EPUBMetadata{
@@ -299,8 +326,9 @@ metadataFromMeta opts meta = EPUBMetadata{
, epubCoverage = coverage
, epubRights = rights
, epubCoverImage = coverImage
- , epubStylesheet = stylesheet
+ , epubStylesheets = stylesheets
, epubPageDirection = pageDirection
+ , epubIbooksFields = ibooksFields
}
where identifiers = getIdentifier meta
titles = getTitle meta
@@ -320,70 +348,125 @@ metadataFromMeta opts meta = EPUBMetadata{
rights = metaValueToString <$> lookupMeta "rights" meta
coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus`
(metaValueToString <$> lookupMeta "cover-image" meta)
- stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
- ((StylesheetPath . metaValueToString) <$>
- lookupMeta "stylesheet" meta)
+ stylesheets = fromMaybe []
+ (metaValueToPaths <$> lookupMeta "stylesheet" meta) ++
+ [f | ("css",f) <- writerVariables opts]
pageDirection = case map toLower . metaValueToString <$>
lookupMeta "page-progression-direction" meta of
Just "ltr" -> Just LTR
Just "rtl" -> Just RTL
_ -> Nothing
+ ibooksFields = case lookupMeta "ibooks" meta of
+ Just (MetaMap mp)
+ -> M.toList $ M.map metaValueToString mp
+ _ -> []
+
+-- | Produce an EPUB2 file from a Pandoc document.
+writeEPUB2 :: PandocMonad m
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> m B.ByteString
+writeEPUB2 = writeEPUB EPUB2
+
+-- | Produce an EPUB3 file from a Pandoc document.
+writeEPUB3 :: PandocMonad m
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> m B.ByteString
+writeEPUB3 = writeEPUB EPUB3
-- | Produce an EPUB file from a Pandoc document.
-writeEPUB :: WriterOptions -- ^ Writer options
+writeEPUB :: PandocMonad m
+ => EPUBVersion
+ -> WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
- -> IO B.ByteString
-writeEPUB opts doc@(Pandoc meta _) = do
- let version = fromMaybe EPUB2 (writerEpubVersion opts)
+ -> m B.ByteString
+writeEPUB epubVersion opts doc = do
+ let epubSubdir = writerEpubSubdirectory opts
+ -- sanity check on epubSubdir
+ unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
+ throwError $ PandocEpubSubdirectoryError epubSubdir
+ let initState = EPUBState { stMediaPaths = [], stEpubSubdir = epubSubdir }
+ evalStateT (pandocToEPUB epubVersion opts doc) initState
+
+pandocToEPUB :: PandocMonad m
+ => EPUBVersion
+ -> WriterOptions
+ -> Pandoc
+ -> E m B.ByteString
+pandocToEPUB version opts doc@(Pandoc meta _) = do
+ epubSubdir <- gets stEpubSubdir
let epub3 = version == EPUB3
- epochtime <- floor `fmap` getPOSIXTime
- let mkEntry path content = toEntry path epochtime content
+ let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) .
+ writeHtmlStringForEPUB version o
+ metadata <- getEPUBMetadata opts meta
+
+ -- stylesheet
+ stylesheets <- case epubStylesheets metadata of
+ [] -> (\x -> [B.fromChunks [x]]) <$>
+ P.readDataFile "epub.css"
+ fs -> mapM P.readFileLazy fs
+ stylesheetEntries <- zipWithM
+ (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs)
+ stylesheets [(1 :: Int)..]
+
let vars = ("epub3", if epub3 then "true" else "false")
- : ("css", "stylesheet.css")
- : writerVariables opts
+ : [(x,y) | (x,y) <- writerVariables opts, x /= "css"]
+
+ let cssvars useprefix = map (\e -> ("css",
+ (if useprefix
+ then "../"
+ else "")
+ ++ makeRelative epubSubdir (eRelativePath e)))
+ stylesheetEntries
+
let opts' = opts{ writerEmailObfuscation = NoObfuscation
, writerSectionDivs = True
- , writerHtml5 = epub3
, writerVariables = vars
, writerHTMLMathMethod =
if epub3
- then MathML Nothing
+ then MathML
else writerHTMLMathMethod opts
, writerWrapText = WrapAuto }
- metadata <- getEPUBMetadata opts' meta
-- cover page
(cpgEntry, cpicEntry) <-
case epubCoverImage metadata of
Nothing -> return ([],[])
Just img -> do
- let coverImage = "media/" ++ takeFileName img
- let cpContent = renderHtml $ writeHtml
- opts'{ writerVariables = ("coverpage","true"):vars }
- (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
- imgContent <- B.readFile img
- return ( [mkEntry "cover.xhtml" cpContent]
- , [mkEntry coverImage imgContent] )
+ let coverImage = takeFileName img
+ cpContent <- lift $ writeHtml
+ opts'{ writerVariables =
+ ("coverpage","true"):
+ cssvars True ++ vars }
+ (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"../media/" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
+ imgContent <- lift $ P.readFileLazy img
+ coverEntry <- mkEntry "text/cover.xhtml" cpContent
+ coverImageEntry <- mkEntry ("media/" ++ coverImage)
+ imgContent
+ return ( [ coverEntry ]
+ , [ coverImageEntry ] )
-- title page
- let tpContent = renderHtml $ writeHtml opts'{
- writerVariables = ("titlepage","true"):vars }
- (Pandoc meta [])
- let tpEntry = mkEntry "title_page.xhtml" tpContent
+ tpContent <- lift $ writeHtml opts'{
+ writerVariables = ("titlepage","true"):
+ cssvars True ++ vars }
+ (Pandoc meta [])
+ tpEntry <- mkEntry "text/title_page.xhtml" tpContent
-- handle pictures
- mediaRef <- newIORef []
- Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>=
- walkM (transformBlock opts' mediaRef)
- picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef
-
+ -- mediaRef <- P.newIORef []
+ Pandoc _ blocks <- walkM (transformInline opts') doc >>=
+ walkM transformBlock
+ picEntries <- (mapMaybe (snd . snd)) <$> gets stMediaPaths
-- handle fonts
let matchingGlob f = do
- xs <- namesMatching f
+ xs <- lift $ P.glob f
when (null xs) $
- warn $ f ++ " did not match any font files."
+ report $ CouldNotFetchResource f "glob did not match any font files"
return xs
- let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
+ let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<<
+ lift (P.readFileLazy f)
fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts')
fontEntries <- mapM mkFontEntry fontFiles
@@ -420,7 +503,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
mbnum <- if "unnumbered" `elem` classes
then return Nothing
else case splitAt (n - 1) nums of
- (ks, (m:_)) -> do
+ (ks, m:_) -> do
let nums' = ks ++ [m+1]
put nums'
return $ Just (ks ++ [m])
@@ -467,77 +550,93 @@ writeEPUB opts doc@(Pandoc meta _) = do
Chapter mbnum $ walk fixInternalReferences bs)
chapters'
- let chapToEntry :: Int -> Chapter -> Entry
- chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
- $ renderHtml
- $ writeHtml opts'{ writerNumberOffset =
- fromMaybe [] mbnum }
- $ case bs of
- (Header _ _ xs : _) ->
- -- remove notes or we get doubled footnotes
- Pandoc (setMeta "title" (walk removeNote $ fromList xs)
- nullMeta) bs
- _ ->
- Pandoc nullMeta bs
-
- let chapterEntries = zipWith chapToEntry [1..] chapters
+ let chapToEntry num (Chapter mbnum bs) =
+ mkEntry ("text/" ++ showChapter num) =<<
+ writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum
+ , writerVariables = cssvars True ++ vars }
+ (case bs of
+ (Header _ _ xs : _) ->
+ -- remove notes or we get doubled footnotes
+ Pandoc (setMeta "title" (walk removeNote $ fromList xs)
+ nullMeta) bs
+ _ -> Pandoc nullMeta bs)
+
+ chapterEntries <- zipWithM chapToEntry [1..] chapters
-- incredibly inefficient (TODO):
let containsMathML ent = epub3 &&
- "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
+ "<math" `isInfixOf`
+ B8.unpack (fromEntry ent)
let containsSVG ent = epub3 &&
- "<svg" `isInfixOf` (B8.unpack $ fromEntry ent)
+ "<svg" `isInfixOf`
+ B8.unpack (fromEntry ent)
let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent]
-- contents.opf
let chapterNode ent = unode "item" !
- ([("id", toId $ eRelativePath ent),
- ("href", eRelativePath ent),
+ ([("id", toId $ makeRelative epubSubdir
+ $ eRelativePath ent),
+ ("href", makeRelative epubSubdir
+ $ eRelativePath ent),
("media-type", "application/xhtml+xml")]
++ case props ent of
- [] -> []
- xs -> [("properties", unwords xs)])
+ [] -> []
+ xs -> [("properties", unwords xs)])
$ ()
+
let chapterRefNode ent = unode "itemref" !
- [("idref", toId $ eRelativePath ent)] $ ()
+ [("idref", toId $ makeRelative epubSubdir
+ $ eRelativePath ent)] $ ()
let pictureNode ent = unode "item" !
- [("id", toId $ eRelativePath ent),
- ("href", eRelativePath ent),
- ("media-type", fromMaybe "application/octet-stream"
+ [("id", toId $ makeRelative epubSubdir
+ $ eRelativePath ent),
+ ("href", makeRelative epubSubdir
+ $ eRelativePath ent),
+ ("media-type",
+ fromMaybe "application/octet-stream"
$ mediaTypeOf $ eRelativePath ent)] $ ()
let fontNode ent = unode "item" !
- [("id", toId $ eRelativePath ent),
- ("href", eRelativePath ent),
- ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ ()
+ [("id", toId $ makeRelative epubSubdir
+ $ eRelativePath ent),
+ ("href", makeRelative epubSubdir
+ $ eRelativePath ent),
+ ("media-type", fromMaybe "" $
+ getMimeType $ eRelativePath ent)] $ ()
let plainTitle = case docTitle' meta of
[] -> case epubTitle metadata of
- [] -> "UNTITLED"
+ [] -> "UNTITLED"
(x:_) -> titleText x
x -> stringify x
let tocTitle = fromMaybe plainTitle $
metaValueToString <$> lookupMeta "toc-title" meta
- let uuid = case epubIdentifier metadata of
- (x:_) -> identifierText x -- use first identifier as UUID
- [] -> error "epubIdentifier is null" -- shouldn't happen
- currentTime <- getCurrentTime
+ uuid <- case epubIdentifier metadata of
+ (x:_) -> return $ identifierText x -- use first identifier as UUID
+ [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen
+ currentTime <- lift P.getCurrentTime
let contentsData = UTF8.fromStringLazy $ ppTopElement $
- unode "package" ! [("version", case version of
- EPUB2 -> "2.0"
- EPUB3 -> "3.0")
- ,("xmlns","http://www.idpf.org/2007/opf")
- ,("unique-identifier","epub-id-1")] $
+ unode "package" !
+ ([("version", case version of
+ EPUB2 -> "2.0"
+ EPUB3 -> "3.0")
+ ,("xmlns","http://www.idpf.org/2007/opf")
+ ,("unique-identifier","epub-id-1")
+ ] ++
+ [("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/") | version == EPUB3]) $
[ metadataElement version metadata currentTime
, unode "manifest" $
[ unode "item" ! [("id","ncx"), ("href","toc.ncx")
,("media-type","application/x-dtbncx+xml")] $ ()
- , unode "item" ! [("id","style"), ("href","stylesheet.css")
- ,("media-type","text/css")] $ ()
, unode "item" ! ([("id","nav")
,("href","nav.xhtml")
,("media-type","application/xhtml+xml")] ++
[("properties","nav") | epub3 ]) $ ()
] ++
+ [ unode "item" ! [("id","style"), ("href",fp)
+ ,("media-type","text/css")] $ () |
+ fp <- map
+ (makeRelative epubSubdir . eRelativePath)
+ stylesheetEntries ] ++
map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
(case cpicEntry of
[] -> []
@@ -546,7 +645,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
(pictureNode x)]) ++
map pictureNode picEntries ++
map fontNode fontEntries
- , unode "spine" ! ([("toc","ncx")] ++ progressionDirection) $
+ , unode "spine" ! (
+ ("toc","ncx") : progressionDirection) $
case epubCoverImage metadata of
Nothing -> []
Just _ -> [ unode "itemref" !
@@ -565,48 +665,54 @@ writeEPUB opts doc@(Pandoc meta _) = do
("href","nav.xhtml")] $ ()
] ++
[ unode "reference" !
- [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing
+ [("type","cover")
+ ,("title","Cover")
+ ,("href","text/cover.xhtml")] $ ()
+ | isJust (epubCoverImage metadata)
]
]
- let contentsEntry = mkEntry "content.opf" contentsData
+ contentsEntry <- mkEntry "content.opf" contentsData
-- toc.ncx
let secs = hierarchicalize blocks'
let tocLevel = writerTOCDepth opts
- let navPointNode :: (Int -> String -> String -> [Element] -> Element)
- -> S.Element -> State Int Element
+ let navPointNode :: PandocMonad m
+ => (Int -> [Inline] -> String -> [Element] -> Element)
+ -> S.Element -> StateT Int m Element
navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do
n <- get
modify (+1)
let showNums :: [Int] -> String
showNums = intercalate "." . map show
- let tit' = stringify ils
let tit = if writerNumberSections opts && not (null nums)
- then showNums nums ++ " " ++ tit'
- else tit'
- let src = case lookup ident reftable of
- Just x -> x
- Nothing -> error (ident ++ " not found in reftable")
+ then Span ("", ["section-header-number"], [])
+ [Str (showNums nums)] : Space : ils
+ else ils
+ src <- case lookup ident reftable of
+ Just x -> return x
+ Nothing -> throwError $ PandocSomeError $ ident ++ " not found in reftable"
let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel
- isSec _ = False
+ isSec _ = False
let subsecs = filter isSec children
subs <- mapM (navPointNode formatter) subsecs
return $ formatter n tit src subs
- navPointNode _ (S.Blk _) = error "navPointNode encountered Blk"
+ navPointNode _ (S.Blk _) = throwError $ PandocSomeError "navPointNode encountered Blk"
- let navMapFormatter :: Int -> String -> String -> [Element] -> Element
+ let navMapFormatter :: Int -> [Inline] -> String -> [Element] -> Element
navMapFormatter n tit src subs = unode "navPoint" !
[("id", "navPoint-" ++ show n)] $
- [ unode "navLabel" $ unode "text" tit
- , unode "content" ! [("src", src)] $ ()
+ [ unode "navLabel" $ unode "text" $ stringify tit
+ , unode "content" ! [("src", "text/" ++ src)] $ ()
] ++ subs
let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
[ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
- , unode "content" ! [("src","title_page.xhtml")] $ () ]
+ , unode "content" ! [("src", "text/title_page.xhtml")]
+ $ () ]
+ navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1
let tocData = UTF8.fromStringLazy $ ppTopElement $
unode "ncx" ! [("version","2005-1")
,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
@@ -623,34 +729,48 @@ writeEPUB opts doc@(Pandoc meta _) = do
Nothing -> []
Just img -> [unode "meta" ! [("name","cover"),
("content", toId img)] $ ()]
- , unode "docTitle" $ unode "text" $ plainTitle
+ , unode "docTitle" $ unode "text" plainTitle
, unode "navMap" $
- tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1
+ tpNode : navMap
]
- let tocEntry = mkEntry "toc.ncx" tocData
+ tocEntry <- mkEntry "toc.ncx" tocData
- let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element
+ let navXhtmlFormatter :: Int -> [Inline] -> String -> [Element] -> Element
navXhtmlFormatter n tit src subs = unode "li" !
[("id", "toc-li-" ++ show n)] $
- (unode "a" ! [("href",src)]
- $ tit)
+ (unode "a" !
+ [("href", "text/" ++ src)]
+ $ titElements)
: case subs of
[] -> []
(_:_) -> [unode "ol" ! [("class","toc")] $ subs]
+ where titElements = parseXML titRendered
+ titRendered = case P.runPure
+ (writeHtmlStringForEPUB version
+ opts{ writerTemplate = Nothing }
+ (Pandoc nullMeta
+ [Plain $ walk delink tit])) of
+ Left _ -> TS.pack $ stringify tit
+ Right x -> x
+ -- can't have a element inside a...
+ delink (Link _ ils _) = Span ("", [], []) ils
+ delink x = x
let navtag = if epub3 then "nav" else "div"
- let navBlocks = [RawBlock (Format "html") $ ppElement $
+ tocBlocks <- lift $ evalStateT (mapM (navPointNode navXhtmlFormatter) secs) 1
+ let navBlocks = [RawBlock (Format "html")
+ $ showElement $ -- prettyprinting introduces bad spaces
unode navtag ! ([("epub:type","toc") | epub3] ++
[("id","toc")]) $
[ unode "h1" ! [("id","toc-title")] $ tocTitle
- , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]]
+ , unode "ol" ! [("class","toc")] $ tocBlocks ]]
let landmarks = if epub3
then [RawBlock (Format "html") $ ppElement $
unode "nav" ! [("epub:type","landmarks")
,("hidden","hidden")] $
[ unode "ol" $
[ unode "li"
- [ unode "a" ! [("href", "cover.xhtml")
+ [ unode "a" ! [("href", "text/cover.xhtml")
,("epub:type", "cover")] $
"Cover"] |
epubCoverImage metadata /= Nothing
@@ -664,52 +784,50 @@ writeEPUB opts doc@(Pandoc meta _) = do
]
]
else []
- let navData = renderHtml $ writeHtml
- opts'{ writerVariables = ("navpage","true"):vars }
+ navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):
+ cssvars False ++ vars }
(Pandoc (setMeta "title"
(walk removeNote $ fromList $ docTitle' meta) nullMeta)
(navBlocks ++ landmarks))
- let navEntry = mkEntry "nav.xhtml" navData
+ navEntry <- mkEntry "nav.xhtml" navData
-- mimetype
- let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip"
+ mimetypeEntry <- mkEntry "mimetype" $
+ UTF8.fromStringLazy "application/epub+zip"
-- container.xml
let containerData = UTF8.fromStringLazy $ ppTopElement $
unode "container" ! [("version","1.0")
,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
unode "rootfiles" $
- unode "rootfile" ! [("full-path","content.opf")
+ unode "rootfile" ! [("full-path",
+ (if null epubSubdir
+ then ""
+ else epubSubdir ++ "/") ++ "content.opf")
,("media-type","application/oebps-package+xml")] $ ()
- let containerEntry = mkEntry "META-INF/container.xml" containerData
+ containerEntry <- mkEntry "META-INF/container.xml" containerData
-- com.apple.ibooks.display-options.xml
let apple = UTF8.fromStringLazy $ ppTopElement $
unode "display_options" $
unode "platform" ! [("name","*")] $
unode "option" ! [("name","specified-fonts")] $ "true"
- let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
-
- -- stylesheet
- stylesheet <- case epubStylesheet metadata of
- Just (StylesheetPath fp) -> UTF8.readFile fp
- Just (StylesheetContents s) -> return s
- Nothing -> UTF8.toString `fmap`
- readDataFile (writerUserDataDir opts) "epub.css"
- let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet
+ appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
-- construct archive
- let archive = foldr addEntryToArchive emptyArchive
- (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
- contentsEntry : tocEntry : navEntry :
- (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries))
+ let archive = foldr addEntryToArchive emptyArchive $
+ [mimetypeEntry, containerEntry, appleEntry,
+ contentsEntry, tocEntry, navEntry, tpEntry] ++
+ stylesheetEntries ++ picEntries ++ cpicEntry ++
+ cpgEntry ++ chapterEntries ++ fontEntries
return $ fromArchive archive
metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element
metadataElement version md currentTime =
unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes
- where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes
+ where mdNodes = identifierNodes ++ titleNodes ++ dateNodes
+ ++ languageNodes ++ ibooksNodes
++ creatorNodes ++ contributorNodes ++ subjectNodes
++ descriptionNodes ++ typeNodes ++ formatNodes
++ publisherNodes ++ sourceNodes ++ relationNodes
@@ -728,6 +846,8 @@ metadataElement version md currentTime =
[] -> []
(x:_) -> [dcNode "date" ! [("id","epub-date")]
$ dateText x]
+ ibooksNodes = map ibooksNode (epubIbooksFields md)
+ ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v
languageNodes = [dcTag "language" $ epubLanguage md]
creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
epubCreator md
@@ -747,7 +867,7 @@ metadataElement version md currentTime =
("content",toId img)] $ ()])
$ epubCoverImage md
modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
- (showDateTimeISO8601 currentTime) | version == EPUB3 ]
+ showDateTimeISO8601 currentTime | version == EPUB3 ]
dcTag n s = unode ("dc:" ++ n) s
dcTag' n s = [dcTag n s]
toIdentifierNode id' (Identifier txt scheme)
@@ -791,99 +911,92 @@ metadataElement version md currentTime =
(("id",id') :
maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
dateText date]
- schemeToOnix "ISBN-10" = "02"
- schemeToOnix "GTIN-13" = "03"
- schemeToOnix "UPC" = "04"
- schemeToOnix "ISMN-10" = "05"
- schemeToOnix "DOI" = "06"
- schemeToOnix "LCCN" = "13"
- schemeToOnix "GTIN-14" = "14"
- schemeToOnix "ISBN-13" = "15"
+ schemeToOnix "ISBN-10" = "02"
+ schemeToOnix "GTIN-13" = "03"
+ schemeToOnix "UPC" = "04"
+ schemeToOnix "ISMN-10" = "05"
+ schemeToOnix "DOI" = "06"
+ schemeToOnix "LCCN" = "13"
+ schemeToOnix "GTIN-14" = "14"
+ schemeToOnix "ISBN-13" = "15"
schemeToOnix "Legal deposit number" = "17"
- schemeToOnix "URN" = "22"
- schemeToOnix "OCLC" = "23"
- schemeToOnix "ISMN-13" = "25"
- schemeToOnix "ISBN-A" = "26"
- schemeToOnix "JP" = "27"
- schemeToOnix "OLCC" = "28"
- schemeToOnix _ = "01"
+ schemeToOnix "URN" = "22"
+ schemeToOnix "OCLC" = "23"
+ schemeToOnix "ISMN-13" = "25"
+ schemeToOnix "ISBN-A" = "26"
+ schemeToOnix "JP" = "27"
+ schemeToOnix "OLCC" = "28"
+ schemeToOnix _ = "01"
showDateTimeISO8601 :: UTCTime -> String
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
-transformTag :: WriterOptions
- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
- -> Tag String
- -> IO (Tag String)
-transformTag opts mediaRef tag@(TagOpen name attr)
+transformTag :: PandocMonad m
+ => Tag String
+ -> E m (Tag String)
+transformTag tag@(TagOpen name attr)
| name `elem` ["video", "source", "img", "audio"] &&
- lookup "data-external" attr == Nothing = do
+ isNothing (lookup "data-external" attr) = do
let src = fromAttrib "src" tag
let poster = fromAttrib "poster" tag
- newsrc <- modifyMediaRef opts mediaRef src
- newposter <- modifyMediaRef opts mediaRef poster
+ newsrc <- modifyMediaRef src
+ newposter <- modifyMediaRef poster
let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
- [("src", newsrc) | not (null newsrc)] ++
- [("poster", newposter) | not (null newposter)]
+ [("src", "../" ++ newsrc) | not (null newsrc)] ++
+ [("poster", "../" ++ newposter) | not (null newposter)]
return $ TagOpen name attr'
-transformTag _ _ tag = return tag
-
-modifyMediaRef :: WriterOptions
- -> IORef [(FilePath, (FilePath, Maybe Entry))]
- -> FilePath
- -> IO FilePath
-modifyMediaRef _ _ "" = return ""
-modifyMediaRef opts mediaRef oldsrc = do
- media <- readIORef mediaRef
+transformTag tag = return tag
+
+modifyMediaRef :: PandocMonad m
+ => FilePath
+ -> E m FilePath
+modifyMediaRef "" = return ""
+modifyMediaRef oldsrc = do
+ media <- gets stMediaPaths
case lookup oldsrc media of
Just (n,_) -> return n
- Nothing -> do
- res <- fetchItem' (writerMediaBag opts)
- (writerSourceURL opts) oldsrc
- (new, mbEntry) <-
- case res of
- Left _ -> do
- warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
- return (oldsrc, Nothing)
- Right (img,mbMime) -> do
- let new = "media/file" ++ show (length media) ++
- fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
- (('.':) <$> (mbMime >>= extensionFromMimeType))
- epochtime <- floor `fmap` getPOSIXTime
- let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img
- return (new, Just entry)
- modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): )
- return new
-
-transformBlock :: WriterOptions
- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
- -> Block
- -> IO Block
-transformBlock opts mediaRef (RawBlock fmt raw)
+ Nothing -> catchError
+ (do (img, mbMime) <- P.fetchItem oldsrc
+ let new = "media/file" ++ show (length media) ++
+ fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
+ (('.':) <$> (mbMime >>= extensionFromMimeType))
+ entry <- mkEntry new (B.fromChunks . (:[]) $ img)
+ modify $ \st -> st{ stMediaPaths =
+ (oldsrc, (new, Just entry)):media}
+ return new)
+ (\e -> do
+ report $ CouldNotFetchResource oldsrc (show e)
+ return oldsrc)
+
+transformBlock :: PandocMonad m
+ => Block
+ -> E m Block
+transformBlock (RawBlock fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
- tags' <- mapM (transformTag opts mediaRef) tags
+ tags' <- mapM transformTag tags
return $ RawBlock fmt (renderTags' tags')
-transformBlock _ _ b = return b
+transformBlock b = return b
-transformInline :: WriterOptions
- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media
+transformInline :: PandocMonad m
+ => WriterOptions
-> Inline
- -> IO Inline
-transformInline opts mediaRef (Image attr lab (src,tit)) = do
- newsrc <- modifyMediaRef opts mediaRef src
- return $ Image attr lab (newsrc, tit)
-transformInline opts mediaRef (x@(Math t m))
+ -> E m Inline
+transformInline _opts (Image attr lab (src,tit)) = do
+ newsrc <- modifyMediaRef src
+ return $ Image attr lab ("../" ++ newsrc, tit)
+transformInline opts (x@(Math t m))
| WebTeX url <- writerHTMLMathMethod opts = do
- newsrc <- modifyMediaRef opts mediaRef (url ++ urlEncode m)
+ newsrc <- modifyMediaRef (url ++ urlEncode m)
let mathclass = if t == DisplayMath then "display" else "inline"
- return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")]
-transformInline opts mediaRef (RawInline fmt raw)
+ return $ Span ("",["math",mathclass],[])
+ [Image nullAttr [x] ("../" ++ newsrc, "")]
+transformInline _opts (RawInline fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
- tags' <- mapM (transformTag opts mediaRef) tags
+ tags' <- mapM transformTag tags
return $ RawInline fmt (renderTags' tags')
-transformInline _ _ x = return x
+transformInline _ x = return x
(!) :: (t -> Element) -> [(String, String)] -> t -> Element
(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
@@ -898,8 +1011,8 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
let (ds,ys) = break (==';') xs
rest = drop 1 ys
in case safeRead ('\'':'\\':ds ++ "'") of
- Just x -> x : unEntity rest
- Nothing -> '&':'#':unEntity xs
+ Just x -> x : unEntity rest
+ Nothing -> '&':'#':unEntity xs
unEntity (x:xs) = x : unEntity xs
mediaTypeOf :: FilePath -> Maybe MimeType
@@ -907,7 +1020,7 @@ mediaTypeOf x =
let mediaPrefixes = ["image", "video", "audio"] in
case getMimeType x of
Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
- _ -> Nothing
+ _ -> Nothing
-- Returns filename for chapter number.
showChapter :: Int -> String
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 5538ca061..e322c7d98 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -1,8 +1,8 @@
{-# LANGUAGE PatternGuards #-}
{-
-Copyright (c) 2011-2012, Sergey Astanin
-All rights reserved.
+Copyright (c) 2011-2012 Sergey Astanin
+ 2012-2018 John MacFarlane
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,17 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
-{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format.
+{- |
+Module : Text.Pandoc.Writers.FB2
+Copyright : Copyright (C) 2011-2012 Sergey Astanin
+ 2012-2018 John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : John MacFarlane
+Stability : alpha
+Portability : portable
+
+Conversion of 'Pandoc' documents to FB2 (FictionBook2) format.
FictionBook is an XML-based e-book format. For more information see:
<http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1>
@@ -27,44 +37,42 @@ FictionBook is an XML-based e-book format. For more information see:
-}
module Text.Pandoc.Writers.FB2 (writeFB2) where
-import Control.Monad.State (StateT, evalStateT, get, modify)
-import Control.Monad.State (liftM, liftM2, liftIO)
+import Control.Monad (zipWithM)
+import Control.Monad.Except (catchError)
+import Control.Monad.State.Strict (StateT, evalStateT, get, lift, liftM, modify)
import Data.ByteString.Base64 (encode)
-import Data.Char (toLower, isSpace, isAscii, isControl)
-import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix)
+import qualified Data.ByteString.Char8 as B8
+import Data.Char (isAscii, isControl, isSpace, toLower)
import Data.Either (lefts, rights)
-import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
-import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody)
-import Network.HTTP (lookupHeader, HeaderName(..), urlEncode)
-import Network.URI (isURI, unEscapeString)
-import System.FilePath (takeExtension)
+import Data.List (intercalate, intersperse, isPrefixOf, stripPrefix)
+import Data.Text (Text, pack)
+import Network.HTTP (urlEncode)
import Text.XML.Light
-import qualified Control.Exception as E
-import qualified Data.ByteString as B
import qualified Text.XML.Light as X
import qualified Text.XML.Light.Cursor as XC
+import Text.Pandoc.Class (PandocMonad, report)
+import qualified Text.Pandoc.Class as P
import Text.Pandoc.Definition
-import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
-import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize,
- linesToPara)
+import Text.Pandoc.Logging
+import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
+import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, orderedListMarkers)
-- | Data to be written at the end of the document:
-- (foot)notes, URLs, references, images.
data FbRenderState = FbRenderState
- { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text
- , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path
- , parentListMarker :: String -- ^ list marker of the parent ordered list
- , parentBulletLevel :: Int -- ^ nesting level of the unordered list
- , writerOptions :: WriterOptions
+ { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text
+ , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path
+ , parentListMarker :: String -- ^ list marker of the parent ordered list
+ , writerOptions :: WriterOptions
} deriving (Show)
-- | FictionBook building monad.
-type FBM = StateT FbRenderState IO
+type FBM m = StateT FbRenderState m
newFB :: FbRenderState
newFB = FbRenderState { footnotes = [], imagesToFetch = []
- , parentListMarker = "", parentBulletLevel = 0
+ , parentListMarker = ""
, writerOptions = def }
data ImageMode = NormalImage | InlineImage deriving (Eq)
@@ -73,20 +81,27 @@ instance Show ImageMode where
show InlineImage = "inlineImageType"
-- | Produce an FB2 document from a 'Pandoc' document.
-writeFB2 :: WriterOptions -- ^ conversion options
+writeFB2 :: PandocMonad m
+ => WriterOptions -- ^ conversion options
-> Pandoc -- ^ document to convert
- -> IO String -- ^ FictionBook2 document (not encoded yet)
-writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do
+ -> m Text -- ^ FictionBook2 document (not encoded yet)
+writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc
+
+pandocToFB2 :: PandocMonad m
+ => WriterOptions
+ -> Pandoc
+ -> FBM m Text
+pandocToFB2 opts (Pandoc meta blocks) = do
modify (\s -> s { writerOptions = opts })
desc <- description meta
- fp <- frontpage meta
+ title <- cMapM toXml . docTitle $ meta
secs <- renderSections 1 blocks
- let body = el "body" $ fp ++ secs
+ let body = el "body" $ el "title" (el "p" title) : secs
notes <- renderFootnotes
- (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s)
+ (imgs,missing) <- fmap imagesToFetch get >>= \s -> lift (fetchImages s)
let body' = replaceImagesWithAlt missing body
let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
- return $ xml_head ++ (showContent fb2_xml) ++ "\n"
+ return $ pack $ xml_head ++ showContent fb2_xml ++ "\n"
where
xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
fb2_attrs =
@@ -94,67 +109,77 @@ writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do
xlink = "http://www.w3.org/1999/xlink"
in [ uattr "xmlns" xmlns
, attr ("xmlns", "l") xlink ]
- --
- frontpage :: Meta -> FBM [Content]
- frontpage meta' = do
- t <- cMapM toXml . docTitle $ meta'
- return $
- [ el "title" (el "p" t)
- , el "annotation" (map (el "p" . cMap plain)
- (docAuthors meta' ++ [docDate meta']))
- ]
- description :: Meta -> FBM Content
- description meta' = do
- bt <- booktitle meta'
- let as = authors meta'
- dd <- docdate meta'
- return $ el "description"
- [ el "title-info" (bt ++ as ++ dd)
- , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version
- ]
- booktitle :: Meta -> FBM [Content]
- booktitle meta' = do
- t <- cMapM toXml . docTitle $ meta'
- return $ if null t
- then []
- else [ el "book-title" t ]
- authors :: Meta -> [Content]
- authors meta' = cMap author (docAuthors meta')
- author :: [Inline] -> [Content]
- author ss =
- let ws = words . cMap plain $ ss
- email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws)
- ws' = filter ('@' `notElem`) ws
- names = case ws' of
- (nickname:[]) -> [ el "nickname" nickname ]
- (fname:lname:[]) -> [ el "first-name" fname
- , el "last-name" lname ]
- (fname:rest) -> [ el "first-name" fname
- , el "middle-name" (concat . init $ rest)
- , el "last-name" (last rest) ]
- ([]) -> []
- in list $ el "author" (names ++ email)
- docdate :: Meta -> FBM [Content]
- docdate meta' = do
- let ss = docDate meta'
- d <- cMapM toXml ss
- return $ if null d
- then []
- else [el "date" d]
+
+description :: PandocMonad m => Meta -> FBM m Content
+description meta' = do
+ let genre = el "genre" "unrecognised"
+ bt <- booktitle meta'
+ let as = authors meta'
+ dd <- docdate meta'
+ let lang = case lookupMeta "lang" meta' of
+ Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s]
+ Just (MetaString s) -> [el "lang" $ iso639 s]
+ _ -> []
+ where iso639 = takeWhile (/= '-') -- Convert BCP 47 to ISO 639
+ let coverimage url = do
+ let img = Image nullAttr mempty (url, "")
+ im <- insertImage InlineImage img
+ return [el "coverpage" im]
+ coverpage <- case lookupMeta "cover-image" meta' of
+ Just (MetaInlines [Str s]) -> coverimage s
+ Just (MetaString s) -> coverimage s
+ _ -> return []
+ return $ el "description"
+ [ el "title-info" (genre : (bt ++ as ++ dd ++ lang))
+ , el "document-info" (el "program-used" "pandoc" : coverpage)
+ ]
+
+booktitle :: PandocMonad m => Meta -> FBM m [Content]
+booktitle meta' = do
+ t <- cMapM toXml . docTitle $ meta'
+ return $ if null t
+ then []
+ else [ el "book-title" t ]
+
+authors :: Meta -> [Content]
+authors meta' = cMap author (docAuthors meta')
+
+author :: [Inline] -> [Content]
+author ss =
+ let ws = words . cMap plain $ ss
+ email = el "email" <$> take 1 (filter ('@' `elem`) ws)
+ ws' = filter ('@' `notElem`) ws
+ names = case ws' of
+ [nickname] -> [ el "nickname" nickname ]
+ [fname, lname] -> [ el "first-name" fname
+ , el "last-name" lname ]
+ (fname:rest) -> [ el "first-name" fname
+ , el "middle-name" (concat . init $ rest)
+ , el "last-name" (last rest) ]
+ [] -> []
+ in list $ el "author" (names ++ email)
+
+docdate :: PandocMonad m => Meta -> FBM m [Content]
+docdate meta' = do
+ let ss = docDate meta'
+ d <- cMapM toXml ss
+ return $ if null d
+ then []
+ else [el "date" d]
-- | Divide the stream of blocks into sections and convert to XML
-- representation.
-renderSections :: Int -> [Block] -> FBM [Content]
+renderSections :: PandocMonad m => Int -> [Block] -> FBM m [Content]
renderSections level blocks = do
let secs = splitSections level blocks
mapM (renderSection level) secs
-renderSection :: Int -> ([Inline], [Block]) -> FBM Content
+renderSection :: PandocMonad m => Int -> ([Inline], [Block]) -> FBM m Content
renderSection level (ttl, body) = do
title <- if null ttl
then return []
else return . list . el "title" . formatTitle $ ttl
- content <- if (hasSubsections body)
+ content <- if hasSubsections body
then renderSections (level + 1) body
else cMapM blockToXml body
return $ el "section" (title ++ content)
@@ -175,7 +200,7 @@ split cond xs = let (b,a) = break cond xs
isLineBreak :: Inline -> Bool
isLineBreak LineBreak = True
-isLineBreak _ = False
+isLineBreak _ = False
-- | Divide the stream of block elements into sections: [(title, blocks)].
splitSections :: Int -> [Block] -> [([Inline], [Block])]
@@ -186,17 +211,17 @@ splitSections level blocks = reverse $ revSplit (reverse blocks)
let (lastsec, before) = break sameLevel rblocks
(header, prevblocks) =
case before of
- ((Header n _ title):prevblocks') ->
+ (Header n _ title:prevblocks') ->
if n == level
then (title, prevblocks')
else ([], before)
_ -> ([], before)
in (header, reverse lastsec) : revSplit prevblocks
sameLevel (Header n _ _) = n == level
- sameLevel _ = False
+ sameLevel _ = False
-- | Make another FictionBook body with footnotes.
-renderFootnotes :: FBM [Content]
+renderFootnotes :: PandocMonad m => FBM m [Content]
renderFootnotes = do
fns <- footnotes `liftM` get
if null fns
@@ -205,19 +230,19 @@ renderFootnotes = do
el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
where
renderFN (n, idstr, cs) =
- let fn_texts = (el "title" (el "p" (show n))) : cs
+ let fn_texts = el "title" (el "p" (show n)) : cs
in el "section" ([uattr "id" idstr], fn_texts)
-- | Fetch images and encode them for the FictionBook XML.
-- Return image data and a list of hrefs of the missing images.
-fetchImages :: [(String,String)] -> IO ([Content],[String])
+fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String])
fetchImages links = do
imgs <- mapM (uncurry fetchImage) links
- return $ (rights imgs, lefts imgs)
+ return (rights imgs, lefts imgs)
-- | Fetch image data from disk or from network and make a <binary> XML section.
-- Return either (Left hrefOfMissingImage) or (Right xmlContent).
-fetchImage :: String -> String -> IO (Either String Content)
+fetchImage :: PandocMonad m => String -> String -> m (Either String Content)
fetchImage href link = do
mbimg <-
case (isURI link, readDataURI link) of
@@ -227,28 +252,25 @@ fetchImage href link = do
then return (Just (mime',base64))
else return Nothing
(True, Just _) -> return Nothing -- not base64-encoded
- (True, Nothing) -> fetchURL link
- (False, _) -> do
- d <- nothingOnError $ B.readFile (unEscapeString link)
- let t = case map toLower (takeExtension link) of
- ".png" -> Just "image/png"
- ".jpg" -> Just "image/jpeg"
- ".jpeg" -> Just "image/jpeg"
- ".jpe" -> Just "image/jpeg"
- _ -> Nothing -- only PNG and JPEG are supported in FB2
- return $ liftM2 (,) t (liftM (toStr . encode) d)
+ _ ->
+ catchError (do (bs, mbmime) <- P.fetchItem link
+ case mbmime of
+ Nothing -> do
+ report $ CouldNotDetermineMimeType link
+ return Nothing
+ Just mime -> return $ Just (mime,
+ B8.unpack $ encode bs))
+ (\e ->
+ do report $ CouldNotFetchResource link (show e)
+ return Nothing)
case mbimg of
- Just (imgtype, imgdata) -> do
+ Just (imgtype, imgdata) ->
return . Right $ el "binary"
( [uattr "id" href
, uattr "content-type" imgtype]
, txt imgdata )
_ -> return (Left ('#':href))
- where
- nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString))
- nothingOnError action = liftM Just action `E.catch` omnihandler
- omnihandler :: E.SomeException -> IO (Maybe B.ByteString)
- omnihandler _ = return Nothing
+
-- | Extract mime type and encoded data from the Data URI.
readDataURI :: String -- ^ URI
@@ -276,8 +298,8 @@ isMimeType :: String -> Bool
isMimeType s =
case split (=='/') s of
[mtype,msubtype] ->
- ((map toLower mtype) `elem` types
- || "x-" `isPrefixOf` (map toLower mtype))
+ (map toLower mtype `elem` types
+ || "x-" `isPrefixOf` map toLower mtype)
&& all valid mtype
&& all valid msubtype
_ -> False
@@ -286,85 +308,63 @@ isMimeType s =
valid c = isAscii c && not (isControl c) && not (isSpace c) &&
c `notElem` "()<>@,;:\\\"/[]?="
--- | Fetch URL, return its Content-Type and binary data on success.
-fetchURL :: String -> IO (Maybe (String, String))
-fetchURL url = do
- flip catchIO_ (return Nothing) $ do
- r <- browse $ do
- setOutHandler (const (return ()))
- setAllowRedirects True
- liftM snd . request . getRequest $ url
- let content_type = lookupHeader HdrContentType (getHeaders r)
- content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r
- return $ liftM2 (,) content_type content
-
-toBS :: String -> B.ByteString
-toBS = B.pack . map (toEnum . fromEnum)
-
-toStr :: B.ByteString -> String
-toStr = map (toEnum . fromEnum) . B.unpack
-
footnoteID :: Int -> String
-footnoteID i = "n" ++ (show i)
+footnoteID i = "n" ++ show i
linkID :: Int -> String
-linkID i = "l" ++ (show i)
+linkID i = "l" ++ show i
-- | Convert a block-level Pandoc's element to FictionBook XML representation.
-blockToXml :: Block -> FBM [Content]
+blockToXml :: PandocMonad m => Block -> FBM m [Content]
blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2
blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula
-- title beginning with fig: indicates that the image is a figure
blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) =
insertImage NormalImage (Image atr alt (src,tit))
-blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss
+blockToXml (Para ss) = (list . el "p") <$> cMapM toXml ss
blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
map (el "p" . el "code") . lines $ s
-blockToXml (RawBlock _ s) = return . spaceBeforeAfter .
- map (el "p" . el "code") . lines $ s
+blockToXml b@(RawBlock _ _) = do
+ report $ BlockNotRendered b
+ return []
blockToXml (Div _ bs) = cMapM blockToXml bs
-blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs
-blockToXml (LineBlock lns) = blockToXml $ linesToPara lns
+blockToXml (BlockQuote bs) = (list . el "cite") <$> cMapM blockToXml bs
+blockToXml (LineBlock lns) =
+ (list . el "poem") <$> mapM stanza (split null lns)
+ where
+ v xs = el "v" <$> cMapM toXml xs
+ stanza xs = el "stanza" <$> mapM v xs
blockToXml (OrderedList a bss) = do
state <- get
let pmrk = parentListMarker state
- let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a
+ let markers = (pmrk ++) <$> orderedListMarkers a
let mkitem mrk bs = do
- modify (\s -> s { parentListMarker = mrk })
- itemtext <- cMapM blockToXml . paraToPlain $ bs
+ modify (\s -> s { parentListMarker = mrk ++ " "})
+ item <- cMapM blockToXml $ plainToPara $ indentBlocks (mrk ++ " ") bs
modify (\s -> s { parentListMarker = pmrk }) -- old parent marker
- return . el "p" $ [ txt mrk, txt " " ] ++ itemtext
- mapM (uncurry mkitem) (zip markers bss)
+ return item
+ concat <$> zipWithM mkitem markers bss
blockToXml (BulletList bss) = do
state <- get
- let level = parentBulletLevel state
let pmrk = parentListMarker state
- let prefix = replicate (length pmrk) ' '
- let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"]
- let mrk = prefix ++ bullets !! (level `mod` (length bullets))
+ let mrk = pmrk ++ "•"
let mkitem bs = do
- modify (\s -> s { parentBulletLevel = (level+1) })
- itemtext <- cMapM blockToXml . paraToPlain $ bs
- modify (\s -> s { parentBulletLevel = level }) -- restore bullet level
- return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext
- mapM mkitem bss
+ modify (\s -> s { parentListMarker = mrk ++ " "})
+ item <- cMapM blockToXml $ plainToPara $ indentBlocks (mrk ++ " ") bs
+ modify (\s -> s { parentListMarker = pmrk }) -- old parent marker
+ return item
+ cMapM mkitem bss
blockToXml (DefinitionList defs) =
cMapM mkdef defs
where
mkdef (term, bss) = do
- def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss
+ items <- cMapM (cMapM blockToXml . plainToPara . indentBlocks (replicate 4 ' ')) bss
t <- wrap "strong" term
- return [ el "p" t, el "p" def' ]
- sep blocks =
- if all needsBreak blocks then
- blocks ++ [Plain [LineBreak]]
- else
- blocks
- needsBreak (Para _) = False
- needsBreak (Plain ins) = LineBreak `notElem` ins
- needsBreak _ = True
-blockToXml (Header _ _ _) = -- should never happen, see renderSections
- error "unexpected header in section text"
+ return (el "p" t : items)
+blockToXml h@Header{} = do
+ -- should not occur after hierarchicalize, except inside lists/blockquotes
+ report $ BlockNotRendered h
+ return []
blockToXml HorizontalRule = return
[ el "empty-line" ()
, el "p" (txt (replicate 10 '—'))
@@ -372,45 +372,42 @@ blockToXml HorizontalRule = return
blockToXml (Table caption aligns _ headers rows) = do
hd <- mkrow "th" headers aligns
bd <- mapM (\r -> mkrow "td" r aligns) rows
- c <- return . el "emphasis" =<< cMapM toXml caption
+ c <- el "emphasis" <$> cMapM toXml caption
return [el "table" (hd : bd), el "p" c]
where
- mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content
+ mkrow :: PandocMonad m => String -> [TableCell] -> [Alignment] -> FBM m Content
mkrow tag cells aligns' =
- (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns'))
+ el "tr" <$> mapM (mkcell tag) (zip cells aligns')
--
- mkcell :: String -> (TableCell, Alignment) -> FBM Content
+ mkcell :: PandocMonad m => String -> (TableCell, Alignment) -> FBM m Content
mkcell tag (cell, align) = do
cblocks <- cMapM blockToXml cell
return $ el tag ([align_attr align], cblocks)
--
align_attr a = Attr (QName "align" Nothing Nothing) (align_str a)
- align_str AlignLeft = "left"
- align_str AlignCenter = "center"
- align_str AlignRight = "right"
+ align_str AlignLeft = "left"
+ align_str AlignCenter = "center"
+ align_str AlignRight = "right"
align_str AlignDefault = "left"
blockToXml Null = return []
--- Replace paragraphs with plain text and line break.
--- Necessary to simulate multi-paragraph lists in FB2.
-paraToPlain :: [Block] -> [Block]
-paraToPlain [] = []
-paraToPlain (Para inlines : rest) =
- let p = (Plain (inlines ++ [LineBreak]))
- in p : paraToPlain rest
-paraToPlain (p:rest) = p : paraToPlain rest
+-- Replace plain text with paragraphs and add line break after paragraphs.
+-- It is used to convert plain text from tight list items to paragraphs.
+plainToPara :: [Block] -> [Block]
+plainToPara [] = []
+plainToPara (Plain inlines : rest) =
+ Para inlines : plainToPara rest
+plainToPara (Para inlines : rest) =
+ Para inlines : Plain [LineBreak] : plainToPara rest
+plainToPara (p:rest) = p : plainToPara rest
-- Simulate increased indentation level. Will not really work
-- for multi-line paragraphs.
-indent :: Block -> Block
-indent = indentBlock
+indentPrefix :: String -> Block -> Block
+indentPrefix spacer = indentBlock
where
- -- indentation space
- spacer :: String
- spacer = replicate 4 ' '
- --
- indentBlock (Plain ins) = Plain ((Str spacer):ins)
- indentBlock (Para ins) = Para ((Str spacer):ins)
+ indentBlock (Plain ins) = Plain (Str spacer:ins)
+ indentBlock (Para ins) = Para (Str spacer:ins)
indentBlock (CodeBlock a s) =
let s' = unlines . map (spacer++) . lines $ s
in CodeBlock a s'
@@ -420,10 +417,21 @@ indent = indentBlock
-- indent every (explicit) line
indentLines :: [Inline] -> [Inline]
indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
- in intercalate [LineBreak] $ map ((Str spacer):) lns
+ in intercalate [LineBreak] $ map (Str spacer:) lns
+
+indent :: Block -> Block
+indent = indentPrefix spacer
+ where
+ -- indentation space
+ spacer :: String
+ spacer = replicate 4 ' '
+
+indentBlocks :: String -> [Block] -> [Block]
+indentBlocks _ [] = []
+indentBlocks prefix (x:xs) = indentPrefix prefix x : map (indentPrefix $ replicate (length prefix) ' ') xs
-- | Convert a Pandoc's Inline element to FictionBook XML representation.
-toXml :: Inline -> FBM [Content]
+toXml :: PandocMonad m => Inline -> FBM m [Content]
toXml (Str s) = return [txt s]
toXml (Span _ ils) = cMapM toXml ils
toXml (Emph ss) = list `liftM` wrap "emphasis" ss
@@ -444,7 +452,9 @@ toXml Space = return [txt " "]
toXml SoftBreak = return [txt " "]
toXml LineBreak = return [el "empty-line" ()]
toXml (Math _ formula) = insertMath InlineImage formula
-toXml (RawInline _ _) = return [] -- raw TeX and raw HTML are suppressed
+toXml il@(RawInline _ _) = do
+ report $ InlineNotRendered il
+ return [] -- raw TeX and raw HTML are suppressed
toXml (Link _ text (url,ttl)) = do
fns <- footnotes `liftM` get
let n = 1 + length fns
@@ -462,7 +472,7 @@ toXml (Link _ text (url,ttl)) = do
( [ attr ("l","href") ('#':ln_id)
, uattr "type" "note" ]
, ln_ref) ]
-toXml img@(Image _ _ _) = insertImage InlineImage img
+toXml img@Image{} = insertImage InlineImage img
toXml (Note bs) = do
fns <- footnotes `liftM` get
let n = 1 + length fns
@@ -474,9 +484,9 @@ toXml (Note bs) = do
, uattr "type" "note" ]
, fn_ref )
-insertMath :: ImageMode -> String -> FBM [Content]
+insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content]
insertMath immode formula = do
- htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get
+ htmlMath <- fmap (writerHTMLMathMethod . writerOptions) get
case htmlMath of
WebTeX url -> do
let alt = [Code nullAttr formula]
@@ -485,7 +495,7 @@ insertMath immode formula = do
insertImage immode img
_ -> return [el "code" formula]
-insertImage :: ImageMode -> Inline -> FBM [Content]
+insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content]
insertImage immode (Image _ alt (url,ttl)) = do
images <- imagesToFetch `liftM` get
let n = 1 + length images
@@ -493,7 +503,7 @@ insertImage immode (Image _ alt (url,ttl)) = do
modify (\s -> s { imagesToFetch = (fname, url) : images })
let ttlattr = case (immode, null ttl) of
(NormalImage, False) -> [ uattr "title" ttl ]
- _ -> []
+ _ -> []
return . list $
el "image" $
[ attr ("l","href") ('#':fname)
@@ -517,20 +527,20 @@ replaceImagesWithAlt missingHrefs body =
else c
in case XC.nextDF c' of
(Just cnext) -> replaceAll cnext
- Nothing -> c' -- end of document
+ Nothing -> c' -- end of document
--
isImage :: Content -> Bool
- isImage (Elem e) = (elName e) == (uname "image")
- isImage _ = False
+ isImage (Elem e) = elName e == uname "image"
+ isImage _ = False
--
- isMissing (Elem img@(Element _ _ _ _)) =
+ isMissing (Elem img@Element{}) =
let imgAttrs = elAttribs img
badAttrs = map (attr ("l","href")) missingHrefs
in any (`elem` imgAttrs) badAttrs
isMissing _ = False
--
replaceNode :: Content -> Content
- replaceNode n@(Elem img@(Element _ _ _ _)) =
+ replaceNode n@(Elem img@Element{}) =
let attrs = elAttribs img
alt = getAttrVal attrs (uname "alt")
imtype = getAttrVal attrs (qname "l" "type")
@@ -551,7 +561,7 @@ replaceImagesWithAlt missingHrefs body =
-- | Wrap all inlines with an XML tag (given its unqualified name).
-wrap :: String -> [Inline] -> FBM Content
+wrap :: PandocMonad m => String -> [Inline] -> FBM m Content
wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
-- " Create a singleton list.
@@ -560,25 +570,25 @@ list = (:[])
-- | Convert an 'Inline' to plaintext.
plain :: Inline -> String
-plain (Str s) = s
-plain (Emph ss) = concat (map plain ss)
-plain (Span _ ss) = concat (map plain ss)
-plain (Strong ss) = concat (map plain ss)
-plain (Strikeout ss) = concat (map plain ss)
-plain (Superscript ss) = concat (map plain ss)
-plain (Subscript ss) = concat (map plain ss)
-plain (SmallCaps ss) = concat (map plain ss)
-plain (Quoted _ ss) = concat (map plain ss)
-plain (Cite _ ss) = concat (map plain ss) -- FIXME
-plain (Code _ s) = s
-plain Space = " "
-plain SoftBreak = " "
-plain LineBreak = "\n"
-plain (Math _ s) = s
-plain (RawInline _ s) = s
+plain (Str s) = s
+plain (Emph ss) = cMap plain ss
+plain (Span _ ss) = cMap plain ss
+plain (Strong ss) = cMap plain ss
+plain (Strikeout ss) = cMap plain ss
+plain (Superscript ss) = cMap plain ss
+plain (Subscript ss) = cMap plain ss
+plain (SmallCaps ss) = cMap plain ss
+plain (Quoted _ ss) = cMap plain ss
+plain (Cite _ ss) = cMap plain ss -- FIXME
+plain (Code _ s) = s
+plain Space = " "
+plain SoftBreak = " "
+plain LineBreak = "\n"
+plain (Math _ s) = s
+plain (RawInline _ _) = ""
plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"])
-plain (Image _ alt _) = concat (map plain alt)
-plain (Note _) = "" -- FIXME
+plain (Image _ alt _) = cMap plain alt
+plain (Note _) = "" -- FIXME
-- | Create an XML element.
el :: (Node t)
@@ -599,11 +609,11 @@ txt s = Text $ CData CDataText s Nothing
-- | Create an XML attribute with an unqualified name.
uattr :: String -> String -> Text.XML.Light.Attr
-uattr name val = Attr (uname name) val
+uattr name = Attr (uname name)
-- | Create an XML attribute with a qualified name from given namespace.
attr :: (String, String) -> String -> Text.XML.Light.Attr
-attr (ns, name) val = Attr (qname ns name) val
+attr (ns, name) = Attr (qname ns name)
-- | Unqualified name
uname :: String -> QName
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 3c8c264d2..1647df7ea 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -1,6 +1,10 @@
-{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE MultiWayIf #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +23,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.HTML
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -28,60 +32,92 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to HTML.
-}
-module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
-import Text.Pandoc.Definition
+module Text.Pandoc.Writers.HTML (
+ writeHtml4,
+ writeHtml4String,
+ writeHtml5,
+ writeHtml5String,
+ writeHtmlStringForEPUB,
+ writeS5,
+ writeSlidy,
+ writeSlideous,
+ writeDZSlides,
+ writeRevealJs,
+ tagWithAttributes
+ ) where
+import Control.Monad.State.Strict
+import Data.Char (ord, toLower)
+import Data.List (intercalate, intersperse, isPrefixOf, partition)
+import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing)
import Data.Monoid ((<>))
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
+import qualified Data.Set as Set
+import Data.String (fromString)
+import Data.Text (Text)
+import qualified Data.Text.Lazy as TL
+import Network.HTTP (urlEncode)
+import Network.URI (URI (..), parseURIReference, unEscapeString)
+import Numeric (showHex)
+import Text.Blaze.Internal (customLeaf, MarkupM(Empty))
+#if MIN_VERSION_blaze_markup(0,6,3)
+#else
+import Text.Blaze.Internal (preEscapedString, preEscapedText)
+#endif
+import Text.Blaze.Html hiding (contents)
+import Text.Pandoc.Definition
+import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtmlInline, highlight,
+ styleToCss)
import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates
-import Text.Pandoc.Readers.TeXMath
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
import Text.Pandoc.Slides
-import Text.Pandoc.Highlighting ( highlight, styleToCss,
- formatHtmlInline, formatHtmlBlock )
-import Text.Pandoc.XML (fromEntities, escapeStringForXML)
-import Network.URI ( parseURIReference, URI(..), unEscapeString )
-import Network.HTTP ( urlEncode )
-import Numeric ( showHex )
-import Data.Char ( ord, toLower )
-import Data.List ( isPrefixOf, intersperse )
-import Data.String ( fromString )
-import Data.Maybe ( catMaybes, fromMaybe, isJust )
-import Control.Monad.State
-import Text.Blaze.Html hiding(contents)
+import Text.Pandoc.Templates
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML (escapeStringForXML, fromEntities)
#if MIN_VERSION_blaze_markup(0,6,3)
#else
-import Text.Blaze.Internal(preEscapedString)
+import Text.Blaze.Internal (preEscapedString, preEscapedText)
#endif
#if MIN_VERSION_blaze_html(0,5,1)
import qualified Text.Blaze.XHtml5 as H5
#else
import qualified Text.Blaze.Html5 as H5
#endif
+import Control.Monad.Except (throwError)
+import Data.Aeson (Value)
+import System.FilePath (takeBaseName, takeExtension)
+import Text.Blaze.Html.Renderer.Text (renderHtml)
import qualified Text.Blaze.XHtml1.Transitional as H
import qualified Text.Blaze.XHtml1.Transitional.Attributes as A
-import Text.Blaze.Html.Renderer.String (renderHtml)
+import Text.Pandoc.Class (PandocMonad, report, runPure)
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
import Text.TeXMath
-import Text.XML.Light.Output
-import Text.XML.Light (unode, elChildren, unqual)
+import Text.XML.Light (elChildren, unode, unqual)
import qualified Text.XML.Light as XML
-import System.FilePath (takeExtension)
-import Data.Aeson (Value)
+import Text.XML.Light.Output
data WriterState = WriterState
- { stNotes :: [Html] -- ^ List of notes
- , stMath :: Bool -- ^ Math is used in document
- , stQuotes :: Bool -- ^ <q> tag is used
- , stHighlighting :: Bool -- ^ Syntax highlighting is used
- , stSecNum :: [Int] -- ^ Number of current section
- , stElement :: Bool -- ^ Processing an Element
+ { stNotes :: [Html] -- ^ List of notes
+ , stMath :: Bool -- ^ Math is used in document
+ , stQuotes :: Bool -- ^ <q> tag is used
+ , stHighlighting :: Bool -- ^ Syntax highlighting is used
+ , stSecNum :: [Int] -- ^ Number of current section
+ , stElement :: Bool -- ^ Processing an Element
+ , stHtml5 :: Bool -- ^ Use HTML5
+ , stEPUBVersion :: Maybe EPUBVersion -- ^ EPUB version if for epub
+ , stSlideVariant :: HTMLSlideVariant
+ , stCodeBlockNum :: Int -- ^ Number of code block
}
defaultWriterState :: WriterState
defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
stHighlighting = False, stSecNum = [],
- stElement = False}
+ stElement = False, stHtml5 = False,
+ stEPUBVersion = Nothing,
+ stSlideVariant = NoSlides,
+ stCodeBlockNum = 0}
-- Helpers to render HTML with the appropriate function.
@@ -98,62 +134,141 @@ nl opts = if writerWrapText opts == WrapNone
then mempty
else preEscapedString "\n"
--- | Convert Pandoc document to Html string.
-writeHtmlString :: WriterOptions -> Pandoc -> String
-writeHtmlString opts d =
- let (body, context) = evalState (pandocToHtml opts d) defaultWriterState
- in case writerTemplate opts of
- Nothing -> renderHtml body
- Just tpl -> renderTemplate' tpl $
- defField "body" (renderHtml body) context
-
--- | Convert Pandoc document to Html structure.
-writeHtml :: WriterOptions -> Pandoc -> Html
-writeHtml opts d =
- let (body, context) = evalState (pandocToHtml opts d) defaultWriterState
- in case writerTemplate opts of
- Nothing -> body
- Just tpl -> renderTemplate' tpl $
- defField "body" (renderHtml body) context
+-- | Convert Pandoc document to Html 5 string.
+writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeHtml5String = writeHtmlString'
+ defaultWriterState{ stHtml5 = True }
+
+-- | Convert Pandoc document to Html 5 structure.
+writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html
+writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True }
+
+-- | Convert Pandoc document to Html 4 string.
+writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeHtml4String = writeHtmlString'
+ defaultWriterState{ stHtml5 = False }
+
+-- | Convert Pandoc document to Html 4 structure.
+writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html
+writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False }
+
+-- | Convert Pandoc document to Html appropriate for an epub version.
+writeHtmlStringForEPUB :: PandocMonad m
+ => EPUBVersion -> WriterOptions -> Pandoc
+ -> m Text
+writeHtmlStringForEPUB version o = writeHtmlString'
+ defaultWriterState{ stHtml5 = version == EPUB3,
+ stEPUBVersion = Just version } o
+
+-- | Convert Pandoc document to Reveal JS HTML slide show.
+writeRevealJs :: PandocMonad m
+ => WriterOptions -> Pandoc -> m Text
+writeRevealJs = writeHtmlSlideShow' RevealJsSlides
+
+-- | Convert Pandoc document to S5 HTML slide show.
+writeS5 :: PandocMonad m
+ => WriterOptions -> Pandoc -> m Text
+writeS5 = writeHtmlSlideShow' S5Slides
+
+-- | Convert Pandoc document to Slidy HTML slide show.
+writeSlidy :: PandocMonad m
+ => WriterOptions -> Pandoc -> m Text
+writeSlidy = writeHtmlSlideShow' SlidySlides
+
+-- | Convert Pandoc document to Slideous HTML slide show.
+writeSlideous :: PandocMonad m
+ => WriterOptions -> Pandoc -> m Text
+writeSlideous = writeHtmlSlideShow' SlideousSlides
+
+-- | Convert Pandoc document to DZSlides HTML slide show.
+writeDZSlides :: PandocMonad m
+ => WriterOptions -> Pandoc -> m Text
+writeDZSlides = writeHtmlSlideShow' DZSlides
+
+writeHtmlSlideShow' :: PandocMonad m
+ => HTMLSlideVariant -> WriterOptions -> Pandoc -> m Text
+writeHtmlSlideShow' variant = writeHtmlString'
+ defaultWriterState{ stSlideVariant = variant
+ , stHtml5 = case variant of
+ RevealJsSlides -> True
+ S5Slides -> False
+ SlidySlides -> False
+ DZSlides -> True
+ SlideousSlides -> False
+ NoSlides -> False
+ }
+
+renderHtml' :: Html -> Text
+renderHtml' = TL.toStrict . renderHtml
+
+writeHtmlString' :: PandocMonad m
+ => WriterState -> WriterOptions -> Pandoc -> m Text
+writeHtmlString' st opts d = do
+ (body, context) <- evalStateT (pandocToHtml opts d) st
+ case writerTemplate opts of
+ Nothing -> return $ renderHtml' body
+ Just tpl -> do
+ -- warn if empty lang
+ when (isNothing (getField "lang" context :: Maybe String)) $
+ report NoLangSpecified
+ -- check for empty pagetitle
+ context' <-
+ case getField "pagetitle" context of
+ Just (s :: String) | not (null s) -> return context
+ _ -> do
+ let fallback = fromMaybe "Untitled" $ takeBaseName <$>
+ lookup "sourcefile" (writerVariables opts)
+ report $ NoTitleElement fallback
+ return $ resetField "pagetitle" fallback context
+ renderTemplate' tpl $
+ defField "body" (renderHtml' body) context'
+
+writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html
+writeHtml' st opts d =
+ case writerTemplate opts of
+ Just _ -> preEscapedText <$> writeHtmlString' st opts d
+ Nothing -> do
+ (body, _) <- evalStateT (pandocToHtml opts d) st
+ return body
-- result is (title, authors, date, toc, body, new variables)
-pandocToHtml :: WriterOptions
+pandocToHtml :: PandocMonad m
+ => WriterOptions
-> Pandoc
- -> State WriterState (Html, Value)
+ -> StateT WriterState m (Html, Value)
pandocToHtml opts (Pandoc meta blocks) = do
metadata <- metaToJSON opts
- (fmap renderHtml . blockListToHtml opts)
- (fmap renderHtml . inlineListToHtml opts)
+ (fmap renderHtml' . blockListToHtml opts)
+ (fmap renderHtml' . inlineListToHtml opts)
meta
let stringifyHTML = escapeStringForXML . stringify
let authsMeta = map stringifyHTML $ docAuthors meta
let dateMeta = stringifyHTML $ docDate meta
let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts
+ slideVariant <- gets stSlideVariant
let sects = hierarchicalize $
- if writerSlideVariant opts == NoSlides
+ if slideVariant == NoSlides
then blocks
else prepSlides slideLevel blocks
- toc <- if writerTableOfContents opts
- then tableOfContents opts sects
+ toc <- if writerTableOfContents opts && slideVariant /= S5Slides
+ then fmap renderHtml' <$> tableOfContents opts sects
else return Nothing
blocks' <- liftM (mconcat . intersperse (nl opts)) $
mapM (elementToHtml slideLevel opts) sects
st <- get
- let notes = reverse (stNotes st)
- let thebody = blocks' >> footnoteSection opts notes
+ notes <- footnoteSection opts (reverse (stNotes st))
+ let thebody = blocks' >> notes
let math = case writerHTMLMathMethod opts of
LaTeXMathML (Just url) ->
H.script ! A.src (toValue url)
! A.type_ "text/javascript"
$ mempty
- MathML (Just url) ->
+ MathJax url
+ | slideVariant /= RevealJsSlides ->
+ -- mathjax is handled via a special plugin in revealjs
H.script ! A.src (toValue url)
! A.type_ "text/javascript"
- $ mempty
- MathJax url ->
- H.script ! A.src (toValue url)
- ! A.type_ "text/javascript"
- $ case writerSlideVariant opts of
+ $ case slideVariant of
SlideousSlides ->
preEscapedString
"MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);"
@@ -162,37 +277,55 @@ pandocToHtml opts (Pandoc meta blocks) = do
H.script ! A.src (toValue url)
! A.type_ "text/javascript"
$ mempty
- KaTeX js css ->
- (H.script ! A.src (toValue js) $ mempty) <>
- (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <>
- (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX)
+ KaTeX url ->
+ (H.script !
+ A.src (toValue $ url ++ "katex.min.js") $ mempty) <>
+ (H.script !
+ A.src (toValue $ url ++ "contrib/auto-render.min.js")
+ $ mempty) <>
+ (
+ H.script
+ "document.addEventListener(\"DOMContentLoaded\", function() {\n renderMathInElement(document.body);\n});") <>
+ (H.link ! A.rel "stylesheet" !
+ A.href (toValue $ url ++ "katex.min.css"))
+
_ -> case lookup "mathml-script" (writerVariables opts) of
- Just s | not (writerHtml5 opts) ->
+ Just s | not (stHtml5 st) ->
H.script ! A.type_ "text/javascript"
$ preEscapedString
("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n")
| otherwise -> mempty
Nothing -> mempty
let context = (if stHighlighting st
- then defField "highlighting-css"
- (styleToCss $ writerHighlightStyle opts)
+ then case writerHighlightStyle opts of
+ Just sty -> defField "highlighting-css"
+ (styleToCss sty)
+ Nothing -> id
else id) $
(if stMath st
- then defField "math" (renderHtml math)
+ then defField "math" (renderHtml' math)
else id) $
+ defField "mathjax"
+ (case writerHTMLMathMethod opts of
+ MathJax _ -> True
+ _ -> False) $
defField "quotes" (stQuotes st) $
- maybe id (defField "toc" . renderHtml) toc $
+ -- for backwards compatibility we populate toc
+ -- with the contents of the toc, rather than a
+ -- boolean:
+ maybe id (defField "toc") toc $
+ maybe id (defField "table-of-contents") toc $
defField "author-meta" authsMeta $
maybe id (defField "date-meta") (normalizeDate dateMeta) $
- defField "pagetitle" (stringifyHTML $ docTitle meta) $
+ defField "pagetitle" (stringifyHTML (docTitle meta)) $
defField "idprefix" (writerIdentifierPrefix opts) $
-- these should maybe be set in pandoc.hs
defField "slidy-url"
- ("http://www.w3.org/Talks/Tools/Slidy2" :: String) $
+ ("https://www.w3.org/Talks/Tools/Slidy2" :: String) $
defField "slideous-url" ("slideous" :: String) $
defField "revealjs-url" ("reveal.js" :: String) $
defField "s5-url" ("s5/default" :: String) $
- defField "html5" (writerHtml5 opts) $
+ defField "html5" (stHtml5 st)
metadata
return (thebody, context)
@@ -200,44 +333,52 @@ pandocToHtml opts (Pandoc meta blocks) = do
prefixedId :: WriterOptions -> String -> Attribute
prefixedId opts s =
case s of
- "" -> mempty
- _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s
+ "" -> mempty
+ _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s
-toList :: (Html -> Html) -> WriterOptions -> ([Html] -> Html)
+toList :: PandocMonad m
+ => (Html -> Html)
+ -> WriterOptions
+ -> [Html]
+ -> StateT WriterState m Html
toList listop opts items = do
- if (writerIncremental opts)
- then if (writerSlideVariant opts /= RevealJsSlides)
- then (listop $ mconcat items) ! A.class_ "incremental"
- else listop $ mconcat $ map (! A.class_ "fragment") items
- else listop $ mconcat items
+ slideVariant <- gets stSlideVariant
+ return $
+ if writerIncremental opts
+ then if slideVariant /= RevealJsSlides
+ then listop (mconcat items) ! A.class_ "incremental"
+ else listop $ mconcat $ map (! A.class_ "fragment") items
+ else listop $ mconcat items
-unordList :: WriterOptions -> [Html] -> Html
+unordList :: PandocMonad m
+ => WriterOptions -> [Html] -> StateT WriterState m Html
unordList opts = toList H.ul opts . toListItems opts
-ordList :: WriterOptions -> [Html] -> Html
+ordList :: PandocMonad m
+ => WriterOptions -> [Html] -> StateT WriterState m Html
ordList opts = toList H.ol opts . toListItems opts
-defList :: WriterOptions -> [Html] -> Html
+defList :: PandocMonad m
+ => WriterOptions -> [Html] -> StateT WriterState m Html
defList opts items = toList H.dl opts (items ++ [nl opts])
-- | Construct table of contents from list of elements.
-tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html)
+tableOfContents :: PandocMonad m => WriterOptions -> [Element] -> StateT WriterState m (Maybe Html)
tableOfContents _ [] = return Nothing
tableOfContents opts sects = do
- let opts' = opts { writerIgnoreNotes = True }
- contents <- mapM (elementToListItem opts') sects
+ contents <- mapM (elementToListItem opts) sects
let tocList = catMaybes contents
- return $ if null tocList
- then Nothing
- else Just $ unordList opts tocList
+ if null tocList
+ then return Nothing
+ else Just <$> unordList opts tocList
-- | Convert section number to string
showSecNum :: [Int] -> String
-showSecNum = concat . intersperse "." . map show
+showSecNum = intercalate "." . map show
-- | Converts an Element to a list item for a table of contents,
-- retrieving the appropriate identifier from state.
-elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html)
+elementToListItem :: PandocMonad m => WriterOptions -> Element -> StateT WriterState m (Maybe Html)
-- Don't include the empty headers created in slide shows
-- shows when an hrule is used to separate slides without a new title:
elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing
@@ -249,45 +390,52 @@ elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs)
then (H.span ! A.class_ "toc-section-number"
$ toHtml $ showSecNum num') >> preEscapedString " "
else mempty
- txt <- liftM (sectnum >>) $ inlineListToHtml opts headerText
+ txt <- liftM (sectnum >>) $ inlineListToHtml opts $ walk deNote headerText
subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes
- let subList = if null subHeads
- then mempty
- else unordList opts subHeads
+ subList <- if null subHeads
+ then return mempty
+ else unordList opts subHeads
-- in reveal.js, we need #/apples, not #apples:
- let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides]
+ slideVariant <- gets stSlideVariant
+ let revealSlash = ['/' | slideVariant== RevealJsSlides]
return $ Just
$ if null id'
- then (H.a $ toHtml txt) >> subList
+ then H.a (toHtml txt) >> subList
else (H.a ! A.href (toValue $ "#" ++ revealSlash ++
writerIdentifierPrefix opts ++ id')
$ toHtml txt) >> subList
elementToListItem _ _ = return Nothing
-- | Convert an Element to Html.
-elementToHtml :: Int -> WriterOptions -> Element -> State WriterState Html
+elementToHtml :: PandocMonad m => Int -> WriterOptions -> Element -> StateT WriterState m Html
elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block
elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elements) = do
- let slide = writerSlideVariant opts /= NoSlides && level <= slideLevel
+ slideVariant <- gets stSlideVariant
+ let slide = slideVariant /= NoSlides && level <= slideLevel
let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0)
modify $ \st -> st{stSecNum = num'} -- update section number
+ html5 <- gets stHtml5
let titleSlide = slide && level < slideLevel
header' <- if title' == [Str "\0"] -- marker for hrule
then return mempty
else do
modify (\st -> st{ stElement = True})
+ let level' = if level <= slideLevel &&
+ slideVariant == SlidySlides
+ then 1 -- see #3566
+ else level
res <- blockToHtml opts
- (Header level (id',classes,keyvals) title')
+ (Header level' (id',classes,keyvals) title')
modify (\st -> st{ stElement = False})
return res
- let isSec (Sec _ _ _ _ _) = True
- isSec (Blk _) = False
+ let isSec Sec{} = True
+ isSec (Blk _) = False
let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."]
isPause _ = False
- let fragmentClass = case writerSlideVariant opts of
- RevealJsSlides -> "fragment"
- _ -> "incremental"
+ let fragmentClass = case slideVariant of
+ RevealJsSlides -> "fragment"
+ _ -> "incremental"
let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\""
++ fragmentClass ++ "\">")) :
(xs ++ [Blk (RawBlock (Format "html") "</div>")])
@@ -299,45 +447,51 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen
[] -> []
(x:xs) -> x ++ concatMap inDiv xs
let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts]
- let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++
+ let classes' = ["title-slide" | titleSlide] ++ ["slide" | slide] ++
["section" | (slide || writerSectionDivs opts) &&
- not (writerHtml5 opts) ] ++
+ not html5 ] ++
["level" ++ show level | slide || writerSectionDivs opts ]
++ classes
- let secttag = if writerHtml5 opts
+ let secttag = if html5
then H5.section
else H.div
let attr = (id',classes',keyvals)
- return $ if titleSlide
- then (if writerSlideVariant opts == RevealJsSlides
- then H5.section
- else id) $ mconcat $
- (addAttrs opts attr $ secttag $ header') : innerContents
- else if writerSectionDivs opts || slide
- then addAttrs opts attr
- $ secttag $ inNl $ header' : innerContents
- else mconcat $ intersperse (nl opts)
- $ addAttrs opts attr header' : innerContents
+ if titleSlide
+ then do
+ t <- addAttrs opts attr $
+ secttag header'
+ return $
+ (if slideVariant == RevealJsSlides
+ then H5.section
+ else id) $ mconcat $ t : innerContents
+ else if writerSectionDivs opts || slide
+ then addAttrs opts attr
+ $ secttag $ inNl $ header' : innerContents
+ else do
+ t <- addAttrs opts attr header'
+ return $ mconcat $ intersperse (nl opts) (t : innerContents)
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
-footnoteSection :: WriterOptions -> [Html] -> Html
-footnoteSection opts notes =
- if null notes
- then mempty
- else nl opts >> (container
- $ nl opts >> hrtag >> nl opts >>
- H.ol (mconcat notes >> nl opts) >> nl opts)
- where container x = if writerHtml5 opts
- then H5.section ! A.class_ "footnotes" $ x
- else if writerSlideVariant opts /= NoSlides
- then H.div ! A.class_ "footnotes slide" $ x
- else H.div ! A.class_ "footnotes" $ x
- hrtag = if writerHtml5 opts then H5.hr else H.hr
+footnoteSection :: PandocMonad m
+ => WriterOptions -> [Html] -> StateT WriterState m Html
+footnoteSection opts notes = do
+ html5 <- gets stHtml5
+ slideVariant <- gets stSlideVariant
+ let hrtag = if html5 then H5.hr else H.hr
+ let container x
+ | html5 = H5.section ! A.class_ "footnotes" $ x
+ | slideVariant /= NoSlides = H.div ! A.class_ "footnotes slide" $ x
+ | otherwise = H.div ! A.class_ "footnotes" $ x
+ return $
+ if null notes
+ then mempty
+ else nl opts >> container (nl opts >> hrtag >> nl opts >>
+ H.ol (mconcat notes >> nl opts) >> nl opts)
-- | Parse a mailto link; return Just (name, domain) or Nothing.
parseMailto :: String -> Maybe (String, String)
-parseMailto s = do
+parseMailto s =
case break (==':') s of
(xs,':':addr) | map toLower xs == "mailto" -> do
let (name', rest) = span (/='@') addr
@@ -346,10 +500,12 @@ parseMailto s = do
_ -> fail "not a mailto: URL"
-- | Obfuscate a "mailto:" link.
-obfuscateLink :: WriterOptions -> Attr -> Html -> String -> Html
+obfuscateLink :: PandocMonad m
+ => WriterOptions -> Attr -> Html -> String
+ -> StateT WriterState m Html
obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation =
addAttrs opts attr $ H.a ! A.href (toValue s) $ txt
-obfuscateLink opts attr (renderHtml -> txt) s =
+obfuscateLink opts attr (TL.unpack . renderHtml -> txt) s =
let meth = writerEmailObfuscation opts
s' = map toLower (take 7 s) ++ drop 7 s
in case parseMailto s' of
@@ -361,20 +517,25 @@ obfuscateLink opts attr (renderHtml -> txt) s =
then ("e", name' ++ " at " ++ domain')
else ("'" ++ obfuscateString txt ++ "'",
txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")")
+ (_, classNames, _) = attr
+ classNamesStr = concatMap (' ':) classNames
in case meth of
ReferenceObfuscation ->
-- need to use preEscapedString or &'s are escaped to &amp; in URL
- preEscapedString $ "<a href=\"" ++ (obfuscateString s')
- ++ "\" class=\"email\">" ++ (obfuscateString txt) ++ "</a>"
+ return $
+ preEscapedString $ "<a href=\"" ++ obfuscateString s'
+ ++ "\" class=\"email\">" ++ obfuscateString txt ++ "</a>"
JavascriptObfuscation ->
+ return $
(H.script ! A.type_ "text/javascript" $
preEscapedString ("\n<!--\nh='" ++
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
obfuscateString name' ++ "';e=n+a+h;\n" ++
- "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail\">'+" ++
+ "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail" ++
+ classNamesStr ++ "\">'+" ++
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >>
H.noscript (preEscapedString $ obfuscateString altText)
- _ -> error $ "Unknown obfuscation method: " ++ show meth
+ _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth
_ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email
-- | Obfuscate character as entity.
@@ -388,36 +549,71 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar . fromEntities
-addAttrs :: WriterOptions -> Attr -> Html -> Html
-addAttrs opts attr h = foldl (!) h (attrsToHtml opts attr)
+-- | Create HTML tag with attributes.
+tagWithAttributes :: WriterOptions
+ -> Bool -- ^ True for HTML5
+ -> Bool -- ^ True if self-closing tag
+ -> Text -- ^ Tag text
+ -> Attr -- ^ Pandoc style tag attributes
+ -> Text
+tagWithAttributes opts html5 selfClosing tagname attr =
+ let mktag = (TL.toStrict . renderHtml <$> evalStateT
+ (addAttrs opts attr (customLeaf (textTag tagname) selfClosing))
+ defaultWriterState{ stHtml5 = html5 })
+ in case runPure mktag of
+ Left _ -> mempty
+ Right t -> t
-toAttrs :: [(String, String)] -> [Attribute]
-toAttrs kvs = map (\(x,y) -> customAttribute (fromString x) (toValue y)) kvs
+addAttrs :: PandocMonad m
+ => WriterOptions -> Attr -> Html -> StateT WriterState m Html
+addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr
-attrsToHtml :: WriterOptions -> Attr -> [Attribute]
-attrsToHtml opts (id',classes',keyvals) =
- [prefixedId opts id' | not (null id')] ++
- [A.class_ (toValue $ unwords classes') | not (null classes')] ++ toAttrs keyvals
+toAttrs :: PandocMonad m
+ => [(String, String)] -> StateT WriterState m [Attribute]
+toAttrs kvs = do
+ html5 <- gets stHtml5
+ return $ map (\(x,y) ->
+ customAttribute
+ (fromString (if not html5 || x `Set.member` html5Attributes
+ || "data-" `isPrefixOf` x
+ then x
+ else "data-" ++ x)) (toValue y)) kvs
-imgAttrsToHtml :: WriterOptions -> Attr -> [Attribute]
-imgAttrsToHtml opts attr =
- attrsToHtml opts (ident,cls,kvs') ++
- toAttrs (dimensionsToAttrList opts attr)
+attrsToHtml :: PandocMonad m
+ => WriterOptions -> Attr -> StateT WriterState m [Attribute]
+attrsToHtml opts (id',classes',keyvals) = do
+ attrs <- toAttrs keyvals
+ return $
+ [prefixedId opts id' | not (null id')] ++
+ [A.class_ (toValue $ unwords classes') | not (null classes')] ++ attrs
+
+imgAttrsToHtml :: PandocMonad m
+ => WriterOptions -> Attr -> StateT WriterState m [Attribute]
+imgAttrsToHtml opts attr = do
+ attrs <- attrsToHtml opts (ident,cls,kvs')
+ dimattrs <- toAttrs (dimensionsToAttrList attr)
+ return $ attrs ++ dimattrs
where
(ident,cls,kvs) = attr
kvs' = filter isNotDim kvs
isNotDim ("width", _) = False
isNotDim ("height", _) = False
- isNotDim _ = True
+ isNotDim _ = True
-dimensionsToAttrList :: WriterOptions -> Attr -> [(String, String)]
-dimensionsToAttrList opts attr = (go Width) ++ (go Height)
+dimensionsToAttrList :: Attr -> [(String, String)]
+dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height
where
- go dir = case (dimension dir attr) of
- (Just (Percent a)) -> [("style", show dir ++ ":" ++ show (Percent a))]
- (Just dim) -> [(show dir, showInPixel opts dim)]
- _ -> []
-
+ consolidateStyles :: [(String, String)] -> [(String, String)]
+ consolidateStyles xs =
+ case partition isStyle xs of
+ ([], _) -> xs
+ (ss, rest) -> ("style", intercalate ";" $ map snd ss) : rest
+ isStyle ("style", _) = True
+ isStyle _ = False
+ go dir = case dimension dir attr of
+ (Just (Pixel a)) -> [(show dir, show a)]
+ (Just x) -> [("style", show dir ++ ":" ++ show x)]
+ Nothing -> []
imageExts :: [String]
imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
@@ -427,71 +623,121 @@ imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
treatAsImage :: FilePath -> Bool
treatAsImage fp =
- let path = case uriPath `fmap` parseURIReference fp of
- Nothing -> fp
- Just up -> up
+ let path = maybe fp uriPath (parseURIReference fp)
ext = map toLower $ drop 1 $ takeExtension path
in null ext || ext `elem` imageExts
--- | Convert Pandoc block element to HTML.
-blockToHtml :: WriterOptions -> Block -> State WriterState Html
-blockToHtml _ Null = return mempty
-blockToHtml opts (Plain lst) = inlineListToHtml opts lst
--- title beginning with fig: indicates that the image is a figure
-blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do
+figure :: PandocMonad m
+ => WriterOptions -> Attr -> [Inline] -> (String, String)
+ -> StateT WriterState m Html
+figure opts attr txt (s,tit) = do
img <- inlineToHtml opts (Image attr txt (s,tit))
- let tocapt = if writerHtml5 opts
+ html5 <- gets stHtml5
+ let tocapt = if html5
then H5.figcaption
else H.p ! A.class_ "caption"
capt <- if null txt
then return mempty
else tocapt `fmap` inlineListToHtml opts txt
- return $ if writerHtml5 opts
+ return $ if html5
then H5.figure $ mconcat
[nl opts, img, capt, nl opts]
else H.div ! A.class_ "figure" $ mconcat
[nl opts, img, nl opts, capt, nl opts]
+
+-- | Convert Pandoc block element to HTML.
+blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
+blockToHtml _ Null = return mempty
+blockToHtml opts (Plain lst) = inlineListToHtml opts lst
+blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)])
+ | "stretch" `elem` classes = do
+ slideVariant <- gets stSlideVariant
+ case slideVariant of
+ RevealJsSlides ->
+ -- a "stretched" image in reveal.js must be a direct child
+ -- of the slide container
+ inlineToHtml opts (Image attr txt (src, tit))
+ _ -> figure opts attr txt (src, tit)
+-- title beginning with fig: indicates that the image is a figure
+blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) =
+ figure opts attr txt (s,tit)
blockToHtml opts (Para lst)
| isEmptyRaw lst = return mempty
+ | null lst && not (isEnabled Ext_empty_paragraphs opts) = return mempty
| otherwise = do
contents <- inlineListToHtml opts lst
return $ H.p contents
where
- isEmptyRaw [RawInline f _] = f /= (Format "html")
- isEmptyRaw _ = False
+ isEmptyRaw [RawInline f _] = f `notElem` [Format "html",
+ Format "html4", Format "html5"]
+ isEmptyRaw _ = False
blockToHtml opts (LineBlock lns) =
if writerWrapText opts == WrapNone
then blockToHtml opts $ linesToPara lns
else do
- let lf = preEscapedString "\n"
- htmlLines <- mconcat . intersperse lf <$> mapM (inlineListToHtml opts) lns
- return $ H.div ! A.style "white-space: pre-line;" $ htmlLines
-blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do
+ htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns
+ return $ H.div ! A.class_ "line-block" $ htmlLines
+blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
+ html5 <- gets stHtml5
+ slideVariant <- gets stSlideVariant
+ let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++
+ [("style", "width:" ++ w ++ ";")
+ | ("width",w) <- kvs', "column" `elem` classes]
let speakerNotes = "notes" `elem` classes
-- we don't want incremental output inside speaker notes, see #1394
- let opts' = if speakerNotes then opts{ writerIncremental = False } else opts
- contents <- blockListToHtml opts' bs
+ let opts' = if | speakerNotes -> opts{ writerIncremental = False }
+ | "incremental" `elem` classes -> opts{ writerIncremental = True }
+ | "nonincremental" `elem` classes -> opts{ writerIncremental = False }
+ | otherwise -> opts
+ -- we remove "incremental" and "nonincremental" if we're in a
+ -- slide presentaiton format.
+ classes' = case slideVariant of
+ NoSlides -> classes
+ _ -> filter (\k -> k /= "incremental" && k /= "nonincremental") classes
+ contents <- if "columns" `elem` classes'
+ then -- we don't use blockListToHtml because it inserts
+ -- a newline between the column divs, which throws
+ -- off widths! see #4028
+ mconcat <$> mapM (blockToHtml opts) bs
+ else blockListToHtml opts' bs
let contents' = nl opts >> contents >> nl opts
- let (divtag, classes') = if writerHtml5 opts && "section" `elem` classes
- then (H5.section, filter (/= "section") classes)
- else (H.div, classes)
- return $
- if speakerNotes
- then case writerSlideVariant opts of
- RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents'
- DZSlides -> (addAttrs opts' attr $ H5.div $ contents')
- ! (H5.customAttribute "role" "note")
- NoSlides -> addAttrs opts' attr $ H.div $ contents'
- _ -> mempty
- else addAttrs opts (ident, classes', kvs) $ divtag $ contents'
-blockToHtml opts (RawBlock f str)
- | f == Format "html" = return $ preEscapedString str
- | (f == Format "latex" || f == Format "tex") &&
- allowsMathEnvironments (writerHTMLMathMethod opts) &&
- isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str]
- | otherwise = return mempty
-blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr
+ let (divtag, classes'') = if html5 && "section" `elem` classes'
+ then (H5.section, filter (/= "section") classes')
+ else (H.div, classes')
+ if speakerNotes
+ then case slideVariant of
+ RevealJsSlides -> addAttrs opts' attr $
+ H5.aside contents'
+ DZSlides -> do
+ t <- addAttrs opts' attr $
+ H5.div contents'
+ return $ t ! H5.customAttribute "role" "note"
+ NoSlides -> addAttrs opts' attr $
+ H.div contents'
+ _ -> return mempty
+ else addAttrs opts (ident, classes'', kvs) $
+ divtag contents'
+blockToHtml opts (RawBlock f str) = do
+ ishtml <- isRawHtml f
+ if ishtml
+ then return $ preEscapedString str
+ else if (f == Format "latex" || f == Format "tex") &&
+ allowsMathEnvironments (writerHTMLMathMethod opts) &&
+ isMathEnvironment str
+ then blockToHtml opts $ Plain [Math DisplayMath str]
+ else do
+ report $ BlockNotRendered (RawBlock f str)
+ return mempty
+blockToHtml _ HorizontalRule = do
+ html5 <- gets stHtml5
+ return $ if html5 then H5.hr else H.hr
blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
+ id'' <- if null id'
+ then do
+ modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 }
+ codeblocknum <- gets stCodeBlockNum
+ return ("cb" ++ show codeblocknum)
+ else return id'
let tolhs = isEnabled Ext_literate_haskell opts &&
any (\c -> map toLower c == "haskell") classes &&
any (\c -> map toLower c == "literate") classes
@@ -503,19 +749,24 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
adjCode = if tolhs
then unlines . map ("> " ++) . lines $ rawCode
else rawCode
- hlCode = if writerHighlight opts -- check highlighting options
- then highlight formatHtmlBlock (id',classes',keyvals) adjCode
- else Nothing
+ hlCode = if isJust (writerHighlightStyle opts)
+ then highlight (writerSyntaxMap opts) formatHtmlBlock
+ (id'',classes',keyvals) adjCode
+ else Left ""
case hlCode of
- Nothing -> return $ addAttrs opts (id',classes,keyvals)
- $ H.pre $ H.code $ toHtml adjCode
- Just h -> modify (\st -> st{ stHighlighting = True }) >>
- return (addAttrs opts (id',[],keyvals) h)
-blockToHtml opts (BlockQuote blocks) =
+ Left msg -> do
+ unless (null msg) $
+ report $ CouldNotHighlight msg
+ addAttrs opts (id',classes,keyvals)
+ $ H.pre $ H.code $ toHtml adjCode
+ Right h -> modify (\st -> st{ stHighlighting = True }) >>
+ addAttrs opts (id'',[],keyvals) h
+blockToHtml opts (BlockQuote blocks) = do
-- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental;
-- otherwise incremental
- if writerSlideVariant opts /= NoSlides
+ slideVariant <- gets stSlideVariant
+ if slideVariant /= NoSlides
then let inc = not (writerIncremental opts) in
case blocks of
[BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
@@ -541,7 +792,7 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do
$ showSecNum secnum) >> strToHtml " " >> contents
else contents
inElement <- gets stElement
- return $ (if inElement then id else addAttrs opts attr)
+ (if inElement then return else addAttrs opts attr)
$ case level of
1 -> H.h1 contents'
2 -> H.h2 contents'
@@ -552,20 +803,17 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do
_ -> H.p contents'
blockToHtml opts (BulletList lst) = do
contents <- mapM (blockListToHtml opts) lst
- return $ unordList opts contents
+ unordList opts contents
blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
contents <- mapM (blockListToHtml opts) lst
+ html5 <- gets stHtml5
let numstyle' = case numstyle of
Example -> "decimal"
_ -> camelCaseToHyphenated $ show numstyle
- let attribs = (if startnum /= 1
- then [A.start $ toValue startnum]
- else []) ++
- (if numstyle == Example
- then [A.class_ "example"]
- else []) ++
+ let attribs = [A.start $ toValue startnum | startnum /= 1] ++
+ [A.class_ "example" | numstyle == Example] ++
(if numstyle /= DefaultStyle
- then if writerHtml5 opts
+ then if html5
then [A.type_ $
case numstyle of
Decimal -> "1"
@@ -577,23 +825,25 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
else [A.style $ toValue $ "list-style-type: " ++
numstyle']
else [])
- return $ foldl (!) (ordList opts contents) attribs
+ l <- ordList opts contents
+ return $ foldl (!) l attribs
blockToHtml opts (DefinitionList lst) = do
contents <- mapM (\(term, defs) ->
do term' <- if null term
then return mempty
else liftM H.dt $ inlineListToHtml opts term
- defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) .
+ defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) .
blockListToHtml opts) defs
return $ mconcat $ nl opts : term' : nl opts :
intersperse (nl opts) defs') lst
- return $ defList opts contents
+ defList opts contents
blockToHtml opts (Table capt aligns widths headers rows') = do
captionDoc <- if null capt
then return mempty
else do
cs <- inlineListToHtml opts capt
return $ H.caption cs >> nl opts
+ html5 <- gets stHtml5
let percent w = show (truncate (100*w) :: Integer) ++ "%"
let coltags = if all (== 0.0) widths
then mempty
@@ -601,7 +851,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do
H.colgroup $ do
nl opts
mapM_ (\w -> do
- if writerHtml5 opts
+ if html5
then H.col ! A.style (toValue $ "width: " ++
percent w)
else H.col ! A.width (toValue $ percent w)
@@ -624,18 +874,19 @@ blockToHtml opts (Table capt aligns widths headers rows') = do
else tbl ! A.style (toValue $ "width:" ++
show (round (totalWidth * 100) :: Int) ++ "%;")
-tableRowToHtml :: WriterOptions
+tableRowToHtml :: PandocMonad m
+ => WriterOptions
-> [Alignment]
-> Int
-> [[Block]]
- -> State WriterState Html
+ -> StateT WriterState m Html
tableRowToHtml opts aligns rownum cols' = do
let mkcell = if rownum == 0 then H.th else H.td
let rowclass = case rownum of
- 0 -> "header"
+ 0 -> "header"
x | x `rem` 2 == 1 -> "odd"
- _ -> "even"
- cols'' <- sequence $ zipWith
+ _ -> "even"
+ cols'' <- zipWithM
(\alignment item -> tableItemToHtml opts mkcell alignment item)
aligns cols'
return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'')
@@ -648,21 +899,23 @@ alignmentToString alignment = case alignment of
AlignCenter -> "center"
AlignDefault -> ""
-tableItemToHtml :: WriterOptions
+tableItemToHtml :: PandocMonad m
+ => WriterOptions
-> (Html -> Html)
-> Alignment
-> [Block]
- -> State WriterState Html
+ -> StateT WriterState m Html
tableItemToHtml opts tag' align' item = do
contents <- blockListToHtml opts item
+ html5 <- gets stHtml5
let alignStr = alignmentToString align'
- let attribs = if writerHtml5 opts
+ let attribs = if html5
then A.style (toValue $ "text-align: " ++ alignStr ++ ";")
else A.align (toValue alignStr)
let tag'' = if null alignStr
then tag'
else tag' ! attribs
- return $ (tag'' $ contents) >> nl opts
+ return $ tag'' contents >> nl opts
toListItems :: WriterOptions -> [Html] -> [Html]
toListItems opts items = map (toListItem opts) items ++ [nl opts]
@@ -670,12 +923,16 @@ toListItems opts items = map (toListItem opts) items ++ [nl opts]
toListItem :: WriterOptions -> Html -> Html
toListItem opts item = nl opts >> H.li item
-blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
+blockListToHtml :: PandocMonad m
+ => WriterOptions -> [Block] -> StateT WriterState m Html
blockListToHtml opts lst =
- fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst
+ (mconcat . intersperse (nl opts) . filter nonempty)
+ <$> mapM (blockToHtml opts) lst
+ where nonempty (Empty _) = False
+ nonempty _ = True
-- | Convert list of Pandoc inline elements to HTML.
-inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
+inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html
inlineListToHtml opts lst =
mapM (inlineToHtml opts) lst >>= return . mconcat
@@ -684,9 +941,9 @@ annotateMML :: XML.Element -> String -> XML.Element
annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)])
where
cs = case elChildren e of
- [] -> unode "mrow" ()
+ [] -> unode "mrow" ()
[x] -> x
- xs -> unode "mrow" xs
+ xs -> unode "mrow" xs
math childs = XML.Element q as [XML.Elem childs] l
where
(XML.Element q as _ l) = e
@@ -694,27 +951,29 @@ annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs,
-- | Convert Pandoc inline element to HTML.
-inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
-inlineToHtml opts inline =
+inlineToHtml :: PandocMonad m
+ => WriterOptions -> Inline -> StateT WriterState m Html
+inlineToHtml opts inline = do
+ html5 <- gets stHtml5
case inline of
(Str str) -> return $ strToHtml str
- (Space) -> return $ strToHtml " "
- (SoftBreak) -> return $ case writerWrapText opts of
- WrapNone -> preEscapedString " "
- WrapAuto -> preEscapedString " "
- WrapPreserve -> preEscapedString "\n"
- (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br)
+ Space -> return $ strToHtml " "
+ SoftBreak -> return $ case writerWrapText opts of
+ WrapNone -> preEscapedString " "
+ WrapAuto -> preEscapedString " "
+ WrapPreserve -> preEscapedString "\n"
+ LineBreak -> return $ (if html5 then H5.br else H.br)
<> strToHtml "\n"
(Span (id',classes,kvs) ils)
-> inlineListToHtml opts ils >>=
- return . addAttrs opts attr' . H.span
+ addAttrs opts attr' . H.span
where attr' = (id',classes',kvs')
classes' = filter (`notElem` ["csl-no-emph",
"csl-no-strong",
"csl-no-smallcaps"]) classes
kvs' = if null styles
then kvs
- else (("style", concat styles) : kvs)
+ else ("style", concat styles) : kvs
styles = ["font-style:normal;"
| "csl-no-emph" `elem` classes]
++ ["font-weight:normal;"
@@ -724,20 +983,23 @@ inlineToHtml opts inline =
(Emph lst) -> inlineListToHtml opts lst >>= return . H.em
(Strong lst) -> inlineListToHtml opts lst >>= return . H.strong
(Code attr str) -> case hlCode of
- Nothing -> return
- $ addAttrs opts attr
- $ H.code $ strToHtml str
- Just h -> do
+ Left msg -> do
+ unless (null msg) $
+ report $ CouldNotHighlight msg
+ addAttrs opts attr $ H.code $ strToHtml str
+ Right h -> do
modify $ \st -> st{ stHighlighting = True }
- return $ addAttrs opts (id',[],keyvals) h
+ addAttrs opts (id',[],keyvals) h
where (id',_,keyvals) = attr
- hlCode = if writerHighlight opts
- then highlight formatHtmlInline attr str
- else Nothing
+ hlCode = if isJust (writerHighlightStyle opts)
+ then highlight
+ (writerSyntaxMap opts)
+ formatHtmlInline attr str
+ else Left ""
(Strikeout lst) -> inlineListToHtml opts lst >>=
return . H.del
(SmallCaps lst) -> inlineListToHtml opts lst >>=
- return . (H.span ! A.style "font-variant: small-caps;")
+ return . (H.span ! A.class_ "smallcaps")
(Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup
(Subscript lst) -> inlineListToHtml opts lst >>= return . H.sub
(Quoted quoteType lst) ->
@@ -768,15 +1030,15 @@ inlineToHtml opts inline =
JsMath _ -> do
let m = preEscapedString str
return $ case t of
- InlineMath -> H.span ! A.class_ mathClass $ m
+ InlineMath -> H.span ! A.class_ mathClass $ m
DisplayMath -> H.div ! A.class_ mathClass $ m
WebTeX url -> do
- let imtag = if writerHtml5 opts then H5.img else H.img
+ let imtag = if html5 then H5.img else H.img
let m = imtag ! A.style "vertical-align:middle"
! A.src (toValue $ url ++ urlEncode str)
! A.alt (toValue str)
! A.title (toValue str)
- let brtag = if writerHtml5 opts then H5.br else H.br
+ let brtag = if html5 then H5.br else H.br
return $ case t of
InlineMath -> m
DisplayMath -> brtag >> m >> brtag
@@ -784,103 +1046,124 @@ inlineToHtml opts inline =
return $ case t of
InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>"
DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>"
- MathML _ -> do
- let dt = if t == InlineMath
- then DisplayInline
- else DisplayBlock
+ MathML -> do
let conf = useShortEmptyTags (const False)
defaultConfigPP
- case writeMathML dt <$> readTeX str of
+ res <- lift $ convertMath writeMathML t str
+ case res of
Right r -> return $ preEscapedString $
ppcElement conf (annotateMML r str)
- Left _ -> inlineListToHtml opts
- (texMathToInlines t str) >>=
- return . (H.span ! A.class_ mathClass)
+ Left il -> (H.span ! A.class_ mathClass) <$>
+ inlineToHtml opts il
MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $
case t of
InlineMath -> "\\(" ++ str ++ "\\)"
DisplayMath -> "\\[" ++ str ++ "\\]"
- KaTeX _ _ -> return $ H.span ! A.class_ mathClass $
- toHtml (case t of
- InlineMath -> str
- DisplayMath -> "\\displaystyle " ++ str)
+ KaTeX _ -> return $ H.span ! A.class_ mathClass $ toHtml $
+ case t of
+ InlineMath -> "\\(" ++ str ++ "\\)"
+ DisplayMath -> "\\[" ++ str ++ "\\]"
PlainMath -> do
- x <- inlineListToHtml opts (texMathToInlines t str)
+ x <- lift (texMathToInlines t str) >>= inlineListToHtml opts
let m = H.span ! A.class_ mathClass $ x
- let brtag = if writerHtml5 opts then H5.br else H.br
+ let brtag = if html5 then H5.br else H.br
return $ case t of
InlineMath -> m
DisplayMath -> brtag >> m >> brtag
- (RawInline f str)
- | f == Format "html" -> return $ preEscapedString str
- | otherwise -> return mempty
+ (RawInline f str) -> do
+ ishtml <- isRawHtml f
+ if ishtml
+ then return $ preEscapedString str
+ else if (f == Format "latex" || f == Format "tex") &&
+ "\\begin" `isPrefixOf` str &&
+ allowsMathEnvironments (writerHTMLMathMethod opts) &&
+ isMathEnvironment str
+ then inlineToHtml opts $ Math DisplayMath str
+ else do
+ report $ InlineNotRendered inline
+ return mempty
(Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do
linkText <- inlineListToHtml opts txt
- return $ obfuscateLink opts attr linkText s
- (Link attr txt (s,tit)) -> do
+ obfuscateLink opts attr linkText s
+ (Link (ident,classes,kvs) txt (s,tit)) -> do
linkText <- inlineListToHtml opts txt
+ slideVariant <- gets stSlideVariant
let s' = case s of
- '#':xs | writerSlideVariant opts ==
- RevealJsSlides -> '#':'/':xs
- _ -> s
+ '#':xs -> let prefix = if slideVariant == RevealJsSlides
+ then "/"
+ else writerIdentifierPrefix opts
+ in '#' : prefix ++ xs
+ _ -> s
let link = H.a ! A.href (toValue s') $ linkText
- let link' = if txt == [Str (unEscapeString s)]
- then link ! A.class_ "uri"
- else link
- let link'' = addAttrs opts attr link'
+ let attr = if txt == [Str (unEscapeString s)]
+ then (ident, "uri" : classes, kvs)
+ else (ident, classes, kvs)
+ link' <- addAttrs opts attr link
return $ if null tit
- then link''
- else link'' ! A.title (toValue tit)
+ then link'
+ else link' ! A.title (toValue tit)
(Image attr txt (s,tit)) | treatAsImage s -> do
let alternate' = stringify txt
- let attributes = [A.src $ toValue s] ++
- [A.title $ toValue tit | not (null tit)] ++
- [A.alt $ toValue alternate' | not (null txt)] ++
- imgAttrsToHtml opts attr
- let tag = if writerHtml5 opts then H5.img else H.img
+ slideVariant <- gets stSlideVariant
+ let isReveal = slideVariant == RevealJsSlides
+ attrs <- imgAttrsToHtml opts attr
+ let attributes =
+ -- reveal.js uses data-src for lazy loading
+ (if isReveal
+ then customAttribute "data-src" $ toValue s
+ else A.src $ toValue s) :
+ [A.title $ toValue tit | not (null tit)] ++
+ [A.alt $ toValue alternate' | not (null txt)] ++
+ attrs
+ let tag = if html5 then H5.img else H.img
return $ foldl (!) tag attributes
-- note: null title included, as in Markdown.pl
(Image attr _ (s,tit)) -> do
- let attributes = [A.src $ toValue s] ++
- [A.title $ toValue tit | not (null tit)] ++
- imgAttrsToHtml opts attr
+ slideVariant <- gets stSlideVariant
+ let isReveal = slideVariant == RevealJsSlides
+ attrs <- imgAttrsToHtml opts attr
+ let attributes =
+ (if isReveal
+ then customAttribute "data-src" $ toValue s
+ else A.src $ toValue s) :
+ [A.title $ toValue tit | not (null tit)] ++
+ attrs
return $ foldl (!) H5.embed attributes
-- note: null title included, as in Markdown.pl
- (Note contents)
- | writerIgnoreNotes opts -> return mempty
- | otherwise -> do
+ (Note contents) -> do
notes <- gets stNotes
- let number = (length notes) + 1
+ let number = length notes + 1
let ref = show number
htmlContents <- blockListToNote opts ref contents
+ epubVersion <- gets stEPUBVersion
-- push contents onto front of notes
- modify $ \st -> st {stNotes = (htmlContents:notes)}
- let revealSlash = ['/' | writerSlideVariant opts
- == RevealJsSlides]
+ modify $ \st -> st {stNotes = htmlContents:notes}
+ slideVariant <- gets stSlideVariant
+ let revealSlash = ['/' | slideVariant == RevealJsSlides]
let link = H.a ! A.href (toValue $ "#" ++
revealSlash ++
writerIdentifierPrefix opts ++ "fn" ++ ref)
- ! A.class_ "footnoteRef"
+ ! A.class_ "footnote-ref"
! prefixedId opts ("fnref" ++ ref)
- $ (if isJust (writerEpubVersion opts)
+ $ (if isJust epubVersion
then id
else H.sup)
$ toHtml ref
- return $ case writerEpubVersion opts of
+ return $ case epubVersion of
Just EPUB3 -> link ! customAttribute "epub:type" "noteref"
_ -> link
(Cite cits il)-> do contents <- inlineListToHtml opts il
let citationIds = unwords $ map citationId cits
let result = H.span ! A.class_ "citation" $ contents
- return $ if writerHtml5 opts
+ return $ if html5
then result ! customAttribute "data-cites" (toValue citationIds)
else result
-blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
+blockListToNote :: PandocMonad m => WriterOptions -> String -> [Block] -> StateT WriterState m Html
blockListToNote opts ref blocks =
-- If last block is Para or Plain, include the backlink at the end of
-- that block. Otherwise, insert a new Plain block with the backlink.
- let backlink = [Link nullAttr [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])]
+ let backlink = [Link ("",["footnote-back"],[]) [Str "↩"] ("#" ++ "fnref" ++ ref,[])]
blocks' = if null blocks
then []
else let lastBlock = last blocks
@@ -893,23 +1176,13 @@ blockListToNote opts ref blocks =
_ -> otherBlocks ++ [lastBlock,
Plain backlink]
in do contents <- blockListToHtml opts blocks'
- let noteItem = H.li ! (prefixedId opts ("fn" ++ ref)) $ contents
- let noteItem' = case writerEpubVersion opts of
+ let noteItem = H.li ! prefixedId opts ("fn" ++ ref) $ contents
+ epubVersion <- gets stEPUBVersion
+ let noteItem' = case epubVersion of
Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote"
_ -> noteItem
return $ nl opts >> noteItem'
--- Javascript snippet to render all KaTeX elements
-renderKaTeX :: String
-renderKaTeX = unlines [
- "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");"
- , "for (var i=0; i < mathElements.length; i++)"
- , "{"
- , " var texText = mathElements[i].firstChild"
- , " katex.render(texText.data, mathElements[i])"
- , "}}"
- ]
-
isMathEnvironment :: String -> Bool
isMathEnvironment s = "\\begin{" `isPrefixOf` s &&
envName `elem` mathmlenvs
@@ -944,6 +1217,219 @@ isMathEnvironment s = "\\begin{" `isPrefixOf` s &&
allowsMathEnvironments :: HTMLMathMethod -> Bool
allowsMathEnvironments (MathJax _) = True
-allowsMathEnvironments (MathML _) = True
+allowsMathEnvironments MathML = True
allowsMathEnvironments (WebTeX _) = True
allowsMathEnvironments _ = False
+
+isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool
+isRawHtml f = do
+ html5 <- gets stHtml5
+ return $ f == Format "html" ||
+ ((html5 && f == Format "html5") || f == Format "html4")
+
+html5Attributes :: Set.Set String
+html5Attributes = Set.fromList
+ [ "abbr"
+ , "accept"
+ , "accept-charset"
+ , "accesskey"
+ , "action"
+ , "allowfullscreen"
+ , "allowpaymentrequest"
+ , "allowusermedia"
+ , "alt"
+ , "as"
+ , "async"
+ , "autocomplete"
+ , "autofocus"
+ , "autoplay"
+ , "charset"
+ , "checked"
+ , "cite"
+ , "class"
+ , "color"
+ , "cols"
+ , "colspan"
+ , "content"
+ , "contenteditable"
+ , "controls"
+ , "coords"
+ , "crossorigin"
+ , "data"
+ , "datetime"
+ , "default"
+ , "defer"
+ , "dir"
+ , "dirname"
+ , "disabled"
+ , "download"
+ , "draggable"
+ , "enctype"
+ , "for"
+ , "form"
+ , "formaction"
+ , "formenctype"
+ , "formmethod"
+ , "formnovalidate"
+ , "formtarget"
+ , "headers"
+ , "height"
+ , "hidden"
+ , "high"
+ , "href"
+ , "hreflang"
+ , "http-equiv"
+ , "id"
+ , "inputmode"
+ , "integrity"
+ , "is"
+ , "ismap"
+ , "itemid"
+ , "itemprop"
+ , "itemref"
+ , "itemscope"
+ , "itemtype"
+ , "kind"
+ , "label"
+ , "lang"
+ , "list"
+ , "loop"
+ , "low"
+ , "manifest"
+ , "max"
+ , "maxlength"
+ , "media"
+ , "method"
+ , "min"
+ , "minlength"
+ , "multiple"
+ , "muted"
+ , "name"
+ , "nomodule"
+ , "nonce"
+ , "novalidate"
+ , "onabort"
+ , "onafterprint"
+ , "onauxclick"
+ , "onbeforeprint"
+ , "onbeforeunload"
+ , "onblur"
+ , "oncancel"
+ , "oncanplay"
+ , "oncanplaythrough"
+ , "onchange"
+ , "onclick"
+ , "onclose"
+ , "oncontextmenu"
+ , "oncopy"
+ , "oncuechange"
+ , "oncut"
+ , "ondblclick"
+ , "ondrag"
+ , "ondragend"
+ , "ondragenter"
+ , "ondragexit"
+ , "ondragleave"
+ , "ondragover"
+ , "ondragstart"
+ , "ondrop"
+ , "ondurationchange"
+ , "onemptied"
+ , "onended"
+ , "onerror"
+ , "onfocus"
+ , "onhashchange"
+ , "oninput"
+ , "oninvalid"
+ , "onkeydown"
+ , "onkeypress"
+ , "onkeyup"
+ , "onlanguagechange"
+ , "onload"
+ , "onloadeddata"
+ , "onloadedmetadata"
+ , "onloadend"
+ , "onloadstart"
+ , "onmessage"
+ , "onmessageerror"
+ , "onmousedown"
+ , "onmouseenter"
+ , "onmouseleave"
+ , "onmousemove"
+ , "onmouseout"
+ , "onmouseover"
+ , "onmouseup"
+ , "onoffline"
+ , "ononline"
+ , "onpagehide"
+ , "onpageshow"
+ , "onpaste"
+ , "onpause"
+ , "onplay"
+ , "onplaying"
+ , "onpopstate"
+ , "onprogress"
+ , "onratechange"
+ , "onrejectionhandled"
+ , "onreset"
+ , "onresize"
+ , "onscroll"
+ , "onsecuritypolicyviolation"
+ , "onseeked"
+ , "onseeking"
+ , "onselect"
+ , "onstalled"
+ , "onstorage"
+ , "onsubmit"
+ , "onsuspend"
+ , "ontimeupdate"
+ , "ontoggle"
+ , "onunhandledrejection"
+ , "onunload"
+ , "onvolumechange"
+ , "onwaiting"
+ , "onwheel"
+ , "open"
+ , "optimum"
+ , "pattern"
+ , "ping"
+ , "placeholder"
+ , "playsinline"
+ , "poster"
+ , "preload"
+ , "readonly"
+ , "referrerpolicy"
+ , "rel"
+ , "required"
+ , "reversed"
+ , "rows"
+ , "rowspan"
+ , "sandbox"
+ , "scope"
+ , "selected"
+ , "shape"
+ , "size"
+ , "sizes"
+ , "slot"
+ , "span"
+ , "spellcheck"
+ , "src"
+ , "srcdoc"
+ , "srclang"
+ , "srcset"
+ , "start"
+ , "step"
+ , "style"
+ , "tabindex"
+ , "target"
+ , "title"
+ , "translate"
+ , "type"
+ , "typemustmatch"
+ , "updateviacache"
+ , "usemap"
+ , "value"
+ , "width"
+ , "workertype"
+ , "wrap"
+ ]
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index 29fdafe15..688c1f390 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -1,6 +1,8 @@
-{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
{-
-Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2014-2015, 2017-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Haddock
- Copyright : Copyright (C) 2014 John MacFarlane
+ Copyright : Copyright (C) 2014-2015,2017 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -31,17 +33,19 @@ Conversion of 'Pandoc' documents to haddock markup.
Haddock: <http://www.haskell.org/haddock/doc/html/>
-}
module Text.Pandoc.Writers.Haddock (writeHaddock) where
+import Control.Monad.State.Strict
+import Data.Default
+import Data.List (intersperse, transpose)
+import Data.Text (Text)
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Data.List ( intersperse, transpose )
import Text.Pandoc.Pretty
-import Control.Monad.State
-import Text.Pandoc.Readers.TeXMath (texMathToInlines)
-import Network.URI (isURI)
-import Data.Default
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Math (texMathToInlines)
+import Text.Pandoc.Writers.Shared
type Notes = [[Block]]
data WriterState = WriterState { stNotes :: Notes }
@@ -49,13 +53,14 @@ instance Default WriterState
where def = WriterState{ stNotes = [] }
-- | Convert Pandoc to Haddock.
-writeHaddock :: WriterOptions -> Pandoc -> String
+writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeHaddock opts document =
- evalState (pandocToHaddock opts{
+ evalStateT (pandocToHaddock opts{
writerWrapText = writerWrapText opts } document) def
-- | Return haddock representation of document.
-pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String
+pandocToHaddock :: PandocMonad m
+ => WriterOptions -> Pandoc -> StateT WriterState m Text
pandocToHaddock opts (Pandoc meta blocks) = do
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
@@ -63,22 +68,22 @@ pandocToHaddock opts (Pandoc meta blocks) = do
body <- blockListToHaddock opts blocks
st <- get
notes' <- notesToHaddock opts (reverse $ stNotes st)
- let render' :: Doc -> String
+ let render' :: Doc -> Text
render' = render colwidth
let main = render' $ body <>
(if isEmpty notes' then empty else blankline <> notes')
metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToHaddock opts)
- (fmap (render colwidth) . inlineListToHaddock opts)
+ (fmap render' . blockListToHaddock opts)
+ (fmap render' . inlineListToHaddock opts)
meta
- let context = defField "body" main
- $ metadata
+ let context = defField "body" main metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Return haddock representation of notes.
-notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToHaddock :: PandocMonad m
+ => WriterOptions -> [[Block]] -> StateT WriterState m Doc
notesToHaddock opts notes =
if null notes
then return empty
@@ -92,9 +97,10 @@ escapeString = escapeStringUsing haddockEscapes
where haddockEscapes = backslashEscapes "\\/'`\"@<"
-- | Convert Pandoc block element to haddock.
-blockToHaddock :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState Doc
+blockToHaddock :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> StateT WriterState m Doc
blockToHaddock _ Null = return empty
blockToHaddock opts (Div _ ils) = do
contents <- blockListToHaddock opts ils
@@ -110,10 +116,12 @@ blockToHaddock opts (Para inlines) =
(<> blankline) `fmap` blockToHaddock opts (Plain inlines)
blockToHaddock opts (LineBlock lns) =
blockToHaddock opts $ linesToPara lns
-blockToHaddock _ (RawBlock f str)
- | f == "haddock" = do
+blockToHaddock _ b@(RawBlock f str)
+ | f == "haddock" =
return $ text str <> text "\n"
- | otherwise = return empty
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
blockToHaddock opts HorizontalRule =
return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline
blockToHaddock opts (Header level (ident,_,_) inlines) = do
@@ -141,16 +149,16 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do
isPlainBlock _ = False
let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows)
(nst,tbl) <- case True of
- _ | isSimple -> fmap (nest 2,) $
+ _ | isSimple -> (nest 2,) <$>
pandocTable opts (all null headers) aligns widths
rawHeaders rawRows
- | not hasBlocks -> fmap (nest 2,) $
+ | not hasBlocks -> (nest 2,) <$>
pandocTable opts (all null headers) aligns widths
rawHeaders rawRows
- | otherwise -> fmap (id,) $
- gridTable opts (all null headers) aligns widths
- rawHeaders rawRows
- return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline
+ | otherwise -> (id,) <$>
+ gridTable opts blockListToHaddock
+ (all null headers) aligns widths headers rows
+ return $ prefixed "> " (nst $ tbl $$ blankline $$ caption'') $$ blankline
blockToHaddock opts (BulletList items) = do
contents <- mapM (bulletListItemToHaddock opts) items
return $ cat contents <> blankline
@@ -160,15 +168,15 @@ blockToHaddock opts (OrderedList (start,_,delim) items) = do
let markers' = map (\m -> if length m < 3
then m ++ replicate (3 - length m) ' '
else m) markers
- contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $
- zip markers' items
+ contents <- zipWithM (orderedListItemToHaddock opts) markers' items
return $ cat contents <> blankline
blockToHaddock opts (DefinitionList items) = do
contents <- mapM (definitionListItemToHaddock opts) items
return $ cat contents <> blankline
-pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
- -> [Doc] -> [[Doc]] -> State WriterState Doc
+pandocTable :: PandocMonad m
+ => WriterOptions -> Bool -> [Alignment] -> [Double]
+ -> [Doc] -> [[Doc]] -> StateT WriterState m Doc
pandocTable opts headless aligns widths rawHeaders rawRows = do
let isSimple = all (==0) widths
let alignHeader alignment = case alignment of
@@ -184,18 +192,17 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do
(floor . (fromIntegral (writerColumns opts) *))
widths
let makeRow = hcat . intersperse (lblock 1 (text " ")) .
- (zipWith3 alignHeader aligns widthsInChars)
+ zipWith3 alignHeader aligns widthsInChars
let rows' = map makeRow rawRows
let head' = makeRow rawHeaders
let maxRowHeight = maximum $ map height (head':rows')
let underline = cat $ intersperse (text " ") $
map (\width -> text (replicate width '-')) widthsInChars
- let border = if maxRowHeight > 1
- then text (replicate (sum widthsInChars +
- length widthsInChars - 1) '-')
- else if headless
- then underline
- else empty
+ let border
+ | maxRowHeight > 1 = text (replicate (sum widthsInChars +
+ length widthsInChars - 1) '-')
+ | headless = underline
+ | otherwise = empty
let head'' = if headless
then empty
else border <> cr <> head'
@@ -207,35 +214,9 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do
else border
return $ head'' $$ underline $$ body $$ bottom
-gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
- -> [Doc] -> [[Doc]] -> State WriterState Doc
-gridTable opts headless _aligns widths headers' rawRows = do
- let numcols = length headers'
- let widths' = if all (==0) widths
- then replicate numcols (1.0 / fromIntegral numcols)
- else widths
- let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths'
- let hpipeBlocks blocks = hcat [beg, middle, end]
- where h = maximum (map height blocks)
- sep' = lblock 3 $ vcat (map text $ replicate h " | ")
- beg = lblock 2 $ vcat (map text $ replicate h "| ")
- end = lblock 2 $ vcat (map text $ replicate h " |")
- middle = chomp $ hcat $ intersperse sep' blocks
- let makeRow = hpipeBlocks . zipWith lblock widthsInChars
- let head' = makeRow headers'
- let rows' = map (makeRow . map chomp) rawRows
- let border ch = char '+' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
- map (\l -> text $ replicate l ch) widthsInChars) <>
- char ch <> char '+'
- let body = vcat $ intersperse (border '-') rows'
- let head'' = if headless
- then empty
- else head' $$ border '='
- return $ border '-' $$ head'' $$ body $$ border '-'
-
-- | Convert bullet list item (list of blocks) to haddock
-bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToHaddock :: PandocMonad m
+ => WriterOptions -> [Block] -> StateT WriterState m Doc
bulletListItemToHaddock opts items = do
contents <- blockListToHaddock opts items
let sps = replicate (writerTabStop opts - 2) ' '
@@ -250,22 +231,24 @@ bulletListItemToHaddock opts items = do
return $ hang (writerTabStop opts) start $ contents' <> cr
-- | Convert ordered list item (a list of blocks) to haddock
-orderedListItemToHaddock :: WriterOptions -- ^ options
- -> String -- ^ list item marker
- -> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
+orderedListItemToHaddock :: PandocMonad m
+ => WriterOptions -- ^ options
+ -> String -- ^ list item marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> StateT WriterState m Doc
orderedListItemToHaddock opts marker items = do
contents <- blockListToHaddock opts items
let sps = case length marker - writerTabStop opts of
n | n > 0 -> text $ replicate n ' '
- _ -> text " "
+ _ -> text " "
let start = text marker <> sps
return $ hang (writerTabStop opts) start $ contents <> cr
-- | Convert definition list item (label, list of blocks) to haddock
-definitionListItemToHaddock :: WriterOptions
- -> ([Inline],[[Block]])
- -> State WriterState Doc
+definitionListItemToHaddock :: PandocMonad m
+ => WriterOptions
+ -> ([Inline],[[Block]])
+ -> StateT WriterState m Doc
definitionListItemToHaddock opts (label, defs) = do
labelText <- inlineListToHaddock opts label
defs' <- mapM (mapM (blockToHaddock opts)) defs
@@ -273,19 +256,22 @@ definitionListItemToHaddock opts (label, defs) = do
return $ nowrap (brackets labelText) <> cr <> contents <> cr
-- | Convert list of Pandoc block elements to haddock
-blockListToHaddock :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState Doc
+blockListToHaddock :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> StateT WriterState m Doc
blockListToHaddock opts blocks =
mapM (blockToHaddock opts) blocks >>= return . cat
-- | Convert list of Pandoc inline elements to haddock.
-inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToHaddock :: PandocMonad m
+ => WriterOptions -> [Inline] -> StateT WriterState m Doc
inlineListToHaddock opts lst =
mapM (inlineToHaddock opts) lst >>= return . cat
-- | Convert Pandoc inline element to haddock.
-inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc
+inlineToHaddock :: PandocMonad m
+ => WriterOptions -> Inline -> StateT WriterState m Doc
inlineToHaddock opts (Span (ident,_,_) ils) = do
contents <- inlineListToHaddock opts ils
if not (null ident) && null ils
@@ -315,18 +301,20 @@ inlineToHaddock opts (Quoted DoubleQuote lst) = do
return $ "“" <> contents <> "”"
inlineToHaddock _ (Code _ str) =
return $ "@" <> text (escapeString str) <> "@"
-inlineToHaddock _ (Str str) = do
+inlineToHaddock _ (Str str) =
return $ text $ escapeString str
inlineToHaddock opts (Math mt str) = do
let adjust x = case mt of
DisplayMath -> cr <> x <> cr
InlineMath -> x
- adjust `fmap` (inlineListToHaddock opts $ texMathToInlines mt str)
-inlineToHaddock _ (RawInline f str)
+ adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts)
+inlineToHaddock _ il@(RawInline f str)
| f == "haddock" = return $ text str
- | otherwise = return empty
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
-- no line break in haddock (see above on CodeBlock)
-inlineToHaddock _ (LineBreak) = return cr
+inlineToHaddock _ LineBreak = return cr
inlineToHaddock opts SoftBreak =
case writerWrapText opts of
WrapAuto -> return space
@@ -339,7 +327,7 @@ inlineToHaddock _ (Link _ txt (src, _)) = do
let useAuto = isURI src &&
case txt of
[Str s] | escapeURI s == src -> True
- _ -> False
+ _ -> False
return $ nowrap $ "<" <> text src <>
(if useAuto then empty else space <> linktext) <> ">"
inlineToHaddock opts (Image attr alternate (source, tit)) = do
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
index 8f0d21cf5..a5d851e40 100644
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -1,8 +1,10 @@
-{-# LANGUAGE OverloadedStrings, FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Writers.ICML
- Copyright : Copyright (C) 2013-2016 github.com/mb21
+ Copyright : Copyright (C) 2013-2018 github.com/mb21
License : GNU GPL, version 2 or above
Stability : alpha
@@ -14,20 +16,25 @@ InCopy is the companion word-processor to Adobe InDesign and ICML documents can
into InDesign with File -> Place.
-}
module Text.Pandoc.Writers.ICML (writeICML) where
+import Control.Monad.Except (catchError)
+import Control.Monad.State.Strict
+import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix)
+import Data.Maybe (fromMaybe)
+import qualified Data.Set as Set
+import Data.Text as Text (breakOnAll, pack)
+import Data.Text (Text)
+import Text.Pandoc.Class (PandocMonad, report)
+import qualified Text.Pandoc.Class as P
import Text.Pandoc.Definition
-import Text.Pandoc.XML
-import Text.Pandoc.Readers.TeXMath (texMathToInlines)
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Shared (linesToPara, splitBy, fetchItem, warn)
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Data.List (isPrefixOf, isInfixOf, stripPrefix, intersperse)
-import Data.Text as Text (breakOnAll, pack)
-import Control.Monad.State
-import Network.URI (isURI)
-import qualified Data.Set as Set
+import Text.Pandoc.Shared (isURI, linesToPara, splitBy)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Math (texMathToInlines)
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML
type Style = [String]
type Hyperlink = [(Int, String)]
@@ -40,7 +47,7 @@ data WriterState = WriterState{
, maxListDepth :: Int
}
-type WS a = StateT WriterState IO a
+type WS m = StateT WriterState m
defaultWriterState :: WriterState
defaultWriterState = WriterState{
@@ -121,13 +128,13 @@ subListParName = "subParagraph"
footnoteName = "Footnote"
citeName = "Cite"
-
-- | Convert Pandoc document to string in ICML format.
-writeICML :: WriterOptions -> Pandoc -> IO String
+writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeICML opts (Pandoc meta blocks) = do
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
+ render' :: Doc -> Text
render' = render colwidth
renderMeta f s = liftM (render' . fst) $ runStateT (f opts [] s) defaultWriterState
metadata <- metaToJSON opts
@@ -139,18 +146,15 @@ writeICML opts (Pandoc meta blocks) = do
context = defField "body" main
$ defField "charStyles" (render' $ charStylesToDoc st)
$ defField "parStyles" (render' $ parStylesToDoc st)
- $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st)
- $ metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-- | Auxilary functions for parStylesToDoc and charStylesToDoc.
contains :: String -> (String, (String, String)) -> [(String, String)]
contains s rule =
- if isInfixOf (fst rule) s
- then [snd rule]
- else []
+ [snd rule | (fst rule) `isInfixOf` s]
-- | The monospaced font to use as default.
monospacedFont :: Doc
@@ -174,7 +178,7 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st
where
makeStyle s =
let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str)
- attrs = concat $ map (contains s) $ [
+ attrs = concatMap (contains s) [
(defListTermName, ("BulletsAndNumberingListType", "BulletList"))
, (defListTermName, ("FontStyle", "Bold"))
, (tableHeaderName, ("FontStyle", "Bold"))
@@ -200,9 +204,9 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st
where
numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)]
| otherwise = []
- listType | isOrderedList && (not $ isInfixOf subListParName s)
+ listType | isOrderedList && not (subListParName `isInfixOf` s)
= [("BulletsAndNumberingListType", "NumberedList")]
- | isBulletList && (not $ isInfixOf subListParName s)
+ | isBulletList && not (subListParName `isInfixOf` s)
= [("BulletsAndNumberingListType", "BulletList")]
| otherwise = []
indent = [("LeftIndent", show indt)]
@@ -210,9 +214,9 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st
nBlockQuotes = countSubStrs blockQuoteName s
nDefLists = countSubStrs defListDefName s
indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists)
- props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm)
+ props = inTags True "Properties" [] (basedOn $$ tabList $$ numbForm)
where
- font = if isInfixOf codeBlockName s
+ font = if codeBlockName `isInfixOf` s
then monospacedFont
else empty
basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font
@@ -239,7 +243,7 @@ charStylesToDoc :: WriterState -> Doc
charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st
where
makeStyle s =
- let attrs = concat $ map (contains s) [
+ let attrs = concatMap (contains s) [
(strikeoutName, ("StrikeThru", "true"))
, (superscriptName, ("Position", "Superscript"))
, (subscriptName, ("Position", "Subscript"))
@@ -253,7 +257,7 @@ charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st
inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font
where
font =
- if isInfixOf codeName s
+ if codeName `isInfixOf` s
then monospacedFont
else empty
in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props
@@ -273,23 +277,22 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs
hyp (ident, url) = hdest $$ hlink
where
hdest = selfClosingTag "HyperlinkURLDestination"
- [("Self", "HyperlinkURLDestination/"++(escapeColons url)), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6
+ [("Self", "HyperlinkURLDestination/"++escapeColons url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6
hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url),
("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")]
$ inTags True "Properties" []
$ inTags False "BorderColor" [("type","enumeration")] (text "Black")
- $$ (inTags False "Destination" [("type","object")]
- $ text $ "HyperlinkURLDestination/"++(escapeColons (escapeStringForXML url))) -- HyperlinkURLDestination with more than one colon crashes CS6
+ $$ inTags False "Destination" [("type","object")] (text $ "HyperlinkURLDestination/"++escapeColons (escapeStringForXML url)) -- HyperlinkURLDestination with more than one colon crashes CS6
-- | Convert a list of Pandoc blocks to ICML.
-blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc
+blocksToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc
blocksToICML opts style lst = do
docs <- mapM (blockToICML opts style) lst
return $ intersperseBrs docs
-- | Convert a Pandoc block element to ICML.
-blockToICML :: WriterOptions -> Style -> Block -> WS Doc
+blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m Doc
blockToICML opts style (Plain lst) = parStyle opts style lst
-- title beginning with fig: indicates that the image is a figure
blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do
@@ -299,10 +302,12 @@ blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do
blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst
blockToICML opts style (LineBlock lns) =
blockToICML opts style $ linesToPara lns
-blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str]
-blockToICML _ _ (RawBlock f str)
+blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) [Str str]
+blockToICML _ _ b@(RawBlock f str)
| f == Format "icml" = return $ text str
- | otherwise = return empty
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks
blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst
blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst
@@ -343,11 +348,10 @@ blockToICML opts style (Table caption aligns widths headers rows) =
then rows
else headers:rows
cells <- rowsToICML tabl (0::Int)
- let colWidths w = if w > 0
- then [("SingleColumnWidth",show $ 500 * w)]
- else []
- let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup)
- let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths
+ let colWidths w =
+ [("SingleColumnWidth",show $ 500 * w) | w > 0]
+ let tupToDoc tup = selfClosingTag "Column" $ ("Name",show $ fst tup) : colWidths (snd tup)
+ let colDescs = vcat $ zipWith (curry tupToDoc) [0..nrCols-1] widths
let tableDoc = return $ inTags True "Table" [
("AppliedTableStyle","TableStyle/Table")
, ("HeaderRowCount", nrHeaders)
@@ -359,7 +363,7 @@ blockToICML opts style (Div _ lst) = blocksToICML opts style lst
blockToICML _ _ Null = return empty
-- | Convert a list of lists of blocks to ICML list items.
-listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc
+listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m Doc
listItemsToICML _ _ _ _ [] = return empty
listItemsToICML opts listType style attribs (first:rest) = do
st <- get
@@ -374,18 +378,17 @@ listItemsToICML opts listType style attribs (first:rest) = do
return $ intersperseBrs docs
-- | Convert a list of blocks to ICML list items.
-listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc
+listItemToICML :: PandocMonad m => WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS m Doc
listItemToICML opts style isFirst attribs item =
let makeNumbStart (Just (beginsWith, numbStl, _)) =
let doN DefaultStyle = []
- doN LowerRoman = [lowerRomanName]
- doN UpperRoman = [upperRomanName]
- doN LowerAlpha = [lowerAlphaName]
- doN UpperAlpha = [upperAlphaName]
- doN _ = []
- bw = if beginsWith > 1
- then [beginsWithName ++ show beginsWith]
- else []
+ doN LowerRoman = [lowerRomanName]
+ doN UpperRoman = [upperRomanName]
+ doN LowerAlpha = [lowerAlphaName]
+ doN UpperAlpha = [upperAlphaName]
+ doN _ = []
+ bw =
+ [beginsWithName ++ show beginsWith | beginsWith > 1]
in doN numbStl ++ bw
makeNumbStart Nothing = []
stl = if isFirst
@@ -394,26 +397,26 @@ listItemToICML opts style isFirst attribs item =
stl' = makeNumbStart attribs ++ stl
in if length item > 1
then do
- let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst
+ let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ Str "\t":lst
insertTab block = blockToICML opts style block
f <- blockToICML opts stl' $ head item
r <- mapM insertTab $ tail item
return $ intersperseBrs (f : r)
else blocksToICML opts stl' item
-definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc
+definitionListItemToICML :: PandocMonad m => WriterOptions -> Style -> ([Inline],[[Block]]) -> WS m Doc
definitionListItemToICML opts style (term,defs) = do
term' <- parStyle opts (defListTermName:style) term
defs' <- mapM (blocksToICML opts (defListDefName:style)) defs
- return $ intersperseBrs $ (term' : defs')
+ return $ intersperseBrs (term' : defs')
-- | Convert a list of inline elements to ICML.
-inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc
+inlinesToICML :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc
inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst)
-- | Convert an inline element to ICML.
-inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc
+inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m Doc
inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str
inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst
inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst
@@ -433,17 +436,20 @@ inlineToICML opts style SoftBreak =
WrapPreserve -> charStyle style cr
inlineToICML _ style LineBreak = charStyle style $ text lineSeparator
inlineToICML opts style (Math mt str) =
- cat <$> mapM (inlineToICML opts style) (texMathToInlines mt str)
-inlineToICML _ _ (RawInline f str)
+ lift (texMathToInlines mt str) >>=
+ (fmap cat . mapM (inlineToICML opts style))
+inlineToICML _ _ il@(RawInline f str)
| f == Format "icml" = return $ text str
- | otherwise = return empty
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
inlineToICML opts style (Link _ lst (url, title)) = do
content <- inlinesToICML opts (linkName:style) lst
state $ \st ->
let ident = if null $ links st
then 1::Int
- else 1 + (fst $ head $ links st)
- newst = st{ links = (ident, url):(links st) }
+ else 1 + fst (head $ links st)
+ newst = st{ links = (ident, url):links st }
cont = inTags True "HyperlinkTextSource"
[("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content
in (cont, newst)
@@ -452,9 +458,9 @@ inlineToICML opts style (Note lst) = footnoteToICML opts style lst
inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst
-- | Convert a list of block elements to an ICML footnote.
-footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc
+footnoteToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc
footnoteToICML opts style lst =
- let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls
+ let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ Str "\t":ls
insertTab block = blockToICML opts (footnoteName:style) block
in do
contents <- mapM insertTab lst
@@ -466,24 +472,24 @@ footnoteToICML opts style lst =
-- | Auxiliary function to merge Space elements into the adjacent Strs.
mergeSpaces :: [Inline] -> [Inline]
-mergeSpaces ((Str s):(x:((Str s'):xs))) | isSp x =
+mergeSpaces (Str s:(x:(Str s':xs))) | isSp x =
mergeSpaces $ Str(s++" "++s') : xs
-mergeSpaces (x:((Str s):xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs
-mergeSpaces ((Str s):(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs
-mergeSpaces (x:xs) = x : (mergeSpaces xs)
+mergeSpaces (x:(Str s:xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs
+mergeSpaces (Str s:(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs
+mergeSpaces (x:xs) = x : mergeSpaces xs
mergeSpaces [] = []
isSp :: Inline -> Bool
-isSp Space = True
+isSp Space = True
isSp SoftBreak = True
-isSp _ = False
+isSp _ = False
-- | Intersperse line breaks
intersperseBrs :: [Doc] -> Doc
intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isEmpty)
-- | Wrap a list of inline elements in an ICML Paragraph Style
-parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc
+parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc
parStyle opts style lst =
let slipIn x y = if null y
then x
@@ -498,7 +504,7 @@ parStyle opts style lst =
begins = filter (isPrefixOf beginsWithName) style
in if null begins
then ats
- else let i = maybe "" id $ stripPrefix beginsWithName $ head begins
+ else let i = fromMaybe "" $ stripPrefix beginsWithName $ head begins
in ("NumberingStartAt", i) : ats
else [attrs]
in do
@@ -507,16 +513,16 @@ parStyle opts style lst =
state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st })
-- | Wrap a Doc in an ICML Character Style.
-charStyle :: Style -> Doc -> WS Doc
+charStyle :: PandocMonad m => Style -> Doc -> WS m Doc
charStyle style content =
let (stlStr, attrs) = styleToStrAttr style
doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content
- in do
+ in
state $ \st ->
- let styles = if null stlStr
- then st
- else st{ inlineStyles = Set.insert stlStr $ inlineStyles st }
- in (doc, styles)
+ let styles = if null stlStr
+ then st
+ else st{ inlineStyles = Set.insert stlStr $ inlineStyles st }
+ in (doc, styles)
-- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute.
styleToStrAttr :: Style -> (String, [(String, String)])
@@ -529,20 +535,18 @@ styleToStrAttr style =
in (stlStr, attrs)
-- | Assemble an ICML Image.
-imageICML :: WriterOptions -> Style -> Attr -> Target -> WS Doc
+imageICML :: PandocMonad m => WriterOptions -> Style -> Attr -> Target -> WS m Doc
imageICML opts style attr (src, _) = do
- res <- liftIO $ fetchItem (writerSourceURL opts) src
- imgS <- case res of
- Left (_) -> do
- warn $ "Could not find image `" ++ src ++ "', skipping..."
- return def
- Right (img, _) -> do
- case imageSize img of
+ imgS <- catchError
+ (do (img, _) <- P.fetchItem src
+ case imageSize opts img of
Right size -> return size
Left msg -> do
- warn $ "Could not determine image size in `" ++
- src ++ "': " ++ msg
- return def
+ report $ CouldNotDetermineImageSize src msg
+ return def)
+ (\e -> do
+ report $ CouldNotFetchResource src (show e)
+ return def)
let (ow, oh) = sizeInPoints imgS
(imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS
hw = showFl $ ow / 2
@@ -571,6 +575,5 @@ imageICML opts style attr (src, _) = do
]
doc = inTags True "CharacterStyleRange" attrs
$ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"),
- ("ItemTransform", scale++" "++hw++" -"++hh)]
- $ (props $$ image)
+ ("ItemTransform", scale++" "++hw++" -"++hh)] (props $$ image)
state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } )
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
new file mode 100644
index 000000000..639961acd
--- /dev/null
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -0,0 +1,455 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Writers.JATS
+ Copyright : Copyright (C) 2017-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to JATS XML.
+Reference:
+https://jats.nlm.nih.gov/publishing/tag-library/1.1d3/element/mml-math.html
+-}
+module Text.Pandoc.Writers.JATS ( writeJATS ) where
+import Control.Monad.Reader
+import Data.Char (toLower)
+import Data.Generics (everywhere, mkT)
+import Data.List (isSuffixOf, partition)
+import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Highlighting (languages, languagesByExtension)
+import Text.Pandoc.Logging
+import Text.Pandoc.MIME (getMimeType)
+import Text.Pandoc.Options
+import Text.Pandoc.Pretty
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML
+import Text.TeXMath
+import qualified Text.XML.Light as Xml
+
+data JATSVersion = JATS1_1
+ deriving (Eq, Show)
+
+type JATS = ReaderT JATSVersion
+
+writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeJATS opts d =
+ runReaderT (docToJATS opts d) JATS1_1
+
+-- | Convert Pandoc document to string in JATS format.
+docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text
+docToJATS opts (Pandoc meta blocks) = do
+ let isBackBlock (Div ("refs",_,_) _) = True
+ isBackBlock _ = False
+ let (backblocks, bodyblocks) = partition isBackBlock blocks
+ let elements = hierarchicalize bodyblocks
+ let backElements = hierarchicalize backblocks
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let render' :: Doc -> Text
+ render' = render colwidth
+ let opts' = if maybe False (("/book>" `isSuffixOf`) . trimr)
+ (writerTemplate opts) &&
+ TopLevelDefault == writerTopLevelDivision opts
+ then opts{ writerTopLevelDivision = TopLevelChapter }
+ else opts
+ -- The numbering here follows LaTeX's internal numbering
+ let startLvl = case writerTopLevelDivision opts' of
+ TopLevelPart -> -1
+ TopLevelChapter -> 0
+ TopLevelSection -> 1
+ TopLevelDefault -> 1
+ metadata <- metaToJSON opts
+ (fmap (render' . vcat) .
+ mapM (elementToJATS opts' startLvl) .
+ hierarchicalize)
+ (fmap render' . inlinesToJATS opts')
+ meta
+ main <- (render' . vcat) <$>
+ mapM (elementToJATS opts' startLvl) elements
+ back <- (render' . vcat) <$>
+ mapM (elementToJATS opts' startLvl) backElements
+ let context = defField "body" main
+ $ defField "back" back
+ $ defField "mathml" (case writerHTMLMathMethod opts of
+ MathML -> True
+ _ -> False) metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
+
+-- | Convert an Element to JATS.
+elementToJATS :: PandocMonad m => WriterOptions -> Int -> Element -> JATS m Doc
+elementToJATS opts _ (Blk block) = blockToJATS opts block
+elementToJATS opts lvl (Sec _ _num (id',_,kvs) title elements) = do
+ let idAttr = [("id", writerIdentifierPrefix opts ++ id') | not (null id')]
+ let otherAttrs = ["sec-type", "specific-use"]
+ let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs]
+ contents <- mapM (elementToJATS opts (lvl + 1)) elements
+ title' <- inlinesToJATS opts title
+ return $ inTags True "sec" attribs $
+ inTagsSimple "title" title' $$ vcat contents
+
+-- | Convert a list of Pandoc blocks to JATS.
+blocksToJATS :: PandocMonad m => WriterOptions -> [Block] -> JATS m Doc
+blocksToJATS opts = fmap vcat . mapM (blockToJATS opts)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara :: Block -> Block
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+-- | Convert a list of pairs of terms and definitions into a list of
+-- JATS varlistentrys.
+deflistItemsToJATS :: PandocMonad m
+ => WriterOptions -> [([Inline],[[Block]])] -> JATS m Doc
+deflistItemsToJATS opts items =
+ vcat <$> mapM (uncurry (deflistItemToJATS opts)) items
+
+-- | Convert a term and a list of blocks into a JATS varlistentry.
+deflistItemToJATS :: PandocMonad m
+ => WriterOptions -> [Inline] -> [[Block]] -> JATS m Doc
+deflistItemToJATS opts term defs = do
+ term' <- inlinesToJATS opts term
+ def' <- blocksToJATS opts $ concatMap (map plainToPara) defs
+ return $ inTagsIndented "def-item" $
+ inTagsIndented "term" term' $$
+ inTagsIndented "def" def'
+
+-- | Convert a list of lists of blocks to a list of JATS list items.
+listItemsToJATS :: PandocMonad m
+ => WriterOptions -> Maybe [String] -> [[Block]] -> JATS m Doc
+listItemsToJATS opts markers items =
+ case markers of
+ Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items
+ Just ms -> vcat <$> zipWithM (listItemToJATS opts) (map Just ms) items
+
+-- | Convert a list of blocks into a JATS list item.
+listItemToJATS :: PandocMonad m
+ => WriterOptions -> Maybe String -> [Block] -> JATS m Doc
+listItemToJATS opts mbmarker item = do
+ contents <- blocksToJATS opts item
+ return $ inTagsIndented "list-item" $
+ maybe empty (\lbl -> inTagsIndented "label" (text lbl)) mbmarker
+ $$ contents
+
+imageMimeType :: String -> [(String, String)] -> (String, String)
+imageMimeType src kvs =
+ let mbMT = getMimeType src
+ maintype = fromMaybe "image" $
+ lookup "mimetype" kvs `mplus`
+ (takeWhile (/='/') <$> mbMT)
+ subtype = fromMaybe "" $
+ lookup "mime-subtype" kvs `mplus`
+ ((drop 1 . dropWhile (/='/')) <$> mbMT)
+ in (maintype, subtype)
+
+languageFor :: [String] -> String
+languageFor classes =
+ case langs of
+ (l:_) -> escapeStringForXML l
+ [] -> ""
+ where isLang l = map toLower l `elem` map (map toLower) languages
+ langsFrom s = if isLang s
+ then [s]
+ else languagesByExtension . map toLower $ s
+ langs = concatMap langsFrom classes
+
+codeAttr :: Attr -> (String, [(String, String)])
+codeAttr (ident,classes,kvs) = (lang, attr)
+ where
+ attr = [("id",ident) | not (null ident)] ++
+ [("language",lang) | not (null lang)] ++
+ [(k,v) | (k,v) <- kvs, k `elem` ["code-type",
+ "code-version", "executable",
+ "language-version", "orientation",
+ "platforms", "position", "specific-use"]]
+ lang = languageFor classes
+
+-- | Convert a Pandoc block element to JATS.
+blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m Doc
+blockToJATS _ Null = return empty
+-- Bibliography reference:
+blockToJATS opts (Div ('r':'e':'f':'-':_,_,_) [Para lst]) =
+ inlinesToJATS opts lst
+blockToJATS opts (Div ("refs",_,_) xs) = do
+ contents <- blocksToJATS opts xs
+ return $ inTagsIndented "ref-list" contents
+blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do
+ contents <- blocksToJATS opts bs
+ let attr = [("id", ident) | not (null ident)] ++
+ [("xml:lang",l) | ("lang",l) <- kvs] ++
+ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use",
+ "content-type", "orientation", "position"]]
+ return $ inTags True cls attr contents
+blockToJATS opts (Div (ident,_,kvs) bs) = do
+ contents <- blocksToJATS opts bs
+ let attr = [("id", ident) | not (null ident)] ++
+ [("xml:lang",l) | ("lang",l) <- kvs] ++
+ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use",
+ "content-type", "orientation", "position"]]
+ return $ inTags True "boxed-text" attr contents
+blockToJATS opts (Header _ _ title) = do
+ title' <- inlinesToJATS opts title
+ return $ inTagsSimple "title" title'
+-- No Plain, everything needs to be in a block-level tag
+blockToJATS opts (Plain lst) = blockToJATS opts (Para lst)
+-- title beginning with fig: indicates that the image is a figure
+blockToJATS opts (Para [Image (ident,_,kvs) txt
+ (src,'f':'i':'g':':':tit)]) = do
+ alt <- inlinesToJATS opts txt
+ let (maintype, subtype) = imageMimeType src kvs
+ let capt = if null txt
+ then empty
+ else inTagsSimple "caption" alt
+ let attr = [("id", ident) | not (null ident)] ++
+ [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation",
+ "position", "specific-use"]]
+ let graphicattr = [("mimetype",maintype),
+ ("mime-subtype",subtype),
+ ("xlink:href",src), -- do we need to URL escape this?
+ ("xlink:title",tit)]
+ return $ inTags True "fig" attr $
+ capt $$ selfClosingTag "graphic" graphicattr
+blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do
+ let (maintype, subtype) = imageMimeType src kvs
+ let attr = [("id", ident) | not (null ident)] ++
+ [("mimetype", maintype),
+ ("mime-subtype", subtype),
+ ("xlink:href", src)] ++
+ [("xlink:title", tit) | not (null tit)] ++
+ [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift",
+ "content-type", "specific-use", "xlink:actuate",
+ "xlink:href", "xlink:role", "xlink:show",
+ "xlink:type"]]
+ return $ selfClosingTag "graphic" attr
+blockToJATS opts (Para lst) =
+ inTagsIndented "p" <$> inlinesToJATS opts lst
+blockToJATS opts (LineBlock lns) =
+ blockToJATS opts $ linesToPara lns
+blockToJATS opts (BlockQuote blocks) =
+ inTagsIndented "disp-quote" <$> blocksToJATS opts blocks
+blockToJATS _ (CodeBlock a str) = return $
+ inTags False tag attr (flush (text (escapeStringForXML str)))
+ where (lang, attr) = codeAttr a
+ tag = if null lang then "preformat" else "code"
+blockToJATS _ (BulletList []) = return empty
+blockToJATS opts (BulletList lst) =
+ inTags True "list" [("list-type", "bullet")] <$>
+ listItemsToJATS opts Nothing lst
+blockToJATS _ (OrderedList _ []) = return empty
+blockToJATS opts (OrderedList (start, numstyle, delimstyle) items) = do
+ let listType = case numstyle of
+ DefaultStyle -> "order"
+ Decimal -> "order"
+ Example -> "order"
+ UpperAlpha -> "alpha-upper"
+ LowerAlpha -> "alpha-lower"
+ UpperRoman -> "roman-upper"
+ LowerRoman -> "roman-lower"
+ let simpleList = start == 1 && (delimstyle == DefaultDelim ||
+ delimstyle == Period)
+ let markers = if simpleList
+ then Nothing
+ else Just $
+ orderedListMarkers (start, numstyle, delimstyle)
+ inTags True "list" [("list-type", listType)] <$>
+ listItemsToJATS opts markers items
+blockToJATS opts (DefinitionList lst) =
+ inTags True "def-list" [] <$> deflistItemsToJATS opts lst
+blockToJATS _ b@(RawBlock f str)
+ | f == "jats" = return $ text str -- raw XML block
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToJATS _ HorizontalRule = return empty -- not semantic
+blockToJATS opts (Table [] aligns widths headers rows) = do
+ let percent w = show (truncate (100*w) :: Integer) ++ "*"
+ let coltags = vcat $ zipWith (\w al -> selfClosingTag "col"
+ ([("width", percent w) | w > 0] ++
+ [("align", alignmentToString al)])) widths aligns
+ thead <- if all null headers
+ then return empty
+ else inTagsIndented "thead" <$> tableRowToJATS opts True headers
+ tbody <- (inTagsIndented "tbody" . vcat) <$>
+ mapM (tableRowToJATS opts False) rows
+ return $ inTags True "table" [] $ coltags $$ thead $$ tbody
+blockToJATS opts (Table caption aligns widths headers rows) = do
+ captionDoc <- inTagsIndented "caption" <$> blockToJATS opts (Para caption)
+ tbl <- blockToJATS opts (Table [] aligns widths headers rows)
+ return $ inTags True "table-wrap" [] $ captionDoc $$ tbl
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToJATS :: PandocMonad m
+ => WriterOptions
+ -> Bool
+ -> [[Block]]
+ -> JATS m Doc
+tableRowToJATS opts isHeader cols =
+ (inTagsIndented "tr" . vcat) <$> mapM (tableItemToJATS opts isHeader) cols
+
+tableItemToJATS :: PandocMonad m
+ => WriterOptions
+ -> Bool
+ -> [Block]
+ -> JATS m Doc
+tableItemToJATS opts isHeader [Plain item] =
+ inTags True (if isHeader then "th" else "td") [] <$>
+ inlinesToJATS opts item
+tableItemToJATS opts isHeader item =
+ (inTags True (if isHeader then "th" else "td") [] . vcat) <$>
+ mapM (blockToJATS opts) item
+
+-- | Convert a list of inline elements to JATS.
+inlinesToJATS :: PandocMonad m => WriterOptions -> [Inline] -> JATS m Doc
+inlinesToJATS opts lst = hcat <$> mapM (inlineToJATS opts) lst
+
+-- | Convert an inline element to JATS.
+inlineToJATS :: PandocMonad m => WriterOptions -> Inline -> JATS m Doc
+inlineToJATS _ (Str str) = return $ text $ escapeStringForXML str
+inlineToJATS opts (Emph lst) =
+ inTagsSimple "italic" <$> inlinesToJATS opts lst
+inlineToJATS opts (Strong lst) =
+ inTags False "bold" [("role", "strong")] <$> inlinesToJATS opts lst
+inlineToJATS opts (Strikeout lst) =
+ inTagsSimple "strike" <$> inlinesToJATS opts lst
+inlineToJATS opts (Superscript lst) =
+ inTagsSimple "sup" <$> inlinesToJATS opts lst
+inlineToJATS opts (Subscript lst) =
+ inTagsSimple "sub" <$> inlinesToJATS opts lst
+inlineToJATS opts (SmallCaps lst) =
+ inTags False "sc" [("role", "smallcaps")] <$>
+ inlinesToJATS opts lst
+inlineToJATS opts (Quoted SingleQuote lst) = do
+ contents <- inlinesToJATS opts lst
+ return $ char '‘' <> contents <> char '’'
+inlineToJATS opts (Quoted DoubleQuote lst) = do
+ contents <- inlinesToJATS opts lst
+ return $ char '“' <> contents <> char '”'
+inlineToJATS _ (Code a str) =
+ return $ inTags False tag attr $ text (escapeStringForXML str)
+ where (lang, attr) = codeAttr a
+ tag = if null lang then "monospace" else "code"
+inlineToJATS _ il@(RawInline f x)
+ | f == "jats" = return $ text x
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToJATS _ LineBreak = return $ selfClosingTag "break" []
+inlineToJATS _ Space = return space
+inlineToJATS opts SoftBreak
+ | writerWrapText opts == WrapPreserve = return cr
+ | otherwise = return space
+inlineToJATS opts (Note contents) =
+ -- TODO technically only <p> tags are allowed inside
+ inTagsIndented "fn" <$> blocksToJATS opts contents
+inlineToJATS opts (Cite _ lst) =
+ -- TODO revisit this after examining the jats.csl pipeline
+ inlinesToJATS opts lst
+inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils
+inlineToJATS opts (Span (ident,_,kvs) ils) = do
+ contents <- inlinesToJATS opts ils
+ let attr = [("id",ident) | not (null ident)] ++
+ [("xml:lang",l) | ("lang",l) <- kvs] ++
+ [(k,v) | (k,v) <- kvs
+ , k `elem` ["content-type", "rationale",
+ "rid", "specific-use"]]
+ return $ selfClosingTag "milestone-start" attr <> contents <>
+ selfClosingTag "milestone-end" []
+inlineToJATS _ (Math t str) = do
+ let addPref (Xml.Attr q v)
+ | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v
+ | otherwise = Xml.Attr q v
+ let fixNS' e = e{ Xml.elName =
+ (Xml.elName e){ Xml.qPrefix = Just "mml" } }
+ let fixNS = everywhere (mkT fixNS') .
+ (\e -> e{ Xml.elAttribs = map addPref (Xml.elAttribs e) })
+ let conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP
+ res <- convertMath writeMathML t str
+ let tagtype = case t of
+ DisplayMath -> "disp-formula"
+ InlineMath -> "inline-formula"
+ let rawtex = inTagsSimple "tex-math"
+ $ text "<![CDATA[" <>
+ text str <>
+ text "]]>"
+ return $ inTagsSimple tagtype $
+ case res of
+ Right r -> inTagsSimple "alternatives" $
+ cr <> rawtex $$
+ text (Xml.ppcElement conf $ fixNS r)
+ Left _ -> rawtex
+inlineToJATS _ (Link _attr [Str t] ('m':'a':'i':'l':'t':'o':':':email, _))
+ | escapeURI t == email =
+ return $ inTagsSimple "email" $ text (escapeStringForXML email)
+inlineToJATS opts (Link (ident,_,kvs) txt ('#':src, _)) = do
+ let attr = [("id", ident) | not (null ident)] ++
+ [("alt", stringify txt) | not (null txt)] ++
+ [("rid", src)] ++
+ [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]]
+ if null txt
+ then return $ selfClosingTag "xref" attr
+ else do
+ contents <- inlinesToJATS opts txt
+ return $ inTags False "xref" attr contents
+inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do
+ let attr = [("id", ident) | not (null ident)] ++
+ [("ext-link-type", "uri"),
+ ("xlink:href", src)] ++
+ [("xlink:title", tit) | not (null tit)] ++
+ [(k,v) | (k,v) <- kvs, k `elem` ["assigning-authority",
+ "specific-use", "xlink:actuate",
+ "xlink:role", "xlink:show",
+ "xlink:type"]]
+ contents <- inlinesToJATS opts txt
+ return $ inTags False "ext-link" attr contents
+inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do
+ let mbMT = getMimeType src
+ let maintype = fromMaybe "image" $
+ lookup "mimetype" kvs `mplus`
+ (takeWhile (/='/') <$> mbMT)
+ let subtype = fromMaybe "" $
+ lookup "mime-subtype" kvs `mplus`
+ ((drop 1 . dropWhile (/='/')) <$> mbMT)
+ let attr = [("id", ident) | not (null ident)] ++
+ [("mimetype", maintype),
+ ("mime-subtype", subtype),
+ ("xlink:href", src)] ++
+ [("xlink:title", tit) | not (null tit)] ++
+ [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift",
+ "content-type", "specific-use", "xlink:actuate",
+ "xlink:href", "xlink:role", "xlink:show",
+ "xlink:type"]]
+ return $ selfClosingTag "inline-graphic" attr
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 88934eb44..f61c878e5 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE OverloadedStrings, ScopedTypeVariables,
- PatternGuards #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.LaTeX
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,37 +30,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' format into LaTeX.
-}
-module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
+module Text.Pandoc.Writers.LaTeX (
+ writeLaTeX
+ , writeBeamer
+ ) where
+import Control.Applicative ((<|>))
+import Control.Monad.State.Strict
+import Data.Aeson (FromJSON, object, (.=))
+import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord,
+ toLower)
+import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy,
+ stripPrefix, (\\))
+import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.URI (unEscapeString)
+import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import Text.Pandoc.Class (PandocMonad, report, toLang)
import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
+ styleToLaTeX, toListingsLanguage)
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Templates
-import Text.Printf ( printf )
-import Network.URI ( isURI, unEscapeString )
-import Data.Aeson (object, (.=), FromJSON)
-import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse,
- nub, nubBy, foldl' )
-import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit,
- ord, isAlphaNum )
-import Data.Maybe ( fromMaybe, isJust, catMaybes )
-import qualified Data.Text as T
-import Control.Applicative ((<|>))
-import Control.Monad.State
-import qualified Text.Parsec as P
import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
+import Text.Pandoc.Shared
import Text.Pandoc.Slides
-import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
- formatLaTeXInline, formatLaTeXBlock,
- toListingsLanguage)
+import Text.Pandoc.Templates
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Shared
+import qualified Text.Parsec as P
+import Text.Printf (printf)
data WriterState =
WriterState { stInNote :: Bool -- true if we're in a note
, stInQuote :: Bool -- true if in a blockquote
, stInMinipage :: Bool -- true if in minipage
, stInHeading :: Bool -- true if in a section heading
+ , stInItem :: Bool -- true if in \item[..]
, stNotes :: [Doc] -- notes in a minipage
, stOLLevel :: Int -- level of ordered list nesting
, stOptions :: WriterOptions -- writer options, so they don't have to be parameter
@@ -74,49 +82,75 @@ data WriterState =
, stHighlighting :: Bool -- true if document has highlighted code
, stIncremental :: Bool -- true if beamer lists should be displayed bit by bit
, stInternalLinks :: [String] -- list of internal link targets
- , stUsesEuro :: Bool -- true if euro symbol used
+ , stBeamer :: Bool -- produce beamer
+ , stEmptyLine :: Bool -- true if no content on line
}
+startingState :: WriterOptions -> WriterState
+startingState options = WriterState {
+ stInNote = False
+ , stInQuote = False
+ , stInMinipage = False
+ , stInHeading = False
+ , stInItem = False
+ , stNotes = []
+ , stOLLevel = 1
+ , stOptions = options
+ , stVerbInNote = False
+ , stTable = False
+ , stStrikeout = False
+ , stUrl = False
+ , stGraphics = False
+ , stLHS = False
+ , stBook = case writerTopLevelDivision options of
+ TopLevelPart -> True
+ TopLevelChapter -> True
+ _ -> False
+ , stCsquotes = False
+ , stHighlighting = False
+ , stIncremental = writerIncremental options
+ , stInternalLinks = []
+ , stBeamer = False
+ , stEmptyLine = True }
+
-- | Convert Pandoc to LaTeX.
-writeLaTeX :: WriterOptions -> Pandoc -> String
+writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeLaTeX options document =
- evalState (pandocToLaTeX options document) $
- WriterState { stInNote = False, stInQuote = False,
- stInMinipage = False, stInHeading = False,
- stNotes = [], stOLLevel = 1,
- stOptions = options, stVerbInNote = False,
- stTable = False, stStrikeout = False,
- stUrl = False, stGraphics = False,
- stLHS = False,
- stBook = (case writerTopLevelDivision options of
- TopLevelPart -> True
- TopLevelChapter -> True
- _ -> False),
- stCsquotes = False, stHighlighting = False,
- stIncremental = writerIncremental options,
- stInternalLinks = [], stUsesEuro = False }
-
-pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String
+ evalStateT (pandocToLaTeX options document) $
+ startingState options
+
+-- | Convert Pandoc to LaTeX Beamer.
+writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeBeamer options document =
+ evalStateT (pandocToLaTeX options document) $
+ (startingState options){ stBeamer = True }
+
+type LW m = StateT WriterState m
+
+pandocToLaTeX :: PandocMonad m
+ => WriterOptions -> Pandoc -> LW m Text
pandocToLaTeX options (Pandoc meta blocks) = do
-- Strip off final 'references' header if --natbib or --biblatex
let method = writerCiteMethod options
let blocks' = if method == Biblatex || method == Natbib
then case reverse blocks of
- (Div (_,["references"],_) _):xs -> reverse xs
- _ -> blocks
+ Div (_,["references"],_) _:xs -> reverse xs
+ _ -> blocks
else blocks
-- see if there are internal links
- let isInternalLink (Link _ _ ('#':xs,_)) = [xs]
- isInternalLink _ = []
+ let isInternalLink (Link _ _ ('#':xs,_)) = [xs]
+ isInternalLink _ = []
modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' }
- let template = maybe "" id $ writerTemplate options
+ let template = fromMaybe "" $ writerTemplate options
-- set stBook depending on documentclass
let colwidth = if writerWrapText options == WrapAuto
then Just $ writerColumns options
else Nothing
+ let render' :: Doc -> Text
+ render' = render colwidth
metadata <- metaToJSON options
- (fmap (render colwidth) . blockListToLaTeX)
- (fmap (render colwidth) . inlineListToLaTeX)
+ (fmap render' . blockListToLaTeX)
+ (fmap render' . inlineListToLaTeX)
meta
let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
let documentClass = case P.parse pDocumentClass "template" template of
@@ -143,25 +177,38 @@ pandocToLaTeX options (Pandoc meta blocks) = do
else case last blocks' of
Header 1 _ il -> (init blocks', il)
_ -> (blocks', [])
- blocks''' <- if writerBeamer options
+ beamer <- gets stBeamer
+ blocks''' <- if beamer
then toSlides blocks''
else return blocks''
body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'''
- (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
- let main = render colwidth $ vsep body
+ (biblioTitle :: Text) <- render' <$> inlineListToLaTeX lastHeader
+ let main = render' $ vsep body
st <- get
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
- let docLangs = nub $ query (extract "lang") blocks
+ docLangs <- catMaybes <$>
+ mapM (toLang . Just) (ordNub (query (extract "lang") blocks))
let hasStringValue x = isJust (getField x metadata :: Maybe String)
- let geometryFromMargins = intercalate [','] $ catMaybes $
- map (\(x,y) ->
+ let geometryFromMargins = intercalate [','] $ mapMaybe (\(x,y) ->
((x ++ "=") ++) <$> getField y metadata)
[("lmargin","margin-left")
,("rmargin","margin-right")
,("tmargin","margin-top")
,("bmargin","margin-bottom")
]
+ let toPolyObj lang = object [ "name" .= T.pack name
+ , "options" .= T.pack opts ]
+ where
+ (name, opts) = toPolyglossia lang
+ mblang <- toLang $ case getLang options meta of
+ Just l -> Just l
+ Nothing | null docLangs -> Nothing
+ | otherwise -> Just "en"
+ -- we need a default here since lang is used in template conditionals
+
+ let dirs = query (extract "dir") blocks
+
let context = defField "toc" (writerTableOfContents options) $
defField "toc-depth" (show (writerTOCDepth options -
if stBook st
@@ -170,7 +217,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
defField "body" main $
defField "title-meta" titleMeta $
defField "author-meta" (intercalate "; " authorsMeta) $
- defField "documentclass" (if writerBeamer options
+ defField "documentclass" (if beamer
then ("beamer" :: String)
else if stBook st
then "book"
@@ -183,12 +230,14 @@ pandocToLaTeX options (Pandoc meta blocks) = do
defField "lhs" (stLHS st) $
defField "graphics" (stGraphics st) $
defField "book-class" (stBook st) $
- defField "euro" (stUsesEuro st) $
defField "listings" (writerListings options || stLHS st) $
- defField "beamer" (writerBeamer options) $
+ defField "beamer" beamer $
(if stHighlighting st
- then defField "highlighting-macros" (styleToLaTeX
- $ writerHighlightStyle options )
+ then case writerHighlightStyle options of
+ Just sty ->
+ defField "highlighting-macros"
+ (styleToLaTeX sty)
+ Nothing -> id
else id) $
(case writerCiteMethod options of
Natbib -> defField "biblio-title" biblioTitle .
@@ -196,26 +245,26 @@ pandocToLaTeX options (Pandoc meta blocks) = do
Biblatex -> defField "biblio-title" biblioTitle .
defField "biblatex" True
_ -> id) $
- -- set lang to something so polyglossia/babel is included
- defField "lang" (if null docLangs then ""::String else "en") $
- defField "otherlangs" docLangs $
defField "colorlinks" (any hasStringValue
["citecolor", "urlcolor", "linkcolor", "toccolor"]) $
- defField "dir" (if (null $ query (extract "dir") blocks)
- then ""::String
- else "ltr") $
+ (if null dirs
+ then id
+ else defField "dir" ("ltr" :: String)) $
defField "section-titles" True $
defField "geometry" geometryFromMargins $
+ (case getField "papersize" metadata of
+ -- uppercase a4, a5, etc.
+ Just (('A':d:ds) :: String)
+ | all isDigit (d:ds) -> resetField "papersize"
+ (('a':d:ds) :: String)
+ _ -> id)
metadata
- let toPolyObj lang = object [ "name" .= T.pack name
- , "options" .= T.pack opts ]
- where
- (name, opts) = toPolyglossia lang
- let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
- otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
let context' =
- defField "babel-lang" (toBabel lang)
- $ defField "babel-otherlangs" (map toBabel otherlangs)
+ -- note: lang is used in some conditionals in the template,
+ -- so we need to set it if we have any babel/polyglossia:
+ maybe id (defField "lang" . renderLang) mblang
+ $ maybe id (defField "babel-lang" . toBabel) mblang
+ $ defField "babel-otherlangs" (map toBabel docLangs)
$ defField "babel-newcommands" (concatMap (\(poly, babel) ->
-- \textspanish and \textgalician are already used by babel
-- save them as \oritext... and let babel use that
@@ -227,31 +276,30 @@ pandocToLaTeX options (Pandoc meta blocks) = do
"\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++
"{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
++ poly ++ "}{##2}}}\n"
- else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
- ++ babel ++ "}{#2}}\n" ++
- "\\newenvironment{" ++ poly ++ "}[2][]{\\begin{otherlanguage}{"
- ++ babel ++ "}}{\\end{otherlanguage}}\n"
+ else (if poly == "latin" -- see #4161
+ then "\\providecommand{\\textlatin}{}\n\\renewcommand"
+ else "\\newcommand") ++ "{\\text" ++ poly ++
+ "}[2][]{\\foreignlanguage{" ++ babel ++ "}{#2}}\n" ++
+ "\\newenvironment{" ++ poly ++
+ "}[2][]{\\begin{otherlanguage}{" ++
+ babel ++ "}}{\\end{otherlanguage}}\n"
)
-- eliminate duplicates that have same polyglossia name
$ nubBy (\a b -> fst a == fst b)
-- find polyglossia and babel names of languages used in the document
- $ map (\l ->
- let lng = splitBy (=='-') l
- in (fst $ toPolyglossia lng, toBabel lng)
- )
- docLangs )
- $ defField "polyglossia-lang" (toPolyObj lang)
- $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
- $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
- Just "rtl" -> True
- _ -> False)
- $ context
- return $ case writerTemplate options of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context'
+ $ map (\l -> (fst $ toPolyglossia l, toBabel l)) docLangs
+ )
+ $ maybe id (defField "polyglossia-lang" . toPolyObj) mblang
+ $ defField "polyglossia-otherlangs" (map toPolyObj docLangs)
+ $
+ defField "latex-dir-rtl"
+ (getField "dir" context == Just ("rtl" :: String)) context
+ case writerTemplate options of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context'
-- | Convert Elements to LaTeX
-elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc
+elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc
elementToLaTeX _ (Blk block) = blockToLaTeX block
elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do
modify $ \s -> s{stInHeading = True}
@@ -266,18 +314,15 @@ data StringContext = TextString
deriving (Eq)
-- escape things as needed for LaTeX
-stringToLaTeX :: StringContext -> String -> State WriterState String
+stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String
stringToLaTeX _ [] = return ""
stringToLaTeX ctx (x:xs) = do
opts <- gets stOptions
rest <- stringToLaTeX ctx xs
- let ligatures = writerTeXLigatures opts && ctx == TextString
+ let ligatures = isEnabled Ext_smart opts && ctx == TextString
let isUrl = ctx == URLString
- when (x == '€') $
- modify $ \st -> st{ stUsesEuro = True }
return $
case x of
- '€' -> "\\euro{}" ++ rest
'{' -> "\\{" ++ rest
'}' -> "\\}" ++ rest
'`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest
@@ -311,26 +356,30 @@ stringToLaTeX ctx (x:xs) = do
'\x2013' | ligatures -> "--" ++ rest
_ -> x : rest
-toLabel :: String -> State WriterState String
+toLabel :: PandocMonad m => String -> LW m String
toLabel z = go `fmap` stringToLaTeX URLString z
where go [] = ""
go (x:xs)
| (isLetter x || isDigit x) && isAscii x = x:go xs
- | elem x ("_-+=:;." :: String) = x:go xs
+ | x `elem` ("_-+=:;." :: String) = x:go xs
| otherwise = "ux" ++ printf "%x" (ord x) ++ go xs
-- | Puts contents into LaTeX command.
inCmd :: String -> Doc -> Doc
inCmd cmd contents = char '\\' <> text cmd <> braces contents
-toSlides :: [Block] -> State WriterState [Block]
+toSlides :: PandocMonad m => [Block] -> LW m [Block]
toSlides bs = do
opts <- gets stOptions
let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts
let bs' = prepSlides slideLevel bs
- concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs')
+ concat `fmap` mapM (elementToBeamer slideLevel) (hierarchicalize bs')
-elementToBeamer :: Int -> Element -> State WriterState [Block]
+elementToBeamer :: PandocMonad m => Int -> Element -> LW m [Block]
+elementToBeamer _slideLevel (Blk (Div attr bs)) = do
+ -- make sure we support "blocks" inside divs
+ bs' <- concat `fmap` mapM (elementToBeamer 0) (hierarchicalize bs)
+ return [Div attr bs']
elementToBeamer _slideLevel (Blk b) = return [b]
elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts)
| lvl > slideLevel = do
@@ -340,7 +389,7 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts)
: bs ++ [RawBlock "latex" "\\end{block}"]
| lvl < slideLevel = do
bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
- return $ (Header lvl (ident,classes,kvs) tit) : bs
+ return $ Header lvl (ident,classes,kvs) tit : bs
| otherwise = do -- lvl == slideLevel
-- note: [fragile] is required or verbatim breaks
let hasCodeBlock (CodeBlock _ _) = [True]
@@ -349,19 +398,28 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts)
hasCode _ = []
let fragile = "fragile" `elem` classes ||
not (null $ query hasCodeBlock elts ++ query hasCode elts)
- let frameoptions = ["allowdisplaybreaks", "allowframebreaks",
+ let frameoptions = ["allowdisplaybreaks", "allowframebreaks", "fragile",
"b", "c", "t", "environment",
"label", "plain", "shrink", "standout"]
- let optionslist = ["fragile" | fragile] ++
+ let optionslist = ["fragile" | fragile && isNothing (lookup "fragile" kvs)] ++
[k | k <- classes, k `elem` frameoptions] ++
[k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions]
let options = if null optionslist
then ""
else "[" ++ intercalate "," optionslist ++ "]"
- let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) :
- if tit == [Str "\0"] -- marker for hrule
- then []
- else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"]
+ let latex = RawInline (Format "latex")
+ slideTitle <-
+ if tit == [Str "\0"] -- marker for hrule
+ then return []
+ else
+ if null ident
+ then return $ latex "{" : tit ++ [latex "}"]
+ else do
+ ref <- toLabel ident
+ return $ latex ("{%\n\\protect\\hypertarget{" ++
+ ref ++ "}{%\n") : tit ++ [latex "}}"]
+ let slideStart = Para $
+ RawInline "latex" ("\\begin{frame}" ++ options) : slideTitle
let slideEnd = RawBlock "latex" "\\end{frame}"
-- now carve up slide into blocks if there are sections inside
bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
@@ -376,43 +434,89 @@ isListBlock _ = False
isLineBreakOrSpace :: Inline -> Bool
isLineBreakOrSpace LineBreak = True
isLineBreakOrSpace SoftBreak = True
-isLineBreakOrSpace Space = True
-isLineBreakOrSpace _ = False
+isLineBreakOrSpace Space = True
+isLineBreakOrSpace _ = False
-- | Convert Pandoc block element to LaTeX.
-blockToLaTeX :: Block -- ^ Block to convert
- -> State WriterState Doc
+blockToLaTeX :: PandocMonad m
+ => Block -- ^ Block to convert
+ -> LW m Doc
blockToLaTeX Null = return empty
-blockToLaTeX (Div (identifier,classes,kvs) bs) = do
- beamer <- writerBeamer `fmap` gets stOptions
- ref <- toLabel identifier
- let linkAnchor = if null identifier
- then empty
- else "\\hypertarget" <> braces (text ref) <>
- braces empty
- let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
- let wrapDir = case lookup "dir" kvs of
- Just "rtl" -> align "RTL"
- Just "ltr" -> align "LTR"
- _ -> id
- wrapLang txt = case lookup "lang" kvs of
- Just lng -> let (l, o) = toPolyglossiaEnv lng
- ops = if null o
- then ""
- else brackets $ text o
- in inCmd "begin" (text l) <> ops
- $$ blankline <> txt <> blankline
- $$ inCmd "end" (text l)
- Nothing -> txt
- wrapNotes txt = if beamer && "notes" `elem` classes
+blockToLaTeX (Div (identifier,classes,kvs) bs)
+ | "incremental" `elem` classes = do
+ let classes' = filter ("incremental"/=) classes
+ beamer <- gets stBeamer
+ if beamer
+ then do oldIncremental <- gets stIncremental
+ modify $ \s -> s{ stIncremental = True }
+ result <- blockToLaTeX $ Div (identifier,classes',kvs) bs
+ modify $ \s -> s{ stIncremental = oldIncremental }
+ return result
+ else blockToLaTeX $ Div (identifier,classes',kvs) bs
+ | "nonincremental" `elem` classes = do
+ let classes' = filter ("nonincremental"/=) classes
+ beamer <- gets stBeamer
+ if beamer
+ then do oldIncremental <- gets stIncremental
+ modify $ \s -> s{ stIncremental = False }
+ result <- blockToLaTeX $ Div (identifier,classes',kvs) bs
+ modify $ \s -> s{ stIncremental = oldIncremental }
+ return result
+ else blockToLaTeX $ Div (identifier,classes',kvs) bs
+ | otherwise = do
+ beamer <- gets stBeamer
+ linkAnchor' <- hypertarget True identifier empty
+ -- see #2704 for the motivation for adding \leavevmode:
+ let linkAnchor =
+ case bs of
+ Para _ : _
+ | not (isEmpty linkAnchor')
+ -> "\\leavevmode" <> linkAnchor' <> "%"
+ _ -> linkAnchor'
+ let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
+ lang <- toLang $ lookup "lang" kvs
+ let wrapColumns = if "columns" `elem` classes
+ then \contents ->
+ inCmd "begin" "columns" <> brackets "T"
+ $$ contents
+ $$ inCmd "end" "columns"
+ else id
+ wrapColumn = if "column" `elem` classes
+ then \contents ->
+ let fromPct xs =
+ case reverse xs of
+ '%':ds -> '0':'.': reverse ds
+ _ -> xs
+ w = maybe "0.48" fromPct (lookup "width" kvs)
+ in inCmd "begin" "column" <>
+ braces (text w <> "\\textwidth")
+ $$ contents
+ $$ inCmd "end" "column"
+ else id
+ wrapDir = case lookup "dir" kvs of
+ Just "rtl" -> align "RTL"
+ Just "ltr" -> align "LTR"
+ _ -> id
+ wrapLang txt = case lang of
+ Just lng -> let (l, o) = toPolyglossiaEnv lng
+ ops = if null o
+ then ""
+ else brackets $ text o
+ in inCmd "begin" (text l) <> ops
+ $$ blankline <> txt <> blankline
+ $$ inCmd "end" (text l)
+ Nothing -> txt
+ wrapNotes txt = if beamer && "notes" `elem` classes
then "\\note" <> braces txt -- speaker notes
else linkAnchor $$ txt
- fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs
+ (wrapColumns . wrapColumn . wrapDir . wrapLang . wrapNotes)
+ <$> blockListToLaTeX bs
blockToLaTeX (Plain lst) =
inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
-- title beginning with fig: indicates that the image is a figure
blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do
inNote <- gets stInNote
+ inMinipage <- gets stInMinipage
modify $ \st -> st{ stInMinipage = True, stNotes = [] }
capt <- inlineListToLaTeX txt
notes <- gets stNotes
@@ -426,25 +530,26 @@ blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = d
let footnotes = notesToLaTeX notes
lab <- labelFor ident
let caption = "\\caption" <> captForLof <> braces capt <> lab
- figure <- hypertarget ident (cr <>
- "\\begin{figure}" $$ "\\centering" $$ img $$
- caption $$ "\\end{figure}" <> cr)
- return $ if inNote
- -- can't have figures in notes
+ innards <- hypertarget True ident $
+ "\\centering" $$ img $$ caption <> cr
+ let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}"
+ return $ if inNote || inMinipage
+ -- can't have figures in notes or minipage (here, table cell)
+ -- http://www.tex.ac.uk/FAQ-ouparmd.html
then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}"
else figure $$ footnotes
-- . . . indicates pause in beamer slides
blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do
- beamer <- writerBeamer `fmap` gets stOptions
+ beamer <- gets stBeamer
if beamer
then blockToLaTeX (RawBlock "latex" "\\pause")
else inlineListToLaTeX [Str ".",Space,Str ".",Space,Str "."]
blockToLaTeX (Para lst) =
inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
-blockToLaTeX (LineBlock lns) = do
+blockToLaTeX (LineBlock lns) =
blockToLaTeX $ linesToPara lns
blockToLaTeX (BlockQuote lst) = do
- beamer <- writerBeamer `fmap` gets stOptions
+ beamer <- gets stBeamer
case lst of
[b] | beamer && isListBlock b -> do
oldIncremental <- gets stIncremental
@@ -460,11 +565,11 @@ blockToLaTeX (BlockQuote lst) = do
return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
opts <- gets stOptions
- ref <- toLabel identifier
- let linkAnchor = if null identifier
+ lab <- labelFor identifier
+ linkAnchor' <- hypertarget True identifier lab
+ let linkAnchor = if isEmpty linkAnchor'
then empty
- else "\\hypertarget" <> braces (text ref) <>
- braces ("\\label" <> braces (text ref))
+ else linkAnchor' <> "%"
let lhsCodeBlock = do
modify $ \s -> s{ stLHS = True }
return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$
@@ -479,6 +584,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
text str $$ text ("\\end{" ++ env ++ "}")) <> cr
let listingsCodeBlock = do
st <- get
+ ref <- toLabel identifier
let params = if writerListings (stOptions st)
then (case getListingsLanguage classes of
Just l -> [ "language=" ++ mbBraced l ]
@@ -495,9 +601,6 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
else [ "label=" ++ ref ])
else []
- mbBraced x = if not (all isAlphaNum x)
- then "{" <> x <> "}"
- else x
printParams
| null params = empty
| otherwise = brackets $ hcat (intersperse ", "
@@ -505,24 +608,34 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$
"\\end{lstlisting}") $$ cr
let highlightedCodeBlock =
- case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of
- Nothing -> rawCodeBlock
- Just h -> modify (\st -> st{ stHighlighting = True }) >>
- return (flush $ linkAnchor $$ text (T.unpack h))
+ case highlight (writerSyntaxMap opts)
+ formatLaTeXBlock ("",classes,keyvalAttr) str of
+ Left msg -> do
+ unless (null msg) $
+ report $ CouldNotHighlight msg
+ rawCodeBlock
+ Right h -> do
+ st <- get
+ when (stInNote st) $ modify (\s -> s{ stVerbInNote = True })
+ modify (\s -> s{ stHighlighting = True })
+ return (flush $ linkAnchor $$ text (T.unpack h))
case () of
_ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes &&
- "literate" `elem` classes -> lhsCodeBlock
- | writerListings opts -> listingsCodeBlock
- | writerHighlight opts && not (null classes) -> highlightedCodeBlock
- | otherwise -> rawCodeBlock
-blockToLaTeX (RawBlock f x)
+ "literate" `elem` classes -> lhsCodeBlock
+ | writerListings opts -> listingsCodeBlock
+ | not (null classes) && isJust (writerHighlightStyle opts)
+ -> highlightedCodeBlock
+ | otherwise -> rawCodeBlock
+blockToLaTeX b@(RawBlock f x)
| f == Format "latex" || f == Format "tex"
= return $ text x
- | otherwise = return empty
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
blockToLaTeX (BulletList []) = return empty -- otherwise latex error
blockToLaTeX (BulletList lst) = do
incremental <- gets stIncremental
- beamer <- writerBeamer `fmap` gets stOptions
+ beamer <- gets stBeamer
let inc = if beamer && incremental then "[<+->]" else ""
items <- mapM listItemToLaTeX lst
let spacing = if isTightList lst
@@ -538,6 +651,7 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
put $ st {stOLLevel = oldlevel + 1}
items <- mapM listItemToLaTeX lst
modify (\s -> s {stOLLevel = oldlevel})
+ let beamer = stBeamer st
let tostyle x = case numstyle of
Decimal -> "\\arabic" <> braces x
UpperRoman -> "\\Roman" <> braces x
@@ -547,15 +661,24 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
Example -> "\\arabic" <> braces x
DefaultStyle -> "\\arabic" <> braces x
let todelim x = case numdelim of
- OneParen -> x <> ")"
- TwoParens -> parens x
- Period -> x <> "."
- _ -> x <> "."
+ OneParen -> x <> ")"
+ TwoParens -> parens x
+ Period -> x <> "."
+ _ -> x <> "."
+ let exemplar = case numstyle of
+ Decimal -> "1"
+ UpperRoman -> "I"
+ LowerRoman -> "i"
+ UpperAlpha -> "A"
+ LowerAlpha -> "a"
+ Example -> "1"
+ DefaultStyle -> "1"
let enum = text $ "enum" ++ map toLower (toRomanNumeral oldlevel)
- let stylecommand = if numstyle == DefaultStyle && numdelim == DefaultDelim
- then empty
- else "\\def" <> "\\label" <> enum <>
- braces (todelim $ tostyle enum)
+ let stylecommand
+ | numstyle == DefaultStyle && numdelim == DefaultDelim = empty
+ | beamer = brackets (todelim exemplar)
+ | otherwise = "\\def" <> "\\label" <> enum <>
+ braces (todelim $ tostyle enum)
let resetcounter = if start == 1 || oldlevel > 4
then empty
else "\\setcounter" <> braces enum <>
@@ -579,7 +702,8 @@ blockToLaTeX (DefinitionList lst) = do
else empty
return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$
"\\end{description}"
-blockToLaTeX HorizontalRule = return $
+blockToLaTeX HorizontalRule =
+ return
"\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"
blockToLaTeX (Header level (id',classes,_) lst) = do
modify $ \s -> s{stInHeading = True}
@@ -587,33 +711,34 @@ blockToLaTeX (Header level (id',classes,_) lst) = do
modify $ \s -> s{stInHeading = False}
return hdr
blockToLaTeX (Table caption aligns widths heads rows) = do
- headers <- if all null heads
- then return empty
- else do
- contents <- (tableRowToLaTeX True aligns widths) heads
- return ("\\toprule" $$ contents $$ "\\midrule")
- let endhead = if all null heads
- then empty
- else text "\\endhead"
- let endfirsthead = if all null heads
- then empty
- else text "\\endfirsthead"
+ let toHeaders hs = do contents <- tableRowToLaTeX True aligns widths hs
+ return ("\\toprule" $$ contents $$ "\\midrule")
+ let removeNote (Note _) = Span ("", [], []) []
+ removeNote x = x
captionText <- inlineListToLaTeX caption
+ firsthead <- if isEmpty captionText || all null heads
+ then return empty
+ else ($$ text "\\endfirsthead") <$> toHeaders heads
+ head' <- if all null heads
+ then return "\\toprule"
+ -- avoid duplicate notes in head and firsthead:
+ else toHeaders (if isEmpty firsthead
+ then heads
+ else walk removeNote heads)
let capt = if isEmpty captionText
then empty
- else text "\\caption" <> braces captionText <> "\\tabularnewline"
- $$ headers
- $$ endfirsthead
+ else text "\\caption" <>
+ braces captionText <> "\\tabularnewline"
rows' <- mapM (tableRowToLaTeX False aligns widths) rows
- let colDescriptors = text $ concat $ map toColDescriptor aligns
+ let colDescriptors = text $ concatMap toColDescriptor aligns
modify $ \s -> s{ stTable = True }
return $ "\\begin{longtable}[]" <>
braces ("@{}" <> colDescriptors <> "@{}")
-- the @{} removes extra space at beginning and end
$$ capt
- $$ (if all null heads then "\\toprule" else empty)
- $$ headers
- $$ endhead
+ $$ firsthead
+ $$ head'
+ $$ "\\endhead"
$$ vcat rows'
$$ "\\bottomrule"
$$ "\\end{longtable}"
@@ -626,14 +751,16 @@ toColDescriptor align =
AlignCenter -> "c"
AlignDefault -> "l"
-blockListToLaTeX :: [Block] -> State WriterState Doc
-blockListToLaTeX lst = vsep `fmap` mapM blockToLaTeX lst
+blockListToLaTeX :: PandocMonad m => [Block] -> LW m Doc
+blockListToLaTeX lst =
+ vsep `fmap` mapM (\b -> setEmptyLine True >> blockToLaTeX b) lst
-tableRowToLaTeX :: Bool
+tableRowToLaTeX :: PandocMonad m
+ => Bool
-> [Alignment]
-> [Double]
-> [[Block]]
- -> State WriterState Doc
+ -> LW m Doc
tableRowToLaTeX header aligns widths cols = do
-- scale factor compensates for extra space between columns
-- so the whole table isn't larger than columnwidth
@@ -643,9 +770,9 @@ tableRowToLaTeX header aligns widths cols = do
isSimple [] = True
isSimple _ = False
-- simple tables have to have simple cells:
- let widths' = if not (all isSimple cols)
+ let widths' = if all (== 0) widths && not (all isSimple cols)
then replicate (length aligns)
- (0.97 / fromIntegral (length aligns))
+ (scaleFactor / fromIntegral (length aligns))
else map (scaleFactor *) widths
cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols
return $ hsep (intersperse "&" cells) <> "\\tabularnewline"
@@ -672,10 +799,10 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of
-- math breaks in simple tables.
displayMathToInline :: Inline -> Inline
displayMathToInline (Math DisplayMath x) = Math InlineMath x
-displayMathToInline x = x
+displayMathToInline x = x
-tableCellToLaTeX :: Bool -> (Double, Alignment, [Block])
- -> State WriterState Doc
+tableCellToLaTeX :: PandocMonad m => Bool -> (Double, Alignment, [Block])
+ -> LW m Doc
tableCellToLaTeX _ (0, _, blocks) =
blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
tableCellToLaTeX header (width, align, blocks) = do
@@ -691,7 +818,7 @@ tableCellToLaTeX header (width, align, blocks) = do
AlignDefault -> "\\raggedright"
return $ ("\\begin{minipage}" <> valign <>
braces (text (printf "%.2f\\columnwidth" width)) <>
- (halign <> "\\strut" <> cr <> cellContents <> "\\strut" <> cr) <>
+ (halign <> cr <> cellContents <> "\\strut" <> cr) <>
"\\end{minipage}") $$
notesToLaTeX notes
@@ -708,19 +835,22 @@ notesToLaTeX ns = (case length ns of
$ map (\x -> "\\footnotetext" <> braces x)
$ reverse ns)
-listItemToLaTeX :: [Block] -> State WriterState Doc
+listItemToLaTeX :: PandocMonad m => [Block] -> LW m Doc
listItemToLaTeX lst
-- we need to put some text before a header if it's the first
-- element in an item. This will look ugly in LaTeX regardless, but
-- this will keep the typesetter from throwing an error.
- | ((Header _ _ _) :_) <- lst =
- blockListToLaTeX lst >>= return . (text "\\item ~" $$) . (nest 2)
+ | (Header{} :_) <- lst =
+ blockListToLaTeX lst >>= return . (text "\\item ~" $$) . nest 2
| otherwise = blockListToLaTeX lst >>= return . (text "\\item" $$) .
- (nest 2)
+ nest 2
-defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc
+defListItemToLaTeX :: PandocMonad m => ([Inline], [[Block]]) -> LW m Doc
defListItemToLaTeX (term, defs) = do
+ -- needed to turn off 'listings' because it breaks inside \item[...]:
+ modify $ \s -> s{stInItem = True}
term' <- inlineListToLaTeX term
+ modify $ \s -> s{stInItem = False}
-- put braces around term if it contains an internal link,
-- since otherwise we get bad bracket interactions: \item[\hyperref[..]
let isInternalLink (Link _ _ ('#':_,_)) = True
@@ -730,32 +860,33 @@ defListItemToLaTeX (term, defs) = do
else term'
def' <- liftM vsep $ mapM blockListToLaTeX defs
return $ case defs of
- (((Header _ _ _) : _) : _) ->
+ ((Header{} : _) : _) ->
"\\item" <> brackets term'' <> " ~ " $$ def'
_ ->
"\\item" <> brackets term'' $$ def'
-- | Craft the section header, inserting the secton reference, if supplied.
-sectionHeader :: Bool -- True for unnumbered
+sectionHeader :: PandocMonad m
+ => Bool -- True for unnumbered
-> [Char]
-> Int
-> [Inline]
- -> State WriterState Doc
+ -> LW m Doc
sectionHeader unnumbered ident level lst = do
txt <- inlineListToLaTeX lst
plain <- stringToLaTeX TextString $ concatMap stringify lst
- let removeInvalidInline (Note _) = []
+ let removeInvalidInline (Note _) = []
removeInvalidInline (Span (id', _, _) _) | not (null id') = []
- removeInvalidInline (Image _ _ _) = []
- removeInvalidInline x = [x]
+ removeInvalidInline Image{} = []
+ removeInvalidInline x = [x]
let lstNoNotes = foldr (mappend . (\x -> walkM removeInvalidInline x)) mempty lst
txtNoNotes <- inlineListToLaTeX lstNoNotes
-- footnotes in sections don't work (except for starred variants)
-- unless you specify an optional argument:
-- \section[mysec]{mysec\footnote{blah}}
- optional <- if unnumbered || lstNoNotes == lst || lstNoNotes == []
+ optional <- if unnumbered || lstNoNotes == lst || null lstNoNotes
then return empty
- else do
+ else
return $ brackets txtNoNotes
let contents = if render Nothing txt == plain
then braces txt
@@ -767,7 +898,8 @@ sectionHeader unnumbered ident level lst = do
let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault
then TopLevelChapter
else writerTopLevelDivision opts
- let level' = if writerBeamer opts &&
+ beamer <- gets stBeamer
+ let level' = if beamer &&
topLevelDivision `elem` [TopLevelPart, TopLevelChapter]
-- beamer has parts but no chapters
then if level == 1 then -1 else level - 1
@@ -794,7 +926,8 @@ sectionHeader unnumbered ident level lst = do
lab <- labelFor ident
let star = if unnumbered && level' < 4 then text "*" else empty
let stuffing = star <> optional <> contents
- stuffing' <- hypertarget ident $ text ('\\':sectionType) <> stuffing <> lab
+ stuffing' <- hypertarget True ident $
+ text ('\\':sectionType) <> stuffing <> lab
return $ if level' > 5
then txt
else prefix $$ stuffing'
@@ -804,28 +937,28 @@ sectionHeader unnumbered ident level lst = do
braces txtNoNotes
else empty
-hypertarget :: String -> Doc -> State WriterState Doc
-hypertarget ident x = do
+hypertarget :: PandocMonad m => Bool -> String -> Doc -> LW m Doc
+hypertarget _ "" x = return x
+hypertarget addnewline ident x = do
ref <- text `fmap` toLabel ident
- internalLinks <- gets stInternalLinks
- return $
- if ident `elem` internalLinks
- then text "\\hypertarget"
+ return $ text "\\hypertarget"
<> braces ref
- <> braces x
- else x
+ <> braces ((if addnewline && not (isEmpty x)
+ then ("%" <> cr)
+ else empty) <> x)
-labelFor :: String -> State WriterState Doc
+labelFor :: PandocMonad m => String -> LW m Doc
labelFor "" = return empty
labelFor ident = do
ref <- text `fmap` toLabel ident
return $ text "\\label" <> braces ref
-- | Convert list of inline elements to LaTeX.
-inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
- -> State WriterState Doc
+inlineListToLaTeX :: PandocMonad m
+ => [Inline] -- ^ Inlines to convert
+ -> LW m Doc
inlineListToLaTeX lst =
- mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst)
+ mapM inlineToLaTeX (fixLineInitialSpaces lst)
>>= return . hcat
-- nonbreaking spaces (~) in LaTeX don't work after line breaks,
-- so we turn nbsps after hard breaks to \hspace commands.
@@ -837,43 +970,35 @@ inlineListToLaTeX lst =
fixNbsps s = let (ys,zs) = span (=='\160') s
in replicate (length ys) hspace ++ [Str zs]
hspace = RawInline "latex" "\\hspace*{0.333em}"
- -- linebreaks after blank lines cause problems:
- fixBreaks [] = []
- fixBreaks ys@(LineBreak : LineBreak : _) =
- case span (== LineBreak) ys of
- (lbs, rest) -> RawInline "latex"
- ("\\\\[" ++ show (length lbs) ++
- "\\baselineskip]") : fixBreaks rest
- fixBreaks (y:ys) = y : fixBreaks ys
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
-isQuoted _ = False
+isQuoted _ = False
-- | Convert inline element to LaTeX
-inlineToLaTeX :: Inline -- ^ Inline to convert
- -> State WriterState Doc
+inlineToLaTeX :: PandocMonad m
+ => Inline -- ^ Inline to convert
+ -> LW m Doc
inlineToLaTeX (Span (id',classes,kvs) ils) = do
- ref <- toLabel id'
- let linkAnchor = if null id'
- then empty
- else "\\protect\\hypertarget" <> braces (text ref) <>
- braces empty
+ linkAnchor <- hypertarget False id' empty
+ lang <- toLang $ lookup "lang" kvs
let cmds = ["textup" | "csl-no-emph" `elem` classes] ++
["textnormal" | "csl-no-strong" `elem` classes ||
"csl-no-smallcaps" `elem` classes] ++
["RL" | ("dir", "rtl") `elem` kvs] ++
["LR" | ("dir", "ltr") `elem` kvs] ++
- (case lookup "lang" kvs of
- Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng
+ (case lang of
+ Just lng -> let (l, o) = toPolyglossia lng
ops = if null o then "" else ("[" ++ o ++ "]")
in ["text" ++ l ++ ops]
Nothing -> [])
contents <- inlineListToLaTeX ils
- return $ linkAnchor <>
- if null cmds
- then braces contents
- else foldr inCmd contents cmds
+ return $ (if null id'
+ then empty
+ else "\\protect" <> linkAnchor) <>
+ (if null cmds
+ then braces contents
+ else foldr inCmd contents cmds)
inlineToLaTeX (Emph lst) =
inlineListToLaTeX lst >>= return . inCmd "emph"
inlineToLaTeX (Strong lst) =
@@ -886,7 +1011,7 @@ inlineToLaTeX (Strikeout lst) = do
return $ inCmd "sout" contents
inlineToLaTeX (Superscript lst) =
inlineListToLaTeX lst >>= return . inCmd "textsuperscript"
-inlineToLaTeX (Subscript lst) = do
+inlineToLaTeX (Subscript lst) =
inlineListToLaTeX lst >>= return . inCmd "textsubscript"
inlineToLaTeX (SmallCaps lst) =
inlineListToLaTeX lst >>= return . inCmd "textsc"
@@ -901,26 +1026,39 @@ inlineToLaTeX (Cite cits lst) = do
inlineToLaTeX (Code (_,classes,_) str) = do
opts <- gets stOptions
inHeading <- gets stInHeading
+ inItem <- gets stInItem
+ let listingsCode = do
+ let listingsopt = case getListingsLanguage classes of
+ Just l -> "[language=" ++ mbBraced l ++ "]"
+ Nothing -> ""
+ inNote <- gets stInNote
+ when inNote $ modify $ \s -> s{ stVerbInNote = True }
+ let chr = case "!\"&'()*,-./:;?@_" \\ str of
+ (c:_) -> c
+ [] -> '!'
+ let str' = escapeStringUsing (backslashEscapes "\\{}%~_&") str
+ -- we always put lstinline in a dummy 'passthrough' command
+ -- (defined in the default template) so that we don't have
+ -- to change the way we escape characters depending on whether
+ -- the lstinline is inside another command. See #1629:
+ return $ text $ "\\passthrough{\\lstinline" ++ listingsopt ++ [chr] ++ str' ++ [chr] ++ "}"
+ let rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}"))
+ $ stringToLaTeX CodeString str
+ where escapeSpaces = concatMap
+ (\c -> if c == ' ' then "\\ " else [c])
+ let highlightCode =
+ case highlight (writerSyntaxMap opts)
+ formatLaTeXInline ("",classes,[]) str of
+ Left msg -> do
+ unless (null msg) $ report $ CouldNotHighlight msg
+ rawCode
+ Right h -> modify (\st -> st{ stHighlighting = True }) >>
+ return (text (T.unpack h))
case () of
- _ | writerListings opts && not inHeading -> listingsCode
- | writerHighlight opts && not (null classes) -> highlightCode
+ _ | writerListings opts && not (inHeading || inItem) -> listingsCode
+ | isJust (writerHighlightStyle opts) && not (null classes)
+ -> highlightCode
| otherwise -> rawCode
- where listingsCode = do
- inNote <- gets stInNote
- when inNote $ modify $ \s -> s{ stVerbInNote = True }
- let chr = case "!\"&'()*,-./:;?@_" \\ str of
- (c:_) -> c
- [] -> '!'
- return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
- highlightCode = do
- case highlight formatLaTeXInline ("",classes,[]) str of
- Nothing -> rawCode
- Just h -> modify (\st -> st{ stHighlighting = True }) >>
- return (text (T.unpack h))
- rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}"))
- $ stringToLaTeX CodeString str
- where
- escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c])
inlineToLaTeX (Quoted qt lst) = do
contents <- inlineListToLaTeX lst
csquotes <- liftM stCsquotes get
@@ -928,32 +1066,43 @@ inlineToLaTeX (Quoted qt lst) = do
if csquotes
then return $ "\\enquote" <> braces contents
else do
- let s1 = if (not (null lst)) && (isQuoted (head lst))
+ let s1 = if not (null lst) && isQuoted (head lst)
then "\\,"
else empty
- let s2 = if (not (null lst)) && (isQuoted (last lst))
+ let s2 = if not (null lst) && isQuoted (last lst)
then "\\,"
else empty
let inner = s1 <> contents <> s2
return $ case qt of
DoubleQuote ->
- if writerTeXLigatures opts
+ if isEnabled Ext_smart opts
then text "``" <> inner <> text "''"
else char '\x201C' <> inner <> char '\x201D'
SingleQuote ->
- if writerTeXLigatures opts
+ if isEnabled Ext_smart opts
then char '`' <> inner <> char '\''
else char '\x2018' <> inner <> char '\x2019'
-inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str
-inlineToLaTeX (Math InlineMath str) =
+inlineToLaTeX (Str str) = do
+ setEmptyLine False
+ liftM text $ stringToLaTeX TextString str
+inlineToLaTeX (Math InlineMath str) = do
+ setEmptyLine False
return $ "\\(" <> text str <> "\\)"
-inlineToLaTeX (Math DisplayMath str) =
+inlineToLaTeX (Math DisplayMath str) = do
+ setEmptyLine False
return $ "\\[" <> text str <> "\\]"
-inlineToLaTeX (RawInline f str)
+inlineToLaTeX il@(RawInline f str)
| f == Format "latex" || f == Format "tex"
- = return $ text str
- | otherwise = return empty
-inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr
+ = do
+ setEmptyLine False
+ return $ text str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToLaTeX LineBreak = do
+ emptyLine <- gets stEmptyLine
+ setEmptyLine True
+ return $ (if emptyLine then "~" else "") <> "\\\\" <> cr
inlineToLaTeX SoftBreak = do
wrapText <- gets (writerWrapText . stOptions)
case wrapText of
@@ -982,19 +1131,32 @@ inlineToLaTeX (Link _ txt (src, _)) =
src' <- stringToLaTeX URLString (escapeURI src)
return $ text ("\\href{" ++ src' ++ "}{") <>
contents <> char '}'
+inlineToLaTeX il@(Image _ _ ('d':'a':'t':'a':':':_, _)) = do
+ report $ InlineNotRendered il
+ return empty
inlineToLaTeX (Image attr _ (source, _)) = do
+ setEmptyLine False
modify $ \s -> s{ stGraphics = True }
opts <- gets stOptions
let showDim dir = let d = text (show dir) <> "="
- in case (dimension dir attr) of
+ in case dimension dir attr of
Just (Pixel a) ->
[d <> text (showInInch opts (Pixel a)) <> "in"]
Just (Percent a) ->
- [d <> text (showFl (a / 100)) <> "\\textwidth"]
+ [d <> text (showFl (a / 100)) <>
+ case dir of
+ Width -> "\\textwidth"
+ Height -> "\\textheight"
+ ]
Just dim ->
[d <> text (show dim)]
Nothing ->
- []
+ case dir of
+ Width | isJust (dimension Height attr) ->
+ [d <> "\\textwidth"]
+ Height | isJust (dimension Width attr) ->
+ [d <> "\\textheight"]
+ _ -> []
dimList = showDim Width ++ showDim Height
dims = if null dimList
then empty
@@ -1008,6 +1170,7 @@ inlineToLaTeX (Image attr _ (source, _)) = do
(if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <>
dims <> braces (text source'')
inlineToLaTeX (Note contents) = do
+ setEmptyLine False
inMinipage <- gets stInMinipage
modify (\s -> s{stInNote = True})
contents' <- blockListToLaTeX contents
@@ -1016,9 +1179,9 @@ inlineToLaTeX (Note contents) = do
(CodeBlock _ _ : _) -> cr
_ -> empty
let noteContents = nest 2 contents' <> optnl
- opts <- gets stOptions
+ beamer <- gets stBeamer
-- in beamer slides, display footnote from current overlay forward
- let beamerMark = if writerBeamer opts
+ let beamerMark = if beamer
then text "<.->"
else empty
modify $ \st -> st{ stNotes = noteContents : stNotes st }
@@ -1035,8 +1198,12 @@ protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs
where ltx = RawInline (Format "latex")
protectCode (x : xs) = x : protectCode xs
-citationsToNatbib :: [Citation] -> State WriterState Doc
-citationsToNatbib (one:[])
+setEmptyLine :: PandocMonad m => Bool -> LW m ()
+setEmptyLine b = modify $ \st -> st{ stEmptyLine = b }
+
+citationsToNatbib :: PandocMonad m => [Citation] -> LW m Doc
+citationsToNatbib
+ [one]
= citeCommand c p s k
where
Citation { citationId = k
@@ -1046,8 +1213,8 @@ citationsToNatbib (one:[])
}
= one
c = case m of
- AuthorInText -> "citet"
- SuppressAuthor -> "citeyearpar"
+ AuthorInText -> "citet"
+ SuppressAuthor -> "citeyearpar"
NormalCitation -> "citep"
citationsToNatbib cits
@@ -1056,9 +1223,11 @@ citationsToNatbib cits
where
noPrefix = all (null . citationPrefix)
noSuffix = all (null . citationSuffix)
- ismode m = all (((==) m) . citationMode)
- p = citationPrefix $ head $ cits
- s = citationSuffix $ last $ cits
+ ismode m = all ((==) m . citationMode)
+ p = citationPrefix $
+ head cits
+ s = citationSuffix $
+ last cits
ks = intercalate ", " $ map citationId cits
citationsToNatbib (c:cs) | citationMode c == AuthorInText = do
@@ -1082,17 +1251,20 @@ citationsToNatbib cits = do
SuppressAuthor -> citeCommand "citeyear" p s k
NormalCitation -> citeCommand "citealp" p s k
-citeCommand :: String -> [Inline] -> [Inline] -> String -> State WriterState Doc
+citeCommand :: PandocMonad m
+ => String -> [Inline] -> [Inline] -> String -> LW m Doc
citeCommand c p s k = do
args <- citeArguments p s k
return $ text ("\\" ++ c) <> args
-citeArguments :: [Inline] -> [Inline] -> String -> State WriterState Doc
+citeArguments :: PandocMonad m
+ => [Inline] -> [Inline] -> String -> LW m Doc
citeArguments p s k = do
let s' = case s of
- (Str (x:[]) : r) | isPunctuation x -> dropWhile (== Space) r
+ (Str
+ [x] : r) | isPunctuation x -> dropWhile (== Space) r
(Str (x:xs) : r) | isPunctuation x -> Str xs : r
- _ -> s
+ _ -> s
pdoc <- inlineListToLaTeX p
sdoc <- inlineListToLaTeX s'
let optargs = case (isEmpty pdoc, isEmpty sdoc) of
@@ -1101,8 +1273,9 @@ citeArguments p s k = do
(_ , _ ) -> brackets pdoc <> brackets sdoc
return $ optargs <> braces (text k)
-citationsToBiblatex :: [Citation] -> State WriterState Doc
-citationsToBiblatex (one:[])
+citationsToBiblatex :: PandocMonad m => [Citation] -> LW m Doc
+citationsToBiblatex
+ [one]
= citeCommand cmd p s k
where
Citation { citationId = k
@@ -1133,15 +1306,20 @@ citationsToBiblatex _ = return empty
-- Determine listings language from list of class attributes.
getListingsLanguage :: [String] -> Maybe String
-getListingsLanguage [] = Nothing
-getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
+getListingsLanguage xs
+ = foldr ((<|>) . toListingsLanguage) Nothing xs
+
+mbBraced :: String -> String
+mbBraced x = if not (all isAlphaNum x)
+ then "{" <> x <> "}"
+ else x
-- Extract a key from divs and spans
extract :: String -> Block -> [String]
extract key (Div attr _) = lookKey key attr
-extract key (Plain ils) = concatMap (extractInline key) ils
-extract key (Para ils) = concatMap (extractInline key) ils
-extract key (Header _ _ ils) = concatMap (extractInline key) ils
+extract key (Plain ils) = query (extractInline key) ils
+extract key (Para ils) = query (extractInline key) ils
+extract key (Header _ _ ils) = query (extractInline key) ils
extract _ _ = []
-- Extract a key from spans
@@ -1154,85 +1332,95 @@ lookKey :: String -> Attr -> [String]
lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs
-- In environments \Arabic instead of \arabic is used
-toPolyglossiaEnv :: String -> (String, String)
+toPolyglossiaEnv :: Lang -> (String, String)
toPolyglossiaEnv l =
- case toPolyglossia $ (splitBy (=='-')) l of
+ case toPolyglossia l of
("arabic", o) -> ("Arabic", o)
x -> x
-- Takes a list of the constituents of a BCP 47 language code and
-- converts it to a Polyglossia (language, options) tuple
-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
-toPolyglossia :: [String] -> (String, String)
-toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria")
-toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya")
-toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco")
-toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania")
-toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia")
-toPolyglossia ("de":"1901":_) = ("german", "spelling=old")
-toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old")
-toPolyglossia ("de":"AT":_) = ("german", "variant=austrian")
-toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old")
-toPolyglossia ("de":"CH":_) = ("german", "variant=swiss")
-toPolyglossia ("de":_) = ("german", "")
-toPolyglossia ("dsb":_) = ("lsorbian", "")
-toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly")
-toPolyglossia ("en":"AU":_) = ("english", "variant=australian")
-toPolyglossia ("en":"CA":_) = ("english", "variant=canadian")
-toPolyglossia ("en":"GB":_) = ("english", "variant=british")
-toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand")
-toPolyglossia ("en":"UK":_) = ("english", "variant=british")
-toPolyglossia ("en":"US":_) = ("english", "variant=american")
-toPolyglossia ("grc":_) = ("greek", "variant=ancient")
-toPolyglossia ("hsb":_) = ("usorbian", "")
-toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic")
-toPolyglossia ("sl":_) = ("slovenian", "")
-toPolyglossia x = (commonFromBcp47 x, "")
+toPolyglossia :: Lang -> (String, String)
+toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria")
+toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya")
+toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco")
+toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania")
+toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia")
+toPolyglossia (Lang "de" _ _ vars)
+ | "1901" `elem` vars = ("german", "spelling=old")
+toPolyglossia (Lang "de" _ "AT" vars)
+ | "1901" `elem` vars = ("german", "variant=austrian, spelling=old")
+toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian")
+toPolyglossia (Lang "de" _ "CH" vars)
+ | "1901" `elem` vars = ("german", "variant=swiss, spelling=old")
+toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ _ _) = ("german", "")
+toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "")
+toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly")
+toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian")
+toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian")
+toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand")
+toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american")
+toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient")
+toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "")
+toPolyglossia (Lang "la" _ _ vars)
+ | "x-classic" `elem` vars = ("latin", "variant=classic")
+toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "")
+toPolyglossia x = (commonFromBcp47 x, "")
-- Takes a list of the constituents of a BCP 47 language code and
-- converts it to a Babel language string.
-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
-- List of supported languages (slightly outdated):
-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
-toBabel :: [String] -> String
-toBabel ("de":"1901":_) = "german"
-toBabel ("de":"AT":"1901":_) = "austrian"
-toBabel ("de":"AT":_) = "naustrian"
-toBabel ("de":"CH":"1901":_) = "swissgerman"
-toBabel ("de":"CH":_) = "nswissgerman"
-toBabel ("de":_) = "ngerman"
-toBabel ("dsb":_) = "lowersorbian"
-toBabel ("el":"polyton":_) = "polutonikogreek"
-toBabel ("en":"AU":_) = "australian"
-toBabel ("en":"CA":_) = "canadian"
-toBabel ("en":"GB":_) = "british"
-toBabel ("en":"NZ":_) = "newzealand"
-toBabel ("en":"UK":_) = "british"
-toBabel ("en":"US":_) = "american"
-toBabel ("fr":"CA":_) = "canadien"
-toBabel ("fra":"aca":_) = "acadian"
-toBabel ("grc":_) = "polutonikogreek"
-toBabel ("hsb":_) = "uppersorbian"
-toBabel ("la":"x":"classic":_) = "classiclatin"
-toBabel ("sl":_) = "slovene"
-toBabel x = commonFromBcp47 x
+toBabel :: Lang -> String
+toBabel (Lang "de" _ "AT" vars)
+ | "1901" `elem` vars = "austrian"
+ | otherwise = "naustrian"
+toBabel (Lang "de" _ "CH" vars)
+ | "1901" `elem` vars = "swissgerman"
+ | otherwise = "nswissgerman"
+toBabel (Lang "de" _ _ vars)
+ | "1901" `elem` vars = "german"
+ | otherwise = "ngerman"
+toBabel (Lang "dsb" _ _ _) = "lowersorbian"
+toBabel (Lang "el" _ _ vars)
+ | "polyton" `elem` vars = "polutonikogreek"
+toBabel (Lang "en" _ "AU" _) = "australian"
+toBabel (Lang "en" _ "CA" _) = "canadian"
+toBabel (Lang "en" _ "GB" _) = "british"
+toBabel (Lang "en" _ "NZ" _) = "newzealand"
+toBabel (Lang "en" _ "UK" _) = "british"
+toBabel (Lang "en" _ "US" _) = "american"
+toBabel (Lang "fr" _ "CA" _) = "canadien"
+toBabel (Lang "fra" _ _ vars)
+ | "aca" `elem` vars = "acadian"
+toBabel (Lang "grc" _ _ _) = "polutonikogreek"
+toBabel (Lang "hsb" _ _ _) = "uppersorbian"
+toBabel (Lang "la" _ _ vars)
+ | "x-classic" `elem` vars = "classiclatin"
+toBabel (Lang "sl" _ _ _) = "slovene"
+toBabel x = commonFromBcp47 x
-- Takes a list of the constituents of a BCP 47 language code
-- and converts it to a string shared by Babel and Polyglossia.
-- https://tools.ietf.org/html/bcp47#section-2.1
-commonFromBcp47 :: [String] -> String
-commonFromBcp47 [] = ""
-commonFromBcp47 ("pt":"BR":_) = "brazil"
+commonFromBcp47 :: Lang -> String
+commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil"
-- Note: documentation says "brazilian" works too, but it doesn't seem to work
-- on some systems. See #2953.
-commonFromBcp47 ("sr":"Cyrl":_) = "serbianc"
-commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin"
-commonFromBcp47 x = fromIso $ head x
+commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc"
+commonFromBcp47 (Lang "zh" "Latn" _ vars)
+ | "pinyin" `elem` vars = "pinyin"
+commonFromBcp47 (Lang l _ _ _) = fromIso l
where
fromIso "af" = "afrikaans"
fromIso "am" = "amharic"
@@ -1316,10 +1504,6 @@ commonFromBcp47 x = fromIso $ head x
fromIso "vi" = "vietnamese"
fromIso _ = ""
-deNote :: Inline -> Inline
-deNote (Note _) = RawInline (Format "latex") ""
-deNote x = x
-
pDocumentOptions :: P.Parsec String () [String]
pDocumentOptions = do
P.char '['
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 98b08b08b..1be955fe3 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -1,5 +1,6 @@
+{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Man
- Copyright : Copyright (C) 2007-2015 John MacFarlane
+ Copyright : Copyright (C) 2007-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,79 +30,95 @@ Conversion of 'Pandoc' documents to groff man page format.
-}
module Text.Pandoc.Writers.Man ( writeMan) where
+import Control.Monad.State.Strict
+import Data.List (intercalate, intersperse, sort, stripPrefix)
+import qualified Data.Map as Map
+import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder (deleteMeta)
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
-import Text.Pandoc.Templates
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Readers.TeXMath
-import Text.Printf ( printf )
-import Data.List ( stripPrefix, intersperse, intercalate )
-import Data.Maybe (fromMaybe)
import Text.Pandoc.Pretty
-import Text.Pandoc.Builder (deleteMeta)
-import Control.Monad.State
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Writers.Shared
+import Text.Printf (printf)
type Notes = [[Block]]
-data WriterState = WriterState { stNotes :: Notes
- , stHasTables :: Bool }
+data WriterState = WriterState { stNotes :: Notes
+ , stFontFeatures :: Map.Map Char Bool
+ , stHasTables :: Bool }
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState { stNotes = []
+ , stFontFeatures = Map.fromList [
+ ('I',False)
+ , ('B',False)
+ , ('C',False)
+ ]
+ , stHasTables = False }
-- | Convert Pandoc to Man.
-writeMan :: WriterOptions -> Pandoc -> String
-writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False)
+writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeMan opts document =
+ evalStateT (pandocToMan opts document) defaultWriterState
-- | Return groff man representation of document.
-pandocToMan :: WriterOptions -> Pandoc -> State WriterState String
+pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m Text
pandocToMan opts (Pandoc meta blocks) = do
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
- let render' = render colwidth
+ let render' :: Doc -> Text
+ render' = render colwidth
titleText <- inlineListToMan opts $ docTitle meta
let title' = render' titleText
let setFieldsFromTitle =
- case break (== ' ') title' of
- (cmdName, rest) -> case break (=='(') cmdName of
- (xs, '(':ys) | not (null ys) &&
- last ys == ')' ->
+ case T.break (== ' ') title' of
+ (cmdName, rest) -> case T.break (=='(') cmdName of
+ (xs, ys) | "(" `T.isPrefixOf` ys
+ && ")" `T.isSuffixOf` ys ->
defField "title" xs .
- defField "section" (init ys) .
- case splitBy (=='|') rest of
+ defField "section" (T.init $ T.drop 1 ys) .
+ case T.splitOn "|" rest of
(ft:hds) ->
- defField "footer" (trim ft) .
+ defField "footer" (T.strip ft) .
defField "header"
- (trim $ concat hds)
+ (T.strip $ mconcat hds)
[] -> id
_ -> defField "title" title'
metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToMan opts)
- (fmap (render colwidth) . inlineListToMan opts)
+ (fmap render' . blockListToMan opts)
+ (fmap render' . inlineListToMan opts)
$ deleteMeta "title" meta
body <- blockListToMan opts blocks
- notes <- liftM stNotes get
+ notes <- gets stNotes
notes' <- notesToMan opts (reverse notes)
let main = render' $ body $$ notes' $$ text ""
- hasTables <- liftM stHasTables get
+ hasTables <- gets stHasTables
let context = defField "body" main
$ setFieldsFromTitle
$ defField "has-tables" hasTables
$ defField "hyphenate" True
- $ defField "pandoc-version" pandocVersion
- $ metadata
+ $ defField "pandoc-version" pandocVersion metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Return man representation of notes.
-notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc
notesToMan opts notes =
if null notes
then return empty
- else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
+ else zipWithM (noteToMan opts) [1..] notes >>=
return . (text ".SH NOTES" $$) . vcat
-- | Return man representation of a note.
-noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToMan :: PandocMonad m => WriterOptions -> Int -> [Block] -> StateT WriterState m Doc
noteToMan opts num note = do
contents <- blockListToMan opts note
let marker = cr <> text ".SS " <> brackets (text (show num))
@@ -123,11 +140,11 @@ escapeString = escapeStringUsing manEscapes
-- | Escape a literal (code) section for Man.
escapeCode :: String -> String
-escapeCode = concat . intersperse "\n" . map escapeLine . lines where
+escapeCode = intercalate "\n" . map escapeLine . lines where
escapeLine codeline =
case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of
a@('.':_) -> "\\&" ++ a
- b -> b
+ b -> b
-- We split inline lists into sentences, and print one sentence per
-- line. groff/troff treats the line-ending period differently.
@@ -139,8 +156,8 @@ breakSentence [] = ([],[])
breakSentence xs =
let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
- isSentenceEndInline (LineBreak) = True
- isSentenceEndInline _ = False
+ isSentenceEndInline LineBreak = True
+ isSentenceEndInline _ = False
(as, bs) = break isSentenceEndInline xs
in case bs of
[] -> (as, [])
@@ -160,9 +177,10 @@ splitSentences xs =
in if null rest then [sent] else sent : splitSentences rest
-- | Convert Pandoc block element to man.
-blockToMan :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState Doc
+blockToMan :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> StateT WriterState m Doc
blockToMan _ Null = return empty
blockToMan opts (Div _ bs) = blockListToMan opts bs
blockToMan opts (Plain inlines) =
@@ -173,9 +191,11 @@ blockToMan opts (Para inlines) = do
return $ text ".PP" $$ contents
blockToMan opts (LineBlock lns) =
blockToMan opts $ linesToPara lns
-blockToMan _ (RawBlock f str)
+blockToMan _ b@(RawBlock f str)
| f == Format "man" = return $ text str
- | otherwise = return empty
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *"
blockToMan opts (Header level _ inlines) = do
contents <- inlineListToMan opts inlines
@@ -205,12 +225,12 @@ blockToMan opts (Table caption alignments widths headers rows) =
then repeat ""
else map (printf "w(%0.1fn)" . (70 *)) widths
-- 78n default width - 8n indent = 70n
- let coldescriptions = text $ intercalate " "
+ let coldescriptions = text $ unwords
(zipWith (\align width -> aligncode align ++ width)
alignments iwidths) ++ "."
colheadings <- mapM (blockListToMan opts) headers
let makeRow cols = text "T{" $$
- (vcat $ intersperse (text "T}@T{") cols) $$
+ vcat (intersperse (text "T}@T{") cols) $$
text "T}"
let colheadings' = if all null headers
then empty
@@ -227,7 +247,8 @@ blockToMan opts (BulletList items) = do
return (vcat contents)
blockToMan opts (OrderedList attribs items) = do
let markers = take (length items) $ orderedListMarkers attribs
- let indent = 1 + (maximum $ map length markers)
+ let indent = 1 +
+ maximum (map length markers)
contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
zip markers items
return (vcat contents)
@@ -236,11 +257,11 @@ blockToMan opts (DefinitionList items) = do
return (vcat contents)
-- | Convert bullet list item (list of blocks) to man.
-bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToMan :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Doc
bulletListItemToMan _ [] = return empty
-bulletListItemToMan opts ((Para first):rest) =
- bulletListItemToMan opts ((Plain first):rest)
-bulletListItemToMan opts ((Plain first):rest) = do
+bulletListItemToMan opts (Para first:rest) =
+ bulletListItemToMan opts (Plain first:rest)
+bulletListItemToMan opts (Plain first:rest) = do
first' <- blockToMan opts (Plain first)
rest' <- blockListToMan opts rest
let first'' = text ".IP \\[bu] 2" $$ first'
@@ -254,14 +275,15 @@ bulletListItemToMan opts (first:rest) = do
return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
-- | Convert ordered list item (a list of blocks) to man.
-orderedListItemToMan :: WriterOptions -- ^ options
- -> String -- ^ order marker for list item
- -> Int -- ^ number of spaces to indent
- -> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
+orderedListItemToMan :: PandocMonad m
+ => WriterOptions -- ^ options
+ -> String -- ^ order marker for list item
+ -> Int -- ^ number of spaces to indent
+ -> [Block] -- ^ list item (list of blocks)
+ -> StateT WriterState m Doc
orderedListItemToMan _ _ _ [] = return empty
-orderedListItemToMan opts num indent ((Para first):rest) =
- orderedListItemToMan opts num indent ((Plain first):rest)
+orderedListItemToMan opts num indent (Para first:rest) =
+ orderedListItemToMan opts num indent (Plain first:rest)
orderedListItemToMan opts num indent (first:rest) = do
first' <- blockToMan opts first
rest' <- blockListToMan opts rest
@@ -273,44 +295,47 @@ orderedListItemToMan opts num indent (first:rest) = do
return $ first'' $$ rest''
-- | Convert definition list item (label, list of blocks) to man.
-definitionListItemToMan :: WriterOptions
- -> ([Inline],[[Block]])
- -> State WriterState Doc
+definitionListItemToMan :: PandocMonad m
+ => WriterOptions
+ -> ([Inline],[[Block]])
+ -> StateT WriterState m Doc
definitionListItemToMan opts (label, defs) = do
labelText <- inlineListToMan opts label
contents <- if null defs
then return empty
- else liftM vcat $ forM defs $ \blocks -> do
- let (first, rest) = case blocks of
- ((Para x):y) -> (Plain x,y)
- (x:y) -> (x,y)
- [] -> error "blocks is null"
- rest' <- liftM vcat $
- mapM (\item -> blockToMan opts item) rest
- first' <- blockToMan opts first
- return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
+ else liftM vcat $ forM defs $ \blocks ->
+ case blocks of
+ (x:xs) -> do
+ first' <- blockToMan opts $
+ case x of
+ Para y -> Plain y
+ _ -> x
+ rest' <- liftM vcat $ mapM
+ (\item -> blockToMan opts item) xs
+ return $ first' $$
+ text ".RS" $$ rest' $$ text ".RE"
+ [] -> return empty
return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents
-- | Convert list of Pandoc block elements to man.
-blockListToMan :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState Doc
+blockListToMan :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> StateT WriterState m Doc
blockListToMan opts blocks =
mapM (blockToMan opts) blocks >>= (return . vcat)
-- | Convert list of Pandoc inline elements to man.
-inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc
inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
-- | Convert Pandoc inline element to man.
-inlineToMan :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc
inlineToMan opts (Span _ ils) = inlineListToMan opts ils
-inlineToMan opts (Emph lst) = do
- contents <- inlineListToMan opts lst
- return $ text "\\f[I]" <> contents <> text "\\f[]"
-inlineToMan opts (Strong lst) = do
- contents <- inlineListToMan opts lst
- return $ text "\\f[B]" <> contents <> text "\\f[]"
+inlineToMan opts (Emph lst) =
+ withFontFeature 'I' (inlineListToMan opts lst)
+inlineToMan opts (Strong lst) =
+ withFontFeature 'B' (inlineListToMan opts lst)
inlineToMan opts (Strikeout lst) = do
contents <- inlineListToMan opts lst
return $ text "[STRIKEOUT:" <> contents <> char ']'
@@ -330,22 +355,26 @@ inlineToMan opts (Quoted DoubleQuote lst) = do
inlineToMan opts (Cite _ lst) =
inlineListToMan opts lst
inlineToMan _ (Code _ str) =
- return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
+ withFontFeature 'C' (return (text $ escapeCode str))
inlineToMan _ (Str str@('.':_)) =
return $ afterBreak "\\&" <> text (escapeString str)
inlineToMan _ (Str str) = return $ text $ escapeString str
inlineToMan opts (Math InlineMath str) =
- inlineListToMan opts $ texMathToInlines InlineMath str
+ lift (texMathToInlines InlineMath str) >>= inlineListToMan opts
inlineToMan opts (Math DisplayMath str) = do
- contents <- inlineListToMan opts $ texMathToInlines DisplayMath str
+ contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts
return $ cr <> text ".RS" $$ contents $$ text ".RE"
-inlineToMan _ (RawInline f str)
+inlineToMan _ il@(RawInline f str)
| f == Format "man" = return $ text str
- | otherwise = return empty
-inlineToMan _ (LineBreak) = return $
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToMan _ LineBreak = return $
cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr
inlineToMan _ SoftBreak = return space
inlineToMan _ Space = return space
+inlineToMan opts (Link _ txt ('#':_, _)) =
+ inlineListToMan opts txt -- skip internal links
inlineToMan opts (Link _ txt (src, _)) = do
linktext <- inlineListToMan opts txt
let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
@@ -355,7 +384,7 @@ inlineToMan opts (Link _ txt (src, _)) = do
char '<' <> text srcSuffix <> char '>'
_ -> linktext <> text " (" <> text src <> char ')'
inlineToMan opts (Image attr alternate (source, tit)) = do
- let txt = if (null alternate) || (alternate == [Str ""]) ||
+ let txt = if null alternate || (alternate == [Str ""]) ||
(alternate == [Str source]) -- to prevent autolinks
then [Str "image"]
else alternate
@@ -364,6 +393,24 @@ inlineToMan opts (Image attr alternate (source, tit)) = do
inlineToMan _ (Note contents) = do
-- add to notes in state
modify $ \st -> st{ stNotes = contents : stNotes st }
- notes <- liftM stNotes get
- let ref = show $ (length notes)
+ notes <- gets stNotes
+ let ref = show (length notes)
return $ char '[' <> text ref <> char ']'
+
+fontChange :: PandocMonad m => StateT WriterState m Doc
+fontChange = do
+ features <- gets stFontFeatures
+ let filling = sort [c | (c,True) <- Map.toList features]
+ return $ text $ "\\f[" ++ filling ++ "]"
+
+withFontFeature :: PandocMonad m
+ => Char
+ -> StateT WriterState m Doc
+ -> StateT WriterState m Doc
+withFontFeature c action = do
+ modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st }
+ begin <- fontChange
+ d <- action
+ modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st }
+ end <- fontChange
+ return $ begin <> d <> end
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index e3bb3eea0..cdd8f3b66 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -1,6 +1,9 @@
-{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables, MultiWayIf #-}
+{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Markdown
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -31,47 +34,50 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text.
Markdown: <http://daringfireball.net/projects/markdown/>
-}
module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
-import Data.Maybe (fromMaybe)
-import Data.Monoid (Any(..))
-import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy )
-import Data.Char ( isSpace, isPunctuation, ord, chr )
-import Data.Ord ( comparing )
-import Text.Pandoc.Pretty
import Control.Monad.Reader
-import Control.Monad.State
-import Text.Pandoc.Writers.HTML (writeHtmlString)
-import Text.Pandoc.Readers.TeXMath (texMathToInlines)
-import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
-import Network.URI (isURI)
+import Control.Monad.State.Strict
+import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum)
import Data.Default
-import Data.Yaml (Value(Object,String,Array,Bool,Number))
import qualified Data.HashMap.Strict as H
-import qualified Data.Vector as V
-import qualified Data.Text as T
+import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe)
+import Data.Monoid (Any (..))
+import Data.Ord (comparing)
import qualified Data.Set as Set
-import Network.HTTP ( urlEncode )
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Vector as V
+import Data.Yaml (Value (Array, Bool, Number, Object, String))
+import Network.HTTP (urlEncode)
+import Text.HTML.TagSoup (Tag (..), isTagText, parseTags)
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
+import Text.Pandoc.Pretty
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.HTML (writeHtml5String)
+import Text.Pandoc.Writers.Math (texMathToInlines)
+import Text.Pandoc.Writers.Shared
type Notes = [[Block]]
-type Ref = ([Inline], Target, Attr)
+type Ref = (Doc, Target, Attr)
type Refs = [Ref]
-type MD = ReaderT WriterEnv (State WriterState)
+type MD m = ReaderT WriterEnv (StateT WriterState m)
-evalMD :: MD a -> WriterEnv -> WriterState -> a
-evalMD md env st = evalState (runReaderT md env) st
+evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a
+evalMD md env st = evalStateT (runReaderT md env) st
-data WriterEnv = WriterEnv { envInList :: Bool
- , envPlain :: Bool
+data WriterEnv = WriterEnv { envInList :: Bool
+ , envPlain :: Bool
, envRefShortcutable :: Bool
, envBlockLevel :: Int
- , envEscapeSpaces :: Bool
+ , envEscapeSpaces :: Bool
}
instance Default WriterEnv
@@ -82,21 +88,26 @@ instance Default WriterEnv
, envEscapeSpaces = False
}
-data WriterState = WriterState { stNotes :: Notes
- , stRefs :: Refs
- , stIds :: Set.Set String
+data WriterState = WriterState { stNotes :: Notes
+ , stRefs :: Refs
+ , stKeys :: M.Map Key
+ (M.Map (Target, Attr) Int)
+ , stLastIdx :: Int
+ , stIds :: Set.Set String
, stNoteNum :: Int
}
instance Default WriterState
where def = WriterState{ stNotes = []
, stRefs = []
+ , stKeys = M.empty
+ , stLastIdx = 0
, stIds = Set.empty
, stNoteNum = 1
}
-- | Convert Pandoc to Markdown.
-writeMarkdown :: WriterOptions -> Pandoc -> String
+writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeMarkdown opts document =
evalMD (pandocToMarkdown opts{
writerWrapText = if isEnabled Ext_hard_line_breaks opts
@@ -106,7 +117,7 @@ writeMarkdown opts document =
-- | Convert Pandoc to plain text (like markdown, but without links,
-- pictures, or inline formatting).
-writePlain :: WriterOptions -> Pandoc -> String
+writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writePlain opts document =
evalMD (pandocToMarkdown opts document) def{ envPlain = True } def
@@ -155,8 +166,7 @@ jsonToYaml (Object hashmap) =
| otherwise -> (k' <> ":") $$ x
(k', Object _, x) -> (k' <> ":") $$ nest 2 x
(_, String "", _) -> empty
- (k', _, x) | k == "meta-json" -> empty
- | otherwise -> k' <> ":" <> space <> hang 2 "" x)
+ (k', _, x) -> k' <> ":" <> space <> hang 2 "" x)
$ sortBy (comparing fst) $ H.toList hashmap
jsonToYaml (Array vec) =
vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec
@@ -171,15 +181,17 @@ jsonToYaml (Number n) = text $ show n
jsonToYaml _ = empty
-- | Return markdown representation of document.
-pandocToMarkdown :: WriterOptions -> Pandoc -> MD String
+pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m Text
pandocToMarkdown opts (Pandoc meta blocks) = do
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
isPlain <- asks envPlain
- metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToMarkdown opts)
- (fmap (render colwidth) . inlineListToMarkdown opts)
+ let render' :: Doc -> Text
+ render' = render colwidth . chomp
+ metadata <- metaToJSON'
+ (fmap render' . blockListToMarkdown opts)
+ (fmap render' . blockToMarkdown opts . Plain)
meta
let title' = maybe empty text $ getField "title" metadata
let authors' = maybe [] (map text) $ getField "author" metadata
@@ -196,49 +208,51 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
| otherwise -> empty
Nothing -> empty
let headerBlocks = filter isHeaderBlock blocks
- let toc = if writerTableOfContents opts
- then tableOfContents opts headerBlocks
- else empty
+ toc <- if writerTableOfContents opts
+ then render' <$> tableOfContents opts headerBlocks
+ else return ""
-- Strip off final 'references' header if markdown citations enabled
let blocks' = if isEnabled Ext_citations opts
then case reverse blocks of
(Div (_,["references"],_) _):xs -> reverse xs
- _ -> blocks
+ _ -> blocks
else blocks
body <- blockListToMarkdown opts blocks'
notesAndRefs' <- notesAndRefs opts
- let render' :: Doc -> String
- render' = render colwidth
let main = render' $ body <> notesAndRefs'
- let context = defField "toc" (render' toc)
+ let context = -- for backwards compatibility we populate toc
+ -- with the contents of the toc, rather than a
+ -- boolean:
+ defField "toc" toc
+ $ defField "table-of-contents" toc
$ defField "body" main
$ (if isNullMeta meta
then id
else defField "titleblock" (render' titleblock))
- $ metadata
+ $ addVariablesToJSON opts metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Return markdown representation of reference key table.
-refsToMarkdown :: WriterOptions -> Refs -> MD Doc
+refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc
refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
-- | Return markdown representation of a reference key.
-keyToMarkdown :: WriterOptions
+keyToMarkdown :: PandocMonad m
+ => WriterOptions
-> Ref
- -> MD Doc
-keyToMarkdown opts (label, (src, tit), attr) = do
- label' <- inlineListToMarkdown opts label
+ -> MD m Doc
+keyToMarkdown opts (label', (src, tit), attr) = do
let tit' = if null tit
then empty
else space <> "\"" <> text tit <> "\""
return $ nest 2 $ hang 2
("[" <> label' <> "]:" <> space) (text src <> tit')
- <> linkAttributes opts attr
+ <+> linkAttributes opts attr
-- | Return markdown representation of notes.
-notesToMarkdown :: WriterOptions -> [[Block]] -> MD Doc
+notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc
notesToMarkdown opts notes = do
n <- gets stNoteNum
notes' <- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [n..] notes)
@@ -246,7 +260,7 @@ notesToMarkdown opts notes = do
return $ vsep notes'
-- | Return markdown representation of a note.
-noteToMarkdown :: WriterOptions -> Int -> [Block] -> MD Doc
+noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m Doc
noteToMarkdown opts num blocks = do
contents <- blockListToMarkdown opts blocks
let num' = text $ writerIdentifierPrefix opts ++ show num
@@ -256,45 +270,65 @@ noteToMarkdown opts num blocks = do
let markerSize = 4 + offset num'
let spacer = case writerTabStop opts - markerSize of
n | n > 0 -> text $ replicate n ' '
- _ -> text " "
+ _ -> text " "
return $ if isEnabled Ext_footnotes opts
then hang (writerTabStop opts) (marker <> spacer) contents
else marker <> spacer <> contents
-- | Escape special characters for Markdown.
escapeString :: WriterOptions -> String -> String
-escapeString opts = escapeStringUsing markdownEscapes
- where markdownEscapes = ('<', "&lt;") : ('>', "&gt;") :
- backslashEscapes specialChars
- specialChars =
- (if isEnabled Ext_superscript opts
- then ('^':)
- else id) .
- (if isEnabled Ext_subscript opts
- then ('~':)
- else id) .
- (if isEnabled Ext_tex_math_dollars opts
- then ('$':)
- else id) $
- "\\`*_[]#"
+escapeString _ [] = []
+escapeString opts (c:cs) =
+ case c of
+ '<' | isEnabled Ext_all_symbols_escapable opts ->
+ '\\' : '<' : escapeString opts cs
+ | otherwise -> "&lt;" ++ escapeString opts cs
+ '>' | isEnabled Ext_all_symbols_escapable opts ->
+ '\\' : '>' : escapeString opts cs
+ | otherwise -> "&gt;" ++ escapeString opts cs
+ '@' | isEnabled Ext_citations opts ->
+ case cs of
+ (d:_)
+ | isAlphaNum d || d == '_'
+ -> '\\':'@':escapeString opts cs
+ _ -> '@':escapeString opts cs
+ _ | c `elem` ['\\','`','*','_','[',']','#'] ->
+ '\\':c:escapeString opts cs
+ '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':escapeString opts cs
+ '^' | isEnabled Ext_superscript opts -> '\\':'^':escapeString opts cs
+ '~' | isEnabled Ext_subscript opts -> '\\':'~':escapeString opts cs
+ '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':escapeString opts cs
+ '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs
+ '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs
+ '-' | isEnabled Ext_smart opts ->
+ case cs of
+ '-':_ -> '\\':'-':escapeString opts cs
+ _ -> '-':escapeString opts cs
+ '.' | isEnabled Ext_smart opts ->
+ case cs of
+ '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest
+ _ -> '.':escapeString opts cs
+ _ -> c : escapeString opts cs
-- | Construct table of contents from list of header blocks.
-tableOfContents :: WriterOptions -> [Block] -> Doc
-tableOfContents opts headers =
- let opts' = opts { writerIgnoreNotes = True }
- contents = BulletList $ map (elementToListItem opts) $ hierarchicalize headers
- in evalMD (blockToMarkdown opts' contents) def def
+tableOfContents :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc
+tableOfContents opts headers = do
+ contents <- BulletList <$> mapM (elementToListItem opts) (hierarchicalize headers)
+ blockToMarkdown opts contents
-- | Converts an Element to a list item for a table of contents,
-elementToListItem :: WriterOptions -> Element -> [Block]
+elementToListItem :: PandocMonad m => WriterOptions -> Element -> MD m [Block]
elementToListItem opts (Sec lev _nums (ident,_,_) headerText subsecs)
- = Plain headerLink :
- [ BulletList (map (elementToListItem opts) subsecs) |
- not (null subsecs) && lev < writerTOCDepth opts ]
- where headerLink = if null ident
- then headerText
- else [Link nullAttr headerText ('#':ident, "")]
-elementToListItem _ (Blk _) = []
+ = do isPlain <- asks envPlain
+ let headerLink = if null ident || isPlain
+ then walk deNote headerText
+ else [Link nullAttr (walk deNote headerText)
+ ('#':ident, "")]
+ listContents <- if null subsecs || lev >= writerTOCDepth opts
+ then return []
+ else mapM (elementToListItem opts) subsecs
+ return [Plain headerLink, BulletList listContents]
+elementToListItem _ (Blk _) = return []
attrsToMarkdown :: Attr -> Doc
attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
@@ -331,10 +365,10 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker
beginsWithOrderedListMarker :: String -> Bool
beginsWithOrderedListMarker str =
case runParser olMarker defaultParserState "para start" (take 10 str) of
- Left _ -> False
- Right _ -> True
+ Left _ -> False
+ Right _ -> True
-notesAndRefs :: WriterOptions -> MD Doc
+notesAndRefs :: PandocMonad m => WriterOptions -> MD m Doc
notesAndRefs opts = do
notes' <- reverse <$> gets stNotes >>= notesToMarkdown opts
modify $ \s -> s { stNotes = [] }
@@ -345,16 +379,17 @@ notesAndRefs opts = do
if | writerReferenceLocation opts == EndOfDocument -> empty
| isEmpty notes' && isEmpty refs' -> empty
| otherwise -> blankline
-
+
return $
(if isEmpty notes' then empty else blankline <> notes') <>
(if isEmpty refs' then empty else blankline <> refs') <>
endSpacing
-- | Convert Pandoc block element to markdown.
-blockToMarkdown :: WriterOptions -- ^ Options
+blockToMarkdown :: PandocMonad m
+ => WriterOptions -- ^ Options
-> Block -- ^ Block element
- -> MD Doc
+ -> MD m Doc
blockToMarkdown opts blk =
local (\env -> env {envBlockLevel = envBlockLevel env + 1}) $
do doc <- blockToMarkdown' opts blk
@@ -363,17 +398,26 @@ blockToMarkdown opts blk =
then notesAndRefs opts >>= (\d -> return $ doc <> d)
else return doc
-blockToMarkdown' :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> MD Doc
+blockToMarkdown' :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> MD m Doc
blockToMarkdown' _ Null = return empty
blockToMarkdown' opts (Div attrs ils) = do
contents <- blockListToMarkdown opts ils
- return $ if isEnabled Ext_raw_html opts &&
- isEnabled Ext_markdown_in_html_blocks opts
- then tagWithAttrs "div" attrs <> blankline <>
- contents <> blankline <> "</div>" <> blankline
- else contents <> blankline
+ return $
+ case () of
+ _ | isEnabled Ext_fenced_divs opts &&
+ attrs /= nullAttr ->
+ nowrap (text ":::" <+> attrsToMarkdown attrs) $$
+ chomp contents $$
+ text ":::" <> blankline
+ | isEnabled Ext_native_divs opts ||
+ (isEnabled Ext_raw_html opts &&
+ isEnabled Ext_markdown_in_html_blocks opts) ->
+ tagWithAttrs "div" attrs <> blankline <>
+ contents <> blankline <> "</div>" <> blankline
+ | otherwise -> contents <> blankline
blockToMarkdown' opts (Plain inlines) = do
contents <- inlineListToMarkdown opts inlines
-- escape if para starts with ordered list marker
@@ -382,13 +426,28 @@ blockToMarkdown' opts (Plain inlines) = do
then Just $ writerColumns opts
else Nothing
let rendered = render colwidth contents
- let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs
- | otherwise = x : escapeDelimiter xs
- escapeDelimiter [] = []
- let contents' = if isEnabled Ext_all_symbols_escapable opts &&
- not isPlain && beginsWithOrderedListMarker rendered
- then text $ escapeDelimiter rendered
- else contents
+ let escapeMarker (x:xs) | x `elem` (".()" :: String) = '\\':x:xs
+ | otherwise = x : escapeMarker xs
+ escapeMarker [] = []
+ let contents' =
+ case rendered of
+ '%':_ | isEnabled Ext_pandoc_title_block opts &&
+ isEnabled Ext_all_symbols_escapable opts ->
+ "\\" <> contents
+ '+':s:_ | not isPlain && isSpace s -> "\\" <> contents
+ '*':s:_ | not isPlain && isSpace s -> "\\" <> contents
+ '-':s:_ | not isPlain && isSpace s -> "\\" <> contents
+ '+':[] | not isPlain -> "\\" <> contents
+ '*':[] | not isPlain -> "\\" <> contents
+ '-':[] | not isPlain -> "\\" <> contents
+ '|':_ | (isEnabled Ext_line_blocks opts ||
+ isEnabled Ext_pipe_tables opts)
+ && isEnabled Ext_all_symbols_escapable opts
+ -> "\\" <> contents
+ _ | not isPlain && beginsWithOrderedListMarker rendered
+ && isEnabled Ext_all_symbols_escapable opts
+ -> text $ escapeMarker rendered
+ | otherwise -> contents
return $ contents' <> cr
-- title beginning with fig: indicates figure
blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) =
@@ -401,9 +460,11 @@ blockToMarkdown' opts (LineBlock lns) =
mdLines <- mapM (inlineListToMarkdown opts) lns
return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline
else blockToMarkdown opts $ linesToPara lns
-blockToMarkdown' opts (RawBlock f str)
- | f == "markdown" = return $ text str <> text "\n"
- | f == "html" && isEnabled Ext_raw_html opts = do
+blockToMarkdown' opts b@(RawBlock f str)
+ | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+ "markdown_mmd", "markdown_strict"]
+ = return $ text str <> text "\n"
+ | f `elem` ["html", "html5", "html4"] && isEnabled Ext_raw_html opts = do
plain <- asks envPlain
return $ if plain
then empty
@@ -415,7 +476,9 @@ blockToMarkdown' opts (RawBlock f str)
return $ if plain
then empty
else text str <> text "\n"
- | otherwise = return empty
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
blockToMarkdown' opts HorizontalRule = do
return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline
blockToMarkdown' opts (Header level attr inlines) = do
@@ -442,6 +505,8 @@ blockToMarkdown' opts (Header level attr inlines) = do
space <> attrsToMarkdown attr
| otherwise -> empty
contents <- inlineListToMarkdown opts $
+ -- ensure no newlines; see #3736
+ walk lineBreakToSpace $
if level == 1 && plain
then capitalize inlines
else inlines
@@ -497,39 +562,70 @@ blockToMarkdown' opts (BlockQuote blocks) = do
contents <- blockListToMarkdown opts blocks
return $ (prefixed leader contents) <> blankline
blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do
+ let numcols = maximum (length aligns : length widths :
+ map length (headers:rows))
caption' <- inlineListToMarkdown opts caption
let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
- then empty
- else blankline <> ": " <> caption' <> blankline
- rawHeaders <- mapM (blockListToMarkdown opts) headers
- rawRows <- mapM (mapM (blockListToMarkdown opts)) rows
+ then blankline
+ else blankline $$ (": " <> caption') $$ blankline
let isLineBreak LineBreak = Any True
isLineBreak _ = Any False
- let isSimple = all (==0) widths &&
- not ( getAny (query isLineBreak (headers:rows)) )
+ let hasLineBreak = getAny . query isLineBreak
+ let isSimpleCell [Plain ils] = not (hasLineBreak ils)
+ isSimpleCell [Para ils ] = not (hasLineBreak ils)
+ isSimpleCell [] = True
+ isSimpleCell _ = False
+ let hasSimpleCells = all isSimpleCell (concat (headers:rows))
+ let isSimple = hasSimpleCells && all (==0) widths
let isPlainBlock (Plain _) = True
isPlainBlock _ = False
let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows)
- (nst,tbl) <- case True of
- _ | isSimple &&
- isEnabled Ext_simple_tables opts -> fmap (nest 2,) $
- pandocTable opts (all null headers) aligns widths
- rawHeaders rawRows
- | isSimple &&
- isEnabled Ext_pipe_tables opts -> fmap (id,) $
- pipeTable (all null headers) aligns rawHeaders rawRows
- | not hasBlocks &&
- isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $
- pandocTable opts (all null headers) aligns widths
- rawHeaders rawRows
- | isEnabled Ext_grid_tables opts -> fmap (id,) $
- gridTable opts (all null headers) aligns widths
- rawHeaders rawRows
- | isEnabled Ext_raw_html opts -> fmap (id,) $
- return $ text $ writeHtmlString def
- $ Pandoc nullMeta [t]
- | otherwise -> return $ (id, text "[TABLE]")
- return $ nst $ tbl $$ blankline $$ caption'' $$ blankline
+ let padRow r = case numcols - length r of
+ x | x > 0 -> r ++ replicate x empty
+ | otherwise -> r
+ let aligns' = case numcols - length aligns of
+ x | x > 0 -> aligns ++ replicate x AlignDefault
+ | otherwise -> aligns
+ let widths' = case numcols - length widths of
+ x | x > 0 -> widths ++ replicate x 0.0
+ | otherwise -> widths
+ (nst,tbl) <-
+ case True of
+ _ | isSimple &&
+ isEnabled Ext_simple_tables opts -> do
+ rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers
+ rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts))
+ rows
+ (nest 2,) <$> pandocTable opts False (all null headers)
+ aligns' widths' rawHeaders rawRows
+ | isSimple &&
+ isEnabled Ext_pipe_tables opts -> do
+ rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers
+ rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts))
+ rows
+ (id,) <$> pipeTable (all null headers) aligns' rawHeaders rawRows
+ | not hasBlocks &&
+ isEnabled Ext_multiline_tables opts -> do
+ rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers
+ rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts))
+ rows
+ (nest 2,) <$> pandocTable opts True (all null headers)
+ aligns' widths' rawHeaders rawRows
+ | isEnabled Ext_grid_tables opts &&
+ writerColumns opts >= 8 * numcols -> (id,) <$>
+ gridTable opts blockListToMarkdown
+ (all null headers) aligns' widths' headers rows
+ | isEnabled Ext_raw_html opts -> fmap (id,) $
+ (text . T.unpack) <$>
+ (writeHtml5String def $ Pandoc nullMeta [t])
+ | hasSimpleCells &&
+ isEnabled Ext_pipe_tables opts -> do
+ rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers
+ rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts))
+ rows
+ (id,) <$> pipeTable (all null headers) aligns' rawHeaders rawRows
+ | otherwise -> return $ (id, text "[TABLE]")
+ return $ nst $ tbl $$ caption'' $$ blankline
blockToMarkdown' opts (BulletList items) = do
contents <- inList $ mapM (bulletListItemToMarkdown opts) items
return $ cat contents <> blankline
@@ -550,7 +646,7 @@ blockToMarkdown' opts (DefinitionList items) = do
contents <- inList $ mapM (definitionListItemToMarkdown opts) items
return $ cat contents <> blankline
-inList :: MD a -> MD a
+inList :: Monad m => MD m a -> MD m a
inList p = local (\env -> env {envInList = True}) p
addMarkdownAttribute :: String -> String
@@ -562,7 +658,7 @@ addMarkdownAttribute s =
x /= "markdown"]
_ -> s
-pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD Doc
+pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD m Doc
pipeTable headless aligns rawHeaders rawRows = do
let sp = text " "
let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
@@ -590,9 +686,10 @@ pipeTable headless aligns rawHeaders rawRows = do
let body = vcat $ map torow rawRows
return $ header $$ border $$ body
-pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
- -> [Doc] -> [[Doc]] -> MD Doc
-pandocTable opts headless aligns widths rawHeaders rawRows = do
+pandocTable :: PandocMonad m
+ => WriterOptions -> Bool -> Bool -> [Alignment] -> [Double]
+ -> [Doc] -> [[Doc]] -> MD m Doc
+pandocTable opts multiline headless aligns widths rawHeaders rawRows = do
let isSimple = all (==0) widths
let alignHeader alignment = case alignment of
AlignLeft -> lblock
@@ -609,23 +706,21 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do
let minNumChars = (+ 2) . maximum . map minOffset
let columns = transpose (rawHeaders : rawRows)
-- minimal column width without wrapping a single word
- let noWordWrapWidth
- | writerWrapText opts == WrapAuto
- = fromIntegral $ maximum (map minNumChars columns)
- | otherwise = fromIntegral $ maximum (map numChars columns)
- let relWidth w = floor $ max (fromIntegral (writerColumns opts) * w)
- (noWordWrapWidth * w / minimum widths)
+ let relWidth w col =
+ max (floor $ fromIntegral (writerColumns opts - 1) * w)
+ (if writerWrapText opts == WrapAuto
+ then minNumChars col
+ else numChars col)
let widthsInChars
| isSimple = map numChars columns
- | otherwise = map relWidth widths
+ | otherwise = zipWith relWidth widths columns
let makeRow = hcat . intersperse (lblock 1 (text " ")) .
(zipWith3 alignHeader aligns widthsInChars)
let rows' = map makeRow rawRows
let head' = makeRow rawHeaders
- let maxRowHeight = maximum $ map height (head':rows')
let underline = cat $ intersperse (text " ") $
map (\width -> text (replicate width '-')) widthsInChars
- let border = if maxRowHeight > 1
+ let border = if multiline
then text (replicate (sum widthsInChars +
length widthsInChars - 1) '-')
else if headless
@@ -634,7 +729,7 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do
let head'' = if headless
then empty
else border <> cr <> head'
- let body = if maxRowHeight > 1
+ let body = if multiline
then vsep rows'
else vcat rows'
let bottom = if headless
@@ -642,62 +737,15 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do
else border
return $ head'' $$ underline $$ body $$ bottom
-gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
- -> [Doc] -> [[Doc]] -> MD Doc
-gridTable opts headless aligns widths headers' rawRows = do
- let numcols = length headers'
- let widths' = if all (==0) widths
- then replicate numcols (1.0 / fromIntegral numcols)
- else widths
- let widthsInChars = map
- ((\x -> x - 3) . floor . (fromIntegral (writerColumns opts) *)) widths'
- let hpipeBlocks blocks = hcat [beg, middle, end]
- where h = maximum (1 : map height blocks)
- sep' = lblock 3 $ vcat (map text $ replicate h " | ")
- beg = lblock 2 $ vcat (map text $ replicate h "| ")
- end = lblock 2 $ vcat (map text $ replicate h " |")
- middle = chomp $ hcat $ intersperse sep' blocks
- let makeRow = hpipeBlocks . zipWith lblock widthsInChars
- let head' = makeRow headers'
- let rows' = map (makeRow . map chomp) rawRows
- let borderpart ch align widthInChars =
- let widthInChars' = if widthInChars < 1 then 1 else widthInChars
- in (if (align == AlignLeft || align == AlignCenter)
- then char ':'
- else char ch) <>
- text (replicate widthInChars' ch) <>
- (if (align == AlignRight || align == AlignCenter)
- then char ':'
- else char ch)
- let border ch aligns' widthsInChars' =
- char '+' <>
- hcat (intersperse (char '+') (zipWith (borderpart ch)
- aligns' widthsInChars')) <> char '+'
- let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars)
- rows'
- let head'' = if headless
- then empty
- else head' $$ border '=' aligns widthsInChars
- if headless
- then return $
- border '-' aligns widthsInChars $$
- body $$
- border '-' (repeat AlignDefault) widthsInChars
- else return $
- border '-' (repeat AlignDefault) widthsInChars $$
- head'' $$
- body $$
- border '-' (repeat AlignDefault) widthsInChars
-
itemEndsWithTightList :: [Block] -> Bool
itemEndsWithTightList bs =
case bs of
[Plain _, BulletList xs] -> isTightList xs
[Plain _, OrderedList _ xs] -> isTightList xs
- _ -> False
+ _ -> False
-- | Convert bullet list item (list of blocks) to markdown.
-bulletListItemToMarkdown :: WriterOptions -> [Block] -> MD Doc
+bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc
bulletListItemToMarkdown opts bs = do
contents <- blockListToMarkdown opts bs
let sps = replicate (writerTabStop opts - 2) ' '
@@ -709,15 +757,16 @@ bulletListItemToMarkdown opts bs = do
return $ hang (writerTabStop opts) start $ contents' <> cr
-- | Convert ordered list item (a list of blocks) to markdown.
-orderedListItemToMarkdown :: WriterOptions -- ^ options
+orderedListItemToMarkdown :: PandocMonad m
+ => WriterOptions -- ^ options
-> String -- ^ list item marker
-> [Block] -- ^ list item (list of blocks)
- -> MD Doc
+ -> MD m Doc
orderedListItemToMarkdown opts marker bs = do
contents <- blockListToMarkdown opts bs
let sps = case length marker - writerTabStop opts of
n | n > 0 -> text $ replicate n ' '
- _ -> text " "
+ _ -> text " "
let start = text marker <> sps
-- remove trailing blank line if item ends with a tight list
let contents' = if itemEndsWithTightList bs
@@ -726,9 +775,10 @@ orderedListItemToMarkdown opts marker bs = do
return $ hang (writerTabStop opts) start $ contents' <> cr
-- | Convert definition list item (label, list of blocks) to markdown.
-definitionListItemToMarkdown :: WriterOptions
+definitionListItemToMarkdown :: PandocMonad m
+ => WriterOptions
-> ([Inline],[[Block]])
- -> MD Doc
+ -> MD m Doc
definitionListItemToMarkdown opts (label, defs) = do
labelText <- inlineListToMarkdown opts label
defs' <- mapM (mapM (blockToMarkdown opts)) defs
@@ -739,7 +789,7 @@ definitionListItemToMarkdown opts (label, defs) = do
let leader = if isPlain then " " else ": "
let sps = case writerTabStop opts - 3 of
n | n > 0 -> text $ replicate n ' '
- _ -> text " "
+ _ -> text " "
if isEnabled Ext_compact_definition_lists opts
then do
let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
@@ -750,7 +800,7 @@ definitionListItemToMarkdown opts (label, defs) = do
$ vcat d <> cr) defs'
let isTight = case defs of
((Plain _ : _): _) -> True
- _ -> False
+ _ -> False
return $ blankline <> nowrap labelText <>
(if isTight then cr else blankline) <> contents <> blankline
else do
@@ -758,75 +808,129 @@ definitionListItemToMarkdown opts (label, defs) = do
vsep (map vsep defs') <> blankline
-- | Convert list of Pandoc block elements to markdown.
-blockListToMarkdown :: WriterOptions -- ^ Options
+blockListToMarkdown :: PandocMonad m
+ => WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
- -> MD Doc
-blockListToMarkdown opts blocks =
+ -> MD m Doc
+blockListToMarkdown opts blocks = do
+ inlist <- asks envInList
+ isPlain <- asks envPlain
+ -- a) insert comment between list and indented code block, or the
+ -- code block will be treated as a list continuation paragraph
+ -- b) change Plain to Para unless it's followed by a RawBlock
+ -- or has a list as its parent (#3487)
+ let fixBlocks (b : CodeBlock attr x : rest)
+ | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr)
+ && isListBlock b = b : commentSep : CodeBlock attr x :
+ fixBlocks rest
+ fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (Plain ils : bs@(RawBlock{}:_)) =
+ Plain ils : fixBlocks bs
+ fixBlocks (Plain ils : bs) | inlist =
+ Plain ils : fixBlocks bs
+ fixBlocks (Plain ils : bs) =
+ Para ils : fixBlocks bs
+ fixBlocks (x : xs) = x : fixBlocks xs
+ fixBlocks [] = []
+ isListBlock (BulletList _) = True
+ isListBlock (OrderedList _ _) = True
+ isListBlock (DefinitionList _) = True
+ isListBlock _ = False
+ commentSep = if isPlain
+ then Null
+ else if isEnabled Ext_raw_html opts
+ then RawBlock "html" "<!-- -->\n"
+ else RawBlock "markdown" "&nbsp;\n"
mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat
- -- insert comment between list and indented code block, or the
- -- code block will be treated as a list continuation paragraph
- where fixBlocks (b : CodeBlock attr x : rest)
- | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr)
- && isListBlock b = b : commentSep : CodeBlock attr x :
- fixBlocks rest
- fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) =
- b1 : commentSep : fixBlocks (b2:bs)
- fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) =
- b1 : commentSep : fixBlocks (b2:bs)
- fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) =
- b1 : commentSep : fixBlocks (b2:bs)
- fixBlocks (x : xs) = x : fixBlocks xs
- fixBlocks [] = []
- isListBlock (BulletList _) = True
- isListBlock (OrderedList _ _) = True
- isListBlock (DefinitionList _) = True
- isListBlock _ = False
- commentSep = if isEnabled Ext_raw_html opts
- then RawBlock "html" "<!-- -->\n"
- else RawBlock "markdown" "&nbsp;"
+
+getKey :: Doc -> Key
+getKey = toKey . render Nothing
-- | Get reference for target; if none exists, create unique one and return.
-- Prefer label if possible; otherwise, generate a unique key.
-getReference :: Attr -> [Inline] -> Target -> MD [Inline]
+getReference :: PandocMonad m => Attr -> Doc -> Target -> MD m Doc
getReference attr label target = do
- st <- get
- case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of
+ refs <- gets stRefs
+ case find (\(_,t,a) -> t == target && a == attr) refs of
Just (ref, _, _) -> return ref
Nothing -> do
- let label' = case find (\(l,_,_) -> l == label) (stRefs st) of
- Just _ -> -- label is used; generate numerical label
- case find (\n -> notElem [Str (show n)]
- (map (\(l,_,_) -> l) (stRefs st)))
- [1..(10000 :: Integer)] of
- Just x -> [Str (show x)]
- Nothing -> error "no unique label"
- Nothing -> label
- modify (\s -> s{ stRefs = (label', target, attr) : stRefs st })
- return label'
+ keys <- gets stKeys
+ case M.lookup (getKey label) keys of
+ Nothing -> do -- no other refs with this label
+ (lab', idx) <- if isEmpty label
+ then do
+ i <- (+ 1) <$> gets stLastIdx
+ modify $ \s -> s{ stLastIdx = i }
+ return (text (show i), i)
+ else return (label, 0)
+ modify (\s -> s{
+ stRefs = (lab', target, attr) : refs,
+ stKeys = M.insert (getKey label)
+ (M.insert (target, attr) idx mempty)
+ (stKeys s) })
+ return lab'
+
+ Just km -> do -- we have refs with this label
+ case M.lookup (target, attr) km of
+ Just i -> do
+ let lab' = label <> if i == 0
+ then mempty
+ else text (show i)
+ -- make sure it's in stRefs; it may be
+ -- a duplicate that was printed in a previous
+ -- block:
+ when ((lab', target, attr) `notElem` refs) $
+ modify (\s -> s{
+ stRefs = (lab', target, attr) : refs })
+ return lab'
+ Nothing -> do -- but this one is to a new target
+ i <- (+ 1) <$> gets stLastIdx
+ modify $ \s -> s{ stLastIdx = i }
+ let lab' = text (show i)
+ modify (\s -> s{
+ stRefs = (lab', target, attr) : refs,
+ stKeys = M.insert (getKey label)
+ (M.insert (target, attr) i km)
+ (stKeys s) })
+ return lab'
-- | Convert list of Pandoc inline elements to markdown.
-inlineListToMarkdown :: WriterOptions -> [Inline] -> MD Doc
+inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc
inlineListToMarkdown opts lst = do
inlist <- asks envInList
go (if inlist then avoidBadWrapsInList lst else lst)
where go [] = return empty
go (i:is) = case i of
(Link _ _ _) -> case is of
- -- If a link is followed by another link or '[' we don't shortcut
- (Link _ _ _):_ -> unshortcutable
- Space:(Link _ _ _):_ -> unshortcutable
- Space:(Str('[':_)):_ -> unshortcutable
- Space:(RawInline _ ('[':_)):_ -> unshortcutable
- Space:(Cite _ _):_ -> unshortcutable
- SoftBreak:(Link _ _ _):_ -> unshortcutable
- SoftBreak:(Str('[':_)):_ -> unshortcutable
+ -- If a link is followed by another link, or '[', '(' or ':'
+ -- then we don't shortcut
+ (Link _ _ _):_ -> unshortcutable
+ Space:(Link _ _ _):_ -> unshortcutable
+ Space:(Str('[':_)):_ -> unshortcutable
+ Space:(RawInline _ ('[':_)):_ -> unshortcutable
+ Space:(Cite _ _):_ -> unshortcutable
+ SoftBreak:(Link _ _ _):_ -> unshortcutable
+ SoftBreak:(Str('[':_)):_ -> unshortcutable
SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable
- SoftBreak:(Cite _ _):_ -> unshortcutable
- (Cite _ _):_ -> unshortcutable
- Str ('[':_):_ -> unshortcutable
- (RawInline _ ('[':_)):_ -> unshortcutable
- (RawInline _ (' ':'[':_)):_ -> unshortcutable
- _ -> shortcutable
+ SoftBreak:(Cite _ _):_ -> unshortcutable
+ LineBreak:(Link _ _ _):_ -> unshortcutable
+ LineBreak:(Str('[':_)):_ -> unshortcutable
+ LineBreak:(RawInline _ ('[':_)):_ -> unshortcutable
+ LineBreak:(Cite _ _):_ -> unshortcutable
+ (Cite _ _):_ -> unshortcutable
+ Str ('[':_):_ -> unshortcutable
+ Str ('(':_):_ -> unshortcutable
+ Str (':':_):_ -> unshortcutable
+ (RawInline _ ('[':_)):_ -> unshortcutable
+ (RawInline _ ('(':_)):_ -> unshortcutable
+ (RawInline _ (':':_)):_ -> unshortcutable
+ (RawInline _ (' ':'[':_)):_ -> unshortcutable
+ _ -> shortcutable
_ -> shortcutable
where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
unshortcutable = do
@@ -836,9 +940,9 @@ inlineListToMarkdown opts lst = do
fmap (iMark <>) (go is)
isSp :: Inline -> Bool
-isSp Space = True
+isSp Space = True
isSp SoftBreak = True
-isSp _ = False
+isSp _ = False
avoidBadWrapsInList :: [Inline] -> [Inline]
avoidBadWrapsInList [] = []
@@ -857,7 +961,7 @@ avoidBadWrapsInList (s:Str cs:[])
avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
isOrderedListMarker :: String -> Bool
-isOrderedListMarker xs = (last xs `elem` ['.',')']) &&
+isOrderedListMarker xs = not (null xs) && (last xs `elem` ['.',')']) &&
isRight (runParser (anyOrderedListMarker >> eof)
defaultParserState "" xs)
@@ -866,27 +970,30 @@ isRight (Right _) = True
isRight (Left _) = False
-- | Convert Pandoc inline element to markdown.
-inlineToMarkdown :: WriterOptions -> Inline -> MD Doc
+inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc
inlineToMarkdown opts (Span attrs ils) = do
plain <- asks envPlain
contents <- inlineListToMarkdown opts ils
return $ case plain of
True -> contents
- False | isEnabled Ext_bracketed_spans opts ->
- "[" <> contents <> "]" <>
- if attrs == nullAttr
- then "{}"
- else linkAttributes opts attrs
+ False | attrs == nullAttr -> contents
+ | isEnabled Ext_bracketed_spans opts ->
+ let attrs' = if attrs /= nullAttr
+ then attrsToMarkdown attrs
+ else empty
+ in "[" <> contents <> "]" <> attrs'
| isEnabled Ext_raw_html opts ||
isEnabled Ext_native_spans opts ->
tagWithAttrs "span" attrs <> contents <> text "</span>"
| otherwise -> contents
+inlineToMarkdown _ (Emph []) = return empty
inlineToMarkdown opts (Emph lst) = do
plain <- asks envPlain
contents <- inlineListToMarkdown opts lst
return $ if plain
then "_" <> contents <> "_"
else "*" <> contents <> "*"
+inlineToMarkdown _ (Strong []) = return empty
inlineToMarkdown opts (Strong lst) = do
plain <- asks envPlain
if plain
@@ -894,6 +1001,7 @@ inlineToMarkdown opts (Strong lst) = do
else do
contents <- inlineListToMarkdown opts lst
return $ "**" <> contents <> "**"
+inlineToMarkdown _ (Strikeout []) = return empty
inlineToMarkdown opts (Strikeout lst) = do
contents <- inlineListToMarkdown opts lst
return $ if isEnabled Ext_strikeout opts
@@ -901,6 +1009,7 @@ inlineToMarkdown opts (Strikeout lst) = do
else if isEnabled Ext_raw_html opts
then "<s>" <> contents <> "</s>"
else contents
+inlineToMarkdown _ (Superscript []) = return empty
inlineToMarkdown opts (Superscript lst) =
local (\env -> env {envEscapeSpaces = True}) $ do
contents <- inlineListToMarkdown opts lst
@@ -908,14 +1017,12 @@ inlineToMarkdown opts (Superscript lst) =
then "^" <> contents <> "^"
else if isEnabled Ext_raw_html opts
then "<sup>" <> contents <> "</sup>"
- else case (render Nothing contents) of
- ds | all (\d -> d >= '0' && d <= '9') ds
- -> text (map toSuperscript ds)
- _ -> contents
- where toSuperscript '1' = '\x00B9'
- toSuperscript '2' = '\x00B2'
- toSuperscript '3' = '\x00B3'
- toSuperscript c = chr (0x2070 + (ord c - 48))
+ else
+ let rendered = render Nothing contents
+ in case mapM toSuperscript rendered of
+ Just r -> text r
+ Nothing -> text $ "^(" ++ rendered ++ ")"
+inlineToMarkdown _ (Subscript []) = return empty
inlineToMarkdown opts (Subscript lst) =
local (\env -> env {envEscapeSpaces = True}) $ do
contents <- inlineListToMarkdown opts lst
@@ -923,27 +1030,27 @@ inlineToMarkdown opts (Subscript lst) =
then "~" <> contents <> "~"
else if isEnabled Ext_raw_html opts
then "<sub>" <> contents <> "</sub>"
- else case (render Nothing contents) of
- ds | all (\d -> d >= '0' && d <= '9') ds
- -> text (map toSubscript ds)
- _ -> contents
- where toSubscript c = chr (0x2080 + (ord c - 48))
+ else
+ let rendered = render Nothing contents
+ in case mapM toSubscript rendered of
+ Just r -> text r
+ Nothing -> text $ "_(" ++ rendered ++ ")"
inlineToMarkdown opts (SmallCaps lst) = do
plain <- asks envPlain
if not plain &&
(isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
- then do
- contents <- inlineListToMarkdown opts lst
- return $ tagWithAttrs "span"
- ("",[],[("style","font-variant:small-caps;")])
- <> contents <> text "</span>"
+ then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst)
else inlineListToMarkdown opts $ capitalize lst
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
- return $ "‘" <> contents <> "’"
+ return $ if isEnabled Ext_smart opts
+ then "'" <> contents <> "'"
+ else "‘" <> contents <> "’"
inlineToMarkdown opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMarkdown opts lst
- return $ "“" <> contents <> "”"
+ return $ if isEnabled Ext_smart opts
+ then "\"" <> contents <> "\""
+ else "“" <> contents <> "”"
inlineToMarkdown opts (Code attr str) = do
let tickGroups = filter (\s -> '`' `elem` s) $ group str
let longest = if null tickGroups
@@ -960,14 +1067,17 @@ inlineToMarkdown opts (Code attr str) = do
else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
inlineToMarkdown opts (Str str) = do
isPlain <- asks envPlain
- if isPlain
- then return $ text str
- else return $ text $ escapeString opts str
+ let str' = (if isEnabled Ext_smart opts
+ then unsmartify opts
+ else id) $
+ if isPlain
+ then str
+ else escapeString opts str
+ return $ text str'
inlineToMarkdown opts (Math InlineMath str) =
case writerHTMLMathMethod opts of
- WebTeX url ->
- inlineToMarkdown opts (Image nullAttr [Str str]
- (url ++ urlEncode str, str))
+ WebTeX url -> inlineToMarkdown opts
+ (Image nullAttr [Str str] (url ++ urlEncode str, str))
_ | isEnabled Ext_tex_math_dollars opts ->
return $ "$" <> text str <> "$"
| isEnabled Ext_tex_math_single_backslash opts ->
@@ -976,9 +1086,9 @@ inlineToMarkdown opts (Math InlineMath str) =
return $ "\\\\(" <> text str <> "\\\\)"
| otherwise -> do
plain <- asks envPlain
- inlineListToMarkdown opts $
- (if plain then makeMathPlainer else id) $
- texMathToInlines InlineMath str
+ texMathToInlines InlineMath str >>=
+ inlineListToMarkdown opts .
+ (if plain then makeMathPlainer else id)
inlineToMarkdown opts (Math DisplayMath str) =
case writerHTMLMathMethod opts of
WebTeX url -> (\x -> blankline <> x <> blankline) `fmap`
@@ -991,15 +1101,19 @@ inlineToMarkdown opts (Math DisplayMath str) =
| isEnabled Ext_tex_math_double_backslash opts ->
return $ "\\\\[" <> text str <> "\\\\]"
| otherwise -> (\x -> cr <> x <> cr) `fmap`
- inlineListToMarkdown opts (texMathToInlines DisplayMath str)
-inlineToMarkdown opts (RawInline f str) = do
+ (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
+inlineToMarkdown opts il@(RawInline f str) = do
plain <- asks envPlain
- if not plain &&
- ( f == "markdown" ||
+ if (plain && f == "plain") || (not plain &&
+ ( f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+ "markdown_mmd", "markdown_strict"] ||
(isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) ||
- (isEnabled Ext_raw_html opts && f == "html") )
+ (isEnabled Ext_raw_html opts && f `elem` ["html", "html4", "html5"])
+ ))
then return $ text str
- else return empty
+ else do
+ report $ InlineNotRendered il
+ return empty
inlineToMarkdown opts (LineBreak) = do
plain <- asks envPlain
if plain || isEnabled Ext_hard_line_breaks opts
@@ -1052,7 +1166,8 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit))
| isEnabled Ext_raw_html opts &&
not (isEnabled Ext_link_attributes opts) &&
attr /= nullAttr = -- use raw HTML
- return $ text $ trim $ writeHtmlString def $ Pandoc nullMeta [Plain [lnk]]
+ (text . T.unpack . T.strip) <$>
+ writeHtml5String def (Pandoc nullMeta [Plain [lnk]])
| otherwise = do
plain <- asks envPlain
linktext <- inlineListToMarkdown opts txt
@@ -1063,20 +1178,20 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit))
let useAuto = isURI src &&
case txt of
[Str s] | escapeURI s == srcSuffix -> True
- _ -> False
+ _ -> False
let useRefLinks = writerReferenceLinks opts && not useAuto
shortcutable <- asks envRefShortcutable
let useShortcutRefLinks = shortcutable &&
isEnabled Ext_shortcut_reference_links opts
- ref <- if useRefLinks then getReference attr txt (src, tit) else return []
- reftext <- inlineListToMarkdown opts ref
+ reftext <- if useRefLinks then getReference attr linktext (src, tit)
+ else return empty
return $ if useAuto
then if plain
then text srcSuffix
else "<" <> text srcSuffix <> ">"
else if useRefLinks
then let first = "[" <> linktext <> "]"
- second = if txt == ref
+ second = if getKey linktext == getKey reftext
then if useShortcutRefLinks
then ""
else "[]"
@@ -1091,7 +1206,8 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit))
| isEnabled Ext_raw_html opts &&
not (isEnabled Ext_link_attributes opts) &&
attr /= nullAttr = -- use raw HTML
- return $ text $ trim $ writeHtmlString def $ Pandoc nullMeta [Plain [img]]
+ (text . T.unpack . T.strip) <$>
+ writeHtml5String def (Pandoc nullMeta [Plain [img]])
| otherwise = do
plain <- asks envPlain
let txt = if null alternate || alternate == [Str source]
@@ -1114,4 +1230,36 @@ makeMathPlainer :: [Inline] -> [Inline]
makeMathPlainer = walk go
where
go (Emph xs) = Span nullAttr xs
- go x = x
+ go x = x
+
+toSuperscript :: Char -> Maybe Char
+toSuperscript '1' = Just '\x00B9'
+toSuperscript '2' = Just '\x00B2'
+toSuperscript '3' = Just '\x00B3'
+toSuperscript '+' = Just '\x207A'
+toSuperscript '-' = Just '\x207B'
+toSuperscript '=' = Just '\x207C'
+toSuperscript '(' = Just '\x207D'
+toSuperscript ')' = Just '\x207E'
+toSuperscript c
+ | c >= '0' && c <= '9' =
+ Just $ chr (0x2070 + (ord c - 48))
+ | isSpace c = Just c
+ | otherwise = Nothing
+
+toSubscript :: Char -> Maybe Char
+toSubscript '+' = Just '\x208A'
+toSubscript '-' = Just '\x208B'
+toSubscript '=' = Just '\x208C'
+toSubscript '(' = Just '\x208D'
+toSubscript ')' = Just '\x208E'
+toSubscript c
+ | c >= '0' && c <= '9' =
+ Just $ chr (0x2080 + (ord c - 48))
+ | isSpace c = Just c
+ | otherwise = Nothing
+
+lineBreakToSpace :: Inline -> Inline
+lineBreakToSpace LineBreak = Space
+lineBreakToSpace SoftBreak = Space
+lineBreakToSpace x = x
diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs
new file mode 100644
index 000000000..477f5a0b1
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Math.hs
@@ -0,0 +1,56 @@
+module Text.Pandoc.Writers.Math
+ ( texMathToInlines
+ , convertMath
+ , defaultMathJaxURL
+ , defaultKaTeXURL
+ )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.TeXMath (DisplayType (..), Exp, readTeX, writePandoc)
+
+-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
+-- Defaults to raw formula between @$@ or @$$@ characters if entire formula
+-- can't be converted.
+texMathToInlines :: PandocMonad m
+ => MathType
+ -> String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> m [Inline]
+texMathToInlines mt inp = do
+ res <- convertMath writePandoc mt inp
+ case res of
+ Right (Just ils) -> return ils
+ Right Nothing -> do
+ report $ CouldNotConvertTeXMath inp ""
+ return [mkFallback mt inp]
+ Left il -> return [il]
+
+mkFallback :: MathType -> String -> Inline
+mkFallback mt str = Str (delim ++ str ++ delim)
+ where delim = case mt of
+ DisplayMath -> "$$"
+ InlineMath -> "$"
+
+-- | Converts a raw TeX math formula using a writer function,
+-- issuing a warning and producing a fallback (a raw string)
+-- on failure.
+convertMath :: PandocMonad m
+ => (DisplayType -> [Exp] -> a) -> MathType -> String
+ -> m (Either Inline a)
+convertMath writer mt str =
+ case writer dt <$> readTeX str of
+ Right r -> return (Right r)
+ Left e -> do
+ report $ CouldNotConvertTeXMath str e
+ return (Left $ mkFallback mt str)
+ where dt = case mt of
+ DisplayMath -> DisplayBlock
+ InlineMath -> DisplayInline
+
+defaultMathJaxURL :: String
+defaultMathJaxURL = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/"
+
+defaultKaTeXURL :: String
+defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.8.3/"
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index 78d4651e7..2470d9200 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.MediaWiki
- Copyright : Copyright (C) 2008-2015 John MacFarlane
+ Copyright : Copyright (C) 2008-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -30,41 +30,44 @@ Conversion of 'Pandoc' documents to MediaWiki markup.
MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki>
-}
module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import Data.List (intercalate)
+import qualified Data.Set as Set
+import Data.Text (Text, pack)
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
import Text.Pandoc.Pretty (render)
-import Text.Pandoc.ImageSize
+import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.XML ( escapeStringForXML )
-import Data.List ( intersect, intercalate )
-import Network.URI ( isURI )
-import Control.Monad.Reader
-import Control.Monad.State
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML (escapeStringForXML)
data WriterState = WriterState {
- stNotes :: Bool -- True if there are notes
- , stOptions :: WriterOptions -- writer options
+ stNotes :: Bool -- True if there are notes
+ , stOptions :: WriterOptions -- writer options
}
data WriterReader = WriterReader {
- options :: WriterOptions -- Writer options
- , listLevel :: String -- String at beginning of list items, e.g. "**"
- , useTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ options :: WriterOptions -- Writer options
+ , listLevel :: String -- String at beginning of list items, e.g. "**"
+ , useTags :: Bool -- True if we should use HTML tags because we're in a complex list
}
-type MediaWikiWriter = ReaderT WriterReader (State WriterState)
+type MediaWikiWriter m = ReaderT WriterReader (StateT WriterState m)
-- | Convert Pandoc to MediaWiki.
-writeMediaWiki :: WriterOptions -> Pandoc -> String
+writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeMediaWiki opts document =
let initialState = WriterState { stNotes = False, stOptions = opts }
env = WriterReader { options = opts, listLevel = [], useTags = False }
- in evalState (runReaderT (pandocToMediaWiki document) env) initialState
+ in evalStateT (runReaderT (pandocToMediaWiki document) env) initialState
-- | Return MediaWiki representation of document.
-pandocToMediaWiki :: Pandoc -> MediaWikiWriter String
+pandocToMediaWiki :: PandocMonad m => Pandoc -> MediaWikiWriter m Text
pandocToMediaWiki (Pandoc meta blocks) = do
opts <- asks options
metadata <- metaToJSON opts
@@ -79,8 +82,8 @@ pandocToMediaWiki (Pandoc meta blocks) = do
let main = body ++ notes
let context = defField "body" main
$ defField "toc" (writerTableOfContents opts) metadata
- return $ case writerTemplate opts of
- Nothing -> main
+ pack <$> case writerTemplate opts of
+ Nothing -> return main
Just tpl -> renderTemplate' tpl context
-- | Escape special characters for MediaWiki.
@@ -88,8 +91,9 @@ escapeString :: String -> String
escapeString = escapeStringForXML
-- | Convert Pandoc block element to MediaWiki.
-blockToMediaWiki :: Block -- ^ Block element
- -> MediaWikiWriter String
+blockToMediaWiki :: PandocMonad m
+ => Block -- ^ Block element
+ -> MediaWikiWriter m String
blockToMediaWiki Null = return ""
@@ -123,10 +127,10 @@ blockToMediaWiki (Para inlines) = do
blockToMediaWiki (LineBlock lns) =
blockToMediaWiki $ linesToPara lns
-blockToMediaWiki (RawBlock f str)
+blockToMediaWiki b@(RawBlock f str)
| f == Format "mediawiki" = return str
| f == Format "html" = return str
- | otherwise = return ""
+ | otherwise = "" <$ report (BlockNotRendered b)
blockToMediaWiki HorizontalRule = return "\n-----\n"
@@ -136,23 +140,15 @@ blockToMediaWiki (Header level _ inlines) = do
return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
blockToMediaWiki (CodeBlock (_,classes,_) str) = do
- let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp",
- "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm",
- "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran",
- "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5",
- "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc",
- "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql",
- "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
- "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
- "visualfoxpro", "winbatch", "xml", "xpp", "z80"]
+ let at = Set.fromList classes `Set.intersection` highlightingLangs
return $
- if null at
- then "<pre" ++ (if null classes
- then ">"
- else " class=\"" ++ unwords classes ++ "\">") ++
- escapeString str ++ "</pre>"
- else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>"
- -- note: no escape!
+ case Set.toList at of
+ [] -> "<pre" ++ (if null classes
+ then ">"
+ else " class=\"" ++ unwords classes ++ "\">") ++
+ escapeString str ++ "</pre>"
+ (l:_) -> "<source lang=\"" ++ l ++ "\">" ++ str ++ "</source>"
+ -- note: no escape! even for <!
blockToMediaWiki (BlockQuote blocks) = do
contents <- blockListToMediaWiki blocks
@@ -218,7 +214,7 @@ listAttribsToString (startnum, numstyle, _) =
else "")
-- | Convert bullet or ordered list item (list of blocks) to MediaWiki.
-listItemToMediaWiki :: [Block] -> MediaWikiWriter String
+listItemToMediaWiki :: PandocMonad m => [Block] -> MediaWikiWriter m String
listItemToMediaWiki items = do
contents <- blockListToMediaWiki items
tags <- asks useTags
@@ -229,8 +225,9 @@ listItemToMediaWiki items = do
return $ marker ++ " " ++ contents
-- | Convert definition list item (label, list of blocks) to MediaWiki.
-definitionListItemToMediaWiki :: ([Inline],[[Block]])
- -> MediaWikiWriter String
+definitionListItemToMediaWiki :: PandocMonad m
+ => ([Inline],[[Block]])
+ -> MediaWikiWriter m String
definitionListItemToMediaWiki (label, items) = do
labelText <- inlineListToMediaWiki label
contents <- mapM blockListToMediaWiki items
@@ -259,18 +256,18 @@ isSimpleListItem :: [Block] -> Bool
isSimpleListItem [] = True
isSimpleListItem [x] =
case x of
- Plain _ -> True
- Para _ -> True
- BulletList _ -> isSimpleList x
- OrderedList _ _ -> isSimpleList x
- DefinitionList _ -> isSimpleList x
- _ -> False
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ DefinitionList _ -> isSimpleList x
+ _ -> False
isSimpleListItem [x, y] | isPlainOrPara x =
case y of
- BulletList _ -> isSimpleList y
- OrderedList _ _ -> isSimpleList y
- DefinitionList _ -> isSimpleList y
- _ -> False
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ DefinitionList _ -> isSimpleList y
+ _ -> False
isSimpleListItem _ = False
isPlainOrPara :: Block -> Bool
@@ -284,20 +281,22 @@ vcat = intercalate "\n"
-- Auxiliary functions for tables:
-tableRowToMediaWiki :: Bool
+tableRowToMediaWiki :: PandocMonad m
+ => Bool
-> [Alignment]
-> [Double]
-> (Int, [[Block]])
- -> MediaWikiWriter String
+ -> MediaWikiWriter m String
tableRowToMediaWiki headless alignments widths (rownum, cells) = do
cells' <- mapM (tableCellToMediaWiki headless rownum)
$ zip3 alignments widths cells
return $ unlines cells'
-tableCellToMediaWiki :: Bool
+tableCellToMediaWiki :: PandocMonad m
+ => Bool
-> Int
-> (Alignment, Double, [Block])
- -> MediaWikiWriter String
+ -> MediaWikiWriter m String
tableCellToMediaWiki headless rownum (alignment, width, bs) = do
contents <- blockListToMediaWiki bs
let marker = if rownum == 1 && not headless then "!" else "|"
@@ -322,13 +321,13 @@ alignmentToString alignment = case alignment of
AlignCenter -> "center"
AlignDefault -> "left"
-imageToMediaWiki :: Attr -> MediaWikiWriter String
+imageToMediaWiki :: PandocMonad m => Attr -> MediaWikiWriter m String
imageToMediaWiki attr = do
opts <- gets stOptions
let (_, cls, _) = attr
toPx = fmap (showInPixel opts) . checkPct
checkPct (Just (Percent _)) = Nothing
- checkPct maybeDim = maybeDim
+ checkPct maybeDim = maybeDim
go (Just w) Nothing = '|':w ++ "px"
go (Just w) (Just h) = '|':w ++ "x" ++ h ++ "px"
go Nothing (Just h) = "|x" ++ h ++ "px"
@@ -340,18 +339,19 @@ imageToMediaWiki attr = do
return $ dims ++ classes
-- | Convert list of Pandoc block elements to MediaWiki.
-blockListToMediaWiki :: [Block] -- ^ List of block elements
- -> MediaWikiWriter String
+blockListToMediaWiki :: PandocMonad m
+ => [Block] -- ^ List of block elements
+ -> MediaWikiWriter m String
blockListToMediaWiki blocks =
fmap vcat $ mapM blockToMediaWiki blocks
-- | Convert list of Pandoc inline elements to MediaWiki.
-inlineListToMediaWiki :: [Inline] -> MediaWikiWriter String
+inlineListToMediaWiki :: PandocMonad m => [Inline] -> MediaWikiWriter m String
inlineListToMediaWiki lst =
fmap concat $ mapM inlineToMediaWiki lst
-- | Convert Pandoc inline element to MediaWiki.
-inlineToMediaWiki :: Inline -> MediaWikiWriter String
+inlineToMediaWiki :: PandocMonad m => Inline -> MediaWikiWriter m String
inlineToMediaWiki (Span attrs ils) = do
contents <- inlineListToMediaWiki ils
@@ -394,22 +394,28 @@ inlineToMediaWiki (Code _ str) =
inlineToMediaWiki (Str str) = return $ escapeString str
-inlineToMediaWiki (Math _ str) = return $ "<math>" ++ str ++ "</math>"
- -- note: str should NOT be escaped
+inlineToMediaWiki (Math mt str) = return $
+ "<math display=\"" ++
+ (if mt == DisplayMath then "block" else "inline") ++
+ "\">" ++ str ++ "</math>"
+ -- note: str should NOT be escaped
-inlineToMediaWiki (RawInline f str)
+inlineToMediaWiki il@(RawInline f str)
| f == Format "mediawiki" = return str
| f == Format "html" = return str
- | otherwise = return ""
+ | otherwise = "" <$ report (InlineNotRendered il)
-inlineToMediaWiki (LineBreak) = return "<br />\n"
+inlineToMediaWiki LineBreak = return "<br />\n"
inlineToMediaWiki SoftBreak = do
wrapText <- gets (writerWrapText . stOptions)
+ listlevel <- asks listLevel
case wrapText of
WrapAuto -> return " "
WrapNone -> return " "
- WrapPreserve -> return "\n"
+ WrapPreserve -> if null listlevel
+ then return "\n"
+ else return " "
inlineToMediaWiki Space = return " "
@@ -437,5 +443,636 @@ inlineToMediaWiki (Image attr alt (source, tit)) = do
inlineToMediaWiki (Note contents) = do
contents' <- blockListToMediaWiki contents
modify (\s -> s { stNotes = True })
- return $ "<ref>" ++ contents' ++ "</ref>"
- -- note - may not work for notes with multiple blocks
+ return $ "<ref>" ++ stripTrailingNewlines contents' ++ "</ref>"
+ -- note - does not work for notes with multiple blocks
+
+highlightingLangs :: Set.Set String
+highlightingLangs = Set.fromList [
+ "abap",
+ "abl",
+ "abnf",
+ "aconf",
+ "actionscript",
+ "actionscript3",
+ "ada",
+ "ada2005",
+ "ada95",
+ "adl",
+ "agda",
+ "ahk",
+ "alloy",
+ "ambienttalk",
+ "ambienttalk/2",
+ "antlr",
+ "antlr-actionscript",
+ "antlr-as",
+ "antlr-c#",
+ "antlr-cpp",
+ "antlr-csharp",
+ "antlr-java",
+ "antlr-objc",
+ "antlr-perl",
+ "antlr-python",
+ "antlr-rb",
+ "antlr-ruby",
+ "apache",
+ "apacheconf",
+ "apl",
+ "applescript",
+ "arduino",
+ "arexx",
+ "as",
+ "as3",
+ "asm",
+ "aspectj",
+ "aspx-cs",
+ "aspx-vb",
+ "asy",
+ "asymptote",
+ "at",
+ "autohotkey",
+ "autoit",
+ "awk",
+ "b3d",
+ "basemake",
+ "bash",
+ "basic",
+ "bat",
+ "batch",
+ "bbcode",
+ "because",
+ "befunge",
+ "bf",
+ "blitzbasic",
+ "blitzmax",
+ "bmax",
+ "bnf",
+ "boo",
+ "boogie",
+ "bplus",
+ "brainfuck",
+ "bro",
+ "bsdmake",
+ "bugs",
+ "c",
+ "c#",
+ "c++",
+ "c++-objdumb",
+ "c-objdump",
+ "ca65",
+ "cadl",
+ "camkes",
+ "cbmbas",
+ "ceylon",
+ "cf3",
+ "cfc",
+ "cfengine3",
+ "cfg",
+ "cfm",
+ "cfs",
+ "chai",
+ "chaiscript",
+ "chapel",
+ "cheetah",
+ "chpl",
+ "cirru",
+ "cl",
+ "clay",
+ "clipper",
+ "clj",
+ "cljs",
+ "clojure",
+ "clojurescript",
+ "cmake",
+ "cobol",
+ "cobolfree",
+ "coffee",
+ "coffee-script",
+ "coffeescript",
+ "common-lisp",
+ "componentpascal",
+ "console",
+ "control",
+ "coq",
+ "cp",
+ "cpp",
+ "cpp-objdump",
+ "cpsa",
+ "crmsh",
+ "croc",
+ "cry",
+ "cryptol",
+ "csh",
+ "csharp",
+ "csound",
+ "csound-csd",
+ "csound-document",
+ "csound-orc",
+ "csound-sco",
+ "csound-score",
+ "css",
+ "css+django",
+ "css+erb",
+ "css+genshi",
+ "css+genshitext",
+ "css+jinja",
+ "css+lasso",
+ "css+mako",
+ "css+mozpreproc",
+ "css+myghty",
+ "css+php",
+ "css+ruby",
+ "css+smarty",
+ "cu",
+ "cucumber",
+ "cuda",
+ "cxx-objdump",
+ "cypher",
+ "cython",
+ "d",
+ "d-objdump",
+ "dart",
+ "debcontrol",
+ "debsources",
+ "delphi",
+ "dg",
+ "diff",
+ "django",
+ "docker",
+ "dockerfile",
+ "dosbatch",
+ "doscon",
+ "dosini",
+ "dpatch",
+ "dtd",
+ "duby",
+ "duel",
+ "dylan",
+ "dylan-console",
+ "dylan-lid",
+ "dylan-repl",
+ "earl-grey",
+ "earlgrey",
+ "easytrieve",
+ "ebnf",
+ "ec",
+ "ecl",
+ "eg",
+ "eiffel",
+ "elisp",
+ "elixir",
+ "elm",
+ "emacs",
+ "erb",
+ "erl",
+ "erlang",
+ "evoque",
+ "ex",
+ "exs",
+ "ezhil",
+ "factor",
+ "fan",
+ "fancy",
+ "felix",
+ "fish",
+ "fishshell",
+ "flx",
+ "fortran",
+ "fortranfixed",
+ "foxpro",
+ "fsharp",
+ "fy",
+ "gap",
+ "gas",
+ "gawk",
+ "genshi",
+ "genshitext",
+ "gherkin",
+ "glsl",
+ "gnuplot",
+ "go",
+ "golo",
+ "gooddata-cl",
+ "gosu",
+ "groff",
+ "groovy",
+ "gst",
+ "haml",
+ "handlebars",
+ "haskell",
+ "haxe",
+ "haxeml",
+ "hexdump",
+ "hs",
+ "html",
+ "html+cheetah",
+ "html+django",
+ "html+erb",
+ "html+evoque",
+ "html+genshi",
+ "html+handlebars",
+ "html+jinja",
+ "html+kid",
+ "html+lasso",
+ "html+mako",
+ "html+myghty",
+ "html+php",
+ "html+ruby",
+ "html+smarty",
+ "html+spitfire",
+ "html+twig",
+ "html+velocity",
+ "htmlcheetah",
+ "htmldjango",
+ "http",
+ "hx",
+ "hxml",
+ "hxsl",
+ "hy",
+ "hybris",
+ "hylang",
+ "i6",
+ "i6t",
+ "i7",
+ "idl",
+ "idl4",
+ "idr",
+ "idris",
+ "iex",
+ "igor",
+ "igorpro",
+ "ik",
+ "inform6",
+ "inform7",
+ "ini",
+ "io",
+ "ioke",
+ "irb",
+ "irc",
+ "isabelle",
+ "j",
+ "jade",
+ "jags",
+ "jasmin",
+ "jasminxt",
+ "java",
+ "javascript",
+ "javascript+cheetah",
+ "javascript+django",
+ "javascript+erb",
+ "javascript+genshi",
+ "javascript+genshitext",
+ "javascript+jinja",
+ "javascript+lasso",
+ "javascript+mako",
+ "javascript+mozpreproc",
+ "javascript+myghty",
+ "javascript+php",
+ "javascript+ruby",
+ "javascript+smarty",
+ "javascript+spitfire",
+ "jbst",
+ "jcl",
+ "jinja",
+ "jl",
+ "jlcon",
+ "jproperties",
+ "js",
+ "js+cheetah",
+ "js+django",
+ "js+erb",
+ "js+genshi",
+ "js+genshitext",
+ "js+jinja",
+ "js+lasso",
+ "js+mako",
+ "js+myghty",
+ "js+php",
+ "js+ruby",
+ "js+smarty",
+ "js+spitfire",
+ "json",
+ "json-ld",
+ "jsonld",
+ "jsonml+bst",
+ "jsp",
+ "julia",
+ "kal",
+ "kconfig",
+ "kernel-config",
+ "kid",
+ "koka",
+ "kotlin",
+ "ksh",
+ "lagda",
+ "lasso",
+ "lassoscript",
+ "latex",
+ "lcry",
+ "lcryptol",
+ "lean",
+ "less",
+ "lhaskell",
+ "lhs",
+ "lid",
+ "lidr",
+ "lidris",
+ "lighttpd",
+ "lighty",
+ "limbo",
+ "linux-config",
+ "liquid",
+ "lisp",
+ "literate-agda",
+ "literate-cryptol",
+ "literate-haskell",
+ "literate-idris",
+ "live-script",
+ "livescript",
+ "llvm",
+ "logos",
+ "logtalk",
+ "lsl",
+ "lua",
+ "m2",
+ "make",
+ "makefile",
+ "mako",
+ "man",
+ "maql",
+ "mask",
+ "mason",
+ "mathematica",
+ "matlab",
+ "matlabsession",
+ "mawk",
+ "menuconfig",
+ "mf",
+ "minid",
+ "mma",
+ "modelica",
+ "modula2",
+ "moin",
+ "monkey",
+ "moo",
+ "moocode",
+ "moon",
+ "moonscript",
+ "mozhashpreproc",
+ "mozpercentpreproc",
+ "mq4",
+ "mq5",
+ "mql",
+ "mql4",
+ "mql5",
+ "msc",
+ "mscgen",
+ "mupad",
+ "mxml",
+ "myghty",
+ "mysql",
+ "nasm",
+ "nawk",
+ "nb",
+ "nemerle",
+ "nesc",
+ "newlisp",
+ "newspeak",
+ "nginx",
+ "nim",
+ "nimrod",
+ "nit",
+ "nix",
+ "nixos",
+ "nroff",
+ "nsh",
+ "nsi",
+ "nsis",
+ "numpy",
+ "obj-c",
+ "obj-c++",
+ "obj-j",
+ "objc",
+ "objc++",
+ "objdump",
+ "objdump-nasm",
+ "objective-c",
+ "objective-c++",
+ "objective-j",
+ "objectivec",
+ "objectivec++",
+ "objectivej",
+ "objectpascal",
+ "objj",
+ "ocaml",
+ "octave",
+ "odin",
+ "ooc",
+ "opa",
+ "openbugs",
+ "openedge",
+ "pacmanconf",
+ "pan",
+ "parasail",
+ "pas",
+ "pascal",
+ "pawn",
+ "pcmk",
+ "perl",
+ "perl6",
+ "php",
+ "php3",
+ "php4",
+ "php5",
+ "pig",
+ "pike",
+ "pkgconfig",
+ "pl",
+ "pl6",
+ "plpgsql",
+ "po",
+ "posh",
+ "postgres",
+ "postgres-console",
+ "postgresql",
+ "postgresql-console",
+ "postscr",
+ "postscript",
+ "pot",
+ "pov",
+ "powershell",
+ "praat",
+ "progress",
+ "prolog",
+ "properties",
+ "proto",
+ "protobuf",
+ "ps1",
+ "ps1con",
+ "psm1",
+ "psql",
+ "puppet",
+ "py",
+ "py3",
+ "py3tb",
+ "pycon",
+ "pypy",
+ "pypylog",
+ "pyrex",
+ "pytb",
+ "python",
+ "python3",
+ "pyx",
+ "qbasic",
+ "qbs",
+ "qml",
+ "qvt",
+ "qvto",
+ "r",
+ "racket",
+ "ragel",
+ "ragel-c",
+ "ragel-cpp",
+ "ragel-d",
+ "ragel-em",
+ "ragel-java",
+ "ragel-objc",
+ "ragel-rb",
+ "ragel-ruby",
+ "raw",
+ "rb",
+ "rbcon",
+ "rconsole",
+ "rd",
+ "rebol",
+ "red",
+ "red/system",
+ "redcode",
+ "registry",
+ "resource",
+ "resourcebundle",
+ "rest",
+ "restructuredtext",
+ "rexx",
+ "rhtml",
+ "rkt",
+ "roboconf-graph",
+ "roboconf-instances",
+ "robotframework",
+ "rout",
+ "rql",
+ "rsl",
+ "rst",
+ "rts",
+ "ruby",
+ "rust",
+ "s",
+ "sage",
+ "salt",
+ "sass",
+ "sc",
+ "scala",
+ "scaml",
+ "scheme",
+ "scilab",
+ "scm",
+ "scss",
+ "sh",
+ "shell",
+ "shell-session",
+ "shen",
+ "slim",
+ "sls",
+ "smali",
+ "smalltalk",
+ "smarty",
+ "sml",
+ "snobol",
+ "sources.list",
+ "sourceslist",
+ "sp",
+ "sparql",
+ "spec",
+ "spitfire",
+ "splus",
+ "sql",
+ "sqlite3",
+ "squeak",
+ "squid",
+ "squid.conf",
+ "squidconf",
+ "ssp",
+ "st",
+ "stan",
+ "supercollider",
+ "sv",
+ "swift",
+ "swig",
+ "systemverilog",
+ "tads3",
+ "tap",
+ "tcl",
+ "tcsh",
+ "tcshcon",
+ "tea",
+ "termcap",
+ "terminfo",
+ "terraform",
+ "tex",
+ "text",
+ "tf",
+ "thrift",
+ "todotxt",
+ "trac-wiki",
+ "trafficscript",
+ "treetop",
+ "ts",
+ "turtle",
+ "twig",
+ "typescript",
+ "udiff",
+ "urbiscript",
+ "v",
+ "vala",
+ "vapi",
+ "vb.net",
+ "vbnet",
+ "vctreestatus",
+ "velocity",
+ "verilog",
+ "vfp",
+ "vgl",
+ "vhdl",
+ "vim",
+ "winbatch",
+ "winbugs",
+ "x10",
+ "xbase",
+ "xml",
+ "xml+cheetah",
+ "xml+django",
+ "xml+erb",
+ "xml+evoque",
+ "xml+genshi",
+ "xml+jinja",
+ "xml+kid",
+ "xml+lasso",
+ "xml+mako",
+ "xml+myghty",
+ "xml+php",
+ "xml+ruby",
+ "xml+smarty",
+ "xml+spitfire",
+ "xml+velocity",
+ "xq",
+ "xql",
+ "xqm",
+ "xquery",
+ "xqy",
+ "xslt",
+ "xten",
+ "xtend",
+ "xul+mozpreproc",
+ "yaml",
+ "yaml+jinja",
+ "zephir" ]
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
new file mode 100644
index 000000000..83d80cd4a
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -0,0 +1,639 @@
+{-
+Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Ms
+ Copyright : Copyright (C) 2007-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to groff ms format.
+
+TODO:
+
+[ ] use base URL to construct absolute URLs from relative ones for external
+ links
+[ ] is there a better way to do strikeout?
+[ ] tight/loose list distinction
+-}
+
+module Text.Pandoc.Writers.Ms ( writeMs ) where
+import Control.Monad.State.Strict
+import Data.Char (isLower, isUpper, toUpper)
+import Data.List (intercalate, intersperse, sort)
+import qualified Data.Map as Map
+import Data.Maybe (catMaybes, fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.URI (escapeURIString, isAllowedInURI)
+import Skylighting
+import System.FilePath (takeExtension)
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Highlighting
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Pretty
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Writers.Shared
+import Text.Printf (printf)
+import Text.TeXMath (writeEqn)
+
+data WriterState = WriterState { stHasInlineMath :: Bool
+ , stFirstPara :: Bool
+ , stNotes :: [Note]
+ , stSmallCaps :: Bool
+ , stHighlighting :: Bool
+ , stFontFeatures :: Map.Map Char Bool
+ }
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{ stHasInlineMath = False
+ , stFirstPara = True
+ , stNotes = []
+ , stSmallCaps = False
+ , stHighlighting = False
+ , stFontFeatures = Map.fromList [
+ ('I',False)
+ , ('B',False)
+ , ('C',False)
+ ]
+ }
+
+type Note = [Block]
+
+type MS = StateT WriterState
+
+-- | Convert Pandoc to Ms.
+writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeMs opts document =
+ evalStateT (pandocToMs opts document) defaultWriterState
+
+-- | Return groff ms representation of document.
+pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text
+pandocToMs opts (Pandoc meta blocks) = do
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let render' :: Doc -> Text
+ render' = render colwidth
+ metadata <- metaToJSON opts
+ (fmap render' . blockListToMs opts)
+ (fmap render' . inlineListToMs' opts)
+ meta
+ body <- blockListToMs opts blocks
+ let main = render' body
+ hasInlineMath <- gets stHasInlineMath
+ let titleMeta = (escapeString . stringify) $ docTitle meta
+ let authorsMeta = map (escapeString . stringify) $ docAuthors meta
+ hasHighlighting <- gets stHighlighting
+ let highlightingMacros = if hasHighlighting
+ then case writerHighlightStyle opts of
+ Nothing -> mempty
+ Just sty -> render' $ styleToMs sty
+ else mempty
+
+ let context = defField "body" main
+ $ defField "has-inline-math" hasInlineMath
+ $ defField "hyphenate" True
+ $ defField "pandoc-version" pandocVersion
+ $ defField "toc" (writerTableOfContents opts)
+ $ defField "title-meta" titleMeta
+ $ defField "author-meta" (intercalate "; " authorsMeta)
+ $ defField "highlighting-macros" highlightingMacros metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
+
+-- | Association list of characters to escape.
+msEscapes :: Map.Map Char String
+msEscapes = Map.fromList
+ [ ('\160', "\\~")
+ , ('\'', "\\[aq]")
+ , ('`', "\\`")
+ , ('\8217', "'")
+ , ('"', "\\[dq]")
+ , ('\x2014', "\\[em]")
+ , ('\x2013', "\\[en]")
+ , ('\x2026', "\\&...")
+ , ('~', "\\[ti]")
+ , ('^', "\\[ha]")
+ , ('-', "\\-")
+ , ('@', "\\@")
+ , ('\\', "\\\\")
+ ]
+
+escapeChar :: Char -> String
+escapeChar c = fromMaybe [c] (Map.lookup c msEscapes)
+
+-- | Escape | character, used to mark inline math, inside math.
+escapeBar :: String -> String
+escapeBar = concatMap go
+ where go '|' = "\\[u007C]"
+ go c = [c]
+
+-- | Escape special characters for Ms.
+escapeString :: String -> String
+escapeString = concatMap escapeChar
+
+escapeUri :: String -> String
+escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c)
+
+toSmallCaps :: String -> String
+toSmallCaps [] = []
+toSmallCaps (c:cs)
+ | isLower c = let (lowers,rest) = span isLower (c:cs)
+ in "\\s-2" ++ escapeString (map toUpper lowers) ++
+ "\\s0" ++ toSmallCaps rest
+ | isUpper c = let (uppers,rest) = span isUpper (c:cs)
+ in escapeString uppers ++ toSmallCaps rest
+ | otherwise = escapeChar c ++ toSmallCaps cs
+
+-- | Escape a literal (code) section for Ms.
+escapeCode :: String -> String
+escapeCode = intercalate "\n" . map escapeLine . lines
+ where escapeCodeChar ' ' = "\\ "
+ escapeCodeChar '\t' = "\\\t"
+ escapeCodeChar c = escapeChar c
+ escapeLine codeline =
+ case concatMap escapeCodeChar codeline of
+ a@('.':_) -> "\\&" ++ a
+ b -> b
+
+-- We split inline lists into sentences, and print one sentence per
+-- line. groff/troff treats the line-ending period differently.
+-- See http://code.google.com/p/pandoc/issues/detail?id=148.
+
+-- | Returns the first sentence in a list of inlines, and the rest.
+breakSentence :: [Inline] -> ([Inline], [Inline])
+breakSentence [] = ([],[])
+breakSentence xs =
+ let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
+ isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
+ isSentenceEndInline LineBreak = True
+ isSentenceEndInline _ = False
+ (as, bs) = break isSentenceEndInline xs
+ in case bs of
+ [] -> (as, [])
+ [c] -> (as ++ [c], [])
+ (c:Space:cs) -> (as ++ [c], cs)
+ (c:SoftBreak:cs) -> (as ++ [c], cs)
+ (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
+ (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
+ (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
+ (c:cs) -> (as ++ [c] ++ ds, es)
+ where (ds, es) = breakSentence cs
+
+-- | Split a list of inlines into sentences.
+splitSentences :: [Inline] -> [[Inline]]
+splitSentences xs =
+ let (sent, rest) = breakSentence xs
+ in if null rest then [sent] else sent : splitSentences rest
+
+blockToMs :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> MS m Doc
+blockToMs _ Null = return empty
+blockToMs opts (Div _ bs) = do
+ setFirstPara
+ res <- blockListToMs opts bs
+ setFirstPara
+ return res
+blockToMs opts (Plain inlines) =
+ liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines
+blockToMs opts (Para [Image attr alt (src,_tit)])
+ | let ext = takeExtension src in (ext == ".ps" || ext == ".eps") = do
+ let (mbW,mbH) = (inPoints opts <$> dimension Width attr,
+ inPoints opts <$> dimension Height attr)
+ let sizeAttrs = case (mbW, mbH) of
+ (Just wp, Nothing) -> space <> doubleQuotes
+ (text (show (floor wp :: Int) ++ "p"))
+ (Just wp, Just hp) -> space <> doubleQuotes
+ (text (show (floor wp :: Int) ++ "p")) <>
+ space <>
+ doubleQuotes (text (show (floor hp :: Int)))
+ _ -> empty
+ capt <- inlineListToMs' opts alt
+ return $ nowrap (text ".PSPIC -C " <>
+ doubleQuotes (text (escapeString src)) <>
+ sizeAttrs) $$
+ text ".ce 1000" $$
+ capt $$
+ text ".ce 0"
+blockToMs opts (Para inlines) = do
+ firstPara <- gets stFirstPara
+ resetFirstPara
+ contents <- liftM vcat $ mapM (inlineListToMs' opts) $
+ splitSentences inlines
+ return $ text (if firstPara then ".LP" else ".PP") $$ contents
+blockToMs _ b@(RawBlock f str)
+ | f == Format "ms" = return $ text str
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToMs _ HorizontalRule = do
+ resetFirstPara
+ return $ text ".HLINE"
+blockToMs opts (Header level (ident,classes,_) inlines) = do
+ setFirstPara
+ contents <- inlineListToMs' opts $ map breakToSpace inlines
+ let (heading, secnum) = if writerNumberSections opts &&
+ "unnumbered" `notElem` classes
+ then (".NH", "\\*[SN]")
+ else (".SH", "")
+ let anchor = if null ident
+ then empty
+ else nowrap $
+ text ".pdfhref M " <> doubleQuotes (text ident)
+ let bookmark = text ".pdfhref O " <> text (show level ++ " ") <>
+ doubleQuotes (text $ secnum ++
+ (if null secnum
+ then ""
+ else " ") ++
+ escapeString (stringify inlines))
+ let backlink = nowrap (text ".pdfhref L -D " <>
+ doubleQuotes (text ident) <> space <> text "\\") <> cr <>
+ text " -- "
+ let tocEntry = if writerTableOfContents opts &&
+ level <= writerTOCDepth opts
+ then text ".XS"
+ $$ backlink <> doubleQuotes (
+ nowrap (text (replicate level '\t') <>
+ (if null secnum
+ then empty
+ else text secnum <> text "\\~\\~")
+ <> contents))
+ $$ text ".XE"
+ else empty
+ modify $ \st -> st{ stFirstPara = True }
+ return $ (text heading <> space <> text (show level)) $$
+ contents $$
+ bookmark $$
+ anchor $$
+ tocEntry
+blockToMs opts (CodeBlock attr str) = do
+ hlCode <- highlightCode opts attr str
+ setFirstPara
+ return $
+ text ".IP" $$
+ text ".nf" $$
+ text "\\f[C]" $$
+ hlCode $$
+ text "\\f[]" $$
+ text ".fi"
+blockToMs opts (LineBlock ls) = do
+ resetFirstPara
+ blockToMs opts $ Para $ intercalate [LineBreak] ls
+blockToMs opts (BlockQuote blocks) = do
+ setFirstPara
+ contents <- blockListToMs opts blocks
+ setFirstPara
+ return $ text ".RS" $$ contents $$ text ".RE"
+blockToMs opts (Table caption alignments widths headers rows) =
+ let aligncode AlignLeft = "l"
+ aligncode AlignRight = "r"
+ aligncode AlignCenter = "c"
+ aligncode AlignDefault = "l"
+ in do
+ caption' <- inlineListToMs' opts caption
+ let iwidths = if all (== 0) widths
+ then repeat ""
+ else map (printf "w(%0.1fn)" . (70 *)) widths
+ -- 78n default width - 8n indent = 70n
+ let coldescriptions = text $ unwords
+ (zipWith (\align width -> aligncode align ++ width)
+ alignments iwidths) ++ "."
+ colheadings <- mapM (blockListToMs opts) headers
+ let makeRow cols = text "T{" $$
+ vcat (intersperse (text "T}\tT{") cols) $$
+ text "T}"
+ let colheadings' = if all null headers
+ then empty
+ else makeRow colheadings $$ char '_'
+ body <- mapM (\row -> do
+ cols <- mapM (blockListToMs opts) row
+ return $ makeRow cols) rows
+ setFirstPara
+ return $ text ".PP" $$ caption' $$
+ text ".TS" $$ text "delim(@@) tab(\t);" $$ coldescriptions $$
+ colheadings' $$ vcat body $$ text ".TE"
+
+blockToMs opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMs opts) items
+ setFirstPara
+ return (vcat contents)
+blockToMs opts (OrderedList attribs items) = do
+ let markers = take (length items) $ orderedListMarkers attribs
+ let indent = 2 +
+ maximum (map length markers)
+ contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $
+ zip markers items
+ setFirstPara
+ return (vcat contents)
+blockToMs opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMs opts) items
+ setFirstPara
+ return (vcat contents)
+
+-- | Convert bullet list item (list of blocks) to ms.
+bulletListItemToMs :: PandocMonad m => WriterOptions -> [Block] -> MS m Doc
+bulletListItemToMs _ [] = return empty
+bulletListItemToMs opts (Para first:rest) =
+ bulletListItemToMs opts (Plain first:rest)
+bulletListItemToMs opts (Plain first:rest) = do
+ first' <- blockToMs opts (Plain first)
+ rest' <- blockListToMs opts rest
+ let first'' = text ".IP \\[bu] 3" $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 3" $$ rest' $$ text ".RE"
+ return (first'' $$ rest'')
+bulletListItemToMs opts (first:rest) = do
+ first' <- blockToMs opts first
+ rest' <- blockListToMs opts rest
+ return $ text "\\[bu] .RS 3" $$ first' $$ rest' $$ text ".RE"
+
+-- | Convert ordered list item (a list of blocks) to ms.
+orderedListItemToMs :: PandocMonad m
+ => WriterOptions -- ^ options
+ -> String -- ^ order marker for list item
+ -> Int -- ^ number of spaces to indent
+ -> [Block] -- ^ list item (list of blocks)
+ -> MS m Doc
+orderedListItemToMs _ _ _ [] = return empty
+orderedListItemToMs opts num indent (Para first:rest) =
+ orderedListItemToMs opts num indent (Plain first:rest)
+orderedListItemToMs opts num indent (first:rest) = do
+ first' <- blockToMs opts first
+ rest' <- blockListToMs opts rest
+ let num' = printf ("%" ++ show (indent - 1) ++ "s") num
+ let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS " <> text (show indent) $$
+ rest' $$ text ".RE"
+ return $ first'' $$ rest''
+
+-- | Convert definition list item (label, list of blocks) to ms.
+definitionListItemToMs :: PandocMonad m
+ => WriterOptions
+ -> ([Inline],[[Block]])
+ -> MS m Doc
+definitionListItemToMs opts (label, defs) = do
+ labelText <- inlineListToMs' opts $ map breakToSpace label
+ contents <- if null defs
+ then return empty
+ else liftM vcat $ forM defs $ \blocks -> do
+ let (first, rest) = case blocks of
+ (Para x:y) -> (Plain x,y)
+ (x:y) -> (x,y)
+ [] -> (Plain [], [])
+ -- should not happen
+ rest' <- liftM vcat $
+ mapM (\item -> blockToMs opts item) rest
+ first' <- blockToMs opts first
+ return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
+ return $ nowrap (text ".IP " <> doubleQuotes labelText) $$ contents
+
+-- | Convert list of Pandoc block elements to ms.
+blockListToMs :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> MS m Doc
+blockListToMs opts blocks =
+ mapM (blockToMs opts) blocks >>= (return . vcat)
+
+-- | Convert list of Pandoc inline elements to ms.
+inlineListToMs :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc
+-- if list starts with ., insert a zero-width character \& so it
+-- won't be interpreted as markup if it falls at the beginning of a line.
+inlineListToMs opts lst = hcat <$> mapM (inlineToMs opts) lst
+
+-- This version to be used when there is no further inline content;
+-- forces a note at the end.
+inlineListToMs' :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc
+inlineListToMs' opts lst = do
+ x <- hcat <$> mapM (inlineToMs opts) lst
+ y <- handleNotes opts empty
+ return $ x <> y
+
+-- | Convert Pandoc inline element to ms.
+inlineToMs :: PandocMonad m => WriterOptions -> Inline -> MS m Doc
+inlineToMs opts (Span _ ils) = inlineListToMs opts ils
+inlineToMs opts (Emph lst) =
+ withFontFeature 'I' (inlineListToMs opts lst)
+inlineToMs opts (Strong lst) =
+ withFontFeature 'B' (inlineListToMs opts lst)
+inlineToMs opts (Strikeout lst) = do
+ contents <- inlineListToMs opts lst
+ -- we use grey color instead of strikeout, which seems quite
+ -- hard to do in groff for arbitrary bits of text
+ return $ text "\\m[strikecolor]" <> contents <> text "\\m[]"
+inlineToMs opts (Superscript lst) = do
+ contents <- inlineListToMs opts lst
+ return $ text "\\*{" <> contents <> text "\\*}"
+inlineToMs opts (Subscript lst) = do
+ contents <- inlineListToMs opts lst
+ return $ text "\\*<" <> contents <> text "\\*>"
+inlineToMs opts (SmallCaps lst) = do
+ -- see https://lists.gnu.org/archive/html/groff/2015-01/msg00016.html
+ modify $ \st -> st{ stSmallCaps = not (stSmallCaps st) }
+ res <- inlineListToMs opts lst
+ modify $ \st -> st{ stSmallCaps = not (stSmallCaps st) }
+ return res
+inlineToMs opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMs opts lst
+ return $ char '`' <> contents <> char '\''
+inlineToMs opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMs opts lst
+ return $ text "\\[lq]" <> contents <> text "\\[rq]"
+inlineToMs opts (Cite _ lst) =
+ inlineListToMs opts lst
+inlineToMs opts (Code attr str) = do
+ hlCode <- highlightCode opts attr str
+ withFontFeature 'C' (return hlCode)
+inlineToMs _ (Str str) = do
+ let shim = case str of
+ '.':_ -> afterBreak "\\&"
+ _ -> empty
+ smallcaps <- gets stSmallCaps
+ if smallcaps
+ then return $ shim <> text (toSmallCaps str)
+ else return $ shim <> text (escapeString str)
+inlineToMs opts (Math InlineMath str) = do
+ modify $ \st -> st{ stHasInlineMath = True }
+ res <- convertMath writeEqn InlineMath str
+ case res of
+ Left il -> inlineToMs opts il
+ Right r -> return $ text "@" <> text (escapeBar r) <> text "@"
+inlineToMs opts (Math DisplayMath str) = do
+ res <- convertMath writeEqn InlineMath str
+ case res of
+ Left il -> do
+ contents <- inlineToMs opts il
+ return $ cr <> text ".RS" $$ contents $$ text ".RE"
+ Right r -> return $
+ cr <> text ".EQ" $$ text (escapeBar r) $$ text ".EN"
+inlineToMs _ il@(RawInline f str)
+ | f == Format "ms" = return $ text str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToMs _ LineBreak = return $ cr <> text ".br" <> cr
+inlineToMs opts SoftBreak =
+ handleNotes opts $
+ case writerWrapText opts of
+ WrapAuto -> space
+ WrapNone -> space
+ WrapPreserve -> cr
+inlineToMs opts Space = handleNotes opts space
+inlineToMs opts (Link _ txt ('#':ident, _)) = do
+ -- internal link
+ contents <- inlineListToMs' opts $ map breakToSpace txt
+ return $ text "\\c" <> cr <> nowrap (text ".pdfhref L -D " <>
+ doubleQuotes (text ident) <> text " -A " <>
+ doubleQuotes (text "\\c") <> space <> text "\\") <> cr <>
+ text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&"
+inlineToMs opts (Link _ txt (src, _)) = do
+ -- external link
+ contents <- inlineListToMs' opts $ map breakToSpace txt
+ return $ text "\\c" <> cr <> nowrap (text ".pdfhref W -D " <>
+ doubleQuotes (text (escapeUri src)) <> text " -A " <>
+ doubleQuotes (text "\\c") <> space <> text "\\") <> cr <>
+ text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&"
+inlineToMs _ (Image _ alternate (_, _)) =
+ return $ char '[' <> text "IMAGE: " <>
+ text (escapeString (stringify alternate)) <> char ']'
+inlineToMs _ (Note contents) = do
+ modify $ \st -> st{ stNotes = contents : stNotes st }
+ return $ text "\\**"
+
+handleNotes :: PandocMonad m => WriterOptions -> Doc -> MS m Doc
+handleNotes opts fallback = do
+ notes <- gets stNotes
+ if null notes
+ then return fallback
+ else do
+ modify $ \st -> st{ stNotes = [] }
+ vcat <$> mapM (handleNote opts) notes
+
+handleNote :: PandocMonad m => WriterOptions -> Note -> MS m Doc
+handleNote opts bs = do
+ -- don't start with Paragraph or we'll get a spurious blank
+ -- line after the note ref:
+ let bs' = case bs of
+ (Para ils : rest) -> Plain ils : rest
+ _ -> bs
+ contents <- blockListToMs opts bs'
+ return $ cr <> text ".FS" $$ contents $$ text ".FE" <> cr
+
+fontChange :: PandocMonad m => MS m Doc
+fontChange = do
+ features <- gets stFontFeatures
+ let filling = sort [c | (c,True) <- Map.toList features]
+ return $ text $ "\\f[" ++ filling ++ "]"
+
+withFontFeature :: PandocMonad m => Char -> MS m Doc -> MS m Doc
+withFontFeature c action = do
+ modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st }
+ begin <- fontChange
+ d <- action
+ modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st }
+ end <- fontChange
+ return $ begin <> d <> end
+
+setFirstPara :: PandocMonad m => MS m ()
+setFirstPara = modify $ \st -> st{ stFirstPara = True }
+
+resetFirstPara :: PandocMonad m => MS m ()
+resetFirstPara = modify $ \st -> st{ stFirstPara = False }
+
+breakToSpace :: Inline -> Inline
+breakToSpace SoftBreak = Space
+breakToSpace LineBreak = Space
+breakToSpace x = x
+
+-- Highlighting
+
+styleToMs :: Style -> Doc
+styleToMs sty = vcat $ colordefs ++ map (toMacro sty) alltoktypes
+ where alltoktypes = enumFromTo KeywordTok NormalTok
+ colordefs = map toColorDef allcolors
+ toColorDef c = text (".defcolor " ++
+ hexColor c ++ " rgb #" ++ hexColor c)
+ allcolors = catMaybes $ ordNub $
+ [defaultColor sty, backgroundColor sty,
+ lineNumberColor sty, lineNumberBackgroundColor sty] ++
+ concatMap (colorsForToken. snd) (Map.toList (tokenStyles sty))
+ colorsForToken ts = [tokenColor ts, tokenBackground ts]
+
+hexColor :: Color -> String
+hexColor (RGB r g b) = printf "%02x%02x%02x" r g b
+
+toMacro :: Style -> TokenType -> Doc
+toMacro sty toktype =
+ nowrap (text ".ds " <> text (show toktype) <> text " " <>
+ setbg <> setcolor <> setfont <>
+ text "\\\\$1" <>
+ resetfont <> resetcolor <> resetbg)
+ where setcolor = maybe empty fgcol tokCol
+ resetcolor = maybe empty (const $ text "\\\\m[]") tokCol
+ setbg = empty -- maybe empty bgcol tokBg
+ resetbg = empty -- maybe empty (const $ text "\\\\M[]") tokBg
+ fgcol c = text $ "\\\\m[" ++ hexColor c ++ "]"
+ -- bgcol c = text $ "\\\\M[" ++ hexColor c ++ "]"
+ setfont = if tokBold || tokItalic
+ then text $ "\\\\f[C" ++ ['B' | tokBold] ++
+ ['I' | tokItalic] ++ "]"
+ else empty
+ resetfont = if tokBold || tokItalic
+ then text "\\\\f[C]"
+ else empty
+ tokSty = Map.lookup toktype (tokenStyles sty)
+ tokCol = (tokSty >>= tokenColor) `mplus` defaultColor sty
+ -- tokBg = (tokSty >>= tokenBackground) `mplus` backgroundColor sty
+ tokBold = fromMaybe False (tokenBold <$> tokSty)
+ tokItalic = fromMaybe False (tokenItalic <$> tokSty)
+ -- tokUnderline = fromMaybe False (tokSty >>= tokUnderline)
+ -- lnColor = lineNumberColor sty
+ -- lnBkgColor = lineNumberBackgroundColor sty
+
+msFormatter :: FormatOptions -> [SourceLine] -> Doc
+msFormatter _fmtopts =
+ vcat . map fmtLine
+ where fmtLine = hcat . map fmtToken
+ fmtToken (toktype, tok) = text "\\*" <>
+ brackets (text (show toktype) <> text " \""
+ <> text (escapeCode (T.unpack tok)) <> text "\"")
+
+highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m Doc
+highlightCode opts attr str =
+ case highlight (writerSyntaxMap opts) msFormatter attr str of
+ Left msg -> do
+ unless (null msg) $ report $ CouldNotHighlight msg
+ return $ text (escapeCode str)
+ Right h -> do
+ modify (\st -> st{ stHighlighting = True })
+ return h
diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs
new file mode 100644
index 000000000..ad67e489d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Muse.hs
@@ -0,0 +1,408 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2017-2018 Alexander Krotov <ilabdsf@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Muse
+ Copyright : Copyright (C) 2017-2018 Alexander Krotov
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Krotov <ilabdsf@gmail.com>
+ Stability : stable
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Muse.
+
+This module is mostly intended for <https://amusewiki.org/ Amusewiki> markup support,
+as described by <https://amusewiki.org/library/manual Text::Amuse markup manual>.
+Original <https://www.gnu.org/software/emacs-muse/ Emacs Muse> markup support
+is a secondary goal.
+
+Where Text::Amuse markup
+<https://metacpan.org/pod/Text::Amuse#DIFFERENCES-WITH-THE-ORIGINAL-EMACS-MUSE-MARKUP differs>
+from <https://www.gnu.org/software/emacs-muse/manual/ Emacs Muse markup>,
+Text::Amuse markup is supported.
+For example, native tables are always used instead of Org Mode tables.
+However, @\<literal style="html">@ tag is used for HTML raw blocks
+even though it is supported only in Emacs Muse.
+-}
+module Text.Pandoc.Writers.Muse (writeMuse) where
+import Control.Monad.State.Strict
+import Data.Text (Text)
+import Data.List (intersperse, transpose, isInfixOf)
+import System.FilePath (takeExtension)
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Options
+import Text.Pandoc.Pretty
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Writers.Shared
+import qualified Data.Set as Set
+
+type Notes = [[Block]]
+data WriterState =
+ WriterState { stNotes :: Notes
+ , stOptions :: WriterOptions
+ , stTopLevel :: Bool
+ , stInsideBlock :: Bool
+ , stIds :: Set.Set String
+ }
+
+-- | Convert Pandoc to Muse.
+writeMuse :: PandocMonad m
+ => WriterOptions
+ -> Pandoc
+ -> m Text
+writeMuse opts document =
+ let st = WriterState { stNotes = []
+ , stOptions = opts
+ , stTopLevel = True
+ , stInsideBlock = False
+ , stIds = Set.empty
+ }
+ in evalStateT (pandocToMuse document) st
+
+-- | Return Muse representation of document.
+pandocToMuse :: PandocMonad m
+ => Pandoc
+ -> StateT WriterState m Text
+pandocToMuse (Pandoc meta blocks) = do
+ opts <- gets stOptions
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let render' :: Doc -> Text
+ render' = render Nothing
+ metadata <- metaToJSON opts
+ (fmap render' . blockListToMuse)
+ (fmap render' . inlineListToMuse)
+ meta
+ body <- blockListToMuse blocks
+ notes <- liftM (reverse . stNotes) get >>= notesToMuse
+ let main = render colwidth $ body $+$ notes
+ let context = defField "body" main metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
+
+-- | Helper function for flatBlockListToMuse
+-- | Render all blocks and insert blank lines between the first two
+catWithBlankLines :: PandocMonad m
+ => [Block] -- ^ List of block elements
+ -> Int -- ^ Number of blank lines
+ -> StateT WriterState m Doc
+catWithBlankLines (b : bs) n = do
+ b' <- blockToMuse b
+ bs' <- flatBlockListToMuse bs
+ return $ b' <> blanklines n <> bs'
+catWithBlankLines _ _ = error "Expected at least one block"
+
+-- | Convert list of Pandoc block elements to Muse
+-- | without setting stTopLevel.
+flatBlockListToMuse :: PandocMonad m
+ => [Block] -- ^ List of block elements
+ -> StateT WriterState m Doc
+flatBlockListToMuse bs@(BulletList _ : BulletList _ : _) = catWithBlankLines bs 2
+flatBlockListToMuse bs@(OrderedList (_, style1, _) _ : OrderedList (_, style2, _) _ : _) =
+ catWithBlankLines bs (if style1' == style2' then 2 else 0)
+ where
+ style1' = normalizeStyle style1
+ style2' = normalizeStyle style2
+ normalizeStyle DefaultStyle = Decimal
+ normalizeStyle s = s
+flatBlockListToMuse bs@(DefinitionList _ : DefinitionList _ : _) = catWithBlankLines bs 2
+flatBlockListToMuse bs@(_ : _) = catWithBlankLines bs 0
+flatBlockListToMuse [] = return mempty
+
+-- | Convert list of Pandoc block elements to Muse.
+blockListToMuse :: PandocMonad m
+ => [Block] -- ^ List of block elements
+ -> StateT WriterState m Doc
+blockListToMuse blocks = do
+ oldState <- get
+ modify $ \s -> s { stTopLevel = not $ stInsideBlock s
+ , stInsideBlock = True
+ }
+ result <- flatBlockListToMuse blocks
+ modify $ \s -> s { stTopLevel = stTopLevel oldState
+ , stInsideBlock = stInsideBlock oldState
+ }
+ return result
+
+-- | Convert Pandoc block element to Muse.
+blockToMuse :: PandocMonad m
+ => Block -- ^ Block element
+ -> StateT WriterState m Doc
+blockToMuse (Plain inlines) = inlineListToMuse inlines
+blockToMuse (Para inlines) = do
+ contents <- inlineListToMuse inlines
+ return $ contents <> blankline
+blockToMuse (LineBlock lns) = do
+ lns' <- mapM inlineListToMuse lns
+ return $ nowrap $ vcat (map (text "> " <>) lns') <> blankline
+blockToMuse (CodeBlock (_,_,_) str) =
+ return $ "<example>" $$ text str $$ "</example>" $$ blankline
+blockToMuse (RawBlock (Format format) str) =
+ return $ blankline $$ "<literal style=\"" <> text format <> "\">" $$
+ text str $$ "</literal>" $$ blankline
+blockToMuse (BlockQuote blocks) = do
+ contents <- flatBlockListToMuse blocks
+ return $ blankline
+ <> "<quote>"
+ $$ nest 0 contents -- nest 0 to remove trailing blank lines
+ $$ "</quote>"
+ <> blankline
+blockToMuse (OrderedList (start, style, _) items) = do
+ let markers = take (length items) $ orderedListMarkers
+ (start, style, Period)
+ let maxMarkerLength = maximum $ map length markers
+ let markers' = map (\m -> let s = maxMarkerLength - length m
+ in m ++ replicate s ' ') markers
+ contents <- zipWithM orderedListItemToMuse markers' items
+ -- ensure that sublists have preceding blank line
+ topLevel <- gets stTopLevel
+ return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline
+ where orderedListItemToMuse :: PandocMonad m
+ => String -- ^ marker for list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> StateT WriterState m Doc
+ orderedListItemToMuse marker item = do
+ contents <- blockListToMuse item
+ return $ hang (length marker + 1) (text marker <> space) contents
+blockToMuse (BulletList items) = do
+ contents <- mapM bulletListItemToMuse items
+ -- ensure that sublists have preceding blank line
+ topLevel <- gets stTopLevel
+ return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline
+ where bulletListItemToMuse :: PandocMonad m
+ => [Block]
+ -> StateT WriterState m Doc
+ bulletListItemToMuse item = do
+ contents <- blockListToMuse item
+ return $ hang 2 "- " contents
+blockToMuse (DefinitionList items) = do
+ contents <- mapM definitionListItemToMuse items
+ -- ensure that sublists have preceding blank line
+ topLevel <- gets stTopLevel
+ return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline
+ where definitionListItemToMuse :: PandocMonad m
+ => ([Inline], [[Block]])
+ -> StateT WriterState m Doc
+ definitionListItemToMuse (label, defs) = do
+ label' <- inlineListToMuse label
+ contents <- liftM vcat $ mapM descriptionToMuse defs
+ let ind = offset label'
+ return $ hang ind label' contents
+ descriptionToMuse :: PandocMonad m
+ => [Block]
+ -> StateT WriterState m Doc
+ descriptionToMuse desc = hang 4 " :: " <$> blockListToMuse desc
+blockToMuse (Header level (ident,_,_) inlines) = do
+ opts <- gets stOptions
+ contents <- inlineListToMuse inlines
+
+ ids <- gets stIds
+ let autoId = uniqueIdent inlines ids
+ modify $ \st -> st{ stIds = Set.insert autoId ids }
+
+ let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId)
+ then empty
+ else "#" <> text ident <> cr
+ let header' = text $ replicate level '*'
+ return $ blankline <> nowrap (header' <> space <> contents)
+ $$ attr' <> blankline
+-- https://www.gnu.org/software/emacs-muse/manual/muse.html#Horizontal-Rules-and-Anchors
+blockToMuse HorizontalRule = return $ blankline $$ "----" $$ blankline
+blockToMuse (Table caption _ _ headers rows) = do
+ caption' <- inlineListToMuse caption
+ headers' <- mapM blockListToMuse headers
+ rows' <- mapM (mapM blockListToMuse) rows
+ let noHeaders = all null headers
+
+ let numChars = maximum . map offset
+ -- FIXME: width is not being used.
+ let widthsInChars =
+ map numChars $ transpose (headers' : rows')
+ -- FIXME: Muse doesn't allow blocks with height more than 1.
+ let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks
+ where h = maximum (1 : map height blocks)
+ sep' = lblock (length sep) $ vcat (replicate h (text sep))
+ let makeRow sep = (" " <>) . hpipeBlocks sep . zipWith lblock widthsInChars
+ let head' = makeRow " || " headers'
+ let rowSeparator = if noHeaders then " | " else " | "
+ rows'' <- mapM (\row -> do cols <- mapM blockListToMuse row
+ return $ makeRow rowSeparator cols) rows
+ let body = vcat rows''
+ return $ (if noHeaders then empty else head')
+ $$ body
+ $$ (if null caption then empty else " |+ " <> caption' <> " +|")
+ $$ blankline
+blockToMuse (Div _ bs) = flatBlockListToMuse bs
+blockToMuse Null = return empty
+
+-- | Return Muse representation of notes.
+notesToMuse :: PandocMonad m
+ => Notes
+ -> StateT WriterState m Doc
+notesToMuse notes = liftM vsep (zipWithM noteToMuse [1 ..] notes)
+
+-- | Return Muse representation of a note.
+noteToMuse :: PandocMonad m
+ => Int
+ -> [Block]
+ -> StateT WriterState m Doc
+noteToMuse num note = do
+ contents <- blockListToMuse note
+ let marker = "[" ++ show num ++ "] "
+ return $ hang (length marker) (text marker) contents
+
+-- | Escape special characters for Muse.
+escapeString :: String -> String
+escapeString s =
+ "<verbatim>" ++
+ substitute "</verbatim>" "<</verbatim><verbatim>/verbatim>" s ++
+ "</verbatim>"
+
+-- | Escape special characters for Muse if needed.
+conditionalEscapeString :: String -> String
+conditionalEscapeString s =
+ if any (`elem` ("#*<=>[]|" :: String)) s ||
+ "::" `isInfixOf` s ||
+ "----" `isInfixOf` s ||
+ "~~" `isInfixOf` s
+ then escapeString s
+ else s
+
+normalizeInlineList :: [Inline] -> [Inline]
+normalizeInlineList (x : Str "" : xs)
+ = normalizeInlineList (x:xs)
+normalizeInlineList (Str x1 : Str x2 : xs)
+ = normalizeInlineList $ Str (x1 ++ x2) : xs
+normalizeInlineList (Emph x1 : Emph x2 : ils)
+ = normalizeInlineList $ Emph (x1 ++ x2) : ils
+normalizeInlineList (Strong x1 : Strong x2 : ils)
+ = normalizeInlineList $ Strong (x1 ++ x2) : ils
+normalizeInlineList (Strikeout x1 : Strikeout x2 : ils)
+ = normalizeInlineList $ Strikeout (x1 ++ x2) : ils
+normalizeInlineList (Superscript x1 : Superscript x2 : ils)
+ = normalizeInlineList $ Superscript (x1 ++ x2) : ils
+normalizeInlineList (Subscript x1 : Subscript x2 : ils)
+ = normalizeInlineList $ Subscript (x1 ++ x2) : ils
+normalizeInlineList (SmallCaps x1 : SmallCaps x2 : ils)
+ = normalizeInlineList $ SmallCaps (x1 ++ x2) : ils
+normalizeInlineList (Code _ x1 : Code _ x2 : ils)
+ = normalizeInlineList $ Code nullAttr (x1 ++ x2) : ils
+normalizeInlineList (RawInline f1 x1 : RawInline f2 x2 : ils) | f1 == f2
+ = normalizeInlineList $ RawInline f1 (x1 ++ x2) : ils
+normalizeInlineList (Span a1 x1 : Span a2 x2 : ils) | a1 == a2
+ = normalizeInlineList $ Span a1 (x1 ++ x2) : ils
+normalizeInlineList (x:xs) = x : normalizeInlineList xs
+normalizeInlineList [] = []
+
+fixNotes :: [Inline] -> [Inline]
+fixNotes [] = []
+fixNotes (Space : n@Note{} : rest) = Str " " : n : fixNotes rest
+fixNotes (SoftBreak : n@Note{} : rest) = Str " " : n : fixNotes rest
+fixNotes (x:xs) = x : fixNotes xs
+
+-- | Convert list of Pandoc inline elements to Muse.
+inlineListToMuse :: PandocMonad m
+ => [Inline]
+ -> StateT WriterState m Doc
+inlineListToMuse lst = hcat <$> mapM inlineToMuse (fixNotes $ normalizeInlineList lst)
+
+-- | Convert Pandoc inline element to Muse.
+inlineToMuse :: PandocMonad m
+ => Inline
+ -> StateT WriterState m Doc
+inlineToMuse (Str str) = return $ text $ conditionalEscapeString str
+inlineToMuse (Emph lst) = do
+ contents <- inlineListToMuse lst
+ return $ "<em>" <> contents <> "</em>"
+inlineToMuse (Strong lst) = do
+ contents <- inlineListToMuse lst
+ return $ "<strong>" <> contents <> "</strong>"
+inlineToMuse (Strikeout lst) = do
+ contents <- inlineListToMuse lst
+ return $ "<del>" <> contents <> "</del>"
+inlineToMuse (Superscript lst) = do
+ contents <- inlineListToMuse lst
+ return $ "<sup>" <> contents <> "</sup>"
+inlineToMuse (Subscript lst) = do
+ contents <- inlineListToMuse lst
+ return $ "<sub>" <> contents <> "</sub>"
+inlineToMuse (SmallCaps lst) = inlineListToMuse lst
+inlineToMuse (Quoted SingleQuote lst) = do
+ contents <- inlineListToMuse lst
+ return $ "‘" <> contents <> "’"
+inlineToMuse (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMuse lst
+ return $ "“" <> contents <> "”"
+-- Amusewiki does not support <cite> tag,
+-- and Emacs Muse citation support is limited
+-- (https://www.gnu.org/software/emacs-muse/manual/html_node/Citations.html#Citation)
+-- so just fallback to expanding inlines.
+inlineToMuse (Cite _ lst) = inlineListToMuse lst
+inlineToMuse (Code _ str) = return $
+ "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>"
+inlineToMuse (Math t str) =
+ lift (texMathToInlines t str) >>= inlineListToMuse
+inlineToMuse (RawInline (Format f) str) =
+ return $ "<literal style=\"" <> text f <> "\">" <> text str <> "</literal>"
+inlineToMuse LineBreak = return $ "<br>" <> cr
+inlineToMuse Space = return space
+inlineToMuse SoftBreak = do
+ wrapText <- gets $ writerWrapText . stOptions
+ return $ if wrapText == WrapPreserve then cr else space
+inlineToMuse (Link _ txt (src, _)) =
+ case txt of
+ [Str x] | escapeURI x == src ->
+ return $ "[[" <> text (escapeLink x) <> "]]"
+ _ -> do contents <- inlineListToMuse txt
+ return $ "[[" <> text (escapeLink src) <> "][" <> contents <> "]]"
+ where escapeLink lnk = if isImageUrl lnk then "URL:" ++ lnk else lnk
+ -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
+ imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
+ isImageUrl = (`elem` imageExtensions) . takeExtension
+inlineToMuse (Image attr alt (source,'f':'i':'g':':':title)) =
+ inlineToMuse (Image attr alt (source,title))
+inlineToMuse (Image attr inlines (source, title)) = do
+ opts <- gets stOptions
+ alt <- inlineListToMuse inlines
+ let title' = if null title
+ then if null inlines
+ then ""
+ else "[" <> alt <> "]"
+ else "[" <> text title <> "]"
+ let width = case dimension Width attr of
+ Just (Percent x) | isEnabled Ext_amuse opts -> " " ++ show (round x :: Integer)
+ _ -> ""
+ return $ "[[" <> text (source ++ width) <> "]" <> title' <> "]"
+inlineToMuse (Note contents) = do
+ -- add to notes in state
+ notes <- gets stNotes
+ modify $ \st -> st { stNotes = contents:notes }
+ let ref = show $ length notes + 1
+ return $ "[" <> text ref <> "]"
+inlineToMuse (Span (_,name:_,_) inlines) = do
+ contents <- inlineListToMuse inlines
+ return $ "<class name=\"" <> text name <> "\">" <> contents <> "</class>"
+inlineToMuse (Span _ lst) = inlineListToMuse lst
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
index 87e23aeeb..f852bad96 100644
--- a/src/Text/Pandoc/Writers/Native.hs
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Native
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -30,14 +30,17 @@ Conversion of a 'Pandoc' document to a string representation.
-}
module Text.Pandoc.Writers.Native ( writeNative )
where
-import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) )
-import Data.List ( intersperse )
+import Data.List (intersperse)
+import Data.Text (Text)
+import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
+import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
import Text.Pandoc.Pretty
prettyList :: [Doc] -> Doc
prettyList ds =
- "[" <> (cat $ intersperse (cr <> ",") $ map (nest 1) ds) <> "]"
+ "[" <>
+ cat (intersperse (cr <> ",") $ map (nest 1) ds) <> "]"
-- | Prettyprint Pandoc block element.
prettyBlock :: Block -> Doc
@@ -47,12 +50,12 @@ prettyBlock (BlockQuote blocks) =
"BlockQuote" $$ prettyList (map prettyBlock blocks)
prettyBlock (OrderedList attribs blockLists) =
"OrderedList" <> space <> text (show attribs) $$
- (prettyList $ map (prettyList . map prettyBlock) blockLists)
+ prettyList (map (prettyList . map prettyBlock) blockLists)
prettyBlock (BulletList blockLists) =
"BulletList" $$
- (prettyList $ map (prettyList . map prettyBlock) blockLists)
+ prettyList (map (prettyList . map prettyBlock) blockLists)
prettyBlock (DefinitionList items) = "DefinitionList" $$
- (prettyList $ map deflistitem items)
+ prettyList (map deflistitem items)
where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <>
nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")"
prettyBlock (Table caption aligns widths header rows) =
@@ -66,8 +69,8 @@ prettyBlock (Div attr blocks) =
prettyBlock block = text $ show block
-- | Prettyprint Pandoc document.
-writeNative :: WriterOptions -> Pandoc -> String
-writeNative opts (Pandoc meta blocks) =
+writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeNative opts (Pandoc meta blocks) = return $
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index ce4d456a3..63a3f915a 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.ODT
- Copyright : Copyright (C) 2008-2015 John MacFarlane
+ Copyright : Copyright (C) 2008-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,49 +29,70 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to ODT.
-}
module Text.Pandoc.Writers.ODT ( writeODT ) where
-import Data.IORef
-import Data.List ( isPrefixOf )
-import Data.Maybe ( fromMaybe )
-import Text.XML.Light.Output
-import Text.TeXMath
-import qualified Data.ByteString.Lazy as B
-import Text.Pandoc.UTF8 ( fromStringLazy )
import Codec.Archive.Zip
-import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) )
-import Text.Pandoc.Shared ( stringify, fetchItem', warn,
- getDefaultReferenceODT )
-import Text.Pandoc.ImageSize
-import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType )
+import Control.Monad.Except (catchError)
+import Control.Monad.State.Strict
+import qualified Data.ByteString.Lazy as B
+import Data.Generics (everywhere', mkT)
+import Data.List (isPrefixOf)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text.Lazy as TL
+import System.FilePath (takeDirectory, takeExtension, (<.>))
+import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import Text.Pandoc.Class (PandocMonad, report, toLang)
+import qualified Text.Pandoc.Class as P
import Text.Pandoc.Definition
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
+import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
+import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
+import Text.Pandoc.Pretty
+import Text.Pandoc.Shared (stringify)
+import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
import Text.Pandoc.Walk
-import Text.Pandoc.Writers.Shared ( fixDisplayMath )
-import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
-import Control.Monad (liftM)
+import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
+import Text.Pandoc.Writers.Shared (fixDisplayMath)
import Text.Pandoc.XML
-import Text.Pandoc.Pretty
-import qualified Control.Exception as E
-import Data.Time.Clock.POSIX ( getPOSIXTime )
-import System.FilePath ( takeExtension, takeDirectory, (<.>))
+import Text.TeXMath
+import Text.XML.Light
+
+newtype ODTState = ODTState { stEntries :: [Entry]
+ }
+
+type O m = StateT ODTState m
-- | Produce an ODT file from a Pandoc document.
-writeODT :: WriterOptions -- ^ Writer options
+writeODT :: PandocMonad m
+ => WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
- -> IO B.ByteString
-writeODT opts doc@(Pandoc meta _) = do
- let datadir = writerUserDataDir opts
+ -> m B.ByteString
+writeODT opts doc =
+ let initState = ODTState{ stEntries = []
+ }
+ in
+ evalStateT (pandocToODT opts doc) initState
+
+-- | Produce an ODT file from a Pandoc document.
+pandocToODT :: PandocMonad m
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> O m B.ByteString
+pandocToODT opts doc@(Pandoc meta _) = do
let title = docTitle meta
+ lang <- toLang (getLang opts meta)
refArchive <-
- case writerReferenceODT opts of
- Just f -> liftM toArchive $ B.readFile f
- Nothing -> getDefaultReferenceODT datadir
+ case writerReferenceDoc opts of
+ Just f -> liftM toArchive $ lift $ P.readFileLazy f
+ Nothing -> lift $ (toArchive . B.fromStrict) <$>
+ P.readDataFile "reference.odt"
-- handle formulas and pictures
- picEntriesRef <- newIORef ([] :: [Entry])
- doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc
- let newContents = writeOpenDocument opts{writerWrapText = WrapNone} doc'
- epochtime <- floor `fmap` getPOSIXTime
+ -- picEntriesRef <- P.newIORef ([] :: [Entry])
+ doc' <- walkM (transformPicMath opts) $ walk fixDisplayMath doc
+ newContents <- lift $ writeOpenDocument opts{writerWrapText = WrapNone} doc'
+ epochtime <- floor `fmap` lift P.getPOSIXTime
let contentEntry = toEntry "content.xml" epochtime
- $ fromStringLazy newContents
- picEntries <- readIORef picEntriesRef
+ $ fromTextLazy $ TL.fromStrict newContents
+ picEntries <- gets stEntries
let archive = foldr addEntryToArchive refArchive
$ contentEntry : picEntries
-- construct META-INF/manifest.xml based on archive
@@ -90,14 +111,13 @@ writeODT opts doc@(Pandoc meta _) = do
$ fromStringLazy $ render Nothing
$ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
$$
- ( inTags True "manifest:manifest"
+ (inTags True "manifest:manifest"
[("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")
- ,("manifest:version","1.2")]
- $ ( selfClosingTag "manifest:file-entry"
+ ,("manifest:version","1.2")] ( selfClosingTag "manifest:file-entry"
[("manifest:media-type","application/vnd.oasis.opendocument.text")
,("manifest:full-path","/")]
- $$ vcat ( map toFileEntry $ files )
- $$ vcat ( map toFileEntry $ formulas )
+ $$ vcat ( map toFileEntry files )
+ $$ vcat ( map toFileEntry formulas )
)
)
let archive' = addEntryToArchive manifestEntry archive
@@ -105,88 +125,138 @@ writeODT opts doc@(Pandoc meta _) = do
$ fromStringLazy $ render Nothing
$ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
$$
- ( inTags True "office:document-meta"
+ (inTags True "office:document-meta"
[("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0")
,("xmlns:xlink","http://www.w3.org/1999/xlink")
,("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:meta","urn:oasis:names:tc:opendocument:xmlns:meta:1.0")
,("xmlns:ooo","http://openoffice.org/2004/office")
,("xmlns:grddl","http://www.w3.org/2003/g/data-view#")
- ,("office:version","1.2")]
- $ ( inTagsSimple "office:meta"
- $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title))
- )
+ ,("office:version","1.2")] ( inTagsSimple "office:meta" $
+ ( inTagsSimple "dc:title"
+ (text $ escapeStringForXML (stringify title))
+ $$
+ case lang of
+ Just l -> inTagsSimple "dc:language"
+ (text (escapeStringForXML (renderLang l)))
+ Nothing -> empty
+ )
)
)
-- make sure mimetype is first
let mimetypeEntry = toEntry "mimetype" epochtime
$ fromStringLazy "application/vnd.oasis.opendocument.text"
- let archive'' = addEntryToArchive mimetypeEntry
+ archive'' <- updateStyleWithLang lang
+ $ addEntryToArchive mimetypeEntry
$ addEntryToArchive metaEntry archive'
return $ fromArchive archive''
+updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive
+updateStyleWithLang Nothing arch = return arch
+updateStyleWithLang (Just lang) arch = do
+ epochtime <- floor `fmap` lift P.getPOSIXTime
+ return arch{ zEntries = [if eRelativePath e == "styles.xml"
+ then case parseXMLDoc
+ (toStringLazy (fromEntry e)) of
+ Nothing -> e
+ Just d ->
+ toEntry "styles.xml" epochtime
+ ( fromStringLazy
+ . ppTopElement
+ . addLang lang $ d )
+ else e
+ | e <- zEntries arch] }
+
+addLang :: Lang -> Element -> Element
+addLang lang = everywhere' (mkT updateLangAttr)
+ where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
+ = Attr n (langLanguage lang)
+ updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
+ = Attr n (langRegion lang)
+ updateLangAttr x = x
+
-- | transform both Image and Math elements
-transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
-transformPicMath opts entriesRef (Image attr@(id', cls, _) lab (src,t)) = do
- res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
- case res of
- Left (_ :: E.SomeException) -> do
- warn $ "Could not find image `" ++ src ++ "', skipping..."
- return $ Emph lab
- Right (img, mbMimeType) -> do
- (ptX, ptY) <- case imageSize img of
+transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline
+transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError
+ (do (img, mbMimeType) <- P.fetchItem src
+ (ptX, ptY) <- case imageSize opts img of
Right s -> return $ sizeInPoints s
Left msg -> do
- warn $ "Could not determine image size in `" ++
- src ++ "': " ++ msg
+ report $ CouldNotDetermineImageSize src msg
return (100, 100)
let dims =
case (getDim Width, getDim Height) of
(Just w, Just h) -> [("width", show w), ("height", show h)]
- (Just w@(Percent _), Nothing) -> [("width", show w), ("style:rel-height", "scale")]
- (Nothing, Just h@(Percent _)) -> [("style:rel-width", "scale"), ("height", show h)]
+ (Just w@(Percent p), Nothing) -> [("width", show w), ("height", show (p / ratio) ++ "%")]
+ (Nothing, Just h@(Percent p)) -> [("width", show (p * ratio) ++ "%"), ("height", show h)]
(Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")]
(Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)]
_ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")]
where
ratio = ptX / ptY
- getDim dir = case (dimension dir attr) of
+ getDim dir = case dimension dir attr of
Just (Percent i) -> Just $ Percent i
Just dim -> Just $ Inch $ inInch opts dim
Nothing -> Nothing
let newattr = (id', cls, dims)
- entries <- readIORef entriesRef
+ entries <- gets stEntries
let extension = fromMaybe (takeExtension $ takeWhile (/='?') src)
(mbMimeType >>= extensionFromMimeType)
let newsrc = "Pictures/" ++ show (length entries) <.> extension
let toLazy = B.fromChunks . (:[])
- epochtime <- floor `fmap` getPOSIXTime
+ epochtime <- floor `fmap` lift P.getPOSIXTime
let entry = toEntry newsrc epochtime $ toLazy img
- modifyIORef entriesRef (entry:)
- return $ Image newattr lab (newsrc, t)
-transformPicMath _ entriesRef (Math t math) = do
- entries <- readIORef entriesRef
+ modify $ \st -> st{ stEntries = entry : entries }
+ return $ Image newattr lab (newsrc, t))
+ (\e -> do
+ report $ CouldNotFetchResource src (show e)
+ return $ Emph lab)
+
+transformPicMath _ (Math t math) = do
+ entries <- gets stEntries
let dt = if t == InlineMath then DisplayInline else DisplayBlock
case writeMathML dt <$> readTeX math of
Left _ -> return $ Math t math
Right r -> do
let conf = useShortEmptyTags (const False) defaultConfigPP
let mathml = ppcTopElement conf r
- epochtime <- floor `fmap` getPOSIXTime
+ epochtime <- floor `fmap` (lift P.getPOSIXTime)
let dirname = "Formula-" ++ show (length entries) ++ "/"
let fname = dirname ++ "content.xml"
let entry = toEntry fname epochtime (fromStringLazy mathml)
- modifyIORef entriesRef (entry:)
+ let fname' = dirname ++ "settings.xml"
+ let entry' = toEntry fname' epochtime $ documentSettings (t == InlineMath)
+ modify $ \st -> st{ stEntries = entry' : (entry : entries) }
return $ RawInline (Format "opendocument") $ render Nothing $
- inTags False "draw:frame" [("text:anchor-type",
- if t == DisplayMath
- then "paragraph"
- else "as-char")
- ,("style:vertical-pos", "middle")
- ,("style:vertical-rel", "text")] $
+ inTags False "draw:frame" (if t == DisplayMath
+ then [("draw:style-name","fr2")
+ -- `draw:frame` does not support either
+ -- `style:vertical-pos` or `style:vertical-rel`,
+ -- therefore those attributes must go into the
+ -- `style:style` element
+ ,("text:anchor-type","paragraph")]
+ else [("draw:style-name","fr1")
+ ,("text:anchor-type","as-char")]) $
selfClosingTag "draw:object" [("xlink:href", dirname)
, ("xlink:type", "simple")
, ("xlink:show", "embed")
, ("xlink:actuate", "onLoad")]
-transformPicMath _ _ x = return x
+transformPicMath _ x = return x
+
+documentSettings :: Bool -> B.ByteString
+documentSettings isTextMode = fromStringLazy $ render Nothing
+ $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
+ $$
+ (inTags True "office:document-settings"
+ [("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0")
+ ,("xmlns:xlink","http://www.w3.org/1999/xlink")
+ ,("xmlns:config","urn:oasis:names:tc:opendocument:xmlns:config:1.0")
+ ,("xmlns:ooo","http://openoffice.org/2004/office")
+ ,("office:version","1.2")] $
+ inTagsSimple "office:settings" $
+ inTags False "config:config-item-set"
+ [("config:name", "ooo:configuration-settings")] $
+ inTags False "config:config-item" [("config:name", "IsTextMode")
+ ,("config:type", "boolean")] $
+ text $ if isTextMode then "true" else "false")
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
new file mode 100644
index 000000000..30d8d72dd
--- /dev/null
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -0,0 +1,108 @@
+{-
+Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.OOXML
+ Copyright : Copyright (C) 2012-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions common to OOXML writers (Docx and Powerpoint)
+-}
+module Text.Pandoc.Writers.OOXML ( mknode
+ , nodename
+ , toLazy
+ , renderXml
+ , parseXml
+ , elemToNameSpaces
+ , elemName
+ , isElem
+ , NameSpaces
+ , fitToPage
+ ) where
+
+import Codec.Archive.Zip
+import Control.Monad.Reader
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Lazy.Char8 as BL8
+import Data.Maybe (mapMaybe)
+import Data.Monoid ((<>))
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.XML.Light as XML
+
+mknode :: Node t => String -> [(String,String)] -> t -> Element
+mknode s attrs =
+ add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s)
+
+nodename :: String -> QName
+nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
+ where (name, prefix) = case break (==':') s of
+ (xs,[]) -> (xs, Nothing)
+ (ys, _:zs) -> (zs, Just ys)
+
+toLazy :: B.ByteString -> BL.ByteString
+toLazy = BL.fromChunks . (:[])
+
+renderXml :: Element -> BL.ByteString
+renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
+ UTF8.fromStringLazy (showElement elt)
+
+parseXml :: (PandocMonad m) => Archive -> Archive -> String -> m Element
+parseXml refArchive distArchive relpath =
+ case findEntryByPath relpath refArchive `mplus`
+ findEntryByPath relpath distArchive of
+ Nothing -> fail $ relpath ++ " missing in reference file"
+ Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
+ Nothing -> fail $ relpath ++ " corrupt in reference file"
+ Just d -> return d
+
+-- Copied from Util
+
+attrToNSPair :: XML.Attr -> Maybe (String, String)
+attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+
+elemToNameSpaces :: Element -> NameSpaces
+elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name =
+ QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+ let ns' = ns ++ elemToNameSpaces element
+ in qName (elName element) == name &&
+ qURI (elName element) == lookup prefix ns'
+
+type NameSpaces = [(String, String)]
+
+-- | Scales the image to fit the page
+-- sizes are passed in emu
+fitToPage :: (Double, Double) -> Integer -> (Integer, Integer)
+fitToPage (x, y) pageWidth
+ -- Fixes width to the page width and scales the height
+ | x > fromIntegral pageWidth =
+ (pageWidth, floor $ (fromIntegral pageWidth / x) * y)
+ | otherwise = (floor x, floor y)
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index 20c2c5cbc..29e1bc80c 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE CPP #-}
{-
-Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.OPML
- Copyright : Copyright (C) 2013-2015 John MacFarlane
+ Copyright : Copyright (C) 2013-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,40 +29,45 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to OPML XML.
-}
module Text.Pandoc.Writers.OPML ( writeOPML) where
+import Control.Monad.Except (throwError)
+import Data.Text (Text, unpack)
+import qualified Data.Text as T
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Compat.Time
import Text.Pandoc.Definition
-import Text.Pandoc.XML
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Shared
+import Text.Pandoc.Error
import Text.Pandoc.Options
+import Text.Pandoc.Pretty
+import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Writers.HTML (writeHtmlString)
+import Text.Pandoc.Writers.HTML (writeHtml5String)
import Text.Pandoc.Writers.Markdown (writeMarkdown)
-import Text.Pandoc.Pretty
-import Text.Pandoc.Compat.Time
-import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML
-- | Convert Pandoc document to string in OPML format.
-writeOPML :: WriterOptions -> Pandoc -> String
-writeOPML opts (Pandoc meta blocks) =
+writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeOPML opts (Pandoc meta blocks) = do
let elements = hierarchicalize blocks
colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta
- Just metadata = metaToJSON opts
- (Just . writeMarkdown def . Pandoc nullMeta)
- (Just . trimr . writeMarkdown def . Pandoc nullMeta .
- (\ils -> [Plain ils]))
- meta'
- main = render colwidth $ vcat (map (elementToOPML opts) elements)
- context = defField "body" main metadata
- in case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ metadata <- metaToJSON opts
+ (writeMarkdown def . Pandoc nullMeta)
+ (\ils -> T.stripEnd <$> writeMarkdown def (Pandoc nullMeta [Plain ils]))
+ meta'
+ main <- (render colwidth . vcat) <$> mapM (elementToOPML opts) elements
+ let context = defField "body" main metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-writeHtmlInlines :: [Inline] -> String
-writeHtmlInlines ils = trim $ writeHtmlString def
- $ Pandoc nullMeta [Plain ils]
+
+writeHtmlInlines :: PandocMonad m => [Inline] -> m Text
+writeHtmlInlines ils =
+ T.strip <$> writeHtml5String def (Pandoc nullMeta [Plain ils])
-- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT
showDateTimeRFC822 :: UTCTime -> String
@@ -75,20 +80,27 @@ convertDate ils = maybe "" showDateTimeRFC822 $
#else
parseTime
#endif
- defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils)
+ defaultTimeLocale "%F" =<< normalizeDate (stringify ils)
-- | Convert an Element to OPML.
-elementToOPML :: WriterOptions -> Element -> Doc
-elementToOPML _ (Blk _) = empty
-elementToOPML opts (Sec _ _num _ title elements) =
- let isBlk (Blk _) = True
- isBlk _ = False
- fromBlk (Blk x) = x
- fromBlk _ = error "fromBlk called on non-block"
+elementToOPML :: PandocMonad m => WriterOptions -> Element -> m Doc
+elementToOPML _ (Blk _) = return empty
+elementToOPML opts (Sec _ _num _ title elements) = do
+ let isBlk :: Element -> Bool
+ isBlk (Blk _) = True
+ isBlk _ = False
+
+ fromBlk :: PandocMonad m => Element -> m Block
+ fromBlk (Blk x) = return x
+ fromBlk _ = throwError $ PandocSomeError "fromBlk called on non-block"
+
(blocks, rest) = span isBlk elements
- attrs = [("text", writeHtmlInlines title)] ++
- [("_note", writeMarkdown def (Pandoc nullMeta
- (map fromBlk blocks)))
- | not (null blocks)]
- in inTags True "outline" attrs $
- vcat (map (elementToOPML opts) rest)
+ htmlIls <- writeHtmlInlines title
+ md <- if null blocks
+ then return mempty
+ else do blks <- mapM fromBlk blocks
+ writeMarkdown def $ Pandoc nullMeta blks
+ let attrs = ("text", unpack htmlIls) :
+ [("_note", unpack md) | not (null blocks)]
+ o <- mapM (elementToOPML opts) rest
+ return $ inTags True "outline" attrs $ vcat o
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 8f0e037c5..17edc0cbd 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -1,6 +1,8 @@
-{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
{-
-Copyright (C) 2008-2015 Andrea Rossato <andrea.rossato@ing.unitn.it>
+Copyright (C) 2008-2018 Andrea Rossato <andrea.rossato@ing.unitn.it>
and John MacFarlane.
This program is free software; you can redistribute it and/or modify
@@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.OpenDocument
- Copyright : Copyright (C) 2008-2015 Andrea Rossato and John MacFarlane
+ Copyright : Copyright (C) 2008-2018 Andrea Rossato and John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it>
@@ -30,32 +32,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to OpenDocument XML.
-}
module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where
+import Control.Arrow ((***), (>>>))
+import Control.Monad.State.Strict hiding (when)
+import Data.Char (chr)
+import Data.List (sortBy)
+import qualified Data.Map as Map
+import Data.Ord (comparing)
+import qualified Data.Set as Set
+import Data.Text (Text)
+import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.XML
+import Text.Pandoc.Pretty
import Text.Pandoc.Shared (linesToPara)
import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Readers.TeXMath
-import Text.Pandoc.Pretty
-import Text.Printf ( printf )
-import Control.Arrow ( (***), (>>>) )
-import Control.Monad.State hiding ( when )
-import Data.Char (chr)
-import qualified Data.Set as Set
-import qualified Data.Map as Map
+import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
-import Data.List (sortBy)
-import Data.Ord (comparing)
+import Text.Pandoc.XML
+import Text.Printf (printf)
-- | Auxiliary function to convert Plain block to Para.
plainToPara :: Block -> Block
plainToPara (Plain x) = Para x
-plainToPara x = x
+plainToPara x = x
--
-- OpenDocument writer
--
+type OD m = StateT WriterState m
+
data WriterState =
WriterState { stNotes :: [Doc]
, stTableStyles :: [Doc]
@@ -88,46 +96,45 @@ defaultWriterState =
when :: Bool -> Doc -> Doc
when p a = if p then a else empty
-addTableStyle :: Doc -> State WriterState ()
+addTableStyle :: PandocMonad m => Doc -> OD m ()
addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s }
-addNote :: Doc -> State WriterState ()
+addNote :: PandocMonad m => Doc -> OD m ()
addNote i = modify $ \s -> s { stNotes = i : stNotes s }
-addParaStyle :: Doc -> State WriterState ()
+addParaStyle :: PandocMonad m => Doc -> OD m ()
addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s }
-addTextStyle :: Set.Set TextStyle -> (String, Doc) -> State WriterState ()
+addTextStyle :: PandocMonad m => Set.Set TextStyle -> (String, Doc) -> OD m ()
addTextStyle attrs i = modify $ \s ->
s { stTextStyles = Map.insert attrs i (stTextStyles s) }
-addTextStyleAttr :: TextStyle -> State WriterState ()
+addTextStyleAttr :: PandocMonad m => TextStyle -> OD m ()
addTextStyleAttr t = modify $ \s ->
s { stTextStyleAttr = Set.insert t (stTextStyleAttr s) }
-increaseIndent :: State WriterState ()
+increaseIndent :: PandocMonad m => OD m ()
increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s }
-resetIndent :: State WriterState ()
-resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 }
+resetIndent :: PandocMonad m => OD m ()
+resetIndent = modify $ \s -> s { stIndentPara = stIndentPara s - 1 }
-inTightList :: State WriterState a -> State WriterState a
+inTightList :: PandocMonad m => OD m a -> OD m a
inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r ->
modify (\s -> s { stTight = False }) >> return r
-setInDefinitionList :: Bool -> State WriterState ()
+setInDefinitionList :: PandocMonad m => Bool -> OD m ()
setInDefinitionList b = modify $ \s -> s { stInDefinition = b }
-setFirstPara :: State WriterState ()
+setFirstPara :: PandocMonad m => OD m ()
setFirstPara = modify $ \s -> s { stFirstPara = True }
-inParagraphTags :: Doc -> State WriterState Doc
-inParagraphTags d | isEmpty d = return empty
+inParagraphTags :: PandocMonad m => Doc -> OD m Doc
inParagraphTags d = do
b <- gets stFirstPara
a <- if b
then do modify $ \st -> st { stFirstPara = False }
- return $ [("text:style-name", "First_20_paragraph")]
+ return [("text:style-name", "First_20_paragraph")]
else return [("text:style-name", "Text_20_body")]
return $ inTags False "text:p" a d
@@ -137,7 +144,7 @@ inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)]
inSpanTags :: String -> Doc -> Doc
inSpanTags s = inTags False "text:span" [("text:style-name",s)]
-withTextStyle :: TextStyle -> State WriterState a -> State WriterState a
+withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a
withTextStyle s f = do
oldTextStyleAttr <- gets stTextStyleAttr
addTextStyleAttr s
@@ -145,7 +152,7 @@ withTextStyle s f = do
modify $ \st -> st{ stTextStyleAttr = oldTextStyleAttr }
return res
-inTextStyle :: Doc -> State WriterState Doc
+inTextStyle :: PandocMonad m => Doc -> OD m Doc
inTextStyle d = do
at <- gets stTextStyleAttr
if Set.null at
@@ -161,12 +168,30 @@ inTextStyle d = do
inTags False "style:style"
[("style:name", styleName)
,("style:family", "text")]
- $ selfClosingTag "style:text-properties"
- (concatMap textStyleAttr (Set.toList at)))
+ $ selfClosingTag "style:text-properties"
+ (concatMap textStyleAttr (Set.toList at)))
return $ inTags False
"text:span" [("text:style-name",styleName)] d
-inHeaderTags :: Int -> Doc -> State WriterState Doc
+formulaStyles :: [Doc]
+formulaStyles = [formulaStyle InlineMath, formulaStyle DisplayMath]
+
+formulaStyle :: MathType -> Doc
+formulaStyle mt = inTags False "style:style"
+ [("style:name", if mt == InlineMath then "fr1" else "fr2")
+ ,("style:family", "graphic")
+ ,("style:parent-style-name", "Formula")]
+ $ selfClosingTag "style:graphic-properties" $ if mt == InlineMath then
+ [("style:vertical-pos", "middle")
+ ,("style:vertical-rel", "text")]
+ else
+ [("style:vertical-pos", "middle")
+ ,("style:vertical-rel", "paragraph-content")
+ ,("style:horizontal-pos", "center")
+ ,("style:horizontal-rel", "paragraph-content")
+ ,("style:wrap", "none")]
+
+inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc
inHeaderTags i d =
return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i)
, ("text:outline-level", show i)] d
@@ -189,64 +214,67 @@ handleSpaces s
rm [] = empty
-- | Convert Pandoc document to string in OpenDocument format.
-writeOpenDocument :: WriterOptions -> Pandoc -> String
-writeOpenDocument opts (Pandoc meta blocks) =
+writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeOpenDocument opts (Pandoc meta blocks) = do
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
+ let render' :: Doc -> Text
render' = render colwidth
- ((body, metadata),s) = flip runState
+ ((body, metadata),s) <- flip runStateT
defaultWriterState $ do
m <- metaToJSON opts
- (fmap (render colwidth) . blocksToOpenDocument opts)
- (fmap (render colwidth) . inlinesToOpenDocument opts)
+ (fmap render' . blocksToOpenDocument opts)
+ (fmap render' . inlinesToOpenDocument opts)
meta
b <- render' `fmap` blocksToOpenDocument opts blocks
return (b, m)
- styles = stTableStyles s ++ stParaStyles s ++
- map snd (reverse $ sortBy (comparing fst) $
- Map.elems (stTextStyles s))
+ let styles = stTableStyles s ++ stParaStyles s ++ formulaStyles ++
+ map snd (sortBy (flip (comparing fst)) (
+ Map.elems (stTextStyles s)))
listStyle (n,l) = inTags True "text:list-style"
[("style:name", "L" ++ show n)] (vcat l)
- listStyles = map listStyle (stListStyles s)
- automaticStyles = vcat $ reverse $ styles ++ listStyles
- context = defField "body" body
- $ defField "automatic-styles" (render' automaticStyles)
- $ metadata
- in case writerTemplate opts of
- Nothing -> body
- Just tpl -> renderTemplate' tpl context
-
-withParagraphStyle :: WriterOptions -> String -> [Block] -> State WriterState Doc
+ let listStyles = map listStyle (stListStyles s)
+ let automaticStyles = vcat $ reverse $ styles ++ listStyles
+ let context = defField "body" body
+ $ defField "toc" (writerTableOfContents opts)
+ $defField "automatic-styles" (render' automaticStyles) metadata
+ case writerTemplate opts of
+ Nothing -> return body
+ Just tpl -> renderTemplate' tpl context
+
+withParagraphStyle :: PandocMonad m
+ => WriterOptions -> String -> [Block] -> OD m Doc
withParagraphStyle o s (b:bs)
| Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l
| otherwise = go =<< blockToOpenDocument o b
where go i = (<>) i <$> withParagraphStyle o s bs
withParagraphStyle _ _ [] = return empty
-inPreformattedTags :: String -> State WriterState Doc
+inPreformattedTags :: PandocMonad m => String -> OD m Doc
inPreformattedTags s = do
n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")]
return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s
-orderedListToOpenDocument :: WriterOptions -> Int -> [[Block]] -> State WriterState Doc
+orderedListToOpenDocument :: PandocMonad m
+ => WriterOptions -> Int -> [[Block]] -> OD m Doc
orderedListToOpenDocument o pn bs =
vcat . map (inTagsIndented "text:list-item") <$>
mapM (orderedItemToOpenDocument o pn . map plainToPara) bs
-orderedItemToOpenDocument :: WriterOptions -> Int -> [Block] -> State WriterState Doc
-orderedItemToOpenDocument o n (b:bs)
- | OrderedList a l <- b = newLevel a l
- | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l
- | otherwise = go =<< blockToOpenDocument o b
- where
- go i = ($$) i <$> orderedItemToOpenDocument o n bs
- newLevel a l = do
- nn <- length <$> gets stParaStyles
- ls <- head <$> gets stListStyles
- modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) }
- inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l
-orderedItemToOpenDocument _ _ [] = return empty
+orderedItemToOpenDocument :: PandocMonad m
+ => WriterOptions -> Int -> [Block] -> OD m Doc
+orderedItemToOpenDocument o n bs = vcat <$> mapM go bs
+ where go (OrderedList a l) = newLevel a l
+ go (Para l) = inParagraphTagsWithStyle ("P" ++ show n) <$>
+ inlinesToOpenDocument o l
+ go b = blockToOpenDocument o b
+ newLevel a l = do
+ nn <- length <$> gets stParaStyles
+ ls <- head <$> gets stListStyles
+ modify $ \s -> s { stListStyles = orderedListLevelStyle a ls :
+ drop 1 (stListStyles s) }
+ inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l
isTightList :: [[Block]] -> Bool
isTightList [] = False
@@ -254,7 +282,8 @@ isTightList (b:_)
| Plain {} : _ <- b = True
| otherwise = False
-newOrderedListStyle :: Bool -> ListAttributes -> State WriterState (Int,Int)
+newOrderedListStyle :: PandocMonad m
+ => Bool -> ListAttributes -> OD m (Int,Int)
newOrderedListStyle b a = do
ln <- (+) 1 . length <$> gets stListStyles
let nbs = orderedListLevelStyle a (ln, [])
@@ -262,7 +291,8 @@ newOrderedListStyle b a = do
modify $ \s -> s { stListStyles = nbs : stListStyles s }
return (ln,pn)
-bulletListToOpenDocument :: WriterOptions -> [[Block]] -> State WriterState Doc
+bulletListToOpenDocument :: PandocMonad m
+ => WriterOptions -> [[Block]] -> OD m Doc
bulletListToOpenDocument o b = do
ln <- (+) 1 . length <$> gets stListStyles
(pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln
@@ -270,48 +300,53 @@ bulletListToOpenDocument o b = do
is <- listItemsToOpenDocument ("P" ++ show pn) o b
return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is
-listItemsToOpenDocument :: String -> WriterOptions -> [[Block]] -> State WriterState Doc
+listItemsToOpenDocument :: PandocMonad m
+ => String -> WriterOptions -> [[Block]] -> OD m Doc
listItemsToOpenDocument s o is =
vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is
-deflistItemToOpenDocument :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState Doc
+deflistItemToOpenDocument :: PandocMonad m
+ => WriterOptions -> ([Inline],[[Block]]) -> OD m Doc
deflistItemToOpenDocument o (t,d) = do
let ts = if isTightList d
then "Definition_20_Term_20_Tight" else "Definition_20_Term"
ds = if isTightList d
then "Definition_20_Definition_20_Tight" else "Definition_20_Definition"
t' <- withParagraphStyle o ts [Para t]
- d' <- liftM vcat $ mapM (withParagraphStyle o ds . (map plainToPara)) d
+ d' <- liftM vcat $ mapM (withParagraphStyle o ds . map plainToPara) d
return $ t' $$ d'
-inBlockQuote :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+inBlockQuote :: PandocMonad m
+ => WriterOptions -> Int -> [Block] -> OD m Doc
inBlockQuote o i (b:bs)
| BlockQuote l <- b = do increaseIndent
ni <- paraStyle
[("style:parent-style-name","Quotations")]
go =<< inBlockQuote o ni (map plainToPara l)
- | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l
- | otherwise = do go =<< blockToOpenDocument o b
+ | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l
+ | otherwise = go =<< blockToOpenDocument o b
where go block = ($$) block <$> inBlockQuote o i bs
inBlockQuote _ _ [] = resetIndent >> return empty
-- | Convert a list of Pandoc blocks to OpenDocument.
-blocksToOpenDocument :: WriterOptions -> [Block] -> State WriterState Doc
+blocksToOpenDocument :: PandocMonad m => WriterOptions -> [Block] -> OD m Doc
blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
-- | Convert a Pandoc block element to OpenDocument.
-blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc
+blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m Doc
blockToOpenDocument o bs
| Plain b <- bs = if null b
then return empty
else inParagraphTags =<< inlinesToOpenDocument o b
| Para [Image attr c (s,'f':'i':'g':':':t)] <- bs
= figure attr c s t
- | Para b <- bs = if null b
+ | Para b <- bs = if null b &&
+ not (isEnabled Ext_empty_paragraphs o)
then return empty
else inParagraphTags =<< inlinesToOpenDocument o b
| LineBlock b <- bs = blockToOpenDocument o $ linesToPara b
- | Div _ xs <- bs = blocksToOpenDocument o xs
+ | Div attr xs <- bs = withLangFromAttr attr
+ (blocksToOpenDocument o xs)
| Header i _ b <- bs = setFirstPara >>
(inHeaderTags i =<< inlinesToOpenDocument o b)
| BlockQuote b <- bs = setFirstPara >> mkBlockQuote b
@@ -324,7 +359,9 @@ blockToOpenDocument o bs
[ ("text:style-name", "Horizontal_20_Line") ])
| RawBlock f s <- bs = if f == Format "opendocument"
then return $ text s
- else return empty
+ else do
+ report $ BlockNotRendered bs
+ return empty
| Null <- bs = return empty
| otherwise = return empty
where
@@ -370,17 +407,23 @@ blockToOpenDocument o bs
captionDoc <- withParagraphStyle o "FigureCaption" [Para caption]
return $ imageDoc $$ captionDoc
-colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc
+colHeadsToOpenDocument :: PandocMonad m
+ => WriterOptions -> String -> [String] -> [[Block]]
+ -> OD m Doc
colHeadsToOpenDocument o tn ns hs =
inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$>
mapM (tableItemToOpenDocument o tn) (zip ns hs)
-tableRowToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc
+tableRowToOpenDocument :: PandocMonad m
+ => WriterOptions -> String -> [String] -> [[Block]]
+ -> OD m Doc
tableRowToOpenDocument o tn ns cs =
inTagsIndented "table:table-row" . vcat <$>
mapM (tableItemToOpenDocument o tn) (zip ns cs)
-tableItemToOpenDocument :: WriterOptions -> String -> (String,[Block])-> State WriterState Doc
+tableItemToOpenDocument :: PandocMonad m
+ => WriterOptions -> String -> (String,[Block])
+ -> OD m Doc
tableItemToOpenDocument o tn (n,i) =
let a = [ ("table:style-name" , tn ++ ".A1" )
, ("office:value-type", "string" )
@@ -389,10 +432,10 @@ tableItemToOpenDocument o tn (n,i) =
withParagraphStyle o n (map plainToPara i)
-- | Convert a list of inline elements to OpenDocument.
-inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc
+inlinesToOpenDocument :: PandocMonad m => WriterOptions -> [Inline] -> OD m Doc
inlinesToOpenDocument o l = hcat <$> toChunks o l
-toChunks :: WriterOptions -> [Inline] -> State WriterState [Doc]
+toChunks :: PandocMonad m => WriterOptions -> [Inline] -> OD m [Doc]
toChunks _ [] = return []
toChunks o (x : xs)
| isChunkable x = do
@@ -407,21 +450,21 @@ toChunks o (x : xs)
where (ys, zs) = span isChunkable xs
isChunkable :: Inline -> Bool
-isChunkable (Str _) = True
-isChunkable Space = True
+isChunkable (Str _) = True
+isChunkable Space = True
isChunkable SoftBreak = True
-isChunkable _ = False
+isChunkable _ = False
-- | Convert an inline element to OpenDocument.
-inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc
+inlineToOpenDocument :: PandocMonad m => WriterOptions -> Inline -> OD m Doc
inlineToOpenDocument o ils
= case ils of
Space -> return space
SoftBreak
| writerWrapText o == WrapPreserve
-> return $ preformatted "\n"
- | otherwise -> return $ space
- Span _ xs -> inlinesToOpenDocument o xs
+ | otherwise ->return space
+ Span attr xs -> withLangFromAttr attr (inlinesToOpenDocument o xs)
LineBreak -> return $ selfClosingTag "text:line-break" []
Str s -> return $ handleSpaces $ escapeStringForXML s
Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l
@@ -432,11 +475,14 @@ inlineToOpenDocument o ils
SmallCaps l -> withTextStyle SmallC $ inlinesToOpenDocument o l
Quoted t l -> inQuotes t <$> inlinesToOpenDocument o l
Code _ s -> inlinedCode $ preformatted s
- Math t s -> inlinesToOpenDocument o (texMathToInlines t s)
+ Math t s -> lift (texMathToInlines t s) >>=
+ inlinesToOpenDocument o
Cite _ l -> inlinesToOpenDocument o l
RawInline f s -> if f == Format "opendocument"
then return $ text s
- else return empty
+ else do
+ report $ InlineNotRendered ils
+ return empty
Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l
Image attr _ (s,t) -> mkImg attr s t
Note l -> mkNote l
@@ -453,8 +499,6 @@ inlineToOpenDocument o ils
let getDims [] = []
getDims (("width", w) :xs) = ("svg:width", w) : getDims xs
getDims (("height", h):xs) = ("svg:height", h) : getDims xs
- getDims (x@("style:rel-width", _) :xs) = x : getDims xs
- getDims (x@("style:rel-height", _):xs) = x : getDims xs
getDims (_:xs) = getDims xs
return $ inTags False "draw:frame"
(("draw:name", "img" ++ show id') : getDims kvs) $
@@ -473,18 +517,18 @@ inlineToOpenDocument o ils
addNote nn
return nn
-bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc]))
-bulletListStyle l =
- let doStyles i = inTags True "text:list-level-style-bullet"
- [ ("text:level" , show (i + 1) )
- , ("text:style-name" , "Bullet_20_Symbols")
- , ("style:num-suffix", "." )
- , ("text:bullet-char", [bulletList !! i] )
- ] (listLevelStyle (1 + i))
- bulletList = map chr $ cycle [8226,8227,8259]
- listElStyle = map doStyles [0..9]
- in do pn <- paraListStyle l
- return (pn, (l, listElStyle))
+bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc]))
+bulletListStyle l = do
+ let doStyles i = inTags True "text:list-level-style-bullet"
+ [ ("text:level" , show (i + 1) )
+ , ("text:style-name" , "Bullet_20_Symbols")
+ , ("style:num-suffix", "." )
+ , ("text:bullet-char", [bulletList !! i] )
+ ] (listLevelStyle (1 + i))
+ bulletList = map chr $ cycle [8226,9702,9642]
+ listElStyle = map doStyles [0..9]
+ pn <- paraListStyle l
+ return (pn, (l, listElStyle))
orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc])
orderedListLevelStyle (s,n, d) (l,ls) =
@@ -494,11 +538,11 @@ orderedListLevelStyle (s,n, d) (l,ls) =
,("style:num-suffix", ")")]
_ -> [("style:num-suffix", ".")]
format = case n of
- UpperAlpha -> "A"
- LowerAlpha -> "a"
- UpperRoman -> "I"
- LowerRoman -> "i"
- _ -> "1"
+ UpperAlpha -> "A"
+ LowerAlpha -> "a"
+ UpperRoman -> "I"
+ LowerRoman -> "i"
+ _ -> "1"
listStyle = inTags True "text:list-level-style-number"
([ ("text:level" , show $ 1 + length ls )
, ("text:style-name" , "Numbering_20_Symbols")
@@ -509,10 +553,10 @@ orderedListLevelStyle (s,n, d) (l,ls) =
listLevelStyle :: Int -> Doc
listLevelStyle i =
- let indent = show (0.25 * fromIntegral i :: Double) in
+ let indent = show (0.4 * fromIntegral (i - 1) :: Double) in
selfClosingTag "style:list-level-properties"
[ ("text:space-before" , indent ++ "in")
- , ("text:min-label-width", "0.25in")]
+ , ("text:min-label-width", "0.4in")]
tableStyle :: Int -> [(Char,Double)] -> Doc
tableStyle num wcs =
@@ -529,7 +573,7 @@ tableStyle num wcs =
[ ("style:name" , tableId ++ "." ++ [c])
, ("style:family", "table-column" )] $
selfClosingTag "style:table-column-properties"
- [("style:rel-column-width", printf "%d*" $ (floor $ w * 65535 :: Integer))]
+ [("style:rel-column-width", printf "%d*" (floor $ w * 65535 :: Integer))]
cellStyle = inTags True "style:style"
[ ("style:name" , tableId ++ ".A1")
, ("style:family", "table-cell" )] $
@@ -538,31 +582,33 @@ tableStyle num wcs =
columnStyles = map colStyle wcs
in table $$ vcat columnStyles $$ cellStyle
-paraStyle :: [(String,String)] -> State WriterState Int
+paraStyle :: PandocMonad m => [(String,String)] -> OD m Int
paraStyle attrs = do
pn <- (+) 1 . length <$> gets stParaStyles
- i <- (*) 0.5 . fromIntegral <$> gets stIndentPara :: State WriterState Double
+ i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara
b <- gets stInDefinition
t <- gets stTight
let styleAttr = [ ("style:name" , "P" ++ show pn)
, ("style:family" , "paragraph" )]
- indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i
+ indentVal = flip (++) "in" . show $ if b then max 0.5 i else i
tight = if t then [ ("fo:margin-top" , "0in" )
, ("fo:margin-bottom" , "0in" )]
else []
- indent = if (i /= 0 || b)
+ indent = if i /= 0 || b
then [ ("fo:margin-left" , indentVal)
, ("fo:margin-right" , "0in" )
, ("fo:text-indent" , "0in" )
, ("style:auto-text-indent" , "false" )]
else []
attributes = indent ++ tight
- paraProps = when (not $ null attributes) $
- selfClosingTag "style:paragraph-properties" attributes
+ paraProps = if null attributes
+ then mempty
+ else selfClosingTag
+ "style:paragraph-properties" attributes
addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps
return pn
-paraListStyle :: Int -> State WriterState Int
+paraListStyle :: PandocMonad m => Int -> OD m Int
paraListStyle l = paraStyle
[("style:parent-style-name","Text_20_body")
,("style:list-style-name", "L" ++ show l )]
@@ -582,7 +628,14 @@ paraTableStyles t s (a:xs)
[ ("fo:text-align", x)
, ("style:justify-single-word", "false")]
-data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre
+data TextStyle = Italic
+ | Bold
+ | Strike
+ | Sub
+ | Sup
+ | SmallC
+ | Pre
+ | Language Lang
deriving ( Eq,Ord )
textStyleAttr :: TextStyle -> [(String,String)]
@@ -600,4 +653,18 @@ textStyleAttr s
| Pre <- s = [("style:font-name" ,"Courier New")
,("style:font-name-asian" ,"Courier New")
,("style:font-name-complex" ,"Courier New")]
+ | Language lang <- s
+ = [("fo:language" ,langLanguage lang)
+ ,("fo:country" ,langRegion lang)]
| otherwise = []
+
+withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
+withLangFromAttr (_,_,kvs) action =
+ case lookup "lang" kvs of
+ Nothing -> action
+ Just l ->
+ case parseBCP47 l of
+ Right lang -> withTextStyle (Language lang) action
+ Left _ -> do
+ report $ InvalidLang l
+ action
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 4302459cc..2307204a5 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -1,8 +1,8 @@
{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2010-2015 Puneeth Chaganti <punchagan@gmail.com>
- Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>,
- and John MacFarlane <jgm@berkeley.edu>
+ 2010-2018 John MacFarlane <jgm@berkeley.edu>
+ 2016-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -21,10 +21,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Org
- Copyright : Copyright (C) 2010-2015 Puneeth Chaganti and John MacFarlane
+ Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com>
+ 2010-2018 John MacFarlane <jgm@berkeley.edu>
+ 2016-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
License : GNU GPL, version 2 or above
- Maintainer : Puneeth Chaganti <punchagan@gmail.com>
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Stability : alpha
Portability : portable
@@ -32,64 +34,67 @@ Conversion of 'Pandoc' documents to Emacs Org-Mode.
Org-Mode: <http://orgmode.org>
-}
-module Text.Pandoc.Writers.Org ( writeOrg) where
+module Text.Pandoc.Writers.Org (writeOrg) where
+import Control.Monad.State.Strict
+import Data.Char (isAlphaNum, toLower)
+import Data.List (intersect, intersperse, isPrefixOf, partition, transpose)
+import Data.Text (Text)
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
import Text.Pandoc.Pretty
+import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate')
-import Data.Char ( isAlphaNum, toLower )
-import Data.List ( isPrefixOf, intersect, intersperse, partition, transpose )
-import Control.Monad.State
+import Text.Pandoc.Writers.Shared
data WriterState =
- WriterState { stNotes :: [[Block]]
- , stLinks :: Bool
- , stImages :: Bool
- , stHasMath :: Bool
- , stOptions :: WriterOptions
+ WriterState { stNotes :: [[Block]]
+ , stHasMath :: Bool
+ , stOptions :: WriterOptions
}
+type Org = StateT WriterState
+
-- | Convert Pandoc to Org.
-writeOrg :: WriterOptions -> Pandoc -> String
-writeOrg opts document =
- let st = WriterState { stNotes = [], stLinks = False,
- stImages = False, stHasMath = False,
+writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeOrg opts document = do
+ let st = WriterState { stNotes = [],
+ stHasMath = False,
stOptions = opts }
- in evalState (pandocToOrg document) st
+ evalStateT (pandocToOrg document) st
-- | Return Org representation of document.
-pandocToOrg :: Pandoc -> State WriterState String
+pandocToOrg :: PandocMonad m => Pandoc -> Org m Text
pandocToOrg (Pandoc meta blocks) = do
- opts <- liftM stOptions get
+ opts <- gets stOptions
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
+ let render' :: Doc -> Text
+ render' = render colwidth
metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToOrg)
- (fmap (render colwidth) . inlineListToOrg)
+ (fmap render' . blockListToOrg)
+ (fmap render' . inlineListToOrg)
meta
body <- blockListToOrg blocks
- notes <- liftM (reverse . stNotes) get >>= notesToOrg
- -- note that the notes may contain refs, so we do them first
- hasMath <- liftM stHasMath get
- let main = render colwidth $ foldl ($+$) empty $ [body, notes]
+ notes <- gets (reverse . stNotes) >>= notesToOrg
+ hasMath <- gets stHasMath
+ let main = render colwidth . foldl ($+$) empty $ [body, notes]
let context = defField "body" main
- $ defField "math" hasMath
+ . defField "math" hasMath
$ metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Return Org representation of notes.
-notesToOrg :: [[Block]] -> State WriterState Doc
+notesToOrg :: PandocMonad m => [[Block]] -> Org m Doc
notesToOrg notes =
- mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>=
- return . vsep
+ vsep <$> zipWithM noteToOrg [1..] notes
-- | Return Org representation of a note.
-noteToOrg :: Int -> [Block] -> State WriterState Doc
+noteToOrg :: PandocMonad m => Int -> [Block] -> Org m Doc
noteToOrg num note = do
contents <- blockListToOrg note
let marker = "[fn:" ++ show num ++ "] "
@@ -109,8 +114,9 @@ isRawFormat f =
f == Format "latex" || f == Format "tex" || f == Format "org"
-- | Convert Pandoc block element to Org.
-blockToOrg :: Block -- ^ Block element
- -> State WriterState Doc
+blockToOrg :: PandocMonad m
+ => Block -- ^ Block element
+ -> Org m Doc
blockToOrg Null = return empty
blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do
contents <- blockListToOrg bs
@@ -123,36 +129,25 @@ blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do
blankline $$ contents $$
blankline $$ drawerEndTag $$
blankline
-blockToOrg (Div attrs bs) = do
+blockToOrg (Div (ident, classes, kv) bs) = do
contents <- blockListToOrg bs
+ -- if one class looks like the name of a greater block then output as such:
+ -- The ID, if present, is added via the #+NAME keyword; other classes and
+ -- key-value pairs are kept as #+ATTR_HTML attributes.
let isGreaterBlockClass = (`elem` ["center", "quote"]) . map toLower
- return $ case attrs of
- ("", [], []) ->
- -- nullAttr, treat contents as if it wasn't wrapped
- blankline $$ contents $$ blankline
- (ident, [], []) ->
- -- only an id: add id as an anchor, unwrap the rest
- blankline $$ "<<" <> text ident <> ">>" $$ contents $$ blankline
- (ident, classes, kv) ->
- -- if one class looks like the name of a greater block then output as
- -- such: The ID, if present, is added via the #+NAME keyword; other
- -- classes and key-value pairs are kept as #+ATTR_HTML attributes.
- let
- (blockTypeCand, classes') = partition isGreaterBlockClass classes
- in case blockTypeCand of
- (blockType:classes'') ->
- blankline $$ attrHtml (ident, classes'' <> classes', kv) $$
- "#+BEGIN_" <> text blockType $$ contents $$
- "#+END_" <> text blockType $$ blankline
- _ ->
- -- fallback: wrap in div tags
- let
- startTag = tagWithAttrs "div" attrs
- endTag = text "</div>"
- in blankline $$ "#+BEGIN_HTML" $$
- nest 2 startTag $$ "#+END_HTML" $$ blankline $$
- contents $$ blankline $$ "#+BEGIN_HTML" $$
- nest 2 endTag $$ "#+END_HTML" $$ blankline
+ (blockTypeCand, classes') = partition isGreaterBlockClass classes
+ return $ case blockTypeCand of
+ (blockType:classes'') ->
+ blankline $$ attrHtml (ident, classes'' <> classes', kv) $$
+ "#+BEGIN_" <> text blockType $$ contents $$
+ "#+END_" <> text blockType $$ blankline
+ _ ->
+ -- fallback with id: add id as an anchor if present, discard classes and
+ -- key-value pairs, unwrap the content.
+ let contents' = if not (null ident)
+ then "<<" <> text ident <> ">>" $$ contents
+ else contents
+ in blankline $$ contents' $$ blankline
blockToOrg (Plain inlines) = inlineListToOrg inlines
-- title beginning with fig: indicates that the image is a figure
blockToOrg (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
@@ -167,20 +162,22 @@ blockToOrg (Para inlines) = do
blockToOrg (LineBlock lns) = do
let splitStanza [] = []
splitStanza xs = case break (== mempty) xs of
- (l, []) -> l : []
+ (l, []) -> [l]
(l, _:r) -> l : splitStanza r
let joinWithLinefeeds = nowrap . mconcat . intersperse cr
let joinWithBlankLines = mconcat . intersperse blankline
- let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls
- contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns)
+ let prettifyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls
+ contents <- joinWithBlankLines <$> mapM prettifyStanza (splitStanza lns)
return $ blankline $$ "#+BEGIN_VERSE" $$
nest 2 contents $$ "#+END_VERSE" <> blankline
blockToOrg (RawBlock "html" str) =
return $ blankline $$ "#+BEGIN_HTML" $$
nest 2 (text str) $$ "#+END_HTML" $$ blankline
-blockToOrg (RawBlock f str) | isRawFormat f =
- return $ text str
-blockToOrg (RawBlock _ _) = return empty
+blockToOrg b@(RawBlock f str)
+ | isRawFormat f = return $ text str
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline
blockToOrg (Header level attr inlines) = do
contents <- inlineListToOrg inlines
@@ -190,7 +187,7 @@ blockToOrg (Header level attr inlines) = do
else cr <> nest (level + 1) (propertiesDrawer attr)
return $ headerStr <> " " <> contents <> drawerStr <> blankline
blockToOrg (CodeBlock (_,classes,_) str) = do
- opts <- stOptions <$> get
+ opts <- gets stOptions
let tabstop = writerTabStop opts
let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers
let (beg, end) = case at of
@@ -205,7 +202,7 @@ blockToOrg (Table caption' _ _ headers rows) = do
caption'' <- inlineListToOrg caption'
let caption = if null caption'
then empty
- else ("#+CAPTION: " <> caption'')
+ else "#+CAPTION: " <> caption''
headers' <- mapM blockListToOrg headers
rawRows <- mapM (mapM blockListToOrg) rows
let numChars = maximum . map offset
@@ -215,16 +212,16 @@ blockToOrg (Table caption' _ _ headers rows) = do
-- FIXME: Org doesn't allow blocks with height more than 1.
let hpipeBlocks blocks = hcat [beg, middle, end]
where h = maximum (1 : map height blocks)
- sep' = lblock 3 $ vcat (map text $ replicate h " | ")
- beg = lblock 2 $ vcat (map text $ replicate h "| ")
- end = lblock 2 $ vcat (map text $ replicate h " |")
+ sep' = lblock 3 $ vcat (replicate h (text " | "))
+ beg = lblock 2 $ vcat (replicate h (text "| "))
+ end = lblock 2 $ vcat (replicate h (text " |"))
middle = hcat $ intersperse sep' blocks
let makeRow = hpipeBlocks . zipWith lblock widthsInChars
let head' = makeRow headers'
rows' <- mapM (\row -> do cols <- mapM blockListToOrg row
return $ makeRow cols) rows
let border ch = char '|' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ (hcat . intersperse (char ch <> char '+' <> char ch) $
map (\l -> text $ replicate l ch) widthsInChars) <>
char ch <> char '|'
let body = vcat rows'
@@ -245,8 +242,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do
let maxMarkerLength = maximum $ map length markers
let markers' = map (\m -> let s = maxMarkerLength - length m
in m ++ replicate s ' ') markers
- contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $
- zip markers' items
+ contents <- zipWithM orderedListItemToOrg markers' items
-- ensure that sublists have preceding blank line
return $ blankline $$ vcat contents $$ blankline
blockToOrg (DefinitionList items) = do
@@ -254,25 +250,27 @@ blockToOrg (DefinitionList items) = do
return $ vcat contents $$ blankline
-- | Convert bullet list item (list of blocks) to Org.
-bulletListItemToOrg :: [Block] -> State WriterState Doc
+bulletListItemToOrg :: PandocMonad m => [Block] -> Org m Doc
bulletListItemToOrg items = do
contents <- blockListToOrg items
- return $ hang 3 "- " (contents <> cr)
+ return $ hang 2 "- " (contents <> cr)
-- | Convert ordered list item (a list of blocks) to Org.
-orderedListItemToOrg :: String -- ^ marker for list item
+orderedListItemToOrg :: PandocMonad m
+ => String -- ^ marker for list item
-> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
+ -> Org m Doc
orderedListItemToOrg marker items = do
contents <- blockListToOrg items
return $ hang (length marker + 1) (text marker <> space) (contents <> cr)
-- | Convert defintion list item (label, list of blocks) to Org.
-definitionListItemToOrg :: ([Inline], [[Block]]) -> State WriterState Doc
+definitionListItemToOrg :: PandocMonad m
+ => ([Inline], [[Block]]) -> Org m Doc
definitionListItemToOrg (label, defs) = do
label' <- inlineListToOrg label
- contents <- liftM vcat $ mapM blockListToOrg defs
- return $ hang 3 "- " $ label' <> " :: " <> (contents <> cr)
+ contents <- vcat <$> mapM blockListToOrg defs
+ return . hang 2 "- " $ label' <> " :: " <> (contents <> cr)
-- | Convert list of key/value pairs to Org :PROPERTIES: drawer.
propertiesDrawer :: Attr -> Doc
@@ -280,8 +278,8 @@ propertiesDrawer (ident, classes, kv) =
let
drawerStart = text ":PROPERTIES:"
drawerEnd = text ":END:"
- kv' = if (classes == mempty) then kv else ("CLASS", unwords classes):kv
- kv'' = if (ident == mempty) then kv' else ("CUSTOM_ID", ident):kv'
+ kv' = if classes == mempty then kv else ("CLASS", unwords classes):kv
+ kv'' = if ident == mempty then kv' else ("CUSTOM_ID", ident):kv'
properties = vcat $ map kvToOrgProperty kv''
in
drawerStart <> cr <> properties <> cr <> drawerEnd
@@ -294,23 +292,37 @@ attrHtml :: Attr -> Doc
attrHtml ("" , [] , []) = mempty
attrHtml (ident, classes, kvs) =
let
- name = if (null ident) then mempty else "#+NAME: " <> text ident <> cr
+ name = if null ident then mempty else "#+NAME: " <> text ident <> cr
keyword = "#+ATTR_HTML"
classKv = ("class", unwords classes)
kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs)
in name <> keyword <> ": " <> text (unwords kvStrings) <> cr
-- | Convert list of Pandoc block elements to Org.
-blockListToOrg :: [Block] -- ^ List of block elements
- -> State WriterState Doc
-blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat
+blockListToOrg :: PandocMonad m
+ => [Block] -- ^ List of block elements
+ -> Org m Doc
+blockListToOrg blocks = vcat <$> mapM blockToOrg blocks
-- | Convert list of Pandoc inline elements to Org.
-inlineListToOrg :: [Inline] -> State WriterState Doc
-inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat
+inlineListToOrg :: PandocMonad m
+ => [Inline]
+ -> Org m Doc
+inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst)
+ where fixMarkers [] = [] -- prevent note refs and list markers from wrapping, see #4171
+ fixMarkers (Space : x : rest) | shouldFix x =
+ Str " " : x : fixMarkers rest
+ fixMarkers (SoftBreak : x : rest) | shouldFix x =
+ Str " " : x : fixMarkers rest
+ fixMarkers (x : rest) = x : fixMarkers rest
+
+ shouldFix Note{} = True -- Prevent footnotes
+ shouldFix (Str "-") = True -- Prevent bullet list items
+ -- TODO: prevent ordered list items
+ shouldFix _ = False
-- | Convert Pandoc inline element to Org.
-inlineToOrg :: Inline -> State WriterState Doc
+inlineToOrg :: PandocMonad m => Inline -> Org m Doc
inlineToOrg (Span (uid, [], []) []) =
return $ "<<" <> text uid <> ">>"
inlineToOrg (Span _ lst) =
@@ -339,50 +351,48 @@ inlineToOrg (Quoted DoubleQuote lst) = do
return $ "\"" <> contents <> "\""
inlineToOrg (Cite _ lst) = inlineListToOrg lst
inlineToOrg (Code _ str) = return $ "=" <> text str <> "="
-inlineToOrg (Str str) = return $ text $ escapeString str
+inlineToOrg (Str str) = return . text $ escapeString str
inlineToOrg (Math t str) = do
modify $ \st -> st{ stHasMath = True }
return $ if t == InlineMath
then "$" <> text str <> "$"
else "$$" <> text str <> "$$"
-inlineToOrg (RawInline f@(Format f') str) =
- return $ if isRawFormat f
- then text str
- else "@@" <> text f' <> ":" <> text str <> "@@"
-inlineToOrg (LineBreak) = return (text "\\\\" <> cr)
+inlineToOrg il@(RawInline f str)
+ | isRawFormat f = return $ text str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToOrg LineBreak = return (text "\\\\" <> cr)
inlineToOrg Space = return space
inlineToOrg SoftBreak = do
wrapText <- gets (writerWrapText . stOptions)
case wrapText of
- WrapPreserve -> return cr
- WrapAuto -> return space
- WrapNone -> return space
-inlineToOrg (Link _ txt (src, _)) = do
+ WrapPreserve -> return cr
+ WrapAuto -> return space
+ WrapNone -> return space
+inlineToOrg (Link _ txt (src, _)) =
case txt of
[Str x] | escapeURI x == src -> -- autolink
- do modify $ \s -> s{ stLinks = True }
- return $ "[[" <> text (orgPath x) <> "]]"
+ return $ "[[" <> text (orgPath x) <> "]]"
_ -> do contents <- inlineListToOrg txt
- modify $ \s -> s{ stLinks = True }
return $ "[[" <> text (orgPath src) <> "][" <> contents <> "]]"
-inlineToOrg (Image _ _ (source, _)) = do
- modify $ \s -> s{ stImages = True }
+inlineToOrg (Image _ _ (source, _)) =
return $ "[[" <> text (orgPath source) <> "]]"
inlineToOrg (Note contents) = do
-- add to notes in state
- notes <- get >>= (return . stNotes)
+ notes <- gets stNotes
modify $ \st -> st { stNotes = contents:notes }
- let ref = show $ (length notes) + 1
+ let ref = show $ length notes + 1
return $ "[fn:" <> text ref <> "]"
orgPath :: String -> String
orgPath src =
case src of
- [] -> mempty -- wiki link
- ('#':xs) -> xs -- internal link
- _ | isUrl src -> src
- _ | isFilePath src -> src
- _ -> "file:" <> src
+ [] -> mempty -- wiki link
+ ('#':_) -> src -- internal link
+ _ | isUrl src -> src
+ _ | isFilePath src -> src
+ _ -> "file:" <> src
where
isFilePath :: String -> Bool
isFilePath cs = any (`isPrefixOf` cs) ["/", "./", "../", "file:"]
diff --git a/src/Text/Pandoc/Writers/Powerpoint.hs b/src/Text/Pandoc/Writers/Powerpoint.hs
new file mode 100644
index 000000000..645a4cb86
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Powerpoint.hs
@@ -0,0 +1,63 @@
+
+
+{-
+Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Powerpoint
+ Copyright : Copyright (C) 2017-2018 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to powerpoint (pptx). -}
+
+{-
+This is a wrapper around two modules:
+
+ - Text.Pandoc.Writers.Powerpoint.Presentation (which converts a
+ pandoc document into a Presentation datatype), and
+
+ - Text.Pandoc.Writers.Powerpoint.Output (which converts a
+ Presentation into a zip archive, which can be output).
+-}
+
+module Text.Pandoc.Writers.Powerpoint (writePowerpoint) where
+
+import Codec.Archive.Zip
+import Text.Pandoc.Definition
+import Text.Pandoc.Walk
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Options (WriterOptions)
+import Text.Pandoc.Writers.Shared (fixDisplayMath)
+import Text.Pandoc.Writers.Powerpoint.Presentation (documentToPresentation)
+import Text.Pandoc.Writers.Powerpoint.Output (presentationToArchive)
+import qualified Data.ByteString.Lazy as BL
+
+writePowerpoint :: (PandocMonad m)
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> m BL.ByteString
+writePowerpoint opts (Pandoc meta blks) = do
+ let blks' = walk fixDisplayMath blks
+ let (pres, logMsgs) = documentToPresentation opts (Pandoc meta blks')
+ mapM_ report logMsgs
+ archv <- presentationToArchive opts pres
+ return $ fromArchive archv
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
new file mode 100644
index 000000000..410b6c20c
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -0,0 +1,1834 @@
+{-# LANGUAGE PatternGuards #-}
+
+{-
+Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Powerpoint.Output
+ Copyright : Copyright (C) 2017-2018 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of Presentation datatype (defined in
+Text.Pandoc.Writers.Powerpoint.Presentation) to a zip archive.
+-}
+
+module Text.Pandoc.Writers.Powerpoint.Output ( presentationToArchive
+ ) where
+
+import Control.Monad.Except (throwError, catchError)
+import Control.Monad.Reader
+import Control.Monad.State
+import Codec.Archive.Zip
+import Data.Char (toUpper)
+import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse)
+import Data.Default
+import Text.Pandoc.Compat.Time (formatTime, defaultTimeLocale)
+import Data.Time.Clock (UTCTime)
+import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
+import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
+import Text.XML.Light
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Error (PandocError(..))
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.Options
+import Text.Pandoc.MIME
+import qualified Data.ByteString.Lazy as BL
+import Text.Pandoc.Writers.OOXML
+import qualified Data.Map as M
+import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, isJust, maybeToList, catMaybes)
+import Text.Pandoc.ImageSize
+import Control.Applicative ((<|>))
+import System.FilePath.Glob
+import Text.TeXMath
+import Text.Pandoc.Writers.Math (convertMath)
+import Text.Pandoc.Writers.Powerpoint.Presentation
+import Skylighting (fromColor)
+
+-- This populates the global ids map with images already in the
+-- template, so the ids won't be used by images introduced by the
+-- user.
+initialGlobalIds :: Archive -> Archive -> M.Map FilePath Int
+initialGlobalIds refArchive distArchive =
+ let archiveFiles = filesInArchive refArchive `union` filesInArchive distArchive
+ mediaPaths = filter (isPrefixOf "ppt/media/image") archiveFiles
+
+ go :: FilePath -> Maybe (FilePath, Int)
+ go fp = do
+ s <- stripPrefix "ppt/media/image" $ fst $ splitExtension fp
+ (n, _) <- listToMaybe $ reads s
+ return (fp, n)
+ in
+ M.fromList $ mapMaybe go mediaPaths
+
+getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer)
+getPresentationSize refArchive distArchive = do
+ entry <- findEntryByPath "ppt/presentation.xml" refArchive `mplus`
+ findEntryByPath "ppt/presentation.xml" distArchive
+ presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry
+ let ns = elemToNameSpaces presElement
+ sldSize <- findChild (elemName ns "p" "sldSz") presElement
+ cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
+ cyS <- findAttr (QName "cy" Nothing Nothing) sldSize
+ (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String)
+ (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String)
+ return (cx `div` 12700, cy `div` 12700)
+
+data WriterEnv = WriterEnv { envRefArchive :: Archive
+ , envDistArchive :: Archive
+ , envUTCTime :: UTCTime
+ , envOpts :: WriterOptions
+ , envPresentationSize :: (Integer, Integer)
+ , envSlideHasHeader :: Bool
+ , envInList :: Bool
+ , envInNoteSlide :: Bool
+ , envCurSlideId :: Int
+ -- the difference between the number at
+ -- the end of the slide file name and
+ -- the rId number
+ , envSlideIdOffset :: Int
+ , envContentType :: ContentType
+ , envSlideIdMap :: M.Map SlideId Int
+ -- maps the slide number to the
+ -- corresponding notes id number. If there
+ -- are no notes for a slide, there will be
+ -- no entry in the map for it.
+ , envSpeakerNotesIdMap :: M.Map Int Int
+ }
+ deriving (Show)
+
+instance Default WriterEnv where
+ def = WriterEnv { envRefArchive = emptyArchive
+ , envDistArchive = emptyArchive
+ , envUTCTime = posixSecondsToUTCTime 0
+ , envOpts = def
+ , envPresentationSize = (720, 540)
+ , envSlideHasHeader = False
+ , envInList = False
+ , envInNoteSlide = False
+ , envCurSlideId = 1
+ , envSlideIdOffset = 1
+ , envContentType = NormalContent
+ , envSlideIdMap = mempty
+ , envSpeakerNotesIdMap = mempty
+ }
+
+data ContentType = NormalContent
+ | TwoColumnLeftContent
+ | TwoColumnRightContent
+ deriving (Show, Eq)
+
+data MediaInfo = MediaInfo { mInfoFilePath :: FilePath
+ , mInfoLocalId :: Int
+ , mInfoGlobalId :: Int
+ , mInfoMimeType :: Maybe MimeType
+ , mInfoExt :: Maybe String
+ , mInfoCaption :: Bool
+ } deriving (Show, Eq)
+
+data WriterState = WriterState { stLinkIds :: M.Map Int (M.Map Int LinkTarget)
+ -- (FP, Local ID, Global ID, Maybe Mime)
+ , stMediaIds :: M.Map Int [MediaInfo]
+ , stMediaGlobalIds :: M.Map FilePath Int
+ } deriving (Show, Eq)
+
+instance Default WriterState where
+ def = WriterState { stLinkIds = mempty
+ , stMediaIds = mempty
+ , stMediaGlobalIds = mempty
+ }
+
+type P m = ReaderT WriterEnv (StateT WriterState m)
+
+runP :: Monad m => WriterEnv -> WriterState -> P m a -> m a
+runP env st p = evalStateT (runReaderT p env) st
+
+--------------------------------------------------------------------
+
+copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive
+copyFileToArchive arch fp = do
+ refArchive <- asks envRefArchive
+ distArchive <- asks envDistArchive
+ case findEntryByPath fp refArchive `mplus` findEntryByPath fp distArchive of
+ Nothing -> fail $ fp ++ " missing in reference file"
+ Just e -> return $ addEntryToArchive e arch
+
+alwaysInheritedPatterns :: [Pattern]
+alwaysInheritedPatterns =
+ map compile [ "docProps/app.xml"
+ , "ppt/slideLayouts/slideLayout*.xml"
+ , "ppt/slideLayouts/_rels/slideLayout*.xml.rels"
+ , "ppt/slideMasters/slideMaster1.xml"
+ , "ppt/slideMasters/_rels/slideMaster1.xml.rels"
+ , "ppt/theme/theme1.xml"
+ , "ppt/theme/_rels/theme1.xml.rels"
+ , "ppt/presProps.xml"
+ , "ppt/viewProps.xml"
+ , "ppt/tableStyles.xml"
+ , "ppt/media/image*"
+ ]
+
+-- We only look for these under special conditions
+contingentInheritedPatterns :: Presentation -> [Pattern]
+contingentInheritedPatterns pres = [] ++
+ if presHasSpeakerNotes pres
+ then map compile [ "ppt/notesMasters/notesMaster*.xml"
+ , "ppt/notesMasters/_rels/notesMaster*.xml.rels"
+ , "ppt/theme/theme2.xml"
+ , "ppt/theme/_rels/theme2.xml.rels"
+ ]
+ else []
+
+inheritedPatterns :: Presentation -> [Pattern]
+inheritedPatterns pres =
+ alwaysInheritedPatterns ++ contingentInheritedPatterns pres
+
+patternToFilePaths :: PandocMonad m => Pattern -> P m [FilePath]
+patternToFilePaths pat = do
+ refArchive <- asks envRefArchive
+ distArchive <- asks envDistArchive
+
+ let archiveFiles = filesInArchive refArchive `union` filesInArchive distArchive
+ return $ filter (match pat) archiveFiles
+
+patternsToFilePaths :: PandocMonad m => [Pattern] -> P m [FilePath]
+patternsToFilePaths pats = concat <$> mapM patternToFilePaths pats
+
+-- Here are the files we'll require to make a Powerpoint document. If
+-- any of these are missing, we should error out of our build.
+requiredFiles :: [FilePath]
+requiredFiles = [ "docProps/app.xml"
+ , "ppt/presProps.xml"
+ , "ppt/slideLayouts/slideLayout1.xml"
+ , "ppt/slideLayouts/_rels/slideLayout1.xml.rels"
+ , "ppt/slideLayouts/slideLayout2.xml"
+ , "ppt/slideLayouts/_rels/slideLayout2.xml.rels"
+ , "ppt/slideLayouts/slideLayout3.xml"
+ , "ppt/slideLayouts/_rels/slideLayout3.xml.rels"
+ , "ppt/slideLayouts/slideLayout4.xml"
+ , "ppt/slideLayouts/_rels/slideLayout4.xml.rels"
+ , "ppt/slideMasters/slideMaster1.xml"
+ , "ppt/slideMasters/_rels/slideMaster1.xml.rels"
+ , "ppt/theme/theme1.xml"
+ , "ppt/viewProps.xml"
+ , "ppt/tableStyles.xml"
+ ]
+
+presentationToArchiveP :: PandocMonad m => Presentation -> P m Archive
+presentationToArchiveP p@(Presentation docProps slides) = do
+ filePaths <- patternsToFilePaths $ inheritedPatterns p
+
+ -- make sure all required files are available:
+ let missingFiles = filter (\fp -> not (fp `elem` filePaths)) requiredFiles
+ unless (null missingFiles)
+ (throwError $
+ PandocSomeError $
+ "The following required files are missing:\n" ++
+ (unlines $ map (" " ++) missingFiles)
+ )
+
+ newArch' <- foldM copyFileToArchive emptyArchive filePaths
+ -- we make a docProps/core.xml entry out of the presentation docprops
+ docPropsEntry <- docPropsToEntry docProps
+ -- we make this ourself in case there's something unexpected in the
+ -- one in the reference doc.
+ relsEntry <- topLevelRelsEntry
+ -- presentation entry and rels. We have to do the rels first to make
+ -- sure we know the correct offset for the rIds.
+ presEntry <- presentationToPresEntry p
+ presRelsEntry <- presentationToRelsEntry p
+ slideEntries <- mapM slideToEntry slides
+ slideRelEntries <- mapM slideToSlideRelEntry slides
+ spkNotesEntries <- catMaybes <$> mapM slideToSpeakerNotesEntry slides
+ spkNotesRelEntries <- catMaybes <$> mapM slideToSpeakerNotesRelEntry slides
+ -- These have to come after everything, because they need the info
+ -- built up in the state.
+ mediaEntries <- makeMediaEntries
+ contentTypesEntry <- presentationToContentTypes p >>= contentTypesToEntry
+ -- fold everything into our inherited archive and return it.
+ return $ foldr addEntryToArchive newArch' $
+ slideEntries ++
+ slideRelEntries ++
+ spkNotesEntries ++
+ spkNotesRelEntries ++
+ mediaEntries ++
+ [contentTypesEntry, docPropsEntry, relsEntry, presEntry, presRelsEntry]
+
+makeSlideIdMap :: Presentation -> M.Map SlideId Int
+makeSlideIdMap (Presentation _ slides) =
+ M.fromList $ (map slideId slides) `zip` [1..]
+
+makeSpeakerNotesMap :: Presentation -> M.Map Int Int
+makeSpeakerNotesMap (Presentation _ slides) =
+ M.fromList $ (mapMaybe f $ slides `zip` [1..]) `zip` [1..]
+ where f (Slide _ _ Nothing, _) = Nothing
+ f (Slide _ _ (Just _), n) = Just n
+
+presentationToArchive :: PandocMonad m => WriterOptions -> Presentation -> m Archive
+presentationToArchive opts pres = do
+ distArchive <- (toArchive . BL.fromStrict) <$>
+ P.readDefaultDataFile "reference.pptx"
+ refArchive <- case writerReferenceDoc opts of
+ Just f -> toArchive <$> P.readFileLazy f
+ Nothing -> (toArchive . BL.fromStrict) <$>
+ P.readDataFile "reference.pptx"
+
+ utctime <- P.getCurrentTime
+
+ presSize <- case getPresentationSize refArchive distArchive of
+ Just sz -> return sz
+ Nothing -> throwError $
+ PandocSomeError $
+ "Could not determine presentation size"
+
+ let env = def { envRefArchive = refArchive
+ , envDistArchive = distArchive
+ , envUTCTime = utctime
+ , envOpts = opts
+ , envPresentationSize = presSize
+ , envSlideIdMap = makeSlideIdMap pres
+ , envSpeakerNotesIdMap = makeSpeakerNotesMap pres
+ }
+
+ let st = def { stMediaGlobalIds = initialGlobalIds refArchive distArchive
+ }
+
+ runP env st $ presentationToArchiveP pres
+
+
+
+--------------------------------------------------
+
+-- Check to see if the presentation has speaker notes. This will
+-- influence whether we import the notesMaster template.
+presHasSpeakerNotes :: Presentation -> Bool
+presHasSpeakerNotes (Presentation _ slides) = any isJust $ map slideSpeakerNotes slides
+
+curSlideHasSpeakerNotes :: PandocMonad m => P m Bool
+curSlideHasSpeakerNotes = do
+ sldId <- asks envCurSlideId
+ notesIdMap <- asks envSpeakerNotesIdMap
+ return $ isJust $ M.lookup sldId notesIdMap
+
+--------------------------------------------------
+
+getLayout :: PandocMonad m => Layout -> P m Element
+getLayout layout = do
+ let layoutpath = case layout of
+ (MetadataSlide _ _ _ _) -> "ppt/slideLayouts/slideLayout1.xml"
+ (TitleSlide _) -> "ppt/slideLayouts/slideLayout3.xml"
+ (ContentSlide _ _) -> "ppt/slideLayouts/slideLayout2.xml"
+ (TwoColumnSlide _ _ _) -> "ppt/slideLayouts/slideLayout4.xml"
+ distArchive <- asks envDistArchive
+ root <- case findEntryByPath layoutpath distArchive of
+ Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of
+ Just element -> return $ element
+ Nothing -> throwError $
+ PandocSomeError $
+ layoutpath ++ " corrupt in reference file"
+ Nothing -> throwError $
+ PandocSomeError $
+ layoutpath ++ " missing in reference file"
+ return root
+
+shapeHasId :: NameSpaces -> String -> Element -> Bool
+shapeHasId ns ident element
+ | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element
+ , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr
+ , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr =
+ nm == ident
+ | otherwise = False
+
+-- The content shape in slideLayout2 (Title/Content) has id=3 In
+-- slideLayout4 (two column) the left column is id=3, and the right
+-- column is id=4.
+getContentShape :: PandocMonad m => NameSpaces -> Element -> P m Element
+getContentShape ns spTreeElem
+ | isElem ns "p" "spTree" spTreeElem = do
+ contentType <- asks envContentType
+ let idx = case contentType of
+ NormalContent -> "1"
+ TwoColumnLeftContent -> "1"
+ TwoColumnRightContent -> "2"
+ case getShapeByPlaceHolderIndex ns spTreeElem idx of
+ Just e -> return e
+ Nothing -> throwError $
+ PandocSomeError $
+ "Could not find shape for Powerpoint content"
+getContentShape _ _ = throwError $
+ PandocSomeError $
+ "Attempted to find content on non shapeTree"
+
+getShapeDimensions :: NameSpaces
+ -> Element
+ -> Maybe ((Integer, Integer), (Integer, Integer))
+getShapeDimensions ns element
+ | isElem ns "p" "sp" element = do
+ spPr <- findChild (elemName ns "p" "spPr") element
+ xfrm <- findChild (elemName ns "a" "xfrm") spPr
+ off <- findChild (elemName ns "a" "off") xfrm
+ xS <- findAttr (QName "x" Nothing Nothing) off
+ yS <- findAttr (QName "y" Nothing Nothing) off
+ ext <- findChild (elemName ns "a" "ext") xfrm
+ cxS <- findAttr (QName "cx" Nothing Nothing) ext
+ cyS <- findAttr (QName "cy" Nothing Nothing) ext
+ (x, _) <- listToMaybe $ reads xS
+ (y, _) <- listToMaybe $ reads yS
+ (cx, _) <- listToMaybe $ reads cxS
+ (cy, _) <- listToMaybe $ reads cyS
+ return $ ((x `div` 12700, y `div` 12700), (cx `div` 12700, cy `div` 12700))
+ | otherwise = Nothing
+
+
+getMasterShapeDimensionsById :: String
+ -> Element
+ -> Maybe ((Integer, Integer), (Integer, Integer))
+getMasterShapeDimensionsById ident master = do
+ let ns = elemToNameSpaces master
+ cSld <- findChild (elemName ns "p" "cSld") master
+ spTree <- findChild (elemName ns "p" "spTree") cSld
+ sp <- filterChild (\e -> (isElem ns "p" "sp" e) && (shapeHasId ns ident e)) spTree
+ getShapeDimensions ns sp
+
+getContentShapeSize :: PandocMonad m
+ => NameSpaces
+ -> Element
+ -> Element
+ -> P m ((Integer, Integer), (Integer, Integer))
+getContentShapeSize ns layout master
+ | isElem ns "p" "sldLayout" layout
+ , Just cSld <- findChild (elemName ns "p" "cSld") layout
+ , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
+ sp <- getContentShape ns spTree
+ case getShapeDimensions ns sp of
+ Just sz -> return sz
+ Nothing -> do let mbSz =
+ findChild (elemName ns "p" "nvSpPr") sp >>=
+ findChild (elemName ns "p" "cNvPr") >>=
+ findAttr (QName "id" Nothing Nothing) >>=
+ flip getMasterShapeDimensionsById master
+ case mbSz of
+ Just sz' -> return sz'
+ Nothing -> throwError $
+ PandocSomeError $
+ "Couldn't find necessary content shape size"
+getContentShapeSize _ _ _ = throwError $
+ PandocSomeError $
+ "Attempted to find content shape size in non-layout"
+
+replaceNamedChildren :: NameSpaces
+ -> String
+ -> String
+ -> [Element]
+ -> Element
+ -> Element
+replaceNamedChildren ns prefix name newKids element =
+ element { elContent = concat $ fun True $ elContent element }
+ where
+ fun :: Bool -> [Content] -> [[Content]]
+ fun _ [] = []
+ fun switch ((Elem e) : conts) | isElem ns prefix name e =
+ if switch
+ then (map Elem $ newKids) : fun False conts
+ else fun False conts
+ fun switch (cont : conts) = [cont] : fun switch conts
+
+----------------------------------------------------------------
+
+registerLink :: PandocMonad m => LinkTarget -> P m Int
+registerLink link = do
+ curSlideId <- asks envCurSlideId
+ linkReg <- gets stLinkIds
+ mediaReg <- gets stMediaIds
+ hasSpeakerNotes <- curSlideHasSpeakerNotes
+ let maxLinkId = case M.lookup curSlideId linkReg of
+ Just mp -> case M.keys mp of
+ [] -> if hasSpeakerNotes then 2 else 1
+ ks -> maximum ks
+ Nothing -> if hasSpeakerNotes then 2 else 1
+ maxMediaId = case M.lookup curSlideId mediaReg of
+ Just [] -> if hasSpeakerNotes then 2 else 1
+ Just mInfos -> maximum $ map mInfoLocalId mInfos
+ Nothing -> if hasSpeakerNotes then 2 else 1
+ maxId = max maxLinkId maxMediaId
+ slideLinks = case M.lookup curSlideId linkReg of
+ Just mp -> M.insert (maxId + 1) link mp
+ Nothing -> M.singleton (maxId + 1) link
+ modify $ \st -> st{ stLinkIds = M.insert curSlideId slideLinks linkReg}
+ return $ maxId + 1
+
+registerMedia :: PandocMonad m => FilePath -> [ParaElem] -> P m MediaInfo
+registerMedia fp caption = do
+ curSlideId <- asks envCurSlideId
+ linkReg <- gets stLinkIds
+ mediaReg <- gets stMediaIds
+ globalIds <- gets stMediaGlobalIds
+ hasSpeakerNotes <- curSlideHasSpeakerNotes
+ let maxLinkId = case M.lookup curSlideId linkReg of
+ Just mp -> case M.keys mp of
+ [] -> if hasSpeakerNotes then 2 else 1
+ ks -> maximum ks
+ Nothing -> if hasSpeakerNotes then 2 else 1
+ maxMediaId = case M.lookup curSlideId mediaReg of
+ Just [] -> if hasSpeakerNotes then 2 else 1
+ Just mInfos -> maximum $ map mInfoLocalId mInfos
+ Nothing -> if hasSpeakerNotes then 2 else 1
+ maxLocalId = max maxLinkId maxMediaId
+
+ maxGlobalId = case M.elems globalIds of
+ [] -> 0
+ ids -> maximum ids
+
+ (imgBytes, mbMt) <- P.fetchItem fp
+ let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ '.':x))
+ <|>
+ case imageType imgBytes of
+ Just Png -> Just ".png"
+ Just Jpeg -> Just ".jpeg"
+ Just Gif -> Just ".gif"
+ Just Pdf -> Just ".pdf"
+ Just Eps -> Just ".eps"
+ Just Svg -> Just ".svg"
+ Just Emf -> Just ".emf"
+ Nothing -> Nothing
+
+ let newGlobalId = case M.lookup fp globalIds of
+ Just ident -> ident
+ Nothing -> maxGlobalId + 1
+
+ let newGlobalIds = M.insert fp newGlobalId globalIds
+
+ let mediaInfo = MediaInfo { mInfoFilePath = fp
+ , mInfoLocalId = maxLocalId + 1
+ , mInfoGlobalId = newGlobalId
+ , mInfoMimeType = mbMt
+ , mInfoExt = imgExt
+ , mInfoCaption = (not . null) caption
+ }
+
+ let slideMediaInfos = case M.lookup curSlideId mediaReg of
+ Just minfos -> mediaInfo : minfos
+ Nothing -> [mediaInfo]
+
+
+ modify $ \st -> st{ stMediaIds = M.insert curSlideId slideMediaInfos mediaReg
+ , stMediaGlobalIds = newGlobalIds
+ }
+ return mediaInfo
+
+makeMediaEntry :: PandocMonad m => MediaInfo -> P m Entry
+makeMediaEntry mInfo = do
+ epochtime <- (floor . utcTimeToPOSIXSeconds) <$> asks envUTCTime
+ (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo)
+ let ext = case mInfoExt mInfo of
+ Just e -> e
+ Nothing -> ""
+ let fp = "ppt/media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext
+ return $ toEntry fp epochtime $ BL.fromStrict imgBytes
+
+makeMediaEntries :: PandocMonad m => P m [Entry]
+makeMediaEntries = do
+ mediaInfos <- gets stMediaIds
+ let allInfos = mconcat $ M.elems mediaInfos
+ mapM makeMediaEntry allInfos
+
+-- -- | Scales the image to fit the page
+-- -- sizes are passed in emu
+-- fitToPage' :: (Double, Double) -- image size in emu
+-- -> Integer -- pageWidth
+-- -> Integer -- pageHeight
+-- -> (Integer, Integer) -- imagesize
+-- fitToPage' (x, y) pageWidth pageHeight
+-- -- Fixes width to the page width and scales the height
+-- | x <= fromIntegral pageWidth && y <= fromIntegral pageHeight =
+-- (floor x, floor y)
+-- | x / fromIntegral pageWidth > y / fromIntegral pageWidth =
+-- (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y)
+-- | otherwise =
+-- (floor $ ((fromIntegral pageHeight) / y) * x, pageHeight)
+
+-- positionImage :: (Double, Double) -> Integer -> Integer -> (Integer, Integer)
+-- positionImage (x, y) pageWidth pageHeight =
+-- let (x', y') = fitToPage' (x, y) pageWidth pageHeight
+-- in
+-- ((pageWidth - x') `div` 2, (pageHeight - y') `div` 2)
+
+getMaster :: PandocMonad m => P m Element
+getMaster = do
+ refArchive <- asks envRefArchive
+ distArchive <- asks envDistArchive
+ parseXml refArchive distArchive "ppt/slideMasters/slideMaster1.xml"
+
+-- We want to get the header dimensions, so we can make sure that the
+-- image goes underneath it. We only use this in a content slide if it
+-- has a header.
+
+-- getHeaderSize :: PandocMonad m => P m ((Integer, Integer), (Integer, Integer))
+-- getHeaderSize = do
+-- master <- getMaster
+-- let ns = elemToNameSpaces master
+-- sps = [master] >>=
+-- findChildren (elemName ns "p" "cSld") >>=
+-- findChildren (elemName ns "p" "spTree") >>=
+-- findChildren (elemName ns "p" "sp")
+-- mbXfrm =
+-- listToMaybe (filter (shapeHasName ns "Title Placeholder 1") sps) >>=
+-- findChild (elemName ns "p" "spPr") >>=
+-- findChild (elemName ns "a" "xfrm")
+-- xoff = mbXfrm >>=
+-- findChild (elemName ns "a" "off") >>=
+-- findAttr (QName "x" Nothing Nothing) >>=
+-- (listToMaybe . (\s -> reads s :: [(Integer, String)]))
+-- yoff = mbXfrm >>=
+-- findChild (elemName ns "a" "off") >>=
+-- findAttr (QName "y" Nothing Nothing) >>=
+-- (listToMaybe . (\s -> reads s :: [(Integer, String)]))
+-- xext = mbXfrm >>=
+-- findChild (elemName ns "a" "ext") >>=
+-- findAttr (QName "cx" Nothing Nothing) >>=
+-- (listToMaybe . (\s -> reads s :: [(Integer, String)]))
+-- yext = mbXfrm >>=
+-- findChild (elemName ns "a" "ext") >>=
+-- findAttr (QName "cy" Nothing Nothing) >>=
+-- (listToMaybe . (\s -> reads s :: [(Integer, String)]))
+-- off = case xoff of
+-- Just (xoff', _) | Just (yoff',_) <- yoff -> (xoff', yoff')
+-- _ -> (1043490, 1027664)
+-- ext = case xext of
+-- Just (xext', _) | Just (yext',_) <- yext -> (xext', yext')
+-- _ -> (7024744, 1143000)
+-- return $ (off, ext)
+
+-- Hard-coded for now
+-- captionPosition :: ((Integer, Integer), (Integer, Integer))
+-- captionPosition = ((457200, 6061972), (8229600, 527087))
+
+captionHeight :: Integer
+captionHeight = 40
+
+createCaption :: PandocMonad m
+ => ((Integer, Integer), (Integer, Integer))
+ -> [ParaElem]
+ -> P m Element
+createCaption contentShapeDimensions paraElements = do
+ let para = Paragraph def{pPropAlign = Just AlgnCenter} paraElements
+ elements <- mapM paragraphToElement [para]
+ let ((x, y), (cx, cy)) = contentShapeDimensions
+ let txBody = mknode "p:txBody" [] $
+ [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements
+ return $
+ mknode "p:sp" [] [ mknode "p:nvSpPr" []
+ [ mknode "p:cNvPr" [("id","1"), ("name","TextBox 3")] ()
+ , mknode "p:cNvSpPr" [("txBox", "1")] ()
+ , mknode "p:nvPr" [] ()
+ ]
+ , mknode "p:spPr" []
+ [ mknode "a:xfrm" []
+ [ mknode "a:off" [("x", show $ 12700 * x),
+ ("y", show $ 12700 * (y + cy - captionHeight))] ()
+ , mknode "a:ext" [("cx", show $ 12700 * cx),
+ ("cy", show $ 12700 * captionHeight)] ()
+ ]
+ , mknode "a:prstGeom" [("prst", "rect")]
+ [ mknode "a:avLst" [] ()
+ ]
+ , mknode "a:noFill" [] ()
+ ]
+ , txBody
+ ]
+
+makePicElements :: PandocMonad m
+ => Element
+ -> PicProps
+ -> MediaInfo
+ -> [ParaElem]
+ -> P m [Element]
+makePicElements layout picProps mInfo alt = do
+ opts <- asks envOpts
+ (pageWidth, pageHeight) <- asks envPresentationSize
+ -- hasHeader <- asks envSlideHasHeader
+ let hasCaption = mInfoCaption mInfo
+ (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo)
+ let (pxX, pxY) = case imageSize opts imgBytes of
+ Right sz -> sizeInPixels $ sz
+ Left _ -> sizeInPixels $ def
+ master <- getMaster
+ let ns = elemToNameSpaces layout
+ ((x, y), (cx, cytmp)) <- getContentShapeSize ns layout master
+ `catchError`
+ (\_ -> return ((0, 0), (pageWidth, pageHeight)))
+
+ let cy = if hasCaption then cytmp - captionHeight else cytmp
+
+ let imgRatio = fromIntegral pxX / fromIntegral pxY :: Double
+ boxRatio = fromIntegral cx / fromIntegral cy :: Double
+ (dimX, dimY) = if imgRatio > boxRatio
+ then (fromIntegral cx, fromIntegral cx / imgRatio)
+ else (fromIntegral cy * imgRatio, fromIntegral cy)
+
+ (dimX', dimY') = (round dimX * 12700, round dimY * 12700) :: (Integer, Integer)
+ (xoff, yoff) = (fromIntegral x + (fromIntegral cx - dimX) / 2,
+ fromIntegral y + (fromIntegral cy - dimY) / 2)
+ (xoff', yoff') = (round xoff * 12700, round yoff * 12700) :: (Integer, Integer)
+
+ let cNvPicPr = mknode "p:cNvPicPr" [] $
+ mknode "a:picLocks" [("noGrp","1")
+ ,("noChangeAspect","1")] ()
+ -- cNvPr will contain the link information so we do that separately,
+ -- and register the link if necessary.
+ let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")]
+ cNvPr <- case picPropLink picProps of
+ Just link -> do idNum <- registerLink link
+ return $ mknode "p:cNvPr" cNvPrAttr $
+ mknode "a:hlinkClick" [("r:id", "rId" ++ show idNum)] ()
+ Nothing -> return $ mknode "p:cNvPr" cNvPrAttr ()
+ let nvPicPr = mknode "p:nvPicPr" []
+ [ cNvPr
+ , cNvPicPr
+ , mknode "p:nvPr" [] ()]
+ let blipFill = mknode "p:blipFill" []
+ [ mknode "a:blip" [("r:embed", "rId" ++ (show $ mInfoLocalId mInfo))] ()
+ , mknode "a:stretch" [] $
+ mknode "a:fillRect" [] () ]
+ let xfrm = mknode "a:xfrm" []
+ [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] ()
+ , mknode "a:ext" [("cx",show dimX')
+ ,("cy",show dimY')] () ]
+ let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
+ mknode "a:avLst" [] ()
+ let ln = mknode "a:ln" [("w","9525")]
+ [ mknode "a:noFill" [] ()
+ , mknode "a:headEnd" [] ()
+ , mknode "a:tailEnd" [] () ]
+ let spPr = mknode "p:spPr" [("bwMode","auto")]
+ [xfrm, prstGeom, mknode "a:noFill" [] (), ln]
+
+ let picShape = mknode "p:pic" []
+ [ nvPicPr
+ , blipFill
+ , spPr ]
+
+ -- And now, maybe create the caption:
+ if hasCaption
+ then do cap <- createCaption ((x, y), (cx, cytmp)) alt
+ return [picShape, cap]
+ else return [picShape]
+
+
+paraElemToElement :: PandocMonad m => ParaElem -> P m Element
+paraElemToElement Break = return $ mknode "a:br" [] ()
+paraElemToElement (Run rpr s) = do
+ let sizeAttrs = case rPropForceSize rpr of
+ Just n -> [("sz", (show $ n * 100))]
+ Nothing -> if rPropCode rpr
+ -- hardcoded size for code for now
+ then [("sz", "1800")]
+ else []
+ attrs = sizeAttrs ++
+ (if rPropBold rpr then [("b", "1")] else []) ++
+ (if rPropItalics rpr then [("i", "1")] else []) ++
+ (if rPropUnderline rpr then [("u", "sng")] else []) ++
+ (case rStrikethrough rpr of
+ Just NoStrike -> [("strike", "noStrike")]
+ Just SingleStrike -> [("strike", "sngStrike")]
+ Just DoubleStrike -> [("strike", "dblStrike")]
+ Nothing -> []) ++
+ (case rBaseline rpr of
+ Just n -> [("baseline", show n)]
+ Nothing -> []) ++
+ (case rCap rpr of
+ Just NoCapitals -> [("cap", "none")]
+ Just SmallCapitals -> [("cap", "small")]
+ Just AllCapitals -> [("cap", "all")]
+ Nothing -> []) ++
+ []
+ linkProps <- case rLink rpr of
+ Just link -> do
+ idNum <- registerLink link
+ -- first we have to make sure that if it's an
+ -- anchor, it's in the anchor map. If not, there's
+ -- no link.
+ return $ case link of
+ InternalTarget _ ->
+ let linkAttrs =
+ [ ("r:id", "rId" ++ show idNum)
+ , ("action", "ppaction://hlinksldjump")
+ ]
+ in [mknode "a:hlinkClick" linkAttrs ()]
+ -- external
+ ExternalTarget _ ->
+ let linkAttrs =
+ [ ("r:id", "rId" ++ show idNum)
+ ]
+ in [mknode "a:hlinkClick" linkAttrs ()]
+ Nothing -> return []
+ let colorContents = case rSolidFill rpr of
+ Just color ->
+ case fromColor color of
+ '#':hx -> [mknode "a:solidFill" []
+ [mknode "a:srgbClr" [("val", map toUpper hx)] ()]
+ ]
+ _ -> []
+ Nothing -> []
+ let codeContents = if rPropCode rpr
+ then [mknode "a:latin" [("typeface", "Courier")] ()]
+ else []
+ let propContents = linkProps ++ colorContents ++ codeContents
+ return $ mknode "a:r" [] [ mknode "a:rPr" attrs $ propContents
+ , mknode "a:t" [] s
+ ]
+paraElemToElement (MathElem mathType texStr) = do
+ res <- convertMath writeOMML mathType (unTeXString texStr)
+ case res of
+ Right r -> return $ mknode "a14:m" [] $ addMathInfo r
+ Left (Str s) -> paraElemToElement (Run def s)
+ Left _ -> throwError $ PandocShouldNeverHappenError "non-string math fallback"
+
+-- This is a bit of a kludge -- really requires adding an option to
+-- TeXMath, but since that's a different package, we'll do this one
+-- step at a time.
+addMathInfo :: Element -> Element
+addMathInfo element =
+ let mathspace = Attr { attrKey = (QName "m" Nothing (Just "xmlns"))
+ , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
+ }
+ in add_attr mathspace element
+
+-- We look through the element to see if it contains an a14:m
+-- element. If so, we surround it. This is a bit ugly, but it seems
+-- more dependable than looking through shapes for math. Plus this is
+-- an xml implementation detail, so it seems to make sense to do it at
+-- the xml level.
+surroundWithMathAlternate :: Element -> Element
+surroundWithMathAlternate element =
+ case findElement (QName "m" Nothing (Just "a14")) element of
+ Just _ ->
+ mknode "mc:AlternateContent"
+ [("xmlns:mc", "http://schemas.openxmlformats.org/markup-compatibility/2006")
+ ] [ mknode "mc:Choice"
+ [ ("xmlns:a14", "http://schemas.microsoft.com/office/drawing/2010/main")
+ , ("Requires", "a14")] [ element ]
+ ]
+ Nothing -> element
+
+paragraphToElement :: PandocMonad m => Paragraph -> P m Element
+paragraphToElement par = do
+ let
+ attrs = [("lvl", show $ pPropLevel $ paraProps par)] ++
+ (case pPropMarginLeft (paraProps par) of
+ Just px -> [("marL", show $ 12700 * px), ("indent", "0")]
+ Nothing -> []
+ ) ++
+ (case pPropAlign (paraProps par) of
+ Just AlgnLeft -> [("algn", "l")]
+ Just AlgnRight -> [("algn", "r")]
+ Just AlgnCenter -> [("algn", "ctr")]
+ Nothing -> []
+ )
+ props = [] ++
+ (case pPropSpaceBefore $ paraProps par of
+ Just px -> [mknode "a:spcBef" [] [
+ mknode "a:spcPts" [("val", show $ 100 * px)] ()
+ ]
+ ]
+ Nothing -> []
+ ) ++
+ (case pPropBullet $ paraProps par of
+ Just Bullet -> []
+ Just (AutoNumbering attrs') ->
+ [mknode "a:buAutoNum" [("type", autoNumberingToType attrs')] ()]
+ Nothing -> [mknode "a:buNone" [] ()]
+ )
+ paras <- mapM paraElemToElement (paraElems par)
+ return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] ++ paras
+
+shapeToElement :: PandocMonad m => Element -> Shape -> P m Element
+shapeToElement layout (TextBox paras)
+ | ns <- elemToNameSpaces layout
+ , Just cSld <- findChild (elemName ns "p" "cSld") layout
+ , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
+ sp <- getContentShape ns spTree
+ elements <- mapM paragraphToElement paras
+ let txBody = mknode "p:txBody" [] $
+ [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements
+ emptySpPr = mknode "p:spPr" [] ()
+ return $
+ surroundWithMathAlternate $
+ replaceNamedChildren ns "p" "txBody" [txBody] $
+ replaceNamedChildren ns "p" "spPr" [emptySpPr] $
+ sp
+-- GraphicFrame and Pic should never reach this.
+shapeToElement _ _ = return $ mknode "p:sp" [] ()
+
+shapeToElements :: PandocMonad m => Element -> Shape -> P m [Element]
+shapeToElements layout (Pic picProps fp alt) = do
+ mInfo <- registerMedia fp alt
+ case mInfoExt mInfo of
+ Just _ -> do
+ makePicElements layout picProps mInfo alt
+ Nothing -> shapeToElements layout $ TextBox [Paragraph def alt]
+shapeToElements layout (GraphicFrame tbls cptn) =
+ graphicFrameToElements layout tbls cptn
+shapeToElements layout shp = do
+ element <- shapeToElement layout shp
+ return [element]
+
+shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Element]
+shapesToElements layout shps = do
+ concat <$> mapM (shapeToElements layout) shps
+
+graphicFrameToElements :: PandocMonad m => Element -> [Graphic] -> [ParaElem] -> P m [Element]
+graphicFrameToElements layout tbls caption = do
+ -- get the sizing
+ master <- getMaster
+ (pageWidth, pageHeight) <- asks envPresentationSize
+ let ns = elemToNameSpaces layout
+ ((x, y), (cx, cytmp)) <- getContentShapeSize ns layout master
+ `catchError`
+ (\_ -> return ((0, 0), (pageWidth, pageHeight)))
+
+ let cy = if (not $ null caption) then cytmp - captionHeight else cytmp
+
+ elements <- mapM (graphicToElement cx) tbls
+ let graphicFrameElts =
+ mknode "p:graphicFrame" [] $
+ [ mknode "p:nvGraphicFramePr" [] $
+ [ mknode "p:cNvPr" [("id", "6"), ("name", "Content Placeholder 5")] ()
+ , mknode "p:cNvGraphicFramePr" [] $
+ [mknode "a:graphicFrameLocks" [("noGrp", "1")] ()]
+ , mknode "p:nvPr" [] $
+ [mknode "p:ph" [("idx", "1")] ()]
+ ]
+ , mknode "p:xfrm" [] $
+ [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] ()
+ , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] ()
+ ]
+ ] ++ elements
+
+ if (not $ null caption)
+ then do capElt <- createCaption ((x, y), (cx, cytmp)) caption
+ return [graphicFrameElts, capElt]
+ else return [graphicFrameElts]
+
+getDefaultTableStyle :: PandocMonad m => P m (Maybe String)
+getDefaultTableStyle = do
+ refArchive <- asks envRefArchive
+ distArchive <- asks envDistArchive
+ tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml"
+ return $ findAttr (QName "def" Nothing Nothing) tblStyleLst
+
+graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element
+graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
+ let colWidths = if null hdrCells
+ then case rows of
+ r : _ | not (null r) -> replicate (length r) $
+ (tableWidth `div` (toInteger $ length r))
+ -- satisfy the compiler. This is the same as
+ -- saying that rows is empty, but the compiler
+ -- won't understand that `[]` exhausts the
+ -- alternatives.
+ _ -> []
+ else replicate (length hdrCells) $
+ (tableWidth `div` (toInteger $ length hdrCells))
+
+ let cellToOpenXML paras =
+ do elements <- mapM paragraphToElement paras
+ let elements' = if null elements
+ then [mknode "a:p" [] [mknode "a:endParaRPr" [] ()]]
+ else elements
+ return $
+ [mknode "a:txBody" [] $
+ ([ mknode "a:bodyPr" [] ()
+ , mknode "a:lstStyle" [] ()]
+ ++ elements')]
+ headers' <- mapM cellToOpenXML hdrCells
+ rows' <- mapM (mapM cellToOpenXML) rows
+ let borderProps = mknode "a:tcPr" [] ()
+ let emptyCell = [mknode "a:p" [] [mknode "a:pPr" [] ()]]
+ let mkcell border contents = mknode "a:tc" []
+ $ (if null contents
+ then emptyCell
+ else contents) ++ [ borderProps | border ]
+ let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells
+
+ let mkgridcol w = mknode "a:gridCol"
+ [("w", show ((12700 * w) :: Integer))] ()
+ let hasHeader = not (all null hdrCells)
+
+ mbDefTblStyle <- getDefaultTableStyle
+ let tblPrElt = mknode "a:tblPr"
+ [ ("firstRow", if tblPrFirstRow tblPr then "1" else "0")
+ , ("bandRow", if tblPrBandRow tblPr then "1" else "0")
+ ] (case mbDefTblStyle of
+ Nothing -> []
+ Just sty -> [mknode "a:tableStyleId" [] sty])
+
+ return $ mknode "a:graphic" [] $
+ [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] $
+ [mknode "a:tbl" [] $
+ [ tblPrElt
+ , mknode "a:tblGrid" [] (if all (==0) colWidths
+ then []
+ else map mkgridcol colWidths)
+ ]
+ ++ [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows'
+ ]
+ ]
+
+getShapeByPlaceHolderType :: NameSpaces -> Element -> String -> Maybe Element
+getShapeByPlaceHolderType ns spTreeElem phType
+ | isElem ns "p" "spTree" spTreeElem =
+ let findPhType element = isElem ns "p" "sp" element &&
+ Just phType == (Just element >>=
+ findChild (elemName ns "p" "nvSpPr") >>=
+ findChild (elemName ns "p" "nvPr") >>=
+ findChild (elemName ns "p" "ph") >>=
+ findAttr (QName "type" Nothing Nothing))
+ in
+ filterChild findPhType spTreeElem
+ | otherwise = Nothing
+
+getShapeByPlaceHolderIndex :: NameSpaces -> Element -> String -> Maybe Element
+getShapeByPlaceHolderIndex ns spTreeElem phIdx
+ | isElem ns "p" "spTree" spTreeElem =
+ let findPhType element = isElem ns "p" "sp" element &&
+ Just phIdx == (Just element >>=
+ findChild (elemName ns "p" "nvSpPr") >>=
+ findChild (elemName ns "p" "nvPr") >>=
+ findChild (elemName ns "p" "ph") >>=
+ findAttr (QName "idx" Nothing Nothing))
+ in
+ filterChild findPhType spTreeElem
+ | otherwise = Nothing
+
+
+nonBodyTextToElement :: PandocMonad m => Element -> String -> [ParaElem] -> P m Element
+nonBodyTextToElement layout phType paraElements
+ | ns <- elemToNameSpaces layout
+ , Just cSld <- findChild (elemName ns "p" "cSld") layout
+ , Just spTree <- findChild (elemName ns "p" "spTree") cSld
+ , Just sp <- getShapeByPlaceHolderType ns spTree phType = do
+ let hdrPara = Paragraph def paraElements
+ element <- paragraphToElement hdrPara
+ let txBody = mknode "p:txBody" [] $
+ [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++
+ [element]
+ return $ replaceNamedChildren ns "p" "txBody" [txBody] sp
+ -- XXX: TODO
+ | otherwise = return $ mknode "p:sp" [] ()
+
+contentToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> P m Element
+contentToElement layout hdrShape shapes
+ | ns <- elemToNameSpaces layout
+ , Just cSld <- findChild (elemName ns "p" "cSld") layout
+ , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
+ element <- nonBodyTextToElement layout "title" hdrShape
+ let hdrShapeElements = if null hdrShape
+ then []
+ else [element]
+ contentElements <- local
+ (\env -> env {envContentType = NormalContent})
+ (shapesToElements layout shapes)
+ return $
+ replaceNamedChildren ns "p" "sp"
+ (hdrShapeElements ++ contentElements)
+ spTree
+contentToElement _ _ _ = return $ mknode "p:sp" [] ()
+
+twoColumnToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> [Shape] -> P m Element
+twoColumnToElement layout hdrShape shapesL shapesR
+ | ns <- elemToNameSpaces layout
+ , Just cSld <- findChild (elemName ns "p" "cSld") layout
+ , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
+ element <- nonBodyTextToElement layout "title" hdrShape
+ let hdrShapeElements = if null hdrShape
+ then []
+ else [element]
+ contentElementsL <- local
+ (\env -> env {envContentType =TwoColumnLeftContent})
+ (shapesToElements layout shapesL)
+ contentElementsR <- local
+ (\env -> env {envContentType =TwoColumnRightContent})
+ (shapesToElements layout shapesR)
+ -- let contentElementsL' = map (setIdx ns "1") contentElementsL
+ -- contentElementsR' = map (setIdx ns "2") contentElementsR
+ return $
+ replaceNamedChildren ns "p" "sp"
+ (hdrShapeElements ++ contentElementsL ++ contentElementsR)
+ spTree
+twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] ()
+
+
+titleToElement :: PandocMonad m => Element -> [ParaElem] -> P m Element
+titleToElement layout titleElems
+ | ns <- elemToNameSpaces layout
+ , Just cSld <- findChild (elemName ns "p" "cSld") layout
+ , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
+ element <- nonBodyTextToElement layout "title" titleElems
+ let titleShapeElements = if null titleElems
+ then []
+ else [element]
+ return $ replaceNamedChildren ns "p" "sp" titleShapeElements spTree
+titleToElement _ _ = return $ mknode "p:sp" [] ()
+
+metadataToElement :: PandocMonad m => Element -> [ParaElem] -> [ParaElem] -> [[ParaElem]] -> [ParaElem] -> P m Element
+metadataToElement layout titleElems subtitleElems authorsElems dateElems
+ | ns <- elemToNameSpaces layout
+ , Just cSld <- findChild (elemName ns "p" "cSld") layout
+ , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
+ titleShapeElements <- if null titleElems
+ then return []
+ else sequence [nonBodyTextToElement layout "ctrTitle" titleElems]
+ let combinedAuthorElems = intercalate [Break] authorsElems
+ subtitleAndAuthorElems = intercalate [Break, Break] [subtitleElems, combinedAuthorElems]
+ subtitleShapeElements <- if null subtitleAndAuthorElems
+ then return []
+ else sequence [nonBodyTextToElement layout "subTitle" subtitleAndAuthorElems]
+ dateShapeElements <- if null dateElems
+ then return []
+ else sequence [nonBodyTextToElement layout "dt" dateElems]
+ return $ replaceNamedChildren ns "p" "sp"
+ (titleShapeElements ++ subtitleShapeElements ++ dateShapeElements)
+ spTree
+metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] ()
+
+slideToElement :: PandocMonad m => Slide -> P m Element
+slideToElement (Slide _ l@(ContentSlide hdrElems shapes) _ )= do
+ layout <- getLayout l
+ spTree <- local (\env -> if null hdrElems
+ then env
+ else env{envSlideHasHeader=True}) $
+ contentToElement layout hdrElems shapes
+ return $ mknode "p:sld"
+ [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"),
+ ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"),
+ ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main")
+ ] [mknode "p:cSld" [] [spTree]]
+slideToElement (Slide _ l@(TwoColumnSlide hdrElems shapesL shapesR) _) = do
+ layout <- getLayout l
+ spTree <- local (\env -> if null hdrElems
+ then env
+ else env{envSlideHasHeader=True}) $
+ twoColumnToElement layout hdrElems shapesL shapesR
+ return $ mknode "p:sld"
+ [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"),
+ ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"),
+ ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main")
+ ] [mknode "p:cSld" [] [spTree]]
+slideToElement (Slide _ l@(TitleSlide hdrElems) _) = do
+ layout <- getLayout l
+ spTree <- titleToElement layout hdrElems
+ return $ mknode "p:sld"
+ [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"),
+ ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"),
+ ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main")
+ ] [mknode "p:cSld" [] [spTree]]
+slideToElement (Slide _ l@(MetadataSlide titleElems subtitleElems authorElems dateElems) _) = do
+ layout <- getLayout l
+ spTree <- metadataToElement layout titleElems subtitleElems authorElems dateElems
+ return $ mknode "p:sld"
+ [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"),
+ ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"),
+ ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main")
+ ] [mknode "p:cSld" [] [spTree]]
+
+
+--------------------------------------------------------------------
+-- Notes:
+
+getNotesMaster :: PandocMonad m => P m Element
+getNotesMaster = do
+ let notesMasterPath = "ppt/notesMasters/notesMaster1.xml"
+ distArchive <- asks envDistArchive
+ root <- case findEntryByPath notesMasterPath distArchive of
+ Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of
+ Just element -> return $ element
+ Nothing -> throwError $
+ PandocSomeError $
+ notesMasterPath ++ " corrupt in reference file"
+ Nothing -> throwError $
+ PandocSomeError $
+ notesMasterPath ++ " missing in reference file"
+ return root
+
+getSlideNumberFieldId :: PandocMonad m => Element -> P m String
+getSlideNumberFieldId notesMaster
+ | ns <- elemToNameSpaces notesMaster
+ , Just cSld <- findChild (elemName ns "p" "cSld") notesMaster
+ , Just spTree <- findChild (elemName ns "p" "spTree") cSld
+ , Just sp <- getShapeByPlaceHolderType ns spTree "sldNum"
+ , Just txBody <- findChild (elemName ns "p" "txBody") sp
+ , Just p <- findChild (elemName ns "a" "p") txBody
+ , Just fld <- findChild (elemName ns "a" "fld") p
+ , Just fldId <- findAttr (QName "id" Nothing Nothing) fld =
+ return fldId
+ | otherwise = throwError $
+ PandocSomeError $
+ "No field id for slide numbers in notesMaster.xml"
+
+speakerNotesSlideImage :: Element
+speakerNotesSlideImage =
+ mknode "p:sp" [] $
+ [ mknode "p:nvSpPr" [] $
+ [ mknode "p:cNvPr" [ ("id", "2")
+ , ("name", "Slide Image Placeholder 1")
+ ] ()
+ , mknode "p:cNvSpPr" [] $
+ [ mknode "a:spLocks" [ ("noGrp", "1")
+ , ("noRot", "1")
+ , ("noChangeAspect", "1")
+ ] ()
+ ]
+ , mknode "p:nvPr" [] $
+ [ mknode "p:ph" [("type", "sldImg")] ()]
+ ]
+ , mknode "p:spPr" [] ()
+ ]
+
+-- we want to wipe links from the speaker notes in the
+-- paragraphs. Powerpoint doesn't allow you to input them, and it
+-- would provide extra complications.
+removeParaLinks :: Paragraph -> Paragraph
+removeParaLinks paragraph = paragraph{paraElems = map f (paraElems paragraph)}
+ where f (Run rProps s) = Run rProps{rLink=Nothing} s
+ f pe = pe
+
+-- put an empty paragraph between paragraphs for more expected spacing.
+spaceParas :: [Paragraph] -> [Paragraph]
+spaceParas = intersperse (Paragraph def [])
+
+speakerNotesBody :: PandocMonad m => [Paragraph] -> P m Element
+speakerNotesBody paras = do
+ elements <- mapM paragraphToElement $ spaceParas $ map removeParaLinks paras
+ let txBody = mknode "p:txBody" [] $
+ [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements
+ return $
+ mknode "p:sp" [] $
+ [ mknode "p:nvSpPr" [] $
+ [ mknode "p:cNvPr" [ ("id", "3")
+ , ("name", "Notes Placeholder 2")
+ ] ()
+ , mknode "p:cNvSpPr" [] $
+ [ mknode "a:spLocks" [("noGrp", "1")] ()]
+ , mknode "p:nvPr" [] $
+ [ mknode "p:ph" [("type", "body"), ("idx", "1")] ()]
+ ]
+ , mknode "p:spPr" [] ()
+ , txBody
+ ]
+
+speakerNotesSlideNumber :: Int -> String -> Element
+speakerNotesSlideNumber pgNum fieldId =
+ mknode "p:sp" [] $
+ [ mknode "p:nvSpPr" [] $
+ [ mknode "p:cNvPr" [ ("id", "4")
+ , ("name", "Slide Number Placeholder 3")
+ ] ()
+ , mknode "p:cNvSpPr" [] $
+ [ mknode "a:spLocks" [("noGrp", "1")] ()]
+ , mknode "p:nvPr" [] $
+ [ mknode "p:ph" [ ("type", "sldNum")
+ , ("sz", "quarter")
+ , ("idx", "10")
+ ] ()
+ ]
+ ]
+ , mknode "p:spPr" [] ()
+ , mknode "p:txBody" [] $
+ [ mknode "a:bodyPr" [] ()
+ , mknode "a:lstStyle" [] ()
+ , mknode "a:p" [] $
+ [ mknode "a:fld" [ ("id", fieldId)
+ , ("type", "slidenum")
+ ]
+ [ mknode "a:rPr" [("lang", "en-US")] ()
+ , mknode "a:t" [] (show pgNum)
+ ]
+ , mknode "a:endParaRPr" [("lang", "en-US")] ()
+ ]
+ ]
+ ]
+
+slideToSpeakerNotesElement :: PandocMonad m => Slide -> P m (Maybe Element)
+slideToSpeakerNotesElement slide
+ | Slide _ _ mbNotes <- slide
+ , Just (SpeakerNotes paras) <- mbNotes = do
+ master <- getNotesMaster
+ fieldId <- getSlideNumberFieldId master
+ num <- slideNum slide
+ let imgShape = speakerNotesSlideImage
+ sldNumShape = speakerNotesSlideNumber num fieldId
+ bodyShape <- speakerNotesBody paras
+ return $ Just $
+ mknode "p:notes"
+ [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main")
+ , ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships")
+ , ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main")
+ ] [ mknode "p:cSld" []
+ [ mknode "p:spTree" []
+ [ mknode "p:nvGrpSpPr" []
+ [ mknode "p:cNvPr" [("id", "1"), ("name", "")] ()
+ , mknode "p:cNvGrpSpPr" [] ()
+ , mknode "p:nvPr" [] ()
+ ]
+ , mknode "p:grpSpPr" []
+ [ mknode "a:xfrm" []
+ [ mknode "a:off" [("x", "0"), ("y", "0")] ()
+ , mknode "a:ext" [("cx", "0"), ("cy", "0")] ()
+ , mknode "a:chOff" [("x", "0"), ("y", "0")] ()
+ , mknode "a:chExt" [("cx", "0"), ("cy", "0")] ()
+ ]
+ ]
+ , imgShape
+ , bodyShape
+ , sldNumShape
+ ]
+ ]
+ ]
+slideToSpeakerNotesElement _ = return Nothing
+
+-----------------------------------------------------------------------
+
+getSlideIdNum :: PandocMonad m => SlideId -> P m Int
+getSlideIdNum sldId = do
+ slideIdMap <- asks envSlideIdMap
+ case M.lookup sldId slideIdMap of
+ Just n -> return n
+ Nothing -> throwError $
+ PandocShouldNeverHappenError $
+ "Slide Id " ++ (show sldId) ++ " not found."
+
+slideNum :: PandocMonad m => Slide -> P m Int
+slideNum slide = getSlideIdNum $ slideId slide
+
+idNumToFilePath :: Int -> FilePath
+idNumToFilePath idNum = "slide" ++ (show $ idNum) ++ ".xml"
+
+slideToFilePath :: PandocMonad m => Slide -> P m FilePath
+slideToFilePath slide = do
+ idNum <- slideNum slide
+ return $ "slide" ++ (show $ idNum) ++ ".xml"
+
+slideToRelId :: PandocMonad m => Slide -> P m String
+slideToRelId slide = do
+ n <- slideNum slide
+ offset <- asks envSlideIdOffset
+ return $ "rId" ++ (show $ n + offset)
+
+
+data Relationship = Relationship { relId :: Int
+ , relType :: MimeType
+ , relTarget :: FilePath
+ } deriving (Show, Eq)
+
+elementToRel :: Element -> Maybe Relationship
+elementToRel element
+ | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing =
+ do rId <- findAttr (QName "Id" Nothing Nothing) element
+ numStr <- stripPrefix "rId" rId
+ num <- case reads numStr :: [(Int, String)] of
+ (n, _) : _ -> Just n
+ [] -> Nothing
+ type' <- findAttr (QName "Type" Nothing Nothing) element
+ target <- findAttr (QName "Target" Nothing Nothing) element
+ return $ Relationship num type' target
+ | otherwise = Nothing
+
+slideToPresRel :: PandocMonad m => Slide -> P m Relationship
+slideToPresRel slide = do
+ idNum <- slideNum slide
+ n <- asks envSlideIdOffset
+ let rId = idNum + n
+ fp = "slides/" ++ idNumToFilePath idNum
+ return $ Relationship { relId = rId
+ , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide"
+ , relTarget = fp
+ }
+
+getRels :: PandocMonad m => P m [Relationship]
+getRels = do
+ refArchive <- asks envRefArchive
+ distArchive <- asks envDistArchive
+ relsElem <- parseXml refArchive distArchive "ppt/_rels/presentation.xml.rels"
+ let globalNS = "http://schemas.openxmlformats.org/package/2006/relationships"
+ let relElems = findChildren (QName "Relationship" (Just globalNS) Nothing) relsElem
+ return $ mapMaybe elementToRel relElems
+
+presentationToRels :: PandocMonad m => Presentation -> P m [Relationship]
+presentationToRels pres@(Presentation _ slides) = do
+ mySlideRels <- mapM slideToPresRel slides
+ let notesMasterRels =
+ if presHasSpeakerNotes pres
+ then [Relationship { relId = length mySlideRels + 2
+ , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster"
+ , relTarget = "notesMasters/notesMaster1.xml"
+ }]
+ else []
+ insertedRels = mySlideRels ++ notesMasterRels
+ rels <- getRels
+ -- we remove the slide rels and the notesmaster (if it's
+ -- there). We'll put these back in ourselves, if necessary.
+ let relsWeKeep = filter
+ (\r -> relType r /= "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" &&
+ relType r /= "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster")
+ rels
+ -- We want to make room for the slides in the id space. The slides
+ -- will start at Id2 (since Id1 is for the slide master). There are
+ -- two slides in the data file, but that might change in the future,
+ -- so we will do this:
+ --
+ -- 1. We look to see what the minimum relWithoutSlide id (greater than 1) is.
+ -- 2. We add the difference between this and the number of slides to
+ -- all relWithoutSlide rels (unless they're 1)
+ -- 3. If we have a notesmaster slide, we make space for that as well.
+
+ let minRelNotOne = case filter (1<) $ map relId relsWeKeep of
+ [] -> 0 -- doesn't matter in this case, since
+ -- there will be nothing to map the
+ -- function over
+ l -> minimum l
+
+ modifyRelNum :: Int -> Int
+ modifyRelNum 1 = 1
+ modifyRelNum n = n - minRelNotOne + 2 + length insertedRels
+
+ relsWeKeep' = map (\r -> r{relId = modifyRelNum $ relId r}) relsWeKeep
+
+ return $ insertedRels ++ relsWeKeep'
+
+-- We make this ourselves, in case there's a thumbnail in the one from
+-- the template.
+topLevelRels :: [Relationship]
+topLevelRels =
+ [ Relationship { relId = 1
+ , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
+ , relTarget = "ppt/presentation.xml"
+ }
+ , Relationship { relId = 2
+ , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
+ , relTarget = "docProps/core.xml"
+ }
+ , Relationship { relId = 3
+ , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/extended-properties"
+ , relTarget = "docProps/app.xml"
+ }
+ ]
+
+topLevelRelsEntry :: PandocMonad m => P m Entry
+topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels
+
+relToElement :: Relationship -> Element
+relToElement rel = mknode "Relationship" [ ("Id", "rId" ++ (show $ relId rel))
+ , ("Type", relType rel)
+ , ("Target", relTarget rel) ] ()
+
+relsToElement :: [Relationship] -> Element
+relsToElement rels = mknode "Relationships"
+ [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
+ (map relToElement rels)
+
+presentationToRelsEntry :: PandocMonad m => Presentation -> P m Entry
+presentationToRelsEntry pres = do
+ rels <- presentationToRels pres
+ elemToEntry "ppt/_rels/presentation.xml.rels" $ relsToElement rels
+
+elemToEntry :: PandocMonad m => FilePath -> Element -> P m Entry
+elemToEntry fp element = do
+ epochtime <- (floor . utcTimeToPOSIXSeconds) <$> asks envUTCTime
+ return $ toEntry fp epochtime $ renderXml element
+
+slideToEntry :: PandocMonad m => Slide -> P m Entry
+slideToEntry slide = do
+ idNum <- slideNum slide
+ local (\env -> env{envCurSlideId = idNum}) $ do
+ element <- slideToElement slide
+ elemToEntry ("ppt/slides/" ++ idNumToFilePath idNum) element
+
+slideToSpeakerNotesEntry :: PandocMonad m => Slide -> P m (Maybe Entry)
+slideToSpeakerNotesEntry slide = do
+ idNum <- slideNum slide
+ local (\env -> env{envCurSlideId = idNum}) $ do
+ mbElement <- slideToSpeakerNotesElement slide
+ mbNotesIdNum <- do mp <- asks envSpeakerNotesIdMap
+ return $ M.lookup idNum mp
+ case mbElement of
+ Just element | Just notesIdNum <- mbNotesIdNum ->
+ Just <$>
+ elemToEntry
+ ("ppt/notesSlides/notesSlide" ++ show notesIdNum ++ ".xml")
+ element
+ _ -> return Nothing
+
+slideToSpeakerNotesRelElement :: PandocMonad m => Slide -> P m (Maybe Element)
+slideToSpeakerNotesRelElement slide
+ | Slide _ _ mbNotes <- slide
+ , Just _ <- mbNotes = do
+ idNum <- slideNum slide
+ return $ Just $
+ mknode "Relationships"
+ [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
+ [ mknode "Relationship" [ ("Id", "rId2")
+ , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
+ , ("Target", "../slides/slide" ++ show idNum ++ ".xml")
+ ] ()
+ , mknode "Relationship" [ ("Id", "rId1")
+ , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster")
+ , ("Target", "../notesMasters/notesMaster1.xml")
+ ] ()
+ ]
+slideToSpeakerNotesRelElement _ = return Nothing
+
+slideToSpeakerNotesRelEntry :: PandocMonad m => Slide -> P m (Maybe Entry)
+slideToSpeakerNotesRelEntry slide = do
+ idNum <- slideNum slide
+ mbElement <- slideToSpeakerNotesRelElement slide
+ mp <- asks envSpeakerNotesIdMap
+ let mbNotesIdNum = M.lookup idNum mp
+ case mbElement of
+ Just element | Just notesIdNum <- mbNotesIdNum ->
+ Just <$>
+ elemToEntry
+ ("ppt/notesSlides/_rels/notesSlide" ++ show notesIdNum ++ ".xml.rels")
+ element
+ _ -> return Nothing
+
+slideToSlideRelEntry :: PandocMonad m => Slide -> P m Entry
+slideToSlideRelEntry slide = do
+ idNum <- slideNum slide
+ element <- slideToSlideRelElement slide
+ elemToEntry ("ppt/slides/_rels/" ++ idNumToFilePath idNum ++ ".rels") element
+
+linkRelElement :: PandocMonad m => Int -> LinkTarget -> P m Element
+linkRelElement rIdNum (InternalTarget targetId) = do
+ targetIdNum <- getSlideIdNum targetId
+ return $
+ mknode "Relationship" [ ("Id", "rId" ++ show rIdNum)
+ , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
+ , ("Target", "slide" ++ show targetIdNum ++ ".xml")
+ ] ()
+linkRelElement rIdNum (ExternalTarget (url, _)) = do
+ return $
+ mknode "Relationship" [ ("Id", "rId" ++ show rIdNum)
+ , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink")
+ , ("Target", url)
+ , ("TargetMode", "External")
+ ] ()
+
+linkRelElements :: PandocMonad m => M.Map Int LinkTarget -> P m [Element]
+linkRelElements mp = mapM (\(n, lnk) -> linkRelElement n lnk) (M.toList mp)
+
+mediaRelElement :: MediaInfo -> Element
+mediaRelElement mInfo =
+ let ext = case mInfoExt mInfo of
+ Just e -> e
+ Nothing -> ""
+ in
+ mknode "Relationship" [ ("Id", "rId" ++ (show $ mInfoLocalId mInfo))
+ , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image")
+ , ("Target", "../media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext)
+ ] ()
+
+speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element)
+speakerNotesSlideRelElement slide = do
+ idNum <- slideNum slide
+ mp <- asks envSpeakerNotesIdMap
+ return $ case M.lookup idNum mp of
+ Nothing -> Nothing
+ Just n ->
+ let target = "../notesSlides/notesSlide" ++ show n ++ ".xml"
+ in Just $
+ mknode "Relationship" [ ("Id", "rId2")
+ , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide")
+ , ("Target", target)
+ ] ()
+
+slideToSlideRelElement :: PandocMonad m => Slide -> P m Element
+slideToSlideRelElement slide = do
+ idNum <- slideNum slide
+ let target = case slide of
+ (Slide _ (MetadataSlide _ _ _ _) _) -> "../slideLayouts/slideLayout1.xml"
+ (Slide _ (TitleSlide _) _) -> "../slideLayouts/slideLayout3.xml"
+ (Slide _ (ContentSlide _ _) _) -> "../slideLayouts/slideLayout2.xml"
+ (Slide _ (TwoColumnSlide _ _ _) _) -> "../slideLayouts/slideLayout4.xml"
+
+ speakerNotesRels <- maybeToList <$> speakerNotesSlideRelElement slide
+
+ linkIds <- gets stLinkIds
+ mediaIds <- gets stMediaIds
+
+ linkRels <- case M.lookup idNum linkIds of
+ Just mp -> linkRelElements mp
+ Nothing -> return []
+ let mediaRels = case M.lookup idNum mediaIds of
+ Just mInfos -> map mediaRelElement mInfos
+ Nothing -> []
+
+ return $
+ mknode "Relationships"
+ [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
+ ([mknode "Relationship" [ ("Id", "rId1")
+ , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout")
+ , ("Target", target)] ()
+ ] ++ speakerNotesRels ++ linkRels ++ mediaRels)
+
+slideToSldIdElement :: PandocMonad m => Slide -> P m Element
+slideToSldIdElement slide = do
+ n <- slideNum slide
+ let id' = show $ n + 255
+ rId <- slideToRelId slide
+ return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] ()
+
+presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element
+presentationToSldIdLst (Presentation _ slides) = do
+ ids <- mapM slideToSldIdElement slides
+ return $ mknode "p:sldIdLst" [] ids
+
+presentationToPresentationElement :: PandocMonad m => Presentation -> P m Element
+presentationToPresentationElement pres@(Presentation _ slds) = do
+ refArchive <- asks envRefArchive
+ distArchive <- asks envDistArchive
+ element <- parseXml refArchive distArchive "ppt/presentation.xml"
+ sldIdLst <- presentationToSldIdLst pres
+
+ let modifySldIdLst :: Content -> Content
+ modifySldIdLst (Elem e) = case elName e of
+ (QName "sldIdLst" _ _) -> Elem sldIdLst
+ _ -> Elem e
+ modifySldIdLst ct = ct
+
+ notesMasterRId = length slds + 2
+
+ notesMasterElem = mknode "p:notesMasterIdLst" []
+ [ mknode
+ "p:NotesMasterId"
+ [("r:id", "rId" ++ show notesMasterRId)]
+ ()
+ ]
+
+ -- if there's a notesMasterIdLst in the presentation.xml file,
+ -- we want to remove it. We then want to put our own, if
+ -- necessary, after the slideMasterIdLst element.
+
+ removeNotesMaster' :: Content -> [Content]
+ removeNotesMaster' (Elem e) = case elName e of
+ (QName "notesMasterIdLst" _ _) -> []
+ _ -> [Elem e]
+ removeNotesMaster' ct = [ct]
+
+ removeNotesMaster :: [Content] -> [Content]
+ removeNotesMaster = concatMap removeNotesMaster'
+
+ insertNotesMaster' :: Content -> [Content]
+ insertNotesMaster' (Elem e) = case elName e of
+ (QName "sldMasterIdLst" _ _) -> [Elem e, Elem notesMasterElem]
+ _ -> [Elem e]
+ insertNotesMaster' ct = [ct]
+
+ insertNotesMaster :: [Content] -> [Content]
+ insertNotesMaster = if presHasSpeakerNotes pres
+ then concatMap insertNotesMaster'
+ else id
+
+ newContent = insertNotesMaster $
+ removeNotesMaster $
+ map modifySldIdLst $
+ elContent element
+
+ return $ element{elContent = newContent}
+
+presentationToPresEntry :: PandocMonad m => Presentation -> P m Entry
+presentationToPresEntry pres = presentationToPresentationElement pres >>=
+ elemToEntry "ppt/presentation.xml"
+
+-- adapted from the Docx writer
+docPropsElement :: PandocMonad m => DocProps -> P m Element
+docPropsElement docProps = do
+ utctime <- asks envUTCTime
+ let keywords = case dcKeywords docProps of
+ Just xs -> intercalate "," xs
+ Nothing -> ""
+ return $
+ mknode "cp:coreProperties"
+ [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
+ ,("xmlns:dc","http://purl.org/dc/elements/1.1/")
+ ,("xmlns:dcterms","http://purl.org/dc/terms/")
+ ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
+ ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
+ $ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps)
+ : (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps)
+ : (mknode "cp:keywords" [] keywords)
+ : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
+ , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
+ ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+
+docPropsToEntry :: PandocMonad m => DocProps -> P m Entry
+docPropsToEntry docProps = docPropsElement docProps >>=
+ elemToEntry "docProps/core.xml"
+
+
+defaultContentTypeToElem :: DefaultContentType -> Element
+defaultContentTypeToElem dct =
+ mknode "Default"
+ [("Extension", defContentTypesExt dct),
+ ("ContentType", defContentTypesType dct)]
+ ()
+
+overrideContentTypeToElem :: OverrideContentType -> Element
+overrideContentTypeToElem oct =
+ mknode "Override"
+ [("PartName", overrideContentTypesPart oct),
+ ("ContentType", overrideContentTypesType oct)]
+ ()
+
+contentTypesToElement :: ContentTypes -> Element
+contentTypesToElement ct =
+ let ns = "http://schemas.openxmlformats.org/package/2006/content-types"
+ in
+ mknode "Types" [("xmlns", ns)] $
+ (map defaultContentTypeToElem $ contentTypesDefaults ct) ++
+ (map overrideContentTypeToElem $ contentTypesOverrides ct)
+
+data DefaultContentType = DefaultContentType
+ { defContentTypesExt :: String
+ , defContentTypesType:: MimeType
+ }
+ deriving (Show, Eq)
+
+data OverrideContentType = OverrideContentType
+ { overrideContentTypesPart :: FilePath
+ , overrideContentTypesType :: MimeType
+ }
+ deriving (Show, Eq)
+
+data ContentTypes = ContentTypes { contentTypesDefaults :: [DefaultContentType]
+ , contentTypesOverrides :: [OverrideContentType]
+ }
+ deriving (Show, Eq)
+
+contentTypesToEntry :: PandocMonad m => ContentTypes -> P m Entry
+contentTypesToEntry ct = elemToEntry "[Content_Types].xml" $ contentTypesToElement ct
+
+pathToOverride :: FilePath -> Maybe OverrideContentType
+pathToOverride fp = OverrideContentType ("/" ++ fp) <$> (getContentType fp)
+
+mediaFileContentType :: FilePath -> Maybe DefaultContentType
+mediaFileContentType fp = case takeExtension fp of
+ '.' : ext -> Just $
+ DefaultContentType { defContentTypesExt = ext
+ , defContentTypesType =
+ case getMimeType fp of
+ Just mt -> mt
+ Nothing -> "application/octet-stream"
+ }
+ _ -> Nothing
+
+mediaContentType :: MediaInfo -> Maybe DefaultContentType
+mediaContentType mInfo
+ | Just ('.' : ext) <- mInfoExt mInfo =
+ Just $ DefaultContentType { defContentTypesExt = ext
+ , defContentTypesType =
+ case mInfoMimeType mInfo of
+ Just mt -> mt
+ Nothing -> "application/octet-stream"
+ }
+ | otherwise = Nothing
+
+getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath]
+getSpeakerNotesFilePaths = do
+ mp <- asks envSpeakerNotesIdMap
+ let notesIdNums = M.elems mp
+ return $ map (\n -> "ppt/notesSlides/notesSlide" ++ show n ++ ".xml") notesIdNums
+
+presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes
+presentationToContentTypes p@(Presentation _ slides) = do
+ mediaInfos <- (mconcat . M.elems) <$> gets stMediaIds
+ filePaths <- patternsToFilePaths $ inheritedPatterns p
+ let mediaFps = filter (match (compile "ppt/media/image*")) filePaths
+ let defaults = [ DefaultContentType "xml" "application/xml"
+ , DefaultContentType "rels" "application/vnd.openxmlformats-package.relationships+xml"
+ ]
+ mediaDefaults = nub $
+ (mapMaybe mediaContentType $ mediaInfos) ++
+ (mapMaybe mediaFileContentType $ mediaFps)
+
+ inheritedOverrides = mapMaybe pathToOverride filePaths
+ docPropsOverride = mapMaybe pathToOverride ["docProps/core.xml"]
+ presOverride = mapMaybe pathToOverride ["ppt/presentation.xml"]
+ relativePaths <- mapM slideToFilePath slides
+ let slideOverrides = mapMaybe
+ (\fp -> pathToOverride $ "ppt/slides/" ++ fp)
+ relativePaths
+ speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths
+ return $ ContentTypes
+ (defaults ++ mediaDefaults)
+ (inheritedOverrides ++ docPropsOverride ++ presOverride ++ slideOverrides ++ speakerNotesOverrides)
+
+presML :: String
+presML = "application/vnd.openxmlformats-officedocument.presentationml"
+
+noPresML :: String
+noPresML = "application/vnd.openxmlformats-officedocument"
+
+getContentType :: FilePath -> Maybe MimeType
+getContentType fp
+ | fp == "ppt/presentation.xml" = Just $ presML ++ ".presentation.main+xml"
+ | fp == "ppt/presProps.xml" = Just $ presML ++ ".presProps+xml"
+ | fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml"
+ | fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml"
+ | fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml"
+ | fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml"
+ | "ppt" : "slideMasters" : f : [] <- splitDirectories fp
+ , (_, ".xml") <- splitExtension f =
+ Just $ presML ++ ".slideMaster+xml"
+ | "ppt" : "slides" : f : [] <- splitDirectories fp
+ , (_, ".xml") <- splitExtension f =
+ Just $ presML ++ ".slide+xml"
+ | "ppt" : "notesMasters" : f : [] <- splitDirectories fp
+ , (_, ".xml") <- splitExtension f =
+ Just $ presML ++ ".notesMaster+xml"
+ | "ppt" : "notesSlides" : f : [] <- splitDirectories fp
+ , (_, ".xml") <- splitExtension f =
+ Just $ presML ++ ".notesSlide+xml"
+ | "ppt" : "theme" : f : [] <- splitDirectories fp
+ , (_, ".xml") <- splitExtension f =
+ Just $ noPresML ++ ".theme+xml"
+ | "ppt" : "slideLayouts" : _ : [] <- splitDirectories fp=
+ Just $ presML ++ ".slideLayout+xml"
+ | otherwise = Nothing
+
+autoNumberingToType :: ListAttributes -> String
+autoNumberingToType (_, numStyle, numDelim) =
+ typeString ++ delimString
+ where
+ typeString = case numStyle of
+ Decimal -> "arabic"
+ UpperAlpha -> "alphaUc"
+ LowerAlpha -> "alphaLc"
+ UpperRoman -> "romanUc"
+ LowerRoman -> "romanLc"
+ _ -> "arabic"
+ delimString = case numDelim of
+ Period -> "Period"
+ OneParen -> "ParenR"
+ TwoParens -> "ParenBoth"
+ _ -> "Period"
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
new file mode 100644
index 000000000..396469edd
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
@@ -0,0 +1,987 @@
+{-# LANGUAGE PatternGuards #-}
+
+{-
+Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Powerpoint.Presentation
+ Copyright : Copyright (C) 2017-2018 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Definition of Presentation datatype, modeling a MS Powerpoint (pptx)
+document, and functions for converting a Pandoc document to
+Presentation.
+-}
+
+module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation
+ , Presentation(..)
+ , DocProps(..)
+ , Slide(..)
+ , Layout(..)
+ , SpeakerNotes(..)
+ , SlideId(..)
+ , Shape(..)
+ , Graphic(..)
+ , BulletType(..)
+ , Algnment(..)
+ , Paragraph(..)
+ , ParaElem(..)
+ , ParaProps(..)
+ , RunProps(..)
+ , TableProps(..)
+ , Strikethrough(..)
+ , Capitals(..)
+ , PicProps(..)
+ , URL
+ , TeXString(..)
+ , LinkTarget(..)
+ ) where
+
+
+import Control.Monad.Reader
+import Control.Monad.State
+import Data.List (intercalate)
+import Data.Default
+import Text.Pandoc.Definition
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Slides (getSlideLevel)
+import Text.Pandoc.Options
+import Text.Pandoc.Logging
+import Text.Pandoc.Walk
+import Text.Pandoc.Compat.Time (UTCTime)
+import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element"
+import Text.Pandoc.Writers.Shared (metaValueToInlines)
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Data.Maybe (maybeToList, fromMaybe)
+import Text.Pandoc.Highlighting
+import qualified Data.Text as T
+import Control.Applicative ((<|>))
+import Skylighting
+
+data WriterEnv = WriterEnv { envMetadata :: Meta
+ , envRunProps :: RunProps
+ , envParaProps :: ParaProps
+ , envSlideLevel :: Int
+ , envOpts :: WriterOptions
+ , envSlideHasHeader :: Bool
+ , envInList :: Bool
+ , envInNoteSlide :: Bool
+ , envCurSlideId :: SlideId
+ , envInSpeakerNotes :: Bool
+ }
+ deriving (Show)
+
+instance Default WriterEnv where
+ def = WriterEnv { envMetadata = mempty
+ , envRunProps = def
+ , envParaProps = def
+ , envSlideLevel = 2
+ , envOpts = def
+ , envSlideHasHeader = False
+ , envInList = False
+ , envInNoteSlide = False
+ , envCurSlideId = SlideId "Default"
+ , envInSpeakerNotes = False
+ }
+
+
+data WriterState = WriterState { stNoteIds :: M.Map Int [Block]
+ -- associate anchors with slide id
+ , stAnchorMap :: M.Map String SlideId
+ , stSlideIdSet :: S.Set SlideId
+ , stLog :: [LogMessage]
+ , stSpeakerNotesMap :: M.Map SlideId [[Paragraph]]
+ } deriving (Show, Eq)
+
+instance Default WriterState where
+ def = WriterState { stNoteIds = mempty
+ , stAnchorMap = mempty
+ -- we reserve this s
+ , stSlideIdSet = reservedSlideIds
+ , stLog = []
+ , stSpeakerNotesMap = mempty
+ }
+
+metadataSlideId :: SlideId
+metadataSlideId = SlideId "Metadata"
+
+tocSlideId :: SlideId
+tocSlideId = SlideId "TOC"
+
+endNotesSlideId :: SlideId
+endNotesSlideId = SlideId "EndNotes"
+
+reservedSlideIds :: S.Set SlideId
+reservedSlideIds = S.fromList [ metadataSlideId
+ , tocSlideId
+ , endNotesSlideId
+ ]
+
+uniqueSlideId' :: Integer -> S.Set SlideId -> String -> SlideId
+uniqueSlideId' n idSet s =
+ let s' = if n == 0 then s else s ++ "-" ++ show n
+ in if SlideId s' `S.member` idSet
+ then uniqueSlideId' (n+1) idSet s
+ else SlideId s'
+
+uniqueSlideId :: S.Set SlideId -> String -> SlideId
+uniqueSlideId = uniqueSlideId' 0
+
+runUniqueSlideId :: String -> Pres SlideId
+runUniqueSlideId s = do
+ idSet <- gets stSlideIdSet
+ let sldId = uniqueSlideId idSet s
+ modify $ \st -> st{stSlideIdSet = S.insert sldId idSet}
+ return sldId
+
+addLogMessage :: LogMessage -> Pres ()
+addLogMessage msg = modify $ \st -> st{stLog = msg : stLog st}
+
+type Pres = ReaderT WriterEnv (State WriterState)
+
+runPres :: WriterEnv -> WriterState -> Pres a -> (a, [LogMessage])
+runPres env st p = (pres, reverse $ stLog finalSt)
+ where (pres, finalSt) = runState (runReaderT p env) st
+
+-- GHC 7.8 will still complain about concat <$> mapM unless we specify
+-- Functor. We can get rid of this when we stop supporting GHC 7.8.
+concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+concatMapM f xs = liftM concat (mapM f xs)
+
+type Pixels = Integer
+
+data Presentation = Presentation DocProps [Slide]
+ deriving (Show)
+
+data DocProps = DocProps { dcTitle :: Maybe String
+ , dcSubject :: Maybe String
+ , dcCreator :: Maybe String
+ , dcKeywords :: Maybe [String]
+ , dcCreated :: Maybe UTCTime
+ } deriving (Show, Eq)
+
+
+data Slide = Slide { slideId :: SlideId
+ , slideLayout :: Layout
+ , slideSpeakerNotes :: Maybe SpeakerNotes
+ } deriving (Show, Eq)
+
+newtype SlideId = SlideId String
+ deriving (Show, Eq, Ord)
+
+-- In theory you could have anything on a notes slide but it seems
+-- designed mainly for one textbox, so we'll just put in the contents
+-- of that textbox, to avoid other shapes that won't work as well.
+newtype SpeakerNotes = SpeakerNotes {fromSpeakerNotes :: [Paragraph]}
+ deriving (Show, Eq)
+
+data Layout = MetadataSlide { metadataSlideTitle :: [ParaElem]
+ , metadataSlideSubtitle :: [ParaElem]
+ , metadataSlideAuthors :: [[ParaElem]]
+ , metadataSlideDate :: [ParaElem]
+ }
+ | TitleSlide { titleSlideHeader :: [ParaElem]}
+ | ContentSlide { contentSlideHeader :: [ParaElem]
+ , contentSlideContent :: [Shape]
+ }
+ | TwoColumnSlide { twoColumnSlideHeader :: [ParaElem]
+ , twoColumnSlideLeft :: [Shape]
+ , twoColumnSlideRight :: [Shape]
+ }
+ deriving (Show, Eq)
+
+data Shape = Pic PicProps FilePath [ParaElem]
+ | GraphicFrame [Graphic] [ParaElem]
+ | TextBox [Paragraph]
+ deriving (Show, Eq)
+
+type Cell = [Paragraph]
+
+data TableProps = TableProps { tblPrFirstRow :: Bool
+ , tblPrBandRow :: Bool
+ } deriving (Show, Eq)
+
+data Graphic = Tbl TableProps [Cell] [[Cell]]
+ deriving (Show, Eq)
+
+
+data Paragraph = Paragraph { paraProps :: ParaProps
+ , paraElems :: [ParaElem]
+ } deriving (Show, Eq)
+
+data BulletType = Bullet
+ | AutoNumbering ListAttributes
+ deriving (Show, Eq)
+
+data Algnment = AlgnLeft | AlgnRight | AlgnCenter
+ deriving (Show, Eq)
+
+data ParaProps = ParaProps { pPropMarginLeft :: Maybe Pixels
+ , pPropMarginRight :: Maybe Pixels
+ , pPropLevel :: Int
+ , pPropBullet :: Maybe BulletType
+ , pPropAlign :: Maybe Algnment
+ , pPropSpaceBefore :: Maybe Pixels
+ } deriving (Show, Eq)
+
+instance Default ParaProps where
+ def = ParaProps { pPropMarginLeft = Just 0
+ , pPropMarginRight = Just 0
+ , pPropLevel = 0
+ , pPropBullet = Nothing
+ , pPropAlign = Nothing
+ , pPropSpaceBefore = Nothing
+ }
+
+newtype TeXString = TeXString {unTeXString :: String}
+ deriving (Eq, Show)
+
+data ParaElem = Break
+ | Run RunProps String
+ -- It would be more elegant to have native TeXMath
+ -- Expressions here, but this allows us to use
+ -- `convertmath` from T.P.Writers.Math. Will perhaps
+ -- revisit in the future.
+ | MathElem MathType TeXString
+ deriving (Show, Eq)
+
+data Strikethrough = NoStrike | SingleStrike | DoubleStrike
+ deriving (Show, Eq)
+
+data Capitals = NoCapitals | SmallCapitals | AllCapitals
+ deriving (Show, Eq)
+
+type URL = String
+
+data LinkTarget = ExternalTarget (URL, String)
+ | InternalTarget SlideId
+ deriving (Show, Eq)
+
+data RunProps = RunProps { rPropBold :: Bool
+ , rPropItalics :: Bool
+ , rStrikethrough :: Maybe Strikethrough
+ , rBaseline :: Maybe Int
+ , rCap :: Maybe Capitals
+ , rLink :: Maybe LinkTarget
+ , rPropCode :: Bool
+ , rPropBlockQuote :: Bool
+ , rPropForceSize :: Maybe Pixels
+ , rSolidFill :: Maybe Color
+ -- TODO: Make a full underline data type with
+ -- the different options.
+ , rPropUnderline :: Bool
+ } deriving (Show, Eq)
+
+instance Default RunProps where
+ def = RunProps { rPropBold = False
+ , rPropItalics = False
+ , rStrikethrough = Nothing
+ , rBaseline = Nothing
+ , rCap = Nothing
+ , rLink = Nothing
+ , rPropCode = False
+ , rPropBlockQuote = False
+ , rPropForceSize = Nothing
+ , rSolidFill = Nothing
+ , rPropUnderline = False
+ }
+
+data PicProps = PicProps { picPropLink :: Maybe LinkTarget
+ , picWidth :: Maybe Dimension
+ , picHeight :: Maybe Dimension
+ } deriving (Show, Eq)
+
+instance Default PicProps where
+ def = PicProps { picPropLink = Nothing
+ , picWidth = Nothing
+ , picHeight = Nothing
+ }
+
+--------------------------------------------------
+
+inlinesToParElems :: [Inline] -> Pres [ParaElem]
+inlinesToParElems ils = concatMapM inlineToParElems ils
+
+inlineToParElems :: Inline -> Pres [ParaElem]
+inlineToParElems (Str s) = do
+ pr <- asks envRunProps
+ return [Run pr s]
+inlineToParElems (Emph ils) =
+ local (\r -> r{envRunProps = (envRunProps r){rPropItalics=True}}) $
+ inlinesToParElems ils
+inlineToParElems (Strong ils) =
+ local (\r -> r{envRunProps = (envRunProps r){rPropBold=True}}) $
+ inlinesToParElems ils
+inlineToParElems (Strikeout ils) =
+ local (\r -> r{envRunProps = (envRunProps r){rStrikethrough=Just SingleStrike}}) $
+ inlinesToParElems ils
+inlineToParElems (Superscript ils) =
+ local (\r -> r{envRunProps = (envRunProps r){rBaseline=Just 30000}}) $
+ inlinesToParElems ils
+inlineToParElems (Subscript ils) =
+ local (\r -> r{envRunProps = (envRunProps r){rBaseline=Just (-25000)}}) $
+ inlinesToParElems ils
+inlineToParElems (SmallCaps ils) =
+ local (\r -> r{envRunProps = (envRunProps r){rCap = Just SmallCapitals}}) $
+ inlinesToParElems ils
+inlineToParElems Space = inlineToParElems (Str " ")
+inlineToParElems SoftBreak = inlineToParElems (Str " ")
+inlineToParElems LineBreak = return [Break]
+inlineToParElems (Link _ ils (url, title)) =
+ local (\r ->r{envRunProps = (envRunProps r){rLink = Just $ ExternalTarget (url, title)}}) $
+ inlinesToParElems ils
+inlineToParElems (Code _ str) =
+ local (\r ->r{envRunProps = (envRunProps r){rPropCode = True}}) $
+ inlineToParElems $ Str str
+inlineToParElems (Math mathtype str) =
+ return [MathElem mathtype (TeXString str)]
+-- We ignore notes if we're in a speaker notes div. Otherwise this
+-- would add an entry to the endnotes slide, which would put speaker
+-- notes in the public presentation. In the future, we can entertain a
+-- way of adding a speakernotes-specific note that would just add
+-- paragraphs to the bottom of the notes page.
+inlineToParElems (Note blks) = do
+ inSpNotes <- asks envInSpeakerNotes
+ if inSpNotes
+ then return []
+ else do
+ notes <- gets stNoteIds
+ let maxNoteId = case M.keys notes of
+ [] -> 0
+ lst -> maximum lst
+ curNoteId = maxNoteId + 1
+ modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes }
+ local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $
+ inlineToParElems $ Superscript [Str $ show curNoteId]
+inlineToParElems (Span _ ils) = concatMapM inlineToParElems ils
+inlineToParElems (RawInline _ _) = return []
+inlineToParElems _ = return []
+
+isListType :: Block -> Bool
+isListType (OrderedList _ _) = True
+isListType (BulletList _) = True
+isListType (DefinitionList _) = True
+isListType _ = False
+
+registerAnchorId :: String -> Pres ()
+registerAnchorId anchor = do
+ anchorMap <- gets stAnchorMap
+ sldId <- asks envCurSlideId
+ unless (null anchor) $
+ modify $ \st -> st {stAnchorMap = M.insert anchor sldId anchorMap}
+
+-- Currently hardcoded, until I figure out how to make it dynamic.
+blockQuoteSize :: Pixels
+blockQuoteSize = 20
+
+noteSize :: Pixels
+noteSize = 18
+
+blockToParagraphs :: Block -> Pres [Paragraph]
+blockToParagraphs (Plain ils) = do
+ parElems <- inlinesToParElems ils
+ pProps <- asks envParaProps
+ return [Paragraph pProps parElems]
+blockToParagraphs (Para ils) = do
+ parElems <- inlinesToParElems ils
+ pProps <- asks envParaProps
+ return [Paragraph pProps parElems]
+blockToParagraphs (LineBlock ilsList) = do
+ parElems <- inlinesToParElems $ intercalate [LineBreak] ilsList
+ pProps <- asks envParaProps
+ return [Paragraph pProps parElems]
+-- TODO: work out the attributes
+blockToParagraphs (CodeBlock attr str) =
+ local (\r -> r{ envParaProps = def{pPropMarginLeft = Just 100}
+ , envRunProps = (envRunProps r){rPropCode = True}}) $ do
+ mbSty <- writerHighlightStyle <$> asks envOpts
+ synMap <- writerSyntaxMap <$> asks envOpts
+ case mbSty of
+ Just sty ->
+ case highlight synMap (formatSourceLines sty) attr str of
+ Right pElems -> do pProps <- asks envParaProps
+ return [Paragraph pProps pElems]
+ Left _ -> blockToParagraphs $ Para [Str str]
+ Nothing -> blockToParagraphs $ Para [Str str]
+-- We can't yet do incremental lists, but we should render a
+-- (BlockQuote List) as a list to maintain compatibility with other
+-- formats.
+blockToParagraphs (BlockQuote (blk : blks)) | isListType blk = do
+ ps <- blockToParagraphs blk
+ ps' <- blockToParagraphs $ BlockQuote blks
+ return $ ps ++ ps'
+blockToParagraphs (BlockQuote blks) =
+ local (\r -> r{ envParaProps = (envParaProps r){pPropMarginLeft = Just 100}
+ , envRunProps = (envRunProps r){rPropForceSize = Just blockQuoteSize}})$
+ concatMapM blockToParagraphs blks
+-- TODO: work out the format
+blockToParagraphs (RawBlock _ _) = return []
+blockToParagraphs (Header _ (ident, _, _) ils) = do
+ -- Note that this function only deals with content blocks, so it
+ -- will only touch headers that are above the current slide level --
+ -- slides at or below the slidelevel will be taken care of by
+ -- `blocksToSlide'`. We have the register anchors in both of them.
+ registerAnchorId ident
+ -- we set the subeader to bold
+ parElems <- local (\e->e{envRunProps = (envRunProps e){rPropBold=True}}) $
+ inlinesToParElems ils
+ -- and give it a bit of space before it.
+ return [Paragraph def{pPropSpaceBefore = Just 30} parElems]
+blockToParagraphs (BulletList blksLst) = do
+ pProps <- asks envParaProps
+ let lvl = pPropLevel pProps
+ local (\env -> env{ envInList = True
+ , envParaProps = pProps{ pPropLevel = lvl + 1
+ , pPropBullet = Just Bullet
+ , pPropMarginLeft = Nothing
+ }}) $
+ concatMapM multiParBullet blksLst
+blockToParagraphs (OrderedList listAttr blksLst) = do
+ pProps <- asks envParaProps
+ let lvl = pPropLevel pProps
+ local (\env -> env{ envInList = True
+ , envParaProps = pProps{ pPropLevel = lvl + 1
+ , pPropBullet = Just (AutoNumbering listAttr)
+ , pPropMarginLeft = Nothing
+ }}) $
+ concatMapM multiParBullet blksLst
+blockToParagraphs (DefinitionList entries) = do
+ let go :: ([Inline], [[Block]]) -> Pres [Paragraph]
+ go (ils, blksLst) = do
+ term <-blockToParagraphs $ Para [Strong ils]
+ -- For now, we'll treat each definition term as a
+ -- blockquote. We can extend this further later.
+ definition <- concatMapM (blockToParagraphs . BlockQuote) blksLst
+ return $ term ++ definition
+ concatMapM go entries
+blockToParagraphs (Div (_, "notes" : [], _) blks) =
+ local (\env -> env{envInSpeakerNotes=True}) $ do
+ sldId <- asks envCurSlideId
+ spkNotesMap <- gets stSpeakerNotesMap
+ paras <- concatMapM blockToParagraphs blks
+ let spkNotesMap' = case M.lookup sldId spkNotesMap of
+ Just lst -> M.insert sldId (paras : lst) spkNotesMap
+ Nothing -> M.insert sldId [paras] spkNotesMap
+ modify $ \st -> st{stSpeakerNotesMap = spkNotesMap'}
+ return []
+blockToParagraphs (Div _ blks) = concatMapM blockToParagraphs blks
+blockToParagraphs blk = do
+ addLogMessage $ BlockNotRendered blk
+ return []
+
+-- Make sure the bullet env gets turned off after the first para.
+multiParBullet :: [Block] -> Pres [Paragraph]
+multiParBullet [] = return []
+multiParBullet (b:bs) = do
+ pProps <- asks envParaProps
+ p <- blockToParagraphs b
+ ps <- local (\env -> env{envParaProps = pProps{pPropBullet = Nothing}}) $
+ concatMapM blockToParagraphs bs
+ return $ p ++ ps
+
+cellToParagraphs :: Alignment -> TableCell -> Pres [Paragraph]
+cellToParagraphs algn tblCell = do
+ paras <- mapM blockToParagraphs tblCell
+ let alignment = case algn of
+ AlignLeft -> Just AlgnLeft
+ AlignRight -> Just AlgnRight
+ AlignCenter -> Just AlgnCenter
+ AlignDefault -> Nothing
+ paras' = map (map (\p -> p{paraProps = (paraProps p){pPropAlign = alignment}})) paras
+ return $ concat paras'
+
+rowToParagraphs :: [Alignment] -> [TableCell] -> Pres [[Paragraph]]
+rowToParagraphs algns tblCells = do
+ -- We have to make sure we have the right number of alignments
+ let pairs = zip (algns ++ repeat AlignDefault) tblCells
+ mapM (uncurry cellToParagraphs) pairs
+
+withAttr :: Attr -> Shape -> Shape
+withAttr attr (Pic picPr url caption) =
+ let picPr' = picPr { picWidth = dimension Width attr
+ , picHeight = dimension Height attr
+ }
+ in
+ Pic picPr' url caption
+withAttr _ sp = sp
+
+blockToShape :: Block -> Pres Shape
+blockToShape (Plain (il:_)) | Image attr ils (url, _) <- il =
+ (withAttr attr . Pic def url) <$> inlinesToParElems ils
+blockToShape (Para (il:_)) | Image attr ils (url, _) <- il =
+ (withAttr attr . Pic def url) <$> inlinesToParElems ils
+blockToShape (Plain (il:_)) | Link _ (il':_) target <- il
+ , Image attr ils (url, _) <- il' =
+ (withAttr attr . Pic def {picPropLink = Just $ ExternalTarget target} url) <$>
+ inlinesToParElems ils
+blockToShape (Para (il:_)) | Link _ (il':_) target <- il
+ , Image attr ils (url, _) <- il' =
+ (withAttr attr . Pic def{picPropLink = Just $ ExternalTarget target} url) <$>
+ inlinesToParElems ils
+blockToShape (Table caption algn _ hdrCells rows) = do
+ caption' <- inlinesToParElems caption
+ hdrCells' <- rowToParagraphs algn hdrCells
+ rows' <- mapM (rowToParagraphs algn) rows
+ let tblPr = if null hdrCells
+ then TableProps { tblPrFirstRow = False
+ , tblPrBandRow = True
+ }
+ else TableProps { tblPrFirstRow = True
+ , tblPrBandRow = True
+ }
+
+ return $ GraphicFrame [Tbl tblPr hdrCells' rows'] caption'
+blockToShape blk = do paras <- blockToParagraphs blk
+ let paras' = map (\par -> par{paraElems = combineParaElems $ paraElems par}) paras
+ return $ TextBox paras'
+
+combineShapes :: [Shape] -> [Shape]
+combineShapes [] = []
+combineShapes[s] = [s]
+combineShapes (pic@(Pic{}) : ss) = pic : combineShapes ss
+combineShapes (TextBox [] : ss) = combineShapes ss
+combineShapes (s : TextBox [] : ss) = combineShapes (s : ss)
+combineShapes (TextBox (p:ps) : TextBox (p':ps') : ss) =
+ combineShapes $ TextBox ((p:ps) ++ (p':ps')) : ss
+combineShapes (s:ss) = s : combineShapes ss
+
+blocksToShapes :: [Block] -> Pres [Shape]
+blocksToShapes blks = combineShapes <$> mapM blockToShape blks
+
+isImage :: Inline -> Bool
+isImage (Image{}) = True
+isImage (Link _ (Image _ _ _ : _) _) = True
+isImage _ = False
+
+splitBlocks' :: [Block] -> [[Block]] -> [Block] -> Pres [[Block]]
+splitBlocks' cur acc [] = return $ acc ++ (if null cur then [] else [cur])
+splitBlocks' cur acc (HorizontalRule : blks) =
+ splitBlocks' [] (acc ++ (if null cur then [] else [cur])) blks
+splitBlocks' cur acc (h@(Header n _ _) : blks) = do
+ slideLevel <- asks envSlideLevel
+ case compare n slideLevel of
+ LT -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[h]]) blks
+ EQ -> splitBlocks' [h] (acc ++ (if null cur then [] else [cur])) blks
+ GT -> splitBlocks' (cur ++ [h]) acc blks
+-- `blockToParagraphs` treats Plain and Para the same, so we can save
+-- some code duplication by treating them the same here.
+splitBlocks' cur acc (Plain ils : blks) = splitBlocks' cur acc (Para ils : blks)
+splitBlocks' cur acc (Para (il:ils) : blks) | isImage il = do
+ slideLevel <- asks envSlideLevel
+ case cur of
+ [(Header n _ _)] | n == slideLevel ->
+ splitBlocks' []
+ (acc ++ [cur ++ [Para [il]]])
+ (if null ils then blks else Para ils : blks)
+ _ -> splitBlocks' []
+ (acc ++ (if null cur then [] else [cur]) ++ [[Para [il]]])
+ (if null ils then blks else Para ils : blks)
+splitBlocks' cur acc (tbl@(Table{}) : blks) = do
+ slideLevel <- asks envSlideLevel
+ case cur of
+ [(Header n _ _)] | n == slideLevel ->
+ splitBlocks' [] (acc ++ [cur ++ [tbl]]) blks
+ _ -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[tbl]]) blks
+splitBlocks' cur acc (d@(Div (_, classes, _) _): blks) | "columns" `elem` classes = do
+ slideLevel <- asks envSlideLevel
+ case cur of
+ [(Header n _ _)] | n == slideLevel ->
+ splitBlocks' [] (acc ++ [cur ++ [d]]) blks
+ _ -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[d]]) blks
+splitBlocks' cur acc (blk : blks) = splitBlocks' (cur ++ [blk]) acc blks
+
+splitBlocks :: [Block] -> Pres [[Block]]
+splitBlocks = splitBlocks' [] []
+
+getSpeakerNotes :: Pres (Maybe SpeakerNotes)
+getSpeakerNotes = do
+ sldId <- asks envCurSlideId
+ spkNtsMap <- gets stSpeakerNotesMap
+ return $ (SpeakerNotes . concat . reverse) <$> (M.lookup sldId spkNtsMap)
+
+blocksToSlide' :: Int -> [Block] -> Pres Slide
+blocksToSlide' lvl (Header n (ident, _, _) ils : blks)
+ | n < lvl = do
+ registerAnchorId ident
+ sldId <- asks envCurSlideId
+ hdr <- inlinesToParElems ils
+ return $ Slide sldId TitleSlide {titleSlideHeader = hdr} Nothing
+ | n == lvl = do
+ registerAnchorId ident
+ hdr <- inlinesToParElems ils
+ -- Now get the slide without the header, and then add the header
+ -- in.
+ slide <- blocksToSlide' lvl blks
+ let layout = case slideLayout slide of
+ ContentSlide _ cont -> ContentSlide hdr cont
+ TwoColumnSlide _ contL contR -> TwoColumnSlide hdr contL contR
+ layout' -> layout'
+ return $ slide{slideLayout = layout}
+blocksToSlide' _ (blk : blks)
+ | Div (_, classes, _) divBlks <- blk
+ , "columns" `elem` classes
+ , Div (_, clsL, _) blksL : Div (_, clsR, _) blksR : remaining <- divBlks
+ , "column" `elem` clsL, "column" `elem` clsR = do
+ unless (null blks)
+ (mapM (addLogMessage . BlockNotRendered) blks >> return ())
+ unless (null remaining)
+ (mapM (addLogMessage . BlockNotRendered) remaining >> return ())
+ mbSplitBlksL <- splitBlocks blksL
+ mbSplitBlksR <- splitBlocks blksR
+ let blksL' = case mbSplitBlksL of
+ bs : _ -> bs
+ [] -> []
+ let blksR' = case mbSplitBlksR of
+ bs : _ -> bs
+ [] -> []
+ shapesL <- blocksToShapes blksL'
+ shapesR <- blocksToShapes blksR'
+ sldId <- asks envCurSlideId
+ return $ Slide
+ sldId
+ TwoColumnSlide { twoColumnSlideHeader = []
+ , twoColumnSlideLeft = shapesL
+ , twoColumnSlideRight = shapesR
+ }
+ Nothing
+blocksToSlide' _ (blk : blks) = do
+ inNoteSlide <- asks envInNoteSlide
+ shapes <- if inNoteSlide
+ then forceFontSize noteSize $ blocksToShapes (blk : blks)
+ else blocksToShapes (blk : blks)
+ sldId <- asks envCurSlideId
+ return $
+ Slide
+ sldId
+ ContentSlide { contentSlideHeader = []
+ , contentSlideContent = shapes
+ }
+ Nothing
+blocksToSlide' _ [] = do
+ sldId <- asks envCurSlideId
+ return $
+ Slide
+ sldId
+ ContentSlide { contentSlideHeader = []
+ , contentSlideContent = []
+ }
+ Nothing
+
+blocksToSlide :: [Block] -> Pres Slide
+blocksToSlide blks = do
+ slideLevel <- asks envSlideLevel
+ sld <- blocksToSlide' slideLevel blks
+ spkNotes <- getSpeakerNotes
+ return $ sld{slideSpeakerNotes = spkNotes}
+
+makeNoteEntry :: Int -> [Block] -> [Block]
+makeNoteEntry n blks =
+ let enum = Str (show n ++ ".")
+ in
+ case blks of
+ (Para ils : blks') -> (Para $ enum : Space : ils) : blks'
+ _ -> Para [enum] : blks
+
+forceFontSize :: Pixels -> Pres a -> Pres a
+forceFontSize px x = do
+ rpr <- asks envRunProps
+ local (\r -> r {envRunProps = rpr{rPropForceSize = Just px}}) x
+
+-- We leave these as blocks because we will want to include them in
+-- the TOC.
+makeEndNotesSlideBlocks :: Pres [Block]
+makeEndNotesSlideBlocks = do
+ noteIds <- gets stNoteIds
+ slideLevel <- asks envSlideLevel
+ meta <- asks envMetadata
+ -- Get identifiers so we can give the notes section a unique ident.
+ anchorSet <- M.keysSet <$> gets stAnchorMap
+ if M.null noteIds
+ then return []
+ else do let title = case lookupMeta "notes-title" meta of
+ Just val -> metaValueToInlines val
+ Nothing -> [Str "Notes"]
+ ident = Shared.uniqueIdent title anchorSet
+ hdr = Header slideLevel (ident, [], []) title
+ blks <- return $
+ concatMap (\(n, bs) -> makeNoteEntry n bs) $
+ M.toList noteIds
+ return $ hdr : blks
+
+getMetaSlide :: Pres (Maybe Slide)
+getMetaSlide = do
+ meta <- asks envMetadata
+ title <- inlinesToParElems $ docTitle meta
+ subtitle <- inlinesToParElems $
+ case lookupMeta "subtitle" meta of
+ Just (MetaString s) -> [Str s]
+ Just (MetaInlines ils) -> ils
+ Just (MetaBlocks [Plain ils]) -> ils
+ Just (MetaBlocks [Para ils]) -> ils
+ _ -> []
+ authors <- mapM inlinesToParElems $ docAuthors meta
+ date <- inlinesToParElems $ docDate meta
+ if null title && null subtitle && null authors && null date
+ then return Nothing
+ else return $
+ Just $
+ Slide
+ metadataSlideId
+ MetadataSlide { metadataSlideTitle = title
+ , metadataSlideSubtitle = subtitle
+ , metadataSlideAuthors = authors
+ , metadataSlideDate = date
+ }
+ Nothing
+
+-- adapted from the markdown writer
+elementToListItem :: Shared.Element -> Pres [Block]
+elementToListItem (Shared.Sec lev _nums (ident,_,_) headerText subsecs) = do
+ opts <- asks envOpts
+ let headerLink = if null ident
+ then walk Shared.deNote headerText
+ else [Link nullAttr (walk Shared.deNote headerText)
+ ('#':ident, "")]
+ listContents <- if null subsecs || lev >= writerTOCDepth opts
+ then return []
+ else mapM elementToListItem subsecs
+ return [Plain headerLink, BulletList listContents]
+elementToListItem (Shared.Blk _) = return []
+
+makeTOCSlide :: [Block] -> Pres Slide
+makeTOCSlide blks = local (\env -> env{envCurSlideId = tocSlideId}) $ do
+ contents <- BulletList <$> mapM elementToListItem (Shared.hierarchicalize blks)
+ meta <- asks envMetadata
+ slideLevel <- asks envSlideLevel
+ let tocTitle = case lookupMeta "toc-title" meta of
+ Just val -> metaValueToInlines val
+ Nothing -> [Str "Table of Contents"]
+ hdr = Header slideLevel nullAttr tocTitle
+ sld <- blocksToSlide [hdr, contents]
+ return sld
+
+combineParaElems' :: Maybe ParaElem -> [ParaElem] -> [ParaElem]
+combineParaElems' mbPElem [] = maybeToList mbPElem
+combineParaElems' Nothing (pElem : pElems) =
+ combineParaElems' (Just pElem) pElems
+combineParaElems' (Just pElem') (pElem : pElems)
+ | Run rPr' s' <- pElem'
+ , Run rPr s <- pElem
+ , rPr == rPr' =
+ combineParaElems' (Just $ Run rPr' $ s' ++ s) pElems
+ | otherwise =
+ pElem' : combineParaElems' (Just pElem) pElems
+
+combineParaElems :: [ParaElem] -> [ParaElem]
+combineParaElems = combineParaElems' Nothing
+
+applyToParagraph :: Monad m => (ParaElem -> m ParaElem) -> Paragraph -> m Paragraph
+applyToParagraph f para = do
+ paraElems' <- mapM f $ paraElems para
+ return $ para {paraElems = paraElems'}
+
+applyToShape :: Monad m => (ParaElem -> m ParaElem) -> Shape -> m Shape
+applyToShape f (Pic pPr fp pes) = do
+ pes' <- mapM f pes
+ return $ Pic pPr fp pes'
+applyToShape f (GraphicFrame gfx pes) = do
+ pes' <- mapM f pes
+ return $ GraphicFrame gfx pes'
+applyToShape f (TextBox paras) = do
+ paras' <- mapM (applyToParagraph f) paras
+ return $ TextBox paras'
+
+applyToLayout :: Monad m => (ParaElem -> m ParaElem) -> Layout -> m Layout
+applyToLayout f (MetadataSlide title subtitle authors date) = do
+ title' <- mapM f title
+ subtitle' <- mapM f subtitle
+ authors' <- mapM (mapM f) authors
+ date' <- mapM f date
+ return $ MetadataSlide title' subtitle' authors' date'
+applyToLayout f (TitleSlide title) = do
+ title' <- mapM f title
+ return $ TitleSlide title'
+applyToLayout f (ContentSlide hdr content) = do
+ hdr' <- mapM f hdr
+ content' <- mapM (applyToShape f) content
+ return $ ContentSlide hdr' content'
+applyToLayout f (TwoColumnSlide hdr contentL contentR) = do
+ hdr' <- mapM f hdr
+ contentL' <- mapM (applyToShape f) contentL
+ contentR' <- mapM (applyToShape f) contentR
+ return $ TwoColumnSlide hdr' contentL' contentR'
+
+applyToSlide :: Monad m => (ParaElem -> m ParaElem) -> Slide -> m Slide
+applyToSlide f slide = do
+ layout' <- applyToLayout f $ slideLayout slide
+ mbNotes' <- case slideSpeakerNotes slide of
+ Just (SpeakerNotes notes) -> (Just . SpeakerNotes) <$>
+ mapM (applyToParagraph f) notes
+ Nothing -> return Nothing
+ return slide{slideLayout = layout', slideSpeakerNotes = mbNotes'}
+
+replaceAnchor :: ParaElem -> Pres ParaElem
+replaceAnchor (Run rProps s)
+ | Just (ExternalTarget ('#':anchor, _)) <- rLink rProps = do
+ anchorMap <- gets stAnchorMap
+ -- If the anchor is not in the anchormap, we just remove the
+ -- link.
+ let rProps' = case M.lookup anchor anchorMap of
+ Just n -> rProps{rLink = Just $ InternalTarget n}
+ Nothing -> rProps{rLink = Nothing}
+ return $ Run rProps' s
+replaceAnchor pe = return pe
+
+emptyParaElem :: ParaElem -> Bool
+emptyParaElem (Run _ s) =
+ null $ Shared.trim s
+emptyParaElem (MathElem _ ts) =
+ null $ Shared.trim $ unTeXString ts
+emptyParaElem _ = False
+
+emptyParagraph :: Paragraph -> Bool
+emptyParagraph para = all emptyParaElem $ paraElems para
+
+
+emptyShape :: Shape -> Bool
+emptyShape (TextBox paras) = all emptyParagraph $ paras
+emptyShape _ = False
+
+emptyLayout :: Layout -> Bool
+emptyLayout layout = case layout of
+ MetadataSlide title subtitle authors date ->
+ all emptyParaElem title &&
+ all emptyParaElem subtitle &&
+ all (all emptyParaElem) authors &&
+ all emptyParaElem date
+ TitleSlide hdr -> all emptyParaElem hdr
+ ContentSlide hdr shapes ->
+ all emptyParaElem hdr &&
+ all emptyShape shapes
+ TwoColumnSlide hdr shapes1 shapes2 ->
+ all emptyParaElem hdr &&
+ all emptyShape shapes1 &&
+ all emptyShape shapes2
+
+emptySlide :: Slide -> Bool
+emptySlide (Slide _ layout Nothing) = emptyLayout layout
+emptySlide _ = False
+
+blocksToPresentationSlides :: [Block] -> Pres [Slide]
+blocksToPresentationSlides blks = do
+ opts <- asks envOpts
+ metadataslides <- maybeToList <$> getMetaSlide
+ -- As far as I can tell, if we want to have a variable-length toc in
+ -- the future, we'll have to make it twice. Once to get the length,
+ -- and a second time to include the notes slide. We can't make the
+ -- notes slide before the body slides because we need to know if
+ -- there are notes, and we can't make either before the toc slide,
+ -- because we need to know its length to get slide numbers right.
+ --
+ -- For now, though, since the TOC slide is only length 1, if it
+ -- exists, we'll just get the length, and then come back to make the
+ -- slide later
+ blksLst <- splitBlocks blks
+ bodySlideIds <- mapM
+ (\n -> runUniqueSlideId $ "BodySlide" ++ show n)
+ (take (length blksLst) [1..] :: [Integer])
+ bodyslides <- mapM
+ (\(bs, ident) ->
+ local (\st -> st{envCurSlideId = ident}) (blocksToSlide bs))
+ (zip blksLst bodySlideIds)
+ endNotesSlideBlocks <- makeEndNotesSlideBlocks
+ -- now we come back and make the real toc...
+ tocSlides <- if writerTableOfContents opts
+ then do toc <- makeTOCSlide $ blks ++ endNotesSlideBlocks
+ return [toc]
+ else return []
+ -- ... and the notes slide. We test to see if the blocks are empty,
+ -- because we don't want to make an empty slide.
+ endNotesSlides <- if null endNotesSlideBlocks
+ then return []
+ else do endNotesSlide <- local
+ (\env -> env { envCurSlideId = endNotesSlideId
+ , envInNoteSlide = True
+ })
+ (blocksToSlide endNotesSlideBlocks)
+ return [endNotesSlide]
+
+ let slides = metadataslides ++ tocSlides ++ bodyslides ++ endNotesSlides
+ slides' = filter (not . emptySlide) slides
+ mapM (applyToSlide replaceAnchor) slides'
+
+metaToDocProps :: Meta -> DocProps
+metaToDocProps meta =
+ let keywords = case lookupMeta "keywords" meta of
+ Just (MetaList xs) -> Just $ map Shared.stringify xs
+ _ -> Nothing
+
+ authors = case map Shared.stringify $ docAuthors meta of
+ [] -> Nothing
+ ss -> Just $ intercalate ";" ss
+ in
+ DocProps{ dcTitle = Shared.stringify <$> lookupMeta "title" meta
+ , dcSubject = Shared.stringify <$> lookupMeta "subject" meta
+ , dcCreator = authors
+ , dcKeywords = keywords
+ , dcCreated = Nothing
+ }
+
+documentToPresentation :: WriterOptions
+ -> Pandoc
+ -> (Presentation, [LogMessage])
+documentToPresentation opts (Pandoc meta blks) =
+ let env = def { envOpts = opts
+ , envMetadata = meta
+ , envSlideLevel = fromMaybe (getSlideLevel blks) (writerSlideLevel opts)
+ }
+ (presSlides, msgs) = runPres env def $ blocksToPresentationSlides blks
+ docProps = metaToDocProps meta
+ in
+ (Presentation docProps presSlides, msgs)
+
+-- --------------------------------------------------------------
+
+applyTokStyToRunProps :: TokenStyle -> RunProps -> RunProps
+applyTokStyToRunProps tokSty rProps =
+ rProps{ rSolidFill = tokenColor tokSty <|> rSolidFill rProps
+ , rPropBold = tokenBold tokSty || rPropBold rProps
+ , rPropItalics = tokenItalic tokSty || rPropItalics rProps
+ , rPropUnderline = tokenUnderline tokSty || rPropUnderline rProps
+ }
+
+formatToken :: Style -> Token -> ParaElem
+formatToken sty (tokType, txt) =
+ let rProps = def{rPropCode = True, rSolidFill = defaultColor sty}
+ rProps' = case M.lookup tokType (tokenStyles sty) of
+ Just tokSty -> applyTokStyToRunProps tokSty rProps
+ Nothing -> rProps
+ in
+ Run rProps' $ T.unpack txt
+
+formatSourceLine :: Style -> FormatOptions -> SourceLine -> [ParaElem]
+formatSourceLine sty _ srcLn = map (formatToken sty) srcLn
+
+formatSourceLines :: Style -> FormatOptions -> [SourceLine] -> [ParaElem]
+formatSourceLines sty opts srcLns = intercalate [Break] $
+ map (formatSourceLine sty opts) srcLns
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 064434483..95cb46643 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.RST
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -31,19 +31,21 @@ Conversion of 'Pandoc' documents to reStructuredText.
reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
module Text.Pandoc.Writers.RST ( writeRST ) where
+import Control.Monad.State.Strict
+import Data.Char (isSpace, toLower)
+import Data.List (isPrefixOf, stripPrefix)
+import Data.Maybe (fromMaybe)
+import Data.Text (Text, stripEnd)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
+import Text.Pandoc.Pretty
import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.ImageSize
import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Builder (deleteMeta)
-import Data.Maybe (fromMaybe)
-import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose )
-import Network.URI (isURI)
-import Text.Pandoc.Pretty
-import Control.Monad.State
-import Data.Char (isSpace, toLower)
+import Text.Pandoc.Writers.Shared
type Refs = [([Inline], Target)]
@@ -57,95 +59,99 @@ data WriterState =
, stTopLevel :: Bool
}
+type RST = StateT WriterState
+
-- | Convert Pandoc to RST.
-writeRST :: WriterOptions -> Pandoc -> String
-writeRST opts document =
+writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeRST opts document = do
let st = WriterState { stNotes = [], stLinks = [],
stImages = [], stHasMath = False,
stHasRawTeX = False, stOptions = opts,
- stTopLevel = True}
- in evalState (pandocToRST document) st
+ stTopLevel = True }
+ evalStateT (pandocToRST document) st
-- | Return RST representation of document.
-pandocToRST :: Pandoc -> State WriterState String
+pandocToRST :: PandocMonad m => Pandoc -> RST m Text
pandocToRST (Pandoc meta blocks) = do
- opts <- liftM stOptions get
+ opts <- gets stOptions
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
+ let render' :: Doc -> Text
+ render' = render colwidth
let subtit = case lookupMeta "subtitle" meta of
Just (MetaBlocks [Plain xs]) -> xs
- _ -> []
+ _ -> []
title <- titleToRST (docTitle meta) subtit
metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToRST)
- (fmap (trimr . render colwidth) . inlineListToRST)
- $ deleteMeta "title" $ deleteMeta "subtitle" meta
+ (fmap render' . blockListToRST)
+ (fmap (stripEnd . render') . inlineListToRST)
+ $ B.deleteMeta "title" $ B.deleteMeta "subtitle" meta
body <- blockListToRST' True $ case writerTemplate opts of
Just _ -> normalizeHeadings 1 blocks
Nothing -> blocks
- notes <- liftM (reverse . stNotes) get >>= notesToRST
+ notes <- gets (reverse . stNotes) >>= notesToRST
-- note that the notes may contain refs, so we do them first
- refs <- liftM (reverse . stLinks) get >>= refsToRST
- pics <- liftM (reverse . stImages) get >>= pictRefsToRST
- hasMath <- liftM stHasMath get
- rawTeX <- liftM stHasRawTeX get
- let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics]
+ refs <- gets (reverse . stLinks) >>= refsToRST
+ pics <- gets (reverse . stImages) >>= pictRefsToRST
+ hasMath <- gets stHasMath
+ rawTeX <- gets stHasRawTeX
+ let main = render' $ foldl ($+$) empty [body, notes, refs, pics]
let context = defField "body" main
$ defField "toc" (writerTableOfContents opts)
$ defField "toc-depth" (show $ writerTOCDepth opts)
$ defField "math" hasMath
$ defField "title" (render Nothing title :: String)
$ defField "math" hasMath
- $ defField "rawtex" rawTeX
- $ metadata
+ $ defField "rawtex" rawTeX metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
where
normalizeHeadings lev (Header l a i:bs) =
Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs'
where (cont,bs') = break (headerLtEq l) bs
headerLtEq level (Header l' _ _) = l' <= level
- headerLtEq _ _ = False
+ headerLtEq _ _ = False
normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs
normalizeHeadings _ [] = []
-- | Return RST representation of reference key table.
-refsToRST :: Refs -> State WriterState Doc
+refsToRST :: PandocMonad m => Refs -> RST m Doc
refsToRST refs = mapM keyToRST refs >>= return . vcat
-- | Return RST representation of a reference key.
-keyToRST :: ([Inline], (String, String))
- -> State WriterState Doc
+keyToRST :: PandocMonad m => ([Inline], (String, String)) -> RST m Doc
keyToRST (label, (src, _)) = do
label' <- inlineListToRST label
- let label'' = if ':' `elem` ((render Nothing label') :: String)
+ let label'' = if ':' `elem` (render Nothing label' :: String)
then char '`' <> label' <> char '`'
else label'
return $ nowrap $ ".. _" <> label'' <> ": " <> text src
-- | Return RST representation of notes.
-notesToRST :: [[Block]] -> State WriterState Doc
+notesToRST :: PandocMonad m => [[Block]] -> RST m Doc
notesToRST notes =
- mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>=
+ zipWithM noteToRST [1..] notes >>=
return . vsep
-- | Return RST representation of a note.
-noteToRST :: Int -> [Block] -> State WriterState Doc
+noteToRST :: PandocMonad m => Int -> [Block] -> RST m Doc
noteToRST num note = do
contents <- blockListToRST note
let marker = ".. [" <> text (show num) <> "]"
return $ nowrap $ marker $$ nest 3 contents
-- | Return RST representation of picture reference table.
-pictRefsToRST :: [([Inline], (Attr, String, String, Maybe String))]
- -> State WriterState Doc
+pictRefsToRST :: PandocMonad m
+ => [([Inline], (Attr, String, String, Maybe String))]
+ -> RST m Doc
pictRefsToRST refs = mapM pictToRST refs >>= return . vcat
-- | Return RST representation of a picture substitution reference.
-pictToRST :: ([Inline], (Attr, String, String, Maybe String))
- -> State WriterState Doc
+pictToRST :: PandocMonad m
+ => ([Inline], (Attr, String, String, Maybe String))
+ -> RST m Doc
pictToRST (label, (attr, src, _, mbtarget)) = do
label' <- inlineListToRST label
dims <- imageDimsToRST attr
@@ -160,10 +166,27 @@ pictToRST (label, (attr, src, _, mbtarget)) = do
Just t -> " :target: " <> text t
-- | Escape special characters for RST.
-escapeString :: String -> String
-escapeString = escapeStringUsing (backslashEscapes "`\\|*_")
+escapeString :: WriterOptions -> String -> String
+escapeString = escapeString' True
+ where
+ escapeString' _ _ [] = []
+ escapeString' firstChar opts (c:cs) =
+ case c of
+ _ | c `elem` ['\\','`','*','_','|'] &&
+ (firstChar || null cs) -> '\\':c:escapeString' False opts cs
+ '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString' False opts cs
+ '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs
+ '-' | isEnabled Ext_smart opts ->
+ case cs of
+ '-':_ -> '\\':'-':escapeString' False opts cs
+ _ -> '-':escapeString' False opts cs
+ '.' | isEnabled Ext_smart opts ->
+ case cs of
+ '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest
+ _ -> '.':escapeString' False opts cs
+ _ -> c : escapeString' False opts cs
-titleToRST :: [Inline] -> [Inline] -> State WriterState Doc
+titleToRST :: PandocMonad m => [Inline] -> [Inline] -> RST m Doc
titleToRST [] _ = return empty
titleToRST tit subtit = do
title <- inlineListToRST tit
@@ -179,8 +202,9 @@ bordered contents c =
border = text (replicate len c)
-- | Convert Pandoc block element to RST.
-blockToRST :: Block -- ^ Block element
- -> State WriterState Doc
+blockToRST :: PandocMonad m
+ => Block -- ^ Block element
+ -> RST m Doc
blockToRST Null = return empty
blockToRST (Div attr bs) = do
contents <- blockListToRST bs
@@ -200,7 +224,7 @@ blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
else ":figclass: " <> text (unwords cls)
return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline
blockToRST (Para inlines)
- | LineBreak `elem` inlines = do -- use line block if LineBreaks
+ | LineBreak `elem` inlines =
linesToLineBlock $ splitBy (==LineBreak) inlines
| otherwise = do
contents <- inlineListToRST inlines
@@ -211,17 +235,22 @@ blockToRST (RawBlock f@(Format f') str)
| f == "rst" = return $ text str
| otherwise = return $ blankline <> ".. raw:: " <>
text (map toLower f') $+$
- (nest 3 $ text str) $$ blankline
+ nest 3 (text str) $$ blankline
blockToRST HorizontalRule =
return $ blankline $$ "--------------" $$ blankline
blockToRST (Header level (name,classes,_) inlines) = do
contents <- inlineListToRST inlines
+ -- we calculate the id that would be used by auto_identifiers
+ -- so we know whether to print an explicit identifier
+ let autoId = uniqueIdent inlines mempty
isTopLevel <- gets stTopLevel
if isTopLevel
then do
let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
let border = text $ replicate (offset contents) headerChar
- return $ nowrap $ contents $$ border $$ blankline
+ let anchor | null name || name == autoId = empty
+ | otherwise = ".. _" <> text name <> ":" $$ blankline
+ return $ nowrap $ anchor $$ contents $$ border $$ blankline
else do
let rub = "rubric:: " <> contents
let name' | null name = empty
@@ -230,7 +259,7 @@ blockToRST (Header level (name,classes,_) inlines) = do
| otherwise = ":class: " <> text (unwords classes)
return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline
blockToRST (CodeBlock (_,classes,kvs) str) = do
- opts <- stOptions <$> get
+ opts <- gets stOptions
let tabstop = writerTabStop opts
let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs
let numberlines = if "numberLines" `elem` classes
@@ -246,59 +275,38 @@ blockToRST (CodeBlock (_,classes,kvs) str) = do
(lang:_) -> (".. code:: " <> text lang) $$ numberlines)
$+$ nest tabstop (text str) $$ blankline
blockToRST (BlockQuote blocks) = do
- tabstop <- get >>= (return . writerTabStop . stOptions)
+ tabstop <- gets $ writerTabStop . stOptions
contents <- blockListToRST blocks
- return $ (nest tabstop contents) <> blankline
-blockToRST (Table caption _ widths headers rows) = do
+ return $ nest tabstop contents <> blankline
+blockToRST (Table caption aligns widths headers rows) = do
caption' <- inlineListToRST caption
- let caption'' = if null caption
- then empty
- else blankline <> text "Table: " <> caption'
- headers' <- mapM blockListToRST headers
- rawRows <- mapM (mapM blockListToRST) rows
- -- let isSimpleCell [Plain _] = True
- -- isSimpleCell [Para _] = True
- -- isSimpleCell [] = True
- -- isSimpleCell _ = False
- -- let isSimple = all (==0) widths && all (all isSimpleCell) rows
- let numChars = maximum . map offset
- opts <- get >>= return . stOptions
- let widthsInChars =
- if all (== 0) widths
- then map ((+2) . numChars) $ transpose (headers' : rawRows)
- else map (floor . (fromIntegral (writerColumns opts) *)) widths
- let hpipeBlocks blocks = hcat [beg, middle, end]
- where h = height (hcat blocks)
- sep' = lblock 3 $ vcat (map text $ replicate h " | ")
- beg = lblock 2 $ vcat (map text $ replicate h "| ")
- end = lblock 2 $ vcat (map text $ replicate h " |")
- middle = hcat $ intersperse sep' blocks
- let makeRow = hpipeBlocks . zipWith lblock widthsInChars
- let head' = makeRow headers'
- let rows' = map makeRow rawRows
- let border ch = char '+' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
- map (\l -> text $ replicate l ch) widthsInChars) <>
- char ch <> char '+'
- let body = vcat $ intersperse (border '-') rows'
- let head'' = if all null headers
- then empty
- else head' $$ border '='
- return $ border '-' $$ head'' $$ body $$ border '-' $$ caption'' $$ blankline
+ let blocksToDoc opts bs = do
+ oldOpts <- gets stOptions
+ modify $ \st -> st{ stOptions = opts }
+ result <- blockListToRST bs
+ modify $ \st -> st{ stOptions = oldOpts }
+ return result
+ opts <- gets stOptions
+ tbl <- gridTable opts blocksToDoc (all null headers)
+ (map (const AlignDefault) aligns) widths
+ headers rows
+ return $ if null caption
+ then tbl $$ blankline
+ else (".. table:: " <> caption') $$ blankline $$ nest 3 tbl $$
+ blankline
blockToRST (BulletList items) = do
contents <- mapM bulletListItemToRST items
-- ensure that sublists have preceding blank line
return $ blankline $$ chomp (vcat contents) $$ blankline
blockToRST (OrderedList (start, style', delim) items) = do
let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
- then take (length items) $ repeat "#."
+ then replicate (length items) "#."
else take (length items) $ orderedListMarkers
(start, style', delim)
let maxMarkerLength = maximum $ map length markers
let markers' = map (\m -> let s = maxMarkerLength - length m
in m ++ replicate s ' ') markers
- contents <- mapM (\(item, num) -> orderedListItemToRST item num) $
- zip markers' items
+ contents <- zipWithM orderedListItemToRST markers' items
-- ensure that sublists have preceding blank line
return $ blankline $$ chomp (vcat contents) $$ blankline
blockToRST (DefinitionList items) = do
@@ -307,51 +315,69 @@ blockToRST (DefinitionList items) = do
return $ blankline $$ chomp (vcat contents) $$ blankline
-- | Convert bullet list item (list of blocks) to RST.
-bulletListItemToRST :: [Block] -> State WriterState Doc
+bulletListItemToRST :: PandocMonad m => [Block] -> RST m Doc
bulletListItemToRST items = do
contents <- blockListToRST items
return $ hang 3 "- " $ contents <> cr
-- | Convert ordered list item (a list of blocks) to RST.
-orderedListItemToRST :: String -- ^ marker for list item
+orderedListItemToRST :: PandocMonad m
+ => String -- ^ marker for list item
-> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
+ -> RST m Doc
orderedListItemToRST marker items = do
contents <- blockListToRST items
let marker' = marker ++ " "
return $ hang (length marker') (text marker') $ contents <> cr
-- | Convert defintion list item (label, list of blocks) to RST.
-definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc
+definitionListItemToRST :: PandocMonad m => ([Inline], [[Block]]) -> RST m Doc
definitionListItemToRST (label, defs) = do
label' <- inlineListToRST label
contents <- liftM vcat $ mapM blockListToRST defs
- tabstop <- get >>= (return . writerTabStop . stOptions)
- return $ label' $$ nest tabstop (nestle contents <> cr)
+ tabstop <- gets $ writerTabStop . stOptions
+ return $ nowrap label' $$ nest tabstop (nestle contents <> cr)
-- | Format a list of lines as line block.
-linesToLineBlock :: [[Inline]] -> State WriterState Doc
+linesToLineBlock :: PandocMonad m => [[Inline]] -> RST m Doc
linesToLineBlock inlineLines = do
lns <- mapM inlineListToRST inlineLines
- return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline
+ return $
+ vcat (map (hang 2 (text "| ")) lns) <> blankline
-- | Convert list of Pandoc block elements to RST.
-blockListToRST' :: Bool
+blockListToRST' :: PandocMonad m
+ => Bool
-> [Block] -- ^ List of block elements
- -> State WriterState Doc
+ -> RST m Doc
blockListToRST' topLevel blocks = do
+ -- insert comment between list and quoted blocks, see #4248 and #3675
+ let fixBlocks (b1:b2@(BlockQuote _):bs)
+ | toClose b1 = b1 : commentSep : b2 : fixBlocks bs
+ where
+ toClose Plain{} = False
+ toClose Header{} = False
+ toClose LineBlock{} = False
+ toClose HorizontalRule = False
+ toClose (Para [Image _ _ (_,'f':'i':'g':':':_)]) = True
+ toClose Para{} = False
+ toClose _ = True
+ commentSep = RawBlock "rst" "..\n\n"
+ fixBlocks (b:bs) = b : fixBlocks bs
+ fixBlocks [] = []
tl <- gets stTopLevel
modify (\s->s{stTopLevel=topLevel})
- res <- vcat `fmap` mapM blockToRST blocks
+ res <- vcat `fmap` mapM blockToRST (fixBlocks blocks)
modify (\s->s{stTopLevel=tl})
return res
-blockListToRST :: [Block] -- ^ List of block elements
- -> State WriterState Doc
+blockListToRST :: PandocMonad m
+ => [Block] -- ^ List of block elements
+ -> RST m Doc
blockListToRST = blockListToRST' False
-- | Convert list of Pandoc inline elements to RST.
-inlineListToRST :: [Inline] -> State WriterState Doc
+inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc
inlineListToRST lst =
mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>=
return . hcat
@@ -362,7 +388,7 @@ inlineListToRST lst =
removeSpaceAfterDisplayMath [] = []
insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed
insertBS (x:y:z:zs)
- | isComplex y && (surroundComplex x z) =
+ | isComplex y && surroundComplex x z =
x : y : insertBS (z : zs)
insertBS (x:y:zs)
| isComplex x && not (okAfterComplex y) =
@@ -396,23 +422,28 @@ inlineListToRST lst =
okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String)
okBeforeComplex _ = False
isComplex :: Inline -> Bool
- isComplex (Emph _) = True
- isComplex (Strong _) = True
- isComplex (SmallCaps _) = True
- isComplex (Strikeout _) = True
+ isComplex (Emph _) = True
+ isComplex (Strong _) = True
+ isComplex (SmallCaps _) = True
+ isComplex (Strikeout _) = True
isComplex (Superscript _) = True
- isComplex (Subscript _) = True
- isComplex (Link _ _ _) = True
- isComplex (Image _ _ _) = True
- isComplex (Code _ _) = True
- isComplex (Math _ _) = True
- isComplex (Cite _ (x:_)) = isComplex x
- isComplex (Span _ (x:_)) = isComplex x
- isComplex _ = False
+ isComplex (Subscript _) = True
+ isComplex Link{} = True
+ isComplex Image{} = True
+ isComplex (Code _ _) = True
+ isComplex (Math _ _) = True
+ isComplex (Cite _ (x:_)) = isComplex x
+ isComplex (Span _ (x:_)) = isComplex x
+ isComplex _ = False
-- | Convert Pandoc inline element to RST.
-inlineToRST :: Inline -> State WriterState Doc
-inlineToRST (Span _ ils) = inlineListToRST ils
+inlineToRST :: PandocMonad m => Inline -> RST m Doc
+inlineToRST (Span (_,_,kvs) ils) = do
+ contents <- inlineListToRST ils
+ return $
+ case lookup "role" kvs of
+ Just role -> ":" <> text role <> ":`" <> contents <> "`"
+ Nothing -> contents
inlineToRST (Emph lst) = do
contents <- inlineListToRST lst
return $ "*" <> contents <> "*"
@@ -431,14 +462,33 @@ inlineToRST (Subscript lst) = do
inlineToRST (SmallCaps lst) = inlineListToRST lst
inlineToRST (Quoted SingleQuote lst) = do
contents <- inlineListToRST lst
- return $ "‘" <> contents <> "’"
+ opts <- gets stOptions
+ if isEnabled Ext_smart opts
+ then return $ "'" <> contents <> "'"
+ else return $ "‘" <> contents <> "’"
inlineToRST (Quoted DoubleQuote lst) = do
contents <- inlineListToRST lst
- return $ "“" <> contents <> "”"
+ opts <- gets stOptions
+ if isEnabled Ext_smart opts
+ then return $ "\"" <> contents <> "\""
+ else return $ "“" <> contents <> "”"
inlineToRST (Cite _ lst) =
inlineListToRST lst
-inlineToRST (Code _ str) = return $ "``" <> text str <> "``"
-inlineToRST (Str str) = return $ text $ escapeString str
+inlineToRST (Code _ str) = do
+ opts <- gets stOptions
+ -- we trim the string because the delimiters must adjoin a
+ -- non-space character; see #3496
+ -- we use :literal: when the code contains backticks, since
+ -- :literal: allows backslash-escapes; see #3974
+ return $ if '`' `elem` str
+ then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
+ else "``" <> text (trim str) <> "``"
+inlineToRST (Str str) = do
+ opts <- gets stOptions
+ return $ text $
+ (if isEnabled Ext_smart opts
+ then unsmartify opts
+ else id) $ escapeString opts str
inlineToRST (Math t str) = do
modify $ \st -> st{ stHasMath = True }
return $ if t == InlineMath
@@ -447,20 +497,20 @@ inlineToRST (Math t str) = do
then blankline $$ ".. math::" $$
blankline $$ nest 3 (text str) $$ blankline
else blankline $$ (".. math:: " <> text str) $$ blankline
-inlineToRST (RawInline f x)
+inlineToRST il@(RawInline f x)
| f == "rst" = return $ text x
| f == "latex" || f == "tex" = do
modify $ \st -> st{ stHasRawTeX = True }
return $ ":raw-latex:`" <> text x <> "`"
- | otherwise = return empty
-inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para)
+ | otherwise = empty <$ report (InlineNotRendered il)
+inlineToRST LineBreak = return cr -- there's no line break in RST (see Para)
inlineToRST Space = return space
inlineToRST SoftBreak = do
- wrapText <- gets (writerWrapText . stOptions)
+ wrapText <- gets $ writerWrapText . stOptions
case wrapText of
- WrapPreserve -> return cr
- WrapAuto -> return space
- WrapNone -> return space
+ WrapPreserve -> return cr
+ WrapAuto -> return space
+ WrapNone -> return space
-- autolink
inlineToRST (Link _ [Str str] (src, _))
| isURI src &&
@@ -473,15 +523,15 @@ inlineToRST (Link _ [Image attr alt (imgsrc,imgtit)] (src, _tit)) = do
label <- registerImage attr alt (imgsrc,imgtit) (Just src)
return $ "|" <> label <> "|"
inlineToRST (Link _ txt (src, tit)) = do
- useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
- linktext <- inlineListToRST $ normalizeSpaces txt
+ useReferenceLinks <- gets $ writerReferenceLinks . stOptions
+ linktext <- inlineListToRST $ B.toList . B.trimInlines . B.fromList $ txt
if useReferenceLinks
- then do refs <- get >>= return . stLinks
+ then do refs <- gets stLinks
case lookup txt refs of
Just (src',tit') ->
if src == src' && tit == tit'
then return $ "`" <> linktext <> "`_"
- else do -- duplicate label, use non-reference link
+ else
return $ "`" <> linktext <> " <" <> text src <> ">`__"
Nothing -> do
modify $ \st -> st { stLinks = (txt,(src,tit)):refs }
@@ -494,12 +544,12 @@ inlineToRST (Note contents) = do
-- add to notes in state
notes <- gets stNotes
modify $ \st -> st { stNotes = contents:notes }
- let ref = show $ (length notes) + 1
+ let ref = show $ length notes + 1
return $ " [" <> text ref <> "]_"
-registerImage :: Attr -> [Inline] -> Target -> Maybe String -> State WriterState Doc
+registerImage :: PandocMonad m => Attr -> [Inline] -> Target -> Maybe String -> RST m Doc
registerImage attr alt (src,tit) mbtarget = do
- pics <- get >>= return . stImages
+ pics <- gets stImages
txt <- case lookup alt pics of
Just (a,s,t,mbt) | (a,s,t,mbt) == (attr,src,tit,mbtarget)
-> return alt
@@ -512,14 +562,14 @@ registerImage attr alt (src,tit) mbtarget = do
return alt'
inlineListToRST txt
-imageDimsToRST :: Attr -> State WriterState Doc
+imageDimsToRST :: PandocMonad m => Attr -> RST m Doc
imageDimsToRST attr = do
let (ident, _, _) = attr
name = if null ident
then empty
else ":name: " <> text ident
showDim dir = let cols d = ":" <> text (show dir) <> ": " <> text (show d)
- in case (dimension dir attr) of
+ in case dimension dir attr of
Just (Percent a) ->
case dir of
Height -> empty
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 8f942b4d0..7006b58d1 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -1,5 +1,6 @@
+{-# LANGUAGE ScopedTypeVariables #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.RTF
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -27,103 +28,125 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to RTF (rich text format).
-}
-module Text.Pandoc.Writers.RTF ( writeRTF, writeRTFWithEmbeddedImages ) where
+module Text.Pandoc.Writers.RTF ( writeRTF
+ ) where
+import Control.Monad.Except (catchError, throwError)
+import Control.Monad
+import qualified Data.ByteString as B
+import Data.Char (chr, isDigit, ord)
+import Data.List (intercalate, isSuffixOf)
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Class (PandocMonad, report)
+import qualified Text.Pandoc.Class as P
import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk
-import Data.List ( isSuffixOf, intercalate )
-import Data.Char ( ord, chr, isDigit )
-import qualified Data.ByteString as B
-import qualified Data.Map as M
-import Text.Printf ( printf )
-import Text.Pandoc.ImageSize
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Writers.Shared
+import Text.Printf (printf)
-- | Convert Image inlines into a raw RTF embedded image, read from a file,
-- or a MediaBag, or the internet.
-- If file not found or filetype not jpeg or png, leave the inline unchanged.
-rtfEmbedImage :: WriterOptions -> Inline -> IO Inline
-rtfEmbedImage opts x@(Image attr _ (src,_)) = do
- result <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
- case result of
- Right (imgdata, Just mime)
- | mime == "image/jpeg" || mime == "image/png" -> do
- let bytes = map (printf "%02x") $ B.unpack imgdata
- let filetype = case mime of
- "image/jpeg" -> "\\jpegblip"
- "image/png" -> "\\pngblip"
- _ -> error "Unknown file type"
- sizeSpec <- case imageSize imgdata of
- Left msg -> do
- warn $ "Could not determine image size in `" ++
- src ++ "': " ++ msg
- return ""
- Right sz -> return $ "\\picw" ++ show xpx ++
- "\\pich" ++ show ypx ++
- "\\picwgoal" ++ show (floor (xpt * 20) :: Integer)
- ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer)
- -- twip = 1/1440in = 1/20pt
- where (xpx, ypx) = sizeInPixels sz
- (xpt, ypt) = desiredSizeInPoints opts attr sz
- let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++
- concat bytes ++ "}"
- return $ if B.null imgdata
- then x
- else RawInline (Format "rtf") raw
- _ -> return x
+rtfEmbedImage :: PandocMonad m => WriterOptions -> Inline -> m Inline
+rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError
+ (do result <- P.fetchItem src
+ case result of
+ (imgdata, Just mime)
+ | mime == "image/jpeg" || mime == "image/png" -> do
+ let bytes = map (printf "%02x") $ B.unpack imgdata
+ filetype <-
+ case mime of
+ "image/jpeg" -> return "\\jpegblip"
+ "image/png" -> return "\\pngblip"
+ _ -> throwError $
+ PandocShouldNeverHappenError $
+ "Unknown file type " ++ mime
+ sizeSpec <-
+ case imageSize opts imgdata of
+ Left msg -> do
+ report $ CouldNotDetermineImageSize src msg
+ return ""
+ Right sz -> return $ "\\picw" ++ show xpx ++
+ "\\pich" ++ show ypx ++
+ "\\picwgoal" ++ show (floor (xpt * 20) :: Integer)
+ ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer)
+ -- twip = 1/1440in = 1/20pt
+ where (xpx, ypx) = sizeInPixels sz
+ (xpt, ypt) = desiredSizeInPoints opts attr sz
+ let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++
+ concat bytes ++ "}"
+ if B.null imgdata
+ then do
+ report $ CouldNotFetchResource src "image contained no data"
+ return x
+ else return $ RawInline (Format "rtf") raw
+ | otherwise -> do
+ report $ CouldNotFetchResource src "image is not a jpeg or png"
+ return x
+ (_, Nothing) -> do
+ report $ CouldNotDetermineMimeType src
+ return x)
+ (\e -> do
+ report $ CouldNotFetchResource src (show e)
+ return x)
rtfEmbedImage _ x = return x
--- | Convert Pandoc to a string in rich text format, with
--- images embedded as encoded binary data.
-writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String
-writeRTFWithEmbeddedImages options doc =
- writeRTF options `fmap` walkM (rtfEmbedImage options) doc
-
-- | Convert Pandoc to a string in rich text format.
-writeRTF :: WriterOptions -> Pandoc -> String
-writeRTF options (Pandoc meta@(Meta metamap) blocks) =
+writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeRTF options doc = do
+ -- handle images
+ Pandoc meta@(Meta metamap) blocks <- walkM (rtfEmbedImage options) doc
let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta
- toPlain (MetaBlocks [Para ils]) = MetaInlines ils
- toPlain x = x
- -- adjust title, author, date so we don't get para inside para
- meta' = Meta $ M.adjust toPlain "title"
+ let toPlain (MetaBlocks [Para ils]) = MetaInlines ils
+ toPlain x = x
+ -- adjust title, author, date so we don't get para inside para
+ let meta' = Meta $ M.adjust toPlain "title"
. M.adjust toPlain "author"
. M.adjust toPlain "date"
$ metamap
- Just metadata = metaToJSON options
- (Just . concatMap (blockToRTF 0 AlignDefault))
- (Just . inlineListToRTF)
+ metadata <- metaToJSON options
+ (fmap concat . mapM (blockToRTF 0 AlignDefault))
+ inlinesToRTF
meta'
- body = concatMap (blockToRTF 0 AlignDefault) blocks
- isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options
- isTOCHeader _ = False
- context = defField "body" body
+ body <- blocksToRTF 0 AlignDefault blocks
+ let isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options
+ isTOCHeader _ = False
+ toc <- tableOfContents $ filter isTOCHeader blocks
+ let context = defField "body" body
$ defField "spacer" spacer
- $ (if writerTableOfContents options
- then defField "toc"
- (tableOfContents $ filter isTOCHeader blocks)
- else id)
- $ metadata
- in case writerTemplate options of
+ $(if writerTableOfContents options
+ then defField "table-of-contents" toc
+ -- for backwards compatibility,
+ -- we populate toc with the contents
+ -- of the toc rather than a boolean:
+ . defField "toc" toc
+ else id) metadata
+ T.pack <$>
+ case writerTemplate options of
Just tpl -> renderTemplate' tpl context
- Nothing -> case reverse body of
+ Nothing -> return $
+ case reverse body of
('\n':_) -> body
_ -> body ++ "\n"
-- | Construct table of contents from list of header blocks.
-tableOfContents :: [Block] -> String
-tableOfContents headers =
- let contentsTree = hierarchicalize headers
- in concatMap (blockToRTF 0 AlignDefault) $
- [Header 1 nullAttr [Str "Contents"],
- BulletList (map elementToListItem contentsTree)]
+tableOfContents :: PandocMonad m => [Block] -> m String
+tableOfContents headers = do
+ let contents = map elementToListItem $ hierarchicalize headers
+ blocksToRTF 0 AlignDefault
+ [Header 1 nullAttr [Str "Contents"], BulletList contents]
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
-elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++
+elementToListItem (Sec _ _ _ sectext subsecs) = Plain sectext :
if null subsecs
then []
else [BulletList (map elementToListItem subsecs)]
@@ -140,11 +163,11 @@ handleUnicode (c:cs) =
lower = r + 0xDC00
in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs
else enc c ++ handleUnicode cs
- else c:(handleUnicode cs)
+ else c:handleUnicode cs
where
surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff)
|| (0xe000 <= ord x && ord x <= 0xffff) )
- enc x = '\\':'u':(show (ord x)) ++ "?"
+ enc x = '\\':'u':show (ord x) ++ "?"
-- | Escape special characters.
escapeSpecial :: String -> String
@@ -175,13 +198,13 @@ rtfParSpaced :: Int -- ^ space after (in twips)
-> String
rtfParSpaced spaceAfter indent firstLineIndent alignment content =
let alignString = case alignment of
- AlignLeft -> "\\ql "
- AlignRight -> "\\qr "
- AlignCenter -> "\\qc "
+ AlignLeft -> "\\ql "
+ AlignRight -> "\\qr "
+ AlignCenter -> "\\qc "
AlignDefault -> "\\ql "
in "{\\pard " ++ alignString ++
- "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
- " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
+ "\\f0 \\sa" ++ show spaceAfter ++ " \\li" ++ show indent ++
+ " \\fi" ++ show firstLineIndent ++ " " ++ content ++ "\\par}\n"
-- | Default paragraph.
rtfPar :: Int -- ^ block indent (in twips)
@@ -221,143 +244,180 @@ orderedMarkers indent (start, style, delim) =
_ -> orderedListMarkers (start, LowerAlpha, Period)
else orderedListMarkers (start, style, delim)
+blocksToRTF :: PandocMonad m
+ => Int
+ -> Alignment
+ -> [Block]
+ -> m String
+blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align)
+
-- | Convert Pandoc block element to RTF.
-blockToRTF :: Int -- ^ indent level
+blockToRTF :: PandocMonad m
+ => Int -- ^ indent level
-> Alignment -- ^ alignment
-> Block -- ^ block to convert
- -> String
-blockToRTF _ _ Null = ""
+ -> m String
+blockToRTF _ _ Null = return ""
blockToRTF indent alignment (Div _ bs) =
- concatMap (blockToRTF indent alignment) bs
+ blocksToRTF indent alignment bs
blockToRTF indent alignment (Plain lst) =
- rtfCompact indent 0 alignment $ inlineListToRTF lst
+ rtfCompact indent 0 alignment <$> inlinesToRTF lst
blockToRTF indent alignment (Para lst) =
- rtfPar indent 0 alignment $ inlineListToRTF lst
+ rtfPar indent 0 alignment <$> inlinesToRTF lst
blockToRTF indent alignment (LineBlock lns) =
blockToRTF indent alignment $ linesToPara lns
blockToRTF indent alignment (BlockQuote lst) =
- concatMap (blockToRTF (indent + indentIncrement) alignment) lst
+ blocksToRTF (indent + indentIncrement) alignment lst
blockToRTF indent _ (CodeBlock _ str) =
- rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
-blockToRTF _ _ (RawBlock f str)
- | f == Format "rtf" = str
- | otherwise = ""
-blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
- concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
-blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
- zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
-blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
- concatMap (definitionListItemToRTF alignment indent) lst
-blockToRTF indent _ HorizontalRule =
+ return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ codeStringToRTF str)
+blockToRTF _ _ b@(RawBlock f str)
+ | f == Format "rtf" = return str
+ | otherwise = do
+ report $ BlockNotRendered b
+ return ""
+blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$>
+ mapM (listItemToRTF alignment indent (bulletMarker indent)) lst
+blockToRTF indent alignment (OrderedList attribs lst) =
+ (spaceAtEnd . concat) <$>
+ zipWithM (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
+blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$>
+ mapM (definitionListItemToRTF alignment indent) lst
+blockToRTF indent _ HorizontalRule = return $
rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
-blockToRTF indent alignment (Header level _ lst) = rtfPar indent 0 alignment $
- "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
-blockToRTF indent alignment (Table caption aligns sizes headers rows) =
- (if all null headers
- then ""
- else tableRowToRTF True indent aligns sizes headers) ++
- concatMap (tableRowToRTF False indent aligns sizes) rows ++
- rtfPar indent 0 alignment (inlineListToRTF caption)
+blockToRTF indent alignment (Header level _ lst) = do
+ contents <- inlinesToRTF lst
+ return $ rtfPar indent 0 alignment $
+ "\\b \\fs" ++ show (40 - (level * 4)) ++ " " ++ contents
+blockToRTF indent alignment (Table caption aligns sizes headers rows) = do
+ caption' <- inlinesToRTF caption
+ header' <- if all null headers
+ then return ""
+ else tableRowToRTF True indent aligns sizes headers
+ rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows
+ return $ header' ++ rows' ++ rtfPar indent 0 alignment caption'
-tableRowToRTF :: Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> String
-tableRowToRTF header indent aligns sizes' cols =
+tableRowToRTF :: PandocMonad m
+ => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String
+tableRowToRTF header indent aligns sizes' cols = do
let totalTwips = 6 * 1440 -- 6 inches
- sizes = if all (== 0) sizes'
- then take (length cols) $ repeat (1.0 / fromIntegral (length cols))
+ let sizes = if all (== 0) sizes'
+ then replicate (length cols) (1.0 / fromIntegral (length cols))
else sizes'
- columns = concat $ zipWith (tableItemToRTF indent) aligns cols
- rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
+ columns <- concat <$>
+ zipWithM (tableItemToRTF indent) aligns cols
+ let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
(0 :: Integer) sizes
- cellDefs = map (\edge -> (if header
+ let cellDefs = map (\edge -> (if header
then "\\clbrdrb\\brdrs"
else "") ++ "\\cellx" ++ show edge)
rightEdges
- start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
+ let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
"\\trkeep\\intbl\n{\n"
- end = "}\n\\intbl\\row}\n"
- in start ++ columns ++ end
+ let end = "}\n\\intbl\\row}\n"
+ return $ start ++ columns ++ end
-tableItemToRTF :: Int -> Alignment -> [Block] -> String
-tableItemToRTF indent alignment item =
- let contents = concatMap (blockToRTF indent alignment) item
- in "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n"
+tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String
+tableItemToRTF indent alignment item = do
+ contents <- blocksToRTF indent alignment item
+ return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n"
-- | Ensure that there's the same amount of space after compact
-- lists as after regular lists.
spaceAtEnd :: String -> String
spaceAtEnd str =
- if isSuffixOf "\\par}\n" str
- then (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
+ if "\\par}\n" `isSuffixOf` str
+ then take (length str - 6) str ++ "\\sa180\\par}\n"
else str
-- | Convert list item (list of blocks) to RTF.
-listItemToRTF :: Alignment -- ^ alignment
+listItemToRTF :: PandocMonad m
+ => Alignment -- ^ alignment
-> Int -- ^ indent level
-> String -- ^ list start marker
-> [Block] -- ^ list item (list of blocks)
- -> [Char]
-listItemToRTF alignment indent marker [] =
- rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
- (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
-listItemToRTF alignment indent marker list =
- let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list
- listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++
- show listIncrement ++ "\\tab"
- insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d =
+ -> m String
+listItemToRTF alignment indent marker [] = return $
+ rtfCompact (indent + listIncrement) (negate listIncrement) alignment
+ (marker ++ "\\tx" ++ show listIncrement ++ "\\tab ")
+listItemToRTF alignment indent marker list = do
+ (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list
+ let listMarker = "\\fi" ++ show (negate listIncrement) ++ " " ++ marker ++
+ "\\tx" ++ show listIncrement ++ "\\tab"
+ let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d =
listMarker ++ dropWhile isDigit xs
insertListMarker ('\\':'f':'i':d:xs) | isDigit d =
listMarker ++ dropWhile isDigit xs
insertListMarker (x:xs) =
x : insertListMarker xs
insertListMarker [] = []
- -- insert the list marker into the (processed) first block
- in insertListMarker first ++ concat rest
+ -- insert the list marker into the (processed) first block
+ return $ insertListMarker first ++ concat rest
-- | Convert definition list item (label, list of blocks) to RTF.
-definitionListItemToRTF :: Alignment -- ^ alignment
+definitionListItemToRTF :: PandocMonad m
+ => Alignment -- ^ alignment
-> Int -- ^ indent level
-> ([Inline],[[Block]]) -- ^ list item (list of blocks)
- -> [Char]
-definitionListItemToRTF alignment indent (label, defs) =
- let labelText = blockToRTF indent alignment (Plain label)
- itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) $
- concat defs
- in labelText ++ itemsText
+ -> m String
+definitionListItemToRTF alignment indent (label, defs) = do
+ labelText <- blockToRTF indent alignment (Plain label)
+ itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs)
+ return $ labelText ++ itemsText
-- | Convert list of inline items to RTF.
-inlineListToRTF :: [Inline] -- ^ list of inlines to convert
- -> String
-inlineListToRTF lst = concatMap inlineToRTF lst
+inlinesToRTF :: PandocMonad m
+ => [Inline] -- ^ list of inlines to convert
+ -> m String
+inlinesToRTF lst = concat <$> mapM inlineToRTF lst
-- | Convert inline item to RTF.
-inlineToRTF :: Inline -- ^ inline to convert
- -> String
-inlineToRTF (Span _ lst) = inlineListToRTF lst
-inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}"
-inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}"
-inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}"
-inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}"
-inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}"
-inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}"
-inlineToRTF (Quoted SingleQuote lst) =
- "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
-inlineToRTF (Quoted DoubleQuote lst) =
- "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
-inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
-inlineToRTF (Str str) = stringToRTF str
-inlineToRTF (Math t str) = inlineListToRTF $ texMathToInlines t str
-inlineToRTF (Cite _ lst) = inlineListToRTF lst
-inlineToRTF (RawInline f str)
- | f == Format "rtf" = str
- | otherwise = ""
-inlineToRTF (LineBreak) = "\\line "
-inlineToRTF SoftBreak = " "
-inlineToRTF Space = " "
-inlineToRTF (Link _ text (src, _)) =
- "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
- "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n"
+inlineToRTF :: PandocMonad m
+ => Inline -- ^ inline to convert
+ -> m String
+inlineToRTF (Span _ lst) = inlinesToRTF lst
+inlineToRTF (Emph lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\i " ++ contents ++ "}"
+inlineToRTF (Strong lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\b " ++ contents ++ "}"
+inlineToRTF (Strikeout lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\strike " ++ contents ++ "}"
+inlineToRTF (Superscript lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\super " ++ contents ++ "}"
+inlineToRTF (Subscript lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\sub " ++ contents ++ "}"
+inlineToRTF (SmallCaps lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\scaps " ++ contents ++ "}"
+inlineToRTF (Quoted SingleQuote lst) = do
+ contents <- inlinesToRTF lst
+ return $ "\\u8216'" ++ contents ++ "\\u8217'"
+inlineToRTF (Quoted DoubleQuote lst) = do
+ contents <- inlinesToRTF lst
+ return $ "\\u8220\"" ++ contents ++ "\\u8221\""
+inlineToRTF (Code _ str) = return $ "{\\f1 " ++ codeStringToRTF str ++ "}"
+inlineToRTF (Str str) = return $ stringToRTF str
+inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF
+inlineToRTF (Cite _ lst) = inlinesToRTF lst
+inlineToRTF il@(RawInline f str)
+ | f == Format "rtf" = return str
+ | otherwise = do
+ return $ InlineNotRendered il
+ return ""
+inlineToRTF LineBreak = return "\\line "
+inlineToRTF SoftBreak = return " "
+inlineToRTF Space = return " "
+inlineToRTF (Link _ text (src, _)) = do
+ contents <- inlinesToRTF text
+ return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ codeStringToRTF src ++
+ "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n"
inlineToRTF (Image _ _ (source, _)) =
- "{\\cf1 [image: " ++ source ++ "]\\cf0}"
-inlineToRTF (Note contents) =
- "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
- (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"
+ return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}"
+inlineToRTF (Note contents) = do
+ body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents
+ return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
+ body ++ "}"
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 845d22077..ae4cc5cc5 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Shared
- Copyright : Copyright (C) 2013-2015 John MacFarlane
+ Copyright : Copyright (C) 2013-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -30,62 +30,91 @@ Shared utility functions for pandoc writers.
-}
module Text.Pandoc.Writers.Shared (
metaToJSON
+ , metaToJSON'
+ , addVariablesToJSON
, getField
, setField
+ , resetField
, defField
, tagWithAttrs
, fixDisplayMath
+ , unsmartify
+ , gridTable
+ , metaValueToInlines
)
where
-import Text.Pandoc.Definition
-import Text.Pandoc.Pretty
-import Text.Pandoc.XML (escapeStringForXML)
-import Control.Monad (liftM)
-import Text.Pandoc.Options (WriterOptions(..))
+import Control.Monad (zipWithM)
+import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
+ encode, fromJSON)
import qualified Data.HashMap.Strict as H
+import Data.List (groupBy, intersperse, transpose)
import qualified Data.Map as M
+import Data.Maybe (isJust)
import qualified Data.Text as T
-import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..), encode)
-import Text.Pandoc.UTF8 (toStringLazy)
import qualified Data.Traversable as Traversable
-import Data.List ( groupBy )
-import Data.Maybe ( isJust )
+import qualified Text.Pandoc.Builder as Builder
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Pretty
+import Text.Pandoc.Walk (query)
+import Text.Pandoc.UTF8 (toStringLazy)
+import Text.Pandoc.XML (escapeStringForXML)
-- | Create JSON value for template from a 'Meta' and an association list
-- of variables, specified at the command line or in the writer.
-- Variables overwrite metadata fields with the same names.
-- If multiple variables are set with the same name, a list is
--- assigned.
-metaToJSON :: Monad m
+-- assigned. Does nothing if 'writerTemplate' is Nothing.
+metaToJSON :: (Functor m, Monad m, ToJSON a)
=> WriterOptions
- -> ([Block] -> m String)
- -> ([Inline] -> m String)
+ -> ([Block] -> m a)
+ -> ([Inline] -> m a)
-> Meta
-> m Value
-metaToJSON opts blockWriter inlineWriter (Meta metamap)
- | isJust (writerTemplate opts) = do
- let baseContext = foldl (\acc (x,y) -> setField x y acc) (Object H.empty)
- $ writerVariables opts
- renderedMap <- Traversable.mapM
- (metaValueToJSON blockWriter inlineWriter)
- metamap
- let metadata = M.foldWithKey defField baseContext renderedMap
- return $ defField "meta-json" (toStringLazy $ encode metadata) metadata
+metaToJSON opts blockWriter inlineWriter meta
+ | isJust (writerTemplate opts) =
+ addVariablesToJSON opts <$> metaToJSON' blockWriter inlineWriter meta
| otherwise = return (Object H.empty)
-metaValueToJSON :: Monad m
- => ([Block] -> m String)
- -> ([Inline] -> m String)
+-- | Like 'metaToJSON', but does not include variables and is
+-- not sensitive to 'writerTemplate'.
+metaToJSON' :: (Functor m, Monad m, ToJSON a)
+ => ([Block] -> m a)
+ -> ([Inline] -> m a)
+ -> Meta
+ -> m Value
+metaToJSON' blockWriter inlineWriter (Meta metamap) = do
+ renderedMap <- Traversable.mapM
+ (metaValueToJSON blockWriter inlineWriter)
+ metamap
+ return $ M.foldrWithKey defField (Object H.empty) renderedMap
+
+-- | Add variables to JSON object, replacing any existing values.
+-- Also include @meta-json@, a field containing a string representation
+-- of the original JSON object itself, prior to addition of variables.
+addVariablesToJSON :: WriterOptions -> Value -> Value
+addVariablesToJSON opts metadata =
+ foldl (\acc (x,y) -> setField x y acc)
+ (defField "meta-json" (toStringLazy $ encode metadata) (Object mempty))
+ (writerVariables opts)
+ `combineMetadata` metadata
+ where combineMetadata (Object o1) (Object o2) = Object $ H.union o1 o2
+ combineMetadata x _ = x
+
+metaValueToJSON :: (Functor m, Monad m, ToJSON a)
+ => ([Block] -> m a)
+ -> ([Inline] -> m a)
-> MetaValue
-> m Value
-metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $
+metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = toJSON <$>
Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap
-metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $
+metaValueToJSON blockWriter inlineWriter (MetaList xs) = toJSON <$>
Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs
metaValueToJSON _ _ (MetaBool b) = return $ toJSON b
-metaValueToJSON _ _ (MetaString s) = return $ toJSON s
-metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs
-metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs
+metaValueToJSON _ inlineWriter (MetaString s) = toJSON <$>
+ inlineWriter (Builder.toList (Builder.text s))
+metaValueToJSON blockWriter _ (MetaBlocks bs) = toJSON <$> blockWriter bs
+metaValueToJSON _ inlineWriter (MetaInlines is) = toJSON <$> inlineWriter is
-- | Retrieve a field value from a JSON object.
getField :: FromJSON a
@@ -111,10 +140,22 @@ setField field val (Object hashmap) =
Object $ H.insertWith combine (T.pack field) (toJSON val) hashmap
where combine newval oldval =
case fromJSON oldval of
- Success xs -> toJSON $ xs ++ [newval]
- _ -> toJSON [oldval, newval]
+ Success xs -> toJSON $ xs ++ [newval]
+ _ -> toJSON [oldval, newval]
setField _ _ x = x
+resetField :: ToJSON a
+ => String
+ -> a
+ -> Value
+ -> Value
+-- | Reset a field of a JSON object. If the field already has a value,
+-- the new value replaces it.
+-- This is a utility function to be used in preparing template contexts.
+resetField field val (Object hashmap) =
+ Object $ H.insert (T.pack field) (toJSON val) hashmap
+resetField _ _ x = x
+
defField :: ToJSON a
=> String
-> a
@@ -148,22 +189,131 @@ isDisplayMath _ = False
stripLeadingTrailingSpace :: [Inline] -> [Inline]
stripLeadingTrailingSpace = go . reverse . go . reverse
- where go (Space:xs) = xs
+ where go (Space:xs) = xs
go (SoftBreak:xs) = xs
- go xs = xs
+ go xs = xs
-- Put display math in its own block (for ODT/DOCX).
fixDisplayMath :: Block -> Block
fixDisplayMath (Plain lst)
| any isDisplayMath lst && not (all isDisplayMath lst) =
-- chop into several paragraphs so each displaymath is its own
- Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
+ Div ("",["math"],[]) $
+ map Plain $
+ filter (not . null) $
+ map stripLeadingTrailingSpace $
groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
not (isDisplayMath x || isDisplayMath y)) lst
fixDisplayMath (Para lst)
| any isDisplayMath lst && not (all isDisplayMath lst) =
-- chop into several paragraphs so each displaymath is its own
- Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
+ Div ("",["math"],[]) $
+ map Para $
+ filter (not . null) $
+ map stripLeadingTrailingSpace $
groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
not (isDisplayMath x || isDisplayMath y)) lst
fixDisplayMath x = x
+
+unsmartify :: WriterOptions -> String -> String
+unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs
+unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs
+unsmartify opts ('\8211':xs)
+ | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs
+ | otherwise = "--" ++ unsmartify opts xs
+unsmartify opts ('\8212':xs)
+ | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs
+ | otherwise = "---" ++ unsmartify opts xs
+unsmartify opts ('\8220':xs) = '"' : unsmartify opts xs
+unsmartify opts ('\8221':xs) = '"' : unsmartify opts xs
+unsmartify opts ('\8216':xs) = '\'' : unsmartify opts xs
+unsmartify opts (x:xs) = x : unsmartify opts xs
+unsmartify _ [] = []
+
+gridTable :: Monad m
+ => WriterOptions
+ -> (WriterOptions -> [Block] -> m Doc)
+ -> Bool -- ^ headless
+ -> [Alignment]
+ -> [Double]
+ -> [[Block]]
+ -> [[[Block]]]
+ -> m Doc
+gridTable opts blocksToDoc headless aligns widths headers rows = do
+ let numcols = maximum (length aligns : length widths :
+ map length (headers:rows))
+ let handleGivenWidths widths' = do
+ let widthsInChars' = map (
+ (\x -> if x < 1 then 1 else x) .
+ (\x -> x - 3) . floor .
+ (fromIntegral (writerColumns opts) *)
+ ) widths'
+ rawHeaders' <- zipWithM blocksToDoc
+ (map (\w -> opts{writerColumns =
+ min (w - 2) (writerColumns opts)}) widthsInChars')
+ headers
+ rawRows' <- mapM
+ (\cs -> zipWithM blocksToDoc
+ (map (\w -> opts{writerColumns =
+ min (w - 2) (writerColumns opts)}) widthsInChars')
+ cs)
+ rows
+ return (widthsInChars', rawHeaders', rawRows')
+ let handleZeroWidths = do
+ rawHeaders' <- mapM (blocksToDoc opts) headers
+ rawRows' <- mapM (mapM (blocksToDoc opts)) rows
+ let numChars [] = 0
+ numChars xs = maximum . map offset $ xs
+ let widthsInChars' =
+ map numChars $ transpose (rawHeaders' : rawRows')
+ if sum widthsInChars' > writerColumns opts
+ then -- use even widths
+ handleGivenWidths
+ (replicate numcols (1.0 / fromIntegral numcols) :: [Double])
+ else return (widthsInChars', rawHeaders', rawRows')
+ (widthsInChars, rawHeaders, rawRows) <- if all (== 0) widths
+ then handleZeroWidths
+ else handleGivenWidths widths
+ let hpipeBlocks blocks = hcat [beg, middle, end]
+ where h = maximum (1 : map height blocks)
+ sep' = lblock 3 $ vcat (replicate h (text " | "))
+ beg = lblock 2 $ vcat (replicate h (text "| "))
+ end = lblock 2 $ vcat (replicate h (text " |"))
+ middle = chomp $ hcat $ intersperse sep' blocks
+ let makeRow = hpipeBlocks . zipWith lblock widthsInChars
+ let head' = makeRow rawHeaders
+ let rows' = map (makeRow . map chomp) rawRows
+ let borderpart ch align widthInChars =
+ (if align == AlignLeft || align == AlignCenter
+ then char ':'
+ else char ch) <>
+ text (replicate widthInChars ch) <>
+ (if align == AlignRight || align == AlignCenter
+ then char ':'
+ else char ch)
+ let border ch aligns' widthsInChars' =
+ char '+' <>
+ hcat (intersperse (char '+') (zipWith (borderpart ch)
+ aligns' widthsInChars')) <> char '+'
+ let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars)
+ rows'
+ let head'' = if headless
+ then empty
+ else head' $$ border '=' aligns widthsInChars
+ if headless
+ then return $
+ border '-' aligns widthsInChars $$
+ body $$
+ border '-' (repeat AlignDefault) widthsInChars
+ else return $
+ border '-' (repeat AlignDefault) widthsInChars $$
+ head'' $$
+ body $$
+ border '-' (repeat AlignDefault) widthsInChars
+
+metaValueToInlines :: MetaValue -> [Inline]
+metaValueToInlines (MetaString s) = [Str s]
+metaValueToInlines (MetaInlines ils) = ils
+metaValueToInlines (MetaBlocks bs) = query return bs
+metaValueToInlines (MetaBool b) = [Str $ show b]
+metaValueToInlines _ = []
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index 9bd23ac3b..4936c743e 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE OverloadedStrings, PatternGuards #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Docbook
- Copyright : Copyright (C) 2006-2015 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,63 +30,67 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to Docbook XML.
-}
module Text.Pandoc.Writers.TEI (writeTEI) where
+import Data.Char (toLower)
+import Data.List (isPrefixOf, stripPrefix)
+import Data.Text (Text)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
-import Text.Pandoc.XML
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Highlighting (languages, languagesByExtension)
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Templates (renderTemplate')
-import Data.List ( stripPrefix, isPrefixOf )
-import Data.Char ( toLower )
-import Text.Pandoc.Highlighting ( languages, languagesByExtension )
import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML
-- | Convert list of authors to a docbook <author> section
-authorToTEI :: WriterOptions -> [Inline] -> B.Inlines
-authorToTEI opts name' =
- let name = render Nothing $ inlinesToTEI opts name'
- colwidth = if writerWrapText opts == WrapAuto
+authorToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m B.Inlines
+authorToTEI opts name' = do
+ name <- render Nothing <$> inlinesToTEI opts name'
+ let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
- in B.rawInline "tei" $ render colwidth $
+ return $ B.rawInline "tei" $ render colwidth $
inTagsSimple "author" (text $ escapeStringForXML name)
-- | Convert Pandoc document to string in Docbook format.
-writeTEI :: WriterOptions -> Pandoc -> String
-writeTEI opts (Pandoc meta blocks) =
+writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeTEI opts (Pandoc meta blocks) = do
let elements = hierarchicalize blocks
colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
+ render' :: Doc -> Text
render' = render colwidth
startLvl = case writerTopLevelDivision opts of
TopLevelPart -> -1
TopLevelChapter -> 0
TopLevelSection -> 1
TopLevelDefault -> 1
- auths' = map (authorToTEI opts) $ docAuthors meta
- meta' = B.setMeta "author" auths' meta
- Just metadata = metaToJSON opts
- (Just . render colwidth . (vcat .
- (map (elementToTEI opts startLvl)) . hierarchicalize))
- (Just . render colwidth . inlinesToTEI opts)
+ auths' <- mapM (authorToTEI opts) $ docAuthors meta
+ let meta' = B.setMeta "author" auths' meta
+ metadata <- metaToJSON opts
+ (fmap (render' . vcat) .
+ mapM (elementToTEI opts startLvl) . hierarchicalize)
+ (fmap render' . inlinesToTEI opts)
meta'
- main = render' $ vcat (map (elementToTEI opts startLvl) elements)
- context = defField "body" main
- $ defField "mathml" (case writerHTMLMathMethod opts of
- MathML _ -> True
- _ -> False)
- $ metadata
- in case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ main <- (render' . vcat) <$> mapM (elementToTEI opts startLvl) elements
+ let context = defField "body" main
+ $
+ defField "mathml" (case writerHTMLMathMethod opts of
+ MathML -> True
+ _ -> False) metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-- | Convert an Element to TEI.
-elementToTEI :: WriterOptions -> Int -> Element -> Doc
+elementToTEI :: PandocMonad m => WriterOptions -> Int -> Element -> m Doc
elementToTEI opts _ (Blk block) = blockToTEI opts block
-elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) =
+elementToTEI opts lvl (Sec _ _num attr title elements) = do
-- TEI doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]
@@ -96,14 +101,14 @@ elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) =
| n == 0 -> "chapter"
| n >= 1 && n <= 5 -> "level" ++ show n
| otherwise -> "section"
- in inTags True "div" [("type", divType) | not (null id')] $
--- ("id", writerIdentifierPrefix opts ++ id') | not (null id')] $
- inTagsSimple "head" (inlinesToTEI opts title) $$
- vcat (map (elementToTEI opts (lvl + 1)) elements')
+ contents <- vcat <$> mapM (elementToTEI opts (lvl + 1)) elements'
+ titleContents <- inlinesToTEI opts title
+ return $ inTags True "div" (("type", divType) : idFromAttr opts attr) $
+ inTagsSimple "head" titleContents $$ contents
-- | Convert a list of Pandoc blocks to TEI.
-blocksToTEI :: WriterOptions -> [Block] -> Doc
-blocksToTEI opts = vcat . map (blockToTEI opts)
+blocksToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc
+blocksToTEI opts bs = vcat <$> mapM (blockToTEI opts) bs
-- | Auxiliary function to convert Plain block to Para.
plainToPara :: Block -> Block
@@ -112,48 +117,54 @@ plainToPara x = x
-- | Convert a list of pairs of terms and definitions into a TEI
-- list with labels and items.
-deflistItemsToTEI :: WriterOptions -> [([Inline],[[Block]])] -> Doc
+deflistItemsToTEI :: PandocMonad m
+ => WriterOptions -> [([Inline],[[Block]])] -> m Doc
deflistItemsToTEI opts items =
- vcat $ map (\(term, defs) -> deflistItemToTEI opts term defs) items
+ vcat <$> mapM (uncurry (deflistItemToTEI opts)) items
-- | Convert a term and a list of blocks into a TEI varlistentry.
-deflistItemToTEI :: WriterOptions -> [Inline] -> [[Block]] -> Doc
-deflistItemToTEI opts term defs =
- let def' = concatMap (map plainToPara) defs
- in inTagsIndented "label" (inlinesToTEI opts term) $$
- inTagsIndented "item" (blocksToTEI opts def')
+deflistItemToTEI :: PandocMonad m
+ => WriterOptions -> [Inline] -> [[Block]] -> m Doc
+deflistItemToTEI opts term defs = do
+ term' <- inlinesToTEI opts term
+ defs' <- blocksToTEI opts $ concatMap (map plainToPara) defs
+ return $ inTagsIndented "label" term' $$
+ inTagsIndented "item" defs'
-- | Convert a list of lists of blocks to a list of TEI list items.
-listItemsToTEI :: WriterOptions -> [[Block]] -> Doc
-listItemsToTEI opts items = vcat $ map (listItemToTEI opts) items
+listItemsToTEI :: PandocMonad m => WriterOptions -> [[Block]] -> m Doc
+listItemsToTEI opts items = vcat <$> mapM (listItemToTEI opts) items
-- | Convert a list of blocks into a TEI list item.
-listItemToTEI :: WriterOptions -> [Block] -> Doc
+listItemToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc
listItemToTEI opts item =
- inTagsIndented "item" $ blocksToTEI opts $ map plainToPara item
+ inTagsIndented "item" <$> blocksToTEI opts (map plainToPara item)
-imageToTEI :: WriterOptions -> Attr -> String -> Doc
-imageToTEI _ attr src = selfClosingTag "graphic" $
- ("url", src) : idAndRole attr ++ dims
+imageToTEI :: PandocMonad m => WriterOptions -> Attr -> String -> m Doc
+imageToTEI opts attr src = return $ selfClosingTag "graphic" $
+ ("url", src) : idFromAttr opts attr ++ dims
where
- dims = go Width "width" ++ go Height "depth"
- go dir dstr = case (dimension dir attr) of
+ dims = go Width "width" ++ go Height "height"
+ go dir dstr = case dimension dir attr of
Just a -> [(dstr, show a)]
Nothing -> []
-- | Convert a Pandoc block element to TEI.
-blockToTEI :: WriterOptions -> Block -> Doc
-blockToTEI _ Null = empty
+blockToTEI :: PandocMonad m => WriterOptions -> Block -> m Doc
+blockToTEI _ Null = return empty
-- Add ids to paragraphs in divs with ids - this is needed for
-- pandoc-citeproc to get link anchors in bibliographies:
-blockToTEI opts (Div (ident,_,_) [Para lst]) =
- let attribs = [("id", ident) | not (null ident)] in
- inTags False "p" attribs $ inlinesToTEI opts lst
+blockToTEI opts (Div attr [Para lst]) = do
+ let attribs = idFromAttr opts attr
+ inTags False "p" attribs <$> inlinesToTEI opts lst
blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs
-blockToTEI _ (Header _ _ _) = empty -- should not occur after hierarchicalize
+blockToTEI _ h@Header{} = do
+ -- should not occur after hierarchicalize, except inside lists/blockquotes
+ report $ BlockNotRendered h
+ return empty
-- For TEI simple, text must be within containing block element, so
--- we use plainToPara to ensure that Plain text ends up contained by
--- something.
+-- we use treat as Para to ensure that Plain text ends up contained by
+-- something:
blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst
-- title beginning with fig: indicates that the image is a figure
--blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) =
@@ -168,13 +179,13 @@ blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst
-- (imageToTEI opts attr src)) $$
-- inTagsSimple "textobject" (inTagsSimple "phrase" alt))
blockToTEI opts (Para lst) =
- inTags False "p" [] $ inlinesToTEI opts lst
+ inTags False "p" [] <$> inlinesToTEI opts lst
blockToTEI opts (LineBlock lns) =
blockToTEI opts $ linesToPara lns
blockToTEI opts (BlockQuote blocks) =
- inTagsIndented "quote" $ blocksToTEI opts blocks
+ inTagsIndented "quote" <$> blocksToTEI opts blocks
blockToTEI _ (CodeBlock (_,classes,_) str) =
- text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <>
+ return $ text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <>
flush (text (escapeStringForXML str) <> cr <> text "</ab>")
where lang = if null langs
then ""
@@ -184,11 +195,11 @@ blockToTEI _ (CodeBlock (_,classes,_) str) =
then [s]
else languagesByExtension . map toLower $ s
langs = concatMap langsFrom classes
-blockToTEI opts (BulletList lst) =
+blockToTEI opts (BulletList lst) = do
let attribs = [("type", "unordered")]
- in inTags True "list" attribs $ listItemsToTEI opts lst
-blockToTEI _ (OrderedList _ []) = empty
-blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) =
+ inTags True "list" attribs <$> listItemsToTEI opts lst
+blockToTEI _ (OrderedList _ []) = return empty
+blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = do
let attribs = case numstyle of
DefaultStyle -> []
Decimal -> [("type", "ordered:arabic")]
@@ -197,127 +208,138 @@ blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) =
LowerAlpha -> [("type", "ordered:loweralpha")]
UpperRoman -> [("type", "ordered:upperroman")]
LowerRoman -> [("type", "ordered:lowerroman")]
- items = if start == 1
- then listItemsToTEI opts (first:rest)
- else (inTags True "item" [("n",show start)]
- (blocksToTEI opts $ map plainToPara first)) $$
- listItemsToTEI opts rest
- in inTags True "list" attribs items
-blockToTEI opts (DefinitionList lst) =
+ items <- if start == 1
+ then listItemsToTEI opts (first:rest)
+ else do
+ fi <- blocksToTEI opts $ map plainToPara first
+ re <- listItemsToTEI opts rest
+ return $ inTags True "item" [("n",show start)] fi $$ re
+ return $ inTags True "list" attribs items
+blockToTEI opts (DefinitionList lst) = do
let attribs = [("type", "definition")]
- in inTags True "list" attribs $ deflistItemsToTEI opts lst
-blockToTEI _ (RawBlock f str)
- | f == "tei" = text str -- raw TEI block (should such a thing exist).
--- | f == "html" = text str -- allow html for backwards compatibility
- | otherwise = empty
-blockToTEI _ HorizontalRule =
- selfClosingTag "milestone" [("unit","undefined"), ("type","separator"),("rendition","line")]
+ inTags True "list" attribs <$> deflistItemsToTEI opts lst
+blockToTEI _ b@(RawBlock f str)
+ | f == "tei" = return $ text str
+ -- raw TEI block (should such a thing exist).
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToTEI _ HorizontalRule = return $
+ selfClosingTag "milestone" [("unit","undefined")
+ ,("type","separator")
+ ,("rendition","line")]
-- | TEI Tables
-- TEI Simple's tables are composed of cells and rows; other
-- table info in the AST is here lossily discard.
-blockToTEI opts (Table _ _ _ headers rows) =
- let
- headers' = tableHeadersToTEI opts headers
--- headers' = if all null headers
--- then return empty
--- else tableRowToTEI opts headers
- in
- inTags True "table" [] $
- vcat $ [headers'] <> map (tableRowToTEI opts) rows
+blockToTEI opts (Table _ _ _ headers rows) = do
+ headers' <- tableHeadersToTEI opts headers
+ rows' <- mapM (tableRowToTEI opts) rows
+ return $ inTags True "table" [] $ headers' $$ vcat rows'
-tableRowToTEI :: WriterOptions
- -> [[Block]]
- -> Doc
+tableRowToTEI :: PandocMonad m
+ => WriterOptions
+ -> [[Block]]
+ -> m Doc
tableRowToTEI opts cols =
- inTagsIndented "row" $ vcat $ map (tableItemToTEI opts) cols
+ (inTagsIndented "row" . vcat) <$> mapM (tableItemToTEI opts) cols
-tableHeadersToTEI :: WriterOptions
+tableHeadersToTEI :: PandocMonad m
+ => WriterOptions
-> [[Block]]
- -> Doc
+ -> m Doc
tableHeadersToTEI opts cols =
- inTags True "row" [("role","label")] $ vcat $ map (tableItemToTEI opts) cols
+ (inTags True "row" [("role","label")] . vcat) <$>
+ mapM (tableItemToTEI opts) cols
-tableItemToTEI :: WriterOptions
- -> [Block]
- -> Doc
+tableItemToTEI :: PandocMonad m
+ => WriterOptions
+ -> [Block]
+ -> m Doc
tableItemToTEI opts item =
- inTags False "cell" [] $ vcat $ map (blockToTEI opts) item
+ (inTags False "cell" [] . vcat) <$> mapM (blockToTEI opts) item
-- | Convert a list of inline elements to TEI.
-inlinesToTEI :: WriterOptions -> [Inline] -> Doc
-inlinesToTEI opts lst = hcat $ map (inlineToTEI opts) lst
+inlinesToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m Doc
+inlinesToTEI opts lst = hcat <$> mapM (inlineToTEI opts) lst
-- | Convert an inline element to TEI.
-inlineToTEI :: WriterOptions -> Inline -> Doc
-inlineToTEI _ (Str str) = text $ escapeStringForXML str
+inlineToTEI :: PandocMonad m => WriterOptions -> Inline -> m Doc
+inlineToTEI _ (Str str) = return $ text $ escapeStringForXML str
inlineToTEI opts (Emph lst) =
- inTags False "hi" [("rendition","simple:italic")] $ inlinesToTEI opts lst
+ inTags False "hi" [("rendition","simple:italic")] <$> inlinesToTEI opts lst
inlineToTEI opts (Strong lst) =
- inTags False "hi" [("rendition", "simple:bold")] $ inlinesToTEI opts lst
+ inTags False "hi" [("rendition", "simple:bold")] <$> inlinesToTEI opts lst
inlineToTEI opts (Strikeout lst) =
- inTags False "hi" [("rendition", "simple:strikethrough")] $
+ inTags False "hi" [("rendition", "simple:strikethrough")] <$>
inlinesToTEI opts lst
inlineToTEI opts (Superscript lst) =
- inTags False "hi" [("rendition", "simple:superscript")] $ inlinesToTEI opts lst
+ inTags False "hi" [("rendition", "simple:superscript")] <$>
+ inlinesToTEI opts lst
inlineToTEI opts (Subscript lst) =
- inTags False "hi" [("rendition", "simple:subscript")] $ inlinesToTEI opts lst
+ inTags False "hi" [("rendition", "simple:subscript")] <$>
+ inlinesToTEI opts lst
inlineToTEI opts (SmallCaps lst) =
- inTags False "hi" [("rendition", "simple:smallcaps")] $
- inlinesToTEI opts lst
+ inTags False "hi" [("rendition", "simple:smallcaps")] <$>
+ inlinesToTEI opts lst
inlineToTEI opts (Quoted _ lst) =
- inTagsSimple "quote" $ inlinesToTEI opts lst
+ inTagsSimple "quote" <$> inlinesToTEI opts lst
inlineToTEI opts (Cite _ lst) =
inlinesToTEI opts lst
inlineToTEI opts (Span _ ils) =
inlinesToTEI opts ils
-inlineToTEI _ (Code _ str) =
+inlineToTEI _ (Code _ str) = return $
inTags False "seg" [("type","code")] $ text (escapeStringForXML str)
-- Distinguish display from inline math by wrapping the former in a "figure."
-inlineToTEI _ (Math t str) =
+inlineToTEI _ (Math t str) = return $
case t of
InlineMath -> inTags False "formula" [("notation","TeX")] $
- text (str)
+ text str
DisplayMath -> inTags True "figure" [("type","math")] $
- inTags False "formula" [("notation","TeX")] $ text (str)
+ inTags False "formula" [("notation","TeX")] $ text str
-inlineToTEI _ (RawInline f x) | f == "tei" = text x
- | otherwise = empty
-inlineToTEI _ LineBreak = selfClosingTag "lb" []
-inlineToTEI _ Space = space
+inlineToTEI _ il@(RawInline f x) | f == "tei" = return $ text x
+ | otherwise = empty <$
+ report (InlineNotRendered il)
+inlineToTEI _ LineBreak = return $ selfClosingTag "lb" []
+inlineToTEI _ Space =
+ return space
-- because we use \n for LineBreak, we can't do soft breaks:
-inlineToTEI _ SoftBreak = space
+inlineToTEI _ SoftBreak =
+ return space
inlineToTEI opts (Link attr txt (src, _))
- | Just email <- stripPrefix "mailto:" src =
+ | Just email <- stripPrefix "mailto:" src = do
let emailLink = text $
- escapeStringForXML $ email
- in case txt of
- [Str s] | escapeURI s == email -> emailLink
- _ -> inlinesToTEI opts txt <+>
- char '(' <> emailLink <> char ')'
+ escapeStringForXML email
+ case txt of
+ [Str s] | escapeURI s == email ->
+ return emailLink
+ _ -> do
+ linktext <- inlinesToTEI opts txt
+ return $ linktext <+> char '(' <> emailLink <> char ')'
| otherwise =
- (if isPrefixOf "#" src
- then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr
- else inTags False "ref" $ ("target", src) : idAndRole attr ) $
+ (if "#" `isPrefixOf` src
+ then inTags False "ref" $ ("target", drop 1 src)
+ : idFromAttr opts attr
+ else inTags False "ref" $ ("target", src)
+ : idFromAttr opts attr ) <$>
inlinesToTEI opts txt
-inlineToTEI opts (Image attr description (src, tit)) =
+inlineToTEI opts (Image attr description (src, tit)) = do
let titleDoc = if null tit
then empty
- else inTags False "figDesc" [] (text $ escapeStringForXML tit)
- imageDesc = if null description
- then empty
- else inTags False "head" [] (inlinesToTEI opts description)
- in inTagsIndented "figure" $ imageDesc $$
- imageToTEI opts attr src $$ titleDoc
+ else inTags False "figDesc" []
+ (text $ escapeStringForXML tit)
+ imageDesc <- if null description
+ then return empty
+ else inTags False "head" []
+ <$> inlinesToTEI opts description
+ img <- imageToTEI opts attr src
+ return $ inTagsIndented "figure" $ imageDesc $$ img $$ titleDoc
inlineToTEI opts (Note contents) =
- inTagsIndented "note" $ blocksToTEI opts contents
+ inTagsIndented "note" <$> blocksToTEI opts contents
-idAndRole :: Attr -> [(String, String)]
-idAndRole (id',cls,_) = ident ++ role
- where
- ident = if null id'
- then []
- else [("id", id')]
- role = if null cls
- then []
- else [("role", unwords cls)]
+idFromAttr :: WriterOptions -> Attr -> [(String, String)]
+idFromAttr opts (id',_,_) =
+ if null id'
+ then []
+ else [("xml:id", writerIdentifierPrefix opts ++ id')]
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index f2b9aa15f..bf434642e 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2008-2015 John MacFarlane and Peter Wang
+Copyright (C) 2008-2018 John MacFarlane
+ 2012 Peter Wang
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Texinfo
- Copyright : Copyright (C) 2008-2015 John MacFarlane and Peter Wang
+ Copyright : Copyright (C) 2008-2018 John MacFarlane
+ 2012 Peter Wang
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,21 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' format into Texinfo.
-}
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
+import Control.Monad.Except (throwError)
+import Control.Monad.State.Strict
+import Data.Char (chr, ord)
+import Data.List (maximumBy, transpose)
+import Data.Ord (comparing)
+import qualified Data.Set as Set
+import Data.Text (Text)
+import Network.URI (unEscapeString)
+import System.FilePath
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
+import Text.Pandoc.Pretty
import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
import Text.Pandoc.Templates (renderTemplate')
-import Text.Printf ( printf )
-import Data.List ( transpose, maximumBy )
-import Data.Ord ( comparing )
-import Data.Char ( chr, ord )
-import Control.Monad.State
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Network.URI ( isURI, unEscapeString )
-import System.FilePath
-import qualified Data.Set as Set
+import Text.Pandoc.Writers.Shared
+import Text.Printf (printf)
data WriterState =
WriterState { stStrikeout :: Bool -- document contains strikeout
@@ -59,10 +66,12 @@ data WriterState =
- generated .texi files don't work when run through texi2dvi
-}
+type TI m = StateT WriterState m
+
-- | Convert Pandoc to Texinfo.
-writeTexinfo :: WriterOptions -> Pandoc -> String
+writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeTexinfo options document =
- evalState (pandocToTexinfo options $ wrapTop document) $
+ evalStateT (pandocToTexinfo options $ wrapTop document)
WriterState { stStrikeout = False, stSuperscript = False,
stEscapeComma = False, stSubscript = False,
stIdentifiers = Set.empty, stOptions = options}
@@ -72,16 +81,18 @@ wrapTop :: Pandoc -> Pandoc
wrapTop (Pandoc meta blocks) =
Pandoc meta (Header 0 nullAttr (docTitle meta) : blocks)
-pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState String
+pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m Text
pandocToTexinfo options (Pandoc meta blocks) = do
let titlePage = not $ all null
$ docTitle meta : docDate meta : docAuthors meta
let colwidth = if writerWrapText options == WrapAuto
then Just $ writerColumns options
else Nothing
+ let render' :: Doc -> Text
+ render' = render colwidth
metadata <- metaToJSON options
- (fmap (render colwidth) . blockListToTexinfo)
- (fmap (render colwidth) . inlineListToTexinfo)
+ (fmap render' . blockListToTexinfo)
+ (fmap render' . inlineListToTexinfo)
meta
main <- blockListToTexinfo blocks
st <- get
@@ -91,11 +102,11 @@ pandocToTexinfo options (Pandoc meta blocks) = do
$ defField "titlepage" titlePage
$ defField "subscript" (stSubscript st)
$ defField "superscript" (stSuperscript st)
- $ defField "strikeout" (stStrikeout st)
- $ metadata
+ $
+ defField "strikeout" (stStrikeout st) metadata
case writerTemplate options of
Nothing -> return body
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Escape things as needed for Texinfo.
stringToTexinfo :: String -> String
@@ -110,7 +121,7 @@ stringToTexinfo = escapeStringUsing texinfoEscapes
, ('\x2019', "'")
]
-escapeCommas :: State WriterState Doc -> State WriterState Doc
+escapeCommas :: PandocMonad m => TI m Doc -> TI m Doc
escapeCommas parser = do
oldEscapeComma <- gets stEscapeComma
modify $ \st -> st{ stEscapeComma = True }
@@ -123,8 +134,9 @@ inCmd :: String -> Doc -> Doc
inCmd cmd contents = char '@' <> text cmd <> braces contents
-- | Convert Pandoc block element to Texinfo.
-blockToTexinfo :: Block -- ^ Block to convert
- -> State WriterState Doc
+blockToTexinfo :: PandocMonad m
+ => Block -- ^ Block to convert
+ -> TI m Doc
blockToTexinfo Null = return empty
@@ -154,17 +166,19 @@ blockToTexinfo (BlockQuote lst) = do
contents $$
text "@end quotation"
-blockToTexinfo (CodeBlock _ str) = do
+blockToTexinfo (CodeBlock _ str) =
return $ blankline $$
- text "@verbatim" $$
- flush (text str) $$
- text "@end verbatim" <> blankline
+ text "@verbatim" $$
+ flush (text str) $$
+ text "@end verbatim" <> blankline
-blockToTexinfo (RawBlock f str)
+blockToTexinfo b@(RawBlock f str)
| f == "texinfo" = return $ text str
| f == "latex" || f == "tex" =
return $ text "@tex" $$ text str $$ text "@end tex"
- | otherwise = return empty
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
blockToTexinfo (BulletList lst) = do
items <- mapM listItemToTexinfo lst
@@ -204,7 +218,7 @@ blockToTexinfo HorizontalRule =
text "@bigskip@hrule@bigskip" $$
text "@end iftex" $$
text "@ifnottex" $$
- text (take 72 $ repeat '-') $$
+ text (replicate 72 '-') $$
text "@end ifnottex"
blockToTexinfo (Header 0 _ lst) = do
@@ -214,23 +228,27 @@ blockToTexinfo (Header 0 _ lst) = do
return $ text "@node Top" $$
text "@top " <> txt <> blankline
-blockToTexinfo (Header level _ lst) = do
- node <- inlineListForNode lst
- txt <- inlineListToTexinfo lst
- idsUsed <- gets stIdentifiers
- let id' = uniqueIdent lst idsUsed
- modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed }
- return $ if (level > 0) && (level <= 4)
- then blankline <> text "@node " <> node $$
- text (seccmd level) <> txt $$
- text "@anchor" <> braces (text $ '#':id')
- else txt
- where
- seccmd 1 = "@chapter "
- seccmd 2 = "@section "
- seccmd 3 = "@subsection "
- seccmd 4 = "@subsubsection "
- seccmd _ = error "illegal seccmd level"
+blockToTexinfo (Header level _ lst)
+ | level < 1 || level > 4 = blockToTexinfo (Para lst)
+ | otherwise = do
+ node <- inlineListForNode lst
+ txt <- inlineListToTexinfo lst
+ idsUsed <- gets stIdentifiers
+ let id' = uniqueIdent lst idsUsed
+ modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed }
+ sec <- seccmd level
+ return $ if (level > 0) && (level <= 4)
+ then blankline <> text "@node " <> node $$
+ text sec <> txt $$
+ text "@anchor" <> braces (text $ '#':id')
+ else txt
+ where
+ seccmd :: PandocMonad m => Int -> TI m String
+ seccmd 1 = return "@chapter "
+ seccmd 2 = return "@section "
+ seccmd 3 = return "@subsection "
+ seccmd 4 = return "@subsubsection "
+ seccmd _ = throwError $ PandocSomeError "illegal seccmd level"
blockToTexinfo (Table caption aligns widths heads rows) = do
headers <- if all null heads
@@ -256,28 +274,32 @@ blockToTexinfo (Table caption aligns widths heads rows) = do
inCmd "caption" captionText $$
text "@end float"
-tableHeadToTexinfo :: [Alignment]
+tableHeadToTexinfo :: PandocMonad m
+ => [Alignment]
-> [[Block]]
- -> State WriterState Doc
+ -> TI m Doc
tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem "
-tableRowToTexinfo :: [Alignment]
+tableRowToTexinfo :: PandocMonad m
+ => [Alignment]
-> [[Block]]
- -> State WriterState Doc
+ -> TI m Doc
tableRowToTexinfo = tableAnyRowToTexinfo "@item "
-tableAnyRowToTexinfo :: String
+tableAnyRowToTexinfo :: PandocMonad m
+ => String
-> [Alignment]
-> [[Block]]
- -> State WriterState Doc
+ -> TI m Doc
tableAnyRowToTexinfo itemtype aligns cols =
zipWithM alignedBlock aligns cols >>=
return . (text itemtype $$) . foldl (\row item -> row $$
(if isEmpty row then empty else text " @tab ") <> item) empty
-alignedBlock :: Alignment
+alignedBlock :: PandocMonad m
+ => Alignment
-> [Block]
- -> State WriterState Doc
+ -> TI m Doc
-- XXX @flushleft and @flushright text won't get word wrapped. Since word
-- wrapping is more important than alignment, we ignore the alignment.
alignedBlock _ = blockListToTexinfo
@@ -292,8 +314,9 @@ alignedBlock _ col = blockListToTexinfo col
-}
-- | Convert Pandoc block elements to Texinfo.
-blockListToTexinfo :: [Block]
- -> State WriterState Doc
+blockListToTexinfo :: PandocMonad m
+ => [Block]
+ -> TI m Doc
blockListToTexinfo [] = return empty
blockListToTexinfo (x:xs) = do
x' <- blockToTexinfo x
@@ -316,8 +339,8 @@ blockListToTexinfo (x:xs) = do
Para _ -> do
xs' <- blockListToTexinfo xs
case xs of
- ((CodeBlock _ _):_) -> return $ x' $$ xs'
- _ -> return $ x' $+$ xs'
+ (CodeBlock _ _:_) -> return $ x' $$ xs'
+ _ -> return $ x' $+$ xs'
_ -> do
xs' <- blockListToTexinfo xs
return $ x' $$ xs'
@@ -335,15 +358,17 @@ collectNodes level (x:xs) =
_ ->
collectNodes level xs
-makeMenuLine :: Block
- -> State WriterState Doc
+makeMenuLine :: PandocMonad m
+ => Block
+ -> TI m Doc
makeMenuLine (Header _ _ lst) = do
txt <- inlineListForNode lst
return $ text "* " <> txt <> text "::"
-makeMenuLine _ = error "makeMenuLine called with non-Header block"
+makeMenuLine _ = throwError $ PandocSomeError "makeMenuLine called with non-Header block"
-listItemToTexinfo :: [Block]
- -> State WriterState Doc
+listItemToTexinfo :: PandocMonad m
+ => [Block]
+ -> TI m Doc
listItemToTexinfo lst = do
contents <- blockListToTexinfo lst
let spacer = case reverse lst of
@@ -351,8 +376,9 @@ listItemToTexinfo lst = do
_ -> empty
return $ text "@item" $$ contents <> spacer
-defListItemToTexinfo :: ([Inline], [[Block]])
- -> State WriterState Doc
+defListItemToTexinfo :: PandocMonad m
+ => ([Inline], [[Block]])
+ -> TI m Doc
defListItemToTexinfo (term, defs) = do
term' <- inlineListToTexinfo term
let defToTexinfo bs = do d <- blockListToTexinfo bs
@@ -363,13 +389,15 @@ defListItemToTexinfo (term, defs) = do
return $ text "@item " <> term' $+$ vcat defs'
-- | Convert list of inline elements to Texinfo.
-inlineListToTexinfo :: [Inline] -- ^ Inlines to convert
- -> State WriterState Doc
+inlineListToTexinfo :: PandocMonad m
+ => [Inline] -- ^ Inlines to convert
+ -> TI m Doc
inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat
-- | Convert list of inline elements to Texinfo acceptable for a node name.
-inlineListForNode :: [Inline] -- ^ Inlines to convert
- -> State WriterState Doc
+inlineListForNode :: PandocMonad m
+ => [Inline] -- ^ Inlines to convert
+ -> TI m Doc
inlineListForNode = return . text . stringToTexinfo .
filter (not . disallowedInNode) . stringify
@@ -378,8 +406,9 @@ disallowedInNode :: Char -> Bool
disallowedInNode c = c `elem` (".,:()" :: String)
-- | Convert inline element to Texinfo
-inlineToTexinfo :: Inline -- ^ Inline to convert
- -> State WriterState Doc
+inlineToTexinfo :: PandocMonad m
+ => Inline -- ^ Inline to convert
+ -> TI m Doc
inlineToTexinfo (Span _ lst) =
inlineListToTexinfo lst
@@ -408,7 +437,7 @@ inlineToTexinfo (Subscript lst) = do
inlineToTexinfo (SmallCaps lst) =
inlineListToTexinfo lst >>= return . inCmd "sc"
-inlineToTexinfo (Code _ str) = do
+inlineToTexinfo (Code _ str) =
return $ text $ "@code{" ++ stringToTexinfo str ++ "}"
inlineToTexinfo (Quoted SingleQuote lst) = do
@@ -423,12 +452,14 @@ inlineToTexinfo (Cite _ lst) =
inlineListToTexinfo lst
inlineToTexinfo (Str str) = return $ text (stringToTexinfo str)
inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str
-inlineToTexinfo (RawInline f str)
+inlineToTexinfo il@(RawInline f str)
| f == "latex" || f == "tex" =
return $ text "@tex" $$ text str $$ text "@end tex"
| f == "texinfo" = return $ text str
- | otherwise = return empty
-inlineToTexinfo (LineBreak) = return $ text "@*" <> cr
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToTexinfo LineBreak = return $ text "@*" <> cr
inlineToTexinfo SoftBreak = do
wrapText <- gets (writerWrapText . stOptions)
case wrapText of
@@ -441,10 +472,10 @@ inlineToTexinfo (Link _ txt (src@('#':_), _)) = do
contents <- escapeCommas $ inlineListToTexinfo txt
return $ text "@ref" <>
braces (text (stringToTexinfo src) <> text "," <> contents)
-inlineToTexinfo (Link _ txt (src, _)) = do
+inlineToTexinfo (Link _ txt (src, _)) =
case txt of
[Str x] | escapeURI x == src -> -- autolink
- do return $ text $ "@url{" ++ x ++ "}"
+ return $ text $ "@url{" ++ x ++ "}"
_ -> do contents <- escapeCommas $ inlineListToTexinfo txt
let src1 = stringToTexinfo src
return $ text ("@uref{" ++ src1 ++ ",") <> contents <>
@@ -453,7 +484,7 @@ inlineToTexinfo (Link _ txt (src, _)) = do
inlineToTexinfo (Image attr alternate (source, _)) = do
content <- escapeCommas $ inlineListToTexinfo alternate
opts <- gets stOptions
- let showDim dim = case (dimension dim attr) of
+ let showDim dim = case dimension dim attr of
(Just (Pixel a)) -> showInInch opts (Pixel a) ++ "in"
(Just (Percent _)) -> ""
(Just d) -> show d
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index f73876fd2..f46eb43bc 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Textile
- Copyright : Copyright (C) 2010-2015 John MacFarlane
+ Copyright : Copyright (C) 2010-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -30,17 +30,20 @@ Conversion of 'Pandoc' documents to Textile markup.
Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual>
-}
module Text.Pandoc.Writers.Textile ( writeTextile ) where
+import Control.Monad.State.Strict
+import Data.Char (isSpace)
+import Data.List (intercalate)
+import Data.Text (Text, pack)
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Shared
import Text.Pandoc.Pretty (render)
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.XML ( escapeStringForXML )
-import Data.List ( intercalate )
-import Control.Monad.State
-import Data.Char ( isSpace )
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML (escapeStringForXML)
data WriterState = WriterState {
stNotes :: [String] -- Footnotes
@@ -49,29 +52,34 @@ data WriterState = WriterState {
, stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
}
+type TW = StateT WriterState
+
-- | Convert Pandoc to Textile.
-writeTextile :: WriterOptions -> Pandoc -> String
+writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeTextile opts document =
- evalState (pandocToTextile opts document)
- WriterState { stNotes = [], stListLevel = [], stStartNum = Nothing,
+ evalStateT (pandocToTextile opts document)
+ WriterState { stNotes = [],
+ stListLevel = [],
+ stStartNum = Nothing,
stUseTags = False }
-- | Return Textile representation of document.
-pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String
+pandocToTextile :: PandocMonad m
+ => WriterOptions -> Pandoc -> TW m Text
pandocToTextile opts (Pandoc meta blocks) = do
metadata <- metaToJSON opts (blockListToTextile opts)
(inlineListToTextile opts) meta
body <- blockListToTextile opts blocks
- notes <- liftM (unlines . reverse . stNotes) get
- let main = body ++ if null notes then "" else ("\n\n" ++ notes)
+ notes <- gets $ unlines . reverse . stNotes
+ let main = pack $ body ++ if null notes then "" else ("\n\n" ++ notes)
let context = defField "body" main metadata
case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-withUseTags :: State WriterState a -> State WriterState a
+withUseTags :: PandocMonad m => TW m a -> TW m a
withUseTags action = do
- oldUseTags <- liftM stUseTags get
+ oldUseTags <- gets stUseTags
modify $ \s -> s { stUseTags = True }
result <- action
modify $ \s -> s { stUseTags = oldUseTags }
@@ -101,9 +109,10 @@ escapeStringForTextile :: String -> String
escapeStringForTextile = concatMap escapeCharForTextile
-- | Convert Pandoc block element to Textile.
-blockToTextile :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState String
+blockToTextile :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> TW m String
blockToTextile _ Null = return ""
@@ -123,8 +132,8 @@ blockToTextile opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
return $ im ++ "\n" ++ capt
blockToTextile opts (Para inlines) = do
- useTags <- liftM stUseTags get
- listLevel <- liftM stListLevel get
+ useTags <- gets stUseTags
+ listLevel <- gets stListLevel
contents <- inlineListToTextile opts inlines
return $ if useTags
then "<p>" ++ contents ++ "</p>"
@@ -133,9 +142,11 @@ blockToTextile opts (Para inlines) = do
blockToTextile opts (LineBlock lns) =
blockToTextile opts $ linesToPara lns
-blockToTextile _ (RawBlock f str)
+blockToTextile _ b@(RawBlock f str)
| f == Format "html" || f == Format "textile" = return str
- | otherwise = return ""
+ | otherwise = do
+ report $ BlockNotRendered b
+ return ""
blockToTextile _ HorizontalRule = return "<hr />\n"
@@ -211,7 +222,7 @@ blockToTextile opts (Table capt aligns widths headers rows') = do
"<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n"
blockToTextile opts x@(BulletList items) = do
- oldUseTags <- liftM stUseTags get
+ oldUseTags <- gets stUseTags
let useTags = oldUseTags || not (isSimpleList x)
if useTags
then do
@@ -219,13 +230,13 @@ blockToTextile opts x@(BulletList items) = do
return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n"
else do
modify $ \s -> s { stListLevel = stListLevel s ++ "*" }
- level <- get >>= return . length . stListLevel
+ level <- gets $ length . stListLevel
contents <- mapM (listItemToTextile opts) items
modify $ \s -> s { stListLevel = init (stListLevel s) }
return $ vcat contents ++ (if level > 1 then "" else "\n")
blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do
- oldUseTags <- liftM stUseTags get
+ oldUseTags <- gets stUseTags
let useTags = oldUseTags || not (isSimpleList x)
if useTags
then do
@@ -237,7 +248,7 @@ blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do
, stStartNum = if start > 1
then Just start
else Nothing }
- level <- get >>= return . length . stListLevel
+ level <- gets $ length . stListLevel
contents <- mapM (listItemToTextile opts) items
modify $ \s -> s { stListLevel = init (stListLevel s),
stStartNum = Nothing }
@@ -261,10 +272,11 @@ listAttribsToString (startnum, numstyle, _) =
else "")
-- | Convert bullet or ordered list item (list of blocks) to Textile.
-listItemToTextile :: WriterOptions -> [Block] -> State WriterState String
+listItemToTextile :: PandocMonad m
+ => WriterOptions -> [Block] -> TW m String
listItemToTextile opts items = do
contents <- blockListToTextile opts items
- useTags <- get >>= return . stUseTags
+ useTags <- gets stUseTags
if useTags
then return $ "<li>" ++ contents ++ "</li>"
else do
@@ -277,14 +289,15 @@ listItemToTextile opts items = do
Nothing -> return $ marker ++ " " ++ contents
-- | Convert definition list item (label, list of blocks) to Textile.
-definitionListItemToTextile :: WriterOptions
+definitionListItemToTextile :: PandocMonad m
+ => WriterOptions
-> ([Inline],[[Block]])
- -> State WriterState String
+ -> TW m String
definitionListItemToTextile opts (label, items) = do
labelText <- inlineListToTextile opts label
contents <- mapM (blockListToTextile opts) items
return $ "<dt>" ++ labelText ++ "</dt>\n" ++
- (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents)
+ intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents)
-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
isSimpleList :: Block -> Bool
@@ -301,16 +314,16 @@ isSimpleListItem :: [Block] -> Bool
isSimpleListItem [] = True
isSimpleListItem [x] =
case x of
- Plain _ -> True
- Para _ -> True
- BulletList _ -> isSimpleList x
- OrderedList _ _ -> isSimpleList x
- _ -> False
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ _ -> False
isSimpleListItem [x, y] | isPlainOrPara x =
case y of
- BulletList _ -> isSimpleList y
- OrderedList _ _ -> isSimpleList y
- _ -> False
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ _ -> False
isSimpleListItem _ = False
isPlainOrPara :: Block -> Bool
@@ -325,18 +338,19 @@ vcat = intercalate "\n"
-- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki,
-- and Textile writers, and should be abstracted out.)
-tableRowToTextile :: WriterOptions
- -> [String]
- -> Int
- -> [[Block]]
- -> State WriterState String
+tableRowToTextile :: PandocMonad m
+ => WriterOptions
+ -> [String]
+ -> Int
+ -> [[Block]]
+ -> TW m String
tableRowToTextile opts alignStrings rownum cols' = do
let celltype = if rownum == 0 then "th" else "td"
let rowclass = case rownum of
- 0 -> "header"
+ 0 -> "header"
x | x `rem` 2 == 1 -> "odd"
- _ -> "even"
- cols'' <- sequence $ zipWith
+ _ -> "even"
+ cols'' <- zipWithM
(\alignment item -> tableItemToTextile opts celltype alignment item)
alignStrings cols'
return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>"
@@ -348,11 +362,12 @@ alignmentToString alignment = case alignment of
AlignCenter -> "center"
AlignDefault -> "left"
-tableItemToTextile :: WriterOptions
- -> String
- -> String
- -> [Block]
- -> State WriterState String
+tableItemToTextile :: PandocMonad m
+ => WriterOptions
+ -> String
+ -> String
+ -> [Block]
+ -> TW m String
tableItemToTextile opts celltype align' item = do
let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++
x ++ "</" ++ celltype ++ ">"
@@ -360,19 +375,21 @@ tableItemToTextile opts celltype align' item = do
return $ mkcell contents
-- | Convert list of Pandoc block elements to Textile.
-blockListToTextile :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState String
+blockListToTextile :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> TW m String
blockListToTextile opts blocks =
mapM (blockToTextile opts) blocks >>= return . vcat
-- | Convert list of Pandoc inline elements to Textile.
-inlineListToTextile :: WriterOptions -> [Inline] -> State WriterState String
+inlineListToTextile :: PandocMonad m
+ => WriterOptions -> [Inline] -> TW m String
inlineListToTextile opts lst =
mapM (inlineToTextile opts) lst >>= return . concat
-- | Convert Pandoc inline element to Textile.
-inlineToTextile :: WriterOptions -> Inline -> State WriterState String
+inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m String
inlineToTextile opts (Span _ lst) =
inlineListToTextile opts lst
@@ -429,13 +446,15 @@ inlineToTextile _ (Str str) = return $ escapeStringForTextile str
inlineToTextile _ (Math _ str) =
return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>"
-inlineToTextile opts (RawInline f str)
+inlineToTextile opts il@(RawInline f str)
| f == Format "html" || f == Format "textile" = return str
| (f == Format "latex" || f == Format "tex") &&
isEnabled Ext_raw_tex opts = return str
- | otherwise = return ""
+ | otherwise = do
+ report $ InlineNotRendered il
+ return ""
-inlineToTextile _ (LineBreak) = return "\n"
+inlineToTextile _ LineBreak = return "\n"
inlineToTextile _ SoftBreak = return " "
@@ -464,7 +483,7 @@ inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do
then ""
else "(" ++ unwords cls ++ ")"
showDim dir = let toCss str = Just $ show dir ++ ":" ++ str ++ ";"
- in case (dimension dir attr) of
+ in case dimension dir attr of
Just (Percent a) -> toCss $ show (Percent a)
Just dim -> toCss $ showInPixel opts dim ++ "px"
Nothing -> Nothing
@@ -476,7 +495,7 @@ inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do
return $ "!" ++ classes ++ styles ++ source ++ txt ++ "!"
inlineToTextile opts (Note contents) = do
- curNotes <- liftM stNotes get
+ curNotes <- gets stNotes
let newnum = length curNotes + 1
contents' <- blockListToTextile opts contents
let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n"
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index 423928c8a..dec1f9d4a 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -1,5 +1,6 @@
{-
-Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu>
+ 2017-2018 Alex Ivkin
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,11 +19,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.ZimWiki
- Copyright : Copyright (C) 2008-2015 John MacFarlane, 2016 Alex Ivkin
+ Copyright : Copyright (C) 2008-2018 John MacFarlane, 2017-2018 Alex Ivkin
License : GNU GPL, version 2 or above
Maintainer : Alex Ivkin <alex@ivkin.net>
- Stability : alpha
+ Stability : beta
Portability : portable
Conversion of 'Pandoc' documents to ZimWiki markup.
@@ -31,48 +32,53 @@ http://zim-wiki.org/manual/Help/Wiki_Syntax.html
-}
module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where
+import Control.Monad (zipWithM)
+import Control.Monad.State.Strict (StateT, evalStateT, gets, modify)
+import Data.Default (Default (..))
+import Data.List (intercalate, isInfixOf, isPrefixOf, transpose)
+import qualified Data.Map as Map
+import Data.Maybe (fromMaybe)
+import Data.Text (Text, breakOnAll, pack)
+import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
-import Text.Pandoc.Options ( WriterOptions(writerTableOfContents, writerTemplate, writerWrapText), WrapOption(..) )
-import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting, trimr
- , substitute )
-import Text.Pandoc.Writers.Shared ( defField, metaToJSON )
import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates ( renderTemplate' )
-import Data.List ( intercalate, isPrefixOf, transpose, isInfixOf )
-import Data.Text ( breakOnAll, pack )
-import Data.Default (Default(..))
-import Network.URI ( isURI )
-import Control.Monad ( zipWithM )
-import Control.Monad.State ( modify, State, get, evalState )
---import Control.Monad.Reader ( ReaderT, runReaderT, ask, local )
+import Text.Pandoc.Logging
+import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText))
+import Text.Pandoc.Shared (escapeURI, isURI, linesToPara, removeFormatting,
+ substitute, trimr)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared (defField, metaToJSON)
data WriterState = WriterState {
- stItemNum :: Int,
- stIndent :: String -- Indent after the marker at the beginning of list items
+ stItemNum :: Int,
+ stIndent :: String, -- Indent after the marker at the beginning of list items
+ stInTable :: Bool, -- Inside a table
+ stInLink :: Bool -- Inside a link description
}
instance Default WriterState where
- def = WriterState { stItemNum = 1, stIndent = "" }
+ def = WriterState { stItemNum = 1, stIndent = "", stInTable = False, stInLink = False }
+
+type ZW = StateT WriterState
-- | Convert Pandoc to ZimWiki.
-writeZimWiki :: WriterOptions -> Pandoc -> String
-writeZimWiki opts document = evalState (pandocToZimWiki opts document) (WriterState 1 "")
+writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeZimWiki opts document = evalStateT (pandocToZimWiki opts document) def
-- | Return ZimWiki representation of document.
-pandocToZimWiki :: WriterOptions -> Pandoc -> State WriterState String
+pandocToZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> ZW m Text
pandocToZimWiki opts (Pandoc meta blocks) = do
metadata <- metaToJSON opts
(fmap trimr . blockListToZimWiki opts)
(inlineListToZimWiki opts)
meta
- body <- blockListToZimWiki opts blocks
+ body <- pack <$> blockListToZimWiki opts blocks
--let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n"
let main = body
let context = defField "body" main
- $ defField "toc" (writerTableOfContents opts)
- $ metadata
+ $ defField "toc" (writerTableOfContents opts) metadata
case writerTemplate opts of
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
Nothing -> return main
-- | Escape special characters for ZimWiki.
@@ -83,7 +89,7 @@ escapeString = substitute "__" "''__''" .
substitute "//" "''//''"
-- | Convert Pandoc block element to ZimWiki.
-blockToZimWiki :: WriterOptions -> Block -> State WriterState String
+blockToZimWiki :: PandocMonad m => WriterOptions -> Block -> ZW m String
blockToZimWiki _ Null = return ""
@@ -107,18 +113,20 @@ blockToZimWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n"
blockToZimWiki opts (Para inlines) = do
- indent <- stIndent <$> get
- -- useTags <- stUseTags <$> get
+ indent <- gets stIndent
+ -- useTags <- gets stUseTags
contents <- inlineListToZimWiki opts inlines
return $ contents ++ if null indent then "\n" else ""
-blockToZimWiki opts (LineBlock lns) = do
+blockToZimWiki opts (LineBlock lns) =
blockToZimWiki opts $ linesToPara lns
-blockToZimWiki opts (RawBlock f str)
+blockToZimWiki opts b@(RawBlock f str)
| f == Format "zimwiki" = return str
- | f == Format "html" = do cont <- indentFromHTML opts str; return cont
- | otherwise = return ""
+ | f == Format "html" = indentFromHTML opts str
+ | otherwise = do
+ report $ BlockNotRendered b
+ return ""
blockToZimWiki _ HorizontalRule = return "\n----\n"
@@ -128,9 +136,13 @@ blockToZimWiki opts (Header level _ inlines) = do
return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
blockToZimWiki _ (CodeBlock (_,classes,_) str) = do
+ -- Remap languages into the gtksourceview2 convention that ZimWiki source code plugin is using
+ let langal = [("javascript", "js"), ("bash", "sh"), ("winbatch", "dosbatch")]
+ let langmap = Map.fromList langal
return $ case classes of
- [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- no lang block is a quote block
- (x:_) -> "{{{code: lang=\"" ++ x ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec
+ [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- turn no lang block into a quote block
+ (x:_) -> "{{{code: lang=\"" ++
+ fromMaybe x (Map.lookup x langmap) ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec
blockToZimWiki opts (BlockQuote blocks) = do
contents <- blockListToZimWiki opts blocks
@@ -143,12 +155,12 @@ blockToZimWiki opts (Table capt aligns _ headers rows) = do
c <- inlineListToZimWiki opts capt
return $ "" ++ c ++ "\n"
headers' <- if all null headers
- then zipWithM (tableItemToZimWiki opts) aligns (rows !! 0)
- else zipWithM (tableItemToZimWiki opts) aligns headers
+ then zipWithM (tableItemToZimWiki opts) aligns (head rows)
+ else mapM (inlineListToZimWiki opts . removeFormatting)headers -- emphasis, links etc. are not allowed in table headers
rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows
let widths = map (maximum . map length) $ transpose (headers':rows')
let padTo (width, al) s =
- case (width - length s) of
+ case width - length s of
x | x > 0 ->
if al == AlignLeft || al == AlignDefault
then s ++ replicate x ' '
@@ -157,63 +169,63 @@ blockToZimWiki opts (Table capt aligns _ headers rows) = do
else replicate (x `div` 2) ' ' ++
s ++ replicate (x - x `div` 2) ' '
| otherwise -> s
- let borderCell (width, al) _ =
- if al == AlignLeft
- then ":"++ replicate (width-1) '-'
- else if al == AlignDefault
- then replicate width '-'
- else if al == AlignRight
- then replicate (width-1) '-' ++ ":"
- else ":" ++ replicate (width-2) '-' ++ ":"
+ let borderCell (width, al) _
+ | al == AlignLeft = ":"++ replicate (width-1) '-'
+ | al == AlignDefault = replicate width '-'
+ | al == AlignRight = replicate (width-1) '-' ++ ":"
+ | otherwise = ":" ++ replicate (width-2) '-' ++ ":"
let underheader = "|" ++ intercalate "|" (zipWith borderCell (zip widths aligns) headers') ++ "|"
- let renderRow sep cells = sep ++ intercalate sep (zipWith padTo (zip widths aligns) cells) ++ sep
+ let renderRow cells = "|" ++ intercalate "|" (zipWith padTo (zip widths aligns) cells) ++ "|"
return $ captionDoc ++
- (if null headers' then "" else renderRow "|" headers' ++ "\n") ++ underheader ++ "\n" ++
- unlines (map (renderRow "|") rows')
+ (if null headers' then "" else renderRow headers' ++ "\n") ++ underheader ++ "\n" ++
+ unlines (map renderRow rows')
blockToZimWiki opts (BulletList items) = do
- indent <- stIndent <$> get
+ indent <- gets stIndent
modify $ \s -> s { stIndent = stIndent s ++ "\t" }
- contents <- (mapM (listItemToZimWiki opts) items)
+ contents <- mapM (listItemToZimWiki opts) items
modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) }
return $ vcat contents ++ if null indent then "\n" else ""
blockToZimWiki opts (OrderedList _ items) = do
- indent <- stIndent <$> get
+ indent <- gets stIndent
modify $ \s -> s { stIndent = stIndent s ++ "\t", stItemNum = 1 }
- contents <- (mapM (orderedListItemToZimWiki opts) items)
+ contents <- mapM (orderedListItemToZimWiki opts) items
modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) }
return $ vcat contents ++ if null indent then "\n" else ""
blockToZimWiki opts (DefinitionList items) = do
- contents <- (mapM (definitionListItemToZimWiki opts) items)
+ contents <- mapM (definitionListItemToZimWiki opts) items
return $ vcat contents
-definitionListItemToZimWiki :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState String
+definitionListItemToZimWiki :: PandocMonad m
+ => WriterOptions
+ -> ([Inline],[[Block]])
+ -> ZW m String
definitionListItemToZimWiki opts (label, items) = do
labelText <- inlineListToZimWiki opts label
contents <- mapM (blockListToZimWiki opts) items
- indent <- stIndent <$> get
+ indent <- gets stIndent
return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents
-- Auxiliary functions for lists:
-indentFromHTML :: WriterOptions -> String -> State WriterState String
+indentFromHTML :: PandocMonad m => WriterOptions -> String -> ZW m String
indentFromHTML _ str = do
- indent <- stIndent <$> get
- itemnum <- stItemNum <$> get
- if isInfixOf "<li>" str then return $ indent ++ show itemnum ++ "."
- else if isInfixOf "</li>" str then return "\n"
- else if isInfixOf "<li value=" str then do
+ indent <- gets stIndent
+ itemnum <- gets stItemNum
+ if "<li>" `isInfixOf` str then return $ indent ++ show itemnum ++ "."
+ else if "</li>" `isInfixOf` str then return "\n"
+ else if "<li value=" `isInfixOf` str then do
-- poor man's cut
let val = drop 10 $ reverse $ drop 1 $ reverse str
--let val = take ((length valls) - 2) valls
modify $ \s -> s { stItemNum = read val }
return ""
- else if isInfixOf "<ol>" str then do
+ else if "<ol>" `isInfixOf` str then do
let olcount=countSubStrs "<ol>" str
modify $ \s -> s { stIndent = stIndent s ++ replicate olcount '\t', stItemNum = 1 }
return ""
- else if isInfixOf "</ol>" str then do
+ else if "</ol>" `isInfixOf` str then do
let olcount=countSubStrs "/<ol>" str
modify $ \s -> s{ stIndent = drop olcount (stIndent s) }
return ""
@@ -230,23 +242,25 @@ vcat :: [String] -> String
vcat = intercalate "\n"
-- | Convert bullet list item (list of blocks) to ZimWiki.
-listItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String
+listItemToZimWiki :: PandocMonad m => WriterOptions -> [Block] -> ZW m String
listItemToZimWiki opts items = do
contents <- blockListToZimWiki opts items
- indent <- stIndent <$> get
+ indent <- gets stIndent
return $ indent ++ "* " ++ contents
-- | Convert ordered list item (list of blocks) to ZimWiki.
-orderedListItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String
+orderedListItemToZimWiki :: PandocMonad m
+ => WriterOptions -> [Block] -> ZW m String
orderedListItemToZimWiki opts items = do
contents <- blockListToZimWiki opts items
- indent <- stIndent <$> get
- itemnum <- stItemNum <$> get
+ indent <- gets stIndent
+ itemnum <- gets stItemNum
--modify $ \s -> s { stItemNum = itemnum + 1 } -- this is not strictly necessary for zim as zim does its own renumbering
return $ indent ++ show itemnum ++ ". " ++ contents
-- Auxiliary functions for tables:
-tableItemToZimWiki :: WriterOptions -> Alignment -> [Block] -> State WriterState String
+tableItemToZimWiki :: PandocMonad m
+ => WriterOptions -> Alignment -> [Block] -> ZW m String
tableItemToZimWiki opts align' item = do
let mkcell x = (if align' == AlignRight || align' == AlignCenter
then " "
@@ -254,19 +268,24 @@ tableItemToZimWiki opts align' item = do
(if align' == AlignLeft || align' == AlignCenter
then " "
else "")
- contents <- blockListToZimWiki opts item -- local (\s -> s { stBackSlashLB = True }) $
+ modify $ \s -> s { stInTable = True }
+ contents <- blockListToZimWiki opts item
+ modify $ \s -> s { stInTable = False }
return $ mkcell contents
-- | Convert list of Pandoc block elements to ZimWiki.
-blockListToZimWiki :: WriterOptions -> [Block] -> State WriterState String
+blockListToZimWiki :: PandocMonad m
+ => WriterOptions -> [Block] -> ZW m String
blockListToZimWiki opts blocks = vcat <$> mapM (blockToZimWiki opts) blocks
-- | Convert list of Pandoc inline elements to ZimWiki.
-inlineListToZimWiki :: WriterOptions -> [Inline] -> State WriterState String
-inlineListToZimWiki opts lst = concat <$> (mapM (inlineToZimWiki opts) lst)
+inlineListToZimWiki :: PandocMonad m
+ => WriterOptions -> [Inline] -> ZW m String
+inlineListToZimWiki opts lst = concat <$> mapM (inlineToZimWiki opts) lst
-- | Convert Pandoc inline element to ZimWiki.
-inlineToZimWiki :: WriterOptions -> Inline -> State WriterState String
+inlineToZimWiki :: PandocMonad m
+ => WriterOptions -> Inline -> ZW m String
inlineToZimWiki opts (Emph lst) = do
contents <- inlineListToZimWiki opts lst
@@ -304,7 +323,15 @@ inlineToZimWiki opts (Cite _ lst) = inlineListToZimWiki opts lst
inlineToZimWiki _ (Code _ str) = return $ "''" ++ str ++ "''"
-inlineToZimWiki _ (Str str) = return $ escapeString str
+inlineToZimWiki _ (Str str) = do
+ inTable <- gets stInTable
+ inLink <- gets stInLink
+ if inTable
+ then return $ substitute "|" "\\|" . escapeString $ str
+ else
+ if inLink
+ then return str
+ else return $ escapeString str
inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped
where delim = case mathType of
@@ -312,12 +339,18 @@ inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note
InlineMath -> "$"
-- | f == Format "html" = return $ "<html>" ++ str ++ "</html>"
-inlineToZimWiki opts (RawInline f str)
+inlineToZimWiki opts il@(RawInline f str)
| f == Format "zimwiki" = return str
- | f == Format "html" = do cont <- indentFromHTML opts str; return cont
- | otherwise = return ""
+ | f == Format "html" = indentFromHTML opts str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return ""
-inlineToZimWiki _ (LineBreak) = return "\n" -- was \\\\
+inlineToZimWiki _ LineBreak = do
+ inTable <- gets stInTable
+ if inTable
+ then return "\\n"
+ else return "\n"
inlineToZimWiki opts SoftBreak =
case writerWrapText opts of
@@ -328,37 +361,45 @@ inlineToZimWiki opts SoftBreak =
inlineToZimWiki _ Space = return " "
inlineToZimWiki opts (Link _ txt (src, _)) = do
- label <- inlineListToZimWiki opts txt
+ inTable <- gets stInTable
+ modify $ \s -> s { stInLink = True }
+ label <- inlineListToZimWiki opts $ removeFormatting txt -- zim does not allow formatting in link text, it takes the text verbatim, no need to escape it
+ modify $ \s -> s { stInLink = False }
+ let label'= if inTable
+ then "" -- no label is allowed in a table
+ else "|"++label
case txt of
[Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">"
| escapeURI s == src -> return src
_ -> if isURI src
- then return $ "[[" ++ src ++ "|" ++ label ++ "]]"
- else return $ "[[" ++ src' ++ "|" ++ label ++ "]]"
+ then return $ "[[" ++ src ++ label' ++ "]]"
+ else return $ "[[" ++ src' ++ label' ++ "]]"
where src' = case src of
'/':xs -> xs -- with leading / it's a
_ -> src -- link to a help page
inlineToZimWiki opts (Image attr alt (source, tit)) = do
alt' <- inlineListToZimWiki opts alt
- let txt = case (tit, alt) of
- ("", []) -> ""
- ("", _ ) -> "|" ++ alt'
- (_ , _ ) -> "|" ++ tit
+ inTable <- gets stInTable
+ let txt = case (tit, alt, inTable) of
+ ("",[], _) -> ""
+ ("", _, False ) -> "|" ++ alt'
+ (_ , _, False ) -> "|" ++ tit
+ (_ , _, True ) -> ""
-- Relative links fail isURI and receive a colon
prefix = if isURI source then "" else ":"
return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}"
inlineToZimWiki opts (Note contents) = do
+ -- no concept of notes in zim wiki, use a text block
contents' <- blockListToZimWiki opts contents
- return $ "((" ++ contents' ++ "))"
- -- note - may not work for notes with multiple blocks
+ return $ " **{Note:** " ++ trimr contents' ++ "**}**"
imageDims :: WriterOptions -> Attr -> String
imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr)
where
toPx = fmap (showInPixel opts) . checkPct
checkPct (Just (Percent _)) = Nothing
- checkPct maybeDim = maybeDim
+ checkPct maybeDim = maybeDim
go (Just w) Nothing = "?" ++ w
go (Just w) (Just h) = "?" ++ w ++ "x" ++ h
go Nothing (Just h) = "?0x" ++ h
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index e105aee91..62874f0b9 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.XML
- Copyright : Copyright (C) 2006-2016 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -36,18 +36,20 @@ module Text.Pandoc.XML ( escapeCharForXML,
toEntities,
fromEntities ) where
-import Text.Pandoc.Pretty
-import Data.Char (ord, isAscii, isSpace)
+import Data.Char (isAscii, isSpace, ord)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Pretty
-- | Escape one character as needed for XML.
escapeCharForXML :: Char -> String
escapeCharForXML x = case x of
- '&' -> "&amp;"
- '<' -> "&lt;"
- '>' -> "&gt;"
- '"' -> "&quot;"
- c -> [c]
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ c -> [c]
-- | Escape string as needed for XML. Entity references are not preserved.
escapeStringForXML :: String -> String
@@ -91,11 +93,10 @@ inTagsIndented :: String -> Doc -> Doc
inTagsIndented tagType = inTags True tagType []
-- | Escape all non-ascii characters using numerical entities.
-toEntities :: String -> String
-toEntities [] = ""
-toEntities (c:cs)
- | isAscii c = c : toEntities cs
- | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+toEntities :: Text -> Text
+toEntities = T.concatMap go
+ where go c | isAscii c = T.singleton c
+ | otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
-- Unescapes XML entities
fromEntities :: String -> String
@@ -108,8 +109,8 @@ fromEntities ('&':xs) =
(zs, ys) -> (zs,ys)
ent' = case ent of
'#':'X':ys -> '#':'x':ys -- workaround tagsoup bug
- '#':_ -> ent
- _ -> ent ++ ";"
+ '#':_ -> ent
+ _ -> ent ++ ";"
fromEntities (x:xs) = x : fromEntities xs
fromEntities [] = []