diff options
Diffstat (limited to 'src/Text')
128 files changed, 29427 insertions, 11943 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index d83fa85e7..dd2856674 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -37,17 +39,18 @@ inline links: > module Main where > import Text.Pandoc +> import Data.Text (Text) +> import qualified Data.Text.IO as T > -> markdownToRST :: String -> String -> markdownToRST = -> writeRST def {writerReferenceLinks = True} . -> handleError . readMarkdown def -> -> main = getContents >>= putStrLn . markdownToRST +> mdToRST :: Text -> IO Text +> mdToRST txt = runIOorExplode $ +> readMarkdown def txt +> >>= writeRST def{ writerReferenceLinks = True } -Note: all of the readers assume that the input text has @'\n'@ -line endings. So if you get your input text from a web form, -you should remove @'\r'@ characters using @filter (/='\r')@. +> +> main :: IO () +> main = do +> T.getContents >>= mdToRST >>= T.putStrLn -} @@ -59,332 +62,29 @@ module Text.Pandoc , module Text.Pandoc.Generic -- * Options , module Text.Pandoc.Options + -- * Logging + , module Text.Pandoc.Logging + -- * Typeclass + , module Text.Pandoc.Class -- * Error handling , module Text.Pandoc.Error - -- * Lists of readers and writers - , readers - , writers -- * Readers: converting /to/ Pandoc format - , Reader (..) - , mkStringReader - , readDocx - , readOdt - , readMarkdown - , readCommonMark - , readMediaWiki - , readRST - , readOrg - , readLaTeX - , readHtml - , readTextile - , readDocBook - , readOPML - , readHaddock - , readNative - , readJSON - , readTWiki - , readTxt2Tags - , readTxt2TagsNoMacros - , readEPUB + , module Text.Pandoc.Readers -- * Writers: converting /from/ Pandoc format - , Writer (..) - , writeNative - , writeJSON - , writeMarkdown - , writePlain - , writeRST - , writeLaTeX - , writeConTeXt - , writeTexinfo - , writeHtml - , writeHtmlString - , writeICML - , writeDocbook - , writeOPML - , writeOpenDocument - , writeMan - , writeMediaWiki - , writeDokuWiki - , writeZimWiki - , writeTextile - , writeRTF - , writeODT - , writeDocx - , writeEPUB - , writeFB2 - , writeOrg - , writeAsciiDoc - , writeHaddock - , writeCommonMark - , writeCustom - , writeTEI + , module Text.Pandoc.Writers -- * Rendering templates and default templates , module Text.Pandoc.Templates -- * Miscellaneous - , getReader - , getWriter - , getDefaultExtensions - , ToJsonFilter(..) , pandocVersion ) where +import Text.Pandoc.Class import Text.Pandoc.Definition +import Text.Pandoc.Error import Text.Pandoc.Generic -import Text.Pandoc.JSON -import Text.Pandoc.Readers.Markdown -import Text.Pandoc.Readers.CommonMark -import Text.Pandoc.Readers.MediaWiki -import Text.Pandoc.Readers.RST -import Text.Pandoc.Readers.Org -import Text.Pandoc.Readers.DocBook -import Text.Pandoc.Readers.OPML -import Text.Pandoc.Readers.LaTeX -import Text.Pandoc.Readers.HTML -import Text.Pandoc.Readers.Textile -import Text.Pandoc.Readers.Native -import Text.Pandoc.Readers.Haddock -import Text.Pandoc.Readers.TWiki -import Text.Pandoc.Readers.Docx -import Text.Pandoc.Readers.Odt -import Text.Pandoc.Readers.Txt2Tags -import Text.Pandoc.Readers.EPUB -import Text.Pandoc.Writers.Native -import Text.Pandoc.Writers.Markdown -import Text.Pandoc.Writers.RST -import Text.Pandoc.Writers.LaTeX -import Text.Pandoc.Writers.ConTeXt -import Text.Pandoc.Writers.Texinfo -import Text.Pandoc.Writers.HTML -import Text.Pandoc.Writers.ODT -import Text.Pandoc.Writers.Docx -import Text.Pandoc.Writers.EPUB -import Text.Pandoc.Writers.FB2 -import Text.Pandoc.Writers.ICML -import Text.Pandoc.Writers.Docbook -import Text.Pandoc.Writers.OPML -import Text.Pandoc.Writers.OpenDocument -import Text.Pandoc.Writers.Man -import Text.Pandoc.Writers.RTF -import Text.Pandoc.Writers.MediaWiki -import Text.Pandoc.Writers.DokuWiki -import Text.Pandoc.Writers.ZimWiki -import Text.Pandoc.Writers.Textile -import Text.Pandoc.Writers.Org -import Text.Pandoc.Writers.AsciiDoc -import Text.Pandoc.Writers.Haddock -import Text.Pandoc.Writers.CommonMark -import Text.Pandoc.Writers.Custom -import Text.Pandoc.Writers.TEI -import Text.Pandoc.Templates +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn, mapLeft, pandocVersion) -import Text.Pandoc.MediaBag (MediaBag) -import Text.Pandoc.Error -import Data.Aeson -import qualified Data.ByteString.Lazy as BL -import Data.List (intercalate) -import Data.Set (Set) -import qualified Data.Set as Set -import Text.Parsec -import Text.Parsec.Error -import qualified Text.Pandoc.UTF8 as UTF8 - -parseFormatSpec :: String - -> Either ParseError (String, Set Extension -> Set Extension) -parseFormatSpec = parse formatSpec "" - where formatSpec = do - name <- formatName - extMods <- many extMod - return (name, \x -> foldl (flip ($)) x extMods) - formatName = many1 $ noneOf "-+" - extMod = do - polarity <- oneOf "-+" - name <- many $ noneOf "-+" - ext <- case safeRead ("Ext_" ++ name) of - Just n -> return n - Nothing - | name == "lhs" -> return Ext_literate_haskell - | otherwise -> fail $ "Unknown extension: " ++ name - return $ case polarity of - '-' -> Set.delete ext - _ -> Set.insert ext - - -data Reader = StringReader (ReaderOptions -> String -> IO (Either PandocError Pandoc)) - | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Either PandocError (Pandoc,MediaBag))) - -mkStringReader :: (ReaderOptions -> String -> Either PandocError Pandoc) -> Reader -mkStringReader r = StringReader (\o s -> return $ r o s) - -mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader -mkStringReaderWithWarnings r = StringReader $ \o s -> - case r o s of - Left err -> return $ Left err - Right (doc, warnings) -> do - mapM_ warn warnings - return (Right doc) - -mkBSReader :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)) -> Reader -mkBSReader r = ByteStringReader (\o s -> return $ r o s) - -mkBSReaderWithWarnings :: (ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag, [String])) -> Reader -mkBSReaderWithWarnings r = ByteStringReader $ \o s -> - case r o s of - Left err -> return $ Left err - Right (doc, mediaBag, warnings) -> do - mapM_ warn warnings - return $ Right (doc, mediaBag) - --- | Association list of formats and readers. -readers :: [(String, Reader)] -readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) - ,("json" , mkStringReader readJSON ) - ,("markdown" , mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("markdown_strict" , mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings) - ,("commonmark" , mkStringReader readCommonMark) - ,("rst" , mkStringReaderWithWarnings readRSTWithWarnings ) - ,("mediawiki" , mkStringReader readMediaWiki) - ,("docbook" , mkStringReader readDocBook) - ,("opml" , mkStringReader readOPML) - ,("org" , mkStringReader readOrg) - ,("textile" , mkStringReader readTextile) -- TODO : textile+lhs - ,("html" , mkStringReader readHtml) - ,("latex" , mkStringReader readLaTeX) - ,("haddock" , mkStringReader readHaddock) - ,("twiki" , mkStringReader readTWiki) - ,("docx" , mkBSReaderWithWarnings readDocxWithWarnings) - ,("odt" , mkBSReader readOdt) - ,("t2t" , mkStringReader readTxt2TagsNoMacros) - ,("epub" , mkBSReader readEPUB) - ] - -data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) - | IOStringWriter (WriterOptions -> Pandoc -> IO String) - | IOByteStringWriter (WriterOptions -> Pandoc -> IO BL.ByteString) - --- | Association list of formats and writers. -writers :: [ ( String, Writer ) ] -writers = [ - ("native" , PureStringWriter writeNative) - ,("json" , PureStringWriter writeJSON) - ,("docx" , IOByteStringWriter writeDocx) - ,("odt" , IOByteStringWriter writeODT) - ,("epub" , IOByteStringWriter $ \o -> - writeEPUB o{ writerEpubVersion = Just EPUB2 }) - ,("epub3" , IOByteStringWriter $ \o -> - writeEPUB o{ writerEpubVersion = Just EPUB3 }) - ,("fb2" , IOStringWriter writeFB2) - ,("html" , PureStringWriter writeHtmlString) - ,("html5" , PureStringWriter $ \o -> - writeHtmlString o{ writerHtml5 = True }) - ,("icml" , IOStringWriter writeICML) - ,("s5" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = S5Slides - , writerTableOfContents = False }) - ,("slidy" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = SlidySlides }) - ,("slideous" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = SlideousSlides }) - ,("dzslides" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = DZSlides - , writerHtml5 = True }) - ,("revealjs" , PureStringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = RevealJsSlides - , writerHtml5 = True }) - ,("docbook" , PureStringWriter writeDocbook) - ,("docbook5" , PureStringWriter $ \o -> - writeDocbook o{ writerDocbook5 = True }) - ,("opml" , PureStringWriter writeOPML) - ,("opendocument" , PureStringWriter writeOpenDocument) - ,("latex" , PureStringWriter writeLaTeX) - ,("beamer" , PureStringWriter $ \o -> - writeLaTeX o{ writerBeamer = True }) - ,("context" , PureStringWriter writeConTeXt) - ,("texinfo" , PureStringWriter writeTexinfo) - ,("man" , PureStringWriter writeMan) - ,("markdown" , PureStringWriter writeMarkdown) - ,("markdown_strict" , PureStringWriter writeMarkdown) - ,("markdown_phpextra" , PureStringWriter writeMarkdown) - ,("markdown_github" , PureStringWriter writeMarkdown) - ,("markdown_mmd" , PureStringWriter writeMarkdown) - ,("plain" , PureStringWriter writePlain) - ,("rst" , PureStringWriter writeRST) - ,("mediawiki" , PureStringWriter writeMediaWiki) - ,("dokuwiki" , PureStringWriter writeDokuWiki) - ,("zimwiki" , PureStringWriter writeZimWiki) - ,("textile" , PureStringWriter writeTextile) - ,("rtf" , IOStringWriter writeRTFWithEmbeddedImages) - ,("org" , PureStringWriter writeOrg) - ,("asciidoc" , PureStringWriter writeAsciiDoc) - ,("haddock" , PureStringWriter writeHaddock) - ,("commonmark" , PureStringWriter writeCommonMark) - ,("tei" , PureStringWriter writeTEI) - ] - -getDefaultExtensions :: String -> Set Extension -getDefaultExtensions "markdown_strict" = strictExtensions -getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions -getDefaultExtensions "markdown_mmd" = multimarkdownExtensions -getDefaultExtensions "markdown_github" = githubMarkdownExtensions -getDefaultExtensions "markdown" = pandocExtensions -getDefaultExtensions "plain" = plainExtensions -getDefaultExtensions "org" = Set.fromList [Ext_citations, - Ext_auto_identifiers] -getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers] -getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, - Ext_native_divs, - Ext_native_spans] -getDefaultExtensions "html5" = getDefaultExtensions "html" -getDefaultExtensions "epub" = Set.fromList [Ext_raw_html, - Ext_native_divs, - Ext_native_spans, - Ext_epub_html_exts] -getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] - --- | Retrieve reader based on formatSpec (format+extensions). -getReader :: String -> Either String Reader -getReader s = - case parseFormatSpec s of - Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] - Right (readerName, setExts) -> - case lookup readerName readers of - Nothing -> Left $ "Unknown reader: " ++ readerName - Just (StringReader r) -> Right $ StringReader $ \o -> - r o{ readerExtensions = setExts $ - getDefaultExtensions readerName } - Just (ByteStringReader r) -> Right $ ByteStringReader $ \o -> - r o{ readerExtensions = setExts $ - getDefaultExtensions readerName } - --- | Retrieve writer based on formatSpec (format+extensions). -getWriter :: String -> Either String Writer -getWriter s - = case parseFormatSpec s of - Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] - Right (writerName, setExts) -> - case lookup writerName writers of - Nothing -> Left $ "Unknown writer: " ++ writerName - Just (PureStringWriter r) -> Right $ PureStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (IOStringWriter r) -> Right $ IOStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - -{-# DEPRECATED toJsonFilter "Use 'toJSONFilter' from 'Text.Pandoc.JSON' instead" #-} --- | Deprecated. Use @toJSONFilter@ from @Text.Pandoc.JSON@ instead. -class ToJSONFilter a => ToJsonFilter a - where toJsonFilter :: a -> IO () - toJsonFilter = toJSONFilter - -readJSON :: ReaderOptions -> String -> Either PandocError Pandoc -readJSON _ = mapLeft ParseFailure . eitherDecode' . UTF8.fromStringLazy - -writeJSON :: WriterOptions -> Pandoc -> String -writeJSON _ = UTF8.toStringLazy . encode +import Text.Pandoc.Readers +import Text.Pandoc.Shared (pandocVersion) +import Text.Pandoc.Templates +import Text.Pandoc.Writers diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs new file mode 100644 index 000000000..26c754cd6 --- /dev/null +++ b/src/Text/Pandoc/App.hs @@ -0,0 +1,1636 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TupleSections #-} +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.App + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Does a pandoc conversion based on command-line options. +-} +module Text.Pandoc.App ( + convertWithOpts + , Opt(..) + , LineEnding(..) + , Filter(..) + , defaultOpts + , parseOptions + , options + , applyFilters + ) where +import qualified Control.Exception as E +import Control.Monad +import Control.Monad.Except (catchError, throwError) +import Control.Monad.Trans +import Data.Aeson (defaultOptions) +import Data.Aeson.TH (deriveJSON) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as B +import Data.Char (toLower, toUpper) +import Data.List (find, intercalate, isPrefixOf, isSuffixOf, sort) +import qualified Data.Map as M +import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Monoid +import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TE +import qualified Data.Text.Encoding.Error as TE +import Data.Yaml (decode) +import qualified Data.Yaml as Yaml +import GHC.Generics +import Network.URI (URI (..), parseURI) +import Paths_pandoc (getDataDir) +import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder, + defConfig, Indent(..), NumberFormat(..)) +import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme, + pygments) +import Skylighting.Parser (addSyntaxDefinition, parseSyntaxDefinition) +import System.Console.GetOpt +import System.Directory (getAppUserDataDirectory) +import System.Environment (getArgs, getProgName) +import System.Exit (exitSuccess) +import System.FilePath +import System.IO (nativeNewline, stdout) +import qualified System.IO as IO (Newline (..)) +import System.IO.Error (isDoesNotExistError) +import Text.Pandoc +import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import Text.Pandoc.Builder (setMeta, deleteMeta) +import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) +import Text.Pandoc.Highlighting (highlightingStyles) +import Text.Pandoc.PDF (makePDF) +import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) +import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, + headerShift, isURI, ordNub, safeRead, tabFilter) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL) +import Text.Pandoc.XML (toEntities) +import Text.Printf +#ifndef _WINDOWS +import System.Posix.IO (stdOutput) +import System.Posix.Terminal (queryTerminal) +#endif + +data LineEnding = LF | CRLF | Native deriving (Show, Generic) + +parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt +parseOptions options' defaults = do + rawArgs <- map UTF8.decodeArg <$> getArgs + prg <- getProgName + + let (actions, args, unrecognizedOpts, errors) = + getOpt' Permute options' rawArgs + + let unknownOptionErrors = + foldr (handleUnrecognizedOption . takeWhile (/= '=')) [] + unrecognizedOpts + + unless (null errors && null unknownOptionErrors) $ + E.throwIO $ PandocOptionError $ + concat errors ++ unlines unknownOptionErrors ++ + ("Try " ++ prg ++ " --help for more information.") + + -- thread option data structure through all supplied option actions + opts <- foldl (>>=) (return defaults) actions + return (opts{ optInputFiles = args }) + +latexEngines :: [String] +latexEngines = ["pdflatex", "lualatex", "xelatex"] + +htmlEngines :: [String] +htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"] + +engines :: [(String, String)] +engines = map ("html",) htmlEngines ++ + map ("html5",) htmlEngines ++ + map ("latex",) latexEngines ++ + map ("beamer",) latexEngines ++ + [ ("ms", "pdfroff") + , ("context", "context") + ] + +pdfEngines :: [String] +pdfEngines = ordNub $ map snd engines + +pdfWriterAndProg :: Maybe String -- ^ user-specified writer name + -> Maybe String -- ^ user-specified pdf-engine + -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) +pdfWriterAndProg mWriter mEngine = do + let panErr msg = liftIO $ E.throwIO $ PandocAppError msg + case go mWriter mEngine of + Right (writ, prog) -> return (writ, Just prog) + Left err -> panErr err + where + go Nothing Nothing = Right ("latex", "pdflatex") + go (Just writer) Nothing = (writer,) <$> engineForWriter writer + go Nothing (Just engine) = (,engine) <$> writerForEngine engine + go (Just writer) (Just engine) = + case find (== (baseWriterName writer, engine)) engines of + Just _ -> Right (writer, engine) + Nothing -> Left $ "pdf-engine " ++ engine ++ + " is not compatible with output format " ++ writer + + writerForEngine eng = case [f | (f,e) <- engines, e == eng] of + fmt : _ -> Right fmt + [] -> Left $ + "pdf-engine " ++ eng ++ " not known" + + engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of + eng : _ -> Right eng + [] -> Left $ + "cannot produce pdf output from " ++ w + +convertWithOpts :: Opt -> IO () +convertWithOpts opts = do + let outputFile = fromMaybe "-" (optOutputFile opts) + let filters = optFilters opts + let verbosity = optVerbosity opts + + when (optDumpArgs opts) $ + do UTF8.hPutStrLn stdout outputFile + mapM_ (UTF8.hPutStrLn stdout) (optInputFiles opts) + exitSuccess + + epubMetadata <- case optEpubMetadata opts of + Nothing -> return Nothing + Just fp -> Just <$> UTF8.readFile fp + + let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc" + isPandocCiteproc _ = False + -- --bibliography implies -F pandoc-citeproc for backwards compatibility: + let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) && + optCiteMethod opts `notElem` [Natbib, Biblatex] && + all (not . isPandocCiteproc) filters + let filters' = if needsCiteproc then JSONFilter "pandoc-citeproc" : filters + else filters + + let sources = case optInputFiles opts of + [] -> ["-"] + xs | optIgnoreArgs opts -> ["-"] + | otherwise -> xs + + datadir <- case optDataDir opts of + Nothing -> E.catch + (Just <$> getAppUserDataDirectory "pandoc") + (\e -> let _ = (e :: E.SomeException) + in return Nothing) + Just _ -> return $ optDataDir opts + + -- assign reader and writer based on options and filenames + let readerName = fromMaybe ( defaultReaderName + (if any isURI sources + then "html" + else "markdown") sources) (optReader opts) + + let nonPdfWriterName Nothing = defaultWriterName outputFile + nonPdfWriterName (Just x) = x + + let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" + (writerName, maybePdfProg) <- + if pdfOutput + then pdfWriterAndProg (optWriter opts) (optPdfEngine opts) + else return (nonPdfWriterName $ optWriter opts, Nothing) + + let format = baseWriterName + $ takeFileName writerName -- in case path to lua script + + -- disabling the custom writer for now + (writer, writerExts) <- + if ".lua" `isSuffixOf` format + -- note: use non-lowercased version writerName + then return (TextWriter + (\o d -> writeCustom writerName o d) + :: Writer PandocIO, mempty) + else case getWriter writerName of + Left e -> E.throwIO $ PandocAppError $ + if format == "pdf" + then e ++ + "\nTo create a pdf using pandoc, use " ++ + "-t latex|beamer|context|ms|html5" ++ + "\nand specify an output file with " ++ + ".pdf extension (-o filename.pdf)." + else e + Right (w, es) -> return (w :: Writer PandocIO, es) + + -- TODO: we have to get the input and the output into the state for + -- the sake of the text2tags reader. + (reader, readerExts) <- + case getReader readerName of + Right (r, es) -> return (r :: Reader PandocIO, es) + Left e -> E.throwIO $ PandocAppError e' + where e' = case readerName of + "pdf" -> e ++ + "\nPandoc can convert to PDF, but not from PDF." + "doc" -> e ++ + "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." + _ -> e + + let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput + let addStringAsVariable varname s vars = return $ (varname, s) : vars + + highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts + let addSyntaxMap existingmap f = do + res <- parseSyntaxDefinition f + case res of + Left errstr -> E.throwIO $ PandocSyntaxMapError errstr + Right syn -> return $ addSyntaxDefinition syn existingmap + + syntaxMap <- foldM addSyntaxMap defaultSyntaxMap + (optSyntaxDefinitions opts) + + -- We don't want to send output to the terminal if the user + -- does 'pandoc -t docx input.txt'; though we allow them to + -- force this with '-o -'. On posix systems, we detect + -- when stdout is being piped and allow output to stdout + -- in that case, but on Windows we can't. +#ifdef _WINDOWS + let istty = True +#else + istty <- queryTerminal stdOutput +#endif + when (not (isTextFormat format) && istty && isNothing ( optOutputFile opts)) $ + E.throwIO $ PandocAppError $ + "Cannot write " ++ format ++ " output to terminal.\n" ++ + "Specify an output file using the -o option, or " ++ + "use '-o -' to force output to stdout." + + let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" + then 0 + else optTabStop opts) + + readSources :: [FilePath] -> PandocIO Text + readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> + mapM readSource srcs + + let runIO' :: PandocIO a -> IO a + runIO' f = do + (res, reports) <- runIOorExplode $ do + setTrace (optTrace opts) + setVerbosity verbosity + x <- f + rs <- getLog + return (x, rs) + case optLogFile opts of + Nothing -> return () + Just logfile -> B.writeFile logfile (encodeLogMessages reports) + let isWarning msg = messageVerbosity msg == WARNING + when (optFailIfWarnings opts && any isWarning reports) $ + E.throwIO PandocFailOnWarningError + return res + + let eol = case optEol opts of + CRLF -> IO.CRLF + LF -> IO.LF + Native -> nativeNewline + + -- note: this reverses the list constructed in option parsing, + -- which in turn was reversed from the command-line order, + -- so we end up with the correct order in the variable list: + let withList _ [] vars = return vars + withList f (x:xs) vars = f x vars >>= withList f xs + + let addContentsAsVariable varname fp vars = do + s <- UTF8.toString <$> readFileStrict fp + return $ (varname, s) : vars + + runIO' $ do + setUserDataDir datadir + setInputFiles (optInputFiles opts) + setOutputFile (optOutputFile opts) + + variables <- + withList (addStringAsVariable "sourcefile") + (reverse $ optInputFiles opts) + (("outputfile", fromMaybe "-" (optOutputFile opts)) + : optVariables opts) + -- we reverse this list because, unlike + -- the other option lists here, it is + -- not reversed when parsed from CLI arguments. + -- See withList, above. + >>= + withList (addContentsAsVariable "include-before") + (optIncludeBeforeBody opts) + >>= + withList (addContentsAsVariable "include-after") + (optIncludeAfterBody opts) + >>= + withList (addContentsAsVariable "header-includes") + (optIncludeInHeader opts) + >>= + withList (addStringAsVariable "css") (optCss opts) + >>= + maybe return (addStringAsVariable "title-prefix") + (optTitlePrefix opts) + >>= + maybe return (addStringAsVariable "epub-cover-image") + (optEpubCoverImage opts) + >>= + (\vars -> case optHTMLMathMethod opts of + LaTeXMathML Nothing -> do + s <- UTF8.toString <$> readDataFile "LaTeXMathML.js" + return $ ("mathml-script", s) : vars + _ -> return vars) + >>= + (\vars -> if format == "dzslides" + then do + dztempl <- UTF8.toString <$> readDataFile + ("dzslides" </> "template.html") + let dzline = "<!-- {{{{ dzslides core" + let dzcore = unlines + $ dropWhile (not . (dzline `isPrefixOf`)) + $ lines dztempl + return $ ("dzslides-core", dzcore) : vars + else return vars) + + abbrevs <- (Set.fromList . filter (not . null) . lines) <$> + case optAbbreviations opts of + Nothing -> UTF8.toString <$> readDataFile "abbreviations" + Just f -> UTF8.toString <$> readFileStrict f + + templ <- case optTemplate opts of + _ | not standalone -> return Nothing + Nothing -> Just <$> getDefaultTemplate format + Just tp -> do + -- strip off extensions + let tp' = case takeExtension tp of + "" -> tp <.> format + _ -> tp + Just . UTF8.toString <$> + (readFileStrict tp' `catchError` + (\e -> + case e of + PandocIOError _ e' | + isDoesNotExistError e' -> + readDataFile ("templates" </> tp') + _ -> throwError e)) + + metadata <- if format == "jats" && + isNothing (lookup "csl" (optMetadata opts)) && + isNothing (lookup "citation-style" (optMetadata opts)) + then do + jatsCSL <- readDataFile "jats.csl" + let jatsEncoded = makeDataURI + ("application/xml", jatsCSL) + return $ ("csl", jatsEncoded) : optMetadata opts + else return $ optMetadata opts + + case lookup "lang" (optMetadata opts) of + Just l -> case parseBCP47 l of + Left _ -> return () + Right l' -> setTranslations l' + Nothing -> setTranslations $ Lang "en" "" "US" [] + + let writerOptions = def { + writerTemplate = templ + , writerVariables = variables + , writerTabStop = optTabStop opts + , writerTableOfContents = optTableOfContents opts + , writerHTMLMathMethod = optHTMLMathMethod opts + , writerIncremental = optIncremental opts + , writerCiteMethod = optCiteMethod opts + , writerNumberSections = optNumberSections opts + , writerNumberOffset = optNumberOffset opts + , writerSectionDivs = optSectionDivs opts + , writerExtensions = writerExts + , writerReferenceLinks = optReferenceLinks opts + , writerReferenceLocation = optReferenceLocation opts + , writerDpi = optDpi opts + , writerWrapText = optWrapText opts + , writerColumns = optColumns opts + , writerEmailObfuscation = optEmailObfuscation opts + , writerIdentifierPrefix = optIdentifierPrefix opts + , writerHtmlQTags = optHtmlQTags opts + , writerTopLevelDivision = optTopLevelDivision opts + , writerListings = optListings opts + , writerSlideLevel = optSlideLevel opts + , writerHighlightStyle = highlightStyle + , writerSetextHeaders = optSetextHeaders opts + , writerEpubSubdirectory = optEpubSubdirectory opts + , writerEpubMetadata = epubMetadata + , writerEpubFonts = optEpubFonts opts + , writerEpubChapterLevel = optEpubChapterLevel opts + , writerTOCDepth = optTOCDepth opts + , writerReferenceDoc = optReferenceDoc opts + , writerSyntaxMap = syntaxMap + } + + let readerOpts = def{ + readerStandalone = standalone + , readerColumns = optColumns opts + , readerTabStop = optTabStop opts + , readerIndentedCodeClasses = optIndentedCodeClasses opts + , readerDefaultImageExtension = + optDefaultImageExtension opts + , readerTrackChanges = optTrackChanges opts + , readerAbbreviations = abbrevs + , readerExtensions = readerExts + , readerStripComments = optStripComments opts + } + + let transforms = (case optBaseHeaderLevel opts of + x | x > 1 -> (headerShift (x - 1) :) + | otherwise -> id) . + (if optStripEmptyParagraphs opts + then (stripEmptyParagraphs :) + else id) . + (if extensionEnabled Ext_east_asian_line_breaks + readerExts && + not (extensionEnabled Ext_east_asian_line_breaks + writerExts && + writerWrapText writerOptions == WrapPreserve) + then (eastAsianLineBreakFilter :) + else id) $ + [] + + let sourceToDoc :: [FilePath] -> PandocIO Pandoc + sourceToDoc sources' = + case reader of + TextReader r + | optFileScope opts || readerName == "json" -> + mconcat <$> mapM (readSource >=> r readerOpts) sources + | otherwise -> + readSources sources' >>= r readerOpts + ByteStringReader r -> + mconcat <$> mapM (readFile' >=> r readerOpts) sources + + + when (readerName == "markdown_github" || + writerName == "markdown_github") $ + report $ Deprecated "markdown_github" "Use gfm instead." + + setResourcePath (optResourcePath opts) + mapM_ (uncurry setRequestHeader) (optRequestHeaders opts) + + doc <- sourceToDoc sources >>= + ( (if isJust (optExtractMedia opts) + then fillMediaBag + else return) + >=> return . addMetadata metadata + >=> applyTransforms transforms + >=> applyFilters readerOpts filters' [format] + >=> maybe return extractMedia (optExtractMedia opts) + ) + + case writer of + ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile + TextWriter f -> case maybePdfProg of + Just pdfProg -> do + res <- makePDF pdfProg (optPdfEngineArgs opts) f + writerOptions doc + case res of + Right pdf -> writeFnBinary outputFile pdf + Left err' -> liftIO $ + E.throwIO $ PandocPDFError $ + TL.unpack (TE.decodeUtf8With TE.lenientDecode err') + + Nothing -> do + let htmlFormat = format `elem` + ["html","html4","html5","s5","slidy", + "slideous","dzslides","revealjs"] + handleEntities = if (htmlFormat || + format == "docbook4" || + format == "docbook5" || + format == "docbook") && optAscii opts + then toEntities + else id + addNl = if standalone + then id + else (<> T.singleton '\n') + output <- (addNl . handleEntities) <$> f writerOptions doc + writerFn eol outputFile =<< + if optSelfContained opts && htmlFormat + -- TODO not maximally efficient; change type + -- of makeSelfContained so it works w/ Text + then T.pack <$> makeSelfContained (T.unpack output) + else return output + +type Transform = Pandoc -> Pandoc + +isTextFormat :: String -> Bool +isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"] + +-- | Data structure for command line options. +data Opt = Opt + { optTabStop :: Int -- ^ Number of spaces per tab + , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces + , optStandalone :: Bool -- ^ Include header, footer + , optReader :: Maybe String -- ^ Reader format + , optWriter :: Maybe String -- ^ Writer format + , optTableOfContents :: Bool -- ^ Include table of contents + , optBaseHeaderLevel :: Int -- ^ Base header level + , optTemplate :: Maybe FilePath -- ^ Custom template + , optVariables :: [(String,String)] -- ^ Template variables to set + , optMetadata :: [(String, String)] -- ^ Metadata fields to set + , optOutputFile :: Maybe FilePath -- ^ Name of output file + , optInputFiles :: [FilePath] -- ^ Names of input files + , optNumberSections :: Bool -- ^ Number sections in LaTeX + , optNumberOffset :: [Int] -- ^ Starting number for sections + , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML + , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 + , optSelfContained :: Bool -- ^ Make HTML accessible offline + , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML + , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code + , optSyntaxDefinitions :: [FilePath] -- ^ xml syntax defs to load + , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions + , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math + , optAbbreviations :: Maybe FilePath -- ^ Path to abbrevs file + , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc + , optEpubSubdirectory :: String -- ^ EPUB subdir in OCF container + , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata + , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed + , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters + , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub + , optTOCDepth :: Int -- ^ Number of levels to include in TOC + , optDumpArgs :: Bool -- ^ Output command-line arguments + , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments + , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output + , optTrace :: Bool -- ^ Enable tracing + , optLogFile :: Maybe FilePath -- ^ File to write JSON log output + , optFailIfWarnings :: Bool -- ^ Fail on warnings + , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output + , optDpi :: Int -- ^ Dpi + , optWrapText :: WrapOption -- ^ Options for wrapping text + , optColumns :: Int -- ^ Line length in characters + , optFilters :: [Filter] -- ^ Filters to apply + , optEmailObfuscation :: ObfuscationMethod + , optIdentifierPrefix :: String + , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs + , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks + , optDataDir :: Maybe FilePath + , optCiteMethod :: CiteMethod -- ^ Method to output cites + , optListings :: Bool -- ^ Use listings package for code blocks + , optPdfEngine :: Maybe String -- ^ Program to use for latex/html -> pdf + , optPdfEngineArgs :: [String] -- ^ Flags to pass to the engine + , optSlideLevel :: Maybe Int -- ^ Header level that creates slides + , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 + , optAscii :: Bool -- ^ Use ascii characters only in html + , optDefaultImageExtension :: String -- ^ Default image extension + , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media + , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. + , optFileScope :: Bool -- ^ Parse input files before combining + , optTitlePrefix :: Maybe String -- ^ Prefix for title + , optCss :: [FilePath] -- ^ CSS files to link to + , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before + , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body + , optIncludeInHeader :: [FilePath] -- ^ Files to include in header + , optResourcePath :: [FilePath] -- ^ Path to search for images etc + , optRequestHeaders :: [(String, String)] -- ^ Headers for HTTP requests + , optEol :: LineEnding -- ^ Style of line-endings to use + , optStripComments :: Bool -- ^ Skip HTML comments + } deriving (Generic, Show) + +-- | Defaults for command-line options. +defaultOpts :: Opt +defaultOpts = Opt + { optTabStop = 4 + , optPreserveTabs = False + , optStandalone = False + , optReader = Nothing + , optWriter = Nothing + , optTableOfContents = False + , optBaseHeaderLevel = 1 + , optTemplate = Nothing + , optVariables = [] + , optMetadata = [] + , optOutputFile = Nothing + , optInputFiles = [] + , optNumberSections = False + , optNumberOffset = [0,0,0,0,0,0] + , optSectionDivs = False + , optIncremental = False + , optSelfContained = False + , optHtmlQTags = False + , optHighlightStyle = Just "pygments" + , optSyntaxDefinitions = [] + , optTopLevelDivision = TopLevelDefault + , optHTMLMathMethod = PlainMath + , optAbbreviations = Nothing + , optReferenceDoc = Nothing + , optEpubSubdirectory = "EPUB" + , optEpubMetadata = Nothing + , optEpubFonts = [] + , optEpubChapterLevel = 1 + , optEpubCoverImage = Nothing + , optTOCDepth = 3 + , optDumpArgs = False + , optIgnoreArgs = False + , optVerbosity = WARNING + , optTrace = False + , optLogFile = Nothing + , optFailIfWarnings = False + , optReferenceLinks = False + , optReferenceLocation = EndOfDocument + , optDpi = 96 + , optWrapText = WrapAuto + , optColumns = 72 + , optFilters = [] + , optEmailObfuscation = NoObfuscation + , optIdentifierPrefix = "" + , optStripEmptyParagraphs = False + , optIndentedCodeClasses = [] + , optDataDir = Nothing + , optCiteMethod = Citeproc + , optListings = False + , optPdfEngine = Nothing + , optPdfEngineArgs = [] + , optSlideLevel = Nothing + , optSetextHeaders = True + , optAscii = False + , optDefaultImageExtension = "" + , optExtractMedia = Nothing + , optTrackChanges = AcceptChanges + , optFileScope = False + , optTitlePrefix = Nothing + , optCss = [] + , optIncludeBeforeBody = [] + , optIncludeAfterBody = [] + , optIncludeInHeader = [] + , optResourcePath = ["."] + , optRequestHeaders = [] + , optEol = Native + , optStripComments = False + } + +addMetadata :: [(String, String)] -> Pandoc -> Pandoc +addMetadata kvs pdc = foldr addMeta (removeMetaKeys kvs pdc) kvs + +addMeta :: (String, String) -> Pandoc -> Pandoc +addMeta (k, v) (Pandoc meta bs) = Pandoc meta' bs + where meta' = case lookupMeta k meta of + Nothing -> setMeta k v' meta + Just (MetaList xs) -> + setMeta k (MetaList (xs ++ [v'])) meta + Just x -> setMeta k (MetaList [x, v']) meta + v' = readMetaValue v + +removeMetaKeys :: [(String,String)] -> Pandoc -> Pandoc +removeMetaKeys kvs pdc = foldr (deleteMeta . fst) pdc kvs + +readMetaValue :: String -> MetaValue +readMetaValue s = case decode (UTF8.fromString s) of + Just (Yaml.String t) -> MetaString $ T.unpack t + Just (Yaml.Bool b) -> MetaBool b + _ -> MetaString s + +-- Determine default reader based on source file extensions +defaultReaderName :: String -> [FilePath] -> String +defaultReaderName fallback [] = fallback +defaultReaderName fallback (x:xs) = + case takeExtension (map toLower x) of + ".xhtml" -> "html" + ".html" -> "html" + ".htm" -> "html" + ".md" -> "markdown" + ".markdown" -> "markdown" + ".muse" -> "muse" + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".rst" -> "rst" + ".org" -> "org" + ".lhs" -> "markdown+lhs" + ".db" -> "docbook" + ".opml" -> "opml" + ".wiki" -> "mediawiki" + ".dokuwiki" -> "dokuwiki" + ".textile" -> "textile" + ".native" -> "native" + ".json" -> "json" + ".docx" -> "docx" + ".t2t" -> "t2t" + ".epub" -> "epub" + ".odt" -> "odt" + ".pdf" -> "pdf" -- so we get an "unknown reader" error + ".doc" -> "doc" -- so we get an "unknown reader" error + _ -> defaultReaderName fallback xs + +-- Determine default writer based on output file extension +defaultWriterName :: FilePath -> String +defaultWriterName "-" = "html" -- no output file +defaultWriterName x = + case takeExtension (map toLower x) of + "" -> "markdown" -- empty extension + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".context" -> "context" + ".ctx" -> "context" + ".rtf" -> "rtf" + ".rst" -> "rst" + ".s5" -> "s5" + ".native" -> "native" + ".json" -> "json" + ".txt" -> "markdown" + ".text" -> "markdown" + ".md" -> "markdown" + ".muse" -> "muse" + ".markdown" -> "markdown" + ".textile" -> "textile" + ".lhs" -> "markdown+lhs" + ".texi" -> "texinfo" + ".texinfo" -> "texinfo" + ".db" -> "docbook" + ".odt" -> "odt" + ".docx" -> "docx" + ".epub" -> "epub" + ".org" -> "org" + ".asciidoc" -> "asciidoc" + ".adoc" -> "asciidoc" + ".fb2" -> "fb2" + ".opml" -> "opml" + ".icml" -> "icml" + ".tei.xml" -> "tei" + ".tei" -> "tei" + ".ms" -> "ms" + ".roff" -> "ms" + ".pptx" -> "pptx" + ['.',y] | y `elem` ['1'..'9'] -> "man" + _ -> "html" + +-- Transformations of a Pandoc document post-parsing: + +applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc +applyTransforms transforms d = return $ foldr ($) d transforms + +readSource :: FilePath -> PandocIO Text +readSource "-" = liftIO (UTF8.toText <$> BS.getContents) +readSource src = case parseURI src of + Just u | uriScheme u `elem` ["http:","https:"] -> + readURI src + | uriScheme u == "file:" -> + liftIO $ UTF8.toText <$> + BS.readFile (uriPath u) + _ -> liftIO $ UTF8.toText <$> + BS.readFile src + +readURI :: FilePath -> PandocIO Text +readURI src = UTF8.toText . fst <$> openURL src + +readFile' :: MonadIO m => FilePath -> m B.ByteString +readFile' "-" = liftIO B.getContents +readFile' f = liftIO $ B.readFile f + +writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m () +writeFnBinary "-" = liftIO . B.putStr +writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f) + +writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m () +-- TODO this implementation isn't maximally efficient: +writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack +writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack + +lookupHighlightStyle :: Maybe String -> IO (Maybe Style) +lookupHighlightStyle Nothing = return Nothing +lookupHighlightStyle (Just s) + | takeExtension s == ".theme" = -- attempt to load KDE theme + do contents <- B.readFile s + case parseTheme contents of + Left _ -> E.throwIO $ PandocOptionError $ + "Could not read highlighting theme " ++ s + Right sty -> return (Just sty) + | otherwise = + case lookup (map toLower s) highlightingStyles of + Just sty -> return (Just sty) + Nothing -> E.throwIO $ PandocOptionError $ + "Unknown highlight-style " ++ s + +-- | A list of functions, each transforming the options data structure +-- in response to a command-line option. +options :: [OptDescr (Opt -> IO Opt)] +options = + [ Option "fr" ["from","read"] + (ReqArg + (\arg opt -> return opt { optReader = + Just (map toLower arg) }) + "FORMAT") + "" + + , Option "tw" ["to","write"] + (ReqArg + (\arg opt -> return opt { optWriter = + Just (map toLower arg) }) + "FORMAT") + "" + + , Option "o" ["output"] + (ReqArg + (\arg opt -> return opt { optOutputFile = Just arg }) + "FILE") + "" -- "Name of output file" + + , Option "" ["data-dir"] + (ReqArg + (\arg opt -> return opt { optDataDir = Just arg }) + "DIRECTORY") -- "Directory containing pandoc data files." + "" + + , Option "" ["base-header-level"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t > 0 && t < 6 -> + return opt{ optBaseHeaderLevel = t } + _ -> E.throwIO $ PandocOptionError + "base-header-level must be 1-5") + "NUMBER") + "" -- "Headers base level" + + , Option "" ["strip-empty-paragraphs"] + (NoArg + (\opt -> do + deprecatedOption "--stripEmptyParagraphs" + "Use +empty_paragraphs extension." + return opt{ optStripEmptyParagraphs = True })) + "" -- "Strip empty paragraphs" + + , Option "" ["indented-code-classes"] + (ReqArg + (\arg opt -> return opt { optIndentedCodeClasses = words $ + map (\c -> if c == ',' then ' ' else c) arg }) + "STRING") + "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" + + , Option "F" ["filter"] + (ReqArg + (\arg opt -> return opt { optFilters = + JSONFilter arg : optFilters opt }) + "PROGRAM") + "" -- "External JSON filter" + + , Option "" ["lua-filter"] + (ReqArg + (\arg opt -> return opt { optFilters = + LuaFilter arg : optFilters opt }) + "SCRIPTPATH") + "" -- "Lua filter" + + , Option "p" ["preserve-tabs"] + (NoArg + (\opt -> return opt { optPreserveTabs = True })) + "" -- "Preserve tabs instead of converting to spaces" + + , Option "" ["tab-stop"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t > 0 -> return opt { optTabStop = t } + _ -> E.throwIO $ PandocOptionError + "tab-stop must be a number greater than 0") + "NUMBER") + "" -- "Tab stop (default 4)" + + , Option "" ["track-changes"] + (ReqArg + (\arg opt -> do + action <- case arg of + "accept" -> return AcceptChanges + "reject" -> return RejectChanges + "all" -> return AllChanges + _ -> E.throwIO $ PandocOptionError + ("Unknown option for track-changes: " ++ arg) + return opt { optTrackChanges = action }) + "accept|reject|all") + "" -- "Accepting or reject MS Word track-changes."" + + , Option "" ["file-scope"] + (NoArg + (\opt -> return opt { optFileScope = True })) + "" -- "Parse input files before combining" + + , Option "" ["extract-media"] + (ReqArg + (\arg opt -> + return opt { optExtractMedia = Just arg }) + "PATH") + "" -- "Directory to which to extract embedded media" + + , Option "s" ["standalone"] + (NoArg + (\opt -> return opt { optStandalone = True })) + "" -- "Include needed header and footer on output" + + , Option "" ["template"] + (ReqArg + (\arg opt -> + return opt{ optTemplate = Just arg, + optStandalone = True }) + "FILE") + "" -- "Use custom template" + + , Option "M" ["metadata"] + (ReqArg + (\arg opt -> do + let (key, val) = splitField arg + return opt{ optMetadata = (key, val) : optMetadata opt }) + "KEY[:VALUE]") + "" + + , Option "V" ["variable"] + (ReqArg + (\arg opt -> do + let (key, val) = splitField arg + return opt{ optVariables = (key, val) : optVariables opt }) + "KEY[:VALUE]") + "" + + , Option "D" ["print-default-template"] + (ReqArg + (\arg _ -> do + templ <- runIO $ do + setUserDataDir Nothing + getDefaultTemplate arg + case templ of + Right t -> UTF8.hPutStr stdout t + Left e -> E.throwIO e + exitSuccess) + "FORMAT") + "" -- "Print default template for FORMAT" + + , Option "" ["print-default-data-file"] + (ReqArg + (\arg _ -> do + runIOorExplode $ + readDefaultDataFile arg >>= liftIO . BS.hPutStr stdout + exitSuccess) + "FILE") + "" -- "Print default data file" + + , Option "" ["print-highlight-style"] + (ReqArg + (\arg _ -> do + sty <- fromMaybe pygments <$> + lookupHighlightStyle (Just arg) + B.putStr $ encodePretty' + defConfig{confIndent = Spaces 4 + ,confCompare = keyOrder + (map T.pack + ["text-color" + ,"background-color" + ,"line-number-color" + ,"line-number-background-color" + ,"bold" + ,"italic" + ,"underline" + ,"text-styles"]) + ,confNumFormat = Generic + ,confTrailingNewline = True} sty + exitSuccess) + "STYLE|FILE") + "" -- "Print default template for FORMAT" + + , Option "" ["dpi"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t > 0 -> return opt { optDpi = t } + _ -> E.throwIO $ PandocOptionError + "dpi must be a number greater than 0") + "NUMBER") + "" -- "Dpi (default 96)" + + , Option "" ["eol"] + (ReqArg + (\arg opt -> + case toLower <$> arg of + "crlf" -> return opt { optEol = CRLF } + "lf" -> return opt { optEol = LF } + "native" -> return opt { optEol = Native } + -- mac-syntax (cr) is not supported in ghc-base. + _ -> E.throwIO $ PandocOptionError + "--eol must be crlf, lf, or native") + "crlf|lf|native") + "" -- "EOL (default OS-dependent)" + + , Option "" ["wrap"] + (ReqArg + (\arg opt -> + case safeRead ("Wrap" ++ uppercaseFirstLetter arg) of + Just o -> return opt { optWrapText = o } + Nothing -> E.throwIO $ PandocOptionError + "--wrap must be auto, none, or preserve") + "auto|none|preserve") + "" -- "Option for wrapping text in output" + + , Option "" ["columns"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t > 0 -> return opt { optColumns = t } + _ -> E.throwIO $ PandocOptionError + "columns must be a number greater than 0") + "NUMBER") + "" -- "Length of line in characters" + + , Option "" ["strip-comments"] + (NoArg + (\opt -> return opt { optStripComments = True })) + "" -- "Strip HTML comments" + + , Option "" ["toc", "table-of-contents"] + (NoArg + (\opt -> return opt { optTableOfContents = True })) + "" -- "Include table of contents" + + , Option "" ["toc-depth"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t >= 1 && t <= 6 -> + return opt { optTOCDepth = t } + _ -> E.throwIO $ PandocOptionError + "TOC level must be a number between 1 and 6") + "NUMBER") + "" -- "Number of levels to include in TOC" + + , Option "" ["no-highlight"] + (NoArg + (\opt -> return opt { optHighlightStyle = Nothing })) + "" -- "Don't highlight source code" + + , Option "" ["highlight-style"] + (ReqArg + (\arg opt -> return opt{ optHighlightStyle = Just arg }) + "STYLE|FILE") + "" -- "Style for highlighted code" + + , Option "" ["syntax-definition"] + (ReqArg + (\arg opt -> return opt{ optSyntaxDefinitions = arg : + optSyntaxDefinitions opt }) + "FILE") + "" -- "Syntax definition (xml) file" + + , Option "H" ["include-in-header"] + (ReqArg + (\arg opt -> return opt{ optIncludeInHeader = + arg : optIncludeInHeader opt, + optStandalone = True }) + "FILE") + "" -- "File to include at end of header (implies -s)" + + , Option "B" ["include-before-body"] + (ReqArg + (\arg opt -> return opt{ optIncludeBeforeBody = + arg : optIncludeBeforeBody opt, + optStandalone = True }) + "FILE") + "" -- "File to include before document body" + + , Option "A" ["include-after-body"] + (ReqArg + (\arg opt -> return opt{ optIncludeAfterBody = + arg : optIncludeAfterBody opt, + optStandalone = True }) + "FILE") + "" -- "File to include after document body" + + , Option "" ["resource-path"] + (ReqArg + (\arg opt -> return opt { optResourcePath = + splitSearchPath arg }) + "SEARCHPATH") + "" -- "Paths to search for images and other resources" + + , Option "" ["request-header"] + (ReqArg + (\arg opt -> do + let (key, val) = splitField arg + return opt{ optRequestHeaders = + (key, val) : optRequestHeaders opt }) + "NAME:VALUE") + "" + + , Option "" ["self-contained"] + (NoArg + (\opt -> return opt { optSelfContained = True, + optStandalone = True })) + "" -- "Make slide shows include all the needed js and css" + + , Option "" ["html-q-tags"] + (NoArg + (\opt -> + return opt { optHtmlQTags = True })) + "" -- "Use <q> tags for quotes in HTML" + + , Option "" ["ascii"] + (NoArg + (\opt -> return opt { optAscii = True })) + "" -- "Use ascii characters only in HTML output" + + , Option "" ["reference-links"] + (NoArg + (\opt -> return opt { optReferenceLinks = True } )) + "" -- "Use reference links in parsing HTML" + + , Option "" ["reference-location"] + (ReqArg + (\arg opt -> do + action <- case arg of + "block" -> return EndOfBlock + "section" -> return EndOfSection + "document" -> return EndOfDocument + _ -> E.throwIO $ PandocOptionError + ("Unknown option for reference-location: " ++ arg) + return opt { optReferenceLocation = action }) + "block|section|document") + "" -- "Accepting or reject MS Word track-changes."" + + , Option "" ["atx-headers"] + (NoArg + (\opt -> return opt { optSetextHeaders = False } )) + "" -- "Use atx-style headers for markdown" + + , Option "" ["top-level-division"] + (ReqArg + (\arg opt -> do + let tldName = "TopLevel" ++ uppercaseFirstLetter arg + case safeRead tldName of + Just tlDiv -> return opt { optTopLevelDivision = tlDiv } + _ -> E.throwIO $ PandocOptionError + ("Top-level division must be " ++ + "section, chapter, part, or default")) + "section|chapter|part") + "" -- "Use top-level division type in LaTeX, ConTeXt, DocBook" + + , Option "N" ["number-sections"] + (NoArg + (\opt -> return opt { optNumberSections = True })) + "" -- "Number sections in LaTeX" + + , Option "" ["number-offset"] + (ReqArg + (\arg opt -> + case safeRead ('[':arg ++ "]") of + Just ns -> return opt { optNumberOffset = ns, + optNumberSections = True } + _ -> E.throwIO $ PandocOptionError + "could not parse number-offset") + "NUMBERS") + "" -- "Starting number for sections, subsections, etc." + + , Option "" ["listings"] + (NoArg + (\opt -> return opt { optListings = True })) + "" -- "Use listings package for LaTeX code blocks" + + , Option "i" ["incremental"] + (NoArg + (\opt -> return opt { optIncremental = True })) + "" -- "Make list items display incrementally in Slidy/Slideous/S5" + + , Option "" ["slide-level"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t >= 1 && t <= 6 -> + return opt { optSlideLevel = Just t } + _ -> E.throwIO $ PandocOptionError + "slide level must be a number between 1 and 6") + "NUMBER") + "" -- "Force header level for slides" + + , Option "" ["section-divs"] + (NoArg + (\opt -> return opt { optSectionDivs = True })) + "" -- "Put sections in div tags in HTML" + + , Option "" ["default-image-extension"] + (ReqArg + (\arg opt -> return opt { optDefaultImageExtension = arg }) + "extension") + "" -- "Default extension for extensionless images" + + , Option "" ["email-obfuscation"] + (ReqArg + (\arg opt -> do + method <- case arg of + "references" -> return ReferenceObfuscation + "javascript" -> return JavascriptObfuscation + "none" -> return NoObfuscation + _ -> E.throwIO $ PandocOptionError + ("Unknown obfuscation method: " ++ arg) + return opt { optEmailObfuscation = method }) + "none|javascript|references") + "" -- "Method for obfuscating email in HTML" + + , Option "" ["id-prefix"] + (ReqArg + (\arg opt -> return opt { optIdentifierPrefix = arg }) + "STRING") + "" -- "Prefix to add to automatically generated HTML identifiers" + + , Option "T" ["title-prefix"] + (ReqArg + (\arg opt -> do + let newvars = ("title-prefix", arg) : optVariables opt + return opt { optVariables = newvars, + optStandalone = True }) + "STRING") + "" -- "String to prefix to HTML window title" + + , Option "c" ["css"] + (ReqArg + (\arg opt -> return opt{ optCss = arg : optCss opt }) + -- add new link to end, so it is included in proper order + "URL") + "" -- "Link to CSS style sheet" + + , Option "" ["reference-doc"] + (ReqArg + (\arg opt -> + return opt { optReferenceDoc = Just arg }) + "FILE") + "" -- "Path of custom reference doc" + + , Option "" ["epub-subdirectory"] + (ReqArg + (\arg opt -> + return opt { optEpubSubdirectory = arg }) + "DIRNAME") + "" -- "Name of subdirectory for epub content in OCF container" + + , Option "" ["epub-cover-image"] + (ReqArg + (\arg opt -> + return opt { optVariables = + ("epub-cover-image", arg) : optVariables opt }) + "FILE") + "" -- "Path of epub cover image" + + , Option "" ["epub-metadata"] + (ReqArg + (\arg opt -> return opt { optEpubMetadata = Just arg }) + "FILE") + "" -- "Path of epub metadata file" + + , Option "" ["epub-embed-font"] + (ReqArg + (\arg opt -> + return opt{ optEpubFonts = arg : optEpubFonts opt }) + "FILE") + "" -- "Directory of fonts to embed" + + , Option "" ["epub-chapter-level"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t >= 1 && t <= 6 -> + return opt { optEpubChapterLevel = t } + _ -> E.throwIO $ PandocOptionError + "chapter level must be a number between 1 and 6") + "NUMBER") + "" -- "Header level at which to split chapters in EPUB" + + , Option "" ["pdf-engine"] + (ReqArg + (\arg opt -> do + let b = takeBaseName arg + if b `elem` pdfEngines + then return opt { optPdfEngine = Just arg } + else E.throwIO $ PandocOptionError $ "pdf-engine must be one of " + ++ intercalate ", " pdfEngines) + "PROGRAM") + "" -- "Name of program to use in generating PDF" + + , Option "" ["pdf-engine-opt"] + (ReqArg + (\arg opt -> do + let oldArgs = optPdfEngineArgs opt + return opt { optPdfEngineArgs = oldArgs ++ [arg]}) + "STRING") + "" -- "Flags to pass to the PDF-engine, all instances of this option are accumulated and used" + + , Option "" ["bibliography"] + (ReqArg + (\arg opt -> return opt{ optMetadata = + ("bibliography", arg) : optMetadata opt }) + "FILE") + "" + + , Option "" ["csl"] + (ReqArg + (\arg opt -> + return opt{ optMetadata = + ("csl", arg) : optMetadata opt }) + "FILE") + "" + + , Option "" ["citation-abbreviations"] + (ReqArg + (\arg opt -> + return opt{ optMetadata = + ("citation-abbreviations", arg): optMetadata opt }) + "FILE") + "" + + , Option "" ["natbib"] + (NoArg + (\opt -> return opt { optCiteMethod = Natbib })) + "" -- "Use natbib cite commands in LaTeX output" + + , Option "" ["biblatex"] + (NoArg + (\opt -> return opt { optCiteMethod = Biblatex })) + "" -- "Use biblatex cite commands in LaTeX output" + + , Option "" ["mathml"] + (NoArg + (\opt -> + return opt { optHTMLMathMethod = MathML })) + "" -- "Use mathml for HTML math" + + , Option "" ["webtex"] + (OptArg + (\arg opt -> do + let url' = fromMaybe "https://latex.codecogs.com/png.latex?" arg + return opt { optHTMLMathMethod = WebTeX url' }) + "URL") + "" -- "Use web service for HTML math" + + , Option "" ["mathjax"] + (OptArg + (\arg opt -> do + let url' = fromMaybe (defaultMathJaxURL ++ + "MathJax.js?config=TeX-AMS_CHTML-full") arg + return opt { optHTMLMathMethod = MathJax url'}) + "URL") + "" -- "Use MathJax for HTML math" + + , Option "" ["katex"] + (OptArg + (\arg opt -> + return opt + { optHTMLMathMethod = KaTeX $ + fromMaybe defaultKaTeXURL arg }) + "URL") + "" -- Use KaTeX for HTML Math + + , Option "m" ["latexmathml", "asciimathml"] + (OptArg + (\arg opt -> do + deprecatedOption "--latexmathml, --asciimathml, -m" "" + return opt { optHTMLMathMethod = LaTeXMathML arg }) + "URL") + "" -- "Use LaTeXMathML script in html output" + + , Option "" ["mimetex"] + (OptArg + (\arg opt -> do + deprecatedOption "--mimetex" "" + let url' = case arg of + Just u -> u ++ "?" + Nothing -> "/cgi-bin/mimetex.cgi?" + return opt { optHTMLMathMethod = WebTeX url' }) + "URL") + "" -- "Use mimetex for HTML math" + + , Option "" ["jsmath"] + (OptArg + (\arg opt -> do + deprecatedOption "--jsmath" "" + return opt { optHTMLMathMethod = JsMath arg}) + "URL") + "" -- "Use jsMath for HTML math" + + , Option "" ["gladtex"] + (NoArg + (\opt -> do + deprecatedOption "--gladtex" "" + return opt { optHTMLMathMethod = GladTeX })) + "" -- "Use gladtex for HTML math" + + , Option "" ["abbreviations"] + (ReqArg + (\arg opt -> return opt { optAbbreviations = Just arg }) + "FILE") + "" -- "Specify file for custom abbreviations" + + , Option "" ["trace"] + (NoArg + (\opt -> return opt { optTrace = True })) + "" -- "Turn on diagnostic tracing in readers." + + , Option "" ["dump-args"] + (NoArg + (\opt -> return opt { optDumpArgs = True })) + "" -- "Print output filename and arguments to stdout." + + , Option "" ["ignore-args"] + (NoArg + (\opt -> return opt { optIgnoreArgs = True })) + "" -- "Ignore command-line arguments." + + , Option "" ["verbose"] + (NoArg + (\opt -> return opt { optVerbosity = INFO })) + "" -- "Verbose diagnostic output." + + , Option "" ["quiet"] + (NoArg + (\opt -> return opt { optVerbosity = ERROR })) + "" -- "Suppress warnings." + + , Option "" ["fail-if-warnings"] + (NoArg + (\opt -> return opt { optFailIfWarnings = True })) + "" -- "Exit with error status if there were warnings." + + , Option "" ["log"] + (ReqArg + (\arg opt -> return opt{ optLogFile = Just arg }) + "FILE") + "" -- "Log messages in JSON format to this file." + + , Option "" ["bash-completion"] + (NoArg + (\_ -> do + ddir <- getDataDir + tpl <- runIOorExplode $ + UTF8.toString <$> + readDefaultDataFile "bash_completion.tpl" + let optnames (Option shorts longs _ _) = + map (\c -> ['-',c]) shorts ++ + map ("--" ++) longs + let allopts = unwords (concatMap optnames options) + UTF8.hPutStrLn stdout $ printf tpl allopts + (unwords readersNames) + (unwords writersNames) + (unwords $ map fst highlightingStyles) + ddir + exitSuccess )) + "" -- "Print bash completion script" + + , Option "" ["list-input-formats"] + (NoArg + (\_ -> do + mapM_ (UTF8.hPutStrLn stdout) readersNames + exitSuccess )) + "" + + , Option "" ["list-output-formats"] + (NoArg + (\_ -> do + mapM_ (UTF8.hPutStrLn stdout) writersNames + exitSuccess )) + "" + + , Option "" ["list-extensions"] + (OptArg + (\arg _ -> do + let exts = getDefaultExtensions (fromMaybe "markdown" arg) + let showExt x = (if extensionEnabled x exts + then '+' + else '-') : drop 4 (show x) + mapM_ (UTF8.hPutStrLn stdout . showExt) + ([minBound..maxBound] :: [Extension]) + exitSuccess ) + "FORMAT") + "" + + , Option "" ["list-highlight-languages"] + (NoArg + (\_ -> do + let langs = [ T.unpack (T.toLower (sShortname s)) + | s <- M.elems defaultSyntaxMap + , sShortname s `notElem` + [T.pack "Alert", T.pack "Alert_indent"] + ] + mapM_ (UTF8.hPutStrLn stdout) langs + exitSuccess )) + "" + + , Option "" ["list-highlight-styles"] + (NoArg + (\_ -> do + mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles + exitSuccess )) + "" + + , Option "v" ["version"] + (NoArg + (\_ -> do + prg <- getProgName + defaultDatadir <- E.catch + (getAppUserDataDirectory "pandoc") + (\e -> let _ = (e :: E.SomeException) + in return "") + UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ + compileInfo ++ "\nDefault user data directory: " ++ + defaultDatadir ++ copyrightMessage) + exitSuccess )) + "" -- "Print version" + + , Option "h" ["help"] + (NoArg + (\_ -> do + prg <- getProgName + UTF8.hPutStr stdout (usageMessage prg options) + exitSuccess )) + "" -- "Show help" + + ] + +-- Returns usage message +usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String +usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]") + +copyrightMessage :: String +copyrightMessage = intercalate "\n" [ + "", + "Copyright (C) 2006-2018 John MacFarlane", + "Web: http://pandoc.org", + "This is free software; see the source for copying conditions.", + "There is no warranty, not even for merchantability or fitness", + "for a particular purpose." ] + +compileInfo :: String +compileInfo = + "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++ + VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting + +handleUnrecognizedOption :: String -> [String] -> [String] +handleUnrecognizedOption "--smart" = + (("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++ + "For example: pandoc -f markdown+smart -t markdown-smart.") :) +handleUnrecognizedOption "--normalize" = + ("--normalize has been removed. Normalization is now automatic." :) +handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart" +handleUnrecognizedOption "--old-dashes" = + ("--old-dashes has been removed. Use +old_dashes extension instead." :) +handleUnrecognizedOption "--no-wrap" = + ("--no-wrap has been removed. Use --wrap=none instead." :) +handleUnrecognizedOption "--latex-engine" = + ("--latex-engine has been removed. Use --pdf-engine instead." :) +handleUnrecognizedOption "--latex-engine-opt" = + ("--latex-engine-opt has been removed. Use --pdf-engine-opt instead." :) +handleUnrecognizedOption "--chapters" = + ("--chapters has been removed. Use --top-level-division=chapter instead." :) +handleUnrecognizedOption "--reference-docx" = + ("--reference-docx has been removed. Use --reference-doc instead." :) +handleUnrecognizedOption "--reference-odt" = + ("--reference-odt has been removed. Use --reference-doc instead." :) +handleUnrecognizedOption "--parse-raw" = + ("--parse-raw/-R has been removed. Use +raw_html or +raw_tex extension.\n" :) +handleUnrecognizedOption "--epub-stylesheet" = + ("--epub-stylesheet has been removed. Use --css instead.\n" :) +handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw" +handleUnrecognizedOption x = + (("Unknown option " ++ x ++ ".") :) + +uppercaseFirstLetter :: String -> String +uppercaseFirstLetter (c:cs) = toUpper c : cs +uppercaseFirstLetter [] = [] + +readersNames :: [String] +readersNames = sort (map fst (readers :: [(String, Reader PandocIO)])) + +writersNames :: [String] +writersNames = sort (map fst (writers :: [(String, Writer PandocIO)])) + +splitField :: String -> (String, String) +splitField s = + case break (`elem` ":=") s of + (k,_:v) -> (k,v) + (k,[]) -> (k,"true") + +baseWriterName :: String -> String +baseWriterName = takeWhile (\c -> c /= '+' && c /= '-') + +deprecatedOption :: String -> String -> IO () +deprecatedOption o msg = + runIO (report $ Deprecated o msg) >>= + \r -> case r of + Right () -> return () + Left e -> E.throwIO e + +-- see https://github.com/jgm/pandoc/pull/4083 +-- using generic deriving caused long compilation times +$(deriveJSON defaultOptions ''LineEnding) +$(deriveJSON defaultOptions ''Opt) diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index 8eb1ba663..11d3eddac 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Asciify - Copyright : Copyright (C) 2013-2016 John MacFarlane + Copyright : Copyright (C) 2013-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,8 +30,8 @@ ascii equivalents (used in constructing HTML identifiers). -} module Text.Pandoc.Asciify (toAsciiChar) where -import qualified Data.Map as M import Data.Char (isAscii) +import qualified Data.Map as M toAsciiChar :: Char -> Maybe Char toAsciiChar c | isAscii c = Just c diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs new file mode 100644 index 000000000..2dd825142 --- /dev/null +++ b/src/Text/Pandoc/BCP47.hs @@ -0,0 +1,125 @@ +{- +Copyright (C) 2017–2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.BCP47 + Copyright : Copyright (C) 2017–2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions for parsing and rendering BCP47 language identifiers. +-} +module Text.Pandoc.BCP47 ( + getLang + , parseBCP47 + , Lang(..) + , renderLang + ) +where +import Control.Monad (guard) +import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper, toLower, + toUpper) +import Data.List (intercalate) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import qualified Text.Parsec as P + +-- | Represents BCP 47 language/country code. +data Lang = Lang{ langLanguage :: String + , langScript :: String + , langRegion :: String + , langVariants :: [String] } + deriving (Eq, Ord, Show) + +-- | Render a Lang as BCP 47. +renderLang :: Lang -> String +renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) + ([langScript lang, langRegion lang] ++ langVariants lang)) + +-- | Get the contents of the `lang` metadata field or variable. +getLang :: WriterOptions -> Meta -> Maybe String +getLang opts meta = + case lookup "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing + +-- | Parse a BCP 47 string as a Lang. Currently we parse +-- extensions and private-use fields as "variants," even +-- though officially they aren't. +parseBCP47 :: String -> Either String Lang +parseBCP47 lang = + case P.parse bcp47 "lang" lang of + Right r -> Right r + Left e -> Left $ show e + where bcp47 = do + language <- pLanguage + script <- P.option "" pScript + region <- P.option "" pRegion + variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) + P.eof + return Lang{ langLanguage = language + , langScript = script + , langRegion = region + , langVariants = variants } + asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) + pLanguage = do + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return $ map toLower cs + pScript = P.try $ do + P.char '-' + x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) + xs <- P.count 3 + (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) + return $ map toLower (x:xs) + pRegion = P.try $ do + P.char '-' + cs <- P.many1 asciiLetter + let lcs = length cs + guard $ lcs == 2 || lcs == 3 + return $ map toUpper cs + pVariant = P.try $ do + P.char '-' + ds <- P.option "" (P.count 1 P.digit) + cs <- P.many1 asciiLetter + let var = ds ++ cs + guard $ if null ds + then length var >= 5 && length var <= 8 + else length var == 4 + return $ map toLower var + pExtension = P.try $ do + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ length cs >= 2 && length cs <= 8 + return $ map toLower cs + pPrivateUse = P.try $ do + P.char '-' + P.char 'x' + P.char '-' + cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) + guard $ not (null cs) && length cs <= 8 + let var = "x-" ++ cs + return $ map toLower var diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs index f479ed9d0..d44b5e1e2 100644 --- a/src/Text/Pandoc/CSS.hs +++ b/src/Text/Pandoc/CSS.hs @@ -11,7 +11,7 @@ import Text.Parsec.String ruleParser :: Parser (String, String) ruleParser = do p <- many1 (noneOf ":") <* char ':' - v <- many1 (noneOf ":;") <* (optional $ char ';') <* spaces + v <- many1 (noneOf ":;") <* optional (char ';') <* spaces return (trim p, trim v) styleAttrParser :: Parser [(String, String)] @@ -25,14 +25,14 @@ foldOrElse v xs = foldr (orElse v) v xs eitherToMaybe :: Either a b -> Maybe b eitherToMaybe (Right x) = Just x -eitherToMaybe _ = Nothing +eitherToMaybe _ = Nothing -- | takes a list of keys/properties and a CSS string and -- returns the corresponding key-value-pairs. pickStylesToKVs :: [String] -> String -> [(String, String)] pickStylesToKVs props styleAttr = case parse styleAttrParser "" styleAttr of - Left _ -> [] + Left _ -> [] Right styles -> filter (\s -> fst s `elem` props) styles -- | takes a list of key/property synonyms and a CSS string and maybe @@ -40,4 +40,4 @@ pickStylesToKVs props styleAttr = pickStyleAttrProps :: [String] -> String -> Maybe String pickStyleAttrProps lookupProps styleAttr = do styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr - foldOrElse Nothing $ map (flip lookup styles) lookupProps + foldOrElse Nothing $ map (`lookup` styles) lookupProps diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs new file mode 100644 index 000000000..3415ae88f --- /dev/null +++ b/src/Text/Pandoc/CSV.hs @@ -0,0 +1,102 @@ +{- +Copyright (C) 2017–2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.CSV + Copyright : Copyright (C) 2017–2018 John MacFarlane <jgm@berkeley.edu> + License : GNU GPL, version 2 or above + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Simple CSV parser. +-} + +module Text.Pandoc.CSV ( + CSVOptions(..), + defaultCSVOptions, + parseCSV, + ParseError +) where + +import Control.Monad (void) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Parsec +import Text.Parsec.Text (Parser) + +data CSVOptions = CSVOptions{ + csvDelim :: Char + , csvQuote :: Char + , csvKeepSpace :: Bool -- treat whitespace following delim as significant + , csvEscape :: Maybe Char -- default is to double up quote +} deriving (Read, Show) + +defaultCSVOptions :: CSVOptions +defaultCSVOptions = CSVOptions{ + csvDelim = ',' + , csvQuote = '"' + , csvKeepSpace = False + , csvEscape = Nothing } + +parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]] +parseCSV opts t = parse (pCSV opts) "csv" t + +pCSV :: CSVOptions -> Parser [[Text]] +pCSV opts = + (pCSVRow opts `sepEndBy` endline) <* (spaces *> eof) + +pCSVRow :: CSVOptions -> Parser [Text] +pCSVRow opts = notFollowedBy blank >> pCSVCell opts `sepBy` pCSVDelim opts + +blank :: Parser () +blank = try $ spaces >> (() <$ endline <|> eof) + +pCSVCell :: CSVOptions -> Parser Text +pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts + +pCSVQuotedCell :: CSVOptions -> Parser Text +pCSVQuotedCell opts = do + char (csvQuote opts) + res <- many (satisfy (\c -> c /= csvQuote opts && + Just c /= csvEscape opts) <|> escaped opts) + char (csvQuote opts) + return $ T.pack res + +escaped :: CSVOptions -> Parser Char +escaped opts = + case csvEscape opts of + Nothing -> try $ char (csvQuote opts) >> char (csvQuote opts) + Just c -> try $ char c >> noneOf "\r\n" + +pCSVUnquotedCell :: CSVOptions -> Parser Text +pCSVUnquotedCell opts = T.pack <$> + many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n' + && c /= csvQuote opts)) + +pCSVDelim :: CSVOptions -> Parser () +pCSVDelim opts = do + char (csvDelim opts) + if csvKeepSpace opts + then return () + else skipMany (oneOf " \t") + +endline :: Parser () +endline = do + optional (void $ char '\r') + void $ char '\n' diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs new file mode 100644 index 000000000..aa0379942 --- /dev/null +++ b/src/Text/Pandoc/Class.hs @@ -0,0 +1,1082 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} +#if MIN_VERSION_base(4,8,0) +#else +{-# LANGUAGE OverlappingInstances #-} +#endif + +{- +Copyright (C) 2016-17 Jesse Rosenthal <jrosenthal@jhu.edu> +and John MacFarlane. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Class + Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +This module defines a type class, 'PandocMonad', for pandoc readers +and writers. A pure instance 'PandocPure' and an impure instance +'PandocIO' are provided. This allows users of the library to choose +whether they want conversions to perform IO operations (such as +reading include files or images). +-} + +module Text.Pandoc.Class ( PandocMonad(..) + , CommonState(..) + , PureState(..) + , getPureState + , getsPureState + , putPureState + , modifyPureState + , getPOSIXTime + , getZonedTime + , readFileFromDirs + , report + , setTrace + , setRequestHeader + , getLog + , setVerbosity + , getVerbosity + , getMediaBag + , setMediaBag + , insertMedia + , setUserDataDir + , getUserDataDir + , fetchItem + , getInputFiles + , setInputFiles + , getOutputFile + , setOutputFile + , setResourcePath + , getResourcePath + , PandocIO(..) + , PandocPure(..) + , FileTree + , FileInfo(..) + , addToFileTree + , insertInFileTree + , runIO + , runIOorExplode + , runPure + , readDefaultDataFile + , readDataFile + , fetchMediaResource + , fillMediaBag + , extractMedia + , toLang + , setTranslations + , translateTerm + , Translations + ) where + +import Prelude hiding (readFile) +import System.Random (StdGen, next, mkStdGen) +import qualified System.Random as IO (newStdGen) +import Codec.Archive.Zip +import qualified Data.CaseInsensitive as CI +import Data.Unique (hashUnique) +import Data.List (stripPrefix) +import qualified Data.Unique as IO (newUnique) +import qualified Text.Pandoc.UTF8 as UTF8 +import qualified System.Directory as Directory +import Text.Pandoc.Compat.Time (UTCTime) +import Text.Pandoc.Logging +import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName) +import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime) +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) +import Text.Pandoc.Definition +import Data.Digest.Pure.SHA (sha1, showDigest) +import Data.Maybe (fromMaybe) +import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds + , posixSecondsToUTCTime + , POSIXTime ) +import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc) +import Data.ByteString.Base64 (decodeLenient) +import Network.URI ( escapeURIString, nonStrictRelativeTo, + unEscapeString, parseURIReference, isAllowedInURI, + parseURI, URI(..) ) +import Network.HTTP.Client + (httpLbs, responseBody, responseHeaders, + Request(port, host, requestHeaders), parseRequest, newManager) +import Network.HTTP.Client.Internal (addProxy) +import Network.HTTP.Client.TLS (tlsManagerSettings) +import System.Environment (getEnv) +import Network.HTTP.Types.Header ( hContentType ) +import Network (withSocketsDo) +import Data.ByteString.Lazy (toChunks) +import qualified Control.Exception as E +import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.Walk (walkM, walk) +import qualified Text.Pandoc.MediaBag as MB +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified System.Environment as IO (lookupEnv) +import System.FilePath.Glob (match, compile) +import System.Directory (createDirectoryIfMissing, getDirectoryContents, + doesDirectoryExist) +import System.FilePath + ((</>), (<.>), takeDirectory, takeExtension, dropExtension, + isRelative, normalise, splitDirectories) +import qualified System.FilePath.Glob as IO (glob) +import qualified System.FilePath.Posix as Posix +import qualified System.Directory as IO (getModificationTime) +import Control.Monad as M (fail) +import Control.Monad.State.Strict +import Control.Monad.Except +import Data.Word (Word8) +import Data.Default +import System.IO.Error +import System.IO (stderr) +import qualified Data.Map as M +import Text.Pandoc.Error +import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) +import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, + readTranslations) +import qualified Debug.Trace +#ifdef EMBED_DATA_FILES +import Text.Pandoc.Data (dataFiles) +#else +import qualified Paths_pandoc as Paths +#endif + +-- | The PandocMonad typeclass contains all the potentially +-- IO-related functions used in pandoc's readers and writers. +-- Instances of this typeclass may implement these functions +-- in IO (as in 'PandocIO') or using an internal state that +-- represents a file system, time, and so on (as in 'PandocPure'). +class (Functor m, Applicative m, Monad m, MonadError PandocError m) + => PandocMonad m where + -- | Lookup an environment variable. + lookupEnv :: String -> m (Maybe String) + -- | Get the current (UTC) time. + getCurrentTime :: m UTCTime + -- | Get the locale's time zone. + getCurrentTimeZone :: m TimeZone + -- | Return a new generator for random numbers. + newStdGen :: m StdGen + -- | Return a new unique integer. + newUniqueHash :: m Int + -- | Retrieve contents and mime type from a URL, raising + -- an error on failure. + openURL :: String -> m (B.ByteString, Maybe MimeType) + -- | Read the lazy ByteString contents from a file path, + -- raising an error on failure. + readFileLazy :: FilePath -> m BL.ByteString + -- | Read the strict ByteString contents from a file path, + -- raising an error on failure. + readFileStrict :: FilePath -> m B.ByteString + -- | Return a list of paths that match a glob, relative to + -- the working directory. See 'System.FilePath.Glob' for + -- the glob syntax. + glob :: String -> m [FilePath] + -- | Returns True if file exists. + fileExists :: FilePath -> m Bool + -- | Returns the path of data file. + getDataFileName :: FilePath -> m FilePath + -- | Return the modification time of a file. + getModificationTime :: FilePath -> m UTCTime + -- | Get the value of the 'CommonState' used by all instances + -- of 'PandocMonad'. + getCommonState :: m CommonState + -- | Set the value of the 'CommonState' used by all instances + -- of 'PandocMonad'. + -- | Get the value of a specific field of 'CommonState'. + putCommonState :: CommonState -> m () + -- | Get the value of a specific field of 'CommonState'. + getsCommonState :: (CommonState -> a) -> m a + getsCommonState f = f <$> getCommonState + -- | Modify the 'CommonState'. + modifyCommonState :: (CommonState -> CommonState) -> m () + modifyCommonState f = getCommonState >>= putCommonState . f + -- Output a log message. + logOutput :: LogMessage -> m () + -- Output a debug message to sterr, using 'Debug.Trace.trace', + -- if tracing is enabled. Note: this writes to stderr even in + -- pure instances. + trace :: String -> m () + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ()) + +-- * Functions defined for all PandocMonad instances + +-- | Set the verbosity level. +setVerbosity :: PandocMonad m => Verbosity -> m () +setVerbosity verbosity = + modifyCommonState $ \st -> st{ stVerbosity = verbosity } + +-- | Get the verbosity level. +getVerbosity :: PandocMonad m => m Verbosity +getVerbosity = getsCommonState stVerbosity + +-- Get the accomulated log messages (in temporal order). +getLog :: PandocMonad m => m [LogMessage] +getLog = reverse <$> getsCommonState stLog + +-- | Log a message using 'logOutput'. Note that 'logOutput' is +-- called only if the verbosity level exceeds the level of the +-- message, but the message is added to the list of log messages +-- that will be retrieved by 'getLog' regardless of its verbosity level. +report :: PandocMonad m => LogMessage -> m () +report msg = do + verbosity <- getsCommonState stVerbosity + let level = messageVerbosity msg + when (level <= verbosity) $ logOutput msg + modifyCommonState $ \st -> st{ stLog = msg : stLog st } + +-- | Determine whether tracing is enabled. This affects +-- the behavior of 'trace'. If tracing is not enabled, +-- 'trace' does nothing. +setTrace :: PandocMonad m => Bool -> m () +setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing} + +-- | Set request header to use in HTTP requests. +setRequestHeader :: PandocMonad m + => String -- ^ Header name + -> String -- ^ Value + -> m () +setRequestHeader name val = modifyCommonState $ \st -> + st{ stRequestHeaders = + (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st) } + +-- | Initialize the media bag. +setMediaBag :: PandocMonad m => MediaBag -> m () +setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb} + +-- Retrieve the media bag. +getMediaBag :: PandocMonad m => m MediaBag +getMediaBag = getsCommonState stMediaBag + +-- Insert an item into the media bag. +insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m () +insertMedia fp mime bs = do + mb <- getMediaBag + let mb' = MB.insertMedia fp mime bs mb + setMediaBag mb' + +-- Retrieve the input filenames. +getInputFiles :: PandocMonad m => m [FilePath] +getInputFiles = getsCommonState stInputFiles + +-- Set the input filenames. +setInputFiles :: PandocMonad m => [FilePath] -> m () +setInputFiles fs = do + let sourceURL = case fs of + [] -> Nothing + (x:_) -> case parseURI x of + Just u + | uriScheme u `elem` ["http:","https:"] -> + Just $ show u{ uriQuery = "", + uriFragment = "" } + _ -> Nothing + + modifyCommonState $ \st -> st{ stInputFiles = fs + , stSourceURL = sourceURL } + +-- Retrieve the output filename. +getOutputFile :: PandocMonad m => m (Maybe FilePath) +getOutputFile = getsCommonState stOutputFile + +-- Set the output filename. +setOutputFile :: PandocMonad m => Maybe FilePath -> m () +setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf } + +-- Retrieve the resource path searched by 'fetchItem'. +getResourcePath :: PandocMonad m => m [FilePath] +getResourcePath = getsCommonState stResourcePath + +-- Set the resource path searched by 'fetchItem'. +setResourcePath :: PandocMonad m => [FilePath] -> m () +setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps} + +-- Get the POSIX time. +getPOSIXTime :: PandocMonad m => m POSIXTime +getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime + +-- Get the zoned time. +getZonedTime :: PandocMonad m => m ZonedTime +getZonedTime = do + t <- getCurrentTime + tz <- getCurrentTimeZone + return $ utcToZonedTime tz t + +-- | Read file, checking in any number of directories. +readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String) +readFileFromDirs [] _ = return Nothing +readFileFromDirs (d:ds) f = catchError + ((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f)) + (\_ -> readFileFromDirs ds f) + +-- + +-- | 'CommonState' represents state that is used by all +-- instances of 'PandocMonad'. Normally users should not +-- need to interact with it directly; instead, auxiliary +-- functions like 'setVerbosity' and 'withMediaBag' should be used. +data CommonState = CommonState { stLog :: [LogMessage] + -- ^ A list of log messages in reverse order + , stUserDataDir :: Maybe FilePath + -- ^ Directory to search for data files + , stSourceURL :: Maybe String + -- ^ Absolute URL + dir of 1st source file + , stRequestHeaders :: [(String, String)] + -- ^ Headers to add for HTTP requests + , stMediaBag :: MediaBag + -- ^ Media parsed from binary containers + , stTranslations :: Maybe + (Lang, Maybe Translations) + -- ^ Translations for localization + , stInputFiles :: [FilePath] + -- ^ List of input files from command line + , stOutputFile :: Maybe FilePath + -- ^ Output file from command line + , stResourcePath :: [FilePath] + -- ^ Path to search for resources like + -- included images + , stVerbosity :: Verbosity + -- ^ Verbosity level + , stTrace :: Bool + -- ^ Controls whether tracing messages are + -- issued. + } + +instance Default CommonState where + def = CommonState { stLog = [] + , stUserDataDir = Nothing + , stSourceURL = Nothing + , stRequestHeaders = [] + , stMediaBag = mempty + , stTranslations = Nothing + , stInputFiles = [] + , stOutputFile = Nothing + , stResourcePath = ["."] + , stVerbosity = WARNING + , stTrace = False + } + +-- | Convert BCP47 string to a Lang, issuing warning +-- if there are problems. +toLang :: PandocMonad m => Maybe String -> m (Maybe Lang) +toLang Nothing = return Nothing +toLang (Just s) = + case parseBCP47 s of + Left _ -> do + report $ InvalidLang s + return Nothing + Right l -> return (Just l) + +-- | Select the language to use with 'translateTerm'. +-- Note that this does not read a translation file; +-- that is only done the first time 'translateTerm' is +-- used. +setTranslations :: PandocMonad m => Lang -> m () +setTranslations lang = + modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) } + +-- | Load term map. +getTranslations :: PandocMonad m => m Translations +getTranslations = do + mbtrans <- getsCommonState stTranslations + case mbtrans of + Nothing -> return mempty -- no language defined + Just (_, Just t) -> return t + Just (lang, Nothing) -> do -- read from file + let translationFile = "translations/" ++ renderLang lang ++ ".yaml" + let fallbackFile = "translations/" ++ langLanguage lang ++ ".yaml" + let getTrans fp = do + bs <- readDataFile fp + case readTranslations (UTF8.toString bs) of + Left e -> do + report $ CouldNotLoadTranslations (renderLang lang) + (fp ++ ": " ++ e) + -- make sure we don't try again... + modifyCommonState $ \st -> + st{ stTranslations = Nothing } + return mempty + Right t -> do + modifyCommonState $ \st -> + st{ stTranslations = Just (lang, Just t) } + return t + catchError (getTrans translationFile) + (\_ -> + catchError (getTrans fallbackFile) + (\e -> do + report $ CouldNotLoadTranslations (renderLang lang) + $ case e of + PandocCouldNotFindDataFileError _ -> + "data file " ++ fallbackFile ++ " not found" + _ -> "" + -- make sure we don't try again... + modifyCommonState $ \st -> st{ stTranslations = Nothing } + return mempty)) + +-- | Get a translation from the current term map. +-- Issue a warning if the term is not defined. +translateTerm :: PandocMonad m => Term -> m String +translateTerm term = do + translations <- getTranslations + case lookupTerm term translations of + Just s -> return s + Nothing -> do + report $ NoTranslation (show term) + return "" + +-- | Evaluate a 'PandocIO' operation. +runIO :: PandocIO a -> IO (Either PandocError a) +runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma + +-- | Evaluate a 'PandocIO' operation, handling any errors +-- by exiting with an appropriate message and error status. +runIOorExplode :: PandocIO a -> IO a +runIOorExplode ma = runIO ma >>= handleError + +newtype PandocIO a = PandocIO { + unPandocIO :: ExceptT PandocError (StateT CommonState IO) a + } deriving ( MonadIO + , Functor + , Applicative + , Monad + , MonadError PandocError + ) + +-- | Utility function to lift IO errors into 'PandocError's. +liftIOError :: (String -> IO a) -> String -> PandocIO a +liftIOError f u = do + res <- liftIO $ tryIOError $ f u + case res of + Left e -> throwError $ PandocIOError u e + Right r -> return r + +instance PandocMonad PandocIO where + lookupEnv = liftIO . IO.lookupEnv + getCurrentTime = liftIO IO.getCurrentTime + getCurrentTimeZone = liftIO IO.getCurrentTimeZone + newStdGen = liftIO IO.newStdGen + newUniqueHash = hashUnique <$> liftIO IO.newUnique + + openURL u + | Just u'' <- stripPrefix "data:" u = do + let mime = takeWhile (/=',') u'' + let contents = UTF8.fromString $ + unEscapeString $ drop 1 $ dropWhile (/=',') u'' + return (decodeLenient contents, Just mime) + | otherwise = do + let toReqHeader (n, v) = (CI.mk (UTF8.fromString n), UTF8.fromString v) + customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders + report $ Fetching u + res <- liftIO $ E.try $ withSocketsDo $ do + let parseReq = parseRequest + proxy <- tryIOError (getEnv "http_proxy") + let addProxy' x = case proxy of + Left _ -> return x + Right pr -> parseReq pr >>= \r -> + return (addProxy (host r) (port r) x) + req <- parseReq u >>= addProxy' + let req' = req{requestHeaders = customHeaders ++ requestHeaders req} + resp <- newManager tlsManagerSettings >>= httpLbs req' + return (B.concat $ toChunks $ responseBody resp, + UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) + + case res of + Right r -> return r + Left e -> throwError $ PandocHttpError u e + + readFileLazy s = liftIOError BL.readFile s + readFileStrict s = liftIOError B.readFile s + + glob = liftIOError IO.glob + fileExists = liftIOError Directory.doesFileExist +#ifdef EMBED_DATA_FILES + getDataFileName = return +#else + getDataFileName = liftIOError Paths.getDataFileName +#endif + getModificationTime = liftIOError IO.getModificationTime + getCommonState = PandocIO $ lift get + putCommonState x = PandocIO $ lift $ put x + logOutput msg = liftIO $ do + UTF8.hPutStr stderr $ + "[" ++ show (messageVerbosity msg) ++ "] " + alertIndent $ lines $ showLogMessage msg + +alertIndent :: [String] -> IO () +alertIndent [] = return () +alertIndent (l:ls) = do + UTF8.hPutStrLn stderr l + mapM_ go ls + where go l' = do UTF8.hPutStr stderr " " + UTF8.hPutStrLn stderr l' + +-- | Specialized version of parseURIReference that disallows +-- single-letter schemes. Reason: these are usually windows absolute +-- paths. +parseURIReference' :: String -> Maybe URI +parseURIReference' s = + case parseURIReference s of + Just u + | length (uriScheme u) > 2 -> Just u + | null (uriScheme u) -> Just u -- protocol-relative + _ -> Nothing + +-- | Set the user data directory in common state. +setUserDataDir :: PandocMonad m + => Maybe FilePath + -> m () +setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp } + +-- | Get the user data directory from common state. +getUserDataDir :: PandocMonad m + => m (Maybe FilePath) +getUserDataDir = getsCommonState stUserDataDir + +-- | Fetch an image or other item from the local filesystem or the net. +-- Returns raw content and maybe mime type. +fetchItem :: PandocMonad m + => String + -> m (B.ByteString, Maybe MimeType) +fetchItem s = do + mediabag <- getMediaBag + case lookupMedia s mediabag of + Just (mime, bs) -> return (BL.toStrict bs, Just mime) + Nothing -> downloadOrRead s + +downloadOrRead :: PandocMonad m + => String + -> m (B.ByteString, Maybe MimeType) +downloadOrRead s = do + sourceURL <- getsCommonState stSourceURL + case (sourceURL >>= parseURIReference' . + ensureEscaped, ensureEscaped s) of + (Just u, s') -> -- try fetching from relative path at source + case parseURIReference' s' of + Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u + Nothing -> openURL s' -- will throw error + (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI + case parseURIReference' s' of + Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon + Nothing -> openURL s' -- will throw error + (Nothing, s') -> + case parseURI s' of -- requires absolute URI + -- We don't want to treat C:/ as a scheme: + Just u' | length (uriScheme u') > 2 -> openURL (show u') + Just u' | uriScheme u' == "file:" -> + readLocalFile $ dropWhile (=='/') (uriPath u') + _ -> readLocalFile fp -- get from local file system + where readLocalFile f = do + resourcePath <- getResourcePath + cont <- if isRelative f + then withPaths resourcePath readFileStrict f + else readFileStrict f + return (cont, mime) + httpcolon = URI{ uriScheme = "http:", + uriAuthority = Nothing, + uriPath = "", + uriQuery = "", + uriFragment = "" } + dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#') + fp = unEscapeString $ dropFragmentAndQuery s + mime = case takeExtension fp of + ".gz" -> getMimeType $ dropExtension fp + ".svgz" -> getMimeType $ dropExtension fp ++ ".svg" + x -> getMimeType x + ensureEscaped = escapeURIString isAllowedInURI . map convertSlash + convertSlash '\\' = '/' + convertSlash x = x + +-- Retrieve default reference.docx. +getDefaultReferenceDocx :: PandocMonad m => m Archive +getDefaultReferenceDocx = do + let paths = ["[Content_Types].xml", + "_rels/.rels", + "docProps/app.xml", + "docProps/core.xml", + "word/document.xml", + "word/fontTable.xml", + "word/footnotes.xml", + "word/comments.xml", + "word/numbering.xml", + "word/settings.xml", + "word/webSettings.xml", + "word/styles.xml", + "word/_rels/document.xml.rels", + "word/_rels/footnotes.xml.rels", + "word/theme/theme1.xml"] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime + contents <- toLazy <$> readDataFile ("docx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.docx") + if exists + then return (Just (d </> "reference.docx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- Retrieve default reference.odt. +getDefaultReferenceODT :: PandocMonad m => m Archive +getDefaultReferenceODT = do + let paths = ["mimetype", + "manifest.rdf", + "styles.xml", + "content.xml", + "meta.xml", + "settings.xml", + "Configurations2/accelerator/current.xml", + "Thumbnails/thumbnail.png", + "META-INF/manifest.xml"] + let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime + contents <- (BL.fromChunks . (:[])) `fmap` + readDataFile ("odt/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.odt") + if exists + then return (Just (d </> "reference.odt")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +getDefaultReferencePptx :: PandocMonad m => m Archive +getDefaultReferencePptx = do + -- We're going to narrow this down substantially once we get it + -- working. + let paths = [ "[Content_Types].xml" + , "_rels/.rels" + , "docProps/app.xml" + , "docProps/core.xml" + , "ppt/_rels/presentation.xml.rels" + , "ppt/presProps.xml" + , "ppt/presentation.xml" + , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout5.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout6.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout7.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout8.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout9.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" + , "ppt/slideLayouts/slideLayout1.xml" + , "ppt/slideLayouts/slideLayout10.xml" + , "ppt/slideLayouts/slideLayout11.xml" + , "ppt/slideLayouts/slideLayout2.xml" + , "ppt/slideLayouts/slideLayout3.xml" + , "ppt/slideLayouts/slideLayout4.xml" + , "ppt/slideLayouts/slideLayout5.xml" + , "ppt/slideLayouts/slideLayout6.xml" + , "ppt/slideLayouts/slideLayout7.xml" + , "ppt/slideLayouts/slideLayout8.xml" + , "ppt/slideLayouts/slideLayout9.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slides/_rels/slide1.xml.rels" + , "ppt/slides/slide1.xml" + , "ppt/slides/_rels/slide2.xml.rels" + , "ppt/slides/slide2.xml" + , "ppt/tableStyles.xml" + , "ppt/theme/theme1.xml" + , "ppt/viewProps.xml" + -- These relate to notes slides. + , "ppt/notesMasters/notesMaster1.xml" + , "ppt/notesMasters/_rels/notesMaster1.xml.rels" + , "ppt/notesSlides/notesSlide1.xml" + , "ppt/notesSlides/_rels/notesSlide1.xml.rels" + , "ppt/notesSlides/notesSlide2.xml" + , "ppt/notesSlides/_rels/notesSlide2.xml.rels" + , "ppt/theme/theme2.xml" + ] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime + contents <- toLazy <$> readDataFile ("pptx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.pptx") + if exists + then return (Just (d </> "reference.pptx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + + +-- | Read file from user data directory or, +-- if not found there, from Cabal data directory. +readDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDataFile fname = do + datadir <- getUserDataDir + case datadir of + Nothing -> readDefaultDataFile fname + Just userDir -> do + exists <- fileExists (userDir </> fname) + if exists + then readFileStrict (userDir </> fname) + else readDefaultDataFile fname + +-- | Read file from from Cabal data directory. +readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDefaultDataFile "reference.docx" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx +readDefaultDataFile "reference.pptx" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx +readDefaultDataFile "reference.odt" = + (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT +readDefaultDataFile fname = +#ifdef EMBED_DATA_FILES + case lookup (makeCanonical fname) dataFiles of + Nothing -> throwError $ PandocCouldNotFindDataFileError fname + Just contents -> return contents +#else + getDataFileName fname' >>= checkExistence >>= readFileStrict + where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname + +checkExistence :: PandocMonad m => FilePath -> m FilePath +checkExistence fn = do + exists <- fileExists fn + if exists + then return fn + else throwError $ PandocCouldNotFindDataFileError fn +#endif + +makeCanonical :: FilePath -> FilePath +makeCanonical = Posix.joinPath . transformPathParts . splitDirectories + where transformPathParts = reverse . foldl go [] + go as "." = as + go (_:as) ".." = as + go as x = x : as + +withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a +withPaths [] _ fp = throwError $ PandocResourceNotFound fp +withPaths (p:ps) action fp = + catchError (action (p </> fp)) + (\_ -> withPaths ps action fp) + +-- | Fetch local or remote resource (like an image) and provide data suitable +-- for adding it to the MediaBag. +fetchMediaResource :: PandocMonad m + => String -> m (FilePath, Maybe MimeType, BL.ByteString) +fetchMediaResource src = do + (bs, mt) <- downloadOrRead src + let ext = fromMaybe (takeExtension src) + (mt >>= extensionFromMimeType) + let bs' = BL.fromChunks [bs] + let basename = showDigest $ sha1 bs' + let fname = basename <.> ext + return (fname, mt, bs') + +-- | Traverse tree, filling media bag for any images that +-- aren't already in the media bag. +fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc +fillMediaBag d = walkM handleImage d + where handleImage :: PandocMonad m => Inline -> m Inline + handleImage (Image attr lab (src, tit)) = catchError + (do mediabag <- getMediaBag + case lookupMedia src mediabag of + Just (_, _) -> return $ Image attr lab (src, tit) + Nothing -> do + (fname, mt, bs) <- fetchMediaResource src + insertMedia fname mt bs + return $ Image attr lab (fname, tit)) + (\e -> + case e of + PandocResourceNotFound _ -> do + report $ CouldNotFetchResource src + "replacing image with description" + -- emit alt text + return $ Span ("",["image"],[]) lab + PandocHttpError u er -> do + report $ CouldNotFetchResource u + (show er ++ "\rReplacing image with description.") + -- emit alt text + return $ Span ("",["image"],[]) lab + _ -> throwError e) + handleImage x = return x + +-- | Extract media from the mediabag into a directory. +extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc +extractMedia dir d = do + media <- getMediaBag + case [fp | (fp, _, _) <- mediaDirectory media] of + [] -> return d + fps -> do + mapM_ (writeMedia dir media) fps + return $ walk (adjustImagePath dir fps) d + +-- Write the contents of a media bag to a path. +writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO () +writeMedia dir mediabag subpath = do + -- we join and split to convert a/b/c to a\b\c on Windows; + -- in zip containers all paths use / + let fullpath = dir </> normalise subpath + let mbcontents = lookupMedia subpath mediabag + case mbcontents of + Nothing -> throwError $ PandocResourceNotFound subpath + Just (_, bs) -> do + report $ Extracting fullpath + liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) + liftIOError (\p -> BL.writeFile p bs) fullpath + +adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline +adjustImagePath dir paths (Image attr lab (src, tit)) + | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) +adjustImagePath _ _ x = x + +-- | The 'PureState' contains ersatz representations +-- of things that would normally be obtained through IO. +data PureState = PureState { stStdGen :: StdGen + , stWord8Store :: [Word8] -- should be + -- inifinite, + -- i.e. [1..] + , stUniqStore :: [Int] -- should be + -- inifinite and + -- contain every + -- element at most + -- once, e.g. [1..] + , stEnv :: [(String, String)] + , stTime :: UTCTime + , stTimeZone :: TimeZone + , stReferenceDocx :: Archive + , stReferencePptx :: Archive + , stReferenceODT :: Archive + , stFiles :: FileTree + , stUserDataFiles :: FileTree + , stCabalDataFiles :: FileTree + } + +instance Default PureState where + def = PureState { stStdGen = mkStdGen 1848 + , stWord8Store = [1..] + , stUniqStore = [1..] + , stEnv = [("USER", "pandoc-user")] + , stTime = posixSecondsToUTCTime 0 + , stTimeZone = utc + , stReferenceDocx = emptyArchive + , stReferencePptx = emptyArchive + , stReferenceODT = emptyArchive + , stFiles = mempty + , stUserDataFiles = mempty + , stCabalDataFiles = mempty + } + + +getPureState :: PandocPure PureState +getPureState = PandocPure $ lift $ lift get + +getsPureState :: (PureState -> a) -> PandocPure a +getsPureState f = f <$> getPureState + +putPureState :: PureState -> PandocPure () +putPureState ps= PandocPure $ lift $ lift $ put ps + +modifyPureState :: (PureState -> PureState) -> PandocPure () +modifyPureState f = PandocPure $ lift $ lift $ modify f + + +data FileInfo = FileInfo { infoFileMTime :: UTCTime + , infoFileContents :: B.ByteString + } + +newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo} + deriving (Monoid) + +getFileInfo :: FilePath -> FileTree -> Maybe FileInfo +getFileInfo fp tree = + M.lookup (makeCanonical fp) (unFileTree tree) + +-- | Add the specified file to the FileTree. If file +-- is a directory, add its contents recursively. +addToFileTree :: FileTree -> FilePath -> IO FileTree +addToFileTree tree fp = do + isdir <- doesDirectoryExist fp + if isdir + then do -- recursively add contents of directories + let isSpecial ".." = True + isSpecial "." = True + isSpecial _ = False + fs <- (map (fp </>) . filter (not . isSpecial)) <$> getDirectoryContents fp + foldM addToFileTree tree fs + else do + contents <- B.readFile fp + mtime <- IO.getModificationTime fp + return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime + , infoFileContents = contents } tree + +-- | Insert an ersatz file into the 'FileTree'. +insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree +insertInFileTree fp info (FileTree treemap) = + FileTree $ M.insert (makeCanonical fp) info treemap + +newtype PandocPure a = PandocPure { + unPandocPure :: ExceptT PandocError + (StateT CommonState (State PureState)) a + } deriving ( Functor + , Applicative + , Monad + , MonadError PandocError + ) + +-- Run a 'PandocPure' operation. +runPure :: PandocPure a -> Either PandocError a +runPure x = flip evalState def $ + flip evalStateT def $ + runExceptT $ + unPandocPure x + +instance PandocMonad PandocPure where + lookupEnv s = do + env <- getsPureState stEnv + return (lookup s env) + + getCurrentTime = getsPureState stTime + + getCurrentTimeZone = getsPureState stTimeZone + + newStdGen = do + g <- getsPureState stStdGen + let (_, nxtGen) = next g + modifyPureState $ \st -> st { stStdGen = nxtGen } + return g + + newUniqueHash = do + uniqs <- getsPureState stUniqStore + case uniqs of + u : us -> do + modifyPureState $ \st -> st { stUniqStore = us } + return u + _ -> M.fail "uniq store ran out of elements" + openURL u = throwError $ PandocResourceNotFound u + readFileLazy fp = do + fps <- getsPureState stFiles + case infoFileContents <$> getFileInfo fp fps of + Just bs -> return (BL.fromStrict bs) + Nothing -> throwError $ PandocResourceNotFound fp + readFileStrict fp = do + fps <- getsPureState stFiles + case infoFileContents <$> getFileInfo fp fps of + Just bs -> return bs + Nothing -> throwError $ PandocResourceNotFound fp + + glob s = do + FileTree ftmap <- getsPureState stFiles + return $ filter (match (compile s)) $ M.keys ftmap + + fileExists fp = do + fps <- getsPureState stFiles + case getFileInfo fp fps of + Nothing -> return False + Just _ -> return True + + getDataFileName fp = return $ "data/" ++ fp + + getModificationTime fp = do + fps <- getsPureState stFiles + case infoFileMTime <$> getFileInfo fp fps of + Just tm -> return tm + Nothing -> throwError $ PandocIOError fp + (userError "Can't get modification time") + + getCommonState = PandocPure $ lift get + putCommonState x = PandocPure $ lift $ put x + + logOutput _msg = return () + +-- This requires UndecidableInstances. We could avoid that +-- by repeating the definitions below for every monad transformer +-- we use: ReaderT, WriterT, StateT, RWST. But this seems to +-- be harmless. +instance (MonadTrans t, PandocMonad m, Functor (t m), + MonadError PandocError (t m), Monad (t m), + Applicative (t m)) => PandocMonad (t m) where + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + glob = lift . glob + fileExists = lift . fileExists + getDataFileName = lift . getDataFileName + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + logOutput = lift . logOutput + +#if MIN_VERSION_base(4,8,0) +instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where +#else +instance PandocMonad m => PandocMonad (ParsecT s st m) where +#endif + lookupEnv = lift . lookupEnv + getCurrentTime = lift getCurrentTime + getCurrentTimeZone = lift getCurrentTimeZone + newStdGen = lift newStdGen + newUniqueHash = lift newUniqueHash + openURL = lift . openURL + readFileLazy = lift . readFileLazy + readFileStrict = lift . readFileStrict + glob = lift . glob + fileExists = lift . fileExists + getDataFileName = lift . getDataFileName + getModificationTime = lift . getModificationTime + getCommonState = lift getCommonState + putCommonState = lift . putCommonState + trace msg = do + tracing <- getsCommonState stTrace + when tracing $ do + pos <- getPosition + Debug.Trace.trace + ("[trace] Parsed " ++ msg ++ " at line " ++ + show (sourceLine pos) ++ + if sourceName pos == "chunk" + then " of chunk" + else "") + (return ()) + logOutput = lift . logOutput diff --git a/src/Text/Pandoc/Data.hsb b/src/Text/Pandoc/Data.hs index 8786647c5..af0e4504f 100644 --- a/src/Text/Pandoc/Data.hsb +++ b/src/Text/Pandoc/Data.hs @@ -1,7 +1,9 @@ -{-# LANGUAGE OverloadedStrings #-} --- to be processed using hsb2hs +{-# LANGUAGE TemplateHaskell #-} + module Text.Pandoc.Data (dataFiles) where + import qualified Data.ByteString as B +import Data.FileEmbed import System.FilePath (splitDirectories) import qualified System.FilePath.Posix as Posix @@ -12,4 +14,9 @@ dataFiles = map (\(fp, contents) -> (Posix.joinPath (splitDirectories fp), contents)) dataFiles' dataFiles' :: [(FilePath, B.ByteString)] -dataFiles' = ("MANUAL.txt", %blob "MANUAL.txt") : %blobs "data" +dataFiles' = ("MANUAL.txt", $(embedFile "MANUAL.txt")) : + -- handle the hidden file separately, since embedDir doesn't + -- include it: + ("docx/_rels/.rels", $(embedFile "data/docx/_rels/.rels")) : + ("pptx/_rels/.rels", $(embedFile "data/pptx/_rels/.rels")) : + $(embedDir "data") diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs index c9f368abc..3766960ea 100644 --- a/src/Text/Pandoc/Emoji.hs +++ b/src/Text/Pandoc/Emoji.hs @@ -903,4 +903,3 @@ emojis = M.fromList ,("zero","0\65039\8419") ,("zzz","\128164") ] - diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 5e26771fe..f78a31481 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Error - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,40 +30,94 @@ This module provides a standard way to deal with possible errors encounted during parsing. -} -module Text.Pandoc.Error (PandocError(..), handleError) where +module Text.Pandoc.Error ( + PandocError(..), + handleError) where +import Control.Exception (Exception) +import Data.Typeable (Typeable) +import GHC.Generics (Generic) +import Network.HTTP.Client (HttpException) +import System.Exit (ExitCode (..), exitWith) +import System.IO (stderr) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Parsec.Error import Text.Parsec.Pos hiding (Line) -import GHC.Generics (Generic) -import Data.Generics (Typeable) -import Control.Exception (Exception) type Input = String -data PandocError = -- | Generic parse failure - ParseFailure String - -- | Error thrown by a Parsec parser - | ParsecError Input ParseError +data PandocError = PandocIOError String IOError + | PandocHttpError String HttpException + | PandocShouldNeverHappenError String + | PandocSomeError String + | PandocParseError String + | PandocParsecError Input ParseError + | PandocMakePDFError String + | PandocOptionError String + | PandocSyntaxMapError String + | PandocFailOnWarningError + | PandocPDFProgramNotFoundError String + | PandocPDFError String + | PandocFilterError String String + | PandocCouldNotFindDataFileError String + | PandocResourceNotFound String + | PandocTemplateError String + | PandocAppError String + | PandocEpubSubdirectoryError String + | PandocMacroLoop String deriving (Show, Typeable, Generic) instance Exception PandocError --- | An unsafe method to handle `PandocError`s. -handleError :: Either PandocError a -> a -handleError (Right r) = r -handleError (Left err) = - case err of - ParseFailure string -> error string - ParsecError input err' -> +-- | Handle PandocError by exiting with an error message. +handleError :: Either PandocError a -> IO a +handleError (Right r) = return r +handleError (Left e) = + case e of + PandocIOError _ err' -> ioError err' + PandocHttpError u err' -> err 61 $ + "Could not fetch " ++ u ++ "\n" ++ show err' + PandocShouldNeverHappenError s -> err 62 s + PandocSomeError s -> err 63 s + PandocParseError s -> err 64 s + PandocParsecError input err' -> let errPos = errorPos err' errLine = sourceLine errPos errColumn = sourceColumn errPos ls = lines input ++ [""] errorInFile = if length ls > errLine - 1 - then concat ["\n", (ls !! (errLine - 1)) + then concat ["\n", ls !! (errLine - 1) ,"\n", replicate (errColumn - 1) ' ' ,"^"] else "" - in error $ "\nError at " ++ show err' - ++ errorInFile + in err 65 $ "\nError at " ++ show err' ++ + -- if error comes from a chunk or included file, + -- then we won't get the right text this way: + if sourceName errPos == "source" + then errorInFile + else "" + PandocMakePDFError s -> err 65 s + PandocOptionError s -> err 2 s + PandocSyntaxMapError s -> err 67 s + PandocFailOnWarningError -> err 3 "Failing because there were warnings." + PandocPDFProgramNotFoundError pdfprog -> err 47 $ + pdfprog ++ " not found. Please select a different --pdf-engine or install " ++ pdfprog + PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" ++ logmsg + PandocFilterError filtername msg -> err 83 $ "Error running filter " ++ + filtername ++ ":\n" ++ msg + PandocCouldNotFindDataFileError fn -> err 97 $ + "Could not find data file " ++ fn + PandocResourceNotFound fn -> err 99 $ + "File " ++ fn ++ " not found in resource path" + PandocTemplateError s -> err 5 s + PandocAppError s -> err 1 s + PandocEpubSubdirectoryError s -> err 31 $ + "EPUB subdirectory name '" ++ s ++ "' contains illegal characters" + PandocMacroLoop s -> err 91 $ + "Loop encountered in expanding macro " ++ s +err :: Int -> String -> IO a +err exitCode msg = do + UTF8.hPutStrLn stderr msg + exitWith $ ExitFailure exitCode + return undefined diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs new file mode 100644 index 000000000..968476930 --- /dev/null +++ b/src/Text/Pandoc/Extensions.hs @@ -0,0 +1,378 @@ +{- +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE TemplateHaskell #-} + +{- | + Module : Text.Pandoc.Extensions + Copyright : Copyright (C) 2012-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Data structures and functions for representing markup extensions. +-} +module Text.Pandoc.Extensions ( Extension(..) + , Extensions + , emptyExtensions + , extensionsFromList + , parseFormatSpec + , extensionEnabled + , enableExtension + , disableExtension + , getDefaultExtensions + , pandocExtensions + , plainExtensions + , strictExtensions + , phpMarkdownExtraExtensions + , githubMarkdownExtensions + , multimarkdownExtensions ) +where +import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions) +import Data.Aeson.TH (deriveJSON) +import Data.Bits (clearBit, setBit, testBit, (.|.)) +import Data.Data (Data) +import Data.Typeable (Typeable) +import GHC.Generics (Generic) +import Text.Pandoc.Shared (safeRead) +import Text.Parsec + +newtype Extensions = Extensions Integer + deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, ToJSON, FromJSON) + +instance Monoid Extensions where + mempty = Extensions 0 + mappend (Extensions a) (Extensions b) = Extensions (a .|. b) + +extensionsFromList :: [Extension] -> Extensions +extensionsFromList = foldr enableExtension emptyExtensions + +emptyExtensions :: Extensions +emptyExtensions = Extensions 0 + +extensionEnabled :: Extension -> Extensions -> Bool +extensionEnabled x (Extensions exts) = testBit exts (fromEnum x) + +enableExtension :: Extension -> Extensions -> Extensions +enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x)) + +disableExtension :: Extension -> Extensions -> Extensions +disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x)) + +-- | Individually selectable syntax extensions. +data Extension = + Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions + | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable + | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup + | Ext_angle_brackets_escapable -- ^ Make < and > escapable + | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers + | Ext_auto_identifiers -- ^ Automatic identifiers for headers + | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links + | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks + | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote + | Ext_blank_before_header -- ^ Require blank line before a header + | Ext_bracketed_spans -- ^ Bracketed spans with attributes + | Ext_citations -- ^ Pandoc/citeproc citations + | Ext_compact_definition_lists -- ^ Definition lists without space between items, + -- and disallow laziness + | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php + | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between + -- East Asian wide characters + | Ext_emoji -- ^ Support emoji like :smile: + | Ext_empty_paragraphs -- ^ Allow empty paragraphs + | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML + | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak + | Ext_example_lists -- ^ Markdown-style numbered examples + | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters + | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks + | Ext_fenced_code_blocks -- ^ Parse fenced code blocks + | Ext_fenced_divs -- ^ Allow fenced div syntax ::: + | Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes + | Ext_four_space_rule -- ^ Require 4-space indent for list contents + | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, using + -- GitHub's method for generating identifiers + | Ext_grid_tables -- ^ Grid tables (pandoc, reST) + | Ext_hard_line_breaks -- ^ All newlines become hard line breaks + | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} + | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored + | Ext_implicit_figures -- ^ A paragraph with just an image is a figure + | Ext_implicit_header_references -- ^ Implicit reference links for headers + | Ext_inline_code_attributes -- ^ Allow attributes on inline code + | Ext_inline_notes -- ^ Pandoc-style inline notes + | Ext_intraword_underscores -- ^ Treat underscore inside word as literal + | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) + | Ext_line_blocks -- ^ RST style line blocks + | Ext_link_attributes -- ^ link and image attributes + | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank + | Ext_literate_haskell -- ^ Enable literate Haskell conventions + | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown iff + -- container has attribute 'markdown' + | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks + | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] + | Ext_mmd_link_attributes -- ^ MMD style reference link attributes + | Ext_mmd_title_block -- ^ Multimarkdown metadata block + | Ext_multiline_tables -- ^ Pandoc-style multiline tables + | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags + | Ext_native_spans -- ^ Use Span inlines for contents of <span> + | Ext_ntb -- ^ ConTeXt Natural Tables + | Ext_old_dashes -- ^ -- = em, - before number = en + | Ext_pandoc_title_block -- ^ Pandoc title block + | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) + | Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines + | Ext_raw_html -- ^ Allow raw HTML + | Ext_raw_tex -- ^ Allow raw TeX (other than math) + | Ext_shortcut_reference_links -- ^ Shortcut reference links + | Ext_simple_tables -- ^ Pandoc-style simple tables + | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes + | Ext_space_in_atx_header -- ^ Require space between # and header text + | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link + | Ext_startnum -- ^ Make start number of ordered list significant + | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax + | Ext_subscript -- ^ Subscript using ~this~ syntax + | Ext_superscript -- ^ Superscript using ^this^ syntax + | Ext_styles -- ^ Read styles that pandoc doesn't know + | Ext_table_captions -- ^ Pandoc-style table captions + | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ + | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] + | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] + | Ext_yaml_metadata_block -- ^ YAML metadata block + deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) + +-- | Extensions to be used with pandoc-flavored markdown. +pandocExtensions :: Extensions +pandocExtensions = extensionsFromList + [ Ext_footnotes + , Ext_inline_notes + , Ext_pandoc_title_block + , Ext_yaml_metadata_block + , Ext_table_captions + , Ext_implicit_figures + , Ext_simple_tables + , Ext_multiline_tables + , Ext_grid_tables + , Ext_pipe_tables + , Ext_citations + , Ext_raw_tex + , Ext_raw_html + , Ext_tex_math_dollars + , Ext_latex_macros + , Ext_fenced_code_blocks + , Ext_fenced_code_attributes + , Ext_backtick_code_blocks + , Ext_inline_code_attributes + , Ext_raw_attribute + , Ext_markdown_in_html_blocks + , Ext_native_divs + , Ext_fenced_divs + , Ext_native_spans + , Ext_bracketed_spans + , Ext_escaped_line_breaks + , Ext_fancy_lists + , Ext_startnum + , Ext_definition_lists + , Ext_example_lists + , Ext_all_symbols_escapable + , Ext_intraword_underscores + , Ext_blank_before_blockquote + , Ext_blank_before_header + , Ext_space_in_atx_header + , Ext_strikeout + , Ext_superscript + , Ext_subscript + , Ext_auto_identifiers + , Ext_header_attributes + , Ext_link_attributes + , Ext_implicit_header_references + , Ext_line_blocks + , Ext_shortcut_reference_links + , Ext_smart + ] + +-- | Extensions to be used with plain text output. +plainExtensions :: Extensions +plainExtensions = extensionsFromList + [ Ext_table_captions + , Ext_implicit_figures + , Ext_simple_tables + , Ext_multiline_tables + , Ext_grid_tables + , Ext_latex_macros + , Ext_fancy_lists + , Ext_startnum + , Ext_definition_lists + , Ext_example_lists + , Ext_intraword_underscores + , Ext_blank_before_blockquote + , Ext_blank_before_header + , Ext_strikeout + ] + +-- | Extensions to be used with github-flavored markdown. +phpMarkdownExtraExtensions :: Extensions +phpMarkdownExtraExtensions = extensionsFromList + [ Ext_footnotes + , Ext_pipe_tables + , Ext_raw_html + , Ext_markdown_attribute + , Ext_fenced_code_blocks + , Ext_definition_lists + , Ext_intraword_underscores + , Ext_header_attributes + , Ext_link_attributes + , Ext_abbreviations + , Ext_shortcut_reference_links + , Ext_spaced_reference_links + ] + +-- | Extensions to be used with github-flavored markdown. +githubMarkdownExtensions :: Extensions +githubMarkdownExtensions = extensionsFromList + [ Ext_angle_brackets_escapable + , Ext_pipe_tables + , Ext_raw_html + , Ext_fenced_code_blocks + , Ext_gfm_auto_identifiers + , Ext_ascii_identifiers + , Ext_backtick_code_blocks + , Ext_autolink_bare_uris + , Ext_space_in_atx_header + , Ext_intraword_underscores + , Ext_strikeout + , Ext_emoji + , Ext_lists_without_preceding_blankline + , Ext_shortcut_reference_links + ] + +-- | Extensions to be used with multimarkdown. +multimarkdownExtensions :: Extensions +multimarkdownExtensions = extensionsFromList + [ Ext_pipe_tables + , Ext_raw_html + , Ext_markdown_attribute + , Ext_mmd_link_attributes + -- , Ext_raw_tex + -- Note: MMD's raw TeX syntax requires raw TeX to be + -- enclosed in HTML comment + , Ext_tex_math_double_backslash + , Ext_intraword_underscores + , Ext_mmd_title_block + , Ext_footnotes + , Ext_definition_lists + , Ext_all_symbols_escapable + , Ext_implicit_header_references + , Ext_shortcut_reference_links + , Ext_auto_identifiers + , Ext_mmd_header_identifiers + , Ext_implicit_figures + -- Note: MMD's syntax for superscripts and subscripts + -- is a bit more permissive than pandoc's, allowing + -- e^2 and a~1 instead of e^2^ and a~1~, so even with + -- these options we don't have full support for MMD + -- superscripts and subscripts, but there's no reason + -- not to include these: + , Ext_superscript + , Ext_subscript + , Ext_backtick_code_blocks + , Ext_spaced_reference_links + -- So far only in dev version of mmd: + , Ext_raw_attribute + ] + +-- | Language extensions to be used with strict markdown. +strictExtensions :: Extensions +strictExtensions = extensionsFromList + [ Ext_raw_html + , Ext_shortcut_reference_links + , Ext_spaced_reference_links + ] + +-- | Default extensions from format-describing string. +getDefaultExtensions :: String -> Extensions +getDefaultExtensions "markdown_strict" = strictExtensions +getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions +getDefaultExtensions "markdown_mmd" = multimarkdownExtensions +getDefaultExtensions "markdown_github" = githubMarkdownExtensions +getDefaultExtensions "markdown" = pandocExtensions +getDefaultExtensions "muse" = extensionsFromList + [Ext_amuse, + Ext_auto_identifiers] +getDefaultExtensions "plain" = plainExtensions +getDefaultExtensions "gfm" = githubMarkdownExtensions +getDefaultExtensions "commonmark" = extensionsFromList + [Ext_raw_html] +getDefaultExtensions "org" = extensionsFromList + [Ext_citations, + Ext_auto_identifiers] +getDefaultExtensions "html" = extensionsFromList + [Ext_auto_identifiers, + Ext_native_divs, + Ext_line_blocks, + Ext_native_spans] +getDefaultExtensions "html4" = getDefaultExtensions "html" +getDefaultExtensions "html5" = getDefaultExtensions "html" +getDefaultExtensions "epub" = extensionsFromList + [Ext_raw_html, + Ext_native_divs, + Ext_native_spans, + Ext_epub_html_exts] +getDefaultExtensions "epub2" = getDefaultExtensions "epub" +getDefaultExtensions "epub3" = getDefaultExtensions "epub" +getDefaultExtensions "latex" = extensionsFromList + [Ext_smart, + Ext_latex_macros, + Ext_auto_identifiers] +getDefaultExtensions "context" = extensionsFromList + [Ext_smart, + Ext_auto_identifiers] +getDefaultExtensions "textile" = extensionsFromList + [Ext_old_dashes, + Ext_smart, + Ext_raw_html, + Ext_auto_identifiers] +getDefaultExtensions "opml" = pandocExtensions -- affects notes +getDefaultExtensions _ = extensionsFromList + [Ext_auto_identifiers] + +-- | Parse a format-specifying string into a markup format and a function that +-- takes Extensions and enables and disables extensions as defined in the format +-- spec. +parseFormatSpec :: String + -> Either ParseError (String, Extensions -> Extensions) +parseFormatSpec = parse formatSpec "" + where formatSpec = do + name <- formatName + extMods <- many extMod + return (name, \x -> foldl (flip ($)) x extMods) + formatName = many1 $ noneOf "-+" + extMod = do + polarity <- oneOf "-+" + name <- many $ noneOf "-+" + ext <- case safeRead ("Ext_" ++ name) of + Just n -> return n + Nothing + | name == "lhs" -> return Ext_literate_haskell + | otherwise -> fail $ "Unknown extension: " ++ name + return $ case polarity of + '-' -> disableExtension ext + _ -> enableExtension ext + +$(deriveJSON defaultOptions ''Extension) diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs new file mode 100644 index 000000000..e2a3c3e16 --- /dev/null +++ b/src/Text/Pandoc/Filter.hs @@ -0,0 +1,60 @@ +{- +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE TemplateHaskell #-} + +{- | + Module : Text.Pandoc.Filter + Copyright : Copyright (C) 2006-2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Programmatically modifications of pandoc documents. +-} +module Text.Pandoc.Filter + ( Filter (..) + , applyFilters + ) where + +import Data.Aeson (defaultOptions) +import Data.Aeson.TH (deriveJSON) +import Data.Foldable (foldrM) +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Options (ReaderOptions) +import qualified Text.Pandoc.Filter.JSON as JSONFilter +import qualified Text.Pandoc.Filter.Lua as LuaFilter + +data Filter = LuaFilter FilePath + | JSONFilter FilePath + deriving (Show) + +applyFilters :: ReaderOptions + -> [Filter] + -> [String] + -> Pandoc + -> PandocIO Pandoc +applyFilters ropts filters args d = + foldrM ($) d $ map applyFilter filters + where + applyFilter (JSONFilter f) = JSONFilter.apply ropts args f + applyFilter (LuaFilter f) = LuaFilter.apply ropts args f + +$(deriveJSON defaultOptions ''Filter) diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs new file mode 100644 index 000000000..5772c2c41 --- /dev/null +++ b/src/Text/Pandoc/Filter/JSON.hs @@ -0,0 +1,97 @@ +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Filter + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Programmatically modifications of pandoc documents via JSON filters. +-} +module Text.Pandoc.Filter.JSON (apply) where + +import Control.Monad (unless, when) +import Control.Monad.Trans (MonadIO (liftIO)) +import Data.Aeson (eitherDecode', encode) +import Data.Char (toLower) +import Data.Maybe (isNothing) +import System.Directory (executable, doesFileExist, findExecutable, + getPermissions) +import System.Environment (getEnvironment) +import System.Exit (ExitCode (..)) +import System.FilePath ((</>), takeExtension) +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Error (PandocError (PandocFilterError)) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Filter.Path (expandFilterPath) +import Text.Pandoc.Options (ReaderOptions) +import Text.Pandoc.Process (pipeProcess) +import Text.Pandoc.Shared (pandocVersion) +import qualified Control.Exception as E +import qualified Text.Pandoc.UTF8 as UTF8 + +apply :: ReaderOptions + -> [String] + -> FilePath + -> Pandoc + -> PandocIO Pandoc +apply ropts args f d = do + f' <- expandFilterPath f + liftIO $ externalFilter ropts f' args d + +externalFilter :: MonadIO m + => ReaderOptions -> FilePath -> [String] -> Pandoc -> m Pandoc +externalFilter ropts f args' d = liftIO $ do + exists <- doesFileExist f + isExecutable <- if exists + then executable <$> getPermissions f + else return True + let (f', args'') = if exists + then case map toLower (takeExtension f) of + _ | isExecutable -> ("." </> f, args') + ".py" -> ("python", f:args') + ".hs" -> ("runhaskell", f:args') + ".pl" -> ("perl", f:args') + ".rb" -> ("ruby", f:args') + ".php" -> ("php", f:args') + ".js" -> ("node", f:args') + ".r" -> ("Rscript", f:args') + _ -> (f, args') + else (f, args') + unless (exists && isExecutable) $ do + mbExe <- findExecutable f' + when (isNothing mbExe) $ + E.throwIO $ PandocFilterError f ("Could not find executable " ++ f') + env <- getEnvironment + let env' = Just + ( ("PANDOC_VERSION", pandocVersion) + : ("PANDOC_READER_OPTIONS", UTF8.toStringLazy (encode ropts)) + : env ) + (exitcode, outbs) <- E.handle filterException $ + pipeProcess env' f' args'' $ encode d + case exitcode of + ExitSuccess -> either (E.throwIO . PandocFilterError f) + return $ eitherDecode' outbs + ExitFailure ec -> E.throwIO $ PandocFilterError f + ("Filter returned error status " ++ show ec) + where filterException :: E.SomeException -> IO a + filterException e = E.throwIO $ PandocFilterError f (show e) diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs new file mode 100644 index 000000000..597a31cbc --- /dev/null +++ b/src/Text/Pandoc/Filter/Lua.hs @@ -0,0 +1,53 @@ +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Filter.Lua + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Apply Lua filters to modify a pandoc documents programmatically. +-} +module Text.Pandoc.Filter.Lua (apply) where + +import Control.Exception (throw) +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Error (PandocError (PandocFilterError)) +import Text.Pandoc.Filter.Path (expandFilterPath) +import Text.Pandoc.Lua (LuaException (..), runLuaFilter) +import Text.Pandoc.Options (ReaderOptions) + +apply :: ReaderOptions + -> [String] + -> FilePath + -> Pandoc + -> PandocIO Pandoc +apply ropts args f d = do + f' <- expandFilterPath f + let format = case args of + (x:_) -> x + _ -> error "Format not supplied for lua filter" + res <- runLuaFilter ropts f' format d + case res of + Right x -> return x + Left (LuaException s) -> throw (PandocFilterError f s) diff --git a/src/Text/Pandoc/Filter/Path.hs b/src/Text/Pandoc/Filter/Path.hs new file mode 100644 index 000000000..8074bcbb7 --- /dev/null +++ b/src/Text/Pandoc/Filter/Path.hs @@ -0,0 +1,53 @@ +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Filter.Path + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Expand paths of filters, searching the data directory. +-} +module Text.Pandoc.Filter.Path + ( expandFilterPath + ) where + +import Text.Pandoc.Class (PandocMonad, fileExists, getUserDataDir) +import System.FilePath ((</>), isRelative) + + -- First we check to see if a filter is found. If not, and if it's + -- not an absolute path, we check to see whether it's in `userdir/filters`. + -- If not, we leave it unchanged. +expandFilterPath :: PandocMonad m => FilePath -> m FilePath +expandFilterPath fp = do + mbDatadir <- getUserDataDir + fpExists <- fileExists fp + if fpExists + then return fp + else case mbDatadir of + Just datadir | isRelative fp -> do + let filterPath = datadir </> "filters" </> fp + filterPathExists <- fileExists filterPath + if filterPathExists + then return filterPath + else return fp + _ -> return fp diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 896682389..113727750 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008-2016 John MacFarlane + Copyright : Copyright (C) 2008-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,7 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Exports functions for syntax highlighting. -} -module Text.Pandoc.Highlighting ( languages +module Text.Pandoc.Highlighting ( highlightingStyles + , languages , languagesByExtension , highlight , formatLaTeXInline @@ -48,14 +49,25 @@ module Text.Pandoc.Highlighting ( languages , fromListingsLanguage , toListingsLanguage ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared (safeRead) -import Skylighting -import Data.Maybe (fromMaybe) +import Control.Monad import Data.Char (toLower) import qualified Data.Map as M -import Control.Monad +import Data.Maybe (fromMaybe) import qualified Data.Text as T +import Skylighting +import Text.Pandoc.Definition +import Text.Pandoc.Shared (safeRead) + +highlightingStyles :: [(String, Style)] +highlightingStyles = + [("pygments", pygments), + ("tango", tango), + ("espresso", espresso), + ("zenburn", zenburn), + ("kate", kate), + ("monochrome", monochrome), + ("breezedark", breezeDark), + ("haddock", haddock)] languages :: [String] languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap] @@ -64,34 +76,39 @@ languagesByExtension :: String -> [String] languagesByExtension ext = [T.unpack (T.toLower (sName s)) | s <- syntaxesByExtension defaultSyntaxMap ext] -highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter +highlight :: SyntaxMap + -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter -> Attr -- ^ Attributes of the CodeBlock -> String -- ^ Raw contents of the CodeBlock - -> Maybe a -- ^ Maybe the formatted result -highlight formatter (_, classes, keyvals) rawCode = + -> Either String a +highlight syntaxmap formatter (ident, classes, keyvals) rawCode = let firstNum = fromMaybe 1 (safeRead (fromMaybe "1" $ lookup "startFrom" keyvals)) fmtOpts = defaultFormatOpts{ startNumber = firstNum, + lineAnchors = any (`elem` + ["line-anchors", "lineAnchors"]) classes, numberLines = any (`elem` - ["number","numberLines", "number-lines"]) classes } - tokenizeOpts = TokenizerConfig{ syntaxMap = defaultSyntaxMap + ["number","numberLines", "number-lines"]) classes, + lineIdPrefix = if null ident + then mempty + else T.pack (ident ++ "-") } + tokenizeOpts = TokenizerConfig{ syntaxMap = syntaxmap , traceOutput = False } classes' = map T.pack classes rawCode' = T.pack rawCode - in case msum (map (\l -> lookupSyntax l defaultSyntaxMap) classes') of + in case msum (map (`lookupSyntax` syntaxmap) classes') of Nothing - | numberLines fmtOpts -> Just + | numberLines fmtOpts -> Right $ formatter fmtOpts{ codeClasses = [], containerClasses = classes' } - $ map (\ln -> [(NormalTok, ln)]) $ T.lines rawCode' - | otherwise -> Nothing + $ map (\ln -> [(NormalTok, ln)]) + $ T.lines rawCode' + | otherwise -> Left "" Just syntax -> - case tokenize tokenizeOpts syntax rawCode' of - Right slines -> Just $ - formatter fmtOpts{ codeClasses = - [T.toLower (sShortname syntax)], - containerClasses = classes' } slines - Left _ -> Nothing + formatter fmtOpts{ codeClasses = + [T.toLower (sShortname syntax)], + containerClasses = classes' } <$> + tokenize tokenizeOpts syntax rawCode' -- Functions for correlating latex listings package's language names -- with skylighting language names: diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e46c91eda..4c76aac13 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} {-# OPTIONS_GHC -fno-warn-type-defaults #-} {- - Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> + Copyright (C) 2011-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ {- | Module : Text.Pandoc.ImageSize -Copyright : Copyright (C) 2011-2016 John MacFarlane +Copyright : Copyright (C) 2011-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -38,8 +38,12 @@ module Text.Pandoc.ImageSize ( ImageType(..) , Dimension(..) , Direction(..) , dimension + , lengthToDim + , scaleDimension , inInch + , inPixel , inPoints + , inEm , numUnit , showInInch , showInPixel @@ -53,11 +57,13 @@ import Control.Monad import Data.Bits import Data.Binary import Data.Binary.Get -import Text.Pandoc.Shared (safeRead, hush) +import Text.Pandoc.Shared (safeRead) import Data.Default (Default) import Numeric (showFFloat) import Text.Pandoc.Definition import Text.Pandoc.Options +import qualified Text.Pandoc.UTF8 as UTF8 +import qualified Text.XML.Light as Xml import qualified Data.Map as M import Control.Monad.Except import Data.Maybe (fromMaybe) @@ -65,7 +71,7 @@ import Data.Maybe (fromMaybe) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl -data ImageType = Png | Gif | Jpeg | Pdf | Eps deriving Show +data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf deriving Show data Direction = Width | Height instance Show Direction where show Width = "width" @@ -73,13 +79,19 @@ instance Show Direction where data Dimension = Pixel Integer | Centimeter Double + | Millimeter Double | Inch Double | Percent Double + | Em Double + deriving Eq + instance Show Dimension where show (Pixel a) = show a ++ "px" show (Centimeter a) = showFl a ++ "cm" + show (Millimeter a) = showFl a ++ "mm" show (Inch a) = showFl a ++ "in" show (Percent a) = show a ++ "%" + show (Em a) = showFl a ++ "em" data ImageSize = ImageSize{ pxX :: Integer @@ -91,7 +103,13 @@ instance Default ImageSize where def = ImageSize 300 200 72 72 showFl :: (RealFloat a) => a -> String -showFl a = showFFloat (Just 5) a "" +showFl a = removeExtra0s $ showFFloat (Just 5) a "" + +removeExtra0s :: String -> String +removeExtra0s s = + case dropWhile (=='0') $ reverse s of + '.':xs -> reverse xs + xs -> reverse xs imageType :: ByteString -> Maybe ImageType imageType img = case B.take 4 img of @@ -100,19 +118,31 @@ imageType img = case B.take 4 img of "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF "\xff\xd8\xff\xe1" -> return Jpeg -- Exif "%PDF" -> return Pdf + "<svg" -> return Svg + "<?xm" + | findSvgTag img + -> return Svg "%!PS" - | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" + | B.take 4 (B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" -> return Eps + "\x01\x00\x00\x00" + | B.take 4 (B.drop 40 img) == " EMF" + -> return Emf _ -> mzero -imageSize :: ByteString -> Either String ImageSize -imageSize img = +findSvgTag :: ByteString -> Bool +findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img + +imageSize :: WriterOptions -> ByteString -> Either String ImageSize +imageSize opts img = case imageType img of Just Png -> mbToEither "could not determine PNG size" $ pngSize img Just Gif -> mbToEither "could not determine GIF size" $ gifSize img Just Jpeg -> jpegSize img + Just Svg -> mbToEither "could not determine SVG size" $ svgSize opts img Just Eps -> mbToEither "could not determine EPS size" $ epsSize img - Just Pdf -> Left "could not determine PDF size" -- TODO + Just Pdf -> mbToEither "could not determine PDF size" $ pdfSize img + Just Emf -> mbToEither "could not determine EMF size" $ emfSize img Nothing -> Left "could not determine image type" where mbToEither msg Nothing = Left msg mbToEither _ (Just x) = Right x @@ -145,7 +175,7 @@ desiredSizeInPoints opts attr s = (Nothing, Nothing) -> sizeInPoints s where ratio = fromIntegral (pxX s) / fromIntegral (pxY s) - getDim dir = case (dimension dir attr) of + getDim dir = case dimension dir attr of Just (Percent _) -> Nothing Just dim -> Just $ inPoints opts dim Nothing -> Nothing @@ -153,13 +183,30 @@ desiredSizeInPoints opts attr s = inPoints :: WriterOptions -> Dimension -> Double inPoints opts dim = 72 * inInch opts dim +inEm :: WriterOptions -> Dimension -> Double +inEm opts dim = (64/11) * inInch opts dim + inInch :: WriterOptions -> Dimension -> Double inInch opts dim = case dim of - (Pixel a) -> fromIntegral a / (fromIntegral $ writerDpi opts) + (Pixel a) -> fromIntegral a / fromIntegral (writerDpi opts) (Centimeter a) -> a * 0.3937007874 + (Millimeter a) -> a * 0.03937007874 (Inch a) -> a (Percent _) -> 0 + (Em a) -> a * (11/64) + +inPixel :: WriterOptions -> Dimension -> Integer +inPixel opts dim = + case dim of + (Pixel a) -> a + (Centimeter a) -> floor $ dpi * a * 0.3937007874 :: Integer + (Millimeter a) -> floor $ dpi * a * 0.03937007874 :: Integer + (Inch a) -> floor $ dpi * a :: Integer + (Percent _) -> 0 + (Em a) -> floor $ dpi * a * (11/64) :: Integer + where + dpi = fromIntegral $ writerDpi opts -- | Convert a Dimension to a String denoting its equivalent in inches, for example "2.00000". -- Note: Dimensions in percentages are converted to the empty string. @@ -170,14 +217,8 @@ showInInch opts dim = showFl $ inInch opts dim -- | Convert a Dimension to a String denoting its equivalent in pixels, for example "600". -- Note: Dimensions in percentages are converted to the empty string. showInPixel :: WriterOptions -> Dimension -> String -showInPixel opts dim = - case dim of - (Pixel a) -> show a - (Centimeter a) -> show (floor $ dpi * a * 0.3937007874 :: Int) - (Inch a) -> show (floor $ dpi * a :: Int) - (Percent _) -> "" - where - dpi = fromIntegral $ writerDpi opts +showInPixel _ (Percent _) = "" +showInPixel opts dim = show $ inPixel opts dim -- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm") numUnit :: String -> Maybe (Double, String) @@ -187,6 +228,17 @@ numUnit s = Just n -> Just (n, unit) Nothing -> Nothing +-- | Scale a dimension by a factor. +scaleDimension :: Double -> Dimension -> Dimension +scaleDimension factor dim = + case dim of + Pixel x -> Pixel (round $ factor * fromIntegral x) + Centimeter x -> Centimeter (factor * x) + Millimeter x -> Millimeter (factor * x) + Inch x -> Inch (factor * x) + Percent x -> Percent (factor * x) + Em x -> Em (factor * x) + -- | Read a Dimension from an Attr attribute. -- `dimension Width attr` might return `Just (Pixel 3)` or for example `Just (Centimeter 2.0)`, etc. dimension :: Direction -> Attr -> Maybe Dimension @@ -195,20 +247,21 @@ dimension dir (_, _, kvs) = Width -> extractDim "width" Height -> extractDim "height" where - extractDim key = - case lookup key kvs of - Just str -> - case numUnit str of - Just (num, unit) -> toDim num unit - Nothing -> Nothing - Nothing -> Nothing + extractDim key = lookup key kvs >>= lengthToDim + +lengthToDim :: String -> Maybe Dimension +lengthToDim s = numUnit s >>= uncurry toDim + where toDim a "cm" = Just $ Centimeter a - toDim a "mm" = Just $ Centimeter (a / 10) + toDim a "mm" = Just $ Millimeter a toDim a "in" = Just $ Inch a toDim a "inch" = Just $ Inch a toDim a "%" = Just $ Percent a toDim a "px" = Just $ Pixel (floor a::Integer) toDim a "" = Just $ Pixel (floor a::Integer) + toDim a "pt" = Just $ Inch (a / 72) + toDim a "pc" = Just $ Inch (a / 6) + toDim a "em" = Just $ Em a toDim _ _ = Nothing epsSize :: ByteString -> Maybe ImageSize @@ -218,7 +271,7 @@ epsSize img = do case ls' of [] -> mzero (x:_) -> case B.words x of - (_:_:_:ux:uy:[]) -> do + [_, _, _, ux, uy] -> do ux' <- safeRead $ B.unpack ux uy' <- safeRead $ B.unpack uy return ImageSize{ @@ -228,6 +281,29 @@ epsSize img = do , dpiY = 72 } _ -> mzero +pdfSize :: ByteString -> Maybe ImageSize +pdfSize img = + case dropWhile (\l -> not (l == "stream" || + "/MediaBox" `B.isPrefixOf` l)) (B.lines img) of + (x:_) + | "/MediaBox" `B.isPrefixOf` x + -> case B.words . B.takeWhile (/=']') + . B.drop 1 + . B.dropWhile (/='[') + $ x of + [x1, y1, x2, y2] -> do + x1' <- safeRead $ B.unpack x1 + x2' <- safeRead $ B.unpack x2 + y1' <- safeRead $ B.unpack y1 + y2' <- safeRead $ B.unpack y2 + return ImageSize{ + pxX = x2' - x1' + , pxY = y2' - y1' + , dpiX = 72 + , dpiY = 72 } + _ -> mzero + _ -> mzero + pngSize :: ByteString -> Maybe ImageSize pngSize img = do let (h, rest) = B.splitAt 8 img @@ -236,27 +312,26 @@ pngSize img = do let (i, rest') = B.splitAt 4 $ B.drop 4 rest guard $ i == "MHDR" || i == "IHDR" let (sizes, rest'') = B.splitAt 8 rest' - (x,y) <- case map fromIntegral $ unpack $ sizes of + (x,y) <- case map fromIntegral $unpack sizes of ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return - ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, - (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) - _ -> (hush . Left) "PNG parse error" + (shift w1 24 + shift w2 16 + shift w3 8 + w4, + shift h1 24 + shift h2 16 + shift h3 8 + h4) + _ -> Nothing -- "PNG parse error" let (dpix, dpiy) = findpHYs rest'' - return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } + return ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } findpHYs :: ByteString -> (Integer, Integer) -findpHYs x = - if B.null x || "IDAT" `B.isPrefixOf` x - then (72,72) -- default, no pHYs - else if "pHYs" `B.isPrefixOf` x - then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral - $ unpack $ B.take 9 $ B.drop 4 x - factor = if u == 1 -- dots per meter - then \z -> z * 254 `div` 10000 - else const 72 - in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4, - factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 ) - else findpHYs $ B.drop 1 x -- read another byte +findpHYs x + | B.null x || "IDAT" `B.isPrefixOf` x = (72,72) + | "pHYs" `B.isPrefixOf` x = + let [x1,x2,x3,x4,y1,y2,y3,y4,u] = + map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x + factor = if u == 1 -- dots per meter + then \z -> z * 254 `div` 10000 + else const 72 + in ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4, + factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 ) + | otherwise = findpHYs $ B.drop 1 x -- read another byte gifSize :: ByteString -> Maybe ImageSize gifSize img = do @@ -269,7 +344,55 @@ gifSize img = do dpiX = 72, dpiY = 72 } - _ -> (hush . Left) "GIF parse error" + _ -> Nothing -- "GIF parse error" + +svgSize :: WriterOptions -> ByteString -> Maybe ImageSize +svgSize opts img = do + doc <- Xml.parseXMLDoc $ UTF8.toString img + let dpi = fromIntegral $ writerDpi opts + let dirToInt dir = do + dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim + return $ inPixel opts dim + w <- dirToInt "width" + h <- dirToInt "height" + return ImageSize { + pxX = w + , pxY = h + , dpiX = dpi + , dpiY = dpi + } + +emfSize :: ByteString -> Maybe ImageSize +emfSize img = + let + parseheader = runGetOrFail $ do + skip 0x18 -- 0x00 + frameL <- getWord32le -- 0x18 measured in 1/100 of a millimetre + frameT <- getWord32le -- 0x1C + frameR <- getWord32le -- 0x20 + frameB <- getWord32le -- 0x24 + skip 0x20 -- 0x28 + deviceX <- getWord32le -- 0x48 pixels of reference device + deviceY <- getWord32le -- 0x4C + mmX <- getWord32le -- 0x50 real mm of reference device (always 320*240?) + mmY <- getWord32le -- 0x58 + -- end of header + let + w = (deviceX * (frameR - frameL)) `quot` (mmX * 100) + h = (deviceY * (frameB - frameT)) `quot` (mmY * 100) + dpiW = (deviceX * 254) `quot` (mmX * 10) + dpiH = (deviceY * 254) `quot` (mmY * 10) + return $ ImageSize + { pxX = fromIntegral w + , pxY = fromIntegral h + , dpiX = fromIntegral dpiW + , dpiY = fromIntegral dpiH + } + in + case parseheader . BL.fromStrict $ img of + Left _ -> Nothing + Right (_, _, size) -> Just size + jpegSize :: ByteString -> Either String ImageSize jpegSize img = @@ -284,16 +407,16 @@ jpegSize img = jfifSize :: ByteString -> Either String ImageSize jfifSize rest = let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral - $ unpack $ B.take 5 $ B.drop 9 $ rest + $ unpack $ B.take 5 $B.drop 9 rest factor = case dpiDensity of 1 -> id - 2 -> \x -> (x * 254 `div` 10) + 2 -> \x -> x * 254 `div` 10 _ -> const 72 dpix = factor (shift dpix1 8 + dpix2) dpiy = factor (shift dpiy1 8 + dpiy2) in case findJfifSize rest of Left msg -> Left msg - Right (w,h) -> Right $ ImageSize { pxX = w + Right (w,h) ->Right ImageSize { pxX = w , pxY = h , dpiX = dpix , dpiY = dpiy } @@ -327,7 +450,7 @@ runGet' p bl = exifSize :: ByteString -> Either String ImageSize -exifSize bs = runGet' header $ bl +exifSize bs =runGet' header bl where bl = BL.fromChunks [bs] header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do @@ -397,14 +520,13 @@ exifHeader hdr = do Left msg -> throwError msg Right x -> return x return (tag, payload) - entries <- sequence $ replicate (fromIntegral numentries) ifdEntry + entries <- replicateM (fromIntegral numentries) ifdEntry subentries <- case lookup ExifOffset entries of Just (UnsignedLong offset') -> do pos <- lift bytesRead lift $ skip (fromIntegral offset' - (fromIntegral pos - 8)) numsubentries <- lift getWord16 - sequence $ - replicate (fromIntegral numsubentries) ifdEntry + replicateM (fromIntegral numsubentries) ifdEntry _ -> return [] let allentries = entries ++ subentries (wdth, hght) <- case (lookup ExifImageWidth allentries, @@ -415,13 +537,13 @@ exifHeader hdr = do -- we return a default width and height when -- the exif header doesn't contain these let resfactor = case lookup ResolutionUnit allentries of - Just (UnsignedShort 1) -> (100 / 254) + Just (UnsignedShort 1) -> 100 / 254 _ -> 1 let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) $ lookup XResolution allentries let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) $ lookup YResolution allentries - return $ ImageSize{ + return ImageSize{ pxX = wdth , pxY = hght , dpiX = xres diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs new file mode 100644 index 000000000..b22c08467 --- /dev/null +++ b/src/Text/Pandoc/Logging.hs @@ -0,0 +1,342 @@ +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2016-17 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Logging + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This module provides data types and functions for warnings +and info messages. + +-} +module Text.Pandoc.Logging ( + Verbosity(..) + , LogMessage(..) + , encodeLogMessages + , showLogMessage + , messageVerbosity + ) where + +import Control.Monad (mzero) +import Data.Aeson +import Data.Aeson.Encode.Pretty (Config (..), defConfig, encodePretty', + keyOrder) +import qualified Data.ByteString.Lazy as BL +import Data.Data (Data, toConstr) +import Data.List (isSuffixOf) +import qualified Data.Text as Text +import Data.Typeable (Typeable) +import GHC.Generics (Generic) +import Text.Pandoc.Definition +import Text.Parsec.Pos + +-- | Verbosity level. +data Verbosity = ERROR | WARNING | INFO + deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic) + +instance ToJSON Verbosity where + toJSON x = toJSON (show x) +instance FromJSON Verbosity where + parseJSON (String t) = + case t of + "ERROR" -> return ERROR + "WARNING" -> return WARNING + "INFO" -> return INFO + _ -> mzero + parseJSON _ = mzero + +data LogMessage = + SkippedContent String SourcePos + | CouldNotParseYamlMetadata String SourcePos + | DuplicateLinkReference String SourcePos + | DuplicateNoteReference String SourcePos + | NoteDefinedButNotUsed String SourcePos + | DuplicateIdentifier String SourcePos + | ReferenceNotFound String SourcePos + | CircularReference String SourcePos + | UndefinedToggle String SourcePos + | ParsingUnescaped String SourcePos + | CouldNotLoadIncludeFile String SourcePos + | MacroAlreadyDefined String SourcePos + | InlineNotRendered Inline + | BlockNotRendered Block + | DocxParserWarning String + | CouldNotFetchResource String String + | CouldNotDetermineImageSize String String + | CouldNotConvertImage String String + | CouldNotDetermineMimeType String + | CouldNotConvertTeXMath String String + | CouldNotParseCSS String + | Fetching String + | Extracting String + | NoTitleElement String + | NoLangSpecified + | InvalidLang String + | CouldNotHighlight String + | MissingCharacter String + | Deprecated String String + | NoTranslation String + | CouldNotLoadTranslations String String + deriving (Show, Eq, Data, Ord, Typeable, Generic) + +instance ToJSON LogMessage where + toJSON x = object $ + "verbosity" .= toJSON (messageVerbosity x) : + "type" .= toJSON (show $ toConstr x) : + case x of + SkippedContent s pos -> + ["contents" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= sourceLine pos, + "column" .= sourceColumn pos] + CouldNotParseYamlMetadata s pos -> + ["message" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + DuplicateLinkReference s pos -> + ["contents" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + NoteDefinedButNotUsed s pos -> + ["key" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + DuplicateNoteReference s pos -> + ["contents" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + DuplicateIdentifier s pos -> + ["contents" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + ReferenceNotFound s pos -> + ["contents" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + CircularReference s pos -> + ["contents" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + UndefinedToggle s pos -> + ["contents" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + ParsingUnescaped s pos -> + ["contents" .= Text.pack s, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + CouldNotLoadIncludeFile fp pos -> + ["path" .= Text.pack fp, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + MacroAlreadyDefined name pos -> + ["name" .= Text.pack name, + "source" .= Text.pack (sourceName pos), + "line" .= toJSON (sourceLine pos), + "column" .= toJSON (sourceColumn pos)] + InlineNotRendered il -> + ["contents" .= toJSON il] + BlockNotRendered bl -> + ["contents" .= toJSON bl] + DocxParserWarning s -> + ["contents" .= Text.pack s] + CouldNotFetchResource fp s -> + ["path" .= Text.pack fp, + "message" .= Text.pack s] + CouldNotDetermineImageSize fp s -> + ["path" .= Text.pack fp, + "message" .= Text.pack s] + CouldNotConvertImage fp s -> + ["path" .= Text.pack fp, + "message" .= Text.pack s] + CouldNotDetermineMimeType fp -> + ["path" .= Text.pack fp] + CouldNotConvertTeXMath s msg -> + ["contents" .= Text.pack s, + "message" .= Text.pack msg] + CouldNotParseCSS msg -> + ["message" .= Text.pack msg] + Fetching fp -> + ["path" .= Text.pack fp] + Extracting fp -> + ["path" .= Text.pack fp] + NoTitleElement fallback -> + ["fallback" .= Text.pack fallback] + NoLangSpecified -> [] + InvalidLang s -> + ["lang" .= Text.pack s] + CouldNotHighlight msg -> + ["message" .= Text.pack msg] + MissingCharacter msg -> + ["message" .= Text.pack msg] + Deprecated thing msg -> + ["thing" .= Text.pack thing, + "message" .= Text.pack msg] + NoTranslation term -> + ["term" .= Text.pack term] + CouldNotLoadTranslations lang msg -> + ["lang" .= Text.pack lang, + "message" .= Text.pack msg] + + +showPos :: SourcePos -> String +showPos pos = sn ++ "line " ++ + show (sourceLine pos) ++ " column " ++ show (sourceColumn pos) + where sn = if sourceName pos == "source" || sourceName pos == "" + then "" + else sourceName pos ++ " " + +encodeLogMessages :: [LogMessage] -> BL.ByteString +encodeLogMessages ms = + encodePretty' defConfig{ confCompare = + keyOrder [ "type", "verbosity", "contents", "message", "path", + "source", "line", "column" ] } ms + +showLogMessage :: LogMessage -> String +showLogMessage msg = + case msg of + SkippedContent s pos -> + "Skipped '" ++ s ++ "' at " ++ showPos pos + CouldNotParseYamlMetadata s pos -> + "Could not parse YAML metadata at " ++ showPos pos ++ + if null s then "" else ": " ++ s + DuplicateLinkReference s pos -> + "Duplicate link reference '" ++ s ++ "' at " ++ showPos pos + DuplicateNoteReference s pos -> + "Duplicate note reference '" ++ s ++ "' at " ++ showPos pos + NoteDefinedButNotUsed s pos -> + "Note with key '" ++ s ++ "' defined at " ++ showPos pos ++ + " but not used." + DuplicateIdentifier s pos -> + "Duplicate identifier '" ++ s ++ "' at " ++ showPos pos + ReferenceNotFound s pos -> + "Reference not found for '" ++ s ++ "' at " ++ showPos pos + CircularReference s pos -> + "Circular reference '" ++ s ++ "' at " ++ showPos pos + UndefinedToggle s pos -> + "Undefined toggle '" ++ s ++ "' at " ++ showPos pos + ParsingUnescaped s pos -> + "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos + CouldNotLoadIncludeFile fp pos -> + "Could not load include file '" ++ fp ++ "' at " ++ showPos pos + MacroAlreadyDefined name pos -> + "Macro '" ++ name ++ "' already defined, ignoring at " ++ showPos pos + InlineNotRendered il -> + "Not rendering " ++ show il + BlockNotRendered bl -> + "Not rendering " ++ show bl + DocxParserWarning s -> + "Docx parser warning: " ++ s + CouldNotFetchResource fp s -> + "Could not fetch resource '" ++ fp ++ "'" ++ + if null s then "" else ": " ++ s + CouldNotDetermineImageSize fp s -> + "Could not determine image size for '" ++ fp ++ "'" ++ + if null s then "" else ": " ++ s + CouldNotConvertImage fp s -> + "Could not convert image '" ++ fp ++ "'" ++ + if null s then "" else ": " ++ s + CouldNotDetermineMimeType fp -> + "Could not determine mime type for '" ++ fp ++ "'" + CouldNotConvertTeXMath s m -> + "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++ + if null m then "" else ':' : '\n' : m + CouldNotParseCSS m -> + "Could not parse CSS" ++ if null m then "" else ':' : '\n' : m + Fetching fp -> + "Fetching " ++ fp ++ "..." + Extracting fp -> + "Extracting " ++ fp ++ "..." + NoTitleElement fallback -> + "This document format requires a nonempty <title> element.\n" ++ + "Please specify either 'title' or 'pagetitle' in the metadata.\n" ++ + "Falling back to '" ++ fallback ++ "'" + NoLangSpecified -> + "No value for 'lang' was specified in the metadata.\n" ++ + "It is recommended that lang be specified for this format." + InvalidLang s -> + "Invalid 'lang' value '" ++ s ++ "'.\n" ++ + "Use an IETF language tag like 'en-US'." + CouldNotHighlight m -> + "Could not highlight code block:\n" ++ m + MissingCharacter m -> + "Missing character: " ++ m + Deprecated t m -> + "Deprecated: " ++ t ++ + if null m + then "" + else ". " ++ m + NoTranslation t -> + "The term " ++ t ++ " has no translation defined." + CouldNotLoadTranslations lang m -> + "Could not load translations for " ++ lang ++ + if null m then "" else '\n' : m + +messageVerbosity:: LogMessage -> Verbosity +messageVerbosity msg = + case msg of + SkippedContent{} -> INFO + CouldNotParseYamlMetadata{} -> WARNING + DuplicateLinkReference{} -> WARNING + DuplicateNoteReference{} -> WARNING + NoteDefinedButNotUsed{} -> WARNING + DuplicateIdentifier{} -> WARNING + ReferenceNotFound{} -> WARNING + CircularReference{} -> WARNING + UndefinedToggle{} -> WARNING + CouldNotLoadIncludeFile f _ + | ".sty" `isSuffixOf` f -> INFO + | otherwise -> WARNING + MacroAlreadyDefined{} -> WARNING + ParsingUnescaped{} -> INFO + InlineNotRendered{} -> INFO + BlockNotRendered{} -> INFO + DocxParserWarning{} -> INFO + CouldNotFetchResource{} -> WARNING + CouldNotDetermineImageSize{} -> WARNING + CouldNotConvertImage{} -> WARNING + CouldNotDetermineMimeType{} -> WARNING + CouldNotConvertTeXMath{} -> WARNING + CouldNotParseCSS{} -> WARNING + Fetching{} -> INFO + Extracting{} -> INFO + NoTitleElement{} -> WARNING + NoLangSpecified -> INFO + InvalidLang{} -> WARNING + CouldNotHighlight{} -> WARNING + MissingCharacter{} -> WARNING + Deprecated{} -> WARNING + NoTranslation{} -> WARNING + CouldNotLoadTranslations{} -> WARNING diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs new file mode 100644 index 000000000..79955509d --- /dev/null +++ b/src/Text/Pandoc/Lua.hs @@ -0,0 +1,83 @@ +{- +Copyright © 2017–2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Lua + Copyright : Copyright © 2017–2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Running pandoc Lua filters. +-} +module Text.Pandoc.Lua + ( LuaException (..) + , runLuaFilter + , runPandocLua + ) where + +import Control.Monad ((>=>)) +import Foreign.Lua (FromLuaStack (peek), Lua, LuaException (..), + Status (OK), ToLuaStack (push)) +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Lua.Filter (LuaFilter, walkMWithLuaFilter) +import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath) +import Text.Pandoc.Lua.Util (popValue) +import Text.Pandoc.Options (ReaderOptions) +import qualified Foreign.Lua as Lua + +-- | Run the Lua filter in @filterPath@ for a transformation to target +-- format @format@. Pandoc uses Lua init files to setup the Lua +-- interpreter. +runLuaFilter :: ReaderOptions -> FilePath -> String + -> Pandoc -> PandocIO (Either LuaException Pandoc) +runLuaFilter ropts filterPath format doc = + runPandocLua (runLuaFilter' ropts filterPath format doc) + +runLuaFilter' :: ReaderOptions -> FilePath -> String + -> Pandoc -> Lua Pandoc +runLuaFilter' ropts filterPath format pd = do + registerFormat + registerReaderOptions + registerScriptPath filterPath + top <- Lua.gettop + stat <- Lua.dofile filterPath + if stat /= OK + then do + luaErrMsg <- popValue + Lua.throwLuaError luaErrMsg + else do + newtop <- Lua.gettop + -- Use the returned filters, or the implicitly defined global filter if + -- nothing was returned. + luaFilters <- if newtop - top >= 1 + then peek (-1) + else Lua.getglobal "_G" *> fmap (:[]) popValue + runAll luaFilters pd + where + registerFormat = do + push format + Lua.setglobal "FORMAT" + + registerReaderOptions = do + push ropts + Lua.setglobal "PANDOC_READER_OPTIONS" + +runAll :: [LuaFilter] -> Pandoc -> Lua Pandoc +runAll = foldr ((>=>) . walkMWithLuaFilter) return diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs new file mode 100644 index 000000000..cc2b9d47e --- /dev/null +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -0,0 +1,170 @@ +{-# LANGUAGE FlexibleContexts #-} + +module Text.Pandoc.Lua.Filter ( LuaFilterFunction + , LuaFilter + , tryFilter + , runFilterFunction + , walkMWithLuaFilter + , walkInlines + , walkBlocks + , blockElementNames + , inlineElementNames + ) where +import Control.Monad (mplus, unless, when, (>=>)) +import Control.Monad.Catch (finally) +import Text.Pandoc.Definition +import Data.Foldable (foldrM) +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Foreign.Lua as Lua +import Foreign.Lua (FromLuaStack (peek), Lua, StackIndex, + Status (OK), ToLuaStack (push)) +import Text.Pandoc.Walk (walkM, Walkable) +import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf, + showConstr, toConstr, tyconUQname) +import Text.Pandoc.Lua.StackInstances() +import Text.Pandoc.Lua.Util (typeCheck) + +type FunctionMap = Map String LuaFilterFunction + +newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int } + +instance ToLuaStack LuaFilterFunction where + push = pushFilterFunction + +instance FromLuaStack LuaFilterFunction where + peek = registerFilterFunction + +newtype LuaFilter = LuaFilter FunctionMap + +instance FromLuaStack LuaFilter where + peek idx = + let constrs = metaFilterName : pandocFilterNames + ++ blockElementNames + ++ inlineElementNames + fn c acc = do + Lua.getfield idx c + filterFn <- Lua.tryLua (peek (-1)) + Lua.pop 1 + return $ case filterFn of + Left _ -> acc + Right f -> (c, f) : acc + in LuaFilter . Map.fromList <$> foldrM fn [] constrs + +-- | Push the filter function to the top of the stack. +pushFilterFunction :: LuaFilterFunction -> Lua () +pushFilterFunction lf = + -- The function is stored in a lua registry table, retrieve it from there. + Lua.rawgeti Lua.registryindex (functionIndex lf) + +registerFilterFunction :: StackIndex -> Lua LuaFilterFunction +registerFilterFunction idx = do + isFn <- Lua.isfunction idx + unless isFn . Lua.throwLuaError $ "Not a function at index " ++ show idx + Lua.pushvalue idx + refIdx <- Lua.ref Lua.registryindex + return $ LuaFilterFunction refIdx + +elementOrList :: FromLuaStack a => a -> Lua [a] +elementOrList x = do + let topOfStack = Lua.stackTop + elementUnchanged <- Lua.isnil topOfStack + if elementUnchanged + then [x] <$ Lua.pop 1 + else do + mbres <- Lua.peekEither topOfStack + case mbres of + Right res -> [res] <$ Lua.pop 1 + Left _ -> do + typeCheck Lua.stackTop Lua.TypeTable + Lua.toList topOfStack `finally` Lua.pop 1 + +-- | Try running a filter for the given element +tryFilter :: (Data a, FromLuaStack a, ToLuaStack a) + => LuaFilter -> a -> Lua [a] +tryFilter (LuaFilter fnMap) x = + let filterFnName = showConstr (toConstr x) + catchAllName = tyconUQname $ dataTypeName (dataTypeOf x) + in + case Map.lookup filterFnName fnMap `mplus` Map.lookup catchAllName fnMap of + Just fn -> runFilterFunction fn x *> elementOrList x + Nothing -> return [x] + +-- | Push a value to the stack via a lua filter function. The filter function is +-- called with given element as argument and is expected to return an element. +-- Alternatively, the function can return nothing or nil, in which case the +-- element is left unchanged. +runFilterFunction :: ToLuaStack a => LuaFilterFunction -> a -> Lua () +runFilterFunction lf x = do + pushFilterFunction lf + push x + z <- Lua.pcall 1 1 Nothing + when (z /= OK) $ do + let addPrefix = ("Error while running filter function: " ++) + Lua.throwTopMessageAsError' addPrefix + +walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc +walkMWithLuaFilter f = + walkInlines f >=> walkBlocks f >=> walkMeta f >=> walkPandoc f + +mconcatMapM :: (Monad m, Functor m) => (a -> m [a]) -> [a] -> m [a] +mconcatMapM f = fmap mconcat . mapM f + +hasOneOf :: LuaFilter -> [String] -> Bool +hasOneOf (LuaFilter fnMap) = any (\k -> Map.member k fnMap) + +walkInlines :: Walkable [Inline] a => LuaFilter -> a -> Lua a +walkInlines f = + if f `hasOneOf` inlineElementNames + then walkM (mconcatMapM (tryFilter f :: Inline -> Lua [Inline])) + else return + +walkBlocks :: Walkable [Block] a => LuaFilter -> a -> Lua a +walkBlocks f = + if f `hasOneOf` blockElementNames + then walkM (mconcatMapM (tryFilter f :: Block -> Lua [Block])) + else return + +walkMeta :: LuaFilter -> Pandoc -> Lua Pandoc +walkMeta (LuaFilter fnMap) = + case Map.lookup "Meta" fnMap of + Just fn -> walkM (\(Pandoc meta blocks) -> do + meta' <- runFilterFunction fn meta *> singleElement meta + return $ Pandoc meta' blocks) + Nothing -> return + +walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc +walkPandoc (LuaFilter fnMap) = + case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of + Just fn -> \x -> runFilterFunction fn x *> singleElement x + Nothing -> return + +constructorsFor :: DataType -> [String] +constructorsFor x = map show (dataTypeConstrs x) + +inlineElementNames :: [String] +inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str [])) + +blockElementNames :: [String] +blockElementNames = "Block" : constructorsFor (dataTypeOf (Para [])) + +metaFilterName :: String +metaFilterName = "Meta" + +pandocFilterNames :: [String] +pandocFilterNames = ["Pandoc", "Doc"] + +singleElement :: FromLuaStack a => a -> Lua a +singleElement x = do + elementUnchanged <- Lua.isnil (-1) + if elementUnchanged + then x <$ Lua.pop 1 + else do + mbres <- Lua.peekEither (-1) + case mbres of + Right res -> res <$ Lua.pop 1 + Left err -> do + Lua.pop 1 + Lua.throwLuaError $ + "Error while trying to get a filter's return " ++ + "value from lua stack.\n" ++ err diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs new file mode 100644 index 000000000..8fa228837 --- /dev/null +++ b/src/Text/Pandoc/Lua/Init.hs @@ -0,0 +1,117 @@ +{- +Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Lua + Copyright : Copyright © 2017-2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Functions to initialize the Lua interpreter. +-} +module Text.Pandoc.Lua.Init + ( LuaException (..) + , LuaPackageParams (..) + , runPandocLua + , initLuaState + , luaPackageParams + , registerScriptPath + ) where + +import Control.Monad.Trans (MonadIO (..)) +import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr) +import Data.IORef (newIORef, readIORef) +import Data.Version (Version (versionBranch)) +import Foreign.Lua (Lua, LuaException (..)) +import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8) +import Paths_pandoc (version) +import Text.Pandoc.Class (PandocIO, getCommonState, getUserDataDir, getMediaBag, + setMediaBag) +import Text.Pandoc.Definition (pandocTypesVersion) +import Text.Pandoc.Lua.Packages (LuaPackageParams (..), + installPandocPackageSearcher) +import Text.Pandoc.Lua.Util (loadScriptFromDataDir) + +import qualified Foreign.Lua as Lua +import qualified Foreign.Lua.Module.Text as Lua +import qualified Text.Pandoc.Definition as Pandoc + +-- | Run the lua interpreter, using pandoc's default way of environment +-- initalization. +runPandocLua :: Lua a -> PandocIO (Either LuaException a) +runPandocLua luaOp = do + luaPkgParams <- luaPackageParams + enc <- liftIO $ getForeignEncoding <* setForeignEncoding utf8 + res <- liftIO $ Lua.runLuaEither (initLuaState luaPkgParams *> luaOp) + liftIO $ setForeignEncoding enc + newMediaBag <- liftIO (readIORef (luaPkgMediaBag luaPkgParams)) + setMediaBag newMediaBag + return res + +-- | Generate parameters required to setup pandoc's lua environment. +luaPackageParams :: PandocIO LuaPackageParams +luaPackageParams = do + commonState <- getCommonState + datadir <- getUserDataDir + mbRef <- liftIO . newIORef =<< getMediaBag + return LuaPackageParams + { luaPkgCommonState = commonState + , luaPkgDataDir = datadir + , luaPkgMediaBag = mbRef + } + +-- Initialize the lua state with all required values +initLuaState :: LuaPackageParams -> Lua () +initLuaState luaPkgParams = do + Lua.openlibs + Lua.preloadTextModule "text" + Lua.push (versionBranch version) + Lua.setglobal "PANDOC_VERSION" + Lua.push (versionBranch pandocTypesVersion) + Lua.setglobal "PANDOC_API_VERSION" + installPandocPackageSearcher luaPkgParams + loadScriptFromDataDir (luaPkgDataDir luaPkgParams) "init.lua" + putConstructorsInRegistry + +registerScriptPath :: FilePath -> Lua () +registerScriptPath fp = do + Lua.push fp + Lua.setglobal "PANDOC_SCRIPT_FILE" + +putConstructorsInRegistry :: Lua () +putConstructorsInRegistry = do + Lua.getglobal "pandoc" + constrsToReg $ Pandoc.Pandoc mempty mempty + constrsToReg $ Pandoc.Str mempty + constrsToReg $ Pandoc.Para mempty + constrsToReg $ Pandoc.Meta mempty + constrsToReg $ Pandoc.MetaList mempty + constrsToReg $ Pandoc.Citation mempty mempty mempty Pandoc.AuthorInText 0 0 + putInReg "Attr" -- used for Attr type alias + Lua.pop 1 + where + constrsToReg :: Data a => a -> Lua () + constrsToReg = mapM_ (putInReg . showConstr) . dataTypeConstrs . dataTypeOf + + putInReg :: String -> Lua () + putInReg name = do + Lua.push ("pandoc." ++ name) -- name in registry + Lua.push name -- in pandoc module + Lua.rawget (Lua.nthFromTop 3) + Lua.rawset Lua.registryindex diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs new file mode 100644 index 000000000..7d942a452 --- /dev/null +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -0,0 +1,108 @@ +{- +Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Lua.Module.MediaBag + Copyright : Copyright © 2017-2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +The lua module @pandoc.mediabag@. +-} +module Text.Pandoc.Lua.Module.MediaBag + ( pushModule + ) where + +import Control.Monad (zipWithM_) +import Data.IORef (IORef, modifyIORef', readIORef) +import Data.Maybe (fromMaybe) +import Foreign.Lua (Lua, NumResults, Optional, liftIO) +import Text.Pandoc.Class (CommonState (..), fetchItem, putCommonState, + runIOorExplode, setMediaBag) +import Text.Pandoc.Lua.StackInstances () +import Text.Pandoc.Lua.Util (addFunction) +import Text.Pandoc.MIME (MimeType) + +import qualified Data.ByteString.Lazy as BL +import qualified Foreign.Lua as Lua +import qualified Text.Pandoc.MediaBag as MB + +-- +-- MediaBag submodule +-- +pushModule :: CommonState -> IORef MB.MediaBag -> Lua NumResults +pushModule commonState mediaBagRef = do + Lua.newtable + addFunction "insert" (insertMediaFn mediaBagRef) + addFunction "lookup" (lookupMediaFn mediaBagRef) + addFunction "list" (mediaDirectoryFn mediaBagRef) + addFunction "fetch" (fetch commonState mediaBagRef) + return 1 + +insertMediaFn :: IORef MB.MediaBag + -> FilePath + -> Optional MimeType + -> BL.ByteString + -> Lua NumResults +insertMediaFn mbRef fp optionalMime contents = do + liftIO . modifyIORef' mbRef $ + MB.insertMedia fp (Lua.fromOptional optionalMime) contents + return 0 + +lookupMediaFn :: IORef MB.MediaBag + -> FilePath + -> Lua NumResults +lookupMediaFn mbRef fp = do + res <- MB.lookupMedia fp <$> liftIO (readIORef mbRef) + case res of + Nothing -> Lua.pushnil *> return 1 + Just (mimeType, contents) -> do + Lua.push mimeType + Lua.push contents + return 2 + +mediaDirectoryFn :: IORef MB.MediaBag + -> Lua NumResults +mediaDirectoryFn mbRef = do + dirContents <- MB.mediaDirectory <$> liftIO (readIORef mbRef) + Lua.newtable + zipWithM_ addEntry [1..] dirContents + return 1 + where + addEntry :: Int -> (FilePath, MimeType, Int) -> Lua () + addEntry idx (fp, mimeType, contentLength) = do + Lua.newtable + Lua.push "path" *> Lua.push fp *> Lua.rawset (-3) + Lua.push "type" *> Lua.push mimeType *> Lua.rawset (-3) + Lua.push "length" *> Lua.push contentLength *> Lua.rawset (-3) + Lua.rawseti (-2) idx + +fetch :: CommonState + -> IORef MB.MediaBag + -> String + -> Lua NumResults +fetch commonState mbRef src = do + mediaBag <- liftIO $ readIORef mbRef + (bs, mimeType) <- liftIO . runIOorExplode $ do + putCommonState commonState + setMediaBag mediaBag + fetchItem src + Lua.push $ fromMaybe "" mimeType + Lua.push bs + return 2 -- returns 2 values: contents, mimetype diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs new file mode 100644 index 000000000..b9410a353 --- /dev/null +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -0,0 +1,134 @@ +{- +Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE FlexibleContexts #-} +{- | + Module : Text.Pandoc.Lua.Module.Pandoc + Copyright : Copyright © 2017-2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Pandoc module for lua. +-} +module Text.Pandoc.Lua.Module.Pandoc + ( pushModule + ) where + +import Control.Monad (when) +import Data.Default (Default (..)) +import Data.Maybe (fromMaybe) +import Data.Text (pack) +import Foreign.Lua (ToLuaStack, FromLuaStack, Lua, NumResults, Optional, liftIO) +import System.Exit (ExitCode (..)) +import Text.Pandoc.Class (runIO) +import Text.Pandoc.Definition (Block, Inline) +import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter) +import Text.Pandoc.Lua.StackInstances () +import Text.Pandoc.Lua.Util (addFunction, addValue, loadScriptFromDataDir) +import Text.Pandoc.Walk (Walkable) +import Text.Pandoc.Options (ReaderOptions (readerExtensions)) +import Text.Pandoc.Process (pipeProcess) +import Text.Pandoc.Readers (Reader (..), getReader) + +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Lazy.Char8 as BSL +import qualified Foreign.Lua as Lua + +-- | Push the "pandoc" on the lua stack. Requires the `list` module to be +-- loaded. +pushModule :: Maybe FilePath -> Lua NumResults +pushModule datadir = do + loadScriptFromDataDir datadir "pandoc.lua" + addFunction "read" readDoc + addFunction "pipe" pipeFn + addFunction "walk_block" walkBlock + addFunction "walk_inline" walkInline + return 1 + +walkElement :: (ToLuaStack a, Walkable [Inline] a, Walkable [Block] a) + => a -> LuaFilter -> Lua a +walkElement x f = walkInlines f x >>= walkBlocks f + +walkInline :: Inline -> LuaFilter -> Lua Inline +walkInline = walkElement + +walkBlock :: Block -> LuaFilter -> Lua Block +walkBlock = walkElement + +readDoc :: String -> Optional String -> Lua NumResults +readDoc content formatSpecOrNil = do + let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil) + case getReader formatSpec of + Left s -> Lua.raiseError s -- Unknown reader + Right (reader, es) -> + case reader of + TextReader r -> do + res <- liftIO $ runIO $ r def{ readerExtensions = es } (pack content) + case res of + Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc + Left s -> Lua.raiseError (show s) -- error while reading + _ -> Lua.raiseError "Only string formats are supported at the moment." + +-- | Pipes input through a command. +pipeFn :: String + -> [String] + -> BL.ByteString + -> Lua NumResults +pipeFn command args input = do + (ec, output) <- liftIO $ pipeProcess Nothing command args input + case ec of + ExitSuccess -> 1 <$ Lua.push output + ExitFailure n -> Lua.raiseError (PipeError command n output) + +data PipeError = PipeError + { pipeErrorCommand :: String + , pipeErrorCode :: Int + , pipeErrorOutput :: BL.ByteString + } + +instance FromLuaStack PipeError where + peek idx = + PipeError + <$> (Lua.getfield idx "command" *> Lua.peek (-1) <* Lua.pop 1) + <*> (Lua.getfield idx "error_code" *> Lua.peek (-1) <* Lua.pop 1) + <*> (Lua.getfield idx "output" *> Lua.peek (-1) <* Lua.pop 1) + +instance ToLuaStack PipeError where + push pipeErr = do + Lua.newtable + addValue "command" (pipeErrorCommand pipeErr) + addValue "error_code" (pipeErrorCode pipeErr) + addValue "output" (pipeErrorOutput pipeErr) + pushPipeErrorMetaTable + Lua.setmetatable (-2) + where + pushPipeErrorMetaTable :: Lua () + pushPipeErrorMetaTable = do + v <- Lua.newmetatable "pandoc pipe error" + when v $ addFunction "__tostring" pipeErrorMessage + + pipeErrorMessage :: PipeError -> Lua BL.ByteString + pipeErrorMessage (PipeError cmd errorCode output) = return $ mconcat + [ BSL.pack "Error running " + , BSL.pack cmd + , BSL.pack " (error code " + , BSL.pack $ show errorCode + , BSL.pack "): " + , if output == mempty then BSL.pack "<no output>" else output + ] diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs new file mode 100644 index 000000000..f8eb96dc7 --- /dev/null +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -0,0 +1,126 @@ +{- +Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Lua.Module.Utils + Copyright : Copyright © 2017-2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Utility module for lua, exposing internal helper functions. +-} +module Text.Pandoc.Lua.Module.Utils + ( pushModule + ) where + +import Control.Applicative ((<|>)) +import Data.Default (def) +import Foreign.Lua (FromLuaStack, Lua, LuaInteger, NumResults) +import Text.Pandoc.Class (runIO, setUserDataDir) +import Text.Pandoc.Definition (Pandoc, Meta, MetaValue, Block, Inline) +import Text.Pandoc.Lua.StackInstances () +import Text.Pandoc.Lua.Util (addFunction, popValue) + +import qualified Data.Digest.Pure.SHA as SHA +import qualified Data.ByteString.Lazy as BSL +import qualified Foreign.Lua as Lua +import qualified Text.Pandoc.Filter.JSON as JSONFilter +import qualified Text.Pandoc.Shared as Shared + +-- | Push the "pandoc.utils" module to the lua stack. +pushModule :: Maybe FilePath -> Lua NumResults +pushModule mbDatadir = do + Lua.newtable + addFunction "hierarchicalize" hierarchicalize + addFunction "normalize_date" normalizeDate + addFunction "run_json_filter" (runJSONFilter mbDatadir) + addFunction "sha1" sha1 + addFunction "stringify" stringify + addFunction "to_roman_numeral" toRomanNumeral + return 1 + +-- | Convert list of Pandoc blocks into (hierarchical) list of Elements. +hierarchicalize :: [Block] -> Lua [Shared.Element] +hierarchicalize = return . Shared.hierarchicalize + +-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We +-- limit years to the range 1601-9999 (ISO 8601 accepts greater than +-- or equal to 1583, but MS Word only accepts dates starting 1601). +-- Returns nil instead of a string if the conversion failed. +normalizeDate :: String -> Lua (Lua.Optional String) +normalizeDate = return . Lua.Optional . Shared.normalizeDate + +-- | Run a JSON filter on the given document. +runJSONFilter :: Maybe FilePath + -> Pandoc + -> FilePath + -> Lua.Optional [String] + -> Lua NumResults +runJSONFilter mbDatadir doc filterFile optArgs = do + args <- case Lua.fromOptional optArgs of + Just x -> return x + Nothing -> do + Lua.getglobal "FORMAT" + (:[]) <$> popValue + filterRes <- Lua.liftIO . runIO $ do + setUserDataDir mbDatadir + JSONFilter.apply def args filterFile doc + case filterRes of + Left err -> Lua.raiseError (show err) + Right d -> (1 :: NumResults) <$ Lua.push d + +-- | Calculate the hash of the given contents. +sha1 :: BSL.ByteString + -> Lua String +sha1 = return . SHA.showDigest . SHA.sha1 + +-- | Convert pandoc structure to a string with formatting removed. +-- Footnotes are skipped (since we don't want their contents in link +-- labels). +stringify :: AstElement -> Lua String +stringify el = return $ case el of + PandocElement pd -> Shared.stringify pd + InlineElement i -> Shared.stringify i + BlockElement b -> Shared.stringify b + MetaElement m -> Shared.stringify m + MetaValueElement m -> Shared.stringify m + +data AstElement + = PandocElement Pandoc + | MetaElement Meta + | BlockElement Block + | InlineElement Inline + | MetaValueElement MetaValue + deriving (Show) + +instance FromLuaStack AstElement where + peek idx = do + res <- Lua.tryLua $ (PandocElement <$> Lua.peek idx) + <|> (InlineElement <$> Lua.peek idx) + <|> (BlockElement <$> Lua.peek idx) + <|> (MetaElement <$> Lua.peek idx) + <|> (MetaValueElement <$> Lua.peek idx) + case res of + Right x -> return x + Left _ -> Lua.throwLuaError + "Expected an AST element, but could not parse value as such." + +-- | Convert a number < 4000 to uppercase roman numeral. +toRomanNumeral :: LuaInteger -> Lua String +toRomanNumeral = return . Shared.toRomanNumeral . fromIntegral diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs new file mode 100644 index 000000000..1e6ff22fe --- /dev/null +++ b/src/Text/Pandoc/Lua/Packages.hs @@ -0,0 +1,115 @@ +{- +Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} +{- | + Module : Text.Pandoc.Lua.Packages + Copyright : Copyright © 2017-2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Pandoc module for lua. +-} +module Text.Pandoc.Lua.Packages + ( LuaPackageParams (..) + , installPandocPackageSearcher + ) where + +import Control.Monad (forM_) +import Data.ByteString.Char8 (unpack) +import Data.IORef (IORef) +import Foreign.Lua (Lua, NumResults, liftIO) +import Text.Pandoc.Class (CommonState, readDataFile, runIO, setUserDataDir) +import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Lua.Util (dostring') + +import qualified Foreign.Lua as Lua +import Text.Pandoc.Lua.Module.Pandoc as Pandoc +import Text.Pandoc.Lua.Module.MediaBag as MediaBag +import Text.Pandoc.Lua.Module.Utils as Utils + +-- | Parameters used to create lua packages/modules. +data LuaPackageParams = LuaPackageParams + { luaPkgCommonState :: CommonState + , luaPkgDataDir :: Maybe FilePath + , luaPkgMediaBag :: IORef MediaBag + } + +-- | Insert pandoc's package loader as the first loader, making it the default. +installPandocPackageSearcher :: LuaPackageParams -> Lua () +installPandocPackageSearcher luaPkgParams = do + luaVersion <- Lua.getglobal "_VERSION" *> Lua.peek (-1) + if luaVersion == "Lua 5.1" + then Lua.getglobal' "package.loaders" + else Lua.getglobal' "package.searchers" + shiftArray + Lua.pushHaskellFunction (pandocPackageSearcher luaPkgParams) + Lua.wrapHaskellFunction + Lua.rawseti (-2) 1 + Lua.pop 1 -- remove 'package.searchers' from stack + where + shiftArray = forM_ [4, 3, 2, 1] $ \i -> do + Lua.rawgeti (-1) i + Lua.rawseti (-2) (i + 1) + +-- | Load a pandoc module. +pandocPackageSearcher :: LuaPackageParams -> String -> Lua NumResults +pandocPackageSearcher luaPkgParams pkgName = + case pkgName of + "pandoc" -> let datadir = luaPkgDataDir luaPkgParams + in pushWrappedHsFun (Pandoc.pushModule datadir) + "pandoc.mediabag" -> let st = luaPkgCommonState luaPkgParams + mbRef = luaPkgMediaBag luaPkgParams + in pushWrappedHsFun (MediaBag.pushModule st mbRef) + "pandoc.utils" -> let datadirMb = luaPkgDataDir luaPkgParams + in pushWrappedHsFun (Utils.pushModule datadirMb) + _ -> searchPureLuaLoader + where + pushWrappedHsFun f = do + Lua.pushHaskellFunction f + Lua.wrapHaskellFunction + return 1 + searchPureLuaLoader = do + let filename = pkgName ++ ".lua" + modScript <- liftIO (dataDirScript (luaPkgDataDir luaPkgParams) filename) + case modScript of + Just script -> pushWrappedHsFun (loadStringAsPackage pkgName script) + Nothing -> do + Lua.push ("no file '" ++ filename ++ "' in pandoc's datadir") + return 1 + +loadStringAsPackage :: String -> String -> Lua NumResults +loadStringAsPackage pkgName script = do + status <- dostring' script + if status == Lua.OK + then return (1 :: NumResults) + else do + msg <- Lua.peek (-1) <* Lua.pop 1 + Lua.push ("Error while loading ``" ++ pkgName ++ "`.\n" ++ msg) + Lua.lerror + return (2 :: NumResults) + +-- | Get the string representation of the pandoc module +dataDirScript :: Maybe FilePath -> FilePath -> IO (Maybe String) +dataDirScript datadir moduleFile = do + res <- runIO $ setUserDataDir datadir >> readDataFile moduleFile + return $ case res of + Left _ -> Nothing + Right s -> Just (unpack s) diff --git a/src/Text/Pandoc/Lua/StackInstances.hs b/src/Text/Pandoc/Lua/StackInstances.hs new file mode 100644 index 000000000..7e0dc20c4 --- /dev/null +++ b/src/Text/Pandoc/Lua/StackInstances.hs @@ -0,0 +1,376 @@ +{- +Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu> + 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +{- | + Module : Text.Pandoc.Lua.StackInstances + Copyright : © 2012-2018 John MacFarlane + © 2017-2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +StackValue instances for pandoc types. +-} +module Text.Pandoc.Lua.StackInstances () where + +import Control.Applicative ((<|>)) +import Control.Monad (when) +import Control.Monad.Catch (finally) +import Data.Data (showConstr, toConstr) +import Data.Foldable (forM_) +import Foreign.Lua (FromLuaStack (peek), Lua, LuaInteger, LuaNumber, StackIndex, + ToLuaStack (push), Type (..), throwLuaError, tryLua) +import Text.Pandoc.Definition +import Text.Pandoc.Extensions (Extensions) +import Text.Pandoc.Lua.Util (getTable, getTag, pushViaConstructor, typeCheck) +import Text.Pandoc.Options (ReaderOptions (..), TrackChanges) +import Text.Pandoc.Shared (Element (Blk, Sec), safeRead) + +import qualified Foreign.Lua as Lua +import qualified Data.Set as Set +import qualified Text.Pandoc.Lua.Util as LuaUtil + +defineHowTo :: String -> Lua a -> Lua a +defineHowTo ctx op = op `Lua.modifyLuaError` (("Could not " ++ ctx ++ ": ") ++) + +instance ToLuaStack Pandoc where + push (Pandoc meta blocks) = + pushViaConstructor "Pandoc" blocks meta + +instance FromLuaStack Pandoc where + peek idx = defineHowTo "get Pandoc value" $ do + typeCheck idx Lua.TypeTable + blocks <- getTable idx "blocks" + meta <- Lua.getfield idx "meta" *> (Lua.peek Lua.stackTop `finally` Lua.pop 1) + return $ Pandoc meta blocks + +instance ToLuaStack Meta where + push (Meta mmap) = + pushViaConstructor "Meta" mmap +instance FromLuaStack Meta where + peek idx = defineHowTo "get Meta value" $ do + typeCheck idx Lua.TypeTable + Meta <$> peek idx + +instance ToLuaStack MetaValue where + push = pushMetaValue +instance FromLuaStack MetaValue where + peek = peekMetaValue + +instance ToLuaStack Block where + push = pushBlock + +instance FromLuaStack Block where + peek = peekBlock + +-- Inline +instance ToLuaStack Inline where + push = pushInline + +instance FromLuaStack Inline where + peek = peekInline + +-- Citation +instance ToLuaStack Citation where + push (Citation cid prefix suffix mode noteNum hash) = + pushViaConstructor "Citation" cid mode prefix suffix noteNum hash + +instance FromLuaStack Citation where + peek idx = do + id' <- getTable idx "id" + prefix <- getTable idx "prefix" + suffix <- getTable idx "suffix" + mode <- getTable idx "mode" + num <- getTable idx "note_num" + hash <- getTable idx "hash" + return $ Citation id' prefix suffix mode num hash + +instance ToLuaStack Alignment where + push = push . show +instance FromLuaStack Alignment where + peek idx = safeRead' =<< peek idx + +instance ToLuaStack CitationMode where + push = push . show +instance FromLuaStack CitationMode where + peek idx = safeRead' =<< peek idx + +instance ToLuaStack Format where + push (Format f) = push f +instance FromLuaStack Format where + peek idx = Format <$> peek idx + +instance ToLuaStack ListNumberDelim where + push = push . show +instance FromLuaStack ListNumberDelim where + peek idx = safeRead' =<< peek idx + +instance ToLuaStack ListNumberStyle where + push = push . show +instance FromLuaStack ListNumberStyle where + peek idx = safeRead' =<< peek idx + +instance ToLuaStack MathType where + push = push . show +instance FromLuaStack MathType where + peek idx = safeRead' =<< peek idx + +instance ToLuaStack QuoteType where + push = push . show +instance FromLuaStack QuoteType where + peek idx = safeRead' =<< peek idx + +instance ToLuaStack Double where + push = push . (realToFrac :: Double -> LuaNumber) +instance FromLuaStack Double where + peek = fmap (realToFrac :: LuaNumber -> Double) . peek + +instance ToLuaStack Int where + push = push . (fromIntegral :: Int -> LuaInteger) +instance FromLuaStack Int where + peek = fmap (fromIntegral :: LuaInteger-> Int) . peek + +safeRead' :: Read a => String -> Lua a +safeRead' s = case safeRead s of + Nothing -> throwLuaError ("Could not read: " ++ s) + Just x -> return x + +-- | Push an meta value element to the top of the lua stack. +pushMetaValue :: MetaValue -> Lua () +pushMetaValue = \case + MetaBlocks blcks -> pushViaConstructor "MetaBlocks" blcks + MetaBool bool -> push bool + MetaInlines inlns -> pushViaConstructor "MetaInlines" inlns + MetaList metalist -> pushViaConstructor "MetaList" metalist + MetaMap metamap -> pushViaConstructor "MetaMap" metamap + MetaString str -> push str + +-- | Interpret the value at the given stack index as meta value. +peekMetaValue :: StackIndex -> Lua MetaValue +peekMetaValue idx = defineHowTo "get MetaValue" $ do + -- Get the contents of an AST element. + let elementContent :: FromLuaStack a => Lua a + elementContent = peek idx + luatype <- Lua.ltype idx + case luatype of + TypeBoolean -> MetaBool <$> peek idx + TypeString -> MetaString <$> peek idx + TypeTable -> do + tag <- tryLua $ getTag idx + case tag of + Right "MetaBlocks" -> MetaBlocks <$> elementContent + Right "MetaBool" -> MetaBool <$> elementContent + Right "MetaMap" -> MetaMap <$> elementContent + Right "MetaInlines" -> MetaInlines <$> elementContent + Right "MetaList" -> MetaList <$> elementContent + Right "MetaString" -> MetaString <$> elementContent + Right t -> throwLuaError ("Unknown meta tag: " ++ t) + Left _ -> do + -- no meta value tag given, try to guess. + len <- Lua.rawlen idx + if len <= 0 + then MetaMap <$> peek idx + else (MetaInlines <$> peek idx) + <|> (MetaBlocks <$> peek idx) + <|> (MetaList <$> peek idx) + _ -> throwLuaError "could not get meta value" + +-- | Push an block element to the top of the lua stack. +pushBlock :: Block -> Lua () +pushBlock = \case + BlockQuote blcks -> pushViaConstructor "BlockQuote" blcks + BulletList items -> pushViaConstructor "BulletList" items + CodeBlock attr code -> pushViaConstructor "CodeBlock" code (LuaAttr attr) + DefinitionList items -> pushViaConstructor "DefinitionList" items + Div attr blcks -> pushViaConstructor "Div" blcks (LuaAttr attr) + Header lvl attr inlns -> pushViaConstructor "Header" lvl inlns (LuaAttr attr) + HorizontalRule -> pushViaConstructor "HorizontalRule" + LineBlock blcks -> pushViaConstructor "LineBlock" blcks + OrderedList lstAttr list -> pushViaConstructor "OrderedList" list lstAttr + Null -> pushViaConstructor "Null" + Para blcks -> pushViaConstructor "Para" blcks + Plain blcks -> pushViaConstructor "Plain" blcks + RawBlock f cs -> pushViaConstructor "RawBlock" f cs + Table capt aligns widths headers rows -> + pushViaConstructor "Table" capt aligns widths headers rows + +-- | Return the value at the given index as block if possible. +peekBlock :: StackIndex -> Lua Block +peekBlock idx = defineHowTo "get Block value" $ do + typeCheck idx Lua.TypeTable + tag <- getTag idx + case tag of + "BlockQuote" -> BlockQuote <$> elementContent + "BulletList" -> BulletList <$> elementContent + "CodeBlock" -> withAttr CodeBlock <$> elementContent + "DefinitionList" -> DefinitionList <$> elementContent + "Div" -> withAttr Div <$> elementContent + "Header" -> (\(lvl, LuaAttr attr, lst) -> Header lvl attr lst) + <$> elementContent + "HorizontalRule" -> return HorizontalRule + "LineBlock" -> LineBlock <$> elementContent + "OrderedList" -> uncurry OrderedList <$> elementContent + "Null" -> return Null + "Para" -> Para <$> elementContent + "Plain" -> Plain <$> elementContent + "RawBlock" -> uncurry RawBlock <$> elementContent + "Table" -> (\(capt, aligns, widths, headers, body) -> + Table capt aligns widths headers body) + <$> elementContent + _ -> throwLuaError ("Unknown block type: " ++ tag) + where + -- Get the contents of an AST element. + elementContent :: FromLuaStack a => Lua a + elementContent = getTable idx "c" + +-- | Push an inline element to the top of the lua stack. +pushInline :: Inline -> Lua () +pushInline = \case + Cite citations lst -> pushViaConstructor "Cite" lst citations + Code attr lst -> pushViaConstructor "Code" lst (LuaAttr attr) + Emph inlns -> pushViaConstructor "Emph" inlns + Image attr alt (src,tit) -> pushViaConstructor "Image" alt src tit (LuaAttr attr) + LineBreak -> pushViaConstructor "LineBreak" + Link attr lst (src,tit) -> pushViaConstructor "Link" lst src tit (LuaAttr attr) + Note blcks -> pushViaConstructor "Note" blcks + Math mty str -> pushViaConstructor "Math" mty str + Quoted qt inlns -> pushViaConstructor "Quoted" qt inlns + RawInline f cs -> pushViaConstructor "RawInline" f cs + SmallCaps inlns -> pushViaConstructor "SmallCaps" inlns + SoftBreak -> pushViaConstructor "SoftBreak" + Space -> pushViaConstructor "Space" + Span attr inlns -> pushViaConstructor "Span" inlns (LuaAttr attr) + Str str -> pushViaConstructor "Str" str + Strikeout inlns -> pushViaConstructor "Strikeout" inlns + Strong inlns -> pushViaConstructor "Strong" inlns + Subscript inlns -> pushViaConstructor "Subscript" inlns + Superscript inlns -> pushViaConstructor "Superscript" inlns + +-- | Return the value at the given index as inline if possible. +peekInline :: StackIndex -> Lua Inline +peekInline idx = defineHowTo "get Inline value" $ do + typeCheck idx Lua.TypeTable + tag <- getTag idx + case tag of + "Cite" -> uncurry Cite <$> elementContent + "Code" -> withAttr Code <$> elementContent + "Emph" -> Emph <$> elementContent + "Image" -> (\(LuaAttr attr, lst, tgt) -> Image attr lst tgt) + <$> elementContent + "Link" -> (\(LuaAttr attr, lst, tgt) -> Link attr lst tgt) + <$> elementContent + "LineBreak" -> return LineBreak + "Note" -> Note <$> elementContent + "Math" -> uncurry Math <$> elementContent + "Quoted" -> uncurry Quoted <$> elementContent + "RawInline" -> uncurry RawInline <$> elementContent + "SmallCaps" -> SmallCaps <$> elementContent + "SoftBreak" -> return SoftBreak + "Space" -> return Space + "Span" -> withAttr Span <$> elementContent + "Str" -> Str <$> elementContent + "Strikeout" -> Strikeout <$> elementContent + "Strong" -> Strong <$> elementContent + "Subscript" -> Subscript <$> elementContent + "Superscript"-> Superscript <$> elementContent + _ -> throwLuaError ("Unknown inline type: " ++ tag) + where + -- Get the contents of an AST element. + elementContent :: FromLuaStack a => Lua a + elementContent = getTable idx "c" + +withAttr :: (Attr -> a -> b) -> (LuaAttr, a) -> b +withAttr f (attributes, x) = f (fromLuaAttr attributes) x + +-- | Wrapper for Attr +newtype LuaAttr = LuaAttr { fromLuaAttr :: Attr } + +instance ToLuaStack LuaAttr where + push (LuaAttr (id', classes, kv)) = + pushViaConstructor "Attr" id' classes kv + +instance FromLuaStack LuaAttr where + peek idx = defineHowTo "get Attr value" (LuaAttr <$> peek idx) + +-- +-- Hierarchical elements +-- +instance ToLuaStack Element where + push (Blk blk) = push blk + push (Sec lvl num attr label contents) = do + Lua.newtable + LuaUtil.addValue "level" lvl + LuaUtil.addValue "numbering" num + LuaUtil.addValue "attr" (LuaAttr attr) + LuaUtil.addValue "label" label + LuaUtil.addValue "contents" contents + pushSecMetaTable + Lua.setmetatable (-2) + where + pushSecMetaTable :: Lua () + pushSecMetaTable = do + inexistant <- Lua.newmetatable "PandocElementSec" + when inexistant $ do + LuaUtil.addValue "t" "Sec" + Lua.push "__index" + Lua.pushvalue (-2) + Lua.rawset (-3) + + +-- +-- Reader Options +-- +instance ToLuaStack Extensions where + push exts = push (show exts) + +instance ToLuaStack TrackChanges where + push = push . showConstr . toConstr + +instance ToLuaStack a => ToLuaStack (Set.Set a) where + push set = do + Lua.newtable + forM_ set (`LuaUtil.addValue` True) + +instance ToLuaStack ReaderOptions where + push ro = do + let ReaderOptions + (extensions :: Extensions) + (standalone :: Bool) + (columns :: Int) + (tabStop :: Int) + (indentedCodeClasses :: [String]) + (abbreviations :: Set.Set String) + (defaultImageExtension :: String) + (trackChanges :: TrackChanges) + (stripComments :: Bool) + = ro + Lua.newtable + LuaUtil.addValue "extensions" extensions + LuaUtil.addValue "standalone" standalone + LuaUtil.addValue "columns" columns + LuaUtil.addValue "tabStop" tabStop + LuaUtil.addValue "indentedCodeClasses" indentedCodeClasses + LuaUtil.addValue "abbreviations" abbreviations + LuaUtil.addValue "defaultImageExtension" defaultImageExtension + LuaUtil.addValue "trackChanges" trackChanges + LuaUtil.addValue "stripComments" stripComments diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs new file mode 100644 index 000000000..b7149af39 --- /dev/null +++ b/src/Text/Pandoc/Lua/Util.hs @@ -0,0 +1,187 @@ +{- +Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu> + 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE FlexibleInstances #-} +{- | + Module : Text.Pandoc.Lua.Util + Copyright : © 2012–2018 John MacFarlane, + © 2017-2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + Stability : alpha + +Lua utility functions. +-} +module Text.Pandoc.Lua.Util + ( getTag + , getTable + , addValue + , addFunction + , getRawInt + , setRawInt + , addRawInt + , typeCheck + , raiseError + , popValue + , PushViaCall + , pushViaCall + , pushViaConstructor + , loadScriptFromDataDir + , dostring' + ) where + +import Control.Monad (when) +import Control.Monad.Catch (finally) +import Data.ByteString.Char8 (unpack) +import Foreign.Lua (FromLuaStack (..), NumResults, Lua, NumArgs, StackIndex, + ToLuaStack (..), ToHaskellFunction) +import Foreign.Lua.Api (Status, call, pop, rawget, rawgeti, rawset, rawseti) +import Text.Pandoc.Class (readDataFile, runIOorExplode, setUserDataDir) + +import qualified Foreign.Lua as Lua + +-- | Adjust the stack index, assuming that @n@ new elements have been pushed on +-- the stack. +adjustIndexBy :: StackIndex -> StackIndex -> StackIndex +adjustIndexBy idx n = + if idx < 0 + then idx - n + else idx + +-- | Get value behind key from table at given index. +getTable :: (ToLuaStack a, FromLuaStack b) => StackIndex -> a -> Lua b +getTable idx key = do + push key + rawget (idx `adjustIndexBy` 1) + popValue + +-- | Add a key-value pair to the table at the top of the stack. +addValue :: (ToLuaStack a, ToLuaStack b) => a -> b -> Lua () +addValue key value = do + push key + push value + rawset (-3) + +-- | Add a function to the table at the top of the stack, using the given name. +addFunction :: ToHaskellFunction a => String -> a -> Lua () +addFunction name fn = do + Lua.push name + Lua.pushHaskellFunction fn + Lua.wrapHaskellFunction + Lua.rawset (-3) + +-- | Get value behind key from table at given index. +getRawInt :: FromLuaStack a => StackIndex -> Int -> Lua a +getRawInt idx key = do + rawgeti idx key + popValue + +-- | Set numeric key/value in table at the given index +setRawInt :: ToLuaStack a => StackIndex -> Int -> a -> Lua () +setRawInt idx key value = do + push value + rawseti (idx `adjustIndexBy` 1) key + +-- | Set numeric key/value in table at the top of the stack. +addRawInt :: ToLuaStack a => Int -> a -> Lua () +addRawInt = setRawInt (-1) + +typeCheck :: StackIndex -> Lua.Type -> Lua () +typeCheck idx expected = do + actual <- Lua.ltype idx + when (actual /= expected) $ do + expName <- Lua.typename expected + actName <- Lua.typename actual + Lua.throwLuaError $ "expected " ++ expName ++ " but got " ++ actName ++ "." + +raiseError :: ToLuaStack a => a -> Lua NumResults +raiseError e = do + Lua.push e + fromIntegral <$> Lua.lerror + +-- | Get, then pop the value at the top of the stack. +popValue :: FromLuaStack a => Lua a +popValue = do + resOrError <- Lua.peekEither (-1) + pop 1 + case resOrError of + Left err -> Lua.throwLuaError err + Right x -> return x + +-- | Helper class for pushing a single value to the stack via a lua function. +-- See @pushViaCall@. +class PushViaCall a where + pushViaCall' :: String -> Lua () -> NumArgs -> a + +instance PushViaCall (Lua ()) where + pushViaCall' fn pushArgs num = do + Lua.push fn + Lua.rawget (Lua.registryindex) + pushArgs + call num 1 + +instance (ToLuaStack a, PushViaCall b) => PushViaCall (a -> b) where + pushViaCall' fn pushArgs num x = + pushViaCall' fn (pushArgs *> push x) (num + 1) + +-- | Push an value to the stack via a lua function. The lua function is called +-- with all arguments that are passed to this function and is expected to return +-- a single value. +pushViaCall :: PushViaCall a => String -> a +pushViaCall fn = pushViaCall' fn (return ()) 0 + +-- | Call a pandoc element constructor within lua, passing all given arguments. +pushViaConstructor :: PushViaCall a => String -> a +pushViaConstructor pandocFn = pushViaCall ("pandoc." ++ pandocFn) + +-- | Load a file from pandoc's data directory. +loadScriptFromDataDir :: Maybe FilePath -> FilePath -> Lua () +loadScriptFromDataDir datadir scriptFile = do + script <- fmap unpack . Lua.liftIO . runIOorExplode $ + setUserDataDir datadir >> readDataFile scriptFile + status <- dostring' script + when (status /= Lua.OK) . + Lua.throwTopMessageAsError' $ \msg -> + "Couldn't load '" ++ scriptFile ++ "'.\n" ++ msg + +-- | Load a string and immediately perform a full garbage collection. This is +-- important to keep the program from hanging: If the program contained a call +-- to @require@, the a new loader function was created which then become +-- garbage. If that function is collected at an inopportune times, i.e. when the +-- Lua API is called via a function that doesn't allow calling back into Haskell +-- (getraw, setraw, …), then the function's finalizer, and the full program, +-- will hang. +dostring' :: String -> Lua Status +dostring' script = do + loadRes <- Lua.loadstring script + if loadRes == Lua.OK + then Lua.pcall 0 1 Nothing <* Lua.gc Lua.GCCOLLECT 0 + else return loadRes + +-- | Get the tag of a value. This is an optimized and specialized version of +-- @Lua.getfield idx "tag"@. It only checks for the field on the table at index +-- @idx@ and on its metatable, also ignoring any @__index@ value on the +-- metatable. +getTag :: StackIndex -> Lua String +getTag idx = do + top <- Lua.gettop + hasMT <- Lua.getmetatable idx + push "tag" + if hasMT then Lua.rawget (-2) else Lua.rawget (idx `adjustIndexBy` 1) + peek Lua.stackTop `finally` Lua.settop top diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index a08091217..43abe9b2f 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MIME - Copyright : Copyright (C) 2011-2016 John MacFarlane + Copyright : Copyright (C) 2011-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,11 +29,11 @@ Mime type lookup for ODT writer. -} module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType )where -import System.FilePath -import Data.Char ( toLower ) +import Data.Char (toLower) import Data.List (isPrefixOf, isSuffixOf) -import Data.Maybe (fromMaybe) import qualified Data.Map as M +import Data.Maybe (fromMaybe) +import System.FilePath type MimeType = String @@ -168,6 +168,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("emb","chemical/x-embl-dl-nucleotide") ,("embl","chemical/x-embl-dl-nucleotide") ,("emf","image/x-emf") + ,("emz","application/x-msmetafile") ,("eml","message/rfc822") ,("ent","chemical/x-ncbi-asn1-ascii") ,("eot","application/vnd.ms-fontobject") @@ -243,7 +244,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("jpeg","image/jpeg") ,("jfif","image/jpeg") ,("jpg","image/jpeg") - ,("js","application/x-javascript") + ,("js","application/javascript") ,("kar","audio/midi") ,("key","application/pgp-keys") ,("kil","application/x-killustrator") @@ -324,6 +325,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("ogv","video/ogg") ,("ogx","application/ogg") ,("old","application/x-trash") + ,("opus","audio/ogg") ,("otg","application/vnd.oasis.opendocument.graphics-template") ,("oth","application/vnd.oasis.opendocument.text-web") ,("otp","application/vnd.oasis.opendocument.presentation-template") @@ -524,4 +526,3 @@ mimeTypesList = -- List borrowed from happstack-server. ,("zip","application/zip") ,("zmt","chemical/x-mopac-input") ] - diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index eea25fadf..0d060fe1a 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- -Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2014-2015, 2017–2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MediaBag - Copyright : Copyright (C) 2014 John MacFarlane + Copyright : Copyright (C) 2014-2015, 2017–2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,20 +35,15 @@ module Text.Pandoc.MediaBag ( lookupMedia, insertMedia, mediaDirectory, - extractMediaBag ) where -import System.FilePath -import qualified System.FilePath.Posix as Posix -import System.Directory (createDirectoryIfMissing) -import qualified Data.Map as M import qualified Data.ByteString.Lazy as BL -import Control.Monad (when) -import Text.Pandoc.MIME (MimeType, getMimeTypeDef) -import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Maybe (fromMaybe) -import System.IO (stderr) import Data.Data (Data) +import qualified Data.Map as M +import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) +import System.FilePath +import qualified System.FilePath.Posix as Posix +import Text.Pandoc.MIME (MimeType, getMimeTypeDef) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' @@ -70,8 +66,8 @@ insertMedia fp mbMime contents (MediaBag mediamap) = MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap) where mime = fromMaybe fallback mbMime fallback = case takeExtension fp of - ".gz" -> getMimeTypeDef $ dropExtension fp - _ -> getMimeTypeDef fp + ".gz" -> getMimeTypeDef $ dropExtension fp + _ -> getMimeTypeDef fp -- | Lookup a media item in a 'MediaBag', returning mime type and contents. lookupMedia :: FilePath @@ -83,27 +79,5 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(String, MimeType, Int)] mediaDirectory (MediaBag mediamap) = - M.foldWithKey (\fp (mime,contents) -> - (((Posix.joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap - --- | Extract contents of MediaBag to a given directory. Print informational --- messages if 'verbose' is true. -extractMediaBag :: Bool - -> FilePath - -> MediaBag - -> IO () -extractMediaBag verbose dir (MediaBag mediamap) = do - sequence_ $ M.foldWithKey - (\fp (_ ,contents) -> - ((writeMedia verbose dir (Posix.joinPath fp, contents)):)) [] mediamap - -writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () -writeMedia verbose dir (subpath, bs) = do - -- we join and split to convert a/b/c to a\b\c on Windows; - -- in zip containers all paths use / - let fullpath = dir </> normalise subpath - createDirectoryIfMissing True $ takeDirectory fullpath - when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath - BL.writeFile fullpath bs - - + M.foldrWithKey (\fp (mime,contents) -> + ((Posix.joinPath fp, mime, fromIntegral $ BL.length contents):)) [] mediamap diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 48bc5f4eb..bd4ab252b 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE TemplateHaskell #-} {- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Options - Copyright : Copyright (C) 2012-2016 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,13 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Data structures and functions for representing parser and writer options. -} -module Text.Pandoc.Options ( Extension(..) - , pandocExtensions - , plainExtensions - , strictExtensions - , phpMarkdownExtraExtensions - , githubMarkdownExtensions - , multimarkdownExtensions +module Text.Pandoc.Options ( module Text.Pandoc.Extensions , ReaderOptions(..) , HTMLMathMethod (..) , CiteMethod (..) @@ -50,248 +46,57 @@ module Text.Pandoc.Options ( Extension(..) , def , isEnabled ) where -import Data.Set (Set) -import qualified Data.Set as Set -import Data.Default -import Text.Pandoc.Highlighting (Style, pygments) -import Text.Pandoc.MediaBag (MediaBag) +import Data.Aeson (defaultOptions) +import Data.Aeson.TH (deriveJSON) import Data.Data (Data) +import Data.Default +import qualified Data.Set as Set import Data.Typeable (Typeable) import GHC.Generics (Generic) +import Skylighting (SyntaxMap, defaultSyntaxMap) +import Text.Pandoc.Extensions +import Text.Pandoc.Highlighting (Style, pygments) --- | Individually selectable syntax extensions. -data Extension = - Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes - | Ext_inline_notes -- ^ Pandoc-style inline notes - | Ext_pandoc_title_block -- ^ Pandoc title block - | Ext_yaml_metadata_block -- ^ YAML metadata block - | Ext_mmd_title_block -- ^ Multimarkdown metadata block - | Ext_table_captions -- ^ Pandoc-style table captions - | Ext_implicit_figures -- ^ A paragraph with just an image is a figure - | Ext_simple_tables -- ^ Pandoc-style simple tables - | Ext_multiline_tables -- ^ Pandoc-style multiline tables - | Ext_grid_tables -- ^ Grid tables (pandoc, reST) - | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) - | Ext_citations -- ^ Pandoc/citeproc citations - | Ext_raw_tex -- ^ Allow raw TeX (other than math) - | Ext_raw_html -- ^ Allow raw HTML - | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ - | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] - | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] - | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) - | Ext_fenced_code_blocks -- ^ Parse fenced code blocks - | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks - | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks - | Ext_inline_code_attributes -- ^ Allow attributes on inline code - | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks - | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags - | Ext_native_spans -- ^ Use Span inlines for contents of <span> - | Ext_bracketed_spans -- ^ Bracketed spans with attributes - | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown - -- iff container has attribute 'markdown' - | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak - | Ext_link_attributes -- ^ link and image attributes - | Ext_mmd_link_attributes -- ^ MMD style reference link attributes - | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links - | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters - | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank - | Ext_startnum -- ^ Make start number of ordered list significant - | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php - | Ext_compact_definition_lists -- ^ Definition lists without - -- space between items, and disallow laziness - | Ext_example_lists -- ^ Markdown-style numbered examples - | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable - | Ext_angle_brackets_escapable -- ^ Make < and > escapable - | Ext_intraword_underscores -- ^ Treat underscore inside word as literal - | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote - | Ext_blank_before_header -- ^ Require blank line before a header - | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax - | Ext_superscript -- ^ Superscript using ^this^ syntax - | Ext_subscript -- ^ Subscript using ~this~ syntax - | Ext_hard_line_breaks -- ^ All newlines become hard line breaks - | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored - | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between - -- East Asian wide characters - | Ext_literate_haskell -- ^ Enable literate Haskell conventions - | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions - | Ext_emoji -- ^ Support emoji like :smile: - | Ext_auto_identifiers -- ^ Automatic identifiers for headers - | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers - | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} - | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] - | Ext_implicit_header_references -- ^ Implicit reference links for headers - | Ext_line_blocks -- ^ RST style line blocks - | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML - | Ext_shortcut_reference_links -- ^ Shortcut reference links - deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) - -pandocExtensions :: Set Extension -pandocExtensions = Set.fromList - [ Ext_footnotes - , Ext_inline_notes - , Ext_pandoc_title_block - , Ext_yaml_metadata_block - , Ext_table_captions - , Ext_implicit_figures - , Ext_simple_tables - , Ext_multiline_tables - , Ext_grid_tables - , Ext_pipe_tables - , Ext_citations - , Ext_raw_tex - , Ext_raw_html - , Ext_tex_math_dollars - , Ext_latex_macros - , Ext_fenced_code_blocks - , Ext_fenced_code_attributes - , Ext_backtick_code_blocks - , Ext_inline_code_attributes - , Ext_markdown_in_html_blocks - , Ext_native_divs - , Ext_native_spans - , Ext_bracketed_spans - , Ext_escaped_line_breaks - , Ext_fancy_lists - , Ext_startnum - , Ext_definition_lists - , Ext_example_lists - , Ext_all_symbols_escapable - , Ext_intraword_underscores - , Ext_blank_before_blockquote - , Ext_blank_before_header - , Ext_strikeout - , Ext_superscript - , Ext_subscript - , Ext_auto_identifiers - , Ext_header_attributes - , Ext_link_attributes - , Ext_implicit_header_references - , Ext_line_blocks - , Ext_shortcut_reference_links - ] - -plainExtensions :: Set Extension -plainExtensions = Set.fromList - [ Ext_table_captions - , Ext_implicit_figures - , Ext_simple_tables - , Ext_multiline_tables - , Ext_grid_tables - , Ext_latex_macros - , Ext_fancy_lists - , Ext_startnum - , Ext_definition_lists - , Ext_example_lists - , Ext_intraword_underscores - , Ext_blank_before_blockquote - , Ext_blank_before_header - , Ext_strikeout - ] - -phpMarkdownExtraExtensions :: Set Extension -phpMarkdownExtraExtensions = Set.fromList - [ Ext_footnotes - , Ext_pipe_tables - , Ext_raw_html - , Ext_markdown_attribute - , Ext_fenced_code_blocks - , Ext_definition_lists - , Ext_intraword_underscores - , Ext_header_attributes - , Ext_link_attributes - , Ext_abbreviations - , Ext_shortcut_reference_links - ] - -githubMarkdownExtensions :: Set Extension -githubMarkdownExtensions = Set.fromList - [ Ext_angle_brackets_escapable - , Ext_pipe_tables - , Ext_raw_html - , Ext_fenced_code_blocks - , Ext_auto_identifiers - , Ext_ascii_identifiers - , Ext_backtick_code_blocks - , Ext_autolink_bare_uris - , Ext_intraword_underscores - , Ext_strikeout - , Ext_hard_line_breaks - , Ext_emoji - , Ext_lists_without_preceding_blankline - , Ext_shortcut_reference_links - ] - -multimarkdownExtensions :: Set Extension -multimarkdownExtensions = Set.fromList - [ Ext_pipe_tables - , Ext_raw_html - , Ext_markdown_attribute - , Ext_mmd_link_attributes - -- , Ext_raw_tex - -- Note: MMD's raw TeX syntax requires raw TeX to be - -- enclosed in HTML comment - , Ext_tex_math_double_backslash - , Ext_intraword_underscores - , Ext_mmd_title_block - , Ext_footnotes - , Ext_definition_lists - , Ext_all_symbols_escapable - , Ext_implicit_header_references - , Ext_auto_identifiers - , Ext_mmd_header_identifiers - , Ext_implicit_figures - -- Note: MMD's syntax for superscripts and subscripts - -- is a bit more permissive than pandoc's, allowing - -- e^2 and a~1 instead of e^2^ and a~1~, so even with - -- these options we don't have full support for MMD - -- superscripts and subscripts, but there's no reason - -- not to include these: - , Ext_superscript - , Ext_subscript - ] - -strictExtensions :: Set Extension -strictExtensions = Set.fromList - [ Ext_raw_html - , Ext_shortcut_reference_links - ] +class HasSyntaxExtensions a where + getExtensions :: a -> Extensions data ReaderOptions = ReaderOptions{ - readerExtensions :: Set Extension -- ^ Syntax extensions - , readerSmart :: Bool -- ^ Smart punctuation - , readerStandalone :: Bool -- ^ Standalone document with header - , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX - , readerColumns :: Int -- ^ Number of columns in terminal - , readerTabStop :: Int -- ^ Tab stop - , readerOldDashes :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior - -- in parsing dashes; -- is em-dash; - -- - before numerial is en-dash - , readerApplyMacros :: Bool -- ^ Apply macros to TeX math - , readerIndentedCodeClasses :: [String] -- ^ Default classes for + readerExtensions :: Extensions -- ^ Syntax extensions + , readerStandalone :: Bool -- ^ Standalone document with header + , readerColumns :: Int -- ^ Number of columns in terminal + , readerTabStop :: Int -- ^ Tab stop + , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks + , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations , readerDefaultImageExtension :: String -- ^ Default extension for images - , readerTrace :: Bool -- ^ Print debugging info - , readerTrackChanges :: TrackChanges - , readerFileScope :: Bool -- ^ Parse before combining + , readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx + , readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML } deriving (Show, Read, Data, Typeable, Generic) +instance HasSyntaxExtensions ReaderOptions where + getExtensions opts = readerExtensions opts + instance Default ReaderOptions where def = ReaderOptions{ - readerExtensions = pandocExtensions - , readerSmart = False + readerExtensions = emptyExtensions , readerStandalone = False - , readerParseRaw = False , readerColumns = 80 , readerTabStop = 4 - , readerOldDashes = False - , readerApplyMacros = True , readerIndentedCodeClasses = [] + , readerAbbreviations = defaultAbbrevs , readerDefaultImageExtension = "" - , readerTrace = False , readerTrackChanges = AcceptChanges - , readerFileScope = False + , readerStripComments = False } +defaultAbbrevs :: Set.Set String +defaultAbbrevs = Set.fromList + [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", + "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", + "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.", + "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.", + "ch.", "sec.", "cf.", "cp."] + -- -- Writer options -- @@ -303,9 +108,9 @@ data HTMLMathMethod = PlainMath | JsMath (Maybe String) -- url of jsMath load script | GladTeX | WebTeX String -- url of TeX->image script. - | MathML (Maybe String) -- url of MathMLinHTML.js + | MathML | MathJax String -- url of MathJax.js - | KaTeX String String -- url of stylesheet and katex.js + | KaTeX String -- url of KaTeX files deriving (Show, Read, Eq, Data, Typeable, Generic) data CiteMethod = Citeproc -- use citeproc to render them @@ -356,51 +161,39 @@ data ReferenceLocation = EndOfBlock -- ^ End of block -- | Options for writers data WriterOptions = WriterOptions - { writerTemplate :: Maybe String -- ^ Template to use - , writerVariables :: [(String, String)] -- ^ Variables to set in template - , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs - , writerTableOfContents :: Bool -- ^ Include table of contents - , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous? - , writerIncremental :: Bool -- ^ True if lists should be incremental - , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML - , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) - , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ... - , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , writerExtensions :: Set Extension -- ^ Markdown extensions that can be used - , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions - , writerWrapText :: WrapOption -- ^ Option for wrapping text - , writerColumns :: Int -- ^ Characters in a line (for text wrapping) - , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails - , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML + { writerTemplate :: Maybe String -- ^ Template to use + , writerVariables :: [(String, String)] -- ^ Variables to set in template + , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs + , writerTableOfContents :: Bool -- ^ Include table of contents + , writerIncremental :: Bool -- ^ True if lists should be incremental + , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML + , writerNumberSections :: Bool -- ^ Number sections in LaTeX + , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ... + , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML + , writerExtensions :: Extensions -- ^ Markdown extensions that can be used + , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions + , writerWrapText :: WrapOption -- ^ Option for wrapping text + , writerColumns :: Int -- ^ Characters in a line (for text wrapping) + , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails + , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML -- and for footnote marks in markdown - , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file - , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory - , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerDocbook5 :: Bool -- ^ Produce DocBook5 - , writerHtml5 :: Bool -- ^ Produce HTML5 - , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML - , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show - , writerSlideLevel :: Maybe Int -- ^ Force header level of slides - , writerTopLevelDivision :: TopLevelDivision -- ^ Type of top-level divisions - , writerListings :: Bool -- ^ Use listings package for code - , writerHighlight :: Bool -- ^ Highlight source code - , writerHighlightStyle :: Style -- ^ Style to use for highlighting - , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown - , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex - , writerEpubVersion :: Maybe EPUBVersion -- ^ Nothing or EPUB version - , writerEpubMetadata :: String -- ^ Metadata to include in EPUB - , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line - , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed - , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) - , writerTOCDepth :: Int -- ^ Number of levels to include in TOC - , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified - , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified - , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader - , writerVerbose :: Bool -- ^ Verbose debugging output - , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine + , writerCiteMethod :: CiteMethod -- ^ How to print cites + , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML + , writerSlideLevel :: Maybe Int -- ^ Force header level of slides + , writerTopLevelDivision :: TopLevelDivision -- ^ Type of top-level divisions + , writerListings :: Bool -- ^ Use listings package for code + , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting + -- (Nothing = no highlighting) + , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown + , writerEpubSubdirectory :: String -- ^ Subdir for epub in OCF + , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB + , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed + , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) + , writerTOCDepth :: Int -- ^ Number of levels to include in TOC + , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown + , writerSyntaxMap :: SyntaxMap } deriving (Show, Data, Typeable, Generic) instance Default WriterOptions where @@ -408,48 +201,48 @@ instance Default WriterOptions where , writerVariables = [] , writerTabStop = 4 , writerTableOfContents = False - , writerSlideVariant = NoSlides , writerIncremental = False , writerHTMLMathMethod = PlainMath - , writerIgnoreNotes = False , writerNumberSections = False , writerNumberOffset = [0,0,0,0,0,0] , writerSectionDivs = False - , writerExtensions = pandocExtensions + , writerExtensions = emptyExtensions , writerReferenceLinks = False , writerDpi = 96 , writerWrapText = WrapAuto , writerColumns = 72 , writerEmailObfuscation = NoObfuscation , writerIdentifierPrefix = "" - , writerSourceURL = Nothing - , writerUserDataDir = Nothing , writerCiteMethod = Citeproc - , writerDocbook5 = False - , writerHtml5 = False , writerHtmlQTags = False - , writerBeamer = False , writerSlideLevel = Nothing , writerTopLevelDivision = TopLevelDefault , writerListings = False - , writerHighlight = False - , writerHighlightStyle = pygments + , writerHighlightStyle = Just pygments , writerSetextHeaders = True - , writerTeXLigatures = True - , writerEpubVersion = Nothing - , writerEpubMetadata = "" - , writerEpubStylesheet = Nothing + , writerEpubSubdirectory = "EPUB" + , writerEpubMetadata = Nothing , writerEpubFonts = [] , writerEpubChapterLevel = 1 , writerTOCDepth = 3 - , writerReferenceODT = Nothing - , writerReferenceDocx = Nothing - , writerMediaBag = mempty - , writerVerbose = False - , writerLaTeXArgs = [] + , writerReferenceDoc = Nothing , writerReferenceLocation = EndOfDocument + , writerSyntaxMap = defaultSyntaxMap } +instance HasSyntaxExtensions WriterOptions where + getExtensions opts = writerExtensions opts + -- | Returns True if the given extension is enabled. -isEnabled :: Extension -> WriterOptions -> Bool -isEnabled ext opts = ext `Set.member` (writerExtensions opts) +isEnabled :: HasSyntaxExtensions a => Extension -> a -> Bool +isEnabled ext opts = ext `extensionEnabled` getExtensions opts + +$(deriveJSON defaultOptions ''ReaderOptions) +$(deriveJSON defaultOptions ''HTMLMathMethod) +$(deriveJSON defaultOptions ''CiteMethod) +$(deriveJSON defaultOptions ''ObfuscationMethod) +$(deriveJSON defaultOptions ''HTMLSlideVariant) +$(deriveJSON defaultOptions ''TrackChanges) +$(deriveJSON defaultOptions ''WrapOption) +$(deriveJSON defaultOptions ''TopLevelDivision) +$(deriveJSON defaultOptions ''ReferenceLocation) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 9faff1816..5f41d6c55 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012-2016 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,49 +32,60 @@ Conversion of LaTeX documents to PDF. -} module Text.Pandoc.PDF ( makePDF ) where +import qualified Codec.Picture as JP +import qualified Control.Exception as E +import Control.Monad (unless, when) +import Control.Monad.Trans (MonadIO (..)) +import qualified Data.ByteString as BS import Data.ByteString.Lazy (ByteString) -import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BC -import qualified Data.ByteString as BS +import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) -import System.Exit (ExitCode (..)) -import System.FilePath -import System.IO (stderr, stdout) -import System.IO.Temp (withTempFile) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.IO as TextIO import System.Directory -import Data.Digest.Pure.SHA (showDigest, sha1) import System.Environment -import Control.Monad (unless, when, (<=<)) -import qualified Control.Exception as E -import Data.List (isInfixOf) -import Data.Maybe (fromMaybe) -import qualified Text.Pandoc.UTF8 as UTF8 +import System.Exit (ExitCode (..)) +import System.FilePath +import System.IO (stdout) +import System.IO.Temp (withTempDirectory, withTempFile) +#if MIN_VERSION_base(4,8,3) +import System.IO.Error (IOError, isDoesNotExistError) +#else +import System.IO.Error (isDoesNotExistError) +#endif import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError (PandocPDFProgramNotFoundError)) +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..)) +import Text.Pandoc.Process (pipeProcess) +import Text.Pandoc.Shared (inDirectory, stringify, withTempDir) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk (walkM) -import Text.Pandoc.Shared (fetchItem', warn, withTempDir, inDirectory, - stringify) import Text.Pandoc.Writers.Shared (getField, metaToJSON) -import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..)) -import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) -import Text.Pandoc.Process (pipeProcess) -import qualified Data.ByteString.Lazy as BL -import qualified Codec.Picture as JP #ifdef _WINDOWS import Data.List (intercalate) #endif +import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getCommonState, + getVerbosity, putCommonState, report, runIO, + runIOorExplode, setVerbosity) +import Text.Pandoc.Logging #ifdef _WINDOWS changePathSeparators :: FilePath -> FilePath changePathSeparators = intercalate "/" . splitDirectories #endif -makePDF :: String -- ^ pdf creator (pdflatex, lualatex, - -- xelatex, context, wkhtmltopdf) - -> (WriterOptions -> Pandoc -> String) -- ^ writer +makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex, + -- wkhtmltopdf, weasyprint, prince, context, pdfroff) + -> [String] -- ^ arguments to pass to pdf creator + -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer -> WriterOptions -- ^ options -> Pandoc -- ^ document - -> IO (Either ByteString ByteString) -makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do + -> PandocIO (Either ByteString ByteString) +makePDF "wkhtmltopdf" pdfargs writer opts doc@(Pandoc meta _) = do let mathArgs = case writerHTMLMathMethod opts of -- with MathJax, wait til all math is rendered: MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });", @@ -80,8 +93,7 @@ makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do _ -> [] meta' <- metaToJSON opts (return . stringify) (return . stringify) meta let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd - let args = mathArgs ++ - concatMap toArgs + let args = pdfargs ++ mathArgs ++ concatMap toArgs [("page-size", getField "papersize" meta') ,("title", getField "title" meta') ,("margin-bottom", fromMaybe (Just "1.2in") @@ -92,55 +104,61 @@ makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do (getField "margin-right" meta')) ,("margin-left", fromMaybe (Just "1.25in") (getField "margin-left" meta')) + ,("footer-html", fromMaybe Nothing + (getField "footer-html" meta')) + ,("header-html", fromMaybe Nothing + (getField "header-html" meta')) ] - let source = writer opts doc - html2pdf (writerVerbose opts) args source -makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do - doc' <- handleImages opts tmpdir doc - let source = writer opts doc' - args = writerLaTeXArgs opts - case takeBaseName program of - "context" -> context2pdf (writerVerbose opts) tmpdir source - prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] - -> tex2pdf' (writerVerbose opts) args tmpdir program source - _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program + source <- writer opts doc + verbosity <- getVerbosity + liftIO $ html2pdf verbosity "wkhtmltopdf" args source +makePDF "weasyprint" pdfargs writer opts doc = do + source <- writer opts doc + verbosity <- getVerbosity + liftIO $ html2pdf verbosity "weasyprint" pdfargs source +makePDF "prince" pdfargs writer opts doc = do + source <- writer opts doc + verbosity <- getVerbosity + liftIO $ html2pdf verbosity "prince" pdfargs source +makePDF "pdfroff" pdfargs writer opts doc = do + source <- writer opts doc + let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i", + "--no-toc-relocation"] ++ pdfargs + verbosity <- getVerbosity + liftIO $ ms2pdf verbosity args source +makePDF program pdfargs writer opts doc = do + let withTemp = if takeBaseName program == "context" + then withTempDirectory "." + else withTempDir + commonState <- getCommonState + verbosity <- getVerbosity + liftIO $ withTemp "tex2pdf." $ \tmpdir -> do + source <- runIOorExplode $ do + putCommonState commonState + doc' <- handleImages tmpdir doc + writer opts doc' + case takeBaseName program of + "context" -> context2pdf verbosity tmpdir source + prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] + -> tex2pdf' verbosity pdfargs tmpdir program source + _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program -handleImages :: WriterOptions - -> FilePath -- ^ temp dir to store images +handleImages :: FilePath -- ^ temp dir to store images -> Pandoc -- ^ document - -> IO Pandoc -handleImages opts tmpdir = walkM (convertImages tmpdir) <=< walkM (handleImage' opts tmpdir) - -handleImage' :: WriterOptions - -> FilePath - -> Inline - -> IO Inline -handleImage' opts tmpdir (Image attr ils (src,tit)) = do - exists <- doesFileExist src - if exists - then return $ Image attr ils (src,tit) - else do - res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src - case res of - Right (contents, Just mime) -> do - let ext = fromMaybe (takeExtension src) $ - extensionFromMimeType mime - let basename = showDigest $ sha1 $ BL.fromChunks [contents] - let fname = tmpdir </> basename <.> ext - BS.writeFile fname contents - return $ Image attr ils (fname,tit) - _ -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - -- return alt text - return $ Emph ils -handleImage' _ _ x = return x + -> PandocIO Pandoc +handleImages tmpdir doc = + fillMediaBag doc >>= + extractMedia tmpdir >>= + walkM (convertImages tmpdir) -convertImages :: FilePath -> Inline -> IO Inline +convertImages :: FilePath -> Inline -> PandocIO Inline convertImages tmpdir (Image attr ils (src, tit)) = do - img <- convertImage tmpdir src + img <- liftIO $ convertImage tmpdir src newPath <- case img of - Left e -> src <$ warn e + Left e -> do + report $ CouldNotConvertImage src e + return src Right fp -> return fp return (Image attr ils (newPath, tit)) convertImages _ x = return x @@ -152,29 +170,43 @@ convertImage tmpdir fname = Just "image/png" -> doNothing Just "image/jpeg" -> doNothing Just "application/pdf" -> doNothing + Just "image/svg+xml" -> E.catch (do + (exit, _) <- pipeProcess Nothing "rsvg-convert" + ["-f","pdf","-a","-o",pdfOut,fname] BL.empty + if exit == ExitSuccess + then return $ Right pdfOut + else return $ Left "conversion from SVG failed") + (\(e :: E.SomeException) -> return $ Left $ + "check that rsvg2pdf is in path.\n" ++ + show e) _ -> JP.readImage fname >>= \res -> case res of - Left _ -> return $ Left $ "Unable to convert `" ++ - fname ++ "' for use with pdflatex." + Left e -> return $ Left e Right img -> - E.catch (Right fileOut <$ JP.savePngImage fileOut img) $ + E.catch (Right pngOut <$ JP.savePngImage pngOut img) $ \(e :: E.SomeException) -> return (Left (show e)) where - fileOut = replaceDirectory (replaceExtension fname ".png") tmpdir + pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir + pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir mime = getMimeType fname doNothing = return (Right fname) -tex2pdf' :: Bool -- ^ Verbose output +tex2pdf' :: Verbosity -- ^ Verbosity level -> [String] -- ^ Arguments to the latex-engine -> FilePath -- ^ temp directory for output -> String -- ^ tex program - -> String -- ^ tex source + -> Text -- ^ tex source -> IO (Either ByteString ByteString) -tex2pdf' verbose args tmpDir program source = do - let numruns = if "\\tableofcontents" `isInfixOf` source +tex2pdf' verbosity args tmpDir program source = do + let numruns = if "\\tableofcontents" `T.isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source + (exit, log', mbPdf) <- E.catch + (runTeXProgram verbosity program args 1 numruns tmpDir source) + (\(e :: IOError) -> if isDoesNotExistError e + then E.throwIO $ + PandocPDFProgramNotFoundError program + else E.throwIO e) case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -182,11 +214,26 @@ tex2pdf' verbose args tmpDir program source = do case logmsg of x | "! Package inputenc Error" `BC.isPrefixOf` x && program /= "xelatex" - -> "\nTry running pandoc with --latex-engine=xelatex." + -> "\nTry running pandoc with --pdf-engine=xelatex." _ -> "" return $ Left $ logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left "" - (ExitSuccess, Just pdf) -> return $ Right pdf + (ExitSuccess, Just pdf) -> do + missingCharacterWarnings verbosity log' + return $ Right pdf + +missingCharacterWarnings :: Verbosity -> ByteString -> IO () +missingCharacterWarnings verbosity log' = do + let ls = BC.lines log' + let isMissingCharacterWarning = BC.isPrefixOf "Missing character: " + let warnings = [ UTF8.toStringLazy (BC.drop 19 l) + | l <- ls + , isMissingCharacterWarning l + ] + runIO $ do + setVerbosity verbosity + mapM_ (report . MissingCharacter) warnings + return () -- parsing output @@ -212,12 +259,12 @@ extractConTeXtMsg log' = do -- Run a TeX program on an input bytestring and return (exit code, -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. -runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String - -> IO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram verbose program args runNumber numRuns tmpDir source = do +runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath + -> Text -> IO (ExitCode, ByteString, Maybe ByteString) +runTeXProgram verbosity program args runNumber numRuns tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file - unless exists $ UTF8.writeFile file source + unless exists $ BS.writeFile file $ UTF8.fromText source #ifdef _WINDOWS -- note: we want / even on Windows, for TexLive let tmpDir' = changePathSeparators tmpDir @@ -234,7 +281,7 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - when (verbose && runNumber == 1) $ do + when (verbosity >= INFO && runNumber == 1) $ do putStrLn "[makePDF] temp dir:" putStrLn tmpDir' putStrLn "[makePDF] Command line:" @@ -244,16 +291,15 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do mapM_ print env'' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file' ++ ":" - B.readFile file' >>= B.putStr + BL.readFile file' >>= BL.putStr putStr "\n" - (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty - when verbose $ do + (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty + when (verbosity >= INFO) $ do putStrLn $ "[makePDF] Run #" ++ show runNumber - B.hPutStr stdout out - B.hPutStr stderr err + BL.hPutStr stdout out putStr "\n" if runNumber <= numRuns - then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source + then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source else do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir pdfExists <- doesFileExist pdfFile @@ -261,36 +307,75 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do -- We read PDF as a strict bytestring to make sure that the -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing - return (exit, out <> err, pdf) + -- Note that some things like Missing character warnings + -- appear in the log but not on stderr, so we prefer the log: + let logFile = replaceExtension file ".log" + logExists <- doesFileExist logFile + log' <- if logExists + then BL.readFile logFile + else return out + return (exit, log', pdf) + +ms2pdf :: Verbosity + -> [String] + -> Text + -> IO (Either ByteString ByteString) +ms2pdf verbosity args source = do + env' <- getEnvironment + when (verbosity >= INFO) $ do + putStrLn "[makePDF] Command line:" + putStrLn $ "pdfroff " ++ " " ++ unwords (map show args) + putStr "\n" + putStrLn "[makePDF] Environment:" + mapM_ print env' + putStr "\n" + putStrLn "[makePDF] Contents:\n" + putStr $ T.unpack source + putStr "\n" + (exit, out) <- E.catch + (pipeProcess (Just env') "pdfroff" args + (BL.fromStrict $ UTF8.fromText source)) + (\(e :: IOError) -> if isDoesNotExistError e + then E.throwIO $ + PandocPDFProgramNotFoundError "pdfroff" + else E.throwIO e) + when (verbosity >= INFO) $ do + BL.hPutStr stdout out + putStr "\n" + return $ case exit of + ExitFailure _ -> Left out + ExitSuccess -> Right out -html2pdf :: Bool -- ^ Verbose output - -> [String] -- ^ Args to wkhtmltopdf - -> String -- ^ HTML5 source +html2pdf :: Verbosity -- ^ Verbosity level + -> String -- ^ Program (wkhtmltopdf, weasyprint or prince) + -> [String] -- ^ Args to program + -> Text -- ^ HTML5 source -> IO (Either ByteString ByteString) -html2pdf verbose args source = do - file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp +html2pdf verbosity program args source = do pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp - UTF8.writeFile file source - let programArgs = args ++ [file, pdfFile] + let pdfFileArgName = ["-o" | program == "prince"] + let programArgs = args ++ ["-"] ++ pdfFileArgName ++ [pdfFile] env' <- getEnvironment - when verbose $ do + when (verbosity >= INFO) $ do putStrLn "[makePDF] Command line:" - putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs) + putStrLn $ program ++ " " ++ unwords (map show programArgs) putStr "\n" putStrLn "[makePDF] Environment:" mapM_ print env' putStr "\n" - putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr + putStrLn "[makePDF] Contents of intermediate HTML:" + TextIO.putStr source putStr "\n" - (exit, out, err) <- pipeProcess (Just env') "wkhtmltopdf" - programArgs BL.empty - removeFile file - when verbose $ do - B.hPutStr stdout out - B.hPutStr stderr err + (exit, out) <- E.catch + (pipeProcess (Just env') program programArgs $ BL.fromStrict $ UTF8.fromText source) + (\(e :: IOError) -> if isDoesNotExistError e + then E.throwIO $ + PandocPDFProgramNotFoundError program + else E.throwIO e) + when (verbosity >= INFO) $ do + BL.hPutStr stdout out putStr "\n" pdfExists <- doesFileExist pdfFile mbPdf <- if pdfExists @@ -298,23 +383,22 @@ html2pdf verbose args source = do -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. then do - res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + res <- (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile removeFile pdfFile return res else return Nothing - let log' = out <> err return $ case (exit, mbPdf) of - (ExitFailure _, _) -> Left log' + (ExitFailure _, _) -> Left out (ExitSuccess, Nothing) -> Left "" (ExitSuccess, Just pdf) -> Right pdf -context2pdf :: Bool -- ^ Verbose output +context2pdf :: Verbosity -- ^ Verbosity level -> FilePath -- ^ temp directory for output - -> String -- ^ ConTeXt source + -> Text -- ^ ConTeXt source -> IO (Either ByteString ByteString) -context2pdf verbose tmpDir source = inDirectory tmpDir $ do +context2pdf verbosity tmpDir source = inDirectory tmpDir $ do let file = "input.tex" - UTF8.writeFile file source + BS.writeFile file $ UTF8.fromText source #ifdef _WINDOWS -- note: we want / even on Windows, for TexLive let tmpDir' = changePathSeparators tmpDir @@ -323,27 +407,26 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do #endif let programArgs = "--batchmode" : [file] env' <- getEnvironment - let sep = [searchPathSeparator] - let texinputs = maybe (".." ++ sep) ((".." ++ sep) ++) - $ lookup "TEXINPUTS" env' - let env'' = ("TEXINPUTS", texinputs) : - [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - when verbose $ do + when (verbosity >= INFO) $ do putStrLn "[makePDF] temp dir:" putStrLn tmpDir' putStrLn "[makePDF] Command line:" putStrLn $ "context" ++ " " ++ unwords (map show programArgs) putStr "\n" putStrLn "[makePDF] Environment:" - mapM_ print env'' + mapM_ print env' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr + BL.readFile file >>= BL.putStr putStr "\n" - (exit, out, err) <- pipeProcess (Just env'') "context" programArgs BL.empty - when verbose $ do - B.hPutStr stdout out - B.hPutStr stderr err + (exit, out) <- E.catch + (pipeProcess (Just env') "context" programArgs BL.empty) + (\(e :: IOError) -> if isDoesNotExistError e + then E.throwIO $ + PandocPDFProgramNotFoundError "context" + else E.throwIO e) + when (verbosity >= INFO) $ do + BL.hPutStr stdout out putStr "\n" let pdfFile = replaceExtension file ".pdf" pdfExists <- doesFileExist pdfFile @@ -351,13 +434,11 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do -- We read PDF as a strict bytestring to make sure that the -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing - let log' = out <> err case (exit, mbPdf) of (ExitFailure _, _) -> do - let logmsg = extractConTeXtMsg log' + let logmsg = extractConTeXtMsg out return $ Left logmsg (ExitSuccess, Nothing) -> return $ Left "" (ExitSuccess, Just pdf) -> return $ Right pdf - diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 110e34c6a..82abcb440 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,11 +1,12 @@ -{-# LANGUAGE - FlexibleContexts -, GeneralizedNewtypeDeriving -, TypeSynonymInstances -, MultiParamTypeClasses -, FlexibleInstances #-} +{-# LANGUAGE ExplicitForAll #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TypeSynonymInstances #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -33,8 +34,14 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA A utility library with parsers used in pandoc readers. -} -module Text.Pandoc.Parsing ( anyLine, +module Text.Pandoc.Parsing ( takeWhileP, + takeP, + anyLine, + anyLineNewline, + indentWith, many1Till, + manyUntil, + sepBy1', notFollowedBy', oneOfStrings, oneOfStringsCI, @@ -43,9 +50,12 @@ module Text.Pandoc.Parsing ( anyLine, skipSpaces, blankline, blanklines, + gobbleSpaces, + gobbleAtMostSpaces, enclosed, stringAnyCase, parseFromString, + parseFromString', lineClump, charsInBalanced, romanNumeral, @@ -57,6 +67,11 @@ module Text.Pandoc.Parsing ( anyLine, withRaw, escaped, characterReference, + upperRoman, + lowerRoman, + decimal, + lowerAlpha, + upperAlpha, anyOrderedListMarker, orderedListMarker, charRef, @@ -64,20 +79,24 @@ module Text.Pandoc.Parsing ( anyLine, tableWith, widthsFromIndices, gridTableWith, + gridTableWith', readWith, - readWithWarnings, readWithM, testStringWith, guardEnabled, guardDisabled, updateLastStrPos, notAfterString, + logMessage, + reportLogMessages, ParserState (..), HasReaderOptions (..), HasHeaderMap (..), HasIdentifierList (..), HasMacros (..), + HasLogMessages (..), HasLastStrPosition (..), + HasIncludeFiles (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -100,20 +119,26 @@ module Text.Pandoc.Parsing ( anyLine, dash, nested, citeKey, - macro, - applyMacros', Parser, ParserT, - F(..), + F, + Future(..), runF, askF, asksF, + returnF, + trimInlinesF, token, + (<+?>), + extractIdClass, + insertIncludedFile, + insertIncludedFileF, -- * Re-exports from Text.Pandoc.Parsec Stream, runParser, runParserT, parse, + tokenPrim, anyToken, getInput, setInput, @@ -162,61 +187,106 @@ module Text.Pandoc.Parsing ( anyLine, sourceLine, setSourceColumn, setSourceLine, + incSourceColumn, newPos, - addWarning, - (<+?>), - extractIdClass + Line, + Column ) where +import Control.Monad.Identity +import Control.Monad.Reader +import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isHexDigit, + isPunctuation, isSpace, ord, toLower, toUpper) +import Data.Default +import Data.List (intercalate, isSuffixOf, transpose) +import qualified Data.Map as M +import Data.Maybe (mapMaybe, fromMaybe) +import Data.Monoid ((<>)) +import qualified Data.Set as Set +import Data.Text (Text) +import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Asciify (toAsciiChar) +import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, readFileFromDirs, report) import Text.Pandoc.Definition +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..)) -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.Readers.LaTeX.Types (Macro) +import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) +import Text.Pandoc.XML (fromEntities) import Text.Parsec hiding (token) -import Text.Parsec.Pos (newPos) -import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, - isHexDigit, isSpace, isPunctuation ) -import Data.List ( intercalate, transpose, isSuffixOf ) -import Text.Pandoc.Shared -import qualified Data.Map as M -import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, - parseMacroDefinitions) -import Text.HTML.TagSoup.Entity ( lookupEntity ) -import Text.Pandoc.Asciify (toAsciiChar) -import Data.Monoid ((<>)) -import Data.Default -import qualified Data.Set as Set -import Control.Monad.Reader -import Control.Monad.Identity -import Data.Maybe (catMaybes) +import Text.Parsec.Pos (initialPos, newPos, updatePosString) +import Control.Monad.Except import Text.Pandoc.Error type Parser t s = Parsec t s type ParserT = ParsecT -newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) +-- | Reader monad wrapping the parser state. This is used to possibly delay +-- evaluation until all relevant information has been parsed and made available +-- in the parser state. +newtype Future s a = Future { runDelayed :: Reader s a } + deriving (Monad, Applicative, Functor) + +type F = Future ParserState -runF :: F a -> ParserState -> a -runF = runReader . unF +runF :: Future s a -> s -> a +runF = runReader . runDelayed -askF :: F ParserState -askF = F ask +askF :: Future s s +askF = Future ask -asksF :: (ParserState -> a) -> F a -asksF f = F $ asks f +asksF :: (s -> a) -> Future s a +asksF f = Future $ asks f -instance Monoid a => Monoid (F a) where +returnF :: Monad m => a -> m (Future s a) +returnF = return . return + +trimInlinesF :: Future s Inlines -> Future s Inlines +trimInlinesF = liftM trimInlines + +instance Monoid a => Monoid (Future s a) where mempty = return mempty mappend = liftM2 mappend mconcat = liftM mconcat . sequence +-- | Parse characters while a predicate is true. +takeWhileP :: Monad m + => (Char -> Bool) + -> ParserT [Char] st m [Char] +takeWhileP f = do + -- faster than 'many (satisfy f)' + inp <- getInput + pos <- getPosition + let (xs, rest) = span f inp + -- needed to persuade parsec that this won't match an empty string: + anyChar + setInput rest + setPosition $ updatePosString pos xs + return xs + +-- Parse n characters of input (or the rest of the input if +-- there aren't n characters). +takeP :: Monad m => Int -> ParserT [Char] st m [Char] +takeP n = do + guard (n > 0) + -- faster than 'count n anyChar' + inp <- getInput + pos <- getPosition + let (xs, rest) = splitAt n inp + -- needed to persuade parsec that this won't match an empty string: + anyChar + setInput rest + setPosition $ updatePosString pos xs + return xs + -- | Parse any line of text -anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] +anyLine :: Monad m => ParserT [Char] st m [Char] anyLine = do -- This is much faster than: -- manyTill anyChar newline @@ -231,16 +301,54 @@ anyLine = do return this _ -> mzero +-- | Parse any line, include the final newline in the output +anyLineNewline :: Monad m => ParserT [Char] st m [Char] +anyLineNewline = (++ "\n") <$> anyLine + +-- | Parse indent by specified number of spaces (or equiv. tabs) +indentWith :: Stream s m Char + => HasReaderOptions st + => Int -> ParserT s st m [Char] +indentWith num = do + tabStop <- getOption readerTabStop + if num < tabStop + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> indentWith (num - tabStop)) ] + -- | Like @manyTill@, but reads at least one item. -many1Till :: Stream s m t +many1Till :: (Show end, Stream s m t) => ParserT s st m a -> ParserT s st m end -> ParserT s st m [a] many1Till p end = do + notFollowedBy' end first <- p rest <- manyTill p end return (first:rest) +-- | Like @manyTill@, but also returns the result of end parser. +manyUntil :: (Stream s m t) + => ParserT s u m a + -> ParserT s u m b + -> ParserT s u m ([a], b) +manyUntil p end = scan + where scan = + (do e <- end + return ([], e) + ) <|> + (do x <- p + (xs, e) <- scan + return (x:xs, e)) + +-- | Like @sepBy1@ from Parsec, +-- but does not fail if it @sep@ succeeds and @p@ fails. +sepBy1' :: (Stream s m t) + => ParsecT s u m a + -> ParsecT s u m sep + -> ParsecT s u m [a] +sepBy1' p sep = (:) <$> p <*> many (try $ sep >> p) + -- | A more general form of @notFollowedBy@. This one allows any -- type of parser to be specified, and succeeds only if that parser fails. -- It does not consume any input. @@ -276,7 +384,7 @@ oneOfStringsCI = oneOfStrings' ciMatch -- this optimizes toLower by checking common ASCII case -- first, before calling the expensive unicode-aware -- function: - toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32) + toLower' c | isAsciiUpper c = chr (ord c + 32) | isAscii c = c | otherwise = toLower c @@ -300,8 +408,38 @@ blankline = try $ skipSpaces >> newline blanklines :: Stream s m Char => ParserT s st m [Char] blanklines = many1 blankline +-- | Gobble n spaces; if tabs are encountered, expand them +-- and gobble some or all of their spaces, leaving the rest. +gobbleSpaces :: (HasReaderOptions st, Monad m) + => Int -> ParserT [Char] st m () +gobbleSpaces 0 = return () +gobbleSpaces n + | n < 0 = error "gobbleSpaces called with negative number" + | otherwise = try $ do + char ' ' <|> eatOneSpaceOfTab + gobbleSpaces (n - 1) + +eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Char +eatOneSpaceOfTab = do + char '\t' + tabstop <- getOption readerTabStop + inp <- getInput + setInput $ replicate (tabstop - 1) ' ' ++ inp + return ' ' + +-- | Gobble up to n spaces; if tabs are encountered, expand them +-- and gobble some or all of their spaces, leaving the rest. +gobbleAtMostSpaces :: (HasReaderOptions st, Monad m) + => Int -> ParserT [Char] st m Int +gobbleAtMostSpaces 0 = return 0 +gobbleAtMostSpaces n + | n < 0 = error "gobbleAtMostSpaces called with negative number" + | otherwise = option 0 $ do + char ' ' <|> eatOneSpaceOfTab + (+ 1) <$> gobbleAtMostSpaces (n - 1) + -- | Parses material enclosed between start and end parsers. -enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser +enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser -> ParserT s st m end -- ^ end parser -> ParserT s st m a -- ^ content parser (to be used repeatedly) -> ParserT s st m [a] @@ -317,9 +455,13 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a +parseFromString :: Monad m + => ParserT [Char] st m a + -> String + -> ParserT [Char] st m a parseFromString parser str = do oldPos <- getPosition + setPosition $ initialPos "chunk" oldInput <- getInput setInput str result <- parser @@ -329,10 +471,22 @@ parseFromString parser str = do setPosition oldPos return result +-- | Like 'parseFromString' but specialized for 'ParserState'. +-- This resets 'stateLastStrPos', which is almost always what we want. +parseFromString' :: Monad m + => ParserT String ParserState m a + -> String + -> ParserT String ParserState m a +parseFromString' parser str = do + oldStrPos <- stateLastStrPos <$> getState + res <- parseFromString parser str + updateState $ \st -> st{ stateLastStrPos = oldStrPos } + return res + -- | Parse raw line block up to and including blank lines. -lineClump :: Stream [Char] m Char => ParserT [Char] st m String +lineClump :: Monad m => ParserT [Char] st m String lineClump = blanklines - <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) + <|> (unlines <$> many1 (notFollowedBy blankline >> anyLine)) -- | Parse a string of characters between an open character -- and a close character, including text between balanced @@ -373,19 +527,19 @@ romanNumeral upperCase = do lookAhead $ oneOf romanDigits let [one, five, ten, fifty, hundred, fivehundred, thousand] = map char romanDigits - thousands <- many thousand >>= (return . (1000 *) . length) + thousands <- ((1000 *) . length) <$> many thousand ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 - fivehundreds <- many fivehundred >>= (return . (500 *) . length) + fivehundreds <- ((500 *) . length) <$> many fivehundred fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 - hundreds <- many hundred >>= (return . (100 *) . length) + hundreds <- ((100 *) . length) <$> many hundred nineties <- option 0 $ try $ ten >> hundred >> return 90 - fifties <- many fifty >>= (return . (50 *) . length) + fifties <- ((50 *) . length) <$> many fifty forties <- option 0 $ try $ ten >> fifty >> return 40 - tens <- many ten >>= (return . (10 *) . length) + tens <- ((10 *) . length) <$> many ten nines <- option 0 $ try $ one >> ten >> return 9 - fives <- many five >>= (return . (5 *) . length) + fives <- ((5 *) . length) <$> many five fours <- option 0 $ try $ one >> five >> return 4 - ones <- many one >>= (return . length) + ones <- length <$> many one let total = thousands + ninehundreds + fivehundreds + fourhundreds + hundreds + nineties + fifties + forties + tens + nines + fives + fours + ones @@ -401,8 +555,8 @@ emailAddress :: Stream s m Char => ParserT s st m (String, String) emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom in (full, escapeURI $ "mailto:" ++ full) - mailbox = intercalate "." <$> (emailWord `sepby1` dot) - domain = intercalate "." <$> (subdomain `sepby1` dot) + mailbox = intercalate "." <$> (emailWord `sepBy1'` dot) + domain = intercalate "." <$> (subdomain `sepBy1'` dot) dot = char '.' subdomain = many1 $ alphaNum <|> innerPunct -- this excludes some valid email addresses, since an @@ -419,59 +573,33 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) return (x:xs) isEmailChar c = isAlphaNum c || isEmailPunct c isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;" - -- note: sepBy1 from parsec consumes input when sep - -- succeeds and p fails, so we use this variant here. - sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p)) - - --- Schemes from http://www.iana.org/assignments/uri-schemes.html plus --- the unofficial schemes coap, doi, javascript, isbn, pmid -schemes :: [String] -schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid", - "crid","data","dav","dict","dns","file","ftp","geo","go","gopher", - "h323","http","https","iax","icap","im","imap","info","ipp","iris", - "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid", - "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp", - "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve", - "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet", - "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon", - "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s", - "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin", - "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee", - "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb", - "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject", - "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms", - "keyparc","lastfm","ldaps","magnet","maps","market","message","mms", - "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi", - "platform","proxy","psyc","query","res","resource","rmi","rsync", - "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify", - "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004", - "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri", - "ymsgr", "isbn", "pmid"] + uriScheme :: Stream s m Char => ParserT s st m String -uriScheme = oneOfStringsCI schemes +uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) +uri :: Monad m => ParserT [Char] st m (String, String) uri = try $ do scheme <- uriScheme char ':' + -- Avoid parsing e.g. "**Notes:**" as a raw URI: + notFollowedBy (oneOf "*_]") -- We allow sentence punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- as a URL, while NOT picking up the closing paren in -- (http://wikipedia.org). So we include balanced parens in the URL. - let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-" + let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-&=" let wordChar = satisfy isWordChar let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit) let entity = () <$ characterReference let punct = skipMany1 (char ',') - <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')) + <|> () <$ satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>') let uriChunk = skipMany1 wordChar <|> percentEscaped <|> entity - <|> (try $ punct >> + <|> try (punct >> lookAhead (void (satisfy isWordChar) <|> percentEscaped)) str <- snd <$> withRaw (skipMany1 ( () <$ (enclosed (char '(') (char ')') uriChunk @@ -485,7 +613,7 @@ uri = try $ do mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String mathInlineWith op cl = try $ do string op - notFollowedBy space + when (op == "$") $ notFollowedBy space words' <- many1Till (count 1 (noneOf " \t\n\\") <|> (char '\\' >> -- This next clause is needed because \text{..} can @@ -499,7 +627,7 @@ mathInlineWith op cl = try $ do return " " ) (try $ string cl) notFollowedBy digit -- to prevent capture of $5 - return $ concat words' + return $ trim $ concat words' where inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String inBalancedBraces 0 "" = do @@ -556,7 +684,9 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. -withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char]) +withRaw :: Monad m + => ParsecT [Char] st m a + -> ParsecT [Char] st m (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -566,9 +696,9 @@ withRaw parser = do let (l2,c2) = (sourceLine pos2, sourceColumn pos2) let inplines = take ((l2 - l1) + 1) $ lines inp let raw = case inplines of - [] -> "" - [l] -> take (c2 - c1) l - ls -> unlines (init ls) ++ take (c2 - 1) (last ls) + [] -> "" + [l] -> take (c2 - c1) l + ls -> unlines (init ls) ++ take (c2 - 1) (last ls) return (result, raw) -- | Parses backslash, then applies character parser. @@ -584,11 +714,11 @@ characterReference = try $ do ent <- many1Till nonspaceChar (char ';') let ent' = case ent of '#':'X':xs -> '#':'x':xs -- workaround tagsoup bug - '#':_ -> ent - _ -> ent ++ ";" + '#':_ -> ent + _ -> ent ++ ";" case lookupEntity ent' of - Just (c : _) -> return c - _ -> fail "entity not found" + Just (c : _) -> return c + _ -> fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) @@ -651,7 +781,7 @@ romanOne = (char 'i' >> return (LowerRoman, 1)) <|> -- | Parses an ordered list marker and returns list attributes. anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes -anyOrderedListMarker = choice $ +anyOrderedListMarker = choice [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, lowerAlpha, lowerRoman, upperAlpha, upperRoman]] @@ -717,7 +847,7 @@ charRef = do c <- characterReference return $ Str [c] -lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String +lineBlockLine :: Monad m => ParserT [Char] st m String lineBlockLine = try $ do char '|' char ' ' @@ -727,33 +857,48 @@ lineBlockLine = try $ do continuations <- many (try $ char ' ' >> anyLine) return $ white ++ unwords (line : continuations) -blankLineBlockLine :: Stream [Char] m Char => ParserT [Char] st m Char +blankLineBlockLine :: Stream s m Char => ParserT s st m Char blankLineBlockLine = try (char '|' >> blankline) -- | Parses an RST-style line block and returns a list of strings. -lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String] +lineBlockLines :: Monad m => ParserT [Char] st m [String] lineBlockLines = try $ do lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine)) - skipMany1 $ blankline <|> blankLineBlockLine + skipMany blankline return lines' -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: Stream s m Char - => ParserT s ParserState m ([[Block]], [Alignment], [Int]) - -> ([Int] -> ParserT s ParserState m [[Block]]) - -> ParserT s ParserState m sep - -> ParserT s ParserState m end - -> ParserT s ParserState m Block +tableWith :: (Stream s m Char, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT s st m (mf [Blocks], [Alignment], [Int]) + -> ([Int] -> ParserT s st m (mf [Blocks])) + -> ParserT s st m sep + -> ParserT s st m end + -> ParserT s st m (mf Blocks) tableWith headerParser rowParser lineParser footerParser = try $ do + (aligns, widths, heads, rows) <- tableWith' headerParser rowParser + lineParser footerParser + return $ B.table mempty (zip aligns widths) <$> heads <*> rows + +type TableComponents mf = ([Alignment], [Double], mf [Blocks], mf [[Blocks]]) + +tableWith' :: (Stream s m Char, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT s st m (mf [Blocks], [Alignment], [Int]) + -> ([Int] -> ParserT s st m (mf [Blocks])) + -> ParserT s st m sep + -> ParserT s st m end + -> ParserT s st m (TableComponents mf) +tableWith' headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy1` lineParser + lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns - let widths = if (indices == []) + let widths = if null indices then replicate (length aligns) 0.0 else widthsFromIndices numColumns indices - return $ Table [] aligns widths heads lines' + return (aligns, widths, heads, lines') -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal @@ -777,7 +922,7 @@ widthsFromIndices numColumns' indices = quotient = if totLength > numColumns then fromIntegral totLength else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in + fracs = map (\l -> fromIntegral l / quotient) lengths in tail fracs --- @@ -786,25 +931,44 @@ widthsFromIndices numColumns' indices = -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: Stream [Char] m Char - => ParserT [Char] ParserState m [Block] -- ^ Block list parser - -> Bool -- ^ Headerless table - -> ParserT [Char] ParserState m Block +gridTableWith :: (Monad m, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT [Char] st m (mf Blocks) -- ^ Block list parser + -> Bool -- ^ Headerless table + -> ParserT [Char] st m (mf Blocks) gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter +gridTableWith' :: (Monad m, HasReaderOptions st, + Functor mf, Applicative mf, Monad mf) + => ParserT [Char] st m (mf Blocks) -- ^ Block list parser + -> Bool -- ^ Headerless table + -> ParserT [Char] st m (TableComponents mf) +gridTableWith' blocks headless = + tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) + (gridTableSep '-') gridTableFooter + gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int) +gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment) gridPart ch = do + leftColon <- option False (True <$ char ':') dashes <- many1 (char ch) + rightColon <- option False (True <$ char ':') char '+' - return (length dashes, length dashes + 1) - -gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)] + let lengthDashes = length dashes + (if leftColon then 1 else 0) + + (if rightColon then 1 else 0) + let alignment = case (leftColon, rightColon) of + (True, True) -> AlignCenter + (True, False) -> AlignLeft + (False, True) -> AlignRight + (False, False) -> AlignDefault + return ((lengthDashes, lengthDashes + 1), alignment) + +gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String @@ -812,14 +976,14 @@ removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char +gridTableSep :: Stream s m Char => Char -> ParserT s st m Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. -gridTableHeader :: Stream [Char] m Char +gridTableHeader :: (Monad m, Functor mf, Applicative mf, Monad mf) => Bool -- ^ Headerless table - -> ParserT [Char] ParserState m [Block] - -> ParserT [Char] ParserState m ([[Block]], [Alignment], [Int]) + -> ParserT [Char] st m (mf Blocks) + -> ParserT [Char] st m (mf [Blocks], [Alignment], [Int]) gridTableHeader headless blocks = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -828,62 +992,63 @@ gridTableHeader headless blocks = try $ do else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () - let lines' = map snd dashes + underDashes <- if headless + then return dashes + else gridDashedLines '=' + guard $ length dashes == length underDashes + let lines' = map (snd . fst) underDashes let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault - -- RST does not have a notion of alignments + let aligns = map snd underDashes let rawHeads = if headless - then replicate (length dashes) "" - else map (intercalate " ") $ transpose + then replicate (length underDashes) "" + else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- mapM (parseFromString blocks) $ map trim rawHeads + heads <- sequence <$> mapM (parseFromString blocks . trim) rawHeads return (heads, aligns, indices) -gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String] +gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: Stream [Char] m Char - => ParserT [Char] ParserState m [Block] +gridTableRow :: (Monad m, Functor mf, Applicative mf, Monad mf) + => ParserT [Char] st m (mf Blocks) -> [Int] - -> ParserT [Char] ParserState m [[Block]] + -> ParserT [Char] st m (mf [Blocks]) gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - mapM (liftM compactifyCell . parseFromString blocks) cols + compactifyCell bs = case compactify [bs] of + [] -> mempty + x:_ -> x + cells <- sequence <$> mapM (parseFromString blocks) cols + return $ fmap (map compactifyCell) cells removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = if all startsWithSpace xs then map (drop 1) xs else xs - where startsWithSpace "" = True + where startsWithSpace "" = True startsWithSpace (y:_) = y == ' ' -compactifyCell :: [Block] -> [Block] -compactifyCell bs = head $ compactify [bs] - -- | Parse footer for a grid table. -gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char] +gridTableFooter :: Stream s m Char => ParserT s st m [Char] gridTableFooter = blanklines --- -- | Removes the ParsecT layer from the monad transformer stack -readWithM :: (Monad m) - => ParserT [Char] st m a -- ^ parser +readWithM :: Monad m + => ParserT [Char] st m a -- ^ parser -> st -- ^ initial state -> String -- ^ input -> m (Either PandocError a) readWithM parser state input = - mapLeft (ParsecError input) `liftM` runParserT parser state "source" input + mapLeft (PandocParsecError input) `liftM` runParserT parser state "source" input -- | Parse a string with a given parser and state @@ -893,17 +1058,8 @@ readWith :: Parser [Char] st a -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp -readWithWarnings :: Parser [Char] ParserState a - -> ParserState - -> String - -> Either PandocError (a, [String]) -readWithWarnings p = readWith $ do - doc <- p - warnings <- stateWarnings <$> getState - return (doc, warnings) - -- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) +testStringWith :: Show a => ParserT [Char] ParserState Identity a -> [Char] -> IO () @@ -912,34 +1068,37 @@ testStringWith parser str = UTF8.putStrLn $ show $ -- | Parsing options. data ParserState = ParserState - { stateOptions :: ReaderOptions, -- ^ User options - stateParserContext :: ParserContext, -- ^ Inside list? - stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateAllowLinks :: Bool, -- ^ Allow parsing of links - stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph - stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys - stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys - stateSubstitutions :: SubstTable, -- ^ List of substitution references - stateNotes :: NoteTable, -- ^ List of notes (raw bodies) - stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) - stateMeta :: Meta, -- ^ Document metadata - stateMeta' :: F Meta, -- ^ Document metadata - stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) - stateIdentifiers :: Set.Set String, -- ^ Header identifiers used - stateNextExample :: Int, -- ^ Number of next example - stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers - stateHasChapters :: Bool, -- ^ True if \chapter encountered - stateMacros :: [Macro], -- ^ List of macros defined so far - stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role - stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles + { stateOptions :: ReaderOptions, -- ^ User options + stateParserContext :: ParserContext, -- ^ Inside list? + stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateAllowLinks :: Bool, -- ^ Allow parsing of links + stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph + stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed + stateKeys :: KeyTable, -- ^ List of reference keys + stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys + stateSubstitutions :: SubstTable, -- ^ List of substitution references + stateNotes :: NoteTable, -- ^ List of notes (raw bodies) + stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) + stateNoteRefs :: Set.Set String, -- ^ List of note references used + stateMeta :: Meta, -- ^ Document metadata + stateMeta' :: F Meta, -- ^ Document metadata + stateCitations :: M.Map String String, -- ^ RST-style citations + stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used + stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) + stateIdentifiers :: Set.Set String, -- ^ Header identifiers used + stateNextExample :: Int, -- ^ Number of next example + stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers + stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far + stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role + stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: -- roles), 3) Additional classes (rest of Attr is unused)). - stateCaption :: Maybe Inlines, -- ^ Caption in current environment - stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed - stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context - stateWarnings :: [String] -- ^ Warnings generated by the parser + stateCaption :: Maybe Inlines, -- ^ Caption in current environment + stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed + stateFencedDivLevel :: Int, -- ^ Depth of fenced div + stateContainers :: [String], -- ^ parent include files + stateLogMessages :: [LogMessage], -- ^ log messages + stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context } instance Default ParserState where @@ -957,6 +1116,9 @@ class HasReaderOptions st where -- default getOption f = (f . extractReaderOptions) <$> getState +instance HasReaderOptions ParserState where + extractReaderOptions = stateOptions + class HasQuoteContext st m where getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a @@ -972,9 +1134,6 @@ instance Monad m => HasQuoteContext ParserState m where setState newState { stateQuoteContext = oldQuoteContext } return result -instance HasReaderOptions ParserState where - extractReaderOptions = stateOptions - class HasHeaderMap st where extractHeaderMap :: st -> M.Map Inlines String updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> @@ -993,8 +1152,8 @@ instance HasIdentifierList ParserState where updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st } class HasMacros st where - extractMacros :: st -> [Macro] - updateMacros :: ([Macro] -> [Macro]) -> st -> st + extractMacros :: st -> M.Map Text Macro + updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st instance HasMacros ParserState where extractMacros = stateMacros @@ -1008,6 +1167,24 @@ instance HasLastStrPosition ParserState where setLastStrPos pos st = st{ stateLastStrPos = Just pos } getLastStrPos st = stateLastStrPos st +class HasLogMessages st where + addLogMessage :: LogMessage -> st -> st + getLogMessages :: st -> [LogMessage] + +instance HasLogMessages ParserState where + addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st } + getLogMessages st = reverse $ stateLogMessages st + +class HasIncludeFiles st where + getIncludeFiles :: st -> [String] + addIncludeFile :: String -> st -> st + dropLatestIncludeFile :: st -> st + +instance HasIncludeFiles ParserState where + getIncludeFiles = stateContainers + addIncludeFile f s = s{ stateContainers = f : stateContainers s } + dropLatestIncludeFile s = s { stateContainers = drop 1 $ stateContainers s } + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, @@ -1020,30 +1197,45 @@ defaultParserState = stateHeaderKeys = M.empty, stateSubstitutions = M.empty, stateNotes = [], - stateNotes' = [], + stateNotes' = M.empty, + stateNoteRefs = Set.empty, stateMeta = nullMeta, stateMeta' = return nullMeta, + stateCitations = M.empty, stateHeaderTable = [], stateHeaders = M.empty, stateIdentifiers = Set.empty, stateNextExample = 1, stateExamples = M.empty, - stateHasChapters = False, - stateMacros = [], + stateMacros = M.empty, stateRstDefaultRole = "title-reference", stateRstCustomRoles = M.empty, stateCaption = Nothing, stateInHtmlBlock = Nothing, - stateMarkdownAttribute = False, - stateWarnings = []} + stateFencedDivLevel = 0, + stateContainers = [], + stateLogMessages = [], + stateMarkdownAttribute = False + } + +-- | Add a log message. +logMessage :: (Stream s m a, HasLogMessages st) + => LogMessage -> ParserT s st m () +logMessage msg = updateState (addLogMessage msg) + +-- | Report all the accumulated log messages, according to verbosity level. +reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m () +reportLogMessages = do + msgs <- getLogMessages <$> getState + mapM_ report msgs -- | Succeed only if the extension is enabled. guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext +guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext -- | Succeed only if the extension is disabled. guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext +guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext -- | Update the position on which the last string ended. updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m () @@ -1074,7 +1266,8 @@ data QuoteContext type NoteTable = [(String, String)] -type NoteTable' = [(String, F Blocks)] -- used in markdown reader +type NoteTable' = M.Map String (SourcePos, F Blocks) +-- used in markdown reader newtype Key = Key String deriving (Show, Read, Eq, Ord) @@ -1091,37 +1284,40 @@ type SubstTable = M.Map Key Inlines -- with its associated identifier. If the identifier is null -- and the auto_identifers extension is set, generate a new -- unique identifier, and update the list of identifiers --- in state. -registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st) +-- in state. Issue a warning if an explicit identifier +-- is encountered that duplicates an earlier identifier +-- (explict or automatically generated). +registerHeader :: (Stream s m a, HasReaderOptions st, + HasHeaderMap st, HasLogMessages st, HasIdentifierList st) => Attr -> Inlines -> ParserT s st m Attr registerHeader (ident,classes,kvs) header' = do ids <- extractIdentifierList <$> getState exts <- getOption readerExtensions let insert' = M.insertWith (\_new old -> old) - if null ident && Ext_auto_identifiers `Set.member` exts + if null ident && Ext_auto_identifiers `extensionEnabled` exts then do let id' = uniqueIdent (B.toList header') ids - let id'' = if Ext_ascii_identifiers `Set.member` exts - then catMaybes $ map toAsciiChar id' + let id'' = if Ext_ascii_identifiers `extensionEnabled` exts + then mapMaybe toAsciiChar id' else id' updateState $ updateIdentifierList $ Set.insert id' updateState $ updateIdentifierList $ Set.insert id'' updateState $ updateHeaderMap $ insert' header' id' return (id'',classes,kvs) else do - unless (null ident) $ + unless (null ident) $ do + when (ident `Set.member` ids) $ do + pos <- getPosition + logMessage $ DuplicateIdentifier ident pos + updateState $ updateIdentifierList $ Set.insert ident updateState $ updateHeaderMap $ insert' header' ident return (ident,classes,kvs) --- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m () -failUnlessSmart = getOption readerSmart >>= guard - smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) => ParserT s st m Inlines -> ParserT s st m Inlines smartPunctuation inlineParser = do - failUnlessSmart + guardEnabled Ext_smart choice [ quoted inlineParser, apostrophe, dash, ellipses ] apostrophe :: Stream s m Char => ParserT s st m Inlines @@ -1153,9 +1349,7 @@ failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t) -> ParserT s st m () failIfInQuoteContext context = do context' <- getQuoteContext - if context' == context - then fail "already inside quotes" - else return () + when (context' == context) $ fail "already inside quotes" charOrRef :: Stream s m Char => String -> ParserT s st m Char charOrRef cs = @@ -1195,7 +1389,7 @@ ellipses = try (string "..." >> return (B.str "\8230")) dash :: (HasReaderOptions st, Stream s m Char) => ParserT s st m Inlines dash = try $ do - oldDashes <- getOption readerOldDashes + oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions if oldDashes then do char '-' @@ -1241,43 +1435,6 @@ token :: (Stream s m t) -> ParsecT s st m a token pp pos match = tokenPrim pp (\_ t _ -> pos t) match --- --- Macros --- - --- | Parse a \newcommand or \renewcommand macro definition. -macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st) - => ParserT [Char] st m Blocks -macro = do - apply <- getOption readerApplyMacros - inp <- getInput - case parseMacroDefinitions inp of - ([], _) -> mzero - (ms, rest) -> do def' <- count (length inp - length rest) anyChar - if apply - then do - updateState $ \st -> - updateMacros (ms ++) st - return mempty - else return $ rawBlock "latex" def' - --- | Apply current macros to string. -applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char) - => String - -> ParserT [Char] st m String -applyMacros' target = do - apply <- getOption readerApplyMacros - if apply - then do macros <- extractMacros <$> getState - return $ applyMacros macros target - else return target - --- | Append a warning to the log. -addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState () -addWarning mbpos msg = - updateState $ \st -> st{ - stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : - stateWarnings st } infixr 5 <+?> (<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) @@ -1285,10 +1442,53 @@ a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) extractIdClass :: Attr -> Attr extractIdClass (ident, cls, kvs) = (ident', cls', kvs') where - ident' = case (lookup "id" kvs) of - Just v -> v - Nothing -> ident - cls' = case (lookup "class" kvs) of + ident' = fromMaybe ident (lookup "id" kvs) + cls' = case lookup "class" kvs of Just cl -> words cl Nothing -> cls kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs + +insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st, + Functor mf, Applicative mf, Monad mf) + => ParserT [a] st m (mf Blocks) + -> (String -> [a]) + -> [FilePath] -> FilePath + -> ParserT [a] st m (mf Blocks) +insertIncludedFile' blocks totoks dirs f = do + oldPos <- getPosition + oldInput <- getInput + containers <- getIncludeFiles <$> getState + when (f `elem` containers) $ + throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + updateState $ addIncludeFile f + mbcontents <- readFileFromDirs dirs f + contents <- case mbcontents of + Just s -> return s + Nothing -> do + report $ CouldNotLoadIncludeFile f oldPos + return "" + setPosition $ newPos f 1 1 + setInput $ totoks contents + bs <- blocks + setInput oldInput + setPosition oldPos + updateState dropLatestIncludeFile + return bs + +-- | Parse content of include file as blocks. Circular includes result in an +-- @PandocParseError@. +insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) + => ParserT [a] st m Blocks + -> (String -> [a]) + -> [FilePath] -> FilePath + -> ParserT [a] st m Blocks +insertIncludedFile blocks totoks dirs f = + runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f + +-- | Parse content of include file as future blocks. Circular includes result in +-- an @PandocParseError@. +insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) + => ParserT String st m (Future st Blocks) + -> [FilePath] -> FilePath + -> ParserT String st m (Future st Blocks) +insertIncludedFileF p = insertIncludedFile' p id diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 256f38b0c..25c2373a6 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA {- | Module : Text.Pandoc.Pretty - Copyright : Copyright (C) 2010-2016 John MacFarlane + Copyright : Copyright (C) 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -76,22 +77,24 @@ module Text.Pandoc.Pretty ( ) where -import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..)) -import qualified Data.Sequence as Seq +import Control.Monad +import Control.Monad.State.Strict +import Data.Char (isSpace) import Data.Foldable (toList) import Data.List (intersperse) -import Data.String -import Control.Monad.State -import Data.Char (isSpace) import Data.Monoid ((<>)) +import Data.Sequence (Seq, ViewL (..), fromList, mapWithIndex, singleton, viewl, + (<|)) +import qualified Data.Sequence as Seq +import Data.String data RenderState a = RenderState{ - output :: [a] -- ^ In reverse order - , prefix :: String - , usePrefix :: Bool - , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping - , column :: Int - , newlines :: Int -- ^ Number of preceding newlines + output :: [a] -- ^ In reverse order + , prefix :: String + , usePrefix :: Bool + , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping + , column :: Int + , newlines :: Int -- ^ Number of preceding newlines } type DocState a = State (RenderState a) () @@ -106,10 +109,10 @@ data D = Text Int String | CarriageReturn | NewLine | BlankLines Int -- number of blank lines - deriving (Show) + deriving (Show, Eq) newtype Doc = Doc { unDoc :: Seq D } - deriving (Monoid, Show) + deriving (Monoid, Show, Eq) instance IsString Doc where fromString = text @@ -142,11 +145,10 @@ hcat = mconcat -- between them. infixr 6 <+> (<+>) :: Doc -> Doc -> Doc -(<+>) x y = if isEmpty x - then y - else if isEmpty y - then x - else x <> space <> y +(<+>) x y + | isEmpty x = y + | isEmpty y = x + | otherwise = x <> space <> y -- | Same as 'cat', but putting breakable spaces between the -- 'Doc's. @@ -156,20 +158,18 @@ hsep = foldr (<+>) empty infixr 5 $$ -- | @a $$ b@ puts @a@ above @b@. ($$) :: Doc -> Doc -> Doc -($$) x y = if isEmpty x - then y - else if isEmpty y - then x - else x <> cr <> y +($$) x y + | isEmpty x = y + | isEmpty y = x + | otherwise = x <> cr <> y infixr 5 $+$ -- | @a $+$ b@ puts @a@ above @b@, with a blank line between. ($+$) :: Doc -> Doc -> Doc -($+$) x y = if isEmpty x - then y - else if isEmpty y - then x - else x <> blankline <> y +($+$) x y + | isEmpty x = y + | isEmpty y = x + | otherwise = x <> blankline <> y -- | List version of '$$'. vcat :: [Doc] -> Doc @@ -184,21 +184,21 @@ nestle :: Doc -> Doc nestle (Doc d) = Doc $ go d where go x = case viewl x of (BlankLines _ :< rest) -> go rest - (NewLine :< rest) -> go rest - _ -> x + (NewLine :< rest) -> go rest + _ -> x -- | Chomps trailing blank space off of a 'Doc'. chomp :: Doc -> Doc chomp d = Doc (fromList dl') where dl = toList (unDoc d) dl' = reverse $ go $ reverse dl - go [] = [] - go (BreakingSpace : xs) = go xs + go [] = [] + go (BreakingSpace : xs) = go xs go (CarriageReturn : xs) = go xs - go (NewLine : xs) = go xs - go (BlankLines _ : xs) = go xs - go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs - go xs = xs + go (NewLine : xs) = go xs + go (BlankLines _ : xs) = go xs + go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs + go xs = xs outp :: (IsString a) => Int -> String -> DocState a outp off s | off < 0 = do -- offset < 0 means newline characters @@ -215,9 +215,9 @@ outp off s | off < 0 = do -- offset < 0 means newline characters outp off s = do -- offset >= 0 (0 might be combining char) st' <- get let pref = prefix st' - when (column st' == 0 && usePrefix st' && not (null pref)) $ do + when (column st' == 0 && usePrefix st' && not (null pref)) $ modify $ \st -> st{ output = fromString pref : output st - , column = column st + realLength pref } + , column = column st + realLength pref } modify $ \st -> st{ output = fromString s : output st , column = column st + off , newlines = 0 } @@ -306,10 +306,10 @@ renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs) renderList (BreakingSpace : BlankLines n : xs) = renderList (BlankLines n:xs) renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs) renderList (BreakingSpace : xs) = do - let isText (Text _ _) = True - isText (Block _ _) = True - isText (AfterBreak _) = True - isText _ = False + let isText (Text _ _) = True + isText (Block _ _) = True + isText (AfterBreak _) = True + isText _ = False let isBreakingSpace BreakingSpace = True isBreakingSpace _ = False let xs' = dropWhile isBreakingSpace xs @@ -326,9 +326,7 @@ renderList (BreakingSpace : xs) = do renderList (AfterBreak s : xs) = do st <- get - if newlines st > 0 - then outp (realLength s) s - else return () + when (newlines st > 0) $ outp (realLength s) s renderList xs renderList (Block i1 s1 : Block i2 s2 : xs) = @@ -342,7 +340,7 @@ renderList (Block _width lns : xs) = do let oldPref = prefix st case column st - realLength oldPref of n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' } - _ -> return () + _ -> return () renderList $ intersperse CarriageReturn (map (Text 0) lns) modify $ \s -> s{ prefix = oldPref } renderList xs @@ -359,13 +357,13 @@ mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) = | otherwise -> (lns1, lns2) pad n s = s ++ replicate (n - realLength s) ' ' sp "" = "" - sp xs = if addSpace then (' ' : xs) else xs + sp xs = if addSpace then ' ' : xs else xs offsetOf :: D -> Int -offsetOf (Text o _) = o -offsetOf (Block w _) = w -offsetOf BreakingSpace = 1 -offsetOf _ = 0 +offsetOf (Text o _) = o +offsetOf (Block w _) = w +offsetOf BreakingSpace = 1 +offsetOf _ = 0 -- | A literal string. text :: String -> Doc @@ -396,8 +394,8 @@ cr = Doc $ singleton CarriageReturn blankline :: Doc blankline = Doc $ singleton (BlankLines 1) --- | Inserts a blank lines unless they exists already. --- (@blanklines m <> blanklines n@ has the same effect as @blankline (max m n)@. +-- | Inserts blank lines unless they exist already. +-- (@blanklines m <> blanklines n@ has the same effect as @blanklines (max m n)@. blanklines :: Int -> Doc blanklines n = Doc $ singleton (BlankLines n) @@ -430,7 +428,7 @@ beforeNonBlank d = Doc $ singleton (BeforeNonBlank d) nowrap :: Doc -> Doc nowrap doc = Doc $ mapWithIndex replaceSpace $ unDoc doc where replaceSpace _ BreakingSpace = Text 1 " " - replaceSpace _ x = x + replaceSpace _ x = x -- | Content to print only if it comes at the beginning of a line, -- to be used e.g. for escaping line-initial `.` in groff man. @@ -440,8 +438,8 @@ afterBreak s = Doc $ singleton (AfterBreak s) -- | Returns the width of a 'Doc'. offset :: Doc -> Int offset d = case map realLength . lines . render Nothing $ d of - [] -> 0 - os -> maximum os + [] -> 0 + os -> maximum os -- | Returns the minimal width of a 'Doc' when reflowed at breakable spaces. minOffset :: Doc -> Int @@ -466,7 +464,7 @@ height = length . lines . render Nothing block :: (String -> String) -> Int -> Doc -> Doc block filler width d - | width < 1 && not (isEmpty d) = error "Text.Pandoc.Pretty.block: width < 1" + | width < 1 && not (isEmpty d) = block filler 1 d | otherwise = Doc $ singleton $ Block width $ map filler $ chop width $ render (Just width) d @@ -554,4 +552,4 @@ charWidth c = -- | Get real length of string, taking into account combining and double-wide -- characters. realLength :: String -> Int -realLength = foldr (\a b -> charWidth a + b) 0 +realLength = sum . map charWidth diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index bc71f1392..27807a8c8 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Process - Copyright : Copyright (C) 2013-2016 John MacFarlane + Copyright : Copyright (C) 2013-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,22 +29,22 @@ ByteString variant of 'readProcessWithExitCode'. -} module Text.Pandoc.Process (pipeProcess) where -import System.Process -import System.Exit (ExitCode (..)) +import Control.Concurrent (forkIO, newEmptyMVar, putMVar, takeMVar) import Control.Exception -import System.IO (hClose, hFlush) -import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) import Control.Monad (unless) import qualified Data.ByteString.Lazy as BL +import System.Exit (ExitCode (..)) +import System.IO (hClose, hFlush) +import System.Process {- | Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings instead of strings and allows setting environment variables. @readProcessWithExitCode@ creates an external process, reads its -standard output and standard error strictly, waits until the process -terminates, and then returns the 'ExitCode' of the process, -the standard output, and the standard error. +standard output strictly, waits until the process +terminates, and then returns the 'ExitCode' of the process +and the standard output. stderr is inherited from the parent. If an asynchronous exception is thrown to the thread executing @readProcessWithExitCode@, the forked process will be terminated and @@ -57,25 +57,21 @@ pipeProcess -> FilePath -- ^ Filename of the executable (see 'proc' for details) -> [String] -- ^ any arguments -> BL.ByteString -- ^ standard input - -> IO (ExitCode,BL.ByteString,BL.ByteString) -- ^ exitcode, stdout, stderr + -> IO (ExitCode,BL.ByteString) -- ^ exitcode, stdout pipeProcess mbenv cmd args input = mask $ \restore -> do - (Just inh, Just outh, Just errh, pid) <- createProcess (proc cmd args) + (Just inh, Just outh, Nothing, pid) <- createProcess (proc cmd args) { env = mbenv, std_in = CreatePipe, std_out = CreatePipe, - std_err = CreatePipe } + std_err = Inherit } flip onException - (do hClose inh; hClose outh; hClose errh; + (do hClose inh; hClose outh; terminateProcess pid; waitForProcess pid) $ restore $ do -- fork off a thread to start consuming stdout out <- BL.hGetContents outh waitOut <- forkWait $ evaluate $ BL.length out - -- fork off a thread to start consuming stderr - err <- BL.hGetContents errh - waitErr <- forkWait $ evaluate $ BL.length err - -- now write and flush any input let writeInput = do unless (BL.null input) $ do @@ -87,15 +83,13 @@ pipeProcess mbenv cmd args input = -- wait on the output waitOut - waitErr hClose outh - hClose errh -- wait on the process ex <- waitForProcess pid - return (ex, out, err) + return (ex, out) forkWait :: IO a -> IO (IO a) forkWait a = do diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs new file mode 100644 index 000000000..b9374ba06 --- /dev/null +++ b/src/Text/Pandoc/Readers.hs @@ -0,0 +1,160 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This helper module exports the readers. + +Note: all of the readers assume that the input text has @'\n'@ +line endings. So if you get your input text from a web form, +you should remove @'\r'@ characters using @filter (/='\r')@. + +-} + +module Text.Pandoc.Readers + ( + -- * Readers: converting /to/ Pandoc format + Reader (..) + , readers + , readDocx + , readOdt + , readMarkdown + , readCommonMark + , readCreole + , readMediaWiki + , readVimwiki + , readRST + , readOrg + , readLaTeX + , readHtml + , readJATS + , readTextile + , readDocBook + , readOPML + , readHaddock + , readNative + , readJSON + , readTWiki + , readTikiWiki + , readTxt2Tags + , readEPUB + , readMuse + -- * Miscellaneous + , getReader + , getDefaultExtensions + ) where + +import Control.Monad.Except (throwError) +import Data.Aeson +import qualified Data.ByteString.Lazy as BL +import Data.List (intercalate) +import Data.Text (Text) +import Text.Pandoc.Class +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Extensions +import Text.Pandoc.Options +import Text.Pandoc.Readers.CommonMark +import Text.Pandoc.Readers.Creole +import Text.Pandoc.Readers.DocBook +import Text.Pandoc.Readers.Docx +import Text.Pandoc.Readers.EPUB +import Text.Pandoc.Readers.Haddock +import Text.Pandoc.Readers.HTML (readHtml) +import Text.Pandoc.Readers.JATS (readJATS) +import Text.Pandoc.Readers.LaTeX +import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.MediaWiki +import Text.Pandoc.Readers.Muse +import Text.Pandoc.Readers.Native +import Text.Pandoc.Readers.Odt +import Text.Pandoc.Readers.OPML +import Text.Pandoc.Readers.Org +import Text.Pandoc.Readers.RST +import Text.Pandoc.Readers.Textile +import Text.Pandoc.Readers.TikiWiki +import Text.Pandoc.Readers.TWiki +import Text.Pandoc.Readers.Txt2Tags +import Text.Pandoc.Readers.Vimwiki +import Text.Pandoc.Shared (mapLeft) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Parsec.Error + +data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc) + | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) + +-- | Association list of formats and readers. +readers :: PandocMonad m => [(String, Reader m)] +readers = [ ("native" , TextReader readNative) + ,("json" , TextReader $ \o s -> + case readJSON o s of + Right doc -> return doc + Left _ -> throwError $ PandocParseError "JSON parse error") + ,("markdown" , TextReader readMarkdown) + ,("markdown_strict" , TextReader readMarkdown) + ,("markdown_phpextra" , TextReader readMarkdown) + ,("markdown_github" , TextReader readMarkdown) + ,("markdown_mmd", TextReader readMarkdown) + ,("commonmark" , TextReader readCommonMark) + ,("creole" , TextReader readCreole) + ,("gfm" , TextReader readCommonMark) + ,("rst" , TextReader readRST) + ,("mediawiki" , TextReader readMediaWiki) + ,("vimwiki" , TextReader readVimwiki) + ,("docbook" , TextReader readDocBook) + ,("opml" , TextReader readOPML) + ,("org" , TextReader readOrg) + ,("textile" , TextReader readTextile) -- TODO : textile+lhs + ,("html" , TextReader readHtml) + ,("jats" , TextReader readJATS) + ,("latex" , TextReader readLaTeX) + ,("haddock" , TextReader readHaddock) + ,("twiki" , TextReader readTWiki) + ,("tikiwiki" , TextReader readTikiWiki) + ,("docx" , ByteStringReader readDocx) + ,("odt" , ByteStringReader readOdt) + ,("t2t" , TextReader readTxt2Tags) + ,("epub" , ByteStringReader readEPUB) + ,("muse" , TextReader readMuse) + ] + +-- | Retrieve reader, extensions based on formatSpec (format+extensions). +getReader :: PandocMonad m => String -> Either String (Reader m, Extensions) +getReader s = + case parseFormatSpec s of + Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] + Right (readerName, setExts) -> + case lookup readerName readers of + Nothing -> Left $ "Unknown reader: " ++ readerName + Just r -> Right (r, setExts $ + getDefaultExtensions readerName) + +-- | Read pandoc document from JSON format. +readJSON :: ReaderOptions -> Text -> Either PandocError Pandoc +readJSON _ = + mapLeft PandocParseError . eitherDecode' . BL.fromStrict . UTF8.fromText diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index d20d386e7..6fbc09c17 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2015-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.CommonMark - Copyright : Copyright (C) 2015 John MacFarlane + Copyright : Copyright (C) 2015-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -32,48 +32,94 @@ CommonMark is a strongly specified variant of Markdown: http://commonmark.org. module Text.Pandoc.Readers.CommonMark (readCommonMark) where -import CMark -import Data.Text (unpack, pack) +import CMarkGFM +import Control.Monad.State +import Data.Char (isAlphaNum, isLetter, isSpace, toLower) import Data.List (groupBy) +import qualified Data.Map as Map +import Data.Text (Text, unpack) +import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) import Text.Pandoc.Options -import Text.Pandoc.Error +import Text.Pandoc.Shared (stringify) +import Text.Pandoc.Walk (walkM) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc -readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack - where opts' = if readerSmart opts - then [optNormalize, optSmart] - else [optNormalize] - -nodeToPandoc :: Node -> Pandoc -nodeToPandoc (Node _ DOCUMENT nodes) = - Pandoc nullMeta $ foldr addBlock [] nodes -nodeToPandoc n = -- shouldn't happen - Pandoc nullMeta $ foldr addBlock [] [n] - -addBlocks :: [Node] -> [Block] -addBlocks = foldr addBlock [] - -addBlock :: Node -> [Block] -> [Block] -addBlock (Node _ PARAGRAPH nodes) = - (Para (addInlines nodes) :) -addBlock (Node _ THEMATIC_BREAK _) = +readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readCommonMark opts s = return $ + (if isEnabled Ext_gfm_auto_identifiers opts + then addHeaderIdentifiers + else id) $ + nodeToPandoc opts $ commonmarkToNode opts' exts s + where opts' = [ optSmart | isEnabled Ext_smart opts ] + exts = [ extStrikethrough | isEnabled Ext_strikeout opts ] ++ + [ extTable | isEnabled Ext_pipe_tables opts ] ++ + [ extAutolink | isEnabled Ext_autolink_bare_uris opts ] + +convertEmojis :: String -> String +convertEmojis (':':xs) = + case break (==':') xs of + (ys,':':zs) -> + case Map.lookup ys emojis of + Just s -> s ++ convertEmojis zs + Nothing -> ':' : ys ++ convertEmojis (':':zs) + _ -> ':':xs +convertEmojis (x:xs) = x : convertEmojis xs +convertEmojis [] = [] + +addHeaderIdentifiers :: Pandoc -> Pandoc +addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty + +addHeaderId :: Block -> State (Map.Map String Int) Block +addHeaderId (Header lev (_,classes,kvs) ils) = do + idmap <- get + let ident = toIdent ils + ident' <- case Map.lookup ident idmap of + Nothing -> do + put (Map.insert ident 1 idmap) + return ident + Just i -> do + put (Map.adjust (+ 1) ident idmap) + return (ident ++ "-" ++ show i) + return $ Header lev (ident',classes,kvs) ils +addHeaderId x = return x + +toIdent :: [Inline] -> String +toIdent = map (\c -> if isSpace c then '-' else c) + . filter (\c -> isLetter c || isAlphaNum c || isSpace c || + c == '_' || c == '-') + . map toLower . stringify + +nodeToPandoc :: ReaderOptions -> Node -> Pandoc +nodeToPandoc opts (Node _ DOCUMENT nodes) = + Pandoc nullMeta $ foldr (addBlock opts) [] nodes +nodeToPandoc opts n = -- shouldn't happen + Pandoc nullMeta $ foldr (addBlock opts) [] [n] + +addBlocks :: ReaderOptions -> [Node] -> [Block] +addBlocks opts = foldr (addBlock opts) [] + +addBlock :: ReaderOptions -> Node -> [Block] -> [Block] +addBlock opts (Node _ PARAGRAPH nodes) = + (Para (addInlines opts nodes) :) +addBlock _ (Node _ THEMATIC_BREAK _) = (HorizontalRule :) -addBlock (Node _ BLOCK_QUOTE nodes) = - (BlockQuote (addBlocks nodes) :) -addBlock (Node _ (HTML_BLOCK t) _) = - (RawBlock (Format "html") (unpack t) :) +addBlock opts (Node _ BLOCK_QUOTE nodes) = + (BlockQuote (addBlocks opts nodes) :) +addBlock opts (Node _ (HTML_BLOCK t) _) + | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :) + | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: -addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = +addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = id -addBlock (Node _ (CODE_BLOCK info t) _) = +addBlock _ (Node _ (CODE_BLOCK info t) _) = (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) -addBlock (Node _ (HEADING lev) nodes) = - (Header lev ("",[],[]) (addInlines nodes) :) -addBlock (Node _ (LIST listAttrs) nodes) = - (constructor (map (setTightness . addBlocks . children) nodes) :) +addBlock opts (Node _ (HEADING lev) nodes) = + (Header lev ("",[],[]) (addInlines opts nodes) :) +addBlock opts (Node _ (LIST listAttrs) nodes) = + (constructor (map (setTightness . addBlocks opts . children) nodes) :) where constructor = case listType listAttrs of BULLET_LIST -> BulletList ORDERED_LIST -> OrderedList @@ -82,46 +128,108 @@ addBlock (Node _ (LIST listAttrs) nodes) = setTightness = if listTight listAttrs then map paraToPlain else id - paraToPlain (Para xs) = Plain (xs) + paraToPlain (Para xs) = Plain xs paraToPlain x = x delim = case listDelim listAttrs of - PERIOD_DELIM -> Period - PAREN_DELIM -> OneParen -addBlock (Node _ ITEM _) = id -- handled in LIST -addBlock _ = id + PERIOD_DELIM -> Period + PAREN_DELIM -> OneParen +addBlock opts (Node _ (TABLE alignments) nodes) = + (Table [] aligns widths headers rows :) + where aligns = map fromTableCellAlignment alignments + fromTableCellAlignment NoAlignment = AlignDefault + fromTableCellAlignment LeftAligned = AlignLeft + fromTableCellAlignment RightAligned = AlignRight + fromTableCellAlignment CenterAligned = AlignCenter + widths = replicate numcols 0.0 + numcols = if null rows' + then 0 + else maximum $ map length rows' + rows' = map toRow $ filter isRow nodes + (headers, rows) = case rows' of + (h:rs) -> (h, rs) + [] -> ([], []) + isRow (Node _ TABLE_ROW _) = True + isRow _ = False + isCell (Node _ TABLE_CELL _) = True + isCell _ = False + toRow (Node _ TABLE_ROW ns) = map toCell $ filter isCell ns + toRow (Node _ t _) = error $ "toRow encountered non-row " ++ show t + toCell (Node _ TABLE_CELL []) = [] + toCell (Node _ TABLE_CELL (n:ns)) + | isBlockNode n = addBlocks opts (n:ns) + | otherwise = [Plain (addInlines opts (n:ns))] + toCell (Node _ t _) = error $ "toCell encountered non-cell " ++ show t +addBlock _ (Node _ TABLE_ROW _) = id -- handled in TABLE +addBlock _ (Node _ TABLE_CELL _) = id -- handled in TABLE +addBlock _ _ = id + +isBlockNode :: Node -> Bool +isBlockNode (Node _ nodetype _) = + case nodetype of + DOCUMENT -> True + THEMATIC_BREAK -> True + PARAGRAPH -> True + BLOCK_QUOTE -> True + HTML_BLOCK _ -> True + CUSTOM_BLOCK _ _ -> True + CODE_BLOCK _ _ -> True + HEADING _ -> True + LIST _ -> True + ITEM -> True + TEXT _ -> False + SOFTBREAK -> False + LINEBREAK -> False + HTML_INLINE _ -> False + CUSTOM_INLINE _ _ -> False + CODE _ -> False + EMPH -> False + STRONG -> False + LINK _ _ -> False + IMAGE _ _ -> False + STRIKETHROUGH -> False + TABLE _ -> False + TABLE_ROW -> False + TABLE_CELL -> False children :: Node -> [Node] children (Node _ _ ns) = ns -addInlines :: [Node] -> [Inline] -addInlines = foldr addInline [] +addInlines :: ReaderOptions -> [Node] -> [Inline] +addInlines opts = foldr (addInline opts) [] -addInline :: Node -> [Inline] -> [Inline] -addInline (Node _ (TEXT t) _) = (map toinl clumps ++) +addInline :: ReaderOptions -> Node -> [Inline] -> [Inline] +addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++) where raw = unpack t clumps = groupBy samekind raw samekind ' ' ' ' = True samekind ' ' _ = False samekind _ ' ' = False samekind _ _ = True - toinl (' ':_) = Space - toinl xs = Str xs -addInline (Node _ LINEBREAK _) = (LineBreak :) -addInline (Node _ SOFTBREAK _) = (SoftBreak :) -addInline (Node _ (HTML_INLINE t) _) = - (RawInline (Format "html") (unpack t) :) + toinl (' ':_) = Space + toinl xs = Str $ if isEnabled Ext_emoji opts + then convertEmojis xs + else xs +addInline _ (Node _ LINEBREAK _) = (LineBreak :) +addInline opts (Node _ SOFTBREAK _) + | isEnabled Ext_hard_line_breaks opts = (LineBreak :) + | otherwise = (SoftBreak :) +addInline opts (Node _ (HTML_INLINE t) _) + | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :) + | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: -addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = +addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = id -addInline (Node _ (CODE t) _) = +addInline _ (Node _ (CODE t) _) = (Code ("",[],[]) (unpack t) :) -addInline (Node _ EMPH nodes) = - (Emph (addInlines nodes) :) -addInline (Node _ STRONG nodes) = - (Strong (addInlines nodes) :) -addInline (Node _ (LINK url title) nodes) = - (Link nullAttr (addInlines nodes) (unpack url, unpack title) :) -addInline (Node _ (IMAGE url title) nodes) = - (Image nullAttr (addInlines nodes) (unpack url, unpack title) :) -addInline _ = id +addInline opts (Node _ EMPH nodes) = + (Emph (addInlines opts nodes) :) +addInline opts (Node _ STRONG nodes) = + (Strong (addInlines opts nodes) :) +addInline opts (Node _ STRIKETHROUGH nodes) = + (Strikeout (addInlines opts nodes) :) +addInline opts (Node _ (LINK url title) nodes) = + (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :) +addInline opts (Node _ (IMAGE url title) nodes) = + (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :) +addInline _ _ = id diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs new file mode 100644 index 000000000..505d1686d --- /dev/null +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -0,0 +1,320 @@ +{- + Copyright (C) 2017 Sascha Wilde <wilde@sha-bang.de> + + partly based on all the other readers, especialy the work by + John MacFarlane <jgm@berkeley.edu> and + Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> + all bugs are solely created by me. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Creole + Copyright : Copyright (C) 2017 Sascha Wilde + License : GNU GPL, version 2 or above + + Maintainer : Sascha Wilde <wilde@sha-bang.de> + Stability : alpha + Portability : portable + +Conversion of creole text to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Creole ( readCreole + ) where + +import Control.Monad.Except (guard, liftM2, throwError) +import qualified Data.Foldable as F +import Data.Maybe (fromMaybe) +import Data.Monoid +import Data.Text (Text) +import qualified Data.Text as T +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (enclosed) +import Text.Pandoc.Shared (crFilter) + + +-- | Read creole from an input string and return a Pandoc document. +readCreole :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readCreole opts s = do + res <- readWithM parseCreole def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case res of + Left e -> throwError e + Right d -> return d + +type CRLParser = ParserT [Char] ParserState + +-- +-- Utility functions +-- + +(<+>) :: (Monad m, Monoid a) => m a -> m a -> m a +(<+>) = liftM2 (<>) + +-- we have to redefine `enclosed' from Text.Pandoc.Parsing, because it +-- assumes, that there can't be a space after the start parser, but +-- with creole this is possible. +enclosed :: (Show end, PandocMonad m) => CRLParser m start -- ^ start parser + -> CRLParser m end -- ^ end parser + -> CRLParser m a -- ^ content parser (to be used repeatedly) + -> CRLParser m [a] +enclosed start end parser = try $ start >> many1Till parser end + +-- +-- main parser +-- + +specialChars :: [Char] +specialChars = "*/~{}\\|[]()<>\"'" + +parseCreole :: PandocMonad m => CRLParser m Pandoc +parseCreole = do + bs <- mconcat <$> many block + spaces + eof + return $ B.doc bs + + +-- +-- block parsers +-- + +block :: PandocMonad m => CRLParser m B.Blocks +block = do + res <- mempty <$ skipMany1 blankline + <|> nowiki + <|> header + <|> horizontalRule + <|> anyList 1 + <|> table + <|> para + skipMany blankline + return res + +nowiki :: PandocMonad m => CRLParser m B.Blocks +nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart + >> manyTill content nowikiEnd) + where + content = brackets <|> line + brackets = try $ option "" ((:[]) <$> newline) + <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol) + line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol + eol = lookAhead $ try $ nowikiEnd <|> newline + nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline + nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline + +header :: PandocMonad m => CRLParser m B.Blocks +header = try $ do + skipSpaces + level <- + fmap length (many1 (char '=')) + guard $ level <= 6 + skipSpaces + content <- B.str <$> manyTill (noneOf "\n") headerEnd + return $ B.header level content + where + headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline + +unorderedList :: PandocMonad m => Int -> CRLParser m B.Blocks +unorderedList = list '*' B.bulletList + +orderedList :: PandocMonad m => Int -> CRLParser m B.Blocks +orderedList = list '#' B.orderedList + +anyList :: PandocMonad m => Int -> CRLParser m B.Blocks +anyList n = unorderedList n <|> orderedList n + +anyListItem :: PandocMonad m => Int -> CRLParser m B.Blocks +anyListItem n = listItem '*' n <|> listItem '#' n + +list :: PandocMonad m => Char -> ([B.Blocks] -> B.Blocks) -> Int -> CRLParser m B.Blocks +list c f n = + fmap f (many1 (itemPlusSublist <|> listItem c n)) + where itemPlusSublist = try $ listItem c n <+> anyList (n+1) + +listItem :: PandocMonad m => Char -> Int -> CRLParser m B.Blocks +listItem c n = + fmap (B.plain . B.trimInlines .mconcat) (listStart >> many1Till inline itemEnd) + where + listStart = try $ skipSpaces >> optional newline >> skipSpaces + >> count n (char c) + >> lookAhead (noneOf [c]) >> skipSpaces + itemEnd = endOfParaElement <|> nextItem n + <|> if n < 3 then nextItem (n+1) + else nextItem (n+1) <|> nextItem (n-1) + nextItem x = lookAhead $ try $ blankline >> anyListItem x >> return mempty + +table :: PandocMonad m => CRLParser m B.Blocks +table = try $ do + headers <- optionMaybe headerRow + rows <- many1 row + return $ B.simpleTable (fromMaybe [mempty] headers) rows + where + headerRow = try $ skipSpaces >> many1Till headerCell rowEnd + headerCell = B.plain . B.trimInlines . mconcat + <$> (string "|=" >> many1Till inline cellEnd) + row = try $ skipSpaces >> many1Till cell rowEnd + cell = B.plain . B.trimInlines . mconcat + <$> (char '|' >> many1Till inline cellEnd) + rowEnd = try $ optional (char '|') >> skipSpaces >> newline + cellEnd = lookAhead $ try $ char '|' <|> rowEnd + +para :: PandocMonad m => CRLParser m B.Blocks +para = fmap (result . mconcat) (many1Till inline endOfParaElement) + where + result content = if F.all (==Space) content + then mempty + else B.para $ B.trimInlines content + +endOfParaElement :: PandocMonad m => CRLParser m () +endOfParaElement = lookAhead $ endOfInput <|> endOfPara + <|> startOfList <|> startOfTable + <|> startOfHeader <|> hr <|> startOfNowiki + where + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + startOf :: PandocMonad m => CRLParser m a -> CRLParser m () + startOf p = try $ blankline >> p >> return mempty + startOfList = startOf $ anyListItem 1 + startOfTable = startOf table + startOfHeader = startOf header + startOfNowiki = startOf nowiki + hr = startOf horizontalRule + +horizontalRule :: PandocMonad m => CRLParser m B.Blocks +horizontalRule = try $ skipSpaces >> string "----" >> skipSpaces >> newline + >> return B.horizontalRule + +-- +-- inline parsers +-- + +inline :: PandocMonad m => CRLParser m B.Inlines +inline = choice [ whitespace + , escapedLink + , escapedChar + , link + , inlineNowiki + , placeholder + , image + , forcedLinebreak + , bold + , finalBold + , italics + , finalItalics + , str + , symbol + ] <?> "inline" + +escapedChar :: PandocMonad m => CRLParser m B.Inlines +escapedChar = + fmap (B.str . (:[])) (try $ char '~' >> noneOf "\t\n ") + +escapedLink :: PandocMonad m => CRLParser m B.Inlines +escapedLink = try $ do + char '~' + (orig, _) <- uri + return $ B.str orig + +image :: PandocMonad m => CRLParser m B.Inlines +image = try $ do + (orig, src) <- wikiImg + return $ B.image src "" (B.str orig) + where + linkSrc = many $ noneOf "|}\n\r\t" + linkDsc = char '|' >> many (noneOf "}\n\r\t") + wikiImg = try $ do + string "{{" + src <- linkSrc + dsc <- option "" linkDsc + string "}}" + return (dsc, src) + +link :: PandocMonad m => CRLParser m B.Inlines +link = try $ do + (orig, src) <- uriLink <|> wikiLink + return $ B.link src "" orig + where + linkSrc = many $ noneOf "|]\n\r\t" + linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines + linkDsc otxt = B.str + <$> try (option otxt + (char '|' >> many (noneOf "]\n\r\t"))) + linkImg = try $ char '|' >> image + wikiLink = try $ do + string "[[" + src <- linkSrc + dsc <- linkImg <|> linkDsc src + string "]]" + return (dsc, src) + uriLink = try $ do + (orig, src) <- uri + return (B.str orig, src) + +inlineNowiki :: PandocMonad m => CRLParser m B.Inlines +inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end) + where + start = try $ string "{{{" + end = try $ string "}}}" >> lookAhead (noneOf "}") + +placeholder :: PandocMonad m => CRLParser m B.Inlines +-- The semantics of the placeholder is basicallly implementation +-- dependent, so there is no way to DTRT for all cases. +-- So for now we just drop them. +placeholder = B.text <$> try (string "<<<" >> manyTill anyChar (string ">>>") + >> return "") + +whitespace :: PandocMonad m => CRLParser m B.Inlines +whitespace = lb <|> regsp + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +linebreak :: PandocMonad m => CRLParser m B.Inlines +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = eof >> return mempty + innerNewline = return B.space + +symbol :: PandocMonad m => CRLParser m B.Inlines +symbol = fmap (B.str . (:[])) (oneOf specialChars) + +str :: PandocMonad m => CRLParser m B.Inlines +str = let strChar = noneOf ("\t\n " ++ specialChars) in + fmap B.str (many1 strChar) + +bold :: PandocMonad m => CRLParser m B.Inlines +bold = B.strong . mconcat <$> + enclosed (string "**") (try $ string "**") inline + +italics :: PandocMonad m => CRLParser m B.Inlines +italics = B.emph . mconcat <$> + enclosed (string "//") (try $ string "//") inline + +finalBold :: PandocMonad m => CRLParser m B.Inlines +finalBold = B.strong . mconcat <$> + try (string "**" >> many1Till inline endOfParaElement) + +finalItalics :: PandocMonad m => CRLParser m B.Inlines +finalItalics = B.emph . mconcat <$> + try (string "//" >> many1Till inline endOfParaElement) + +forcedLinebreak :: PandocMonad m => CRLParser m B.Inlines +forcedLinebreak = try $ string "\\\\" >> return B.linebreak diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 68552ccb3..728f77a05 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,22 +1,22 @@ +{-# LANGUAGE ExplicitForAll #-} module Text.Pandoc.Readers.DocBook ( readDocBook ) where -import Data.Char (toUpper) -import Text.Pandoc.Shared (safeRead) -import Text.Pandoc.Options -import Text.Pandoc.Definition -import Text.Pandoc.Builder -import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) +import Control.Monad.State.Strict +import Data.Char (isSpace, toUpper) +import Data.Default import Data.Either (rights) +import Data.Foldable (asum) import Data.Generics -import Data.Char (isSpace) -import Control.Monad.State import Data.List (intersperse) import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter, safeRead) import Text.TeXMath (readMathML, writeTeX) -import Text.Pandoc.Error (PandocError) -import Control.Monad.Except -import Data.Default -import Data.Foldable (asum) +import Text.XML.Light {- @@ -50,7 +50,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] author - The name of an individual author [ ] authorblurb - A short description or note about an author [x] authorgroup - Wrapper for author information when a document has - multiple authors or collabarators + multiple authors or collaborators [x] authorinitials - The initials or other short identifier for an author [o] beginpage - The location of a page break in a print version of the document [ ] bibliocoverage - The spatial or temporal coverage of a document @@ -502,7 +502,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] ?asciidoc-br? - line break from asciidoc docbook output -} -type DB = ExceptT PandocError (State DBState) +type DB m = StateT DBState m data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType @@ -523,10 +523,12 @@ instance Default DBState where , dbContent = [] } -readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc -readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs - where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree - tree = normalizeTree . parseXML . handleInstructions $ inp +readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readDocBook _ inp = do + let tree = normalizeTree . parseXML . handleInstructions + $ T.unpack $ crFilter inp + (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree + return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. @@ -536,12 +538,12 @@ handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>': handleInstructions xs = case break (=='<') xs of (ys, []) -> ys ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs + (ys, zs) -> ys ++ handleInstructions zs -getFigure :: Element -> DB Blocks +getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do tit <- case filterChild (named "title") e of - Just t -> getInlines t + Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbFigureTitle = tit } res <- getBlocks e @@ -564,14 +566,12 @@ normalizeTree = everywhere (mkT go) go xs = xs convertEntity :: String -> String -convertEntity e = maybe (map toUpper e) id (lookupEntity e) +convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" attrValue :: String -> Element -> String attrValue attr elt = - case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of - Just z -> z - Nothing -> "" + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function named :: String -> Element -> Bool @@ -579,20 +579,20 @@ named s e = qName (elName e) == s -- -acceptingMetadata :: DB a -> DB a +acceptingMetadata :: PandocMonad m => DB m a -> DB m a acceptingMetadata p = do modify (\s -> s { dbAcceptsMeta = True } ) res <- p modify (\s -> s { dbAcceptsMeta = False }) return res -checkInMeta :: Monoid a => DB () -> DB a +checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a checkInMeta p = do accepts <- dbAcceptsMeta <$> get when accepts p return mempty -addMeta :: ToMetaValue a => String -> a -> DB () +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () addMeta field val = modify (setMeta field val) instance HasMeta DBState where @@ -631,7 +631,7 @@ addToStart toadd bs = -- function that is used by both mediaobject (in parseBlock) -- and inlinemediaobject (in parseInline) -- A DocBook mediaobject is a wrapper around a set of alternative presentations -getMediaobject :: Element -> DB Inlines +getMediaobject :: PandocMonad m => Element -> DB m Inlines getMediaobject e = do (imageUrl, attr) <- case filterChild (named "imageobject") e of @@ -651,18 +651,20 @@ getMediaobject e = do || named "textobject" x || named "alt" x) el of Nothing -> return mempty - Just z -> mconcat <$> (mapM parseInline $ elContent z) + Just z -> mconcat <$> + mapM parseInline (elContent z) figTitle <- gets dbFigureTitle let (caption, title) = if isNull figTitle then (getCaption e, "") else (return figTitle, "fig:") - liftM (imageWith attr imageUrl title) caption + fmap (imageWith attr imageUrl title) caption -getBlocks :: Element -> DB Blocks -getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) +getBlocks :: PandocMonad m => Element -> DB m Blocks +getBlocks e = mconcat <$> + mapM parseBlock (elContent e) -parseBlock :: Content -> DB Blocks +parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty @@ -795,15 +797,16 @@ parseBlock (Elem e) = return $ p <> b <> x codeBlockWithLang = do let classes' = case attrValue "language" e of - "" -> [] - x -> [x] + "" -> [] + x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) $ trimNl $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty Just z -> (para . (str "— " <>) . mconcat) - <$> (mapM parseInline $ elContent z) + <$> + mapM parseInline (elContent z) contents <- getBlocks e return $ blockQuote (contents <> attrib) listitems = mapM getBlocks $ filterChildren (named "listitem") e @@ -868,11 +871,11 @@ parseBlock (Elem e) = || x == '.') w Nothing -> 0 :: Double let numrows = case bodyrows of - [] -> 0 - xs -> maximum $ map length xs + [] -> 0 + xs -> maximum $ map length xs let aligns = case colspecs of - [] -> replicate numrows AlignDefault - cs -> map toAlignment cs + [] -> replicate numrows AlignDefault + cs -> map toAlignment cs let widths = case colspecs of [] -> replicate numrows 0 cs -> let ws = map toWidth cs @@ -892,7 +895,7 @@ parseBlock (Elem e) = headerText <- case filterChild (named "title") e `mplus` (filterChild (named "info") e >>= filterChild (named "title")) of - Just t -> getInlines t + Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbSectionLevel = n } b <- getBlocks e @@ -902,8 +905,9 @@ parseBlock (Elem e) = lineItems = mapM getInlines $ filterChildren (named "line") e metaBlock = acceptingMetadata (getBlocks e) >> return mempty -getInlines :: Element -> DB Inlines -getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') +getInlines :: PandocMonad m => Element -> DB m Inlines +getInlines e' = (trimInlines . mconcat) <$> + mapM parseInline (elContent e') strContentRecursive :: Element -> String strContentRecursive = strContent . @@ -913,10 +917,10 @@ elementToStr :: Content -> Content elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x -parseInline :: Content -> DB Inlines +parseInline :: PandocMonad m => Content -> DB m Inlines parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref + return $ maybe (text $ map toUpper ref) text $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of "equation" -> equation displayMath @@ -957,8 +961,10 @@ parseInline (Elem e) = "userinput" -> codeWithLang "varargs" -> return $ code "(...)" "keycap" -> return (str $ strContent e) - "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) - "menuchoice" -> menuchoice <$> (mapM parseInline $ + "keycombo" -> keycombo <$> + mapM parseInline (elContent e) + "menuchoice" -> menuchoice <$> + mapM parseInline ( filter isGuiMenu $ elContent e) "xref" -> do content <- dbContent <$> get @@ -977,17 +983,18 @@ parseInline (Elem e) = ils <- innerInlines let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just h -> h - _ -> ('#' : attrValue "linkend" e) + _ -> '#' : attrValue "linkend" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, words $ attrValue "role" e, []) return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of - "bold" -> strong <$> innerInlines - "strong" -> strong <$> innerInlines + "bold" -> strong <$> innerInlines + "strong" -> strong <$> innerInlines "strikethrough" -> strikeout <$> innerInlines - _ -> emph <$> innerInlines - "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) + _ -> emph <$> innerInlines + "footnote" -> (note . mconcat) <$> + mapM parseBlock (elContent e) "title" -> return mempty "affiliation" -> return mempty -- Note: this isn't a real docbook tag; it's what we convert @@ -996,7 +1003,7 @@ parseInline (Elem e) = "br" -> return linebreak _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> - (mapM parseInline $ elContent e) + mapM parseInline (elContent e) equation constructor = return $ mconcat $ map (constructor . writeTeX) $ rights diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 595c805bf..e58b0a905 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings, CPP #-} - +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2018 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -50,12 +51,13 @@ implemented, [-] means partially implemented): * Inlines - [X] Str - - [X] Emph (italics and underline both read as Emph) + - [X] Emph - [X] Strong - [X] Strikeout - [X] Superscript - [X] Subscript - [X] SmallCaps + - [-] Underline (was previously converted to Emph) - [ ] Quoted - [ ] Cite - [X] Code (styled with `VerbatimChar`) @@ -64,88 +66,91 @@ implemented, [-] means partially implemented): - [X] Math - [X] Link (links to an arbitrary bookmark create a span with the target as id and "anchor" class) - - [X] Image + - [X] Image - [X] Note (Footnotes and Endnotes are silently combined.) -} module Text.Pandoc.Readers.Docx - ( readDocxWithWarnings - , readDocx + ( readDocx ) where import Codec.Archive.Zip -import Text.Pandoc.Definition -import Text.Pandoc.Options +import Control.Monad.Reader +import Control.Monad.State.Strict +import qualified Data.ByteString.Lazy as B +import Data.Default (Default) +import Data.List (delete, intersect) +import qualified Data.Map as M +import Data.Maybe (isJust, fromMaybe) +import Data.Sequence (ViewL (..), viewl) +import qualified Data.Sequence as Seq +import qualified Data.Set as Set import Text.Pandoc.Builder -import Text.Pandoc.Walk -import Text.Pandoc.Readers.Docx.Parse -import Text.Pandoc.Readers.Docx.Lists +-- import Text.Pandoc.Definition +import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Options import Text.Pandoc.Readers.Docx.Combine +import Text.Pandoc.Readers.Docx.Lists +import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Shared -import Text.Pandoc.MediaBag (insertMedia, MediaBag) -import Data.List (delete, intersect) +import Text.Pandoc.Walk import Text.TeXMath (writeTeX) -import Data.Default (Default) -import qualified Data.ByteString.Lazy as B -import qualified Data.Map as M -import qualified Data.Set as Set -import Control.Monad.Reader -import Control.Monad.State -import Data.Sequence (ViewL(..), viewl) -import qualified Data.Sequence as Seq (null) #if !(MIN_VERSION_base(4,8,0)) import Data.Traversable (traverse) #endif - +import Control.Monad.Except (throwError) +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Error -import Control.Monad.Except +import Text.Pandoc.Logging -readDocxWithWarnings :: ReaderOptions - -> B.ByteString - -> Either PandocError (Pandoc, MediaBag, [String]) -readDocxWithWarnings opts bytes +readDocx :: PandocMonad m + => ReaderOptions + -> B.ByteString + -> m Pandoc +readDocx opts bytes | Right archive <- toArchiveOrFail bytes , Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do - (meta, blks, mediaBag, warnings) <- docxToOutput opts docx - return (Pandoc meta blks, mediaBag, parserWarnings ++ warnings) -readDocxWithWarnings _ _ = - Left (ParseFailure "couldn't parse docx file") - -readDocx :: ReaderOptions - -> B.ByteString - -> Either PandocError (Pandoc, MediaBag) -readDocx opts bytes = do - (pandoc, mediaBag, _) <- readDocxWithWarnings opts bytes - return (pandoc, mediaBag) + mapM_ (P.report . DocxParserWarning) parserWarnings + (meta, blks) <- docxToOutput opts docx + return $ Pandoc meta blks +readDocx _ _ = + throwError $ PandocSomeError "couldn't parse docx file" data DState = DState { docxAnchorMap :: M.Map String String - , docxMediaBag :: MediaBag - , docxDropCap :: Inlines - , docxWarnings :: [String] + , docxAnchorSet :: Set.Set String + , docxImmedPrevAnchor :: Maybe String + , docxMediaBag :: MediaBag + , docxDropCap :: Inlines + , docxWarnings :: [String] + -- keep track of (numId, lvl) values for + -- restarting + , docxListState :: M.Map (String, String) Integer + , docxPrevPara :: Inlines } instance Default DState where def = DState { docxAnchorMap = M.empty + , docxAnchorSet = mempty + , docxImmedPrevAnchor = Nothing , docxMediaBag = mempty , docxDropCap = mempty , docxWarnings = [] + , docxListState = M.empty + , docxPrevPara = mempty } -data DEnv = DEnv { docxOptions :: ReaderOptions - , docxInHeaderBlock :: Bool } +data DEnv = DEnv { docxOptions :: ReaderOptions + , docxInHeaderBlock :: Bool + } instance Default DEnv where def = DEnv def False -type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState)) - -evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a -evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx +type DocxContext m = ReaderT DEnv (StateT DState m) -addDocxWarning :: String -> DocxContext () -addDocxWarning msg = do - warnings <- gets docxWarnings - modify $ \s -> s {docxWarnings = msg : warnings} +evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a +evalDocxContext ctx env st = flip evalStateT st $flip runReaderT env ctx -- This is empty, but we put it in for future-proofing. spansToKeep :: [String] @@ -162,7 +167,7 @@ metaStyles = M.fromList [ ("Title", "title") , ("Abstract", "abstract")] sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart]) -sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp)) +sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp) isMetaPar :: BodyPart -> Bool isMetaPar (Paragraph pPr _) = @@ -174,28 +179,28 @@ isEmptyPar (Paragraph _ parParts) = all isEmptyParPart parParts where isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems - isEmptyParPart _ = False + isEmptyParPart _ = False isEmptyElem (TextRun s) = trim s == "" isEmptyElem _ = True isEmptyPar _ = False -bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue) +bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue) bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp - , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles) + , (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles) , (Just metaField) <- M.lookup c metaStyles = do inlines <- smushInlines <$> mapM parPartToInlines parParts remaining <- bodyPartsToMeta' bps let f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils'] - f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks) + f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks (Para ils : blks) f m (MetaList mv) = MetaList (m : mv) f m n = MetaList [m, n] return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps -bodyPartsToMeta :: [BodyPart] -> DocxContext Meta +bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta bodyPartsToMeta bps = do mp <- bodyPartsToMeta' bps let mp' = @@ -208,7 +213,7 @@ fixAuthors :: MetaValue -> MetaValue fixAuthors (MetaBlocks blks) = MetaList $ map g $ filter f blks where f (Para _) = True - f _ = False + f _ = False g (Para ils) = MetaInlines ils g _ = MetaInlines [] fixAuthors mv = mv @@ -220,106 +225,122 @@ codeDivs :: [String] codeDivs = ["SourceCode"] runElemToInlines :: RunElem -> Inlines -runElemToInlines (TextRun s) = text s -runElemToInlines (LnBrk) = linebreak -runElemToInlines (Tab) = space -runElemToInlines (SoftHyphen) = text "\xad" -runElemToInlines (NoBreakHyphen) = text "\x2011" +runElemToInlines (TextRun s) = text s +runElemToInlines LnBrk = linebreak +runElemToInlines Tab = space +runElemToInlines SoftHyphen = text "\xad" +runElemToInlines NoBreakHyphen = text "\x2011" runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString (LnBrk) = ['\n'] -runElemToString (Tab) = ['\t'] -runElemToString (SoftHyphen) = ['\xad'] -runElemToString (NoBreakHyphen) = ['\x2011'] +runElemToString (TextRun s) = s +runElemToString LnBrk = ['\n'] +runElemToString Tab = ['\t'] +runElemToString SoftHyphen = ['\xad'] +runElemToString NoBreakHyphen = ['\x2011'] runToString :: Run -> String runToString (Run _ runElems) = concatMap runElemToString runElems -runToString _ = "" +runToString _ = "" parPartToString :: ParPart -> String -parPartToString (PlainRun run) = runToString run +parPartToString (PlainRun run) = runToString run parPartToString (InternalHyperLink _ runs) = concatMap runToString runs parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs -parPartToString _ = "" +parPartToString _ = "" blacklistedCharStyles :: [String] blacklistedCharStyles = ["Hyperlink"] -resolveDependentRunStyle :: RunStyle -> RunStyle +resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle resolveDependentRunStyle rPr | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles = - rPr - | Just (_, cs) <- rStyle rPr = - let rPr' = resolveDependentRunStyle cs - in - RunStyle { isBold = case isBold rPr of - Just bool -> Just bool - Nothing -> isBold rPr' - , isItalic = case isItalic rPr of - Just bool -> Just bool - Nothing -> isItalic rPr' - , isSmallCaps = case isSmallCaps rPr of - Just bool -> Just bool - Nothing -> isSmallCaps rPr' - , isStrike = case isStrike rPr of - Just bool -> Just bool - Nothing -> isStrike rPr' - , rVertAlign = case rVertAlign rPr of - Just valign -> Just valign - Nothing -> rVertAlign rPr' - , rUnderline = case rUnderline rPr of - Just ulstyle -> Just ulstyle - Nothing -> rUnderline rPr' - , rStyle = rStyle rPr } - | otherwise = rPr - -runStyleToTransform :: RunStyle -> (Inlines -> Inlines) + return rPr + | Just (_, cs) <- rStyle rPr = do + opts <- asks docxOptions + if isEnabled Ext_styles opts + then return rPr + else do rPr' <- resolveDependentRunStyle cs + return $ + RunStyle { isBold = case isBold rPr of + Just bool -> Just bool + Nothing -> isBold rPr' + , isItalic = case isItalic rPr of + Just bool -> Just bool + Nothing -> isItalic rPr' + , isSmallCaps = case isSmallCaps rPr of + Just bool -> Just bool + Nothing -> isSmallCaps rPr' + , isStrike = case isStrike rPr of + Just bool -> Just bool + Nothing -> isStrike rPr' + , rVertAlign = case rVertAlign rPr of + Just valign -> Just valign + Nothing -> rVertAlign rPr' + , rUnderline = case rUnderline rPr of + Just ulstyle -> Just ulstyle + Nothing -> rUnderline rPr' + , rStyle = rStyle rPr } + | otherwise = return rPr + +runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines) runStyleToTransform rPr | Just (s, _) <- rStyle rPr - , s `elem` spansToKeep = - let rPr' = rPr{rStyle = Nothing} - in - (spanWith ("", [s], [])) . (runStyleToTransform rPr') - | Just True <- isItalic rPr = - emph . (runStyleToTransform rPr {isItalic = Nothing}) - | Just True <- isBold rPr = - strong . (runStyleToTransform rPr {isBold = Nothing}) - | Just True <- isSmallCaps rPr = - smallcaps . (runStyleToTransform rPr {isSmallCaps = Nothing}) - | Just True <- isStrike rPr = - strikeout . (runStyleToTransform rPr {isStrike = Nothing}) - | Just SupScrpt <- rVertAlign rPr = - superscript . (runStyleToTransform rPr {rVertAlign = Nothing}) - | Just SubScrpt <- rVertAlign rPr = - subscript . (runStyleToTransform rPr {rVertAlign = Nothing}) - | Just "single" <- rUnderline rPr = - emph . (runStyleToTransform rPr {rUnderline = Nothing}) - | otherwise = id - -runToInlines :: Run -> DocxContext Inlines + , s `elem` spansToKeep = do + transform <- runStyleToTransform rPr{rStyle = Nothing} + return $ spanWith ("", [s], []) . transform + | Just (s, _) <- rStyle rPr = do + opts <- asks docxOptions + let extraInfo = if isEnabled Ext_styles opts + then spanWith ("", [], [("custom-style", s)]) + else id + transform <- runStyleToTransform rPr{rStyle = Nothing} + return $ extraInfo . transform + | Just True <- isItalic rPr = do + transform <- runStyleToTransform rPr{isItalic = Nothing} + return $ emph . transform + | Just True <- isBold rPr = do + transform <- runStyleToTransform rPr{isBold = Nothing} + return $ strong . transform + | Just True <- isSmallCaps rPr = do + transform <- runStyleToTransform rPr{isSmallCaps = Nothing} + return $ smallcaps . transform + | Just True <- isStrike rPr = do + transform <- runStyleToTransform rPr{isStrike = Nothing} + return $ strikeout . transform + | Just SupScrpt <- rVertAlign rPr = do + transform <- runStyleToTransform rPr{rVertAlign = Nothing} + return $ superscript . transform + | Just SubScrpt <- rVertAlign rPr = do + transform <- runStyleToTransform rPr{rVertAlign = Nothing} + return $ subscript . transform + | Just "single" <- rUnderline rPr = do + transform <- runStyleToTransform rPr{rUnderline = Nothing} + return $ underlineSpan . transform + | otherwise = return id + +runToInlines :: PandocMonad m => Run -> DocxContext m Inlines runToInlines (Run rs runElems) | Just (s, _) <- rStyle rs - , s `elem` codeStyles = - let rPr = resolveDependentRunStyle rs - codeString = code $ concatMap runElemToString runElems - in - return $ case rVertAlign rPr of - Just SupScrpt -> superscript codeString - Just SubScrpt -> subscript codeString - _ -> codeString + , s `elem` codeStyles = do + rPr <- resolveDependentRunStyle rs + let codeString = code $ concatMap runElemToString runElems + return $ case rVertAlign rPr of + Just SupScrpt -> superscript codeString + Just SubScrpt -> subscript codeString + _ -> codeString | otherwise = do - let ils = smushInlines (map runElemToInlines runElems) - return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils + rPr <- resolveDependentRunStyle rs + let ils = smushInlines (map runElemToInlines runElems) + transform <- runStyleToTransform rPr + return $ transform ils runToInlines (Footnote bps) = do - blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) + blksList <- smushBlocks <$> mapM bodyPartToBlocks bps return $ note blksList runToInlines (Endnote bps) = do - blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) + blksList <- smushBlocks <$> mapM bodyPartToBlocks bps return $ note blksList runToInlines (InlineDrawing fp title alt bs ext) = do - mediaBag <- gets docxMediaBag - modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } + (lift . lift) $ P.insertMedia fp Nothing bs return $ imageWith (extentToAttr ext) fp title $ text alt runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" @@ -330,20 +351,39 @@ extentToAttr (Just (w, h)) = showDim d = show (d / 914400) ++ "in" extentToAttr _ = nullAttr -blocksToInlinesWarn :: String -> Blocks -> DocxContext Inlines +blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines blocksToInlinesWarn cmtId blks = do let blkList = toList blks notParaOrPlain :: Block -> Bool - notParaOrPlain (Para _) = False + notParaOrPlain (Para _) = False notParaOrPlain (Plain _) = False - notParaOrPlain _ = True - when (not $ null $ filter notParaOrPlain blkList) - (addDocxWarning $ "Docx comment " ++ cmtId ++ " will not retain formatting") - return $ fromList $ blocksToInlines blkList - -parPartToInlines :: ParPart -> DocxContext Inlines -parPartToInlines (PlainRun r) = runToInlines r -parPartToInlines (Insertion _ author date runs) = do + notParaOrPlain _ = True + unless ( not (any notParaOrPlain blkList)) $ + lift $ P.report $ DocxParserWarning $ + "Docx comment " ++ cmtId ++ " will not retain formatting" + return $ blocksToInlines' blkList + +-- The majority of work in this function is done in the primed +-- subfunction `partPartToInlines'`. We make this wrapper so that we +-- don't have to modify `docxImmedPrevAnchor` state after every function. +parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines +parPartToInlines parPart = + case parPart of + (BookMark _ anchor) | notElem anchor dummyAnchors -> do + inHdrBool <- asks docxInHeaderBlock + ils <- parPartToInlines' parPart + immedPrevAnchor <- gets docxImmedPrevAnchor + unless (isJust immedPrevAnchor || inHdrBool) + (modify $ \s -> s{ docxImmedPrevAnchor = Just anchor}) + return ils + _ -> do + ils <- parPartToInlines' parPart + modify $ \s -> s{ docxImmedPrevAnchor = Nothing} + return ils + +parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines +parPartToInlines' (PlainRun r) = runToInlines r +parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author date)) runs) = do opts <- asks docxOptions case readerTrackChanges opts of AcceptChanges -> smushInlines <$> mapM runToInlines runs @@ -352,7 +392,7 @@ parPartToInlines (Insertion _ author date runs) = do ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["insertion"], [("author", author), ("date", date)]) return $ spanWith attr ils -parPartToInlines (Deletion _ author date runs) = do +parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do opts <- asks docxOptions case readerTrackChanges opts of AcceptChanges -> return mempty @@ -361,7 +401,7 @@ parPartToInlines (Deletion _ author date runs) = do ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["deletion"], [("author", author), ("date", date)]) return $ spanWith attr ils -parPartToInlines (CommentStart cmtId author date bodyParts) = do +parPartToInlines' (CommentStart cmtId author date bodyParts) = do opts <- asks docxOptions case readerTrackChanges opts of AllChanges -> do @@ -370,16 +410,16 @@ parPartToInlines (CommentStart cmtId author date bodyParts) = do let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)]) return $ spanWith attr ils _ -> return mempty -parPartToInlines (CommentEnd cmtId) = do +parPartToInlines' (CommentEnd cmtId) = do opts <- asks docxOptions case readerTrackChanges opts of AllChanges -> do let attr = ("", ["comment-end"], [("id", cmtId)]) return $ spanWith attr mempty _ -> return mempty -parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = +parPartToInlines' (BookMark _ anchor) | anchor `elem` dummyAnchors = return mempty -parPartToInlines (BookMark _ anchor) = +parPartToInlines' (BookMark _ anchor) = -- We record these, so we can make sure not to overwrite -- user-defined anchor links with header auto ids. do @@ -395,27 +435,40 @@ parPartToInlines (BookMark _ anchor) = -- of rewriting user-defined anchor links. However, since these -- are not defined in pandoc, it seems like a necessary evil to -- avoid an extra pass. - let newAnchor = - if not inHdrBool && anchor `elem` (M.elems anchorMap) - then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap) - else anchor - unless inHdrBool - (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) - return $ spanWith (newAnchor, ["anchor"], []) mempty -parPartToInlines (Drawing fp title alt bs ext) = do - mediaBag <- gets docxMediaBag - modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } + immedPrevAnchor <- gets docxImmedPrevAnchor + case immedPrevAnchor of + Just prevAnchor -> do + unless inHdrBool + (modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap}) + return mempty + Nothing -> do + let newAnchor = + if not inHdrBool && anchor `elem` M.elems anchorMap + then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap) + else anchor + unless inHdrBool + (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) + return $ spanWith (newAnchor, ["anchor"], []) mempty +parPartToInlines' (Drawing fp title alt bs ext) = do + (lift . lift) $ P.insertMedia fp Nothing bs return $ imageWith (extentToAttr ext) fp title $ text alt -parPartToInlines Chart = do +parPartToInlines' Chart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" -parPartToInlines (InternalHyperLink anchor runs) = do +parPartToInlines' (InternalHyperLink anchor runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link ('#' : anchor) "" ils -parPartToInlines (ExternalHyperLink target runs) = do +parPartToInlines' (ExternalHyperLink target runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link target "" ils -parPartToInlines (PlainOMath exps) = do +parPartToInlines' (PlainOMath exps) = return $ math $ writeTeX exps +parPartToInlines' (SmartTag runs) = + smushInlines <$> mapM runToInlines runs +parPartToInlines' (Field info runs) = + case info of + HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs + UnknownField -> smushInlines <$> mapM runToInlines runs +parPartToInlines' NullParPart = return mempty isAnchorSpan :: Inline -> Bool isAnchorSpan (Span (_, classes, kvs) _) = @@ -426,10 +479,10 @@ isAnchorSpan _ = False dummyAnchors :: [String] dummyAnchors = ["_GoBack"] -makeHeaderAnchor :: Blocks -> DocxContext Blocks +makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks makeHeaderAnchor bs = traverse makeHeaderAnchor' bs -makeHeaderAnchor' :: Block -> DocxContext Block +makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block -- If there is an anchor already there (an anchor span in the header, -- to be exact), we rename and associate the new id with the old one. makeHeaderAnchor' (Header n (ident, classes, kvs) ils) @@ -458,126 +511,184 @@ makeHeaderAnchor' blk = return blk -- Rewrite a standalone paragraph block as a plain singleParaToPlain :: Blocks -> Blocks singleParaToPlain blks - | (Para (ils) :< seeq) <- viewl $ unMany blks + | (Para ils :< seeq) <- viewl $ unMany blks , Seq.null seeq = singleton $ Plain ils singleParaToPlain blks = blks -cellToBlocks :: Cell -> DocxContext Blocks +cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks cellToBlocks (Cell bps) = do blks <- smushBlocks <$> mapM bodyPartToBlocks bps return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks -rowToBlocksList :: Row -> DocxContext [Blocks] +rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks] rowToBlocksList (Row cells) = do blksList <- mapM cellToBlocks cells return $ map singleParaToPlain blksList -trimLineBreaks :: [Inline] -> [Inline] -trimLineBreaks [] = [] -trimLineBreaks (LineBreak : ils) = trimLineBreaks ils -trimLineBreaks ils - | (LineBreak : ils') <- reverse ils = trimLineBreaks (reverse ils') -trimLineBreaks ils = ils +-- like trimInlines, but also take out linebreaks +trimSps :: Inlines -> Inlines +trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils + where isSp Space = True + isSp SoftBreak = True + isSp LineBreak = True + isSp _ = False -parStyleToTransform :: ParagraphStyle -> (Blocks -> Blocks) +parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks) parStyleToTransform pPr | (c:cs) <- pStyle pPr - , c `elem` divsToKeep = - let pPr' = pPr { pStyle = cs } - in - (divWith ("", [c], [])) . (parStyleToTransform pPr') + , c `elem` divsToKeep = do + let pPr' = pPr { pStyle = cs } + transform <- parStyleToTransform pPr' + return $ divWith ("", [c], []) . transform | (c:cs) <- pStyle pPr, - c `elem` listParagraphDivs = + c `elem` listParagraphDivs = do let pPr' = pPr { pStyle = cs, indentation = Nothing} - in - (divWith ("", [c], [])) . (parStyleToTransform pPr') - | (_:cs) <- pStyle pPr - , Just True <- pBlockQuote pPr = - let pPr' = pPr { pStyle = cs } - in - blockQuote . (parStyleToTransform pPr') - | (_:cs) <- pStyle pPr = + transform <- parStyleToTransform pPr' + return $ divWith ("", [c], []) . transform + | (c:cs) <- pStyle pPr + , Just True <- pBlockQuote pPr = do + opts <- asks docxOptions + let pPr' = pPr { pStyle = cs } + transform <- parStyleToTransform pPr' + let extraInfo = if isEnabled Ext_styles opts + then divWith ("", [], [("custom-style", c)]) + else id + return $ extraInfo . blockQuote . transform + | (c:cs) <- pStyle pPr = do + opts <- asks docxOptions let pPr' = pPr { pStyle = cs} - in - parStyleToTransform pPr' + transform <- parStyleToTransform pPr' + let extraInfo = if isEnabled Ext_styles opts + then divWith ("", [], [("custom-style", c)]) + else id + return $ extraInfo . transform | null (pStyle pPr) , Just left <- indentation pPr >>= leftParIndent - , Just hang <- indentation pPr >>= hangingParIndent = + , Just hang <- indentation pPr >>= hangingParIndent = do let pPr' = pPr { indentation = Nothing } - in - case (left - hang) > 0 of - True -> blockQuote . (parStyleToTransform pPr') - False -> parStyleToTransform pPr' + transform <- parStyleToTransform pPr' + return $ case (left - hang) > 0 of + True -> blockQuote . transform + False -> transform | null (pStyle pPr), - Just left <- indentation pPr >>= leftParIndent = + Just left <- indentation pPr >>= leftParIndent = do let pPr' = pPr { indentation = Nothing } - in - case left > 0 of - True -> blockQuote . (parStyleToTransform pPr') - False -> parStyleToTransform pPr' -parStyleToTransform _ = id + transform <- parStyleToTransform pPr' + return $ case left > 0 of + True -> blockQuote . transform + False -> transform +parStyleToTransform _ = return id -bodyPartToBlocks :: BodyPart -> DocxContext Blocks +bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks bodyPartToBlocks (Paragraph pPr parparts) - | not $ null $ codeDivs `intersect` (pStyle pPr) = - return - $ parStyleToTransform pPr - $ codeBlock - $ concatMap parPartToString parparts + | not $ null $ codeDivs `intersect` (pStyle pPr) = do + transform <- parStyleToTransform pPr + return $ + transform $ + codeBlock $ + concatMap parPartToString parparts | Just (style, n) <- pHeading pPr = do - ils <- local (\s-> s{docxInHeaderBlock=True}) $ + ils <-local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) makeHeaderAnchor $ headerWith ("", delete style (pStyle pPr), []) n ils | otherwise = do - ils <- smushInlines <$> mapM parPartToInlines parparts >>= - (return . fromList . trimLineBreaks . normalizeSpaces . toList) + ils <- (trimSps . smushInlines) <$> mapM parPartToInlines parparts + prevParaIls <- gets docxPrevPara dropIls <- gets docxDropCap let ils' = dropIls <> ils if dropCap pPr then do modify $ \s -> s { docxDropCap = ils' } return mempty else do modify $ \s -> s { docxDropCap = mempty } - return $ case isNull ils' of - True -> mempty - _ -> parStyleToTransform pPr $ para ils' + let ils'' = prevParaIls <> + (if isNull prevParaIls then mempty else space) <> + ils' + opts <- asks docxOptions + case () of + + _ | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) -> + return mempty + _ | Just (TrackedChange Insertion _) <- pChange pPr + , AcceptChanges <- readerTrackChanges opts -> do + modify $ \s -> s {docxPrevPara = mempty} + transform <- parStyleToTransform pPr + return $ transform $ para ils'' + _ | Just (TrackedChange Insertion _) <- pChange pPr + , RejectChanges <- readerTrackChanges opts -> do + modify $ \s -> s {docxPrevPara = ils''} + return mempty + _ | Just (TrackedChange Insertion cInfo) <- pChange pPr + , AllChanges <- readerTrackChanges opts + , ChangeInfo _ cAuthor cDate <- cInfo -> do + let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)]) + insertMark = spanWith attr mempty + transform <- parStyleToTransform pPr + return $ transform $ + para $ ils'' <> insertMark + _ | Just (TrackedChange Deletion _) <- pChange pPr + , AcceptChanges <- readerTrackChanges opts -> do + modify $ \s -> s {docxPrevPara = ils''} + return mempty + _ | Just (TrackedChange Deletion _) <- pChange pPr + , RejectChanges <- readerTrackChanges opts -> do + modify $ \s -> s {docxPrevPara = mempty} + transform <- parStyleToTransform pPr + return $ transform $ para ils'' + _ | Just (TrackedChange Deletion cInfo) <- pChange pPr + , AllChanges <- readerTrackChanges opts + , ChangeInfo _ cAuthor cDate <- cInfo -> do + let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)]) + insertMark = spanWith attr mempty + transform <- parStyleToTransform pPr + return $ transform $ + para $ ils'' <> insertMark + _ | otherwise -> do + modify $ \s -> s {docxPrevPara = mempty} + transform <- parStyleToTransform pPr + return $ transform $ para ils'' bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do - let - kvs = case levelInfo of - (_, fmt, txt, Just start) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - , ("start", (show start)) - ] - - (_, fmt, txt, Nothing) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - ] + -- We check whether this current numId has previously been used, + -- since Docx expects us to pick up where we left off. + listState <- gets docxListState + let startFromState = M.lookup (numId, lvl) listState + (_, fmt,txt, startFromLevelInfo) = levelInfo + start = case startFromState of + Just n -> n + 1 + Nothing -> fromMaybe 1 startFromLevelInfo + kvs = [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + , ("start", show start) + ] + modify $ \st -> st{ docxListState = M.insert (numId, lvl) start listState} blks <- bodyPartToBlocks (Paragraph pPr parparts) return $ divWith ("", ["list-item"], kvs) blks -bodyPartToBlocks (ListItem pPr _ _ _ parparts) = - let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)} +bodyPartToBlocks (ListItem pPr _ _ _ parparts) = + let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr} in bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = return $ para mempty -bodyPartToBlocks (Tbl cap _ look (r:rs)) = do +bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do let caption = text cap (hdr, rows) = case firstRowFormatting look of True | null rs -> (Nothing, [r]) | otherwise -> (Just r, rs) False -> (Nothing, r:rs) - cells <- mapM rowToBlocksList rows + cells <- mapM rowToBlocksList rows - let width = case cells of - r':_ -> length r' - -- shouldn't happen - [] -> 0 + let width = maybe 0 maximum $ nonEmpty $ map rowLength parts + -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out + -- our own, see + -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155 + nonEmpty [] = Nothing + nonEmpty l = Just l + rowLength :: Row -> Int + rowLength (Row c) = length c hdrCells <- case hdr of Just r' -> rowToBlocksList r' @@ -592,36 +703,54 @@ bodyPartToBlocks (Tbl cap _ look (r:rs)) = do widths = replicate width 0 :: [Double] return $ table caption (zip alignments widths) hdrCells cells -bodyPartToBlocks (OMathPara e) = do +bodyPartToBlocks (OMathPara e) = return $ para $ displayMath (writeTeX e) -- replace targets with generated anchors. -rewriteLink' :: Inline -> DocxContext Inline +rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline rewriteLink' l@(Link attr ils ('#':target, title)) = do anchorMap <- gets docxAnchorMap - return $ case M.lookup target anchorMap of - Just newTarget -> (Link attr ils ('#':newTarget, title)) - Nothing -> l + case M.lookup target anchorMap of + Just newTarget -> do + modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)} + return $ Link attr ils ('#':newTarget, title) + Nothing -> do + modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)} + return l rewriteLink' il = return il -rewriteLinks :: [Block] -> DocxContext [Block] +rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block] rewriteLinks = mapM (walkM rewriteLink') -bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag, [String]) +removeOrphanAnchors'' :: PandocMonad m => Inline -> DocxContext m [Inline] +removeOrphanAnchors'' s@(Span (ident, classes, _) ils) + | "anchor" `elem` classes = do + anchorSet <- gets docxAnchorSet + return $ if ident `Set.member` anchorSet + then [s] + else ils +removeOrphanAnchors'' il = return [il] + +removeOrphanAnchors' :: PandocMonad m => [Inline] -> DocxContext m [Inline] +removeOrphanAnchors' ils = liftM concat $ mapM removeOrphanAnchors'' ils + +removeOrphanAnchors :: PandocMonad m => [Block] -> DocxContext m [Block] +removeOrphanAnchors = mapM (walkM removeOrphanAnchors') + +bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block]) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks - mediaBag <- gets docxMediaBag - warnings <- gets docxWarnings - return $ (meta, - blks', - mediaBag, - warnings) - -docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag, [String]) + blks'' <- removeOrphanAnchors blks' + return (meta, blks'') + +docxToOutput :: PandocMonad m + => ReaderOptions + -> Docx + -> m (Meta, [Block]) docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index 39e0df825..003265e6e 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,15 +1,16 @@ -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, - PatternGuards #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE TypeSynonymInstances #-} module Text.Pandoc.Readers.Docx.Combine ( smushInlines , smushBlocks ) where -import Text.Pandoc.Builder import Data.List -import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>)) +import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr, (><), (|>)) import qualified Data.Sequence as Seq (null) +import Text.Pandoc.Builder data Modifier a = Modifier (a -> a) | AttrModifier (Attr -> a -> a) Attr @@ -35,16 +36,16 @@ spaceOutInlines ils = right = case viewr contents of (_ :> Space) -> space _ -> mempty in - (left, (stackInlines fs $ trimInlines . Many $ contents), right) + (left, stackInlines fs $ trimInlines . Many $ contents, right) stackInlines :: [Modifier Inlines] -> Inlines -> Inlines stackInlines [] ms = ms stackInlines (NullModifier : fs) ms = stackInlines fs ms -stackInlines ((Modifier f) : fs) ms = +stackInlines (Modifier f : fs) ms = if isEmpty ms then stackInlines fs ms else f $ stackInlines fs ms -stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms +stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) unstackInlines ms = case ilModifier ms of @@ -56,15 +57,15 @@ unstackInlines ms = case ilModifier ms of ilModifier :: Inlines -> Modifier Inlines ilModifier ils = case viewl (unMany ils) of (x :< xs) | Seq.null xs -> case x of - (Emph _) -> Modifier emph - (Strong _) -> Modifier strong - (SmallCaps _) -> Modifier smallcaps - (Strikeout _) -> Modifier strikeout - (Superscript _) -> Modifier superscript - (Subscript _) -> Modifier subscript + (Emph _) -> Modifier emph + (Strong _) -> Modifier strong + (SmallCaps _) -> Modifier smallcaps + (Strikeout _) -> Modifier strikeout + (Superscript _) -> Modifier superscript + (Subscript _) -> Modifier subscript (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) - (Span attr _) -> AttrModifier spanWith attr - _ -> NullModifier + (Span attr _) -> AttrModifier spanWith attr + _ -> NullModifier _ -> NullModifier ilInnards :: Inlines -> Inlines @@ -78,25 +79,25 @@ ilInnards ils = case viewl (unMany ils) of (Subscript lst) -> fromList lst (Link _ lst _) -> fromList lst (Span _ lst) -> fromList lst - _ -> ils + _ -> ils _ -> ils inlinesL :: Inlines -> (Inlines, Inlines) inlinesL ils = case viewl $ unMany ils of (s :< sq) -> (singleton s, Many sq) - _ -> (mempty, ils) + _ -> (mempty, ils) inlinesR :: Inlines -> (Inlines, Inlines) inlinesR ils = case viewr $ unMany ils of (sq :> s) -> (Many sq, singleton s) - _ -> (ils, mempty) + _ -> (ils, mempty) combineInlines :: Inlines -> Inlines -> Inlines combineInlines x y = let (xs', x') = inlinesR x (y', ys') = inlinesL y in - xs' <> (combineSingletonInlines x' y') <> ys' + xs' <> combineSingletonInlines x' y' <> ys' combineSingletonInlines :: Inlines -> Inlines -> Inlines combineSingletonInlines x y = @@ -113,10 +114,10 @@ combineSingletonInlines x y = stackInlines (x_rem_attr ++ y_rem_attr) mempty | isEmpty xs -> let (sp, y') = spaceOutInlinesL y in - (stackInlines x_rem_attr mempty) <> sp <> y' + stackInlines x_rem_attr mempty <> sp <> y' | isEmpty ys -> let (x', sp) = spaceOutInlinesR x in - x' <> sp <> (stackInlines y_rem_attr mempty) + x' <> sp <> stackInlines y_rem_attr mempty | otherwise -> let (x', xsp) = spaceOutInlinesR x (ysp, y') = spaceOutInlinesL y @@ -129,15 +130,15 @@ combineSingletonInlines x y = combineBlocks :: Blocks -> Blocks -> Blocks combineBlocks bs cs - | bs' :> (BlockQuote bs'') <- viewr (unMany bs) - , (BlockQuote cs'') :< cs' <- viewl (unMany cs) = - Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs' + | bs' :> BlockQuote bs'' <- viewr (unMany bs) + , BlockQuote cs'' :< cs' <- viewl (unMany cs) = + Many $ (bs' |> BlockQuote (bs'' <> cs'')) >< cs' combineBlocks bs cs = bs <> cs instance (Monoid a, Eq a) => Eq (Modifier a) where - (Modifier f) == (Modifier g) = (f mempty == g mempty) - (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty) - (NullModifier) == (NullModifier) = True + (Modifier f) == (Modifier g) = f mempty == g mempty + (AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty + NullModifier == NullModifier = True _ == _ = False isEmpty :: (Monoid a, Eq a) => a -> Bool diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs new file mode 100644 index 000000000..6eeb55d2f --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -0,0 +1,89 @@ +{- +Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Fields + Copyright : Copyright (C) 2014-2018 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +For parsing Field definitions in instText tags, as described in +ECMA-376-1:2016, §17.16.5 -} + +module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..) + , parseFieldInfo + ) where + +import Text.Parsec +import Text.Parsec.String (Parser) + +type URL = String + +data FieldInfo = HyperlinkField URL + | UnknownField + deriving (Show) + +parseFieldInfo :: String -> Either ParseError FieldInfo +parseFieldInfo = parse fieldInfo "" + +fieldInfo :: Parser FieldInfo +fieldInfo = + try (HyperlinkField <$> hyperlink) + <|> + return UnknownField + +escapedQuote :: Parser String +escapedQuote = string "\\\"" + +inQuotes :: Parser String +inQuotes = + (try escapedQuote) <|> (anyChar >>= (\c -> return [c])) + +quotedString :: Parser String +quotedString = do + char '"' + concat <$> manyTill inQuotes (try (char '"')) + +unquotedString :: Parser String +unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof) + +fieldArgument :: Parser String +fieldArgument = quotedString <|> unquotedString + +-- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25 +hyperlinkSwitch :: Parser (String, String) +hyperlinkSwitch = do + sw <- string "\\l" + spaces + farg <- fieldArgument + return (sw, farg) + +hyperlink :: Parser URL +hyperlink = do + many space + string "HYPERLINK" + spaces + farg <- fieldArgument + switches <- spaces *> many hyperlinkSwitch + let url = case switches of + ("\\l", s) : _ -> farg ++ ('#': s) + _ -> farg + return url diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 395a53907..c0f05094a 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx.Lists - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2018 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -33,38 +33,33 @@ module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets , listParagraphDivs ) where -import Text.Pandoc.JSON -import Text.Pandoc.Generic (bottomUp) -import Text.Pandoc.Shared (trim) -import Control.Monad import Data.List import Data.Maybe +import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.JSON +import Text.Pandoc.Shared (trim) isListItem :: Block -> Bool isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True -isListItem _ = False +isListItem _ = False getLevel :: Block -> Maybe Integer -getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs -getLevel _ = Nothing +getLevel (Div (_, _, kvs) _) = read <$> lookup "level" kvs +getLevel _ = Nothing getLevelN :: Block -> Integer -getLevelN b = case getLevel b of - Just n -> n - Nothing -> -1 +getLevelN b = fromMaybe (-1) (getLevel b) getNumId :: Block -> Maybe Integer -getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs -getNumId _ = Nothing +getNumId (Div (_, _, kvs) _) = read <$> lookup "num-id" kvs +getNumId _ = Nothing getNumIdN :: Block -> Integer -getNumIdN b = case getNumId b of - Just n -> n - Nothing -> -1 +getNumIdN b = fromMaybe (-1) (getNumId b) getText :: Block -> Maybe String getText (Div (_, _, kvs) _) = lookup "text" kvs -getText _ = Nothing +getText _ = Nothing data ListType = Itemized | Enumerated ListAttributes @@ -109,27 +104,27 @@ listParagraphDivs = ["ListParagraph"] handleListParagraphs :: [Block] -> [Block] handleListParagraphs [] = [] handleListParagraphs ( - (Div attr1@(_, classes1, _) blks1) : - (Div (ident2, classes2, kvs2) blks2) : + Div attr1@(_, classes1, _) blks1 : + Div (ident2, classes2, kvs2) blks2 : blks ) | "list-item" `elem` classes1 && - not ("list-item" `elem` classes2) && + notElem "list-item" classes2 && (not . null) (listParagraphDivs `intersect` classes2) = -- We don't want to keep this indent. let newDiv2 = - (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) + Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2 in - handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) -handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) + handleListParagraphs (Div attr1 (blks1 ++ [newDiv2]) : blks) +handleListParagraphs (blk:blks) = blk : handleListParagraphs blks separateBlocks' :: Block -> [[Block]] -> [[Block]] -separateBlocks' blk ([] : []) = [[blk]] -separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] +separateBlocks' blk [[]] = [[blk]] +separateBlocks' b@(BulletList _) acc = init acc ++ [last acc ++ [b]] +separateBlocks' b@(OrderedList _ _) acc = init acc ++ [last acc ++ [b]] -- The following is for the invisible bullet lists. This is how -- pandoc-generated ooxml does multiparagraph item lists. -separateBlocks' b acc | liftM trim (getText b) == Just "" = - (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b acc | fmap trim (getText b) == Just "" = + init acc ++ [last acc ++ [b]] separateBlocks' b acc = acc ++ [[b]] separateBlocks :: [Block] -> [[Block]] @@ -138,63 +133,60 @@ separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) flatToBullets' :: Integer -> [Block] -> [Block] flatToBullets' _ [] = [] flatToBullets' num xs@(b : elems) - | getLevelN b == num = b : (flatToBullets' num elems) + | getLevelN b == num = b : flatToBullets' num elems | otherwise = let bNumId = getNumIdN b bLevel = getLevelN b (children, remaining) = span (\b' -> - ((getLevelN b') > bLevel || - ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) + getLevelN b' > bLevel || + (getLevelN b' == bLevel && getNumIdN b' == bNumId)) xs in case getListType b of Just (Enumerated attr) -> - (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) + OrderedList attr (separateBlocks $ flatToBullets' bLevel children) : + flatToBullets' num remaining _ -> - (BulletList (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) + BulletList (separateBlocks $ flatToBullets' bLevel children) : + flatToBullets' num remaining flatToBullets :: [Block] -> [Block] flatToBullets elems = flatToBullets' (-1) elems singleItemHeaderToHeader :: Block -> Block -singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h -singleItemHeaderToHeader blk = blk +singleItemHeaderToHeader (OrderedList _ [[h@Header{}]]) = h +singleItemHeaderToHeader blk = blk blocksToBullets :: [Block] -> [Block] blocksToBullets blks = map singleItemHeaderToHeader $ - bottomUp removeListDivs $ - flatToBullets $ (handleListParagraphs blks) + bottomUp removeListDivs $flatToBullets (handleListParagraphs blks) plainParaInlines :: Block -> [Inline] plainParaInlines (Plain ils) = ils -plainParaInlines (Para ils) = ils -plainParaInlines _ = [] +plainParaInlines (Para ils) = ils +plainParaInlines _ = [] blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] blocksToDefinitions' [] acc [] = reverse acc blocksToDefinitions' defAcc acc [] = - reverse $ (DefinitionList (reverse defAcc)) : acc + reverse $ DefinitionList (reverse defAcc) : acc blocksToDefinitions' defAcc acc - ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) + (Div (_, classes1, _) blks1 : Div (ident2, classes2, kvs2) blks2 : blks) | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - pair = case remainingAttr2 == ("", [], []) of - True -> (concatMap plainParaInlines blks1, [blks2]) - False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) + pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) in blocksToDefinitions' (pair : defAcc) acc blks blocksToDefinitions' defAcc acc - ((Div (ident2, classes2, kvs2) blks2) : blks) + (Div (ident2, classes2, kvs2) blks2 : blks) | (not . null) defAcc && "Definition" `elem` classes2 = let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) defItems2 = case remainingAttr2 == ("", [], []) of - True -> blks2 + True -> blks2 False -> [Div remainingAttr2 blks2] ((defTerm, defItems):defs) = defAcc defAcc' = case null defItems of @@ -205,18 +197,18 @@ blocksToDefinitions' defAcc acc blocksToDefinitions' [] acc (b:blks) = blocksToDefinitions' [] (b:acc) blks blocksToDefinitions' defAcc acc (b:blks) = - blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks + blocksToDefinitions' [] (b : DefinitionList (reverse defAcc) : acc) blks removeListDivs' :: Block -> [Block] removeListDivs' (Div (ident, classes, kvs) blks) | "list-item" `elem` classes = case delete "list-item" classes of - [] -> blks - classes' -> [Div (ident, classes', kvs) $ blks] + [] -> blks + classes' -> [Div (ident, classes', kvs) blks] removeListDivs' (Div (ident, classes, kvs) blks) | not $ null $ listParagraphDivs `intersect` classes = case classes \\ listParagraphDivs of - [] -> blks + [] -> blks classes' -> [Div (ident, classes', kvs) blks] removeListDivs' blk = [blk] diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index deb2caccf..1f7f07e36 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,7 +1,9 @@ -{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx.Parse - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2018 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -49,28 +51,34 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , ParagraphStyle(..) , Row(..) , Cell(..) + , TrackedChange(..) + , ChangeType(..) + , ChangeInfo(..) + , FieldInfo(..) , archiveToDocx , archiveToDocxWithWarnings ) where import Codec.Archive.Zip -import Text.XML.Light -import Data.Maybe -import Data.List -import System.FilePath +import Control.Applicative ((<|>)) +import Control.Monad.Except +import Control.Monad.Reader +import Control.Monad.State.Strict import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B -import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad.Reader -import Control.Monad.State -import Control.Applicative ((<|>)) +import Data.Char (chr, ord, readLitChar) +import Data.List import qualified Data.Map as M -import Control.Monad.Except -import Text.Pandoc.Shared (safeRead, filteredFilesFromArchive) -import Text.TeXMath.Readers.OMML (readOMML) -import Text.TeXMath.Unicode.Fonts (getUnicode, stringToFont, Font(..)) -import Text.TeXMath (Exp) +import Data.Maybe +import System.FilePath import Text.Pandoc.Readers.Docx.Util -import Data.Char (readLitChar, ord, chr, isDigit) +import Text.Pandoc.Readers.Docx.Fields +import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.TeXMath (Exp) +import Text.TeXMath.Readers.OMML (readOMML) +import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont) +import Text.XML.Light +import qualified Text.XML.Light.Cursor as XMLC data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -84,10 +92,19 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show -data ReaderState = ReaderState { stateWarnings :: [String] } +data ReaderState = ReaderState { stateWarnings :: [String] + , stateFldCharState :: FldCharState + } deriving Show -data DocxError = DocxError | WrongElem +data FldCharState = FldCharOpen + | FldCharFieldInfo FieldInfo + | FldCharContent FieldInfo [Run] + | FldCharClosed + deriving (Show) + +data DocxError = DocxError + | WrongElem deriving Show type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState)) @@ -97,7 +114,7 @@ runD dx re rs = runState (runReaderT (runExceptT dx) re) rs maybeToD :: Maybe a -> D a maybeToD (Just a) = return a -maybeToD Nothing = throwError DocxError +maybeToD Nothing = throwError DocxError eitherToD :: Either a b -> D b eitherToD (Right b) = return b @@ -115,6 +132,36 @@ mapD f xs = in concatMapM handler xs +unwrapSDT :: NameSpaces -> Content -> [Content] +unwrapSDT ns (Elem element) + | isElem ns "w" "sdt" element + , Just sdtContent <- findChildByName ns "w" "sdtContent" element + = concatMap (unwrapSDT ns) $ map Elem $ elChildren sdtContent +unwrapSDT _ content = [content] + +unwrapSDTchild :: NameSpaces -> Content -> Content +unwrapSDTchild ns (Elem element) = + Elem $ element { elContent = concatMap (unwrapSDT ns) (elContent element) } +unwrapSDTchild _ content = content + +walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor +walkDocument' ns cur = + let modifiedCur = XMLC.modifyContent (unwrapSDTchild ns) cur + in + case XMLC.nextDF modifiedCur of + Just cur' -> walkDocument' ns cur' + Nothing -> XMLC.root modifiedCur + +walkDocument :: NameSpaces -> Element -> Maybe Element +walkDocument ns element = + let cur = XMLC.fromContent (Elem element) + cur' = walkDocument' ns cur + in + case XMLC.toTree cur' of + Elem element' -> Just element' + _ -> Nothing + + data Docx = Docx Document deriving Show @@ -160,17 +207,27 @@ data Notes = Notes NameSpaces data Comments = Comments NameSpaces (M.Map String Element) deriving Show -data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer - , rightParIndent :: Maybe Integer +data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer + , rightParIndent :: Maybe Integer , hangingParIndent :: Maybe Integer} deriving Show -data ParagraphStyle = ParagraphStyle { pStyle :: [String] +data ChangeType = Insertion | Deletion + deriving Show + +data ChangeInfo = ChangeInfo ChangeId Author ChangeDate + deriving Show + +data TrackedChange = TrackedChange ChangeType ChangeInfo + deriving Show + +data ParagraphStyle = ParagraphStyle { pStyle :: [String] , indentation :: Maybe ParIndentation , dropCap :: Bool , pHeading :: Maybe (String, Int) , pNumInfo :: Maybe (String, String) , pBlockQuote :: Maybe Bool + , pChange :: Maybe TrackedChange } deriving Show @@ -181,6 +238,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] , pHeading = Nothing , pNumInfo = Nothing , pBlockQuote = Nothing + , pChange = Nothing } @@ -208,8 +266,7 @@ data Cell = Cell [BodyPart] type Extent = Maybe (Double, Double) data ParPart = PlainRun Run - | Insertion ChangeId Author ChangeDate [Run] - | Deletion ChangeId Author ChangeDate [Run] + | ChangedRuns TrackedChange [Run] | CommentStart CommentId Author CommentDate [BodyPart] | CommentEnd CommentId | BookMark BookMarkId Anchor @@ -218,6 +275,10 @@ data ParPart = PlainRun Run | Drawing FilePath String String B.ByteString Extent -- title, alt | Chart -- placeholder for now | PlainOMath [Exp] + | SmartTag [Run] + | Field FieldInfo [Run] + | NullParPart -- when we need to return nothing, but + -- not because of an error. deriving Show data Run = Run RunStyle [RunElem] @@ -233,19 +294,19 @@ data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen data VertAlign = BaseLn | SupScrpt | SubScrpt deriving Show -data RunStyle = RunStyle { isBold :: Maybe Bool - , isItalic :: Maybe Bool +data RunStyle = RunStyle { isBold :: Maybe Bool + , isItalic :: Maybe Bool , isSmallCaps :: Maybe Bool - , isStrike :: Maybe Bool - , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String - , rStyle :: Maybe CharStyle} + , isStrike :: Maybe Bool + , rVertAlign :: Maybe VertAlign + , rUnderline :: Maybe String + , rStyle :: Maybe CharStyle} deriving Show -data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) +data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) , isBlockQuote :: Maybe Bool - , numInfo :: Maybe (String, String) - , psStyle :: Maybe ParStyle} + , numInfo :: Maybe (String, String) + , psStyle :: Maybe ParStyle} deriving Show defaultRunStyle :: RunStyle @@ -281,7 +342,9 @@ archiveToDocxWithWarnings archive = do (styles, parstyles) = archiveToStyles archive rEnv = ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument - rState = ReaderState { stateWarnings = [] } + rState = ReaderState { stateWarnings = [] + , stateFldCharState = FldCharClosed + } (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState case eitherDoc of Right doc -> Right (Docx doc, stateWarnings st) @@ -294,14 +357,14 @@ archiveToDocument zf = do entry <- maybeToD $ findEntryByPath "word/document.xml" zf docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry let namespaces = elemToNameSpaces docElem - bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem - body <- elemToBody namespaces bodyElem + bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem + let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem) + body <- elemToBody namespaces bodyElem' return $ Document namespaces body elemToBody :: NameSpaces -> Element -> D Body elemToBody ns element | isElem ns "w" "body" element = - mapD (elemToBodyPart ns) (elChildren element) >>= - (\bps -> return $ Body bps) + fmap Body (mapD (elemToBodyPart ns) (elChildren element)) elemToBody _ _ = throwError WrongElem archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap) @@ -322,15 +385,15 @@ archiveToStyles zf = isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle | isElem ns "w" "style" element - , Just styleType <- findAttr (elemName ns "w" "type") element + , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle - , Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>= - findAttr (elemName ns "w" "val") - , Just ps <- parentStyle = (basedOnVal == getStyleId ps) + , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= + findAttrByName ns "w" "val" + , Just ps <- parentStyle = basedOnVal == getStyleId ps | isElem ns "w" "style" element - , Just styleType <- findAttr (elemName ns "w" "type") element + , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle - , Nothing <- findChild (elemName ns "w" "basedOn") element + , Nothing <- findChildByName ns "w" "basedOn" element , Nothing <- parentStyle = True | otherwise = False @@ -343,8 +406,8 @@ instance ElemToStyle CharStyle where cStyleType _ = "character" elemToStyle ns element parentStyle | isElem ns "w" "style" element - , Just "character" <- findAttr (elemName ns "w" "type") element - , Just styleId <- findAttr (elemName ns "w" "styleId") element = + , Just "character" <- findAttrByName ns "w" "type" element + , Just styleId <- findAttrByName ns "w" "styleId" element = Just (styleId, elemToRunStyle ns element parentStyle) | otherwise = Nothing getStyleId s = fst s @@ -353,8 +416,8 @@ instance ElemToStyle ParStyle where cStyleType _ = "paragraph" elemToStyle ns element parentStyle | isElem ns "w" "style" element - , Just "paragraph" <- findAttr (elemName ns "w" "type") element - , Just styleId <- findAttr (elemName ns "w" "styleId") element = + , Just "paragraph" <- findAttrByName ns "w" "type" element + , Just styleId <- findAttrByName ns "w" "styleId" element = Just (styleId, elemToParStyleData ns element parentStyle) | otherwise = Nothing getStyleId s = fst s @@ -368,10 +431,10 @@ getStyleChildren ns element parentStyle buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] buildBasedOnList ns element rootStyle = - case (getStyleChildren ns element rootStyle) of + case getStyleChildren ns element rootStyle of [] -> [] stys -> stys ++ - (concatMap (\s -> buildBasedOnList ns element (Just s)) stys) + concatMap (buildBasedOnList ns element . Just) stys archiveToNotes :: Archive -> Notes archiveToNotes zf = @@ -380,14 +443,14 @@ archiveToNotes zf = enElem = findEntryByPath "word/endnotes.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) fn_namespaces = case fnElem of - Just e -> elemToNameSpaces e + Just e -> elemToNameSpaces e Nothing -> [] en_namespaces = case enElem of - Just e -> elemToNameSpaces e + Just e -> elemToNameSpaces e Nothing -> [] ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= (elemToNotes ns "footnote") - en = enElem >>= (elemToNotes ns "endnote") + fn = fnElem >>= elemToNotes ns "footnote" + en = enElem >>= elemToNotes ns "endnote" in Notes ns fn en @@ -396,19 +459,19 @@ archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) cmts_namespaces = case cmtsElem of - Just e -> elemToNameSpaces e + Just e -> elemToNameSpaces e Nothing -> [] - cmts = (elemToComments cmts_namespaces) <$> cmtsElem + cmts = elemToComments cmts_namespaces <$> cmtsElem in case cmts of - Just c -> Comments cmts_namespaces c + Just c -> Comments cmts_namespaces c Nothing -> Comments cmts_namespaces M.empty filePathToRelType :: FilePath -> Maybe DocumentLocation -filePathToRelType "word/_rels/document.xml.rels" = Just InDocument +filePathToRelType "word/_rels/document.xml.rels" = Just InDocument filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote -filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote -filePathToRelType _ = Nothing +filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote +filePathToRelType _ = Nothing relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = @@ -439,24 +502,23 @@ lookupLevel :: String -> String -> Numbering -> Maybe Level lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs - lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls - return lvl + lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttr (elemName ns "w" "numId") element - absNumId <- findChild (elemName ns "w" "abstractNumId") element - >>= findAttr (elemName ns "w" "val") + numId <- findAttrByName ns "w" "numId" element + absNumId <- findChildByName ns "w" "abstractNumId" element + >>= findAttrByName ns "w" "val" return $ Numb numId absNumId numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttr (elemName ns "w" "abstractNumId") element - let levelElems = findChildren (elemName ns "w" "lvl") element + absNumId <- findAttrByName ns "w" "abstractNumId" element + let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels absNumElemToAbsNum _ _ = Nothing @@ -464,26 +526,26 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttr (elemName ns "w" "ilvl") element - fmt <- findChild (elemName ns "w" "numFmt") element - >>= findAttr (elemName ns "w" "val") - txt <- findChild (elemName ns "w" "lvlText") element - >>= findAttr (elemName ns "w" "val") - let start = findChild (elemName ns "w" "start") element - >>= findAttr (elemName ns "w" "val") + ilvl <- findAttrByName ns "w" "ilvl" element + fmt <- findChildByName ns "w" "numFmt" element + >>= findAttrByName ns "w" "val" + txt <- findChildByName ns "w" "lvlText" element + >>= findAttrByName ns "w" "val" + let start = findChildByName ns "w" "start" element + >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) return (ilvl, fmt, txt, start) levelElemToLevel _ _ = Nothing archiveToNumbering' :: Archive -> Maybe Numbering -archiveToNumbering' zf = do +archiveToNumbering' zf = case findEntryByPath "word/numbering.xml" zf of Nothing -> Just $ Numbering [] [] [] Just entry -> do numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry let namespaces = elemToNameSpaces numberingElem - numElems = findChildren (elemName namespaces "w" "num") numberingElem - absNumElems = findChildren (elemName namespaces "w" "abstractNum") numberingElem + numElems = findChildrenByName namespaces "w" "num" numberingElem + absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem nums = mapMaybe (numElemToNum namespaces) numElems absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems return $ Numbering namespaces nums absNums @@ -496,22 +558,23 @@ elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element) elemToNotes ns notetype element | isElem ns "w" (notetype ++ "s") element = let pairs = mapMaybe - (\e -> findAttr (elemName ns "w" "id") e >>= + (\e -> findAttrByName ns "w" "id" e >>= (\a -> Just (a, e))) - (findChildren (elemName ns "w" notetype) element) + (findChildrenByName ns "w" notetype element) in - Just $ M.fromList $ pairs + Just $ + M.fromList pairs elemToNotes _ _ _ = Nothing elemToComments :: NameSpaces -> Element -> M.Map String Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttr (elemName ns "w" "id") e >>= + (\e -> findAttrByName ns "w" "id" e >>= (\a -> Just (a, e))) - (findChildren (elemName ns "w" "comment") element) + (findChildrenByName ns "w" "comment" element) in - M.fromList $ pairs + M.fromList pairs elemToComments _ _ = M.empty @@ -520,16 +583,16 @@ elemToComments _ _ = M.empty elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = - let cols = findChildren (elemName ns "w" "gridCol") element + let cols = findChildrenByName ns "w" "gridCol" element in - mapD (\e -> maybeToD (findAttr (elemName ns "w" "val") e >>= stringToInteger)) + mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger)) cols elemToTblGrid _ _ = throwError WrongElem elemToTblLook :: NameSpaces -> Element -> D TblLook elemToTblLook ns element | isElem ns "w" "tblLook" element = - let firstRow = findAttr (elemName ns "w" "firstRow") element - val = findAttr (elemName ns "w" "val") element + let firstRow = findAttrByName ns "w" "firstRow" element + val = findAttrByName ns "w" "val" element firstRowFmt = case firstRow of Just "1" -> True @@ -538,13 +601,13 @@ elemToTblLook ns element | isElem ns "w" "tblLook" element = Just bitMask -> testBitMask bitMask 0x020 Nothing -> False in - return $ TblLook{firstRowFormatting = firstRowFmt} + return TblLook{firstRowFormatting = firstRowFmt} elemToTblLook _ _ = throwError WrongElem elemToRow :: NameSpaces -> Element -> D Row elemToRow ns element | isElem ns "w" "tr" element = do - let cellElems = findChildren (elemName ns "w" "tc") element + let cellElems = findChildrenByName ns "w" "tc" element cells <- mapD (elemToCell ns) cellElems return $ Row cells elemToRow _ _ = throwError WrongElem @@ -558,15 +621,15 @@ elemToCell _ _ = throwError WrongElem elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation elemToParIndentation ns element | isElem ns "w" "ind" element = - Just $ ParIndentation { + Just ParIndentation { leftParIndent = - findAttr (elemName ns "w" "left") element >>= + findAttrByName ns "w" "left" element >>= stringToInteger , rightParIndent = - findAttr (elemName ns "w" "right") element >>= + findAttrByName ns "w" "right" element >>= stringToInteger , hangingParIndent = - findAttr (elemName ns "w" "hanging") element >>= + findAttrByName ns "w" "hanging" element >>= stringToInteger} elemToParIndentation _ _ = Nothing @@ -574,7 +637,7 @@ testBitMask :: String -> Int -> Bool testBitMask bitMaskS n = case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of [] -> False - ((n', _) : _) -> ((n' .|. n) /= 0) + ((n', _) : _) -> (n' .|. n) /= 0 stringToInteger :: String -> Maybe Integer stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) @@ -582,7 +645,7 @@ stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) elemToBodyPart :: NameSpaces -> Element -> D BodyPart elemToBodyPart ns element | isElem ns "w" "p" element - , (c:_) <- findChildren (elemName ns "m" "oMathPara") element = + , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do expsLst <- eitherToD $ readOMML $ showElement c return $ OMathPara expsLst @@ -610,17 +673,17 @@ elemToBodyPart ns element _ -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do - let caption' = findChild (elemName ns "w" "tblPr") element - >>= findChild (elemName ns "w" "tblCaption") - >>= findAttr (elemName ns "w" "val") + let caption' = findChildByName ns "w" "tblPr" element + >>= findChildByName ns "w" "tblCaption" + >>= findAttrByName ns "w" "val" caption = (fromMaybe "" caption') - grid' = case findChild (elemName ns "w" "tblGrid") element of - Just g -> elemToTblGrid ns g + grid' = case findChildByName ns "w" "tblGrid" element of + Just g -> elemToTblGrid ns g Nothing -> return [] - tblLook' = case findChild (elemName ns "w" "tblPr") element >>= - findChild (elemName ns "w" "tblLook") + tblLook' = case findChildByName ns "w" "tblPr" element >>= + findChildByName ns "w" "tblLook" of - Just l -> elemToTblLook ns l + Just l -> elemToTblLook ns l Nothing -> return defaultTblLook grid <- grid' @@ -649,26 +712,22 @@ expandDrawingId s = do getTitleAndAlt :: NameSpaces -> Element -> (String, String) getTitleAndAlt ns element = - let mbDocPr = findChild (elemName ns "wp" "inline") element >>= - findChild (elemName ns "wp" "docPr") - title = case mbDocPr >>= findAttr (elemName ns "" "title") of - Just title' -> title' - Nothing -> "" - alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of - Just alt' -> alt' - Nothing -> "" + let mbDocPr = findChildByName ns "wp" "inline" element >>= + findChildByName ns "wp" "docPr" + title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart elemToParPart ns element | isElem ns "w" "r" element - , Just drawingElem <- findChild (elemName ns "w" "drawing") element + , Just drawingElem <- findChildByName ns "w" "drawing" element , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture" , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (elemName ns "r" "embed") + >>= findAttrByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -676,9 +735,9 @@ elemToParPart ns element -- The below is an attempt to deal with images in deprecated vml format. elemToParPart ns element | isElem ns "w" "r" element - , Just _ <- findChild (elemName ns "w" "pict") element = + , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttr (elemName ns "r" "id") + >>= findAttrByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -687,86 +746,148 @@ elemToParPart ns element -- Chart elemToParPart ns element | isElem ns "w" "r" element - , Just drawingElem <- findChild (elemName ns "w" "drawing") element + , Just drawingElem <- findChildByName ns "w" "drawing" element , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart" , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem - = return Chart + = return Chart +{- +The next one is a bit complicated. fldChar fields work by first +having a <w:fldChar fldCharType="begin"> in a run, then a run with +<w:instrText>, then a <w:fldChar fldCharType="separate"> run, then the +content runs, and finally a <w:fldChar fldCharType="end"> run. For +example (omissions and my comments in brackets): + + <w:r> + [...] + <w:fldChar w:fldCharType="begin"/> + </w:r> + <w:r> + [...] + <w:instrText xml:space="preserve"> HYPERLINK [hyperlink url] </w:instrText> + </w:r> + <w:r> + [...] + <w:fldChar w:fldCharType="separate"/> + </w:r> + <w:r w:rsidRPr=[...]> + [...] + <w:t>Foundations of Analysis, 2nd Edition</w:t> + </w:r> + <w:r> + [...] + <w:fldChar w:fldCharType="end"/> + </w:r> + +So we do this in a number of steps. If we encounter the fldchar begin +tag, we start open a fldchar state variable (see state above). We add +the instrtext to it as FieldInfo. Then we close that and start adding +the runs when we get to separate. Then when we get to end, we produce +the Field type with approriate FieldInfo and Runs. +-} elemToParPart ns element - | isElem ns "w" "r" element = - elemToRun ns element >>= (\r -> return $ PlainRun r) + | isElem ns "w" "r" element + , Just fldChar <- findChildByName ns "w" "fldChar" element + , Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do + fldCharState <- gets stateFldCharState + case fldCharState of + FldCharClosed | fldCharType == "begin" -> do + modify $ \st -> st {stateFldCharState = FldCharOpen} + return NullParPart + FldCharFieldInfo info | fldCharType == "separate" -> do + modify $ \st -> st {stateFldCharState = FldCharContent info []} + return NullParPart + FldCharContent info runs | fldCharType == "end" -> do + modify $ \st -> st {stateFldCharState = FldCharClosed} + return $ Field info $ reverse runs + _ -> throwError WrongElem elemToParPart ns element - | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttr (elemName ns "w" "id") element - , Just cAuthor <- findAttr (elemName ns "w" "author") element - , Just cDate <- findAttr (elemName ns "w" "date") element = do - runs <- mapD (elemToRun ns) (elChildren element) - return $ Insertion cId cAuthor cDate runs + | isElem ns "w" "r" element + , Just instrText <- findChildByName ns "w" "instrText" element = do + fldCharState <- gets stateFldCharState + case fldCharState of + FldCharOpen -> do + info <- eitherToD $ parseFieldInfo $ strContent instrText + modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} + return NullParPart + _ -> return NullParPart elemToParPart ns element - | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttr (elemName ns "w" "id") element - , Just cAuthor <- findAttr (elemName ns "w" "author") element - , Just cDate <- findAttr (elemName ns "w" "date") element = do + | isElem ns "w" "r" element = do + run <- elemToRun ns element + -- we check to see if we have an open FldChar in state that we're + -- recording. + fldCharState <- gets stateFldCharState + case fldCharState of + FldCharContent info runs -> do + modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)} + return NullParPart + _ -> return $ PlainRun run +elemToParPart ns element + | Just change <- getTrackedChange ns element = do + runs <- mapD (elemToRun ns) (elChildren element) + return $ ChangedRuns change runs +elemToParPart ns element + | isElem ns "w" "smartTag" element = do runs <- mapD (elemToRun ns) (elChildren element) - return $ Deletion cId cAuthor cDate runs + return $ SmartTag runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttr (elemName ns "w" "id") element - , Just bmName <- findAttr (elemName ns "w" "name") element = + , Just bmId <- findAttrByName ns "w" "id" element + , Just bmName <- findAttrByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttr (elemName ns "r" "id") element = do + , Just relId <- findAttrByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of - Just target -> do - case findAttr (elemName ns "w" "anchor") element of + Just target -> + case findAttrByName ns "w" "anchor" element of Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttr (elemName ns "w" "anchor") element = do + , Just anchor <- findAttrByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttr (elemName ns "w" "id") element = do + , Just cmtId <- findAttrByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem - Nothing -> throwError WrongElem + Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttr (elemName ns "w" "id") element = + , Just cmtId <- findAttrByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath) + fmap PlainOMath (eitherToD $ readOMML $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttr (elemName ns "w" "id") element - , Just cmtAuthor <- findAttr (elemName ns "w" "author") element - , Just cmtDate <- findAttr (elemName ns "w" "date") element = do + , Just cmtId <- findAttrByName ns "w" "id" element + , Just cmtAuthor <- findAttrByName ns "w" "author" element + , Just cmtDate <- findAttrByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem lookupFootnote :: String -> Notes -> Maybe Element -lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s) +lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s lookupEndnote :: String -> Notes -> Maybe Element -lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s) +lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s elemToExtent :: Element -> Extent elemToExtent drawingElem = case (getDim "cx", getDim "cy") of (Just w, Just h) -> Just (w, h) - _ -> Nothing + _ -> Nothing where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem @@ -794,7 +915,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttr (elemName ns "w" "id") element = do + , Just fnId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -802,7 +923,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttr (elemName ns "w" "id") element = do + , Just enId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -813,8 +934,8 @@ childElemToRun _ _ = throwError WrongElem elemToRun :: NameSpaces -> Element -> D Run elemToRun ns element | isElem ns "w" "r" element - , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element = - do let choices = findChildren (elemName ns "mc" "Choice") altCont + , Just altCont <- findChildByName ns "mc" "AlternateContent" element = + do let choices = findChildrenByName ns "mc" "Choice" altCont choiceChildren = map head $ filter (not . null) $ map elChildren choices outputs <- mapD (childElemToRun ns) choiceChildren case outputs of @@ -822,15 +943,15 @@ elemToRun ns element [] -> throwError WrongElem elemToRun ns element | isElem ns "w" "r" element - , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + , Just drawingElem <- findChildByName ns "w" "drawing" element = childElemToRun ns drawingElem elemToRun ns element | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "footnoteReference") element = + , Just ref <- findChildByName ns "w" "footnoteReference" element = childElemToRun ns ref elemToRun ns element | isElem ns "w" "r" element - , Just ref <- findChild (elemName ns "w" "endnoteReference") element = + , Just ref <- findChildByName ns "w" "endnoteReference" element = childElemToRun ns ref elemToRun ns element | isElem ns "w" "r" element = do @@ -854,22 +975,37 @@ getParStyleField field stylemap styles = Just y getParStyleField _ _ _ = Nothing +getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange +getTrackedChange ns element + | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , Just cDate <- findAttrByName ns "w" "date" element = + Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) +getTrackedChange ns element + | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , Just cDate <- findAttrByName ns "w" "date" element = + Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) +getTrackedChange _ _ = Nothing + elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle elemToParagraphStyle ns element sty - | Just pPr <- findChild (elemName ns "w" "pPr") element = + | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (findAttr (elemName ns "w" "val")) - (findChildren (elemName ns "w" "pStyle") pPr) + (findAttrByName ns "w" "val") + (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = style , indentation = - findChild (elemName ns "w" "ind") pPr >>= + findChildByName ns "w" "ind" pPr >>= elemToParIndentation ns , dropCap = case - findChild (elemName ns "w" "framePr") pPr >>= - findAttr (elemName ns "w" "dropCap") + findChildByName ns "w" "framePr" pPr >>= + findAttrByName ns "w" "dropCap" of Just "none" -> False Just _ -> True @@ -877,13 +1013,20 @@ elemToParagraphStyle ns element sty , pHeading = getParStyleField headingLev sty style , pNumInfo = getParStyleField numInfo sty style , pBlockQuote = getParStyleField isBlockQuote sty style + , pChange = findChildByName ns "w" "rPr" pPr >>= + filterChild (\e -> isElem ns "w" "ins" e || + isElem ns "w" "moveTo" e || + isElem ns "w" "del" e || + isElem ns "w" "moveFrom" e + ) >>= + getTrackedChange ns } elemToParagraphStyle _ _ _ = defaultParagraphStyle checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool checkOnOff ns rPr tag | Just t <- findChild tag rPr - , Just val <- findAttr (elemName ns "w" "val") t = + , Just val <- findAttrByName ns "w" "val" t = Just $ case val of "true" -> True "false" -> False @@ -897,11 +1040,11 @@ checkOnOff _ _ _ = Nothing elemToRunStyleD :: NameSpaces -> Element -> D RunStyle elemToRunStyleD ns element - | Just rPr <- findChild (elemName ns "w" "rPr") element = do + | Just rPr <- findChildByName ns "w" "rPr" element = do charStyles <- asks envCharStyles let parentSty = case - findChild (elemName ns "w" "rStyle") rPr >>= - findAttr (elemName ns "w" "val") + findChildByName ns "w" "rStyle" rPr >>= + findAttrByName ns "w" "val" of Just styName | Just style <- M.lookup styName charStyles -> Just (styName, style) @@ -911,7 +1054,7 @@ elemToRunStyleD _ _ = return defaultRunStyle elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle elemToRunStyle ns element parentStyle - | Just rPr <- findChild (elemName ns "w" "rPr") element = + | Just rPr <- findChildByName ns "w" "rPr" element = RunStyle { isBold = checkOnOff ns rPr (elemName ns "w" "b") @@ -919,32 +1062,31 @@ elemToRunStyle ns element parentStyle , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps") , isStrike = checkOnOff ns rPr (elemName ns "w" "strike") , rVertAlign = - findChild (elemName ns "w" "vertAlign") rPr >>= - findAttr (elemName ns "w" "val") >>= + findChildByName ns "w" "vertAlign" rPr >>= + findAttrByName ns "w" "val" >>= \v -> Just $ case v of "superscript" -> SupScrpt "subscript" -> SubScrpt _ -> BaseLn , rUnderline = - findChild (elemName ns "w" "u") rPr >>= - findAttr (elemName ns "w" "val") + findChildByName ns "w" "u" rPr >>= + findAttrByName ns "w" "val" , rStyle = parentStyle } elemToRunStyle _ _ _ = defaultRunStyle -isNumericNotNull :: String -> Bool -isNumericNotNull str = (str /= []) && (all isDigit str) - getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int) getHeaderLevel ns element - | Just styleId <- findAttr (elemName ns "w" "styleId") element + | Just styleId <- findAttrByName ns "w" "styleId" element , Just index <- stripPrefix "Heading" styleId - , isNumericNotNull index = Just (styleId, read index) - | Just styleId <- findAttr (elemName ns "w" "styleId") element - , Just index <- findChild (elemName ns "w" "name") element >>= - findAttr (elemName ns "w" "val") >>= + , Just n <- stringToInteger index + , n > 0 = Just (styleId, fromInteger n) + | Just styleId <- findAttrByName ns "w" "styleId" element + , Just index <- findChildByName ns "w" "name" element >>= + findAttrByName ns "w" "val" >>= stripPrefix "heading " - , isNumericNotNull index = Just (styleId, read index) + , Just n <- stringToInteger index + , n > 0 = Just (styleId, fromInteger n) getHeaderLevel _ _ = Nothing blockQuoteStyleIds :: [String] @@ -955,23 +1097,23 @@ blockQuoteStyleNames = ["Quote", "Block Text"] getBlockQuote :: NameSpaces -> Element -> Maybe Bool getBlockQuote ns element - | Just styleId <- findAttr (elemName ns "w" "styleId") element + | Just styleId <- findAttrByName ns "w" "styleId" element , styleId `elem` blockQuoteStyleIds = Just True - | Just styleName <- findChild (elemName ns "w" "name") element >>= - findAttr (elemName ns "w" "val") + | Just styleName <- findChildByName ns "w" "name" element >>= + findAttrByName ns "w" "val" , styleName `elem` blockQuoteStyleNames = Just True getBlockQuote _ _ = Nothing getNumInfo :: NameSpaces -> Element -> Maybe (String, String) getNumInfo ns element = do - let numPr = findChild (elemName ns "w" "pPr") element >>= - findChild (elemName ns "w" "numPr") + let numPr = findChildByName ns "w" "pPr" element >>= + findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= - findChild (elemName ns "w" "ilvl") >>= - findAttr (elemName ns "w" "val")) + findChildByName ns "w" "ilvl" >>= + findAttrByName ns "w" "val") numId <- numPr >>= - findChild (elemName ns "w" "numId") >>= - findAttr (elemName ns "w" "val") + findChildByName ns "w" "numId" >>= + findAttrByName ns "w" "val" return (numId, lvl) @@ -1015,10 +1157,10 @@ getSymChar ns element let [(char, _)] = readLitChar ("\\x" ++ s) in TextRun . maybe "" (:[]) $ getUnicode font char where - getCodepoint = findAttr (elemName ns "w" "char") element - getFont = stringToFont =<< findAttr (elemName ns "w" "font") element + getCodepoint = findAttrByName ns "w" "char" element + getFont = stringToFont =<< findAttrByName ns "w" "font" element lowerFromPrivate ('F':xs) = '0':xs - lowerFromPrivate xs = xs + lowerFromPrivate xs = xs getSymChar _ _ = TextRun "" elemToRunElems :: NameSpaces -> Element -> D [RunElem] @@ -1029,11 +1171,9 @@ elemToRunElems ns element let font = do fontElem <- findElement (qualName "rFonts") element stringToFont =<< - (foldr (<|>) Nothing $ - map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"]) + foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem setFont :: Maybe Font -> ReaderEnv -> ReaderEnv setFont f s = s{envFont = f} - diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs index 00906cf07..b32a73770 100644 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -7,11 +7,11 @@ module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) , hasStyleName ) where -import Text.XML.Light -import Text.Pandoc.Readers.Docx.Util -import Control.Monad.State -import Data.Char (toLower) -import qualified Data.Map as M +import Control.Monad.State.Strict +import Data.Char (toLower) +import qualified Data.Map as M +import Text.Pandoc.Readers.Docx.Util +import Text.XML.Light newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) newtype CharStyleMap = CharStyleMap ( M.Map String String ) @@ -30,7 +30,7 @@ instance StyleMap CharStyleMap where insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a insert (Just k) (Just v) m = alterMap (M.insert k v) m -insert _ _ m = m +insert _ _ m = m getStyleId :: (StyleMap a) => String -> a -> String getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index 33d69ccf3..d9d65bc07 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -3,10 +3,13 @@ module Text.Pandoc.Readers.Docx.Util ( , elemName , isElem , elemToNameSpaces + , findChildByName + , findChildrenByName + , findAttrByName ) where -import Text.XML.Light import Data.Maybe (mapMaybe) +import Text.XML.Light type NameSpaces = [(String, String)] @@ -15,7 +18,7 @@ elemToNameSpaces = mapMaybe attrToNSPair . elAttribs attrToNSPair :: Attr -> Maybe (String, String) attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing +attrToNSPair _ = Nothing elemName :: NameSpaces -> String -> String -> QName elemName ns prefix name = @@ -23,5 +26,21 @@ elemName ns prefix name = isElem :: NameSpaces -> String -> String -> Element -> Bool isElem ns prefix name element = - qName (elName element) == name && - qURI (elName element) == lookup prefix ns + let ns' = ns ++ elemToNameSpaces element + in qName (elName element) == name && + qURI (elName element) == lookup prefix ns' + +findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element +findChildByName ns pref name el = + let ns' = ns ++ elemToNameSpaces el + in findChild (elemName ns' pref name) el + +findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element] +findChildrenByName ns pref name el = + let ns' = ns ++ elemToNameSpaces el + in findChildren (elemName ns' pref name) el + +findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String +findAttrByName ns pref name el = + let ns' = ns ++ elemToNameSpaces el + in findAttr (elemName ns' pref name) el diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 4c31bf1ae..3b13bbe13 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -1,55 +1,54 @@ -{-# LANGUAGE - ViewPatterns - , StandaloneDeriving - , TupleSections - , FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} + +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} module Text.Pandoc.Readers.EPUB (readEPUB) where -import Text.XML.Light +import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry, + toArchiveOrFail) +import Control.DeepSeq (NFData, deepseq) +import Control.Monad (guard, liftM) +import Control.Monad.Except (throwError) +import qualified Data.ByteString.Lazy as BL (ByteString) +import Data.List (isInfixOf, isPrefixOf) +import qualified Data.Map as M (Map, elems, fromList, lookup) +import Data.Maybe (fromMaybe, mapMaybe) +import Data.Monoid ((<>)) +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TL +import Network.URI (unEscapeString) +import System.FilePath (dropFileName, dropFileName, normalise, splitFileName, + takeFileName, (</>)) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, insertMedia) import Text.Pandoc.Definition hiding (Attr) -import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Error -import Text.Pandoc.Walk (walk, query) -import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) -import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) -import Network.URI (unEscapeString) -import Text.Pandoc.MediaBag (MediaBag, insertMedia) -import Control.Monad.Except (MonadError, throwError, runExcept, Except) +import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension) import Text.Pandoc.MIME (MimeType) -import qualified Text.Pandoc.Builder as B -import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry - , findEntryByPath, Entry) -import qualified Data.ByteString.Lazy as BL (ByteString) -import System.FilePath ( takeFileName, (</>), dropFileName, normalise - , dropFileName - , splitFileName ) +import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Readers.HTML (readHtml) +import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI) import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) -import Control.Monad (guard, liftM, when) -import Data.List (isPrefixOf, isInfixOf) -import Data.Maybe (mapMaybe, fromMaybe) -import qualified Data.Map as M (Map, lookup, fromList, elems) -import Data.Monoid ((<>)) -import Control.DeepSeq (deepseq, NFData) - -import Debug.Trace (trace) +import Text.Pandoc.Walk (query, walk) +import Text.XML.Light type Items = M.Map String (FilePath, MimeType) -readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag) +readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc readEPUB opts bytes = case toArchiveOrFail bytes of - Right archive -> runEPUB $ archiveToEPUB opts $ archive - Left _ -> Left $ ParseFailure "Couldn't extract ePub file" + Right archive -> archiveToEPUB opts archive + Left _ -> throwError $ PandocParseError "Couldn't extract ePub file" -runEPUB :: Except PandocError a -> Either PandocError a -runEPUB = runExcept +-- runEPUB :: Except PandocError a -> Either PandocError a +-- runEPUB = runExcept -- Note that internal reference are aggresively normalised so that all ids -- are of the form "filename#id" -- -archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) +archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive @@ -62,40 +61,36 @@ archiveToEPUB os archive = do Pandoc _ bs <- foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine)) `liftM` parseSpineElem root b) mempty spine - let ast = coverDoc <> (Pandoc meta bs) - let mediaBag = fetchImages (M.elems items) root archive ast - return $ (ast, mediaBag) + let ast = coverDoc <> Pandoc meta bs + fetchImages (M.elems items) root archive ast + return ast where - os' = os {readerParseRaw = True} - parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc + os' = os {readerExtensions = enableExtension Ext_raw_html (readerExtensions os)} + parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do - when (readerTrace os) (traceM path) doc <- mimeToReader mime r path let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc + mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (unEscapeString -> root) (unEscapeString -> path) = do fname <- findEntryByPathE (root </> path) archive - html <- either throwError return . - readHtml os' . - UTF8.toStringLazy $ - fromEntry fname + html <- readHtml os' . TL.toStrict . TL.decodeUtf8 $ fromEntry fname return $ fixInternalReferences path html mimeToReader s _ (unEscapeString -> path) | s `elem` imageMimes = return $ imageToPandoc path - | otherwise = return $ mempty + | otherwise = return mempty -- paths should be absolute when this function is called -- renameImages should do this -fetchImages :: [(FilePath, MimeType)] +fetchImages :: PandocMonad m + => [(FilePath, MimeType)] -> FilePath -- ^ Root -> Archive -> Pandoc - -> MediaBag + -> m () fetchImages mimes root arc (query iq -> links) = - foldr (uncurry3 insertMedia) mempty - (mapMaybe getEntry links) + mapM_ (uncurry3 insertMedia) (mapMaybe getEntry links) where getEntry link = let abslink = normalise (root </> link) in @@ -104,7 +99,7 @@ fetchImages mimes root arc (query iq -> links) = iq :: Inline -> [FilePath] iq (Image _ _ (url, _)) = [url] -iq _ = [] +iq _ = [] -- Remove relative paths renameImages :: FilePath -> Inline -> Inline @@ -121,13 +116,13 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"] type CoverImage = FilePath -parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items) +parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items) parseManifest content = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest r <- mapM parseItem items let cover = findAttr (emptyName "href") =<< filterChild findCover manifest - return (cover, (M.fromList r)) + return (cover, M.fromList r) where findCover e = maybe False (isInfixOf "cover-image") (findAttr (emptyName "properties") e) @@ -137,18 +132,18 @@ parseManifest content = do mime <- findAttrE (emptyName "media-type") e return (uid, (href, mime)) -parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)] +parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do spine <- findElementE (dfName "spine") e let itemRefs = findChildren (dfName "itemref") spine - mapM (mkE "parseSpine" . (flip M.lookup is)) $ mapMaybe parseItemRef itemRefs + mapM (mkE "parseSpine" . flip M.lookup is) $ mapMaybe parseItemRef itemRefs where parseItemRef ref = do let linear = maybe True (== "yes") (findAttr (emptyName "linear") ref) guard linear findAttr (emptyName "idref") ref -parseMeta :: MonadError PandocError m => Element -> m Meta +parseMeta :: PandocMonad m => Element -> m Meta parseMeta content = do meta <- findElementE (dfName "metadata") content let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True @@ -164,29 +159,29 @@ parseMetaItem e@(stripNamespace . elName -> field) meta = renameMeta :: String -> String renameMeta "creator" = "author" -renameMeta s = s +renameMeta s = s -getManifest :: MonadError PandocError m => Archive -> m (String, Element) +getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry let namespaces = mapMaybe attrToNSPair (elAttribs docElem) ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) - as <- liftM ((map attrToPair) . elAttribs) + as <- fmap (map attrToPair . elAttribs) (findElementE (QName "rootfile" (Just ns) Nothing) docElem) manifestFile <- mkE "Root not found" (lookup "full-path" as) let rootdir = dropFileName manifestFile --mime <- lookup "media-type" as manifest <- findEntryByPathE manifestFile archive - liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest) + fmap ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest) -- Fixup fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = - (walk $ renameImages root) - . (walk $ fixBlockIRs filename) - . (walk $ fixInlineIRs filename) + walk (renameImages root) + . walk (fixBlockIRs filename) + . walk (fixInlineIRs filename) where (root, escapeURI -> filename) = splitFileName pathToFile @@ -221,7 +216,7 @@ fixAttrs :: FilePath -> B.Attr -> B.Attr fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs) addHash :: String -> String -> String -addHash _ "" = "" +addHash _ "" = "" addHash s ident = takeFileName s ++ "#" ++ ident removeEPUBAttrs :: [(String, String)] -> [(String, String)] @@ -242,9 +237,6 @@ foldM' f z (x:xs) = do uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d uncurry3 f (a, b, c) = f a b c -traceM :: Monad m => String -> m () -traceM = flip trace (return ()) - -- Utility stripNamespace :: QName -> String @@ -252,7 +244,7 @@ stripNamespace (QName v _ _) = v attrToNSPair :: Attr -> Maybe (String, String) attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val) -attrToNSPair _ = Nothing +attrToNSPair _ = Nothing attrToPair :: Attr -> (String, String) attrToPair (Attr (QName name _ _) val) = (name, val) @@ -268,18 +260,18 @@ emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: MonadError PandocError m => QName -> Element -> m String +findAttrE :: PandocMonad m => QName -> Element -> m String findAttrE q e = mkE "findAttr" $ findAttr q e -findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry +findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: MonadError PandocError m => String -> m Element +parseXMLDocE :: PandocMonad m => String -> m Element parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc -findElementE :: MonadError PandocError m => QName -> Element -> m Element +findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x -mkE :: MonadError PandocError m => String -> Maybe a -> m a -mkE s = maybe (throwError . ParseFailure $ s) return +mkE :: PandocMonad m => String -> Maybe a -> m a +mkE s = maybe (throwError . PandocParseError $ s) return diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index abe5f66ce..0e79f9ec3 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,7 +1,10 @@ -{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, -ViewPatterns#-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +23,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,66 +37,79 @@ module Text.Pandoc.Readers.HTML ( readHtml , htmlInBalanced , isInlineTag , isBlockTag + , NamedTag(..) , isTextTag , isCommentTag ) where +import Control.Applicative ((<|>)) +import Control.Arrow (first) +import Control.Monad (guard, mplus, msum, mzero, unless, void) +import Control.Monad.Except (throwError) +import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT) +import Data.Char (isAlphaNum, isDigit, isLetter) +import Data.Default (Default (..), def) +import Data.Foldable (for_) +import Data.List (isPrefixOf) +import Data.List.Split (wordsBy, splitWhen) +import qualified Data.Map as M +import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Monoid (First (..), (<>)) +import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI (URI, nonStrictRelativeTo, parseURIReference) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match -import Text.Pandoc.Definition +import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) -import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField - , escapeURI, safeRead, mapLeft ) -import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) - , Extension (Ext_epub_html_exts, - Ext_native_divs, Ext_native_spans)) +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.Options ( + Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs, + Ext_native_spans, Ext_raw_html, Ext_line_blocks), + ReaderOptions (readerExtensions, readerStripComments), + extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) +import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI, + extractSpaces, safeRead, underlineSpan) import Text.Pandoc.Walk -import qualified Data.Map as M -import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf, isPrefixOf ) -import Data.Char ( isDigit ) -import Control.Monad ( guard, when, mzero, void, unless ) -import Control.Arrow ((***)) -import Control.Applicative ( (<|>) ) -import Data.Monoid (First (..)) -import Text.Printf (printf) -import Debug.Trace (trace) -import Text.TeXMath (readMathML, writeTeX) -import Data.Default (Default (..), def) -import Control.Monad.Reader (Reader,ask, asks, local, runReader) -import Network.URI (URI, parseURIReference, nonStrictRelativeTo) -import Text.Pandoc.Error -import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) -import Data.Monoid ((<>)) import Text.Parsec.Error -import qualified Data.Set as Set +import Text.TeXMath (readMathML, writeTeX) -- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Either PandocError Pandoc -readHtml opts inp = - mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc - (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty) - "source" tags - where tags = stripPrefixes . canonicalizeTags $ - parseTagsOptions parseOptions{ optTagPosition = True } inp - parseDoc = do - blocks <- (fixPlains False) . mconcat <$> manyTill block eof - meta <- stateMeta . parserState <$> getState - bs' <- replaceNotes (B.toList blocks) - return $ Pandoc meta bs' - getError (errorMessages -> ms) = case ms of - [] -> "" - (m:_) -> messageString m - -replaceNotes :: [Block] -> TagParser [Block] +readHtml :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> m Pandoc +readHtml opts inp = do + let tags = stripPrefixes . canonicalizeTags $ + parseTagsOptions parseOptions{ optTagPosition = True } + (crFilter inp) + parseDoc = do + blocks <- fixPlains False . mconcat <$> manyTill block eof + meta <- stateMeta . parserState <$> getState + bs' <- replaceNotes (B.toList blocks) + reportLogMessages + return $ Pandoc meta bs' + getError (errorMessages -> ms) = case ms of + [] -> "" + (m:_) -> messageString m + result <- flip runReaderT def $ + runParserT parseDoc + (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty []) + "source" tags + case result of + Right doc -> return doc + Left err -> throwError $ PandocParseError $ getError err + +replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes = walkM replaceNotes' -replaceNotes' :: Inline -> TagParser Inline +replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes where getNotes = noteTable <$> getState @@ -105,38 +121,46 @@ data HTMLState = noteTable :: [(String, Blocks)], baseHref :: Maybe URI, identifiers :: Set.Set String, - headerMap :: M.Map Inlines String + headerMap :: M.Map Inlines String, + logMessages :: [LogMessage] } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext - , inChapter :: Bool -- ^ Set if in chapter section - , inPlain :: Bool -- ^ Set if in pPlain + , inChapter :: Bool -- ^ Set if in chapter section + , inPlain :: Bool -- ^ Set if in pPlain } -setInChapter :: HTMLParser s a -> HTMLParser s a +setInChapter :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a setInChapter = local (\s -> s {inChapter = True}) -setInPlain :: HTMLParser s a -> HTMLParser s a +setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a setInPlain = local (\s -> s {inPlain = True}) -type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal) +type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) -type TagParser = HTMLParser [Tag String] +type TagParser m = HTMLParser m [Tag Text] -pBody :: TagParser Blocks +pHtml :: PandocMonad m => TagParser m Blocks +pHtml = try $ do + (TagOpen "html" attr) <- lookAhead pAnyTag + for_ (lookup "lang" attr) $ + updateState . B.setMeta "lang" . B.text . T.unpack + pInTags "html" block + +pBody :: PandocMonad m => TagParser m Blocks pBody = pInTags "body" block -pHead :: TagParser Blocks +pHead :: PandocMonad m => TagParser m Blocks pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . trimInlines - setTitle t = mempty <$ (updateState $ B.setMeta "title" t) + setTitle t = mempty <$ updateState (B.setMeta "title" t) pMetaTag = do - mt <- pSatisfy (~== TagOpen "meta" []) - let name = fromAttrib "name" mt + mt <- pSatisfy (matchTagOpen "meta" []) + let name = T.unpack $ fromAttrib "name" mt if null name then return mempty else do - let content = fromAttrib "content" mt + let content = T.unpack $ fromAttrib "content" mt updateState $ \s -> let ps = parserState s in s{ parserState = ps{ @@ -144,15 +168,13 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag (stateMeta ps) } } return mempty pBaseTag = do - bt <- pSatisfy (~== TagOpen "base" []) + bt <- pSatisfy (matchTagOpen "base" []) updateState $ \st -> st{ baseHref = - parseURIReference $ fromAttrib "href" bt } + parseURIReference $ T.unpack $ fromAttrib "href" bt } return mempty -block :: TagParser Blocks +block :: PandocMonad m => TagParser m Blocks block = do - tr <- getOption readerTrace - pos <- getPosition res <- choice [ eSection , eSwitch B.para block @@ -166,94 +188,107 @@ block = do , pList , pHrule , pTable + , pHtml , pHead , pBody + , pLineBlock , pDiv , pPlain + , pFigure , pRawHtmlBlock ] - when tr $ trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList res)) (return ()) + trace (take 60 $ show $ B.toList res) return res -namespaces :: [(String, TagParser Inlines)] +namespaces :: PandocMonad m => [(String, TagParser m Inlines)] namespaces = [(mathMLNamespace, pMath True)] mathMLNamespace :: String mathMLNamespace = "http://www.w3.org/1998/Math/MathML" -eSwitch :: Monoid a => (Inlines -> a) -> TagParser a -> TagParser a +eSwitch :: (PandocMonad m, Monoid a) + => (Inlines -> a) + -> TagParser m a + -> TagParser m a eSwitch constructor parser = try $ do guardEnabled Ext_epub_html_exts - pSatisfy (~== TagOpen "switch" []) + pSatisfy (matchTagOpen "switch" []) cases <- getFirst . mconcat <$> manyTill (First <$> (eCase <* skipMany pBlank) ) - (lookAhead $ try $ pSatisfy (~== TagOpen "default" [])) + (lookAhead $ try $ pSatisfy (matchTagOpen "default" [])) skipMany pBlank fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank) skipMany pBlank - pSatisfy (~== TagClose "switch") + pSatisfy (matchTagClose "switch") return $ maybe fallback constructor cases -eCase :: TagParser (Maybe Inlines) +eCase :: PandocMonad m => TagParser m (Maybe Inlines) eCase = do skipMany pBlank - TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" []) - case (flip lookup namespaces) =<< lookup "required-namespace" attr of - Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank)) - Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case")) + TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "case" []) + let attr = toStringAttr attr' + case flip lookup namespaces =<< lookup "required-namespace" attr of + Just p -> Just <$> pInTags "case" (skipMany pBlank *> p <* skipMany pBlank) + Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (matchTagClose "case")) -eFootnote :: TagParser () +eFootnote :: PandocMonad m => TagParser m () eFootnote = try $ do let notes = ["footnote", "rearnote"] guardEnabled Ext_epub_html_exts - (TagOpen tag attr) <- lookAhead $ pAnyTag - guard (maybe False (flip elem notes) (lookup "type" attr)) + (TagOpen tag attr') <- lookAhead pAnyTag + let attr = toStringAttr attr' + guard $ maybe False (`elem` notes) (lookup "type" attr) let ident = fromMaybe "" (lookup "id" attr) content <- pInTags tag block addNote ident content -addNote :: String -> Blocks -> TagParser () -addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)}) +addNote :: PandocMonad m => String -> Blocks -> TagParser m () +addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s}) -eNoteref :: TagParser Inlines +eNoteref :: PandocMonad m => TagParser m Inlines eNoteref = try $ do guardEnabled Ext_epub_html_exts - TagOpen tag attr <- lookAhead $ pAnyTag - guard (maybe False (== "noteref") (lookup "type" attr)) + TagOpen tag attr' <- lookAhead pAnyTag + let attr = toStringAttr attr' + guard $ lookup "type" attr == Just "noteref" let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr) guard (not (null ident)) pInTags tag block return $ B.rawInline "noteref" ident -- Strip TOC if there is one, better to generate again -eTOC :: TagParser () +eTOC :: PandocMonad m => TagParser m () eTOC = try $ do guardEnabled Ext_epub_html_exts - (TagOpen tag attr) <- lookAhead $ pAnyTag - guard (maybe False (== "toc") (lookup "type" attr)) + (TagOpen tag attr) <- lookAhead pAnyTag + guard $ lookup "type" attr == Just "toc" void (pInTags tag block) -pList :: TagParser Blocks +pList :: PandocMonad m => TagParser m Blocks pList = pBulletList <|> pOrderedList <|> pDefinitionList -pBulletList :: TagParser Blocks +pBulletList :: PandocMonad m => TagParser m Blocks pBulletList = try $ do - pSatisfy (~== TagOpen "ul" []) + pSatisfy (matchTagOpen "ul" []) let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && - not (t ~== TagClose "ul")) + not (matchTagClose "ul" t)) -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pListItem nonItem) (pCloses "ul") return $ B.bulletList $ map (fixPlains True) items -pListItem :: TagParser a -> TagParser Blocks +pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks pListItem nonItem = do - TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" []) - let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr) - (liDiv <>) <$> pInTags "li" block <* skipMany nonItem + TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" []) + let attr = toStringAttr attr' + let addId ident bs = case B.toList bs of + (Plain ils:xs) -> B.fromList (Plain + [Span (ident, [], []) ils] : xs) + _ -> B.divWith (ident, [], []) bs + maybe id addId (lookup "id" attr) <$> + pInTags "li" block <* skipMany nonItem parseListStyleType :: String -> ListNumberStyle parseListStyleType "lower-roman" = LowerRoman @@ -271,9 +306,10 @@ parseTypeAttr "A" = UpperAlpha parseTypeAttr "1" = Decimal parseTypeAttr _ = DefaultStyle -pOrderedList :: TagParser Blocks +pOrderedList :: PandocMonad m => TagParser m Blocks pOrderedList = try $ do - TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) + TagOpen _ attribs' <- pSatisfy (matchTagOpen "ol" []) + let attribs = toStringAttr attribs' let (start, style) = (sta', sty') where sta = fromMaybe "1" $ lookup "start" attribs @@ -295,23 +331,23 @@ pOrderedList = try $ do ] let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && - not (t ~== TagClose "ol")) + not (matchTagClose "ol" t)) -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pListItem nonItem) (pCloses "ol") return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items -pDefinitionList :: TagParser Blocks +pDefinitionList :: PandocMonad m => TagParser m Blocks pDefinitionList = try $ do - pSatisfy (~== TagOpen "dl" []) + pSatisfy (matchTagOpen "dl" []) items <- manyTill pDefListItem (pCloses "dl") return $ B.definitionList items -pDefListItem :: TagParser (Inlines, [Blocks]) +pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks]) pDefListItem = try $ do - let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) && - not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl")) + let nonItem = pSatisfy (\t -> not (matchTagOpen "dt" [] t) && + not (matchTagOpen "dd" [] t) && not (matchTagClose "dl" t)) terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline) defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) skipMany nonItem @@ -322,133 +358,165 @@ fixPlains :: Bool -> Blocks -> Blocks fixPlains inList bs = if any isParaish bs' then B.fromList $ map plainToPara bs' else bs - where isParaish (Para _) = True - isParaish (CodeBlock _ _) = True - isParaish (Header _ _ _) = True - isParaish (BlockQuote _) = True - isParaish (BulletList _) = not inList - isParaish (OrderedList _ _) = not inList - isParaish (DefinitionList _) = not inList - isParaish _ = False + where isParaish Para{} = True + isParaish CodeBlock{} = True + isParaish Header{} = True + isParaish BlockQuote{} = True + isParaish BulletList{} = not inList + isParaish OrderedList{} = not inList + isParaish DefinitionList{} = not inList + isParaish _ = False plainToPara (Plain xs) = Para xs - plainToPara x = x + plainToPara x = x bs' = B.toList bs -pRawTag :: TagParser String +pRawTag :: PandocMonad m => TagParser m Text pRawTag = do tag <- pAnyTag let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"] if tagOpen ignorable (const True) tag || tagClose ignorable tag - then return [] + then return mempty else return $ renderTags' [tag] -pDiv :: TagParser Blocks +pLineBlock :: PandocMonad m => TagParser m Blocks +pLineBlock = try $ do + guardEnabled Ext_line_blocks + _ <- pSatisfy $ tagOpen (=="div") (== [("class","line-block")]) + ils <- trimInlines . mconcat <$> manyTill inline (pSatisfy (tagClose (=="div"))) + let lns = map B.fromList $ + splitWhen (== LineBreak) $ filter (/= SoftBreak) $ + B.toList ils + return $ B.lineBlock lns + +pDiv :: PandocMonad m => TagParser m Blocks pDiv = try $ do guardEnabled Ext_native_divs - let isDivLike "div" = True + let isDivLike "div" = True isDivLike "section" = True - isDivLike _ = False - TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) + isDivLike "main" = True + isDivLike _ = False + TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) + let attr = toStringAttr attr' contents <- pInTags tag block let (ident, classes, kvs) = mkAttr attr let classes' = if tag == "section" then "section":classes else classes - return $ B.divWith (ident, classes', kvs) contents + kvs' = if tag == "main" && isNothing (lookup "role" kvs) + then ("role", "main"):kvs + else kvs + return $ B.divWith (ident, classes', kvs') contents -pRawHtmlBlock :: TagParser Blocks +pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do - raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag - parseRaw <- getOption readerParseRaw - if parseRaw && not (null raw) + raw <- T.unpack <$> (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag) + exts <- getOption readerExtensions + if extensionEnabled Ext_raw_html exts && not (null raw) then return $ B.rawBlock "html" raw - else return mempty + else ignore raw + +ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a +ignore raw = do + pos <- getPosition + -- raw can be null for tags like <!DOCTYPE>; see paRawTag + -- in this case we don't want a warning: + unless (null raw) $ + logMessage $ SkippedContent raw pos + return mempty -pHtmlBlock :: String -> TagParser String +pHtmlBlock :: PandocMonad m => Text -> TagParser m Text pHtmlBlock t = try $ do - open <- pSatisfy (~== TagOpen t []) - contents <- manyTill pAnyTag (pSatisfy (~== TagClose t)) - return $ renderTags' $ [open] ++ contents ++ [TagClose t] + open <- pSatisfy (matchTagOpen t []) + contents <- manyTill pAnyTag (pSatisfy (matchTagClose t)) + return $ renderTags' $ [open] <> contents <> [TagClose t] -- Sets chapter context -eSection :: TagParser Blocks +eSection :: PandocMonad m => TagParser m Blocks eSection = try $ do - let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as) + let matchChapter as = maybe False (T.isInfixOf "chapter") (lookup "type" as) let sectTag = tagOpen (`elem` sectioningContent) matchChapter TagOpen tag _ <- lookAhead $ pSatisfy sectTag setInChapter (pInTags tag block) -headerLevel :: String -> TagParser Int -headerLevel tagtype = do - let level = read (drop 1 tagtype) - (try $ do - guardEnabled Ext_epub_html_exts - asks inChapter >>= guard - return (level - 1)) - <|> - return level - -eTitlePage :: TagParser () +headerLevel :: PandocMonad m => Text -> TagParser m Int +headerLevel tagtype = + case safeRead (T.unpack (T.drop 1 tagtype)) of + Just level -> +-- try (do +-- guardEnabled Ext_epub_html_exts +-- asks inChapter >>= guard +-- return (level - 1)) +-- <|> + return level + Nothing -> fail "Could not retrieve header level" + +eTitlePage :: PandocMonad m => TagParser m () eTitlePage = try $ do - let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as) + let isTitlePage as = maybe False (T.isInfixOf "titlepage") (lookup "type" as) let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section") isTitlePage TagOpen tag _ <- lookAhead $ pSatisfy groupTag () <$ pInTags tag block -pHeader :: TagParser Blocks +pHeader :: PandocMonad m => TagParser m Blocks pHeader = try $ do - TagOpen tagtype attr <- pSatisfy $ + TagOpen tagtype attr' <- pSatisfy $ tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) (const True) - let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] + let attr = toStringAttr attr' + let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text) + [("class","title")] level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] - attr' <- registerHeader (ident, classes, keyvals) contents + attr'' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle then mempty -- skip a representation of the title in the body - else B.headerWith attr' level contents + else B.headerWith attr'' level contents -pHrule :: TagParser Blocks +pHrule :: PandocMonad m => TagParser m Blocks pHrule = do pSelfClosing (=="hr") (const True) return B.horizontalRule -pTable :: TagParser Blocks +pTable :: PandocMonad m => TagParser m Blocks pTable = try $ do - TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) + TagOpen _ _ <- pSatisfy (matchTagOpen "table" []) skipMany pBlank caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank widths' <- (mconcat <$> many1 pColgroup) <|> many pCol let pTh = option [] $ pInTags "tr" (pCell "th") - pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th") - pTBody = do pOptInTag "tbody" $ many1 pTr + pTr = try $ skipMany pBlank >> + pInTags "tr" (pCell "td" <|> pCell "th") + pTBody = pOptInTag "tbody" $ many1 pTr head'' <- pOptInTag "thead" pTh - head' <- pOptInTag "tbody" $ do - if null head'' - then pTh - else return head'' + head' <- map snd <$> + pOptInTag "tbody" + (if null head'' then pTh else return head'') rowsLs <- many pTBody rows' <- pOptInTag "tfoot" $ many pTr - TagClose _ <- pSatisfy (~== TagClose "table") - let rows'' = (concat rowsLs) ++ rows' + TagClose _ <- pSatisfy (matchTagClose "table") + let rows'' = concat rowsLs <> rows' + let rows''' = map (map snd) rows'' + -- let rows''' = map (map snd) rows'' -- fail on empty table - guard $ not $ null head' && null rows'' + guard $ not $ null head' && null rows''' let isSinglePlain x = case B.toList x of [] -> True [Plain _] -> True _ -> False - let isSimple = all isSinglePlain $ concat (head':rows'') - let cols = length $ if null head' then head rows'' else head' + let isSimple = all isSinglePlain $ concat (head':rows''') + let cols = length $ if null head' then head rows''' else head' -- add empty cells to short rows let addEmpties r = case cols - length r of - n | n > 0 -> r ++ replicate n mempty + n | n > 0 -> r <> replicate n mempty | otherwise -> r - let rows = map addEmpties rows'' - let aligns = replicate cols AlignDefault + let rows = map addEmpties rows''' + let aligns = case rows'' of + (cs:_) -> map fst cs + _ -> replicate cols AlignDefault let widths = if null widths' then if isSimple then replicate cols 0 @@ -456,80 +524,120 @@ pTable = try $ do else widths' return $ B.table caption (zip aligns widths) head' rows -pCol :: TagParser Double +pCol :: PandocMonad m => TagParser m Double pCol = try $ do - TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) + TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" []) + let attribs = toStringAttr attribs' skipMany pBlank - optional $ pSatisfy (~== TagClose "col") + optional $ pSatisfy (matchTagClose "col") skipMany pBlank - return $ case lookup "width" attribs of + let width = case lookup "width" attribs of Nothing -> case lookup "style" attribs of Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs -> - fromMaybe 0.0 $ safeRead ('0':'.':filter - (`notElem` " \t\r\n%'\";") xs) + fromMaybe 0.0 $ safeRead (filter + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) _ -> 0.0 Just x | not (null x) && last x == '%' -> - fromMaybe 0.0 $ safeRead ('0':'.':init x) + fromMaybe 0.0 $ safeRead (init x) _ -> 0.0 + if width > 0.0 + then return $ width / 100.0 + else return 0.0 -pColgroup :: TagParser [Double] +pColgroup :: PandocMonad m => TagParser m [Double] pColgroup = try $ do - pSatisfy (~== TagOpen "colgroup" []) + pSatisfy (matchTagOpen "colgroup" []) skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -noColOrRowSpans :: Tag String -> Bool +noColOrRowSpans :: Tag Text -> Bool noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan" where isNullOrOne x = case fromAttrib x t of "" -> True "1" -> True _ -> False -pCell :: String -> TagParser [Blocks] +pCell :: PandocMonad m => Text -> TagParser m [(Alignment, Blocks)] pCell celltype = try $ do skipMany pBlank + tag <- lookAhead $ + pSatisfy (\t -> t ~== TagOpen celltype [] && noColOrRowSpans t) + let extractAlign' [] = "" + extractAlign' ("text-align":x:_) = x + extractAlign' (_:xs) = extractAlign' xs + let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':']) + let align = case maybeFromAttrib "align" tag `mplus` + (extractAlign <$> maybeFromAttrib "style" tag) of + Just "left" -> AlignLeft + Just "right" -> AlignRight + Just "center" -> AlignCenter + _ -> AlignDefault res <- pInTags' celltype noColOrRowSpans block skipMany pBlank - return [res] + return [(align, res)] -pBlockQuote :: TagParser Blocks +pBlockQuote :: PandocMonad m => TagParser m Blocks pBlockQuote = do contents <- pInTags "blockquote" block return $ B.blockQuote $ fixPlains False contents -pPlain :: TagParser Blocks +pPlain :: PandocMonad m => TagParser m Blocks pPlain = do contents <- setInPlain $ trimInlines . mconcat <$> many1 inline if B.isNull contents then return mempty else return $ B.plain contents -pPara :: TagParser Blocks +pPara :: PandocMonad m => TagParser m Blocks pPara = do contents <- trimInlines <$> pInTags "p" inline - return $ B.para contents + (do guardDisabled Ext_empty_paragraphs + guard (B.isNull contents) + return mempty) + <|> return (B.para contents) + +pFigure :: PandocMonad m => TagParser m Blocks +pFigure = try $ do + TagOpen _ _ <- pSatisfy (matchTagOpen "figure" []) + skipMany pBlank + let pImg = (\x -> (Just x, Nothing)) <$> + (pOptInTag "p" pImage <* skipMany pBlank) + pCapt = (\x -> (Nothing, Just x)) <$> do + bs <- pInTags "figcaption" block + return $ blocksToInlines' $ B.toList bs + pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure") + res <- many (pImg <|> pCapt <|> pSkip) + let mbimg = msum $ map fst res + let mbcap = msum $ map snd res + TagClose _ <- pSatisfy (matchTagClose "figure") + let caption = fromMaybe mempty mbcap + case B.toList <$> mbimg of + Just [Image attr _ (url, tit)] -> + return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption + _ -> mzero -pCodeBlock :: TagParser Blocks +pCodeBlock :: PandocMonad m => TagParser m Blocks pCodeBlock = try $ do - TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) + TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) + let attr = toStringAttr attr' contents <- manyTill pAnyTag (pCloses "pre" <|> eof) let rawText = concatMap tagToString contents -- drop leading newline if any let result' = case rawText of - '\n':xs -> xs - _ -> rawText + '\n':xs -> xs + _ -> rawText -- drop trailing newline if any let result = case reverse result' of - '\n':_ -> init result' - _ -> result' + '\n':_ -> init result' + _ -> result' return $ B.codeBlockWith (mkAttr attr) result -tagToString :: Tag String -> String -tagToString (TagText s) = s +tagToString :: Tag Text -> String +tagToString (TagText s) = T.unpack s tagToString (TagOpen "br" _) = "\n" -tagToString _ = "" +tagToString _ = "" -inline :: TagParser Inlines +inline :: PandocMonad m => TagParser m Inlines inline = choice [ eNoteref , eSwitch id inline @@ -540,6 +648,7 @@ inline = choice , pSuperscript , pSubscript , pStrikeout + , pUnderline , pLineBreak , pLink , pImage @@ -549,30 +658,31 @@ inline = choice , pRawHtmlInline ] -pLocation :: TagParser () +pLocation :: PandocMonad m => TagParser m () pLocation = do (TagPosition r c) <- pSat isTagPosition setPosition $ newPos "input" r c -pSat :: (Tag String -> Bool) -> TagParser (Tag String) +pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSat f = do pos <- getPosition token show (const pos) (\x -> if f x then Just x else Nothing) -pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String) +pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSatisfy f = try $ optional pLocation >> pSat f -pAnyTag :: TagParser (Tag String) +pAnyTag :: PandocMonad m => TagParser m (Tag Text) pAnyTag = pSatisfy (const True) -pSelfClosing :: (String -> Bool) -> ([Attribute String] -> Bool) - -> TagParser (Tag String) +pSelfClosing :: PandocMonad m + => (Text -> Bool) -> ([Attribute Text] -> Bool) + -> TagParser m (Tag Text) pSelfClosing f g = do open <- pSatisfy (tagOpen f g) optional $ pSatisfy (tagClose f) return open -pQ :: TagParser Inlines +pQ :: PandocMonad m => TagParser m Inlines pQ = do context <- asks quoteContext let quoteType = case context of @@ -587,45 +697,50 @@ pQ = do withQuoteContext innerQuoteContext $ pInlinesInTags "q" constructor -pEmph :: TagParser Inlines +pEmph :: PandocMonad m => TagParser m Inlines pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph -pStrong :: TagParser Inlines +pStrong :: PandocMonad m => TagParser m Inlines pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong -pSuperscript :: TagParser Inlines +pSuperscript :: PandocMonad m => TagParser m Inlines pSuperscript = pInlinesInTags "sup" B.superscript -pSubscript :: TagParser Inlines +pSubscript :: PandocMonad m => TagParser m Inlines pSubscript = pInlinesInTags "sub" B.subscript -pStrikeout :: TagParser Inlines -pStrikeout = do +pStrikeout :: PandocMonad m => TagParser m Inlines +pStrikeout = pInlinesInTags "s" B.strikeout <|> pInlinesInTags "strike" B.strikeout <|> pInlinesInTags "del" B.strikeout <|> - try (do pSatisfy (~== TagOpen "span" [("class","strikeout")]) + try (do pSatisfy (matchTagOpen "span" [("class","strikeout")]) contents <- mconcat <$> manyTill inline (pCloses "span") return $ B.strikeout contents) -pLineBreak :: TagParser Inlines +pUnderline :: PandocMonad m => TagParser m Inlines +pUnderline = pInlinesInTags "u" underlineSpan <|> pInlinesInTags "ins" underlineSpan + +pLineBreak :: PandocMonad m => TagParser m Inlines pLineBreak = do pSelfClosing (=="br") (const True) return B.linebreak -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. -maybeFromAttrib :: String -> Tag String -> Maybe String -maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs +maybeFromAttrib :: String -> Tag Text -> Maybe String +maybeFromAttrib name (TagOpen _ attrs) = + T.unpack <$> lookup (T.pack name) attrs maybeFromAttrib _ _ = Nothing -pLink :: TagParser Inlines +pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - let title = fromAttrib "title" tag + let title = T.unpack $ fromAttrib "title" tag -- take id from id attribute if present, otherwise name - let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag - let cls = words $ fromAttrib "class" tag + let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + maybeFromAttrib "id" tag + let cls = words $ T.unpack $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of @@ -639,128 +754,149 @@ pLink = try $ do _ -> url' return $ B.linkWith (uid, cls, []) (escapeURI url) title lab -pImage :: TagParser Inlines +pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") mbBaseHref <- baseHref <$> getState - let url' = fromAttrib "src" tag + let url' = T.unpack $ fromAttrib "src" tag let url = case (parseURIReference url', mbBaseHref) of (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) _ -> url' - let title = fromAttrib "title" tag - let alt = fromAttrib "alt" tag - let uid = fromAttrib "id" tag - let cls = words $ fromAttrib "class" tag + let title = T.unpack $ fromAttrib "title" tag + let alt = T.unpack $ fromAttrib "alt" tag + let uid = T.unpack $ fromAttrib "id" tag + let cls = words $ T.unpack $ fromAttrib "class" tag let getAtt k = case fromAttrib k tag of "" -> [] - v -> [(k, v)] - let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] + v -> [(T.unpack k, T.unpack v)] + let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"] return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) -pCode :: TagParser Inlines +pCode :: PandocMonad m => TagParser m Inlines pCode = try $ do - (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) + (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) + let attr = toStringAttr attr' result <- manyTill pAnyTag (pCloses open) - return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result + return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $ + innerText result -pSpan :: TagParser Inlines +pSpan :: PandocMonad m => TagParser m Inlines pSpan = try $ do guardEnabled Ext_native_spans - TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) + TagOpen _ attr' <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) + let attr = toStringAttr attr' contents <- pInTags "span" inline - let isSmallCaps = fontVariant == "small-caps" + let isSmallCaps = fontVariant == "small-caps" || "smallcaps" `elem` classes where styleAttr = fromMaybe "" $ lookup "style" attr fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr + classes = fromMaybe [] $ + words <$> lookup "class" attr let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) return $ tag contents -pRawHtmlInline :: TagParser Inlines +pRawHtmlInline :: PandocMonad m => TagParser m Inlines pRawHtmlInline = do inplain <- asks inPlain result <- pSatisfy (tagComment (const True)) <|> if inplain then pSatisfy (not . isBlockTag) else pSatisfy isInlineTag - parseRaw <- getOption readerParseRaw - if parseRaw - then return $ B.rawInline "html" $ renderTags' [result] - else return mempty + exts <- getOption readerExtensions + let raw = T.unpack $ renderTags' [result] + if extensionEnabled Ext_raw_html exts + then return $ B.rawInline "html" raw + else ignore raw mathMLToTeXMath :: String -> Either String String mathMLToTeXMath s = writeTeX <$> readMathML s -pMath :: Bool -> TagParser Inlines +toStringAttr :: [(Text, Text)] -> [(String, String)] +toStringAttr = map go + where go (x,y) = (T.unpack x, T.unpack y) + +pMath :: PandocMonad m => Bool -> TagParser m Inlines pMath inCase = try $ do - open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True) + open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True) -- we'll assume math tags are MathML unless specially marked -- otherwise... + let attr = toStringAttr attr' unless inCase $ guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) - contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math")) - case mathMLToTeXMath (renderTags $ [open] ++ contents ++ [TagClose "math"]) of + contents <- manyTill pAnyTag (pSatisfy (matchTagClose "math")) + case mathMLToTeXMath (T.unpack $ renderTags $ + [open] <> contents <> [TagClose "math"]) of Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $ - innerText contents + T.unpack $ innerText contents Right [] -> return mempty Right x -> return $ case lookup "display" attr of Just "block" -> B.displayMath x _ -> B.math x -pInlinesInTags :: String -> (Inlines -> Inlines) - -> TagParser Inlines +pInlinesInTags :: PandocMonad m => Text -> (Inlines -> Inlines) + -> TagParser m Inlines pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline -pInTags :: (Monoid a) => String -> TagParser a -> TagParser a +pInTags :: (PandocMonad m, Monoid a) => Text -> TagParser m a -> TagParser m a pInTags tagtype parser = pInTags' tagtype (const True) parser -pInTags' :: (Monoid a) => String -> (Tag String -> Bool) -> TagParser a - -> TagParser a +pInTags' :: (PandocMonad m, Monoid a) + => Text + -> (Tag Text -> Bool) + -> TagParser m a + -> TagParser m a pInTags' tagtype tagtest parser = try $ do pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t) mconcat <$> manyTill parser (pCloses tagtype <|> eof) -- parses p, preceeded by an optional opening tag -- and followed by an optional closing tags -pOptInTag :: String -> TagParser a -> TagParser a +pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a pOptInTag tagtype p = try $ do skipMany pBlank - optional $ pSatisfy (~== TagOpen tagtype []) + optional $ pSatisfy (matchTagOpen tagtype []) skipMany pBlank x <- p skipMany pBlank - optional $ pSatisfy (~== TagClose tagtype) + optional $ pSatisfy (matchTagClose tagtype) skipMany pBlank return x -pCloses :: String -> TagParser () +pCloses :: PandocMonad m => Text -> TagParser m () pCloses tagtype = try $ do t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag case t of - (TagClose t') | t' == tagtype -> pAnyTag >> return () + (TagClose t') | t' == tagtype -> void pAnyTag (TagOpen t' _) | t' `closes` tagtype -> return () (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () (TagClose "dl") | tagtype == "dd" -> return () (TagClose "table") | tagtype == "td" -> return () (TagClose "table") | tagtype == "tr" -> return () + (TagClose "td") | tagtype `Set.member` blockHtmlTags -> return () + (TagClose "th") | tagtype `Set.member` blockHtmlTags -> return () + (TagClose t') | tagtype == "p" && t' `Set.member` blockHtmlTags + -> return () -- see #3794 _ -> mzero -pTagText :: TagParser Inlines +pTagText :: PandocMonad m => TagParser m Inlines pTagText = try $ do (TagText str) <- pSatisfy isTagText st <- getState qu <- ask - case flip runReader qu $ runParserT (many pTagContents) st "text" str of - Left _ -> fail $ "Could not parse `" ++ str ++ "'" + parsed <- lift $ lift $ + flip runReaderT qu $ runParserT (many pTagContents) st "text" str + case parsed of + Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'" Right result -> return $ mconcat result -pBlank :: TagParser () +pBlank :: PandocMonad m => TagParser m () pBlank = try $ do (TagText str) <- pSatisfy isTagText - guard $ all isSpace str + guard $ T.all isSpace str -type InlinesParser = HTMLParser String +type InlinesParser m = HTMLParser m Text -pTagContents :: InlinesParser Inlines +pTagContents :: PandocMonad m => InlinesParser m Inlines pTagContents = B.displayMath <$> mathDisplay <|> B.math <$> mathInline @@ -770,7 +906,7 @@ pTagContents = <|> pSymbol <|> pBad -pStr :: InlinesParser Inlines +pStr :: PandocMonad m => InlinesParser m Inlines pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) @@ -778,24 +914,24 @@ pStr = do return $ B.str result isSpecial :: Char -> Bool -isSpecial '"' = True -isSpecial '\'' = True -isSpecial '.' = True -isSpecial '-' = True -isSpecial '$' = True +isSpecial '"' = True +isSpecial '\'' = True +isSpecial '.' = True +isSpecial '-' = True +isSpecial '$' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True isSpecial '\8221' = True -isSpecial _ = False +isSpecial _ = False -pSymbol :: InlinesParser Inlines +pSymbol :: PandocMonad m => InlinesParser m Inlines pSymbol = satisfy isSpecial >>= return . B.str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: InlinesParser Inlines +pBad :: PandocMonad m => InlinesParser m Inlines pBad = do c <- satisfy isBad let c' = case c of @@ -829,7 +965,7 @@ pBad = do _ -> '?' return $ B.str [c'] -pSpace :: InlinesParser Inlines +pSpace :: PandocMonad m => InlinesParser m Inlines pSpace = many1 (satisfy isSpace) >>= \xs -> if '\n' `elem` xs then return B.softbreak @@ -839,86 +975,96 @@ pSpace = many1 (satisfy isSpace) >>= \xs -> -- Constants -- -eitherBlockOrInline :: [String] -eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed", - "del", "ins", - "progress", "map", "area", "noscript", "script", - "object", "svg", "video", "source"] - -{- -inlineHtmlTags :: [[Char]] -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", - "br", "cite", "code", "dfn", "em", "font", "i", "img", - "input", "kbd", "label", "q", "s", "samp", "select", - "small", "span", "strike", "strong", "sub", "sup", - "textarea", "tt", "u", "var"] --} - -blockHtmlTags :: [String] -blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", - "blockquote", "body", "button", "canvas", - "caption", "center", "col", "colgroup", "dd", "dir", "div", - "dl", "dt", "fieldset", "figcaption", "figure", - "footer", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "head", "header", "hgroup", "hr", "html", - "isindex", "menu", "noframes", "ol", "output", "p", "pre", - "section", "table", "tbody", "textarea", - "thead", "tfoot", "ul", "dd", - "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script", "style"] +eitherBlockOrInline :: Set.Set Text +eitherBlockOrInline = Set.fromList + ["audio", "applet", "button", "iframe", "embed", + "del", "ins", "progress", "map", "area", "noscript", "script", + "object", "svg", "video", "source"] + +blockHtmlTags :: Set.Set Text +blockHtmlTags = Set.fromList + ["?xml", "!DOCTYPE", "address", "article", "aside", + "blockquote", "body", "canvas", + "caption", "center", "col", "colgroup", "dd", "details", + "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", + "footer", "form", "h1", "h2", "h3", "h4", + "h5", "h6", "head", "header", "hgroup", "hr", "html", + "isindex", "main", "menu", "meta", "noframes", "ol", "output", "p", "pre", + "section", "table", "tbody", "textarea", + "thead", "tfoot", "ul", "dd", + "dt", "frameset", "li", "tbody", "td", "tfoot", + "th", "thead", "tr", "script", "style"] -- We want to allow raw docbook in markdown documents, so we -- include docbook block tags here too. -blockDocBookTags :: [String] -blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist", - "orderedlist", "segmentedlist", "simplelist", - "variablelist", "caution", "important", "note", "tip", - "warning", "address", "literallayout", "programlisting", - "programlistingco", "screen", "screenco", "screenshot", - "synopsis", "example", "informalexample", "figure", - "informalfigure", "table", "informaltable", "para", - "simpara", "formalpara", "equation", "informalequation", - "figure", "screenshot", "mediaobject", "qandaset", - "procedure", "task", "cmdsynopsis", "funcsynopsis", - "classsynopsis", "blockquote", "epigraph", "msgset", - "sidebar", "title"] - -epubTags :: [String] -epubTags = ["case", "switch", "default"] - -blockTags :: [String] -blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags - -isInlineTag :: Tag String -> Bool -isInlineTag t = tagOpen isInlineTagName (const True) t || - tagClose isInlineTagName t || - tagComment (const True) t - where isInlineTagName x = x `notElem` blockTags - -isBlockTag :: Tag String -> Bool -isBlockTag t = tagOpen isBlockTagName (const True) t || - tagClose isBlockTagName t || - tagComment (const True) t - where isBlockTagName ('?':_) = True - isBlockTagName ('!':_) = True - isBlockTagName x = x `elem` blockTags - || x `elem` eitherBlockOrInline - -isTextTag :: Tag String -> Bool +blockDocBookTags :: Set.Set Text +blockDocBookTags = Set.fromList + ["calloutlist", "bibliolist", "glosslist", "itemizedlist", + "orderedlist", "segmentedlist", "simplelist", + "variablelist", "caution", "important", "note", "tip", + "warning", "address", "literallayout", "programlisting", + "programlistingco", "screen", "screenco", "screenshot", + "synopsis", "example", "informalexample", "figure", + "informalfigure", "table", "informaltable", "para", + "simpara", "formalpara", "equation", "informalequation", + "figure", "screenshot", "mediaobject", "qandaset", + "procedure", "task", "cmdsynopsis", "funcsynopsis", + "classsynopsis", "blockquote", "epigraph", "msgset", + "sidebar", "title"] + +epubTags :: Set.Set Text +epubTags = Set.fromList ["case", "switch", "default"] + +blockTags :: Set.Set Text +blockTags = Set.unions [blockHtmlTags, blockDocBookTags, epubTags] + +class NamedTag a where + getTagName :: a -> Maybe Text + +instance NamedTag (Tag Text) where + getTagName (TagOpen t _) = Just t + getTagName (TagClose t) = Just t + getTagName _ = Nothing + +instance NamedTag (Tag String) where + getTagName (TagOpen t _) = Just (T.pack t) + getTagName (TagClose t) = Just (T.pack t) + getTagName _ = Nothing + +isInlineTag :: NamedTag (Tag a) => Tag a -> Bool +isInlineTag t = isInlineTagName || isCommentTag t + where isInlineTagName = case getTagName t of + Just x -> x + `Set.notMember` blockTags + Nothing -> False + +isBlockTag :: NamedTag (Tag a) => Tag a -> Bool +isBlockTag t = isBlockTagName || isTagComment t + where isBlockTagName = + case getTagName t of + Just x + | "?" `T.isPrefixOf` x -> True + | "!" `T.isPrefixOf` x -> True + | otherwise -> x `Set.member` blockTags + || x `Set.member` eitherBlockOrInline + Nothing -> False + +isTextTag :: Tag a -> Bool isTextTag = tagText (const True) -isCommentTag :: Tag String -> Bool +isCommentTag :: Tag a -> Bool isCommentTag = tagComment (const True) -- taken from HXT and extended -- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags -closes :: String -> String -> Bool +closes :: Text -> Text -> Bool _ `closes` "body" = False _ `closes` "html" = False "body" `closes` "head" = True "a" `closes` "a" = True "li" `closes` "li" = True "th" `closes` t | t `elem` ["th","td"] = True +"td" `closes` t | t `elem` ["th","td"] = True "tr" `closes` t | t `elem` ["th","td","tr"] = True "dd" `closes` t | t `elem` ["dt", "dd"] = True "dt" `closes` t | t `elem` ["dt","dd"] = True @@ -926,12 +1072,12 @@ _ `closes` "html" = False "optgroup" `closes` "optgroup" = True "optgroup" `closes` "option" = True "option" `closes` "option" = True --- http://www.w3.org/TR/html-markup/p.html +-- https://html.spec.whatwg.org/multipage/syntax.html#optional-tags x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote", "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section", + "h5", "h6", "header", "hr", "main", "menu", "nav", "ol", "p", "pre", "section", "table", "ul"] = True -"meta" `closes` "meta" = True +_ `closes` "meta" = True "form" `closes` "form" = True "label" `closes` "label" = True "map" `closes` "map" = True @@ -942,17 +1088,18 @@ t `closes` "select" | t /= "option" = True "tfoot" `closes` t | t `elem` ["thead","colgroup"] = True "tbody" `closes` t | t `elem` ["tbody","tfoot","thead","colgroup"] = True t `closes` t2 | - t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] && - t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" + t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","main","p"] && + t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" or "main" t1 `closes` t2 | - t1 `elem` blockTags && - t2 `notElem` (blockTags ++ eitherBlockOrInline) = True + t1 `Set.member` blockTags && + t2 `Set.notMember` blockTags && + t2 `Set.notMember` eitherBlockOrInline = True _ `closes` _ = False --- parsers for use in markdown, textile readers -- | Matches a stretch of HTML in balanced tags. -htmlInBalanced :: (Monad m) +htmlInBalanced :: (HasReaderOptions st, Monad m) => (Tag String -> Bool) -> ParserT String st m String htmlInBalanced f = try $ do @@ -973,8 +1120,11 @@ htmlInBalanced f = try $ do let cs = ec - sc lscontents <- unlines <$> count ls anyLine cscontents <- count cs anyChar - (_,closetag) <- htmlTag (~== TagClose tn) - return (lscontents ++ cscontents ++ closetag) + closetag <- do + x <- many (satisfy (/='>')) + char '>' + return (x <> ">") + return (lscontents <> cscontents <> closetag) _ -> mzero _ -> mzero @@ -992,64 +1142,99 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts go n (t:ts') = (t :) <$> go n ts' go _ [] = mzero -hasTagWarning :: [Tag String] -> Bool +hasTagWarning :: [Tag a] -> Bool hasTagWarning (TagWarning _:_) = True -hasTagWarning _ = False +hasTagWarning _ = False -- | Matches a tag meeting a certain condition. -htmlTag :: Monad m +htmlTag :: (HasReaderOptions st, Monad m) => (Tag String -> Bool) -> ParserT [Char] st m (Tag String, String) htmlTag f = try $ do lookAhead (char '<') + startpos <- getPosition inp <- getInput - let (next : _) = canonicalizeTags $ parseTagsOptions - parseOptions{ optTagWarning = False } inp - guard $ f next + let ts = canonicalizeTags $ parseTagsOptions + parseOptions{ optTagWarning = False + , optTagPosition = True } + (inp ++ " ") -- add space to ensure that + -- we get a TagPosition after the tag + (next, ln, col) <- case ts of + (TagPosition{} : next : TagPosition ln col : _) + | f next -> return (next, ln, col) + _ -> mzero + + -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66> + -- should NOT be parsed as an HTML tag, see #2277, + -- so we exclude . even though it's a valid character + -- in XML element names + let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_' + let isName s = case s of + [] -> False + ('?':_) -> True -- processing instruction + (c:cs) -> isLetter c && all isNameChar cs + + let endpos = if ln == 1 + then setSourceColumn startpos + (sourceColumn startpos + (col - 1)) + else setSourceColumn (setSourceLine startpos + (sourceLine startpos + (ln - 1))) + col + let endAngle = try $ + do char '>' + pos <- getPosition + guard $ pos >= endpos + let handleTag tagname = do - -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66> - -- should NOT be parsed as an HTML tag, see #2277 - guard $ not ('.' `elem` tagname) + -- basic sanity check, since the parser is very forgiving + -- and finds tags in stuff like x<y) + guard $ isName tagname + guard $ not $ null tagname -- <https://example.org> should NOT be a tag either. -- tagsoup will parse it as TagOpen "https:" [("example.org","")] - guard $ not (null tagname) guard $ last tagname /= ':' - rendered <- manyTill anyChar (char '>') - return (next, rendered ++ ">") + char '<' + rendered <- manyTill anyChar endAngle + return (next, "<" ++ rendered ++ ">") case next of TagComment s | "<!--" `isPrefixOf` inp -> do - count (length s + 4) anyChar - skipMany (satisfy (/='>')) - char '>' - return (next, "<!--" ++ s ++ "-->") + string "<!--" + count (length s) anyChar + string "-->" + stripComments <- getOption readerStripComments + if stripComments + then return (next, "") + else return (next, "<!--" <> s <> "-->") | otherwise -> fail "bogus comment mode, HTML5 parse error" - TagOpen tagname _attr -> handleTag tagname - TagClose tagname -> handleTag tagname + TagOpen tagname attr -> do + guard $ all (isName . fst) attr + handleTag tagname + TagClose tagname -> + handleTag tagname _ -> mzero mkAttr :: [(String, String)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) where attribsId = fromMaybe "" $ lookup "id" attr - attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes + attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr -- Strip namespace prefixes -stripPrefixes :: [Tag String] -> [Tag String] +stripPrefixes :: [Tag Text] -> [Tag Text] stripPrefixes = map stripPrefix -stripPrefix :: Tag String -> Tag String +stripPrefix :: Tag Text -> Tag Text stripPrefix (TagOpen s as) = - TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as) + TagOpen (stripPrefix' s) (map (first stripPrefix') as) stripPrefix (TagClose s) = TagClose (stripPrefix' s) stripPrefix x = x -stripPrefix' :: String -> String +stripPrefix' :: Text -> Text stripPrefix' s = - case span (/= ':') s of - (_, "") -> s - (_, (_:ts)) -> ts + if T.null t then s else T.drop 1 t + where (_, t) = T.span (/= ':') s isSpace :: Char -> Bool isSpace ' ' = True @@ -1068,9 +1253,13 @@ instance HasHeaderMap HTMLState where extractHeaderMap = headerMap updateHeaderMap f s = s{ headerMap = f (headerMap s) } +instance HasLogMessages HTMLState where + addLogMessage m s = s{ logMessages = m : logMessages s } + getLogMessages = reverse . logMessages + -- This signature should be more general -- MonadReader HTMLLocal m => HasQuoteContext st m -instance HasQuoteContext st (Reader HTMLLocal) where +instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where getQuoteContext = asks quoteContext withQuoteContext q = local (\s -> s{quoteContext = q}) @@ -1088,19 +1277,32 @@ instance HasLastStrPosition HTMLState where setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)} getLastStrPos = getLastStrPos . parserState +-- For now we need a special verison here; the one in Shared has String type +renderTags' :: [Tag Text] -> Text +renderTags' = renderTagsOptions + renderOptions{ optMinimize = matchTags ["hr", "br", "img", + "meta", "link"] + , optRawTag = matchTags ["script", "style"] } + where matchTags tags = flip elem tags . T.toLower + -- EPUB Specific -- -- -sectioningContent :: [String] +sectioningContent :: [Text] sectioningContent = ["article", "aside", "nav", "section"] -groupingContent :: [String] +groupingContent :: [Text] groupingContent = ["p", "hr", "pre", "blockquote", "ol" , "ul", "li", "dl", "dt", "dt", "dd" , "figure", "figcaption", "div", "main"] +matchTagClose :: Text -> (Tag Text -> Bool) +matchTagClose t = (~== TagClose t) + +matchTagOpen :: Text -> [(Text, Text)] -> (Tag Text -> Bool) +matchTagOpen t as = (~== TagOpen t as) {- @@ -1108,7 +1310,7 @@ types :: [(String, ([String], Int))] types = -- Document divisions map (\s -> (s, (["section", "body"], 0))) ["volume", "part", "chapter", "division"] - ++ -- Document section and components + <> -- Document section and components [ ("abstract", ([], 0))] -} diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 12953bb72..e98c79ed8 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -14,33 +14,40 @@ module Text.Pandoc.Readers.Haddock ( readHaddock ) where -import Text.Pandoc.Builder (Blocks, Inlines) -import qualified Text.Pandoc.Builder as B -import Data.Monoid ((<>)) -import Text.Pandoc.Shared (trim, splitBy) +import Control.Monad.Except (throwError) import Data.List (intersperse, stripPrefix) import Data.Maybe (fromMaybe) -import Text.Pandoc.Definition -import Text.Pandoc.Options +import Data.Monoid ((<>)) +import Data.Text (Text, unpack) import Documentation.Haddock.Parser import Documentation.Haddock.Types -import Debug.Trace (trace) - +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition import Text.Pandoc.Error +import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter, splitBy, trim) + -- | Parse Haddock markup and return a 'Pandoc' document. -readHaddock :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse - -> Either PandocError Pandoc -readHaddock opts = +readHaddock :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of + Right result -> return result + Left e -> throwError e + +readHaddockEither :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse + -> Either PandocError Pandoc +readHaddockEither _opts = #if MIN_VERSION_haddock_library(1,2,0) - Right . B.doc . docHToBlocks . trace' . _doc . parseParas + Right . B.doc . docHToBlocks . _doc . parseParas #else - Right . B.doc . docHToBlocks . trace' . parseParas + Right . B.doc . docHToBlocks . parseParas #endif - where trace' x = if readerTrace opts - then trace (show x) x - else x docHToBlocks :: DocH String Identifier -> Blocks docHToBlocks d' = @@ -90,7 +97,7 @@ docHToBlocks d' = isPlain (Plain _) = True isPlain _ = False extractContents (Plain xs) = xs - extractContents _ = [] + extractContents _ = [] docHToInlines :: Bool -> DocH String Identifier -> Inlines docHToInlines isCode d' = @@ -135,7 +142,7 @@ makeExample prompt expression result = <> B.space <> B.codeWith ([], ["haskell","expr"], []) (trim expression) <> B.linebreak - <> (mconcat $ intersperse B.linebreak $ map coder result') + <> mconcat (intersperse B.linebreak $ map coder result') where -- 1. drop trailing whitespace from the prompt, remember the prefix prefix = takeWhile (`elem` " \t") prompt diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs new file mode 100644 index 000000000..8158a4511 --- /dev/null +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -0,0 +1,496 @@ +{-# LANGUAGE ExplicitForAll, TupleSections #-} +module Text.Pandoc.Readers.JATS ( readJATS ) where +import Control.Monad.State.Strict +import Data.Char (isDigit, isSpace, toUpper) +import Data.Default +import Data.Generics +import Data.List (intersperse) +import qualified Data.Map as Map +import Data.Maybe (maybeToList, fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Options +import Text.Pandoc.Shared (underlineSpan, crFilter, safeRead) +import Text.TeXMath (readMathML, writeTeX) +import Text.XML.Light +import qualified Data.Set as S (fromList, member) +import Data.Set ((\\)) + +type JATS m = StateT JATSState m + +data JATSState = JATSState{ jatsSectionLevel :: Int + , jatsQuoteType :: QuoteType + , jatsMeta :: Meta + , jatsBook :: Bool + , jatsFigureTitle :: Inlines + , jatsContent :: [Content] + } deriving Show + +instance Default JATSState where + def = JATSState{ jatsSectionLevel = 0 + , jatsQuoteType = DoubleQuote + , jatsMeta = mempty + , jatsBook = False + , jatsFigureTitle = mempty + , jatsContent = [] } + + +readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readJATS _ inp = do + let tree = normalizeTree . parseXML + $ T.unpack $ crFilter inp + (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree + return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) + +-- normalize input, consolidating adjacent Text and CRef elements +normalizeTree :: [Content] -> [Content] +normalizeTree = everywhere (mkT go) + where go :: [Content] -> [Content] + go (Text (CData CDataRaw _ _):xs) = xs + go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = + Text (CData CDataText (s1 ++ s2) z):xs + go (Text (CData CDataText s1 z):CRef r:xs) = + Text (CData CDataText (s1 ++ convertEntity r) z):xs + go (CRef r:Text (CData CDataText s1 z):xs) = + Text (CData CDataText (convertEntity r ++ s1) z):xs + go (CRef r1:CRef r2:xs) = + Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + go xs = xs + +convertEntity :: String -> String +convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) + +-- convenience function to get an attribute value, defaulting to "" +attrValue :: String -> Element -> String +attrValue attr = + fromMaybe "" . maybeAttrValue attr + +maybeAttrValue :: String -> Element -> Maybe String +maybeAttrValue attr elt = + lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + +-- convenience function +named :: String -> Element -> Bool +named s e = qName (elName e) == s + +-- + +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m () +addMeta field val = modify (setMeta field val) + +instance HasMeta JATSState where + setMeta field v s = s {jatsMeta = setMeta field v (jatsMeta s)} + deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)} + +isBlockElement :: Content -> Bool +isBlockElement (Elem e) = qName (elName e) `S.member` blocktags + where blocktags = S.fromList (paragraphLevel ++ lists ++ mathML ++ other) \\ S.fromList inlinetags + paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap", + "code", "fig", "fig-group", "graphic", "media", "preformat", + "supplementary-material", "table-wrap", "table-wrap-group", + "alternatives", "disp-formula", "disp-formula-group"] + lists = ["def-list", "list"] + mathML = ["tex-math", "mml:math"] + other = ["p", "related-article", "related-object", "ack", "disp-quote", + "speech", "statement", "verse-group", "x"] + inlinetags = ["email", "ext-link", "uri", "inline-supplementary-material", + "related-article", "related-object", "hr", "bold", "fixed-case", + "italic", "monospace", "overline", "overline-start", "overline-end", + "roman", "sans-serif", "sc", "strike", "underline", "underline-start", + "underline-end", "ruby", "alternatives", "inline-graphic", "private-char", + "chem-struct", "inline-formula", "tex-math", "mml:math", "abbrev", + "milestone-end", "milestone-start", "named-content", "styled-content", + "fn", "target", "xref", "sub", "sup", "x", "address", "array", + "boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic", + "media", "preformat", "supplementary-material", "table-wrap", + "table-wrap-group", "disp-formula", "disp-formula-group", + "citation-alternatives", "element-citation", "mixed-citation", + "nlm-citation", "award-id", "funding-source", "open-access", + "def-list", "list", "ack", "disp-quote", "speech", "statement", + "verse-group"] +isBlockElement _ = False + +-- Trim leading and trailing newline characters +trimNl :: String -> String +trimNl = reverse . go . reverse . go + where go ('\n':xs) = xs + go xs = xs + +-- function that is used by both graphic (in parseBlock) +-- and inline-graphic (in parseInline) +getGraphic :: PandocMonad m => Element -> JATS m Inlines +getGraphic e = do + let atVal a = attrValue a e + attr = (atVal "id", words $ atVal "role", []) + imageUrl = atVal "href" + captionOrLabel = case filterChild (\x -> named "caption" x + || named "label" x) e of + Nothing -> return mempty + Just z -> mconcat <$> + mapM parseInline (elContent z) + figTitle <- gets jatsFigureTitle + let (caption, title) = if isNull figTitle + then (captionOrLabel, atVal "title") + else (return figTitle, "fig:") + fmap (imageWith attr imageUrl title) caption + +getBlocks :: PandocMonad m => Element -> JATS m Blocks +getBlocks e = mconcat <$> + mapM parseBlock (elContent e) + + +parseBlock :: PandocMonad m => Content -> JATS m Blocks +parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE +parseBlock (Text (CData _ s _)) = if all isSpace s + then return mempty + else return $ plain $ trimInlines $ text s +parseBlock (CRef x) = return $ plain $ str $ map toUpper x +parseBlock (Elem e) = + case qName (elName e) of + "p" -> parseMixed para (elContent e) + "code" -> codeBlockWithLang + "preformat" -> codeBlockWithLang + "disp-quote" -> parseBlockquote + "list" -> case attrValue "list-type" e of + "bullet" -> bulletList <$> listitems + listType -> do + let start = fromMaybe 1 $ + (strContent <$> (filterElement (named "list-item") e + >>= filterElement (named "lable"))) + >>= safeRead + orderedListWith (start, parseListStyleType listType, DefaultDelim) + <$> listitems + "def-list" -> definitionList <$> deflistitems + "sec" -> gets jatsSectionLevel >>= sect . (+1) + "graphic" -> para <$> getGraphic e + "journal-meta" -> parseMetadata e + "article-meta" -> parseMetadata e + "custom-meta" -> parseMetadata e + "title" -> return mempty -- processed by header + "table" -> parseTable + "fig" -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e + "table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], []) <$> getBlocks e + "caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6 + "ref-list" -> parseRefList e + "?xml" -> return mempty + _ -> getBlocks e + where parseMixed container conts = do + let (ils,rest) = break isBlockElement conts + ils' <- (trimInlines . mconcat) <$> mapM parseInline ils + let p = if ils' == mempty then mempty else container ils' + case rest of + [] -> return p + (r:rs) -> do + b <- parseBlock r + x <- parseMixed container rs + return $ p <> b <> x + codeBlockWithLang = do + let classes' = case attrValue "language" e of + "" -> [] + x -> [x] + return $ codeBlockWith (attrValue "id" e, classes', []) + $ trimNl $ strContentRecursive e + parseBlockquote = do + attrib <- case filterChild (named "attribution") e of + Nothing -> return mempty + Just z -> (para . (str "— " <>) . mconcat) + <$> + mapM parseInline (elContent z) + contents <- getBlocks e + return $ blockQuote (contents <> attrib) + parseListStyleType "roman-lower" = LowerRoman + parseListStyleType "roman-upper" = UpperRoman + parseListStyleType "alpha-lower" = LowerAlpha + parseListStyleType "alpha-upper" = UpperAlpha + parseListStyleType _ = DefaultStyle + listitems = mapM getBlocks $ filterChildren (named "list-item") e + deflistitems = mapM parseVarListEntry $ filterChildren + (named "def-item") e + parseVarListEntry e' = do + let terms = filterChildren (named "term") e' + let items = filterChildren (named "def") e' + terms' <- mapM getInlines terms + items' <- mapM getBlocks items + return (mconcat $ intersperse (str "; ") terms', items') + parseTable = do + let isCaption x = named "title" x || named "caption" x + caption <- case filterChild isCaption e of + Just t -> getInlines t + Nothing -> return mempty + let e' = fromMaybe e $ filterChild (named "tgroup") e + let isColspec x = named "colspec" x || named "col" x + let colspecs = case filterChild (named "colgroup") e' of + Just c -> filterChildren isColspec c + _ -> filterChildren isColspec e' + let isRow x = named "row" x || named "tr" x + headrows <- case filterChild (named "thead") e' of + Just h -> case filterChild isRow h of + Just x -> parseRow x + Nothing -> return [] + Nothing -> return [] + bodyrows <- case filterChild (named "tbody") e' of + Just b -> mapM parseRow + $ filterChildren isRow b + Nothing -> mapM parseRow + $ filterChildren isRow e' + let toAlignment c = case findAttr (unqual "align") c of + Just "left" -> AlignLeft + Just "right" -> AlignRight + Just "center" -> AlignCenter + _ -> AlignDefault + let toWidth c = case findAttr (unqual "colwidth") c of + Just w -> fromMaybe 0 + $ safeRead $ '0': filter (\x -> + isDigit x || x == '.') w + Nothing -> 0 :: Double + let numrows = case bodyrows of + [] -> 0 + xs -> maximum $ map length xs + let aligns = case colspecs of + [] -> replicate numrows AlignDefault + cs -> map toAlignment cs + let widths = case colspecs of + [] -> replicate numrows 0 + cs -> let ws = map toWidth cs + tot = sum ws + in if all (> 0) ws + then map (/ tot) ws + else replicate numrows 0 + let headrows' = if null headrows + then replicate numrows mempty + else headrows + return $ table caption (zip aligns widths) + headrows' bodyrows + isEntry x = named "entry" x || named "td" x || named "th" x + parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry + sect n = do isbook <- gets jatsBook + let n' = if isbook || n == 0 then n + 1 else n + headerText <- case filterChild (named "title") e `mplus` + (filterChild (named "info") e >>= + filterChild (named "title")) of + Just t -> getInlines t + Nothing -> return mempty + oldN <- gets jatsSectionLevel + modify $ \st -> st{ jatsSectionLevel = n } + b <- getBlocks e + let ident = attrValue "id" e + modify $ \st -> st{ jatsSectionLevel = oldN } + return $ headerWith (ident,[],[]) n' headerText <> b + +getInlines :: PandocMonad m => Element -> JATS m Inlines +getInlines e' = (trimInlines . mconcat) <$> + mapM parseInline (elContent e') + +parseMetadata :: PandocMonad m => Element -> JATS m Blocks +parseMetadata e = do + getTitle e + getAuthors e + getAffiliations e + return mempty + +getTitle :: PandocMonad m => Element -> JATS m () +getTitle e = do + tit <- case filterElement (named "article-title") e of + Just s -> getInlines s + Nothing -> return mempty + subtit <- case filterElement (named "subtitle") e of + Just s -> (text ": " <>) <$> + getInlines s + Nothing -> return mempty + when (tit /= mempty) $ addMeta "title" tit + when (subtit /= mempty) $ addMeta "subtitle" subtit + +getAuthors :: PandocMonad m => Element -> JATS m () +getAuthors e = do + authors <- mapM getContrib $ filterElements + (\x -> named "contrib" x && + attrValue "contrib-type" x == "author") e + authorNotes <- mapM getInlines $ filterElements (named "author-notes") e + let authors' = case (reverse authors, authorNotes) of + ([], _) -> [] + (_, []) -> authors + (a:as, ns) -> reverse as ++ [a <> mconcat ns] + unless (null authors) $ addMeta "author" authors' + +getAffiliations :: PandocMonad m => Element -> JATS m () +getAffiliations x = do + affs <- mapM getInlines $ filterChildren (named "aff") x + unless (null affs) $ addMeta "institute" affs + +getContrib :: PandocMonad m => Element -> JATS m Inlines +getContrib x = do + given <- maybe (return mempty) getInlines + $ filterElement (named "given-names") x + family <- maybe (return mempty) getInlines + $ filterElement (named "surname") x + if given == mempty && family == mempty + then return mempty + else if given == mempty || family == mempty + then return $ given <> family + else return $ given <> space <> family + +parseRefList :: PandocMonad m => Element -> JATS m Blocks +parseRefList e = do + refs <- mapM parseRef $ filterChildren (named "ref") e + addMeta "references" refs + return mempty + +parseRef :: PandocMonad m + => Element -> JATS m (Map.Map String MetaValue) +parseRef e = do + let refId = text $ attrValue "id" e + let getInlineText n = maybe (return mempty) getInlines . filterChild (named n) + case filterChild (named "element-citation") e of + Just c -> do + let refType = text $ + case attrValue "publication-type" c of + "journal" -> "article-journal" + x -> x + (refTitle, refContainerTitle) <- do + t <- getInlineText "article-title" c + ct <- getInlineText "source" c + if t == mempty + then return (ct, mempty) + else return (t, ct) + refLabel <- getInlineText "label" c + refYear <- getInlineText "year" c + refVolume <- getInlineText "volume" c + refFirstPage <- getInlineText "fpage" c + refLastPage <- getInlineText "lpage" c + refPublisher <- getInlineText "publisher-name" c + refPublisherPlace <- getInlineText "publisher-loc" c + let refPages = refFirstPage <> (if refLastPage == mempty + then mempty + else text "\x2013" <> refLastPage) + let personGroups' = filterChildren (named "person-group") c + let getName nm = do + given <- maybe (return mempty) getInlines + $ filterChild (named "given-names") nm + family <- maybe (return mempty) getInlines + $ filterChild (named "surname") nm + return $ toMetaValue $ Map.fromList [ + ("given", given) + , ("family", family) + ] + personGroups <- mapM (\pg -> + do names <- mapM getName + (filterChildren (named "name") pg) + return (attrValue "person-group-type" pg, + toMetaValue names)) + personGroups' + return $ Map.fromList $ + [ ("id", toMetaValue refId) + , ("type", toMetaValue refType) + , ("title", toMetaValue refTitle) + , ("container-title", toMetaValue refContainerTitle) + , ("publisher", toMetaValue refPublisher) + , ("publisher-place", toMetaValue refPublisherPlace) + , ("title", toMetaValue refTitle) + , ("issued", toMetaValue + $ Map.fromList [ + ("year", refYear) + ]) + , ("volume", toMetaValue refVolume) + , ("page", toMetaValue refPages) + , ("citation-label", toMetaValue refLabel) + ] ++ personGroups + Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty + -- TODO handle mixed-citation + +strContentRecursive :: Element -> String +strContentRecursive = strContent . + (\e' -> e'{ elContent = map elementToStr $ elContent e' }) + +elementToStr :: Content -> Content +elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing +elementToStr x = x + +parseInline :: PandocMonad m => Content -> JATS m Inlines +parseInline (Text (CData _ s _)) = return $ text s +parseInline (CRef ref) = + return $ maybe (text $ map toUpper ref) text $ lookupEntity ref +parseInline (Elem e) = + case qName (elName e) of + "italic" -> emph <$> innerInlines + "bold" -> strong <$> innerInlines + "strike" -> strikeout <$> innerInlines + "sub" -> subscript <$> innerInlines + "sup" -> superscript <$> innerInlines + "underline" -> underlineSpan <$> innerInlines + "break" -> return linebreak + "sc" -> smallcaps <$> innerInlines + + "code" -> codeWithLang + "monospace" -> codeWithLang + + "inline-graphic" -> getGraphic e + "disp-quote" -> do + qt <- gets jatsQuoteType + let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote + modify $ \st -> st{ jatsQuoteType = qt' } + contents <- innerInlines + modify $ \st -> st{ jatsQuoteType = qt } + return $ if qt == SingleQuote + then singleQuoted contents + else doubleQuoted contents + + "xref" -> do + ils <- innerInlines + let rid = attrValue "rid" e + let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e + let attr = (attrValue "id" e, [], maybeToList refType) + return $ if refType == Just ("ref-type","bibr") + then cite [Citation{ + citationId = rid + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0}] ils + else linkWith attr ('#' : rid) "" ils + "ext-link" -> do + ils <- innerInlines + let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + Just h -> h + _ -> '#' : attrValue "rid" e + let ils' = if ils == mempty then str href else ils + let attr = (attrValue "id" e, [], []) + return $ linkWith attr href title ils' + + "disp-formula" -> formula displayMath + "inline-formula" -> formula math + "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e + "tex-math" -> return . math $ strContent e + + "email" -> return $ link ("mailto:" ++ strContent e) "" + $ str $ strContent e + "uri" -> return $ link (strContent e) "" $ str $ strContent e + "fn" -> (note . mconcat) <$> + mapM parseBlock (elContent e) + _ -> innerInlines + where innerInlines = (trimInlines . mconcat) <$> + mapM parseInline (elContent e) + mathML x = + case readMathML . showElement $ everywhere (mkT removePrefix) x of + Left _ -> mempty + Right m -> writeTeX m + formula constructor = do + let whereToLook = fromMaybe e $ filterElement (named "alternatives") e + texMaths = map strContent $ + filterChildren (named "tex-math") whereToLook + mathMLs = map mathML $ + filterChildren isMathML whereToLook + return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs + + isMathML x = qName (elName x) == "math" && + qPrefix (elName x) == Just "mml" + removePrefix elname = elname { qPrefix = Nothing } + codeWithLang = do + let classes' = case attrValue "language" e of + "" -> [] + l -> [l] + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index edcf35e51..57d2803ba 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,6 +1,10 @@ -{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +23,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -27,218 +31,596 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Portability : portable Conversion of LaTeX to 'Pandoc' document. + -} module Text.Pandoc.Readers.LaTeX ( readLaTeX, + applyMacros, rawLaTeXInline, rawLaTeXBlock, inlineCommand, - handleIncludes + tokenize, + untokenize ) where -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Shared -import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, - mathDisplay, mathInline) -import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Char ( chr, ord, isLetter, isAlphaNum ) -import Control.Monad.Trans (lift) +import Control.Applicative (many, optional, (<|>)) import Control.Monad -import Text.Pandoc.Builder -import Control.Applicative ((<|>), many, optional) -import Data.Maybe (fromMaybe, maybeToList) -import System.Environment (getEnv) -import System.FilePath (replaceExtension, (</>), takeExtension, addExtension) -import Data.List (intercalate) +import Control.Monad.Except (throwError) +import Control.Monad.Trans (lift) +import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower) +import Data.Default +import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M -import qualified Control.Exception as E -import Text.Pandoc.Highlighting (fromListingsLanguage) +import Data.Maybe (fromMaybe, maybeToList) +import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T +import Safe (minimumDef) +import System.FilePath (addExtension, replaceExtension, takeExtension) +import Text.Pandoc.BCP47 (Lang (..), renderLang) +import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv, + readFileFromDirs, report, setResourcePath, + setTranslations, translateTerm, trace) +import Text.Pandoc.Error (PandocError (PandocMacroLoop, PandocParseError, PandocParsecError)) +import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) -import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) +import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), + Tok (..), TokType (..)) +import Text.Pandoc.Shared +import qualified Text.Pandoc.Translations as Translations +import Text.Pandoc.Walk +import Text.Parsec.Pos +import qualified Text.Pandoc.Builder as B --- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Either PandocError Pandoc -readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } +-- for debugging: +-- import Text.Pandoc.Extensions (getDefaultExtensions) +-- import Text.Pandoc.Class (runIOorExplode, PandocIO) +-- import Debug.Trace (traceShowId) -parseLaTeX :: LP Pandoc +-- | Parse LaTeX from string and return 'Pandoc' document. +readLaTeX :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> m Pandoc +readLaTeX opts ltx = do + parsed <- runParserT parseLaTeX def{ sOptions = opts } "source" + (tokenize "source" (crFilter ltx)) + case parsed of + Right result -> return result + Left e -> throwError $ PandocParsecError (T.unpack ltx) e + +parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do bs <- blocks eof st <- getState - let meta = stateMeta st - let (Pandoc _ bs') = doc bs + let meta = sMeta st + let doc' = doc bs + let headerLevel (Header n _ _) = [n] + headerLevel _ = [] + let bottomLevel = minimumDef 1 $ query headerLevel doc' + let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils + adjustHeaders _ x = x + let (Pandoc _ bs') = + -- handle the case where you have \part or \chapter + (if bottomLevel < 1 + then walk (adjustHeaders (1 - bottomLevel)) + else id) $ + walk (resolveRefs (sLabels st)) doc' return $ Pandoc meta bs' -type LP = Parser String ParserState - -anyControlSeq :: LP String -anyControlSeq = do - char '\\' - next <- option '\n' anyChar - case next of - '\n' -> return "" - c | isLetter c -> (c:) <$> (many letter <* optional sp) - | otherwise -> return [c] - -controlSeq :: String -> LP String -controlSeq name = try $ do - char '\\' - case name of - "" -> mzero - [c] | not (isLetter c) -> string [c] - cs -> string cs <* notFollowedBy letter <* optional sp - return name - -dimenarg :: LP String -dimenarg = try $ do - ch <- option "" $ string "=" - num <- many1 digit - dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"] - return $ ch ++ num ++ dim - -sp :: LP () -sp = whitespace <|> endline - -whitespace :: LP () -whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') - -endline :: LP () -endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline) +resolveRefs :: M.Map String [Inline] -> Inline -> Inline +resolveRefs labels x@(Link (ident,classes,kvs) _ _) = + case (lookup "reference-type" kvs, + lookup "reference" kvs) of + (Just "ref", Just lab) -> + case M.lookup lab labels of + Just txt -> Link (ident,classes,kvs) txt ('#':lab, "") + Nothing -> x + _ -> x +resolveRefs _ x = x + + +-- testParser :: LP PandocIO a -> Text -> IO a +-- testParser p t = do +-- res <- runIOorExplode (runParserT p defaultLaTeXState{ +-- sOptions = def{ readerExtensions = +-- enableExtension Ext_raw_tex $ +-- getDefaultExtensions "latex" }} "source" (tokenize "source" t)) +-- case res of +-- Left e -> error (show e) +-- Right r -> return r + +newtype HeaderNum = HeaderNum [Int] + deriving (Show) + +renderHeaderNum :: HeaderNum -> String +renderHeaderNum (HeaderNum xs) = + intercalate "." (map show xs) + +incrementHeaderNum :: Int -> HeaderNum -> HeaderNum +incrementHeaderNum level (HeaderNum ns) = HeaderNum $ + case reverse (take level (ns ++ repeat 0)) of + (x:xs) -> reverse (x+1 : xs) + [] -> [] -- shouldn't happen + +data LaTeXState = LaTeXState{ sOptions :: ReaderOptions + , sMeta :: Meta + , sQuoteContext :: QuoteContext + , sMacros :: M.Map Text Macro + , sContainers :: [String] + , sHeaders :: M.Map Inlines String + , sLogMessages :: [LogMessage] + , sIdentifiers :: Set.Set String + , sVerbatimMode :: Bool + , sCaption :: Maybe Inlines + , sInListItem :: Bool + , sInTableCell :: Bool + , sLastHeaderNum :: HeaderNum + , sLabels :: M.Map String [Inline] + , sToggles :: M.Map String Bool + } + deriving Show + +defaultLaTeXState :: LaTeXState +defaultLaTeXState = LaTeXState{ sOptions = def + , sMeta = nullMeta + , sQuoteContext = NoQuote + , sMacros = M.empty + , sContainers = [] + , sHeaders = M.empty + , sLogMessages = [] + , sIdentifiers = Set.empty + , sVerbatimMode = False + , sCaption = Nothing + , sInListItem = False + , sInTableCell = False + , sLastHeaderNum = HeaderNum [] + , sLabels = M.empty + , sToggles = M.empty + } + +instance PandocMonad m => HasQuoteContext LaTeXState m where + getQuoteContext = sQuoteContext <$> getState + withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = sQuoteContext oldState + setState oldState { sQuoteContext = context } + result <- parser + newState <- getState + setState newState { sQuoteContext = oldQuoteContext } + return result + +instance HasLogMessages LaTeXState where + addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st } + getLogMessages st = reverse $ sLogMessages st + +instance HasIdentifierList LaTeXState where + extractIdentifierList = sIdentifiers + updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st } + +instance HasIncludeFiles LaTeXState where + getIncludeFiles = sContainers + addIncludeFile f s = s{ sContainers = f : sContainers s } + dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s } + +instance HasHeaderMap LaTeXState where + extractHeaderMap = sHeaders + updateHeaderMap f st = st{ sHeaders = f $ sHeaders st } + +instance HasMacros LaTeXState where + extractMacros st = sMacros st + updateMacros f st = st{ sMacros = f (sMacros st) } + +instance HasReaderOptions LaTeXState where + extractReaderOptions = sOptions + +instance HasMeta LaTeXState where + setMeta field val st = + st{ sMeta = setMeta field val $ sMeta st } + deleteMeta field st = + st{ sMeta = deleteMeta field $ sMeta st } + +instance Default LaTeXState where + def = defaultLaTeXState + +type LP m = ParserT [Tok] LaTeXState m + +withVerbatimMode :: PandocMonad m => LP m a -> LP m a +withVerbatimMode parser = do + updateState $ \st -> st{ sVerbatimMode = True } + result <- parser + updateState $ \st -> st{ sVerbatimMode = False } + return result + +rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => LP m a -> ParserT String s m (a, String) +rawLaTeXParser parser = do + inp <- getInput + let toks = tokenize "source" $ T.pack inp + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + let rawparser = (,) <$> withRaw parser <*> getState + res <- lift $ runParserT rawparser lstate "chunk" toks + case res of + Left _ -> mzero + Right ((val, raw), st) -> do + updateState (updateMacros (sMacros st <>)) + rawstring <- takeP (T.length (untokenize raw)) + return (val, rawstring) + +applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => String -> ParserT String s m String +applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> + do let retokenize = doMacros 0 *> + (toksToString <$> many (satisfyTok (const True))) + pstate <- getState + let lstate = def{ sOptions = extractReaderOptions pstate + , sMacros = extractMacros pstate } + res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s)) + case res of + Left e -> fail (show e) + Right s' -> return s' + +rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String +rawLaTeXBlock = do + lookAhead (try (char '\\' >> letter)) + -- we don't want to apply newly defined latex macros to their own + -- definitions: + snd <$> rawLaTeXParser macroDef + <|> ((snd <$> rawLaTeXParser (environment <|> blockCommand)) >>= applyMacros) + +rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) + => ParserT String s m String +rawLaTeXInline = do + lookAhead (try (char '\\' >> letter)) + rawLaTeXParser (inlineEnvironment <|> inlineCommand') >>= applyMacros . snd + +inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines +inlineCommand = do + lookAhead (try (char '\\' >> letter)) + fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') + +tokenize :: SourceName -> Text -> [Tok] +tokenize sourcename = totoks (initialPos sourcename) + +totoks :: SourcePos -> Text -> [Tok] +totoks pos t = + case T.uncons t of + Nothing -> [] + Just (c, rest) + | c == '\n' -> + Tok pos Newline "\n" + : totoks (setSourceColumn (incSourceLine pos 1) 1) rest + | isSpaceOrTab c -> + let (sps, rest') = T.span isSpaceOrTab t + in Tok pos Spaces sps + : totoks (incSourceColumn pos (T.length sps)) + rest' + | isAlphaNum c -> + let (ws, rest') = T.span isAlphaNum t + in Tok pos Word ws + : totoks (incSourceColumn pos (T.length ws)) rest' + | c == '%' -> + let (cs, rest') = T.break (== '\n') rest + in Tok pos Comment ("%" <> cs) + : totoks (incSourceColumn pos (1 + T.length cs)) rest' + | c == '\\' -> + case T.uncons rest of + Nothing -> [Tok pos (CtrlSeq " ") "\\"] + Just (d, rest') + | isLetterOrAt d -> + -- \makeatletter is common in macro defs; + -- ideally we should make tokenization sensitive + -- to \makeatletter and \makeatother, but this is + -- probably best for now + let (ws, rest'') = T.span isLetterOrAt rest + (ss, rest''') = T.span isSpaceOrTab rest'' + in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss) + : totoks (incSourceColumn pos + (1 + T.length ws + T.length ss)) rest''' + | isSpaceOrTab d || d == '\n' -> + let (w1, r1) = T.span isSpaceOrTab rest + (w2, (w3, r3)) = case T.uncons r1 of + Just ('\n', r2) + -> (T.pack "\n", + T.span isSpaceOrTab r2) + _ -> (mempty, (mempty, r1)) + ws = "\\" <> w1 <> w2 <> w3 + in case T.uncons r3 of + Just ('\n', _) -> + Tok pos (CtrlSeq " ") ("\\" <> w1) + : totoks (incSourceColumn pos (T.length ws)) + r1 + _ -> + Tok pos (CtrlSeq " ") ws + : totoks (incSourceColumn pos (T.length ws)) + r3 + | otherwise -> + Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d]) + : totoks (incSourceColumn pos 2) rest' + | c == '#' -> + let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest + in case safeRead (T.unpack t1) of + Just i -> + Tok pos (Arg i) ("#" <> t1) + : totoks (incSourceColumn pos (1 + T.length t1)) t2 + Nothing -> + Tok pos Symbol "#" + : totoks (incSourceColumn pos 1) t2 + | c == '^' -> + case T.uncons rest of + Just ('^', rest') -> + case T.uncons rest' of + Just (d, rest'') + | isLowerHex d -> + case T.uncons rest'' of + Just (e, rest''') | isLowerHex e -> + Tok pos Esc2 (T.pack ['^','^',d,e]) + : totoks (incSourceColumn pos 4) rest''' + _ -> + Tok pos Esc1 (T.pack ['^','^',d]) + : totoks (incSourceColumn pos 3) rest'' + | d < '\128' -> + Tok pos Esc1 (T.pack ['^','^',d]) + : totoks (incSourceColumn pos 3) rest'' + _ -> Tok pos Symbol "^" : + Tok (incSourceColumn pos 1) Symbol "^" : + totoks (incSourceColumn pos 2) rest' + _ -> Tok pos Symbol "^" + : totoks (incSourceColumn pos 1) rest + | otherwise -> + Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest + +isSpaceOrTab :: Char -> Bool +isSpaceOrTab ' ' = True +isSpaceOrTab '\t' = True +isSpaceOrTab _ = False + +isLetterOrAt :: Char -> Bool +isLetterOrAt '@' = True +isLetterOrAt c = isLetter c isLowerHex :: Char -> Bool isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' -tildeEscape :: LP Char -tildeEscape = try $ do - string "^^" - c <- satisfy (\x -> x >= '\0' && x <= '\128') - d <- if isLowerHex c - then option "" $ count 1 (satisfy isLowerHex) - else return "" - if null d - then case ord c of - x | x >= 64 && x <= 127 -> return $ chr (x - 64) - | otherwise -> return $ chr (x + 64) - else return $ chr $ read ('0':'x':c:d) - -comment :: LP () -comment = do - char '%' - skipMany (satisfy (/='\n')) - optional newline - return () +untokenize :: [Tok] -> Text +untokenize = mconcat . map untoken + +untoken :: Tok -> Text +untoken (Tok _ _ t) = t + +satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok +satisfyTok f = + try $ do + res <- tokenPrim (T.unpack . untoken) updatePos matcher + doMacros 0 -- apply macros on remaining input stream + return res + where matcher t | f t = Just t + | otherwise = Nothing + updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos + updatePos _spos _ (Tok pos _ _ : _) = pos + updatePos spos _ [] = incSourceColumn spos 1 + +doMacros :: PandocMonad m => Int -> LP m () +doMacros n = do + verbatimMode <- sVerbatimMode <$> getState + unless verbatimMode $ do + inp <- getInput + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos name ts + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros spos ("end" <> name) ts + Tok _ (CtrlSeq "expandafter") _ : t : ts + -> do setInput ts + doMacros n + getInput >>= setInput . combineTok t + Tok spos (CtrlSeq name) _ : ts + -> handleMacros spos name ts + _ -> return () + where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) + | T.all isLetterOrAt w = + Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts + where (x1, x2) = T.break isSpaceOrTab x + combineTok t ts = t:ts + handleMacros spos name ts = do + macros <- sMacros <$> getState + case M.lookup name macros of + Nothing -> return () + Just (Macro expansionPoint numargs optarg newtoks) -> do + setInput ts + let getarg = try $ spaces >> bracedOrToken + args <- case optarg of + Nothing -> count numargs getarg + Just o -> + (:) <$> option o bracketedToks + <*> count (numargs - 1) getarg + -- first boolean param is true if we're tokenizing + -- an argument (in which case we don't want to + -- expand #1 etc.) + let addTok False (Tok _ (Arg i) _) acc | i > 0 + , i <= numargs = + foldr (addTok True) acc (args !! (i - 1)) + -- add space if needed after control sequence + -- see #4007 + addTok _ (Tok _ (CtrlSeq x) txt) + acc@(Tok _ Word _ : _) + | not (T.null txt) && + isLetter (T.last txt) = + Tok spos (CtrlSeq x) (txt <> " ") : acc + addTok _ t acc = setpos spos t : acc + ts' <- getInput + setInput $ foldr (addTok False) ts' newtoks + case expansionPoint of + ExpandWhenUsed -> + if n > 20 -- detect macro expansion loops + then throwError $ PandocMacroLoop (T.unpack name) + else doMacros (n + 1) + ExpandWhenDefined -> return () + + +setpos :: SourcePos -> Tok -> Tok +setpos spos (Tok _ tt txt) = Tok spos tt txt + +anyControlSeq :: PandocMonad m => LP m Tok +anyControlSeq = satisfyTok isCtrlSeq + where isCtrlSeq (Tok _ (CtrlSeq _) _) = True + isCtrlSeq _ = False + +anySymbol :: PandocMonad m => LP m Tok +anySymbol = satisfyTok isSym + where isSym (Tok _ Symbol _) = True + isSym _ = False + +spaces :: PandocMonad m => LP m () +spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +spaces1 :: PandocMonad m => LP m () +spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline])) + +tokTypeIn :: [TokType] -> Tok -> Bool +tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes + +controlSeq :: PandocMonad m => Text -> LP m Tok +controlSeq name = satisfyTok isNamed + where isNamed (Tok _ (CtrlSeq n) _) = n == name + isNamed _ = False + +symbol :: PandocMonad m => Char -> LP m Tok +symbol c = satisfyTok isc + where isc (Tok _ Symbol d) = case T.uncons d of + Just (c',_) -> c == c' + _ -> False + isc _ = False + +symbolIn :: PandocMonad m => [Char] -> LP m Tok +symbolIn cs = satisfyTok isInCs + where isInCs (Tok _ Symbol d) = case T.uncons d of + Just (c,_) -> c `elem` cs + _ -> False + isInCs _ = False + +sp :: PandocMonad m => LP m () +sp = whitespace <|> endline -bgroup :: LP () +whitespace :: PandocMonad m => LP m () +whitespace = () <$ satisfyTok isSpaceTok + where isSpaceTok (Tok _ Spaces _) = True + isSpaceTok _ = False + +newlineTok :: PandocMonad m => LP m () +newlineTok = () <$ satisfyTok isNewlineTok + +isNewlineTok :: Tok -> Bool +isNewlineTok (Tok _ Newline _) = True +isNewlineTok _ = False + +comment :: PandocMonad m => LP m () +comment = () <$ satisfyTok isCommentTok + where isCommentTok (Tok _ Comment _) = True + isCommentTok _ = False + +anyTok :: PandocMonad m => LP m Tok +anyTok = satisfyTok (const True) + +endline :: PandocMonad m => LP m () +endline = try $ do + newlineTok + lookAhead anyTok + notFollowedBy blankline + +blankline :: PandocMonad m => LP m () +blankline = try $ skipMany whitespace *> newlineTok + +primEscape :: PandocMonad m => LP m Char +primEscape = do + Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2]) + case toktype of + Esc1 -> case T.uncons (T.drop 2 t) of + Just (c, _) + | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) + | otherwise -> return (chr (ord c + 64)) + Nothing -> fail "Empty content of Esc1" + Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Just x -> return (chr x) + Nothing -> fail $ "Could not read: " ++ T.unpack t + _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen + +bgroup :: PandocMonad m => LP m Tok bgroup = try $ do - skipMany (spaceChar <|> try (newline <* notFollowedBy blankline)) - () <$ char '{' - <|> () <$ controlSeq "bgroup" - <|> () <$ controlSeq "begingroup" - -egroup :: LP () -egroup = () <$ char '}' - <|> () <$ controlSeq "egroup" - <|> () <$ controlSeq "endgroup" - -grouped :: Monoid a => LP a -> LP a -grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup) - -braced :: LP String -braced = bgroup *> (concat <$> manyTill - ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{')) - <|> try (string "\\}") - <|> try (string "\\{") - <|> try (string "\\\\") - <|> ((\x -> "{" ++ x ++ "}") <$> braced) - <|> count 1 anyChar - ) egroup) - -bracketed :: Monoid a => LP a -> LP a -bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) - -mathDisplay :: LP String -> LP Inlines -mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) - -mathInline :: LP String -> LP Inlines -mathInline p = math <$> (try p >>= applyMacros') - -mathChars :: LP String -mathChars = - concat <$> many (escapedChar - <|> (snd <$> withRaw braced) - <|> many1 (satisfy isOrdChar)) - where escapedChar = try $ do char '\\' - c <- anyChar - return ['\\',c] - isOrdChar '$' = False - isOrdChar '{' = False - isOrdChar '}' = False - isOrdChar '\\' = False - isOrdChar _ = True - -quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines -quoted' f starter ender = do - startchs <- starter - smart <- getOption readerSmart - if smart - then do - ils <- many (notFollowedBy ender >> inline) - (ender >> return (f (mconcat ils))) <|> - (<> mconcat ils) <$> - lit (case startchs of - "``" -> "“" - "`" -> "‘" - _ -> startchs) - else lit startchs - -doubleQuote :: LP Inlines -doubleQuote = do - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') - -singleQuote :: LP Inlines -singleQuote = do - smart <- getOption readerSmart - if smart - then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) - else str <$> many1 (oneOf "`\'‘’") - -inline :: LP Inlines -inline = (mempty <$ comment) - <|> (space <$ whitespace) - <|> (softbreak <$ endline) - <|> inlineText - <|> inlineCommand - <|> inlineEnvironment - <|> inlineGroup - <|> (char '-' *> option (str "-") - (char '-' *> option (str "–") (str "—" <$ char '-'))) - <|> doubleQuote - <|> singleQuote - <|> (str "”" <$ try (string "''")) - <|> (str "”" <$ char '”') - <|> (str "’" <$ char '\'') - <|> (str "’" <$ char '’') - <|> (str "\160" <$ char '~') - <|> mathDisplay (string "$$" *> mathChars <* string "$$") - <|> mathInline (char '$' *> mathChars <* char '$') - <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) - <|> (str . (:[]) <$> tildeEscape) - <|> (str . (:[]) <$> oneOf "[]") - <|> (str . (:[]) <$> oneOf "#&~^'`\"[]") -- TODO print warning? - -- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters - -inlines :: LP Inlines -inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) - -inlineGroup :: LP Inlines + skipMany sp + symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" + +egroup :: PandocMonad m => LP m Tok +egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup" + +grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a +grouped parser = try $ do + bgroup + -- first we check for an inner 'grouped', because + -- {{a,b}} should be parsed the same as {a,b} + try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) + +braced :: PandocMonad m => LP m [Tok] +braced = bgroup *> braced' 1 + where braced' (n :: Int) = + handleEgroup n <|> handleBgroup n <|> handleOther n + handleEgroup n = do + t <- egroup + if n == 1 + then return [] + else (t:) <$> braced' (n - 1) + handleBgroup n = do + t <- bgroup + (t:) <$> braced' (n + 1) + handleOther n = do + t <- anyTok + (t:) <$> braced' n + +bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a +bracketed parser = try $ do + symbol '[' + mconcat <$> manyTill parser (symbol ']') + +dimenarg :: PandocMonad m => LP m Text +dimenarg = try $ do + ch <- option False $ True <$ symbol '=' + Tok _ _ s <- satisfyTok isWordTok + guard $ T.take 2 (T.reverse s) `elem` + ["pt","pc","in","bp","cm","mm","dd","cc","sp"] + let num = T.take (T.length s - 2) s + guard $ T.length num > 0 + guard $ T.all isDigit num + return $ T.pack ['=' | ch] <> s + +-- inline elements: + +word :: PandocMonad m => LP m Inlines +word = (str . T.unpack . untoken) <$> satisfyTok isWordTok + +regularSymbol :: PandocMonad m => LP m Inlines +regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol + where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t + isRegularSymbol _ = False + isSpecial c = c `Set.member` specialChars + +specialChars :: Set.Set Char +specialChars = Set.fromList "#$%&~_^\\{}" + +isWordTok :: Tok -> Bool +isWordTok (Tok _ Word _) = True +isWordTok _ = False + +inlineGroup :: PandocMonad m => LP m Inlines inlineGroup = do ils <- grouped inline if isNull ils @@ -247,386 +629,19 @@ inlineGroup = do -- we need the span so we can detitlecase bibtex entries; -- we need to know when something is {C}apitalized -block :: LP Blocks -block = (mempty <$ comment) - <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) - <|> environment - <|> macro - <|> blockCommand - <|> paragraph - <|> grouped block - <|> (mempty <$ char '&') -- loose & in table environment - - -blocks :: LP Blocks -blocks = mconcat <$> many block - -getRawCommand :: String -> LP String -getRawCommand name' = do - rawargs <- withRaw (many (try (optional sp *> opt)) *> - option "" (try (optional sp *> dimenarg)) *> - many braced) - return $ '\\' : name' ++ snd rawargs - -lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v -lookupListDefault d = (fromMaybe d .) . lookupList - where - lookupList l m = msum $ map (`M.lookup` m) l - -blockCommand :: LP Blocks -blockCommand = try $ do - name <- anyControlSeq - guard $ name /= "begin" && name /= "end" - star <- option "" (string "*" <* optional sp) - let name' = name ++ star - let raw = do - rawcommand <- getRawCommand name' - transformed <- applyMacros' rawcommand - guard $ transformed /= rawcommand - notFollowedBy $ parseFromString inlines transformed - parseFromString blocks transformed - lookupListDefault raw [name',name] blockCommands - -inBrackets :: Inlines -> Inlines -inBrackets x = str "[" <> x <> str "]" - --- eat an optional argument and one or more arguments in braces -ignoreInlines :: String -> (String, LP Inlines) -ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) - where optargs = skipopts *> skipMany (try $ optional sp *> braced) - contseq = '\\':name - doraw = (rawInline "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> withRaw optargs) - -ignoreBlocks :: String -> (String, LP Blocks) -ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) - where optargs = skipopts *> skipMany (try $ optional sp *> braced) - contseq = '\\':name - doraw = (rawBlock "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> withRaw optargs) - -blockCommands :: M.Map String (LP Blocks) -blockCommands = M.fromList $ - [ ("par", mempty <$ skipopts) - , ("title", mempty <$ (skipopts *> - (grouped inline >>= addMeta "title") - <|> (grouped block >>= addMeta "title"))) - , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) - , ("author", mempty <$ (skipopts *> authors)) - -- -- in letter class, temp. store address & sig as title, author - , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) - , ("signature", mempty <$ (skipopts *> authors)) - , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) - -- sectioning - , ("chapter", updateState (\s -> s{ stateHasChapters = True }) - *> section nullAttr 0) - , ("chapter*", updateState (\s -> s{ stateHasChapters = True }) - *> section ("",["unnumbered"],[]) 0) - , ("section", section nullAttr 1) - , ("section*", section ("",["unnumbered"],[]) 1) - , ("subsection", section nullAttr 2) - , ("subsection*", section ("",["unnumbered"],[]) 2) - , ("subsubsection", section nullAttr 3) - , ("subsubsection*", section ("",["unnumbered"],[]) 3) - , ("paragraph", section nullAttr 4) - , ("paragraph*", section ("",["unnumbered"],[]) 4) - , ("subparagraph", section nullAttr 5) - , ("subparagraph*", section ("",["unnumbered"],[]) 5) - -- beamer slides - , ("frametitle", section nullAttr 3) - , ("framesubtitle", section nullAttr 4) - -- letters - , ("opening", (para . trimInlines) <$> (skipopts *> tok)) - , ("closing", skipopts *> closing) - -- - , ("hrule", pure horizontalRule) - , ("strut", pure mempty) - , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("item", skipopts *> looseItem) - , ("documentclass", skipopts *> braced *> preamble) - , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) - , ("caption", skipopts *> setCaption) - , ("PandocStartInclude", startInclude) - , ("PandocEndInclude", endInclude) - , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - ] ++ map ignoreBlocks - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks - [ "newcommand", "renewcommand", "newenvironment", "renewenvironment" - -- newcommand, etc. should be parsed by macro, but we need this - -- here so these aren't parsed as inline commands to ignore - , "special", "pdfannot", "pdfstringdef" - , "bibliographystyle" - , "maketitle", "makeindex", "makeglossary" - , "addcontentsline", "addtocontents", "addtocounter" - -- \ignore{} is used conventionally in literate haskell for definitions - -- that are to be processed by the compiler but not printed. - , "ignore" - , "hyperdef" - , "markboth", "markright", "markleft" - , "newpage" - ] - -addMeta :: ToMetaValue a => String -> a -> LP () -addMeta field val = updateState $ \st -> - st{ stateMeta = addMetaField field val $ stateMeta st } - -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') - -setCaption :: LP Blocks -setCaption = do - ils <- tok - mblabel <- option Nothing $ - try $ spaces' >> controlSeq "label" >> (Just <$> tok) - let ils' = case mblabel of - Just lab -> ils <> spanWith - ("",[],[("data-label", stringify lab)]) mempty - Nothing -> ils - updateState $ \st -> st{ stateCaption = Just ils' } - return mempty - -resetCaption :: LP () -resetCaption = updateState $ \st -> st{ stateCaption = Nothing } - -authors :: LP () -authors = try $ do - char '{' - let oneAuthor = mconcat <$> - many1 (notFollowedBy' (controlSeq "and") >> - (inline <|> mempty <$ blockCommand)) - -- skip e.g. \vspace{10pt} - auths <- sepBy oneAuthor (controlSeq "and") - char '}' - addMeta "author" (map trimInlines auths) - -section :: Attr -> Int -> LP Blocks -section (ident, classes, kvs) lvl = do - hasChapters <- stateHasChapters `fmap` getState - let lvl' = if hasChapters then lvl + 1 else lvl - skipopts - contents <- grouped inline - lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) - attr' <- registerHeader (lab, classes, kvs) contents - return $ headerWith attr' lvl' contents - -inlineCommand :: LP Inlines -inlineCommand = try $ do - name <- anyControlSeq - guard $ name /= "begin" && name /= "end" - guard $ not $ isBlockCommand name - parseRaw <- getOption readerParseRaw - star <- option "" (string "*") - let name' = name ++ star - let raw = do - rawargs <- withRaw - (skipangles *> skipopts *> option "" dimenarg *> many braced) - let rawcommand = '\\' : name ++ star ++ snd rawargs - transformed <- applyMacros' rawcommand - if transformed /= rawcommand - then parseFromString inlines transformed - else if parseRaw - then return $ rawInline "latex" rawcommand - else return mempty - (lookupListDefault mzero [name',name] inlineCommands <* - optional (try (string "{}"))) - <|> raw - -unlessParseRaw :: LP () -unlessParseRaw = getOption readerParseRaw >>= guard . not - -isBlockCommand :: String -> Bool -isBlockCommand s = s `M.member` blockCommands - - -inlineEnvironments :: M.Map String (LP Inlines) -inlineEnvironments = M.fromList - [ ("displaymath", mathEnv id Nothing "displaymath") - , ("math", math <$> verbEnv "math") - , ("equation", mathEnv id Nothing "equation") - , ("equation*", mathEnv id Nothing "equation*") - , ("gather", mathEnv id (Just "gathered") "gather") - , ("gather*", mathEnv id (Just "gathered") "gather*") - , ("multline", mathEnv id (Just "gathered") "multline") - , ("multline*", mathEnv id (Just "gathered") "multline*") - , ("eqnarray", mathEnv id (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*") - , ("align", mathEnv id (Just "aligned") "align") - , ("align*", mathEnv id (Just "aligned") "align*") - , ("alignat", mathEnv id (Just "aligned") "alignat") - , ("alignat*", mathEnv id (Just "aligned") "alignat*") - ] - -inlineCommands :: M.Map String (LP Inlines) -inlineCommands = M.fromList $ - [ ("emph", extractSpaces emph <$> tok) - , ("textit", extractSpaces emph <$> tok) - , ("textsl", extractSpaces emph <$> tok) - , ("textsc", extractSpaces smallcaps <$> tok) - , ("sout", extractSpaces strikeout <$> tok) - , ("textsuperscript", extractSpaces superscript <$> tok) - , ("textsubscript", extractSpaces subscript <$> tok) - , ("textbackslash", lit "\\") - , ("backslash", lit "\\") - , ("slash", lit "/") - , ("textbf", extractSpaces strong <$> tok) - , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) - , ("ldots", lit "…") - , ("dots", lit "…") - , ("mdots", lit "…") - , ("sim", lit "~") - , ("label", unlessParseRaw >> (inBrackets <$> tok)) - , ("ref", unlessParseRaw >> (inBrackets <$> tok)) - , ("noindent", unlessParseRaw >> return mempty) - , ("textgreek", tok) - , ("sep", lit ",") - , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty - , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) - , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) - , ("ensuremath", mathInline braced) - , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) - , ("P", lit "¶") - , ("S", lit "§") - , ("$", lit "$") - , ("%", lit "%") - , ("&", lit "&") - , ("#", lit "#") - , ("_", lit "_") - , ("{", lit "{") - , ("}", lit "}") - -- old TeX commands - , ("em", extractSpaces emph <$> inlines) - , ("it", extractSpaces emph <$> inlines) - , ("sl", extractSpaces emph <$> inlines) - , ("bf", extractSpaces strong <$> inlines) - , ("rm", inlines) - , ("itshape", extractSpaces emph <$> inlines) - , ("slshape", extractSpaces emph <$> inlines) - , ("scshape", extractSpaces smallcaps <$> inlines) - , ("bfseries", extractSpaces strong <$> inlines) - , ("/", pure mempty) -- italic correction - , ("aa", lit "å") - , ("AA", lit "Å") - , ("ss", lit "ß") - , ("o", lit "ø") - , ("O", lit "Ø") - , ("L", lit "Ł") - , ("l", lit "ł") - , ("ae", lit "æ") - , ("AE", lit "Æ") - , ("oe", lit "œ") - , ("OE", lit "Œ") - , ("pounds", lit "£") - , ("euro", lit "€") - , ("copyright", lit "©") - , ("textasciicircum", lit "^") - , ("textasciitilde", lit "~") - , ("H", try $ tok >>= accent hungarumlaut) - , ("`", option (str "`") $ try $ tok >>= accent grave) - , ("'", option (str "'") $ try $ tok >>= accent acute) - , ("^", option (str "^") $ try $ tok >>= accent circ) - , ("~", option (str "~") $ try $ tok >>= accent tilde) - , ("\"", option (str "\"") $ try $ tok >>= accent umlaut) - , (".", option (str ".") $ try $ tok >>= accent dot) - , ("=", option (str "=") $ try $ tok >>= accent macron) - , ("c", option (str "c") $ try $ tok >>= accent cedilla) - , ("v", option (str "v") $ try $ tok >>= accent hacek) - , ("u", option (str "u") $ try $ tok >>= accent breve) - , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) - , (",", pure mempty) - , ("@", pure mempty) - , (" ", lit "\160") - , ("ps", pure $ str "PS." <> space) - , ("TeX", lit "TeX") - , ("LaTeX", lit "LaTeX") - , ("bar", lit "|") - , ("textless", lit "<") - , ("textgreater", lit ">") - , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) - , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) - , ("verb", doverb) - , ("lstinline", skipopts *> doverb) - , ("Verb", doverb) - , ("texttt", (code . stringify . toList) <$> tok) - , ("url", (unescapeURL <$> braced) >>= \url -> - pure (link url "" (str url))) - , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> - tok >>= \lab -> - pure (link url "" lab)) - , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . removeDoubleQuotes <$> braced - mkImage options src) - , ("enquote", enquote) - , ("cite", citation "cite" NormalCitation False) - , ("Cite", citation "Cite" NormalCitation False) - , ("citep", citation "citep" NormalCitation False) - , ("citep*", citation "citep*" NormalCitation False) - , ("citeal", citation "citeal" NormalCitation False) - , ("citealp", citation "citealp" NormalCitation False) - , ("citealp*", citation "citealp*" NormalCitation False) - , ("autocite", citation "autocite" NormalCitation False) - , ("smartcite", citation "smartcite" NormalCitation False) - , ("footcite", inNote <$> citation "footcite" NormalCitation False) - , ("parencite", citation "parencite" NormalCitation False) - , ("supercite", citation "supercite" NormalCitation False) - , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) - , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) - , ("citeyear", citation "citeyear" SuppressAuthor False) - , ("autocite*", citation "autocite*" SuppressAuthor False) - , ("cite*", citation "cite*" SuppressAuthor False) - , ("parencite*", citation "parencite*" SuppressAuthor False) - , ("textcite", citation "textcite" AuthorInText False) - , ("citet", citation "citet" AuthorInText False) - , ("citet*", citation "citet*" AuthorInText False) - , ("citealt", citation "citealt" AuthorInText False) - , ("citealt*", citation "citealt*" AuthorInText False) - , ("textcites", citation "textcites" AuthorInText True) - , ("cites", citation "cites" NormalCitation True) - , ("autocites", citation "autocites" NormalCitation True) - , ("footcites", inNote <$> citation "footcites" NormalCitation True) - , ("parencites", citation "parencites" NormalCitation True) - , ("supercites", citation "supercites" NormalCitation True) - , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) - , ("Autocite", citation "Autocite" NormalCitation False) - , ("Smartcite", citation "Smartcite" NormalCitation False) - , ("Footcite", citation "Footcite" NormalCitation False) - , ("Parencite", citation "Parencite" NormalCitation False) - , ("Supercite", citation "Supercite" NormalCitation False) - , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) - , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) - , ("Citeyear", citation "Citeyear" SuppressAuthor False) - , ("Autocite*", citation "Autocite*" SuppressAuthor False) - , ("Cite*", citation "Cite*" SuppressAuthor False) - , ("Parencite*", citation "Parencite*" SuppressAuthor False) - , ("Textcite", citation "Textcite" AuthorInText False) - , ("Textcites", citation "Textcites" AuthorInText True) - , ("Cites", citation "Cites" NormalCitation True) - , ("Autocites", citation "Autocites" NormalCitation True) - , ("Footcites", citation "Footcites" NormalCitation True) - , ("Parencites", citation "Parencites" NormalCitation True) - , ("Supercites", citation "Supercites" NormalCitation True) - , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) - , ("citetext", complexNatbibCitation NormalCitation) - , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> - complexNatbibCitation AuthorInText) - <|> citation "citeauthor" AuthorInText False) - , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= - addMeta "nocite")) - ] ++ map ignoreInlines - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks: - [ "index" ] +doLHSverb :: PandocMonad m => LP m Inlines +doLHSverb = + (codeWith ("",["haskell"],[]) . T.unpack . untokenize) + <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') -mkImage :: [(String, String)] -> String -> LP Inlines +mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines mkImage options src = do - let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") - _ -> (k, v) - let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options + let replaceTextwidth (k,v) = + case numUnit v of + Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + _ -> (k, v) + let kvs = map replaceTextwidth + $ filter (\(k,_) -> k `elem` ["width", "height"]) options let attr = ("",[], kvs) let alt = str "image" case takeExtension src of @@ -635,40 +650,164 @@ mkImage options src = do return $ imageWith attr (addExtension src defaultExt) "" alt _ -> return $ imageWith attr src "" alt -inNote :: Inlines -> Inlines -inNote ils = - note $ para $ ils <> str "." +doxspace :: PandocMonad m => LP m Inlines +doxspace = + (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty + where startsWithLetter (Tok _ Word t) = + case T.uncons t of + Just (c, _) | isLetter c -> True + _ -> False + startsWithLetter _ = False -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" -enquote :: LP Inlines +-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" +dosiunitx :: PandocMonad m => LP m Inlines +dosiunitx = do + skipopts + value <- tok + valueprefix <- option "" $ bracketed tok + unit <- tok + let emptyOr160 "" = "" + emptyOr160 _ = "\160" + return . mconcat $ [valueprefix, + emptyOr160 valueprefix, + value, + emptyOr160 unit, + unit] + +lit :: String -> LP m Inlines +lit = pure . str + +removeDoubleQuotes :: Text -> Text +removeDoubleQuotes t = + Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" + +doubleQuote :: PandocMonad m => LP m Inlines +doubleQuote = + quoted' doubleQuoted (try $ count 2 $ symbol '`') + (void $ try $ count 2 $ symbol '\'') + <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ sequence [symbol '"', symbol '`']) + (void $ try $ sequence [symbol '"', symbol '\'']) + +singleQuote :: PandocMonad m => LP m Inlines +singleQuote = + quoted' singleQuoted ((:[]) <$> symbol '`') + (try $ symbol '\'' >> + notFollowedBy (satisfyTok startsWithLetter)) + <|> quoted' singleQuoted ((:[]) <$> symbol '‘') + (try $ symbol '’' >> + notFollowedBy (satisfyTok startsWithLetter)) + where startsWithLetter (Tok _ Word t) = + case T.uncons t of + Just (c, _) | isLetter c -> True + _ -> False + startsWithLetter _ = False + +quoted' :: PandocMonad m + => (Inlines -> Inlines) + -> LP m [Tok] + -> LP m () + -> LP m Inlines +quoted' f starter ender = do + startchs <- (T.unpack . untokenize) <$> starter + smart <- extensionEnabled Ext_smart <$> getOption readerExtensions + if smart + then do + ils <- many (notFollowedBy ender >> inline) + (ender >> return (f (mconcat ils))) <|> + (<> mconcat ils) <$> + lit (case startchs of + "``" -> "“" + "`" -> "‘" + cs -> cs) + else lit startchs + +enquote :: PandocMonad m => LP m Inlines enquote = do skipopts - context <- stateQuoteContext <$> getState - if context == InDoubleQuote + quoteContext <- sQuoteContext <$> getState + if quoteContext == InDoubleQuote then singleQuoted <$> withQuoteContext InSingleQuote tok else doubleQuoted <$> withQuoteContext InDoubleQuote tok -doverb :: LP Inlines +doAcronym :: PandocMonad m => String -> LP m Inlines +doAcronym form = do + acro <- braced + return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), + ("acronym-form", "singular+" ++ form)]) + $ str $ toksToString acro] + +doAcronymPlural :: PandocMonad m => String -> LP m Inlines +doAcronymPlural form = do + acro <- braced + plural <- lit "s" + return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), + ("acronym-form", "plural+" ++ form)]) $ + mconcat [str $ toksToString acro, plural]] + +doverb :: PandocMonad m => LP m Inlines doverb = do - marker <- anyChar - code <$> manyTill (satisfy (/='\n')) (char marker) - -doLHSverb :: LP Inlines -doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|') + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + withVerbatimMode $ + (code . T.unpack . untokenize) <$> + manyTill (verbTok marker) (symbol marker) + +verbTok :: PandocMonad m => Char -> LP m Tok +verbTok stopchar = do + t@(Tok pos toktype txt) <- satisfyTok (not . isNewlineTok) + case T.findIndex (== stopchar) txt of + Nothing -> return t + Just i -> do + let (t1, t2) = T.splitAt i txt + inp <- getInput + setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar) + : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp + return $ Tok pos toktype t1 + +dolstinline :: PandocMonad m => LP m Inlines +dolstinline = do + options <- option [] keyvals + let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + let stopchar = if marker == '{' then '}' else marker + withVerbatimMode $ + (codeWith ("",classes,[]) . T.unpack . untokenize) <$> + manyTill (verbTok stopchar) (symbol stopchar) + +keyval :: PandocMonad m => LP m (String, String) +keyval = try $ do + Tok _ Word key <- satisfyTok isWordTok + let isSpecSym (Tok _ Symbol t) = t /= "]" && t /= "," + isSpecSym _ = False + optional sp + val <- option [] $ do + symbol '=' + optional sp + braced <|> many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym + <|> anyControlSeq) + optional sp + optional (symbol ',') + optional sp + return (T.unpack key, T.unpack . untokenize $ val) -lit :: String -> LP Inlines -lit = pure . str +keyvals :: PandocMonad m => LP m [(String, String)] +keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') -accent :: (Char -> String) -> Inlines -> LP Inlines -accent f ils = +accent :: PandocMonad m => Char -> (Char -> String) -> LP m Inlines +accent c f = try $ do + ils <- tok case toList ils of (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) - [] -> mzero + [Space] -> return $ str [c] + [] -> return $ str [c] _ -> return ils grave :: Char -> String @@ -855,6 +994,19 @@ hacek 'Z' = "Ž" hacek 'z' = "ž" hacek c = [c] +ogonek :: Char -> String +ogonek 'a' = "ą" +ogonek 'e' = "ę" +ogonek 'o' = "ǫ" +ogonek 'i' = "į" +ogonek 'u' = "ų" +ogonek 'A' = "Ą" +ogonek 'E' = "Ę" +ogonek 'I' = "Į" +ogonek 'O' = "Ǫ" +ogonek 'U' = "Ų" +ogonek c = [c] + breve :: Char -> String breve 'A' = "Ă" breve 'a' = "ă" @@ -870,368 +1022,1181 @@ breve 'U' = "Ŭ" breve 'u' = "ŭ" breve c = [c] -tok :: LP Inlines -tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar +toksToString :: [Tok] -> String +toksToString = T.unpack . untokenize + +mathDisplay :: String -> Inlines +mathDisplay = displayMath . trim -opt :: LP Inlines -opt = bracketed inline +mathInline :: String -> Inlines +mathInline = math . trim -rawopt :: LP String +dollarsMath :: PandocMonad m => LP m Inlines +dollarsMath = do + symbol '$' + display <- option False (True <$ symbol '$') + contents <- trim . toksToString <$> + many (notFollowedBy (symbol '$') >> anyTok) + if display + then + mathDisplay contents <$ try (symbol '$' >> symbol '$') + <|> (guard (null contents) >> return (mathInline "")) + else mathInline contents <$ symbol '$' + +-- citations + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +simpleCiteArgs :: PandocMonad m => LP m [Citation] +simpleCiteArgs = try $ do + first <- optionMaybe $ toList <$> opt + second <- optionMaybe $ toList <$> opt + keys <- try $ bgroup *> manyTill citationLabel egroup + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> (mempty, s ) + (Just s , Just t ) -> (s , t ) + _ -> (mempty, mempty) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + +citationLabel :: PandocMonad m => LP m String +citationLabel = do + optional spaces + toksToString <$> + (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) + <* optional spaces + <* optional (symbol ',') + <* optional spaces) + where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char] + +cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] +cites mode multi = try $ do + cits <- if multi + then many1 simpleCiteArgs + else count 1 simpleCiteArgs + let cs = concat cits + return $ case mode of + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs + +citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation name mode multi = do + (c,raw) <- withRaw $ cites mode multi + return $ cite c (rawInline "latex" $ "\\" ++ name ++ toksToString raw) + +handleCitationPart :: Inlines -> [Citation] +handleCitationPart ils = + let isCite Cite{} = True + isCite _ = False + (pref, rest) = break isCite (toList ils) + in case rest of + (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs + _ -> [] + +complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines +complexNatbibCitation mode = try $ do + (cs, raw) <- + withRaw $ concat <$> do + bgroup + items <- mconcat <$> + many1 (notFollowedBy (symbol ';') >> inline) + `sepBy1` (symbol ';') + egroup + return $ map handleCitationPart items + case cs of + [] -> mzero + (c:cits) -> return $ cite (c{ citationMode = mode }:cits) + (rawInline "latex" $ "\\citetext" ++ toksToString raw) + +inNote :: Inlines -> Inlines +inNote ils = + note $ para $ ils <> str "." + +inlineCommand' :: PandocMonad m => LP m Inlines +inlineCommand' = try $ do + Tok _ (CtrlSeq name) cmd <- anyControlSeq + guard $ name /= "begin" && name /= "end" + star <- option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] -- check non-starred as fallback + let raw = do + guard $ isInlineCommand name || not (isBlockCommand name) + rawcommand <- getRawCommand name (cmd <> star) + (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand)) + <|> ignore rawcommand + lookupListDefault raw names inlineCommands + +tok :: PandocMonad m => LP m Inlines +tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' + where singleChar' = do + Tok _ _ t <- singleChar + return (str (T.unpack t)) + +singleChar :: PandocMonad m => LP m Tok +singleChar = try $ do + Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) + guard $ not $ toktype == Symbol && + T.any (`Set.member` specialChars) t + if T.length t > 1 + then do + let (t1, t2) = (T.take 1 t, T.drop 1 t) + inp <- getInput + setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp + return $ Tok pos toktype t1 + else return $ Tok pos toktype t + +opt :: PandocMonad m => LP m Inlines +opt = bracketed inline <|> (str . T.unpack <$> rawopt) + +rawopt :: PandocMonad m => LP m Text rawopt = do - contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|> - try (string "\\[") <|> rawopt) + inner <- untokenize <$> bracketedToks optional sp - return $ "[" ++ contents ++ "]" + return $ "[" <> inner <> "]" -skipopts :: LP () +skipopts :: PandocMonad m => LP m () skipopts = skipMany rawopt -- opts in angle brackets are used in beamer -rawangle :: LP () +rawangle :: PandocMonad m => LP m () rawangle = try $ do - char '<' - skipMany (noneOf ">") - char '>' - return () + symbol '<' + () <$ manyTill anyTok (symbol '>') -skipangles :: LP () +skipangles :: PandocMonad m => LP m () skipangles = skipMany rawangle -inlineText :: LP Inlines -inlineText = str <$> many1 inlineChar +ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore raw = do + pos <- getPosition + report $ SkippedContent raw pos + return mempty -inlineChar :: LP Char -inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n" +withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok]) +withRaw parser = do + inp <- getInput + result <- parser + nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok) + let raw = takeWhile (/= nxt) inp + return (result, raw) -environment :: LP Blocks -environment = do - controlSeq "begin" - name <- braced - M.findWithDefault mzero name environments - <|> rawEnv name +inBrackets :: Inlines -> Inlines +inBrackets x = str "[" <> x <> str "]" + +unescapeURL :: String -> String +unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs + where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) +unescapeURL (x:xs) = x:unescapeURL xs +unescapeURL [] = "" -inlineEnvironment :: LP Inlines +mathEnvWith :: PandocMonad m + => (Inlines -> a) -> Maybe Text -> Text -> LP m a +mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name + where inner x = case innerEnv of + Nothing -> x + Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ + "\\end{" ++ T.unpack y ++ "}" + +mathEnv :: PandocMonad m => Text -> LP m String +mathEnv name = do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ T.unpack $ untokenize res + +inlineEnvironment :: PandocMonad m => LP m Inlines inlineEnvironment = try $ do controlSeq "begin" - name <- braced + name <- untokenize <$> braced M.findWithDefault mzero name inlineEnvironments -rawEnv :: String -> LP Blocks -rawEnv name = do - parseRaw <- getOption readerParseRaw - rawOptions <- mconcat <$> many rawopt - let addBegin x = "\\begin{" ++ name ++ "}" ++ rawOptions ++ x - if parseRaw - then (rawBlock "latex" . addBegin) <$> - (withRaw (env name blocks) >>= applyMacros' . snd) - else env name blocks - ----- - -type IncludeParser = ParserT String [String] IO String - --- | Replace "include" commands with file contents. -handleIncludes :: String -> IO (Either PandocError String) -handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s - -includeParser' :: IncludeParser -includeParser' = - concat <$> many (comment' <|> escaped' <|> blob' <|> include' - <|> startMarker' <|> endMarker' - <|> verbCmd' <|> verbatimEnv' <|> backslash') - -comment' :: IncludeParser -comment' = do - char '%' - xs <- manyTill anyChar newline - return ('%':xs ++ "\n") - -escaped' :: IncludeParser -escaped' = try $ string "\\%" <|> string "\\\\" - -verbCmd' :: IncludeParser -verbCmd' = fmap snd <$> - withRaw $ try $ do - string "\\verb" - c <- anyChar - manyTill anyChar (char c) - -verbatimEnv' :: IncludeParser -verbatimEnv' = fmap snd <$> - withRaw $ try $ do - string "\\begin" - name <- braced' - guard $ name `elem` ["verbatim", "Verbatim", "BVerbatim", - "lstlisting", "minted", "alltt", "comment"] - manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}") - -blob' :: IncludeParser -blob' = try $ many1 (noneOf "\\%") - -backslash' :: IncludeParser -backslash' = string "\\" - -braced' :: IncludeParser -braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') +inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) +inlineEnvironments = M.fromList [ + ("displaymath", mathEnvWith id Nothing "displaymath") + , ("math", math <$> mathEnv "math") + , ("equation", mathEnvWith id Nothing "equation") + , ("equation*", mathEnvWith id Nothing "equation*") + , ("gather", mathEnvWith id (Just "gathered") "gather") + , ("gather*", mathEnvWith id (Just "gathered") "gather*") + , ("multline", mathEnvWith id (Just "gathered") "multline") + , ("multline*", mathEnvWith id (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") + , ("align", mathEnvWith id (Just "aligned") "align") + , ("align*", mathEnvWith id (Just "aligned") "align*") + , ("alignat", mathEnvWith id (Just "aligned") "alignat") + , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") + ] -maybeAddExtension :: String -> FilePath -> FilePath -maybeAddExtension ext fp = - if null (takeExtension fp) - then addExtension fp ext - else fp +inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) +inlineCommands = M.union inlineLanguageCommands $ M.fromList + [ ("emph", extractSpaces emph <$> tok) + , ("textit", extractSpaces emph <$> tok) + , ("textsl", extractSpaces emph <$> tok) + , ("textsc", extractSpaces smallcaps <$> tok) + , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok) + , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok) + , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok) + , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) + , ("texttt", ttfamily) + , ("sout", extractSpaces strikeout <$> tok) + , ("alert", skipangles >> spanWith ("",["alert"],[]) <$> tok) -- beamer + , ("lq", return (str "‘")) + , ("rq", return (str "’")) + , ("textquoteleft", return (str "‘")) + , ("textquoteright", return (str "’")) + , ("textquotedblleft", return (str "“")) + , ("textquotedblright", return (str "”")) + , ("textsuperscript", extractSpaces superscript <$> tok) + , ("textsubscript", extractSpaces subscript <$> tok) + , ("textbackslash", lit "\\") + , ("backslash", lit "\\") + , ("slash", lit "/") + , ("textbf", extractSpaces strong <$> tok) + , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) + , ("ldots", lit "…") + , ("vdots", lit "\8942") + , ("dots", lit "…") + , ("mdots", lit "…") + , ("sim", lit "~") + , ("sep", lit ",") + , ("label", rawInlineOr "label" dolabel) + , ("ref", rawInlineOr "ref" $ doref "ref") + , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty + , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty + , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty + , ("lettrine", optional opt >> extractSpaces (spanWith ("",["lettrine"],[])) <$> tok) + , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . toksToString <$> braced) + , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) + , ("P", lit "¶") + , ("S", lit "§") + , ("$", lit "$") + , ("%", lit "%") + , ("&", lit "&") + , ("#", lit "#") + , ("_", lit "_") + , ("{", lit "{") + , ("}", lit "}") + -- old TeX commands + , ("em", extractSpaces emph <$> inlines) + , ("it", extractSpaces emph <$> inlines) + , ("sl", extractSpaces emph <$> inlines) + , ("bf", extractSpaces strong <$> inlines) + , ("rm", inlines) + , ("itshape", extractSpaces emph <$> inlines) + , ("slshape", extractSpaces emph <$> inlines) + , ("scshape", extractSpaces smallcaps <$> inlines) + , ("bfseries", extractSpaces strong <$> inlines) + , ("/", pure mempty) -- italic correction + , ("aa", lit "å") + , ("AA", lit "Å") + , ("ss", lit "ß") + , ("o", lit "ø") + , ("O", lit "Ø") + , ("L", lit "Ł") + , ("l", lit "ł") + , ("ae", lit "æ") + , ("AE", lit "Æ") + , ("oe", lit "œ") + , ("OE", lit "Œ") + , ("pounds", lit "£") + , ("euro", lit "€") + , ("copyright", lit "©") + , ("textasciicircum", lit "^") + , ("textasciitilde", lit "~") + , ("H", accent '\779' hungarumlaut) + , ("`", accent '`' grave) + , ("'", accent '\'' acute) + , ("^", accent '^' circ) + , ("~", accent '~' tilde) + , ("\"", accent '\776' umlaut) + , (".", accent '\775' dot) + , ("=", accent '\772' macron) + , ("c", accent '\807' cedilla) + , ("v", accent 'ˇ' hacek) + , ("u", accent '\774' breve) + , ("k", accent '\808' ogonek) + , ("textogonekcentered", accent '\808' ogonek) + , ("i", lit "i") + , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState + guard $ not inTableCell + optional opt + spaces)) + , (",", lit "\8198") + , ("@", pure mempty) + , (" ", lit "\160") + , ("ps", pure $ str "PS." <> space) + , ("TeX", lit "TeX") + , ("LaTeX", lit "LaTeX") + , ("bar", lit "|") + , ("textless", lit "<") + , ("textgreater", lit ">") + , ("thanks", skipopts >> note <$> grouped block) + , ("footnote", skipopts >> note <$> grouped block) + , ("verb", doverb) + , ("lstinline", dolstinline) + , ("Verb", doverb) + , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url -> + pure (link url "" (str url))) + , ("href", (unescapeURL . toksToString <$> + braced <* optional sp) >>= \url -> + tok >>= \lab -> pure (link url "" lab)) + , ("includegraphics", do options <- option [] keyvals + src <- unescapeURL . T.unpack . + removeDoubleQuotes . untokenize <$> braced + mkImage options src) + , ("enquote", enquote) + , ("figurename", doTerm Translations.Figure) + , ("prefacename", doTerm Translations.Preface) + , ("refname", doTerm Translations.References) + , ("bibname", doTerm Translations.Bibliography) + , ("chaptername", doTerm Translations.Chapter) + , ("partname", doTerm Translations.Part) + , ("contentsname", doTerm Translations.Contents) + , ("listfigurename", doTerm Translations.ListOfFigures) + , ("listtablename", doTerm Translations.ListOfTables) + , ("indexname", doTerm Translations.Index) + , ("abstractname", doTerm Translations.Abstract) + , ("tablename", doTerm Translations.Table) + , ("enclname", doTerm Translations.Encl) + , ("ccname", doTerm Translations.Cc) + , ("headtoname", doTerm Translations.To) + , ("pagename", doTerm Translations.Page) + , ("seename", doTerm Translations.See) + , ("seealsoname", doTerm Translations.SeeAlso) + , ("proofname", doTerm Translations.Proof) + , ("glossaryname", doTerm Translations.Glossary) + , ("lstlistingname", doTerm Translations.Listing) + , ("cite", citation "cite" NormalCitation False) + , ("Cite", citation "Cite" NormalCitation False) + , ("citep", citation "citep" NormalCitation False) + , ("citep*", citation "citep*" NormalCitation False) + , ("citeal", citation "citeal" NormalCitation False) + , ("citealp", citation "citealp" NormalCitation False) + , ("citealp*", citation "citealp*" NormalCitation False) + , ("autocite", citation "autocite" NormalCitation False) + , ("smartcite", citation "smartcite" NormalCitation False) + , ("footcite", inNote <$> citation "footcite" NormalCitation False) + , ("parencite", citation "parencite" NormalCitation False) + , ("supercite", citation "supercite" NormalCitation False) + , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) + , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) + , ("citeyear", citation "citeyear" SuppressAuthor False) + , ("autocite*", citation "autocite*" SuppressAuthor False) + , ("cite*", citation "cite*" SuppressAuthor False) + , ("parencite*", citation "parencite*" SuppressAuthor False) + , ("textcite", citation "textcite" AuthorInText False) + , ("citet", citation "citet" AuthorInText False) + , ("citet*", citation "citet*" AuthorInText False) + , ("citealt", citation "citealt" AuthorInText False) + , ("citealt*", citation "citealt*" AuthorInText False) + , ("textcites", citation "textcites" AuthorInText True) + , ("cites", citation "cites" NormalCitation True) + , ("autocites", citation "autocites" NormalCitation True) + , ("footcites", inNote <$> citation "footcites" NormalCitation True) + , ("parencites", citation "parencites" NormalCitation True) + , ("supercites", citation "supercites" NormalCitation True) + , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) + , ("Autocite", citation "Autocite" NormalCitation False) + , ("Smartcite", citation "Smartcite" NormalCitation False) + , ("Footcite", citation "Footcite" NormalCitation False) + , ("Parencite", citation "Parencite" NormalCitation False) + , ("Supercite", citation "Supercite" NormalCitation False) + , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) + , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) + , ("Citeyear", citation "Citeyear" SuppressAuthor False) + , ("Autocite*", citation "Autocite*" SuppressAuthor False) + , ("Cite*", citation "Cite*" SuppressAuthor False) + , ("Parencite*", citation "Parencite*" SuppressAuthor False) + , ("Textcite", citation "Textcite" AuthorInText False) + , ("Textcites", citation "Textcites" AuthorInText True) + , ("Cites", citation "Cites" NormalCitation True) + , ("Autocites", citation "Autocites" NormalCitation True) + , ("Footcites", citation "Footcites" NormalCitation True) + , ("Parencites", citation "Parencites" NormalCitation True) + , ("Supercites", citation "Supercites" NormalCitation True) + , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) + , ("citetext", complexNatbibCitation NormalCitation) + , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> + complexNatbibCitation AuthorInText) + <|> citation "citeauthor" AuthorInText False) + , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= + addMeta "nocite")) + , ("hyperlink", hyperlink) + , ("hypertarget", hypertargetInline) + -- glossaries package + , ("gls", doAcronym "short") + , ("Gls", doAcronym "short") + , ("glsdesc", doAcronym "long") + , ("Glsdesc", doAcronym "long") + , ("GLSdesc", doAcronym "long") + , ("acrlong", doAcronym "long") + , ("Acrlong", doAcronym "long") + , ("acrfull", doAcronym "full") + , ("Acrfull", doAcronym "full") + , ("acrshort", doAcronym "abbrv") + , ("Acrshort", doAcronym "abbrv") + , ("glspl", doAcronymPlural "short") + , ("Glspl", doAcronymPlural "short") + , ("glsdescplural", doAcronymPlural "long") + , ("Glsdescplural", doAcronymPlural "long") + , ("GLSdescplural", doAcronymPlural "long") + -- acronyms package + , ("ac", doAcronym "short") + , ("acf", doAcronym "full") + , ("acs", doAcronym "abbrv") + , ("acp", doAcronymPlural "short") + , ("acfp", doAcronymPlural "full") + , ("acsp", doAcronymPlural "abbrv") + -- siuntix + , ("SI", dosiunitx) + -- hyphenat + , ("bshyp", lit "\\\173") + , ("fshyp", lit "/\173") + , ("dothyp", lit ".\173") + , ("colonhyp", lit ":\173") + , ("hyp", lit "-") + , ("nohyphens", tok) + , ("textnhtt", ttfamily) + , ("nhttfamily", ttfamily) + -- LaTeX colors + , ("textcolor", coloredInline "color") + , ("colorbox", coloredInline "background-color") + -- fontawesome + , ("faCheck", lit "\10003") + , ("faClose", lit "\10007") + -- xspace + , ("xspace", doxspace) + -- etoolbox + , ("ifstrequal", ifstrequal) + , ("newtoggle", braced >>= newToggle) + , ("toggletrue", braced >>= setToggle True) + , ("togglefalse", braced >>= setToggle False) + , ("iftoggle", try $ ifToggle >> inline) + -- biblatex misc + , ("RN", romanNumeralUpper) + , ("Rn", romanNumeralLower) + -- babel + , ("foreignlanguage", foreignlanguage) + ] -include' :: IncludeParser -include' = do - fs' <- try $ do - char '\\' - name <- try (string "include") - <|> try (string "input") - <|> string "usepackage" - -- skip options - skipMany $ try $ char '[' *> manyTill anyChar (char ']') - fs <- (map trim . splitBy (==',')) <$> braced' - return $ if name == "usepackage" - then map (maybeAddExtension ".sty") fs - else map (maybeAddExtension ".tex") fs - pos <- getPosition - containers <- getState - let fn = case containers of - (f':_) -> f' - [] -> "input" - -- now process each include file in order... - rest <- getInput - results' <- forM fs' (\f -> do - when (f `elem` containers) $ - fail "Include file loop!" - contents <- lift $ readTeXFile f - return $ "\\PandocStartInclude{" ++ f ++ "}" ++ - contents ++ "\\PandocEndInclude{" ++ - fn ++ "}{" ++ show (sourceLine pos) ++ "}{" - ++ show (sourceColumn pos) ++ "}") - setInput $ concat results' ++ rest - return "" - -startMarker' :: IncludeParser -startMarker' = try $ do - string "\\PandocStartInclude" - fn <- braced' - updateState (fn:) - setPosition $ newPos fn 1 1 - return $ "\\PandocStartInclude{" ++ fn ++ "}" - -endMarker' :: IncludeParser -endMarker' = try $ do - string "\\PandocEndInclude" - fn <- braced' - ln <- braced' - co <- braced' - updateState tail - setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co) - return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++ - co ++ "}" - -readTeXFile :: FilePath -> IO String -readTeXFile f = do - texinputs <- E.catch (getEnv "TEXINPUTS") $ \(_ :: E.SomeException) -> - return "." - let ds = splitBy (==':') texinputs - readFileFromDirs ds f - -readFileFromDirs :: [FilePath] -> FilePath -> IO String -readFileFromDirs [] _ = return "" -readFileFromDirs (d:ds) f = - E.catch (UTF8.readFile $ d </> f) $ \(_ :: E.SomeException) -> - readFileFromDirs ds f - ----- - -keyval :: LP (String, String) -keyval = try $ do - key <- many1 alphaNum - val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\') - skipMany spaceChar - optional (char ',') - skipMany spaceChar - return (key, val) +foreignlanguage :: PandocMonad m => LP m Inlines +foreignlanguage = do + babelLang <- T.unpack . untokenize <$> braced + case babelLangToBCP47 babelLang of + Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok + _ -> tok +inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines) +inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47 + where + mk (polyglossia, bcp47Func) = + ("text" <> T.pack polyglossia, inlineLanguage bcp47Func) + +inlineLanguage :: PandocMonad m => (String -> Lang) -> LP m Inlines +inlineLanguage bcp47Func = do + o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + <$> rawopt + let lang = renderLang $ bcp47Func o + extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok + +hyperlink :: PandocMonad m => LP m Inlines +hyperlink = try $ do + src <- toksToString <$> braced + lab <- tok + return $ link ('#':src) "" lab + +hypertargetBlock :: PandocMonad m => LP m Blocks +hypertargetBlock = try $ do + ref <- toksToString <$> braced + bs <- grouped block + case toList bs of + [Header 1 (ident,_,_) _] | ident == ref -> return bs + _ -> return $ divWith (ref, [], []) bs + +hypertargetInline :: PandocMonad m => LP m Inlines +hypertargetInline = try $ do + ref <- toksToString <$> braced + ils <- grouped inline + return $ spanWith (ref, [], []) ils -keyvals :: LP [(String, String)] -keyvals = try $ char '[' *> manyTill keyval (char ']') +romanNumeralUpper :: (PandocMonad m) => LP m Inlines +romanNumeralUpper = + str . toRomanNumeral <$> romanNumeralArg -alltt :: String -> LP Blocks -alltt t = walk strToCode <$> parseFromString blocks - (substitute " " "\\ " $ substitute "%" "\\%" $ - intercalate "\\\\\n" $ lines t) - where strToCode (Str s) = Code nullAttr s - strToCode x = x +romanNumeralLower :: (PandocMonad m) => LP m Inlines +romanNumeralLower = + str . map toLower . toRomanNumeral <$> romanNumeralArg -rawLaTeXBlock :: LP String -rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) +romanNumeralArg :: (PandocMonad m) => LP m Int +romanNumeralArg = spaces *> (parser <|> inBraces) + where + inBraces = do + symbol '{' + spaces + res <- parser + spaces + symbol '}' + return res + parser = do + Tok _ Word s <- satisfyTok isWordTok + let (digits, rest) = T.span isDigit s + unless (T.null rest) $ + fail "Non-digits in argument to \\Rn or \\RN" + safeRead $ T.unpack digits + +newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a +newToggle name = do + updateState $ \st -> + st{ sToggles = M.insert (toksToString name) False (sToggles st) } + return mempty -rawLaTeXInline :: LP Inline -rawLaTeXInline = do - raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand) - RawInline "latex" <$> applyMacros' raw +setToggle :: (Monoid a, PandocMonad m) => Bool -> [Tok] -> LP m a +setToggle on name = do + updateState $ \st -> + st{ sToggles = M.adjust (const on) (toksToString name) (sToggles st) } + return mempty -addImageCaption :: Blocks -> LP Blocks -addImageCaption = walkM go - where go (Image attr alt (src,tit)) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Image attr (toList ils) (src, "fig:") - Nothing -> Image attr alt (src,tit) - go x = return x +ifToggle :: PandocMonad m => LP m () +ifToggle = do + name <- braced + spaces + yes <- braced + spaces + no <- braced + toggles <- sToggles <$> getState + inp <- getInput + let name' = toksToString name + case M.lookup name' toggles of + Just True -> setInput (yes ++ inp) + Just False -> setInput (no ++ inp) + Nothing -> do + pos <- getPosition + report $ UndefinedToggle name' pos + return () -addTableCaption :: Blocks -> LP Blocks -addTableCaption = walkM go - where go (Table c als ws hs rs) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Table (toList ils) als ws hs rs - Nothing -> Table c als ws hs rs - go x = return x +doTerm :: PandocMonad m => Translations.Term -> LP m Inlines +doTerm term = str <$> translateTerm term + +ifstrequal :: (PandocMonad m, Monoid a) => LP m a +ifstrequal = do + str1 <- tok + str2 <- tok + ifequal <- braced + ifnotequal <- braced + if str1 == str2 + then getInput >>= setInput . (ifequal ++) + else getInput >>= setInput . (ifnotequal ++) + return mempty -environments :: M.Map String (LP Blocks) -environments = M.fromList - [ ("document", env "document" blocks <* skipMany anyChar) - , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) - , ("letter", env "letter" letterContents) - , ("minipage", env "minipage" $ - skipopts *> spaces' *> optional braced *> spaces' *> blocks) - , ("figure", env "figure" $ - resetCaption *> skipopts *> blocks >>= addImageCaption) - , ("center", env "center" blocks) - , ("longtable", env "longtable" $ - resetCaption *> simpTable False >>= addTableCaption) - , ("table", env "table" $ - resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular*", env "tabular" $ simpTable True) - , ("tabular", env "tabular" $ simpTable False) - , ("quote", blockQuote <$> env "quote" blocks) - , ("quotation", blockQuote <$> env "quotation" blocks) - , ("verse", blockQuote <$> env "verse" blocks) - , ("itemize", bulletList <$> listenv "itemize" (many item)) - , ("description", definitionList <$> listenv "description" (many descItem)) - , ("enumerate", orderedList') - , ("alltt", alltt =<< verbEnv "alltt") - , ("code", guardEnabled Ext_literate_haskell *> - (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> - verbEnv "code")) - , ("comment", mempty <$ verbEnv "comment") - , ("verbatim", codeBlock <$> verbEnv "verbatim") - , ("Verbatim", fancyverbEnv "Verbatim") - , ("BVerbatim", fancyverbEnv "BVerbatim") - , ("lstlisting", do options <- option [] keyvals - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - ++ maybeToList (lookup "language" options - >>= fromListingsLanguage) - let attr = (fromMaybe "" (lookup "label" options),classes,kvs) - codeBlockWith attr <$> verbEnv "lstlisting") - , ("minted", do options <- option [] keyvals - lang <- grouped (many1 $ satisfy (/='}')) - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ - [ "numberLines" | - lookup "linenos" options == Just "true" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv "minted") - , ("obeylines", parseFromString - (para . trimInlines . mconcat <$> many inline) =<< - intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnv para Nothing "displaymath") - , ("equation", mathEnv para Nothing "equation") - , ("equation*", mathEnv para Nothing "equation*") - , ("gather", mathEnv para (Just "gathered") "gather") - , ("gather*", mathEnv para (Just "gathered") "gather*") - , ("multline", mathEnv para (Just "gathered") "multline") - , ("multline*", mathEnv para (Just "gathered") "multline*") - , ("eqnarray", mathEnv para (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*") - , ("align", mathEnv para (Just "aligned") "align") - , ("align*", mathEnv para (Just "aligned") "align*") - , ("alignat", mathEnv para (Just "aligned") "alignat") - , ("alignat*", mathEnv para (Just "aligned") "alignat*") +coloredInline :: PandocMonad m => String -> LP m Inlines +coloredInline stylename = do + skipopts + color <- braced + spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok + +ttfamily :: PandocMonad m => LP m Inlines +ttfamily = (code . stringify . toList) <$> tok + +rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do + parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions + if parseRaw + then rawInline "latex" <$> getRawCommand name' ("\\" <> name') + else fallback + +getRawCommand :: PandocMonad m => Text -> Text -> LP m String +getRawCommand name txt = do + (_, rawargs) <- withRaw $ + case name of + "write" -> do + void $ satisfyTok isWordTok -- digits + void braced + "titleformat" -> do + void braced + skipopts + void $ count 4 braced + "def" -> + void $ manyTill anyTok braced + _ -> do + skipangles + skipopts + option "" (try (optional sp *> dimenarg)) + void $ many braced + return $ T.unpack (txt <> untokenize rawargs) + +isBlockCommand :: Text -> Bool +isBlockCommand s = + s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks)) + || s `Set.member` treatAsBlock + +treatAsBlock :: Set.Set Text +treatAsBlock = Set.fromList + [ "let", "def", "DeclareRobustCommand" + , "newcommand", "renewcommand" + , "newenvironment", "renewenvironment" + , "providecommand", "provideenvironment" + -- newcommand, etc. should be parsed by macroDef, but we need this + -- here so these aren't parsed as inline commands to ignore + , "special", "pdfannot", "pdfstringdef" + , "bibliographystyle" + , "maketitle", "makeindex", "makeglossary" + , "addcontentsline", "addtocontents", "addtocounter" + -- \ignore{} is used conventionally in literate haskell for definitions + -- that are to be processed by the compiler but not printed. + , "ignore" + , "hyperdef" + , "markboth", "markright", "markleft" + , "hspace", "vspace" + , "newpage" + , "clearpage" + , "pagebreak" + , "titleformat" + ] + +isInlineCommand :: Text -> Bool +isInlineCommand s = + s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines)) + || s `Set.member` treatAsInline + +treatAsInline :: Set.Set Text +treatAsInline = Set.fromList + [ "index" + , "hspace" + , "vspace" + , "noindent" + , "newpage" + , "clearpage" + , "pagebreak" ] -letterContents :: LP Blocks -letterContents = do - bs <- blocks - st <- getState - -- add signature (author) and address (title) - let addr = case lookupMeta "address" (stateMeta st) of - Just (MetaBlocks [Plain xs]) -> - para $ trimInlines $ fromList xs - _ -> mempty - return $ addr <> bs -- sig added by \closing +dolabel :: PandocMonad m => LP m Inlines +dolabel = do + v <- braced + let refstr = toksToString v + return $ spanWith (refstr,[],[("label", refstr)]) + $ inBrackets $ str $ toksToString v + +doref :: PandocMonad m => String -> LP m Inlines +doref cls = do + v <- braced + let refstr = toksToString v + return $ linkWith ("",[],[ ("reference-type", cls) + , ("reference", refstr)]) + ('#':refstr) + "" + (inBrackets $ str refstr) + +lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v +lookupListDefault d = (fromMaybe d .) . lookupList + where lookupList l m = msum $ map (`M.lookup` m) l + +inline :: PandocMonad m => LP m Inlines +inline = (mempty <$ comment) + <|> (space <$ whitespace) + <|> (softbreak <$ endline) + <|> word + <|> inlineCommand' + <|> inlineEnvironment + <|> inlineGroup + <|> (symbol '-' *> + option (str "-") (symbol '-' *> + option (str "–") (str "—" <$ symbol '-'))) + <|> doubleQuote + <|> singleQuote + <|> (str "”" <$ try (symbol '\'' >> symbol '\'')) + <|> (str "”" <$ symbol '”') + <|> (str "’" <$ symbol '\'') + <|> (str "’" <$ symbol '’') + <|> (str "\160" <$ symbol '~') + <|> dollarsMath + <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) + <|> (str . (:[]) <$> primEscape) + <|> regularSymbol + <|> (do res <- symbolIn "#^'`\"[]&" + pos <- getPosition + let s = T.unpack (untoken res) + report $ ParsingUnescaped s pos + return $ str s) + +inlines :: PandocMonad m => LP m Inlines +inlines = mconcat <$> many inline + +-- block elements: + +begin_ :: PandocMonad m => Text -> LP m () +begin_ t = try (do + controlSeq "begin" + spaces + txt <- untokenize <$> braced + guard (t == txt)) <?> ("\\begin{" ++ T.unpack t ++ "}") + +end_ :: PandocMonad m => Text -> LP m () +end_ t = try (do + controlSeq "end" + spaces + txt <- untokenize <$> braced + guard $ t == txt) <?> ("\\end{" ++ T.unpack t ++ "}") + +preamble :: PandocMonad m => LP m Blocks +preamble = mempty <$ many preambleBlock + where preambleBlock = spaces1 + <|> void include + <|> void macroDef + <|> void blockCommand + <|> void braced + <|> (notFollowedBy (begin_ "document") >> void anyTok) + +paragraph :: PandocMonad m => LP m Blocks +paragraph = do + x <- trimInlines . mconcat <$> many1 inline + if x == mempty + then return mempty + else return $ para x + +include :: PandocMonad m => LP m Blocks +include = do + (Tok _ (CtrlSeq name) _) <- + controlSeq "include" <|> controlSeq "input" <|> + controlSeq "subfile" <|> controlSeq "usepackage" + skipMany opt + fs <- (map (T.unpack . removeDoubleQuotes . T.strip) . T.splitOn "," . + untokenize) <$> braced + let fs' = if name == "usepackage" + then map (maybeAddExtension ".sty") fs + else map (maybeAddExtension ".tex") fs + dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mapM_ (insertIncluded dirs) fs' + return mempty + +insertIncluded :: PandocMonad m + => [FilePath] + -> FilePath + -> LP m () +insertIncluded dirs f = do + pos <- getPosition + containers <- getIncludeFiles <$> getState + when (f `elem` containers) $ + throwError $ PandocParseError $ "Include file loop at " ++ show pos + updateState $ addIncludeFile f + mbcontents <- readFileFromDirs dirs f + contents <- case mbcontents of + Just s -> return s + Nothing -> do + report $ CouldNotLoadIncludeFile f pos + return "" + getInput >>= setInput . (tokenize f (T.pack contents) ++) + updateState dropLatestIncludeFile + +maybeAddExtension :: String -> FilePath -> FilePath +maybeAddExtension ext fp = + if null (takeExtension fp) + then addExtension fp ext + else fp + +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () +addMeta field val = updateState $ \st -> + st{ sMeta = addMetaField field val $ sMeta st } + +authors :: PandocMonad m => LP m () +authors = try $ do + bgroup + let oneAuthor = mconcat <$> + many1 (notFollowedBy' (controlSeq "and") >> + (inline <|> mempty <$ blockCommand)) + -- skip e.g. \vspace{10pt} + auths <- sepBy oneAuthor (controlSeq "and") + egroup + addMeta "author" (map trimInlines auths) + +macroDef :: PandocMonad m => LP m Blocks +macroDef = + mempty <$ ((commandDef <|> environmentDef) <* doMacros 0) + where commandDef = do + (name, macro') <- newcommand <|> letmacro <|> defmacro + guardDisabled Ext_latex_macros <|> + updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) }) + environmentDef = do + (name, macro1, macro2) <- newenvironment + guardDisabled Ext_latex_macros <|> + do updateState $ \s -> s{ sMacros = + M.insert name macro1 (sMacros s) } + updateState $ \s -> s{ sMacros = + M.insert ("end" <> name) macro2 (sMacros s) } + -- @\newenvironment{envname}[n-args][default]{begin}{end}@ + -- is equivalent to + -- @\newcommand{\envname}[n-args][default]{begin}@ + -- @\newcommand{\endenvname}@ + +letmacro :: PandocMonad m => LP m (Text, Macro) +letmacro = do + controlSeq "let" + Tok _ (CtrlSeq name) _ <- anyControlSeq + optional $ symbol '=' + spaces + contents <- bracedOrToken + return (name, Macro ExpandWhenDefined 0 Nothing contents) + +defmacro :: PandocMonad m => LP m (Text, Macro) +defmacro = try $ do + controlSeq "def" + Tok _ (CtrlSeq name) _ <- anyControlSeq + numargs <- option 0 $ argSeq 1 + -- we use withVerbatimMode, because macros are to be expanded + -- at point of use, not point of definition + contents <- withVerbatimMode bracedOrToken + return (name, Macro ExpandWhenUsed numargs Nothing contents) + +-- Note: we don't yet support fancy things like #1.#2 +argSeq :: PandocMonad m => Int -> LP m Int +argSeq n = do + Tok _ (Arg i) _ <- satisfyTok isArgTok + guard $ i == n + argSeq (n+1) <|> return n + +isArgTok :: Tok -> Bool +isArgTok (Tok _ (Arg _) _) = True +isArgTok _ = False + +bracedOrToken :: PandocMonad m => LP m [Tok] +bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar)) + +newcommand :: PandocMonad m => LP m (Text, Macro) +newcommand = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> + controlSeq "renewcommand" <|> + controlSeq "providecommand" <|> + controlSeq "DeclareRobustCommand" + optional $ symbol '*' + Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|> + (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + contents <- withVerbatimMode bracedOrToken + when (mtype == "newcommand") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos + Nothing -> return () + return (name, Macro ExpandWhenUsed numargs optarg contents) + +newenvironment :: PandocMonad m => LP m (Text, Macro, Macro) +newenvironment = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> + controlSeq "renewenvironment" <|> + controlSeq "provideenvironment" + optional $ symbol '*' + spaces + name <- untokenize <$> braced + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + startcontents <- withVerbatimMode bracedOrToken + spaces + endcontents <- withVerbatimMode bracedOrToken + when (mtype == "newenvironment") $ do + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos + Nothing -> return () + return (name, Macro ExpandWhenUsed numargs optarg startcontents, + Macro ExpandWhenUsed 0 Nothing endcontents) + +bracketedToks :: PandocMonad m => LP m [Tok] +bracketedToks = do + symbol '[' + mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']') + +bracketedNum :: PandocMonad m => LP m Int +bracketedNum = do + ds <- untokenize <$> bracketedToks + case safeRead (T.unpack ds) of + Just i -> return i + _ -> return 0 + +setCaption :: PandocMonad m => LP m Blocks +setCaption = do + ils <- tok + mblabel <- option Nothing $ + try $ spaces >> controlSeq "label" >> (Just <$> tok) + let ils' = case mblabel of + Just lab -> ils <> spanWith + ("",[],[("label", stringify lab)]) mempty + Nothing -> ils + updateState $ \st -> st{ sCaption = Just ils' } + return mempty -closing :: LP Blocks +looseItem :: PandocMonad m => LP m Blocks +looseItem = do + inListItem <- sInListItem <$> getState + guard $ not inListItem + skipopts + return mempty + +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption = Nothing } + +section :: PandocMonad m => Bool -> Attr -> Int -> LP m Blocks +section starred (ident, classes, kvs) lvl = do + skipopts + contents <- grouped inline + lab <- option ident $ + try (spaces >> controlSeq "label" + >> spaces >> toksToString <$> braced) + let classes' = if starred then "unnumbered" : classes else classes + unless starred $ do + hn <- sLastHeaderNum <$> getState + let num = incrementHeaderNum lvl hn + updateState $ \st -> st{ sLastHeaderNum = num } + updateState $ \st -> st{ sLabels = M.insert lab + [Str (renderHeaderNum num)] + (sLabels st) } + attr' <- registerHeader (lab, classes', kvs) contents + return $ headerWith attr' lvl contents + +blockCommand :: PandocMonad m => LP m Blocks +blockCommand = try $ do + Tok _ (CtrlSeq name) txt <- anyControlSeq + guard $ name /= "begin" && name /= "end" + star <- option "" ("*" <$ symbol '*' <* optional sp) + let name' = name <> star + let names = ordNub [name', name] + let rawDefiniteBlock = do + guard $ isBlockCommand name + rawBlock "latex" <$> getRawCommand name (txt <> star) + -- heuristic: if it could be either block or inline, we + -- treat it if block if we have a sequence of block + -- commands followed by a newline. But we stop if we + -- hit a \startXXX, since this might start a raw ConTeXt + -- environment (this is important because this parser is + -- used by the Markdown reader). + let startCommand = try $ do + Tok _ (CtrlSeq n) _ <- anyControlSeq + guard $ "start" `T.isPrefixOf` n + let rawMaybeBlock = try $ do + guard $ not $ isInlineCommand name + curr <- rawBlock "latex" <$> getRawCommand name (txt <> star) + rest <- many $ notFollowedBy startCommand *> blockCommand + lookAhead $ blankline <|> startCommand + return $ curr <> mconcat rest + let raw = rawDefiniteBlock <|> rawMaybeBlock + lookupListDefault raw names blockCommands + +closing :: PandocMonad m => LP m Blocks closing = do contents <- tok st <- getState let extractInlines (MetaBlocks [Plain ys]) = ys extractInlines (MetaBlocks [Para ys ]) = ys - extractInlines _ = [] - let sigs = case lookupMeta "author" (stateMeta st) of + extractInlines _ = [] + let sigs = case lookupMeta "author" (sMeta st) of Just (MetaList xs) -> para $ trimInlines $ fromList $ intercalate [LineBreak] $ map extractInlines xs _ -> mempty return $ para (trimInlines contents) <> sigs -item :: LP Blocks -item = blocks *> controlSeq "item" *> skipopts *> blocks - -looseItem :: LP Blocks -looseItem = do - ctx <- stateParserContext `fmap` getState - if ctx == ListItemState - then mzero - else return mempty - -descItem :: LP (Inlines, [Blocks]) -descItem = do - blocks -- skip blocks before item - controlSeq "item" - optional sp - ils <- opt - bs <- blocks - return (ils, [bs]) - -env :: String -> LP a -> LP a -env name p = p <* - (try (controlSeq "end" *> braced >>= guard . (== name)) - <?> ("\\end{" ++ name ++ "}")) +blockCommands :: PandocMonad m => M.Map Text (LP m Blocks) +blockCommands = M.fromList + [ ("par", mempty <$ skipopts) + , ("parbox", skipopts >> braced >> grouped blocks) + , ("title", mempty <$ (skipopts *> + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) + , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) + , ("author", mempty <$ (skipopts *> authors)) + -- -- in letter class, temp. store address & sig as title, author + , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) + , ("signature", mempty <$ (skipopts *> authors)) + , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) + -- Koma-script metadata commands + , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication")) + -- sectioning + , ("part", section False nullAttr (-1)) + , ("part*", section True nullAttr (-1)) + , ("chapter", section False nullAttr 0) + , ("chapter*", section True ("",["unnumbered"],[]) 0) + , ("section", section False nullAttr 1) + , ("section*", section True ("",["unnumbered"],[]) 1) + , ("subsection", section False nullAttr 2) + , ("subsection*", section True ("",["unnumbered"],[]) 2) + , ("subsubsection", section False nullAttr 3) + , ("subsubsection*", section True ("",["unnumbered"],[]) 3) + , ("paragraph", section False nullAttr 4) + , ("paragraph*", section True ("",["unnumbered"],[]) 4) + , ("subparagraph", section False nullAttr 5) + , ("subparagraph*", section True ("",["unnumbered"],[]) 5) + -- beamer slides + , ("frametitle", section False nullAttr 3) + , ("framesubtitle", section False nullAttr 4) + -- letters + , ("opening", (para . trimInlines) <$> (skipopts *> tok)) + , ("closing", skipopts *> closing) + -- memoir + , ("plainbreak", braced >> pure horizontalRule) + , ("plainbreak*", braced >> pure horizontalRule) + , ("fancybreak", braced >> pure horizontalRule) + , ("fancybreak*", braced >> pure horizontalRule) + , ("plainfancybreak", braced >> braced >> braced >> pure horizontalRule) + , ("plainfancybreak*", braced >> braced >> braced >> pure horizontalRule) + , ("pfbreak", pure horizontalRule) + , ("pfbreak*", pure horizontalRule) + -- + , ("hrule", pure horizontalRule) + , ("strut", pure mempty) + , ("rule", skipopts *> tok *> tok *> pure horizontalRule) + , ("item", looseItem) + , ("documentclass", skipopts *> braced *> preamble) + , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) + , ("caption", skipopts *> setCaption) + , ("bibliography", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + , ("addbibresource", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs . toksToString)) + -- includes + , ("lstinputlisting", inputListing) + , ("graphicspath", graphicsPath) + -- polyglossia + , ("setdefaultlanguage", setDefaultLanguage) + , ("setmainlanguage", setDefaultLanguage) + -- hyperlink + , ("hypertarget", hypertargetBlock) + -- LaTeX colors + , ("textcolor", coloredBlock "color") + , ("colorbox", coloredBlock "background-color") + ] + + +environments :: PandocMonad m => M.Map Text (LP m Blocks) +environments = M.fromList + [ ("document", env "document" blocks) + , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) + , ("letter", env "letter" letterContents) + , ("minipage", env "minipage" $ + skipopts *> spaces *> optional braced *> spaces *> blocks) + , ("figure", env "figure" $ skipopts *> figure) + , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) + , ("center", env "center" blocks) + , ("longtable", env "longtable" $ + resetCaption *> simpTable "longtable" False >>= addTableCaption) + , ("table", env "table" $ + resetCaption *> skipopts *> blocks >>= addTableCaption) + , ("tabular*", env "tabular*" $ simpTable "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable "tabularx" True) + , ("tabular", env "tabular" $ simpTable "tabular" False) + , ("quote", blockQuote <$> env "quote" blocks) + , ("quotation", blockQuote <$> env "quotation" blocks) + , ("verse", blockQuote <$> env "verse" blocks) + , ("itemize", bulletList <$> listenv "itemize" (many item)) + , ("description", definitionList <$> listenv "description" (many descItem)) + , ("enumerate", orderedList') + , ("alltt", alltt <$> env "alltt" blocks) + , ("code", guardEnabled Ext_literate_haskell *> + (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + verbEnv "code")) + , ("comment", mempty <$ verbEnv "comment") + , ("verbatim", codeBlock <$> verbEnv "verbatim") + , ("Verbatim", fancyverbEnv "Verbatim") + , ("BVerbatim", fancyverbEnv "BVerbatim") + , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals + codeBlockWith attr <$> verbEnv "lstlisting") + , ("minted", minted) + , ("obeylines", obeylines) + , ("displaymath", mathEnvWith para Nothing "displaymath") + , ("equation", mathEnvWith para Nothing "equation") + , ("equation*", mathEnvWith para Nothing "equation*") + , ("gather", mathEnvWith para (Just "gathered") "gather") + , ("gather*", mathEnvWith para (Just "gathered") "gather*") + , ("multline", mathEnvWith para (Just "gathered") "multline") + , ("multline*", mathEnvWith para (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") + , ("align", mathEnvWith para (Just "aligned") "align") + , ("align*", mathEnvWith para (Just "aligned") "align*") + , ("alignat", mathEnvWith para (Just "aligned") "alignat") + , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") + , ("tikzpicture", rawVerbEnv "tikzpicture") + -- etoolbox + , ("ifstrequal", ifstrequal) + , ("newtoggle", braced >>= newToggle) + , ("toggletrue", braced >>= setToggle True) + , ("togglefalse", braced >>= setToggle False) + , ("iftoggle", try $ ifToggle >> block) + ] + +environment :: PandocMonad m => LP m Blocks +environment = do + controlSeq "begin" + name <- untokenize <$> braced + M.findWithDefault mzero name environments + <|> rawEnv name -listenv :: String -> LP a -> LP a -listenv name p = try $ do - oldCtx <- stateParserContext `fmap` getState - updateState $ \st -> st{ stateParserContext = ListItemState } - res <- env name p - updateState $ \st -> st{ stateParserContext = oldCtx } - return res +env :: PandocMonad m => Text -> LP m a -> LP m a +env name p = p <* end_ name -mathEnv :: (Inlines -> a) -> Maybe String -> String -> LP a -mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name) - where inner x = case innerEnv of - Nothing -> x - Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ - "\\end{" ++ y ++ "}" +rawEnv :: PandocMonad m => Text -> LP m Blocks +rawEnv name = do + exts <- getOption readerExtensions + let parseRaw = extensionEnabled Ext_raw_tex exts + rawOptions <- mconcat <$> many rawopt + let beginCommand = "\\begin{" <> name <> "}" <> rawOptions + pos1 <- getPosition + (bs, raw) <- withRaw $ env name blocks + if parseRaw + then return $ rawBlock "latex" + $ T.unpack $ beginCommand <> untokenize raw + else do + report $ SkippedContent (T.unpack beginCommand) pos1 + pos2 <- getPosition + report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 + return bs + +rawVerbEnv :: PandocMonad m => Text -> LP m Blocks +rawVerbEnv name = do + pos <- getPosition + (_, raw) <- withRaw $ verbEnv name + let raw' = "\\begin{tikzpicture}" ++ toksToString raw + exts <- getOption readerExtensions + let parseRaw = extensionEnabled Ext_raw_tex exts + if parseRaw + then return $ rawBlock "latex" raw' + else do + report $ SkippedContent raw' pos + return mempty -verbEnv :: String -> LP String -verbEnv name = do +verbEnv :: PandocMonad m => Text -> LP m String +verbEnv name = withVerbatimMode $ do skipopts optional blankline - let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - res <- manyTill anyChar endEnv - return $ stripTrailingNewlines res + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ toksToString res -fancyverbEnv :: String -> LP Blocks +fancyverbEnv :: PandocMonad m => Text -> LP m Blocks fancyverbEnv name = do options <- option [] keyvals let kvs = [ (if k == "firstnumber" @@ -1242,142 +2207,176 @@ fancyverbEnv name = do let attr = ("",classes,kvs) codeBlockWith attr <$> verbEnv name -orderedList' :: LP Blocks -orderedList' = do - optional sp - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ char '[' *> anyOrderedListMarker <* char ']' - spaces - optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced - spaces - start <- option 1 $ try $ do controlSeq "setcounter" - grouped (string "enum" *> many1 (oneOf "iv")) - optional sp - num <- grouped (many1 digit) - spaces - return (read num + 1 :: Int) - bs <- listenv "enumerate" (many item) - return $ orderedListWith (start, style, delim) bs - -paragraph :: LP Blocks -paragraph = do - x <- trimInlines . mconcat <$> many1 inline - if x == mempty - then return mempty - else return $ para x - -preamble :: LP Blocks -preamble = mempty <$> manyTill preambleBlock beginDoc - where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" - preambleBlock = void comment - <|> void sp - <|> void blanklines - <|> void macro - <|> void blockCommand - <|> void anyControlSeq - <|> void braced - <|> void anyChar - -------- +obeylines :: PandocMonad m => LP m Blocks +obeylines = + para . fromList . removeLeadingTrailingBreaks . + walk softBreakToHard . toList <$> env "obeylines" inlines + where softBreakToHard SoftBreak = LineBreak + softBreakToHard x = x + removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak . + reverse . dropWhile isLineBreak + isLineBreak LineBreak = True + isLineBreak _ = False + +minted :: PandocMonad m => LP m Blocks +minted = do + options <- option [] keyvals + lang <- toksToString <$> braced + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + let classes = [ lang | not (null lang) ] ++ + [ "numberLines" | + lookup "linenos" options == Just "true" ] + let attr = ("",classes,kvs) + codeBlockWith attr <$> verbEnv "minted" --- citations +letterContents :: PandocMonad m => LP m Blocks +letterContents = do + bs <- blocks + st <- getState + -- add signature (author) and address (title) + let addr = case lookupMeta "address" (sMeta st) of + Just (MetaBlocks [Plain xs]) -> + para $ trimInlines $ fromList xs + _ -> mempty + return $ addr <> bs -- sig added by \closing -addPrefix :: [Inline] -> [Citation] -> [Citation] -addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks -addPrefix _ _ = [] +figure :: PandocMonad m => LP m Blocks +figure = try $ do + resetCaption + blocks >>= addImageCaption -addSuffix :: [Inline] -> [Citation] -> [Citation] -addSuffix s ks@(_:_) = - let k = last ks - in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] -addSuffix _ _ = [] +addImageCaption :: PandocMonad m => Blocks -> LP m Blocks +addImageCaption = walkM go + where go (Image attr alt (src,tit)) + | not ("fig:" `isPrefixOf` tit) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Image attr (toList ils) (src, "fig:" ++ tit) + Nothing -> Image attr alt (src,tit) + go x = return x -simpleCiteArgs :: LP [Citation] -simpleCiteArgs = try $ do - first <- optionMaybe $ toList <$> opt - second <- optionMaybe $ toList <$> opt - char '{' - optional sp - keys <- manyTill citationLabel (char '}') - let (pre, suf) = case (first , second ) of - (Just s , Nothing) -> (mempty, s ) - (Just s , Just t ) -> (s , t ) - _ -> (mempty, mempty) - conv k = Citation { citationId = k - , citationPrefix = [] - , citationSuffix = [] - , citationMode = NormalCitation - , citationHash = 0 - , citationNoteNum = 0 - } - return $ addPrefix pre $ addSuffix suf $ map conv keys +coloredBlock :: PandocMonad m => String -> LP m Blocks +coloredBlock stylename = try $ do + skipopts + color <- braced + notFollowedBy (grouped inline) + let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) + constructor <$> grouped block + +graphicsPath :: PandocMonad m => LP m Blocks +graphicsPath = do + ps <- map toksToString <$> (bgroup *> manyTill braced egroup) + getResourcePath >>= setResourcePath . (++ ps) + return mempty -citationLabel :: LP String -citationLabel = optional sp *> - (many1 (satisfy isBibtexKeyChar) - <* optional sp - <* optional (char ',') - <* optional sp) - where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String) +splitBibs :: String -> [Inlines] +splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') -cites :: CitationMode -> Bool -> LP [Citation] -cites mode multi = try $ do - cits <- if multi - then many1 simpleCiteArgs - else count 1 simpleCiteArgs - let cs = concat cits - return $ case mode of - AuthorInText -> case cs of - (c:rest) -> c {citationMode = mode} : rest - [] -> [] - _ -> map (\a -> a {citationMode = mode}) cs +alltt :: Blocks -> Blocks +alltt = walk strToCode + where strToCode (Str s) = Code nullAttr s + strToCode Space = RawInline (Format "latex") "\\ " + strToCode SoftBreak = LineBreak + strToCode x = x -citation :: String -> CitationMode -> Bool -> LP Inlines -citation name mode multi = do - (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw) +parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions options = + let kvs = [ (if k == "firstnumber" + then "startFrom" + else k, v) | (k,v) <- options ] + classes = [ "numberLines" | + lookup "numbers" options == Just "left" ] + ++ maybeToList (lookup "language" options + >>= fromListingsLanguage) + in (fromMaybe "" (lookup "label" options), classes, kvs) -complexNatbibCitation :: CitationMode -> LP Inlines -complexNatbibCitation mode = try $ do - let ils = (toList . trimInlines . mconcat) <$> - many (notFollowedBy (oneOf "\\};") >> inline) - let parseOne = try $ do - skipSpaces - pref <- ils - cit' <- inline -- expect a citation - let citlist = toList cit' - cits' <- case citlist of - [Cite cs _] -> return cs - _ -> mzero - suff <- ils - skipSpaces - optional $ char ';' - return $ addPrefix pref $ addSuffix suff cits' - (c:cits, raw) <- withRaw $ grouped parseOne - return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ raw) +inputListing :: PandocMonad m => LP m Blocks +inputListing = do + pos <- getPosition + options <- option [] keyvals + f <- filter (/='"') . toksToString <$> braced + dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs f + codeLines <- case mbCode of + Just s -> return $ lines s + Nothing -> do + report $ CouldNotLoadIncludeFile f pos + return [] + let (ident,classes,kvs) = parseListingsOptions options + let language = case lookup "language" options >>= fromListingsLanguage of + Just l -> [l] + Nothing -> take 1 $ languagesByExtension (takeExtension f) + let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead + let lastline = fromMaybe (length codeLines) $ + lookup "lastline" options >>= safeRead + let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $ + drop (firstline - 1) codeLines + return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents + +-- lists + +item :: PandocMonad m => LP m Blocks +item = void blocks *> controlSeq "item" *> skipopts *> blocks + +descItem :: PandocMonad m => LP m (Inlines, [Blocks]) +descItem = do + blocks -- skip blocks before item + controlSeq "item" + optional sp + ils <- opt + bs <- blocks + return (ils, [bs]) --- tables +listenv :: PandocMonad m => Text -> LP m a -> LP m a +listenv name p = try $ do + oldInListItem <- sInListItem `fmap` getState + updateState $ \st -> st{ sInListItem = True } + res <- env name p + updateState $ \st -> st{ sInListItem = oldInListItem } + return res -parseAligns :: LP [Alignment] -parseAligns = try $ do - char '{' - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) - maybeBar - let cAlign = AlignCenter <$ char 'c' - let lAlign = AlignLeft <$ char 'l' - let rAlign = AlignRight <$ char 'r' - let parAlign = AlignLeft <$ (char 'p' >> braced) - let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign - aligns' <- sepEndBy alignChar maybeBar +orderedList' :: PandocMonad m => LP m Blocks +orderedList' = try $ do spaces - char '}' + let markerSpec = do + symbol '[' + ts <- toksToString <$> manyTill anyTok (symbol ']') + case runParser anyOrderedListMarker def "option" ts of + Right r -> return r + Left _ -> do + pos <- getPosition + report $ SkippedContent ("[" ++ ts ++ "]") pos + return (1, DefaultStyle, DefaultDelim) + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec spaces - return aligns' + optional $ try $ controlSeq "setlength" + *> grouped (count 1 $ controlSeq "itemindent") + *> braced + spaces + start <- option 1 $ try $ do pos <- getPosition + controlSeq "setcounter" + ctr <- toksToString <$> braced + guard $ "enum" `isPrefixOf` ctr + guard $ all (`elem` ['i','v']) (drop 4 ctr) + optional sp + num <- toksToString <$> braced + case safeRead num of + Just i -> return (i + 1 :: Int) + Nothing -> do + report $ SkippedContent + ("\\setcounter{" ++ ctr ++ + "}{" ++ num ++ "}") pos + return 1 + bs <- listenv "enumerate" (many item) + return $ orderedListWith (start, style, delim) bs + +-- tables -hline :: LP () +hline :: PandocMonad m => LP m () hline = try $ do - spaces' + spaces controlSeq "hline" <|> -- booktabs rules: controlSeq "toprule" <|> @@ -1385,80 +2384,312 @@ hline = try $ do controlSeq "midrule" <|> controlSeq "endhead" <|> controlSeq "endfirsthead" - spaces' - optional $ bracketed (many1 (satisfy (/=']'))) + spaces + optional opt return () -lbreak :: LP () -lbreak = () <$ try (spaces' *> - (controlSeq "\\" <|> controlSeq "tabularnewline") <* - spaces') - -amp :: LP () -amp = () <$ try (spaces' *> char '&' <* spaces') - -parseTableRow :: Int -- ^ number of columns - -> LP [Blocks] -parseTableRow cols = try $ do - let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline - let minipage = try $ controlSeq "begin" *> string "{minipage}" *> - env "minipage" - (skipopts *> spaces' *> optional braced *> spaces' *> blocks) - let tableCell = minipage <|> - ((plain . trimInlines . mconcat) <$> many tableCellInline) - cells' <- sepBy1 tableCell amp - let numcells = length cells' - guard $ numcells <= cols && numcells >= 1 - guard $ cells' /= [mempty] - -- note: a & b in a three-column table leaves an empty 3rd cell: - let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces' - return cells'' +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces + +amp :: PandocMonad m => LP m Tok +amp = symbol '&' -spaces' :: LP () -spaces' = spaces *> skipMany (comment *> spaces) +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do + inp <- getInput + case inp of + (Tok spos Word t : rest) -> + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + _ -> return () -simpTable :: Bool -> LP Blocks -simpTable hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces' >> tok) +parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] +parseAligns = try $ do + let maybeBar = skipMany $ + sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced) + let cAlign = AlignCenter <$ symbol 'c' + let lAlign = AlignLeft <$ symbol 'l' + let rAlign = AlignRight <$ symbol 'r' + let parAlign = AlignLeft <$ symbol 'p' + -- aligns from tabularx + let xAlign = AlignLeft <$ symbol 'X' + let mAlign = AlignLeft <$ symbol 'm' + let bAlign = AlignLeft <$ symbol 'b' + let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign ) + let alignPrefix = symbol '>' >> braced + let alignSuffix = symbol '<' >> braced + let colWidth = try $ do + symbol '{' + ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") + spaces + symbol '}' + case safeRead ds of + Just w -> return w + Nothing -> return 0.0 + let alignSpec = do + pref <- option [] alignPrefix + spaces + al <- alignChar + width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced + pos <- getPosition + report $ SkippedContent s pos + return 0.0) + spaces + suff <- option [] alignSuffix + return (al, width, (pref, suff)) + let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro + symbol '*' + spaces + ds <- trim . toksToString <$> braced + spaces + spec <- braced + case safeRead ds of + Just n -> + getInput >>= setInput . (mconcat (replicate n spec) ++) + Nothing -> fail $ "Could not parse " ++ ds ++ " as number" + bgroup + spaces + maybeBar + aligns' <- many $ try $ spaces >> optional starAlign >> + (alignSpec <* maybeBar) + spaces + egroup + spaces + return aligns' + +parseTableRow :: PandocMonad m + => Text -- ^ table environment name + -> [([Tok], [Tok])] -- ^ pref/suffixes + -> LP m [Blocks] +parseTableRow envname prefsufs = do + notFollowedBy (spaces *> end_ envname) + let cols = length prefsufs + -- add prefixes and suffixes in token stream: + let celltoks (pref, suff) = do + prefpos <- getPosition + contents <- many (notFollowedBy + (() <$ amp <|> () <$ lbreak <|> end_ envname) + >> anyTok) + suffpos <- getPosition + option [] (count 1 amp) + return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff + rawcells <- mapM celltoks prefsufs + oldInput <- getInput + cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells + setInput oldInput + spaces + let numcells = length cells + guard $ numcells <= cols && numcells >= 1 + guard $ cells /= [mempty] + -- note: a & b in a three-column table leaves an empty 3rd cell: + return $ cells ++ replicate (cols - numcells) mempty + +parseTableCell :: PandocMonad m => LP m Blocks +parseTableCell = do + let plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + updateState $ \st -> st{ sInTableCell = True } + cells <- plainify <$> blocks + updateState $ \st -> st{ sInTableCell = False } + return cells + +simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks +simpTable envname hasWidthParameter = try $ do + when hasWidthParameter $ () <$ (spaces >> tok) skipopts - aligns <- parseAligns - let cols = length aligns + colspecs <- parseAligns + let (aligns, widths, prefsufs) = unzip3 colspecs + let cols = length colspecs optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak - spaces' + spaces skipMany hline - spaces' - header' <- option [] $ try (parseTableRow cols <* lbreak <* many1 hline) - spaces' - rows <- sepEndBy (parseTableRow cols) (lbreak <* optional (skipMany hline)) - spaces' + spaces + header' <- option [] $ try (parseTableRow envname prefsufs <* + lbreak <* many1 hline) + spaces + rows <- sepEndBy (parseTableRow envname prefsufs) + (lbreak <* optional (skipMany hline)) + spaces optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak - spaces' + spaces let header'' = if null header' then replicate cols mempty else header' lookAhead $ controlSeq "end" -- make sure we're at end - return $ table mempty (zip aligns (repeat 0)) header'' rows + return $ table mempty (zip aligns widths) header'' rows -startInclude :: LP Blocks -startInclude = do - fn <- braced - setPosition $ newPos fn 1 1 - return mempty +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go + where go (Table c als ws hs rs) = do + mbcapt <- sCaption <$> getState + return $ case mbcapt of + Just ils -> Table (toList ils) als ws hs rs + Nothing -> Table c als ws hs rs + go x = return x -endInclude :: LP Blocks -endInclude = do - fn <- braced - ln <- braced - co <- braced - setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co) - return mempty -removeDoubleQuotes :: String -> String -removeDoubleQuotes ('"':xs) = - case reverse xs of - '"':ys -> reverse ys - _ -> '"':xs -removeDoubleQuotes xs = xs +block :: PandocMonad m => LP m Blocks +block = do + res <- (mempty <$ spaces1) + <|> environment + <|> include + <|> macroDef + <|> blockCommand + <|> paragraph + <|> grouped block + trace (take 60 $ show $ B.toList res) + return res + +blocks :: PandocMonad m => LP m Blocks +blocks = mconcat <$> many block + +setDefaultLanguage :: PandocMonad m => LP m Blocks +setDefaultLanguage = do + o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + <$> rawopt + polylang <- toksToString <$> braced + case M.lookup polylang polyglossiaLangToBCP47 of + Nothing -> return mempty -- TODO mzero? warning? + Just langFunc -> do + let l = langFunc o + setTranslations l + updateState $ setMeta "lang" $ str (renderLang l) + return mempty + +polyglossiaLangToBCP47 :: M.Map String (String -> Lang) +polyglossiaLangToBCP47 = M.fromList + [ ("arabic", \o -> case filter (/=' ') o of + "locale=algeria" -> Lang "ar" "" "DZ" [] + "locale=mashriq" -> Lang "ar" "" "SY" [] + "locale=libya" -> Lang "ar" "" "LY" [] + "locale=morocco" -> Lang "ar" "" "MA" [] + "locale=mauritania" -> Lang "ar" "" "MR" [] + "locale=tunisia" -> Lang "ar" "" "TN" [] + _ -> Lang "ar" "" "" []) + , ("german", \o -> case filter (/=' ') o of + "spelling=old" -> Lang "de" "" "DE" ["1901"] + "variant=austrian,spelling=old" + -> Lang "de" "" "AT" ["1901"] + "variant=austrian" -> Lang "de" "" "AT" [] + "variant=swiss,spelling=old" + -> Lang "de" "" "CH" ["1901"] + "variant=swiss" -> Lang "de" "" "CH" [] + _ -> Lang "de" "" "" []) + , ("lsorbian", \_ -> Lang "dsb" "" "" []) + , ("greek", \o -> case filter (/=' ') o of + "variant=poly" -> Lang "el" "" "polyton" [] + "variant=ancient" -> Lang "grc" "" "" [] + _ -> Lang "el" "" "" []) + , ("english", \o -> case filter (/=' ') o of + "variant=australian" -> Lang "en" "" "AU" [] + "variant=canadian" -> Lang "en" "" "CA" [] + "variant=british" -> Lang "en" "" "GB" [] + "variant=newzealand" -> Lang "en" "" "NZ" [] + "variant=american" -> Lang "en" "" "US" [] + _ -> Lang "en" "" "" []) + , ("usorbian", \_ -> Lang "hsb" "" "" []) + , ("latin", \o -> case filter (/=' ') o of + "variant=classic" -> Lang "la" "" "" ["x-classic"] + _ -> Lang "la" "" "" []) + , ("slovenian", \_ -> Lang "sl" "" "" []) + , ("serbianc", \_ -> Lang "sr" "cyrl" "" []) + , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"]) + , ("afrikaans", \_ -> Lang "af" "" "" []) + , ("amharic", \_ -> Lang "am" "" "" []) + , ("assamese", \_ -> Lang "as" "" "" []) + , ("asturian", \_ -> Lang "ast" "" "" []) + , ("bulgarian", \_ -> Lang "bg" "" "" []) + , ("bengali", \_ -> Lang "bn" "" "" []) + , ("tibetan", \_ -> Lang "bo" "" "" []) + , ("breton", \_ -> Lang "br" "" "" []) + , ("catalan", \_ -> Lang "ca" "" "" []) + , ("welsh", \_ -> Lang "cy" "" "" []) + , ("czech", \_ -> Lang "cs" "" "" []) + , ("coptic", \_ -> Lang "cop" "" "" []) + , ("danish", \_ -> Lang "da" "" "" []) + , ("divehi", \_ -> Lang "dv" "" "" []) + , ("esperanto", \_ -> Lang "eo" "" "" []) + , ("spanish", \_ -> Lang "es" "" "" []) + , ("estonian", \_ -> Lang "et" "" "" []) + , ("basque", \_ -> Lang "eu" "" "" []) + , ("farsi", \_ -> Lang "fa" "" "" []) + , ("finnish", \_ -> Lang "fi" "" "" []) + , ("french", \_ -> Lang "fr" "" "" []) + , ("friulan", \_ -> Lang "fur" "" "" []) + , ("irish", \_ -> Lang "ga" "" "" []) + , ("scottish", \_ -> Lang "gd" "" "" []) + , ("ethiopic", \_ -> Lang "gez" "" "" []) + , ("galician", \_ -> Lang "gl" "" "" []) + , ("hebrew", \_ -> Lang "he" "" "" []) + , ("hindi", \_ -> Lang "hi" "" "" []) + , ("croatian", \_ -> Lang "hr" "" "" []) + , ("magyar", \_ -> Lang "hu" "" "" []) + , ("armenian", \_ -> Lang "hy" "" "" []) + , ("interlingua", \_ -> Lang "ia" "" "" []) + , ("indonesian", \_ -> Lang "id" "" "" []) + , ("icelandic", \_ -> Lang "is" "" "" []) + , ("italian", \_ -> Lang "it" "" "" []) + , ("japanese", \_ -> Lang "jp" "" "" []) + , ("khmer", \_ -> Lang "km" "" "" []) + , ("kurmanji", \_ -> Lang "kmr" "" "" []) + , ("kannada", \_ -> Lang "kn" "" "" []) + , ("korean", \_ -> Lang "ko" "" "" []) + , ("lao", \_ -> Lang "lo" "" "" []) + , ("lithuanian", \_ -> Lang "lt" "" "" []) + , ("latvian", \_ -> Lang "lv" "" "" []) + , ("malayalam", \_ -> Lang "ml" "" "" []) + , ("mongolian", \_ -> Lang "mn" "" "" []) + , ("marathi", \_ -> Lang "mr" "" "" []) + , ("dutch", \_ -> Lang "nl" "" "" []) + , ("nynorsk", \_ -> Lang "nn" "" "" []) + , ("norsk", \_ -> Lang "no" "" "" []) + , ("nko", \_ -> Lang "nqo" "" "" []) + , ("occitan", \_ -> Lang "oc" "" "" []) + , ("panjabi", \_ -> Lang "pa" "" "" []) + , ("polish", \_ -> Lang "pl" "" "" []) + , ("piedmontese", \_ -> Lang "pms" "" "" []) + , ("portuguese", \_ -> Lang "pt" "" "" []) + , ("romansh", \_ -> Lang "rm" "" "" []) + , ("romanian", \_ -> Lang "ro" "" "" []) + , ("russian", \_ -> Lang "ru" "" "" []) + , ("sanskrit", \_ -> Lang "sa" "" "" []) + , ("samin", \_ -> Lang "se" "" "" []) + , ("slovak", \_ -> Lang "sk" "" "" []) + , ("albanian", \_ -> Lang "sq" "" "" []) + , ("serbian", \_ -> Lang "sr" "" "" []) + , ("swedish", \_ -> Lang "sv" "" "" []) + , ("syriac", \_ -> Lang "syr" "" "" []) + , ("tamil", \_ -> Lang "ta" "" "" []) + , ("telugu", \_ -> Lang "te" "" "" []) + , ("thai", \_ -> Lang "th" "" "" []) + , ("turkmen", \_ -> Lang "tk" "" "" []) + , ("turkish", \_ -> Lang "tr" "" "" []) + , ("ukrainian", \_ -> Lang "uk" "" "" []) + , ("urdu", \_ -> Lang "ur" "" "" []) + , ("vietnamese", \_ -> Lang "vi" "" "" []) + ] + +babelLangToBCP47 :: String -> Maybe Lang +babelLangToBCP47 s = + case s of + "austrian" -> Just $ Lang "de" "" "AT" ["1901"] + "naustrian" -> Just $ Lang "de" "" "AT" [] + "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"] + "nswissgerman" -> Just $ Lang "de" "" "CH" [] + "german" -> Just $ Lang "de" "" "DE" ["1901"] + "ngerman" -> Just $ Lang "de" "" "DE" [] + "lowersorbian" -> Just $ Lang "dsb" "" "" [] + "uppersorbian" -> Just $ Lang "hsb" "" "" [] + "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"] + "slovene" -> Just $ Lang "sl" "" "" [] + "australian" -> Just $ Lang "en" "" "AU" [] + "canadian" -> Just $ Lang "en" "" "CA" [] + "british" -> Just $ Lang "en" "" "GB" [] + "newzealand" -> Just $ Lang "en" "" "NZ" [] + "american" -> Just $ Lang "en" "" "US" [] + "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"] + _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47 diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs new file mode 100644 index 000000000..c9cbaa9b9 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -0,0 +1,51 @@ +{- +Copyright (C) 2017-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.LaTeX.Types + Copyright : Copyright (C) 2017-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Types for LaTeX tokens and macros. +-} +module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) + , TokType(..) + , Macro(..) + , ExpansionPoint(..) + , SourcePos + ) +where +import Data.Text (Text) +import Text.Parsec.Pos (SourcePos) + +data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | + Esc1 | Esc2 | Arg Int + deriving (Eq, Ord, Show) + +data Tok = Tok SourcePos TokType Text + deriving (Eq, Ord, Show) + +data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed + deriving (Eq, Ord, Show) + +data Macro = Macro ExpansionPoint Int (Maybe [Tok]) [Tok] + deriving Show diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index cd35a8738..14cf73de4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE RelaxedPolyRec #-} {-# LANGUAGE ScopedTypeVariables #-} + {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,63 +30,55 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( readMarkdown, - readMarkdownWithWarnings ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( transpose, sortBy, findIndex, intercalate ) +import Control.Monad +import Control.Monad.Except (throwError) +import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) +import qualified Data.HashMap.Strict as H +import Data.List (intercalate, sortBy, transpose, elemIndex) import qualified Data.Map as M -import Data.Scientific (coefficient, base10Exponent) -import Data.Ord ( comparing ) -import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation ) import Data.Maybe -import Text.Pandoc.Definition -import Text.Pandoc.Emoji (emojis) -import Text.Pandoc.Generic (bottomUp) -import qualified Data.Text as T +import Data.Monoid ((<>)) +import Data.Ord (comparing) +import Data.Scientific (base10Exponent, coefficient) +import qualified Data.Set as Set import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Vector as V +import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..)) import qualified Data.Yaml as Yaml -import Data.Yaml (ParseException(..), YamlException(..), YamlMark(..)) -import qualified Data.HashMap.Strict as H +import System.FilePath (addExtension, takeExtension) +import Text.HTML.TagSoup +import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B -import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Data.Vector as V -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) +import Text.Pandoc.Class (PandocMonad (..), report) +import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Error +import Text.Pandoc.Logging import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (tableWith) +import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, + isCommentTag, isInlineTag, isTextTag) +import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared -import Text.Pandoc.Pretty (charWidth) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) -import Text.Pandoc.Parsing hiding (tableWith) -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, - isTextTag, isCommentTag ) -import Control.Monad -import System.FilePath (takeExtension, addExtension) -import Text.HTML.TagSoup -import qualified Data.Set as Set -import Text.Printf (printf) -import Debug.Trace (trace) -import Data.Monoid ((<>)) -import Text.Pandoc.Error -type MarkdownParser = Parser [Char] ParserState +type MarkdownParser m = ParserT [Char] ParserState m -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readMarkdown opts s = - (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") - --- | Read markdown from an input string and return a pair of a Pandoc document --- and a list of warnings. -readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError (Pandoc, [String]) -readMarkdownWithWarnings opts s = - (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines +readMarkdown :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readMarkdown opts s = do + parsed <- readWithM parseMarkdown def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case parsed of + Right result -> return result + Left e -> throwError e -- -- Constants and data structure definitions @@ -117,44 +110,43 @@ isBlank _ = False -- -- | Succeeds when we're in list context. -inList :: MarkdownParser () +inList :: PandocMonad m => MarkdownParser m () inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: Parser [Char] st () +spnl :: PandocMonad m => ParserT [Char] st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -indentSpaces :: MarkdownParser String +spnl' :: PandocMonad m => ParserT [Char] st m String +spnl' = try $ do + xs <- many spaceChar + ys <- option "" $ try $ (:) <$> newline + <*> (many spaceChar <* notFollowedBy (char '\n')) + return (xs ++ ys) + +indentSpaces :: PandocMonad m => MarkdownParser m String indentSpaces = try $ do tabStop <- getOption readerTabStop count tabStop (char ' ') <|> string "\t" <?> "indentation" -nonindentSpaces :: MarkdownParser String +nonindentSpaces :: PandocMonad m => MarkdownParser m String nonindentSpaces = do - tabStop <- getOption readerTabStop - sps <- many (char ' ') - if length sps < tabStop - then return sps - else unexpected "indented line" + n <- skipNonindentSpaces + return $ replicate n ' ' -- returns number of spaces parsed -skipNonindentSpaces :: MarkdownParser Int +skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int skipNonindentSpaces = do tabStop <- getOption readerTabStop - atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ') + gobbleAtMostSpaces (tabStop - 1) <* notFollowedBy spaceChar -atMostSpaces :: Int -> MarkdownParser Int -atMostSpaces n - | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0 - | otherwise = return 0 - -litChar :: MarkdownParser Char +litChar :: PandocMonad m => MarkdownParser m Char litChar = escapedChar' <|> characterReference <|> noneOf "\n" @@ -162,30 +154,32 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: MarkdownParser (F Inlines) -inlinesInBalancedBrackets = do - char '[' - (_, raw) <- withRaw $ charsInBalancedBrackets 1 - guard $ not $ null raw - parseFromString (trimInlinesF . mconcat <$> many inline) (init raw) - -charsInBalancedBrackets :: Int -> MarkdownParser () -charsInBalancedBrackets 0 = return () -charsInBalancedBrackets openBrackets = - (char '[' >> charsInBalancedBrackets (openBrackets + 1)) - <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1)) - <|> (( (() <$ code) - <|> (() <$ (escapedChar')) - <|> (newline >> notFollowedBy blankline) - <|> skipMany1 (noneOf "[]`\n\\") - <|> (() <$ count 1 (oneOf "`\\")) - ) >> charsInBalancedBrackets openBrackets) +inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) +inlinesInBalancedBrackets = + try $ char '[' >> withRaw (go 1) >>= + parseFromString inlines . stripBracket . snd + where stripBracket [] = [] + stripBracket xs = if last xs == ']' then init xs else xs + go :: PandocMonad m => Int -> MarkdownParser m () + go 0 = return () + go openBrackets = + (() <$ (escapedChar <|> + code <|> + rawHtmlInline <|> + rawLaTeXInline') >> go openBrackets) + <|> + (do char ']' + Control.Monad.when (openBrackets > 1) $ go (openBrackets - 1)) + <|> + (char '[' >> go (openBrackets + 1)) + <|> + (anyChar >> go openBrackets) -- -- document structure -- -rawTitleBlockLine :: MarkdownParser String +rawTitleBlockLine :: PandocMonad m => MarkdownParser m String rawTitleBlockLine = do char '%' skipSpaces @@ -196,13 +190,13 @@ rawTitleBlockLine = do anyLine return $ trim $ unlines (first:rest) -titleLine :: MarkdownParser (F Inlines) +titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw - return $ trimInlinesF $ mconcat res + res <- parseFromString' inlines raw + return $ trimInlinesF res -authorsLine :: MarkdownParser (F [Inlines]) +authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines]) authorsLine = try $ do raw <- rawTitleBlockLine let sep = (char ';' <* spaces) <|> newline @@ -210,18 +204,18 @@ authorsLine = try $ do (trimInlinesF . mconcat <$> many (try $ notFollowedBy sep >> inline)) sep - sequence <$> parseFromString pAuthors raw + sequence <$> parseFromString' pAuthors raw -dateLine :: MarkdownParser (F Inlines) +dateLine :: PandocMonad m => MarkdownParser m (F Inlines) dateLine = try $ do raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw - return $ trimInlinesF $ mconcat res + res <- parseFromString' inlines raw + return $ trimInlinesF res -titleBlock :: MarkdownParser () +titleBlock :: PandocMonad m => MarkdownParser m () titleBlock = pandocTitleBlock <|> mmdTitleBlock -pandocTitleBlock :: MarkdownParser () +pandocTitleBlock :: PandocMonad m => MarkdownParser m () pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') @@ -239,7 +233,8 @@ pandocTitleBlock = try $ do $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } -yamlMetaBlock :: MarkdownParser (F Blocks) + +yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition @@ -250,87 +245,105 @@ yamlMetaBlock = try $ do -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - opts <- stateOptions <$> getState - meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ return $ - H.foldrWithKey (\k v m -> - if ignorable k - then m - else case yamlToMeta opts v of - Left _ -> m - Right v' -> B.setMeta (T.unpack k) v' m) - nullMeta hashmap - Right Yaml.Null -> return $ return nullMeta - Right _ -> do - addWarning (Just pos) "YAML header is not an object" - return $ return nullMeta - Left err' -> do - case err' of - InvalidYaml (Just YamlParseException{ - yamlProblem = problem - , yamlContext = _ctxt - , yamlProblemMark = Yaml.YamlMark { - yamlLine = yline - , yamlColumn = ycol - }}) -> - addWarning (Just $ setSourceLine - (setSourceColumn pos - (sourceColumn pos + ycol)) - (sourceLine pos + 1 + yline)) - $ "Could not parse YAML header: " ++ - problem - _ -> addWarning (Just pos) - $ "Could not parse YAML header: " ++ - show err' - return $ return nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + case Yaml.decodeEither' $ UTF8.fromString rawYaml of + Right (Yaml.Object hashmap) -> do + let alist = H.toList hashmap + mapM_ (\(k, v) -> + if ignorable k + then return () + else do + v' <- yamlToMeta v + let k' = T.unpack k + updateState $ \st -> st{ stateMeta' = + do m <- stateMeta' st + -- if there's already a value, leave it unchanged + case lookupMeta k' m of + Just _ -> return m + Nothing -> do + v'' <- v' + return $ B.setMeta (T.unpack k) v'' m} + ) alist + Right Yaml.Null -> return () + Right _ -> do + logMessage $ + CouldNotParseYamlMetadata "not an object" + pos + return () + Left err' -> do + case err' of + InvalidYaml (Just YamlParseException{ + yamlProblem = problem + , yamlContext = _ctxt + , yamlProblemMark = Yaml.YamlMark { + yamlLine = yline + , yamlColumn = ycol + }}) -> + logMessage $ CouldNotParseYamlMetadata + problem (setSourceLine + (setSourceColumn pos + (sourceColumn pos + ycol)) + (sourceLine pos + 1 + yline)) + _ -> logMessage $ CouldNotParseYamlMetadata + (show err') pos + return () return mempty -- ignore fields ending with _ ignorable :: Text -> Bool ignorable t = (T.pack "_") `T.isSuffixOf` t -toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue -toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) - where - toMeta p = - case p of - Pandoc _ [Plain xs] -> MetaInlines xs - Pandoc _ [Para xs] - | endsWithNewline x -> MetaBlocks [Para xs] - | otherwise -> MetaInlines xs - Pandoc _ bs -> MetaBlocks bs - endsWithNewline t = T.pack "\n" `T.isSuffixOf` t - opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} - meta_exts = Set.fromList [ Ext_pandoc_title_block - , Ext_mmd_title_block - , Ext_yaml_metadata_block - ] - -yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue -yamlToMeta opts (Yaml.String t) = toMetaValue opts t -yamlToMeta _ (Yaml.Number n) +toMetaValue :: PandocMonad m + => Text -> MarkdownParser m (F MetaValue) +toMetaValue x = + parseFromString' parser' (T.unpack x) + where parser' = (asInlines <$> ((trimInlinesF . mconcat) + <$> try (guard (not endsWithNewline) + *> manyTill inline eof))) + <|> (asBlocks <$> parseBlocks) + asBlocks p = do + p' <- p + return $ MetaBlocks (B.toList p') + asInlines p = do + p' <- p + return $ MetaInlines (B.toList p') + endsWithNewline = T.pack "\n" `T.isSuffixOf` x + -- Note: a standard quoted or unquoted YAML value will + -- not end in a newline, but a "block" set off with + -- `|` or `>` will. + +yamlToMeta :: PandocMonad m + => Yaml.Value -> MarkdownParser m (F MetaValue) +yamlToMeta (Yaml.String t) = toMetaValue t +yamlToMeta (Yaml.Number n) -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = return $ MetaString $ show + | base10Exponent n >= 0 = return $ return $ MetaString $ show $ coefficient n * (10 ^ base10Exponent n) - | otherwise = return $ MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b -yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts) - (V.toList xs) -yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> - if ignorable k - then m - else (do - v' <- yamlToMeta opts v - m' <- m - return (M.insert (T.unpack k) v' m'))) - (return M.empty) o -yamlToMeta _ _ = return $ MetaString "" - -stopLine :: MarkdownParser () + | otherwise = return $ return $ MetaString $ show n +yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b +yamlToMeta (Yaml.Array xs) = do + xs' <- mapM yamlToMeta (V.toList xs) + return $ do + xs'' <- sequence xs' + return $ B.toMetaValue xs'' +yamlToMeta (Yaml.Object o) = do + let alist = H.toList o + foldM (\m (k,v) -> + if ignorable k + then return m + else do + v' <- yamlToMeta v + return $ do + MetaMap m' <- m + v'' <- v' + return (MetaMap $ M.insert (T.unpack k) v'' m')) + (return $ MetaMap M.empty) + alist +yamlToMeta _ = return $ return $ MetaString "" + +stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () -mmdTitleBlock :: MarkdownParser () +mmdTitleBlock :: PandocMonad m => MarkdownParser m () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block firstPair <- kvPair False @@ -340,49 +353,44 @@ mmdTitleBlock = try $ do updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue) kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') val <- trim <$> manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) guard $ allowEmpty || not (null val) let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val + let val' = MetaBlocks $ B.toList $ B.plain $B.text val return (key',val') -parseMarkdown :: MarkdownParser Pandoc +parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc parseMarkdown = do - -- markdown allows raw HTML - updateState $ \state -> state { stateOptions = - let oldOpts = stateOptions state in - oldOpts{ readerParseRaw = True } } optional titleBlock blocks <- parseBlocks st <- getState - let meta = runF (stateMeta' st) st - let Pandoc _ bs = B.doc $ runF blocks st - eastAsianLineBreaks <- option False $ - True <$ guardEnabled Ext_east_asian_line_breaks - return $ (if eastAsianLineBreaks - then bottomUp softBreakFilter - else id) $ Pandoc meta bs - -softBreakFilter :: [Inline] -> [Inline] -softBreakFilter (x:SoftBreak:y:zs) = - case (stringify x, stringify y) of - (xs@(_:_), (c:_)) - | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs - _ -> x:SoftBreak:y:zs -softBreakFilter xs = xs - -referenceKey :: MarkdownParser (F Blocks) + -- check for notes with no corresponding note references + let notesUsed = stateNoteRefs st + let notesDefined = M.keys (stateNotes' st) + mapM_ (\n -> unless (n `Set.member` notesUsed) $ + case M.lookup n (stateNotes' st) of + Just (pos, _) -> report (NoteDefinedButNotUsed n pos) + Nothing -> throwError $ + PandocShouldNeverHappenError "note not found") + notesDefined + let doc = runF (do Pandoc _ bs <- B.doc <$> blocks + meta <- stateMeta' st + return $ Pandoc meta bs) st + reportLogMessages + return doc + +referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do pos <- getPosition skipNonindentSpaces (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL = liftM unwords $ many $ try $ do + let sourceURL = fmap unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes @@ -392,7 +400,9 @@ referenceKey = try $ do src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle attr <- option nullAttr $ try $ - guardEnabled Ext_link_attributes >> skipSpaces >> attributes + do guardEnabled Ext_link_attributes + skipSpaces >> optional newline >> skipSpaces + attributes addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes >> many (try $ spnl >> keyValAttr) blanklines @@ -402,18 +412,22 @@ referenceKey = try $ do let oldkeys = stateKeys st let key = toKey raw case M.lookup key oldkeys of - Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" - Nothing -> return () + Just (t,a) | not (t == target && a == attr') -> + -- We don't warn on two duplicate keys if the targets are also + -- the same. This can happen naturally with --reference-location=block + -- or section. See #3701. + logMessage $ DuplicateLinkReference raw pos + _ -> return () updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty -referenceTitle :: MarkdownParser String +referenceTitle :: PandocMonad m => MarkdownParser m String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar -- A link title in quotes -quotedTitle :: Char -> MarkdownParser String +quotedTitle :: PandocMonad m => Char -> MarkdownParser m String quotedTitle c = try $ do char c notFollowedBy spaces @@ -425,7 +439,7 @@ quotedTitle c = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: MarkdownParser (F Blocks) +abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks) abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -436,23 +450,23 @@ abbrevKey = do blanklines return $ return mempty -noteMarker :: MarkdownParser String +noteMarker :: PandocMonad m => MarkdownParser m String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') -rawLine :: MarkdownParser String +rawLine :: PandocMonad m => MarkdownParser m String rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: MarkdownParser String +rawLines :: PandocMonad m => MarkdownParser m String rawLines = do first <- anyLine rest <- many rawLine return $ unlines (first:rest) -noteBlock :: MarkdownParser (F Blocks) +noteBlock :: PandocMonad m => MarkdownParser m (F Blocks) noteBlock = try $ do pos <- getPosition skipNonindentSpaces @@ -464,26 +478,23 @@ noteBlock = try $ do rest <- many $ try $ blanklines >> indentSpaces >> rawLines let raw = unlines (first:rest) ++ "\n" optional blanklines - parsed <- parseFromString parseBlocks raw - let newnote = (ref, parsed) + parsed <- parseFromString' parseBlocks raw oldnotes <- stateNotes' <$> getState - case lookup ref oldnotes of - Just _ -> addWarning (Just pos) $ "Duplicate note reference `" ++ ref ++ "'" + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos Nothing -> return () - updateState $ \s -> s { stateNotes' = newnote : oldnotes } + updateState $ \s -> s { stateNotes' = M.insert ref (pos, parsed) oldnotes } return mempty -- -- parsing blocks -- -parseBlocks :: MarkdownParser (F Blocks) +parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: MarkdownParser (F Blocks) +block :: PandocMonad m => MarkdownParser m (F Blocks) block = do - tr <- getOption readerTrace - pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock @@ -493,10 +504,10 @@ block = do , header , lhsCodeBlock , divHtml + , divFenced , htmlBlock , table , codeBlockIndented - , guardEnabled Ext_latex_macros *> (macro >>= return . return) , rawTeXBlock , lineBlock , blockQuote @@ -509,30 +520,29 @@ block = do , para , plain ] <?> "block" - when tr $ do - st <- getState - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList $ runF res st)) (return ()) + trace (take 60 $ show $ B.toList $ runF res defaultParserState) return res -- -- header blocks -- -header :: MarkdownParser (F Blocks) +header :: PandocMonad m => MarkdownParser m (F Blocks) header = setextHeader <|> atxHeader <?> "header" -atxChar :: MarkdownParser Char +atxChar :: PandocMonad m => MarkdownParser m Char atxChar = do exts <- getOption readerExtensions - return $ if Set.member Ext_literate_haskell exts - then '=' else '#' + return $ if extensionEnabled Ext_literate_haskell exts + then '=' + else '#' -atxHeader :: MarkdownParser (F Blocks) +atxHeader :: PandocMonad m => MarkdownParser m (F Blocks) atxHeader = try $ do - level <- atxChar >>= many1 . char >>= return . length + level <- fmap length (atxChar >>= many1 . char) notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list + guardDisabled Ext_space_in_atx_header <|> notFollowedBy nonspaceChar skipSpaces (text, raw) <- withRaw $ trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) @@ -542,7 +552,7 @@ atxHeader = try $ do <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -atxClosing :: MarkdownParser Attr +atxClosing :: PandocMonad m => MarkdownParser m Attr atxClosing = try $ do attr' <- option nullAttr (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) @@ -553,7 +563,7 @@ atxClosing = try $ do blanklines return attr -setextHeaderEnd :: MarkdownParser Attr +setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr setextHeaderEnd = try $ do attr <- option nullAttr $ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) @@ -561,13 +571,18 @@ setextHeaderEnd = try $ do blanklines return attr -mmdHeaderIdentifier :: MarkdownParser Attr +mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr mmdHeaderIdentifier = do - ident <- stripFirstAndLast . snd <$> reference + (_, raw) <- reference + let raw' = trim $ stripFirstAndLast raw + let ident = concat $ words $ map toLower raw' + let attr = (ident, [], []) + guardDisabled Ext_implicit_header_references + <|> registerImplicitHeader raw' attr skipSpaces - return (ident,[],[]) + return attr -setextHeader :: MarkdownParser (F Blocks) +setextHeader :: PandocMonad m => MarkdownParser m (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. @@ -579,13 +594,13 @@ setextHeader = try $ do underlineChar <- oneOf setextHChars many (char underlineChar) blanklines - let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 + let level = fromMaybe 0 (elemIndex underlineChar setextHChars) + 1 attr' <- registerHeader attr (runF text defaultParserState) guardDisabled Ext_implicit_header_references <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: String -> Attr -> MarkdownParser () +registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m () registerImplicitHeader raw attr@(ident, _, _) = do let key = toKey $ "[" ++ raw ++ "]" updateState (\s -> s { stateHeaderKeys = @@ -595,7 +610,7 @@ registerImplicitHeader raw attr@(ident, _, _) = do -- hrule block -- -hrule :: Parser [Char] st (F Blocks) +hrule :: PandocMonad m => ParserT [Char] st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -609,20 +624,21 @@ hrule = try $ do -- code blocks -- -indentedLine :: MarkdownParser String -indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") +indentedLine :: PandocMonad m => MarkdownParser m String +indentedLine = indentSpaces >> anyLineNewline -blockDelimiter :: (Char -> Bool) +blockDelimiter :: PandocMonad m + => (Char -> Bool) -> Maybe Int - -> Parser [Char] st Int + -> ParserT [Char] ParserState m Int blockDelimiter f len = try $ do + skipNonindentSpaces c <- lookAhead (satisfy f) case len of Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length + Nothing -> fmap ((+ 3) . length) (count 3 (char c) >> many (char c)) -attributes :: MarkdownParser Attr +attributes :: PandocMonad m => MarkdownParser m Attr attributes = try $ do char '{' spnl @@ -630,28 +646,28 @@ attributes = try $ do char '}' return $ foldl (\x f -> f x) nullAttr attrs -attribute :: MarkdownParser (Attr -> Attr) +attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr -identifier :: MarkdownParser String +identifier :: PandocMonad m => MarkdownParser m String identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." return (first:rest) -identifierAttr :: MarkdownParser (Attr -> Attr) +identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) identifierAttr = try $ do char '#' result <- identifier return $ \(_,cs,kvs) -> (result,cs,kvs) -classAttr :: MarkdownParser (Attr -> Attr) +classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) classAttr = try $ do char '.' result <- identifier return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs) -keyValAttr :: MarkdownParser (Attr -> Attr) +keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) keyValAttr = try $ do key <- identifier char '=' @@ -664,33 +680,53 @@ keyValAttr = try $ do "class" -> (id',cs ++ words val,kvs) _ -> (id',cs,kvs ++ [(key,val)]) -specialAttr :: MarkdownParser (Attr -> Attr) +specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -codeBlockFenced :: MarkdownParser (F Blocks) +rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute = do + char '{' + skipMany spaceChar + char '=' + format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + skipMany spaceChar + char '}' + return format + +codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do + indentchars <- nonindentSpaces + let indentLevel = length indentchars c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing skipMany spaceChar - attr <- option ([],[],[]) $ - try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_fenced_code_attributes >> attributes) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) blankline - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) + contents <- intercalate "\n" <$> + manyTill (gobbleAtMostSpaces indentLevel >> anyLine) + (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ return $ + case rawattr of + Left syn -> B.rawBlock syn contents + Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers toLanguageId :: String -> String toLanguageId = map toLower . go - where go "c++" = "cpp" + where go "c++" = "cpp" go "objective-c" = "objectivec" - go x = x + go x = x -codeBlockIndented :: MarkdownParser (F Blocks) +codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -701,7 +737,7 @@ codeBlockIndented = do return $ return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: MarkdownParser (F Blocks) +lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks) lhsCodeBlock = do guardEnabled Ext_literate_haskell (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> @@ -709,7 +745,7 @@ lhsCodeBlock = do <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: MarkdownParser String +lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline @@ -717,13 +753,13 @@ lhsCodeBlockLaTeX = try $ do blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: MarkdownParser String +lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: MarkdownParser String +lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: Char -> MarkdownParser String +lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -735,7 +771,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> Parser [Char] st String +birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -746,10 +782,10 @@ birdTrackLine c = try $ do -- block quotes -- -emailBlockQuoteStart :: MarkdownParser Char +emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') -emailBlockQuote :: MarkdownParser [String] +emailBlockQuote :: PandocMonad m => MarkdownParser m [String] emailBlockQuote = try $ do emailBlockQuoteStart let emailLine = many $ nonEndline <|> try @@ -763,113 +799,136 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: MarkdownParser (F Blocks) +blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" + contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n" return $ B.blockQuote <$> contents -- -- list blocks -- -bulletListStart :: MarkdownParser () +bulletListStart :: PandocMonad m => MarkdownParser m () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context - startpos <- sourceColumn <$> getPosition skipNonindentSpaces notFollowedBy' (() <$ hrule) -- because hrules start out just like lists satisfy isBulletListMarker - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - () <$ atMostSpaces (tabStop - (endpos - startpos)) + gobbleSpaces 1 <|> () <$ lookAhead newline + try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) <|> return () -anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim) -anyOrderedListStart = try $ do +orderedListStart :: PandocMonad m + => Maybe (ListNumberStyle, ListNumberDelim) + -> MarkdownParser m (Int, ListNumberStyle, ListNumberDelim) +orderedListStart mbstydelim = try $ do optional newline -- if preceded by a Plain block in a list context - startpos <- sourceColumn <$> getPosition skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number - res <- do guardDisabled Ext_fancy_lists - start <- many1 digit >>= safeRead - char '.' - return (start, DefaultStyle, DefaultDelim) - <|> do (num, style, delim) <- anyOrderedListMarker - -- if it could be an abbreviated first name, - -- insist on more than one space - when (delim == Period && (style == UpperAlpha || - (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ - () <$ spaceChar - return (num, style, delim) - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - atMostSpaces (tabStop - (endpos - startpos)) - return res - -listStart :: MarkdownParser () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) - -listLine :: MarkdownParser String -listLine = try $ do - notFollowedBy' (do indentSpaces - many spaceChar + (do guardDisabled Ext_fancy_lists + start <- many1 digit >>= safeRead + char '.' + gobbleSpaces 1 <|> () <$ lookAhead newline + optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) + return (start, DefaultStyle, DefaultDelim)) + <|> + (do (num, style, delim) <- maybe + anyOrderedListMarker + (\(sty,delim) -> (\start -> (start,sty,delim)) <$> + orderedListMarker sty delim) + mbstydelim + gobbleSpaces 1 <|> () <$ lookAhead newline + -- if it could be an abbreviated first name, + -- insist on more than one space + when (delim == Period && (style == UpperAlpha || + (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ + () <$ lookAhead (newline <|> spaceChar) + optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) + return (num, style, delim)) + +listStart :: PandocMonad m => MarkdownParser m () +listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing) + +listLine :: PandocMonad m => Int -> MarkdownParser m String +listLine continuationIndent = try $ do + notFollowedBy' (do gobbleSpaces continuationIndent + skipMany spaceChar listStart) notFollowedByHtmlCloser - optional (() <$ indentSpaces) + notFollowedByDivCloser + optional (() <$ gobbleSpaces continuationIndent) listLineCommon -listLineCommon :: MarkdownParser String +listLineCommon :: PandocMonad m => MarkdownParser m String listLineCommon = concat <$> manyTill ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') - <|> liftM snd (htmlTag isCommentTag) + <|> fmap snd (htmlTag isCommentTag) <|> count 1 anyChar ) newline -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: MarkdownParser a - -> MarkdownParser String -rawListItem start = try $ do +rawListItem :: PandocMonad m + => Bool -- four space rule + -> MarkdownParser m a + -> MarkdownParser m (String, Int) +rawListItem fourSpaceRule start = try $ do + pos1 <- getPosition start + pos2 <- getPosition + let continuationIndent = if fourSpaceRule + then 4 + else sourceColumn pos2 - sourceColumn pos1 first <- listLineCommon - rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine) + rest <- many (do notFollowedBy listStart + notFollowedBy (() <$ codeBlockFenced) + notFollowedBy blankline + listLine continuationIndent) blanks <- many blankline - return $ unlines (first:rest) ++ blanks + let result = unlines (first:rest) ++ blanks + return (result, continuationIndent) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: MarkdownParser String -listContinuation = try $ do - lookAhead indentSpaces - result <- many1 listContinuationLine +listContinuation :: PandocMonad m => Int -> MarkdownParser m String +listContinuation continuationIndent = try $ do + x <- try $ do + notFollowedBy blankline + notFollowedByHtmlCloser + notFollowedByDivCloser + gobbleSpaces continuationIndent + anyLineNewline + xs <- many $ try $ do + notFollowedBy blankline + notFollowedByHtmlCloser + notFollowedByDivCloser + gobbleSpaces continuationIndent <|> notFollowedBy' listStart + anyLineNewline blanks <- many blankline - return $ concat result ++ blanks + return $ concat (x:xs) ++ blanks + +notFollowedByDivCloser :: PandocMonad m => MarkdownParser m () +notFollowedByDivCloser = + guardDisabled Ext_fenced_divs <|> + do divLevel <- stateFencedDivLevel <$> getState + guard (divLevel < 1) <|> notFollowedBy divFenceEnd -notFollowedByHtmlCloser :: MarkdownParser () +notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m () notFollowedByHtmlCloser = do inHtmlBlock <- stateInHtmlBlock <$> getState case inHtmlBlock of Just t -> notFollowedBy' $ htmlTag (~== TagClose t) Nothing -> return () -listContinuationLine :: MarkdownParser String -listContinuationLine = try $ do - notFollowedBy blankline - notFollowedBy' listStart - notFollowedByHtmlCloser - optional indentSpaces - result <- anyLine - return $ result ++ "\n" - -listItem :: MarkdownParser a - -> MarkdownParser (F Blocks) -listItem start = try $ do - first <- rawListItem start - continuations <- many listContinuation +listItem :: PandocMonad m + => Bool -- four-space rule + -> MarkdownParser m a + -> MarkdownParser m (F Blocks) +listItem fourSpaceRule start = try $ do + (first, continuationIndent) <- rawListItem fourSpaceRule start + continuations <- many (listContinuation continuationIndent) -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. -- see definition of "endline" @@ -878,39 +937,34 @@ listItem start = try $ do setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may contain various block elements: let raw = concat (first:continuations) - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: MarkdownParser (F Blocks) +orderedList :: PandocMonad m => MarkdownParser m (F Blocks) orderedList = try $ do - (start, style, delim) <- lookAhead anyOrderedListStart + (start, style, delim) <- lookAhead (orderedListStart Nothing) unless (style `elem` [DefaultStyle, Decimal, Example] && delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists - items <- fmap sequence $ many1 $ listItem - ( try $ do - optional newline -- if preceded by Plain block in a list - startpos <- sourceColumn <$> getPosition - skipNonindentSpaces - res <- orderedListMarker style delim - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - atMostSpaces (tabStop - (endpos - startpos)) - return res ) - start' <- option 1 $ guardEnabled Ext_startnum >> return start - return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items - -bulletList :: MarkdownParser (F Blocks) + fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule) + <|> return (style == Example) + items <- fmap sequence $ many1 $ listItem fourSpaceRule + (orderedListStart (Just (style, delim))) + start' <- (start <$ guardEnabled Ext_startnum) <|> return 1 + return $ B.orderedListWith (start', style, delim) <$> fmap compactify items + +bulletList :: PandocMonad m => MarkdownParser m (F Blocks) bulletList = do - items <- fmap sequence $ many1 $ listItem bulletListStart - return $ B.bulletList <$> fmap compactify' items + fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule) + <|> return False + items <- fmap sequence $ many1 $ listItem fourSpaceRule bulletListStart + return $ B.bulletList <$> fmap compactify items -- definition lists -defListMarker :: MarkdownParser () +defListMarker :: PandocMonad m => MarkdownParser m () defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' @@ -921,52 +975,53 @@ defListMarker = do else mzero return () -definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks])) +definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks])) definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' - contents <- mapM (parseFromString parseBlocks . (++"\n")) raw + term <- parseFromString' (trimInlinesF <$> inlines) rawLine' + contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) -defRawBlock :: Bool -> MarkdownParser String +defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker - firstline <- anyLine + firstline <- anyLineNewline let dline = try ( do notFollowedBy blankline notFollowedByHtmlCloser + notFollowedByDivCloser if compact -- laziness not compatible with compact then () <$ indentSpaces else (() <$ indentSpaces) <|> notFollowedBy defListMarker anyLine ) rawlines <- many dline - cont <- liftM concat $ many $ try $ do + cont <- fmap concat $ many $ try $ do trailing <- option "" blanklines ln <- indentSpaces >> notFollowedBy blankline >> anyLine lns <- many dline return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ + return $ trimr (firstline ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" -definitionList :: MarkdownParser (F Blocks) +definitionList :: PandocMonad m => MarkdownParser m (F Blocks) definitionList = try $ do lookAhead (anyLine >> - optional (blankline >> notFollowedBy (table >> return ())) >> + optional (blankline >> notFollowedBy (Control.Monad.void table)) >> -- don't capture table caption as def list! defListMarker) compactDefinitionList <|> normalDefinitionList -compactDefinitionList :: MarkdownParser (F Blocks) +compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks) compactDefinitionList = do guardEnabled Ext_compact_definition_lists items <- fmap sequence $ many1 $ definitionListItem True - return $ B.definitionList <$> fmap compactify'DL items + return $ B.definitionList <$> fmap compactifyDL items -normalDefinitionList :: MarkdownParser (F Blocks) +normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks) normalDefinitionList = do guardEnabled Ext_definition_lists items <- fmap sequence $ many1 $ definitionListItem False @@ -976,10 +1031,10 @@ normalDefinitionList = do -- paragraph block -- -para :: MarkdownParser (F Blocks) +para :: PandocMonad m => MarkdownParser m (F Blocks) para = try $ do exts <- getOption readerExtensions - result <- trimInlinesF . mconcat <$> many1 inline + result <- trimInlinesF <$> inlines1 option (B.plain <$> result) $ try $ do newline @@ -997,29 +1052,35 @@ para = try $ do Just "div" -> () <$ lookAhead (htmlTag (~== TagClose "div")) _ -> mzero + <|> do guardEnabled Ext_fenced_divs + divLevel <- stateFencedDivLevel <$> getState + if divLevel > 0 + then lookAhead divFenceEnd + else mzero return $ do result' <- result case B.toList result' of [Image attr alt (src,tit)] - | Ext_implicit_figures `Set.member` exts -> + | not (null alt) && + Ext_implicit_figures `extensionEnabled` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton $ Image attr alt (src,'f':'i':'g':':':tit) _ -> return $ B.para result' -plain :: MarkdownParser (F Blocks) -plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline +plain :: PandocMonad m => MarkdownParser m (F Blocks) +plain = fmap B.plain . trimInlinesF <$> inlines1 -- -- raw html -- -htmlElement :: MarkdownParser String +htmlElement :: PandocMonad m => MarkdownParser m String htmlElement = rawVerbatimBlock <|> strictHtmlBlock - <|> liftM snd (htmlTag isBlockTag) + <|> fmap snd (htmlTag isBlockTag) -htmlBlock :: MarkdownParser (F Blocks) +htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks) htmlBlock = do guardEnabled Ext_raw_html try (do @@ -1044,43 +1105,59 @@ htmlBlock = do <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) <|> htmlBlock' -htmlBlock' :: MarkdownParser (F Blocks) +htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks) htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ return $ B.rawBlock "html" first + return $ if null first + then mempty + else return $ B.rawBlock "html" first -strictHtmlBlock :: MarkdownParser String +strictHtmlBlock :: PandocMonad m => MarkdownParser m String strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: MarkdownParser String +rawVerbatimBlock :: PandocMonad m => MarkdownParser m String rawVerbatimBlock = htmlInBalanced isVerbTag where isVerbTag (TagOpen "pre" _) = True isVerbTag (TagOpen "style" _) = True isVerbTag (TagOpen "script" _) = True isVerbTag _ = False -rawTeXBlock :: MarkdownParser (F Blocks) +rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) - <|> (B.rawBlock "context" . concat <$> - rawConTeXtEnvironment `sepEndBy1` blankline) - spaces - return $ return result - -rawHtmlBlocks :: MarkdownParser (F Blocks) + lookAhead $ try $ char '\\' >> letter + result <- (B.rawBlock "context" . trim . concat <$> + many1 ((++) <$> (rawConTeXtEnvironment <|> conTeXtCommand) + <*> spnl')) + <|> (B.rawBlock "latex" . trim . concat <$> + many1 ((++) <$> rawLaTeXBlock <*> spnl')) + return $ case B.toList result of + [RawBlock _ cs] + | all (`elem` [' ','\t','\n']) cs -> return mempty + -- don't create a raw block for suppressed macro defs + _ -> return result + +conTeXtCommand :: PandocMonad m => MarkdownParser m String +conTeXtCommand = oneOfStrings ["\\placeformula"] + +rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag + -- we don't want '<td> text' to be a code block: + skipMany spaceChar + indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0 -- try to find closing tag -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just tagtype } let closer = htmlTag (\x -> x ~== TagClose tagtype) - contents <- mconcat <$> many (notFollowedBy' closer >> block) + let block' = do notFollowedBy' closer + gobbleAtMostSpaces indentlevel + block + contents <- mconcat <$> many block' result <- (closer >>= \(_, rawcloser) -> return ( return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> @@ -1101,11 +1178,11 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- line block -- -lineBlock :: MarkdownParser (F Blocks) +lineBlock :: PandocMonad m => MarkdownParser m (F Blocks) lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) + mapM (parseFromString' (trimInlinesF <$> inlines)) return $ B.lineBlock <$> sequence lines' -- @@ -1114,8 +1191,9 @@ lineBlock = try $ do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> Parser [Char] st (Int, Int) +dashedLine :: PandocMonad m + => Char + -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1125,8 +1203,9 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. -simpleTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) +simpleTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m (F [Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -1137,17 +1216,17 @@ simpleTableHeader headless = try $ do let (lengths, lines') = unzip dashes let indices = scanl (+) (length initSp) lines' -- If no header, calculate alignment on basis of first row of text - rawHeads <- liftM (tail . splitStringByIndices (init indices)) $ + rawHeads <- fmap (tail . splitStringByIndices (init indices)) $ if headless then lookAhead anyLine else return rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths + let aligns = zipWith alignType (map (: []) rawHeads) lengths let rawHeads' = if headless then replicate (length dashes) "" else rawHeads heads <- fmap sequence - $ mapM (parseFromString (mconcat <$> many plain)) - $ map trim rawHeads' + $ + mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads' return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings @@ -1161,25 +1240,26 @@ alignType strLst len = let nonempties = filter (not . null) $ map trimr strLst (leftSpace, rightSpace) = case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) - [] -> (False, False) + (x:_) -> (head x `elem` " \t", length x < len) + [] -> (False, False) in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault + (True, False) -> AlignRight + (False, True) -> AlignLeft + (True, True) -> AlignCenter + (False, False) -> AlignDefault -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: MarkdownParser String +tableFooter :: PandocMonad m => MarkdownParser m String tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep :: MarkdownParser Char +tableSep :: PandocMonad m => MarkdownParser m Char tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. -rawTableLine :: [Int] - -> MarkdownParser [String] +rawTableLine :: PandocMonad m + => [Int] + -> MarkdownParser m [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline @@ -1187,31 +1267,34 @@ rawTableLine indices = do splitStringByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). -tableLine :: [Int] - -> MarkdownParser (F [Blocks]) +tableLine :: PandocMonad m + => [Int] + -> MarkdownParser m (F [Blocks]) tableLine indices = rawTableLine indices >>= - fmap sequence . mapM (parseFromString (mconcat <$> many plain)) + fmap sequence . mapM (parseFromString' (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). -multilineRow :: [Int] - -> MarkdownParser (F [Blocks]) +multilineRow :: PandocMonad m + => [Int] + -> MarkdownParser m (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols + fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: MarkdownParser (F Inlines) +tableCaption :: PandocMonad m => MarkdownParser m (F Inlines) tableCaption = try $ do guardEnabled Ext_table_captions skipNonindentSpaces - string ":" <|> string "Table:" - trimInlinesF . mconcat <$> many1 inline <* blanklines + (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:" + trimInlinesF <$> inlines1 <* blanklines -- Parse a simple table with '---' header and one line per row. -simpleTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +simpleTable :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine @@ -1224,13 +1307,15 @@ simpleTable headless = do -- (which may be multiline), then the rows, -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -multilineTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +multilineTable :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter -multilineTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) +multilineTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> MarkdownParser m (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do unless headless $ tableSep >> notFollowedBy blankline @@ -1243,7 +1328,7 @@ multilineTableHeader headless = try $ do let (lengths, lines') = unzip dashes let indices = scanl (+) (length initSp) lines' rawHeadsList <- if headless - then liftM (map (:[]) . tail . + then fmap (map (:[]) . tail . splitStringByIndices (init indices)) $ lookAhead anyLine else return $ transpose $ map (tail . splitStringByIndices (init indices)) @@ -1253,101 +1338,18 @@ multilineTableHeader headless = try $ do then replicate (length dashes) "" else map (unlines . map trim) rawHeadsList heads <- fmap sequence $ - mapM (parseFromString (mconcat <$> many plain)) $ - map trim rawHeads + mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads return (heads, aligns, indices) -- Parse a grid table: starts with row of '-' on top, then header -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow - (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line - -gridPart :: Char -> Parser [Char] st ((Int, Int), Alignment) -gridPart ch = do - leftColon <- option False (True <$ char ':') - dashes <- many1 (char ch) - rightColon <- option False (True <$ char ':') - char '+' - let lengthDashes = length dashes + (if leftColon then 1 else 0) + - (if rightColon then 1 else 0) - let alignment = case (leftColon, rightColon) of - (True, True) -> AlignCenter - (True, False) -> AlignLeft - (False, True) -> AlignRight - (False, False) -> AlignDefault - return ((lengthDashes, lengthDashes + 1), alignment) - -gridDashedLines :: Char -> Parser [Char] st [((Int, Int), Alignment)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse - --- | Separator between rows of grid table. -gridTableSep :: Char -> MarkdownParser Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return [] - else many1 (try (char '|' >> anyLine)) - underDashes <- if headless - then return dashes - else gridDashedLines '=' - guard $ length dashes == length underDashes - let lines' = map (snd . fst) underDashes - let indices = scanl (+) 0 lines' - let aligns = map snd underDashes - let rawHeads = if headless - then replicate (length underDashes) "" - else map (unlines . map trim) $ transpose - $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads - return (heads, aligns, indices) +gridTable :: PandocMonad m => Bool -- ^ Headerless table + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) +gridTable headless = gridTableWith' parseBlocks headless -gridTableRawLine :: [Int] -> MarkdownParser [String] -gridTableRawLine indices = do - char '|' - line <- anyLine - return (gridTableSplitLine indices line) - --- | Parse row of grid table. -gridTableRow :: [Int] - -> MarkdownParser (F [Blocks]) -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols) - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- | Parse footer for a grid table. -gridTableFooter :: MarkdownParser [Char] -gridTableFooter = blanklines - -pipeBreak :: MarkdownParser ([Alignment], [Int]) +pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1359,7 +1361,7 @@ pipeBreak = try $ do blankline return $ unzip (first:rest) -pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar @@ -1372,41 +1374,40 @@ pipeTable = try $ do numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> - fromIntegral (len + 1) / fromIntegral numColumns) - seplengths + fromIntegral len / fromIntegral (sum seplengths)) + seplengths else replicate (length aligns) 0.0 - return $ (aligns, widths, heads', sequence lines'') + return (aligns, widths, heads', sequence lines'') -sepPipe :: MarkdownParser () +sepPipe :: PandocMonad m => MarkdownParser m () sepPipe = try $ do char '|' <|> char '+' notFollowedBy blankline -- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: MarkdownParser (F [Blocks]) +pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks]) pipeTableRow = try $ do scanForPipe skipMany spaceChar openPipe <- (True <$ char '|') <|> return False -- split into cells - let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') + let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') <|> void (noneOf "|\n\r") let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>= - parseFromString pipeTableCell - cells <- cellContents `sepEndBy1` (char '|') + parseFromString' pipeTableCell + cells <- cellContents `sepEndBy1` char '|' -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) blankline return $ sequence cells -pipeTableCell :: MarkdownParser (F Blocks) -pipeTableCell = do - result <- many inline - if null result - then return mempty - else return $ B.plain . mconcat <$> sequence result +pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks) +pipeTableCell = + (do result <- inlines1 + return $ B.plain <$> result) + <|> return mempty -pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1414,15 +1415,15 @@ pipeTableHeaderPart = try $ do right <- optionMaybe (char ':') skipMany spaceChar let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right - return $ - ((case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter), len) + return + (case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: Parser [Char] st () +scanForPipe :: PandocMonad m => ParserT [Char] st m () scanForPipe = do inp <- getInput case break (\c -> c == '\n' || c == '|') inp of @@ -1432,22 +1433,23 @@ scanForPipe = do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in -- Text.Pandoc.Parsing. -tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser (F [Blocks])) - -> MarkdownParser sep - -> MarkdownParser end - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +tableWith :: PandocMonad m + => MarkdownParser m (F [Blocks], [Alignment], [Int]) + -> ([Int] -> MarkdownParser m (F [Blocks])) + -> MarkdownParser m sep + -> MarkdownParser m end + -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns - let widths = if (indices == []) + let widths = if null indices then replicate (length aligns) 0.0 else widthsFromIndices numColumns indices - return $ (aligns, widths, heads, lines') + return (aligns, widths, heads, lines') -table :: MarkdownParser (F Blocks) +table :: PandocMonad m => MarkdownParser m (F Blocks) table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- @@ -1462,8 +1464,8 @@ table = try $ do (gridTable False <|> gridTable True)) <?> "table" optional blanklines caption <- case frontCaption of - Nothing -> option (return mempty) tableCaption - Just c -> return c + Nothing -> option (return mempty) tableCaption + Just c -> return c -- renormalize widths if greater than 100%: let totalWidth = sum widths let widths' = if totalWidth < 1 @@ -1479,7 +1481,13 @@ table = try $ do -- inline -- -inline :: MarkdownParser (F Inlines) +inlines :: PandocMonad m => MarkdownParser m (F Inlines) +inlines = mconcat <$> many inline + +inlines1 :: PandocMonad m => MarkdownParser m (F Inlines) +inlines1 = mconcat <$> many1 inline + +inline :: PandocMonad m => MarkdownParser m (F Inlines) inline = choice [ whitespace , bareURL , str @@ -1499,6 +1507,7 @@ inline = choice [ whitespace , autoLink , spanHtml , rawHtmlInline + , escapedNewline , escapedChar , rawLaTeXInline' , exampleRef @@ -1509,32 +1518,36 @@ inline = choice [ whitespace , ltSign ] <?> "inline" -escapedChar' :: MarkdownParser Char +escapedChar' :: PandocMonad m => MarkdownParser m Char escapedChar' = try $ do char '\\' (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> (guardEnabled Ext_angle_brackets_escapable >> oneOf "\\`*_{}[]()>#+-.!~\"<>") - <|> (guardEnabled Ext_escaped_line_breaks >> char '\n') <|> oneOf "\\`*_{}[]()>#+-.!~\"" -escapedChar :: MarkdownParser (F Inlines) +escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines) +escapedNewline = try $ do + guardEnabled Ext_escaped_line_breaks + char '\\' + lookAhead (char '\n') -- don't consume the newline (see #3730) + return $ return B.linebreak + +escapedChar :: PandocMonad m => MarkdownParser m (F Inlines) escapedChar = do result <- escapedChar' case result of - ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - '\n' -> guardEnabled Ext_escaped_line_breaks >> - return (return B.linebreak) -- "\[newline]" is a linebreak - _ -> return $ return $ B.str [result] + ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space + _ -> return $ return $ B.str [result] -ltSign :: MarkdownParser (F Inlines) +ltSign :: PandocMonad m => MarkdownParser m (F Inlines) ltSign = do guardDisabled Ext_raw_html <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag)) char '<' return $ return $ B.str "<" -exampleRef :: MarkdownParser (F Inlines) +exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' @@ -1542,10 +1555,10 @@ exampleRef = try $ do return $ do st <- askF return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) -symbol :: MarkdownParser (F Inlines) +symbol :: PandocMonad m => MarkdownParser m (F Inlines) symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' @@ -1554,28 +1567,36 @@ symbol = do return $ return $ B.str [result] -- parses inline code, between n `s and n `s -code :: MarkdownParser (F Inlines) +code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + result <- (trim . concat) <$> + many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes - >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result - -math :: MarkdownParser (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> - ((getOption readerSmart >>= guard) *> (return <$> apostrophe) + rawattr <- + (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + <|> + (Right <$> option ("",[],[]) + (try (guardEnabled Ext_inline_code_attributes >> attributes))) + return $ return $ + case rawattr of + Left syn -> B.rawInline syn result + Right attr -> B.codeWith attr result + +math :: PandocMonad m => MarkdownParser m (F Inlines) +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) + <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?> + (guardEnabled Ext_smart *> (return <$> apostrophe) <* notFollowedBy (space <|> satisfy isPunctuation)) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. -enclosure :: Char - -> MarkdownParser (F Inlines) +enclosure :: PandocMonad m + => Char + -> MarkdownParser m (F Inlines) enclosure c = do -- we can't start an enclosure with _ if after a string and -- the intraword_underscores extension is enabled: @@ -1584,14 +1605,14 @@ enclosure c = do <|> (guard =<< notAfterString) cs <- many1 (char c) (return (B.str cs) <>) <$> whitespace - <|> do + <|> case length cs of - 3 -> three c - 2 -> two c mempty - 1 -> one c mempty - _ -> return (return $ B.str cs) + 3 -> three c + 2 -> two c mempty + 1 -> one c mempty + _ -> return (return $ B.str cs) -ender :: Char -> Int -> MarkdownParser () +ender :: PandocMonad m => Char -> Int -> MarkdownParser m () ender c n = try $ do count n (char c) guard (c == '*') @@ -1602,104 +1623,96 @@ ender c n = try $ do -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. -- Otherwise, emit ccc then the results. -three :: Char -> MarkdownParser (F Inlines) +three :: PandocMonad m => Char -> MarkdownParser m (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) <$> contents)) - <|> (ender c 2 >> one c (B.strong <$> contents)) - <|> (ender c 1 >> two c (B.emph <$> contents)) + (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents)) + <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents)) + <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents)) <|> return (return (B.str [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. -two :: Char -> F Inlines -> MarkdownParser (F Inlines) +two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong <$> (prefix' <> contents))) + (ender c 2 >> updateLastStrPos >> + return (B.strong <$> (prefix' <> contents))) <|> return (return (B.str [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. -one :: Char -> F Inlines -> MarkdownParser (F Inlines) +one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) <|> try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty) ) - (ender c 1 >> return (B.emph <$> (prefix' <> contents))) + (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) -strongOrEmph :: MarkdownParser (F Inlines) +strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) strongOrEmph = enclosure '*' <|> enclosure '_' -- | Parses a list of inlines between start and end delimiters. -inlinesBetween :: (Show b) - => MarkdownParser a - -> MarkdownParser b - -> MarkdownParser (F Inlines) +inlinesBetween :: PandocMonad m + => (Show b) + => MarkdownParser m a + -> MarkdownParser m b + -> MarkdownParser m (F Inlines) inlinesBetween start end = (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -strikeout :: MarkdownParser (F Inlines) +strikeout :: PandocMonad m => MarkdownParser m (F Inlines) strikeout = fmap B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: MarkdownParser (F Inlines) +superscript :: PandocMonad m => MarkdownParser m (F Inlines) superscript = fmap B.superscript <$> try (do guardEnabled Ext_superscript char '^' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: MarkdownParser (F Inlines) +subscript :: PandocMonad m => MarkdownParser m (F Inlines) subscript = fmap B.subscript <$> try (do guardEnabled Ext_subscript char '~' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: MarkdownParser (F Inlines) +whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: Parser [Char] st Char +nonEndline :: PandocMonad m => ParserT [Char] st m Char nonEndline = satisfy (/='\n') -str :: MarkdownParser (F Inlines) +str :: PandocMonad m => MarkdownParser m (F Inlines) str = do - result <- many1 alphaNum + result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) updateLastStrPos - let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - isSmart <- getOption readerSmart - if isSmart - then case likelyAbbrev result of - [] -> return $ return $ B.str result - xs -> choice (map (\x -> - try (string x >> oneOf " \n" >> - lookAhead alphaNum >> - return (return $ B.str - $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ return $ B.str result) - else return $ return $ B.str result - --- | if the string matches the beginning of an abbreviation (before --- the first period, return strings that would finish the abbreviation. -likelyAbbrev :: String -> [String] -likelyAbbrev x = - let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", - "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", - "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.", - "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.", - "ch.", "sec.", "cf.", "cp."] - abbrPairs = map (break (=='.')) abbrevs - in map snd $ filter (\(y,_) -> y == x) abbrPairs + (do guardEnabled Ext_smart + abbrevs <- getOption readerAbbreviations + if not (null result) && last result == '.' && result `Set.member` abbrevs + then try (do ils <- whitespace <|> endline + lookAhead alphaNum + return $ do + ils' <- ils + if ils' == B.space + then return (B.str result <> B.str "\160") + else -- linebreak or softbreak + return (ils' <> B.str result <> B.str "\160")) + <|> return (return (B.str result)) + else return (return (B.str result))) + <|> return (return (B.str result)) -- an endline character that can be treated as a space, not a structural break -endline :: MarkdownParser (F Inlines) +endline :: PandocMonad m => MarkdownParser m (F Inlines) endline = try $ do newline notFollowedBy blankline @@ -1711,6 +1724,7 @@ endline = try $ do guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser + notFollowedByDivCloser (eof >> return mempty) <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) @@ -1721,23 +1735,25 @@ endline = try $ do -- -- a reference label for a link -reference :: MarkdownParser (F Inlines, String) -reference = do notFollowedBy' (string "[^") -- footnote reference - withRaw $ trimInlinesF <$> inlinesInBalancedBrackets +reference :: PandocMonad m => MarkdownParser m (F Inlines, String) +reference = do + guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^") + guardDisabled Ext_citations <|> notFollowedBy' (string "[@") + withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -parenthesizedChars :: MarkdownParser [Char] +parenthesizedChars :: PandocMonad m => MarkdownParser m [Char] parenthesizedChars = do result <- charsInBalanced '(' ')' litChar return $ '(' : result ++ ")" -- source for a link, with optional title -source :: MarkdownParser (String, String) +source :: PandocMonad m => MarkdownParser m (String, String) source = do char '(' skipSpaces let urlChunk = try parenthesizedChars - <|> (notFollowedBy (oneOf " )") >> (count 1 litChar)) + <|> (notFollowedBy (oneOf " )") >> count 1 litChar) <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')")) let sourceURL = (unwords . words . concat) <$> many urlChunk let betweenAngles = try $ @@ -1748,10 +1764,10 @@ source = do char ')' return (escapeURI $ trimr src, tit) -linkTitle :: MarkdownParser String +linkTitle :: PandocMonad m => MarkdownParser m String linkTitle = quotedTitle '"' <|> quotedTitle '\'' -link :: MarkdownParser (F Inlines) +link :: PandocMonad m => MarkdownParser m (F Inlines) link = try $ do st <- getState guard $ stateAllowLinks st @@ -1760,21 +1776,31 @@ link = try $ do setState $ st{ stateAllowLinks = True } regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -bracketedSpan :: MarkdownParser (F Inlines) +bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines) bracketedSpan = try $ do guardEnabled Ext_bracketed_spans (lab,_) <- reference attr <- attributes - let (ident,classes,keyvals) = attr - case lookup "style" keyvals of - Just s | null ident && null classes && - map toLower (filter (`notElem` " \t;") s) == - "font-variant:small-caps" - -> return $ B.smallcaps <$> lab - _ -> return $ B.spanWith attr <$> lab - -regLink :: (Attr -> String -> String -> Inlines -> Inlines) - -> F Inlines -> MarkdownParser (F Inlines) + return $ if isSmallCaps attr + then B.smallcaps <$> lab + else B.spanWith attr <$> lab + +-- | We treat a span as SmallCaps if class is "smallcaps" (and +-- no other attributes are set or if style is "font-variant:small-caps" +-- (and no other attributes are set). +isSmallCaps :: Attr -> Bool +isSmallCaps ("",["smallcaps"],[]) = True +isSmallCaps ("",[],kvs) = + case lookup "style" kvs of + Just s -> map toLower (filter (`notElem` " \t;") s) == + "font-variant:small-caps" + Nothing -> False +isSmallCaps _ = False + +regLink :: PandocMonad m + => (Attr -> String -> String -> Inlines -> Inlines) + -> F Inlines + -> MarkdownParser m (F Inlines) regLink constructor lab = try $ do (src, tit) <- source attr <- option nullAttr $ @@ -1782,20 +1808,24 @@ regLink constructor lab = try $ do return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) - -> (F Inlines, String) -> MarkdownParser (F Inlines) +referenceLink :: PandocMonad m + => (Attr -> String -> String -> Inlines -> Inlines) + -> (F Inlines, String) + -> MarkdownParser m (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False (_,raw') <- option (mempty, "") $ - lookAhead (try (guardEnabled Ext_citations >> - spnl >> normalCite >> return (mempty, ""))) + lookAhead (try (do guardEnabled Ext_citations + guardDisabled Ext_spaced_reference_links <|> spnl + normalCite + return (mempty, ""))) <|> - try (spnl >> reference) + try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference) when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' - parsedRaw <- parseFromString (mconcat <$> many inline) raw' - fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + parsedRaw <- parseFromString' inlines raw' + fallback <- parseFromString' inlines $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references let makeFallback = do @@ -1820,11 +1850,11 @@ referenceLink constructor (lab, raw) = do dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB where dropRB (']':xs) = xs - dropRB xs = xs + dropRB xs = xs dropLB ('[':xs) = xs - dropLB xs = xs + dropLB xs = xs -bareURL :: MarkdownParser (F Inlines) +bareURL :: PandocMonad m => MarkdownParser m (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks @@ -1832,7 +1862,7 @@ bareURL = try $ do notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") return $ return $ B.link src "" (B.str orig) -autoLink :: MarkdownParser (F Inlines) +autoLink :: PandocMonad m => MarkdownParser m (F Inlines) autoLink = try $ do getState >>= guard . stateAllowLinks char '<' @@ -1846,7 +1876,7 @@ autoLink = try $ do guardEnabled Ext_link_attributes >> attributes return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra) -image :: MarkdownParser (F Inlines) +image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do char '!' (lab,raw) <- reference @@ -1856,54 +1886,55 @@ image = try $ do _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) -note :: MarkdownParser (F Inlines) +note :: PandocMonad m => MarkdownParser m (F Inlines) note = try $ do guardEnabled Ext_footnotes ref <- noteMarker + updateState $ \st -> st{ stateNoteRefs = Set.insert ref (stateNoteRefs st) } return $ do notes <- asksF stateNotes' - case lookup ref notes of + case M.lookup ref notes of Nothing -> return $ B.str $ "[^" ++ ref ++ "]" - Just contents -> do + Just (_pos, contents) -> do st <- askF -- process the note in a context that doesn't resolve -- notes, to avoid infinite looping with notes inside -- notes: - let contents' = runF contents st{ stateNotes' = [] } + let contents' = runF contents st{ stateNotes' = M.empty } return $ B.note contents' -inlineNote :: MarkdownParser (F Inlines) +inlineNote :: PandocMonad m => MarkdownParser m (F Inlines) inlineNote = try $ do guardEnabled Ext_inline_notes char '^' contents <- inlinesInBalancedBrackets return $ B.note . B.para <$> contents -rawLaTeXInline' :: MarkdownParser (F Inlines) +rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines) rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex - lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s - -- "tex" because it might be context or latex + lookAhead $ try $ char '\\' >> letter + notFollowedBy' rawConTeXtEnvironment + s <- rawLaTeXInline + return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: Parser [Char] st String +rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) - <|> (many1 letter) - contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar)) + <|> many1 letter + contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar) (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String +inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -spanHtml :: MarkdownParser (F Inlines) +spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) spanHtml = try $ do guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) @@ -1911,14 +1942,11 @@ spanHtml = try $ do let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - case lookup "style" keyvals of - Just s | null ident && null classes && - map toLower (filter (`notElem` " \t;") s) == - "font-variant:small-caps" - -> return $ B.smallcaps <$> contents - _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents - -divHtml :: MarkdownParser (F Blocks) + return $ if isSmallCaps (ident, classes, keyvals) + then B.smallcaps <$> contents + else B.spanWith (ident, classes, keyvals) <$> contents + +divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = try $ do guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) @@ -1940,7 +1968,29 @@ divHtml = try $ do else -- avoid backtracing return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents -rawHtmlInline :: MarkdownParser (F Inlines) +divFenced :: PandocMonad m => MarkdownParser m (F Blocks) +divFenced = try $ do + guardEnabled Ext_fenced_divs + string ":::" + skipMany (char ':') + skipMany spaceChar + attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar) + skipMany spaceChar + skipMany (char ':') + blankline + updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 } + bs <- mconcat <$> manyTill block divFenceEnd + updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 } + return $ B.divWith attribs <$> bs + +divFenceEnd :: PandocMonad m => MarkdownParser m () +divFenceEnd = try $ do + string ":::" + skipMany (char ':') + blanklines + return () + +rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines) rawHtmlInline = do guardEnabled Ext_raw_html inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1962,7 +2012,7 @@ rawHtmlInline = do emojiChars :: [Char] emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] -emoji :: MarkdownParser (F Inlines) +emoji :: PandocMonad m => MarkdownParser m (F Inlines) emoji = try $ do guardEnabled Ext_emoji char ':' @@ -1974,21 +2024,22 @@ emoji = try $ do -- Citations -cite :: MarkdownParser (F Inlines) +cite :: PandocMonad m => MarkdownParser m (F Inlines) cite = do guardEnabled Ext_citations - citations <- textualCite + textualCite <|> do (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs - return citations + return $ flip B.cite (B.text raw) <$> cs -textualCite :: MarkdownParser (F Inlines) +textualCite :: PandocMonad m => MarkdownParser m (F Inlines) textualCite = try $ do - (_, key) <- citeKey + (suppressAuthor, key) <- citeKey let first = Citation{ citationId = key , citationPrefix = [] , citationSuffix = [] - , citationMode = AuthorInText + , citationMode = if suppressAuthor + then SuppressAuthor + else AuthorInText , citationNoteNum = 0 , citationHash = 0 } @@ -2003,7 +2054,7 @@ textualCite = try $ do let (spaces',raw') = span isSpace raw spc | null spaces' = mempty | otherwise = B.space - lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' + lab <- parseFromString' inlines $ dropBrackets raw' fallback <- referenceLink B.linkWith (lab,raw') return $ do fallback' <- fallback @@ -2017,7 +2068,7 @@ textualCite = try $ do Just n -> B.str (show n) _ -> B.cite [first] $ B.str $ '@':key) -bareloc :: Citation -> MarkdownParser (F [Citation]) +bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation]) bareloc c = try $ do spnl char '[' @@ -2032,7 +2083,7 @@ bareloc c = try $ do rest' <- rest return $ c{ citationSuffix = B.toList suff' } : rest' -normalCite :: MarkdownParser (F [Citation]) +normalCite :: PandocMonad m => MarkdownParser m (F [Citation]) normalCite = try $ do char '[' spnl @@ -2041,7 +2092,7 @@ normalCite = try $ do char ']' return citations -suffix :: MarkdownParser (F Inlines) +suffix :: PandocMonad m => MarkdownParser m (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl @@ -2050,14 +2101,14 @@ suffix = try $ do then (B.space <>) <$> rest else rest -prefix :: MarkdownParser (F Inlines) +prefix :: PandocMonad m => MarkdownParser m (F Inlines) prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) + manyTill inline (char ']' <|> fmap (const ']') (lookAhead citeKey)) -citeList :: MarkdownParser (F [Citation]) +citeList :: PandocMonad m => MarkdownParser m (F [Citation]) citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) -citation :: MarkdownParser (F Citation) +citation :: PandocMonad m => MarkdownParser m (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey @@ -2065,23 +2116,23 @@ citation = try $ do return $ do x <- pref y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - -smart :: MarkdownParser (F Inlines) + return Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +smart :: PandocMonad m => MarkdownParser m (F Inlines) smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice (map (return <$>) [apostrophe, dash, ellipses]) -singleQuoted :: MarkdownParser (F Inlines) +singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ @@ -2091,10 +2142,10 @@ singleQuoted = try $ do -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: MarkdownParser (F Inlines) +doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return + withQuoteContext InDoubleQuote (doubleQuoteEnd >> return (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + <|> return (return (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 0dea22c53..c19ef2f46 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,7 +1,9 @@ -{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE TypeSynonymInstances #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- - Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> + Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.MediaWiki - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -36,43 +38,50 @@ _ parse templates? -} module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where -import Text.Pandoc.Definition -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) -import Data.Monoid ((<>)) -import Text.Pandoc.Options -import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) -import Text.Pandoc.XML ( fromEntities ) -import Text.Pandoc.Parsing hiding ( nested ) -import Text.Pandoc.Walk ( walk ) -import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim ) import Control.Monad -import Data.List (intersperse, intercalate, isPrefixOf ) -import Text.HTML.TagSoup -import Data.Sequence (viewl, ViewL(..), (<|)) +import Control.Monad.Except (throwError) +import Data.Char (isDigit, isSpace) import qualified Data.Foldable as F +import Data.List (intercalate, intersperse, isPrefixOf) import qualified Data.Map as M +import Data.Maybe (fromMaybe, maybeToList) +import Data.Monoid ((<>)) +import Data.Sequence (ViewL (..), viewl, (<|)) import qualified Data.Set as Set -import Data.Char (isDigit, isSpace) -import Data.Maybe (fromMaybe) -import Text.Printf (printf) -import Debug.Trace (trace) - -import Text.Pandoc.Error +import Data.Text (Text, unpack) +import Text.HTML.TagSoup +import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (nested) +import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) +import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, + trim) +import Text.Pandoc.Walk (walk) +import Text.Pandoc.XML (fromEntities) -- | Read mediawiki from an input string and return a Pandoc document. -readMediaWiki :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readMediaWiki opts s = - readWith parseMediaWiki MWState{ mwOptions = opts - , mwMaxNestingLevel = 4 - , mwNextLinkNumber = 1 - , mwCategoryLinks = [] - , mwHeaderMap = M.empty - , mwIdentifierList = Set.empty - } - (s ++ "\n") +readMediaWiki :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readMediaWiki opts s = do + parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts + , mwMaxNestingLevel = 4 + , mwNextLinkNumber = 1 + , mwCategoryLinks = [] + , mwHeaderMap = M.empty + , mwIdentifierList = Set.empty + , mwLogMessages = [] + , mwInTT = False + } + (unpack (crFilter s) ++ "\n") + case parsed of + Right result -> return result + Left e -> throwError e data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int @@ -80,9 +89,11 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwCategoryLinks :: [Inlines] , mwHeaderMap :: M.Map Inlines String , mwIdentifierList :: Set.Set String + , mwLogMessages :: [LogMessage] + , mwInTT :: Bool } -type MWParser = Parser [Char] MWState +type MWParser m = ParserT [Char] MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions @@ -95,13 +106,17 @@ instance HasIdentifierList MWState where extractIdentifierList = mwIdentifierList updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st } +instance HasLogMessages MWState where + addLogMessage m s = s{ mwLogMessages = m : mwLogMessages s } + getLogMessages = reverse . mwLogMessages + -- -- auxiliary functions -- -- This is used to prevent exponential blowups for things like: -- ''a'''a''a'''a''a'''a''a'''a -nested :: MWParser a -> MWParser a +nested :: PandocMonad m => MWParser m a -> MWParser m a nested p = do nestlevel <- mwMaxNestingLevel `fmap` getState guard $ nestlevel > 0 @@ -116,7 +131,7 @@ specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" -sym :: String -> MWParser () +sym :: PandocMonad m => String -> MWParser m () sym s = () <$ try (string s) newBlockTags :: [String] @@ -131,16 +146,16 @@ isBlockTag' tag = isBlockTag tag isInlineTag' :: Tag String -> Bool isInlineTag' (TagComment _) = True -isInlineTag' t = not (isBlockTag' t) +isInlineTag' t = not (isBlockTag' t) eitherBlockOrInline :: [String] eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", "map", "area", "object"] -htmlComment :: MWParser () +htmlComment :: PandocMonad m => MWParser m () htmlComment = () <$ htmlTag isCommentTag -inlinesInTags :: String -> MWParser Inlines +inlinesInTags :: PandocMonad m => String -> MWParser m Inlines inlinesInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) if '/' `elem` raw -- self-closing tag @@ -148,7 +163,7 @@ inlinesInTags tag = try $ do else trimInlines . mconcat <$> manyTill inline (htmlTag (~== TagClose tag)) -blocksInTags :: String -> MWParser Blocks +blocksInTags :: PandocMonad m => String -> MWParser m Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) let closer = if tag == "li" @@ -162,7 +177,7 @@ blocksInTags tag = try $ do then return mempty else mconcat <$> manyTill block closer -charsInTags :: String -> MWParser [Char] +charsInTags :: PandocMonad m => String -> MWParser m [Char] charsInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) if '/' `elem` raw -- self-closing tag @@ -173,7 +188,7 @@ charsInTags tag = try $ do -- main parser -- -parseMediaWiki :: MWParser Pandoc +parseMediaWiki :: PandocMonad m => MWParser m Pandoc parseMediaWiki = do bs <- mconcat <$> many block spaces @@ -182,16 +197,15 @@ parseMediaWiki = do let categories = if null categoryLinks then mempty else B.para $ mconcat $ intersperse B.space categoryLinks + reportLogMessages return $ B.doc $ bs <> categories -- -- block parsers -- -block :: MWParser Blocks +block :: PandocMonad m => MWParser m Blocks block = do - tr <- getOption readerTrace - pos <- getPosition res <- mempty <$ skipMany1 blankline <|> table <|> header @@ -204,28 +218,28 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - when tr $ - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList res)) (return ()) + trace (take 60 $ show $ B.toList res) return res -para :: MWParser Blocks +para :: PandocMonad m => MWParser m Blocks para = do contents <- trimInlines . mconcat <$> many1 inline if F.all (==Space) contents then return mempty else return $ B.para contents -table :: MWParser Blocks +table :: PandocMonad m => MWParser m Blocks table = do tableStart - styles <- option [] parseAttrs <* blankline + styles <- option [] parseAttrs + skipMany spaceChar + optional blanklines let tableWidth = case lookup "width" styles of Just w -> fromMaybe 1.0 $ parseWidth w Nothing -> 1.0 caption <- option mempty tableCaption optional rowsep - hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!')) + hasheader <- option False $ True <$ lookAhead (skipSpaces *> char '!') (cellspecs',hdr) <- unzip <$> tableRow let widths = map ((tableWidth *) . snd) cellspecs' let restwidth = tableWidth - sum widths @@ -244,10 +258,10 @@ table = do else (replicate cols mempty, hdr:rows') return $ B.table caption cellspecs headers rows -parseAttrs :: MWParser [(String,String)] +parseAttrs :: PandocMonad m => MWParser m [(String,String)] parseAttrs = many1 parseAttr -parseAttr :: MWParser (String, String) +parseAttr :: PandocMonad m => MWParser m (String, String) parseAttr = try $ do skipMany spaceChar k <- many1 letter @@ -256,27 +270,23 @@ parseAttr = try $ do <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) -tableStart :: MWParser () +tableStart :: PandocMonad m => MWParser m () tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|" -tableEnd :: MWParser () +tableEnd :: PandocMonad m => MWParser m () tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}" -rowsep :: MWParser () +rowsep :: PandocMonad m => MWParser m () rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <* - optional parseAttr <* blanklines - -cellsep :: MWParser () -cellsep = try $ - (guardColumnOne *> skipSpaces <* - ( (char '|' <* notFollowedBy (oneOf "-}+")) - <|> (char '!') - ) - ) - <|> (() <$ try (string "||")) - <|> (() <$ try (string "!!")) - -tableCaption :: MWParser Inlines + many (char '-') <* optional parseAttr <* blanklines + +cellsep :: PandocMonad m => MWParser m () +cellsep = try $ do + skipSpaces + (char '|' *> notFollowedBy (oneOf "-}+") *> optional (char '|')) + <|> (char '!' *> optional (char '!')) + +tableCaption :: PandocMonad m => MWParser m Inlines tableCaption = try $ do guardColumnOne skipSpaces @@ -284,19 +294,21 @@ tableCaption = try $ do optional (try $ parseAttr *> skipSpaces *> char '|' *> skipSpaces) (trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline) -tableRow :: MWParser [((Alignment, Double), Blocks)] +tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)] tableRow = try $ skipMany htmlComment *> many tableCell -tableCell :: MWParser ((Alignment, Double), Blocks) +tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks) tableCell = try $ do cellsep skipMany spaceChar attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* notFollowedBy (char '|') skipMany spaceChar + pos' <- getPosition ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> ((snd <$> withRaw table) <|> count 1 anyChar)) - bs <- parseFromString (mconcat <$> many block) ls + bs <- parseFromString (do setPosition pos' + mconcat <$> many block) ls let align = case lookup "align" attrs of Just "left" -> AlignLeft Just "right" -> AlignRight @@ -311,9 +323,9 @@ parseWidth :: String -> Maybe Double parseWidth s = case reverse s of ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) - _ -> Nothing + _ -> Nothing -template :: MWParser String +template :: PandocMonad m => MWParser m String template = try $ do string "{{" notFollowedBy (char '{') @@ -322,7 +334,7 @@ template = try $ do contents <- manyTill chunk (try $ string "}}") return $ "{{" ++ concat contents ++ "}}" -blockTag :: MWParser Blocks +blockTag :: PandocMonad m => MWParser m Blocks blockTag = do (tag, _) <- lookAhead $ htmlTag isBlockTag' case tag of @@ -341,27 +353,27 @@ trimCode :: String -> String trimCode ('\n':xs) = stripTrailingNewlines xs trimCode xs = stripTrailingNewlines xs -syntaxhighlight :: String -> [Attribute String] -> MWParser Blocks +syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks syntaxhighlight tag attrs = try $ do let mblang = lookup "lang" attrs let mbstart = lookup "start" attrs let mbline = lookup "line" attrs - let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline + let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart contents <- charsInTags tag return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents -hrule :: MWParser Blocks +hrule :: PandocMonad m => MWParser m Blocks hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline) -guardColumnOne :: MWParser () +guardColumnOne :: PandocMonad m => MWParser m () guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) -preformatted :: MWParser Blocks +preformatted :: PandocMonad m => MWParser m Blocks preformatted = try $ do guardColumnOne char ' ' - let endline' = B.linebreak <$ (try $ newline <* char ' ') + let endline' = B.linebreak <$ try (newline <* char ' ') let whitespace' = B.str <$> many1 ('\160' <$ spaceChar) let spToNbsp ' ' = '\160' spToNbsp x = x @@ -370,7 +382,7 @@ preformatted = try $ do (htmlTag (~== TagOpen "nowiki" []) *> manyTill anyChar (htmlTag (~== TagClose "nowiki"))) let inline' = whitespace' <|> endline' <|> nowiki' - <|> (try $ notFollowedBy newline *> inline) + <|> try (notFollowedBy newline *> inline) contents <- mconcat <$> many1 inline' let spacesStr (Str xs) = all isSpace xs spacesStr _ = False @@ -385,10 +397,10 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode strToCode x = x normalizeCode [] = [] normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = - normalizeCode $ (Code a1 (x ++ y)) : zs + normalizeCode $ Code a1 (x ++ y) : zs normalizeCode (x:xs) = x : normalizeCode xs -header :: MWParser Blocks +header :: PandocMonad m => MWParser m Blocks header = try $ do guardColumnOne eqs <- many1 (char '=') @@ -398,13 +410,13 @@ header = try $ do attr <- registerHeader nullAttr contents return $ B.headerWith attr lev contents -bulletList :: MWParser Blocks +bulletList :: PandocMonad m => MWParser m Blocks bulletList = B.bulletList <$> ( many1 (listItem '*') <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* optional (htmlTag (~== TagClose "ul"))) ) -orderedList :: MWParser Blocks +orderedList :: PandocMonad m => MWParser m Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) <|> try @@ -415,10 +427,10 @@ orderedList = let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) -definitionList :: MWParser Blocks +definitionList :: PandocMonad m => MWParser m Blocks definitionList = B.definitionList <$> many1 defListItem -defListItem :: MWParser (Inlines, [Blocks]) +defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks]) defListItem = try $ do terms <- mconcat . intersperse B.linebreak <$> many defListTerm -- we allow dd with no dt, or dt with no dd @@ -429,44 +441,49 @@ defListItem = try $ do else many (listItem ':') return (terms, defs) -defListTerm :: MWParser Inlines -defListTerm = char ';' >> skipMany spaceChar >> anyLine >>= - parseFromString (trimInlines . mconcat <$> many inline) +defListTerm :: PandocMonad m => MWParser m Inlines +defListTerm = do + guardColumnOne + char ';' + skipMany spaceChar + pos' <- getPosition + anyLine >>= parseFromString (do setPosition pos' + trimInlines . mconcat <$> many inline) -listStart :: Char -> MWParser () +listStart :: PandocMonad m => Char -> MWParser m () listStart c = char c *> notFollowedBy listStartChar -listStartChar :: MWParser Char +listStartChar :: PandocMonad m => MWParser m Char listStartChar = oneOf "*#;:" -anyListStart :: MWParser Char -anyListStart = char '*' - <|> char '#' - <|> char ':' - <|> char ';' +anyListStart :: PandocMonad m => MWParser m Char +anyListStart = guardColumnOne >> oneOf "*#:;" -li :: MWParser Blocks +li :: PandocMonad m => MWParser m Blocks li = lookAhead (htmlTag (~== TagOpen "li" [])) *> (firstParaToPlain <$> blocksInTags "li") <* spaces -listItem :: Char -> MWParser Blocks +listItem :: PandocMonad m => Char -> MWParser m Blocks listItem c = try $ do + guardColumnOne extras <- many (try $ char c <* lookAhead listStartChar) if null extras then listItem' c else do skipMany spaceChar + pos' <- getPosition first <- concat <$> manyTill listChunk newline rest <- many (try $ string extras *> lookAhead listStartChar *> (concat <$> manyTill listChunk newline)) - contents <- parseFromString (many1 $ listItem' c) + contents <- parseFromString (do setPosition pos' + many1 $ listItem' c) (unlines (first : rest)) case c of - '*' -> return $ B.bulletList contents - '#' -> return $ B.orderedList contents - ':' -> return $ B.definitionList [(mempty, contents)] - _ -> mzero + '*' -> return $ B.bulletList contents + '#' -> return $ B.orderedList contents + ':' -> return $ B.definitionList [(mempty, contents)] + _ -> mzero -- The point of this is to handle stuff like -- * {{cite book @@ -475,30 +492,32 @@ listItem c = try $ do -- }} -- * next list item -- which seems to be valid mediawiki. -listChunk :: MWParser String +listChunk :: PandocMonad m => MWParser m String listChunk = template <|> count 1 anyChar -listItem' :: Char -> MWParser Blocks +listItem' :: PandocMonad m => Char -> MWParser m Blocks listItem' c = try $ do listStart c skipMany spaceChar + pos' <- getPosition first <- concat <$> manyTill listChunk newline rest <- many (try $ char c *> lookAhead listStartChar *> (concat <$> manyTill listChunk newline)) - parseFromString (firstParaToPlain . mconcat <$> many1 block) + parseFromString (do setPosition pos' + firstParaToPlain . mconcat <$> many1 block) $ unlines $ first : rest firstParaToPlain :: Blocks -> Blocks firstParaToPlain contents = case viewl (B.unMany contents) of - (Para xs) :< ys -> B.Many $ (Plain xs) <| ys - _ -> contents + Para xs :< ys -> B.Many $ Plain xs <| ys + _ -> contents -- -- inline parsers -- -inline :: MWParser Inlines +inline :: PandocMonad m => MWParser m Inlines inline = whitespace <|> url <|> str @@ -516,10 +535,10 @@ inline = whitespace <|> (B.rawInline "mediawiki" <$> template) <|> special -str :: MWParser Inlines +str :: PandocMonad m => MWParser m Inlines str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) -math :: MWParser Inlines +math :: PandocMonad m => MWParser m Inlines math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) <|> (B.math . trim <$> charsInTags "math") <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) @@ -529,13 +548,13 @@ math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) mStart = string "\\(" mEnd = try (string "\\)") -variable :: MWParser String +variable :: PandocMonad m => MWParser m String variable = try $ do string "{{{" contents <- manyTill anyChar (try $ string "}}}") return $ "{{{" ++ contents ++ "}}}" -inlineTag :: MWParser Inlines +inlineTag :: PandocMonad m => MWParser m Inlines inlineTag = do (tag, _) <- lookAhead $ htmlTag isInlineTag' case tag of @@ -553,22 +572,27 @@ inlineTag = do TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" TagOpen "code" _ -> encode <$> inlinesInTags "code" - TagOpen "tt" _ -> encode <$> inlinesInTags "tt" + TagOpen "tt" _ -> do + inTT <- mwInTT <$> getState + updateState $ \st -> st{ mwInTT = True } + result <- encode <$> inlinesInTags "tt" + updateState $ \st -> st{ mwInTT = inTT } + return result TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) -special :: MWParser Inlines +special :: PandocMonad m => MWParser m Inlines special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> oneOf specialChars) -inlineHtml :: MWParser Inlines +inlineHtml :: PandocMonad m => MWParser m Inlines inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' -whitespace :: MWParser Inlines +whitespace :: PandocMonad m => MWParser m Inlines whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment) <|> B.softbreak <$ endline -endline :: MWParser () +endline :: PandocMonad m => MWParser m () endline = () <$ try (newline <* notFollowedBy spaceChar <* notFollowedBy newline <* @@ -577,30 +601,30 @@ endline = () <$ try (newline <* notFollowedBy' header <* notFollowedBy anyListStart) -imageIdentifiers :: [MWParser ()] +imageIdentifiers :: PandocMonad m => [MWParser m ()] imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", "Bild"] -image :: MWParser Inlines +image :: PandocMonad m => MWParser m Inlines image = try $ do sym "[[" choice imageIdentifiers fname <- addUnderscores <$> many1 (noneOf "|]") _ <- many imageOption - dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px") + dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px") <|> return [] _ <- many imageOption let kvs = case dims of - w:[] -> [("width", w)] - w:(h:[]) -> [("width", w), ("height", h)] - _ -> [] + [w] -> [("width", w)] + [w, h] -> [("width", w), ("height", h)] + _ -> [] let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption -imageOption :: MWParser String +imageOption :: PandocMonad m => MWParser m String imageOption = try $ char '|' *> opt where opt = try (oneOfStrings [ "border", "thumbnail", "frameless" @@ -612,14 +636,14 @@ imageOption = try $ char '|' *> opt <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) collapseUnderscores :: String -> String -collapseUnderscores [] = [] +collapseUnderscores [] = [] collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) -collapseUnderscores (x:xs) = x : collapseUnderscores xs +collapseUnderscores (x:xs) = x : collapseUnderscores xs addUnderscores :: String -> String addUnderscores = collapseUnderscores . intercalate "_" . words -internalLink :: MWParser Inlines +internalLink :: PandocMonad m => MWParser m Inlines internalLink = try $ do sym "[[" pagename <- unwords . words <$> many (noneOf "|]") @@ -627,7 +651,7 @@ internalLink = try $ do ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) -- the "pipe trick" -- [[Help:Contents|] -> "Contents" - <|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) ) + <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) ) sym "]]" linktrail <- B.text <$> many letter let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) @@ -637,7 +661,7 @@ internalLink = try $ do return mempty else return link -externalLink :: MWParser Inlines +externalLink :: PandocMonad m => MWParser m Inlines externalLink = try $ do char '[' (_, src) <- uri @@ -649,29 +673,33 @@ externalLink = try $ do return $ B.str $ show num return $ B.link src "" lab -url :: MWParser Inlines +url :: PandocMonad m => MWParser m Inlines url = do (orig, src) <- uri return $ B.link src "" (B.str orig) -- | Parses a list of inlines between start and end delimiters. -inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines +inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines inlinesBetween start end = (trimInlines . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -emph :: MWParser Inlines +emph :: PandocMonad m => MWParser m Inlines emph = B.emph <$> nested (inlinesBetween start end) where start = sym "''" >> lookAhead nonspaceChar end = try $ notFollowedBy' (() <$ strong) >> sym "''" -strong :: MWParser Inlines +strong :: PandocMonad m => MWParser m Inlines strong = B.strong <$> nested (inlinesBetween start end) where start = sym "'''" >> lookAhead nonspaceChar end = try $ sym "'''" -doubleQuotes :: MWParser Inlines -doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) +doubleQuotes :: PandocMonad m => MWParser m Inlines +doubleQuotes = do + guardEnabled Ext_smart + inTT <- mwInTT <$> getState + guard (not inTT) + B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar closeDoubleQuote = try $ sym "\"" diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs new file mode 100644 index 000000000..4a9523e84 --- /dev/null +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -0,0 +1,924 @@ +{-# LANGUAGE FlexibleContexts #-} +{- + Copyright (C) 2017-2018 Alexander Krotov <ilabdsf@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Muse + Copyright : Copyright (C) 2017-2018 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov <ilabdsf@gmail.com> + Stability : alpha + Portability : portable + +Conversion of Muse text to 'Pandoc' document. +-} +{- +TODO: +- Page breaks (five "*") +- Org tables +- table.el tables +- Images with attributes (floating and width) +- Citations and <biblio> +- <play> environment +-} +module Text.Pandoc.Readers.Muse (readMuse) where + +import Control.Monad +import Control.Monad.Except (throwError) +import Data.Char (isLetter) +import Data.Default +import Data.List (stripPrefix, intercalate) +import Data.List.Split (splitOn) +import qualified Data.Map as M +import qualified Data.Set as Set +import Data.Maybe (fromMaybe, isNothing) +import Data.Text (Text, unpack) +import System.FilePath (takeExtension) +import Text.HTML.TagSoup +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (F) +import Text.Pandoc.Readers.HTML (htmlTag) +import Text.Pandoc.Shared (crFilter, underlineSpan) + +-- | Read Muse from an input string and return a Pandoc document. +readMuse :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readMuse opts s = do + res <- readWithM parseMuse def{ museOptions = opts } (unpack (crFilter s)) + case res of + Left e -> throwError e + Right d -> return d + +type F = Future MuseState + +data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata + , museOptions :: ReaderOptions + , museHeaders :: M.Map Inlines String -- ^ List of headers and ids (used for implicit ref links) + , museIdentifierList :: Set.Set String + , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed + , museLogMessages :: [LogMessage] + , museNotes :: M.Map String (SourcePos, F Blocks) + , museInLink :: Bool + , museInPara :: Bool + } + +instance Default MuseState where + def = defaultMuseState + +defaultMuseState :: MuseState +defaultMuseState = MuseState { museMeta = return nullMeta + , museOptions = def + , museHeaders = M.empty + , museIdentifierList = Set.empty + , museLastStrPos = Nothing + , museLogMessages = [] + , museNotes = M.empty + , museInLink = False + , museInPara = False + } + +type MuseParser = ParserT String MuseState + +instance HasReaderOptions MuseState where + extractReaderOptions = museOptions + +instance HasHeaderMap MuseState where + extractHeaderMap = museHeaders + updateHeaderMap f st = st{ museHeaders = f $ museHeaders st } + +instance HasIdentifierList MuseState where + extractIdentifierList = museIdentifierList + updateIdentifierList f st = st{ museIdentifierList = f $ museIdentifierList st } + +instance HasLastStrPosition MuseState where + setLastStrPos pos st = st{ museLastStrPos = Just pos } + getLastStrPos st = museLastStrPos st + +instance HasLogMessages MuseState where + addLogMessage m s = s{ museLogMessages = m : museLogMessages s } + getLogMessages = reverse . museLogMessages + +-- +-- main parser +-- + +parseMuse :: PandocMonad m => MuseParser m Pandoc +parseMuse = do + many directive + blocks <- parseBlocks + st <- getState + let doc = runF (do Pandoc _ bs <- B.doc <$> blocks + meta <- museMeta st + return $ Pandoc meta bs) st + reportLogMessages + return doc + +-- +-- utility functions +-- + +eol :: Stream s m Char => ParserT s st m () +eol = void newline <|> eof + +htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlElement tag = try $ do + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + content <- manyTill anyChar endtag + return (htmlAttrToPandoc attr, content) + where + endtag = void $ htmlTag (~== TagClose tag) + +htmlBlock :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlBlock tag = try $ do + many spaceChar + res <- htmlElement tag + manyTill spaceChar eol + return res + +htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc attrs = (ident, classes, keyvals) + where + ident = fromMaybe "" $ lookup "id" attrs + classes = maybe [] words $ lookup "class" attrs + keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + +parseHtmlContent :: PandocMonad m + => String -> MuseParser m (Attr, F Blocks) +parseHtmlContent tag = try $ do + many spaceChar + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + manyTill spaceChar eol + content <- parseBlocksTill (try $ manyTill spaceChar endtag) + manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline + return (htmlAttrToPandoc attr, content) + where + endtag = void $ htmlTag (~== TagClose tag) + +commonPrefix :: String -> String -> String +commonPrefix _ [] = [] +commonPrefix [] _ = [] +commonPrefix (x:xs) (y:ys) + | x == y = x : commonPrefix xs ys + | otherwise = [] + +atStart :: PandocMonad m => MuseParser m a -> MuseParser m a +atStart p = do + pos <- getPosition + st <- getState + guard $ museLastStrPos st /= Just pos + p + +someUntil :: (Stream s m t) + => ParserT s u m a + -> ParserT s u m b + -> ParserT s u m ([a], b) +someUntil p end = do + first <- p + (rest, e) <- manyUntil p end + return (first:rest, e) + +-- +-- directive parsers +-- + +-- While not documented, Emacs Muse allows "-" in directive name +parseDirectiveKey :: PandocMonad m => MuseParser m String +parseDirectiveKey = do + char '#' + many (letter <|> char '-') + +parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseEmacsDirective = do + key <- parseDirectiveKey + spaceChar + value <- trimInlinesF . mconcat <$> manyTill (choice inlineList) eol + return (key, value) + +parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseAmuseDirective = do + key <- parseDirectiveKey + many1 spaceChar + value <- trimInlinesF . mconcat <$> many1Till inline endOfDirective + many blankline + return (key, value) + where + endOfDirective = lookAhead $ eof <|> try (newline >> (void blankline <|> void parseDirectiveKey)) + +directive :: PandocMonad m => MuseParser m () +directive = do + ext <- getOption readerExtensions + (key, value) <- if extensionEnabled Ext_amuse ext then parseAmuseDirective else parseEmacsDirective + updateState $ \st -> st { museMeta = B.setMeta (translateKey key) <$> value <*> museMeta st } + where translateKey "cover" = "cover-image" + translateKey x = x + +-- +-- block parsers +-- + +parseBlocks :: PandocMonad m + => MuseParser m (F Blocks) +parseBlocks = + try parseEnd <|> + try blockStart <|> + try listStart <|> + try paraStart + where + parseEnd = mempty <$ eof + blockStart = do first <- header <|> blockElements <|> emacsNoteBlock + rest <- parseBlocks + return $ first B.<> rest + listStart = do + updateState (\st -> st { museInPara = False }) + (first, rest) <- anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks + return $ first B.<> rest + paraStart = do + indent <- length <$> many spaceChar + (first, rest) <- paraUntil parseBlocks + let first' = if indent >= 2 && indent < 6 then B.blockQuote <$> first else first + return $ first' B.<> rest + +parseBlocksTill :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks) +parseBlocksTill end = + try parseEnd <|> + try blockStart <|> + try listStart <|> + try paraStart + where + parseEnd = mempty <$ end + blockStart = do first <- blockElements + rest <- continuation + return $ first B.<> rest + listStart = do + updateState (\st -> st { museInPara = False }) + (first, e) <- anyListUntil ((Left <$> end) <|> (Right <$> continuation)) + case e of + Left _ -> return first + Right rest -> return $ first B.<> rest + paraStart = do (first, e) <- paraUntil ((Left <$> end) <|> (Right <$> continuation)) + case e of + Left _ -> return first + Right rest -> return $ first B.<> rest + continuation = parseBlocksTill end + +listItemContentsUntil :: PandocMonad m + => Int + -> MuseParser m a + -> MuseParser m a + -> MuseParser m (F Blocks, a) +listItemContentsUntil col pre end = + try blockStart <|> + try listStart <|> + try paraStart + where + parsePre = do e <- pre + return (mempty, e) + parseEnd = do e <- end + return (mempty, e) + paraStart = do + (first, e) <- paraUntil ((Left <$> pre) <|> (Right <$> continuation) <|> (Left <$> end)) + case e of + Left ee -> return (first, ee) + Right (rest, ee) -> return (first B.<> rest, ee) + blockStart = do first <- blockElements + (rest, e) <- parsePre <|> continuation <|> parseEnd + return (first B.<> rest, e) + listStart = do + updateState (\st -> st { museInPara = False }) + (first, e) <- anyListUntil ((Left <$> pre) <|> (Right <$> continuation) <|> (Left <$> end)) + case e of + Left ee -> return (first, ee) + Right (rest, ee) -> return (first B.<> rest, ee) + continuation = try $ do blank <- optionMaybe blankline + skipMany blankline + indentWith col + updateState (\st -> st { museInPara = museInPara st && isNothing blank }) + listItemContentsUntil col pre end + +parseBlock :: PandocMonad m => MuseParser m (F Blocks) +parseBlock = do + res <- blockElements <|> para + trace (take 60 $ show $ B.toList $ runF res def) + return res + where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) + +blockElements :: PandocMonad m => MuseParser m (F Blocks) +blockElements = do + updateState (\st -> st { museInPara = False }) + choice [ mempty <$ blankline + , comment + , separator + , example + , exampleTag + , literalTag + , centerTag + , rightTag + , quoteTag + , divTag + , verseTag + , lineBlock + , table + , commentTag + ] + +comment :: PandocMonad m => MuseParser m (F Blocks) +comment = try $ do + char ';' + optional (spaceChar >> many (noneOf "\n")) + eol + return mempty + +separator :: PandocMonad m => MuseParser m (F Blocks) +separator = try $ do + string "----" + many $ char '-' + many spaceChar + eol + return $ return B.horizontalRule + +header :: PandocMonad m => MuseParser m (F Blocks) +header = try $ do + getPosition >>= \pos -> guard (sourceColumn pos == 1) + level <- fmap length $ many1 $ char '*' + guard $ level <= 5 + spaceChar + content <- trimInlinesF . mconcat <$> manyTill inline eol + anchorId <- option "" parseAnchor + attr <- registerHeader (anchorId, [], []) (runF content def) + return $ B.headerWith attr level <$> content + +example :: PandocMonad m => MuseParser m (F Blocks) +example = try $ do + string "{{{" + optional blankline + contents <- manyTill anyChar $ try (optional blankline >> string "}}}") + return $ return $ B.codeBlock contents + +-- Trim up to one newline from the beginning of the string. +lchop :: String -> String +lchop s = case s of + '\n':ss -> ss + _ -> s + +-- Trim up to one newline from the end of the string. +rchop :: String -> String +rchop = reverse . lchop . reverse + +dropSpacePrefix :: [String] -> [String] +dropSpacePrefix lns = + map (drop maxIndent) lns + where flns = filter (not . all (== ' ')) lns + maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns + +exampleTag :: PandocMonad m => MuseParser m (F Blocks) +exampleTag = try $ do + (attr, contents) <- htmlBlock "example" + return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents + +literalTag :: PandocMonad m => MuseParser m (F Blocks) +literalTag = + (return . rawBlock) <$> htmlBlock "literal" + where + -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML + format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs + rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content + +-- <center> tag is ignored +centerTag :: PandocMonad m => MuseParser m (F Blocks) +centerTag = snd <$> parseHtmlContent "center" + +-- <right> tag is ignored +rightTag :: PandocMonad m => MuseParser m (F Blocks) +rightTag = snd <$> parseHtmlContent "right" + +quoteTag :: PandocMonad m => MuseParser m (F Blocks) +quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote" + +-- <div> tag is supported by Emacs Muse, but not Amusewiki 2.025 +divTag :: PandocMonad m => MuseParser m (F Blocks) +divTag = do + (attrs, content) <- parseHtmlContent "div" + return $ B.divWith attrs <$> content + +verseLine :: PandocMonad m => MuseParser m (F Inlines) +verseLine = do + indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty + rest <- manyTill (choice inlineList) newline + return $ trimInlinesF $ mconcat (pure indent : rest) + +verseLines :: PandocMonad m => MuseParser m (F Blocks) +verseLines = do + lns <- many verseLine + return $ B.lineBlock <$> sequence lns + +verseTag :: PandocMonad m => MuseParser m (F Blocks) +verseTag = do + (_, content) <- htmlBlock "verse" + parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content) + +commentTag :: PandocMonad m => MuseParser m (F Blocks) +commentTag = htmlBlock "comment" >> return mempty + +-- Indented paragraph is either center, right or quote +paraUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +paraUntil end = do + state <- getState + guard $ not $ museInPara state + setState $ state{ museInPara = True } + (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end) + updateState (\st -> st { museInPara = False }) + return (fmap B.para $ trimInlinesF $ mconcat l, e) + +noteMarker :: PandocMonad m => MuseParser m String +noteMarker = try $ do + char '[' + first <- oneOf "123456789" + rest <- manyTill digit (char ']') + return $ first:rest + +-- Amusewiki version of note +-- Parsing is similar to list item, except that note marker is used instead of list marker +amuseNoteBlockUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +amuseNoteBlockUntil end = try $ do + guardEnabled Ext_amuse + ref <- noteMarker <* spaceChar + pos <- getPosition + updateState (\st -> st { museInPara = False }) + (content, e) <- listItemContentsUntil (sourceColumn pos - 1) (fail "x") end + oldnotes <- museNotes <$> getState + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos + Nothing -> return () + updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + return (mempty, e) + +-- Emacs version of note +-- Notes are allowed only at the end of text, no indentation is required. +emacsNoteBlock :: PandocMonad m => MuseParser m (F Blocks) +emacsNoteBlock = try $ do + guardDisabled Ext_amuse + pos <- getPosition + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillNote + oldnotes <- museNotes <$> getState + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos + Nothing -> return () + updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + return mempty + where + blocksTillNote = + many1Till parseBlock (eof <|> () <$ lookAhead noteMarker) + +-- +-- Verse markup +-- + +lineVerseLine :: PandocMonad m => MuseParser m (F Inlines) +lineVerseLine = try $ do + string "> " + indent <- B.str <$> many (char ' ' >> pure '\160') + rest <- manyTill (choice inlineList) eol + return $ trimInlinesF $ mconcat (pure indent : rest) + +blanklineVerseLine :: PandocMonad m => MuseParser m (F Inlines) +blanklineVerseLine = try $ do + char '>' + blankline + pure mempty + +lineBlock :: PandocMonad m => MuseParser m (F Blocks) +lineBlock = try $ do + col <- sourceColumn <$> getPosition + lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1)) + return $ B.lineBlock <$> sequence lns + +-- +-- lists +-- + +bulletListItemsUntil :: PandocMonad m + => Int + -> MuseParser m a + -> MuseParser m ([F Blocks], a) +bulletListItemsUntil indent end = try $ do + char '-' + void spaceChar <|> lookAhead eol + updateState (\st -> st { museInPara = False }) + (x, e) <- listItemContentsUntil (indent + 2) (Right <$> try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (Left <$> end) + case e of + Left ee -> return ([x], ee) + Right (xs, ee) -> return (x:xs, ee) + +bulletListUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +bulletListUntil end = try $ do + many spaceChar + pos <- getPosition + let indent = sourceColumn pos - 1 + guard $ indent /= 0 + (items, e) <- bulletListItemsUntil indent end + return (B.bulletList <$> sequence items, e) + +-- | Parses an ordered list marker and returns list attributes. +anyMuseOrderedListMarker :: PandocMonad m => MuseParser m ListAttributes +anyMuseOrderedListMarker = do + (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha + char '.' + return (start, style, Period) + +museOrderedListMarker :: PandocMonad m + => ListNumberStyle + -> MuseParser m Int +museOrderedListMarker style = do + (_, start) <- case style of + Decimal -> decimal + UpperRoman -> upperRoman + LowerRoman -> lowerRoman + UpperAlpha -> upperAlpha + LowerAlpha -> lowerAlpha + _ -> fail "Unhandled case" + char '.' + return start + +orderedListItemsUntil :: PandocMonad m + => Int + -> ListNumberStyle + -> MuseParser m a + -> MuseParser m ([F Blocks], a) +orderedListItemsUntil indent style end = + continuation + where + continuation = try $ do + pos <- getPosition + void spaceChar <|> lookAhead eol + updateState (\st -> st { museInPara = False }) + (x, e) <- listItemContentsUntil (sourceColumn pos) (Right <$> try (optionMaybe blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (Left <$> end) + case e of + Left ee -> return ([x], ee) + Right (xs, ee) -> return (x:xs, ee) + +orderedListUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +orderedListUntil end = try $ do + many spaceChar + pos <- getPosition + let indent = sourceColumn pos - 1 + guard $ indent /= 0 + p@(_, style, _) <- anyMuseOrderedListMarker + guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman] + (items, e) <- orderedListItemsUntil indent style end + return (B.orderedListWith p <$> sequence items, e) + +descriptionsUntil :: PandocMonad m + => Int + -> MuseParser m a + -> MuseParser m ([F Blocks], a) +descriptionsUntil indent end = do + void spaceChar <|> lookAhead eol + updateState (\st -> st { museInPara = False }) + (x, e) <- listItemContentsUntil indent (Right <$> try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (Left <$> end) + case e of + Right (xs, ee) -> return (x:xs, ee) + Left ee -> return ([x], ee) + +definitionListItemsUntil :: PandocMonad m + => Int + -> MuseParser m a + -> MuseParser m ([F (Inlines, [Blocks])], a) +definitionListItemsUntil indent end = + continuation + where + continuation = try $ do + pos <- getPosition + term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::") + (x, e) <- descriptionsUntil (sourceColumn pos) ((Right <$> try (optional blankline >> indentWith indent >> continuation)) <|> (Left <$> end)) + let xx = do + term' <- term + x' <- sequence x + return (term', x') + case e of + Left ee -> return ([xx], ee) + Right (xs, ee) -> return (xx:xs, ee) + +definitionListUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +definitionListUntil end = try $ do + many spaceChar + pos <- getPosition + let indent = sourceColumn pos - 1 + guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse + (items, e) <- definitionListItemsUntil indent end + return (B.definitionList <$> sequence items, e) + +anyListUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +anyListUntil end = + bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end + +-- +-- tables +-- + +data MuseTable = MuseTable + { museTableCaption :: Inlines + , museTableHeaders :: [[Blocks]] + , museTableRows :: [[Blocks]] + , museTableFooters :: [[Blocks]] + } + +data MuseTableElement = MuseHeaderRow (F [Blocks]) + | MuseBodyRow (F [Blocks]) + | MuseFooterRow (F [Blocks]) + | MuseCaption (F Inlines) + +museToPandocTable :: MuseTable -> Blocks +museToPandocTable (MuseTable caption headers body footers) = + B.table caption attrs headRow rows + where ncol = maximum (0 : map length (headers ++ body ++ footers)) + attrs = replicate ncol (AlignDefault, 0.0) + headRow = if null headers then [] else head headers + rows = (if null headers then [] else tail headers) ++ body ++ footers + +museAppendElement :: MuseTable + -> MuseTableElement + -> F MuseTable +museAppendElement tbl element = + case element of + MuseHeaderRow row -> do + row' <- row + return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] } + MuseBodyRow row -> do + row' <- row + return tbl{ museTableRows = museTableRows tbl ++ [row'] } + MuseFooterRow row-> do + row' <- row + return tbl{ museTableFooters = museTableFooters tbl ++ [row'] } + MuseCaption inlines -> do + inlines' <- inlines + return tbl{ museTableCaption = inlines' } + +tableCell :: PandocMonad m => MuseParser m (F Blocks) +tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) + where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol + +tableElements :: PandocMonad m => MuseParser m [MuseTableElement] +tableElements = tableParseElement `sepEndBy1` eol + +elementsToTable :: [MuseTableElement] -> F MuseTable +elementsToTable = foldM museAppendElement emptyTable + where emptyTable = MuseTable mempty mempty mempty mempty + +table :: PandocMonad m => MuseParser m (F Blocks) +table = try $ do + rows <- tableElements + let tbl = elementsToTable rows + let pandocTbl = museToPandocTable <$> tbl :: F Blocks + return pandocTbl + +tableParseElement :: PandocMonad m => MuseParser m MuseTableElement +tableParseElement = tableParseHeader + <|> tableParseBody + <|> tableParseFooter + <|> tableParseCaption + +tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks]) +tableParseRow n = try $ do + fields <- tableCell `sepBy2` fieldSep + return $ sequence fields + where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p) + fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline)) + +tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement +tableParseHeader = MuseHeaderRow <$> tableParseRow 2 + +tableParseBody :: PandocMonad m => MuseParser m MuseTableElement +tableParseBody = MuseBodyRow <$> tableParseRow 1 + +tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement +tableParseFooter = MuseFooterRow <$> tableParseRow 3 + +tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement +tableParseCaption = try $ do + many spaceChar + string "|+" + MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|")) + +-- +-- inline parsers +-- + +inlineList :: PandocMonad m => [MuseParser m (F Inlines)] +inlineList = [ whitespace + , br + , anchor + , footnote + , strong + , strongTag + , emph + , emphTag + , underlined + , superscriptTag + , subscriptTag + , strikeoutTag + , verbatimTag + , nbsp + , link + , code + , codeTag + , inlineLiteralTag + , str + , symbol + ] + +inline :: PandocMonad m => MuseParser m (F Inlines) +inline = endline <|> choice inlineList <?> "inline" + +endline :: PandocMonad m => MuseParser m (F Inlines) +endline = try $ do + newline + notFollowedBy blankline + returnF B.softbreak + +parseAnchor :: PandocMonad m => MuseParser m String +parseAnchor = try $ do + getPosition >>= \pos -> guard (sourceColumn pos == 1) + char '#' + first <- letter + rest <- many (letter <|> digit) + skipMany spaceChar <|> void newline + return $ first:rest + +anchor :: PandocMonad m => MuseParser m (F Inlines) +anchor = try $ do + anchorId <- parseAnchor + return $ return $ B.spanWith (anchorId, [], []) mempty + +footnote :: PandocMonad m => MuseParser m (F Inlines) +footnote = try $ do + ref <- noteMarker + return $ do + notes <- asksF museNotes + case M.lookup ref notes of + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just (_pos, contents) -> do + st <- askF + let contents' = runF contents st { museNotes = M.empty } + return $ B.note contents' + +whitespace :: PandocMonad m => MuseParser m (F Inlines) +whitespace = try $ do + skipMany1 spaceChar + return $ return B.space + +br :: PandocMonad m => MuseParser m (F Inlines) +br = try $ do + string "<br>" + return $ return B.linebreak + +emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines) +emphasisBetween c = try $ enclosedInlines c c + +enclosedInlines :: (PandocMonad m, Show a, Show b) + => MuseParser m a + -> MuseParser m b + -> MuseParser m (F Inlines) +enclosedInlines start end = try $ + trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter)) + +inlineTag :: PandocMonad m + => (Inlines -> Inlines) + -> String + -> MuseParser m (F Inlines) +inlineTag f tag = try $ do + htmlTag (~== TagOpen tag []) + res <- manyTill inline (void $ htmlTag (~== TagClose tag)) + return $ f <$> mconcat res + +strongTag :: PandocMonad m => MuseParser m (F Inlines) +strongTag = inlineTag B.strong "strong" + +strong :: PandocMonad m => MuseParser m (F Inlines) +strong = fmap B.strong <$> emphasisBetween (string "**") + +emph :: PandocMonad m => MuseParser m (F Inlines) +emph = fmap B.emph <$> emphasisBetween (char '*') + +underlined :: PandocMonad m => MuseParser m (F Inlines) +underlined = do + guardDisabled Ext_amuse -- Supported only by Emacs Muse + fmap underlineSpan <$> emphasisBetween (char '_') + +emphTag :: PandocMonad m => MuseParser m (F Inlines) +emphTag = inlineTag B.emph "em" + +superscriptTag :: PandocMonad m => MuseParser m (F Inlines) +superscriptTag = inlineTag B.superscript "sup" + +subscriptTag :: PandocMonad m => MuseParser m (F Inlines) +subscriptTag = inlineTag B.subscript "sub" + +strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) +strikeoutTag = inlineTag B.strikeout "del" + +verbatimTag :: PandocMonad m => MuseParser m (F Inlines) +verbatimTag = return . B.text . snd <$> htmlElement "verbatim" + +nbsp :: PandocMonad m => MuseParser m (F Inlines) +nbsp = try $ do + string "~~" + return $ return $ B.str "\160" + +code :: PandocMonad m => MuseParser m (F Inlines) +code = try $ do + atStart $ char '=' + contents <- many1Till (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) $ char '=' + guard $ not $ null contents + guard $ head contents `notElem` " \t\n" + guard $ last contents `notElem` " \t\n" + notFollowedBy $ satisfy isLetter + return $ return $ B.code contents + +codeTag :: PandocMonad m => MuseParser m (F Inlines) +codeTag = do + (attrs, content) <- htmlElement "code" + return $ return $ B.codeWith attrs content + +inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) +inlineLiteralTag = + (return . rawInline) <$> htmlElement "literal" + where + -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML + format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs + rawInline (attrs, content) = B.rawInline (format attrs) content + +str :: PandocMonad m => MuseParser m (F Inlines) +str = do + result <- many1 alphaNum + updateLastStrPos + return $ return $ B.str result + +symbol :: PandocMonad m => MuseParser m (F Inlines) +symbol = return . B.str <$> count 1 nonspaceChar + +link :: PandocMonad m => MuseParser m (F Inlines) +link = try $ do + st <- getState + guard $ not $ museInLink st + setState $ st{ museInLink = True } + (url, title, content) <- linkText + updateState (\state -> state { museInLink = False }) + return $ case stripPrefix "URL:" url of + Nothing -> if isImageUrl url + then B.image url title <$> fromMaybe (return mempty) content + else B.link url title <$> fromMaybe (return $ B.str url) content + Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content + where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el + imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] + isImageUrl = (`elem` imageExtensions) . takeExtension + +linkContent :: PandocMonad m => MuseParser m (F Inlines) +linkContent = do + char '[' + trimInlinesF . mconcat <$> many1Till inline (string "]") + +linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines)) +linkText = do + string "[[" + url <- many1Till anyChar $ char ']' + content <- optionMaybe linkContent + char ']' + return (url, "", content) diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 4ec164e19..88f6bfe8f 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native - Copyright : Copyright (C) 2011-2015 John MacFarlane + Copyright : Copyright (C) 2011-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,8 +31,12 @@ Conversion of a string representation of a pandoc type (@Pandoc@, module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition +import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Shared (safeRead) +import Control.Monad.Except (throwError) +import Data.Text (Text, unpack) +import Text.Pandoc.Class import Text.Pandoc.Error -- | Read native formatted text and return a Pandoc document. @@ -45,19 +49,23 @@ import Text.Pandoc.Error -- -- > Pandoc nullMeta [Plain [Str "hi"]] -- -readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) +readNative :: PandocMonad m + => ReaderOptions + -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readNative _ s = + case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of + Right doc -> return doc + Left _ -> throwError $ PandocParseError "couldn't read native" -readBlocks :: String -> Either PandocError [Block] -readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) +readBlocks :: Text -> Either PandocError [Block] +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s)) -readBlock :: String -> Either PandocError Block -readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) +readBlock :: Text -> Either PandocError Block +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s)) -readInlines :: String -> Either PandocError [Inline] -readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) - -readInline :: String -> Either PandocError Inline -readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s) +readInlines :: Text -> Either PandocError [Inline] +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s)) +readInline :: Text -> Either PandocError Inline +readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s)) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 4dcf5e5a0..82266748f 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,26 +1,28 @@ {-# LANGUAGE FlexibleContexts #-} module Text.Pandoc.Readers.OPML ( readOPML ) where +import Control.Monad.State.Strict import Data.Char (toUpper) -import Text.Pandoc.Options -import Text.Pandoc.Definition +import Data.Default +import Data.Generics +import Data.Maybe (fromMaybe) +import Data.Text (Text, pack, unpack) +import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Options import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) +import Text.Pandoc.Shared (crFilter, blocksToInlines') import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) -import Data.Generics -import Control.Monad.State -import Data.Default -import Control.Monad.Except -import Text.Pandoc.Error -type OPML = ExceptT PandocError (State OPMLState) +type OPML m = StateT OPMLState m data OPMLState = OPMLState{ opmlSectionLevel :: Int , opmlDocTitle :: Inlines , opmlDocAuthors :: [Inlines] , opmlDocDate :: Inlines + , opmlOptions :: ReaderOptions } deriving Show instance Default OPMLState where @@ -28,14 +30,19 @@ instance Default OPMLState where , opmlDocTitle = mempty , opmlDocAuthors = [] , opmlDocDate = mempty - } + , opmlOptions = def + } -readOPML :: ReaderOptions -> String -> Either PandocError Pandoc -readOPML _ inp = setTitle (opmlDocTitle st') - . setAuthors (opmlDocAuthors st') - . setDate (opmlDocDate st') - . doc . mconcat <$> bs - where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp) +readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readOPML opts inp = do + (bs, st') <- runStateT + (mapM parseBlock $ normalizeTree $ + parseXML (unpack (crFilter inp))) def{ opmlOptions = opts } + return $ + setTitle (opmlDocTitle st') $ + setAuthors (opmlDocAuthors st') $ + setDate (opmlDocDate st') $ + doc $ mconcat bs -- normalize input, consolidating adjacent Text and CRef elements normalizeTree :: [Content] -> [Content] @@ -53,30 +60,32 @@ normalizeTree = everywhere (mkT go) go xs = xs convertEntity :: String -> String -convertEntity e = maybe (map toUpper e) id (lookupEntity e) +convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" attrValue :: String -> Element -> String attrValue attr elt = - case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of - Just z -> z - Nothing -> "" + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -exceptT :: Either PandocError a -> OPML a -exceptT = either throwError return +-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a +-- exceptT = either throwError return -asHtml :: String -> OPML Inlines -asHtml s = (\(Pandoc _ bs) -> case bs of - [Plain ils] -> fromList ils - _ -> mempty) <$> exceptT (readHtml def s) +asHtml :: PandocMonad m => String -> OPML m Inlines +asHtml s = do + opts <- gets opmlOptions + Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } (pack s) + return $ blocksToInlines' bs -asMarkdown :: String -> OPML Blocks -asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s) +asMarkdown :: PandocMonad m => String -> OPML m Blocks +asMarkdown s = do + opts <- gets opmlOptions + Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } (pack s) + return $ fromList bs -getBlocks :: Element -> OPML Blocks -getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) +getBlocks :: PandocMonad m => Element -> OPML m Blocks +getBlocks e = mconcat <$> mapM parseBlock (elContent e) -parseBlock :: Content -> OPML Blocks +parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 046fb4d6d..875c18a85 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -32,31 +32,45 @@ Entry point to the odt reader. module Text.Pandoc.Readers.Odt ( readOdt ) where -import Codec.Archive.Zip -import qualified Text.XML.Light as XML +import Codec.Archive.Zip +import qualified Text.XML.Light as XML -import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString.Lazy as B -import System.FilePath +import System.FilePath -import Text.Pandoc.Definition -import Text.Pandoc.Error -import Text.Pandoc.Options -import Text.Pandoc.MediaBag -import qualified Text.Pandoc.UTF8 as UTF8 +import Control.Monad.Except (throwError) -import Text.Pandoc.Readers.Odt.ContentReader -import Text.Pandoc.Readers.Odt.StyleReader +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.MediaBag +import Text.Pandoc.Options +import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Shared (filteredFilesFromArchive) +import Text.Pandoc.Readers.Odt.ContentReader +import Text.Pandoc.Readers.Odt.StyleReader --- -readOdt :: ReaderOptions +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Shared (filteredFilesFromArchive) + +readOdt :: PandocMonad m + => ReaderOptions -> B.ByteString - -> Either PandocError (Pandoc, MediaBag) -readOdt _ bytes = bytesToOdt bytes-- of + -> m Pandoc +readOdt opts bytes = case readOdt' opts bytes of + Right (doc, mb) -> do + P.setMediaBag mb + return doc + Left e -> throwError e + +-- +readOdt' :: ReaderOptions + -> B.ByteString + -> Either PandocError (Pandoc, MediaBag) +readOdt' _ bytes = bytesToOdt bytes-- of -- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag) -- Left err -> Left err @@ -64,7 +78,7 @@ readOdt _ bytes = bytesToOdt bytes-- of bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag) bytesToOdt bytes = case toArchiveOrFail bytes of Right archive -> archiveToOdt archive - Left _ -> Left $ ParseFailure "Couldn't parse odt file." + Left _ -> Left $ PandocParseError "Couldn't parse odt file." -- archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) @@ -85,7 +99,7 @@ archiveToOdt archive | otherwise -- Not very detailed, but I don't think more information would be helpful - = Left $ ParseFailure "Couldn't parse odt file." + = Left $ PandocParseError "Couldn't parse odt file." where filePathIsOdtMedia :: FilePath -> Bool filePathIsOdtMedia fp = diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index b056f1ecc..73bed545e 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE Arrows #-} -{-# LANGUAGE TupleSections #-} + {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TupleSections #-} {- Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> @@ -38,17 +38,17 @@ faster and easier to implement this way. module Text.Pandoc.Readers.Odt.Arrows.State where -import Prelude hiding ( foldr, foldl ) +import Prelude hiding (foldl, foldr) -import qualified Control.Category as Cat -import Control.Arrow -import Control.Monad +import Control.Arrow +import qualified Control.Category as Cat +import Control.Monad -import Data.Foldable -import Data.Monoid +import Data.Foldable +import Data.Monoid -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Arrows.Utils +import Text.Pandoc.Readers.Odt.Generic.Fallible newtype ArrowState state a b = ArrowState @@ -59,10 +59,6 @@ withState :: (state -> a -> (state, b)) -> ArrowState state a b withState = ArrowState . uncurry -- | Constructor -withState' :: ((state, a) -> (state, b)) -> ArrowState state a b -withState' = ArrowState - --- | Constructor modifyState :: (state -> state ) -> ArrowState state a a modifyState = ArrowState . first @@ -79,10 +75,6 @@ extractFromState :: (state -> b ) -> ArrowState state x b extractFromState f = ArrowState $ \(state,_) -> (state, f state) -- | Constructor -withUnchangedState :: (state -> a -> b ) -> ArrowState state a b -withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a) - --- | Constructor tryModifyState :: (state -> Either f state) -> ArrowState state a (Either f a) tryModifyState f = ArrowState $ \(state,a) @@ -90,7 +82,7 @@ tryModifyState f = ArrowState $ \(state,a) instance Cat.Category (ArrowState s) where id = ArrowState id - arrow2 . arrow1 = ArrowState $ (runArrowState arrow2).(runArrowState arrow1) + arrow2 . arrow1 = ArrowState $ runArrowState arrow2 . runArrowState arrow1 instance Arrow (ArrowState state) where arr = ignoringState @@ -107,43 +99,9 @@ instance ArrowChoice (ArrowState state) where Left l -> (s, Left l) Right r -> second Right $ runArrowState a (s,r) -instance ArrowLoop (ArrowState state) where - loop a = ArrowState $ \(s, x) - -> let (s', (x', _d)) = runArrowState a (s, (x, _d)) - in (s', x') - instance ArrowApply (ArrowState state) where app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b) - --- | Embedding of a state arrow in a state arrow with a different state type. -switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y -switchState there back a = ArrowState $ first there - >>> runArrowState a - >>> first back - --- | Lift a state arrow to modify the state of an arrow --- with a different state type. -liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x -liftToState unlift a = modifyState $ unlift &&& id - >>> runArrowState a - >>> snd - --- | Switches the type of the state temporarily. --- Drops the intermediate result state, behaving like the identity arrow, --- save for side effects in the state. -withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x -withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst - --- | Switches the type of the state temporarily. --- Returns the resulting sub-state. -withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s' -withSubState' unlift a = ArrowState $ runArrowState unlift - >>> switch - >>> runArrowState a - >>> switch - where switch (x,y) = (y,x) - -- | Switches the type of the state temporarily. -- Drops the intermediate result state, behaving like a fallible -- identity arrow, save for side effects in the state. @@ -175,49 +133,13 @@ foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f where a' x (s',m) = second (m <>) $ runArrowState a (s',x) --- | Fold a state arrow through something 'Foldable'. Collect the results --- in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. -foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m -foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f - where a' (s',m) x = second (m <>) $ runArrowState a (s',x) - --- | Fold a fallible state arrow through something 'Foldable'. Collect the --- results in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. --- If the iteration fails, the state will be reset to the initial one. -foldS' :: (Foldable f, Monoid m) - => ArrowState s x (Either e m) - -> ArrowState s (f x) (Either e m) -foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f - where a' s x (s',Right m) = case runArrowState a (s',x) of - (s'',Right m') -> (s'', Right (m <> m')) - (_ ,Left e ) -> (s , Left e) - a' _ _ e = e - --- | Fold a fallible state arrow through something 'Foldable'. Collect the --- results in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. --- If the iteration fails, the state will be reset to the initial one. -foldSL' :: (Foldable f, Monoid m) - => ArrowState s x (Either e m) - -> ArrowState s (f x) (Either e m) -foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f - where a' s (s',Right m) x = case runArrowState a (s',x) of - (s'',Right m') -> (s'', Right (m <> m')) - (_ ,Left e ) -> (s , Left e) - a' _ e _ = e - -- | Fold a state arrow through something 'Foldable'. Collect the results in a -- 'MonadPlus'. iterateS :: (Foldable f, MonadPlus m) => ArrowState s x y -> ArrowState s (f x) (m y) iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f - where a' x (s',m) = second ((mplus m).return) $ runArrowState a (s',x) + where a' x (s',m) = second (mplus m.return) $ runArrowState a (s',x) -- | Fold a state arrow through something 'Foldable'. Collect the results in a -- 'MonadPlus'. @@ -225,7 +147,7 @@ iterateSL :: (Foldable f, MonadPlus m) => ArrowState s x y -> ArrowState s (f x) (m y) iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f - where a' (s',m) x = second ((mplus m).return) $ runArrowState a (s',x) + where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x) -- | Fold a fallible state arrow through something 'Foldable'. @@ -239,15 +161,3 @@ iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f (s'',Right m') -> (s'',Right $ mplus m $ return m') (_ ,Left e ) -> (s ,Left e ) a' _ _ e = e - --- | Fold a fallible state arrow through something 'Foldable'. --- Collect the results in a 'MonadPlus'. --- If the iteration fails, the state will be reset to the initial one. -iterateSL' :: (Foldable f, MonadPlus m) - => ArrowState s x (Either e y ) - -> ArrowState s (f x) (Either e (m y)) -iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f - where a' s (s',Right m) x = case runArrowState a (s',x) of - (s'',Right m') -> (s'',Right $ mplus m $ return m') - (_ ,Left e ) -> (s ,Left e ) - a' _ e _ = e diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs index 218a85661..ef8b2d18a 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -39,14 +39,11 @@ with an equivalent return value. -- We export everything module Text.Pandoc.Readers.Odt.Arrows.Utils where -import Control.Arrow -import Control.Monad ( join, MonadPlus(..) ) +import Control.Arrow +import Control.Monad (join) -import qualified Data.Foldable as F -import Data.Monoid - -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.Utils +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.Utils and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') and2 = (&&&) @@ -63,12 +60,6 @@ and5 :: (Arrow a) and6 :: (Arrow a) => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5 -> a b (c0,c1,c2,c3,c4,c5 ) -and7 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6 - -> a b (c0,c1,c2,c3,c4,c5,c6 ) -and8 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7 - -> a b (c0,c1,c2,c3,c4,c5,c6,c7) and3 a b c = (and2 a b ) &&& c >>^ \((z,y ) , x) -> (z,y,x ) @@ -78,10 +69,6 @@ and5 a b c d e = (and4 a b c d ) &&& e >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v ) and6 a b c d e f = (and5 a b c d e ) &&& f >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u ) -and7 a b c d e f g = (and6 a b c d e f ) &&& g - >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t ) -and8 a b c d e f g h = (and7 a b c d e f g) &&& h - >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s) liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z liftA2 f a b = a &&& b >>^ uncurry f @@ -98,19 +85,11 @@ liftA5 :: (Arrow a) => (z->y->x->w->v -> r) liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r) -> a b z->a b y->a b x->a b w->a b v->a b u -> a b r -liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u->a b t - -> a b r -liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s - -> a b r liftA3 fun a b c = and3 a b c >>^ uncurry3 fun liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun -liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun -liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun liftA :: (Arrow a) => (y -> z) -> a b y -> a b z liftA fun a = a >>^ fun @@ -124,28 +103,12 @@ liftA fun a = a >>^ fun duplicate :: (Arrow a) => a b (b,b) duplicate = arr $ join (,) --- | Lifts the combination of two values into an arrow. -joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z -joinOn = arr.uncurry - -- | Applies a function to the uncurried result-pair of an arrow-application. -- (The %-symbol was chosen to evoke an association with pairs.) (>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d a >>% f = a >>^ uncurry f --- | '(>>%)' with its arguments flipped -(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d -(%<<) = flip (>>%) - --- | Precomposition with an uncurried function -(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r -f %>> a = uncurry f ^>> a - --- | Precomposition with an uncurried function (right to left variant) -(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r -(<<%) = flip (%>>) - -infixr 2 >>%, %<<, %>>, <<% +infixr 2 >>% -- | Duplicate a value and apply an arrow to the second instance. @@ -156,56 +119,6 @@ infixr 2 >>%, %<<, %>>, <<% keepingTheValue :: (Arrow a) => a b c -> a b (b,c) keepingTheValue a = returnA &&& a --- | Duplicate a value and apply an arrow to the first instance. --- Aequivalent to --- > \a -> duplicate >>> first a --- or --- > \a -> a &&& returnA -keepingTheValue' :: (Arrow a) => a b c -> a b (c,b) -keepingTheValue' a = a &&& returnA - --- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'. --- Actually, it's the more complex '(>=>)', because 'bind' alone does not --- combine as nicely in arrow form. --- The current implementation is not the most efficient one, because it can --- not return directly if a 'Nothing' is encountered. That in turn follows --- from the type system, as 'Nothing' has an "invisible" type parameter that --- can not be dropped early. --- --- Also, there probably is a way to generalize this to other monads --- or applicatives, but I'm leaving that as an exercise to the reader. --- I have a feeling there is a new Arrow-typeclass to be found that is less --- restrictive than 'ArrowApply'. If it is already out there, --- I have not seen it yet. ('ArrowPlus' for example is not general enough.) -(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c) -a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join - -infixr 2 >>>= - --- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required. --- (But still different from a true bind) -(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b) -(>++<) = liftA2 mplus - --- | Left-compose with a pure function -leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r) -leftLift = left.arr - --- | Right-compose with a pure function -rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r') -rightLift = right.arr - - -( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c') -( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c') -( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c') - -l ^+++ r = leftLift l >>> right r -l +++^ r = left l >>> rightLift r -l ^+++^ r = leftLift l >>> rightLift r - -infixr 2 ^+++, +++^, ^+++^ - ( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d ( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d ( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d @@ -218,33 +131,12 @@ infixr 2 ^||| , |||^, ^|||^ ( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c') ( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c') -( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c') l ^&&& r = arr l &&& r l &&&^ r = l &&& arr r -l ^&&&^ r = arr l &&& arr r - -infixr 3 ^&&&, &&&^, ^&&&^ -( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c') -( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c') -( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c') +infixr 3 ^&&&, &&&^ -l ^*** r = arr l *** r -l ***^ r = l *** arr r -l ^***^ r = arr l *** arr r - -infixr 3 ^***, ***^, ^***^ - --- | A version of --- --- >>> \p -> arr (\x -> if p x the Right x else Left x) --- --- but with p being an arrow -choose :: (ArrowChoice a) => a b Bool -> a b (Either b b) -choose checkValue = keepingTheValue checkValue >>^ select - where select (x,True ) = Right x - select (x,False ) = Left x -- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@. choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r) @@ -258,130 +150,15 @@ maybeToChoice = arr maybeToEither returnV :: (Arrow a) => c -> a x c returnV = arr.const --- | 'returnA' dropping everything -returnA_ :: (Arrow a) => a _b () -returnA_ = returnV () - --- | Wrapper for an arrow that can be evaluated im parallel. All --- Arrows can be evaluated in parallel, as long as they return a --- monoid. -newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } - deriving (Eq, Ord, Show) - -instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where - mempty = CoEval $ returnV mempty - (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend - --- | Evaluates a collection of arrows in a parallel fashion. --- --- This is in essence a fold of '(&&&)' over the collection, --- so the actual execution order and parallelity depends on the --- implementation of '(&&&)' in the arrow in question. --- The default implementation of '(&&&)' for example keeps the --- order as given in the collection. --- --- This function can be seen as a generalization of --- 'Control.Applicative.sequenceA' to arrows or as an alternative to --- a fold with 'Control.Applicative.WrappedArrow', which --- substitutes the monoid with function application. --- -coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m -coEval = evalParallelArrow . (F.foldMap CoEval) - -- | Defines Left as failure, Right as success type FallibleArrow a input failure success = a input (Either failure success) -type ReFallibleArrow a failure success success' - = FallibleArrow a (Either failure success) failure success' - --- | Wrapper for fallible arrows. Fallible arrows are all arrows that return --- an Either value where left is a faliure and right is a success value. -newtype AlternativeArrow a input failure success - = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success } - - -instance (ArrowChoice a, Monoid failure) - => Monoid (AlternativeArrow a input failure success) where - mempty = TryArrow $ returnV $ Left mempty - (TryArrow a) `mappend` (TryArrow b) - = TryArrow $ a &&& b - >>^ \(a',~b') - -> ( (\a'' -> left (mappend a'') b') ||| Right ) - a' - --- | Evaluates a collection of fallible arrows, trying each one in succession. --- Left values are interpreted as failures, right values as successes. --- --- The evaluation is stopped once an arrow succeeds. --- Up to that point, all failures are collected in the failure-monoid. --- Note that '()' is a monoid, and thus can serve as a failure-collector if --- you are uninterested in the exact failures. --- --- This is in essence a fold of '(&&&)' over the collection, enhanced with a --- little bit of repackaging, so the actual execution order depends on the --- implementation of '(&&&)' in the arrow in question. --- The default implementation of '(&&&)' for example keeps the --- order as given in the collection. --- -tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure) - => f (FallibleArrow a b failure success) - -> FallibleArrow a b failure success -tryArrows = evalAlternativeArrow . (F.foldMap TryArrow) - --- -liftSuccess :: (ArrowChoice a) - => (success -> success') - -> ReFallibleArrow a failure success success' -liftSuccess = rightLift - -- liftAsSuccess :: (ArrowChoice a) => a x success -> FallibleArrow a x failure success liftAsSuccess a = a >>^ Right --- -asFallibleArrow :: (ArrowChoice a) - => a x success - -> FallibleArrow a x failure success -asFallibleArrow a = a >>^ Right - --- | Raises an error into a 'ReFallibleArrow' if the arrow is already in --- "error mode" -liftError :: (ArrowChoice a, Monoid failure) - => failure - -> ReFallibleArrow a failure success success -liftError e = leftLift (e <>) - --- | Raises an error into a 'FallibleArrow', droping both the arrow input --- and any previously stored error value. -_raiseA :: (ArrowChoice a) - => failure - -> FallibleArrow a x failure success -_raiseA e = returnV (Left e) - --- | Raises an empty error into a 'FallibleArrow', droping both the arrow input --- and any previously stored error value. -_raiseAEmpty :: (ArrowChoice a, Monoid failure) - => FallibleArrow a x failure success -_raiseAEmpty = _raiseA mempty - --- | Raises an error into a 'ReFallibleArrow', possibly appending the new error --- to an existing one -raiseA :: (ArrowChoice a, Monoid failure) - => failure - -> ReFallibleArrow a failure success success -raiseA e = arr $ Left.(either (<> e) (const e)) - --- | Raises an empty error into a 'ReFallibleArrow'. If there already is an --- error, nothing changes. --- (Note that this function is only aequivalent to @raiseA mempty@ iff the --- failure monoid follows the monoid laws.) -raiseAEmpty :: (ArrowChoice a, Monoid failure) - => ReFallibleArrow a failure success success -raiseAEmpty = arr (fromRight (const mempty) >>> Left) - - -- | Execute the second arrow if the first succeeds (>>?) :: (ArrowChoice a) => FallibleArrow a x failure success @@ -410,20 +187,6 @@ a >>?^? b = a >>> Left ^|||^ b -> FallibleArrow a x failure success' a ^>>? b = a ^>> Left ^||| b --- | Execute the lifted second arrow if the lifted first arrow succeeds -(^>>?^) :: (ArrowChoice a) - => (x -> Either failure success) - -> (success -> success') - -> FallibleArrow a x failure success' -a ^>>?^ f = arr $ a >>> right f - --- | Execute the lifted second arrow if the lifted first arrow succeeds -(^>>?^?) :: (ArrowChoice a) - => (x -> Either failure success) - -> (success -> Either failure success') - -> FallibleArrow a x failure success' -a ^>>?^? f = a ^>> Left ^|||^ f - -- | Execute the second, non-fallible arrow if the first arrow succeeds (>>?!) :: (ArrowChoice a) => FallibleArrow a x failure success @@ -448,38 +211,14 @@ a ^>>?% f = arr a >>?^ (uncurry f) --- (>>?%?) :: (ArrowChoice a) => FallibleArrow a x f (b,b') - -> (b -> b' -> (Either f c)) + -> (b -> b' -> Either f c) -> FallibleArrow a x f c -a >>?%? f = a >>?^? (uncurry f) +a >>?%? f = a >>?^? uncurry f infixr 1 >>?, >>?^, >>?^? -infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! +infixr 1 ^>>?, >>?! infixr 1 >>?%, ^>>?%, >>?%? --- | Keep values that are Right, replace Left values by a constant. -ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v -ifFailedUse v = arr $ either (const v) id - --- | '(&&)' lifted into an arrow -(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool -(<&&>) = liftA2 (&&) - --- | '(||)' lifted into an arrow -(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool -(<||>) = liftA2 (||) - --- | An equivalent of '(&&)' in a fallible arrow -(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s - -> FallibleArrow a x f s' - -> FallibleArrow a x f (s,s') -(>&&<) = liftA2 chooseMin - --- | An equivalent of '(||)' in some forms of fallible arrows -(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s - -> FallibleArrow a x f s - -> FallibleArrow a x f s -(>||<) = liftA2 chooseMax - -- | An arrow version of a short-circuit (<|>) ifFailedDo :: (ArrowChoice a) => FallibleArrow a x f y @@ -489,7 +228,4 @@ ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right) where repackage (x , Left _) = Left x repackage (_ , Right y) = Right y -infixr 4 <&&>, <||>, >&&<, >||< infixr 1 `ifFailedDo` - - diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs index 1f095bade..51c2da788 100644 --- a/src/Text/Pandoc/Readers/Odt/Base.hs +++ b/src/Text/Pandoc/Readers/Odt/Base.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards #-} + {- Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> @@ -32,12 +32,11 @@ Core types of the odt reader. module Text.Pandoc.Readers.Odt.Base where -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Namespaces +import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.Odt.Namespaces type OdtConverterState s = XMLConverterState Namespace s type XMLReader s a b = FallibleXMLConverter Namespace s a b type XMLReaderSafe s a b = XMLConverter Namespace s a b - diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 2672b01ef..380f16c66 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -1,8 +1,8 @@ {-# LANGUAGE Arrows #-} -{-# LANGUAGE TupleSections #-} {-# LANGUAGE PatternGuards #-} -{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} {- Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> @@ -39,29 +39,28 @@ module Text.Pandoc.Readers.Odt.ContentReader , read_body ) where -import Control.Arrow -import Control.Applicative hiding ( liftA, liftA2, liftA3 ) +import Control.Applicative hiding (liftA, liftA2, liftA3) +import Control.Arrow -import qualified Data.ByteString.Lazy as B -import qualified Data.Map as M -import Data.List ( find, intercalate ) -import Data.Maybe +import qualified Data.ByteString.Lazy as B +import Data.List (find, intercalate) +import qualified Data.Map as M +import Data.Maybe -import qualified Text.XML.Light as XML +import qualified Text.XML.Light as XML -import Text.Pandoc.Definition -import Text.Pandoc.Builder -import Text.Pandoc.MediaBag (insertMedia, MediaBag) -import Text.Pandoc.Shared +import Text.Pandoc.Builder +import Text.Pandoc.MediaBag (MediaBag, insertMedia) +import Text.Pandoc.Shared -import Text.Pandoc.Readers.Odt.Base -import Text.Pandoc.Readers.Odt.Namespaces -import Text.Pandoc.Readers.Odt.StyleReader +import Text.Pandoc.Readers.Odt.Base +import Text.Pandoc.Readers.Odt.Namespaces +import Text.Pandoc.Readers.Odt.StyleReader -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.Utils +import Text.Pandoc.Readers.Odt.Arrows.Utils +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.Utils +import Text.Pandoc.Readers.Odt.Generic.XMLConverter import qualified Data.Set as Set @@ -94,8 +93,6 @@ data ReaderState , envMedia :: Media -- | Hold binary resources used in the document , odtMediaBag :: MediaBag --- , sequences --- , trackedChangeIDs } deriving ( Show ) @@ -250,7 +247,7 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty -- | Input: basis for a new header anchor --- Ouput: saved new anchor +-- Output: saved new anchor getHeaderAnchor :: OdtReaderSafe Inlines Anchor getHeaderAnchor = proc title -> do state <- getExtraState -< () @@ -296,7 +293,7 @@ withNewStyle a = proc x -> do modifier <- arr modifierFromStyleDiff -< triple fShouldTrace <- isStyleToTrace -< style case fShouldTrace of - Right shouldTrace -> do + Right shouldTrace -> if shouldTrace then do pushStyle -< style @@ -325,7 +322,7 @@ type InlineModifier = Inlines -> Inlines modifierFromStyleDiff :: PropertyTriple -> InlineModifier modifierFromStyleDiff propertyTriple = composition $ - (getVPosModifier propertyTriple) + getVPosModifier propertyTriple : map (first ($ propertyTriple) >>> ifThen_else ignore) [ (hasEmphChanged , emph ) , (hasChanged isStrong , strong ) @@ -344,9 +341,9 @@ modifierFromStyleDiff propertyTriple = Just oldVPos -> getVPosModifier' (oldVPos, verticalPosition textProps) getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore - getVPosModifier' ( _ , VPosSub ) = subscript - getVPosModifier' ( _ , VPosSuper ) = superscript - getVPosModifier' ( _ , _ ) = ignore + getVPosModifier' ( _ , VPosSub ) = subscript + getVPosModifier' ( _ , VPosSuper ) = superscript + getVPosModifier' ( _ , _ ) = ignore hasEmphChanged :: PropertyTriple -> Bool hasEmphChanged = swing any [ hasChanged isEmphasised @@ -355,17 +352,17 @@ modifierFromStyleDiff propertyTriple = ] hasChanged property triple@(_, property -> newProperty, _) = - maybe True (/=newProperty) (lookupPreviousValue property triple) + (/= Just newProperty) (lookupPreviousValue property triple) hasChangedM property triple@(_, textProps,_) = fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple - lookupPreviousValue f = lookupPreviousStyleValue ((fmap f).textProperties) + lookupPreviousValue f = lookupPreviousStyleValue (fmap f . textProperties) lookupPreviousValueM f = lookupPreviousStyleValue ((f =<<).textProperties) lookupPreviousStyleValue f (ReaderState{..},_,mFamily) - = ( findBy f $ extendedStylePropertyChain styleTrace styleSet ) + = findBy f (extendedStylePropertyChain styleTrace styleSet) <|> ( f =<< fmap (lookupDefaultStyle' styleSet) mFamily ) @@ -569,7 +566,7 @@ read_text_seq = matchingElement NsText "sequence" -- specifically. I honor that, although the current implementation of '(<>)' --- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein. +-- for 'Inlines' in "Text.Pandoc.Builder" will collapse them again. -- The rational is to be prepared for future modifications. read_spaces :: InlineMatcher read_spaces = matchingElement NsText "s" ( @@ -663,7 +660,7 @@ read_list = matchingElement NsText "list" -- read_list_item :: ElementMatcher [Blocks] read_list_item = matchingElement NsText "list-item" - $ liftA (compactify'.(:[])) + $ liftA (compactify.(:[])) ( matchChildContent' [ read_paragraph , read_header , read_list @@ -749,7 +746,7 @@ read_table_row = matchingElement NsTable "table-row" -- read_table_cell :: ElementMatcher [Blocks] read_table_cell = matchingElement NsTable "table-cell" - $ liftA (compactify'.(:[])) + $ liftA (compactify.(:[])) $ matchChildContent' [ read_paragraph ] @@ -796,8 +793,7 @@ read_image_src = matchingElement NsDraw "image" Left _ -> returnV "" -< () read_frame_title :: InlineMatcher -read_frame_title = matchingElement NsSVG "title" - $ (matchChildContent [] read_plain_text) +read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) read_frame_text_box :: InlineMatcher read_frame_text_box = matchingElement NsDraw "text-box" @@ -806,12 +802,12 @@ read_frame_text_box = matchingElement NsDraw "text-box" arr read_img_with_caption -< toList paragraphs read_img_with_caption :: [Block] -> Inlines -read_img_with_caption ((Para ((Image attr alt (src,title)) : [])) : _) = +read_img_with_caption (Para [Image attr alt (src,title)] : _) = singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption -read_img_with_caption ((Para ((Image attr _ (src,title)) : txt)) : _) = +read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) = singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows -read_img_with_caption ( (Para (_ : xs)) : ys) = - read_img_with_caption ((Para xs) : ys) +read_img_with_caption ( Para (_ : xs) : ys) = + read_img_with_caption (Para xs : ys) read_img_with_caption _ = mempty @@ -899,9 +895,6 @@ read_reference_ref = matchingElement NsText "reference-ref" -- Entry point ---------------------- ---read_plain_content :: OdtReaderSafe _x Inlines ---read_plain_content = strContent >>^ text - read_text :: OdtReaderSafe _x Pandoc read_text = matchChildContent' [ read_header , read_paragraph @@ -915,8 +908,8 @@ post_process (Pandoc m blocks) = Pandoc m (post_process' blocks) post_process' :: [Block] -> [Block] -post_process' ((Table _ a w h r) : (Div ("", ["caption"], _) [Para inlines] ) : xs) = - (Table inlines a w h r) : ( post_process' xs ) +post_process' (Table _ a w h r : Div ("", ["caption"], _) [Para inlines] : xs) = + Table inlines a w h r : post_process' xs post_process' bs = bs read_body :: OdtReader _x (Pandoc, MediaBag) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs index 877443543..f8ea5c605 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} + {- Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> @@ -39,11 +39,7 @@ compatible instances of "ArrowChoice". -- We export everything module Text.Pandoc.Readers.Odt.Generic.Fallible where -import Control.Applicative -import Control.Monad - -import qualified Data.Foldable as F -import Data.Monoid ((<>)) +import Data.Monoid ((<>)) -- | Default for now. Will probably become a class at some point. type Failure = () @@ -51,16 +47,6 @@ type Failure = () type Fallible a = Either Failure a --- | False -> Left (), True -> Right () -boolToEither :: Bool -> Fallible () -boolToEither False = Left () -boolToEither True = Right () - --- | False -> Left (), True -> Right () -boolToChoice :: Bool -> Fallible () -boolToChoice False = Left () -boolToChoice True = Right () - -- maybeToEither :: Maybe a -> Fallible a maybeToEither (Just a) = Right a @@ -71,21 +57,11 @@ eitherToMaybe :: Either _l a -> Maybe a eitherToMaybe (Left _) = Nothing eitherToMaybe (Right a) = Just a --- | > untagEither === either id id -untagEither :: Either a a -> a -untagEither (Left a) = a -untagEither (Right a) = a - -- | > fromLeft f === either f id fromLeft :: (a -> b) -> Either a b -> b fromLeft f (Left a) = f a fromLeft _ (Right b) = b --- | > fromRight f === either id f -fromRight :: (a -> b) -> Either b a -> b -fromRight _ (Left b) = b -fromRight f (Right a) = f a - -- | > recover a === fromLeft (const a) === either (const a) id recover :: a -> Either _f a -> a recover a (Left _) = a @@ -110,24 +86,6 @@ collapseEither (Left f ) = Left f collapseEither (Right (Left f)) = Left f collapseEither (Right (Right x)) = Right x --- | If either of the values represents an error, the result is a --- (possibly combined) error. If both values represent a success, --- both are returned. -chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b') -chooseMin = chooseMinWith (,) - --- | If either of the values represents an error, the result is a --- (possibly combined) error. If both values represent a success, --- a combination is returned. -chooseMinWith :: (Monoid a) => (b -> b' -> c) - -> Either a b - -> Either a b' - -> Either a c -chooseMinWith (><) (Right a) (Right b) = Right $ a >< b -chooseMinWith _ (Left a) (Left b) = Left $ a <> b -chooseMinWith _ (Left a) _ = Left a -chooseMinWith _ _ (Left b) = Left b - -- | If either of the values represents a non-error, the result is a -- (possibly combined) non-error. If both values represent an error, an error -- is returned. @@ -152,109 +110,17 @@ chooseMaxWith _ _ (Right b) = Right b class ChoiceVector v where spreadChoice :: v (Either f a) -> Either f (v a) --- Let's do a few examples first - -instance ChoiceVector Maybe where - spreadChoice (Just (Left f)) = Left f - spreadChoice (Just (Right x)) = Right (Just x) - spreadChoice Nothing = Right Nothing - -instance ChoiceVector (Either l) where - spreadChoice (Right (Left f)) = Left f - spreadChoice (Right (Right x)) = Right (Right x) - spreadChoice (Left x ) = Right (Left x) - instance ChoiceVector ((,) a) where spreadChoice (_, Left f) = Left f spreadChoice (x, Right y) = Right (x,y) -- Wasn't there a newtype somewhere with the elements flipped? --- --- More instances later, first some discussion. --- --- I'll have to freshen up on type system details to see how (or if) to do --- something like --- --- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where --- > : --- --- But maybe it would be even better to use something like --- --- > class ChoiceVector v v' f | v -> v' f where --- > spreadChoice :: v -> Either f v' --- --- That way, more places in @v@ could spread the cheer, e.g.: --- --- As before: --- -- ( a , Either f b) (a , b) f --- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where --- > spreadChoice (_, Left f) = Left f --- > spreadChoice (a, Right b) = Right (a,b) --- --- But also: --- -- ( Either f a , b) (a , b) f --- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where --- > spreadChoice (Right a,b) = Right (a,b) --- > spreadChoice (Left f,_) = Left f --- --- And maybe even: --- -- ( Either f a , Either f b) (a , b) f --- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where --- > spreadChoice (Right a , Right b) = Right (a,b) --- > spreadChoice (Left f , _ ) = Left f --- > spreadChoice ( _ , Left f) = Left f --- --- Of course that would lead to a lot of overlapping instances... --- But I can't think of a different way. A selector function might help, --- but not even a "Data.Traversable" is powerful enough for that. --- But maybe someone has already solved all this with a lens library. --- --- Well, it's an interesting academic question. But for practical purposes, --- I have more than enough right now. - -instance ChoiceVector ((,,) a b) where - spreadChoice (_,_, Left f) = Left f - spreadChoice (a,b, Right x) = Right (a,b,x) - -instance ChoiceVector ((,,,) a b c) where - spreadChoice (_,_,_, Left f) = Left f - spreadChoice (a,b,c, Right x) = Right (a,b,c,x) - -instance ChoiceVector ((,,,,) a b c d) where - spreadChoice (_,_,_,_, Left f) = Left f - spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x) - -instance ChoiceVector (Const a) where - spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types - --- | Fails on the first error -instance ChoiceVector [] where - spreadChoice = sequence -- using the monad instance of Either. - -- Could be generalized to "Data.Traversable" - but why play - -- with UndecidableInstances unless this is really needed. - -- | Wrapper for a list. While the normal list instance of 'ChoiceVector' -- fails whenever it can, this type will never fail. newtype SuccessList a = SuccessList { collectNonFailing :: [a] } deriving ( Eq, Ord, Show ) instance ChoiceVector SuccessList where - spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing + spreadChoice = Right . SuccessList . foldr unTagRight [] . collectNonFailing where unTagRight (Right x) = (x:) unTagRight _ = id - --- | Like 'catMaybes', but for 'Either'. -collectRights :: [Either _l r] -> [r] -collectRights = collectNonFailing . untag . spreadChoice . SuccessList - where untag = fromLeft (error "Unexpected Left") - --- | A version of 'collectRights' generalized to other containers. The --- container must be both "reducible" and "buildable". Most general containers --- should fullfill these requirements, but there is no single typeclass --- (that I know of) for that. --- Therefore, they are split between 'Foldable' and 'MonadPlus'. --- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.) -collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r -collectRightsF = F.foldr unTagRight mzero - where unTagRight (Right x) = mplus $ return x - unTagRight _ = id diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 6c10ed61d..556517259 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -1,6 +1,6 @@ + + {-# LANGUAGE TypeOperators #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE PatternGuards #-} {-# LANGUAGE ViewPatterns #-} {- @@ -38,8 +38,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , uncurry4 , uncurry5 , uncurry6 -, uncurry7 -, uncurry8 , swap , reverseComposition , bool @@ -53,12 +51,12 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , composition ) where -import Control.Category ( Category, (>>>), (<<<) ) -import qualified Control.Category as Cat ( id ) -import Control.Monad ( msum ) +import Control.Category (Category, (<<<), (>>>)) +import qualified Control.Category as Cat (id) +import Control.Monad (msum) -import qualified Data.Foldable as F ( Foldable, foldr ) -import Data.Maybe +import qualified Data.Foldable as F (Foldable, foldr) +import Data.Maybe -- | Aequivalent to @@ -148,15 +146,11 @@ uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z -uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z -uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z uncurry3 fun (a,b,c ) = fun a b c uncurry4 fun (a,b,c,d ) = fun a b c d uncurry5 fun (a,b,c,d,e ) = fun a b c d e uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f -uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g -uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h swap :: (a,b) -> (b,a) swap (a,b) = (b,a) @@ -168,4 +162,3 @@ findBy :: (a -> Maybe b) -> [a] -> Maybe b findBy _ [] = Nothing findBy f ((f -> Just x):_ ) = Just x findBy f ( _:xs) = findBy f xs - diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 8c03d1a09..428048427 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -41,50 +41,17 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , XMLConverterState , XMLConverter , FallibleXMLConverter -, swapPosition -, runConverter -, runConverter'' , runConverter' -, runConverterF' -, runConverterF -, getCurrentElement , getExtraState , setExtraState , modifyExtraState -, convertingExtraState , producingExtraState -, lookupNSiri -, lookupNSprefix -, readNSattributes -, elemName -, elemNameIs -, strContent -, elContent -, currentElem -, currentElemIs -, expectElement -, elChildren -, findChildren -, filterChildren -, filterChildrenName , findChild' -, findChild -, filterChild' -, filterChild -, filterChildName' -, filterChildName -, isSet , isSet' , isSetWithDefault -, hasAttrValueOf' -, failIfNotAttrValueOf -, isThatTheAttrValue -, searchAttrIn -, searchAttrWith , searchAttr , lookupAttr , lookupAttr' -, lookupAttrWithDefault , lookupDefaultingAttr , findAttr' , findAttr @@ -93,25 +60,9 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , readAttr' , readAttrWithDefault , getAttr --- , (>/<) --- , (?>/<) , executeIn -, collectEvery , withEveryL -, withEvery , tryAll -, tryAll' -, IdXMLConverter -, MaybeEConverter -, ElementMatchConverter -, MaybeCConverter -, ContentMatchConverter -, makeMatcherE -, makeMatcherC -, prepareMatchersE -, prepareMatchersC -, matchChildren -, matchContent'' , matchContent' , matchContent ) where @@ -120,8 +71,8 @@ import Control.Applicative hiding ( liftA, liftA2 ) import Control.Monad ( MonadPlus ) import Control.Arrow +import Data.Either ( rights ) import qualified Data.Map as M -import qualified Data.Foldable as F import Data.Default import Data.Maybe @@ -210,17 +161,6 @@ currentElement state = head (parentElements state) -- | Replace the current position by another, modifying the extra state -- in the process -swapPosition :: (extraState -> extraState') - -> [XML.Element] - -> XMLConverterState nsID extraState - -> XMLConverterState nsID extraState' -swapPosition f stack state - = state { parentElements = stack - , moreState = f (moreState state) - } - --- | Replace the current position by another, modifying the extra state --- in the process swapStack' :: XMLConverterState nsID extraState -> [XML.Element] -> ( XMLConverterState nsID extraState , [XML.Element] ) @@ -264,14 +204,6 @@ runConverter :: XMLConverter nsID extraState input output -> output runConverter converter state input = snd $ runArrowState converter (state,input) --- -runConverter'' :: (NameSpaceID nsID) - => XMLConverter nsID extraState (Fallible ()) output - -> extraState - -> XML.Element - -> output -runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) () - runConverter' :: (NameSpaceID nsID) => FallibleXMLConverter nsID extraState () success -> extraState @@ -280,20 +212,6 @@ runConverter' :: (NameSpaceID nsID) runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) () -- -runConverterF' :: FallibleXMLConverter nsID extraState x y - -> XMLConverterState nsID extraState - -> Fallible x -> Fallible y -runConverterF' a s e = runConverter (returnV e >>? a) s e - --- -runConverterF :: (NameSpaceID nsID) - => FallibleXMLConverter nsID extraState XML.Element x - -> extraState - -> Fallible XML.Element -> Fallible x -runConverterF a s = either failWith - (\e -> runConverter a (createStartState e s) e) - --- getCurrentElement :: XMLConverter nsID extraState x XML.Element getCurrentElement = extractFromState currentElement @@ -430,57 +348,15 @@ elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName -------------------------------------------------------------------------------- -- -strContent :: XMLConverter nsID extraState x String -strContent = getCurrentElement - >>^ XML.strContent - --- elContent :: XMLConverter nsID extraState x [XML.Content] elContent = getCurrentElement >>^ XML.elContent -------------------------------------------------------------------------------- --- Current element --------------------------------------------------------------------------------- - --- -currentElem :: XMLConverter nsID extraState x (XML.QName) -currentElem = getCurrentElement - >>^ XML.elName - -currentElemIs :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState x Bool -currentElemIs nsID name = getCurrentElement - >>> elemNameIs nsID name - - - -{- -currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> - (XML.qName >>^ (&&).(== name) ) - ^&&&^ - (XML.qIRI >>^ (==) ) - ) >>% (.) - ) &&& lookupNSiri nsID >>% ($) --} - --- -expectElement :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState x () -expectElement nsID name = currentElemIs nsID name - >>^ boolToChoice - --------------------------------------------------------------------------------- -- Chilren -------------------------------------------------------------------------------- -- -elChildren :: XMLConverter nsID extraState x [XML.Element] -elChildren = getCurrentElement - >>^ XML.elChildren - -- findChildren :: (NameSpaceID nsID) => nsID -> ElementName @@ -490,18 +366,6 @@ findChildren nsID name = elemName nsID name >>% XML.findChildren -- -filterChildren :: (XML.Element -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildren p = getCurrentElement - >>^ XML.filterChildren p - --- -filterChildrenName :: (XML.QName -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildrenName p = getCurrentElement - >>^ XML.filterChildrenName p - --- findChild' :: (NameSpaceID nsID) => nsID -> ElementName @@ -517,45 +381,12 @@ findChild :: (NameSpaceID nsID) findChild nsID name = findChild' nsID name >>> maybeToChoice --- -filterChild' :: (XML.Element -> Bool) - -> XMLConverter nsID extraState x (Maybe XML.Element) -filterChild' p = getCurrentElement - >>^ XML.filterChild p - --- -filterChild :: (XML.Element -> Bool) - -> FallibleXMLConverter nsID extraState x XML.Element -filterChild p = filterChild' p - >>> maybeToChoice - --- -filterChildName' :: (XML.QName -> Bool) - -> XMLConverter nsID extraState x (Maybe XML.Element) -filterChildName' p = getCurrentElement - >>^ XML.filterChildName p - --- -filterChildName :: (XML.QName -> Bool) - -> FallibleXMLConverter nsID extraState x XML.Element -filterChildName p = filterChildName' p - >>> maybeToChoice - -------------------------------------------------------------------------------- -- Attributes -------------------------------------------------------------------------------- -- -isSet :: (NameSpaceID nsID) - => nsID -> AttributeName - -> (Either Failure Bool) - -> FallibleXMLConverter nsID extraState x Bool -isSet nsID attrName deflt - = findAttr' nsID attrName - >>^ maybe deflt stringToBool - --- isSet' :: (NameSpaceID nsID) => nsID -> AttributeName -> XMLConverter nsID extraState x (Maybe Bool) @@ -570,34 +401,6 @@ isSetWithDefault nsID attrName def' = isSet' nsID attrName >>^ fromMaybe def' --- -hasAttrValueOf' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> XMLConverter nsID extraState x Bool -hasAttrValueOf' nsID attrName attrValue - = findAttr nsID attrName - >>> ( const False ^|||^ (==attrValue)) - --- -failIfNotAttrValueOf :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> FallibleXMLConverter nsID extraState x () -failIfNotAttrValueOf nsID attrName attrValue - = hasAttrValueOf' nsID attrName attrValue - >>^ boolToChoice - --- | Is the value that is currently transported in the arrow the value of --- the specified attribute? -isThatTheAttrValue :: (NameSpaceID nsID) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState AttributeValue Bool -isThatTheAttrValue nsID attrName - = keepingTheValue - (findAttr nsID attrName) - >>% right.(==) - -- | Lookup value in a dictionary, fail if no attribute found or value -- not in dictionary searchAttrIn :: (NameSpaceID nsID) @@ -608,18 +411,6 @@ searchAttrIn nsID attrName dict = findAttr nsID attrName >>?^? maybeToChoice.(`lookup` dict ) - --- | Lookup value in a dictionary. Fail if no attribute found. If value not in --- dictionary, return default value -searchAttrWith :: (NameSpaceID nsID) - => nsID -> AttributeName - -> a - -> [(AttributeValue,a)] - -> FallibleXMLConverter nsID extraState x a -searchAttrWith nsID attrName defV dict - = findAttr nsID attrName - >>?^ (fromMaybe defV).(`lookup` dict ) - -- | Lookup value in a dictionary. If attribute or value not found, -- return default value searchAttr :: (NameSpaceID nsID) @@ -789,16 +580,6 @@ prepareIteration nsID name = keepingTheValue (findChildren nsID name) >>% distributeValue --- | Applies a converter to every child element of a specific type. --- Collects results in a 'Monoid'. --- Fails completely if any conversion fails. -collectEvery :: (NameSpaceID nsID, Monoid m) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a m - -> FallibleXMLConverter nsID extraState a m -collectEvery nsID name a = prepareIteration nsID name - >>> foldS' (switchingTheStack a) - -- withEveryL :: (NameSpaceID nsID) => nsID -> ElementName @@ -824,17 +605,7 @@ tryAll :: (NameSpaceID nsID) -> XMLConverter nsID extraState b [a] tryAll nsID name a = prepareIteration nsID name >>> iterateS (switchingTheStack a) - >>^ collectRights - --- | Applies a converter to every child element of a specific type. --- Collects all successful results. -tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState b a - -> XMLConverter nsID extraState b (c a) -tryAll' nsID name a = prepareIteration nsID name - >>> iterateS (switchingTheStack a) - >>^ collectRightsF + >>^ rights -------------------------------------------------------------------------------- -- Matching children @@ -843,15 +614,6 @@ tryAll' nsID name a = prepareIteration nsID name type IdXMLConverter nsID moreState x = XMLConverter nsID moreState x x -type MaybeEConverter nsID moreState x - = Maybe (IdXMLConverter nsID moreState (x, XML.Element)) - --- Chainable converter that helps deciding which converter to actually use. -type ElementMatchConverter nsID extraState x - = IdXMLConverter nsID - extraState - (MaybeEConverter nsID extraState x, XML.Element) - type MaybeCConverter nsID moreState x = Maybe (IdXMLConverter nsID moreState (x, XML.Content)) @@ -862,26 +624,6 @@ type ContentMatchConverter nsID extraState x (MaybeCConverter nsID extraState x, XML.Content) -- Helper function: The @c@ is actually a converter that is to be selected by --- matching XML elements to the first two parameters. --- The fold used to match elements however is very simple, so to use it, --- this function wraps the converter in another converter that unifies --- the accumulator. Think of a lot of converters with the resulting type --- chained together. The accumulator not only transports the element --- unchanged to the next matcher, it also does the actual selecting by --- combining the intermediate results with '(<|>)'. -makeMatcherE :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a a - -> ElementMatchConverter nsID extraState a -makeMatcherE nsID name c = ( second ( - elemNameIs nsID name - >>^ bool Nothing (Just tryC) - ) - >>% (<|>) - ) &&&^ snd - where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd - --- Helper function: The @c@ is actually a converter that is to be selected by -- matching XML content to the first two parameters. -- The fold used to match elements however is very simple, so to use it, -- this function wraps the converter in another converter that unifies @@ -914,13 +656,6 @@ makeMatcherC nsID name c = ( second ( contentToElem _ -> failEmpty -- Creates and chains a bunch of matchers -prepareMatchersE :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] - -> ElementMatchConverter nsID extraState x ---prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE) -prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE) - --- Creates and chains a bunch of matchers prepareMatchersC :: (NameSpaceID nsID) => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] -> ContentMatchConverter nsID extraState x @@ -928,52 +663,6 @@ prepareMatchersC :: (NameSpaceID nsID) prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC) -- | Takes a list of element-data - converter groups and --- * Finds all children of the current element --- * Matches each group to each child in order (at most one group per child) --- * Filters non-matched children --- * Chains all found converters in child-order --- * Applies the chain to the input element -matchChildren :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchChildren lookups = let matcher = prepareMatchersE lookups - in keepingTheValue ( - elChildren - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m) - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the element and drop the element - -- in the return value - swallowElem element converter = (,element) ^>> converter >>^ fst - --- -matchContent'' :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchContent'' lookups = let matcher = prepareMatchersC lookups - in keepingTheValue ( - elContent - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m) - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the content and drop the content - -- in the return value - swallowContent content converter = (,content) ^>> converter >>^ fst - - --- | Takes a list of element-data - converter groups and -- * Finds all content of the current element -- * Matches each group to each piece of content in order -- (at most one group per piece of content) @@ -1018,14 +707,6 @@ matchContent lookups fallback -- Internals -------------------------------------------------------------------------------- -stringToBool :: (Monoid failure) => String -> Either failure Bool -stringToBool val -- stringToBool' val >>> maybeToChoice - | val `elem` trueValues = succeedWith True - | val `elem` falseValues = succeedWith False - | otherwise = failEmpty - where trueValues = ["true" ,"on" ,"1"] - falseValues = ["false","off","0"] - stringToBool' :: String -> Maybe Bool stringToBool' val | val `elem` trueValues = Just True | val `elem` falseValues = Just False diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index deb009998..92e12931d 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -31,11 +31,11 @@ Namespaces used in odt files. module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) ) where -import Data.List ( isPrefixOf ) -import Data.Maybe ( fromMaybe, listToMaybe ) -import qualified Data.Map as M ( empty, insert ) +import Data.List (isPrefixOf) +import qualified Data.Map as M (empty, insert) +import Data.Maybe (fromMaybe, listToMaybe) -import Text.Pandoc.Readers.Odt.Generic.Namespaces +import Text.Pandoc.Readers.Odt.Generic.Namespaces instance NameSpaceID Namespace where @@ -48,7 +48,7 @@ instance NameSpaceID Namespace where findID :: NameSpaceIRI -> Maybe Namespace -findID iri = listToMaybe [nsID | (iri',~nsID) <- nsIDs, iri' `isPrefixOf` iri] +findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri] nsIDmap :: NameSpaceIRIs Namespace nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 26ba6df82..58be8e4a3 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -1,9 +1,8 @@ -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE Arrows #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} + {- Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> @@ -50,49 +49,36 @@ module Text.Pandoc.Readers.Odt.StyleReader , ListLevelType (..) , LengthOrPercent (..) , lookupStyle -, getTextProperty -, getTextProperty' -, getParaProperty -, getListStyle , getListLevelStyle , getStyleFamily -, lookupDefaultStyle , lookupDefaultStyle' , lookupListStyleByName -, getPropertyChain -, textPropertyChain -, stylePropertyChain -, stylePropertyChain' -, getStylePropertyChain , extendedStylePropertyChain -, extendedStylePropertyChain' -, liftStyles , readStylesAt ) where -import Control.Arrow -import Control.Applicative hiding ( liftA, liftA2, liftA3 ) +import Control.Applicative hiding (liftA, liftA2, liftA3) +import Control.Arrow -import qualified Data.Foldable as F -import qualified Data.Map as M -import qualified Data.Set as S -import Data.Char ( isDigit ) -import Data.Default -import Data.List ( unfoldr ) -import Data.Maybe +import Data.Char (isDigit) +import Data.Default +import qualified Data.Foldable as F +import Data.List (unfoldr) +import qualified Data.Map as M +import Data.Maybe +import qualified Data.Set as S -import qualified Text.XML.Light as XML +import qualified Text.XML.Light as XML -import Text.Pandoc.Readers.Odt.Arrows.State -import Text.Pandoc.Readers.Odt.Arrows.Utils +import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Utils -import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.Odt.Generic.Fallible +import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM +import Text.Pandoc.Readers.Odt.Generic.Utils +import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Namespaces -import Text.Pandoc.Readers.Odt.Base +import Text.Pandoc.Readers.Odt.Base +import Text.Pandoc.Readers.Odt.Namespaces readStylesAt :: XML.Element -> Fallible Styles @@ -118,7 +104,7 @@ instance Lookupable FontPitch where instance Default FontPitch where def = PitchVariable --- The font pitch can be specifed in a style directly. Normally, however, +-- The font pitch can be specified in a style directly. Normally, however, -- it is defined in the font. That is also the specs' recommendation. -- -- Thus, we want @@ -145,13 +131,12 @@ type StyleReaderSafe a b = XMLReaderSafe FontPitches a b -- | A reader for font pitches fontPitchReader :: XMLReader _s _x FontPitches fontPitchReader = executeIn NsOffice "font-face-decls" ( - ( withEveryL NsStyle "font-face" $ liftAsSuccess ( + withEveryL NsStyle "font-face" (liftAsSuccess ( findAttr' NsStyle "name" &&& lookupDefaultingAttr NsStyle "font-pitch" - ) - ) - >>?^ ( M.fromList . (foldl accumLegalPitches []) ) + )) + >>?^ ( M.fromList . foldl accumLegalPitches [] ) ) where accumLegalPitches ls (Nothing,_) = ls accumLegalPitches ls (Just n,p) = (n,p):ls @@ -230,15 +215,15 @@ instance Lookupable StyleFamily where ] -- | A named style -data Style = Style { styleFamily :: Maybe StyleFamily - , styleParentName :: Maybe StyleName - , listStyle :: Maybe StyleName - , styleProperties :: StyleProperties +data Style = Style { styleFamily :: Maybe StyleFamily + , styleParentName :: Maybe StyleName + , listStyle :: Maybe StyleName + , styleProperties :: StyleProperties } deriving ( Eq, Show ) -data StyleProperties = SProps { textProperties :: Maybe TextProperties - , paraProperties :: Maybe ParaProperties +data StyleProperties = SProps { textProperties :: Maybe TextProperties + , paraProperties :: Maybe ParaProperties -- , tableColProperties :: Maybe TColProperties -- , tableRowProperties :: Maybe TRowProperties -- , tableCellProperties :: Maybe TCellProperties @@ -354,8 +339,8 @@ instance Read XslUnit where readsPrec _ "em" = [(XslUnitEM , "")] readsPrec _ _ = [] --- | Rough conversion of measures into millimeters. --- Pixels and em's are actually implemetation dependant/relative measures, +-- | Rough conversion of measures into millimetres. +-- Pixels and em's are actually implementation dependant/relative measures, -- so I could not really easily calculate anything exact here even if I wanted. -- But I do not care about exactness right now, as I only use measures -- to determine if a paragraph is "indented" or not. @@ -397,11 +382,11 @@ data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType instance Show ListLevelStyle where show ListLevelStyle{..} = "<LLS|" - ++ (show listLevelType) + ++ show listLevelType ++ "|" - ++ (maybeToString listItemPrefix) - ++ (show listItemFormat) - ++ (maybeToString listItemSuffix) + ++ maybeToString listItemPrefix + ++ show listItemFormat + ++ maybeToString listItemSuffix ++ ">" where maybeToString = fromMaybe "" @@ -439,7 +424,7 @@ instance Read ListItemNumberFormat where -------------------------------------------------------------------------------- -- Readers -- --- ...it seems like a whole lot of this should be automatically deriveable +-- ...it seems like a whole lot of this should be automatically derivable -- or at least moveable into a class. Most of this is data concealed in -- code. -------------------------------------------------------------------------------- @@ -497,14 +482,14 @@ readTextProperties = ( liftA6 PropT ( searchAttr NsXSL_FO "font-style" False isFontEmphasised ) ( searchAttr NsXSL_FO "font-weight" False isFontBold ) - ( findPitch ) + findPitch ( getAttr NsStyle "text-position" ) - ( readUnderlineMode ) - ( readStrikeThroughMode ) + readUnderlineMode + readStrikeThroughMode ) where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] isFontBold = ("normal",False):("bold",True) - :(map ((,True).show) ([100,200..900]::[Int])) + :map ((,True).show) ([100,200..900]::[Int]) readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) readUnderlineMode = readLineMode "text-underline-mode" @@ -524,7 +509,7 @@ readLineMode modeAttr styleAttr = proc x -> do Nothing -> returnA -< Just UnderlineModeNormal else returnA -< Nothing where - isLinePresent = [("none",False)] ++ map (,True) + isLinePresent = ("none",False) : map (,True) [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted" , "long-dash" , "solid" , "wave" ] @@ -561,20 +546,18 @@ readListStyle = findAttr NsStyle "name" >>?! keepingTheValue ( liftA ListStyle - $ ( liftA3 SM.union3 + $ liftA3 SM.union3 ( readListLevelStyles NsText "list-level-style-number" LltNumbered ) ( readListLevelStyles NsText "list-level-style-bullet" LltBullet ) - ( readListLevelStyles NsText "list-level-style-image" LltImage ) - ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle + ( readListLevelStyles NsText "list-level-style-image" LltImage ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle ) -- readListLevelStyles :: Namespace -> ElementName -> ListLevelType -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle) readListLevelStyles namespace elementName levelType = - ( tryAll namespace elementName (readListLevelStyle levelType) + tryAll namespace elementName (readListLevelStyle levelType) >>^ SM.fromList - ) -- readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) @@ -624,20 +607,11 @@ lookupStyle :: StyleName -> Styles -> Maybe Style lookupStyle name Styles{..} = M.lookup name stylesByName -- -lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties -lookupDefaultStyle family Styles{..} = fromMaybe def - (M.lookup family defaultStyleMap) - --- lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties lookupDefaultStyle' Styles{..} family = fromMaybe def (M.lookup family defaultStyleMap) -- -getListStyle :: Style -> Styles -> Maybe ListStyle -getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles) - --- lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle lookupListStyleByName name Styles{..} = M.lookup name listStylesByName @@ -655,7 +629,7 @@ parents style styles = unfoldr findNextParent style -- Ha! getStyleFamily :: Style -> Styles -> Maybe StyleFamily getStyleFamily style@Style{..} styles = styleFamily - <|> (F.asum $ map (`getStyleFamily` styles) $ parents style styles) + <|> F.asum (map (`getStyleFamily` styles) $ parents style styles) -- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property -- values are specified. Instead, a value might be inherited from a @@ -677,68 +651,7 @@ stylePropertyChain style styles -- extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties] extendedStylePropertyChain [] _ = [] -extendedStylePropertyChain [style] styles = (stylePropertyChain style styles) - ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) -extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles) - ++ (extendedStylePropertyChain trace styles) --- Optimizable with Data.Sequence - --- -extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties] -extendedStylePropertyChain' [] _ = Nothing -extendedStylePropertyChain' [style] styles = Just ( - (stylePropertyChain style styles) - ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) - ) -extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++) - (extendedStylePropertyChain' trace styles) - --- -stylePropertyChain' :: Styles -> Style -> [StyleProperties] -stylePropertyChain' = flip stylePropertyChain - --- -getStylePropertyChain :: StyleName -> Styles -> [StyleProperties] -getStylePropertyChain name styles = maybe [] - (`stylePropertyChain` styles) - (lookupStyle name styles) - --- -getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a] -getPropertyChain extract style styles = catMaybes - $ map extract - $ stylePropertyChain style styles - --- -textPropertyChain :: Style -> Styles -> [TextProperties] -textPropertyChain = getPropertyChain textProperties - --- -paraPropertyChain :: Style -> Styles -> [ParaProperties] -paraPropertyChain = getPropertyChain paraProperties - --- -getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a -getTextProperty extract style styles = fmap extract - $ listToMaybe - $ textPropertyChain style styles - --- -getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a -getTextProperty' extract style styles = F.asum - $ map extract - $ textPropertyChain style styles - --- -getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a -getParaProperty extract style styles = fmap extract - $ listToMaybe - $ paraPropertyChain style styles - --- | Lifts the reader into another readers' state. -liftStyles :: (OdtConverterState s -> OdtConverterState Styles) - -> (OdtConverterState Styles -> OdtConverterState s ) - -> XMLReader s x x -liftStyles extract inject = switchState extract inject - $ convertingExtraState M.empty readAllStyles - +extendedStylePropertyChain [style] styles = stylePropertyChain style styles + ++ maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)) +extendedStylePropertyChain (style:trace) styles = stylePropertyChain style styles + ++ extendedStylePropertyChain trace styles diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 4e1c926da..292830bd2 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -27,29 +27,42 @@ Conversion of org-mode formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Org ( readOrg ) where -import Text.Pandoc.Readers.Org.Blocks ( blockList, meta ) -import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM ) -import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState ) +import Text.Pandoc.Readers.Org.Blocks (blockList, meta) +import Text.Pandoc.Readers.Org.ParserState (optionsToParserState) +import Text.Pandoc.Readers.Org.Parsing (OrgParser, readWithM) -import Text.Pandoc.Definition -import Text.Pandoc.Error -import Text.Pandoc.Options +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Options +import Text.Pandoc.Parsing (reportLogMessages) +import Text.Pandoc.Shared (crFilter) -import Control.Monad.Reader ( runReader ) +import Control.Monad.Except (throwError) +import Control.Monad.Reader (runReaderT) +import Data.Text (Text) +import qualified Data.Text as T -- | Parse org-mode string and return a Pandoc document. -readOrg :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readOrg opts s = flip runReader def $ - readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n") +readOrg :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readOrg opts s = do + parsed <- flip runReaderT def $ + readWithM parseOrg (optionsToParserState opts) + (T.unpack (crFilter s) ++ "\n\n") + case parsed of + Right result -> return result + Left _ -> throwError $ PandocParseError "problem parsing org" -- -- Parser -- -parseOrg :: OrgParser Pandoc +parseOrg :: PandocMonad m => OrgParser m Pandoc parseOrg = do blocks' <- blockList meta' <- meta + reportLogMessages return $ Pandoc meta' blocks' diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index b1004dda6..424102cb0 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.BlockStarts + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -40,11 +40,11 @@ module Text.Pandoc.Readers.Org.BlockStarts , endOfBlock ) where -import Control.Monad ( void ) +import Control.Monad (void) import Text.Pandoc.Readers.Org.Parsing -- | Horizontal Line (five -- dashes or more) -hline :: OrgParser () +hline :: Monad m => OrgParser m () hline = try $ do skipSpaces string "-----" @@ -54,58 +54,66 @@ hline = try $ do return () -- | Read the start of a header line, return the header level -headerStart :: OrgParser Int +headerStart :: Monad m => OrgParser m Int headerStart = try $ (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos -tableStart :: OrgParser Char +tableStart :: Monad m => OrgParser m Char tableStart = try $ skipSpaces *> char '|' -latexEnvStart :: OrgParser String -latexEnvStart = try $ do +gridTableStart :: Monad m => OrgParser m () +gridTableStart = try $ skipSpaces <* char '+' <* char '-' + + +latexEnvStart :: Monad m => OrgParser m String +latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" <* blankline where - latexEnvName :: OrgParser String + latexEnvName :: Monad m => OrgParser m String latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") - --- | Parses bullet list marker. -bulletListStart :: OrgParser () -bulletListStart = try $ - choice - [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1 - , () <$ skipSpaces1 <* char '*' <* skipSpaces1 - ] - -genericListStart :: OrgParser String - -> OrgParser Int -genericListStart listMarker = try $ - (+) <$> (length <$> many spaceChar) - <*> (length <$> listMarker <* many1 spaceChar) - -orderedListStart :: OrgParser Int +bulletListStart :: Monad m => OrgParser m Int +bulletListStart = try $ do + ind <- length <$> many spaceChar + -- Unindented lists cannot use '*' bullets. + oneOf (if ind == 0 then "+-" else "*+-") + skipSpaces1 <|> lookAhead eol + return (ind + 1) + +genericListStart :: Monad m + => OrgParser m String + -> OrgParser m Int +genericListStart listMarker = try $ do + ind <- length <$> many spaceChar + void listMarker + skipSpaces1 <|> lookAhead eol + return (ind + 1) + +eol :: Monad m => OrgParser m () +eol = void (char '\n') + +orderedListStart :: Monad m => OrgParser m Int orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") -drawerStart :: OrgParser String -drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline +drawerStart :: Monad m => OrgParser m String +drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline where drawerName = char ':' *> manyTill nonspaceChar (char ':') -metaLineStart :: OrgParser () +metaLineStart :: Monad m => OrgParser m () metaLineStart = try $ skipSpaces <* string "#+" -commentLineStart :: OrgParser () +commentLineStart :: Monad m => OrgParser m () commentLineStart = try $ skipSpaces <* string "# " -exampleLineStart :: OrgParser () +exampleLineStart :: Monad m => OrgParser m () exampleLineStart = () <$ try (skipSpaces *> string ": ") -noteMarker :: OrgParser String +noteMarker :: Monad m => OrgParser m String noteMarker = try $ do char '[' choice [ many1Till digit (char ']') @@ -114,17 +122,18 @@ noteMarker = try $ do ] -- | Succeeds if the parser is at the end of a block. -endOfBlock :: OrgParser () -endOfBlock = lookAhead . try $ do - void blankline <|> anyBlockStart +endOfBlock :: Monad m => OrgParser m () +endOfBlock = lookAhead . try $ + void blankline <|> anyBlockStart where -- Succeeds if there is a new block starting at this position. - anyBlockStart :: OrgParser () + anyBlockStart :: Monad m => OrgParser m () anyBlockStart = try . choice $ [ exampleLineStart , hline , metaLineStart , commentLineStart + , gridTableStart , void noteMarker , void tableStart , void drawerStart @@ -133,4 +142,3 @@ endOfBlock = lookAhead . try $ do , void bulletListStart , void orderedListStart ] - diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 484d97482..fa016283c 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,8 +1,5 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,10 +15,11 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} - +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2017 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Blocks + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -33,247 +31,61 @@ module Text.Pandoc.Readers.Org.Blocks , meta ) where -import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.Meta ( metaExport, metaKey, metaLine ) -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared - ( cleanLinkString, isImageFilename, rundocBlockClass - , toRundocAttrib, translateLang ) +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.DocumentTree (documentTree, headlineToBlocks) +import Text.Pandoc.Readers.Org.Inlines +import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, + originalLang, translateLang) + +import Text.Pandoc.Builder (Blocks, Inlines) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Shared (compactify, compactifyDL, safeRead) + +import Control.Monad (foldM, guard, mzero, void) +import Data.Char (isSpace, toLower, toUpper) +import Data.Default (Default) +import Data.List (foldl', isPrefixOf) +import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Monoid ((<>)) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks ) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared ( compactify', compactify'DL, safeRead ) - -import Control.Monad ( foldM, guard, mzero, void ) -import Data.Char ( isSpace, toLower, toUpper) -import Data.Default ( Default ) -import Data.List ( foldl', isPrefixOf ) -import Data.Maybe ( fromMaybe, isNothing ) -import Data.Monoid ((<>)) - --- --- Org headers --- -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) - --- | Create a tag containing the given string. -toTag :: String -> Tag -toTag = Tag - --- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } - deriving (Show, Eq, Ord) - --- | Create a property key containing the given string. Org mode keys are --- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower - --- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } - --- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue -toPropertyValue = PropertyValue - --- | Check whether the property value is non-nil (i.e. truish). -isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] - --- | Key/value pairs from a PROPERTIES drawer -type Properties = [(PropertyKey, PropertyValue)] - --- | Org mode headline (i.e. a document subtree). -data Headline = Headline - { headlineLevel :: Int - , headlineTodoMarker :: Maybe TodoMarker - , headlineText :: Inlines - , headlineTags :: [Tag] - , headlineProperties :: Properties - , headlineContents :: Blocks - , headlineChildren :: [Headline] - } - --- --- Parsing headlines and subtrees --- - --- | Read an Org mode headline and its contents (i.e. a document subtree). --- @lvl@ gives the minimum acceptable level of the tree. -headline :: Int -> OrgParser (F Headline) -headline lvl = try $ do - level <- headerStart - guard (lvl <= level) - todoKw <- optionMaybe todoKeyword - title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle - tags <- option [] headerTags - newline - properties <- option mempty propertiesDrawer - contents <- blocks - children <- many (headline (level + 1)) - return $ do - title' <- title - contents' <- contents - children' <- sequence children - return $ Headline - { headlineLevel = level - , headlineTodoMarker = todoKw - , headlineText = title' - , headlineTags = tags - , headlineProperties = properties - , headlineContents = contents' - , headlineChildren = children' - } - where - endOfTitle :: OrgParser () - endOfTitle = void . lookAhead $ optional headerTags *> newline - - headerTags :: OrgParser [Tag] - headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' - in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) - --- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -headlineToBlocks :: Headline -> OrgParser Blocks -headlineToBlocks hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - case () of - _ | any isNoExportTag headlineTags -> return mempty - _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln - _ | isCommentTitle headlineText -> return mempty - _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln - _ | otherwise -> headlineToHeaderWithContents hdln - -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") - -isArchiveTag :: Tag -> Bool -isArchiveTag = (== toTag "ARCHIVE") - --- | Check if the title starts with COMMENT. --- FIXME: This accesses builder internals not intended for use in situations --- like these. Replace once keyword parsing is supported. -isCommentTitle :: Inlines -> Bool -isCommentTitle (B.toList -> (Str "COMMENT":_)) = True -isCommentTitle _ = False - -archivedHeadlineToBlocks :: Headline -> OrgParser Blocks -archivedHeadlineToBlocks hdln = do - archivedTreesOption <- getExportSetting exportArchivedTrees - case archivedTreesOption of - ArchivedTreesNoExport -> return mempty - ArchivedTreesExport -> headlineToHeaderWithContents hdln - ArchivedTreesHeadlineOnly -> headlineToHeader hdln - -headlineToHeaderWithList :: Headline -> OrgParser Blocks -headlineToHeaderWithList hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - header <- headlineToHeader hdln - listElements <- sequence (map headlineToBlocks headlineChildren) - let listBlock = if null listElements - then mempty - else B.orderedList listElements - let headerText = if maxHeadlineLevels == headlineLevel - then header - else flattenHeader header - return $ headerText <> headlineContents <> listBlock - where - flattenHeader :: Blocks -> Blocks - flattenHeader blks = - case B.toList blks of - (Header _ _ inlns:_) -> B.para (B.fromList inlns) - _ -> mempty - -headlineToHeaderWithContents :: Headline -> OrgParser Blocks -headlineToHeaderWithContents hdln@(Headline {..}) = do - header <- headlineToHeader hdln - childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren) - return $ header <> headlineContents <> childrenBlocks - -headlineToHeader :: Headline -> OrgParser Blocks -headlineToHeader (Headline {..}) = do - exportTodoKeyword <- getExportSetting exportWithTodoKeywords - let todoText = if exportTodoKeyword - then case headlineTodoMarker of - Just kw -> todoKeywordToInlines kw <> B.space - Nothing -> mempty - else mempty - let text = tagTitle (todoText <> headlineText) headlineTags - let propAttr = propertiesToAttr headlineProperties - attr <- registerHeader propAttr headlineText - return $ B.headerWith attr headlineLevel text - -todoKeyword :: OrgParser TodoMarker -todoKeyword = try $ do - taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar) - choice (map kwParser taskStates) - -todoKeywordToInlines :: TodoMarker -> Inlines -todoKeywordToInlines tdm = - let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm - classes = [todoState, todoText] - in B.spanWith (mempty, classes, mempty) (B.str todoText) - -propertiesToAttr :: Properties -> Attr -propertiesToAttr properties = - let - toStringPair prop = (fromKey (fst prop), fromValue (snd prop)) - customIdKey = toPropertyKey "custom_id" - classKey = toPropertyKey "class" - unnumberedKey = toPropertyKey "unnumbered" - specialProperties = [customIdKey, classKey, unnumberedKey] - id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties - cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) - $ properties - isUnnumbered = - fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties - in - (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs') - -tagTitle :: Inlines -> [Tag] -> Inlines -tagTitle title tags = title <> (mconcat $ map tagToInline tags) - -tagToInline :: Tag -> Inlines -tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty - +import qualified Text.Pandoc.Walk as Walk -- -- parsing blocks -- -- | Get a list of blocks. -blockList :: OrgParser [Block] +blockList :: PandocMonad m => OrgParser m [Block] blockList = do - initialBlocks <- blocks - headlines <- sequence <$> manyTill (headline 1) eof + headlines <- documentTree blocks inline st <- getState - headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st - return . B.toList $ (runF initialBlocks st) <> headlineBlocks + headlineBlocks <- headlineToBlocks $ runF headlines st + -- ignore first headline, it's the document's title + return . drop 1 . B.toList $ headlineBlocks --- | Get the meta information safed in the state. -meta :: OrgParser Meta +-- | Get the meta information saved in the state. +meta :: Monad m => OrgParser m Meta meta = do meta' <- metaExport runF meta' <$> getState -blocks :: OrgParser (F Blocks) +blocks :: PandocMonad m => OrgParser m (F Blocks) blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof) -block :: OrgParser (F Blocks) +block :: PandocMonad m => OrgParser m (F Blocks) block = choice [ mempty <$ blanklines , table , orgBlock , figure , example , genericDrawer + , include , specialLine , horizontalRule , list @@ -283,6 +95,11 @@ block = choice [ mempty <$ blanklines ] <?> "block" +-- | Parse a horizontal rule into a block element +horizontalRule :: Monad m => OrgParser m (F Blocks) +horizontalRule = return B.horizontalRule <$ try hline + + -- -- Block Attributes -- @@ -297,7 +114,7 @@ data BlockAttributes = BlockAttributes -- | Convert BlockAttributes into pandoc Attr attrFromBlockAttributes :: BlockAttributes -> Attr -attrFromBlockAttributes (BlockAttributes{..}) = +attrFromBlockAttributes BlockAttributes{..} = let ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of @@ -306,18 +123,18 @@ attrFromBlockAttributes (BlockAttributes{..}) = kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String) -stringyMetaAttribute attrCheck = try $ do +stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute = try $ do metaLineStart attrName <- map toUpper <$> many1Till nonspaceChar (char ':') - guard $ attrCheck attrName skipSpaces - attrValue <- anyLine + attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) -blockAttributes :: OrgParser BlockAttributes +blockAttributes :: PandocMonad m => OrgParser m BlockAttributes blockAttributes = try $ do - kv <- many (stringyMetaAttribute attrCheck) + kv <- many stringyMetaAttribute + guard $ all (attrCheck . fst) kv let caption = foldl' (appendValues "CAPTION") Nothing kv let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv let name = lookup "NAME" kv @@ -326,7 +143,7 @@ blockAttributes = try $ do Nothing -> return Nothing Just s -> Just <$> parseFromString inlines (s ++ "\n") kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs - return $ BlockAttributes + return BlockAttributes { blockAttrName = name , blockAttrLabel = label , blockAttrCaption = caption' @@ -334,13 +151,7 @@ blockAttributes = try $ do } where attrCheck :: String -> Bool - attrCheck attr = - case attr of - "NAME" -> True - "LABEL" -> True - "CAPTION" -> True - "ATTR_HTML" -> True - _ -> False + attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"] appendValues :: String -> Maybe String -> (String, String) -> Maybe String appendValues attrName accValue (key, value) = @@ -350,17 +161,18 @@ blockAttributes = try $ do Just acc -> Just $ acc ++ ' ':value Nothing -> Just value -keyValues :: OrgParser [(String, String)] +-- | Parse key-value pairs for HTML attributes +keyValues :: Monad m => OrgParser m [(String, String)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where - key :: OrgParser String + key :: Monad m => OrgParser m String key = try $ skipSpaces *> char ':' *> many1 nonspaceChar - value :: OrgParser String + value :: Monad m => OrgParser m String value = skipSpaces *> manyTill anyChar endOfValue - endOfValue :: OrgParser () + endOfValue :: Monad m => OrgParser m () endOfValue = lookAhead $ (() <$ try (many1 spaceChar <* key)) <|> () <$ newline @@ -371,12 +183,12 @@ keyValues = try $ -- -- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE. -orgBlock :: OrgParser (F Blocks) +orgBlock :: PandocMonad m => OrgParser m (F Blocks) orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case (map toLower blkType) of + case map toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -390,25 +202,25 @@ orgBlock = try $ do let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where - blockHeaderStart :: OrgParser String + blockHeaderStart :: Monad m => OrgParser m String blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord lowercase :: String -> String lowercase = map toLower -rawBlockLines :: (String -> F Blocks) -> String -> OrgParser (F Blocks) -rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) +rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) +rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) -parseBlockLines :: (F Blocks -> F Blocks) -> String -> OrgParser (F Blocks) -parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) +parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) +parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where - parsedBlockContent :: OrgParser (F Blocks) + parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType parseFromString blocks (raw ++ "\n") -- | Read the raw string content of a block -rawBlockContent :: String -> OrgParser String +rawBlockContent :: Monad m => String -> OrgParser m String rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop @@ -418,18 +230,17 @@ rawBlockContent blockType = try $ do . map (tabsToSpaces tabLen . commaEscaped) $ blkLines where - rawLine :: OrgParser String + rawLine :: Monad m => OrgParser m String rawLine = try $ ("" <$ blankline) <|> anyLine - blockEnder :: OrgParser () + blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) stripIndent :: [String] -> [String] stripIndent strs = map (drop (shortestIndent strs)) strs shortestIndent :: [String] -> Int - shortestIndent = foldr min maxBound - . map (length . takeWhile isSpace) + shortestIndent = foldr (min . length . takeWhile isSpace) maxBound . filter (not . null) tabsToSpaces :: Int -> String -> String @@ -437,7 +248,7 @@ rawBlockContent blockType = try $ do tabsToSpaces tabLen cs'@(c:cs) = case c of ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs + '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs _ -> cs' commaEscaped :: String -> String @@ -448,18 +259,18 @@ rawBlockContent blockType = try $ do commaEscaped cs = cs -- | Read but ignore all remaining block headers. -ignHeaders :: OrgParser () +ignHeaders :: Monad m => OrgParser m () ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. -exportBlock :: String -> OrgParser (F Blocks) +exportBlock :: Monad m => String -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType returnF (B.rawBlock (map toLower exportType) contents) -verseBlock :: String -> OrgParser (F Blocks) +verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType @@ -468,7 +279,7 @@ verseBlock blockType = try $ do where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline - parseVerseLine :: String -> OrgParser (F Inlines) + parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) parseVerseLine cs = do let (initialSpaces, indentedLine) = span isSpace cs let nbspIndent = if null initialSpaces @@ -480,23 +291,20 @@ verseBlock blockType = try $ do -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. -codeBlock :: BlockAttributes -> String -> OrgParser (F Blocks) +codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) content <- rawBlockContent blockType - resultsContent <- trailingResultsBlock + resultsContent <- option mempty babelResultsBlock let id' = fromMaybe mempty $ blockAttrName blockAttrs - let includeCode = exportsCode kv - let includeResults = exportsResults kv let codeBlck = B.codeBlockWith ( id', classes, kv ) content let labelledBlck = maybe (pure codeBlck) (labelDiv codeBlck) (blockAttrCaption blockAttrs) - let resultBlck = fromMaybe mempty resultsContent return $ - (if includeCode then labelledBlck else mempty) <> - (if includeResults then resultBlck else mempty) + (if exportsCode kv then labelledBlck else mempty) <> + (if exportsResults kv then resultsContent else mempty) where labelDiv :: Blocks -> F Inlines -> F Blocks labelDiv blk value = @@ -505,60 +313,97 @@ codeBlock blockAttrs blockType = do labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -exportsCode :: [(String, String)] -> Bool -exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs - || ("rundoc-exports", "results") `elem` attrs) + exportsCode :: [(String, String)] -> Bool + exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" -exportsResults :: [(String, String)] -> Bool -exportsResults attrs = ("rundoc-exports", "results") `elem` attrs - || ("rundoc-exports", "both") `elem` attrs + exportsResults :: [(String, String)] -> Bool + exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -trailingResultsBlock :: OrgParser (Maybe (F Blocks)) -trailingResultsBlock = optionMaybe . try $ do +-- | Parse the result of an evaluated babel code block. +babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks) +babelResultsBlock = try $ do blanklines - stringAnyCase "#+RESULTS:" - blankline + resultsMarker <|> + (lookAhead . void . try $ + manyTill (metaLineStart *> anyLineNewline) resultsMarker) block + where + resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments --- TODO: We currently don't handle switches. -codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord - _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + (switchClasses, switchKv) <- switchesAsAttributes parameters <- manyTill blockOption newline - let pandocLang = translateLang language - return $ - if hasRundocParameters parameters - then ( [ pandocLang, rundocBlockClass ] - , map toRundocAttrib (("language", language) : parameters) + return ( translateLang language : switchClasses + , originalLang language <> switchKv <> parameters ) - else ([ pandocLang ], parameters) - where - hasRundocParameters = not . null -switch :: OrgParser (Char, Maybe String) -switch = try $ simpleSwitch <|> lineNumbersSwitch +switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) +switchesAsAttributes = try $ do + switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) + return $ foldr addToAttr ([], []) switches where - simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) - lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> - (string "-l \"" *> many1Till nonspaceChar (char '"')) + addToAttr :: (Char, Maybe String, SwitchPolarity) + -> ([String], [(String, String)]) + -> ([String], [(String, String)]) + addToAttr ('n', lineNum, pol) (cls, kv) = + let kv' = case lineNum of + Just num -> ("startFrom", num):kv + Nothing -> kv + cls' = case pol of + SwitchPlus -> "continuedSourceBlock":cls + SwitchMinus -> cls + in ("numberLines":cls', kv') + addToAttr _ x = x + +-- | Whether a switch flag is specified with @+@ or @-@. +data SwitchPolarity = SwitchPlus | SwitchMinus + deriving (Show, Eq) + +-- | Parses a switch's polarity. +switchPolarity :: Monad m => OrgParser m SwitchPolarity +switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -blockOption :: OrgParser (String, String) +-- | Parses a source block switch option. +switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +switch = try $ lineNumberSwitch <|> labelSwitch <|> simpleSwitch + where + simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter + labelSwitch = genericSwitch 'l' $ + char '"' *> many1Till nonspaceChar (char '"') + +-- | Generic source block switch-option parser. +genericSwitch :: Monad m + => Char + -> OrgParser m String + -> OrgParser m (Char, Maybe String, SwitchPolarity) +genericSwitch c p = try $ do + polarity <- switchPolarity <* char c <* skipSpaces + arg <- optionMaybe p + return (c, arg, polarity) + +-- | Reads a line number switch option. The line number switch can be used with +-- example and source blocks. +lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +lineNumberSwitch = genericSwitch 'n' (many digit) + +blockOption :: Monad m => OrgParser m (String, String) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) -orgParamValue :: OrgParser String +orgParamValue :: Monad m => OrgParser m String orgParamValue = try $ skipSpaces - *> notFollowedBy (char ':' ) - *> many1 nonspaceChar + *> notFollowedBy orgArgKey + *> noneOf "\n\r" `many1Till` endOfValue <* skipSpaces - -horizontalRule :: OrgParser (F Blocks) -horizontalRule = return B.horizontalRule <$ try hline + where + endOfValue = lookAhead $ try (skipSpaces <* oneOf "\n\r") + <|> try (skipSpaces1 <* orgArgKey) -- @@ -568,7 +413,7 @@ horizontalRule = return B.horizontalRule <$ try hline -- | A generic drawer which has no special meaning for org-mode. -- Whether or not this drawer is included in the output depends on the drawers -- export setting. -genericDrawer :: OrgParser (F Blocks) +genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do name <- map toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) @@ -576,44 +421,25 @@ genericDrawer = try $ do -- Include drawer if it is explicitly included in or not explicitly excluded -- from the list of drawers that should be exported. PROPERTIES drawers are -- never exported. - case (exportDrawers . orgStateExportSettings $ state) of + case exportDrawers . orgStateExportSettings $ state of _ | name == "PROPERTIES" -> return mempty Left names | name `elem` names -> return mempty Right names | name `notElem` names -> return mempty - _ -> drawerDiv name <$> parseLines content + _ -> drawerDiv name <$> parseLines content where - parseLines :: [String] -> OrgParser (F Blocks) + parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) parseLines = parseFromString blocks . (++ "\n") . unlines drawerDiv :: String -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) -drawerLine :: OrgParser String +drawerLine :: Monad m => OrgParser m String drawerLine = anyLine -drawerEnd :: OrgParser String +drawerEnd :: Monad m => OrgParser m String drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline --- | Read a :PROPERTIES: drawer and return the key/value pairs contained --- within. -propertiesDrawer :: OrgParser Properties -propertiesDrawer = try $ do - drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" - manyTill property (try drawerEnd) - where - property :: OrgParser (PropertyKey, PropertyValue) - property = try $ (,) <$> key <*> value - - key :: OrgParser PropertyKey - key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - - value :: OrgParser PropertyValue - value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) - -- -- Figures @@ -621,7 +447,7 @@ propertiesDrawer = try $ do -- | Figures or an image paragraph (i.e. an image on a line by itself). Only -- images with a caption attribute are interpreted as figures. -figure :: OrgParser (F Blocks) +figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph @@ -629,10 +455,10 @@ figure = try $ do Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) - let isFigure = not . isNothing $ blockAttrCaption figAttrs + let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where - selfTarget :: OrgParser String + selfTarget :: PandocMonad m => OrgParser m String selfTarget = try $ char '[' *> linkTarget <* char ']' imageBlock :: Bool -> BlockAttributes -> String -> F Blocks @@ -654,7 +480,7 @@ figure = try $ do else "fig:" ++ cs -- | Succeeds if looking at the end of the current paragraph -endOfParagraph :: OrgParser () +endOfParagraph :: Monad m => OrgParser m () endOfParagraph = try $ skipSpaces *> newline *> endOfBlock @@ -663,11 +489,10 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- -- | Example code marked up by a leading colon. -example :: OrgParser (F Blocks) -example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine +example :: Monad m => OrgParser m (F Blocks) +example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine where - exampleLine :: OrgParser String + exampleLine :: Monad m => OrgParser m String exampleLine = try $ exampleLineStart *> anyLine exampleCode :: String -> Blocks @@ -678,10 +503,59 @@ exampleCode = B.codeBlockWith ("", ["example"], []) -- Comments, Options and Metadata -- -specialLine :: OrgParser (F Blocks) +specialLine :: PandocMonad m => OrgParser m (F Blocks) specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine -rawExportLine :: OrgParser Blocks +-- | Include the content of a file. +include :: PandocMonad m => OrgParser m (F Blocks) +include = try $ do + metaLineStart <* stringAnyCase "include:" <* skipSpaces + filename <- includeTarget + includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + params <- keyValues + blocksParser <- case includeArgs of + ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw + ["export"] -> return . returnF $ B.fromList [] + ["export", format] -> return $ pure . B.rawBlock format <$> parseRaw + ("src" : rest) -> do + let attr = case rest of + [lang] -> (mempty, [lang], mempty) + _ -> nullAttr + return $ pure . B.codeBlockWith attr <$> parseRaw + _ -> return $ return . B.fromList . blockFilter params <$> blockList + insertIncludedFileF blocksParser ["."] filename + where + includeTarget :: PandocMonad m => OrgParser m FilePath + includeTarget = do + char '"' + manyTill (noneOf "\n\r\t") (char '"') + + parseRaw :: PandocMonad m => OrgParser m String + parseRaw = many anyChar + + blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter params blks = + let minlvl = lookup "minlevel" params + in case (minlvl >>= safeRead :: Maybe Int) of + Nothing -> blks + Just lvl -> let levels = Walk.query headerLevel blks + -- CAVE: partial function in else + curMin = if null levels then 0 else minimum levels + in Walk.walk (shiftHeader (curMin - lvl)) blks + + headerLevel :: Block -> [Int] + headerLevel (Header lvl _attr _content) = [lvl] + headerLevel _ = [] + + shiftHeader :: Int -> Block -> Block + shiftHeader shift blk = + if shift <= 0 + then blk + else case blk of + (Header lvl attr content) -> Header (lvl - shift) attr content + _ -> blk + +rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart key <- metaKey @@ -689,7 +563,7 @@ rawExportLine = try $ do then B.rawBlock key <$> anyLine else mzero -commentLine :: OrgParser Blocks +commentLine :: Monad m => OrgParser m Blocks commentLine = commentLineStart *> anyLine *> pure mempty @@ -714,12 +588,21 @@ data OrgTableRow = OrgContentRow (F [Blocks]) -- should be generated using a builder function. data OrgTable = OrgTable { orgTableColumnProperties :: [ColumnProperty] - , orgTableHeader :: [Blocks] - , orgTableRows :: [[Blocks]] + , orgTableHeader :: [Blocks] + , orgTableRows :: [[Blocks]] } -table :: OrgParser (F Blocks) -table = try $ do +table :: PandocMonad m => OrgParser m (F Blocks) +table = gridTableWith blocks True <|> orgTable + +-- | A normal org table +orgTable :: PandocMonad m => OrgParser m (F Blocks) +orgTable = try $ do + -- don't allow a table on the first line of a list item; org requires that + -- tables start at first non-space character on the line + let isFirstInListItem st = orgStateParserContext st == ListItemState && + isNothing (orgStateLastPreCharPos st) + guard =<< not . isFirstInListItem <$> getState blockAttrs <- blockAttributes lookAhead tableStart do @@ -731,7 +614,7 @@ orgToPandocTable :: OrgTable -> Inlines -> Blocks orgToPandocTable (OrgTable colProps heads lns) caption = - let totalWidth = if any (not . isNothing) (map columnRelWidth colProps) + let totalWidth = if any isJust (map columnRelWidth colProps) then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps else Nothing in B.table caption (map (convertColProp totalWidth) colProps) heads lns @@ -741,22 +624,22 @@ orgToPandocTable (OrgTable colProps heads lns) caption = let align' = fromMaybe AlignDefault $ columnAlignment colProp width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t)) - <$> (columnRelWidth colProp) + <$> columnRelWidth colProp <*> totalWidth in (align', width') -tableRows :: OrgParser [OrgTableRow] +tableRows :: PandocMonad m => OrgParser m [OrgTableRow] tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) -tableContentRow :: OrgParser OrgTableRow +tableContentRow :: PandocMonad m => OrgParser m OrgTableRow tableContentRow = try $ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) -tableContentCell :: OrgParser (F Blocks) +tableContentCell :: PandocMonad m => OrgParser m (F Blocks) tableContentCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell -tableAlignRow :: OrgParser OrgTableRow +tableAlignRow :: Monad m => OrgParser m OrgTableRow tableAlignRow = try $ do tableStart colProps <- many1Till columnPropertyCell newline @@ -764,10 +647,10 @@ tableAlignRow = try $ do guard $ any (/= def) colProps return $ OrgAlignRow colProps -columnPropertyCell :: OrgParser ColumnProperty +columnPropertyCell :: Monad m => OrgParser m ColumnProperty columnPropertyCell = emptyCell <|> propCell <?> "alignment info" where - emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell) + emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell) propCell = try $ ColumnProperty <$> (skipSpaces *> char '<' @@ -776,18 +659,18 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info" <* char '>' <* emptyCell) -tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar :: Monad m => OrgParser m Alignment tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft , char 'c' *> return AlignCenter , char 'r' *> return AlignRight ] -tableHline :: OrgParser OrgTableRow +tableHline :: Monad m => OrgParser m OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) -endOfCell :: OrgParser Char +endOfCell :: Monad m => OrgParser m Char endOfCell = try $ char '|' <|> lookAhead newline rowsToTable :: [OrgTableRow] @@ -813,45 +696,45 @@ normalizeTable (OrgTable colProps heads rows) = rowToContent :: OrgTable -> OrgTableRow -> F OrgTable -rowToContent orgTable row = +rowToContent tbl row = case row of OrgHlineRow -> return singleRowPromotedToHeader OrgAlignRow props -> return . setProperties $ props OrgContentRow cs -> appendToBody cs where singleRowPromotedToHeader :: OrgTable - singleRowPromotedToHeader = case orgTable of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - orgTable{ orgTableHeader = b , orgTableRows = [] } - _ -> orgTable + singleRowPromotedToHeader = case tbl of + OrgTable{ orgTableHeader = [], orgTableRows = [b] } -> + tbl{ orgTableHeader = b , orgTableRows = [] } + _ -> tbl setProperties :: [ColumnProperty] -> OrgTable - setProperties ps = orgTable{ orgTableColumnProperties = ps } + setProperties ps = tbl{ orgTableColumnProperties = ps } appendToBody :: F [Blocks] -> F OrgTable appendToBody frow = do newRow <- frow - let oldRows = orgTableRows orgTable + let oldRows = orgTableRows tbl -- NOTE: This is an inefficient O(n) operation. This should be changed -- if performance ever becomes a problem. - return orgTable{ orgTableRows = oldRows ++ [newRow] } + return tbl{ orgTableRows = oldRows ++ [newRow] } -- -- LaTeX fragments -- -latexFragment :: OrgParser (F Blocks) +latexFragment :: Monad m => OrgParser m (F Blocks) latexFragment = try $ do envName <- latexEnvStart content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + returnF $ B.rawBlock "latex" (content `inLatexEnv` envName) where c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" , c , "\\end{", e, "}\n" ] -latexEnd :: String -> OrgParser () +latexEnd :: Monad m => String -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces <* string ("\\end{" ++ envName ++ "}") @@ -861,74 +744,70 @@ latexEnd envName = try $ -- -- Footnote defintions -- -noteBlock :: OrgParser (F Blocks) +noteBlock :: PandocMonad m => OrgParser m (F Blocks) noteBlock = try $ do - ref <- noteMarker <* skipSpaces - content <- mconcat <$> blocksTillHeaderOrNote + ref <- noteMarker <* skipSpaces <* updateLastPreCharPos + content <- mconcat <$> many1Till block endOfFootnote addToNotesTable (ref, content) return mempty where - blocksTillHeaderOrNote = - many1Till block (eof <|> () <$ lookAhead noteMarker - <|> () <$ lookAhead headerStart) + endOfFootnote = eof + <|> () <$ lookAhead noteMarker + <|> () <$ lookAhead headerStart + <|> () <$ lookAhead (try $ blankline *> blankline) -- Paragraphs or Plain text -paraOrPlain :: OrgParser (F Blocks) +paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) paraOrPlain = try $ do -- Make sure we are not looking at a headline - notFollowedBy' (char '*' *> (oneOf " *")) + notFollowedBy' headerStart ils <- inlines nl <- option False (newline *> return True) -- Read block as paragraph, except if we are in a list context and the block -- is directly followed by a list item, in which case the block is read as -- plain text. try (guard nl - *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) + *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart)) *> return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) + <|> return (B.plain <$> ils) -- -- list blocks -- -list :: OrgParser (F Blocks) +list :: PandocMonad m => OrgParser m (F Blocks) list = choice [ definitionList, bulletList, orderedList ] <?> "list" -definitionList :: OrgParser (F Blocks) -definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactify'DL . sequence - <$> many1 (definitionListItem $ bulletListStart' (Just n)) - -bulletList :: OrgParser (F Blocks) -bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify' . sequence - <$> many1 (listItem (bulletListStart' $ Just n)) - -orderedList :: OrgParser (F Blocks) -orderedList = fmap B.orderedList . fmap compactify' . sequence - <$> many1 (listItem orderedListStart) - -bulletListStart' :: Maybe Int -> OrgParser Int --- returns length of bulletList prefix, inclusive of marker -bulletListStart' Nothing = do ind <- length <$> many spaceChar - oneOf (bullets $ ind == 0) - skipSpaces1 - return (ind + 1) -bulletListStart' (Just n) = do count (n-1) spaceChar - oneOf (bullets $ n == 1) - many1 spaceChar - return n - --- Unindented lists are legal, but they can't use '*' bullets. --- We return n to maintain compatibility with the generic listItem. -bullets :: Bool -> String -bullets unindented = if unindented then "+-" else "*+-" - -definitionListItem :: OrgParser Int - -> OrgParser (F (Inlines, [Blocks])) -definitionListItem parseMarkerGetLength = try $ do - markerLength <- parseMarkerGetLength +definitionList :: PandocMonad m => OrgParser m (F Blocks) +definitionList = try $ do + indent <- lookAhead bulletListStart + fmap (B.definitionList . compactifyDL) . sequence + <$> many1 (definitionListItem (bulletListStart `indented` indent)) + +bulletList :: PandocMonad m => OrgParser m (F Blocks) +bulletList = try $ do + indent <- lookAhead bulletListStart + fmap (B.bulletList . compactify) . sequence + <$> many1 (listItem (bulletListStart `indented` indent)) + +indented :: Monad m => OrgParser m Int -> Int -> OrgParser m Int +indented indentedMarker minIndent = try $ do + n <- indentedMarker + guard (minIndent <= n) + return n + +orderedList :: PandocMonad m => OrgParser m (F Blocks) +orderedList = try $ do + indent <- lookAhead orderedListStart + fmap (B.orderedList . compactify) . sequence + <$> many1 (listItem (orderedListStart `indented` indent)) + +definitionListItem :: PandocMonad m + => OrgParser m Int + -> OrgParser m (F (Inlines, [Blocks])) +definitionListItem parseIndentedMarker = try $ do + markerLength <- parseIndentedMarker term <- manyTill (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) @@ -940,12 +819,12 @@ definitionListItem parseMarkerGetLength = try $ do definitionMarker = spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) - --- parse raw text for one list item, excluding start marker and continuations -listItem :: OrgParser Int - -> OrgParser (F Blocks) -listItem start = try . withContext ListItemState $ do - markerLength <- try start +-- | parse raw text for one list item +listItem :: PandocMonad m + => OrgParser m Int + -> OrgParser m (F Blocks) +listItem parseIndentedMarker = try . withContext ListItemState $ do + markerLength <- try parseIndentedMarker firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) @@ -953,24 +832,11 @@ listItem start = try . withContext ListItemState $ do -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int - -> OrgParser String -listContinuation markerLength = try $ +listContinuation :: Monad m => Int + -> OrgParser m String +listContinuation markerLength = try $ do notFollowedBy' blankline - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) + mappend <$> (concat <$> many1 listLine) + <*> many blankline where listLine = try $ indentWith markerLength *> anyLineNewline - - -- indent by specified number of spaces (or equiv. tabs) - indentWith :: Int -> OrgParser String - indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] - --- | Parse any line, include the final newline in the output. -anyLineNewline :: OrgParser String -anyLineNewline = (++ "\n") <$> anyLine diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs new file mode 100644 index 000000000..f77778ec9 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -0,0 +1,304 @@ +{- +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Readers.Org.DocumentTree + Copyright : Copyright (C) 2014-2018 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Parsers for org-mode headlines and document subtrees +-} +module Text.Pandoc.Readers.Org.DocumentTree + ( documentTree + , headlineToBlocks + ) where + +import Control.Arrow ((***)) +import Control.Monad (guard, void) +import Data.Char (toLower, toUpper) +import Data.List (intersperse) +import Data.Monoid ((<>)) +import Text.Pandoc.Builder (Blocks, Inlines) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing + +import qualified Data.Map as Map +import qualified Text.Pandoc.Builder as B + +-- +-- Org headers +-- + +-- | Parse input as org document tree. +documentTree :: PandocMonad m + => OrgParser m (F Blocks) + -> OrgParser m (F Inlines) + -> OrgParser m (F Headline) +documentTree blocks inline = do + initialBlocks <- blocks + headlines <- sequence <$> manyTill (headline blocks inline 1) eof + title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState + return $ do + headlines' <- headlines + initialBlocks' <- initialBlocks + title' <- title + return Headline + { headlineLevel = 0 + , headlineTodoMarker = Nothing + , headlineText = B.fromList title' + , headlineTags = mempty + , headlineProperties = mempty + , headlineContents = initialBlocks' + , headlineChildren = headlines' + } + where + getTitle :: Map.Map String MetaValue -> [Inline] + getTitle metamap = + case Map.lookup "title" metamap of + Just (MetaInlines inlns) -> inlns + _ -> [] + +newtype Tag = Tag { fromTag :: String } + deriving (Show, Eq) + +-- | Create a tag containing the given string. +toTag :: String -> Tag +toTag = Tag + +-- | The key (also called name or type) of a property. +newtype PropertyKey = PropertyKey { fromKey :: String } + deriving (Show, Eq, Ord) + +-- | Create a property key containing the given string. Org mode keys are +-- case insensitive and are hence converted to lower case. +toPropertyKey :: String -> PropertyKey +toPropertyKey = PropertyKey . map toLower + +-- | The value assigned to a property. +newtype PropertyValue = PropertyValue { fromValue :: String } + +-- | Create a property value containing the given string. +toPropertyValue :: String -> PropertyValue +toPropertyValue = PropertyValue + +-- | Check whether the property value is non-nil (i.e. truish). +isNonNil :: PropertyValue -> Bool +isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] + +-- | Key/value pairs from a PROPERTIES drawer +type Properties = [(PropertyKey, PropertyValue)] + +-- | Org mode headline (i.e. a document subtree). +data Headline = Headline + { headlineLevel :: Int + , headlineTodoMarker :: Maybe TodoMarker + , headlineText :: Inlines + , headlineTags :: [Tag] + , headlineProperties :: Properties + , headlineContents :: Blocks + , headlineChildren :: [Headline] + } + +-- | Read an Org mode headline and its contents (i.e. a document subtree). +-- @lvl@ gives the minimum acceptable level of the tree. +headline :: PandocMonad m + => OrgParser m (F Blocks) + -> OrgParser m (F Inlines) + -> Int + -> OrgParser m (F Headline) +headline blocks inline lvl = try $ do + level <- headerStart + guard (lvl <= level) + todoKw <- optionMaybe todoKeyword + title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle + tags <- option [] headerTags + newline + properties <- option mempty propertiesDrawer + contents <- blocks + children <- many (headline blocks inline (level + 1)) + return $ do + title' <- title + contents' <- contents + children' <- sequence children + return Headline + { headlineLevel = level + , headlineTodoMarker = todoKw + , headlineText = title' + , headlineTags = tags + , headlineProperties = properties + , headlineContents = contents' + , headlineChildren = children' + } + where + endOfTitle :: Monad m => OrgParser m () + endOfTitle = void . lookAhead $ optional headerTags *> newline + + headerTags :: Monad m => OrgParser m [Tag] + headerTags = try $ + let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' + in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) + +-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks +headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks +headlineToBlocks hdln@Headline {..} = do + maxHeadlineLevels <- getExportSetting exportHeadlineLevels + case () of + _ | any isNoExportTag headlineTags -> return mempty + _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln + _ | isCommentTitle headlineText -> return mempty + _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln + _ | otherwise -> headlineToHeaderWithContents hdln + +isNoExportTag :: Tag -> Bool +isNoExportTag = (== toTag "noexport") + +isArchiveTag :: Tag -> Bool +isArchiveTag = (== toTag "ARCHIVE") + +-- | Check if the title starts with COMMENT. +-- FIXME: This accesses builder internals not intended for use in situations +-- like these. Replace once keyword parsing is supported. +isCommentTitle :: Inlines -> Bool +isCommentTitle (B.toList -> (Str "COMMENT":_)) = True +isCommentTitle _ = False + +archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks +archivedHeadlineToBlocks hdln = do + archivedTreesOption <- getExportSetting exportArchivedTrees + case archivedTreesOption of + ArchivedTreesNoExport -> return mempty + ArchivedTreesExport -> headlineToHeaderWithContents hdln + ArchivedTreesHeadlineOnly -> headlineToHeader hdln + +headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks +headlineToHeaderWithList hdln@Headline {..} = do + maxHeadlineLevels <- getExportSetting exportHeadlineLevels + header <- headlineToHeader hdln + listElements <- mapM headlineToBlocks headlineChildren + let listBlock = if null listElements + then mempty + else B.orderedList listElements + let headerText = if maxHeadlineLevels == headlineLevel + then header + else flattenHeader header + return $ headerText <> headlineContents <> listBlock + where + flattenHeader :: Blocks -> Blocks + flattenHeader blks = + case B.toList blks of + (Header _ _ inlns:_) -> B.para (B.fromList inlns) + _ -> mempty + +headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks +headlineToHeaderWithContents hdln@Headline {..} = do + header <- headlineToHeader hdln + childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren + return $ header <> headlineContents <> childrenBlocks + +headlineToHeader :: Monad m => Headline -> OrgParser m Blocks +headlineToHeader Headline {..} = do + exportTodoKeyword <- getExportSetting exportWithTodoKeywords + exportTags <- getExportSetting exportWithTags + let todoText = if exportTodoKeyword + then case headlineTodoMarker of + Just kw -> todoKeywordToInlines kw <> B.space + Nothing -> mempty + else mempty + let text = todoText <> headlineText <> + if exportTags + then tagsToInlines headlineTags + else mempty + let propAttr = propertiesToAttr headlineProperties + attr <- registerHeader propAttr headlineText + return $ B.headerWith attr headlineLevel text + +todoKeyword :: Monad m => OrgParser m TodoMarker +todoKeyword = try $ do + taskStates <- activeTodoMarkers <$> getState + let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) <* spaceChar) + choice (map kwParser taskStates) + +todoKeywordToInlines :: TodoMarker -> Inlines +todoKeywordToInlines tdm = + let todoText = todoMarkerName tdm + todoState = map toLower . show $ todoMarkerState tdm + classes = [todoState, todoText] + in B.spanWith (mempty, classes, mempty) (B.str todoText) + +propertiesToAttr :: Properties -> Attr +propertiesToAttr properties = + let + toStringPair = fromKey *** fromValue + customIdKey = toPropertyKey "custom_id" + classKey = toPropertyKey "class" + unnumberedKey = toPropertyKey "unnumbered" + specialProperties = [customIdKey, classKey, unnumberedKey] + id' = maybe mempty fromValue . lookup customIdKey $ properties + cls = maybe mempty fromValue . lookup classKey $ properties + kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + $ properties + isUnnumbered = + maybe False isNonNil . lookup unnumberedKey $ properties + in + (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + +tagsToInlines :: [Tag] -> Inlines +tagsToInlines [] = mempty +tagsToInlines tags = + (B.space <>) . mconcat . intersperse (B.str "\160") . map tagToInline $ tags + where + tagToInline :: Tag -> Inlines + tagToInline t = tagSpan t . B.smallcaps . B.str $ fromTag t + +-- | Wrap the given inline in a span, marking it as a tag. +tagSpan :: Tag -> Inlines -> Inlines +tagSpan t = B.spanWith ("", ["tag"], [("tag-name", fromTag t)]) + + + + + +-- | Read a :PROPERTIES: drawer and return the key/value pairs contained +-- within. +propertiesDrawer :: Monad m => OrgParser m Properties +propertiesDrawer = try $ do + drawerType <- drawerStart + guard $ map toUpper drawerType == "PROPERTIES" + manyTill property (try endOfDrawer) + where + property :: Monad m => OrgParser m (PropertyKey, PropertyValue) + property = try $ (,) <$> key <*> value + + key :: Monad m => OrgParser m PropertyKey + key = fmap toPropertyKey . try $ + skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + + value :: Monad m => OrgParser m PropertyValue + value = fmap toPropertyValue . try $ + skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + + endOfDrawer :: Monad m => OrgParser m String + endOfDrawer = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index 764e5b0d5..6a70c50b9 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2016-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.ExportSettings + Copyright : © 2016–2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -29,22 +29,22 @@ module Text.Pandoc.Readers.Org.ExportSettings ( exportSettings ) where -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing -import Control.Monad ( mzero, void ) -import Data.Char ( toLower ) -import Data.Maybe ( listToMaybe ) +import Control.Monad (mzero, void) +import Data.Char (toLower) +import Data.Maybe (listToMaybe) -- | Read and handle space separated org-mode export settings. -exportSettings :: OrgParser () +exportSettings :: Monad m => OrgParser m () exportSettings = void $ sepBy spaces exportSetting -- | Setter function for export settings. type ExportSettingSetter a = a -> ExportSettings -> ExportSettings -- | Read and process a single org-mode export option. -exportSetting :: OrgParser () +exportSetting :: Monad m => OrgParser m () exportSetting = choice [ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val }) , booleanSetting "'" (\val es -> es { exportSmartQuotes = val }) @@ -52,7 +52,7 @@ exportSetting = choice , booleanSetting "-" (\val es -> es { exportSpecialStrings = val }) , ignoredSetting ":" , ignoredSetting "<" - , ignoredSetting "\\n" + , booleanSetting "\\n" (\val es -> es { exportPreserveBreaks = val }) , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val }) , booleanSetting "author" (\val es -> es { exportWithAuthor = val }) , ignoredSetting "c" @@ -71,7 +71,7 @@ exportSetting = choice , ignoredSetting "pri" , ignoredSetting "prop" , ignoredSetting "stat" - , ignoredSetting "tags" + , booleanSetting "tags" (\val es -> es { exportWithTags = val }) , ignoredSetting "tasks" , ignoredSetting "tex" , ignoredSetting "timestamp" @@ -81,10 +81,11 @@ exportSetting = choice , ignoredSetting "|" ] <?> "export setting" -genericExportSetting :: OrgParser a +genericExportSetting :: Monad m + => OrgParser m a -> String -> ExportSettingSetter a - -> OrgParser () + -> OrgParser m () genericExportSetting optionParser settingIdentifier setter = try $ do _ <- string settingIdentifier *> char ':' value <- optionParser @@ -94,11 +95,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do st { orgStateExportSettings = setter val . orgStateExportSettings $ st } -- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser () +booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () booleanSetting = genericExportSetting elispBoolean -- | An integer-valued option. -integerSetting :: String -> ExportSettingSetter Int -> OrgParser () +integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () integerSetting = genericExportSetting parseInt where parseInt = try $ @@ -106,9 +107,10 @@ integerSetting = genericExportSetting parseInt -- | Either the string "headline" or an elisp boolean and treated as an -- @ArchivedTreesOption@. -archivedTreeSetting :: String +archivedTreeSetting :: Monad m + => String -> ExportSettingSetter ArchivedTreesOption - -> OrgParser () + -> OrgParser m () archivedTreeSetting = genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean where @@ -125,9 +127,10 @@ archivedTreeSetting = else ArchivedTreesNoExport -- | A list or a complement list (i.e. a list starting with `not`). -complementableListSetting :: String +complementableListSetting :: Monad m + => String -> ExportSettingSetter (Either [String] [String]) - -> OrgParser () + -> OrgParser m () complementableListSetting = genericExportSetting $ choice [ Left <$> complementStringList , Right <$> stringList @@ -135,31 +138,31 @@ complementableListSetting = genericExportSetting $ choice ] where -- Read a plain list of strings. - stringList :: OrgParser [String] + stringList :: Monad m => OrgParser m [String] stringList = try $ char '(' *> sepBy elispString spaces <* char ')' -- Read an emacs lisp list specifying a complement set. - complementStringList :: OrgParser [String] + complementStringList :: Monad m => OrgParser m [String] complementStringList = try $ string "(not " *> sepBy elispString spaces <* char ')' - elispString :: OrgParser String + elispString :: Monad m => OrgParser m String elispString = try $ char '"' *> manyTill alphaNum (char '"') -- | Read but ignore the export setting. -ignoredSetting :: String -> OrgParser () +ignoredSetting :: Monad m => String -> OrgParser m () ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) -- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are -- interpreted as true. -elispBoolean :: OrgParser Bool +elispBoolean :: Monad m => OrgParser m Bool elispBoolean = try $ do value <- many1 nonspaceChar return $ case map toLower value of diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 7e1bb61c2..3a12f38d0 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Inlines + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -33,73 +33,75 @@ module Text.Pandoc.Readers.Org.Inlines , linkTarget ) where -import Text.Pandoc.Readers.Org.BlockStarts ( endOfBlock, noteMarker ) -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared - ( cleanLinkString, isImageFilename, rundocBlockClass - , toRundocAttrib, translateLang ) +import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, + originalLang, translateLang) +import Text.Pandoc.Builder (Inlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines ) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) -import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) +import Text.Pandoc.Shared (underlineSpan) +import Text.TeXMath (DisplayType (..), readTeX, writePandoc) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Prelude hiding (sequence) -import Control.Monad ( guard, mplus, mzero, when, void ) -import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( intersperse ) -import Data.Maybe ( fromMaybe ) +import Control.Monad (guard, mplus, mzero, unless, void, when) +import Control.Monad.Trans (lift) +import Data.Char (isAlphaNum, isSpace) +import Data.List (intersperse) import qualified Data.Map as M -import Data.Monoid ( (<>) ) -import Data.Traversable (sequence) +import Data.Maybe (fromMaybe) +import Data.Monoid ((<>)) +import Data.Traversable (sequence) +import Prelude hiding (sequence) -- -- Functions acting on the parser state -- -recordAnchorId :: String -> OrgParser () +recordAnchorId :: PandocMonad m => String -> OrgParser m () recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + s{ orgStateAnchorIds = i : orgStateAnchorIds s } -pushToInlineCharStack :: Char -> OrgParser () +pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () pushToInlineCharStack c = updateState $ \s -> s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } -popInlineCharStack :: OrgParser () +popInlineCharStack :: PandocMonad m => OrgParser m () popInlineCharStack = updateState $ \s -> s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } -surroundingEmphasisChar :: OrgParser [Char] +surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char] surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState -startEmphasisNewlinesCounting :: Int -> OrgParser () +startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Just maxNewlines } -decEmphasisNewlinesCount :: OrgParser () +decEmphasisNewlinesCount :: PandocMonad m => OrgParser m () decEmphasisNewlinesCount = updateState $ \s -> s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } -newlinesCountWithinLimits :: OrgParser Bool +newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool newlinesCountWithinLimits = do st <- getState return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True -resetEmphasisNewlines :: OrgParser () +resetEmphasisNewlines :: PandocMonad m => OrgParser m () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } -addToNotesTable :: OrgNoteRecord -> OrgParser () +addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m () addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState updateState $ \s -> s{ orgStateNotes' = note:oldnotes } -- | Parse a single Org-mode inline element -inline :: OrgParser (F Inlines) +inline :: PandocMonad m => OrgParser m (F Inlines) inline = choice [ whitespace , linebreak @@ -119,13 +121,14 @@ inline = , superscript , inlineLaTeX , exportSnippet + , macro , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" -- | Read the rest of the input as inlines. -inlines :: OrgParser (F Inlines) +inlines :: PandocMonad m => OrgParser m (F Inlines) inlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: @@ -133,30 +136,31 @@ specialChars :: [Char] specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~" -whitespace :: OrgParser (F Inlines) +whitespace :: PandocMonad m => OrgParser m (F Inlines) whitespace = pure B.space <$ skipMany1 spaceChar <* updateLastPreCharPos <* updateLastForbiddenCharPos <?> "whitespace" -linebreak :: OrgParser (F Inlines) +linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline -str :: OrgParser (F Inlines) +str :: PandocMonad m => OrgParser m (F Inlines) str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural -- break. This should reflect the values of the Emacs variable -- @org-element-pagaraph-separate@. -endline :: OrgParser (F Inlines) +endline :: PandocMonad m => OrgParser m (F Inlines) endline = try $ do newline notFollowedBy' endOfBlock decEmphasisNewlinesCount guard =<< newlinesCountWithinLimits updateLastPreCharPos - return . return $ B.softbreak + useHardBreaks <- exportPreserveBreaks . orgStateExportSettings <$> getState + returnF (if useHardBreaks then B.linebreak else B.softbreak) -- @@ -174,7 +178,7 @@ endline = try $ do -- contributors. All this should be consolidated once an official Org-mode -- citation syntax has emerged. -cite :: OrgParser (F Inlines) +cite :: PandocMonad m => OrgParser m (F Inlines) cite = try $ berkeleyCite <|> do guardEnabled Ext_citations (cs, raw) <- withRaw $ choice @@ -182,43 +186,44 @@ cite = try $ berkeleyCite <|> do , orgRefCite , berkeleyTextualCite ] - return $ (flip B.cite (B.text raw)) <$> cs + return $ flip B.cite (B.text raw) <$> cs -- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). -pandocOrgCite :: OrgParser (F [Citation]) +pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) pandocOrgCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' -orgRefCite :: OrgParser (F [Citation]) +orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) orgRefCite = try $ choice [ normalOrgRefCite , fmap (:[]) <$> linkLikeOrgRefCite ] -normalOrgRefCite :: OrgParser (F [Citation]) +normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation]) normalOrgRefCite = try $ do mode <- orgRefCiteMode - -- org-ref style citation key, parsed into a citation of the given mode - let orgRefCiteItem :: OrgParser (F Citation) - orgRefCiteItem = try $ do - key <- orgRefCiteKey - returnF $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = mode - , citationNoteNum = 0 - , citationHash = 0 - } - firstCitation <- orgRefCiteItem - moreCitations <- many (try $ char ',' *> orgRefCiteItem) + firstCitation <- orgRefCiteList mode + moreCitations <- many (try $ char ',' *> orgRefCiteList mode) return . sequence $ firstCitation : moreCitations - where + where + -- | A list of org-ref style citation keys, parsed as citation of the given + -- citation mode. + orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) + orgRefCiteList citeMode = try $ do + key <- orgRefCiteKey + returnF Citation + { citationId = key + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = citeMode + , citationNoteNum = 0 + , citationHash = 0 + } -- | Read an Berkeley-style Org-mode citation. Berkeley citation style was -- develop and adjusted to Org-mode style by John MacFarlane and Richard -- Lawrence, respectively, both philosophers at UC Berkeley. -berkeleyCite :: OrgParser (F Inlines) +berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) berkeleyCite = try $ do bcl <- berkeleyCitationList return $ do @@ -229,11 +234,11 @@ berkeleyCite = try $ do return $ if parens then toCite - . maybe id (\p -> alterFirst (prependPrefix p)) prefix - . maybe id (\s -> alterLast (appendSuffix s)) suffix + . maybe id (alterFirst . prependPrefix) prefix + . maybe id (alterLast . appendSuffix) suffix $ citationList else maybe mempty (<> " ") prefix - <> (toListOfCites $ map toInTextMode citationList) + <> toListOfCites (map toInTextMode citationList) <> maybe mempty (", " <>) suffix where toCite :: [Citation] -> Inlines @@ -247,7 +252,7 @@ berkeleyCite = try $ do alterFirst, alterLast :: (a -> a) -> [a] -> [a] alterFirst _ [] = [] - alterFirst f (c:cs) = (f c):cs + alterFirst f (c:cs) = f c : cs alterLast f = reverse . alterFirst f . reverse prependPrefix, appendSuffix :: Inlines -> Citation -> Citation @@ -255,12 +260,12 @@ berkeleyCite = try $ do appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf } data BerkeleyCitationList = BerkeleyCitationList - { berkeleyCiteParens :: Bool + { berkeleyCiteParens :: Bool , berkeleyCiteCommonPrefix :: Maybe Inlines , berkeleyCiteCommonSuffix :: Maybe Inlines - , berkeleyCiteCitations :: [Citation] + , berkeleyCiteCitations :: [Citation] } -berkeleyCitationList :: OrgParser (F BerkeleyCitationList) +berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) berkeleyCitationList = try $ do char '[' parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] @@ -268,29 +273,29 @@ berkeleyCitationList = try $ do skipSpaces commonPrefix <- optionMaybe (try $ citationListPart <* char ';') citations <- citeList - commonSuffix <- optionMaybe (try $ citationListPart) + commonSuffix <- optionMaybe (try citationListPart) char ']' return (BerkeleyCitationList parens <$> sequence commonPrefix <*> sequence commonSuffix <*> citations) where - citationListPart :: OrgParser (F Inlines) + citationListPart :: PandocMonad m => OrgParser m (F Inlines) citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do notFollowedBy' citeKey notFollowedBy (oneOf ";]") inline -berkeleyBareTag :: OrgParser () +berkeleyBareTag :: PandocMonad m => OrgParser m () berkeleyBareTag = try $ void berkeleyBareTag' -berkeleyParensTag :: OrgParser () +berkeleyParensTag :: PandocMonad m => OrgParser m () berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag' -berkeleyBareTag' :: OrgParser () +berkeleyBareTag' :: PandocMonad m => OrgParser m () berkeleyBareTag' = try $ void (string "cite") -berkeleyTextualCite :: OrgParser (F [Citation]) +berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) berkeleyTextualCite = try $ do (suppressAuthor, key) <- citeKey returnF . return $ Citation @@ -305,14 +310,14 @@ berkeleyTextualCite = try $ do -- The following is what a Berkeley-style bracketed textual citation parser -- would look like. However, as these citations are a subset of Pandoc's Org -- citation style, this isn't used. --- berkeleyBracketedTextualCite :: OrgParser (F [Citation]) +-- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) -- berkeleyBracketedTextualCite = try . (fmap head) $ -- enclosedByPair '[' ']' berkeleyTextualCite -- | Read a link-like org-ref style citation. The citation includes pre and -- post text. However, multiple citations are not possible due to limitations -- in the syntax. -linkLikeOrgRefCite :: OrgParser (F Citation) +linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation) linkLikeOrgRefCite = try $ do _ <- string "[[" mode <- orgRefCiteMode @@ -335,13 +340,20 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: OrgParser String -orgRefCiteKey = try . many1 . satisfy $ \c -> - isAlphaNum c || c `elem` ("-_:\\./"::String) +orgRefCiteKey :: PandocMonad m => OrgParser m String +orgRefCiteKey = + let citeKeySpecialChars = "-_:\\./," :: String + isCiteKeySpecialChar c = c `elem` citeKeySpecialChars + isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c + endOfCitation = try $ do + many $ satisfy isCiteKeySpecialChar + satisfy $ not . isCiteKeyChar + in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + -- | Supported citation types. Only a small subset of org-ref types is -- supported for now. TODO: rewrite this, use LaTeX reader as template. -orgRefCiteMode :: OrgParser CitationMode +orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode orgRefCiteMode = choice $ map (\(s, mode) -> mode <$ try (string s <* char ':')) [ ("cite", AuthorInText) @@ -352,10 +364,10 @@ orgRefCiteMode = , ("citeyear", SuppressAuthor) ] -citeList :: OrgParser (F [Citation]) +citeList :: PandocMonad m => OrgParser m (F [Citation]) citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) -citation :: OrgParser (F Citation) +citation :: PandocMonad m => OrgParser m (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey @@ -363,15 +375,16 @@ citation = try $ do return $ do x <- pref y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return Citation + { citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } where prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) @@ -384,39 +397,39 @@ citation = try $ do then (B.space <>) <$> rest else rest -footnote :: OrgParser (F Inlines) +footnote :: PandocMonad m => OrgParser m (F Inlines) footnote = try $ inlineNote <|> referencedNote -inlineNote :: OrgParser (F Inlines) +inlineNote :: PandocMonad m => OrgParser m (F Inlines) inlineNote = try $ do string "[fn:" ref <- many alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - when (not $ null ref) $ + unless (null ref) $ addToNotesTable ("fn:" ++ ref, note) return $ B.note <$> note -referencedNote :: OrgParser (F Inlines) +referencedNote :: PandocMonad m => OrgParser m (F Inlines) referencedNote = try $ do ref <- noteMarker return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" ++ ref ++ "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } return $ B.note contents' -linkOrImage :: OrgParser (F Inlines) +linkOrImage :: PandocMonad m => OrgParser m (F Inlines) linkOrImage = explicitOrImageLink <|> selflinkOrImage <|> angleLink <|> plainLink <?> "link or image" -explicitOrImageLink :: OrgParser (F Inlines) +explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines) explicitOrImageLink = try $ do char '[' srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget @@ -427,34 +440,34 @@ explicitOrImageLink = try $ do src <- srcF case cleanLinkString title of Just imgSrc | isImageFilename imgSrc -> - pure $ B.link src "" $ B.image imgSrc mempty mempty + pure . B.link src "" $ B.image imgSrc mempty mempty _ -> linkToInlinesF src =<< title' -selflinkOrImage :: OrgParser (F Inlines) +selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' return $ linkToInlinesF src (B.str src) -plainLink :: OrgParser (F Inlines) +plainLink :: PandocMonad m => OrgParser m (F Inlines) plainLink = try $ do (orig, src) <- uri returnF $ B.link src "" (B.str orig) -angleLink :: OrgParser (F Inlines) +angleLink :: PandocMonad m => OrgParser m (F Inlines) angleLink = try $ do char '<' link <- plainLink char '>' return link -linkTarget :: OrgParser String +linkTarget :: PandocMonad m => OrgParser m String linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") -possiblyEmptyLinkTarget :: OrgParser String +possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat :: String -> OrgParser m (F String) applyCustomLinkFormat link = do let (linkType, rest) = break (== ':') link return $ do @@ -487,7 +500,7 @@ internalLink link title = do -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as -- an anchor. -anchor :: OrgParser (F Inlines) +anchor :: PandocMonad m => OrgParser m (F Inlines) anchor = try $ do anchorId <- parseAnchor recordAnchorId anchorId @@ -509,23 +522,23 @@ solidify = map replaceSpecialChar | otherwise = '-' -- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") - let attrClasses = [translateLang lang, rundocBlockClass] - let attrKeyVal = map toRundocAttrib (("language", lang) : opts) + let attrClasses = [translateLang lang] + let attrKeyVal = originalLang lang <> opts returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where - inlineBlockOption :: OrgParser (String, String) + inlineBlockOption :: PandocMonad m => OrgParser m (String, String) inlineBlockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgInlineParamValue return (argKey, paramValue) - orgInlineParamValue :: OrgParser String + orgInlineParamValue :: PandocMonad m => OrgParser m String orgInlineParamValue = try $ skipSpaces *> notFollowedBy (char ':') @@ -533,7 +546,7 @@ inlineCodeBlock = try $ do <* skipSpaces -emphasizedText :: OrgParser (F Inlines) +emphasizedText :: PandocMonad m => OrgParser m (F Inlines) emphasizedText = do state <- getState guard . exportEmphasizedText . orgStateExportSettings $ state @@ -544,60 +557,64 @@ emphasizedText = do , underline ] -enclosedByPair :: Char -- ^ opening char +enclosedByPair :: PandocMonad m + => Char -- ^ opening char -> Char -- ^ closing char - -> OrgParser a -- ^ parser - -> OrgParser [a] + -> OrgParser m a -- ^ parser + -> OrgParser m [a] enclosedByPair s e p = char s *> many1Till p (char e) -emph :: OrgParser (F Inlines) +emph :: PandocMonad m => OrgParser m (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' -strong :: OrgParser (F Inlines) +strong :: PandocMonad m => OrgParser m (F Inlines) strong = fmap B.strong <$> emphasisBetween '*' -strikeout :: OrgParser (F Inlines) +strikeout :: PandocMonad m => OrgParser m (F Inlines) strikeout = fmap B.strikeout <$> emphasisBetween '+' --- There is no underline, so we use strong instead. -underline :: OrgParser (F Inlines) -underline = fmap B.strong <$> emphasisBetween '_' +underline :: PandocMonad m => OrgParser m (F Inlines) +underline = fmap underlineSpan <$> emphasisBetween '_' -verbatim :: OrgParser (F Inlines) +verbatim :: PandocMonad m => OrgParser m (F Inlines) verbatim = return . B.code <$> verbatimBetween '=' -code :: OrgParser (F Inlines) +code :: PandocMonad m => OrgParser m (F Inlines) code = return . B.code <$> verbatimBetween '~' -subscript :: OrgParser (F Inlines) +subscript :: PandocMonad m => OrgParser m (F Inlines) subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser (F Inlines) +superscript :: PandocMonad m => OrgParser m (F Inlines) superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) -math :: OrgParser (F Inlines) +math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' , mathStringBetween '$' , rawMathBetween "\\(" "\\)" ] -displayMath :: OrgParser (F Inlines) +displayMath :: PandocMonad m => OrgParser m (F Inlines) displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" , rawMathBetween "$$" "$$" ] -updatePositions :: Char - -> OrgParser Char +updatePositions :: PandocMonad m + => Char + -> OrgParser m Char updatePositions c = do + st <- getState + let emphasisPreChars = orgStateEmphasisPreChars st when (c `elem` emphasisPreChars) updateLastPreCharPos when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos return c -symbol :: OrgParser (F Inlines) +symbol :: PandocMonad m => OrgParser m (F Inlines) symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) -emphasisBetween :: Char - -> OrgParser (F Inlines) +emphasisBetween :: PandocMonad m + => Char + -> OrgParser m (F Inlines) emphasisBetween c = try $ do startEmphasisNewlinesCounting emphasisAllowedNewlines res <- enclosedInlines (emphasisStart c) (emphasisEnd c) @@ -606,8 +623,9 @@ emphasisBetween c = try $ do resetEmphasisNewlines return res -verbatimBetween :: Char - -> OrgParser String +verbatimBetween :: PandocMonad m + => Char + -> OrgParser m String verbatimBetween c = try $ emphasisStart c *> many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) @@ -615,8 +633,9 @@ verbatimBetween c = try $ verbatimChar = noneOf "\n\r" >>= updatePositions -- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: Char - -> OrgParser String +mathStringBetween :: PandocMonad m + => Char + -> OrgParser m String mathStringBetween c = try $ do mathStart c body <- many1TillNOrLessNewlines mathAllowedNewlines @@ -626,8 +645,9 @@ mathStringBetween c = try $ do return $ body ++ [final] -- | Parse a single character between @c@ using math rules -math1CharBetween :: Char - -> OrgParser String +math1CharBetween :: PandocMonad m + => Char + -> OrgParser m String math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars @@ -635,13 +655,14 @@ math1CharBetween c = try $ do eof <|> () <$ lookAhead (oneOf mathPostChars) return [res] -rawMathBetween :: String +rawMathBetween :: PandocMonad m + => String -> String - -> OrgParser String + -> OrgParser m String rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) -- | Parses the start (opening character) of emphasis -emphasisStart :: Char -> OrgParser Char +emphasisStart :: PandocMonad m => Char -> OrgParser m Char emphasisStart c = try $ do guard =<< afterEmphasisPreChar guard =<< notAfterString @@ -654,7 +675,7 @@ emphasisStart c = try $ do return c -- | Parses the closing character of emphasis -emphasisEnd :: Char -> OrgParser Char +emphasisEnd :: PandocMonad m => Char -> OrgParser m Char emphasisEnd c = try $ do guard =<< notAfterForbiddenBorderChar char c @@ -662,14 +683,16 @@ emphasisEnd c = try $ do updateLastStrPos popInlineCharStack return c - where acceptablePostChars = - surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) + where + acceptablePostChars = do + emphasisPostChars <- orgStateEmphasisPostChars <$> getState + surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) -mathStart :: Char -> OrgParser Char +mathStart :: PandocMonad m => Char -> OrgParser m Char mathStart c = try $ char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) -mathEnd :: Char -> OrgParser Char +mathEnd :: PandocMonad m => Char -> OrgParser m Char mathEnd c = try $ do res <- noneOf (c:mathForbiddenBorderChars) char c @@ -677,15 +700,15 @@ mathEnd c = try $ do return res -enclosedInlines :: OrgParser a - -> OrgParser b - -> OrgParser (F Inlines) +enclosedInlines :: (PandocMonad m, Show b) => OrgParser m a + -> OrgParser m b + -> OrgParser m (F Inlines) enclosedInlines start end = try $ trimInlinesF . mconcat <$> enclosed start end inline -enclosedRaw :: OrgParser a - -> OrgParser b - -> OrgParser String +enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a + -> OrgParser m b + -> OrgParser m String enclosedRaw start end = try $ start *> (onSingleLine <|> spanningTwoLines) where onSingleLine = try $ many1Till (noneOf "\n\r") end @@ -694,10 +717,10 @@ enclosedRaw start end = try $ -- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume -- newlines. -many1TillNOrLessNewlines :: Int - -> OrgParser Char - -> OrgParser a - -> OrgParser String +many1TillNOrLessNewlines :: PandocMonad m => Int + -> OrgParser m Char + -> OrgParser m a + -> OrgParser m String many1TillNOrLessNewlines n p end = try $ nMoreLines (Just n) mempty >>= oneOrMore where @@ -715,17 +738,9 @@ many1TillNOrLessNewlines n p end = try $ -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` -- for details). --- | Chars allowed to occur before emphasis (spaces and newlines are ok, too) -emphasisPreChars :: [Char] -emphasisPreChars = "\t \"'({" - --- | Chars allowed at after emphasis -emphasisPostChars :: [Char] -emphasisPostChars = "\t\n !\"'),-.:;?\\}" - -- | Chars not allowed at the (inner) border of emphasis emphasisForbiddenBorderChars :: [Char] -emphasisForbiddenBorderChars = "\t\n\r \"'," +emphasisForbiddenBorderChars = "\t\n\r " -- | The maximum number of newlines within emphasisAllowedNewlines :: Int @@ -746,29 +761,29 @@ mathAllowedNewlines :: Int mathAllowedNewlines = 2 -- | Whether we are right behind a char allowed before emphasis -afterEmphasisPreChar :: OrgParser Bool +afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool afterEmphasisPreChar = do pos <- getPosition lastPrePos <- orgStateLastPreCharPos <$> getState return . fromMaybe True $ (== pos) <$> lastPrePos -- | Whether the parser is right after a forbidden border char -notAfterForbiddenBorderChar :: OrgParser Bool +notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool notAfterForbiddenBorderChar = do pos <- getPosition lastFBCPos <- orgStateLastForbiddenCharPos <$> getState return $ lastFBCPos /= Just pos -- | Read a sub- or superscript expression -subOrSuperExpr :: OrgParser (F Inlines) +subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ - choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") + choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") , simpleSubOrSuperString ] >>= parseFromString (mconcat <$> many inline) where enclosing (left, right) s = left : s ++ [right] -simpleSubOrSuperString :: OrgParser String +simpleSubOrSuperString :: PandocMonad m => OrgParser m String simpleSubOrSuperString = try $ do state <- getState guard . exportSubSuperscripts . orgStateExportSettings $ state @@ -777,17 +792,18 @@ simpleSubOrSuperString = try $ do <*> many1 alphaNum ] -inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) inlineLaTeX = try $ do cmd <- inlineLaTeXCommand + ils <- (lift . lift) $ parseAsInlineLaTeX cmd maybe mzero returnF $ - parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd + parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils where parseAsMath :: String -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs - parseAsInlineLaTeX :: String -> Maybe Inlines - parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) + parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs parseAsMathMLSym :: String -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) @@ -795,19 +811,22 @@ inlineLaTeX = try $ do where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 state :: ParserState - state = def{ stateOptions = def{ readerParseRaw = True }} + state = def{ stateOptions = def{ readerExtensions = + enableExtension Ext_raw_tex (readerExtensions def) } } texMathToPandoc :: String -> Maybe [Inline] - texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline + texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just -inlineLaTeXCommand :: OrgParser String +inlineLaTeXCommand :: PandocMonad m => OrgParser m String inlineLaTeXCommand = try $ do rest <- getInput - case runParser rawLaTeXInline def "source" rest of - Right (RawInline _ cs) -> do + st <- getState + parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest + case parsed of + Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. let cmdNoSpc = dropWhileEnd isSpace cs @@ -820,33 +839,56 @@ inlineLaTeXCommand = try $ do dropWhileEnd :: (a -> Bool) -> [a] -> [a] dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] -exportSnippet :: OrgParser (F Inlines) +exportSnippet :: PandocMonad m => OrgParser m (F Inlines) exportSnippet = try $ do string "@@" format <- many1Till (alphaNum <|> char '-') (char ':') snippet <- manyTill anyChar (try $ string "@@") returnF $ B.rawInline format snippet -smart :: OrgParser (F Inlines) -smart = do - getOption readerSmart >>= guard - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) +macro :: PandocMonad m => OrgParser m (F Inlines) +macro = try $ do + recursionDepth <- orgStateMacroDepth <$> getState + guard $ recursionDepth < 15 + string "{{{" + name <- many alphaNum + args <- ([] <$ string "}}}") + <|> char '(' *> argument `sepBy` char ',' <* eoa + expander <- lookupMacro name <$> getState + case expander of + Nothing -> mzero + Just fn -> do + updateState $ \s -> s { orgStateMacroDepth = recursionDepth + 1 } + res <- parseFromString (mconcat <$> many inline) $ fn args + updateState $ \s -> s { orgStateMacroDepth = recursionDepth } + return res + where + argument = many $ notFollowedBy eoa *> noneOf "," + eoa = string ")}}}" + +smart :: PandocMonad m => OrgParser m (F Inlines) +smart = choice [doubleQuoted, singleQuoted, orgApostrophe, orgDash, orgEllipses] where orgDash = do - guard =<< getExportSetting exportSpecialStrings - dash <* updatePositions '-' + guardOrSmartEnabled =<< getExportSetting exportSpecialStrings + pure <$> dash <* updatePositions '-' orgEllipses = do - guard =<< getExportSetting exportSpecialStrings - ellipses <* updatePositions '.' - orgApostrophe = - (char '\'' <|> char '\8217') <* updateLastPreCharPos - <* updateLastForbiddenCharPos - *> return (B.str "\x2019") - -singleQuoted :: OrgParser (F Inlines) + guardOrSmartEnabled =<< getExportSetting exportSpecialStrings + pure <$> ellipses <* updatePositions '.' + orgApostrophe = do + guardEnabled Ext_smart + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + returnF (B.str "\x2019") + +guardOrSmartEnabled :: PandocMonad m => Bool -> OrgParser m () +guardOrSmartEnabled b = do + smartExtension <- extensionEnabled Ext_smart <$> getOption readerExtensions + guard (b || smartExtension) + +singleQuoted :: PandocMonad m => OrgParser m (F Inlines) singleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes + guardOrSmartEnabled =<< getExportSetting exportSmartQuotes singleQuoteStart updatePositions '\'' withQuoteContext InSingleQuote $ @@ -856,12 +898,15 @@ singleQuoted = try $ do -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: OrgParser (F Inlines) +doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) doubleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes + guardOrSmartEnabled =<< getExportSetting exportSmartQuotes doubleQuoteStart updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + let doubleQuotedContent = withQuoteContext InDoubleQuote $ do + doubleQuoteEnd + updateLastForbiddenCharPos + return . fmap B.doubleQuoted . trimInlinesF $ contents + let leftQuoteAndContent = return $ pure (B.str "\8220") <> contents + doubleQuotedContent <|> leftQuoteAndContent diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 1fea3e890..6ad403fd8 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -1,7 +1,7 @@ {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} {- -Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org.Meta - Copyright : Copyright (C) 2014-2017 Albert Krewinkel + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -33,25 +33,26 @@ module Text.Pandoc.Readers.Org.Meta , metaLine ) where -import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings ) -import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.ExportSettings (exportSettings) +import Text.Pandoc.Readers.Org.Inlines +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Blocks, Inlines ) -import Text.Pandoc.Definition +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Shared (safeRead) -import Control.Monad ( mzero, void ) -import Data.Char ( toLower ) -import Data.List ( intersperse ) +import Control.Monad (mzero, void, when) +import Data.Char (toLower) +import Data.List (intersperse) import qualified Data.Map as M -import Data.Monoid ( (<>) ) -import Network.HTTP ( urlEncode ) +import Network.HTTP (urlEncode) -- | Returns the current meta, respecting export options. -metaExport :: OrgParser (F Meta) +metaExport :: Monad m => OrgParser m (F Meta) metaExport = do st <- getState let settings = orgStateExportSettings st @@ -68,29 +69,32 @@ removeMeta key meta' = -- | Parse and handle a single line containing meta information -- The order, in which blocks are tried, makes sure that we're not looking at -- the beginning of a block, so we don't need to check for it -metaLine :: OrgParser Blocks +metaLine :: PandocMonad m => OrgParser m Blocks metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) -declarationLine :: OrgParser () +declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do key <- map toLower <$> metaKey (key', value) <- metaValue key - updateState $ \st -> - let meta' = B.setMeta key' <$> value <*> pure nullMeta - in st { orgStateMeta = meta' <> orgStateMeta st } + let addMetaValue st = + st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } + when (key' /= "results") $ updateState addMetaValue -metaKey :: OrgParser String +metaKey :: Monad m => OrgParser m String metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces -metaValue :: String -> OrgParser (String, (F MetaValue)) +metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of "author" -> (key,) <$> metaInlinesCommaSeparated + "keywords" -> (key,) <$> metaInlinesCommaSeparated "title" -> (key,) <$> metaInlines + "subtitle" -> (key,) <$> metaInlines "date" -> (key,) <$> metaInlines + "nocite" -> (key,) <$> accumulatingList key metaInlines "header-includes" -> (key,) <$> accumulatingList key metaInlines "latex_header" -> (inclKey,) <$> accumulatingList inclKey (metaExportSnippet "latex") @@ -103,32 +107,32 @@ metaValue key = accumulatingList inclKey (metaExportSnippet "html") _ -> (key,) <$> metaString -metaInlines :: OrgParser (F MetaValue) +metaInlines :: PandocMonad m => OrgParser m (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline -metaInlinesCommaSeparated :: OrgParser (F MetaValue) +metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') + itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' newline - authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs + items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList - return $ MetaList . map toMetaInlines <$> sequence authors + return $ MetaList . map toMetaInlines <$> sequence items -metaString :: OrgParser (F MetaValue) +metaString :: Monad m => OrgParser m (F MetaValue) metaString = metaModifiedString id -metaModifiedString :: (String -> String) -> OrgParser (F MetaValue) +metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) metaModifiedString f = return . MetaString . f <$> anyLine -- | Read an format specific meta definition -metaExportSnippet :: String -> OrgParser (F MetaValue) +metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) metaExportSnippet format = return . MetaInlines . B.toList . B.rawInline format <$> anyLine -- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: String - -> OrgParser (F MetaValue) - -> OrgParser (F MetaValue) +accumulatingList :: Monad m => String + -> OrgParser m (F MetaValue) + -> OrgParser m (F MetaValue) accumulatingList key p = do value <- p meta' <- orgStateMeta <$> getState @@ -141,7 +145,7 @@ accumulatingList key p = do -- -- export options -- -optionLine :: OrgParser () +optionLine :: Monad m => OrgParser m () optionLine = try $ do key <- metaKey case key of @@ -150,16 +154,19 @@ optionLine = try $ do "todo" -> todoSequence >>= updateState . registerTodoSequence "seq_todo" -> todoSequence >>= updateState . registerTodoSequence "typ_todo" -> todoSequence >>= updateState . registerTodoSequence - _ -> mzero + "macro" -> macroDefinition >>= updateState . registerMacro + "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar + "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar + _ -> mzero -addLinkFormat :: String +addLinkFormat :: Monad m => String -> (String -> String) - -> OrgParser () + -> OrgParser m () addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat :: Monad m => OrgParser m (String, String -> String) parseLinkFormat = try $ do linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat @@ -167,9 +174,8 @@ parseLinkFormat = try $ do -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. -parseFormat :: OrgParser (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend +parseFormat :: Monad m => OrgParser m (String -> String) +parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares replacePlain = try $ (\x -> concat . flip intersperse x) @@ -181,13 +187,34 @@ parseFormat = try $ do rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) -inlinesTillNewline :: OrgParser (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline +setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState +setEmphasisPreChar csMb st = + let preChars = case csMb of + Nothing -> orgStateEmphasisPreChars defaultOrgParserState + Just cs -> cs + in st { orgStateEmphasisPreChars = preChars } + +setEmphasisPostChar :: Maybe [Char] -> OrgParserState -> OrgParserState +setEmphasisPostChar csMb st = + let postChars = case csMb of + Nothing -> orgStateEmphasisPostChars defaultOrgParserState + Just cs -> cs + in st { orgStateEmphasisPostChars = postChars } + +emphChars :: Monad m => OrgParser m (Maybe [Char]) +emphChars = do + skipSpaces + safeRead <$> anyLine + +inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines) +inlinesTillNewline = do + updateLastPreCharPos + trimInlinesF . mconcat <$> manyTill inline newline -- -- ToDo Sequences and Keywords -- -todoSequence :: OrgParser TodoSequence +todoSequence :: Monad m => OrgParser m TodoSequence todoSequence = try $ do todoKws <- todoKeywords doneKws <- optionMaybe $ todoDoneSep *> todoKeywords @@ -201,13 +228,13 @@ todoSequence = try $ do (x:xs) -> return $ keywordsToSequence (reverse xs) [x] where - todoKeywords :: OrgParser [String] + todoKeywords :: Monad m => OrgParser m [String] todoKeywords = try $ let keyword = many1 nonspaceChar <* skipSpaces endOfKeywords = todoDoneSep <|> void newline in manyTill keyword (lookAhead endOfKeywords) - todoDoneSep :: OrgParser () + todoDoneSep :: Monad m => OrgParser m () todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 keywordsToSequence :: [String] -> [String] -> TodoSequence @@ -215,3 +242,27 @@ todoSequence = try $ do let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers + +macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition = try $ do + macroName <- many1 nonspaceChar <* skipSpaces + firstPart <- expansionPart + (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) + let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder + return (macroName, expander) + where + placeholder :: Monad m => OrgParser m Int + placeholder = try . fmap read $ char '$' *> many1 digit + + expansionPart :: Monad m => OrgParser m String + expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + + alternate :: [a] -> [a] -> [a] + alternate [] ys = ys + alternate xs [] = xs + alternate (x:xs) (y:ys) = x : y : alternate xs ys + + reorder :: [Int] -> [String] -> [String] + reorder perm xs = + let element n = take 1 $ drop (n - 1) xs + in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 38f95ca95..6316766fa 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,8 +1,7 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,8 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.ParserState + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -30,16 +29,21 @@ Define the Org-mode parser state. -} module Text.Pandoc.Readers.Org.ParserState ( OrgParserState (..) + , defaultOrgParserState , OrgParserLocal (..) , OrgNoteRecord , HasReaderOptions (..) , HasQuoteContext (..) + , HasMacros (..) , TodoMarker (..) , TodoSequence , TodoState (..) , activeTodoMarkers , registerTodoSequence - , F(..) + , MacroExpander + , lookupMacro + , registerMacro + , F , askF , asksF , trimInlinesF @@ -50,24 +54,28 @@ module Text.Pandoc.Readers.Org.ParserState , optionsToParserState ) where -import Control.Monad (liftM, liftM2) -import Control.Monad.Reader (Reader, runReader, ask, asks, local) +import Control.Monad.Reader (ReaderT, asks, local) -import Data.Default (Default(..)) +import Data.Default (Default (..)) import qualified Data.Map as M import qualified Data.Set as Set - -import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) -import Text.Pandoc.Definition ( Meta(..), nullMeta ) -import Text.Pandoc.Options ( ReaderOptions(..) ) -import Text.Pandoc.Parsing ( HasHeaderMap(..) - , HasIdentifierList(..) - , HasLastStrPosition(..) - , HasQuoteContext(..) - , HasReaderOptions(..) - , ParserContext(..) - , QuoteContext(..) - , SourcePos ) +import Data.Text (Text) + +import Text.Pandoc.Builder (Blocks, Inlines) +import Text.Pandoc.Definition (Meta (..), nullMeta) +import Text.Pandoc.Logging +import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..), + HasIncludeFiles (..), HasLastStrPosition (..), + HasLogMessages (..), HasMacros (..), + HasQuoteContext (..), HasReaderOptions (..), + ParserContext (..), QuoteContext (..), SourcePos, + askF, asksF, returnF, runF, trimInlinesF) +import Text.Pandoc.Readers.LaTeX.Types (Macro) + +-- | This is used to delay evaluation until all relevant information has been +-- parsed and made available in the parser state. +type F = Future OrgParserState -- | An inline note / footnote containing the note key and its (inline) value. type OrgNoteRecord = (String, F Blocks) @@ -76,6 +84,8 @@ type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. type OrgLinkFormatters = M.Map String (String -> String) +-- | Macro expander function +type MacroExpander = [String] -> String -- | The states in which a todo item can be data TodoState = Todo | Done @@ -95,22 +105,34 @@ type TodoSequence = [TodoMarker] data OrgParserState = OrgParserState { orgStateAnchorIds :: [String] , orgStateEmphasisCharStack :: [Char] + , orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before + -- emphasis; spaces and newlines are + -- always ok in addition to what is + -- specified here. + , orgStateEmphasisPostChars :: [Char] -- ^ Chars allowed at after emphasis , orgStateEmphasisNewlines :: Maybe Int , orgStateExportSettings :: ExportSettings , orgStateHeaderMap :: M.Map Inlines String , orgStateIdentifiers :: Set.Set String + , orgStateIncludeFiles :: [String] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters + , orgStateMacros :: M.Map String MacroExpander + , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable , orgStateOptions :: ReaderOptions , orgStateParserContext :: ParserContext , orgStateTodoSequences :: [TodoSequence] + , orgLogMessages :: [LogMessage] + , orgMacros :: M.Map Text Macro } -data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } +data OrgParserLocal = OrgParserLocal + { orgLocalQuoteContext :: QuoteContext + } instance Default OrgParserLocal where def = OrgParserLocal NoQuote @@ -122,7 +144,7 @@ instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } -instance HasQuoteContext st (Reader OrgParserLocal) where +instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where getQuoteContext = asks orgLocalQuoteContext withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) @@ -134,26 +156,47 @@ instance HasHeaderMap OrgParserState where extractHeaderMap = orgStateHeaderMap updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } +instance HasLogMessages OrgParserState where + addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st } + getLogMessages st = reverse $ orgLogMessages st + +instance HasMacros OrgParserState where + extractMacros st = orgMacros st + updateMacros f st = st{ orgMacros = f (orgMacros st) } + +instance HasIncludeFiles OrgParserState where + getIncludeFiles = orgStateIncludeFiles + addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st } + dropLatestIncludeFile st = + st { orgStateIncludeFiles = drop 1 $ orgStateIncludeFiles st } + instance Default OrgParserState where def = defaultOrgParserState defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateAnchorIds = [] + , orgStateEmphasisPreChars = "-\t ('\"{" + , orgStateEmphasisPostChars = "-\t\n .,:!?;'\")}[" , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateExportSettings = def , orgStateHeaderMap = M.empty , orgStateIdentifiers = Set.empty + , orgStateIncludeFiles = [] , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty + , orgStateMacros = M.empty + , orgStateMacroDepth = 0 , orgStateMeta = return nullMeta , orgStateNotes' = [] , orgStateOptions = def , orgStateParserContext = NullState , orgStateTodoSequences = [] + , orgLogMessages = [] + , orgMacros = M.empty } optionsToParserState :: ReaderOptions -> OrgParserState @@ -177,6 +220,15 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences +lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro macroName = M.lookup macroName . orgStateMacros + +registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro (name, expander) st = + let curMacros = orgStateMacros st + in st{ orgStateMacros = M.insert name expander curMacros } + + -- -- Export Settings @@ -191,20 +243,22 @@ data ArchivedTreesOption = -- | Export settings <http://orgmode.org/manual/Export-settings.html> -- These settings can be changed via OPTIONS statements. data ExportSettings = ExportSettings - { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees - , exportDrawers :: Either [String] [String] + { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees + , exportDrawers :: Either [String] [String] -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. - , exportEmphasizedText :: Bool -- ^ Parse emphasized text - , exportHeadlineLevels :: Int + , exportEmphasizedText :: Bool -- ^ Parse emphasized text + , exportHeadlineLevels :: Int -- ^ Maximum depth of headlines, deeper headlines are convert to list - , exportSmartQuotes :: Bool -- ^ Parse quotes smartly - , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly - , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts - , exportWithAuthor :: Bool -- ^ Include author in final meta-data - , exportWithCreator :: Bool -- ^ Include creator in final meta-data - , exportWithEmail :: Bool -- ^ Include email in final meta-data + , exportPreserveBreaks :: Bool -- ^ Whether to preserve linebreaks + , exportSmartQuotes :: Bool -- ^ Parse quotes smartly + , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly + , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts + , exportWithAuthor :: Bool -- ^ Include author in final meta-data + , exportWithCreator :: Bool -- ^ Include creator in final meta-data + , exportWithEmail :: Bool -- ^ Include email in final meta-data + , exportWithTags :: Bool -- ^ Keep tags as part of headlines , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers } @@ -217,43 +271,13 @@ defaultExportSettings = ExportSettings , exportDrawers = Left ["LOGBOOK"] , exportEmphasizedText = True , exportHeadlineLevels = 3 - , exportSmartQuotes = True + , exportPreserveBreaks = False + , exportSmartQuotes = False , exportSpecialStrings = True , exportSubSuperscripts = True , exportWithAuthor = True , exportWithCreator = True , exportWithEmail = True + , exportWithTags = True , exportWithTodoKeywords = True } - - --- --- Parser state reader --- - --- | Reader monad wrapping the parser state. This is used to delay evaluation --- until all relevant information has been parsed and made available in the --- parser state. See also the newtype of the same name in --- Text.Pandoc.Parsing. -newtype F a = F { unF :: Reader OrgParserState a - } deriving (Functor, Applicative, Monad) - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = fmap mconcat . sequence - -runF :: F a -> OrgParserState -> a -runF = runReader . unF - -askF :: F OrgParserState -askF = F ask - -asksF :: (OrgParserState -> a) -> F a -asksF f = F $ asks f - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines - -returnF :: Monad m => a -> m (F a) -returnF = return . return diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 95415f823..36420478b 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Parsing + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -31,6 +31,8 @@ functions are adapted to Org-mode specific functionality. module Text.Pandoc.Readers.Org.Parsing ( OrgParser , anyLine + , anyLineNewline + , indentWith , blanklines , newline , parseFromString @@ -70,8 +72,11 @@ module Text.Pandoc.Readers.Org.Parsing , dash , ellipses , citeKey + , gridTableWith + , insertIncludedFileF -- * Re-exports from Text.Pandoc.Parsec , runParser + , runParserT , getInput , char , letter @@ -107,24 +112,24 @@ module Text.Pandoc.Readers.Org.Parsing , getPosition ) where -import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline, + parseFromString) import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline - , parseFromString ) -import Control.Monad ( guard ) -import Control.Monad.Reader ( Reader ) +import Control.Monad (guard) +import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) +type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities -- -- | Parse any line of text -anyLine :: OrgParser String +anyLine :: Monad m => OrgParser m String anyLine = P.anyLine <* updateLastPreCharPos @@ -132,7 +137,7 @@ anyLine = -- The version Text.Pandoc.Parsing cannot be used, as we need additional parts -- of the state saved and restored. -parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a parseFromString parser str' = do oldLastPreCharPos <- orgStateLastPreCharPos <$> getState updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } @@ -141,33 +146,34 @@ parseFromString parser str' = do return result -- | Skip one or more tab or space characters. -skipSpaces1 :: OrgParser () +skipSpaces1 :: Monad m => OrgParser m () skipSpaces1 = skipMany1 spaceChar -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. -newline :: OrgParser Char +newline :: Monad m => OrgParser m Char newline = P.newline <* updateLastPreCharPos <* updateLastForbiddenCharPos -- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: OrgParser [Char] +blanklines :: Monad m => OrgParser m [Char] blanklines = P.blanklines <* updateLastPreCharPos <* updateLastForbiddenCharPos -- | Succeeds when we're in list context. -inList :: OrgParser () +inList :: Monad m => OrgParser m () inList = do ctx <- orgStateParserContext <$> getState guard (ctx == ListItemState) -- | Parse in different context -withContext :: ParserContext -- ^ New parser context - -> OrgParser a -- ^ Parser to run in that context - -> OrgParser a +withContext :: Monad m + => ParserContext -- ^ New parser context + -> OrgParser m a -- ^ Parser to run in that context + -> OrgParser m a withContext context parser = do oldContext <- orgStateParserContext <$> getState updateState $ \s -> s{ orgStateParserContext = context } @@ -180,19 +186,19 @@ withContext context parser = do -- -- | Get an export setting. -getExportSetting :: (ExportSettings -> a) -> OrgParser a +getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a getExportSetting s = s . orgStateExportSettings <$> getState -- | Set the current position as the last position at which a forbidden char -- was found (i.e. a character which is not allowed at the inner border of -- markup). -updateLastForbiddenCharPos :: OrgParser () +updateLastForbiddenCharPos :: Monad m => OrgParser m () updateLastForbiddenCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} -- | Set the current parser position as the position at which a character was -- seen which allows inline markup to follow. -updateLastPreCharPos :: OrgParser () +updateLastPreCharPos :: Monad m => OrgParser m () updateLastPreCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastPreCharPos = Just p} @@ -201,15 +207,15 @@ updateLastPreCharPos = getPosition >>= \p -> -- -- | Read the key of a plist style key-value list. -orgArgKey :: OrgParser String +orgArgKey :: Monad m => OrgParser m String orgArgKey = try $ skipSpaces *> char ':' *> many1 orgArgWordChar -- | Read the value of a plist style key-value list. -orgArgWord :: OrgParser String +orgArgWord :: Monad m => OrgParser m String orgArgWord = many1 orgArgWordChar -- | Chars treated as part of a word in plists. -orgArgWordChar :: OrgParser Char +orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 8c87cfa25..cba72cc07 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Shared + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -29,14 +29,12 @@ Utility functions used in other Pandoc Org modules. module Text.Pandoc.Readers.Org.Shared ( cleanLinkString , isImageFilename - , rundocBlockClass - , toRundocAttrib + , originalLang , translateLang ) where -import Control.Arrow ( first ) -import Data.Char ( isAlphaNum ) -import Data.List ( isPrefixOf, isSuffixOf ) +import Data.Char (isAlphaNum) +import Data.List (isPrefixOf, isSuffixOf) -- | Check whether the given string looks like the path to of URL of an image. @@ -58,8 +56,8 @@ cleanLinkString s = '.':'/':_ -> Just s -- relative path '.':'.':'/':_ -> Just s -- relative path -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' - _ | isUrl s -> Just s -- URL + 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' + _ | isUrl s -> Just s -- URL _ -> Nothing where isUrl :: String -> Bool @@ -68,17 +66,17 @@ cleanLinkString s = in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme && not (null path) --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" +-- | Creates an key-value pair marking the original language name specified for +-- a piece of source code. --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - --- | Prefix the name of a attribute, marking it as a code execution parameter. -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first (rundocPrefix ++) +-- | Creates an key-value attributes marking the original language name +-- specified for a piece of source code. +originalLang :: String -> [(String, String)] +originalLang lang = + let transLang = translateLang lang + in if transLang == lang + then [] + else [("org-language", lang)] -- | Translate from Org-mode's programming language identifiers to those used -- by Pandoc. This is useful to allow for proper syntax highlighting in diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e05b6cba2..e88d997f0 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,38 +30,50 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} -module Text.Pandoc.Readers.RST ( - readRST, - readRSTWithWarnings - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Builder (setMeta, fromList) -import Text.Pandoc.Shared -import Text.Pandoc.Parsing -import Text.Pandoc.Options -import Control.Monad ( when, liftM, guard, mzero ) -import Data.List ( findIndex, intercalate, - transpose, sort, deleteFirstsBy, isSuffixOf , nub, union) -import Data.Maybe (fromMaybe) +module Text.Pandoc.Readers.RST ( readRST ) where +import Control.Arrow (second) +import Control.Monad (forM_, guard, liftM, mplus, mzero, when) +import Control.Monad.Except (throwError) +import Control.Monad.Identity (Identity (..)) +import Data.Char (isHexDigit, isSpace, toLower, toUpper) +import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, + nub, sort, transpose, union) import qualified Data.Map as M -import Text.Printf ( printf ) -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) -import qualified Text.Pandoc.Builder as B -import Data.Sequence (viewr, ViewR(..)) -import Data.Char (toLower, isHexDigit, isSpace) +import Data.Maybe (fromMaybe, isJust) import Data.Monoid ((<>)) +import Data.Sequence (ViewR (..), viewr) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs) +import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV) +import Text.Pandoc.Definition import Text.Pandoc.Error +import Text.Pandoc.ImageSize (lengthToDim, scaleDimension) +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing +import Text.Pandoc.Shared +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Printf (printf) --- | Parse reStructuredText string and return Pandoc document. -readRST :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") - -readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) -readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n") +-- TODO: +-- [ ] .. parsed-literal -type RSTParser = Parser [Char] ParserState +-- | Parse reStructuredText string and return Pandoc document. +readRST :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readRST opts s = do + parsed <- readWithM parseRST def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case parsed of + Right result -> return result + Left e -> throwError e + +type RSTParser m = ParserT [Char] ParserState m -- -- Constants and data structure definitions @@ -87,9 +100,9 @@ isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level attr text):rest) = - (Header (level - num) attr text):(promoteHeaders num rest) -promoteHeaders num (other:rest) = other:(promoteHeaders num rest) +promoteHeaders num (Header level attr text:rest) = + Header (level - num) attr text:promoteHeaders num rest +promoteHeaders num (other:rest) = other:promoteHeaders num rest promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) @@ -101,11 +114,11 @@ titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata titleTransform (bs, meta) = let (bs', meta') = case bs of - ((Header 1 _ head1):(Header 2 _ head2):rest) + (Header 1 _ head1:Header 2 _ head2:rest) | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub (promoteHeaders 2 rest, setMeta "title" (fromList head1) $ setMeta "subtitle" (fromList head2) meta) - ((Header 1 _ head1):rest) + (Header 1 _ head1:rest) | not (any (isHeader 1) rest) -> -- title only (promoteHeaders 1 rest, setMeta "title" (fromList head1) meta) @@ -121,8 +134,10 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" - $ M.mapKeys (\k -> if k == "authors" then "author" else k) - $ metamap + $ M.mapKeys (\k -> + if k == "authors" + then "author" + else k) metamap toPlain (MetaBlocks [Para xs]) = MetaInlines xs toPlain x = x splitAuthors (MetaBlocks [Para xs]) @@ -131,6 +146,12 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds splitAuthors x = x splitAuthors' = map normalizeSpaces . splitOnSemi . concatMap factorSemi + normalizeSpaces = reverse . dropWhile isSp . reverse . + dropWhile isSp + isSp Space = True + isSp SoftBreak = True + isSp LineBreak = True + isSp _ = False splitOnSemi = splitBy (==Str ";") factorSemi (Str []) = [] factorSemi (Str s) = case break (==';') s of @@ -141,41 +162,61 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds factorSemi (Str ys) factorSemi x = [x] -parseRST :: RSTParser Pandoc +parseRST :: PandocMonad m => RSTParser m Pandoc parseRST = do optional blanklines -- skip blank lines at beginning of file startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... docMinusKeys <- concat <$> - manyTill (referenceKey <|> noteBlock <|> lineClump) eof + manyTill (referenceKey <|> anchorDef <|> + noteBlock <|> citationBlock <|> + headerBlock <|> lineClump) eof setInput docMinusKeys setPosition startPos st' <- getState let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } + updateState $ \s -> s { stateNotes = reverse reversedNotes + , stateHeaders = mempty + , stateIdentifiers = mempty } -- now parse it for real... blocks <- B.toList <$> parseBlocks + citations <- (sort . M.toList . stateCitations) <$> getState + citationItems <- mapM parseCitation citations + let refBlock = if null citationItems + then [] + else [Div ("citations",[],[]) $ + B.toList $ B.definitionList citationItems] standalone <- getOption readerStandalone state <- getState let meta = stateMeta state let (blocks', meta') = if standalone then titleTransform (blocks, meta) else (blocks, meta) - return $ Pandoc meta' blocks' + reportLogMessages + return $ Pandoc meta' (blocks' ++ refBlock) + +parseCitation :: PandocMonad m + => (String, String) -> RSTParser m (Inlines, [Blocks]) +parseCitation (ref, raw) = do + contents <- parseFromString' parseBlocks raw + return (B.spanWith (ref, ["citation-label"], []) (B.str ref), + [contents]) + -- -- parsing blocks -- -parseBlocks :: RSTParser Blocks +parseBlocks :: PandocMonad m => RSTParser m Blocks parseBlocks = mconcat <$> manyTill block eof -block :: RSTParser Blocks +block :: PandocMonad m => RSTParser m Blocks block = choice [ codeBlock , blockQuote , fieldList , directive + , anchor , comment , header , hrule @@ -191,7 +232,7 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: Int -> RSTParser (String, String) +rawFieldListItem :: Monad m => Int -> RSTParser m (String, String) rawFieldListItem minIndent = try $ do indent <- length <$> many (char ' ') guard $ indent >= minIndent @@ -204,15 +245,15 @@ rawFieldListItem minIndent = try $ do let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" return (name, raw) -fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) +fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent term <- parseInlineFromString name - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw optional blanklines return (term, [contents]) -fieldList :: RSTParser Blocks +fieldList :: PandocMonad m => RSTParser m Blocks fieldList = try $ do indent <- length <$> lookAhead (many spaceChar) items <- many1 $ fieldListItem indent @@ -224,43 +265,62 @@ fieldList = try $ do -- line block -- -lineBlock :: RSTParser Blocks +lineBlock :: PandocMonad m => RSTParser m Blocks lineBlock = try $ do lines' <- lineBlockLines lines'' <- mapM parseInlineFromString lines' return $ B.lineBlock lines'' +lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks +lineBlockDirective body = do + lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body + return $ B.lineBlock lines' + -- -- paragraph block -- -- note: paragraph can end in a :: starting a code block -para :: RSTParser Blocks +para :: PandocMonad m => RSTParser m Blocks para = try $ do result <- trimInlines . mconcat <$> many1 inline option (B.plain result) $ try $ do newline blanklines case viewr (B.unMany result) of - ys :> (Str xs) | "::" `isSuffixOf` xs -> do + ys :> Str xs | "::" `isSuffixOf` xs -> do raw <- option mempty codeBlockBody return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs)) <> raw _ -> return (B.para result) -plain :: RSTParser Blocks +plain :: PandocMonad m => RSTParser m Blocks plain = B.plain . trimInlines . mconcat <$> many1 inline -- -- header blocks -- -header :: RSTParser Blocks +header :: PandocMonad m => RSTParser m Blocks header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom -doubleHeader :: RSTParser Blocks -doubleHeader = try $ do +doubleHeader :: PandocMonad m => RSTParser m Blocks +doubleHeader = do + (txt, c) <- doubleHeader' + -- check to see if we've had this kind of header before. + -- if so, get appropriate level. if not, add to list. + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case elemIndex (DoubleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [DoubleHeader c], length headerTable + 1) + setState (state { stateHeaderTable = headerTable' }) + attr <- registerHeader nullAttr txt + return $ B.headerWith attr level txt + +doubleHeader' :: PandocMonad m => RSTParser m (Inlines, Char) +doubleHeader' = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line let lenTop = length (c:rest) @@ -268,48 +328,45 @@ doubleHeader = try $ do newline txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline) pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else return () + let len = sourceColumn pos - 1 + when (len > lenTop) $ fail "title longer than border" blankline -- spaces and newline count lenTop (char c) -- the bottom line blanklines - -- check to see if we've had this kind of header before. - -- if so, get appropriate level. if not, add to list. + return (txt, c) + +-- a header with line on the bottom only +singleHeader :: PandocMonad m => RSTParser m Blocks +singleHeader = do + (txt, c) <- singleHeader' state <- getState let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of + let (headerTable',level) = case elemIndex (SingleHeader c) headerTable of Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) + Nothing -> (headerTable ++ [SingleHeader c], length headerTable + 1) setState (state { stateHeaderTable = headerTable' }) attr <- registerHeader nullAttr txt return $ B.headerWith attr level txt --- a header with line on the bottom only -singleHeader :: RSTParser Blocks -singleHeader = try $ do +singleHeader' :: PandocMonad m => RSTParser m (Inlines, Char) +singleHeader' = try $ do notFollowedBy' whitespace - txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline}) + lookAhead $ anyLine >> oneOf underlineChars + txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline) pos <- getPosition - let len = (sourceColumn pos) - 1 + let len = sourceColumn pos - 1 blankline c <- oneOf underlineChars count (len - 1) (char c) many (char c) blanklines - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - attr <- registerHeader nullAttr txt - return $ B.headerWith attr level txt + return (txt, c) -- -- hrule block -- -hrule :: Parser [Char] st Blocks +hrule :: Monad m => ParserT [Char] st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -323,14 +380,14 @@ hrule = try $ do -- -- read a line indented by a given string -indentedLine :: String -> Parser [Char] st [Char] +indentedLine :: Monad m => String -> ParserT [Char] st m [Char] indentedLine indents = try $ do string indents anyLine -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. -indentedBlock :: Parser [Char] st [Char] +indentedBlock :: Monad m => ParserT [Char] st m [Char] indentedBlock = try $ do indents <- lookAhead $ many1 spaceChar lns <- many1 $ try $ do b <- option "" blanklines @@ -339,24 +396,24 @@ indentedBlock = try $ do optional blanklines return $ unlines lns -quotedBlock :: Parser [Char] st [Char] +quotedBlock :: Monad m => ParserT [Char] st m [Char] quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines return $ unlines lns -codeBlockStart :: Parser [Char] st Char +codeBlockStart :: Monad m => ParserT [Char] st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: Parser [Char] st Blocks +codeBlock :: Monad m => ParserT [Char] st m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: Parser [Char] st Blocks +codeBlockBody :: Monad m => ParserT [Char] st m Blocks codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> (indentedBlock <|> quotedBlock) -lhsCodeBlock :: RSTParser Blocks +lhsCodeBlock :: Monad m => RSTParser m Blocks lhsCodeBlock = try $ do getPosition >>= guard . (==1) . sourceColumn guardEnabled Ext_literate_haskell @@ -366,14 +423,14 @@ lhsCodeBlock = try $ do return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns -latexCodeBlock :: Parser [Char] st [[Char]] +latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Parser [Char] st [[Char]] +birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it @@ -381,99 +438,159 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine then map (drop 1) lns else lns -birdTrackLine :: Parser [Char] st [Char] +birdTrackLine :: Monad m => ParserT [Char] st m [Char] birdTrackLine = char '>' >> anyLine -- -- block quotes -- -blockQuote :: RSTParser Blocks +blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ raw ++ "\n\n" return $ B.blockQuote contents +{- +Unsupported options for include: +tab-width +encoding +-} + +includeDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +includeDirective top fields body = do + let f = trim top + guard $ not (null f) + guard $ null (trim body) + -- options + let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead + let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead + oldPos <- getPosition + oldInput <- getInput + containers <- stateContainers <$> getState + when (f `elem` containers) $ + throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + updateState $ \s -> s{ stateContainers = f : stateContainers s } + mbContents <- readFileFromDirs ["."] f + contentLines <- case mbContents of + Just s -> return $ lines s + Nothing -> do + logMessage $ CouldNotLoadIncludeFile f oldPos + return [] + let numLines = length contentLines + let startLine' = case startLine of + Nothing -> 1 + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end + let endLine' = case endLine of + Nothing -> numLines + 1 + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end + let contentLines' = drop (startLine' - 1) + $ take (endLine' - 1) contentLines + let contentLines'' = (case trim <$> lookup "end-before" fields of + Just patt -> takeWhile (not . (patt `isInfixOf`)) + Nothing -> id) . + (case trim <$> lookup "start-after" fields of + Just patt -> drop 1 . + dropWhile (not . (patt `isInfixOf`)) + Nothing -> id) $ contentLines' + let contents' = unlines contentLines'' ++ "\n" + case lookup "code" fields of + Just lang -> do + let numberLines = lookup "number-lines" fields + let classes = trimr lang : ["numberLines" | isJust numberLines] ++ + maybe [] words (lookup "class" fields) + let kvs = maybe [] (\n -> [("startFrom", trimr n)]) numberLines + let ident = maybe "" trimr $ lookup "name" fields + let attribs = (ident, classes, kvs) + return $ B.codeBlockWith attribs contents' + Nothing -> case lookup "literal" fields of + Just _ -> return $ B.rawBlock "rst" contents' + Nothing -> do + setPosition $ newPos f 1 1 + setInput contents' + bs <- optional blanklines >> + (mconcat <$> many block) + setInput oldInput + setPosition oldPos + updateState $ \s -> s{ stateContainers = + tail $ stateContainers s } + return bs + + -- -- list blocks -- -list :: RSTParser Blocks +list :: PandocMonad m => RSTParser m Blocks list = choice [ bulletList, orderedList, definitionList ] <?> "list" -definitionListItem :: RSTParser (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks]) definitionListItem = try $ do -- avoid capturing a directive or comment notFollowedBy (try $ char '.' >> char '.') term <- trimInlines . mconcat <$> many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n" + contents <- parseFromString' parseBlocks $ raw ++ "\n" return (term, [contents]) -definitionList :: RSTParser Blocks +definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Parser [Char] st Int +bulletListStart :: Monad m => ParserT [Char] st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers - white <- many1 spaceChar + white <- many1 spaceChar <|> "" <$ lookAhead (char '\n') return $ length (marker:white) -- parses ordered list start and returns its length (inc following whitespace) -orderedListStart :: ListNumberStyle +orderedListStart :: Monad m => ListNumberStyle -> ListNumberDelim - -> RSTParser Int + -> RSTParser m Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) - white <- many1 spaceChar + white <- many1 spaceChar <|> "" <$ lookAhead (char '\n') return $ markerLen + length white -- parse a line of a list item -listLine :: Int -> RSTParser [Char] +listLine :: Monad m => Int -> RSTParser m [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength - line <- anyLine - return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> RSTParser [Char] -indentWith num = do - tabStop <- getOption readerTabStop - if (num < tabStop) - then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] + anyLineNewline -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: RSTParser Int - -> RSTParser (Int, [Char]) +rawListItem :: Monad m => RSTParser m Int + -> RSTParser m (Int, [Char]) rawListItem start = try $ do markerLength <- start - firstLine <- anyLine + firstLine <- anyLineNewline restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + return (markerLength, firstLine ++ concat restLines) -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int -> RSTParser [Char] +listContinuation :: Monad m => Int -> RSTParser m [Char] listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) return $ blanks ++ concat result -listItem :: RSTParser Int - -> RSTParser Blocks +listItem :: PandocMonad m + => RSTParser m Int + -> RSTParser m Blocks listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) - blanks <- choice [ try (many blankline <* lookAhead start), - many1 blankline ] -- whole list must end with blank. + skipMany1 blankline <|> () <$ lookAhead start -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. -- see definition of "endline" @@ -481,52 +598,52 @@ listItem start = try $ do let oldContext = stateParserContext state setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks + parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n" updateState (\st -> st {stateParserContext = oldContext}) return $ case B.toList parsed of - [Para xs] -> B.singleton $ Plain xs - [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys] - [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys] - [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys] + [Para xs] -> + B.singleton $ Plain xs + [Para xs, BulletList ys] -> + B.fromList [Plain xs, BulletList ys] + [Para xs, OrderedList s ys] -> + B.fromList [Plain xs, OrderedList s ys] + [Para xs, DefinitionList ys] -> + B.fromList [Plain xs, DefinitionList ys] _ -> parsed -orderedList :: RSTParser Blocks +orderedList :: PandocMonad m => RSTParser m Blocks orderedList = try $ do (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar) items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify' items + let items' = compactify items return $ B.orderedListWith (start, style, delim) items' -bulletList :: RSTParser Blocks -bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) +bulletList :: PandocMonad m => RSTParser m Blocks +bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart) -- -- directive (e.g. comment, container, compound-paragraph) -- -comment :: RSTParser Blocks +comment :: Monad m => RSTParser m Blocks comment = try $ do string ".." skipMany1 spaceChar <|> (() <$ lookAhead newline) - notFollowedBy' directiveLabel + -- notFollowedBy' directiveLabel -- comment comes after directive so unnec. manyTill anyChar blanklines optional indentedBlock return mempty -directiveLabel :: RSTParser String +directiveLabel :: Monad m => RSTParser m String directiveLabel = map toLower <$> many1Till (letter <|> char '-') (try $ string "::") -directive :: RSTParser Blocks +directive :: PandocMonad m => RSTParser m Blocks directive = try $ do string ".." directive' --- TODO: line-block, parsed-literal, table, csv-table, list-table --- date --- include --- title -directive' :: RSTParser Blocks +directive' :: PandocMonad m => RSTParser m Blocks directive' = do skipMany1 spaceChar label <- directiveLabel @@ -541,43 +658,71 @@ directive' = do body <- option "" $ try $ blanklines >> indentedBlock optional blanklines let body' = body ++ "\n\n" - imgAttr cl = ("", classes, getAtt "width" ++ getAtt "height") + name = trim $ fromMaybe "" (lookup "name" fields) + imgAttr cl = ("", classes, widthAttr ++ heightAttr) where - classes = words $ maybe "" trim $ lookup cl fields - getAtt k = case lookup k fields of - Just v -> [(k, filter (not . isSpace) v)] - Nothing -> [] + classes = words $ maybe "" trim (lookup cl fields) ++ + maybe "" (\x -> "align-" ++ trim x) + (lookup "align" fields) + scale = case trim <$> lookup "scale" fields of + Just v -> case reverse v of + '%':vv -> + case safeRead (reverse vv) of + Just (percent :: Double) + -> percent / 100.0 + Nothing -> 1.0 + _ -> + case safeRead v of + Just (s :: Double) -> s + Nothing -> 1.0 + Nothing -> 1.0 + widthAttr = maybe [] (\x -> [("width", + show $ scaleDimension scale x)]) + $ lookup "width" fields >>= + (lengthToDim . filter (not . isSpace)) + heightAttr = maybe [] (\x -> [("height", + show $ scaleDimension scale x)]) + $ lookup "height" fields >>= + (lengthToDim . filter (not . isSpace)) case label of + "include" -> includeDirective top fields body' + "table" -> tableDirective top fields body' + "list-table" -> listTableDirective top fields body' + "csv-table" -> csvTableDirective top fields body' + "line-block" -> lineBlockDirective body' "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) - "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields - "container" -> parseFromString parseBlocks body' + "role" -> addNewRole top $ map (second trim) fields + "container" -> B.divWith (name, "container" : words top, []) <$> + parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseInlineFromString (trim top) "unicode" -> B.para <$> -- consumed by substKey parseInlineFromString (trim $ unicodeTransform top) - "compound" -> parseFromString parseBlocks body' - "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body' - "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body' - "highlights" -> B.blockQuote <$> parseFromString parseBlocks body' + "compound" -> parseFromString' parseBlocks body' + "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body' + "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body' + "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body' "rubric" -> B.para . B.strong <$> parseInlineFromString top _ | label `elem` ["attention","caution","danger","error","hint", - "important","note","tip","warning"] -> - do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' - return $ B.divWith ("",["admonition", label],[]) bod - "admonition" -> - do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' - return $ B.divWith ("",["admonition"],[]) bod + "important","note","tip","warning","admonition"] -> + do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + let lab = case label of + "admonition" -> mempty + (l:ls) -> B.divWith ("",["admonition-title"],[]) + (B.para (B.str (toUpper l : ls))) + [] -> mempty + return $ B.divWith ("",[label],[]) (lab <> bod) "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields tit <- B.para . B.strong <$> parseInlineFromString (trim top ++ if null subtit then "" else (": " ++ subtit)) - bod <- parseFromString parseBlocks body' + bod <- parseFromString' parseBlocks body' return $ B.divWith ("",["sidebar"],[]) $ tit <> bod "topic" -> do tit <- B.para . B.strong <$> parseInlineFromString top - bod <- parseFromString parseBlocks body' + bod <- parseFromString' parseBlocks body' return $ B.divWith ("",["topic"],[]) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> s { stateRstDefaultRole = @@ -588,14 +733,15 @@ directive' = do codeblock (words $ fromMaybe [] $ lookup "class" fields) (lookup "number-lines" fields) (trim top) body "aafig" -> do - let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields) + let attribs = ("", ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body "figure" -> do - (caption, legend) <- parseFromString extractCaption body' + (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top - return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend + return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" + caption) <> legend "image" -> do let src = escapeURI $ trim top let alt = B.str $ maybe "image" trim $ lookup "alt" fields @@ -606,28 +752,150 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + let attrs = ("", splitBy isSpace $ trim top, + map (second trimr) fields) -- directive content or the first immediately following element children <- case body of "" -> block - _ -> parseFromString parseBlocks body' + _ -> parseFromString' parseBlocks body' return $ B.divWith attrs children other -> do pos <- getPosition - addWarning (Just pos) $ "ignoring unknown directive: " ++ other - return mempty + logMessage $ SkippedContent (".. " ++ other) pos + bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + return $ B.divWith ("",[other],[]) bod + +tableDirective :: PandocMonad m + => String -> [(String, String)] -> String -> RSTParser m Blocks +tableDirective top fields body = do + bs <- parseFromString' parseBlocks body + case B.toList bs of + [Table _ aligns' widths' header' rows'] -> do + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + columns <- getOption readerColumns + let numOfCols = length header' + let normWidths ws = + map (/ max 1.0 (fromIntegral (columns - numOfCols))) ws + let widths = case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0.0 + Just "grid" -> widths' + Just specs -> normWidths + $ map (fromMaybe (0 :: Double) . safeRead) + $ splitBy (`elem` (" ," :: String)) specs + Nothing -> widths' + -- align is not applicable since we can't represent whole table align + return $ B.singleton $ Table (B.toList title) + aligns' widths header' rows' + _ -> return mempty + + +-- TODO: :stub-columns:. +-- Only the first row becomes the header even if header-rows: > 1, +-- since Pandoc doesn't support a table with multiple header rows. +-- We don't need to parse :align: as it represents the whole table align. +listTableDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +listTableDirective top fields body = do + bs <- parseFromString' parseBlocks body + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + let rows = takeRows $ B.toList bs + headerRowsNum = fromMaybe (0 :: Int) $ + lookup "header-rows" fields >>= safeRead + (headerRow,bodyRows,numOfCols) = case rows of + x:xs -> if headerRowsNum > 0 + then (x, xs, length x) + else ([], rows, length x) + _ -> ([],[],0) + widths = case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0 + Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ + splitBy (`elem` (" ," :: String)) specs + _ -> replicate numOfCols 0 + return $ B.table title + (zip (replicate numOfCols AlignDefault) widths) + headerRow + bodyRows + where takeRows [BulletList rows] = map takeCells rows + takeRows _ = [] + takeCells [BulletList cells] = map B.fromList cells + takeCells _ = [] + normWidths ws = map (/ max 1 (sum ws)) ws + +csvTableDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +csvTableDirective top fields rawcsv = do + let explicitHeader = trim <$> lookup "header" fields + let opts = defaultCSVOptions{ + csvDelim = case trim <$> lookup "delim" fields of + Just "tab" -> '\t' + Just "space" -> ' ' + Just [c] -> c + _ -> ',' + , csvQuote = case trim <$> lookup "quote" fields of + Just [c] -> c + _ -> '"' + , csvEscape = case trim <$> lookup "escape" fields of + Just [c] -> Just c + _ -> Nothing + , csvKeepSpace = case trim <$> lookup "keepspace" fields of + Just "true" -> True + _ -> False + } + let headerRowsNum = fromMaybe (case explicitHeader of + Just _ -> 1 :: Int + Nothing -> 0 :: Int) $ + lookup "header-rows" fields >>= safeRead + rawcsv' <- case trim <$> + lookup "file" fields `mplus` lookup "url" fields of + Just u -> do + (bs, _) <- fetchItem u + return $ UTF8.toString bs + Nothing -> return rawcsv + let res = parseCSV opts (T.pack $ case explicitHeader of + Just h -> h ++ "\n" ++ rawcsv' + Nothing -> rawcsv') + case res of + Left e -> + throwError $ PandocParsecError "csv table" e + Right rawrows -> do + let parseCell = parseFromString' (plain <|> return mempty) . T.unpack + let parseRow = mapM parseCell + rows <- mapM parseRow rawrows + let (headerRow,bodyRows,numOfCols) = + case rows of + x:xs -> if headerRowsNum > 0 + then (x, xs, length x) + else ([], rows, length x) + _ -> ([],[],0) + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + let normWidths ws = map (/ max 1 (sum ws)) ws + let widths = + case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0 + Just specs -> normWidths + $ map (fromMaybe (0 :: Double) . safeRead) + $ splitBy (`elem` (" ," :: String)) specs + _ -> replicate numOfCols 0 + return $ B.table title + (zip (replicate numOfCols AlignDefault) widths) + headerRow + bodyRows -- TODO: -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix -addNewRole :: String -> [(String, String)] -> RSTParser Blocks +addNewRole :: PandocMonad m + => String -> [(String, String)] -> RSTParser m Blocks addNewRole roleString fields = do - (role, parentRole) <- parseFromString inheritedRole roleString + pos <- getPosition + (role, parentRole) <- parseFromString' inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState let getBaseRole (r, f, a) roles = case M.lookup r roles of Just (r', f', a') -> getBaseRole (r', f', a') roles - Nothing -> (r, f, a) + Nothing -> (r, f, a) (baseRole, baseFmt, baseAttr) = getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt @@ -641,34 +909,32 @@ addNewRole roleString fields = do in (ident, nub . (role :) . annotate $ classes, keyValues) -- warn about syntax we ignore - flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (baseRole /= "code") $ addWarning Nothing $ - "ignoring :language: field because the parent of role :" ++ - role ++ ": is :" ++ baseRole ++ ": not :code:" - "format" -> when (baseRole /= "raw") $ addWarning Nothing $ - "ignoring :format: field because the parent of role :" ++ - role ++ ": is :" ++ baseRole ++ ": not :raw:" - _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ - ": in definition of role :" ++ role ++ ": in" + forM_ fields $ \(key, _) -> case key of + "language" -> when (baseRole /= "code") $ logMessage $ + SkippedContent ":language: [because parent of role is not :code:]" + pos + "format" -> when (baseRole /= "raw") $ logMessage $ + SkippedContent ":format: [because parent of role is not :raw:]" pos + _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ - addWarning Nothing $ - "ignoring :format: fields after the first in the definition of role :" - ++ role ++": in" + logMessage $ SkippedContent + ":format: [after first in definition of role]" + pos when (parentRole == "code" && countKeys "language" > 1) $ - addWarning Nothing $ - "ignoring :language: fields after the first in the definition of role :" - ++ role ++": in" + logMessage $ SkippedContent + ":language: [after first in definition of role]" pos updateState $ \s -> s { stateRstCustomRoles = M.insert role (baseRole, fmt, attr) customRoles } - return $ B.singleton Null + return mempty where countKeys k = length . filter (== k) . map fst $ fields inheritedRole = - (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span") + (,) <$> roleName <*> ((char '(' *> roleName <* char ')') + <|> pure "span") -- Can contain character codes as decimal numbers or @@ -700,24 +966,29 @@ extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc where (ds,rest) = span isHexDigit s mbc = safeRead ('\'':'\\':'x':ds ++ "'") -extractCaption :: RSTParser (Inlines, Blocks) +extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks) extractCaption = do capt <- trimInlines . mconcat <$> many inline legend <- optional blanklines >> (mconcat <$> many block) return (capt,legend) --- divide string by blanklines +-- divide string by blanklines, and surround with +-- \begin{aligned}...\end{aligned} if needed. toChunks :: String -> [String] toChunks = dropWhile null - . map (trim . unlines) + . map (addAligned . trim . unlines) . splitBy (all (`elem` (" \t" :: String))) . lines + -- we put this in an aligned environment if it contains \\, see #4254 + where addAligned s = if "\\\\" `isInfixOf` s + then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}" + else s -codeblock :: [String] -> Maybe String -> String -> String -> RSTParser Blocks +codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks codeblock classes numberLines lang body = return $ B.codeBlockWith attribs $ stripTrailingNewlines body where attribs = ("", classes', kvs) classes' = "sourceCode" : lang - : maybe [] (\_ -> ["numberLines"]) numberLines + : maybe [] (const ["numberLines"]) numberLines ++ classes kvs = case numberLines of Just "" -> [] @@ -728,30 +999,52 @@ codeblock classes numberLines lang body = --- note block --- -noteBlock :: RSTParser [Char] +noteBlock :: Monad m => RSTParser m [Char] noteBlock = try $ do + (ref, raw, replacement) <- noteBlock' noteMarker + updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s } + -- return blanks so line count isn't affected + return replacement + +citationBlock :: Monad m => RSTParser m [Char] +citationBlock = try $ do + (ref, raw, replacement) <- noteBlock' citationMarker + updateState $ \s -> + s { stateCitations = M.insert ref raw (stateCitations s), + stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[])) + (stateKeys s) } + -- return blanks so line count isn't affected + return replacement + +noteBlock' :: Monad m + => RSTParser m String -> RSTParser m (String, String, String) +noteBlock' marker = try $ do startPos <- getPosition string ".." spaceChar >> skipMany spaceChar - ref <- noteMarker + ref <- marker first <- (spaceChar >> skipMany spaceChar >> anyLine) <|> (newline >> return "") blanks <- option "" blanklines rest <- option "" indentedBlock endPos <- getPosition let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" - let newnote = (ref, raw) - st <- getState - let oldnotes = stateNotes st - updateState $ \s -> s { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n' + return (ref, raw, replacement) + +citationMarker :: Monad m => RSTParser m [Char] +citationMarker = do + char '[' + res <- simpleReferenceName + char ']' + return res -noteMarker :: RSTParser [Char] +noteMarker :: Monad m => RSTParser m [Char] noteMarker = do char '[' res <- many1 digit - <|> (try $ char '#' >> liftM ('#':) simpleReferenceName') + <|> + try (char '#' >> liftM ('#':) simpleReferenceName) <|> count 1 (oneOf "#*") char ']' return res @@ -760,39 +1053,26 @@ noteMarker = do -- reference key -- -quotedReferenceName :: RSTParser Inlines +quotedReferenceName :: PandocMonad m => RSTParser m String quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - label' <- trimInlines . mconcat <$> many1Till inline (char '`') - return label' - -unquotedReferenceName :: RSTParser Inlines -unquotedReferenceName = try $ do - label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':') - return label' + manyTill anyChar (char '`') -- Simple reference names are single words consisting of alphanumerics -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName' :: Parser [Char] st String -simpleReferenceName' = do +simpleReferenceName :: Monad m => ParserT [Char] st m String +simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum - <|> (try $ oneOf "-_:+." <* lookAhead alphaNum) + <|> try (oneOf "-_:+." <* lookAhead alphaNum) return (x:xs) -simpleReferenceName :: Parser [Char] st Inlines -simpleReferenceName = do - raw <- simpleReferenceName' - return $ B.str raw +referenceName :: PandocMonad m => RSTParser m String +referenceName = quotedReferenceName <|> simpleReferenceName -referenceName :: RSTParser Inlines -referenceName = quotedReferenceName <|> - (try $ simpleReferenceName <* lookAhead (char ':')) <|> - unquotedReferenceName - -referenceKey :: RSTParser [Char] +referenceKey :: PandocMonad m => RSTParser m [Char] referenceKey = do startPos <- getPosition choice [substKey, anonymousKey, regularKey] @@ -801,16 +1081,16 @@ referenceKey = do -- return enough blanks to replace key return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -targetURI :: Parser [Char] st [Char] +targetURI :: Monad m => ParserT [Char] st m [Char] targetURI = do skipSpaces optional newline contents <- many1 (try (many spaceChar >> newline >> many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines - return $ escapeURI $ trim $ contents + return $ escapeURI $ trim contents -substKey :: RSTParser () +substKey :: PandocMonad m => RSTParser m () substKey = try $ do string ".." skipMany1 spaceChar @@ -826,31 +1106,85 @@ substKey = try $ do [Para ils] -> return $ B.fromList ils _ -> mzero let key = toKey $ stripFirstAndLast ref - updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s } + updateState $ \s -> s{ stateSubstitutions = + M.insert key il $ stateSubstitutions s } -anonymousKey :: RSTParser () +anonymousKey :: Monad m => RSTParser m () anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) - --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } - -stripTicks :: String -> String -stripTicks = reverse . stripTick . reverse . stripTick - where stripTick ('`':xs) = xs - stripTick xs = xs - -regularKey :: RSTParser () + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } + +referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames = do + let rn = try $ do + string ".. _" + ref <- quotedReferenceName + <|> many ( noneOf ":\n" + <|> try (char '\n' <* + string " " <* + notFollowedBy blankline) + <|> try (char ':' <* lookAhead alphaNum) + ) + char ':' + return ref + first <- rn + rest <- many (try (blanklines *> rn)) + return (first:rest) + +regularKey :: PandocMonad m => RSTParser m () regularKey = try $ do - string ".. _" - (_,ref) <- withRaw referenceName - char ':' + -- we allow several references to the same URL, e.g. + -- .. _hello: + -- .. _goodbye: url.com + refs <- referenceNames src <- targetURI - let key = toKey $ stripTicks ref - --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } + guard $ not (null src) + let keys = map toKey refs + forM_ keys $ \key -> + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } + +anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef = try $ do + (refs, raw) <- withRaw $ try (referenceNames <* blanklines) + forM_ refs $ \rawkey -> + updateState $ \s -> s { stateKeys = + M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + -- keep this for 2nd round of parsing, where we'll add the divs (anchor) + return raw + +anchor :: PandocMonad m => RSTParser m Blocks +anchor = try $ do + refs <- referenceNames + blanklines + b <- block + let addDiv ref = B.divWith (ref, [], []) + let emptySpanWithId id' = Span (id',[],[]) [] + -- put identifier on next block: + case B.toList b of + [Header lev (_,classes,kvs) txt] -> + case reverse refs of + [] -> return b + (r:rs) -> return $ B.singleton $ + Header lev (r,classes,kvs) + (txt ++ map emptySpanWithId rs) + -- we avoid generating divs for headers, + -- because it hides them from promoteHeader, see #4240 + _ -> return $ foldr addDiv b refs + +headerBlock :: PandocMonad m => RSTParser m [Char] +headerBlock = do + ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader') + (ident,_,_) <- registerHeader nullAttr txt + let key = toKey (stringify txt) + updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr) + $ stateKeys s } + return raw + -- -- tables @@ -869,45 +1203,53 @@ regularKey = try $ do -- Grid tables TODO: -- - column spans -dashedLine :: Char -> Parser [Char] st (Int, Int) +dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator -simpleTableSep :: Char -> RSTParser Char +simpleTableSep :: Monad m => Char -> RSTParser m Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: RSTParser [Char] +simpleTableFooter :: Monad m => RSTParser m [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: [Int] -> RSTParser [String] -simpleTableRawLine indices = do - line <- many1Till anyChar newline - return (simpleTableSplitLine indices line) +simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine + +simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLineWithEmptyCell indices = try $ do + cs <- simpleTableRawLine indices + let isEmptyCell = all (\c -> c == ' ' || c == '\t') + guard $ any isEmptyCell cs + return cs -- Parse a table row and return a list of blocks (columns). -simpleTableRow :: [Int] -> RSTParser [[Block]] +simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks] simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices - colLines <- return [] -- TODO - let cols = map unlines . transpose $ firstLine : colLines - mapM (parseFromString (B.toList . mconcat <$> many plain)) cols + conLines <- many $ simpleTableRawLineWithEmptyCell indices + let cols = map unlines . transpose $ firstLine : conLines ++ + [replicate (length indices) "" + | not (null conLines)] + mapM (parseFromString' parseBlocks) cols simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = - map trim + map trimr $ tail $ splitByIndices (init indices) line -simpleTableHeader :: Bool -- ^ Headerless table - -> RSTParser ([[Block]], [Alignment], [Int]) +simpleTableHeader :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m ([Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -921,26 +1263,37 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - heads <- mapM (parseFromString (B.toList . mconcat <$> many plain)) $ - map trim rawHeads + heads <- mapM ( parseFromString' (mconcat <$> many plain) . trim) rawHeads return (heads, aligns, indices) -- Parse a simple table. -simpleTable :: Bool -- ^ Headerless table - -> RSTParser Blocks +simpleTable :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m Blocks simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + let wrapIdFst (a, b, c) = (Identity a, b, c) + wrapId = fmap Identity + tbl <- runIdentity <$> tableWith + (wrapIdFst <$> simpleTableHeader headless) + (wrapId <$> simpleTableRow) + sep simpleTableFooter -- Simple tables get 0s for relative column widths (i.e., use default) - return $ B.singleton $ Table c a (replicate (length a) 0) h l + case B.toList tbl of + [Table c a _w h l] -> return $ B.singleton $ + Table c a (replicate (length a) 0) h l + _ -> + throwError $ PandocShouldNeverHappenError + "tableWith returned something unexpected" where sep = return () -- optional (simpleTableSep '-') -gridTable :: Bool -- ^ Headerless table - -> RSTParser Blocks -gridTable headerless = B.singleton - <$> gridTableWith (B.toList <$> parseBlocks) headerless +gridTable :: PandocMonad m + => Bool -- ^ Headerless table + -> RSTParser m Blocks +gridTable headerless = runIdentity <$> + gridTableWith (Identity <$> parseBlocks) headerless -table :: RSTParser Blocks +table :: PandocMonad m => RSTParser m Blocks table = gridTable False <|> simpleTable False <|> gridTable True <|> simpleTable True <?> "table" @@ -948,7 +1301,7 @@ table = gridTable False <|> simpleTable False <|> -- inline -- -inline :: RSTParser Inlines +inline :: PandocMonad m => RSTParser m Inlines inline = choice [ note -- can start with whitespace, so try before ws , whitespace , link @@ -964,29 +1317,30 @@ inline = choice [ note -- can start with whitespace, so try before ws , escapedChar , symbol ] <?> "inline" -parseInlineFromString :: String -> RSTParser Inlines -parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) +parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines +parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) -hyphens :: RSTParser Inlines +hyphens :: Monad m => RSTParser m Inlines hyphens = do result <- many1 (char '-') optional endline -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Parser [Char] st Inlines +escapedChar :: Monad m => ParserT [Char] st m Inlines escapedChar = do c <- escaped anyChar - return $ if c == ' ' -- '\ ' is null in RST + return $ if c == ' ' || c == '\n' || c == '\r' + -- '\ ' is null in RST then mempty else B.str [c] -symbol :: RSTParser Inlines +symbol :: Monad m => RSTParser m Inlines symbol = do result <- oneOf specialChars return $ B.str [result] -- parses inline code, between codeStart and codeEnd -code :: RSTParser Inlines +code :: Monad m => RSTParser m Inlines code = try $ do string "``" result <- manyTill anyChar (try (string "``")) @@ -994,7 +1348,7 @@ code = try $ do $ trim $ unwords $ lines result -- succeeds only if we're not right after a str (ie. in middle of word) -atStart :: RSTParser a -> RSTParser a +atStart :: Monad m => RSTParser m a -> RSTParser m a atStart p = do pos <- getPosition st <- getState @@ -1002,11 +1356,11 @@ atStart p = do guard $ stateLastStrPos st /= Just pos p -emph :: RSTParser Inlines +emph :: PandocMonad m => RSTParser m Inlines emph = B.emph . trimInlines . mconcat <$> enclosed (atStart $ char '*') (char '*') inline -strong :: RSTParser Inlines +strong :: PandocMonad m => RSTParser m Inlines strong = B.strong . trimInlines . mconcat <$> enclosed (atStart $ string "**") (try $ string "**") inline @@ -1018,12 +1372,13 @@ strong = B.strong . trimInlines . mconcat <$> -- - Classes are silently discarded in addNewRole -- - Lacks sensible implementation for title-reference (which is the default) -- - Allows direct use of the :raw: role, rST only allows inherited use. -interpretedRole :: RSTParser Inlines +interpretedRole :: PandocMonad m => RSTParser m Inlines interpretedRole = try $ do (role, contents) <- roleBefore <|> roleAfter renderRole contents Nothing role nullAttr -renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines +renderRole :: PandocMonad m + => String -> Maybe String -> String -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ B.str contents "superscript" -> return $ B.superscript $ B.str contents @@ -1048,10 +1403,8 @@ renderRole contents fmt role attr = case role of case M.lookup custom customRoles of Just (newRole, newFmt, newAttr) -> renderRole contents newFmt newRole newAttr - Nothing -> do - pos <- getPosition - addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in" - return $ B.str contents -- Undefined role + Nothing -> -- undefined role + return $ B.spanWith ("",[],[("role",role)]) (B.str contents) where titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) @@ -1061,33 +1414,33 @@ renderRole contents fmt role attr = case role of pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" addClass :: String -> Attr -> Attr -addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues) +addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues) -roleName :: RSTParser String +roleName :: PandocMonad m => RSTParser m String roleName = many1 (letter <|> char '-') -roleMarker :: RSTParser String +roleMarker :: PandocMonad m => RSTParser m String roleMarker = char ':' *> roleName <* char ':' -roleBefore :: RSTParser (String,String) +roleBefore :: PandocMonad m => RSTParser m (String,String) roleBefore = try $ do role <- roleMarker contents <- unmarkedInterpretedText return (role,contents) -roleAfter :: RSTParser (String,String) +roleAfter :: PandocMonad m => RSTParser m (String,String) roleAfter = try $ do contents <- unmarkedInterpretedText role <- roleMarker <|> (stateRstDefaultRole <$> getState) return (role,contents) -unmarkedInterpretedText :: RSTParser [Char] +unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar -whitespace :: RSTParser Inlines +whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" -str :: RSTParser Inlines +str :: Monad m => RSTParser m Inlines str = do let strChar = noneOf ("\t\n " ++ specialChars) result <- many1 strChar @@ -1095,26 +1448,24 @@ str = do return $ B.str result -- an endline character that can be treated as a space, not a structural break -endline :: RSTParser Inlines +endline :: Monad m => RSTParser m Inlines endline = try $ do newline notFollowedBy blankline -- parse potential list-starts at beginning of line differently in a list: st <- getState - if (stateParserContext st) == ListItemState - then notFollowedBy (anyOrderedListMarker >> spaceChar) >> + when (stateParserContext st == ListItemState) $ notFollowedBy (anyOrderedListMarker >> spaceChar) >> notFollowedBy' bulletListStart - else return () return B.softbreak -- -- links -- -link :: RSTParser Inlines +link :: PandocMonad m => RSTParser m Inlines link = choice [explicitLink, referenceLink, autoLink] <?> "link" -explicitLink :: RSTParser Inlines +explicitLink :: PandocMonad m => RSTParser m Inlines explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code @@ -1128,61 +1479,73 @@ explicitLink = try $ do then B.str src else label' -- `link <google_>` is a reference link to _google! - (src',tit,attr) <- case reverse src of - '_':xs -> do - keyTable <- stateKeys <$> getState - let key = toKey $ reverse xs - case M.lookup key keyTable of - Nothing -> do - pos <- getPosition - addWarning (Just pos) $ - "Could not find reference for " ++ - show key - return ("","",nullAttr) - Just ((s,t),a) -> return (s,t,a) - _ -> return (src, "", nullAttr) + ((src',tit),attr) <- case reverse src of + '_':xs -> lookupKey [] (toKey (reverse xs)) + _ -> return ((src, ""), nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -referenceLink :: RSTParser Inlines +citationName :: PandocMonad m => RSTParser m String +citationName = do + raw <- citationMarker + return $ "[" ++ raw ++ "]" + +referenceLink :: PandocMonad m => RSTParser m Inlines referenceLink = try $ do - (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <* - char '_' - state <- getState - let keyTable = stateKeys state + ref <- (referenceName <|> citationName) <* char '_' + let label' = B.text ref let isAnonKey (Key ('_':_)) = True isAnonKey _ = False - key <- option (toKey $ stripTicks ref) $ + state <- getState + let keyTable = stateKeys state + key <- option (toKey ref) $ do char '_' let anonKeys = sort $ filter isAnonKey $ M.keys keyTable - if null anonKeys - then mzero - else return (head anonKeys) - ((src,tit), attr) <- case M.lookup key keyTable of - Nothing -> do - pos <- getPosition - addWarning (Just pos) $ - "Could not find reference for " ++ - show key - return (("",""),nullAttr) - Just val -> return val + case anonKeys of + [] -> mzero + (k:_) -> return k + ((src,tit), attr) <- lookupKey [] key -- if anonymous link, remove key so it won't be used again - when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } + when (isAnonKey key) $ updateState $ \s -> + s{ stateKeys = M.delete key keyTable } return $ B.linkWith attr src tit label' -autoURI :: RSTParser Inlines +-- We keep a list of oldkeys so we can detect lookup loops. +lookupKey :: PandocMonad m + => [Key] -> Key -> RSTParser m ((String, String), Attr) +lookupKey oldkeys key = do + pos <- getPosition + state <- getState + let keyTable = stateKeys state + case M.lookup key keyTable of + Nothing -> do + let Key key' = key + logMessage $ ReferenceNotFound key' pos + return (("",""),nullAttr) + -- check for keys of the form link_, which need to be resolved: + Just ((u@(_:_),""),_) | last u == '_' -> do + let rawkey = init u + let newkey = toKey rawkey + if newkey `elem` oldkeys + then do + logMessage $ CircularReference rawkey pos + return (("",""),nullAttr) + else lookupKey (key:oldkeys) newkey + Just val -> return val + +autoURI :: Monad m => RSTParser m Inlines autoURI = do (orig, src) <- uri return $ B.link src "" $ B.str orig -autoEmail :: RSTParser Inlines +autoEmail :: Monad m => RSTParser m Inlines autoEmail = do (orig, src) <- emailAddress return $ B.link src "" $ B.str orig -autoLink :: RSTParser Inlines +autoLink :: PandocMonad m => RSTParser m Inlines autoLink = autoURI <|> autoEmail -subst :: RSTParser Inlines +subst :: PandocMonad m => RSTParser m Inlines subst = try $ do (_,ref) <- withRaw $ enclosed (char '|') (char '|') inline state <- getState @@ -1191,12 +1554,11 @@ subst = try $ do case M.lookup key substTable of Nothing -> do pos <- getPosition - addWarning (Just pos) $ - "Could not find reference for " ++ show key + logMessage $ ReferenceNotFound (show key) pos return mempty Just target -> return target -note :: RSTParser Inlines +note :: PandocMonad m => RSTParser m Inlines note = try $ do optional whitespace ref <- noteMarker @@ -1206,8 +1568,7 @@ note = try $ do case lookup ref notes of Nothing -> do pos <- getPosition - addWarning (Just pos) $ - "Could not find note for " ++ show ref + logMessage $ ReferenceNotFound ref pos return mempty Just raw -> do -- We temporarily empty the note list while parsing the note, @@ -1215,8 +1576,8 @@ note = try $ do -- Note references inside other notes are allowed in reST, but -- not yet in this implementation. updateState $ \st -> st{ stateNotes = [] } - contents <- parseFromString parseBlocks raw - let newnotes = if (ref == "*" || ref == "#") -- auto-numbered + contents <- parseFromString' parseBlocks raw + let newnotes = if ref == "*" || ref == "#" -- auto-numbered -- delete the note so the next auto-numbered note -- doesn't get the same contents: then deleteFirstsBy (==) notes [(ref,raw)] @@ -1224,20 +1585,20 @@ note = try $ do updateState $ \st -> st{ stateNotes = newnotes } return $ B.note contents -smart :: RSTParser Inlines +smart :: PandocMonad m => RSTParser m Inlines smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice [apostrophe, dash, ellipses] -singleQuoted :: RSTParser Inlines +singleQuoted :: PandocMonad m => RSTParser m Inlines singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ B.singleQuoted . trimInlines . mconcat <$> many1Till inline singleQuoteEnd -doubleQuoted :: RSTParser Inlines +doubleQuoted :: PandocMonad m => RSTParser m Inlines doubleQuoted = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 76a25ad82..75e3f89eb 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -1,4 +1,7 @@ -{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE TypeSynonymInstances #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> @@ -30,54 +33,50 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of twiki text to 'Pandoc' document. -} module Text.Pandoc.Readers.TWiki ( readTWiki - , readTWikiWithWarnings ) where -import Text.Pandoc.Definition +import Control.Monad +import Control.Monad.Except (throwError) +import Data.Char (isAlphaNum) +import qualified Data.Foldable as F +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (enclosed, macro, nested) +import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Control.Monad -import Text.Printf (printf) -import Debug.Trace (trace) +import Text.Pandoc.Shared (crFilter) import Text.Pandoc.XML (fromEntities) -import Data.Maybe (fromMaybe) -import Text.HTML.TagSoup -import Data.Char (isAlphaNum) -import qualified Data.Foldable as F -import Text.Pandoc.Error -- | Read twiki from an input string and return a Pandoc document. -readTWiki :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readTWiki opts s = - (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n") - -readTWikiWithWarnings :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError (Pandoc, [String]) -readTWikiWithWarnings opts s = - (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") - where parseTWikiWithWarnings = do - doc <- parseTWiki - warnings <- stateWarnings <$> getState - return (doc, warnings) - -type TWParser = Parser [Char] ParserState +readTWiki :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readTWiki opts s = do + res <- readWithM parseTWiki def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case res of + Left e -> throwError e + Right d -> return d + +type TWParser = ParserT [Char] ParserState -- -- utility functions -- -tryMsg :: String -> TWParser a -> TWParser a +tryMsg :: String -> TWParser m a -> TWParser m a tryMsg msg p = try p <?> msg -skip :: TWParser a -> TWParser () +skip :: TWParser m a -> TWParser m () skip parser = parser >> return () -nested :: TWParser a -> TWParser a +nested :: PandocMonad m => TWParser m a -> TWParser m a nested p = do nestlevel <- stateMaxNestingLevel <$> getState guard $ nestlevel > 0 @@ -86,7 +85,7 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -htmlElement :: String -> TWParser (Attr, String) +htmlElement :: PandocMonad m => String -> TWParser m (Attr, String) htmlElement tag = tryMsg tag $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) content <- manyTill anyChar (endtag <|> endofinput) @@ -103,23 +102,24 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals) classes = maybe [] words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] -parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a]) +parseHtmlContentWithAttrs :: PandocMonad m + => String -> TWParser m a -> TWParser m (Attr, [a]) parseHtmlContentWithAttrs tag parser = do (attr, content) <- htmlElement tag parsedContent <- try $ parseContent content return (attr, parsedContent) where - parseContent = parseFromString $ nested $ manyTill parser endOfContent + parseContent = parseFromString' $ nested $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof -parseHtmlContent :: String -> TWParser a -> TWParser [a] +parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd -- -- main parser -- -parseTWiki :: TWParser Pandoc +parseTWiki :: PandocMonad m => TWParser m Pandoc parseTWiki = do bs <- mconcat <$> many block spaces @@ -131,20 +131,16 @@ parseTWiki = do -- block parsers -- -block :: TWParser B.Blocks +block :: PandocMonad m => TWParser m B.Blocks block = do - tr <- getOption readerTrace - pos <- getPosition res <- mempty <$ skipMany1 blankline <|> blockElements <|> para skipMany blankline - when tr $ - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList res)) (return ()) + trace (take 60 $ show $ B.toList res) return res -blockElements :: TWParser B.Blocks +blockElements :: PandocMonad m => TWParser m B.Blocks blockElements = choice [ separator , header , verbatim @@ -155,10 +151,10 @@ blockElements = choice [ separator , noautolink ] -separator :: TWParser B.Blocks +separator :: PandocMonad m => TWParser m B.Blocks separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule -header :: TWParser B.Blocks +header :: PandocMonad m => TWParser m B.Blocks header = tryMsg "header" $ do string "---" level <- many1 (char '+') >>= return . length @@ -167,45 +163,47 @@ header = tryMsg "header" $ do skipSpaces content <- B.trimInlines . mconcat <$> manyTill inline newline attr <- registerHeader ("", classes, []) content - return $ B.headerWith attr level $ content + return $ B.headerWith attr level content -verbatim :: TWParser B.Blocks +verbatim :: PandocMonad m => TWParser m B.Blocks verbatim = (htmlElement "verbatim" <|> htmlElement "pre") >>= return . (uncurry B.codeBlockWith) -literal :: TWParser B.Blocks +literal :: PandocMonad m => TWParser m B.Blocks literal = htmlElement "literal" >>= return . rawBlock where format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) content -list :: String -> TWParser B.Blocks +list :: PandocMonad m => String -> TWParser m B.Blocks list prefix = choice [ bulletList prefix , orderedList prefix , definitionList prefix] -definitionList :: String -> TWParser B.Blocks +definitionList :: PandocMonad m => String -> TWParser m B.Blocks definitionList prefix = tryMsg "definitionList" $ do indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ " elements <- many $ parseDefinitionListItem (prefix ++ concat indent) return $ B.definitionList elements where - parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks]) + parseDefinitionListItem :: PandocMonad m + => String -> TWParser m (B.Inlines, [B.Blocks]) parseDefinitionListItem indent = do string (indent ++ "$ ") >> skipSpaces term <- many1Till inline $ string ": " line <- listItemLine indent $ string "$ " return $ (mconcat term, [line]) -bulletList :: String -> TWParser B.Blocks +bulletList :: PandocMonad m => String -> TWParser m B.Blocks bulletList prefix = tryMsg "bulletList" $ parseList prefix (char '*') (char ' ') -orderedList :: String -> TWParser B.Blocks +orderedList :: PandocMonad m => String -> TWParser m B.Blocks orderedList prefix = tryMsg "orderedList" $ parseList prefix (oneOf "1iIaA") (string ". ") -parseList :: String -> TWParser Char -> TWParser a -> TWParser B.Blocks +parseList :: PandocMonad m + => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks parseList prefix marker delim = do (indent, style) <- lookAhead $ string prefix *> listStyle <* delim blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) @@ -222,10 +220,12 @@ parseList prefix marker delim = do style <- marker return (concat indent, style) -parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks +parseListItem :: (PandocMonad m, Show a) + => String -> TWParser m a -> TWParser m B.Blocks parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker -listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks +listItemLine :: (PandocMonad m, Show a) + => String -> TWParser m a -> TWParser m B.Blocks listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat where lineContent = do @@ -235,14 +235,14 @@ listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat filterSpaces = reverse . dropWhile (== ' ') . reverse listContinuation = notFollowedBy (string prefix >> marker) >> string " " >> lineContent - parseContent = parseFromString $ many1 $ nestedList <|> parseInline + parseContent = parseFromString' $ many1 $ nestedList <|> parseInline parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= return . B.plain . mconcat nestedList = list prefix lastNewline = try $ char '\n' <* eof newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList -table :: TWParser B.Blocks +table :: PandocMonad m => TWParser m B.Blocks table = try $ do tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip rows <- many1 tableParseRow @@ -254,7 +254,7 @@ table = try $ do columns rows = replicate (columCount rows) mempty columCount rows = length $ head rows -tableParseHeader :: TWParser ((Alignment, Double), B.Blocks) +tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks) tableParseHeader = try $ do char '|' leftSpaces <- many spaceChar >>= return . length @@ -270,27 +270,27 @@ tableParseHeader = try $ do | left > right = (AlignRight, 0) | otherwise = (AlignLeft, 0) -tableParseRow :: TWParser [B.Blocks] +tableParseRow :: PandocMonad m => TWParser m [B.Blocks] tableParseRow = many1Till tableParseColumn newline -tableParseColumn :: TWParser B.Blocks +tableParseColumn :: PandocMonad m => TWParser m B.Blocks tableParseColumn = char '|' *> skipSpaces *> tableColumnContent (skipSpaces >> char '|') <* skipSpaces <* optional tableEndOfRow -tableEndOfRow :: TWParser Char +tableEndOfRow :: PandocMonad m => TWParser m Char tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|' -tableColumnContent :: TWParser a -> TWParser B.Blocks +tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat where content = continuation <|> inline continuation = try $ char '\\' >> newline >> return mempty -blockQuote :: TWParser B.Blocks +blockQuote :: PandocMonad m => TWParser m B.Blocks blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat -noautolink :: TWParser B.Blocks +noautolink :: PandocMonad m => TWParser m B.Blocks noautolink = do (_, content) <- htmlElement "noautolink" st <- getState @@ -299,9 +299,9 @@ noautolink = do setState $ st{ stateAllowLinks = True } return $ mconcat blocks where - parseContent = parseFromString $ many $ block + parseContent = parseFromString' $ many $ block -para :: TWParser B.Blocks +para :: PandocMonad m => TWParser m B.Blocks para = many1Till inline endOfParaElement >>= return . result . mconcat where endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement @@ -317,7 +317,7 @@ para = many1Till inline endOfParaElement >>= return . result . mconcat -- inline parsers -- -inline :: TWParser B.Inlines +inline :: PandocMonad m => TWParser m B.Inlines inline = choice [ whitespace , br , macro @@ -338,36 +338,39 @@ inline = choice [ whitespace , symbol ] <?> "inline" -whitespace :: TWParser B.Inlines +whitespace :: PandocMonad m => TWParser m B.Inlines whitespace = (lb <|> regsp) >>= return where lb = try $ skipMany spaceChar >> linebreak >> return B.space regsp = try $ skipMany1 spaceChar >> return B.space -br :: TWParser B.Inlines +br :: PandocMonad m => TWParser m B.Inlines br = try $ string "%BR%" >> return B.linebreak -linebreak :: TWParser B.Inlines +linebreak :: PandocMonad m => TWParser m B.Inlines linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) where lastNewline = eof >> return mempty innerNewline = return B.space -between :: (Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c +between :: (Monoid c, PandocMonad m, Show b) + => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c) + -> TWParser m c between start end p = mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) -enclosed :: (Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b +enclosed :: (Monoid b, PandocMonad m, Show a) + => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b enclosed sep p = between sep (try $ sep <* endMarker) p where endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof endSpace = (spaceChar <|> newline) >> return B.space -macro :: TWParser B.Inlines +macro :: PandocMonad m => TWParser m B.Inlines macro = macroWithParameters <|> withoutParameters where withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan emptySpan name = buildSpan name [] mempty -macroWithParameters :: TWParser B.Inlines +macroWithParameters :: PandocMonad m => TWParser m B.Inlines macroWithParameters = try $ do char '%' name <- macroName @@ -382,22 +385,22 @@ buildSpan className kvs = B.spanWith attrs additionalClasses = maybe [] words $ lookup "class" kvs kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] -macroName :: TWParser String +macroName :: PandocMonad m => TWParser m String macroName = do first <- letter rest <- many $ alphaNum <|> char '_' return (first:rest) -attributes :: TWParser (String, [(String, String)]) +attributes :: PandocMonad m => TWParser m (String, [(String, String)]) attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= return . foldr (either mkContent mkKvs) ([], []) where spnl = skipMany (spaceChar <|> newline) - mkContent c ([], kvs) = (c, kvs) - mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) + mkContent c ([], kvs) = (c, kvs) + mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) mkKvs kv (cont, rest) = (cont, (kv : rest)) -attribute :: TWParser (Either String (String, String)) +attribute :: PandocMonad m => TWParser m (Either String (String, String)) attribute = withKey <|> withoutKey where withKey = try $ do @@ -411,49 +414,51 @@ attribute = withKey <|> withoutKey | allowSpaces == True = many1 $ noneOf "}" | otherwise = many1 $ noneOf " }" -nestedInlines :: Show a => TWParser a -> TWParser B.Inlines +nestedInlines :: (Show a, PandocMonad m) + => TWParser m a -> TWParser m B.Inlines nestedInlines end = innerSpace <|> nestedInline where innerSpace = try $ whitespace <* (notFollowedBy end) nestedInline = notFollowedBy whitespace >> nested inline -strong :: TWParser B.Inlines +strong :: PandocMonad m => TWParser m B.Inlines strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong -strongHtml :: TWParser B.Inlines +strongHtml :: PandocMonad m => TWParser m B.Inlines strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) >>= return . B.strong . mconcat -strongAndEmph :: TWParser B.Inlines +strongAndEmph :: PandocMonad m => TWParser m B.Inlines strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong -emph :: TWParser B.Inlines +emph :: PandocMonad m => TWParser m B.Inlines emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph -emphHtml :: TWParser B.Inlines +emphHtml :: PandocMonad m => TWParser m B.Inlines emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) >>= return . B.emph . mconcat -nestedString :: Show a => TWParser a -> TWParser String +nestedString :: (Show a, PandocMonad m) + => TWParser m a -> TWParser m String nestedString end = innerSpace <|> (count 1 nonspaceChar) where innerSpace = try $ many1 spaceChar <* notFollowedBy end -boldCode :: TWParser B.Inlines +boldCode :: PandocMonad m => TWParser m B.Inlines boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities -htmlComment :: TWParser B.Inlines +htmlComment :: PandocMonad m => TWParser m B.Inlines htmlComment = htmlTag isCommentTag >> return mempty -code :: TWParser B.Inlines +code :: PandocMonad m => TWParser m B.Inlines code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities -codeHtml :: TWParser B.Inlines +codeHtml :: PandocMonad m => TWParser m B.Inlines codeHtml = do (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar return $ B.codeWith attrs $ fromEntities content -autoLink :: TWParser B.Inlines +autoLink :: PandocMonad m => TWParser m B.Inlines autoLink = try $ do state <- getState guard $ stateAllowLinks state @@ -467,36 +472,36 @@ autoLink = try $ do | c == '/' = True | otherwise = isAlphaNum c -str :: TWParser B.Inlines +str :: PandocMonad m => TWParser m B.Inlines str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str -nop :: TWParser B.Inlines +nop :: PandocMonad m => TWParser m B.Inlines nop = try $ (skip exclamation <|> skip nopTag) >> followContent where exclamation = char '!' nopTag = stringAnyCase "<nop>" followContent = many1 nonspaceChar >>= return . B.str . fromEntities -symbol :: TWParser B.Inlines +symbol :: PandocMonad m => TWParser m B.Inlines symbol = count 1 nonspaceChar >>= return . B.str -smart :: TWParser B.Inlines +smart :: PandocMonad m => TWParser m B.Inlines smart = do - getOption readerSmart >>= guard + guardEnabled Ext_smart doubleQuoted <|> singleQuoted <|> choice [ apostrophe , dash , ellipses ] -singleQuoted :: TWParser B.Inlines +singleQuoted :: PandocMonad m => TWParser m B.Inlines singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= (return . B.singleQuoted . B.trimInlines . mconcat) -doubleQuoted :: TWParser B.Inlines +doubleQuoted :: PandocMonad m => TWParser m B.Inlines doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) @@ -504,7 +509,7 @@ doubleQuoted = try $ do return (B.doubleQuoted $ B.trimInlines contents)) <|> (return $ (B.str "\8220") B.<> contents) -link :: TWParser B.Inlines +link :: PandocMonad m => TWParser m B.Inlines link = try $ do st <- getState guard $ stateAllowLinks st @@ -513,13 +518,13 @@ link = try $ do setState $ st{ stateAllowLinks = True } return $ B.link url title content -linkText :: TWParser (String, String, B.Inlines) +linkText :: PandocMonad m => TWParser m (String, String, B.Inlines) linkText = do string "[[" url <- many1Till anyChar (char ']') - content <- option [B.str url] linkContent + content <- option (B.str url) (mconcat <$> linkContent) char ']' - return (url, "", mconcat content) + return (url, "", content) where linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent - parseLinkContent = parseFromString $ many1 inline + parseLinkContent = parseFromString' $ many1 inline diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs deleted file mode 100644 index e5778b123..000000000 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ /dev/null @@ -1,48 +0,0 @@ -{- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.TeXMath - Copyright : Copyright (C) 2007-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of TeX math to a list of 'Pandoc' inline elements. --} -module Text.Pandoc.Readers.TeXMath ( texMathToInlines ) where - -import Text.Pandoc.Definition -import Text.TeXMath - --- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. --- Defaults to raw formula between @$@ or @$$@ characters if entire formula --- can't be converted. -texMathToInlines :: MathType - -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> [Inline] -texMathToInlines mt inp = - case writePandoc dt `fmap` readTeX inp of - Right (Just ils) -> ils - _ -> [Str (delim ++ inp ++ delim)] - where (dt, delim) = case mt of - DisplayMath -> (DisplayBlock, "$$") - InlineMath -> (DisplayInline, "$") - diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 8dbbf7be2..30bb6a715 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,6 +1,6 @@ {- -Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' - and John MacFarlane +Copyright (C) 2010-2012 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' + 2010-2018 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile - Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane + Copyright : Copyright (C) 2010-2012 Paul Rivier + 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Paul Rivier <paul*rivier#demotera*com> @@ -51,44 +52,42 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where +import Control.Monad (guard, liftM) +import Control.Monad.Except (throwError) +import Data.Char (digitToInt, isUpper) +import Data.List (intercalate, intersperse, transpose) +import Data.Monoid ((<>)) +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup (Tag (..), fromAttrib) +import Text.HTML.TagSoup.Match +import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.CSS import Text.Pandoc.Definition -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) -import qualified Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag ) -import Text.Pandoc.Shared (trim) -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.HTML.TagSoup (fromAttrib, Tag(..)) -import Text.HTML.TagSoup.Match -import Data.List ( intercalate, transpose, intersperse ) -import Data.Char ( digitToInt, isUpper ) -import Control.Monad ( guard, liftM, when ) -import Data.Monoid ((<>)) -import Text.Printf -import Debug.Trace (trace) -import Text.Pandoc.Error +import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) +import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan) -- | Parse a Textile text and return a Pandoc document. -readTextile :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError Pandoc -readTextile opts s = - (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") +readTextile :: PandocMonad m + => ReaderOptions -- ^ Reader options + -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> m Pandoc +readTextile opts s = do + parsed <- readWithM parseTextile def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case parsed of + Right result -> return result + Left e -> throwError e -- | Generate a Pandoc ADT from a textile document -parseTextile :: Parser [Char] ParserState Pandoc +parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc parseTextile = do - -- textile allows raw HTML and does smart punctuation by default, - -- but we do not enable smart punctuation unless it is explicitly - -- asked for, for better conversion to other light markup formats - oldOpts <- stateOptions `fmap` getState - updateState $ \state -> state{ stateOptions = - oldOpts{ readerParseRaw = True - , readerOldDashes = True - } } many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes @@ -103,15 +102,15 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc nullMeta (B.toList blocks) -- FIXME -noteMarker :: Parser [Char] ParserState [Char] +noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') -noteBlock :: Parser [Char] ParserState [Char] +noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char] noteBlock = try $ do startPos <- getPosition ref <- noteMarker optional blankline - contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock) + contents <- unlines <$> many1Till anyLine (blanklines <|> noteBlock) endPos <- getPosition let newnote = (ref, contents ++ "\n") st <- getState @@ -121,11 +120,11 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- | Parse document blocks -parseBlocks :: Parser [Char] ParserState Blocks +parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: [Parser [Char] ParserState Blocks] +blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -140,26 +139,22 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: Parser [Char] ParserState Blocks +block :: PandocMonad m => ParserT [Char] ParserState m Blocks block = do res <- choice blockParsers <?> "block" - pos <- getPosition - tr <- getOption readerTrace - when tr $ - trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList res)) (return ()) + trace (take 60 $ show $ B.toList res) return res -commentBlock :: Parser [Char] ParserState Blocks +commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: Parser [Char] ParserState Blocks +codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: Parser [Char] ParserState Blocks +codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks codeBlockBc = try $ do string "bc." extended <- option False (True <$ char '.') @@ -179,11 +174,10 @@ trimTrailingNewlines :: String -> String trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: Parser [Char] ParserState Blocks +codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) result' <- manyTill anyChar (htmlTag (tagClose (=="pre"))) - optional blanklines -- drop leading newline if any let result'' = case result' of '\n':xs -> xs @@ -198,7 +192,7 @@ codeBlockPre = try $ do return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: Parser [Char] ParserState Blocks +header :: PandocMonad m => ParserT [Char] ParserState m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -210,14 +204,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: Parser [Char] ParserState Blocks +blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: Parser [Char] st Blocks +hrule :: PandocMonad m => ParserT [Char] st m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -232,66 +226,67 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: Parser [Char] ParserState Blocks +anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: Int -> Parser [Char] ParserState Blocks +anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks +bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks +orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace - p <- mconcat <$> many listInline + contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|> + try (newline >> codeBlockPre)) newline sublist <- option mempty (anyListAtDepth (depth + 1)) - return $ (B.plain p) <> sublist + return $ contents <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: Parser [Char] ParserState Blocks +definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: Parser [Char] ParserState () +listStart :: PandocMonad m => ParserT [Char] ParserState m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: Char -> Parser [Char] st () +genericListStart :: PandocMonad m => Char -> ParserT [Char] st m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: Parser [Char] ParserState () +basicDLStart :: PandocMonad m => ParserT [Char] ParserState m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: Parser [Char] ParserState Inlines +definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -300,34 +295,30 @@ definitionListStart = try $ do <|> try (lookAhead (() <$ string ":=")) ) -listInline :: Parser [Char] ParserState Inlines -listInline = try (notFollowedBy newline >> inline) - <|> try (endline <* notFollowedBy listStart) - -- | A definition list item in textile begins with '- ', followed by -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks]) definitionListItem = try $ do term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: Parser [Char] ParserState [Blocks] + where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] inlineDef = liftM (\d -> [B.plain d]) - $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline - multilineDef :: Parser [Char] ParserState [Blocks] + $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline + multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) -- this ++ "\n\n" does not look very good - ds <- parseFromString parseBlocks (s ++ "\n\n") + ds <- parseFromString' parseBlocks (s ++ "\n\n") return [ds] -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: Parser [Char] ParserState Blocks +rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -335,14 +326,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: Parser [Char] ParserState Blocks +rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: Parser [Char] ParserState Blocks +para :: PandocMonad m => ParserT [Char] ParserState m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -353,7 +344,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: Parser [Char] ParserState (Bool, Alignment) +cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -366,18 +357,18 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: Parser [Char] ParserState ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' - (isHeader, alignment) <- option (False, AlignDefault) $ cellAttributes + (isHeader, alignment) <- option (False, AlignDefault) cellAttributes notFollowedBy blankline raw <- trim <$> many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) - content <- mconcat <$> parseFromString (many inline) raw + content <- mconcat <$> parseFromString' (many inline) raw return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: Parser [Char] ParserState [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -387,7 +378,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: Parser [Char] ParserState Blocks +table :: PandocMonad m => ParserT [Char] ParserState m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -395,8 +386,8 @@ table = try $ do _ <- attributes char '.' rawcapt <- trim <$> anyLine - parseFromString (mconcat <$> many inline) rawcapt - rawrows <- many1 $ (skipMany ignorableRow) >> tableRow + parseFromString' (mconcat <$> many inline) rawcapt + rawrows <- many1 $ skipMany ignorableRow >> tableRow skipMany ignorableRow blanklines let (headers, rows) = case rawrows of @@ -411,7 +402,7 @@ table = try $ do (map (map snd) rows) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: Parser [Char] ParserState () +ignorableRow :: PandocMonad m => ParserT [Char] ParserState m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -420,7 +411,7 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: String -> Parser [Char] ParserState () +explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m () explicitBlockStart name = try $ do string name attributes @@ -430,9 +421,10 @@ explicitBlockStart name = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. -maybeExplicitBlock :: String -- ^ block tag name - -> Parser [Char] ParserState Blocks -- ^ implicit block - -> Parser [Char] ParserState Blocks +maybeExplicitBlock :: PandocMonad m + => String -- ^ block tag name + -> ParserT [Char] ParserState m Blocks -- ^ implicit block + -> ParserT [Char] ParserState m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -445,12 +437,11 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: Parser [Char] ParserState Inlines -inline = do - choice inlineParsers <?> "inline" +inline :: PandocMonad m => ParserT [Char] ParserState m Inlines +inline = choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: [Parser [Char] ParserState Inlines] +inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines] inlineParsers = [ str , whitespace , endline @@ -470,13 +461,13 @@ inlineParsers = [ str ] -- | Inline markups -inlineMarkup :: Parser [Char] ParserState Inlines +inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph , simpleInline (char '*') B.strong , simpleInline (char '_') B.emph - , simpleInline (char '+') B.emph -- approximates underline + , simpleInline (char '+') underlineSpan , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout , simpleInline (char '^') B.superscript , simpleInline (char '~') B.subscript @@ -484,35 +475,35 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: Parser [Char] st Inlines +mark :: PandocMonad m => ParserT [Char] st m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: Parser [Char] st Inlines +reg :: PandocMonad m => ParserT [Char] st m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: Parser [Char] st Inlines +tm :: PandocMonad m => ParserT [Char] st m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: Parser [Char] st Inlines +copy :: PandocMonad m => ParserT [Char] st m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: Parser [Char] ParserState Inlines +note :: PandocMonad m => ParserT [Char] ParserState m Inlines note = try $ do - ref <- (char '[' *> many1 digit <* char ']') + ref <- char '[' *> many1 digit <* char ']' notes <- stateNotes <$> getState case lookup ref notes of - Nothing -> fail "note not found" - Just raw -> B.note <$> parseFromString parseBlocks raw + Nothing -> fail "note not found" + Just raw -> B.note <$> parseFromString' parseBlocks raw -- | Special chars markupChars :: [Char] @@ -530,22 +521,22 @@ wordBoundaries :: [Char] wordBoundaries = markupChars ++ stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: Parser [Char] ParserState String +hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) return $ intercalate "-" (x:xs) -wordChunk :: Parser [Char] ParserState String +wordChunk :: PandocMonad m => ParserT [Char] ParserState m String wordChunk = try $ do hd <- noneOf wordBoundaries - tl <- many ( (noneOf wordBoundaries) <|> + tl <- many ( noneOf wordBoundaries <|> try (notFollowedBy' note *> oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) return $ hd:tl -- | Any string -str :: Parser [Char] ParserState Inlines +str :: PandocMonad m => ParserT [Char] ParserState m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediatly @@ -558,11 +549,11 @@ str = do return $ B.str fullStr -- | Some number of space chars -whitespace :: Parser [Char] st Inlines +whitespace :: PandocMonad m => ParserT [Char] st m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: Parser [Char] ParserState Inlines +endline :: PandocMonad m => ParserT [Char] ParserState m Inlines endline = try $ do newline notFollowedBy blankline @@ -570,18 +561,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: Parser [Char] ParserState Inlines +rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: Parser [Char] ParserState Inlines +rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex - B.singleton <$> rawLaTeXInline + B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: Parser [Char] ParserState Inlines +link :: PandocMonad m => ParserT [Char] ParserState m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -591,8 +582,9 @@ link = try $ do char ':' let stop = if bracketed then char ']' - else lookAhead $ space <|> - try (oneOf "!.,;:" *> (space <|> newline)) + else lookAhead $ space <|> eof' <|> + try (oneOf "!.,;:" *> + (space <|> newline <|> eof')) url <- many1Till nonspaceChar stop let name' = if B.toList name == [Str "$"] then B.str url else name return $ if attr == nullAttr @@ -600,7 +592,7 @@ link = try $ do else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: Parser [Char] ParserState Inlines +image :: PandocMonad m => ParserT [Char] ParserState m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes @@ -612,50 +604,51 @@ image = try $ do char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: Parser [Char] ParserState Inlines +escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: Parser [Char] ParserState Inlines +escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines escapedEqs = B.str <$> - (try $ string "==" *> manyTill anyChar' (try $ string "==")) + try (string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: Parser [Char] ParserState Inlines +escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines escapedTag = B.str <$> - (try $ string "<notextile>" *> + try (string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: Parser [Char] ParserState Inlines +symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines symbol = B.str . singleton <$> (notFollowedBy newline *> notFollowedBy rawHtmlBlock *> oneOf wordBoundaries) -- | Inline code -code :: Parser [Char] ParserState Inlines +code :: PandocMonad m => ParserT [Char] ParserState m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: Parser [Char] ParserState Char +anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char anyChar' = - satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline) + satisfy (/='\n') <|> + try (char '\n' <* notFollowedBy blankline) -code1 :: Parser [Char] ParserState Inlines +code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines code1 = B.code <$> surrounded (char '@') anyChar' -code2 :: Parser [Char] ParserState Inlines +code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines code2 = do htmlTag (tagOpen (=="tt") null) B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: Parser [Char] ParserState Attr -attributes = (foldl (flip ($)) ("",[],[])) <$> +attributes :: PandocMonad m => ParserT [Char] ParserState m Attr +attributes = foldl (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: Parser [Char] ParserState (Attr -> Attr) +specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> @@ -664,11 +657,11 @@ specialAttribute = do notFollowedBy spaceChar return $ addStyle ("text-align:" ++ alignStr) -attribute :: Parser [Char] ParserState (Attr -> Attr) +attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: Parser [Char] ParserState (Attr -> Attr) +classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' ws <- words `fmap` manyTill anyChar' (char ')') @@ -679,7 +672,7 @@ classIdAttr = try $ do -- (class class #id) classes' -> return $ \(_,_,keyvals) -> ("",classes',keyvals) -styleAttr :: Parser [Char] ParserState (Attr -> Attr) +styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' return $ addStyle style @@ -690,21 +683,23 @@ addStyle style (id',classes,keyvals) = where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals] -langAttr :: Parser [Char] ParserState (Attr -> Attr) +langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) -- | Parses material surrounded by a parser. -surrounded :: Parser [Char] st t -- ^ surrounding parser - -> Parser [Char] st a -- ^ content parser (to be used repeatedly) - -> Parser [Char] st [a] +surrounded :: (PandocMonad m, Show t) + => ParserT [Char] st m t -- ^ surrounding parser + -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) + -> ParserT [Char] st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) -simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser - -> (Inlines -> Inlines) -- ^ Inline constructor - -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) +simpleInline :: PandocMonad m + => ParserT [Char] ParserState m t -- ^ surrounding parser + -> (Inlines -> Inlines) -- ^ Inline constructor + -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -718,7 +713,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: Parser [Char] ParserState Inlines +groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace @@ -731,3 +726,5 @@ groupedInlineMarkup = try $ do singleton :: a -> [a] singleton x = [x] +eof' :: Monad m => ParserT [Char] s m Char +eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs new file mode 100644 index 000000000..a92f7bed2 --- /dev/null +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -0,0 +1,654 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE TypeSynonymInstances #-} + +{- | + Module : Text.Pandoc.Readers.TikiWiki + Copyright : Copyright (C) 2017 Robin Lee Powell + License : GPLv2 + + Maintainer : Robin Lee Powell <robinleepowell@gmail.com> + Stability : alpha + Portability : portable + +Conversion of TikiWiki text to 'Pandoc' document. +-} + +module Text.Pandoc.Readers.TikiWiki ( readTikiWiki + ) where + +import Control.Monad +import Control.Monad.Except (throwError) +import qualified Data.Foldable as F +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (CommonState (..), PandocMonad (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Logging (Verbosity (..)) +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (enclosed, nested) +import Text.Pandoc.Shared (crFilter) +import Text.Pandoc.XML (fromEntities) +import Text.Printf (printf) + +-- | Read TikiWiki from an input string and return a Pandoc document. +readTikiWiki :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readTikiWiki opts s = do + res <- readWithM parseTikiWiki def{ stateOptions = opts } + (T.unpack (crFilter s) ++ "\n\n") + case res of + Left e -> throwError e + Right d -> return d + +type TikiWikiParser = ParserT [Char] ParserState + +-- +-- utility functions +-- + +tryMsg :: PandocMonad m => String -> TikiWikiParser m a -> TikiWikiParser m a +tryMsg msg p = try p <?> msg + +skip :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m () +skip parser = Control.Monad.void parser + +nested :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +-- +-- main parser +-- + +parseTikiWiki :: PandocMonad m => TikiWikiParser m Pandoc +parseTikiWiki = do + bs <- mconcat <$> many block + spaces + eof + return $ B.doc bs + +block :: PandocMonad m => TikiWikiParser m B.Blocks +block = do + verbosity <- getsCommonState stVerbosity + pos <- getPosition + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + when (verbosity >= INFO) $ + trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) + return res + +blockElements :: PandocMonad m => TikiWikiParser m B.Blocks +blockElements = choice [ table + , hr + , header + , mixedList + , definitionList + , codeMacro + ] + +-- top +-- ---- +-- bottom +-- +-- ---- +-- +hr :: PandocMonad m => TikiWikiParser m B.Blocks +hr = try $ do + string "----" + many (char '-') + newline + return B.horizontalRule + +-- ! header +-- +-- !! header level two +-- +-- !!! header level 3 +-- +header :: PandocMonad m => TikiWikiParser m B.Blocks +header = tryMsg "header" $ do + level <- fmap length (many1 (char '!')) + guard $ level <= 6 + skipSpaces + content <- B.trimInlines . mconcat <$> manyTill inline newline + attr <- registerHeader nullAttr content + return $B.headerWith attr level content + +tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks] +tableRow = try $ do +-- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n")) +-- return $ map (B.plain . mconcat) row + row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n")) + return $ map B.plain row + where + parseColumn x = do + parsed <- parseFromString (many1 inline) x + return $ mconcat parsed + + + +-- Tables: +-- +-- ||foo|| +-- +-- ||row1-column1|row1-column2||row2-column1|row2-column2|| +-- +-- ||row1-column1|row1-column2 +-- row2-column1|row2-column2|| +-- +-- ||row1-column1|row1-column2 +-- row2-column1|row2-column2||row3-column1|row3-column2|| +-- +-- || Orange | Apple | more +-- Bread | Pie | more +-- Butter | Ice cream | and more || +-- +table :: PandocMonad m => TikiWikiParser m B.Blocks +table = try $ do + string "||" + rows <- sepBy1 tableRow (try $ string "\n" <|> (string "||" <* notFollowedBy (string "\n"))) + string "||" + newline + -- return $ B.simpleTable (headers rows) $ trace ("rows: " ++ (show rows)) rows + return $B.simpleTable (headers rows) rows + where + -- The headers are as many empty srings as the number of columns + -- in the first row + headers rows = map (B.plain . B.str) $replicate (length $ head rows) "" + +para :: PandocMonad m => TikiWikiParser m B.Blocks +para = fmap (result . mconcat) ( many1Till inline endOfParaElement) + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> skip blockElements + result content = if F.all (==Space) content + then mempty + else B.para $ B.trimInlines content + +-- ;item 1: definition 1 +-- ;item 2: definition 2-1 +-- + definition 2-2 +-- ;item ''3'': definition ''3'' +-- +definitionList :: PandocMonad m => TikiWikiParser m B.Blocks +definitionList = tryMsg "definitionList" $ do + elements <-many1 parseDefinitionListItem + return $ B.definitionList elements + where + parseDefinitionListItem :: PandocMonad m => TikiWikiParser m (B.Inlines, [B.Blocks]) + parseDefinitionListItem = do + skipSpaces >> char ';' <* skipSpaces + term <- many1Till inline $ char ':' <* skipSpaces + line <- listItemLine 1 + return (mconcat term, [B.plain line]) + +data ListType = None | Numbered | Bullet deriving (Ord, Eq, Show) + +data ListNesting = LN { lntype :: ListType, lnnest :: Int } deriving (Ord, Eq, Show) + +-- The first argument is a stack (most recent == head) of our list +-- nesting status; the list type and the nesting level; if we're in +-- a number list in a bullet list it'd be +-- [LN Numbered 2, LN Bullet 1] +-- +-- Mixed list example: +-- +-- # one +-- # two +-- ** two point one +-- ** two point two +-- # three +-- # four +-- +mixedList :: PandocMonad m => TikiWikiParser m B.Blocks +mixedList = try $ do + items <- try $ many1 listItem + return $ mconcat $ fixListNesting $ spanFoldUpList (LN None 0) items + +-- See the "Handling Lists" section of DESIGN-CODE for why this +-- function exists. It's to post-process the lists and do some +-- mappends. +-- +-- We need to walk the tree two items at a time, so we can see what +-- we're going to join *to* before we get there. +-- +-- Because of that, it seemed easier to do it by hand than to try to +-- figre out a fold or something. +fixListNesting :: [B.Blocks] -> [B.Blocks] +fixListNesting [] = [] +fixListNesting [first] = [recurseOnList first] +-- fixListNesting nestall | trace ("\n\nfixListNesting: " ++ (show nestall)) False = undefined +-- fixListNesting nestall@(first:second:rest) = +fixListNesting (first:second:rest) = + let secondBlock = head $ B.toList second in + case secondBlock of + BulletList _ -> fixListNesting $ mappend (recurseOnList first) (recurseOnList second) : rest + OrderedList _ _ -> fixListNesting $ mappend (recurseOnList first) (recurseOnList second) : rest + _ -> recurseOnList first : fixListNesting (second:rest) + +-- This function walks the Block structure for fixListNesting, +-- because it's a bit complicated, what with converting to and from +-- lists and so on. +recurseOnList :: B.Blocks -> B.Blocks +-- recurseOnList item | trace ("rOL: " ++ (show $ length $ B.toList item) ++ ", " ++ (show $ B.toList item)) False = undefined +recurseOnList items + | length (B.toList items) == 1 = + let itemBlock = head $ B.toList items in + case itemBlock of + BulletList listItems -> B.bulletList $ fixListNesting $ map B.fromList listItems + OrderedList _ listItems -> B.orderedList $ fixListNesting $ map B.fromList listItems + _ -> items + + -- The otherwise works because we constructed the blocks, and we + -- know for a fact that no mappends have been run on them; each + -- Blocks consists of exactly one Block. + -- + -- Anything that's not like that has already been processed by + -- fixListNesting; don't bother to process it again. + | otherwise = items + + +-- Turn the list if list items into a tree by breaking off the first +-- item, splitting the remainder of the list into items that are in +-- the tree of the first item and those that aren't, wrapping the +-- tree of the first item in its list time, and recursing on both +-- sections. +spanFoldUpList :: ListNesting -> [(ListNesting, B.Blocks)] -> [B.Blocks] +spanFoldUpList _ [] = [] +spanFoldUpList ln [first] = + listWrap ln (fst first) [snd first] +spanFoldUpList ln (first:rest) = + let (span1, span2) = span (splitListNesting (fst first)) rest + newTree1 = listWrap ln (fst first) $ snd first : spanFoldUpList (fst first) span1 + newTree2 = spanFoldUpList ln span2 + in + newTree1 ++ newTree2 + +-- Decide if the second item should be in the tree of the first +-- item, which is true if the second item is at a deeper nesting +-- level and of the same type. +splitListNesting :: ListNesting -> (ListNesting, B.Blocks) -> Bool +splitListNesting ln1 (ln2, _) + | lnnest ln1 < lnnest ln2 = + True + | ln1 == ln2 = + True + | otherwise = + False + +-- If we've moved to a deeper nesting level, wrap the new level in +-- the appropriate type of list. +listWrap :: ListNesting -> ListNesting -> [B.Blocks] -> [B.Blocks] +listWrap upperLN curLN retTree = + if upperLN == curLN then + retTree + else + case lntype curLN of + None -> [] + Bullet -> [B.bulletList retTree] + Numbered -> [B.orderedList retTree] + +listItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks) +listItem = choice [ + bulletItem + , numberedItem + ] + + +-- * Start each line +-- * with an asterisk (*). +-- ** More asterisks gives deeper +-- *** and deeper levels. +-- +bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks) +bulletItem = try $ do + prefix <- many1 $ char '*' + many1 $ char ' ' + content <- listItemLine (length prefix) + return (LN Bullet (length prefix), B.plain content) + +-- # Start each line +-- # with a number (1.). +-- ## More number signs gives deeper +-- ### and deeper +-- +numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks) +numberedItem = try $ do + prefix <- many1 $ char '#' + many1 $ char ' ' + content <- listItemLine (length prefix) + return (LN Numbered (length prefix), B.plain content) + +listItemLine :: PandocMonad m => Int -> TikiWikiParser m B.Inlines +listItemLine nest = lineContent >>= parseContent + where + lineContent = do + content <- anyLine + continuation <- optionMaybe listContinuation + return $ filterSpaces content ++ "\n" ++ Data.Maybe.fromMaybe "" continuation + filterSpaces = reverse . dropWhile (== ' ') . reverse + listContinuation = string (replicate nest '+') >> lineContent + parseContent x = do + parsed <- parseFromString (many1 inline) x + return $ mconcat parsed + +-- Turn the CODE macro attributes into Pandoc code block attributes. +mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)]) +mungeAttrs rawAttrs = ("", classes, rawAttrs) + where + -- "colors" is TikiWiki CODE macro for "name of language to do + -- highlighting for"; turn the value into a class + color = fromMaybe "" $ lookup "colors" rawAttrs + -- ln = 1 means line numbering. It's also the default. So we + -- emit numberLines as a class unless ln = 0 + lnRaw = fromMaybe "1" $ lookup "ln" rawAttrs + ln = if lnRaw == "0" then + "" + else + "numberLines" + classes = filter (/= "") [color, ln] + +codeMacro :: PandocMonad m => TikiWikiParser m B.Blocks +codeMacro = try $ do + string "{CODE(" + rawAttrs <- macroAttrs + string ")}" + body <- manyTill anyChar (try (string "{CODE}")) + newline + if not (null rawAttrs) + then + return $ B.codeBlockWith (mungeAttrs rawAttrs) body + else + return $ B.codeBlock body + + +-- +-- inline parsers +-- + +inline :: PandocMonad m => TikiWikiParser m B.Inlines +inline = choice [ whitespace + , noparse + , strong + , emph + , nbsp + , image + , htmlComment + , strikeout + , code + , wikiLink + , notExternalLink + , externalLink + , superTag + , superMacro + , subTag + , subMacro + , escapedChar + , colored + , centered + , underlined + , boxed + , breakChars + , str + , symbol + ] <?> "inline" + +whitespace :: PandocMonad m => TikiWikiParser m B.Inlines +whitespace = lb <|> regsp + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +nbsp :: PandocMonad m => TikiWikiParser m B.Inlines +nbsp = try $ do + string "~hs~" + return $ B.str " NOT SUPPORTED BEGIN: ~hs~ (non-breaking space) :END " + +-- UNSUPPORTED, as the desired behaviour (that the data be +-- *retained* and stored as a comment) doesn't exist in calibre, and +-- silently throwing data out seemed bad. +htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines +htmlComment = try $ do + string "~hc~" + inner <- many1 $ noneOf "~" + string "~/hc~" + return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END " + +linebreak :: PandocMonad m => TikiWikiParser m B.Inlines +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = eof >> return mempty + innerNewline = return B.space + +between :: (Monoid c, PandocMonad m, Show b) => TikiWikiParser m a -> TikiWikiParser m b -> (TikiWikiParser m b -> TikiWikiParser m c) -> TikiWikiParser m c +between start end p = + mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) + +enclosed :: (Monoid b, PandocMonad m, Show a) => TikiWikiParser m a -> (TikiWikiParser m a -> TikiWikiParser m b) -> TikiWikiParser m b +enclosed sep p = between sep (try $ sep <* endMarker) p + where + endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|'_") <|> eof + endSpace = (spaceChar <|> newline) >> return B.space + + +nestedInlines :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m B.Inlines +nestedInlines end = innerSpace <|> nestedInline + where + innerSpace = try $ whitespace <* notFollowedBy end + nestedInline = notFollowedBy whitespace >> nested inline + +-- {img attId="39" imalign="right" link="http://info.tikiwiki.org" alt="Panama Hat"} +-- +-- {img attId="37", thumb="mouseover", styleimage="border", desc="150"} +-- +-- {img src="img/wiki_up/393px-Pears.jpg" thumb="y" imalign="center" stylebox="border" button="y" desc="Pretty pears" max="200" rel="box"} +-- +image :: PandocMonad m => TikiWikiParser m B.Inlines +image = try $ do + string "{img " + rawAttrs <- sepEndBy1 imageAttr spaces + string "}" + let src = fromMaybe "" $ lookup "src" rawAttrs + let title = fromMaybe src $ lookup "desc" rawAttrs + let alt = fromMaybe title $ lookup "alt" rawAttrs + let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs + if not (null src) + then + return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt) + else + return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END " + where + printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs + +imageAttr :: PandocMonad m => TikiWikiParser m (String, String) +imageAttr = try $ do + key <- many1 (noneOf "=} \t\n") + char '=' + optional $ char '"' + value <- many1 (noneOf "}\"\n") + optional $ char '"' + optional $ char ',' + return (key, value) + + +-- __strong__ +strong :: PandocMonad m => TikiWikiParser m B.Inlines +strong = try $ fmap B.strong (enclosed (string "__") nestedInlines) + +-- ''emph'' +emph :: PandocMonad m => TikiWikiParser m B.Inlines +emph = try $ fmap B.emph (enclosed (string "''") nestedInlines) + +-- ~246~ +escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines +escapedChar = try $ do + string "~" + inner <- many1 $ oneOf "0123456789" + string "~" + return $B.str [toEnum (read inner :: Int) :: Char] + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +centered :: PandocMonad m => TikiWikiParser m B.Inlines +centered = try $ do + string "::" + inner <- many1 $ noneOf ":\n" + string "::" + return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END " + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +colored :: PandocMonad m => TikiWikiParser m B.Inlines +colored = try $ do + string "~~" + inner <- many1 $ noneOf "~\n" + string "~~" + return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END " + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +underlined :: PandocMonad m => TikiWikiParser m B.Inlines +underlined = try $ do + string "===" + inner <- many1 $ noneOf "=\n" + string "===" + return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END " + +-- UNSUPPORTED, as there doesn't seem to be any facility in calibre +-- for this +boxed :: PandocMonad m => TikiWikiParser m B.Inlines +boxed = try $ do + string "^" + inner <- many1 $ noneOf "^\n" + string "^" + return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END " + +-- --text-- +strikeout :: PandocMonad m => TikiWikiParser m B.Inlines +strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines) + +nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String +nestedString end = innerSpace <|> count 1 nonspaceChar + where + innerSpace = try $ many1 spaceChar <* notFollowedBy end + +breakChars :: PandocMonad m => TikiWikiParser m B.Inlines +breakChars = try $ string "%%%" >> return B.linebreak + +-- superscript: foo{TAG(tag=>sup)}super{TAG}foo / bar{SUP()}super2{SUP}bar +superTag :: PandocMonad m => TikiWikiParser m B.Inlines +superTag = try $ fmap (B.superscript . B.text . fromEntities) ( between (string "{TAG(tag=>sup)}") (string "{TAG}") nestedString) + +superMacro :: PandocMonad m => TikiWikiParser m B.Inlines +superMacro = try $ do + string "{SUP(" + manyTill anyChar (string ")}") + body <- manyTill anyChar (string "{SUP}") + return $ B.superscript $ B.text body + +-- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux +subTag :: PandocMonad m => TikiWikiParser m B.Inlines +subTag = try $ fmap (B.subscript . B.text . fromEntities) ( between (string "{TAG(tag=>sub)}") (string "{TAG}") nestedString) + +subMacro :: PandocMonad m => TikiWikiParser m B.Inlines +subMacro = try $ do + string "{SUB(" + manyTill anyChar (string ")}") + body <- manyTill anyChar (string "{SUB}") + return $ B.subscript $ B.text body + +-- -+text+- +code :: PandocMonad m => TikiWikiParser m B.Inlines +code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString) + +macroAttr :: PandocMonad m => TikiWikiParser m (String, String) +macroAttr = try $ do + key <- many1 (noneOf "=)") + char '=' + optional $ char '"' + value <- many1 (noneOf " )\"") + optional $ char '"' + return (key, value) + +macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)] +macroAttrs = try $ sepEndBy macroAttr spaces + +-- ~np~ __not bold__ ~/np~ +noparse :: PandocMonad m => TikiWikiParser m B.Inlines +noparse = try $ do + string "~np~" + body <- manyTill anyChar (string "~/np~") + return $ B.str body + +str :: PandocMonad m => TikiWikiParser m B.Inlines +str = fmap B.str (many1 alphaNum <|> count 1 characterReference) + +symbol :: PandocMonad m => TikiWikiParser m B.Inlines +symbol = fmap B.str (count 1 nonspaceChar) + +-- [[not a link] +notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines +notExternalLink = try $ do + start <- string "[[" + body <- many (noneOf "\n[]") + end <- string "]" + return $ B.text (start ++ body ++ end) + +-- [http://www.somesite.org url|Some Site title] +-- ((internal link)) +-- +-- The ((...)) wiki links and [...] external links are handled +-- exactly the same; this abstracts that out +makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines +makeLink start middle end = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (url, title, anchor) <- wikiLinkText start middle end + parsedTitle <- parseFromString (many1 inline) title + setState $ st{ stateAllowLinks = True } + return $ B.link (url++anchor) "" $mconcat parsedTitle + +wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String) +wikiLinkText start middle end = do + string start + url <- many1 (noneOf $ middle ++ "\n") + seg1 <- option url linkContent + seg2 <- option "" linkContent + string end + if seg2 /= "" + then + return (url, seg2, seg1) + else + return (url, seg1, "") + where + linkContent = do + char '|' + many (noneOf middle) + +externalLink :: PandocMonad m => TikiWikiParser m B.Inlines +externalLink = makeLink "[" "]|" "]" + +-- NB: this wiki linking is unlikely to work for anyone besides me +-- (rlpowell); it happens to work for me because my Hakyll code has +-- post-processing that treats pandoc .md titles as valid link +-- targets, so something like +-- [see also this other post](My Other Page) is perfectly valid. +wikiLink :: PandocMonad m => TikiWikiParser m B.Inlines +wikiLink = makeLink "((" ")|" "))" diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 0aafc83c7..f4dda7a11 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE ViewPatterns #-} {- Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com> @@ -29,39 +28,39 @@ Conversion of txt2tags formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags , getT2TMeta , T2TMeta (..) - , readTxt2TagsNoMacros) + ) where -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) -import Data.Monoid ((<>)) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL) -import Text.Pandoc.Parsing hiding (space, spaces, uri, macro) +import Control.Monad (guard, void, when) +import Control.Monad.Except (catchError, throwError) +import Control.Monad.Reader (Reader, asks, runReader) import Data.Char (toLower) -import Data.List (transpose, intersperse, intercalate) -import Data.Maybe (fromMaybe) ---import Network.URI (isURI) -- Not sure whether to use this function -import Control.Monad (void, guard, when) import Data.Default -import Control.Monad.Reader (Reader, runReader, asks) -import Text.Pandoc.Error - -import Data.Time.LocalTime (getZonedTime) -import System.Directory(getModificationTime) +import Data.List (intercalate, transpose) +import Data.Maybe (fromMaybe) +import Data.Monoid ((<>)) +import Data.Text (Text) +import qualified Data.Text as T import Data.Time.Format (formatTime) +import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Compat.Time (defaultTimeLocale) -import System.IO.Error (catchIOError) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (space, spaces, uri) +import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI, + underlineSpan) type T2T = ParserT String ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file data T2TMeta = T2TMeta { - date :: String -- ^ Current date - , mtime :: String -- ^ Last modification time of infile - , infile :: FilePath -- ^ Input file + date :: String -- ^ Current date + , mtime :: String -- ^ Last modification time of infile + , infile :: FilePath -- ^ Input file , outfile :: FilePath -- ^ Output file } deriving Show @@ -69,26 +68,38 @@ instance Default T2TMeta where def = T2TMeta "" "" "" "" -- | Get the meta information required by Txt2Tags macros -getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta -getT2TMeta inps out = do - curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime +getT2TMeta :: PandocMonad m => m T2TMeta +getT2TMeta = do + inps <- P.getInputFiles + outp <- fromMaybe "" <$> P.getOutputFile + curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime let getModTime = fmap (formatTime defaultTimeLocale "%T") . - getModificationTime + P.getModificationTime curMtime <- case inps of - [] -> formatTime defaultTimeLocale "%T" <$> getZonedTime - _ -> catchIOError + [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime + _ -> catchError (maximum <$> mapM getModTime inps) (const (return "")) - return $ T2TMeta curDate curMtime (intercalate ", " inps) out + return $ T2TMeta curDate curMtime (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc -readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") +readTxt2Tags :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readTxt2Tags opts s = do + meta <- getT2TMeta + let parsed = flip runReader meta $ + readWithM parseT2T (def {stateOptions = opts}) $ + T.unpack (crFilter s) ++ "\n\n" + case parsed of + Right result -> return result + Left e -> throwError e -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc -readTxt2TagsNoMacros = readTxt2Tags def +-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc +-- readTxt2TagsNoMacros = readTxt2Tags parseT2T :: T2T Pandoc parseT2T = do @@ -137,7 +148,7 @@ setting = do string "%!" keyword <- ignoreSpacesCap (many1 alphaNum) char ':' - value <- ignoreSpacesCap (manyTill anyChar (newline)) + value <- ignoreSpacesCap (manyTill anyChar newline) return (keyword, value) -- Blocks @@ -146,7 +157,7 @@ parseBlocks :: T2T Blocks parseBlocks = mconcat <$> manyTill block eof block :: T2T Blocks -block = do +block = choice [ mempty <$ blanklines , quote @@ -184,7 +195,7 @@ para = try $ do listStart = try bulletListStart <|> orderedListStart commentBlock :: T2T Blocks -commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment +commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment -- Seperator and Strong line treated the same hrule :: T2T Blocks @@ -198,7 +209,7 @@ quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines @@ -210,16 +221,16 @@ list :: T2T Blocks list = choice [bulletList, orderedList, definitionList] bulletList :: T2T Blocks -bulletList = B.bulletList . compactify' +bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart parseBlocks) orderedList :: T2T Blocks -orderedList = B.orderedList . compactify' +orderedList = B.orderedList . compactify <$> many1 (listItem orderedListStart parseBlocks) definitionList :: T2T Blocks -definitionList = try $ do - B.definitionList . compactify'DL <$> +definitionList = try $ + B.definitionList . compactifyDL <$> many1 (listItem definitionListStart definitionListEnd) definitionListEnd :: T2T (Inlines, [Blocks]) @@ -250,7 +261,7 @@ listItem start end = try $ do firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) - parseFromString end $ firstLine ++ blank ++ rest + parseFromString' end $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. @@ -262,12 +273,6 @@ listContinuation markerLength = try $ <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -anyLineNewline :: T2T String -anyLineNewline = (++ "\n") <$> anyLine - -indentWith :: Int -> T2T String -indentWith n = count n space - -- Table table :: T2T Blocks @@ -276,17 +281,17 @@ table = try $ do rows <- many1 (many commentLine *> tableRow) let columns = transpose rows let ncolumns = length columns - let aligns = map (foldr1 findAlign) (map (map fst) columns) + let aligns = map (foldr1 findAlign . map fst) columns let rows' = map (map snd) rows let size = maximum (map length rows') let rowsPadded = map (pad size) rows' - let headerPadded = if (not (null tableHeader)) then pad size tableHeader else mempty + let headerPadded = if null tableHeader then mempty else pad size tableHeader return $ B.table mempty (zip aligns (replicate ncolumns 0.0)) headerPadded rowsPadded pad :: (Monoid a) => Int -> [a] -> [a] -pad n xs = xs ++ (replicate (n - length xs) mempty) +pad n xs = xs ++ replicate (n - length xs) mempty findAlign :: Alignment -> Alignment -> Alignment @@ -309,7 +314,7 @@ genericRow start = try $ do tableCell :: T2T (Alignment, Blocks) tableCell = try $ do leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead - content <- (manyTill inline (try $ lookAhead (cellEnd))) + content <- manyTill inline (try $ lookAhead cellEnd) rightSpaces <- length <$> many space let align = case compare leftSpaces rightSpaces of @@ -317,9 +322,9 @@ tableCell = try $ do EQ -> AlignCenter GT -> AlignRight endOfCell - return $ (align, B.plain (B.trimInlines $ mconcat content)) + return (align, B.plain (B.trimInlines $ mconcat content)) where - cellEnd = (void newline <|> (many1 space *> endOfCell)) + cellEnd = void newline <|> (many1 space *> endOfCell) endOfCell :: T2T () endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline) @@ -342,10 +347,10 @@ taggedBlock = do genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s -blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks -blockMarkupArea p f s = try $ (do +blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +blockMarkupArea p f s = try (do string s *> blankline - f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline)))) + f . mconcat <$> manyTill p (eof <|> void (string s *> blankline))) blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks blockMarkupLine p f s = try (f <$> (string s *> space *> p)) @@ -363,7 +368,7 @@ parseInlines :: T2T Inlines parseInlines = trimInlines . mconcat <$> many1 inline inline :: T2T Inlines -inline = do +inline = choice [ endline , macro @@ -385,16 +390,16 @@ inline = do ] bold :: T2T Inlines -bold = inlineMarkup inline B.strong '*' (B.str) +bold = inlineMarkup inline B.strong '*' B.str underline :: T2T Inlines -underline = inlineMarkup inline B.emph '_' (B.str) +underline = inlineMarkup inline underlineSpan '_' B.str strike :: T2T Inlines -strike = inlineMarkup inline B.strikeout '-' (B.str) +strike = inlineMarkup inline B.strikeout '-' B.str italic :: T2T Inlines -italic = inlineMarkup inline B.emph '/' (B.str) +italic = inlineMarkup inline B.emph '/' B.str code :: T2T Inlines code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id @@ -413,7 +418,7 @@ tagged = do -- Glued meaning that markup must be tight to content -- Markup can't pass newlines inlineMarkup :: Monoid a - => (T2T a) -- Content parser + => T2T a -- Content parser -> (a -> Inlines) -- Constructor -> Char -- Fence -> (String -> a) -- Special Case to handle ****** @@ -425,20 +430,24 @@ inlineMarkup p f c special = try $ do when (l == 2) (void $ notFollowedBy space) -- We must make sure that there is no space before the start of the -- closing tags - body <- optionMaybe (try $ manyTill (noneOf "\n\r") $ + body <- optionMaybe (try $ manyTill (noneOf "\n\r") (try $ lookAhead (noneOf " " >> string [c,c] ))) case body of Just middle -> do lastChar <- anyChar end <- many1 (char c) - let parser inp = parseFromString (mconcat <$> many p) inp - let start' = special (drop 2 start) + let parser inp = parseFromString' (mconcat <$> many p) inp + let start' = case drop 2 start of + "" -> mempty + xs -> special xs body' <- parser (middle ++ [lastChar]) - let end' = special (drop 2 end) + let end' = case drop 2 end of + "" -> mempty + xs -> special xs return $ f (start' <> body' <> end') Nothing -> do -- Either bad or case such as ***** guard (l >= 5) - let body' = (replicate (l - 4) c) + let body' = replicate (l - 4) c return $ f (special body') link :: T2T Inlines @@ -453,8 +462,8 @@ titleLink = try $ do guard (length tokens >= 2) char ']' let link' = last tokens - guard (length link' > 0) - let tit = concat (intersperse " " (init tokens)) + guard $ not $ null link' + let tit = unwords (init tokens) return $ B.link link' "" (B.text tit) -- Link with image @@ -479,7 +488,7 @@ macro = try $ do -- raw URLs in text are automatically linked url :: T2T Inlines url = try $ do - (rawUrl, escapedUrl) <- (try uri <|> emailAddress) + (rawUrl, escapedUrl) <- try uri <|> emailAddress return $ B.link rawUrl "" (B.str escapedUrl) uri :: T2T (String, String) @@ -520,8 +529,7 @@ image = try $ do -- List taken from txt2tags source let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] char '[' - path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions)) - ext <- oneOfStrings extensions + (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions) char ']' return $ B.image (path ++ ext) "" mempty @@ -550,11 +558,10 @@ endline = try $ do notFollowedBy quote notFollowedBy list notFollowedBy table - return $ B.softbreak + return B.softbreak str :: T2T Inlines -str = try $ do - B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") whitespace :: T2T Inlines whitespace = try $ B.space <$ spaceChar diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs new file mode 100644 index 000000000..d717a1ba8 --- /dev/null +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -0,0 +1,673 @@ +{- + Copyright (C) 2017-2018 Yuchen Pei <me@ypei.me> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Vimwiki + Copyright : Copyright (C) 2017-2018 Yuchen Pei + License : GNU GPL, version 2 or above + + Maintainer : Yuchen Pei <me@ypei.me> + Stability : alpha + Portability : portable + +Conversion of vimwiki text to 'Pandoc' document. +-} +{-- +[X]: implemented +[O]: not implemented +* block parsers: + * [X] header + * [X] hrule + * [X] comment + * [X] blockquote + * [X] preformatted -- using codeblock + * [X] displaymath + * [X] bulletlist / orderedlist + * [X] todo lists -- using span. + * [X] table + * [X] centered table -- using div + * [O] colspan and rowspan -- see issue #1024 + * [X] paragraph + * [X] definition list +* inline parsers: + * [X] bareURL + * [X] strong + * [X] emph + * [X] strikeout + * [X] code + * [X] link + * [X] image + * [X] inline math + * [X] tag + * [X] sub- and super-scripts +* misc: + * [X] `TODO:` mark + * [X] metadata placeholders: %title and %date + * [O] control placeholders: %template and %nohtml -- ignored +--} + +module Text.Pandoc.Readers.Vimwiki ( readVimwiki + ) where +import Control.Monad (guard) +import Control.Monad.Except (throwError) +import Data.Default +import Data.List (isInfixOf, isPrefixOf) +import Data.Maybe +import Data.Monoid ((<>)) +import Data.Text (Text, unpack) +import Text.Pandoc.Builder (Blocks, Inlines, fromList, toList, trimInlines) +import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code, + codeBlockWith, definitionList, + displayMath, divWith, emph, + headerWith, horizontalRule, image, + imageWith, link, math, orderedList, + para, plain, setMeta, simpleTable, + softbreak, space, spanWith, str, + strikeout, strong, subscript, + superscript) +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Definition (Attr, Block (BulletList, OrderedList), + Inline (Space), ListNumberDelim (..), + ListNumberStyle (..), Meta, Pandoc (..), + nullMeta) +import Text.Pandoc.Options (ReaderOptions) +import Text.Pandoc.Parsing (F, ParserState, ParserT, blanklines, emailAddress, + many1Till, orderedListMarker, readWithM, + registerHeader, runF, spaceChar, stateMeta', + stateOptions, uri) +import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast) +import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, + spaces, string) +import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1, + manyTill, notFollowedBy, option, skipMany1) +import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) + +readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readVimwiki opts s = do + res <- readWithM parseVimwiki def{ stateOptions = opts } + (unpack (crFilter s)) + case res of + Left e -> throwError e + Right result -> return result + +type VwParser = ParserT [Char] ParserState + + +-- constants + +specialChars :: [Char] +specialChars = "=*-#[]_~{}`$|:%^," + +spaceChars :: [Char] +spaceChars = " \t\n" + +-- main parser + +parseVimwiki :: PandocMonad m => VwParser m Pandoc +parseVimwiki = do + bs <- mconcat <$> many block + spaces + eof + st <- getState + let meta = runF (stateMeta' st) st + return $ Pandoc meta (toList bs) + +-- block parser + +block :: PandocMonad m => VwParser m Blocks +block = do + res <- choice [ mempty <$ blanklines + , header + , hrule + , mempty <$ comment + , mixedList + , preformatted + , displayMath + , table + , mempty <$ placeholder + , blockQuote + , definitionList + , para + ] + trace (take 60 $ show $ toList res) + return res + +blockML :: PandocMonad m => VwParser m Blocks +blockML = choice [preformatted, displayMath, table] + +header :: PandocMonad m => VwParser m Blocks +header = try $ do + sp <- many spaceChar + eqs <- many1 (char '=') + spaceChar + let lev = length eqs + guard $ lev <= 6 + contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar + >> string eqs >> many spaceChar >> newline) + attr <- registerHeader (makeId contents, + if sp == "" then [] else ["justcenter"], []) contents + return $ B.headerWith attr lev contents + +para :: PandocMonad m => VwParser m Blocks +para = try $ do + contents <- trimInlines . mconcat <$> many1 inline + if all (==Space) (toList contents) + then return mempty + else return $ B.para contents + +hrule :: PandocMonad m => VwParser m Blocks +hrule = try $ B.horizontalRule <$ (string "----" >> many (char '-') >> newline) + +comment :: PandocMonad m => VwParser m () +comment = try $ do + many spaceChar >> string "%%" >> many (noneOf "\n") + return () + +blockQuote :: PandocMonad m => VwParser m Blocks +blockQuote = try $ do + string " " + contents <- trimInlines . mconcat <$> many1 inlineBQ + if all (==Space) (toList contents) + then return mempty + else return $ B.blockQuote $ B.plain contents + +definitionList :: PandocMonad m => VwParser m Blocks +definitionList = try $ + B.definitionList <$> many1 (dlItemWithDT <|> dlItemWithoutDT) + +dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) +dlItemWithDT = do + dt <- definitionTerm + dds <- many definitionDef + return (dt, dds) + +dlItemWithoutDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) +dlItemWithoutDT = do + dds <- many1 definitionDef + return (mempty, dds) + +definitionDef :: PandocMonad m => VwParser m Blocks +definitionDef = try $ + notFollowedBy definitionTerm >> many spaceChar + >> (definitionDef1 <|> definitionDef2) + +definitionDef1 :: PandocMonad m => VwParser m Blocks +definitionDef1 = try $ mempty <$ defMarkerE + +definitionDef2 :: PandocMonad m => VwParser m Blocks +definitionDef2 = try $ B.plain <$> + (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline) + + +definitionTerm :: PandocMonad m => VwParser m Inlines +definitionTerm = try $ do + x <- definitionTerm1 <|> definitionTerm2 + guard (stringify x /= "") + return x + +definitionTerm1 :: PandocMonad m => VwParser m Inlines +definitionTerm1 = try $ + trimInlines . mconcat <$> manyTill inline' (try defMarkerE) + +definitionTerm2 :: PandocMonad m => VwParser m Inlines +definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' + (try $lookAhead (defMarkerM >> notFollowedBy hasDefMarkerM)) + +defMarkerM :: PandocMonad m => VwParser m Char +defMarkerM = string "::" >> spaceChar + +defMarkerE :: PandocMonad m => VwParser m Char +defMarkerE = string "::" >> newline + +hasDefMarkerM :: PandocMonad m => VwParser m String +hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) + +preformatted :: PandocMonad m => VwParser m Blocks +preformatted = try $ do + many spaceChar >> string "{{{" + attrText <- many (noneOf "\n") + lookAhead newline + contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" + >> many spaceChar >> newline)) + if (contents /= "") && (head contents == '\n') + then return $ B.codeBlockWith (makeAttr attrText) (tail contents) + else return $ B.codeBlockWith (makeAttr attrText) contents + +makeAttr :: String -> Attr +makeAttr s = + let xs = splitBy (`elem` " \t") s in + ("", [], mapMaybe nameValue xs) + +nameValue :: String -> Maybe (String, String) +nameValue s = + let t = splitBy (== '=') s in + if length t /= 2 + then Nothing + else let (a, b) = (head t, last t) in + if (length b < 2) || ((head b, last b) /= ('"', '"')) + then Nothing + else Just (a, stripFirstAndLast b) + + +displayMath :: PandocMonad m => VwParser m Blocks +displayMath = try $ do + many spaceChar >> string "{{$" + mathTag <- option "" mathTagParser + many space + contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" + >> many spaceChar >> newline)) + let contentsWithTags + | mathTag == "" = contents + | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents + ++ "\n\\end{" ++ mathTag ++ "}" + return $ B.para $ B.displayMath contentsWithTags + + +mathTagLaTeX :: String -> String +mathTagLaTeX s = case s of + "equation" -> "" + "equation*" -> "" + "gather" -> "gathered" + "gather*" -> "gathered" + "multline" -> "gathered" + "multline*" -> "gathered" + "eqnarray" -> "aligned" + "eqnarray*" -> "aligned" + "align" -> "aligned" + "align*" -> "aligned" + "alignat" -> "aligned" + "alignat*" -> "aligned" + _ -> s + + +mixedList :: PandocMonad m => VwParser m Blocks +mixedList = try $ do + (bl, _) <- mixedList' (-1) + return $ head bl + +mixedList' :: PandocMonad m => Int -> VwParser m ([Blocks], Int) +mixedList' prevInd = do + (curInd, builder) <- option (-1, "na") (lookAhead listStart) + if curInd < prevInd + then return ([], curInd) + else do + listStart + curLine <- listItemContent + let listBuilder = + if builder == "ul" then B.bulletList else B.orderedList + (subList, lowInd) <- mixedList' curInd + if lowInd >= curInd + then do + (sameIndList, endInd) <- mixedList' lowInd + let curList = combineList curLine subList ++ sameIndList + if curInd > prevInd + then return ([listBuilder curList], endInd) + else return (curList, endInd) + else do + let (curList, endInd) = (combineList curLine subList, + lowInd) + if curInd > prevInd + then return ([listBuilder curList], endInd) + else return (curList, endInd) + +plainInlineML' :: PandocMonad m => Inlines -> VwParser m Blocks +plainInlineML' w = do + xs <- many inlineML + newline + return $ B.plain $ trimInlines $ mconcat $ w:xs + +plainInlineML :: PandocMonad m => VwParser m Blocks +plainInlineML = notFollowedBy listStart >> spaceChar >> plainInlineML' mempty + + +listItemContent :: PandocMonad m => VwParser m Blocks +listItemContent = try $ do + w <- option mempty listTodoMarker + x <- plainInlineML' w + y <- many blocksThenInline + z <- many blockML + return $ mconcat $ x:y ++ z + +blocksThenInline :: PandocMonad m => VwParser m Blocks +blocksThenInline = try $ do + y <- many1 blockML + x <- plainInlineML + return $ mconcat $ y ++ [x] + +listTodoMarker :: PandocMonad m => VwParser m Inlines +listTodoMarker = try $ do + x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) + (oneOf " .oOX") + return $ makeListMarkerSpan x + +makeListMarkerSpan :: Char -> Inlines +makeListMarkerSpan x = + let cl = case x of + ' ' -> "done0" + '.' -> "done1" + 'o' -> "done2" + 'O' -> "done3" + 'X' -> "done4" + _ -> "" + in + B.spanWith ("", [cl], []) mempty + +combineList :: Blocks -> [Blocks] -> [Blocks] +combineList x [y] = case toList y of + [BulletList z] -> [fromList $ toList x + ++ [BulletList z]] + [OrderedList attr z] -> [fromList $ toList x + ++ [OrderedList attr z]] + _ -> x:[y] +combineList x xs = x:xs + +listStart :: PandocMonad m => VwParser m (Int, String) +listStart = try $ do + s <- many spaceChar + listType <- bulletListMarkers <|> orderedListMarkers + spaceChar + return (length s, listType) + +bulletListMarkers :: PandocMonad m => VwParser m String +bulletListMarkers = "ul" <$ (char '*' <|> char '-') + +orderedListMarkers :: PandocMonad m => VwParser m String +orderedListMarkers = + ("ol" <$choice (orderedListMarker Decimal Period:(($OneParen) . orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) + <|> ("ol" <$ char '#') + +--many need trimInlines +table :: PandocMonad m => VwParser m Blocks +table = try $ do + indent <- lookAhead (many spaceChar) + (th, trs) <- table1 <|> table2 + let tab = B.simpleTable th trs + if indent == "" + then return tab + else return $ B.divWith ("", ["center"], []) tab + +-- table with header +table1 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]]) +table1 = try $ do + th <- tableRow + many1 tableHeaderSeparator + trs <- many tableRow + return (th, trs) + +-- headerless table +table2 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]]) +table2 = try $ do + trs <- many1 tableRow + return (replicate (length $ head trs) mempty, trs) + +tableHeaderSeparator :: PandocMonad m => VwParser m () +tableHeaderSeparator = try $ do + many spaceChar >> char '|' >> many1 (many1 (char '-') >> char '|') + >> many spaceChar >> newline + return () + +tableRow :: PandocMonad m => VwParser m [Blocks] +tableRow = try $ do + many spaceChar >> char '|' + s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar + >> newline)) + guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|") + tr <- many tableCell + many spaceChar >> char '\n' + return tr + +tableCell :: PandocMonad m => VwParser m Blocks +tableCell = try $ + B.plain . trimInlines . mconcat <$> manyTill inline' (char '|') + +placeholder :: PandocMonad m => VwParser m () +placeholder = try $ + choice (ph <$> ["title", "date"]) <|> noHtmlPh <|> templatePh + +ph :: PandocMonad m => String -> VwParser m () +ph s = try $ do + many spaceChar >>string ('%':s) >> spaceChar + contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline) + --use lookAhead because of placeholder in the whitespace parser + let meta' = return $ B.setMeta s contents nullMeta :: F Meta + updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' } + +noHtmlPh :: PandocMonad m => VwParser m () +noHtmlPh = try $ + () <$ (many spaceChar >> string "%nohtml" >> many spaceChar + >> lookAhead newline) + +templatePh :: PandocMonad m => VwParser m () +templatePh = try $ + () <$ (many spaceChar >> string "%template" >>many (noneOf "\n") + >> lookAhead newline) + +-- inline parser + +inline :: PandocMonad m => VwParser m Inlines +inline = choice $ whitespace endlineP:inlineList + +inlineList :: PandocMonad m => [VwParser m Inlines] +inlineList = [ bareURL + , todoMark + , str + , strong + , emph + , strikeout + , code + , link + , image + , inlineMath + , tag + , superscript + , subscript + , special + ] + +-- inline parser without softbreaks or comment breaks +inline' :: PandocMonad m => VwParser m Inlines +inline' = choice $ whitespace':inlineList + +-- inline parser for blockquotes +inlineBQ :: PandocMonad m => VwParser m Inlines +inlineBQ = choice $ whitespace endlineBQ:inlineList + +-- inline parser for mixedlists +inlineML :: PandocMonad m => VwParser m Inlines +inlineML = choice $ whitespace endlineML:inlineList + +str :: PandocMonad m => VwParser m Inlines +str = B.str <$>many1 (noneOf $ spaceChars ++ specialChars) + +whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines +whitespace endline = B.space <$ (skipMany1 spaceChar <|> + try (newline >> (comment <|> placeholder))) + <|> B.softbreak <$ endline + +whitespace' :: PandocMonad m => VwParser m Inlines +whitespace' = B.space <$ skipMany1 spaceChar + +special :: PandocMonad m => VwParser m Inlines +special = B.str <$> count 1 (oneOf specialChars) + +bareURL :: PandocMonad m => VwParser m Inlines +bareURL = try $ do + (orig, src) <- uri <|> emailAddress + return $ B.link src "" (B.str orig) + +strong :: PandocMonad m => VwParser m Inlines +strong = try $ do + s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*") + guard $ (head s `notElem` spaceChars) + && (last s `notElem` spaceChars) + char '*' + contents <- mconcat <$>manyTill inline' (char '*' + >> notFollowedBy alphaNum) + return $ B.spanWith (makeId contents, [], []) mempty + <> B.strong contents + +makeId :: Inlines -> String +makeId i = concat (stringify <$> toList i) + +emph :: PandocMonad m => VwParser m Inlines +emph = try $ do + s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_") + guard $ (head s `notElem` spaceChars) + && (last s `notElem` spaceChars) + char '_' + contents <- mconcat <$>manyTill inline' (char '_' + >> notFollowedBy alphaNum) + return $ B.emph contents + +strikeout :: PandocMonad m => VwParser m Inlines +strikeout = try $ do + string "~~" + contents <- mconcat <$>many1Till inline' (string "~~") + return $ B.strikeout contents + +code :: PandocMonad m => VwParser m Inlines +code = try $ do + char '`' + contents <- many1Till (noneOf "\n") (char '`') + return $ B.code contents + +superscript :: PandocMonad m => VwParser m Inlines +superscript = try $ + B.superscript . mconcat <$> (char '^' >> many1Till inline' (char '^')) + +subscript :: PandocMonad m => VwParser m Inlines +subscript = try $ + B.subscript . mconcat <$> (string ",," + >> many1Till inline' (try $ string ",,")) + +link :: PandocMonad m => VwParser m Inlines +link = try $ do + string "[[" + contents <- lookAhead $ manyTill anyChar (string "]]") + case '|' `elem` contents of + False -> do + manyTill anyChar (string "]]") +-- not using try here because [[hell]o]] is not rendered as a link in vimwiki + return $ B.link (procLink contents) "" (B.str contents) + True -> do + url <- manyTill anyChar $ char '|' + lab <- mconcat <$> manyTill inline (string "]]") + return $ B.link (procLink url) "" lab + +image :: PandocMonad m => VwParser m Inlines +image = try $ do + string "{{" + contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}") + images $ length $ filter (== '|') contentText + +images :: PandocMonad m => Int -> VwParser m Inlines +images k + | k == 0 = do + imgurl <- manyTill anyChar (try $ string "}}") + return $ B.image (procImgurl imgurl) "" (B.str "") + | k == 1 = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$> manyTill inline (try $ string "}}") + return $ B.image (procImgurl imgurl) "" alt + | k == 2 = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$>manyTill inline (char '|') + attrText <- manyTill anyChar (try $ string "}}") + return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt + | otherwise = do + imgurl <- manyTill anyChar (char '|') + alt <- mconcat <$>manyTill inline (char '|') + attrText <- manyTill anyChar (char '|') + manyTill anyChar (try $ string "}}") + return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt + +procLink' :: String -> String +procLink' s + | take 6 s == "local:" = "file" ++ drop 5 s + | take 6 s == "diary:" = "diary/" ++ drop 6 s ++ ".html" + | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ]) + = s + | s == "" = "" + | last s == '/' = s + | otherwise = s ++ ".html" + +procLink :: String -> String +procLink s = procLink' x ++ y + where (x, y) = break (=='#') s + +procImgurl :: String -> String +procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s + +inlineMath :: PandocMonad m => VwParser m Inlines +inlineMath = try $ do + char '$' + contents <- many1Till (noneOf "\n") (char '$') + return $ B.math contents + +tag :: PandocMonad m => VwParser m Inlines +tag = try $ do + char ':' + s <- manyTill (noneOf spaceChars) (try (char ':' >> lookAhead space)) + guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") + let ss = splitBy (==':') s + return $ mconcat $ makeTagSpan' (head ss):(makeTagSpan <$> tail ss) + +todoMark :: PandocMonad m => VwParser m Inlines +todoMark = try $ do + string "TODO:" + return $ B.spanWith ("", ["todo"], []) (B.str "TODO:") + +-- helper functions and parsers +endlineP :: PandocMonad m => VwParser m () +endlineP = () <$ try (newline <* nFBTTBSB <* notFollowedBy blockQuote) + +endlineBQ :: PandocMonad m => VwParser m () +endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ") + +endlineML :: PandocMonad m => VwParser m () +endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar) + +--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks +nFBTTBSB :: PandocMonad m => VwParser m () +nFBTTBSB = + notFollowedBy newline <* + notFollowedBy hrule <* + notFollowedBy tableRow <* + notFollowedBy header <* + notFollowedBy listStart <* + notFollowedBy preformatted <* + notFollowedBy displayMath <* + notFollowedBy hasDefMarker + +hasDefMarker :: PandocMonad m => VwParser m () +hasDefMarker = () <$ manyTill (noneOf "\n") (string "::" >> oneOf spaceChars) + +makeTagSpan' :: String -> Inlines +makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> + B.spanWith (s, ["tag"], []) (B.str s) + +makeTagSpan :: String -> Inlines +makeTagSpan s = B.space <> makeTagSpan' s + +mathTagParser :: PandocMonad m => VwParser m String +mathTagParser = do + s <- try $ lookAhead (char '%' >> manyTill (noneOf spaceChars) + (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space)) + char '%' >> string s >> char '%' + return $ mathTagLaTeX s diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index d08d636df..a1c5c919e 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011-2016 John MacFarlane + Copyright : Copyright (C) 2011-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,32 +30,36 @@ Functions for converting an HTML file into one that can be viewed offline, by incorporating linked images, CSS, and scripts into the HTML using data URIs. -} -module Text.Pandoc.SelfContained ( makeSelfContained ) where -import Text.HTML.TagSoup -import Network.URI (isURI, escapeURIString, URI(..), parseURI) +module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where +import Codec.Compression.GZip as Gzip +import Control.Applicative ((<|>)) +import Control.Monad.Except (throwError) +import Control.Monad.Trans (lift) +import Data.ByteString (ByteString) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B -import Data.ByteString (ByteString) -import System.FilePath (takeExtension, takeDirectory, (</>)) -import Data.Char (toLower, isAscii, isAlphaNum) -import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', err, fetchItem', warn, trim) -import Text.Pandoc.MediaBag (MediaBag) +import Data.Char (isAlphaNum, isAscii, toLower) +import Data.List (isPrefixOf) +import Data.Monoid ((<>)) +import Network.URI (escapeURIString) +import System.FilePath (takeDirectory, takeExtension, (</>)) +import Text.HTML.TagSoup +import Text.Pandoc.Class (PandocMonad (..), fetchItem, getInputFiles, report, + setInputFiles) +import Text.Pandoc.Error +import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType) +import Text.Pandoc.Shared (isURI, renderTags', trim) import Text.Pandoc.UTF8 (toString) -import Text.Pandoc.Options (WriterOptions(..)) -import Data.List (isPrefixOf) -import Control.Applicative ((<|>)) -import Text.Parsec (runParserT, ParsecT) +import Text.Parsec (ParsecT, runParserT) import qualified Text.Parsec as P -import Control.Monad.Trans (lift) isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c -makeDataURI :: String -> ByteString -> String -makeDataURI mime raw = +makeDataURI :: (MimeType, ByteString) -> String +makeDataURI (mime, raw) = if textual then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw) else "data:" ++ mime' ++ ";base64," ++ toString (encode raw) @@ -64,64 +68,143 @@ makeDataURI mime raw = then mime ++ ";charset=utf-8" else mime -- mime type already has charset -convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String) -convertTag media sourceURL t@(TagOpen tagname as) +convertTags :: PandocMonad m => [Tag String] -> m [Tag String] +convertTags [] = return [] +convertTags (t@TagOpen{}:ts) + | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts +convertTags (t@(TagOpen tagname as):ts) | tagname `elem` - ["img", "embed", "video", "input", "audio", "source", "track"] = do + ["img", "embed", "video", "input", "audio", "source", "track", + "section"] = do as' <- mapM processAttribute as - return $ TagOpen tagname as' + rest <- convertTags ts + return $ TagOpen tagname as' : rest where processAttribute (x,y) = - if x == "src" || x == "href" || x == "poster" + if x `elem` ["src", "data-src", "href", "poster", "data-background-image"] then do - enc <- getDataURI media sourceURL (fromAttrib "type" t) y + enc <- getDataURI (fromAttrib "type" t) y return (x, enc) else return (x,y) -convertTag media sourceURL t@(TagOpen "script" as) = +convertTags (t@(TagOpen "script" as):TagClose "script":ts) = case fromAttrib "src" t of - [] -> return t + [] -> (t:) <$> convertTags ts src -> do - enc <- getDataURI media sourceURL (fromAttrib "type" t) src - return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) -convertTag media sourceURL t@(TagOpen "link" as) = + let typeAttr = fromAttrib "type" t + res <- getData typeAttr src + rest <- convertTags ts + case res of + Left dataUri -> return $ TagOpen "script" + (("src",dataUri) : [(x,y) | (x,y) <- as, x /= "src"]) : + TagClose "script" : rest + Right (mime, bs) + | ("text/javascript" `isPrefixOf` mime || + "application/javascript" `isPrefixOf` mime || + "application/x-javascript" `isPrefixOf` mime) && + not ("</script" `B.isInfixOf` bs) -> + return $ + TagOpen "script" [("type", typeAttr)|not (null typeAttr)] + : TagText (toString bs) + : TagClose "script" + : rest + | otherwise -> + return $ TagOpen "script" + (("src",makeDataURI (mime, bs)) : + [(x,y) | (x,y) <- as, x /= "src"]) : + TagClose "script" : rest +convertTags (t@(TagOpen "link" as):ts) = case fromAttrib "href" t of - [] -> return t + [] -> (t:) <$> convertTags ts src -> do - enc <- getDataURI media sourceURL (fromAttrib "type" t) src - return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) -convertTag _ _ t = return t - -cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString - -> IO ByteString -cssURLs media sourceURL d orig = do - res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig + res <- getData (fromAttrib "type" t) src + case res of + Left dataUri -> do + rest <- convertTags ts + return $ TagOpen "link" + (("href",dataUri) : [(x,y) | (x,y) <- as, x /= "href"]) : + rest + Right (mime, bs) + | "text/css" `isPrefixOf` mime + && null (fromAttrib "media" t) + && not ("</" `B.isInfixOf` bs) -> do + rest <- convertTags $ + dropWhile (==TagClose "link") ts + return $ + TagOpen "style" [("type", mime)] + : TagText (toString bs) + : TagClose "style" + : rest + | otherwise -> do + rest <- convertTags ts + return $ TagOpen "link" + (("href",makeDataURI (mime, bs)) : + [(x,y) | (x,y) <- as, x /= "href"]) : rest +convertTags (t:ts) = (t:) <$> convertTags ts + +cssURLs :: PandocMonad m + => FilePath -> ByteString -> m ByteString +cssURLs d orig = do + res <- runParserT (parseCSSUrls d) () "css" orig case res of - Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig + Left e -> do + report $ CouldNotParseCSS (show e) + return orig Right bs -> return bs -parseCSSUrls :: MediaBag -> Maybe String -> FilePath - -> ParsecT ByteString () IO ByteString -parseCSSUrls media sourceURL d = B.concat <$> P.many - (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther) +parseCSSUrls :: PandocMonad m + => FilePath -> ParsecT ByteString () m ByteString +parseCSSUrls d = B.concat <$> P.many + (pCSSWhite <|> pCSSComment <|> pCSSImport d <|> pCSSUrl d <|> pCSSOther) + +pCSSImport :: PandocMonad m + => FilePath -> ParsecT ByteString () m ByteString +pCSSImport d = P.try $ do + P.string "@import" + P.spaces + res <- (pQuoted <|> pUrl) >>= handleCSSUrl d + P.spaces + P.char ';' + P.spaces + case res of + Left b -> return $ B.pack "@import " <> b + Right (_, b) -> return b -- Note: some whitespace in CSS is significant, so we can't collapse it! -pCSSWhite :: ParsecT ByteString () IO ByteString +pCSSWhite :: PandocMonad m => ParsecT ByteString () m ByteString pCSSWhite = B.singleton <$> P.space <* P.spaces -pCSSComment :: ParsecT ByteString () IO ByteString +pCSSComment :: PandocMonad m => ParsecT ByteString () m ByteString pCSSComment = P.try $ do P.string "/*" P.manyTill P.anyChar (P.try (P.string "*/")) return B.empty -pCSSOther :: ParsecT ByteString () IO ByteString -pCSSOther = do +pCSSOther :: PandocMonad m => ParsecT ByteString () m ByteString +pCSSOther = (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|> - (B.singleton <$> P.char 'u') <|> - (B.singleton <$> P.char '/') + (B.singleton <$> P.char 'u') <|> + (B.singleton <$> P.char '/') + +pCSSUrl :: PandocMonad m + => FilePath -> ParsecT ByteString () m ByteString +pCSSUrl d = P.try $ do + res <- pUrl >>= handleCSSUrl d + case res of + Left b -> return b + Right (mt,b) -> do + let enc = makeDataURI (mt, b) + return (B.pack $ "url(" ++ enc ++ ")") + +pQuoted :: PandocMonad m + => ParsecT ByteString () m (String, ByteString) +pQuoted = P.try $ do + quote <- P.oneOf "\"'" + url <- P.manyTill P.anyChar (P.char quote) + let fallback = B.pack ([quote] ++ trim url ++ [quote]) + return (url, fallback) -pCSSUrl :: MediaBag -> Maybe String -> FilePath - -> ParsecT ByteString () IO ByteString -pCSSUrl media sourceURL d = P.try $ do +pUrl :: PandocMonad m + => ParsecT ByteString () m (String, ByteString) +pUrl = P.try $ do P.string "url(" P.spaces quote <- P.option Nothing (Just <$> P.oneOf "\"'") @@ -130,49 +213,66 @@ pCSSUrl media sourceURL d = P.try $ do P.char ')' let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++ maybe "" (:[]) quote ++ ")") - case trim url of - '#':_ -> return fallback - 'd':'a':'t':'a':':':_ -> return fallback + return (url, fallback) + +handleCSSUrl :: PandocMonad m + => FilePath -> (String, ByteString) + -> ParsecT ByteString () m + (Either ByteString (MimeType, ByteString)) +handleCSSUrl d (url, fallback) = + case escapeURIString (/='|') (trim url) of + '#':_ -> return $ Left fallback + 'd':'a':'t':'a':':':_ -> return $ Left fallback u -> do let url' = if isURI u then u else d </> u - enc <- lift $ getDataURI media sourceURL "" url' - return (B.pack $ "url(" ++ enc ++ ")") + res <- lift $ getData "" url' + case res of + Left uri -> return $ Left (B.pack $ "url(" ++ uri ++ ")") + Right (mt, raw) -> do + -- note that the downloaded CSS may + -- itself contain url(...). + b <- if "text/css" `isPrefixOf` mt + then cssURLs d raw + else return raw + return $ Right (mt, b) +getDataURI :: PandocMonad m => MimeType -> String -> m String +getDataURI mimetype src = do + res <- getData mimetype src + case res of + Left uri -> return uri + Right x -> return $ makeDataURI x -getDataURI :: MediaBag -> Maybe String -> MimeType -> String - -> IO String -getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri -getDataURI media sourceURL mimetype src = do +getData :: PandocMonad m + => MimeType -> String + -> m (Either String (MimeType, ByteString)) +getData _ src@('d':'a':'t':'a':':':_) = return $ Left src-- already data: uri +getData mimetype src = do let ext = map toLower $ takeExtension src - fetchResult <- fetchItem' media sourceURL src - (raw, respMime) <- case fetchResult of - Left msg -> err 67 $ "Could not fetch " ++ src ++ - "\n" ++ show msg - Right x -> return x + (raw, respMime) <- fetchItem src let raw' = if ext == ".gz" - then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks - $ [raw] + then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw] else raw - let mime = case (mimetype, respMime) of - ("",Nothing) -> error + mime <- case (mimetype, respMime) of + ("",Nothing) -> throwError $ PandocSomeError $ "Could not determine mime type for `" ++ src ++ "'" - (x, Nothing) -> x - (_, Just x ) -> x - let cssSourceURL = case parseURI src of - Just u - | uriScheme u `elem` ["http:","https:"] -> - Just $ show u{ uriPath = "", - uriQuery = "", - uriFragment = "" } - _ -> Nothing - result <- if mime == "text/css" - then cssURLs media cssSourceURL (takeDirectory src) raw' + (x, Nothing) -> return x + (_, Just x ) -> return x + result <- if "text/css" `isPrefixOf` mime + then do + oldInputs <- getInputFiles + setInputFiles [src] + res <- cssURLs (takeDirectory src) raw' + setInputFiles oldInputs + return res else return raw' - return $ makeDataURI mime result + return $ Right (mime, result) + + -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. -makeSelfContained :: WriterOptions -> String -> IO String -makeSelfContained opts inp = do +makeSelfContained :: PandocMonad m => String -> m String +makeSelfContained inp = do let tags = parseTags inp - out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags + out' <- convertTags tags return $ renderTags' out' diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index bd2da945e..52e1447db 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,8 +1,12 @@ -{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, - FlexibleContexts, ScopedTypeVariables, PatternGuards, - ViewPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses #-} + +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -49,21 +53,18 @@ module Text.Pandoc.Shared ( toRomanNumeral, escapeURI, tabFilter, + crFilter, -- * Date/time normalizeDate, -- * Pandoc block and inline list processing orderedListMarkers, - normalizeSpaces, extractSpaces, - normalize, - normalizeInlines, - normalizeBlocks, removeFormatting, + deNote, stringify, capitalize, compactify, - compactify', - compactify'DL, + compactifyDL, linesToPara, Element (..), hierarchicalize, @@ -71,29 +72,26 @@ module Text.Pandoc.Shared ( inlineListToIdentifier, isHeaderBlock, headerShift, + stripEmptyParagraphs, isTightList, addMetaField, makeMeta, + eastAsianLineBreakFilter, + underlineSpan, -- * TagSoup HTML handling renderTags', -- * File handling inDirectory, - getDefaultReferenceDocx, - getDefaultReferenceODT, - readDataFile, - readDataFileUTF8, - fetchItem, - fetchItem', - openURL, collapseFilePath, filteredFilesFromArchive, + -- * URI handling + schemes, + isURI, -- * Error handling - err, - warn, mapLeft, - hush, -- * for squashing blocks blocksToInlines, + blocksToInlines', -- * Safe read safeRead, -- * Temp directory @@ -102,74 +100,37 @@ module Text.Pandoc.Shared ( pandocVersion ) where -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) -import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) -import qualified Text.Pandoc.Builder as B -import qualified Text.Pandoc.UTF8 as UTF8 -import System.Environment (getProgName) -import System.Exit (exitWith, ExitCode(..)) -import Data.Char ( toLower, isLower, isUpper, isAlpha, - isLetter, isDigit, isSpace ) -import Data.List ( find, stripPrefix, intercalate ) -import Data.Maybe (mapMaybe) -import Data.Version ( showVersion ) +import Codec.Archive.Zip +import qualified Control.Exception as E +import Control.Monad (MonadPlus (..), msum, unless) +import qualified Control.Monad.State.Strict as S +import qualified Data.ByteString.Lazy as BL +import Data.Char (isAlpha, isDigit, isLetter, isLower, isSpace, isUpper, + toLower) +import Data.Data (Data, Typeable) +import Data.List (find, intercalate, intersperse, stripPrefix) import qualified Data.Map as M -import Network.URI ( escapeURIString, nonStrictRelativeTo, - unEscapeString, parseURIReference, isAllowedInURI, - parseURI, URI(..) ) +import Data.Maybe (mapMaybe) +import Data.Monoid ((<>)) +import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr) import qualified Data.Set as Set +import qualified Data.Text as T +import Data.Version (showVersion) +import Network.URI (URI (uriScheme), escapeURIString, parseURI) +import Paths_pandoc (version) import System.Directory -import System.FilePath (splitDirectories, isPathSeparator) +import System.FilePath (isPathSeparator, splitDirectories) import qualified System.FilePath.Posix as Posix -import Text.Pandoc.MIME (MimeType, getMimeType) -import System.FilePath ( (</>), takeExtension, dropExtension) -import Data.Generics (Typeable, Data) -import qualified Control.Monad.State as S -import Control.Monad.Trans (MonadIO (..)) -import qualified Control.Exception as E -import Control.Monad (msum, unless, MonadPlus(..)) -import Text.Pandoc.Pretty (charWidth) -import Text.Pandoc.Compat.Time -import Data.Time.Clock.POSIX -import System.IO (stderr) import System.IO.Temp -import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), - renderOptions) -import Data.Monoid ((<>)) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as B8 -import Data.ByteString.Base64 (decodeLenient) -import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) -import qualified Data.Text as T (toUpper, pack, unpack) -import Data.ByteString.Lazy (toChunks, fromChunks) -import qualified Data.ByteString.Lazy as BL -import Paths_pandoc (version) - -import Codec.Archive.Zip - -#ifdef EMBED_DATA_FILES -import Text.Pandoc.Data (dataFiles) -#else -import Paths_pandoc (getDataFileName) -#endif -#ifdef HTTP_CLIENT -import Network.HTTP.Client (httpLbs, responseBody, responseHeaders, - Request(port,host)) -import Network.HTTP.Client (parseRequest) -import Network.HTTP.Client (newManager) -import Network.HTTP.Client.Internal (addProxy) -import Network.HTTP.Client.TLS (tlsManagerSettings) -import System.Environment (getEnv) -import Network.HTTP.Types.Header ( hContentType) -import Network (withSocketsDo) -#else -import Network.URI (parseURI) -import Network.HTTP (findHeader, rspBody, - RequestMethod(..), HeaderName(..), mkRequest) -import Network.Browser (browse, setAllowRedirects, setOutHandler, request) -#endif +import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions, + renderTagsOptions) +import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..)) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Compat.Time +import Text.Pandoc.Definition +import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.Pretty (charWidth) +import Text.Pandoc.Walk -- | Version number of pandoc library. pandocVersion :: String @@ -185,11 +146,11 @@ splitBy _ [] = [] splitBy isSep lst = let (first, rest) = break isSep lst rest' = dropWhile isSep rest - in first:(splitBy isSep rest') + in first:splitBy isSep rest' splitByIndices :: [Int] -> [a] -> [[a]] splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest) +splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x) xs) rest where (first, rest) = splitAt x lst -- | Split string into chunks divided at specified indices. @@ -197,7 +158,7 @@ splitStringByIndices :: [Int] -> [Char] -> [[Char]] splitStringByIndices [] lst = [lst] splitStringByIndices (x:xs) lst = let (first, rest) = splitAt' x lst in - first : (splitStringByIndices (map (\y -> y - x) xs) rest) + first : splitStringByIndices (map (\y -> y - x) xs) rest splitAt' :: Int -> [Char] -> ([Char],[Char]) splitAt' _ [] = ([],[]) @@ -236,9 +197,9 @@ backslashEscapes = map (\ch -> (ch, ['\\',ch])) escapeStringUsing :: [(Char, String)] -> String -> String escapeStringUsing _ [] = "" escapeStringUsing escapeTable (x:xs) = - case (lookup x escapeTable) of - Just str -> str ++ rest - Nothing -> x:rest + case lookup x escapeTable of + Just str -> str ++ rest + Nothing -> x:rest where rest = escapeStringUsing escapeTable xs -- | Strip trailing newlines from string. @@ -260,35 +221,33 @@ trimr = reverse . triml . reverse -- | Strip leading and trailing characters from string stripFirstAndLast :: String -> String stripFirstAndLast str = - drop 1 $ take ((length str) - 1) str + drop 1 $ take (length str - 1) str -- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). camelCaseToHyphenated :: String -> String camelCaseToHyphenated [] = "" camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = - a:'-':(toLower b):(camelCaseToHyphenated rest) -camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) + a:'-':toLower b:camelCaseToHyphenated rest +camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest -- | Convert number < 4000 to uppercase roman numeral. toRomanNumeral :: Int -> String -toRomanNumeral x = - if x >= 4000 || x < 0 - then "?" - else case x of - _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000) - _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900) - _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500) - _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400) - _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100) - _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90) - _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50) - _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40) - _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10) - _ | x == 9 -> "IX" - _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5) - _ | x == 4 -> "IV" - _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) - _ -> "" +toRomanNumeral x + | x >= 4000 || x < 0 = "?" + | x >= 1000 = "M" ++ toRomanNumeral (x - 1000) + | x >= 900 = "CM" ++ toRomanNumeral (x - 900) + | x >= 500 = "D" ++ toRomanNumeral (x - 500) + | x >= 400 = "CD" ++ toRomanNumeral (x - 400) + | x >= 100 = "C" ++ toRomanNumeral (x - 100) + | x >= 90 = "XC" ++ toRomanNumeral (x - 90) + | x >= 50 = "L" ++ toRomanNumeral (x - 50) + | x >= 40 = "XL" ++ toRomanNumeral (x - 40) + | x >= 10 = "X" ++ toRomanNumeral (x - 10) + | x == 9 = "IX" + | x >= 5 = "V" ++ toRomanNumeral (x - 5) + | x == 4 = "IV" + | x >= 1 = "I" ++ toRomanNumeral (x - 1) + | otherwise = "" -- | Escape whitespace and some punctuation characters in URI. escapeURI :: String -> String @@ -296,26 +255,23 @@ escapeURI = escapeURIString (not . needsEscaping) where needsEscaping c = isSpace c || c `elem` ['<','>','|','"','{','}','[',']','^', '`'] - --- | Convert tabs to spaces and filter out DOS line endings. --- Tabs will be preserved if tab stop is set to 0. +-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop - -> String -- ^ Input - -> String -tabFilter tabStop = - let go _ [] = "" - go _ ('\n':xs) = '\n' : go tabStop xs - go _ ('\r':'\n':xs) = '\n' : go tabStop xs - go _ ('\r':xs) = '\n' : go tabStop xs - go spsToNextStop ('\t':xs) = - if tabStop == 0 - then '\t' : go tabStop xs - else replicate spsToNextStop ' ' ++ go tabStop xs - go 1 (x:xs) = - x : go tabStop xs - go spsToNextStop (x:xs) = - x : go (spsToNextStop - 1) xs - in go tabStop + -> T.Text -- ^ Input + -> T.Text +tabFilter 0 = id +tabFilter tabStop = T.unlines . map go . T.lines + where go s = + let (s1, s2) = T.break (== '\t') s + in if T.null s2 + then s1 + else s1 <> T.replicate + (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ") + <> go (T.drop 1 s2) + +-- | Strip out DOS line endings. +crFilter :: T.Text -> T.Text +crFilter = T.filter (/= '\r') -- -- Date/time @@ -329,7 +285,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day) where rejectBadYear day = case toGregorian day of (y, _, _) | y >= 1601 && y <= 9999 -> Just day - _ -> Nothing + _ -> Nothing parsetimeWith = #if MIN_VERSION_time(1,5,0) parseTimeM True defaultTimeLocale @@ -337,7 +293,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") parseTime defaultTimeLocale #endif formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", - "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", + "%e %B %Y", "%b. %e, %Y", "%B %e, %Y", "%Y%m%d", "%Y%m", "%Y"] -- @@ -366,23 +322,6 @@ orderedListMarkers (start, numstyle, numdelim) = TwoParens -> "(" ++ str ++ ")" in map inDelim nums --- | Normalize a list of inline elements: remove leading and trailing --- @Space@ elements, collapse double @Space@s into singles, and --- remove empty Str elements. -normalizeSpaces :: [Inline] -> [Inline] -normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty - where cleanup [] = [] - cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of - [] -> [] - (x:xs) -> Space : x : cleanup xs - cleanup ((Str ""):rest) = cleanup rest - cleanup (x:rest) = x : cleanup rest - -isSpaceOrEmpty :: Inline -> Bool -isSpaceOrEmpty Space = True -isSpaceOrEmpty (Str "") = True -isSpaceOrEmpty _ = False - -- | Extract the leading and trailing spaces from inside an inline element -- and place them outside the element. SoftBreaks count as Spaces for -- these purposes. @@ -399,183 +338,43 @@ extractSpaces f is = _ -> mempty in (left <> f (B.trimInlines . B.Many $ contents) <> right) --- | Normalize @Pandoc@ document, consolidating doubled 'Space's, --- combining adjacent 'Str's and 'Emph's, remove 'Null's and --- empty elements, etc. -normalize :: Pandoc -> Pandoc -normalize (Pandoc (Meta meta) blocks) = - Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks) - where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs - go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs - go (MetaList ms) = MetaList $ map go ms - go (MetaMap m) = MetaMap $ M.map go m - go x = x - -normalizeBlocks :: [Block] -> [Block] -normalizeBlocks (Null : xs) = normalizeBlocks xs -normalizeBlocks (Div attr bs : xs) = - Div attr (normalizeBlocks bs) : normalizeBlocks xs -normalizeBlocks (BlockQuote bs : xs) = - case normalizeBlocks bs of - [] -> normalizeBlocks xs - bs' -> BlockQuote bs' : normalizeBlocks xs -normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs -normalizeBlocks (BulletList items : xs) = - BulletList (map normalizeBlocks items) : normalizeBlocks xs -normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs -normalizeBlocks (OrderedList attr items : xs) = - OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs -normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs -normalizeBlocks (DefinitionList items : xs) = - DefinitionList (map go items) : normalizeBlocks xs - where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs) -normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs -normalizeBlocks (RawBlock f x : xs) = - case normalizeBlocks xs of - (RawBlock f' x' : rest) | f' == f -> - RawBlock f (x ++ ('\n':x')) : rest - rest -> RawBlock f x : rest -normalizeBlocks (Para ils : xs) = - case normalizeInlines ils of - [] -> normalizeBlocks xs - ils' -> Para ils' : normalizeBlocks xs -normalizeBlocks (Plain ils : xs) = - case normalizeInlines ils of - [] -> normalizeBlocks xs - ils' -> Plain ils' : normalizeBlocks xs -normalizeBlocks (Header lev attr ils : xs) = - Header lev attr (normalizeInlines ils) : normalizeBlocks xs -normalizeBlocks (Table capt aligns widths hdrs rows : xs) = - Table (normalizeInlines capt) aligns widths - (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows) - : normalizeBlocks xs -normalizeBlocks (x:xs) = x : normalizeBlocks xs -normalizeBlocks [] = [] - -normalizeInlines :: [Inline] -> [Inline] -normalizeInlines (Str x : ys) = - case concat (x : map fromStr strs) of - "" -> rest - n -> Str n : rest - where - (strs, rest) = span isStr $ normalizeInlines ys - isStr (Str _) = True - isStr _ = False - fromStr (Str z) = z - fromStr _ = error "normalizeInlines - fromStr - not a Str" -normalizeInlines (Space : SoftBreak : ys) = - SoftBreak : normalizeInlines ys -normalizeInlines (Space : ys) = - if null rest - then [] - else Space : rest - where isSp Space = True - isSp _ = False - rest = dropWhile isSp $ normalizeInlines ys -normalizeInlines (Emph xs : zs) = - case normalizeInlines zs of - (Emph ys : rest) -> normalizeInlines $ - Emph (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Emph xs' : rest -normalizeInlines (Strong xs : zs) = - case normalizeInlines zs of - (Strong ys : rest) -> normalizeInlines $ - Strong (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Strong xs' : rest -normalizeInlines (Subscript xs : zs) = - case normalizeInlines zs of - (Subscript ys : rest) -> normalizeInlines $ - Subscript (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Subscript xs' : rest -normalizeInlines (Superscript xs : zs) = - case normalizeInlines zs of - (Superscript ys : rest) -> normalizeInlines $ - Superscript (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Superscript xs' : rest -normalizeInlines (SmallCaps xs : zs) = - case normalizeInlines zs of - (SmallCaps ys : rest) -> normalizeInlines $ - SmallCaps (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> SmallCaps xs' : rest -normalizeInlines (Strikeout xs : zs) = - case normalizeInlines zs of - (Strikeout ys : rest) -> normalizeInlines $ - Strikeout (normalizeInlines $ xs ++ ys) : rest - rest -> case normalizeInlines xs of - [] -> rest - xs' -> Strikeout xs' : rest -normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys -normalizeInlines (RawInline f xs : zs) = - case normalizeInlines zs of - (RawInline f' ys : rest) | f == f' -> normalizeInlines $ - RawInline f (xs ++ ys) : rest - rest -> RawInline f xs : rest -normalizeInlines (Code _ "" : ys) = normalizeInlines ys -normalizeInlines (Code attr xs : zs) = - case normalizeInlines zs of - (Code attr' ys : rest) | attr == attr' -> normalizeInlines $ - Code attr (xs ++ ys) : rest - rest -> Code attr xs : rest --- allow empty spans, they may carry identifiers etc. --- normalizeInlines (Span _ [] : ys) = normalizeInlines ys -normalizeInlines (Span attr xs : zs) = - case normalizeInlines zs of - (Span attr' ys : rest) | attr == attr' -> normalizeInlines $ - Span attr (normalizeInlines $ xs ++ ys) : rest - rest -> Span attr (normalizeInlines xs) : rest -normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) : - normalizeInlines ys -normalizeInlines (Quoted qt ils : ys) = - Quoted qt (normalizeInlines ils) : normalizeInlines ys -normalizeInlines (Link attr ils t : ys) = - Link attr (normalizeInlines ils) t : normalizeInlines ys -normalizeInlines (Image attr ils t : ys) = - Image attr (normalizeInlines ils) t : normalizeInlines ys -normalizeInlines (Cite cs ils : ys) = - Cite cs (normalizeInlines ils) : normalizeInlines ys -normalizeInlines (x : xs) = x : normalizeInlines xs -normalizeInlines [] = [] - -- | Extract inlines, removing formatting. removeFormatting :: Walkable Inline a => a -> [Inline] -removeFormatting = query go . walk deNote +removeFormatting = query go . walk (deNote . deQuote) where go :: Inline -> [Inline] - go (Str xs) = [Str xs] - go Space = [Space] - go SoftBreak = [SoftBreak] - go (Code _ x) = [Str x] - go (Math _ x) = [Str x] - go LineBreak = [Space] - go _ = [] - deNote (Note _) = Str "" - deNote x = x + go (Str xs) = [Str xs] + go Space = [Space] + go SoftBreak = [SoftBreak] + go (Code _ x) = [Str x] + go (Math _ x) = [Str x] + go LineBreak = [Space] + go _ = [] + +deNote :: Inline -> Inline +deNote (Note _) = Str "" +deNote x = x + +deQuote :: Inline -> Inline +deQuote (Quoted SingleQuote xs) = + Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"]) +deQuote (Quoted DoubleQuote xs) = + Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"]) +deQuote x = x -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). stringify :: Walkable Inline a => a -> String -stringify = query go . walk deNote +stringify = query go . walk (deNote . deQuote) where go :: Inline -> [Char] - go Space = " " - go SoftBreak = " " - go (Str x) = x - go (Code _ x) = x - go (Math _ x) = x + go Space = " " + go SoftBreak = " " + go (Str x) = x + go (Code _ x) = x + go (Math _ x) = x go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 - go LineBreak = " " - go _ = "" - deNote (Note _) = Str "" - deNote x = x + go LineBreak = " " + go _ = "" -- | Bring all regular text in a pandoc structure to uppercase. -- @@ -589,28 +388,12 @@ capitalize = walk go go x = x -- | Change final list item from @Para@ to @Plain@ if the list contains --- no other @Para@ blocks. -compactify :: [[Block]] -- ^ List of list items (each a list of blocks) - -> [[Block]] -compactify [] = [] -compactify items = - case (init items, last items) of - (_,[]) -> items - (others, final) -> - case last final of - Para a -> case (filter isPara $ concat items) of - -- if this is only Para, change to Plain - [_] -> others ++ [init final ++ [Plain a]] - _ -> items - _ -> items - --- | Change final list item from @Para@ to @Plain@ if the list contains -- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather -- than @[Block]@. -compactify' :: [Blocks] -- ^ List of list items (each a list of blocks) +compactify :: [Blocks] -- ^ List of list items (each a list of blocks) -> [Blocks] -compactify' [] = [] -compactify' items = +compactify [] = [] +compactify items = let (others, final) = (init items, last items) in case reverse (B.toList final) of (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of @@ -619,9 +402,9 @@ compactify' items = _ -> items _ -> items --- | Like @compactify'@, but acts on items of definition lists. -compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] -compactify'DL items = +-- | Like @compactify@, but acts on items of definition lists. +compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] +compactifyDL items = let defs = concatMap snd items in case reverse (concatMap B.toList defs) of (Para x:xs) @@ -663,7 +446,7 @@ instance Walkable Inline Element where ils' <- walkM f ils elts' <- walkM f elts return $ Sec lev nums attr ils' elts' - query f (Blk x) = query f x + query f (Blk x) = query f x query f (Sec _ _ _ ils elts) = query f ils <> query f elts instance Walkable Block Element where @@ -674,7 +457,7 @@ instance Walkable Block Element where ils' <- walkM f ils elts' <- walkM f elts return $ Sec lev nums attr ils' elts' - query f (Blk x) = query f x + query f (Blk x) = query f x query f (Sec _ _ _ ils elts) = query f ils <> query f elts @@ -687,8 +470,8 @@ inlineListToIdentifier = map (nbspToSp . toLower) . filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") . stringify - where nbspToSp '\160' = ' ' - nbspToSp x = x + where nbspToSp '\160' = ' ' + nbspToSp x = x -- | Convert list of Pandoc blocks into (hierarchical) list of Elements hierarchicalize :: [Block] -> [Element] @@ -696,7 +479,7 @@ hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) [] hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element] hierarchicalizeWithIds [] = return [] -hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do +hierarchicalizeWithIds (Header level attr@(_,classes,_) title':xs) = do lastnum <- S.get let lastnum' = take level lastnum let newnum = case length lastnum' of @@ -709,26 +492,26 @@ hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do sectionContents' <- hierarchicalizeWithIds sectionContents rest' <- hierarchicalizeWithIds rest return $ Sec level newnum attr title' sectionContents' : rest' -hierarchicalizeWithIds ((Div ("",["references"],[]) - (Header level (ident,classes,kvs) title' : xs)):ys) = - hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs) - title') : (xs ++ ys)) +hierarchicalizeWithIds (Div ("",["references"],[]) + (Header level (ident,classes,kvs) title' : xs):ys) = + hierarchicalizeWithIds (Header level (ident,"references":classes,kvs) + title' : (xs ++ ys)) hierarchicalizeWithIds (x:rest) = do rest' <- hierarchicalizeWithIds rest - return $ (Blk x) : rest' + return $ Blk x : rest' headerLtEq :: Int -> Block -> Bool -headerLtEq level (Header l _ _) = l <= level -headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level -headerLtEq _ _ = False +headerLtEq level (Header l _ _) = l <= level +headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level +headerLtEq _ _ = False -- | Generate a unique identifier from a list of inlines. -- Second argument is a list of already used identifiers. uniqueIdent :: [Inline] -> Set.Set String -> String uniqueIdent title' usedIdents = let baseIdent = case inlineListToIdentifier title' of - "" -> "section" - x -> x + "" -> "section" + x -> x numIdent n = baseIdent ++ "-" ++ show n in if baseIdent `Set.member` usedIdents then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of @@ -738,8 +521,8 @@ uniqueIdent title' usedIdents -- | True if block is a Header block. isHeaderBlock :: Block -> Bool -isHeaderBlock (Header _ _ _) = True -isHeaderBlock _ = False +isHeaderBlock Header{} = True +isHeaderBlock _ = False -- | Shift header levels up or down. headerShift :: Int -> Pandoc -> Pandoc @@ -748,6 +531,14 @@ headerShift n = walk shift shift (Header level attr inner) = Header (level + n) attr inner shift x = x +-- | Remove empty paragraphs. +stripEmptyParagraphs :: Pandoc -> Pandoc +stripEmptyParagraphs = walk go + where go :: [Block] -> [Block] + go = filter (not . isEmptyParagraph) + isEmptyParagraph (Para []) = True + isEmptyParagraph _ = False + -- | Detect if a list is tight. isTightList :: [[Block]] -> Bool isTightList = all firstIsPlain @@ -765,8 +556,8 @@ addMetaField key val (Meta meta) = Meta $ M.insertWith combine key (toMetaValue val) meta where combine newval (MetaList xs) = MetaList (xs ++ tolist newval) combine newval x = MetaList [x, newval] - tolist (MetaList ys) = ys - tolist y = [y] + tolist (MetaList ys) = ys + tolist y = [y] -- | Create 'Meta' from old-style title, authors, date. This is -- provided to ease the transition from the old API. @@ -774,8 +565,24 @@ makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta makeMeta title authors date = addMetaField "title" (B.fromList title) $ addMetaField "author" (map B.fromList authors) - $ addMetaField "date" (B.fromList date) - $ nullMeta + $ addMetaField "date" (B.fromList date) nullMeta + +-- | Remove soft breaks between East Asian characters. +eastAsianLineBreakFilter :: Pandoc -> Pandoc +eastAsianLineBreakFilter = bottomUp go + where go (x:SoftBreak:y:zs) = + case (stringify x, stringify y) of + (xs@(_:_), c:_) + | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs + _ -> x:SoftBreak:y:zs + go xs = xs + +-- | Builder for underline. +-- This probably belongs in Builder.hs in pandoc-types. +-- Will be replaced once Underline is an element. +underlineSpan :: Inlines -> Inlines +underlineSpan = B.spanWith ("", ["underline"], []) + -- -- TagSoup HTML handling @@ -787,7 +594,7 @@ renderTags' = renderTagsOptions renderOptions{ optMinimize = matchTags ["hr", "br", "img", "meta", "link"] , optRawTag = matchTags ["script", "style"] } - where matchTags = \tags -> flip elem tags . map toLower + where matchTags tags = flip elem tags . map toLower -- -- File handling @@ -800,226 +607,14 @@ inDirectory path action = E.bracket setCurrentDirectory (const $ setCurrentDirectory path >> action) -getDefaultReferenceDocx :: Maybe FilePath -> IO Archive -getDefaultReferenceDocx datadir = do - let paths = ["[Content_Types].xml", - "_rels/.rels", - "docProps/app.xml", - "docProps/core.xml", - "word/document.xml", - "word/fontTable.xml", - "word/footnotes.xml", - "word/numbering.xml", - "word/settings.xml", - "word/webSettings.xml", - "word/styles.xml", - "word/_rels/document.xml.rels", - "word/_rels/footnotes.xml.rels", - "word/theme/theme1.xml"] - let toLazy = fromChunks . (:[]) - let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$> - getCurrentTime - contents <- toLazy <$> readDataFile datadir - ("docx/" ++ path) - return $ toEntry path epochtime contents - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- doesFileExist (d </> "reference.docx") - if exists - then return (Just (d </> "reference.docx")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> BL.readFile arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - -getDefaultReferenceODT :: Maybe FilePath -> IO Archive -getDefaultReferenceODT datadir = do - let paths = ["mimetype", - "manifest.rdf", - "styles.xml", - "content.xml", - "meta.xml", - "settings.xml", - "Configurations2/accelerator/current.xml", - "Thumbnails/thumbnail.png", - "META-INF/manifest.xml"] - let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime - contents <- (fromChunks . (:[])) `fmap` - readDataFile datadir ("odt/" ++ path) - return $ toEntry path epochtime contents - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- doesFileExist (d </> "reference.odt") - if exists - then return (Just (d </> "reference.odt")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> BL.readFile arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - - -readDefaultDataFile :: FilePath -> IO BS.ByteString -readDefaultDataFile "reference.docx" = - (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing -readDefaultDataFile "reference.odt" = - (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing -readDefaultDataFile fname = -#ifdef EMBED_DATA_FILES - case lookup (makeCanonical fname) dataFiles of - Nothing -> err 97 $ "Could not find data file " ++ fname - Just contents -> return contents - where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories - transformPathParts = reverse . foldl go [] - go as "." = as - go (_:as) ".." = as - go as x = x : as -#else - getDataFileName fname' >>= checkExistence >>= BS.readFile - where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname - -checkExistence :: FilePath -> IO FilePath -checkExistence fn = do - exists <- doesFileExist fn - if exists - then return fn - else err 97 ("Could not find data file " ++ fn) -#endif - --- | Read file from specified user data directory or, if not found there, from --- Cabal data directory. -readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString -readDataFile Nothing fname = readDefaultDataFile fname -readDataFile (Just userDir) fname = do - exists <- doesFileExist (userDir </> fname) - if exists - then BS.readFile (userDir </> fname) - else readDefaultDataFile fname - --- | Same as 'readDataFile' but returns a String instead of a ByteString. -readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String -readDataFileUTF8 userDir fname = - UTF8.toString `fmap` readDataFile userDir fname - --- | Specialized version of parseURIReference that disallows --- single-letter schemes. Reason: these are usually windows absolute --- paths. -parseURIReference' :: String -> Maybe URI -parseURIReference' s = - case parseURIReference s of - Just u - | length (uriScheme u) > 2 -> Just u - | null (uriScheme u) -> Just u -- protocol-relative - _ -> Nothing - --- | Fetch an image or other item from the local filesystem or the net. --- Returns raw content and maybe mime type. -fetchItem :: Maybe String -> String - -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) -fetchItem sourceURL s = - case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of - (Just u, s') -> -- try fetching from relative path at source - case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u - Nothing -> openURL s' -- will throw error - (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI - case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon - Nothing -> openURL s' -- will throw error - (Nothing, s') -> - case parseURI s' of -- requires absolute URI - -- We don't want to treat C:/ as a scheme: - Just u' | length (uriScheme u') > 2 -> openURL (show u') - Just u' | uriScheme u' == "file:" -> - E.try $ readLocalFile $ dropWhile (=='/') (uriPath u') - _ -> E.try $ readLocalFile fp -- get from local file system - where readLocalFile f = do - cont <- BS.readFile f - return (cont, mime) - httpcolon = URI{ uriScheme = "http:", - uriAuthority = Nothing, - uriPath = "", - uriQuery = "", - uriFragment = "" } - dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#') - fp = unEscapeString $ dropFragmentAndQuery s - mime = case takeExtension fp of - ".gz" -> getMimeType $ dropExtension fp - ".svgz" -> getMimeType $ dropExtension fp ++ ".svg" - x -> getMimeType x - ensureEscaped = escapeURIString isAllowedInURI . map convertSlash - convertSlash '\\' = '/' - convertSlash x = x - --- | Like 'fetchItem', but also looks for items in a 'MediaBag'. -fetchItem' :: MediaBag -> Maybe String -> String - -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) -fetchItem' media sourceURL s = do - case lookupMedia s media of - Nothing -> fetchItem sourceURL s - Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime) - --- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) -openURL u - | Just u'' <- stripPrefix "data:" u = - let mime = takeWhile (/=',') u'' - contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'' - in return $ Right (decodeLenient contents, Just mime) -#ifdef HTTP_CLIENT - | otherwise = withSocketsDo $ E.try $ do - let parseReq = parseRequest - (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy" - req <- parseReq u - req' <- case proxy of - Left _ -> return req - Right pr -> (parseReq pr >>= \r -> - return $ addProxy (host r) (port r) req) - `mplus` return req - resp <- newManager tlsManagerSettings >>= httpLbs req' - return (BS.concat $ toChunks $ responseBody resp, - UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) -#else - | otherwise = E.try $ getBodyAndMimeType `fmap` browse - (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." - setOutHandler $ const (return ()) - setAllowRedirects True - request (getRequest' u')) - where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r) - getRequest' uriString = case parseURI uriString of - Nothing -> error ("Not a valid URL: " ++ - uriString) - Just v -> mkRequest GET v - u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI -#endif - -- -- Error reporting -- -err :: Int -> String -> IO a -err exitCode msg = do - name <- getProgName - UTF8.hPutStrLn stderr $ name ++ ": " ++ msg - exitWith $ ExitFailure exitCode - return undefined - -warn :: MonadIO m => String -> m () -warn msg = liftIO $ do - name <- getProgName - UTF8.hPutStrLn stderr $ "[" ++ name ++ " warning] " ++ msg - mapLeft :: (a -> b) -> Either a c -> Either b c -mapLeft f (Left x) = Left (f x) +mapLeft f (Left x) = Left (f x) mapLeft _ (Right x) = Right x -hush :: Either a b -> Maybe b -hush (Left _) = Nothing -hush (Right x) = Just x - -- | Remove intermediate "." and ".." directories from a path. -- -- > collapseFilePath "./foo" == "foo" @@ -1034,14 +629,14 @@ collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories where go rs "." = rs go r@(p:rs) ".." = case p of - ".." -> ("..":r) - (checkPathSeperator -> Just True) -> ("..":r) - _ -> rs + ".." -> "..":r + (checkPathSeperator -> Just True) -> "..":r + _ -> rs go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]] go rs x = x:rs - isSingleton [] = Nothing + isSingleton [] = Nothing isSingleton [x] = Just x - isSingleton _ = Nothing + isSingleton _ = Nothing checkPathSeperator = fmap isPathSeparator . isSingleton -- @@ -1054,41 +649,109 @@ filteredFilesFromArchive zf f = fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) + +-- +-- IANA URIs +-- + +-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus +-- the unofficial schemes doi, javascript, isbn, pmid. +schemes :: Set.Set String +schemes = Set.fromList + -- Official IANA schemes + [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs" + , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin" + , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension" + , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs" + , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle" + , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed" + , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg" + , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham" + , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon" + , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6" + , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs" + , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap" + , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market" + , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access" + , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel" + , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath" + , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint" + , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller" + , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode" + , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular" + , "ms-settings-cloudstorage", "ms-settings-connectabledevices" + , "ms-settings-displays-topology", "ms-settings-emailandaccounts" + , "ms-settings-language", "ms-settings-location", "ms-settings-lock" + , "ms-settings-nfctransactions", "ms-settings-notifications" + , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity" + , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace" + , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad" + , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word" + , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs" + , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd" + , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop" + , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis" + , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp" + , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn" + , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews" + , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam" + , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid" + , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn" + , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi" + , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid" + , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire" + , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r" + , "z39.50s" + -- Inofficial schemes + , "doi", "isbn", "javascript", "pmid" + ] + +-- | Check if the string is a valid URL with a IANA or frequently used but +-- unofficial scheme (see @schemes@). +isURI :: String -> Bool +isURI = maybe False hasKnownScheme . parseURI + where + hasKnownScheme = (`Set.member` schemes) . map toLower . + filter (/= ':') . uriScheme + --- --- Squash blocks into inlines --- -blockToInlines :: Block -> [Inline] -blockToInlines (Plain ils) = ils -blockToInlines (Para ils) = ils -blockToInlines (LineBlock lns) = combineLines lns -blockToInlines (CodeBlock attr str) = [Code attr str] -blockToInlines (RawBlock fmt str) = [RawInline fmt str] -blockToInlines (BlockQuote blks) = blocksToInlines blks +blockToInlines :: Block -> Inlines +blockToInlines (Plain ils) = B.fromList ils +blockToInlines (Para ils) = B.fromList ils +blockToInlines (LineBlock lns) = B.fromList $ combineLines lns +blockToInlines (CodeBlock attr str) = B.codeWith attr str +blockToInlines (RawBlock (Format fmt) str) = B.rawInline fmt str +blockToInlines (BlockQuote blks) = blocksToInlines' blks blockToInlines (OrderedList _ blkslst) = - concatMap blocksToInlines blkslst + mconcat $ map blocksToInlines' blkslst blockToInlines (BulletList blkslst) = - concatMap blocksToInlines blkslst + mconcat $ map blocksToInlines' blkslst blockToInlines (DefinitionList pairslst) = - concatMap f pairslst + mconcat $ map f pairslst where - f (ils, blkslst) = ils ++ - [Str ":", Space] ++ - (concatMap blocksToInlines blkslst) -blockToInlines (Header _ _ ils) = ils -blockToInlines (HorizontalRule) = [] + f (ils, blkslst) = B.fromList ils <> B.str ":" <> B.space <> + mconcat (map blocksToInlines' blkslst) +blockToInlines (Header _ _ ils) = B.fromList ils +blockToInlines HorizontalRule = mempty blockToInlines (Table _ _ _ headers rows) = - intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl - where - tbl = headers : rows -blockToInlines (Div _ blks) = blocksToInlines blks -blockToInlines Null = [] + mconcat $ intersperse B.linebreak $ + map (mconcat . map blocksToInlines') (headers:rows) +blockToInlines (Div _ blks) = blocksToInlines' blks +blockToInlines Null = mempty + +blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines +blocksToInlinesWithSep sep = + mconcat . intersperse sep . map blockToInlines -blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline] -blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks +blocksToInlines' :: [Block] -> Inlines +blocksToInlines' = blocksToInlinesWithSep parSep + where parSep = B.space <> B.str "¶" <> B.space blocksToInlines :: [Block] -> [Inline] -blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space] +blocksToInlines = B.toList . blocksToInlines' -- diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index e19dba3e2..9d63555c2 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Slides - Copyright : Copyright (C) 2012-2016 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -40,9 +40,9 @@ getSlideLevel = go 6 | otherwise = go least (x:xs) go least (_ : xs) = go least xs go least [] = least - nonHOrHR (Header{}) = False - nonHOrHR (HorizontalRule) = False - nonHOrHR _ = True + nonHOrHR Header{} = False + nonHOrHR HorizontalRule = False + nonHOrHR _ = True -- | Prepare a block list to be passed to hierarchicalize. prepSlides :: Int -> [Block] -> [Block] diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index d15d27438..4be0d081c 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -1,7 +1,9 @@ -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, - OverloadedStrings, GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleInstances #-} + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeSynonymInstances #-} {- -Copyright (C) 2009-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2009-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009-2016 John MacFarlane + Copyright : Copyright (C) 2009-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,44 +33,52 @@ A simple templating system with variable substitution and conditionals. -} -module Text.Pandoc.Templates ( renderTemplate +module Text.Pandoc.Templates ( module Text.DocTemplates , renderTemplate' - , TemplateTarget - , varListToJSON - , compileTemplate - , Template - , getDefaultTemplate ) where + , getDefaultTemplate + ) where -import Text.DocTemplates (Template, TemplateTarget, compileTemplate, - renderTemplate, applyTemplate, - varListToJSON) -import Data.Aeson (ToJSON(..)) +import Control.Monad.Except (throwError) +import Data.Aeson (ToJSON (..)) import qualified Data.Text as T -import System.FilePath ((</>), (<.>)) -import qualified Control.Exception.Extensible as E (try, IOException) -import Text.Pandoc.Shared (readDataFileUTF8) +import System.FilePath ((<.>), (</>)) +import Text.DocTemplates (Template, TemplateTarget, applyTemplate, + compileTemplate, renderTemplate, varListToJSON) +import Text.Pandoc.Class (PandocMonad, readDataFile) +import Text.Pandoc.Error +import qualified Text.Pandoc.UTF8 as UTF8 -- | Get default template for the specified writer. -getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first - -> String -- ^ Name of writer - -> IO (Either E.IOException String) -getDefaultTemplate user writer = do +getDefaultTemplate :: PandocMonad m + => String -- ^ Name of writer + -> m String +getDefaultTemplate writer = do let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions case format of - "native" -> return $ Right "" - "json" -> return $ Right "" - "docx" -> return $ Right "" - "fb2" -> return $ Right "" - "odt" -> getDefaultTemplate user "opendocument" - "markdown_strict" -> getDefaultTemplate user "markdown" - "multimarkdown" -> getDefaultTemplate user "markdown" - "markdown_github" -> getDefaultTemplate user "markdown" - "markdown_mmd" -> getDefaultTemplate user "markdown" - "markdown_phpextra" -> getDefaultTemplate user "markdown" + "native" -> return "" + "json" -> return "" + "docx" -> return "" + "fb2" -> return "" + "pptx" -> return "" + "odt" -> getDefaultTemplate "opendocument" + "html" -> getDefaultTemplate "html5" + "docbook" -> getDefaultTemplate "docbook5" + "epub" -> getDefaultTemplate "epub3" + "beamer" -> getDefaultTemplate "latex" + "markdown_strict" -> getDefaultTemplate "markdown" + "multimarkdown" -> getDefaultTemplate "markdown" + "markdown_github" -> getDefaultTemplate "markdown" + "markdown_mmd" -> getDefaultTemplate "markdown" + "markdown_phpextra" -> getDefaultTemplate "markdown" + "gfm" -> getDefaultTemplate "commonmark" _ -> let fname = "templates" </> "default" <.> format - in E.try $ readDataFileUTF8 user fname - --- | Like 'applyTemplate', but raising an error if compilation fails. -renderTemplate' :: (ToJSON a, TemplateTarget b) => String -> a -> b -renderTemplate' template = either error id . applyTemplate (T.pack template) + in UTF8.toString <$> readDataFile fname +-- | Like 'applyTemplate', but runs in PandocMonad and +-- raises an error if compilation fails. +renderTemplate' :: (PandocMonad m, ToJSON a, TemplateTarget b) + => String -> a -> m b +renderTemplate' template context = + case applyTemplate (T.pack template) context of + Left e -> throwError (PandocTemplateError e) + Right r -> return r diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs new file mode 100644 index 000000000..949618178 --- /dev/null +++ b/src/Text/Pandoc/Translations.hs @@ -0,0 +1,112 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{- +Copyright (C) 2017-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Translations + Copyright : Copyright (C) 2017-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Data types for localization. + +Translations are stored in @data/translations/langname.trans@, +where langname can be the full BCP47 language specifier, or +just the language part. File format is: + +> # A comment, ignored +> Figure: Figura +> Index: Indeksi + +-} +module Text.Pandoc.Translations ( + Term(..) + , Translations + , lookupTerm + , readTranslations + ) +where +import Data.Aeson.Types (typeMismatch) +import qualified Data.HashMap.Strict as HM +import qualified Data.Map as M +import Data.Text as T +import Data.Yaml as Yaml +import GHC.Generics (Generic) +import Text.Pandoc.Shared (safeRead) +import qualified Text.Pandoc.UTF8 as UTF8 + +data Term = + Preface + | References + | Abstract + | Bibliography + | Chapter + | Appendix + | Contents + | ListOfFigures + | ListOfTables + | Index + | Figure + | Table + | Part + | Page + | See + | SeeAlso + | Encl + | Cc + | To + | Proof + | Glossary + | Listing + deriving (Show, Eq, Ord, Generic, Enum, Read) + +newtype Translations = Translations (M.Map Term String) + deriving (Show, Generic, Monoid) + +instance FromJSON Term where + parseJSON (String t) = case safeRead (T.unpack t) of + Just t' -> pure t' + Nothing -> fail $ "Invalid Term name " ++ + show t + parseJSON invalid = typeMismatch "Term" invalid + +instance FromJSON Translations where + parseJSON (Object hm) = do + xs <- mapM addItem (HM.toList hm) + return $ Translations (M.fromList xs) + where addItem (k,v) = + case safeRead (T.unpack k) of + Nothing -> fail $ "Invalid Term name " ++ show k + Just t -> + case v of + (String s) -> return (t, T.unpack $ T.strip s) + inv -> typeMismatch "String" inv + parseJSON invalid = typeMismatch "Translations" invalid + +lookupTerm :: Term -> Translations -> Maybe String +lookupTerm t (Translations tm) = M.lookup t tm + +readTranslations :: String -> Either String Translations +readTranslations s = + case Yaml.decodeEither' $ UTF8.fromString s of + Left err' -> Left $ prettyPrintParseException err' + Right t -> Right t diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 62a662029..3f759958f 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010-2016 John MacFarlane + Copyright : Copyright (C) 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,16 +29,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7. -} module Text.Pandoc.UTF8 ( readFile - , writeFile , getContents + , writeFileWith + , writeFile + , putStrWith , putStr + , putStrLnWith , putStrLn + , hPutStrWith , hPutStr + , hPutStrLnWith , hPutStrLn , hGetContents , toString + , toText , fromString + , fromText , toStringLazy + , fromTextLazy + , toTextLazy , fromStringLazy , encodePath , decodeArg @@ -45,39 +55,59 @@ module Text.Pandoc.UTF8 ( readFile where -import System.IO hiding (readFile, writeFile, getContents, - putStr, putStrLn, hPutStr, hPutStrLn, hGetContents) -import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) -import qualified System.IO as IO import qualified Data.ByteString.Char8 as B -import qualified Data.ByteString.Lazy as BL -import qualified Data.Text.Encoding as T +import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.Text as T +import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL +import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile) +import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr, + putStrLn, readFile, writeFile) +import qualified System.IO as IO readFile :: FilePath -> IO String readFile f = do h <- openFile (encodePath f) ReadMode hGetContents h -writeFile :: FilePath -> String -> IO () -writeFile f s = withFile (encodePath f) WriteMode $ \h -> hPutStr h s - getContents :: IO String getContents = hGetContents stdin +writeFileWith :: Newline -> FilePath -> String -> IO () +writeFileWith eol f s = + withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s + +writeFile :: FilePath -> String -> IO () +writeFile = writeFileWith nativeNewline + +putStrWith :: Newline -> String -> IO () +putStrWith eol s = hPutStrWith eol stdout s + putStr :: String -> IO () -putStr s = hPutStr stdout s +putStr = putStrWith nativeNewline + +putStrLnWith :: Newline -> String -> IO () +putStrLnWith eol s = hPutStrLnWith eol stdout s putStrLn :: String -> IO () -putStrLn s = hPutStrLn stdout s +putStrLn = putStrLnWith nativeNewline + +hPutStrWith :: Newline -> Handle -> String -> IO () +hPutStrWith eol h s = + hSetNewlineMode h (NewlineMode eol eol) >> + hSetEncoding h utf8 >> IO.hPutStr h s hPutStr :: Handle -> String -> IO () -hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s +hPutStr = hPutStrWith nativeNewline + +hPutStrLnWith :: Newline -> Handle -> String -> IO () +hPutStrLnWith eol h s = + hSetNewlineMode h (NewlineMode eol eol) >> + hSetEncoding h utf8 >> IO.hPutStrLn h s hPutStrLn :: Handle -> String -> IO () -hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s +hPutStrLn = hPutStrLnWith nativeNewline hGetContents :: Handle -> IO String hGetContents = fmap toString . B.hGetContents @@ -85,34 +115,47 @@ hGetContents = fmap toString . B.hGetContents -- >> hSetNewlineMode h universalNewlineMode -- >> IO.hGetContents h --- | Drop BOM (byte order marker) if present at beginning of string. --- Note that Data.Text converts the BOM to code point FEFF, zero-width --- no-break space, so if the string begins with this we strip it off. -dropBOM :: String -> String -dropBOM ('\xFEFF':xs) = xs -dropBOM xs = xs - -filterCRs :: String -> String -filterCRs ('\r':'\n':xs) = '\n': filterCRs xs -filterCRs ('\r':xs) = '\n' : filterCRs xs -filterCRs (x:xs) = x : filterCRs xs -filterCRs [] = [] +-- | Convert UTF8-encoded ByteString to Text, also +-- removing '\r' characters. +toText :: B.ByteString -> T.Text +toText = T.decodeUtf8 . filterCRs . dropBOM + where dropBOM bs = + if "\xEF\xBB\xBF" `B.isPrefixOf` bs + then B.drop 3 bs + else bs + filterCRs = B.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also -- removing '\r' characters. toString :: B.ByteString -> String -toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8 +toString = T.unpack . toText -fromString :: String -> B.ByteString -fromString = T.encodeUtf8 . T.pack +-- | Convert UTF8-encoded ByteString to Text, also +-- removing '\r' characters. +toTextLazy :: BL.ByteString -> TL.Text +toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM + where dropBOM bs = + if "\xEF\xBB\xBF" `BL.isPrefixOf` bs + then BL.drop 3 bs + else bs + filterCRs = BL.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also -- removing '\r' characters. toStringLazy :: BL.ByteString -> String -toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8 +toStringLazy = TL.unpack . toTextLazy + +fromText :: T.Text -> B.ByteString +fromText = T.encodeUtf8 + +fromTextLazy :: TL.Text -> BL.ByteString +fromTextLazy = TL.encodeUtf8 + +fromString :: String -> B.ByteString +fromString = fromText . T.pack fromStringLazy :: String -> BL.ByteString -fromStringLazy = TL.encodeUtf8 . TL.pack +fromStringLazy = fromTextLazy . TL.pack encodePath :: FilePath -> FilePath encodePath = id diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index 5d05fa303..4d99324db 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UUID - Copyright : Copyright (C) 2010-2016 John MacFarlane + Copyright : Copyright (C) 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,13 +29,12 @@ UUID generation using Version 4 (random method) described in RFC4122. See http://tools.ietf.org/html/rfc4122 -} -module Text.Pandoc.UUID ( UUID, getRandomUUID ) where +module Text.Pandoc.UUID ( UUID(..), getRandomUUID, getUUID ) where -import Text.Printf ( printf ) -import System.Random ( randomIO ) +import Data.Bits (clearBit, setBit) import Data.Word -import Data.Bits ( setBit, clearBit ) -import Control.Monad ( liftM ) +import System.Random (RandomGen, getStdGen, randoms) +import Text.Printf (printf) data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 @@ -64,14 +63,15 @@ instance Show UUID where printf "%02x" o ++ printf "%02x" p -getRandomUUID :: IO UUID -getRandomUUID = do - let getRN :: a -> IO Word8 - getRN _ = liftM fromIntegral (randomIO :: IO Int) - [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] <- mapM getRN ([1..16] :: [Int]) +getUUID :: RandomGen g => g -> UUID +getUUID gen = + let [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] = take 16 $ randoms gen :: [Word8] -- set variant - let i' = i `setBit` 7 `clearBit` 6 + i' = i `setBit` 7 `clearBit` 6 -- set version (0100 for random) - let g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 - return $ UUID a b c d e f g' h i' j k l m n o p + g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 + in + UUID a b c d e f g' h i' j k l m n o p +getRandomUUID :: IO UUID +getRandomUUID = getUUID <$> getStdGen diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs new file mode 100644 index 000000000..596a8680e --- /dev/null +++ b/src/Text/Pandoc/Writers.hs @@ -0,0 +1,195 @@ +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} + +{- | + Module : Text.Pandoc + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This helper module exports all writers functions. +-} +module Text.Pandoc.Writers + ( + -- * Writers: converting /from/ Pandoc format + Writer(..) + , writers + , writeAsciiDoc + , writeBeamer + , writeCommonMark + , writeConTeXt + , writeCustom + , writeDZSlides + , writeDocbook4 + , writeDocbook5 + , writeDocx + , writeDokuWiki + , writeEPUB2 + , writeEPUB3 + , writeFB2 + , writeHaddock + , writeHtml4 + , writeHtml4String + , writeHtml5 + , writeHtml5String + , writeICML + , writeJATS + , writeJSON + , writeLaTeX + , writeMan + , writeMarkdown + , writeMediaWiki + , writeMs + , writeMuse + , writeNative + , writeODT + , writeOPML + , writeOpenDocument + , writeOrg + , writePlain + , writePowerpoint + , writeRST + , writeRTF + , writeRevealJs + , writeS5 + , writeSlideous + , writeSlidy + , writeTEI + , writeTexinfo + , writeTextile + , writeZimWiki + , getWriter + ) where + +import Data.Aeson +import qualified Data.ByteString.Lazy as BL +import Data.List (intercalate) +import Data.Text (Text) +import Text.Pandoc.Class +import Text.Pandoc.Definition +import Text.Pandoc.Options +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Writers.AsciiDoc +import Text.Pandoc.Writers.CommonMark +import Text.Pandoc.Writers.ConTeXt +import Text.Pandoc.Writers.Custom +import Text.Pandoc.Writers.Docbook +import Text.Pandoc.Writers.Docx +import Text.Pandoc.Writers.DokuWiki +import Text.Pandoc.Writers.EPUB +import Text.Pandoc.Writers.FB2 +import Text.Pandoc.Writers.Haddock +import Text.Pandoc.Writers.HTML +import Text.Pandoc.Writers.ICML +import Text.Pandoc.Writers.JATS +import Text.Pandoc.Writers.LaTeX +import Text.Pandoc.Writers.Man +import Text.Pandoc.Writers.Markdown +import Text.Pandoc.Writers.MediaWiki +import Text.Pandoc.Writers.Ms +import Text.Pandoc.Writers.Muse +import Text.Pandoc.Writers.Native +import Text.Pandoc.Writers.ODT +import Text.Pandoc.Writers.OpenDocument +import Text.Pandoc.Writers.OPML +import Text.Pandoc.Writers.Org +import Text.Pandoc.Writers.Powerpoint +import Text.Pandoc.Writers.RST +import Text.Pandoc.Writers.RTF +import Text.Pandoc.Writers.TEI +import Text.Pandoc.Writers.Texinfo +import Text.Pandoc.Writers.Textile +import Text.Pandoc.Writers.ZimWiki +import Text.Parsec.Error + +data Writer m = TextWriter (WriterOptions -> Pandoc -> m Text) + | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString) + +-- | Association list of formats and writers. +writers :: PandocMonad m => [ ( String, Writer m) ] +writers = [ + ("native" , TextWriter writeNative) + ,("json" , TextWriter $ \o d -> return $ writeJSON o d) + ,("docx" , ByteStringWriter writeDocx) + ,("odt" , ByteStringWriter writeODT) + ,("pptx" , ByteStringWriter writePowerpoint) + ,("epub" , ByteStringWriter writeEPUB3) + ,("epub2" , ByteStringWriter writeEPUB2) + ,("epub3" , ByteStringWriter writeEPUB3) + ,("fb2" , TextWriter writeFB2) + ,("html" , TextWriter writeHtml5String) + ,("html4" , TextWriter writeHtml4String) + ,("html5" , TextWriter writeHtml5String) + ,("icml" , TextWriter writeICML) + ,("s5" , TextWriter writeS5) + ,("slidy" , TextWriter writeSlidy) + ,("slideous" , TextWriter writeSlideous) + ,("dzslides" , TextWriter writeDZSlides) + ,("revealjs" , TextWriter writeRevealJs) + ,("docbook" , TextWriter writeDocbook5) + ,("docbook4" , TextWriter writeDocbook4) + ,("docbook5" , TextWriter writeDocbook5) + ,("jats" , TextWriter writeJATS) + ,("opml" , TextWriter writeOPML) + ,("opendocument" , TextWriter writeOpenDocument) + ,("latex" , TextWriter writeLaTeX) + ,("beamer" , TextWriter writeBeamer) + ,("context" , TextWriter writeConTeXt) + ,("texinfo" , TextWriter writeTexinfo) + ,("man" , TextWriter writeMan) + ,("ms" , TextWriter writeMs) + ,("markdown" , TextWriter writeMarkdown) + ,("markdown_strict" , TextWriter writeMarkdown) + ,("markdown_phpextra" , TextWriter writeMarkdown) + ,("markdown_github" , TextWriter writeMarkdown) + ,("markdown_mmd" , TextWriter writeMarkdown) + ,("plain" , TextWriter writePlain) + ,("rst" , TextWriter writeRST) + ,("mediawiki" , TextWriter writeMediaWiki) + ,("dokuwiki" , TextWriter writeDokuWiki) + ,("zimwiki" , TextWriter writeZimWiki) + ,("textile" , TextWriter writeTextile) + ,("rtf" , TextWriter writeRTF) + ,("org" , TextWriter writeOrg) + ,("asciidoc" , TextWriter writeAsciiDoc) + ,("haddock" , TextWriter writeHaddock) + ,("commonmark" , TextWriter writeCommonMark) + ,("gfm" , TextWriter writeCommonMark) + ,("tei" , TextWriter writeTEI) + ,("muse" , TextWriter writeMuse) + ] + +-- | Retrieve writer, extensions based on formatSpec (format+extensions). +getWriter :: PandocMonad m => String -> Either String (Writer m, Extensions) +getWriter s + = case parseFormatSpec s of + Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] + Right (writerName, setExts) -> + case lookup writerName writers of + Nothing -> Left $ "Unknown writer: " ++ writerName + Just r -> Right (r, setExts $ + getDefaultExtensions writerName) + +writeJSON :: WriterOptions -> Pandoc -> Text +writeJSON _ = UTF8.toText . BL.toStrict . encode diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e9d3dccf1..f91fa8fa0 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -37,49 +37,59 @@ that it has omitted the construct. AsciiDoc: <http://www.methods.co.nz/asciidoc/> -} module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where +import Control.Monad.State.Strict +import Data.Aeson (Result (..), Value (String), fromJSON, toJSON) +import Data.Char (isPunctuation, isSpace) +import Data.List (intercalate, intersperse, stripPrefix) +import qualified Data.Map as M +import Data.Maybe (fromMaybe, isJust) +import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, space) -import Data.Maybe (fromMaybe) -import Data.List ( stripPrefix, intersperse, intercalate ) import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Control.Monad.State -import qualified Data.Map as M -import Data.Aeson (Value(String), fromJSON, toJSON, Result(..)) -import qualified Data.Text as T -import Data.Char (isSpace, isPunctuation) +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared -data WriterState = WriterState { defListMarker :: String +data WriterState = WriterState { defListMarker :: String , orderedListLevel :: Int , bulletListLevel :: Int , intraword :: Bool + , autoIds :: Set.Set String } -- | Convert Pandoc to AsciiDoc. -writeAsciiDoc :: WriterOptions -> Pandoc -> String +writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeAsciiDoc opts document = - evalState (pandocToAsciiDoc opts document) WriterState{ + evalStateT (pandocToAsciiDoc opts document) WriterState{ defListMarker = "::" , orderedListLevel = 1 , bulletListLevel = 1 , intraword = False + , autoIds = Set.empty } +type ADW = StateT WriterState + -- | Return asciidoc representation of document. -pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String +pandocToAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> ADW m Text pandocToAsciiDoc opts (Pandoc meta blocks) = do let titleblock = not $ null (docTitle meta) && null (docAuthors meta) && null (docDate meta) let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToAsciiDoc opts) - (fmap (render colwidth) . inlineListToAsciiDoc opts) + (fmap render' . blockListToAsciiDoc opts) + (fmap render' . inlineListToAsciiDoc opts) meta let addTitleLine (String t) = String $ t <> "\n" <> T.replicate (T.length t) "=" @@ -93,12 +103,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do let context = defField "body" main $ defField "toc" (writerTableOfContents opts && - writerTemplate opts /= Nothing) - $ defField "titleblock" titleblock - $ metadata' + isJust (writerTemplate opts)) + $defField "titleblock" titleblock metadata' case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape special characters for AsciiDoc. escapeString :: String -> String @@ -118,18 +127,19 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker beginsWithOrderedListMarker :: String -> Bool beginsWithOrderedListMarker str = case runParser olMarker defaultParserState "para start" (take 10 str) of - Left _ -> False - Right _ -> True + Left _ -> False + Right _ -> True -- | Convert Pandoc block element to asciidoc. -blockToAsciiDoc :: WriterOptions -- ^ Options +blockToAsciiDoc :: PandocMonad m + => WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState Doc + -> ADW m Doc blockToAsciiDoc _ Null = return empty blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines return $ contents <> blankline -blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do +blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = blockToAsciiDoc opts (Para [Image attr alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do contents <- inlineListToAsciiDoc opts inlines @@ -145,9 +155,11 @@ blockToAsciiDoc opts (LineBlock lns) = do let joinWithLinefeeds = nowrap . mconcat . intersperse cr contents <- joinWithLinefeeds <$> mapM docify lns return $ "[verse]" $$ text "--" $$ contents $$ text "--" $$ blankline -blockToAsciiDoc _ (RawBlock f s) +blockToAsciiDoc _ b@(RawBlock f s) | f == "asciidoc" = return $ text s - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToAsciiDoc _ HorizontalRule = return $ blankline <> text "'''''" <> blankline blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do @@ -155,21 +167,25 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do let len = offset contents -- ident seem to be empty most of the time and asciidoc will generate them automatically -- so lets make them not show up when null - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") + ids <- gets autoIds + let autoId = uniqueIdent inlines ids + modify $ \st -> st{ autoIds = Set.insert autoId ids } + let identifier = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) + then empty else "[[" <> text ident <> "]]" let setext = writerSetextHeaders opts - return $ + return (if setext then identifier $$ contents $$ (case level of - 1 -> text $ replicate len '-' - 2 -> text $ replicate len '~' - 3 -> text $ replicate len '^' - 4 -> text $ replicate len '+' - _ -> empty) <> blankline + 1 -> text $ replicate len '-' + 2 -> text $ replicate len '~' + 3 -> text $ replicate len '^' + 4 -> text $ replicate len '+' + _ -> empty) <> blankline else identifier $$ text (replicate level '=') <> space <> contents <> blankline) -blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ (flush $ +blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush ( if null classes then "...." $$ text str $$ "...." else attrs $$ "----" $$ text str $$ "----") @@ -194,7 +210,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do let isSimple = all (== 0) widths let relativePercentWidths = if isSimple then widths - else map (/ (sum widths)) widths + else map (/ sum widths) widths let widths'' :: [Integer] widths'' = map (floor . (* 100)) relativePercentWidths -- ensure that the widths sum to 100 @@ -210,7 +226,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do AlignCenter -> "^" AlignRight -> ">" AlignDefault -> "") ++ - if wi == 0 then "" else (show wi ++ "%") + if wi == 0 then "" else show wi ++ "%" let headerspec = if all null headers then empty else text "options=\"header\"," @@ -256,21 +272,21 @@ blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $ - zip markers' items + contents <- zipWithM (orderedListItemToAsciiDoc opts) markers' items return $ cat contents <> blankline blockToAsciiDoc opts (DefinitionList items) = do contents <- mapM (definitionListItemToAsciiDoc opts) items return $ cat contents <> blankline blockToAsciiDoc opts (Div (ident,_,_) bs) = do - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") + let identifier = if null ident then empty else ("[[" <> text ident <> "]]") contents <- blockListToAsciiDoc opts bs return $ identifier $$ contents -- | Convert bullet list item (list of blocks) to asciidoc. -bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToAsciiDoc :: PandocMonad m + => WriterOptions -> [Block] -> ADW m Doc bulletListItemToAsciiDoc opts blocks = do - let addBlock :: Doc -> Block -> State WriterState Doc + let addBlock :: PandocMonad m => Doc -> Block -> ADW m Doc addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b return $ d <> cr <> chomp x @@ -278,7 +294,7 @@ bulletListItemToAsciiDoc opts blocks = do return $ d <> cr <> chomp x addBlock d b = do x <- blockToAsciiDoc opts b return $ d <> cr <> text "+" <> cr <> chomp x - lev <- bulletListLevel `fmap` get + lev <- gets bulletListLevel modify $ \s -> s{ bulletListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ bulletListLevel = lev } @@ -286,37 +302,38 @@ bulletListItemToAsciiDoc opts blocks = do return $ marker <> text " " <> contents <> cr -- | Convert ordered list item (a list of blocks) to asciidoc. -orderedListItemToAsciiDoc :: WriterOptions -- ^ options +orderedListItemToAsciiDoc :: PandocMonad m + => WriterOptions -- ^ options -> String -- ^ list item marker -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc + -> ADW m Doc orderedListItemToAsciiDoc opts marker blocks = do - let addBlock :: Doc -> Block -> State WriterState Doc - addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b + let addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b return $ d <> cr <> chomp x addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b return $ d <> cr <> chomp x addBlock d b = do x <- blockToAsciiDoc opts b return $ d <> cr <> text "+" <> cr <> chomp x - lev <- orderedListLevel `fmap` get + lev <- gets orderedListLevel modify $ \s -> s{ orderedListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ orderedListLevel = lev } return $ text marker <> text " " <> contents <> cr -- | Convert definition list item (label, list of blocks) to asciidoc. -definitionListItemToAsciiDoc :: WriterOptions +definitionListItemToAsciiDoc :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> State WriterState Doc + -> ADW m Doc definitionListItemToAsciiDoc opts (label, defs) = do labelText <- inlineListToAsciiDoc opts label - marker <- defListMarker `fmap` get + marker <- gets defListMarker if marker == "::" then modify (\st -> st{ defListMarker = ";;"}) else modify (\st -> st{ defListMarker = "::"}) let divider = cr <> text "+" <> cr - let defsToAsciiDoc :: [Block] -> State WriterState Doc + let defsToAsciiDoc :: PandocMonad m => [Block] -> ADW m Doc defsToAsciiDoc ds = (vcat . intersperse divider . map chomp) `fmap` mapM (blockToAsciiDoc opts) ds defs' <- mapM defsToAsciiDoc defs @@ -325,15 +342,16 @@ definitionListItemToAsciiDoc opts (label, defs) = do return $ labelText <> text marker <> cr <> contents <> cr -- | Convert list of Pandoc block elements to asciidoc. -blockListToAsciiDoc :: WriterOptions -- ^ Options +blockListToAsciiDoc :: PandocMonad m + => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState Doc + -> ADW m Doc blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks data SpacyLocation = End | Start -- | Convert list of Pandoc inline elements to asciidoc. -inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToAsciiDoc :: PandocMonad m => WriterOptions -> [Inline] -> ADW m Doc inlineListToAsciiDoc opts lst = do oldIntraword <- gets intraword setIntraword False @@ -369,14 +387,14 @@ inlineListToAsciiDoc opts lst = do isSpacy Start (Str (c:_)) = isPunctuation c || isSpace c isSpacy _ _ = False -setIntraword :: Bool -> State WriterState () +setIntraword :: PandocMonad m => Bool -> ADW m () setIntraword b = modify $ \st -> st{ intraword = b } -withIntraword :: State WriterState a -> State WriterState a +withIntraword :: PandocMonad m => ADW m a -> ADW m a withIntraword p = setIntraword True *> p <* setIntraword False -- | Convert Pandoc inline element to asciidoc. -inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc +inlineToAsciiDoc :: PandocMonad m => WriterOptions -> Inline -> ADW m Doc inlineToAsciiDoc opts (Emph lst) = do contents <- inlineListToAsciiDoc opts lst isIntraword <- gets intraword @@ -408,16 +426,19 @@ inlineToAsciiDoc _ (Math InlineMath str) = return $ "latexmath:[$" <> text str <> "$]" inlineToAsciiDoc _ (Math DisplayMath str) = return $ "latexmath:[\\[" <> text str <> "\\]]" -inlineToAsciiDoc _ (RawInline f s) +inlineToAsciiDoc _ il@(RawInline f s) | f == "asciidoc" = return $ text s + | otherwise = do + report $ InlineNotRendered il + return empty | otherwise = return empty -inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr +inlineToAsciiDoc _ LineBreak = return $ " +" <> cr inlineToAsciiDoc _ Space = return space inlineToAsciiDoc opts SoftBreak = case writerWrapText opts of - WrapAuto -> return space + WrapAuto -> return space WrapPreserve -> return cr - WrapNone -> return space + WrapNone -> return space inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do -- relative: link:downloads/foo.zip[download foo.zip] @@ -431,20 +452,20 @@ inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) let useAuto = case txt of [Str s] | escapeURI s == srcSuffix -> True - _ -> False + _ -> False return $ if useAuto then text srcSuffix else prefix <> text src <> "[" <> linktext <> "]" inlineToAsciiDoc opts (Image attr alternate (src, tit)) = do -- image:images/logo.png[Company logo, title="blah"] - let txt = if (null alternate) || (alternate == [Str ""]) + let txt = if null alternate || (alternate == [Str ""]) then [Str "image"] else alternate linktext <- inlineListToAsciiDoc opts txt let linktitle = if null tit then empty else ",title=\"" <> text tit <> "\"" - showDim dir = case (dimension dir attr) of + showDim dir = case dimension dir attr of Just (Percent a) -> ["scaledwidth=" <> text (show (Percent a))] Just dim -> @@ -464,6 +485,6 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do -- asciidoc can't handle blank lines in notes inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]" inlineToAsciiDoc opts (Span (ident,_,_) ils) = do - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") + let identifier = if null ident then empty else ("[[" <> text ident <> "]]") contents <- inlineListToAsciiDoc opts ils return $ identifier <> contents diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 88a92eb47..7a6eb2948 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2015-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.CommonMark - Copyright : Copyright (C) 2015 John MacFarlane + Copyright : Copyright (C) 2015-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,34 +32,43 @@ CommonMark: <http://commonmark.org> -} module Text.Pandoc.Writers.CommonMark (writeCommonMark) where -import Text.Pandoc.Writers.HTML (writeHtmlString) +import CMarkGFM +import Control.Monad.State.Strict (State, get, modify, runState) +import Data.Foldable (foldrM) +import Data.List (transpose) +import Data.Monoid (Any (..), (<>)) +import Data.Text (Text) +import qualified Data.Text as T +import Network.HTTP (urlEncode) +import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition -import Text.Pandoc.Shared (isTightList, linesToPara) +import Text.Pandoc.Options +import Text.Pandoc.Shared (isTightList, linesToPara, substitute) import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Walk (query, walk, walkM) +import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import CMark -import qualified Data.Text as T -import Control.Monad.Identity (runIdentity, Identity) -import Control.Monad.State (runState, State, modify, get) -import Text.Pandoc.Walk (walkM) -- | Convert Pandoc to CommonMark. -writeCommonMark :: WriterOptions -> Pandoc -> String -writeCommonMark opts (Pandoc meta blocks) = rendered - where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') - (blocks', notes) = runState (walkM processNotes blocks) [] - notes' = if null notes - then [] - else [OrderedList (1, Decimal, Period) $ reverse notes] - metadata = runIdentity $ metaToJSON opts - (blocksToCommonMark opts) - (inlinesToCommonMark opts) - meta - context = defField "body" main $ metadata - rendered = case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context +writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeCommonMark opts (Pandoc meta blocks) = do + let (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + main <- blocksToCommonMark opts (blocks' ++ notes') + metadata <- metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + let context = defField "body" main metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +softBreakToSpace :: Inline -> Inline +softBreakToSpace SoftBreak = Space +softBreakToSpace x = x processNotes :: Inline -> State [[Block]] Inline processNotes (Note bs) = do @@ -70,111 +80,235 @@ processNotes x = return x node :: NodeType -> [Node] -> Node node = Node Nothing -blocksToCommonMark :: WriterOptions -> [Block] -> Identity String -blocksToCommonMark opts bs = return $ - T.unpack $ nodeToCommonmark cmarkOpts colwidth - $ node DOCUMENT (blocksToNodes bs) - where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing +blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m Text +blocksToCommonMark opts bs = do + let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + nodes <- blocksToNodes opts bs + return $ T.stripEnd $ + nodeToCommonmark cmarkOpts colwidth $ + node DOCUMENT nodes -inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m Text inlinesToCommonMark opts ils = return $ - T.unpack $ nodeToCommonmark cmarkOpts colwidth - $ node PARAGRAPH (inlinesToNodes ils) + nodeToCommonmark cmarkOpts colwidth $ + node PARAGRAPH (inlinesToNodes opts ils) where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing -blocksToNodes :: [Block] -> [Node] -blocksToNodes = foldr blockToNodes [] - -blockToNodes :: Block -> [Node] -> [Node] -blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) -blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) -blockToNodes (LineBlock lns) = blockToNodes $ linesToPara lns -blockToNodes (CodeBlock (_,classes,_) xs) = - (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) -blockToNodes (RawBlock fmt xs) - | fmt == Format "html" = (node (HTML_BLOCK (T.pack xs)) [] :) - | otherwise = (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] :) -blockToNodes (BlockQuote bs) = - (node BLOCK_QUOTE (blocksToNodes bs) :) -blockToNodes (BulletList items) = - (node (LIST ListAttributes{ - listType = BULLET_LIST, - listDelim = PERIOD_DELIM, - listTight = isTightList items, - listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) -blockToNodes (OrderedList (start, _sty, delim) items) = - (node (LIST ListAttributes{ - listType = ORDERED_LIST, - listDelim = case delim of - OneParen -> PAREN_DELIM - TwoParens -> PAREN_DELIM - _ -> PERIOD_DELIM, - listTight = isTightList items, - listStart = start }) (map (node ITEM . blocksToNodes) items) :) -blockToNodes HorizontalRule = (node THEMATIC_BREAK [] :) -blockToNodes (Header lev _ ils) = (node (HEADING lev) (inlinesToNodes ils) :) -blockToNodes (Div _ bs) = (blocksToNodes bs ++) -blockToNodes (DefinitionList items) = blockToNodes (BulletList items') +blocksToNodes :: PandocMonad m => WriterOptions -> [Block] -> m [Node] +blocksToNodes opts = foldrM (blockToNodes opts) [] + +blockToNodes :: PandocMonad m => WriterOptions -> Block -> [Node] -> m [Node] +blockToNodes opts (Plain xs) ns = + return (node PARAGRAPH (inlinesToNodes opts xs) : ns) +blockToNodes opts (Para xs) ns = + return (node PARAGRAPH (inlinesToNodes opts xs) : ns) +blockToNodes opts (LineBlock lns) ns = blockToNodes opts (linesToPara lns) ns +blockToNodes _ (CodeBlock (_,classes,_) xs) ns = return + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns) +blockToNodes opts (RawBlock fmt xs) ns + | fmt == Format "html" && isEnabled Ext_raw_html opts + = return (node (HTML_BLOCK (T.pack xs)) [] : ns) + | fmt == Format "latex" || fmt == Format "tex" && isEnabled Ext_raw_tex opts + = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns) + | otherwise = return ns +blockToNodes opts (BlockQuote bs) ns = do + nodes <- blocksToNodes opts bs + return (node BLOCK_QUOTE nodes : ns) +blockToNodes opts (BulletList items) ns = do + nodes <- mapM (blocksToNodes opts) items + return (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM) nodes) : ns) +blockToNodes opts (OrderedList (start, _sty, delim) items) ns = do + nodes <- mapM (blocksToNodes opts) items + return (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM) nodes) : ns) +blockToNodes _ HorizontalRule ns = return (node THEMATIC_BREAK [] : ns) +blockToNodes opts (Header lev _ ils) ns = + return (node (HEADING lev) (inlinesToNodes opts ils) : ns) +blockToNodes opts (Div attr bs) ns = do + nodes <- blocksToNodes opts bs + let op = tagWithAttributes opts True False "div" attr + if isEnabled Ext_raw_html opts + then return (node (HTML_BLOCK op) [] : nodes ++ + [node (HTML_BLOCK (T.pack "</div>")) []] ++ ns) + else return (nodes ++ ns) +blockToNodes opts (DefinitionList items) ns = + blockToNodes opts (BulletList items') ns where items' = map dlToBullet items - dlToBullet (term, ((Para xs : ys) : zs)) = + dlToBullet (term, (Para xs : ys) : zs) = Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs - dlToBullet (term, ((Plain xs : ys) : zs)) = + dlToBullet (term, (Plain xs : ys) : zs) = Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs dlToBullet (term, xs) = Para term : concat xs -blockToNodes t@(Table _ _ _ _ _) = - (node (HTML_BLOCK (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :) -blockToNodes Null = id - -inlinesToNodes :: [Inline] -> [Node] -inlinesToNodes = foldr inlineToNodes [] - -inlineToNodes :: Inline -> [Node] -> [Node] -inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) -inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) -inlineToNodes LineBreak = (node LINEBREAK [] :) -inlineToNodes SoftBreak = (node SOFTBREAK [] :) -inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) -inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) -inlineToNodes (Strikeout xs) = - ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes xs ++ - [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) -inlineToNodes (Superscript xs) = - ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes xs ++ +blockToNodes opts t@(Table capt aligns _widths headers rows) ns = do + let allcells = concat (headers:rows) + let isLineBreak LineBreak = Any True + isLineBreak _ = Any False + let isPlainOrPara [Para _] = True + isPlainOrPara [Plain _] = True + isPlainOrPara [] = True + isPlainOrPara _ = False + let isSimple = all isPlainOrPara allcells && + not ( getAny (query isLineBreak allcells) ) + if isEnabled Ext_pipe_tables opts && isSimple + then do + -- We construct a table manually as a CUSTOM_BLOCK, for + -- two reasons: (1) cmark-gfm currently doesn't support + -- rendering TABLE nodes; (2) we can align the column sides; + -- (3) we can render the caption as a regular paragraph. + let capt' = node PARAGRAPH (inlinesToNodes opts capt) + -- backslash | in code and raw: + let fixPipe (Code attr xs) = + Code attr (substitute "|" "\\|" xs) + fixPipe (RawInline format xs) = + RawInline format (substitute "|" "\\|" xs) + fixPipe x = x + let toCell [Plain ils] = T.strip + $ nodeToCommonmark [] Nothing + $ node (CUSTOM_INLINE mempty mempty) + $ inlinesToNodes opts + $ walk (fixPipe . softBreakToSpace) ils + toCell [Para ils] = T.strip + $ nodeToCommonmark [] Nothing + $ node (CUSTOM_INLINE mempty mempty) + $ inlinesToNodes opts + $ walk (fixPipe . softBreakToSpace) ils + toCell [] = "" + toCell xs = error $ "toCell encountered " ++ show xs + let separator = " | " + let starter = "| " + let ender = " |" + let rawheaders = map toCell headers + let rawrows = map (map toCell) rows + let maximum' [] = 0 + maximum' xs = maximum xs + let colwidths = map (maximum' . map T.length) $ + transpose (rawheaders:rawrows) + let toHeaderLine len AlignDefault = T.replicate len "-" + toHeaderLine len AlignLeft = ":" <> + T.replicate (max (len - 1) 1) "-" + toHeaderLine len AlignRight = + T.replicate (max (len - 1) 1) "-" <> ":" + toHeaderLine len AlignCenter = ":" <> + T.replicate (max (len - 2) 1) (T.pack "-") <> ":" + let rawheaderlines = zipWith toHeaderLine colwidths aligns + let headerlines = starter <> T.intercalate separator rawheaderlines <> + ender + let padContent (align, w) t' = + let padding = w - T.length t' + halfpadding = padding `div` 2 + in case align of + AlignRight -> T.replicate padding " " <> t' + AlignCenter -> T.replicate halfpadding " " <> t' <> + T.replicate (padding - halfpadding) " " + _ -> t' <> T.replicate padding " " + let toRow xs = starter <> T.intercalate separator + (zipWith padContent (zip aligns colwidths) xs) <> + ender + let table' = toRow rawheaders <> "\n" <> headerlines <> "\n" <> + T.intercalate "\n" (map toRow rawrows) + return (node (CUSTOM_BLOCK table' mempty) [] : + if null capt + then ns + else capt' : ns) + else do -- fall back to raw HTML + s <- writeHtml5String def $! Pandoc nullMeta [t] + return (node (HTML_BLOCK s) [] : ns) +blockToNodes _ Null ns = return ns + +inlinesToNodes :: WriterOptions -> [Inline] -> [Node] +inlinesToNodes opts = foldr (inlineToNodes opts) [] + +inlineToNodes :: WriterOptions -> Inline -> [Node] -> [Node] +inlineToNodes opts (Str s) = (node (TEXT (T.pack s')) [] :) + where s' = if isEnabled Ext_smart opts + then unsmartify opts s + else s +inlineToNodes _ Space = (node (TEXT (T.pack " ")) [] :) +inlineToNodes _ LineBreak = (node LINEBREAK [] :) +inlineToNodes opts SoftBreak + | isEnabled Ext_hard_line_breaks opts = (node LINEBREAK [] :) + | writerWrapText opts == WrapNone = (node (TEXT " ") [] :) + | otherwise = (node SOFTBREAK [] :) +inlineToNodes opts (Emph xs) = (node EMPH (inlinesToNodes opts xs) :) +inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :) +inlineToNodes opts (Strikeout xs) = + if isEnabled Ext_strikeout opts + then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :) + else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++ + [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) +inlineToNodes opts (Superscript xs) = + ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++ [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) -inlineToNodes (Subscript xs) = - ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes xs ++ +inlineToNodes opts (Subscript xs) = + ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++ [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) -inlineToNodes (SmallCaps xs) = - ((node (HTML_INLINE (T.pack "<span style=\"font-variant:small-caps;\">")) [] - : inlinesToNodes xs ++ +inlineToNodes opts (SmallCaps xs) = + ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) [] + : inlinesToNodes opts xs ++ [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) -inlineToNodes (Link _ ils (url,tit)) = - (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) -inlineToNodes (Image _ ils (url,tit)) = - (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) -inlineToNodes (RawInline fmt xs) - | fmt == Format "html" = (node (HTML_INLINE (T.pack xs)) [] :) - | otherwise = (node (CUSTOM_INLINE (T.pack xs) (T.empty)) [] :) -inlineToNodes (Quoted qt ils) = - ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) +inlineToNodes opts (Link _ ils (url,tit)) = + (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) +-- title beginning with fig: indicates implicit figure +inlineToNodes opts (Image alt ils (url,'f':'i':'g':':':tit)) = + inlineToNodes opts (Image alt ils (url,tit)) +inlineToNodes opts (Image _ ils (url,tit)) = + (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) +inlineToNodes opts (RawInline fmt xs) + | fmt == Format "html" && isEnabled Ext_raw_html opts + = (node (HTML_INLINE (T.pack xs)) [] :) + | (fmt == Format "latex" || fmt == Format "tex") && isEnabled Ext_raw_tex opts + = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :) + | otherwise = id +inlineToNodes opts (Quoted qt ils) = + ((node (TEXT start) [] : + inlinesToNodes opts ils ++ [node (TEXT end) []]) ++) where (start, end) = case qt of - SingleQuote -> (T.pack "‘", T.pack "’") - DoubleQuote -> (T.pack "“", T.pack "”") -inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) -inlineToNodes (Math mt str) = - case mt of - InlineMath -> - (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) - DisplayMath -> - (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) -inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) -inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) -inlineToNodes (Note _) = id -- should not occur + SingleQuote + | isEnabled Ext_smart opts -> ("'","'") + | otherwise -> ("‘", "’") + DoubleQuote + | isEnabled Ext_smart opts -> ("\"", "\"") + | otherwise -> ("“", "”") +inlineToNodes _ (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes opts (Math mt str) = + case writerHTMLMathMethod opts of + WebTeX url -> + let core = inlineToNodes opts + (Image nullAttr [Str str] (url ++ urlEncode str, str)) + sep = if mt == DisplayMath + then (node LINEBREAK [] :) + else id + in (sep . core . sep) + _ -> + case mt of + InlineMath -> + (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + DisplayMath -> + (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes opts (Span attr ils) = + let nodes = inlinesToNodes opts ils + op = tagWithAttributes opts True False "span" attr + in if isEnabled Ext_raw_html opts + then ((node (HTML_INLINE op) [] : nodes ++ + [node (HTML_INLINE (T.pack "</span>")) []]) ++) + else (nodes ++) +inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++) +inlineToNodes _ (Note _) = id -- should not occur -- we remove Note elements in preprocessing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index c663c75ce..f94c12d89 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2015 John MacFarlane + Copyright : Copyright (C) 2007-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,20 +30,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into ConTeXt. -} module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where +import Control.Monad.State.Strict +import Data.Char (ord, isDigit) +import Data.List (intercalate, intersperse) +import Data.Maybe (mapMaybe) +import Data.Text (Text) +import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 +import Text.Pandoc.Class (PandocMonad, report, toLang) import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Walk (query) -import Text.Printf ( printf ) -import Data.List ( intercalate, intersperse ) -import Data.Char ( ord ) -import Data.Maybe ( catMaybes ) -import Control.Monad.State import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Network.URI ( isURI, unEscapeString ) +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Walk (query) +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) data WriterState = WriterState { stNextRef :: Int -- number of next URL reference @@ -50,37 +55,43 @@ data WriterState = , stOptions :: WriterOptions -- writer options } +data Tabl = Xtb | Ntb deriving (Show, Eq) + orderedListStyles :: [Char] orderedListStyles = cycle "narg" -- | Convert Pandoc to ConTeXt. -writeConTeXt :: WriterOptions -> Pandoc -> String +writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeConTeXt options document = let defaultWriterState = WriterState { stNextRef = 1 , stOrderedListLevel = 0 , stOptions = options } - in evalState (pandocToConTeXt options document) defaultWriterState + in evalStateT (pandocToConTeXt options document) defaultWriterState -pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String +type WM = StateT WriterState + +pandocToConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> WM m Text pandocToConTeXt options (Pandoc meta blocks) = do let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToConTeXt) - (fmap (render colwidth) . inlineListToConTeXt) + (fmap render' . blockListToConTeXt) + (fmap render' . inlineListToConTeXt) meta body <- mapM (elementToConTeXt options) $ hierarchicalize blocks - let main = (render colwidth . vcat) body - let layoutFromMargins = intercalate [','] $ catMaybes $ - map (\(x,y) -> + let main = (render' . vcat) body + let layoutFromMargins = intercalate [','] $ mapMaybe (\(x,y) -> ((x ++ "=") ++) <$> getField y metadata) [("leftmargin","margin-left") ,("rightmargin","margin-right") ,("top","margin-top") ,("bottom","margin-bottom") ] + mblang <- fromBCP47 (getLang options meta) let context = defField "toc" (writerTableOfContents options) $ defField "placelist" (intercalate ("," :: String) $ take (writerTOCDepth options + @@ -93,14 +104,17 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "body" main $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) - $ metadata - let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $ - getField "lang" context) - $ defField "context-dir" (toContextDir $ getField "dir" context) - $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' + $ maybe id (defField "context-lang") mblang + $ (case getField "papersize" metadata of + Just (('a':d:ds) :: String) + | all isDigit (d:ds) -> resetField "papersize" + (('A':d:ds) :: String) + _ -> id) metadata + let context' = defField "context-dir" (toContextDir + $ getField "dir" context) context + case writerTemplate options of + Nothing -> return main + Just tpl -> renderTemplate' tpl context' toContextDir :: Maybe String -> String toContextDir (Just "rtl") = "r2l" @@ -110,24 +124,24 @@ toContextDir _ = "" -- | escape things as needed for ConTeXt escapeCharForConTeXt :: WriterOptions -> Char -> String escapeCharForConTeXt opts ch = - let ligatures = writerTeXLigatures opts in + let ligatures = isEnabled Ext_smart opts in case ch of - '{' -> "\\{" - '}' -> "\\}" - '\\' -> "\\letterbackslash{}" - '$' -> "\\$" - '|' -> "\\letterbar{}" - '%' -> "\\letterpercent{}" - '~' -> "\\lettertilde{}" - '#' -> "\\#" - '[' -> "{[}" - ']' -> "{]}" - '\160' -> "~" + '{' -> "\\{" + '}' -> "\\}" + '\\' -> "\\letterbackslash{}" + '$' -> "\\$" + '|' -> "\\letterbar{}" + '%' -> "\\letterpercent{}" + '~' -> "\\lettertilde{}" + '#' -> "\\#" + '[' -> "{[}" + ']' -> "{]}" + '\160' -> "~" '\x2014' | ligatures -> "---" '\x2013' | ligatures -> "--" '\x2019' | ligatures -> "'" '\x2026' -> "\\ldots{}" - x -> [x] + x -> [x] -- | Escape string for ConTeXt stringToConTeXt :: WriterOptions -> String -> String @@ -137,20 +151,20 @@ stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) toLabel :: String -> String toLabel z = concatMap go z where go x - | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) + | x `elem` ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) | otherwise = [x] -- | Convert Elements to ConTeXt -elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc +elementToConTeXt :: PandocMonad m => WriterOptions -> Element -> WM m Doc elementToConTeXt _ (Blk block) = blockToConTeXt block elementToConTeXt opts (Sec level _ attr title' elements) = do header' <- sectionHeader attr level title' + footer' <- sectionFooter attr level innerContents <- mapM (elementToConTeXt opts) elements - return $ vcat (header' : innerContents) + return $ header' $$ vcat innerContents $$ footer' -- | Convert Pandoc block element to ConTeXt. -blockToConTeXt :: Block - -> State WriterState Doc +blockToConTeXt :: PandocMonad m => Block -> WM m Doc blockToConTeXt Null = return empty blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure @@ -175,9 +189,12 @@ blockToConTeXt (CodeBlock _ str) = return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline -blockToConTeXt (RawBlock _ _ ) = return empty +blockToConTeXt b@(RawBlock _ _ ) = do + report $ BlockNotRendered b + return empty blockToConTeXt (Div (ident,_,kvs) bs) = do let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" + mblang <- fromBCP47 (lookup "lang" kvs) let wrapRef txt = if null ident then txt else ("\\reference" <> brackets (text $ toLabel ident) <> @@ -186,12 +203,12 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do Just "rtl" -> align "righttoleft" Just "ltr" -> align "lefttoright" _ -> id - wrapLang txt = case lookup "lang" kvs of + wrapLang txt = case mblang of Just lng -> "\\start\\language[" - <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" + <> text lng <> "]" $$ txt $$ "\\stop" Nothing -> txt wrapBlank txt = blankline <> txt <> blankline - fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs + (wrapBlank . wrapLang . wrapDir . wrapRef) <$> blockListToConTeXt bs blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst return $ ("\\startitemize" <> if isTightList lst @@ -201,9 +218,9 @@ blockToConTeXt (BulletList lst) = do blockToConTeXt (OrderedList (start, style', delim) lst) = do st <- get let level = stOrderedListLevel st - put $ st {stOrderedListLevel = level + 1} + put st {stOrderedListLevel = level + 1} contents <- mapM listItemToConTeXt lst - put $ st {stOrderedListLevel = level} + put st {stOrderedListLevel = level} let start' = if start == 1 then "" else "start=" ++ show start let delim' = case delim of DefaultDelim -> "" @@ -238,39 +255,83 @@ blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline -- If this is ever executed, provide a default for the reference identifier. blockToConTeXt (Header level attr lst) = sectionHeader attr level lst blockToConTeXt (Table caption aligns widths heads rows) = do - let colDescriptor colWidth alignment = (case alignment of - AlignLeft -> 'l' - AlignRight -> 'r' - AlignCenter -> 'c' - AlignDefault -> 'l'): - if colWidth == 0 - then "|" - else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|") - let colDescriptors = "|" ++ (concat $ - zipWith colDescriptor widths aligns) - headers <- if all null heads - then return empty - else liftM ($$ "\\HL") $ tableRowToConTeXt heads + opts <- gets stOptions + let tabl = if isEnabled Ext_ntb opts + then Ntb + else Xtb captionText <- inlineListToConTeXt caption - rows' <- mapM tableRowToConTeXt rows - return $ "\\placetable" <> (if null caption - then brackets "none" - else empty) - <> braces captionText $$ - "\\starttable" <> brackets (text colDescriptors) $$ - "\\HL" $$ headers $$ - vcat rows' $$ "\\HL" $$ "\\stoptable" <> blankline - -tableRowToConTeXt :: [[Block]] -> State WriterState Doc -tableRowToConTeXt cols = do - cols' <- mapM blockListToConTeXt cols - return $ (vcat (map ("\\NC " <>) cols')) $$ "\\NC\\AR" - -listItemToConTeXt :: [Block] -> State WriterState Doc + headers <- if all null heads + then return empty + else tableRowToConTeXt tabl aligns widths heads + rows' <- mapM (tableRowToConTeXt tabl aligns widths) rows + body <- tableToConTeXt tabl headers rows' + return $ "\\startplacetable" <> brackets ( + if null caption + then "location=none" + else "title=" <> braces captionText + ) $$ body $$ "\\stopplacetable" <> blankline + +tableToConTeXt :: PandocMonad m => Tabl -> Doc -> [Doc] -> WM m Doc +tableToConTeXt Xtb heads rows = + return $ "\\startxtable" $$ + (if isEmpty heads + then empty + else "\\startxtablehead[head]" $$ heads $$ "\\stopxtablehead") $$ + (if null rows + then empty + else "\\startxtablebody[body]" $$ vcat (init rows) $$ "\\stopxtablebody" $$ + "\\startxtablefoot[foot]" $$ last rows $$ "\\stopxtablefoot") $$ + "\\stopxtable" +tableToConTeXt Ntb heads rows = + return $ "\\startTABLE" $$ + (if isEmpty heads + then empty + else "\\startTABLEhead" $$ heads $$ "\\stopTABLEhead") $$ + (if null rows + then empty + else "\\startTABLEbody" $$ vcat (init rows) $$ "\\stopTABLEbody" $$ + "\\startTABLEfoot" $$ last rows $$ "\\stopTABLEfoot") $$ + "\\stopTABLE" + +tableRowToConTeXt :: PandocMonad m => Tabl -> [Alignment] -> [Double] -> [[Block]] -> WM m Doc +tableRowToConTeXt Xtb aligns widths cols = do + cells <- mapM (tableColToConTeXt Xtb) $ zip3 aligns widths cols + return $ "\\startxrow" $$ vcat cells $$ "\\stopxrow" +tableRowToConTeXt Ntb aligns widths cols = do + cells <- mapM (tableColToConTeXt Ntb) $ zip3 aligns widths cols + return $ vcat cells $$ "\\NC\\NR" + +tableColToConTeXt :: PandocMonad m => Tabl -> (Alignment, Double, [Block]) -> WM m Doc +tableColToConTeXt tabl (align, width, blocks) = do + cellContents <- blockListToConTeXt blocks + let colwidth = if width == 0 + then empty + else "width=" <> braces (text (printf "%.2f\\textwidth" width)) + let halign = alignToConTeXt align + let options = (if keys == empty + then empty + else brackets keys) <> space + where keys = hcat $ intersperse "," $ filter (empty /=) [halign, colwidth] + tableCellToConTeXt tabl options cellContents + +tableCellToConTeXt :: PandocMonad m => Tabl -> Doc -> Doc -> WM m Doc +tableCellToConTeXt Xtb options cellContents = + return $ "\\startxcell" <> options <> cellContents <> " \\stopxcell" +tableCellToConTeXt Ntb options cellContents = + return $ "\\NC" <> options <> cellContents + +alignToConTeXt :: Alignment -> Doc +alignToConTeXt align = case align of + AlignLeft -> "align=right" + AlignRight -> "align=left" + AlignCenter -> "align=middle" + AlignDefault -> empty + +listItemToConTeXt :: PandocMonad m => [Block] -> WM m Doc listItemToConTeXt list = blockListToConTeXt list >>= - return . ("\\item" $$) . (nest 2) + return . ("\\item" $$) . nest 2 -defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc +defListItemToConTeXt :: PandocMonad m => ([Inline], [[Block]]) -> WM m Doc defListItemToConTeXt (term, defs) = do term' <- inlineListToConTeXt term def' <- liftM vsep $ mapM blockListToConTeXt defs @@ -278,12 +339,13 @@ defListItemToConTeXt (term, defs) = do "\\stopdescription" <> blankline -- | Convert list of block elements to ConTeXt. -blockListToConTeXt :: [Block] -> State WriterState Doc +blockListToConTeXt :: PandocMonad m => [Block] -> WM m Doc blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst -- | Convert list of inline elements to ConTeXt. -inlineListToConTeXt :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListToConTeXt :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> WM m Doc inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst -- We add a \strut after a line break that precedes a space, -- or the space gets swallowed @@ -292,13 +354,14 @@ inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst addStruts xs addStruts (x:xs) = x : addStruts xs addStruts [] = [] - isSpacey Space = True + isSpacey Space = True isSpacey (Str ('\160':_)) = True - isSpacey _ = False + isSpacey _ = False -- | Convert inline element to ConTeXt -inlineToConTeXt :: Inline -- ^ Inline to convert - -> State WriterState Doc +inlineToConTeXt :: PandocMonad m + => Inline -- ^ Inline to convert + -> WM m Doc inlineToConTeXt (Emph lst) = do contents <- inlineListToConTeXt lst return $ braces $ "\\em " <> contents @@ -338,8 +401,10 @@ inlineToConTeXt (Math DisplayMath str) = return $ text "\\startformula " <> text str <> text " \\stopformula" <> space inlineToConTeXt (RawInline "context" str) = return $ text str inlineToConTeXt (RawInline "tex" str) = return $ text str -inlineToConTeXt (RawInline _ _) = return empty -inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr +inlineToConTeXt il@(RawInline _ _) = do + report $ InlineNotRendered il + return empty +inlineToConTeXt LineBreak = return $ text "\\crlf" <> cr inlineToConTeXt SoftBreak = do wrapText <- gets (writerWrapText . stOptions) return $ case wrapText of @@ -348,7 +413,7 @@ inlineToConTeXt SoftBreak = do WrapPreserve -> cr inlineToConTeXt Space = return space -- Handle HTML-like internal document references to sections -inlineToConTeXt (Link _ txt (('#' : ref), _)) = do +inlineToConTeXt (Link _ txt ('#' : ref, _)) = do opts <- gets stOptions contents <- inlineListToConTeXt txt let ref' = toLabel $ stringToConTeXt opts ref @@ -374,7 +439,7 @@ inlineToConTeXt (Link _ txt (src, _)) = do inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do opts <- gets stOptions let showDim dir = let d = text (show dir) <> "=" - in case (dimension dir attr) of + in case dimension dir attr of Just (Pixel a) -> [d <> text (showInInch opts (Pixel a)) <> "in"] Just (Percent a) -> @@ -397,84 +462,104 @@ inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents let codeBlock x@(CodeBlock _ _) = [x] - codeBlock _ = [] + codeBlock _ = [] let codeBlocks = query codeBlock contents return $ if null codeBlocks then text "\\footnote{" <> nest 2 contents' <> char '}' else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" inlineToConTeXt (Span (_,_,kvs) ils) = do + mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of Just "rtl" -> braces $ "\\righttoleft " <> txt Just "ltr" -> braces $ "\\lefttoright " <> txt _ -> txt - wrapLang txt = case lookup "lang" kvs of - Just lng -> "\\start\\language[" <> text (fromBcp47' lng) + wrapLang txt = case mblang of + Just lng -> "\\start\\language[" <> text lng <> "]" <> txt <> "\\stop " Nothing -> txt - fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils + (wrapLang . wrapDir) <$> inlineListToConTeXt ils -- | Craft the section header, inserting the section reference, if supplied. -sectionHeader :: Attr +sectionHeader :: PandocMonad m + => Attr -> Int -> [Inline] - -> State WriterState Doc -sectionHeader (ident,classes,_) hdrLevel lst = do + -> WM m Doc +sectionHeader (ident,classes,kvs) hdrLevel lst = do + opts <- gets stOptions contents <- inlineListToConTeXt lst - st <- get - let opts = stOptions st + levelText <- sectionLevelToText opts (ident,classes,kvs) hdrLevel + let ident' = if null ident + then empty + else "reference=" <> braces (text (toLabel ident)) + let contents' = if contents == empty + then empty + else "title=" <> braces contents + let options = if keys == empty || levelText == empty + then empty + else brackets keys + where keys = hcat $ intersperse "," $ filter (empty /=) [contents', ident'] + let starter = if writerSectionDivs opts + then "\\start" + else "\\" + return $ starter <> levelText <> options <> blankline + +-- | Craft the section footer +sectionFooter :: PandocMonad m => Attr -> Int -> WM m Doc +sectionFooter attr hdrLevel = do + opts <- gets stOptions + levelText <- sectionLevelToText opts attr hdrLevel + return $ if writerSectionDivs opts + then "\\stop" <> levelText <> blankline + else empty + +-- | Generate a textual representation of the section level +sectionLevelToText :: PandocMonad m => WriterOptions -> Attr -> Int -> WM m Doc +sectionLevelToText opts (_,classes,_) hdrLevel = do let level' = case writerTopLevelDivision opts of TopLevelPart -> hdrLevel - 2 TopLevelChapter -> hdrLevel - 1 TopLevelSection -> hdrLevel TopLevelDefault -> hdrLevel - let ident' = toLabel ident let (section, chapter) = if "unnumbered" `elem` classes then (text "subject", text "title") else (text "section", text "chapter") return $ case level' of - -1 -> text "\\part" <> braces contents - 0 -> char '\\' <> chapter <> braces contents - n | n >= 1 && n <= 5 -> char '\\' - <> text (concat (replicate (n - 1) "sub")) - <> section - <> (if (not . null) ident' - then brackets (text ident') - else empty) - <> braces contents - <> blankline - _ -> contents <> blankline - -fromBcp47' :: String -> String -fromBcp47' = fromBcp47 . splitBy (=='-') + -1 -> text "part" + 0 -> chapter + n | n >= 1 -> text (concat (replicate (n - 1) "sub")) + <> section + _ -> empty -- cannot happen + +fromBCP47 :: PandocMonad m => Maybe String -> WM m (Maybe String) +fromBCP47 mbs = fromBCP47' <$> toLang mbs -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes -fromBcp47 :: [String] -> String -fromBcp47 [] = "" -fromBcp47 ("ar":"SY":_) = "ar-sy" -fromBcp47 ("ar":"IQ":_) = "ar-iq" -fromBcp47 ("ar":"JO":_) = "ar-jo" -fromBcp47 ("ar":"LB":_) = "ar-lb" -fromBcp47 ("ar":"DZ":_) = "ar-dz" -fromBcp47 ("ar":"MA":_) = "ar-ma" -fromBcp47 ("de":"1901":_) = "deo" -fromBcp47 ("de":"DE":_) = "de-de" -fromBcp47 ("de":"AT":_) = "de-at" -fromBcp47 ("de":"CH":_) = "de-ch" -fromBcp47 ("el":"poly":_) = "agr" -fromBcp47 ("en":"US":_) = "en-us" -fromBcp47 ("en":"GB":_) = "en-gb" -fromBcp47 ("grc":_) = "agr" -fromBcp47 x = fromIso $ head x - where - fromIso "el" = "gr" - fromIso "eu" = "ba" - fromIso "he" = "il" - fromIso "jp" = "ja" - fromIso "uk" = "ua" - fromIso "vi" = "vn" - fromIso "zh" = "cn" - fromIso l = l +fromBCP47' :: Maybe Lang -> Maybe String +fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" +fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" +fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" +fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb" +fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz" +fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma" +fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo" +fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de" +fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at" +fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch" +fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr" +fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us" +fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb" +fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr" +fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr" +fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba" +fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il" +fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja" +fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua" +fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn" +fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn" +fromBCP47' (Just (Lang l _ _ _) ) = Just l +fromBCP47' Nothing = Nothing diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index cf641dcd6..3daa8d0cf 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,11 +1,6 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE FlexibleInstances, OverloadedStrings, - ScopedTypeVariables, DeriveDataTypeable, CPP #-} -#if MIN_VERSION_base(4,8,0) -#else -{-# LANGUAGE OverlappingInstances #-} -#endif -{- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE FlexibleInstances #-} +{- Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -35,20 +30,27 @@ Conversion of 'Pandoc' documents to custom markup using a lua writer. -} module Text.Pandoc.Writers.Custom ( writeCustom ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Data.List ( intersperse ) -import Data.Char ( toLower ) -import Data.Typeable -import Scripting.Lua (LuaState, StackValue, callfunc) -import Text.Pandoc.Writers.Shared -import qualified Scripting.Lua as Lua -import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad (when) +import Control.Arrow ((***)) import Control.Exception +import Control.Monad (when) +import Control.Monad.Trans (MonadIO (liftIO)) +import Data.Char (toLower) +import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text, pack) +import Data.Typeable +import Foreign.Lua (Lua, ToLuaStack (..), callFunc) +import Foreign.Lua.Api +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath) +import Text.Pandoc.Lua.StackInstances () +import Text.Pandoc.Lua.Util (addValue, dostring') +import Text.Pandoc.Options import Text.Pandoc.Templates -import GHC.IO.Encoding (getForeignEncoding,setForeignEncoding, utf8) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Writers.Shared attrToMap :: Attr -> M.Map String String attrToMap (id',classes,keyvals) = M.fromList @@ -56,119 +58,43 @@ attrToMap (id',classes,keyvals) = M.fromList : ("class", unwords classes) : keyvals -#if MIN_VERSION_hslua(0,4,0) -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Char] where -#else -instance StackValue [Char] where -#endif - push lua cs = Lua.push lua (UTF8.fromString cs) - peek lua i = do - res <- Lua.peek lua i - return $ UTF8.toString `fmap` res - valuetype _ = Lua.TSTRING -#else -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue a => StackValue [a] where -#else -instance StackValue a => StackValue [a] where -#endif - push lua xs = do - Lua.createtable lua (length xs + 1) 0 - let addValue (i, x) = Lua.push lua x >> Lua.rawseti lua (-2) i - mapM_ addValue $ zip [1..] xs - peek lua i = do - top <- Lua.gettop lua - let i' = if i < 0 then top + i + 1 else i - Lua.pushnil lua - lst <- getList lua i' - Lua.pop lua 1 - return (Just lst) - valuetype _ = Lua.TTABLE - -getList :: StackValue a => LuaState -> Int -> IO [a] -getList lua i' = do - continue <- Lua.next lua i' - if continue - then do - next <- Lua.peek lua (-1) - Lua.pop lua 1 - x <- maybe (fail "peek returned Nothing") return next - rest <- getList lua i' - return (x : rest) - else return [] -#endif - -instance StackValue Format where - push lua (Format f) = Lua.push lua (map toLower f) - peek l n = fmap Format `fmap` Lua.peek l n - valuetype _ = Lua.TSTRING - -instance (StackValue a, StackValue b) => StackValue (M.Map a b) where - push lua m = do - let xs = M.toList m - Lua.createtable lua (length xs + 1) 0 - let addValue (k, v) = Lua.push lua k >> Lua.push lua v >> - Lua.rawset lua (-3) - mapM_ addValue xs - peek _ _ = undefined -- not needed for our purposes - valuetype _ = Lua.TTABLE - -instance (StackValue a, StackValue b) => StackValue (a,b) where - push lua (k,v) = do - Lua.createtable lua 2 0 - Lua.push lua k - Lua.push lua v - Lua.rawset lua (-3) - peek _ _ = undefined -- not needed for our purposes - valuetype _ = Lua.TTABLE - -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Inline] where -#else -instance StackValue [Inline] where -#endif - push l ils = Lua.push l =<< inlineListToCustom l ils - peek _ _ = undefined - valuetype _ = Lua.TSTRING - -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Block] where -#else -instance StackValue [Block] where -#endif - push l ils = Lua.push l =<< blockListToCustom l ils - peek _ _ = undefined - valuetype _ = Lua.TSTRING - -instance StackValue MetaValue where - push l (MetaMap m) = Lua.push l m - push l (MetaList xs) = Lua.push l xs - push l (MetaBool x) = Lua.push l x - push l (MetaString s) = Lua.push l s - push l (MetaInlines ils) = Lua.push l ils - push l (MetaBlocks bs) = Lua.push l bs - peek _ _ = undefined - valuetype (MetaMap _) = Lua.TTABLE - valuetype (MetaList _) = Lua.TTABLE - valuetype (MetaBool _) = Lua.TBOOLEAN - valuetype (MetaString _) = Lua.TSTRING - valuetype (MetaInlines _) = Lua.TSTRING - valuetype (MetaBlocks _) = Lua.TSTRING - -instance StackValue Citation where - push lua cit = do - Lua.createtable lua 6 0 - let addValue (k :: String, v) = Lua.push lua k >> Lua.push lua v >> - Lua.rawset lua (-3) - addValue ("citationId", citationId cit) - addValue ("citationPrefix", citationPrefix cit) - addValue ("citationSuffix", citationSuffix cit) - addValue ("citationMode", show (citationMode cit)) - addValue ("citationNoteNum", citationNoteNum cit) - addValue ("citationHash", citationHash cit) - peek = undefined - valuetype _ = Lua.TTABLE +newtype Stringify a = Stringify a + +instance ToLuaStack (Stringify Format) where + push (Stringify (Format f)) = push (map toLower f) + +instance ToLuaStack (Stringify [Inline]) where + push (Stringify ils) = push =<< inlineListToCustom ils + +instance ToLuaStack (Stringify [Block]) where + push (Stringify blks) = push =<< blockListToCustom blks + +instance ToLuaStack (Stringify MetaValue) where + push (Stringify (MetaMap m)) = push (fmap Stringify m) + push (Stringify (MetaList xs)) = push (map Stringify xs) + push (Stringify (MetaBool x)) = push x + push (Stringify (MetaString s)) = push s + push (Stringify (MetaInlines ils)) = push (Stringify ils) + push (Stringify (MetaBlocks bs)) = push (Stringify bs) + +instance ToLuaStack (Stringify Citation) where + push (Stringify cit) = do + createtable 6 0 + addValue "citationId" $ citationId cit + addValue "citationPrefix" . Stringify $ citationPrefix cit + addValue "citationSuffix" . Stringify $ citationSuffix cit + addValue "citationMode" $ show (citationMode cit) + addValue "citationNoteNum" $ citationNoteNum cit + addValue "citationHash" $ citationHash cit + +-- | Key-value pair, pushed as a table with @a@ as the only key and @v@ as the +-- associated value. +newtype KeyValue a b = KeyValue (a, b) + +instance (ToLuaStack a, ToLuaStack b) => ToLuaStack (KeyValue a b) where + push (KeyValue (k, v)) = do + newtable + addValue k v data PandocLuaException = PandocLuaException String deriving (Show, Typeable) @@ -176,147 +102,147 @@ data PandocLuaException = PandocLuaException String instance Exception PandocLuaException -- | Convert Pandoc to custom markup. -writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String +writeCustom :: FilePath -> WriterOptions -> Pandoc -> PandocIO Text writeCustom luaFile opts doc@(Pandoc meta _) = do - luaScript <- UTF8.readFile luaFile - enc <- getForeignEncoding - setForeignEncoding utf8 - lua <- Lua.newstate - Lua.openlibs lua - status <- Lua.loadstring lua luaScript luaFile - -- check for error in lua script (later we'll change the return type - -- to handle this more gracefully): - when (status /= 0) $ -#if MIN_VERSION_hslua(0,4,0) - Lua.tostring lua 1 >>= throw . PandocLuaException . UTF8.toString -#else - Lua.tostring lua 1 >>= throw . PandocLuaException -#endif - Lua.call lua 0 0 - -- TODO - call hierarchicalize, so we have that info - rendered <- docToCustom lua opts doc - context <- metaToJSON opts - (blockListToCustom lua) - (inlineListToCustom lua) - meta - Lua.close lua - setForeignEncoding enc - let body = rendered + luaScript <- liftIO $ UTF8.readFile luaFile + res <- runPandocLua $ do + registerScriptPath luaFile + stat <- dostring' luaScript + -- check for error in lua script (later we'll change the return type + -- to handle this more gracefully): + when (stat /= OK) $ + tostring 1 >>= throw . PandocLuaException . UTF8.toString + -- TODO - call hierarchicalize, so we have that info + rendered <- docToCustom opts doc + context <- metaToJSON opts + blockListToCustom + inlineListToCustom + meta + return (rendered, context) + let (body, context) = case res of + Left e -> throw (PandocLuaException (show e)) + Right x -> x case writerTemplate opts of - Nothing -> return body - Just tpl -> return $ renderTemplate' tpl $ setField "body" body context + Nothing -> return $ pack body + Just tpl -> + case applyTemplate (pack tpl) $ setField "body" body context of + Left e -> throw (PandocTemplateError e) + Right r -> return (pack r) -docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String -docToCustom lua opts (Pandoc (Meta metamap) blocks) = do - body <- blockListToCustom lua blocks - callfunc lua "Doc" body metamap (writerVariables opts) +docToCustom :: WriterOptions -> Pandoc -> Lua String +docToCustom opts (Pandoc (Meta metamap) blocks) = do + body <- blockListToCustom blocks + callFunc "Doc" body (fmap Stringify metamap) (writerVariables opts) -- | Convert Pandoc block element to Custom. -blockToCustom :: LuaState -- ^ Lua state - -> Block -- ^ Block element - -> IO String +blockToCustom :: Block -- ^ Block element + -> Lua String -blockToCustom _ Null = return "" +blockToCustom Null = return "" -blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines +blockToCustom (Plain inlines) = callFunc "Plain" (Stringify inlines) -blockToCustom lua (Para [Image attr txt (src,tit)]) = - callfunc lua "CaptionedImage" src tit txt (attrToMap attr) +blockToCustom (Para [Image attr txt (src,tit)]) = + callFunc "CaptionedImage" src tit (Stringify txt) (attrToMap attr) -blockToCustom lua (Para inlines) = callfunc lua "Para" inlines +blockToCustom (Para inlines) = callFunc "Para" (Stringify inlines) -blockToCustom lua (LineBlock linesList) = callfunc lua "LineBlock" linesList +blockToCustom (LineBlock linesList) = callFunc "LineBlock" (map Stringify linesList) -blockToCustom lua (RawBlock format str) = - callfunc lua "RawBlock" format str +blockToCustom (RawBlock format str) = + callFunc "RawBlock" (Stringify format) str -blockToCustom lua HorizontalRule = callfunc lua "HorizontalRule" +blockToCustom HorizontalRule = callFunc "HorizontalRule" -blockToCustom lua (Header level attr inlines) = - callfunc lua "Header" level inlines (attrToMap attr) +blockToCustom (Header level attr inlines) = + callFunc "Header" level (Stringify inlines) (attrToMap attr) -blockToCustom lua (CodeBlock attr str) = - callfunc lua "CodeBlock" str (attrToMap attr) +blockToCustom (CodeBlock attr str) = + callFunc "CodeBlock" str (attrToMap attr) -blockToCustom lua (BlockQuote blocks) = callfunc lua "BlockQuote" blocks +blockToCustom (BlockQuote blocks) = callFunc "BlockQuote" (Stringify blocks) -blockToCustom lua (Table capt aligns widths headers rows') = - callfunc lua "Table" capt (map show aligns) widths headers rows' +blockToCustom (Table capt aligns widths headers rows) = + let aligns' = map show aligns + capt' = Stringify capt + headers' = map Stringify headers + rows' = map (map Stringify) rows + in callFunc "Table" capt' aligns' widths headers' rows' -blockToCustom lua (BulletList items) = callfunc lua "BulletList" items +blockToCustom (BulletList items) = callFunc "BulletList" (map Stringify items) -blockToCustom lua (OrderedList (num,sty,delim) items) = - callfunc lua "OrderedList" items num (show sty) (show delim) +blockToCustom (OrderedList (num,sty,delim) items) = + callFunc "OrderedList" (map Stringify items) num (show sty) (show delim) -blockToCustom lua (DefinitionList items) = - callfunc lua "DefinitionList" items +blockToCustom (DefinitionList items) = + callFunc "DefinitionList" + (map (KeyValue . (Stringify *** map Stringify)) items) -blockToCustom lua (Div attr items) = - callfunc lua "Div" items (attrToMap attr) +blockToCustom (Div attr items) = + callFunc "Div" (Stringify items) (attrToMap attr) -- | Convert list of Pandoc block elements to Custom. -blockListToCustom :: LuaState -- ^ Options - -> [Block] -- ^ List of block elements - -> IO String -blockListToCustom lua xs = do - blocksep <- callfunc lua "Blocksep" - bs <- mapM (blockToCustom lua) xs +blockListToCustom :: [Block] -- ^ List of block elements + -> Lua String +blockListToCustom xs = do + blocksep <- callFunc "Blocksep" + bs <- mapM blockToCustom xs return $ mconcat $ intersperse blocksep bs -- | Convert list of Pandoc inline elements to Custom. -inlineListToCustom :: LuaState -> [Inline] -> IO String -inlineListToCustom lua lst = do - xs <- mapM (inlineToCustom lua) lst - return $ concat xs +inlineListToCustom :: [Inline] -> Lua String +inlineListToCustom lst = do + xs <- mapM inlineToCustom lst + return $ mconcat xs -- | Convert Pandoc inline element to Custom. -inlineToCustom :: LuaState -> Inline -> IO String +inlineToCustom :: Inline -> Lua String -inlineToCustom lua (Str str) = callfunc lua "Str" str +inlineToCustom (Str str) = callFunc "Str" str -inlineToCustom lua Space = callfunc lua "Space" +inlineToCustom Space = callFunc "Space" -inlineToCustom lua SoftBreak = callfunc lua "SoftBreak" +inlineToCustom SoftBreak = callFunc "SoftBreak" -inlineToCustom lua (Emph lst) = callfunc lua "Emph" lst +inlineToCustom (Emph lst) = callFunc "Emph" (Stringify lst) -inlineToCustom lua (Strong lst) = callfunc lua "Strong" lst +inlineToCustom (Strong lst) = callFunc "Strong" (Stringify lst) -inlineToCustom lua (Strikeout lst) = callfunc lua "Strikeout" lst +inlineToCustom (Strikeout lst) = callFunc "Strikeout" (Stringify lst) -inlineToCustom lua (Superscript lst) = callfunc lua "Superscript" lst +inlineToCustom (Superscript lst) = callFunc "Superscript" (Stringify lst) -inlineToCustom lua (Subscript lst) = callfunc lua "Subscript" lst +inlineToCustom (Subscript lst) = callFunc "Subscript" (Stringify lst) -inlineToCustom lua (SmallCaps lst) = callfunc lua "SmallCaps" lst +inlineToCustom (SmallCaps lst) = callFunc "SmallCaps" (Stringify lst) -inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst +inlineToCustom (Quoted SingleQuote lst) = callFunc "SingleQuoted" (Stringify lst) -inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst +inlineToCustom (Quoted DoubleQuote lst) = callFunc "DoubleQuoted" (Stringify lst) -inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs +inlineToCustom (Cite cs lst) = callFunc "Cite" (Stringify lst) (map Stringify cs) -inlineToCustom lua (Code attr str) = - callfunc lua "Code" str (attrToMap attr) +inlineToCustom (Code attr str) = + callFunc "Code" str (attrToMap attr) -inlineToCustom lua (Math DisplayMath str) = - callfunc lua "DisplayMath" str +inlineToCustom (Math DisplayMath str) = + callFunc "DisplayMath" str -inlineToCustom lua (Math InlineMath str) = - callfunc lua "InlineMath" str +inlineToCustom (Math InlineMath str) = + callFunc "InlineMath" str -inlineToCustom lua (RawInline format str) = - callfunc lua "RawInline" format str +inlineToCustom (RawInline format str) = + callFunc "RawInline" (Stringify format) str -inlineToCustom lua (LineBreak) = callfunc lua "LineBreak" +inlineToCustom LineBreak = callFunc "LineBreak" -inlineToCustom lua (Link attr txt (src,tit)) = - callfunc lua "Link" txt src tit (attrToMap attr) +inlineToCustom (Link attr txt (src,tit)) = + callFunc "Link" (Stringify txt) src tit (attrToMap attr) -inlineToCustom lua (Image attr alt (src,tit)) = - callfunc lua "Image" alt src tit (attrToMap attr) +inlineToCustom (Image attr alt (src,tit)) = + callFunc "Image" (Stringify alt) src tit (attrToMap attr) -inlineToCustom lua (Note contents) = callfunc lua "Note" contents +inlineToCustom (Note contents) = callFunc "Note" (Stringify contents) -inlineToCustom lua (Span attr items) = - callfunc lua "Span" items (attrToMap attr) +inlineToCustom (Span attr items) = + callFunc "Span" (Stringify items) (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 44f96d700..3034fade5 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE OverloadedStrings, PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,34 +29,43 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to Docbook XML. -} -module Text.Pandoc.Writers.Docbook ( writeDocbook) where +module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where +import Control.Monad.Reader +import Data.Char (toLower) +import Data.Generics (everywhere, mkT) +import Data.List (isPrefixOf, isSuffixOf, stripPrefix) +import Data.Monoid (Any (..)) +import Data.Text (Text) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.XML +import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Pretty import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk +import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Readers.TeXMath -import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf ) -import Data.Char ( toLower ) -import Data.Monoid ( Any(..) ) -import Text.Pandoc.Highlighting ( languages, languagesByExtension ) -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import qualified Text.Pandoc.Builder as B +import Text.Pandoc.XML import Text.TeXMath import qualified Text.XML.Light as Xml -import Data.Generics (everywhere, mkT) + +data DocBookVersion = DocBook4 | DocBook5 + deriving (Eq, Show) + +type DB = ReaderT DocBookVersion -- | Convert list of authors to a docbook <author> section -authorToDocbook :: WriterOptions -> [Inline] -> B.Inlines -authorToDocbook opts name' = - let name = render Nothing $ inlinesToDocbook opts name' - colwidth = if writerWrapText opts == WrapAuto +authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines +authorToDocbook opts name' = do + name <- render Nothing <$> inlinesToDocbook opts name' + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - in B.rawInline "docbook" $ render colwidth $ + return $ B.rawInline "docbook" $ render colwidth $ if ',' `elem` name then -- last name first let (lastname, rest) = break (==',') name @@ -66,52 +76,63 @@ authorToDocbook opts name' = let namewords = words name lengthname = length namewords (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (intercalate " " (take (n-1) namewords), last namewords) + 0 -> ("","") + 1 -> ("", name) + n -> (unwords (take (n-1) namewords), last namewords) in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ inTagsSimple "surname" (text $ escapeStringForXML lastname) +writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeDocbook4 opts d = + runReaderT (writeDocbook opts d) DocBook4 + +writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeDocbook5 opts d = + runReaderT (writeDocbook opts d) DocBook5 + -- | Convert Pandoc document to string in Docbook format. -writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc meta blocks) = +writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m Text +writeDocbook opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks - colwidth = if writerWrapText opts == WrapAuto + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - render' = render colwidth - opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr) + let render' :: Doc -> Text + render' = render colwidth + let opts' = if maybe False (("/book>" `isSuffixOf`) . trimr) (writerTemplate opts) && - TopLevelDefault == writerTopLevelDivision opts) + TopLevelDefault == writerTopLevelDivision opts then opts{ writerTopLevelDivision = TopLevelChapter } else opts - -- The numbering here follows LaTeX's internal numbering - startLvl = case writerTopLevelDivision opts' of + -- The numbering here follows LaTeX's internal numbering + let startLvl = case writerTopLevelDivision opts' of TopLevelPart -> -1 TopLevelChapter -> 0 TopLevelSection -> 1 TopLevelDefault -> 1 - auths' = map (authorToDocbook opts) $ docAuthors meta - meta' = B.setMeta "author" auths' meta - Just metadata = metaToJSON opts - (Just . render colwidth . (vcat . - (map (elementToDocbook opts' startLvl)) . hierarchicalize)) - (Just . render colwidth . inlinesToDocbook opts') + auths' <- mapM (authorToDocbook opts) $ docAuthors meta + let meta' = B.setMeta "author" auths' meta + metadata <- metaToJSON opts + (fmap (render' . vcat) . + mapM (elementToDocbook opts' startLvl) . + hierarchicalize) + (fmap render' . inlinesToDocbook opts') meta' - main = render' $ vcat (map (elementToDocbook opts' startLvl) elements) - context = defField "body" main - $ defField "mathml" (case writerHTMLMathMethod opts of - MathML _ -> True - _ -> False) - $ metadata - in case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + main <- (render' . vcat) <$> mapM (elementToDocbook opts' startLvl) elements + let context = defField "body" main + $ + defField "mathml" (case writerHTMLMathMethod opts of + MathML -> True + _ -> False) metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to Docbook. -elementToDocbook :: WriterOptions -> Int -> Element -> Doc +elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc elementToDocbook opts _ (Blk block) = blockToDocbook opts block -elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = +elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = do + version <- ask -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] @@ -119,24 +140,25 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = tag = case lvl of -1 -> "part" 0 -> "chapter" - n | n >= 1 && n <= 5 -> if writerDocbook5 opts + n | n >= 1 && n <= 5 -> if version == DocBook5 then "section" else "sect" ++ show n _ -> "simplesect" - idName = if writerDocbook5 opts + idName = if version == DocBook5 then "xml:id" else "id" idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')] - nsAttr = if writerDocbook5 opts && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] + nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] else [] attribs = nsAttr ++ idAttr - in inTags True tag attribs $ - inTagsSimple "title" (inlinesToDocbook opts title) $$ - vcat (map (elementToDocbook opts (lvl + 1)) elements') + contents <- mapM (elementToDocbook opts (lvl + 1)) elements' + title' <- inlinesToDocbook opts title + return $ inTags True tag attribs $ + inTagsSimple "title" title' $$ vcat contents -- | Convert a list of Pandoc blocks to Docbook. -blocksToDocbook :: WriterOptions -> [Block] -> Doc -blocksToDocbook opts = vcat . map (blockToDocbook opts) +blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc +blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -145,74 +167,82 @@ plainToPara x = x -- | Convert a list of pairs of terms and definitions into a list of -- Docbook varlistentrys. -deflistItemsToDocbook :: WriterOptions -> [([Inline],[[Block]])] -> Doc +deflistItemsToDocbook :: PandocMonad m + => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc deflistItemsToDocbook opts items = - vcat $ map (\(term, defs) -> deflistItemToDocbook opts term defs) items + vcat <$> mapM (uncurry (deflistItemToDocbook opts)) items -- | Convert a term and a list of blocks into a Docbook varlistentry. -deflistItemToDocbook :: WriterOptions -> [Inline] -> [[Block]] -> Doc -deflistItemToDocbook opts term defs = - let def' = concatMap (map plainToPara) defs - in inTagsIndented "varlistentry" $ - inTagsIndented "term" (inlinesToDocbook opts term) $$ - inTagsIndented "listitem" (blocksToDocbook opts def') +deflistItemToDocbook :: PandocMonad m + => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc +deflistItemToDocbook opts term defs = do + term' <- inlinesToDocbook opts term + def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs + return $ inTagsIndented "varlistentry" $ + inTagsIndented "term" term' $$ + inTagsIndented "listitem" def' -- | Convert a list of lists of blocks to a list of Docbook list items. -listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items +listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m Doc +listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items -- | Convert a list of blocks into a Docbook list item. -listItemToDocbook :: WriterOptions -> [Block] -> Doc +listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc listItemToDocbook opts item = - inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item + inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item) imageToDocbook :: WriterOptions -> Attr -> String -> Doc imageToDocbook _ attr src = selfClosingTag "imagedata" $ ("fileref", src) : idAndRole attr ++ dims where dims = go Width "width" ++ go Height "depth" - go dir dstr = case (dimension dir attr) of + go dir dstr = case dimension dir attr of Just a -> [(dstr, show a)] Nothing -> [] -- | Convert a Pandoc block element to Docbook. -blockToDocbook :: WriterOptions -> Block -> Doc -blockToDocbook _ Null = empty +blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m Doc +blockToDocbook _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: blockToDocbook opts (Div (ident,_,_) [Para lst]) = let attribs = [("id", ident) | not (null ident)] in if hasLineBreaks lst - then flush $ nowrap $ inTags False "literallayout" attribs - $ inlinesToDocbook opts lst - else inTags True "para" attribs $ inlinesToDocbook opts lst -blockToDocbook opts (Div (ident,_,_) bs) = - (if null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) $$ - blocksToDocbook opts (map plainToPara bs) -blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize + then (flush . nowrap . inTags False "literallayout" attribs) + <$> inlinesToDocbook opts lst + else inTags True "para" attribs <$> inlinesToDocbook opts lst +blockToDocbook opts (Div (ident,_,_) bs) = do + contents <- blocksToDocbook opts (map plainToPara bs) + return $ + (if null ident + then mempty + else selfClosingTag "anchor" [("id", ident)]) $$ contents +blockToDocbook _ h@Header{} = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure -blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = - let alt = inlinesToDocbook opts txt - capt = if null txt +blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do + alt <- inlinesToDocbook opts txt + let capt = if null txt then empty else inTagsSimple "title" alt - in inTagsIndented "figure" $ + return $ inTagsIndented "figure" $ capt $$ - (inTagsIndented "mediaobject" $ - (inTagsIndented "imageobject" - (imageToDocbook opts attr src)) $$ + inTagsIndented "mediaobject" ( + inTagsIndented "imageobject" + (imageToDocbook opts attr src) $$ inTagsSimple "textobject" (inTagsSimple "phrase" alt)) blockToDocbook opts (Para lst) - | hasLineBreaks lst = flush $ nowrap $ inTagsSimple "literallayout" $ inlinesToDocbook opts lst - | otherwise = inTagsIndented "para" $ inlinesToDocbook opts lst + | hasLineBreaks lst = (flush . nowrap . inTagsSimple "literallayout") + <$> inlinesToDocbook opts lst + | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst blockToDocbook opts (LineBlock lns) = blockToDocbook opts $ linesToPara lns blockToDocbook opts (BlockQuote blocks) = - inTagsIndented "blockquote" $ blocksToDocbook opts blocks -blockToDocbook _ (CodeBlock (_,classes,_) str) = + inTagsIndented "blockquote" <$> blocksToDocbook opts blocks +blockToDocbook _ (CodeBlock (_,classes,_) str) = return $ text ("<programlisting" ++ lang ++ ">") <> cr <> flush (text (escapeStringForXML str) <> cr <> text "</programlisting>") where lang = if null langs @@ -224,11 +254,11 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = then [s] else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes -blockToDocbook opts (BulletList lst) = +blockToDocbook opts (BulletList lst) = do let attribs = [("spacing", "compact") | isTightList lst] - in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst -blockToDocbook _ (OrderedList _ []) = empty -blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = + inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst +blockToDocbook _ (OrderedList _ []) = return empty +blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do let numeration = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] @@ -239,40 +269,46 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerRoman -> [("numeration", "lowerroman")] spacing = [("spacing", "compact") | isTightList (first:rest)] attribs = numeration ++ spacing - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest - in inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = + items <- if start == 1 + then listItemsToDocbook opts (first:rest) + else do + first' <- blocksToDocbook opts (map plainToPara first) + rest' <- listItemsToDocbook opts rest + return $ + inTags True "listitem" [("override",show start)] first' $$ + rest' + return $ inTags True "orderedlist" attribs items +blockToDocbook opts (DefinitionList lst) = do let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] - in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst -blockToDocbook opts (RawBlock f str) - | f == "docbook" = text str -- raw XML block - | f == "html" = if writerDocbook5 opts - then empty -- No html in Docbook5 - else text str -- allow html for backwards compatibility - | otherwise = empty -blockToDocbook _ HorizontalRule = empty -- not semantic -blockToDocbook opts (Table caption aligns widths headers rows) = - let captionDoc = if null caption - then empty - else inTagsIndented "title" - (inlinesToDocbook opts caption) - tableType = if isEmpty captionDoc then "informaltable" else "table" + inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst +blockToDocbook _ b@(RawBlock f str) + | f == "docbook" = return $ text str -- raw XML block + | f == "html" = do + version <- ask + if version == DocBook5 + then return empty -- No html in Docbook5 + else return $ text str -- allow html for backwards compatibility + | otherwise = do + report $ BlockNotRendered b + return empty +blockToDocbook _ HorizontalRule = return empty -- not semantic +blockToDocbook opts (Table caption aligns widths headers rows) = do + captionDoc <- if null caption + then return empty + else inTagsIndented "title" <$> + inlinesToDocbook opts caption + let tableType = if isEmpty captionDoc then "informaltable" else "table" percent w = show (truncate (100*w) :: Integer) ++ "*" coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec" ([("colwidth", percent w) | w > 0] ++ [("align", alignmentToString al)])) widths aligns - head' = if all null headers - then empty - else inTagsIndented "thead" $ - tableRowToDocbook opts headers - body' = inTagsIndented "tbody" $ - vcat $ map (tableRowToDocbook opts) rows - in inTagsIndented tableType $ captionDoc $$ - (inTags True "tgroup" [("cols", show (length headers))] $ + head' <- if all null headers + then return empty + else inTagsIndented "thead" <$> tableRowToDocbook opts headers + body' <- (inTagsIndented "tbody" . vcat) <$> + mapM (tableRowToDocbook opts) rows + return $ inTagsIndented tableType $ captionDoc $$ + inTags True "tgroup" [("cols", show (length headers))] ( coltags $$ head' $$ body') hasLineBreaks :: [Inline] -> Bool @@ -280,101 +316,111 @@ hasLineBreaks = getAny . query isLineBreak . walk removeNote where removeNote :: Inline -> Inline removeNote (Note _) = Str "" - removeNote x = x + removeNote x = x isLineBreak :: Inline -> Any isLineBreak LineBreak = Any True - isLineBreak _ = Any False + isLineBreak _ = Any False alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" AlignDefault -> "left" -tableRowToDocbook :: WriterOptions +tableRowToDocbook :: PandocMonad m + => WriterOptions -> [[Block]] - -> Doc + -> DB m Doc tableRowToDocbook opts cols = - inTagsIndented "row" $ vcat $ map (tableItemToDocbook opts) cols + (inTagsIndented "row" . vcat) <$> mapM (tableItemToDocbook opts) cols -tableItemToDocbook :: WriterOptions +tableItemToDocbook :: PandocMonad m + => WriterOptions -> [Block] - -> Doc + -> DB m Doc tableItemToDocbook opts item = - inTags True "entry" [] $ vcat $ map (blockToDocbook opts) item + (inTags True "entry" [] . vcat) <$> mapM (blockToDocbook opts) item -- | Convert a list of inline elements to Docbook. -inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst +inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc +inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst -- | Convert an inline element to Docbook. -inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook _ (Str str) = text $ escapeStringForXML str +inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m Doc +inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str inlineToDocbook opts (Emph lst) = - inTagsSimple "emphasis" $ inlinesToDocbook opts lst + inTagsSimple "emphasis" <$> inlinesToDocbook opts lst inlineToDocbook opts (Strong lst) = - inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst + inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Strikeout lst) = - inTags False "emphasis" [("role", "strikethrough")] $ + inTags False "emphasis" [("role", "strikethrough")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Superscript lst) = - inTagsSimple "superscript" $ inlinesToDocbook opts lst + inTagsSimple "superscript" <$> inlinesToDocbook opts lst inlineToDocbook opts (Subscript lst) = - inTagsSimple "subscript" $ inlinesToDocbook opts lst + inTagsSimple "subscript" <$> inlinesToDocbook opts lst inlineToDocbook opts (SmallCaps lst) = - inTags False "emphasis" [("role", "smallcaps")] $ + inTags False "emphasis" [("role", "smallcaps")] <$> inlinesToDocbook opts lst inlineToDocbook opts (Quoted _ lst) = - inTagsSimple "quote" $ inlinesToDocbook opts lst + inTagsSimple "quote" <$> inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst inlineToDocbook opts (Span (ident,_,_) ils) = - (if null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) <> + ((if null ident + then mempty + else selfClosingTag "anchor" [("id", ident)]) <>) <$> inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = - inTagsSimple "literal" $ text (escapeStringForXML str) + return $ inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) - | isMathML (writerHTMLMathMethod opts) = - case writeMathML dt <$> readTeX str of - Right r -> inTagsSimple tagtype - $ text $ Xml.ppcElement conf - $ fixNS - $ removeAttr r - Left _ -> inlinesToDocbook opts - $ texMathToInlines t str - | otherwise = inlinesToDocbook opts $ texMathToInlines t str - where (dt, tagtype) = case t of - InlineMath -> (DisplayInline,"inlineequation") - DisplayMath -> (DisplayBlock,"informalequation") + | isMathML (writerHTMLMathMethod opts) = do + res <- convertMath writeMathML t str + case res of + Right r -> return $ inTagsSimple tagtype + $ text $ Xml.ppcElement conf + $ fixNS + $ removeAttr r + Left il -> inlineToDocbook opts il + | otherwise = + texMathToInlines t str >>= inlinesToDocbook opts + where tagtype = case t of + InlineMath -> "inlineequation" + DisplayMath -> "informalequation" conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP removeAttr e = e{ Xml.elAttribs = [] } fixNS' qname = qname{ Xml.qPrefix = Just "mml" } fixNS = everywhere (mkT fixNS') -inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x - | otherwise = empty -inlineToDocbook _ LineBreak = text "\n" -inlineToDocbook _ Space = space +inlineToDocbook _ il@(RawInline f x) + | f == "html" || f == "docbook" = return $ text x + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToDocbook _ LineBreak = return $ text "\n" +-- currently ignore, would require the option to add custom +-- styles to the document +inlineToDocbook _ Space = return space -- because we use \n for LineBreak, we can't do soft breaks: -inlineToDocbook _ SoftBreak = space +inlineToDocbook _ SoftBreak = return space inlineToDocbook opts (Link attr txt (src, _)) | Just email <- stripPrefix "mailto:" src = let emailLink = inTagsSimple "email" $ text $ - escapeStringForXML $ email + escapeStringForXML email in case txt of - [Str s] | escapeURI s == email -> emailLink - _ -> inlinesToDocbook opts txt <+> - char '(' <> emailLink <> char ')' - | otherwise = - (if isPrefixOf "#" src - then inTags False "link" $ ("linkend", drop 1 src) : idAndRole attr - else if writerDocbook5 opts + [Str s] | escapeURI s == email -> return emailLink + _ -> do contents <- inlinesToDocbook opts txt + return $ contents <+> + char '(' <> emailLink <> char ')' + | otherwise = do + version <- ask + (if "#" `isPrefixOf` src + then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr + else if version == DocBook5 then inTags False "link" $ ("xlink:href", src) : idAndRole attr - else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ - inlinesToDocbook opts txt -inlineToDocbook opts (Image attr _ (src, tit)) = + else inTags False "ulink" $ ("url", src) : idAndRole attr ) + <$> inlinesToDocbook opts txt +inlineToDocbook opts (Image attr _ (src, tit)) = return $ let titleDoc = if null tit then empty else inTagsIndented "objectinfo" $ @@ -382,11 +428,11 @@ inlineToDocbook opts (Image attr _ (src, tit)) = in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ imageToDocbook opts attr src inlineToDocbook opts (Note contents) = - inTagsIndented "footnote" $ blocksToDocbook opts contents + inTagsIndented "footnote" <$> blocksToDocbook opts contents isMathML :: HTMLMathMethod -> Bool -isMathML (MathML _) = True -isMathML _ = False +isMathML MathML = True +isMathML _ = False idAndRole :: Attr -> [(String, String)] idAndRole (id',cls,_) = ident ++ role diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 3fc5d22a2..4542389a2 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,6 +1,9 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,44 +32,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import Codec.Archive.Zip +import Control.Applicative ((<|>)) +import Control.Monad.Except (catchError) +import Control.Monad.Reader +import Control.Monad.State.Strict import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString.Lazy.Char8 as BL8 +import Data.Char (isSpace, ord, toLower) +import Data.List (intercalate, isPrefixOf, isSuffixOf) import qualified Data.Map as M +import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set -import qualified Text.Pandoc.UTF8 as UTF8 -import Codec.Archive.Zip +import qualified Data.Text as T import Data.Time.Clock.POSIX -import System.Environment +import Skylighting +import System.Random (randomR, StdGen, mkStdGen) +import Text.Pandoc.BCP47 (getLang, renderLang) +import Text.Pandoc.Class (PandocMonad, report, toLang) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Compat.Time import Text.Pandoc.Definition import Text.Pandoc.Generic +import Text.Pandoc.Highlighting (highlight) import Text.Pandoc.ImageSize -import Text.Pandoc.Shared hiding (Element) -import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.Logging +import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType, + getMimeTypeDef) import Text.Pandoc.Options -import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Highlighting ( highlight ) -import Text.Pandoc.Walk -import Text.XML.Light as XML -import Text.TeXMath import Text.Pandoc.Readers.Docx.StyleMap -import Text.Pandoc.Readers.Docx.Util (elemName) -import Control.Monad.Reader -import Control.Monad.State -import Skylighting -import Data.Unique (hashUnique, newUnique) -import System.Random (randomRIO) +import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Walk +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared (fixDisplayMath, metaValueToInlines) import Text.Printf (printf) -import qualified Control.Exception as E -import Data.Monoid ((<>)) -import qualified Data.Text as T -import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, - extensionFromMimeType) -import Control.Applicative ((<|>)) -import Data.Maybe (fromMaybe, mapMaybe, maybeToList, isNothing) -import Data.Char (ord, isSpace, toLower) +import Text.TeXMath +import Text.XML.Light as XML +import Text.XML.Light.Cursor as XMLC +import Text.Pandoc.Writers.OOXML data ListMarker = NoMarker | BulletMarker @@ -79,28 +82,28 @@ listMarkerToId BulletMarker = "991" listMarkerToId (NumberMarker sty delim n) = '9' : '9' : styNum : delimNum : show n where styNum = case sty of - DefaultStyle -> '2' - Example -> '3' - Decimal -> '4' - LowerRoman -> '5' - UpperRoman -> '6' - LowerAlpha -> '7' - UpperAlpha -> '8' + DefaultStyle -> '2' + Example -> '3' + Decimal -> '4' + LowerRoman -> '5' + UpperRoman -> '6' + LowerAlpha -> '7' + UpperAlpha -> '8' delimNum = case delim of - DefaultDelim -> '0' - Period -> '1' - OneParen -> '2' - TwoParens -> '3' + DefaultDelim -> '0' + Period -> '1' + OneParen -> '2' + TwoParens -> '3' data WriterEnv = WriterEnv{ envTextProperties :: [Element] , envParaProperties :: [Element] - , envRTL :: Bool - , envListLevel :: Int - , envListNumId :: Int - , envInDel :: Bool - , envChangesAuthor :: String - , envChangesDate :: String - , envPrintWidth :: Integer + , envRTL :: Bool + , envListLevel :: Int + , envListNumId :: Int + , envInDel :: Bool + , envChangesAuthor :: String + , envChangesDate :: String + , envPrintWidth :: Integer } defaultWriterEnv :: WriterEnv @@ -117,22 +120,25 @@ defaultWriterEnv = WriterEnv{ envTextProperties = [] data WriterState = WriterState{ stFootnotes :: [Element] + , stComments :: [([(String,String)], [Inline])] , stSectionIds :: Set.Set String , stExternalLinks :: M.Map String String - , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString) + , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) , stLists :: [ListMarker] , stInsId :: Int , stDelId :: Int , stStyleMaps :: StyleMaps , stFirstPara :: Bool , stTocTitle :: [Inline] - , stDynamicParaProps :: [String] - , stDynamicTextProps :: [String] + , stDynamicParaProps :: Set.Set String + , stDynamicTextProps :: Set.Set String + , stCurId :: Int } defaultWriterState :: WriterState defaultWriterState = WriterState{ stFootnotes = defaultFootnotes + , stComments = [] , stSectionIds = Set.empty , stExternalLinks = M.empty , stImages = M.empty @@ -141,44 +147,29 @@ defaultWriterState = WriterState{ , stDelId = 1 , stStyleMaps = defaultStyleMaps , stFirstPara = False - , stTocTitle = normalizeInlines [Str "Table of Contents"] - , stDynamicParaProps = [] - , stDynamicTextProps = [] + , stTocTitle = [Str "Table of Contents"] + , stDynamicParaProps = Set.empty + , stDynamicTextProps = Set.empty + , stCurId = 20 } -type WS = ReaderT WriterEnv (StateT WriterState IO) - -mknode :: Node t => String -> [(String,String)] -> t -> Element -mknode s attrs = - add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) +type WS m = ReaderT WriterEnv (StateT WriterState m) -nodename :: String -> QName -nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } - where (name, prefix) = case break (==':') s of - (xs,[]) -> (xs, Nothing) - (ys, _:zs) -> (zs, Just ys) - -toLazy :: B.ByteString -> BL.ByteString -toLazy = BL.fromChunks . (:[]) - -renderXml :: Element -> BL.ByteString -renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> - UTF8.fromStringLazy (showElement elt) renumIdMap :: Int -> [Element] -> M.Map String String renumIdMap _ [] = M.empty renumIdMap n (e:es) | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = - M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es) + M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es) | otherwise = renumIdMap n es replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] replaceAttr _ _ [] = [] replaceAttr f val (a:as) | f (attrKey a) = - (XML.Attr (attrKey a) val) : (replaceAttr f val as) - | otherwise = a : (replaceAttr f val as) + XML.Attr (attrKey a) val : replaceAttr f val as + | otherwise = a : replaceAttr f val as -renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element +renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element renumId f renumMap e | Just oldId <- findAttrBy f e , Just newId <- M.lookup oldId renumMap = @@ -187,7 +178,7 @@ renumId f renumMap e e { elAttribs = attrs' } | otherwise = e -renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] +renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) -- | Certain characters are invalid in XML even if escaped. @@ -206,38 +197,36 @@ isValidChar (ord -> c) | 0x10000 <= c && c <= 0x10FFFF = True | otherwise = False -metaValueToInlines :: MetaValue -> [Inline] -metaValueToInlines (MetaString s) = normalizeInlines [Str s] -metaValueToInlines (MetaInlines ils) = ils -metaValueToInlines (MetaBlocks bs) = query return bs -metaValueToInlines (MetaBool b) = [Str $ show b] -metaValueToInlines _ = [] - --- | Produce an Docx file from a Pandoc document. -writeDocx :: WriterOptions -- ^ Writer options +writeDocx :: (PandocMonad m) + => WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO BL.ByteString + -> m BL.ByteString writeDocx opts doc@(Pandoc meta _) = do - let datadir = writerUserDataDir opts - let doc' = walk fixDisplayMath $ doc - username <- lookup "USERNAME" <$> getEnvironment - utctime <- getCurrentTime - distArchive <- getDefaultReferenceDocx datadir - refArchive <- case writerReferenceDocx opts of - Just f -> liftM (toArchive . toLazy) $ B.readFile f - Nothing -> getDefaultReferenceDocx datadir + let doc' = walk fixDisplayMath doc + username <- P.lookupEnv "USERNAME" + utctime <- P.getCurrentTime + distArchive <- (toArchive . BL.fromStrict) <$> do + oldUserDataDir <- P.getUserDataDir + P.setUserDataDir Nothing + res <- P.readDefaultDataFile "reference.docx" + P.setUserDataDir oldUserDataDir + return res + refArchive <- case writerReferenceDoc opts of + Just f -> toArchive <$> P.readFileLazy f + Nothing -> (toArchive . BL.fromStrict) <$> + P.readDataFile "reference.docx" parsedDoc <- parseXml refArchive distArchive "word/document.xml" let wname f qn = qPrefix qn == Just "w" && f (qName qn) let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc -- Gets the template size - let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz"))) - let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName)) + let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz")) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= lookupAttrBy ((=="w") . qName) - let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar"))) - let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName)) - let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName)) + let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar")) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="left") . qName) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="right") . qName) -- Get the avaible area (converting the size and the margins to int and -- doing the difference @@ -248,8 +237,29 @@ writeDocx opts doc@(Pandoc meta _) = do ) -- styles + mblang <- toLang $ getLang opts meta + let addLang :: Element -> Element + addLang e = case mblang >>= \l -> + (return . XMLC.toTree . go (renderLang l) + . XMLC.fromElement) e of + Just (Elem e') -> e' + _ -> e -- return original + where go :: String -> Cursor -> Cursor + go l cursor = case XMLC.findRec (isLangElt . current) cursor of + Nothing -> cursor + Just t -> XMLC.modifyContent (setval l) t + setval :: String -> Content -> Content + setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $ + elAttribs e' } + setval _ x = x + setvalattr :: String -> XML.Attr -> XML.Attr + setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l + setvalattr _ x = x + isLangElt (Elem e') = qName (elName e') == "lang" + isLangElt _ = False + let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive distArchive stylepath + styledoc <- addLang <$> parseXml refArchive distArchive stylepath -- parse styledoc for heading styles let styleMaps = getStyleMaps styledoc @@ -271,20 +281,20 @@ writeDocx opts doc@(Pandoc meta _) = do envRTL = isRTLmeta , envChangesAuthor = fromMaybe "unknown" username , envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime - , envPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) + , envPrintWidth = maybe 420 (\x -> quot x 20) pgContentWidth } - ((contents, footnotes), st) <- runStateT - (runReaderT - (writeOpenXML opts{writerWrapText = WrapNone} doc') - env) - initialSt + ((contents, footnotes, comments), st) <- runStateT + (runReaderT + (writeOpenXML opts{writerWrapText = WrapNone} doc') + env) + initialSt let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st -- create entries for images in word/media/... - let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img + let toImageEntry (_,path,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs let stdAttributes = @@ -316,7 +326,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- [Content_Types].xml let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () - let mkImageOverride (_, imgpath, mbMimeType, _, _) = + let mkImageOverride (_, imgpath, mbMimeType, _) = mkOverrideNode ("/word/" ++ imgpath, fromMaybe "application/octet-stream" mbMimeType) let mkMediaOverride imgpath = @@ -340,6 +350,8 @@ writeDocx opts doc@(Pandoc meta _) = do "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml") ,("/word/document.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") + ,("/word/comments.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml") ,("/word/footnotes.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") ] ++ @@ -386,13 +398,16 @@ writeDocx opts doc@(Pandoc meta _) = do ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", "footnotes.xml") + ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments", + "rId8", + "comments.xml") ] let idMap = renumIdMap (length baserels' + 1) (headers ++ footers) let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers let renumFooters = renumIds (\q -> qName q == "Id") idMap footers let baserels = baserels' ++ renumHeaders ++ renumFooters - let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () + let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () let linkrels = map toLinkRel $ M.toList $ stExternalLinks st @@ -409,7 +424,7 @@ writeDocx opts doc@(Pandoc meta _) = do (elChildren sectpr') in add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs - Nothing -> (mknode "w:sectPr" [] ()) + Nothing -> mknode "w:sectPr" [] () -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' let contents' = contents ++ [sectpr] @@ -431,6 +446,10 @@ writeDocx opts doc@(Pandoc meta _) = do $ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] linkrels + -- comments + let commentsEntry = toEntry "word/comments.xml" epochtime + $ renderXml $ mknode "w:comments" stdAttributes comments + -- styles -- We only want to inject paragraph and text properties that @@ -438,26 +457,19 @@ writeDocx opts doc@(Pandoc meta _) = do -- are normalized as lowercase. let newDynamicParaProps = filter (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sParaStyleMap styleMaps) - (stDynamicParaProps st) + (Set.toList $ stDynamicParaProps st) newDynamicTextProps = filter (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sCharStyleMap styleMaps) - (stDynamicTextProps st) + (Set.toList $ stDynamicTextProps st) let newstyles = map newParaPropToOpenXml newDynamicParaProps ++ map newTextPropToOpenXml newDynamicTextProps ++ - (styleToOpenXml styleMaps $ writerHighlightStyle opts) - let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } - where - modifyContent - | writerHighlight opts = (++ map Elem newstyles) - | otherwise = filter notTokStyle - notTokStyle (Elem el) = notStyle el || notTokId el - notTokStyle _ = True - notStyle = (/= elemName' "style") . elName - notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") - tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) - elemName' = elemName (sNameSpaces styleMaps) "w" + (case writerHighlightStyle opts of + Nothing -> [] + Just sty -> styleToOpenXml styleMaps sty) + let styledoc' = styledoc{ elContent = elContent styledoc ++ + map Elem newstyles } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -472,6 +484,11 @@ writeDocx opts doc@(Pandoc meta _) = do , qName (elName e) == "abstractNum" ] ++ [Elem e | e <- allElts , qName (elName e) == "num" ] } + + let keywords = case lookupMeta "keywords" meta of + Just (MetaList xs) -> map stringify xs + _ -> [] + let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -481,6 +498,7 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ mknode "dc:title" [] (stringify $ docTitle meta) : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) + : mknode "cp:keywords" [] (intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -509,6 +527,7 @@ writeDocx opts doc@(Pandoc meta _) = do , "w:consecutiveHyphenLimit" , "w:hyphenationZone" , "w:doNotHyphenateCap" + , "w:evenAndOddHeaders" ] settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList @@ -535,6 +554,7 @@ writeDocx opts doc@(Pandoc meta _) = do let archive = foldr addEntryToArchive emptyArchive $ contentTypesEntry : relsEntry : contentEntry : relEntry : footnoteRelEntry : numEntry : styleEntry : footnotesEntry : + commentsEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : imageEntries ++ headerFooterEntries ++ @@ -583,12 +603,12 @@ styleToOpenXml sm style = [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ] ] tokStyles = tokenStyles style - tokFeature f toktype = maybe False f $ lookup toktype tokStyles + tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles tokCol toktype = maybe "auto" (drop 1 . fromColor) - $ (tokenColor =<< lookup toktype tokStyles) + $ (tokenColor =<< M.lookup toktype tokStyles) `mplus` defaultColor style tokBg toktype = maybe "auto" (drop 1 . fromColor) - $ (tokenBackground =<< lookup toktype tokStyles) + $ (tokenBackground =<< M.lookup toktype tokStyles) `mplus` backgroundColor style parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing | otherwise = Just $ @@ -599,11 +619,11 @@ styleToOpenXml sm style = , mknode "w:link" [("w:val","VerbatimChar")] () , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () - : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) - $ backgroundColor style ) + : + maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style) ] -copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry +copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path @@ -622,11 +642,14 @@ copyChildren refArchive distArchive path timestamp elNames = do baseListId :: Int baseListId = 1000 -mkNumbering :: [ListMarker] -> IO [Element] +mkNumbering :: (PandocMonad m) => [ListMarker] -> m [Element] mkNumbering lists = do - elts <- mapM mkAbstractNum (ordNub lists) + elts <- evalStateT (mapM mkAbstractNum (ordNub lists)) (mkStdGen 1848) return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] +maxListLevel :: Int +maxListLevel = 8 + mkNum :: ListMarker -> Int -> Element mkNum marker numid = mknode "w:num" [("w:numId",show numid)] @@ -636,15 +659,19 @@ mkNum marker numid = BulletMarker -> [] NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] + $ mknode "w:startOverride" [("w:val",show start)] ()) + [0..maxListLevel] -mkAbstractNum :: ListMarker -> IO Element +mkAbstractNum :: (PandocMonad m) => ListMarker -> StateT StdGen m Element mkAbstractNum marker = do - nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) + gen <- get + let (nsid, gen') = randomR (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) gen + put gen' return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () - : map (mkLvl marker) [0..6] + : map (mkLvl marker) + [0..maxListLevel] mkLvl :: ListMarker -> Int -> Element mkLvl marker lvl = @@ -675,33 +702,35 @@ mkLvl marker lvl = bulletFor 3 = "\x2013" bulletFor 4 = "\x2022" bulletFor 5 = "\x2013" - bulletFor _ = "\x2022" - styleFor UpperAlpha _ = "upperLetter" - styleFor LowerAlpha _ = "lowerLetter" - styleFor UpperRoman _ = "upperRoman" - styleFor LowerRoman _ = "lowerRoman" - styleFor Decimal _ = "decimal" + bulletFor x = bulletFor (x `mod` 6) + styleFor UpperAlpha _ = "upperLetter" + styleFor LowerAlpha _ = "lowerLetter" + styleFor UpperRoman _ = "upperRoman" + styleFor LowerRoman _ = "lowerRoman" + styleFor Decimal _ = "decimal" styleFor DefaultStyle 1 = "decimal" styleFor DefaultStyle 2 = "lowerLetter" styleFor DefaultStyle 3 = "lowerRoman" styleFor DefaultStyle 4 = "decimal" styleFor DefaultStyle 5 = "lowerLetter" - styleFor DefaultStyle 6 = "lowerRoman" - styleFor _ _ = "decimal" - patternFor OneParen s = s ++ ")" + styleFor DefaultStyle 0 = "lowerRoman" + styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6) + styleFor _ _ = "decimal" + patternFor OneParen s = s ++ ")" patternFor TwoParens s = "(" ++ s ++ ")" - patternFor _ s = s ++ "." + patternFor _ s = s ++ "." -getNumId :: WS Int +getNumId :: (PandocMonad m) => WS m Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists -makeTOC :: WriterOptions -> WS [Element] + +makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] makeTOC opts | writerTableOfContents opts = do - let depth = "1-"++(show (writerTOCDepth opts)) + let depth = "1-"++show (writerTOCDepth opts) let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" tocTitle <- gets stTocTitle title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) - return $ + return [mknode "w:sdt" [] ([ mknode "w:sdtPr" [] ( mknode "w:docPartObj" [] ( @@ -725,22 +754,20 @@ makeTOC _ = return [] -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). -writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) +writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element],[Element]) writeOpenXML opts (Pandoc meta blocks) = do - let tit = docTitle meta ++ case lookupMeta "subtitle" meta of - Just (MetaBlocks [Plain xs]) -> LineBreak : xs - _ -> [] + let tit = docTitle meta let auths = docAuthors meta let dat = docDate meta let abstract' = case lookupMeta "abstract" meta of - Just (MetaBlocks bs) -> bs + Just (MetaBlocks bs) -> bs Just (MetaInlines ils) -> [Plain ils] - _ -> [] + _ -> [] let subtitle' = case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> xs Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs - _ -> [] + _ -> [] title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ @@ -750,23 +777,40 @@ writeOpenXML opts (Pandoc meta blocks) = do then return [] else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs - convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs - convertSpace xs = xs + convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs + convertSpace xs = xs let blocks' = bottomUp convertSpace blocks - doc' <- (setFirstPara >> blocksToOpenXML opts blocks') - notes' <- reverse `fmap` gets stFootnotes + doc' <- setFirstPara >> blocksToOpenXML opts blocks' + notes' <- reverse <$> gets stFootnotes + comments <- reverse <$> gets stComments + let toComment (kvs, ils) = do + annotation <- inlinesToOpenXML opts ils + return $ + mknode "w:comment" [('w':':':k,v) | (k,v) <- kvs] + [ mknode "w:p" [] $ + [ mknode "w:pPr" [] + [ mknode "w:pStyle" [("w:val", "CommentText")] () ] + , mknode "w:r" [] + [ mknode "w:rPr" [] + [ mknode "w:rStyle" [("w:val", "CommentReference")] () + , mknode "w:annotationRef" [] () + ] + ] + ] ++ annotation + ] + comments' <- mapM toComment comments toc <- makeTOC opts let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc - return (meta' ++ doc', notes') + return (meta' ++ doc', notes', comments') -- | Convert a list of Pandoc blocks to OpenXML. -blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] +blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls pCustomStyle :: String -> Element pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () -pStyleM :: String -> WS XML.Element +pStyleM :: (PandocMonad m) => String -> WS m XML.Element pStyleM styleName = do styleMaps <- gets stStyleMaps let sty' = getStyleId styleName $ sParaStyleMap styleMaps @@ -775,61 +819,75 @@ pStyleM styleName = do rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyleM :: String -> WS XML.Element +rStyleM :: (PandocMonad m) => String -> WS m XML.Element rStyleM styleName = do styleMaps <- gets stStyleMaps let sty' = getStyleId styleName $ sCharStyleMap styleMaps return $ mknode "w:rStyle" [("w:val",sty')] () -getUniqueId :: MonadIO m => m String +getUniqueId :: (PandocMonad m) => WS m String -- the + 20 is to ensure that there are no clashes with the rIds -- already in word/document.xml.rel -getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique +getUniqueId = do + n <- gets stCurId + modify $ \st -> st{stCurId = n + 1} + return $ show n -- | Key for specifying user-defined docx styles. dynamicStyleKey :: String dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. -blockToOpenXML :: WriterOptions -> Block -> WS [Element] +blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk -blockToOpenXML' :: WriterOptions -> Block -> WS [Element] +blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML' _ Null = return [] -blockToOpenXML' opts (Div (ident,classes,kvs) bs) - | Just sty <- lookup dynamicStyleKey kvs = do - modify $ \s -> s{stDynamicParaProps = sty : (stDynamicParaProps s)} - withParaPropM (pStyleM sty) $ blocksToOpenXML opts bs - | Just "rtl" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "rtl")/=) kvs - local (\env -> env { envRTL = True }) $ - blockToOpenXML opts (Div (ident,classes,kvs') bs) - | Just "ltr" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "ltr")/=) kvs - local (\env -> env { envRTL = False }) $ - blockToOpenXML opts (Div (ident,classes,kvs') bs) -blockToOpenXML' opts (Div (_,["references"],_) bs) = do - let (hs, bs') = span isHeaderBlock bs - header <- blocksToOpenXML opts hs - -- We put the Bibliography style on paragraphs after the header - rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' - return (header ++ rest) -blockToOpenXML' opts (Div _ bs) = blocksToOpenXML opts bs +blockToOpenXML' opts (Div (ident,classes,kvs) bs) = do + stylemod <- case lookup dynamicStyleKey kvs of + Just sty -> do + modify $ \s -> + s{stDynamicParaProps = Set.insert sty + (stDynamicParaProps s)} + return $ withParaPropM (pStyleM sty) + _ -> return id + dirmod <- case lookup "dir" kvs of + Just "rtl" -> return $ local (\env -> env { envRTL = True }) + Just "ltr" -> return $ local (\env -> env { envRTL = False }) + _ -> return id + let (hs, bs') = if "references" `elem` classes + then span isHeaderBlock bs + else ([], bs) + let bibmod = if "references" `elem` classes + then withParaPropM (pStyleM "Bibliography") + else id + header <- dirmod $ stylemod $ blocksToOpenXML opts hs + contents <- dirmod $ bibmod $ stylemod $ blocksToOpenXML opts bs' + if null ident + then return $ header ++ contents + else do + id' <- getUniqueId + let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') + ,("w:name",ident)] () + let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () + return $ bookmarkStart : header ++ contents ++ [bookmarkEnd] blockToOpenXML' opts (Header lev (ident,_,_) lst) = do setFirstPara paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst - usedIdents <- gets stSectionIds - let bookmarkName = if null ident - then uniqueIdent lst usedIdents - else ident - modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s } - id' <- getUniqueId - let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') + if null ident + then return [mknode "w:p" [] (paraProps ++contents)] + else do + let bookmarkName = ident + modify $ \s -> s{ stSectionIds = Set.insert bookmarkName + $ stSectionIds s } + id' <- getUniqueId + let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') ,("w:name",bookmarkName)] () - let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () - return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] + let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () + return [mknode "w:p" [] (paraProps ++ + [bookmarkStart] ++ contents ++ [bookmarkEnd])] blockToOpenXML' opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure @@ -838,31 +896,34 @@ blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do let prop = pCustomStyle $ if null alt then "Figure" - else "FigureWithCaption" + else "CaptionedFigure" paraProps <- local (\env -> env { envParaProperties = prop : envParaProperties env }) (getParaProps False) contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode --- fixDisplayMath sometimes produces a Para [] as artifact -blockToOpenXML' _ (Para []) = return [] -blockToOpenXML' opts (Para lst) = do - isFirstPara <- gets stFirstPara - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False - bodyTextStyle <- pStyleM "Body Text" - let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [bodyTextStyle]] - ps -> ps - modify $ \s -> s { stFirstPara = False } - contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps' ++ contents)] +blockToOpenXML' opts (Para lst) + | null lst && not (isEnabled Ext_empty_paragraphs opts) = return [] + | otherwise = do + isFirstPara <- gets stFirstPara + paraProps <- getParaProps $ case lst of + [Math DisplayMath _] -> True + _ -> False + bodyTextStyle <- pStyleM "Body Text" + let paraProps' = case paraProps of + [] | isFirstPara -> [mknode "w:pPr" [] + [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] + ps -> ps + modify $ \s -> s { stFirstPara = False } + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns -blockToOpenXML' _ (RawBlock format str) +blockToOpenXML' _ b@(RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | otherwise = do + report $ BlockNotRendered b + return [] blockToOpenXML' opts (BlockQuote blocks) = do p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks setFirstPara @@ -914,9 +975,9 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do caption' ++ [mknode "w:tbl" [] ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","TableNormal")] () : + ( mknode "w:tblStyle" [("w:val","Table")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : - mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : + mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] @@ -945,7 +1006,7 @@ blockToOpenXML' opts (DefinitionList items) = do setFirstPara return l -definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] +definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element] definitionListItemToOpenXML opts (term,defs) = do term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) @@ -953,12 +1014,12 @@ definitionListItemToOpenXML opts (term,defs) = do $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' -addList :: ListMarker -> WS () +addList :: (PandocMonad m) => ListMarker -> WS m () addList marker = do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } -listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] +listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element] listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do first' <- withNumId numid $ blockToOpenXML opts first @@ -968,121 +1029,162 @@ listItemToOpenXML opts numid (first:rest) = do alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" AlignDefault -> "left" -- | Convert a list of inline elements to OpenXML. -inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element] +inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element] inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst -withNumId :: Int -> WS a -> WS a +withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a withNumId numid = local $ \env -> env{ envListNumId = numid } -asList :: WS a -> WS a +asList :: (PandocMonad m) => WS m a -> WS m a asList = local $ \env -> env{ envListLevel = envListLevel env + 1 } -getTextProps :: WS [Element] +getTextProps :: (PandocMonad m) => WS m [Element] getTextProps = do props <- asks envTextProperties return $ if null props then [] else [mknode "w:rPr" [] props] -withTextProp :: Element -> WS a -> WS a +withTextProp :: PandocMonad m => Element -> WS m a -> WS m a withTextProp d p = local (\env -> env {envTextProperties = d : envTextProperties env}) p -withTextPropM :: WS Element -> WS a -> WS a +withTextPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withTextPropM = (. flip withTextProp) . (>>=) -getParaProps :: Bool -> WS [Element] +getParaProps :: PandocMonad m => Bool -> WS m [Element] getParaProps displayMathPara = do props <- asks envParaProperties listLevel <- asks envListLevel numid <- asks envListNumId - let listPr = if listLevel >= 0 && not displayMathPara - then [ mknode "w:numPr" [] - [ mknode "w:numId" [("w:val",show numid)] () - , mknode "w:ilvl" [("w:val",show listLevel)] () ] - ] - else [] + let listPr = [mknode "w:numPr" [] + [ mknode "w:numId" [("w:val",show numid)] () + , mknode "w:ilvl" [("w:val",show listLevel)] () ] | listLevel >= 0 && not displayMathPara] return $ case props ++ listPr of [] -> [] ps -> [mknode "w:pPr" [] ps] -withParaProp :: Element -> WS a -> WS a +withParaProp :: PandocMonad m => Element -> WS m a -> WS m a withParaProp d p = local (\env -> env {envParaProperties = d : envParaProperties env}) p -withParaPropM :: WS Element -> WS a -> WS a +withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withParaPropM = (. flip withParaProp) . (>>=) -formattedString :: String -> WS [Element] -formattedString str = do - props <- getTextProps +formattedString :: PandocMonad m => String -> WS m [Element] +formattedString str = + -- properly handle soft hyphens + case splitBy (=='\173') str of + [w] -> formattedString' w + ws -> do + sh <- formattedRun [mknode "w:softHyphen" [] ()] + intercalate sh <$> mapM formattedString' ws + +formattedString' :: PandocMonad m => String -> WS m [Element] +formattedString' str = do inDel <- asks envInDel - return [ mknode "w:r" [] $ - props ++ - [ mknode (if inDel then "w:delText" else "w:t") - [("xml:space","preserve")] (stripInvalidChars str) ] ] + formattedRun [ mknode (if inDel then "w:delText" else "w:t") + [("xml:space","preserve")] (stripInvalidChars str) ] -setFirstPara :: WS () +formattedRun :: PandocMonad m => [Element] -> WS m [Element] +formattedRun els = do + props <- getTextProps + return [ mknode "w:r" [] $ props ++ els ] + +setFirstPara :: PandocMonad m => WS m () setFirstPara = modify $ \s -> s { stFirstPara = True } -- | Convert an inline element to OpenXML. -inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] +inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il -inlineToOpenXML' :: WriterOptions -> Inline -> WS [Element] -inlineToOpenXML' _ (Str str) = formattedString str +inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] +inlineToOpenXML' _ (Str str) = + formattedString str inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ") -inlineToOpenXML' opts (Span (ident,classes,kvs) ils) - | Just sty <- lookup dynamicStyleKey kvs = do - let kvs' = filter ((dynamicStyleKey, sty)/=) kvs - modify $ \s -> s{stDynamicTextProps = sty : (stDynamicTextProps s)} - withTextProp (rCustomStyle sty) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | Just "rtl" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "rtl")/=) kvs - local (\env -> env { envRTL = True }) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | Just "ltr" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "ltr")/=) kvs - local (\env -> env { envRTL = False }) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | "insertion" `elem` classes = do - defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate - let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) - insId <- gets stInsId - modify $ \s -> s{stInsId = (insId + 1)} - x <- inlinesToOpenXML opts ils - return [ mknode "w:ins" [("w:id", (show insId)), - ("w:author", author), - ("w:date", date)] - x ] - | "deletion" `elem` classes = do - defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate - let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) - delId <- gets stDelId - modify $ \s -> s{stDelId = (delId + 1)} - x <- local (\env -> env {envInDel = True}) (inlinesToOpenXML opts ils) - return [ mknode "w:del" [("w:id", (show delId)), - ("w:author", author), - ("w:date", date)] - x ] - | otherwise = do - let off x = withTextProp (mknode x [("w:val","0")] ()) - ((if "csl-no-emph" `elem` classes then off "w:i" else id) . - (if "csl-no-strong" `elem` classes then off "w:b" else id) . - (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id)) - $ inlinesToOpenXML opts ils +inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do + -- prefer the "id" in kvs, since that is the one produced by the docx + -- reader. + let ident' = fromMaybe ident (lookup "id" kvs) + kvs' = filter (("id" /=) . fst) kvs + modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } + return [ mknode "w:commentRangeStart" [("w:id", ident')] () ] +inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = + -- prefer the "id" in kvs, since that is the one produced by the docx + -- reader. + let ident' = fromMaybe ident (lookup "id" kvs) + in + return [ mknode "w:commentRangeEnd" [("w:id", ident')] () + , mknode "w:r" [] + [ mknode "w:rPr" [] + [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] + , mknode "w:commentReference" [("w:id", ident')] () ] + ] +inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do + stylemod <- case lookup dynamicStyleKey kvs of + Just sty -> do + modify $ \s -> + s{stDynamicTextProps = Set.insert sty + (stDynamicTextProps s)} + return $ withTextPropM (rStyleM sty) + _ -> return id + let dirmod = case lookup "dir" kvs of + Just "rtl" -> local (\env -> env { envRTL = True }) + Just "ltr" -> local (\env -> env { envRTL = False }) + _ -> id + let off x = withTextProp (mknode x [("w:val","0")] ()) + let pmod = (if "csl-no-emph" `elem` classes then off "w:i" else id) . + (if "csl-no-strong" `elem` classes then off "w:b" else id) . + (if "csl-no-smallcaps" `elem` classes + then off "w:smallCaps" + else id) + insmod <- if "insertion" `elem` classes + then do + defaultAuthor <- asks envChangesAuthor + defaultDate <- asks envChangesDate + let author = fromMaybe defaultAuthor (lookup "author" kvs) + date = fromMaybe defaultDate (lookup "date" kvs) + insId <- gets stInsId + modify $ \s -> s{stInsId = insId + 1} + return $ \f -> do + x <- f + return [ mknode "w:ins" + [("w:id", show insId), + ("w:author", author), + ("w:date", date)] x ] + else return id + delmod <- if "deletion" `elem` classes + then do + defaultAuthor <- asks envChangesAuthor + defaultDate <- asks envChangesDate + let author = fromMaybe defaultAuthor (lookup "author" kvs) + date = fromMaybe defaultDate (lookup "date" kvs) + delId <- gets stDelId + modify $ \s -> s{stDelId = delId + 1} + return $ \f -> local (\env->env{envInDel=True}) $ do + x <- f + return [mknode "w:del" + [("w:id", show delId), + ("w:author", author), + ("w:date", date)] x] + else return id + contents <- insmod $ delmod $ dirmod $ stylemod $ pmod + $ inlinesToOpenXML opts ils + if null ident + then return contents + else do + id' <- getUniqueId + let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') + ,("w:name",ident)] () + let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () + return $ bookmarkStart : contents ++ [bookmarkEnd] inlineToOpenXML' opts (Strong lst) = withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML' opts (Emph lst) = @@ -1100,40 +1202,40 @@ inlineToOpenXML' opts (Strikeout lst) = withTextProp (mknode "w:strike" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML' _ LineBreak = return [br] -inlineToOpenXML' _ (RawInline f str) +inlineToOpenXML' _ il@(RawInline f str) | f == Format "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | otherwise = do + report $ InlineNotRendered il + return [] inlineToOpenXML' opts (Quoted quoteType lst) = inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close] where (open, close) = case quoteType of SingleQuote -> ("\x2018", "\x2019") DoubleQuote -> ("\x201C", "\x201D") inlineToOpenXML' opts (Math mathType str) = do - let displayType = if mathType == DisplayMath - then DisplayBlock - else DisplayInline - when (displayType == DisplayBlock) setFirstPara - case writeOMML displayType <$> readTeX str of - Right r -> return [r] - Left e -> do - warn $ "Cannot convert the following TeX math, skipping:\n" ++ str ++ - "\n" ++ e - inlinesToOpenXML opts (texMathToInlines mathType str) + when (mathType == DisplayMath) setFirstPara + res <- (lift . lift) (convertMath writeOMML mathType str) + case res of + Right r -> return [r] + Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do let unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) + mapM formattedString (lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] [ rCustomStyle (show toktype) ] , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] withTextProp (rCustomStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted + $ if isNothing (writerHighlightStyle opts) + then unhighlighted + else case highlight (writerSyntaxMap opts) + formatOpenXML attrs str of + Right h -> return h + Left msg -> do + unless (null msg) $ report $ CouldNotHighlight msg + unhighlighted inlineToOpenXML' opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId @@ -1151,7 +1253,7 @@ inlineToOpenXML' opts (Note bs) = do , envTextProperties = [] }) (withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs) - let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents + let newnote = mknode "w:footnote" [("w:id", notenum)] contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle @@ -1173,81 +1275,109 @@ inlineToOpenXML' opts (Link _ txt (src,_)) = do return i return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr alt (src, title)) = do - -- first, check to see if we've already done this image pageWidth <- asks envPrintWidth imgs <- gets stImages - case M.lookup src imgs of - Just (_,_,_,elt,_) -> return [elt] - Nothing -> do - res <- liftIO $ - fetchItem' (writerMediaBag opts) (writerSourceURL opts) src - case res of - Left (_ :: E.SomeException) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - -- emit alt text - inlinesToOpenXML opts alt - Right (img, mt) -> do - ident <- ("rId"++) `fmap` getUniqueId - let (xpt,ypt) = desiredSizeInPoints opts attr - (either (const def) id (imageSize img)) - -- 12700 emu = 1 pt - let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) - let cNvPicPr = mknode "pic:cNvPicPr" [] $ - mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () - let nvPicPr = mknode "pic:nvPicPr" [] - [ mknode "pic:cNvPr" - [("descr",src),("id","0"),("name","Picture")] () - , cNvPicPr ] - let blipFill = mknode "pic:blipFill" [] - [ mknode "a:blip" [("r:embed",ident)] () - , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] - let xfrm = mknode "a:xfrm" [] - [ mknode "a:off" [("x","0"),("y","0")] () - , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ] - let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ - mknode "a:avLst" [] () - let ln = mknode "a:ln" [("w","9525")] - [ mknode "a:noFill" [] () - , mknode "a:headEnd" [] () - , mknode "a:tailEnd" [] () ] - let spPr = mknode "pic:spPr" [("bwMode","auto")] - [xfrm, prstGeom, mknode "a:noFill" [] (), ln] - let graphic = mknode "a:graphic" [] $ - mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")] - [ mknode "pic:pic" [] - [ nvPicPr - , blipFill - , spPr ] ] - let imgElt = mknode "w:r" [] $ - mknode "w:drawing" [] $ - mknode "wp:inline" [] - [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () - , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () - , mknode "wp:docPr" [("descr",stringify alt), ("title", title), ("id","1"),("name","Picture")] () - , graphic ] - let imgext = case mt >>= extensionFromMimeType of - Just x -> '.':x - Nothing -> case imageType img of - Just Png -> ".png" - Just Jpeg -> ".jpeg" - Just Gif -> ".gif" - Just Pdf -> ".pdf" - Just Eps -> ".eps" - Nothing -> "" - if null imgext - then -- without an extension there is no rule for content type - inlinesToOpenXML opts alt -- return alt to avoid corrupted docx - else do - let imgpath = "media/" ++ ident ++ imgext - let mbMimeType = mt <|> getMimeType imgpath - -- insert mime type to use in constructing [Content_Types].xml - modify $ \st -> st{ stImages = - M.insert src (ident, imgpath, mbMimeType, imgElt, img) - $ stImages st } - return [imgElt] + let + stImage = M.lookup src imgs + generateImgElt (ident, _, _, img) = + let + (xpt,ypt) = desiredSizeInPoints opts attr + (either (const def) id (imageSize opts img)) + -- 12700 emu = 1 pt + (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) + (pageWidth * 12700) + cNvPicPr = mknode "pic:cNvPicPr" [] $ + mknode "a:picLocks" [("noChangeArrowheads","1") + ,("noChangeAspect","1")] () + nvPicPr = mknode "pic:nvPicPr" [] + [ mknode "pic:cNvPr" + [("descr",src),("id","0"),("name","Picture")] () + , cNvPicPr ] + blipFill = mknode "pic:blipFill" [] + [ mknode "a:blip" [("r:embed",ident)] () + , mknode "a:stretch" [] $ + mknode "a:fillRect" [] () + ] + xfrm = mknode "a:xfrm" [] + [ mknode "a:off" [("x","0"),("y","0")] () + , mknode "a:ext" [("cx",show xemu) + ,("cy",show yemu)] () ] + prstGeom = mknode "a:prstGeom" [("prst","rect")] $ + mknode "a:avLst" [] () + ln = mknode "a:ln" [("w","9525")] + [ mknode "a:noFill" [] () + , mknode "a:headEnd" [] () + , mknode "a:tailEnd" [] () ] + spPr = mknode "pic:spPr" [("bwMode","auto")] + [xfrm, prstGeom, mknode "a:noFill" [] (), ln] + graphic = mknode "a:graphic" [] $ + mknode "a:graphicData" + [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")] + [ mknode "pic:pic" [] + [ nvPicPr + , blipFill + , spPr + ] + ] + imgElt = mknode "w:r" [] $ + mknode "w:drawing" [] $ + mknode "wp:inline" [] + [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () + , mknode "wp:effectExtent" + [("b","0"),("l","0"),("r","0"),("t","0")] () + , mknode "wp:docPr" + [ ("descr", stringify alt) + , ("title", title) + , ("id","1") + , ("name","Picture") + ] () + , graphic + ] + in + imgElt + + case stImage of + Just imgData -> return $ [generateImgElt imgData] + Nothing -> ( do --try + (img, mt) <- P.fetchItem src + ident <- ("rId"++) `fmap` getUniqueId + + let + imgext = case mt >>= extensionFromMimeType of + Just x -> '.':x + Nothing -> case imageType img of + Just Png -> ".png" + Just Jpeg -> ".jpeg" + Just Gif -> ".gif" + Just Pdf -> ".pdf" + Just Eps -> ".eps" + Just Svg -> ".svg" + Just Emf -> ".emf" + Nothing -> "" + imgpath = "media/" ++ ident ++ imgext + mbMimeType = mt <|> getMimeType imgpath + + imgData = (ident, imgpath, mbMimeType, img) + + if null imgext + then -- without an extension there is no rule for content type + inlinesToOpenXML opts alt -- return alt to avoid corrupted docx + else do + -- insert mime type to use in constructing [Content_Types].xml + modify $ \st -> st { stImages = M.insert src imgData $ stImages st } + return [generateImgElt imgData] + ) + `catchError` ( \e -> do + report $ CouldNotFetchResource src (show e) + -- emit alt text + inlinesToOpenXML opts alt + ) br :: Element -br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] +br = breakElement "textWrapping" + +breakElement :: String -> Element +breakElement kind = mknode "w:r" [] [mknode "w:br" [("w:type",kind)] () ] -- Word will insert these footnotes into the settings.xml file -- (whether or not they're visible in the document). If they're in the @@ -1255,35 +1385,18 @@ br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] -- problems. So we want to make sure we insert them into our document. defaultFootnotes :: [Element] defaultFootnotes = [ mknode "w:footnote" - [("w:type", "separator"), ("w:id", "-1")] $ + [("w:type", "separator"), ("w:id", "-1")] [ mknode "w:p" [] $ [mknode "w:r" [] $ [ mknode "w:separator" [] ()]]] , mknode "w:footnote" - [("w:type", "continuationSeparator"), ("w:id", "0")] $ + [("w:type", "continuationSeparator"), ("w:id", "0")] [ mknode "w:p" [] $ [ mknode "w:r" [] $ [ mknode "w:continuationSeparator" [] ()]]]] -parseXml :: Archive -> Archive -> String -> IO Element -parseXml refArchive distArchive relpath = - case findEntryByPath relpath refArchive `mplus` - findEntryByPath relpath distArchive of - Nothing -> fail $ relpath ++ " missing in reference docx" - Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of - Nothing -> fail $ relpath ++ " corrupt in reference docx" - Just d -> return d - --- | Scales the image to fit the page --- sizes are passed in emu -fitToPage :: (Double, Double) -> Integer -> (Integer, Integer) -fitToPage (x, y) pageWidth - -- Fixes width to the page width and scales the height - | x > fromIntegral pageWidth = - (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y) - | otherwise = (floor x, floor y) - -withDirection :: WS a -> WS a + +withDirection :: PandocMonad m => WS m a -> WS m a withDirection x = do isRTL <- asks envRTL paraProps <- asks envParaProperties @@ -1296,8 +1409,8 @@ withDirection x = do if isRTL -- if we are going right-to-left, we (re?)add the properties. then flip local x $ - \env -> env { envParaProperties = (mknode "w:bidi" [] ()) : paraProps' - , envTextProperties = (mknode "w:rtl" [] ()) : textProps' + \env -> env { envParaProperties = mknode "w:bidi" [] () : paraProps' + , envTextProperties = mknode "w:rtl" [] () : textProps' } else flip local x $ \env -> env { envParaProperties = paraProps' , envTextProperties = textProps' diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 7459f1b42..dda21d23d 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.DokuWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Clare Macrae <clare.macrae@googlemail.com> @@ -39,71 +39,64 @@ DokuWiki: <https://www.dokuwiki.org/dokuwiki> -} module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where +import Control.Monad (zipWithM) +import Control.Monad.Reader (ReaderT, ask, local, runReaderT) +import Control.Monad.State.Strict (StateT, evalStateT) +import Data.Default (Default (..)) +import Data.List (intercalate, intersect, isPrefixOf, transpose) +import Data.Text (Text, pack) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Options ( WriterOptions( - writerTableOfContents - , writerTemplate - , writerWrapText), WrapOption(..) ) -import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting - , camelCaseToHyphenated, trimr, normalize, substitute ) -import Text.Pandoc.Writers.Shared ( defField, metaToJSON ) import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Data.List ( intersect, intercalate, isPrefixOf, transpose ) -import Data.Default (Default(..)) -import Network.URI ( isURI ) -import Control.Monad ( zipWithM ) -import Control.Monad.State ( modify, State, get, evalState ) -import Control.Monad.Reader ( ReaderT, runReaderT, ask, local ) +import Text.Pandoc.Logging +import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) +import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara, + removeFormatting, substitute, trimr) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared (defField, metaToJSON) data WriterState = WriterState { - stNotes :: Bool -- True if there are notes } data WriterEnvironment = WriterEnvironment { - stIndent :: String -- Indent after the marker at the beginning of list items - , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list + stIndent :: String -- Indent after the marker at the beginning of list items + , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list , stBackSlashLB :: Bool -- True if we should produce formatted strings with newlines (as in a table cell) } instance Default WriterState where - def = WriterState { stNotes = False } + def = WriterState {} instance Default WriterEnvironment where def = WriterEnvironment { stIndent = "" , stUseTags = False , stBackSlashLB = False } -type DokuWiki = ReaderT WriterEnvironment (State WriterState) +type DokuWiki m = ReaderT WriterEnvironment (StateT WriterState m) -- | Convert Pandoc to DokuWiki. -writeDokuWiki :: WriterOptions -> Pandoc -> String +writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeDokuWiki opts document = - runDokuWiki (pandocToDokuWiki opts $ normalize document) + runDokuWiki (pandocToDokuWiki opts document) -runDokuWiki :: DokuWiki a -> a -runDokuWiki = flip evalState def . flip runReaderT def +runDokuWiki :: PandocMonad m => DokuWiki m a -> m a +runDokuWiki = flip evalStateT def . flip runReaderT def -- | Return DokuWiki representation of document. -pandocToDokuWiki :: WriterOptions -> Pandoc -> DokuWiki String +pandocToDokuWiki :: PandocMonad m + => WriterOptions -> Pandoc -> DokuWiki m Text pandocToDokuWiki opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (fmap trimr . blockListToDokuWiki opts) (inlineListToDokuWiki opts) meta body <- blockListToDokuWiki opts blocks - notesExist <- stNotes <$> get - let notes = if notesExist - then "" -- TODO Was "\n<references />" Check whether I can really remove this: - -- if it is definitely to do with footnotes, can remove this whole bit - else "" - let main = body ++ notes + let main = pack body let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) - $ metadata + $ defField "toc" (writerTableOfContents opts) metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape special characters for DokuWiki. escapeString :: String -> String @@ -112,9 +105,10 @@ escapeString = substitute "__" "%%__%%" . substitute "//" "%%//%%" -- | Convert Pandoc block element to DokuWiki. -blockToDokuWiki :: WriterOptions -- ^ Options +blockToDokuWiki :: PandocMonad m + => WriterOptions -- ^ Options -> Block -- ^ Block element - -> DokuWiki String + -> DokuWiki m String blockToDokuWiki _ Null = return "" @@ -149,12 +143,13 @@ blockToDokuWiki opts (Para inlines) = do blockToDokuWiki opts (LineBlock lns) = blockToDokuWiki opts $ linesToPara lns -blockToDokuWiki _ (RawBlock f str) +blockToDokuWiki _ b@(RawBlock f str) | f == Format "dokuwiki" = return str -- See https://www.dokuwiki.org/wiki:syntax -- use uppercase HTML tag for block-level content: | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>" - | otherwise = return "" + | otherwise = "" <$ + report (BlockNotRendered b) blockToDokuWiki _ HorizontalRule = return "\n----\n" @@ -177,7 +172,7 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do "visualfoxpro", "winbatch", "xml", "xpp", "z80"] return $ "<code" ++ (case at of - [] -> ">\n" + [] -> ">\n" (x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>" blockToDokuWiki opts (BlockQuote blocks) = do @@ -198,7 +193,7 @@ blockToDokuWiki opts (Table capt aligns _ headers rows) = do rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows let widths = map (maximum . map length) $ transpose (headers':rows') let padTo (width, al) s = - case (width - length s) of + case width - length s of x | x > 0 -> if al == AlignLeft || al == AlignDefault then s ++ replicate x ' ' @@ -278,21 +273,27 @@ listAttribsToString (startnum, numstyle, _) = else "") -- | Convert bullet list item (list of blocks) to DokuWiki. -listItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String +listItemToDokuWiki :: PandocMonad m + => WriterOptions -> [Block] -> DokuWiki m String listItemToDokuWiki opts items = do - contents <- blockListToDokuWiki opts items useTags <- stUseTags <$> ask if useTags - then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" + then do + contents <- blockListToDokuWiki opts items + return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" else do + bs <- mapM (blockToDokuWiki opts) items + let contents = case items of + [_, CodeBlock _ _] -> concat bs + _ -> vcat bs indent <- stIndent <$> ask backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent + let indent' = if backSlash then drop 2 indent else indent return $ indent' ++ "* " ++ contents -- | Convert ordered list item (list of blocks) to DokuWiki. -- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki -orderedListItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String +orderedListItemToDokuWiki :: PandocMonad m => WriterOptions -> [Block] -> DokuWiki m String orderedListItemToDokuWiki opts items = do contents <- blockListToDokuWiki opts items useTags <- stUseTags <$> ask @@ -301,24 +302,25 @@ orderedListItemToDokuWiki opts items = do else do indent <- stIndent <$> ask backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent + let indent' = if backSlash then drop 2 indent else indent return $ indent' ++ "- " ++ contents -- | Convert definition list item (label, list of blocks) to DokuWiki. -definitionListItemToDokuWiki :: WriterOptions +definitionListItemToDokuWiki :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> DokuWiki String + -> DokuWiki m String definitionListItemToDokuWiki opts (label, items) = do labelText <- inlineListToDokuWiki opts label contents <- mapM (blockListToDokuWiki opts) items useTags <- stUseTags <$> ask if useTags then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++ - (intercalate "\n" $ map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents) + intercalate "\n" (map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents) else do indent <- stIndent <$> ask backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent + let indent' = if backSlash then drop 2 indent else indent return $ indent' ++ "* **" ++ labelText ++ "** " ++ concat contents -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. @@ -337,18 +339,19 @@ isSimpleListItem :: [Block] -> Bool isSimpleListItem [] = True isSimpleListItem [x] = case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - DefinitionList _ -> isSimpleList x - _ -> False + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + DefinitionList _ -> isSimpleList x + _ -> False isSimpleListItem [x, y] | isPlainOrPara x = case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - DefinitionList _ -> isSimpleList y - _ -> False + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + DefinitionList _ -> isSimpleList y + CodeBlock _ _ -> True + _ -> False isSimpleListItem _ = False isPlainOrPara :: Block -> Bool @@ -368,14 +371,15 @@ backSlashLineBreaks cs = reverse $ g $ reverse $ concatMap f cs where f '\n' = "\\\\ " f c = [c] g (' ' : '\\':'\\': xs) = xs - g s = s + g s = s -- Auxiliary functions for tables: -tableItemToDokuWiki :: WriterOptions - -> Alignment - -> [Block] - -> DokuWiki String +tableItemToDokuWiki :: PandocMonad m + => WriterOptions + -> Alignment + -> [Block] + -> DokuWiki m String tableItemToDokuWiki opts align' item = do let mkcell x = (if align' == AlignRight || align' == AlignCenter then " " @@ -388,22 +392,32 @@ tableItemToDokuWiki opts align' item = do return $ mkcell contents -- | Convert list of Pandoc block elements to DokuWiki. -blockListToDokuWiki :: WriterOptions -- ^ Options +blockListToDokuWiki :: PandocMonad m + => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> DokuWiki String + -> DokuWiki m String blockListToDokuWiki opts blocks = do backSlash <- stBackSlashLB <$> ask + let blocks' = consolidateRawBlocks blocks if backSlash - then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks - else vcat <$> mapM (blockToDokuWiki opts) blocks + then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks' + else vcat <$> mapM (blockToDokuWiki opts) blocks' + +consolidateRawBlocks :: [Block] -> [Block] +consolidateRawBlocks [] = [] +consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs) + | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs) +consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs -- | Convert list of Pandoc inline elements to DokuWiki. -inlineListToDokuWiki :: WriterOptions -> [Inline] -> DokuWiki String +inlineListToDokuWiki :: PandocMonad m + => WriterOptions -> [Inline] -> DokuWiki m String inlineListToDokuWiki opts lst = - concat <$> (mapM (inlineToDokuWiki opts) lst) + concat <$> mapM (inlineToDokuWiki opts) lst -- | Convert Pandoc inline element to DokuWiki. -inlineToDokuWiki :: WriterOptions -> Inline -> DokuWiki String +inlineToDokuWiki :: PandocMonad m + => WriterOptions -> Inline -> DokuWiki m String inlineToDokuWiki opts (Span _attrs ils) = inlineListToDokuWiki opts ils @@ -460,12 +474,12 @@ inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim DisplayMath -> "$$" InlineMath -> "$" -inlineToDokuWiki _ (RawInline f str) +inlineToDokuWiki _ il@(RawInline f str) | f == Format "dokuwiki" = return str | f == Format "html" = return $ "<html>" ++ str ++ "</html>" - | otherwise = return "" + | otherwise = "" <$ report (InlineNotRendered il) -inlineToDokuWiki _ (LineBreak) = return "\\\\\n" +inlineToDokuWiki _ LineBreak = return "\\\\\n" inlineToDokuWiki opts SoftBreak = case writerWrapText opts of @@ -498,7 +512,6 @@ inlineToDokuWiki opts (Image attr alt (source, tit)) = do inlineToDokuWiki opts (Note contents) = do contents' <- blockListToDokuWiki opts contents - modify (\s -> s { stNotes = True }) return $ "((" ++ contents' ++ "))" -- note - may not work for notes with multiple blocks @@ -507,7 +520,7 @@ imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height where toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim + checkPct maybeDim = maybeDim go (Just w) Nothing = "?" ++ w go (Just w) (Just h) = "?" ++ w ++ "x" ++ h go Nothing (Just h) = "?0x" ++ h diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 00bf4a81c..7b4853a24 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,6 +1,9 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,47 +31,47 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to EPUB. -} -module Text.Pandoc.Writers.EPUB ( writeEPUB ) where -import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) -import qualified Data.Map as M -import qualified Data.Set as Set -import Data.Maybe ( fromMaybe, catMaybes ) -import Data.List ( isPrefixOf, isInfixOf, intercalate ) -import System.Environment ( getEnv ) -import Text.Printf (printf) -import System.FilePath ( takeExtension, takeFileName ) -import System.FilePath.Glob ( namesMatching ) -import Network.HTTP ( urlEncode ) +module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where +import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive, + fromArchive, fromEntry, toEntry) +import Control.Monad (mplus, unless, when, zipWithM) +import Control.Monad.Except (catchError, throwError) +import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get, + gets, lift, modify, put) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import qualified Text.Pandoc.UTF8 as UTF8 -import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) -import Data.Time.Clock.POSIX ( getPOSIXTime ) -import Text.Pandoc.Compat.Time -import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim - , normalizeDate, readDataFile, stringify, warn - , hierarchicalize, fetchItem' ) -import qualified Text.Pandoc.Shared as S (Element(..)) +import Data.Char (isAlphaNum, isAscii, isDigit, toLower) +import Data.List (intercalate, isInfixOf, isPrefixOf) +import qualified Data.Map as M +import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) +import qualified Data.Set as Set +import qualified Data.Text as TS +import qualified Data.Text.Lazy as TL +import Network.HTTP (urlEncode) +import System.FilePath (takeExtension, takeFileName, makeRelative) +import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags) import Text.Pandoc.Builder (fromList, setMeta) -import Text.Pandoc.Options ( WriterOptions(..) - , WrapOption(..) - , HTMLMathMethod(..) - , EPUBVersion(..) - , ObfuscationMethod(NoObfuscation) ) +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P +import Text.Pandoc.Compat.Time import Text.Pandoc.Definition -import Text.Pandoc.Walk (walk, walkM, query) -import Control.Monad.State (modify, get, State, put, evalState) -import Control.Monad (mplus, liftM, when) -import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs - , strContent, lookupAttr, Node(..), QName(..), parseXML - , onlyElems, node, ppElement) -import Text.Pandoc.UUID (getRandomUUID) -import Text.Pandoc.Writers.HTML ( writeHtml ) -import Data.Char ( toLower, isDigit, isAlphaNum ) -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import qualified Control.Exception as E -import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType) +import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), + ObfuscationMethod (NoObfuscation), WrapOption (..), + WriterOptions (..)) +import Text.Pandoc.Shared (hierarchicalize, normalizeDate, renderTags', + safeRead, stringify, trim, uniqueIdent) +import qualified Text.Pandoc.Shared as S (Element (..)) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.UUID (getUUID) +import Text.Pandoc.Walk (query, walk, walkM) +import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) +import Text.Printf (printf) +import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), + add_attrs, lookupAttr, node, onlyElems, parseXML, + ppElement, showElement, strContent, unode, unqual) -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -76,51 +79,55 @@ import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] +data EPUBState = EPUBState { + stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] + , stEpubSubdir :: String + } + +type E m = StateT EPUBState m + data EPUBMetadata = EPUBMetadata{ - epubIdentifier :: [Identifier] - , epubTitle :: [Title] - , epubDate :: [Date] - , epubLanguage :: String - , epubCreator :: [Creator] - , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubCoverImage :: Maybe String - , epubStylesheet :: Maybe Stylesheet - , epubPageDirection :: Maybe ProgressionDirection + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: [Date] + , epubLanguage :: String + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [String] + , epubDescription :: Maybe String + , epubType :: Maybe String + , epubFormat :: Maybe String + , epubPublisher :: Maybe String + , epubSource :: Maybe String + , epubRelation :: Maybe String + , epubCoverage :: Maybe String + , epubRights :: Maybe String + , epubCoverImage :: Maybe String + , epubStylesheets :: [FilePath] + , epubPageDirection :: Maybe ProgressionDirection + , epubIbooksFields :: [(String, String)] } deriving Show -data Stylesheet = StylesheetPath FilePath - | StylesheetContents String - deriving Show - data Date = Date{ - dateText :: String - , dateEvent :: Maybe String + dateText :: String + , dateEvent :: Maybe String } deriving Show data Creator = Creator{ - creatorText :: String - , creatorRole :: Maybe String - , creatorFileAs :: Maybe String + creatorText :: String + , creatorRole :: Maybe String + , creatorFileAs :: Maybe String } deriving Show data Identifier = Identifier{ - identifierText :: String - , identifierScheme :: Maybe String + identifierText :: String + , identifierScheme :: Maybe String } deriving Show data Title = Title{ - titleText :: String - , titleFileAs :: Maybe String - , titleType :: Maybe String + titleText :: String + , titleFileAs :: Maybe String + , titleType :: Maybe String } deriving Show data ProgressionDirection = LTR | RTL deriving Show @@ -143,15 +150,29 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x -getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata +mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry +mkEntry path content = do + epubSubdir <- gets stEpubSubdir + let addEpubSubdir :: Entry -> Entry + addEpubSubdir e = e{ eRelativePath = + (if null epubSubdir + then "" + else epubSubdir ++ "/") ++ eRelativePath e } + epochtime <- floor <$> lift P.getPOSIXTime + return $ + (if path == "mimetype" || "META-INF" `isPrefixOf` path + then id + else addEpubSubdir) $ toEntry path epochtime content + +getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta - let elts = onlyElems $ parseXML $ writerEpubMetadata opts + let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts let md' = foldr addMetadataFromXML md elts let addIdentifier m = if null (epubIdentifier m) then do - randomId <- fmap show getRandomUUID + randomId <- (show . getUUID) <$> lift P.newStdGen return $ m{ epubIdentifier = [Identifier randomId Nothing] } else return m let addLanguage m = @@ -159,16 +180,19 @@ getEPUBMetadata opts meta = do then case lookup "lang" (writerVariables opts) of Just x -> return m{ epubLanguage = x } Nothing -> do - localeLang <- E.catch (liftM - (map (\c -> if c == '_' then '-' else c) . - takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: E.SomeException) in return "en-US") + mLang <- lift $ P.lookupEnv "LANG" + let localeLang = + case mLang of + Just lang -> + map (\c -> if c == '_' then '-' else c) $ + takeWhile (/='.') lang + Nothing -> "en-US" return m{ epubLanguage = localeLang } else return m let fixDate m = if null (epubDate m) then do - currentTime <- getCurrentTime + currentTime <- lift P.getCurrentTime return $ m{ epubDate = [ Date{ dateText = showDateTimeISO8601 currentTime , dateEvent = Nothing } ] } @@ -225,12 +249,16 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md addMetadataFromXML _ md = md metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = s +metaValueToString (MetaString s) = s metaValueToString (MetaInlines ils) = stringify ils -metaValueToString (MetaBlocks bs) = stringify bs -metaValueToString (MetaBool True) = "true" -metaValueToString (MetaBool False) = "false" -metaValueToString _ = "" +metaValueToString (MetaBlocks bs) = stringify bs +metaValueToString (MetaBool True) = "true" +metaValueToString (MetaBool False) = "false" +metaValueToString _ = "" + +metaValueToPaths:: MetaValue -> [FilePath] +metaValueToPaths (MetaList xs) = map metaValueToString xs +metaValueToPaths x = [metaValueToString x] getList :: String -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = @@ -267,19 +295,18 @@ getCreator s meta = getList s meta handleMetaValue getDate :: String -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = - Date{ dateText = maybe "" id $ + Date{ dateText = fromMaybe "" $ M.lookup "text" m >>= normalizeDate' . metaValueToString , dateEvent = metaValueToString <$> M.lookup "event" m } - handleMetaValue mv = Date { dateText = maybe "" - id $ normalizeDate' $ metaValueToString mv + handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } simpleList :: String -> Meta -> [String] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs - Just x -> [metaValueToString x] - Nothing -> [] + Just x -> [metaValueToString x] + Nothing -> [] metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata metadataFromMeta opts meta = EPUBMetadata{ @@ -299,8 +326,9 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverage = coverage , epubRights = rights , epubCoverImage = coverImage - , epubStylesheet = stylesheet + , epubStylesheets = stylesheets , epubPageDirection = pageDirection + , epubIbooksFields = ibooksFields } where identifiers = getIdentifier meta titles = getTitle meta @@ -320,70 +348,125 @@ metadataFromMeta opts meta = EPUBMetadata{ rights = metaValueToString <$> lookupMeta "rights" meta coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) - stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` - ((StylesheetPath . metaValueToString) <$> - lookupMeta "stylesheet" meta) + stylesheets = fromMaybe [] + (metaValueToPaths <$> lookupMeta "stylesheet" meta) ++ + [f | ("css",f) <- writerVariables opts] pageDirection = case map toLower . metaValueToString <$> lookupMeta "page-progression-direction" meta of Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing + ibooksFields = case lookupMeta "ibooks" meta of + Just (MetaMap mp) + -> M.toList $ M.map metaValueToString mp + _ -> [] + +-- | Produce an EPUB2 file from a Pandoc document. +writeEPUB2 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB2 = writeEPUB EPUB2 + +-- | Produce an EPUB3 file from a Pandoc document. +writeEPUB3 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB3 = writeEPUB EPUB3 -- | Produce an EPUB file from a Pandoc document. -writeEPUB :: WriterOptions -- ^ Writer options +writeEPUB :: PandocMonad m + => EPUBVersion + -> WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO B.ByteString -writeEPUB opts doc@(Pandoc meta _) = do - let version = fromMaybe EPUB2 (writerEpubVersion opts) + -> m B.ByteString +writeEPUB epubVersion opts doc = do + let epubSubdir = writerEpubSubdirectory opts + -- sanity check on epubSubdir + unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + throwError $ PandocEpubSubdirectoryError epubSubdir + let initState = EPUBState { stMediaPaths = [], stEpubSubdir = epubSubdir } + evalStateT (pandocToEPUB epubVersion opts doc) initState + +pandocToEPUB :: PandocMonad m + => EPUBVersion + -> WriterOptions + -> Pandoc + -> E m B.ByteString +pandocToEPUB version opts doc@(Pandoc meta _) = do + epubSubdir <- gets stEpubSubdir let epub3 = version == EPUB3 - epochtime <- floor `fmap` getPOSIXTime - let mkEntry path content = toEntry path epochtime content + let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) . + writeHtmlStringForEPUB version o + metadata <- getEPUBMetadata opts meta + + -- stylesheet + stylesheets <- case epubStylesheets metadata of + [] -> (\x -> [B.fromChunks [x]]) <$> + P.readDataFile "epub.css" + fs -> mapM P.readFileLazy fs + stylesheetEntries <- zipWithM + (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs) + stylesheets [(1 :: Int)..] + let vars = ("epub3", if epub3 then "true" else "false") - : ("css", "stylesheet.css") - : writerVariables opts + : [(x,y) | (x,y) <- writerVariables opts, x /= "css"] + + let cssvars useprefix = map (\e -> ("css", + (if useprefix + then "../" + else "") + ++ makeRelative epubSubdir (eRelativePath e))) + stylesheetEntries + let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerSectionDivs = True - , writerHtml5 = epub3 , writerVariables = vars , writerHTMLMathMethod = if epub3 - then MathML Nothing + then MathML else writerHTMLMathMethod opts , writerWrapText = WrapAuto } - metadata <- getEPUBMetadata opts' meta -- cover page (cpgEntry, cpicEntry) <- case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do - let coverImage = "media/" ++ takeFileName img - let cpContent = renderHtml $ writeHtml - opts'{ writerVariables = ("coverpage","true"):vars } - (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) - imgContent <- B.readFile img - return ( [mkEntry "cover.xhtml" cpContent] - , [mkEntry coverImage imgContent] ) + let coverImage = takeFileName img + cpContent <- lift $ writeHtml + opts'{ writerVariables = + ("coverpage","true"): + cssvars True ++ vars } + (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"../media/" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) + imgContent <- lift $ P.readFileLazy img + coverEntry <- mkEntry "text/cover.xhtml" cpContent + coverImageEntry <- mkEntry ("media/" ++ coverImage) + imgContent + return ( [ coverEntry ] + , [ coverImageEntry ] ) -- title page - let tpContent = renderHtml $ writeHtml opts'{ - writerVariables = ("titlepage","true"):vars } - (Pandoc meta []) - let tpEntry = mkEntry "title_page.xhtml" tpContent + tpContent <- lift $ writeHtml opts'{ + writerVariables = ("titlepage","true"): + cssvars True ++ vars } + (Pandoc meta []) + tpEntry <- mkEntry "text/title_page.xhtml" tpContent -- handle pictures - mediaRef <- newIORef [] - Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= - walkM (transformBlock opts' mediaRef) - picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef - + -- mediaRef <- P.newIORef [] + Pandoc _ blocks <- walkM (transformInline opts') doc >>= + walkM transformBlock + picEntries <- (mapMaybe (snd . snd)) <$> gets stMediaPaths -- handle fonts let matchingGlob f = do - xs <- namesMatching f + xs <- lift $ P.glob f when (null xs) $ - warn $ f ++ " did not match any font files." + report $ CouldNotFetchResource f "glob did not match any font files" return xs - let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f + let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< + lift (P.readFileLazy f) fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') fontEntries <- mapM mkFontEntry fontFiles @@ -420,7 +503,7 @@ writeEPUB opts doc@(Pandoc meta _) = do mbnum <- if "unnumbered" `elem` classes then return Nothing else case splitAt (n - 1) nums of - (ks, (m:_)) -> do + (ks, m:_) -> do let nums' = ks ++ [m+1] put nums' return $ Just (ks ++ [m]) @@ -467,77 +550,93 @@ writeEPUB opts doc@(Pandoc meta _) = do Chapter mbnum $ walk fixInternalReferences bs) chapters' - let chapToEntry :: Int -> Chapter -> Entry - chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) - $ renderHtml - $ writeHtml opts'{ writerNumberOffset = - fromMaybe [] mbnum } - $ case bs of - (Header _ _ xs : _) -> - -- remove notes or we get doubled footnotes - Pandoc (setMeta "title" (walk removeNote $ fromList xs) - nullMeta) bs - _ -> - Pandoc nullMeta bs - - let chapterEntries = zipWith chapToEntry [1..] chapters + let chapToEntry num (Chapter mbnum bs) = + mkEntry ("text/" ++ showChapter num) =<< + writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum + , writerVariables = cssvars True ++ vars } + (case bs of + (Header _ _ xs : _) -> + -- remove notes or we get doubled footnotes + Pandoc (setMeta "title" (walk removeNote $ fromList xs) + nullMeta) bs + _ -> Pandoc nullMeta bs) + + chapterEntries <- zipWithM chapToEntry [1..] chapters -- incredibly inefficient (TODO): let containsMathML ent = epub3 && - "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + "<math" `isInfixOf` + B8.unpack (fromEntry ent) let containsSVG ent = epub3 && - "<svg" `isInfixOf` (B8.unpack $ fromEntry ent) + "<svg" `isInfixOf` + B8.unpack (fromEntry ent) let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] -- contents.opf let chapterNode ent = unode "item" ! - ([("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), + ([("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), ("media-type", "application/xhtml+xml")] ++ case props ent of - [] -> [] - xs -> [("properties", unwords xs)]) + [] -> [] + xs -> [("properties", unwords xs)]) $ () + let chapterRefNode ent = unode "itemref" ! - [("idref", toId $ eRelativePath ent)] $ () + [("idref", toId $ makeRelative epubSubdir + $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! - [("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", fromMaybe "application/octet-stream" + [("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), + ("media-type", + fromMaybe "application/octet-stream" $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! - [("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () + [("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), + ("media-type", fromMaybe "" $ + getMimeType $ eRelativePath ent)] $ () let plainTitle = case docTitle' meta of [] -> case epubTitle metadata of - [] -> "UNTITLED" + [] -> "UNTITLED" (x:_) -> titleText x x -> stringify x let tocTitle = fromMaybe plainTitle $ metaValueToString <$> lookupMeta "toc-title" meta - let uuid = case epubIdentifier metadata of - (x:_) -> identifierText x -- use first identifier as UUID - [] -> error "epubIdentifier is null" -- shouldn't happen - currentTime <- getCurrentTime + uuid <- case epubIdentifier metadata of + (x:_) -> return $ identifierText x -- use first identifier as UUID + [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen + currentTime <- lift P.getCurrentTime let contentsData = UTF8.fromStringLazy $ ppTopElement $ - unode "package" ! [("version", case version of - EPUB2 -> "2.0" - EPUB3 -> "3.0") - ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","epub-id-1")] $ + unode "package" ! + ([("version", case version of + EPUB2 -> "2.0" + EPUB3 -> "3.0") + ,("xmlns","http://www.idpf.org/2007/opf") + ,("unique-identifier","epub-id-1") + ] ++ + [("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/") | version == EPUB3]) $ [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () - , unode "item" ! [("id","style"), ("href","stylesheet.css") - ,("media-type","text/css")] $ () , unode "item" ! ([("id","nav") ,("href","nav.xhtml") ,("media-type","application/xhtml+xml")] ++ [("properties","nav") | epub3 ]) $ () ] ++ + [ unode "item" ! [("id","style"), ("href",fp) + ,("media-type","text/css")] $ () | + fp <- map + (makeRelative epubSubdir . eRelativePath) + stylesheetEntries ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ (case cpicEntry of [] -> [] @@ -546,7 +645,8 @@ writeEPUB opts doc@(Pandoc meta _) = do (pictureNode x)]) ++ map pictureNode picEntries ++ map fontNode fontEntries - , unode "spine" ! ([("toc","ncx")] ++ progressionDirection) $ + , unode "spine" ! ( + ("toc","ncx") : progressionDirection) $ case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! @@ -565,48 +665,54 @@ writeEPUB opts doc@(Pandoc meta _) = do ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! - [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing + [("type","cover") + ,("title","Cover") + ,("href","text/cover.xhtml")] $ () + | isJust (epubCoverImage metadata) ] ] - let contentsEntry = mkEntry "content.opf" contentsData + contentsEntry <- mkEntry "content.opf" contentsData -- toc.ncx let secs = hierarchicalize blocks' let tocLevel = writerTOCDepth opts - let navPointNode :: (Int -> String -> String -> [Element] -> Element) - -> S.Element -> State Int Element + let navPointNode :: PandocMonad m + => (Int -> [Inline] -> String -> [Element] -> Element) + -> S.Element -> StateT Int m Element navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do n <- get modify (+1) let showNums :: [Int] -> String showNums = intercalate "." . map show - let tit' = stringify ils let tit = if writerNumberSections opts && not (null nums) - then showNums nums ++ " " ++ tit' - else tit' - let src = case lookup ident reftable of - Just x -> x - Nothing -> error (ident ++ " not found in reftable") + then Span ("", ["section-header-number"], []) + [Str (showNums nums)] : Space : ils + else ils + src <- case lookup ident reftable of + Just x -> return x + Nothing -> throwError $ PandocSomeError $ ident ++ " not found in reftable" let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel - isSec _ = False + isSec _ = False let subsecs = filter isSec children subs <- mapM (navPointNode formatter) subsecs return $ formatter n tit src subs - navPointNode _ (S.Blk _) = error "navPointNode encountered Blk" + navPointNode _ (S.Blk _) = throwError $ PandocSomeError "navPointNode encountered Blk" - let navMapFormatter :: Int -> String -> String -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" tit - , unode "content" ! [("src", src)] $ () + [ unode "navLabel" $ unode "text" $ stringify tit + , unode "content" ! [("src", "text/" ++ src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) - , unode "content" ! [("src","title_page.xhtml")] $ () ] + , unode "content" ! [("src", "text/title_page.xhtml")] + $ () ] + navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1 let tocData = UTF8.fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ @@ -623,34 +729,48 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> [] Just img -> [unode "meta" ! [("name","cover"), ("content", toId img)] $ ()] - , unode "docTitle" $ unode "text" $ plainTitle + , unode "docTitle" $ unode "text" plainTitle , unode "navMap" $ - tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 + tpNode : navMap ] - let tocEntry = mkEntry "toc.ncx" tocData + tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> String -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ - (unode "a" ! [("href",src)] - $ tit) + (unode "a" ! + [("href", "text/" ++ src)] + $ titElements) : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] + where titElements = parseXML titRendered + titRendered = case P.runPure + (writeHtmlStringForEPUB version + opts{ writerTemplate = Nothing } + (Pandoc nullMeta + [Plain $ walk delink tit])) of + Left _ -> TS.pack $ stringify tit + Right x -> x + -- can't have a element inside a... + delink (Link _ ils _) = Span ("", [], []) ils + delink x = x let navtag = if epub3 then "nav" else "div" - let navBlocks = [RawBlock (Format "html") $ ppElement $ + tocBlocks <- lift $ evalStateT (mapM (navPointNode navXhtmlFormatter) secs) 1 + let navBlocks = [RawBlock (Format "html") + $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle - , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] + , unode "ol" ! [("class","toc")] $ tocBlocks ]] let landmarks = if epub3 then [RawBlock (Format "html") $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("hidden","hidden")] $ [ unode "ol" $ [ unode "li" - [ unode "a" ! [("href", "cover.xhtml") + [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ "Cover"] | epubCoverImage metadata /= Nothing @@ -664,52 +784,50 @@ writeEPUB opts doc@(Pandoc meta _) = do ] ] else [] - let navData = renderHtml $ writeHtml - opts'{ writerVariables = ("navpage","true"):vars } + navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"): + cssvars False ++ vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) (navBlocks ++ landmarks)) - let navEntry = mkEntry "nav.xhtml" navData + navEntry <- mkEntry "nav.xhtml" navData -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip" + mimetypeEntry <- mkEntry "mimetype" $ + UTF8.fromStringLazy "application/epub+zip" -- container.xml let containerData = UTF8.fromStringLazy $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ - unode "rootfile" ! [("full-path","content.opf") + unode "rootfile" ! [("full-path", + (if null epubSubdir + then "" + else epubSubdir ++ "/") ++ "content.opf") ,("media-type","application/oebps-package+xml")] $ () - let containerEntry = mkEntry "META-INF/container.xml" containerData + containerEntry <- mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml let apple = UTF8.fromStringLazy $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ unode "option" ! [("name","specified-fonts")] $ "true" - let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple - - -- stylesheet - stylesheet <- case epubStylesheet metadata of - Just (StylesheetPath fp) -> UTF8.readFile fp - Just (StylesheetContents s) -> return s - Nothing -> UTF8.toString `fmap` - readDataFile (writerUserDataDir opts) "epub.css" - let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet + appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- construct archive - let archive = foldr addEntryToArchive emptyArchive - (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry : - contentsEntry : tocEntry : navEntry : - (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) + let archive = foldr addEntryToArchive emptyArchive $ + [mimetypeEntry, containerEntry, appleEntry, + contentsEntry, tocEntry, navEntry, tpEntry] ++ + stylesheetEntries ++ picEntries ++ cpicEntry ++ + cpgEntry ++ chapterEntries ++ fontEntries return $ fromArchive archive metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element metadataElement version md currentTime = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes - where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes + ++ languageNodes ++ ibooksNodes ++ creatorNodes ++ contributorNodes ++ subjectNodes ++ descriptionNodes ++ typeNodes ++ formatNodes ++ publisherNodes ++ sourceNodes ++ relationNodes @@ -728,6 +846,8 @@ metadataElement version md currentTime = [] -> [] (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] + ibooksNodes = map ibooksNode (epubIbooksFields md) + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ epubCreator md @@ -747,7 +867,7 @@ metadataElement version md currentTime = ("content",toId img)] $ ()]) $ epubCoverImage md modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ - (showDateTimeISO8601 currentTime) | version == EPUB3 ] + showDateTimeISO8601 currentTime | version == EPUB3 ] dcTag n s = unode ("dc:" ++ n) s dcTag' n s = [dcTag n s] toIdentifierNode id' (Identifier txt scheme) @@ -791,99 +911,92 @@ metadataElement version md currentTime = (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] - schemeToOnix "ISBN-10" = "02" - schemeToOnix "GTIN-13" = "03" - schemeToOnix "UPC" = "04" - schemeToOnix "ISMN-10" = "05" - schemeToOnix "DOI" = "06" - schemeToOnix "LCCN" = "13" - schemeToOnix "GTIN-14" = "14" - schemeToOnix "ISBN-13" = "15" + schemeToOnix "ISBN-10" = "02" + schemeToOnix "GTIN-13" = "03" + schemeToOnix "UPC" = "04" + schemeToOnix "ISMN-10" = "05" + schemeToOnix "DOI" = "06" + schemeToOnix "LCCN" = "13" + schemeToOnix "GTIN-14" = "14" + schemeToOnix "ISBN-13" = "15" schemeToOnix "Legal deposit number" = "17" - schemeToOnix "URN" = "22" - schemeToOnix "OCLC" = "23" - schemeToOnix "ISMN-13" = "25" - schemeToOnix "ISBN-A" = "26" - schemeToOnix "JP" = "27" - schemeToOnix "OLCC" = "28" - schemeToOnix _ = "01" + schemeToOnix "URN" = "22" + schemeToOnix "OCLC" = "23" + schemeToOnix "ISMN-13" = "25" + schemeToOnix "ISBN-A" = "26" + schemeToOnix "JP" = "27" + schemeToOnix "OLCC" = "28" + schemeToOnix _ = "01" showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media - -> Tag String - -> IO (Tag String) -transformTag opts mediaRef tag@(TagOpen name attr) +transformTag :: PandocMonad m + => Tag String + -> E m (Tag String) +transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && - lookup "data-external" attr == Nothing = do + isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef opts mediaRef src - newposter <- modifyMediaRef opts mediaRef poster + newsrc <- modifyMediaRef src + newposter <- modifyMediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", newsrc) | not (null newsrc)] ++ - [("poster", newposter) | not (null newposter)] + [("src", "../" ++ newsrc) | not (null newsrc)] ++ + [("poster", "../" ++ newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ _ tag = return tag - -modifyMediaRef :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] - -> FilePath - -> IO FilePath -modifyMediaRef _ _ "" = return "" -modifyMediaRef opts mediaRef oldsrc = do - media <- readIORef mediaRef +transformTag tag = return tag + +modifyMediaRef :: PandocMonad m + => FilePath + -> E m FilePath +modifyMediaRef "" = return "" +modifyMediaRef oldsrc = do + media <- gets stMediaPaths case lookup oldsrc media of Just (n,_) -> return n - Nothing -> do - res <- fetchItem' (writerMediaBag opts) - (writerSourceURL opts) oldsrc - (new, mbEntry) <- - case res of - Left _ -> do - warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." - return (oldsrc, Nothing) - Right (img,mbMime) -> do - let new = "media/file" ++ show (length media) ++ - fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) - epochtime <- floor `fmap` getPOSIXTime - let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img - return (new, Just entry) - modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): ) - return new - -transformBlock :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media - -> Block - -> IO Block -transformBlock opts mediaRef (RawBlock fmt raw) + Nothing -> catchError + (do (img, mbMime) <- P.fetchItem oldsrc + let new = "media/file" ++ show (length media) ++ + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) + entry <- mkEntry new (B.fromChunks . (:[]) $ img) + modify $ \st -> st{ stMediaPaths = + (oldsrc, (new, Just entry)):media} + return new) + (\e -> do + report $ CouldNotFetchResource oldsrc (show e) + return oldsrc) + +transformBlock :: PandocMonad m + => Block + -> E m Block +transformBlock (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM transformTag tags return $ RawBlock fmt (renderTags' tags') -transformBlock _ _ b = return b +transformBlock b = return b -transformInline :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media +transformInline :: PandocMonad m + => WriterOptions -> Inline - -> IO Inline -transformInline opts mediaRef (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef opts mediaRef src - return $ Image attr lab (newsrc, tit) -transformInline opts mediaRef (x@(Math t m)) + -> E m Inline +transformInline _opts (Image attr lab (src,tit)) = do + newsrc <- modifyMediaRef src + return $ Image attr lab ("../" ++ newsrc, tit) +transformInline opts (x@(Math t m)) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef opts mediaRef (url ++ urlEncode m) + newsrc <- modifyMediaRef (url ++ urlEncode m) let mathclass = if t == DisplayMath then "display" else "inline" - return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")] -transformInline opts mediaRef (RawInline fmt raw) + return $ Span ("",["math",mathclass],[]) + [Image nullAttr [x] ("../" ++ newsrc, "")] +transformInline _opts (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM transformTag tags return $ RawInline fmt (renderTags' tags') -transformInline _ _ x = return x +transformInline _ x = return x (!) :: (t -> Element) -> [(String, String)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) @@ -898,8 +1011,8 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . let (ds,ys) = break (==';') xs rest = drop 1 ys in case safeRead ('\'':'\\':ds ++ "'") of - Just x -> x : unEntity rest - Nothing -> '&':'#':unEntity xs + Just x -> x : unEntity rest + Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs mediaTypeOf :: FilePath -> Maybe MimeType @@ -907,7 +1020,7 @@ mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y - _ -> Nothing + _ -> Nothing -- Returns filename for chapter number. showChapter :: Int -> String diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 5538ca061..e322c7d98 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -1,8 +1,8 @@ {-# LANGUAGE PatternGuards #-} {- -Copyright (c) 2011-2012, Sergey Astanin -All rights reserved. +Copyright (c) 2011-2012 Sergey Astanin + 2012-2018 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,17 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. +{- | +Module : Text.Pandoc.Writers.FB2 +Copyright : Copyright (C) 2011-2012 Sergey Astanin + 2012-2018 John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : John MacFarlane +Stability : alpha +Portability : portable + +Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. FictionBook is an XML-based e-book format. For more information see: <http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1> @@ -27,44 +37,42 @@ FictionBook is an XML-based e-book format. For more information see: -} module Text.Pandoc.Writers.FB2 (writeFB2) where -import Control.Monad.State (StateT, evalStateT, get, modify) -import Control.Monad.State (liftM, liftM2, liftIO) +import Control.Monad (zipWithM) +import Control.Monad.Except (catchError) +import Control.Monad.State.Strict (StateT, evalStateT, get, lift, liftM, modify) import Data.ByteString.Base64 (encode) -import Data.Char (toLower, isSpace, isAscii, isControl) -import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix) +import qualified Data.ByteString.Char8 as B8 +import Data.Char (isAscii, isControl, isSpace, toLower) import Data.Either (lefts, rights) -import Network.Browser (browse, request, setAllowRedirects, setOutHandler) -import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody) -import Network.HTTP (lookupHeader, HeaderName(..), urlEncode) -import Network.URI (isURI, unEscapeString) -import System.FilePath (takeExtension) +import Data.List (intercalate, intersperse, isPrefixOf, stripPrefix) +import Data.Text (Text, pack) +import Network.HTTP (urlEncode) import Text.XML.Light -import qualified Control.Exception as E -import qualified Data.ByteString as B import qualified Text.XML.Light as X import qualified Text.XML.Light.Cursor as XC +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition -import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize, - linesToPara) +import Text.Pandoc.Logging +import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) +import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, orderedListMarkers) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. data FbRenderState = FbRenderState - { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text - , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path - , parentListMarker :: String -- ^ list marker of the parent ordered list - , parentBulletLevel :: Int -- ^ nesting level of the unordered list - , writerOptions :: WriterOptions + { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text + , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path + , parentListMarker :: String -- ^ list marker of the parent ordered list + , writerOptions :: WriterOptions } deriving (Show) -- | FictionBook building monad. -type FBM = StateT FbRenderState IO +type FBM m = StateT FbRenderState m newFB :: FbRenderState newFB = FbRenderState { footnotes = [], imagesToFetch = [] - , parentListMarker = "", parentBulletLevel = 0 + , parentListMarker = "" , writerOptions = def } data ImageMode = NormalImage | InlineImage deriving (Eq) @@ -73,20 +81,27 @@ instance Show ImageMode where show InlineImage = "inlineImageType" -- | Produce an FB2 document from a 'Pandoc' document. -writeFB2 :: WriterOptions -- ^ conversion options +writeFB2 :: PandocMonad m + => WriterOptions -- ^ conversion options -> Pandoc -- ^ document to convert - -> IO String -- ^ FictionBook2 document (not encoded yet) -writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do + -> m Text -- ^ FictionBook2 document (not encoded yet) +writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc + +pandocToFB2 :: PandocMonad m + => WriterOptions + -> Pandoc + -> FBM m Text +pandocToFB2 opts (Pandoc meta blocks) = do modify (\s -> s { writerOptions = opts }) desc <- description meta - fp <- frontpage meta + title <- cMapM toXml . docTitle $ meta secs <- renderSections 1 blocks - let body = el "body" $ fp ++ secs + let body = el "body" $ el "title" (el "p" title) : secs notes <- renderFootnotes - (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) + (imgs,missing) <- fmap imagesToFetch get >>= \s -> lift (fetchImages s) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ xml_head ++ (showContent fb2_xml) ++ "\n" + return $ pack $ xml_head ++ showContent fb2_xml ++ "\n" where xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" fb2_attrs = @@ -94,67 +109,77 @@ writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do xlink = "http://www.w3.org/1999/xlink" in [ uattr "xmlns" xmlns , attr ("xmlns", "l") xlink ] - -- - frontpage :: Meta -> FBM [Content] - frontpage meta' = do - t <- cMapM toXml . docTitle $ meta' - return $ - [ el "title" (el "p" t) - , el "annotation" (map (el "p" . cMap plain) - (docAuthors meta' ++ [docDate meta'])) - ] - description :: Meta -> FBM Content - description meta' = do - bt <- booktitle meta' - let as = authors meta' - dd <- docdate meta' - return $ el "description" - [ el "title-info" (bt ++ as ++ dd) - , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version - ] - booktitle :: Meta -> FBM [Content] - booktitle meta' = do - t <- cMapM toXml . docTitle $ meta' - return $ if null t - then [] - else [ el "book-title" t ] - authors :: Meta -> [Content] - authors meta' = cMap author (docAuthors meta') - author :: [Inline] -> [Content] - author ss = - let ws = words . cMap plain $ ss - email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws) - ws' = filter ('@' `notElem`) ws - names = case ws' of - (nickname:[]) -> [ el "nickname" nickname ] - (fname:lname:[]) -> [ el "first-name" fname - , el "last-name" lname ] - (fname:rest) -> [ el "first-name" fname - , el "middle-name" (concat . init $ rest) - , el "last-name" (last rest) ] - ([]) -> [] - in list $ el "author" (names ++ email) - docdate :: Meta -> FBM [Content] - docdate meta' = do - let ss = docDate meta' - d <- cMapM toXml ss - return $ if null d - then [] - else [el "date" d] + +description :: PandocMonad m => Meta -> FBM m Content +description meta' = do + let genre = el "genre" "unrecognised" + bt <- booktitle meta' + let as = authors meta' + dd <- docdate meta' + let lang = case lookupMeta "lang" meta' of + Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] + Just (MetaString s) -> [el "lang" $ iso639 s] + _ -> [] + where iso639 = takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + let coverimage url = do + let img = Image nullAttr mempty (url, "") + im <- insertImage InlineImage img + return [el "coverpage" im] + coverpage <- case lookupMeta "cover-image" meta' of + Just (MetaInlines [Str s]) -> coverimage s + Just (MetaString s) -> coverimage s + _ -> return [] + return $ el "description" + [ el "title-info" (genre : (bt ++ as ++ dd ++ lang)) + , el "document-info" (el "program-used" "pandoc" : coverpage) + ] + +booktitle :: PandocMonad m => Meta -> FBM m [Content] +booktitle meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ if null t + then [] + else [ el "book-title" t ] + +authors :: Meta -> [Content] +authors meta' = cMap author (docAuthors meta') + +author :: [Inline] -> [Content] +author ss = + let ws = words . cMap plain $ ss + email = el "email" <$> take 1 (filter ('@' `elem`) ws) + ws' = filter ('@' `notElem`) ws + names = case ws' of + [nickname] -> [ el "nickname" nickname ] + [fname, lname] -> [ el "first-name" fname + , el "last-name" lname ] + (fname:rest) -> [ el "first-name" fname + , el "middle-name" (concat . init $ rest) + , el "last-name" (last rest) ] + [] -> [] + in list $ el "author" (names ++ email) + +docdate :: PandocMonad m => Meta -> FBM m [Content] +docdate meta' = do + let ss = docDate meta' + d <- cMapM toXml ss + return $ if null d + then [] + else [el "date" d] -- | Divide the stream of blocks into sections and convert to XML -- representation. -renderSections :: Int -> [Block] -> FBM [Content] +renderSections :: PandocMonad m => Int -> [Block] -> FBM m [Content] renderSections level blocks = do let secs = splitSections level blocks mapM (renderSection level) secs -renderSection :: Int -> ([Inline], [Block]) -> FBM Content +renderSection :: PandocMonad m => Int -> ([Inline], [Block]) -> FBM m Content renderSection level (ttl, body) = do title <- if null ttl then return [] else return . list . el "title" . formatTitle $ ttl - content <- if (hasSubsections body) + content <- if hasSubsections body then renderSections (level + 1) body else cMapM blockToXml body return $ el "section" (title ++ content) @@ -175,7 +200,7 @@ split cond xs = let (b,a) = break cond xs isLineBreak :: Inline -> Bool isLineBreak LineBreak = True -isLineBreak _ = False +isLineBreak _ = False -- | Divide the stream of block elements into sections: [(title, blocks)]. splitSections :: Int -> [Block] -> [([Inline], [Block])] @@ -186,17 +211,17 @@ splitSections level blocks = reverse $ revSplit (reverse blocks) let (lastsec, before) = break sameLevel rblocks (header, prevblocks) = case before of - ((Header n _ title):prevblocks') -> + (Header n _ title:prevblocks') -> if n == level then (title, prevblocks') else ([], before) _ -> ([], before) in (header, reverse lastsec) : revSplit prevblocks sameLevel (Header n _ _) = n == level - sameLevel _ = False + sameLevel _ = False -- | Make another FictionBook body with footnotes. -renderFootnotes :: FBM [Content] +renderFootnotes :: PandocMonad m => FBM m [Content] renderFootnotes = do fns <- footnotes `liftM` get if null fns @@ -205,19 +230,19 @@ renderFootnotes = do el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) where renderFN (n, idstr, cs) = - let fn_texts = (el "title" (el "p" (show n))) : cs + let fn_texts = el "title" (el "p" (show n)) : cs in el "section" ([uattr "id" idstr], fn_texts) -- | Fetch images and encode them for the FictionBook XML. -- Return image data and a list of hrefs of the missing images. -fetchImages :: [(String,String)] -> IO ([Content],[String]) +fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String]) fetchImages links = do imgs <- mapM (uncurry fetchImage) links - return $ (rights imgs, lefts imgs) + return (rights imgs, lefts imgs) -- | Fetch image data from disk or from network and make a <binary> XML section. -- Return either (Left hrefOfMissingImage) or (Right xmlContent). -fetchImage :: String -> String -> IO (Either String Content) +fetchImage :: PandocMonad m => String -> String -> m (Either String Content) fetchImage href link = do mbimg <- case (isURI link, readDataURI link) of @@ -227,28 +252,25 @@ fetchImage href link = do then return (Just (mime',base64)) else return Nothing (True, Just _) -> return Nothing -- not base64-encoded - (True, Nothing) -> fetchURL link - (False, _) -> do - d <- nothingOnError $ B.readFile (unEscapeString link) - let t = case map toLower (takeExtension link) of - ".png" -> Just "image/png" - ".jpg" -> Just "image/jpeg" - ".jpeg" -> Just "image/jpeg" - ".jpe" -> Just "image/jpeg" - _ -> Nothing -- only PNG and JPEG are supported in FB2 - return $ liftM2 (,) t (liftM (toStr . encode) d) + _ -> + catchError (do (bs, mbmime) <- P.fetchItem link + case mbmime of + Nothing -> do + report $ CouldNotDetermineMimeType link + return Nothing + Just mime -> return $ Just (mime, + B8.unpack $ encode bs)) + (\e -> + do report $ CouldNotFetchResource link (show e) + return Nothing) case mbimg of - Just (imgtype, imgdata) -> do + Just (imgtype, imgdata) -> return . Right $ el "binary" ( [uattr "id" href , uattr "content-type" imgtype] , txt imgdata ) _ -> return (Left ('#':href)) - where - nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString)) - nothingOnError action = liftM Just action `E.catch` omnihandler - omnihandler :: E.SomeException -> IO (Maybe B.ByteString) - omnihandler _ = return Nothing + -- | Extract mime type and encoded data from the Data URI. readDataURI :: String -- ^ URI @@ -276,8 +298,8 @@ isMimeType :: String -> Bool isMimeType s = case split (=='/') s of [mtype,msubtype] -> - ((map toLower mtype) `elem` types - || "x-" `isPrefixOf` (map toLower mtype)) + (map toLower mtype `elem` types + || "x-" `isPrefixOf` map toLower mtype) && all valid mtype && all valid msubtype _ -> False @@ -286,85 +308,63 @@ isMimeType s = valid c = isAscii c && not (isControl c) && not (isSpace c) && c `notElem` "()<>@,;:\\\"/[]?=" --- | Fetch URL, return its Content-Type and binary data on success. -fetchURL :: String -> IO (Maybe (String, String)) -fetchURL url = do - flip catchIO_ (return Nothing) $ do - r <- browse $ do - setOutHandler (const (return ())) - setAllowRedirects True - liftM snd . request . getRequest $ url - let content_type = lookupHeader HdrContentType (getHeaders r) - content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r - return $ liftM2 (,) content_type content - -toBS :: String -> B.ByteString -toBS = B.pack . map (toEnum . fromEnum) - -toStr :: B.ByteString -> String -toStr = map (toEnum . fromEnum) . B.unpack - footnoteID :: Int -> String -footnoteID i = "n" ++ (show i) +footnoteID i = "n" ++ show i linkID :: Int -> String -linkID i = "l" ++ (show i) +linkID i = "l" ++ show i -- | Convert a block-level Pandoc's element to FictionBook XML representation. -blockToXml :: Block -> FBM [Content] +blockToXml :: PandocMonad m => Block -> FBM m [Content] blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula -- title beginning with fig: indicates that the image is a figure blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) = insertImage NormalImage (Image atr alt (src,tit)) -blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss +blockToXml (Para ss) = (list . el "p") <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code") . lines $ s -blockToXml (RawBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code") . lines $ s +blockToXml b@(RawBlock _ _) = do + report $ BlockNotRendered b + return [] blockToXml (Div _ bs) = cMapM blockToXml bs -blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs -blockToXml (LineBlock lns) = blockToXml $ linesToPara lns +blockToXml (BlockQuote bs) = (list . el "cite") <$> cMapM blockToXml bs +blockToXml (LineBlock lns) = + (list . el "poem") <$> mapM stanza (split null lns) + where + v xs = el "v" <$> cMapM toXml xs + stanza xs = el "stanza" <$> mapM v xs blockToXml (OrderedList a bss) = do state <- get let pmrk = parentListMarker state - let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a + let markers = (pmrk ++) <$> orderedListMarkers a let mkitem mrk bs = do - modify (\s -> s { parentListMarker = mrk }) - itemtext <- cMapM blockToXml . paraToPlain $ bs + modify (\s -> s { parentListMarker = mrk ++ " "}) + item <- cMapM blockToXml $ plainToPara $ indentBlocks (mrk ++ " ") bs modify (\s -> s { parentListMarker = pmrk }) -- old parent marker - return . el "p" $ [ txt mrk, txt " " ] ++ itemtext - mapM (uncurry mkitem) (zip markers bss) + return item + concat <$> zipWithM mkitem markers bss blockToXml (BulletList bss) = do state <- get - let level = parentBulletLevel state let pmrk = parentListMarker state - let prefix = replicate (length pmrk) ' ' - let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"] - let mrk = prefix ++ bullets !! (level `mod` (length bullets)) + let mrk = pmrk ++ "•" let mkitem bs = do - modify (\s -> s { parentBulletLevel = (level+1) }) - itemtext <- cMapM blockToXml . paraToPlain $ bs - modify (\s -> s { parentBulletLevel = level }) -- restore bullet level - return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext - mapM mkitem bss + modify (\s -> s { parentListMarker = mrk ++ " "}) + item <- cMapM blockToXml $ plainToPara $ indentBlocks (mrk ++ " ") bs + modify (\s -> s { parentListMarker = pmrk }) -- old parent marker + return item + cMapM mkitem bss blockToXml (DefinitionList defs) = cMapM mkdef defs where mkdef (term, bss) = do - def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss + items <- cMapM (cMapM blockToXml . plainToPara . indentBlocks (replicate 4 ' ')) bss t <- wrap "strong" term - return [ el "p" t, el "p" def' ] - sep blocks = - if all needsBreak blocks then - blocks ++ [Plain [LineBreak]] - else - blocks - needsBreak (Para _) = False - needsBreak (Plain ins) = LineBreak `notElem` ins - needsBreak _ = True -blockToXml (Header _ _ _) = -- should never happen, see renderSections - error "unexpected header in section text" + return (el "p" t : items) +blockToXml h@Header{} = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return [] blockToXml HorizontalRule = return [ el "empty-line" () , el "p" (txt (replicate 10 '—')) @@ -372,45 +372,42 @@ blockToXml HorizontalRule = return blockToXml (Table caption aligns _ headers rows) = do hd <- mkrow "th" headers aligns bd <- mapM (\r -> mkrow "td" r aligns) rows - c <- return . el "emphasis" =<< cMapM toXml caption + c <- el "emphasis" <$> cMapM toXml caption return [el "table" (hd : bd), el "p" c] where - mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content + mkrow :: PandocMonad m => String -> [TableCell] -> [Alignment] -> FBM m Content mkrow tag cells aligns' = - (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns')) + el "tr" <$> mapM (mkcell tag) (zip cells aligns') -- - mkcell :: String -> (TableCell, Alignment) -> FBM Content + mkcell :: PandocMonad m => String -> (TableCell, Alignment) -> FBM m Content mkcell tag (cell, align) = do cblocks <- cMapM blockToXml cell return $ el tag ([align_attr align], cblocks) -- align_attr a = Attr (QName "align" Nothing Nothing) (align_str a) - align_str AlignLeft = "left" - align_str AlignCenter = "center" - align_str AlignRight = "right" + align_str AlignLeft = "left" + align_str AlignCenter = "center" + align_str AlignRight = "right" align_str AlignDefault = "left" blockToXml Null = return [] --- Replace paragraphs with plain text and line break. --- Necessary to simulate multi-paragraph lists in FB2. -paraToPlain :: [Block] -> [Block] -paraToPlain [] = [] -paraToPlain (Para inlines : rest) = - let p = (Plain (inlines ++ [LineBreak])) - in p : paraToPlain rest -paraToPlain (p:rest) = p : paraToPlain rest +-- Replace plain text with paragraphs and add line break after paragraphs. +-- It is used to convert plain text from tight list items to paragraphs. +plainToPara :: [Block] -> [Block] +plainToPara [] = [] +plainToPara (Plain inlines : rest) = + Para inlines : plainToPara rest +plainToPara (Para inlines : rest) = + Para inlines : Plain [LineBreak] : plainToPara rest +plainToPara (p:rest) = p : plainToPara rest -- Simulate increased indentation level. Will not really work -- for multi-line paragraphs. -indent :: Block -> Block -indent = indentBlock +indentPrefix :: String -> Block -> Block +indentPrefix spacer = indentBlock where - -- indentation space - spacer :: String - spacer = replicate 4 ' ' - -- - indentBlock (Plain ins) = Plain ((Str spacer):ins) - indentBlock (Para ins) = Para ((Str spacer):ins) + indentBlock (Plain ins) = Plain (Str spacer:ins) + indentBlock (Para ins) = Para (Str spacer:ins) indentBlock (CodeBlock a s) = let s' = unlines . map (spacer++) . lines $ s in CodeBlock a s' @@ -420,10 +417,21 @@ indent = indentBlock -- indent every (explicit) line indentLines :: [Inline] -> [Inline] indentLines ins = let lns = split isLineBreak ins :: [[Inline]] - in intercalate [LineBreak] $ map ((Str spacer):) lns + in intercalate [LineBreak] $ map (Str spacer:) lns + +indent :: Block -> Block +indent = indentPrefix spacer + where + -- indentation space + spacer :: String + spacer = replicate 4 ' ' + +indentBlocks :: String -> [Block] -> [Block] +indentBlocks _ [] = [] +indentBlocks prefix (x:xs) = indentPrefix prefix x : map (indentPrefix $ replicate (length prefix) ' ') xs -- | Convert a Pandoc's Inline element to FictionBook XML representation. -toXml :: Inline -> FBM [Content] +toXml :: PandocMonad m => Inline -> FBM m [Content] toXml (Str s) = return [txt s] toXml (Span _ ils) = cMapM toXml ils toXml (Emph ss) = list `liftM` wrap "emphasis" ss @@ -444,7 +452,9 @@ toXml Space = return [txt " "] toXml SoftBreak = return [txt " "] toXml LineBreak = return [el "empty-line" ()] toXml (Math _ formula) = insertMath InlineImage formula -toXml (RawInline _ _) = return [] -- raw TeX and raw HTML are suppressed +toXml il@(RawInline _ _) = do + report $ InlineNotRendered il + return [] -- raw TeX and raw HTML are suppressed toXml (Link _ text (url,ttl)) = do fns <- footnotes `liftM` get let n = 1 + length fns @@ -462,7 +472,7 @@ toXml (Link _ text (url,ttl)) = do ( [ attr ("l","href") ('#':ln_id) , uattr "type" "note" ] , ln_ref) ] -toXml img@(Image _ _ _) = insertImage InlineImage img +toXml img@Image{} = insertImage InlineImage img toXml (Note bs) = do fns <- footnotes `liftM` get let n = 1 + length fns @@ -474,9 +484,9 @@ toXml (Note bs) = do , uattr "type" "note" ] , fn_ref ) -insertMath :: ImageMode -> String -> FBM [Content] +insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content] insertMath immode formula = do - htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get + htmlMath <- fmap (writerHTMLMathMethod . writerOptions) get case htmlMath of WebTeX url -> do let alt = [Code nullAttr formula] @@ -485,7 +495,7 @@ insertMath immode formula = do insertImage immode img _ -> return [el "code" formula] -insertImage :: ImageMode -> Inline -> FBM [Content] +insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do images <- imagesToFetch `liftM` get let n = 1 + length images @@ -493,7 +503,7 @@ insertImage immode (Image _ alt (url,ttl)) = do modify (\s -> s { imagesToFetch = (fname, url) : images }) let ttlattr = case (immode, null ttl) of (NormalImage, False) -> [ uattr "title" ttl ] - _ -> [] + _ -> [] return . list $ el "image" $ [ attr ("l","href") ('#':fname) @@ -517,20 +527,20 @@ replaceImagesWithAlt missingHrefs body = else c in case XC.nextDF c' of (Just cnext) -> replaceAll cnext - Nothing -> c' -- end of document + Nothing -> c' -- end of document -- isImage :: Content -> Bool - isImage (Elem e) = (elName e) == (uname "image") - isImage _ = False + isImage (Elem e) = elName e == uname "image" + isImage _ = False -- - isMissing (Elem img@(Element _ _ _ _)) = + isMissing (Elem img@Element{}) = let imgAttrs = elAttribs img badAttrs = map (attr ("l","href")) missingHrefs in any (`elem` imgAttrs) badAttrs isMissing _ = False -- replaceNode :: Content -> Content - replaceNode n@(Elem img@(Element _ _ _ _)) = + replaceNode n@(Elem img@Element{}) = let attrs = elAttribs img alt = getAttrVal attrs (uname "alt") imtype = getAttrVal attrs (qname "l" "type") @@ -551,7 +561,7 @@ replaceImagesWithAlt missingHrefs body = -- | Wrap all inlines with an XML tag (given its unqualified name). -wrap :: String -> [Inline] -> FBM Content +wrap :: PandocMonad m => String -> [Inline] -> FBM m Content wrap tagname inlines = el tagname `liftM` cMapM toXml inlines -- " Create a singleton list. @@ -560,25 +570,25 @@ list = (:[]) -- | Convert an 'Inline' to plaintext. plain :: Inline -> String -plain (Str s) = s -plain (Emph ss) = concat (map plain ss) -plain (Span _ ss) = concat (map plain ss) -plain (Strong ss) = concat (map plain ss) -plain (Strikeout ss) = concat (map plain ss) -plain (Superscript ss) = concat (map plain ss) -plain (Subscript ss) = concat (map plain ss) -plain (SmallCaps ss) = concat (map plain ss) -plain (Quoted _ ss) = concat (map plain ss) -plain (Cite _ ss) = concat (map plain ss) -- FIXME -plain (Code _ s) = s -plain Space = " " -plain SoftBreak = " " -plain LineBreak = "\n" -plain (Math _ s) = s -plain (RawInline _ s) = s +plain (Str s) = s +plain (Emph ss) = cMap plain ss +plain (Span _ ss) = cMap plain ss +plain (Strong ss) = cMap plain ss +plain (Strikeout ss) = cMap plain ss +plain (Superscript ss) = cMap plain ss +plain (Subscript ss) = cMap plain ss +plain (SmallCaps ss) = cMap plain ss +plain (Quoted _ ss) = cMap plain ss +plain (Cite _ ss) = cMap plain ss -- FIXME +plain (Code _ s) = s +plain Space = " " +plain SoftBreak = " " +plain LineBreak = "\n" +plain (Math _ s) = s +plain (RawInline _ _) = "" plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"]) -plain (Image _ alt _) = concat (map plain alt) -plain (Note _) = "" -- FIXME +plain (Image _ alt _) = cMap plain alt +plain (Note _) = "" -- FIXME -- | Create an XML element. el :: (Node t) @@ -599,11 +609,11 @@ txt s = Text $ CData CDataText s Nothing -- | Create an XML attribute with an unqualified name. uattr :: String -> String -> Text.XML.Light.Attr -uattr name val = Attr (uname name) val +uattr name = Attr (uname name) -- | Create an XML attribute with a qualified name from given namespace. attr :: (String, String) -> String -> Text.XML.Light.Attr -attr (ns, name) val = Attr (qname ns name) val +attr (ns, name) = Attr (qname ns name) -- | Unqualified name uname :: String -> QName diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 3c8c264d2..1647df7ea 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,6 +1,10 @@ -{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE MultiWayIf #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +23,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,60 +32,92 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to HTML. -} -module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where -import Text.Pandoc.Definition +module Text.Pandoc.Writers.HTML ( + writeHtml4, + writeHtml4String, + writeHtml5, + writeHtml5String, + writeHtmlStringForEPUB, + writeS5, + writeSlidy, + writeSlideous, + writeDZSlides, + writeRevealJs, + tagWithAttributes + ) where +import Control.Monad.State.Strict +import Data.Char (ord, toLower) +import Data.List (intercalate, intersperse, isPrefixOf, partition) +import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options +import qualified Data.Set as Set +import Data.String (fromString) +import Data.Text (Text) +import qualified Data.Text.Lazy as TL +import Network.HTTP (urlEncode) +import Network.URI (URI (..), parseURIReference, unEscapeString) +import Numeric (showHex) +import Text.Blaze.Internal (customLeaf, MarkupM(Empty)) +#if MIN_VERSION_blaze_markup(0,6,3) +#else +import Text.Blaze.Internal (preEscapedString, preEscapedText) +#endif +import Text.Blaze.Html hiding (contents) +import Text.Pandoc.Definition +import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtmlInline, highlight, + styleToCss) import Text.Pandoc.ImageSize -import Text.Pandoc.Templates -import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Slides -import Text.Pandoc.Highlighting ( highlight, styleToCss, - formatHtmlInline, formatHtmlBlock ) -import Text.Pandoc.XML (fromEntities, escapeStringForXML) -import Network.URI ( parseURIReference, URI(..), unEscapeString ) -import Network.HTTP ( urlEncode ) -import Numeric ( showHex ) -import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, intersperse ) -import Data.String ( fromString ) -import Data.Maybe ( catMaybes, fromMaybe, isJust ) -import Control.Monad.State -import Text.Blaze.Html hiding(contents) +import Text.Pandoc.Templates +import Text.Pandoc.Walk +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML (escapeStringForXML, fromEntities) #if MIN_VERSION_blaze_markup(0,6,3) #else -import Text.Blaze.Internal(preEscapedString) +import Text.Blaze.Internal (preEscapedString, preEscapedText) #endif #if MIN_VERSION_blaze_html(0,5,1) import qualified Text.Blaze.XHtml5 as H5 #else import qualified Text.Blaze.Html5 as H5 #endif +import Control.Monad.Except (throwError) +import Data.Aeson (Value) +import System.FilePath (takeBaseName, takeExtension) +import Text.Blaze.Html.Renderer.Text (renderHtml) import qualified Text.Blaze.XHtml1.Transitional as H import qualified Text.Blaze.XHtml1.Transitional.Attributes as A -import Text.Blaze.Html.Renderer.String (renderHtml) +import Text.Pandoc.Class (PandocMonad, report, runPure) +import Text.Pandoc.Error +import Text.Pandoc.Logging import Text.TeXMath -import Text.XML.Light.Output -import Text.XML.Light (unode, elChildren, unqual) +import Text.XML.Light (elChildren, unode, unqual) import qualified Text.XML.Light as XML -import System.FilePath (takeExtension) -import Data.Aeson (Value) +import Text.XML.Light.Output data WriterState = WriterState - { stNotes :: [Html] -- ^ List of notes - , stMath :: Bool -- ^ Math is used in document - , stQuotes :: Bool -- ^ <q> tag is used - , stHighlighting :: Bool -- ^ Syntax highlighting is used - , stSecNum :: [Int] -- ^ Number of current section - , stElement :: Bool -- ^ Processing an Element + { stNotes :: [Html] -- ^ List of notes + , stMath :: Bool -- ^ Math is used in document + , stQuotes :: Bool -- ^ <q> tag is used + , stHighlighting :: Bool -- ^ Syntax highlighting is used + , stSecNum :: [Int] -- ^ Number of current section + , stElement :: Bool -- ^ Processing an Element + , stHtml5 :: Bool -- ^ Use HTML5 + , stEPUBVersion :: Maybe EPUBVersion -- ^ EPUB version if for epub + , stSlideVariant :: HTMLSlideVariant + , stCodeBlockNum :: Int -- ^ Number of code block } defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, stHighlighting = False, stSecNum = [], - stElement = False} + stElement = False, stHtml5 = False, + stEPUBVersion = Nothing, + stSlideVariant = NoSlides, + stCodeBlockNum = 0} -- Helpers to render HTML with the appropriate function. @@ -98,62 +134,141 @@ nl opts = if writerWrapText opts == WrapNone then mempty else preEscapedString "\n" --- | Convert Pandoc document to Html string. -writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts d = - let (body, context) = evalState (pandocToHtml opts d) defaultWriterState - in case writerTemplate opts of - Nothing -> renderHtml body - Just tpl -> renderTemplate' tpl $ - defField "body" (renderHtml body) context - --- | Convert Pandoc document to Html structure. -writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts d = - let (body, context) = evalState (pandocToHtml opts d) defaultWriterState - in case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl $ - defField "body" (renderHtml body) context +-- | Convert Pandoc document to Html 5 string. +writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeHtml5String = writeHtmlString' + defaultWriterState{ stHtml5 = True } + +-- | Convert Pandoc document to Html 5 structure. +writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html +writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True } + +-- | Convert Pandoc document to Html 4 string. +writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeHtml4String = writeHtmlString' + defaultWriterState{ stHtml5 = False } + +-- | Convert Pandoc document to Html 4 structure. +writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html +writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False } + +-- | Convert Pandoc document to Html appropriate for an epub version. +writeHtmlStringForEPUB :: PandocMonad m + => EPUBVersion -> WriterOptions -> Pandoc + -> m Text +writeHtmlStringForEPUB version o = writeHtmlString' + defaultWriterState{ stHtml5 = version == EPUB3, + stEPUBVersion = Just version } o + +-- | Convert Pandoc document to Reveal JS HTML slide show. +writeRevealJs :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeRevealJs = writeHtmlSlideShow' RevealJsSlides + +-- | Convert Pandoc document to S5 HTML slide show. +writeS5 :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeS5 = writeHtmlSlideShow' S5Slides + +-- | Convert Pandoc document to Slidy HTML slide show. +writeSlidy :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeSlidy = writeHtmlSlideShow' SlidySlides + +-- | Convert Pandoc document to Slideous HTML slide show. +writeSlideous :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeSlideous = writeHtmlSlideShow' SlideousSlides + +-- | Convert Pandoc document to DZSlides HTML slide show. +writeDZSlides :: PandocMonad m + => WriterOptions -> Pandoc -> m Text +writeDZSlides = writeHtmlSlideShow' DZSlides + +writeHtmlSlideShow' :: PandocMonad m + => HTMLSlideVariant -> WriterOptions -> Pandoc -> m Text +writeHtmlSlideShow' variant = writeHtmlString' + defaultWriterState{ stSlideVariant = variant + , stHtml5 = case variant of + RevealJsSlides -> True + S5Slides -> False + SlidySlides -> False + DZSlides -> True + SlideousSlides -> False + NoSlides -> False + } + +renderHtml' :: Html -> Text +renderHtml' = TL.toStrict . renderHtml + +writeHtmlString' :: PandocMonad m + => WriterState -> WriterOptions -> Pandoc -> m Text +writeHtmlString' st opts d = do + (body, context) <- evalStateT (pandocToHtml opts d) st + case writerTemplate opts of + Nothing -> return $ renderHtml' body + Just tpl -> do + -- warn if empty lang + when (isNothing (getField "lang" context :: Maybe String)) $ + report NoLangSpecified + -- check for empty pagetitle + context' <- + case getField "pagetitle" context of + Just (s :: String) | not (null s) -> return context + _ -> do + let fallback = fromMaybe "Untitled" $ takeBaseName <$> + lookup "sourcefile" (writerVariables opts) + report $ NoTitleElement fallback + return $ resetField "pagetitle" fallback context + renderTemplate' tpl $ + defField "body" (renderHtml' body) context' + +writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html +writeHtml' st opts d = + case writerTemplate opts of + Just _ -> preEscapedText <$> writeHtmlString' st opts d + Nothing -> do + (body, _) <- evalStateT (pandocToHtml opts d) st + return body -- result is (title, authors, date, toc, body, new variables) -pandocToHtml :: WriterOptions +pandocToHtml :: PandocMonad m + => WriterOptions -> Pandoc - -> State WriterState (Html, Value) + -> StateT WriterState m (Html, Value) pandocToHtml opts (Pandoc meta blocks) = do metadata <- metaToJSON opts - (fmap renderHtml . blockListToHtml opts) - (fmap renderHtml . inlineListToHtml opts) + (fmap renderHtml' . blockListToHtml opts) + (fmap renderHtml' . inlineListToHtml opts) meta let stringifyHTML = escapeStringForXML . stringify let authsMeta = map stringifyHTML $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts + slideVariant <- gets stSlideVariant let sects = hierarchicalize $ - if writerSlideVariant opts == NoSlides + if slideVariant == NoSlides then blocks else prepSlides slideLevel blocks - toc <- if writerTableOfContents opts - then tableOfContents opts sects + toc <- if writerTableOfContents opts && slideVariant /= S5Slides + then fmap renderHtml' <$> tableOfContents opts sects else return Nothing blocks' <- liftM (mconcat . intersperse (nl opts)) $ mapM (elementToHtml slideLevel opts) sects st <- get - let notes = reverse (stNotes st) - let thebody = blocks' >> footnoteSection opts notes + notes <- footnoteSection opts (reverse (stNotes st)) + let thebody = blocks' >> notes let math = case writerHTMLMathMethod opts of LaTeXMathML (Just url) -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty - MathML (Just url) -> + MathJax url + | slideVariant /= RevealJsSlides -> + -- mathjax is handled via a special plugin in revealjs H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ mempty - MathJax url -> - H.script ! A.src (toValue url) - ! A.type_ "text/javascript" - $ case writerSlideVariant opts of + $ case slideVariant of SlideousSlides -> preEscapedString "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" @@ -162,37 +277,55 @@ pandocToHtml opts (Pandoc meta blocks) = do H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty - KaTeX js css -> - (H.script ! A.src (toValue js) $ mempty) <> - (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> - (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) + KaTeX url -> + (H.script ! + A.src (toValue $ url ++ "katex.min.js") $ mempty) <> + (H.script ! + A.src (toValue $ url ++ "contrib/auto-render.min.js") + $ mempty) <> + ( + H.script + "document.addEventListener(\"DOMContentLoaded\", function() {\n renderMathInElement(document.body);\n});") <> + (H.link ! A.rel "stylesheet" ! + A.href (toValue $ url ++ "katex.min.css")) + _ -> case lookup "mathml-script" (writerVariables opts) of - Just s | not (writerHtml5 opts) -> + Just s | not (stHtml5 st) -> H.script ! A.type_ "text/javascript" $ preEscapedString ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n") | otherwise -> mempty Nothing -> mempty let context = (if stHighlighting st - then defField "highlighting-css" - (styleToCss $ writerHighlightStyle opts) + then case writerHighlightStyle opts of + Just sty -> defField "highlighting-css" + (styleToCss sty) + Nothing -> id else id) $ (if stMath st - then defField "math" (renderHtml math) + then defField "math" (renderHtml' math) else id) $ + defField "mathjax" + (case writerHTMLMathMethod opts of + MathJax _ -> True + _ -> False) $ defField "quotes" (stQuotes st) $ - maybe id (defField "toc" . renderHtml) toc $ + -- for backwards compatibility we populate toc + -- with the contents of the toc, rather than a + -- boolean: + maybe id (defField "toc") toc $ + maybe id (defField "table-of-contents") toc $ defField "author-meta" authsMeta $ maybe id (defField "date-meta") (normalizeDate dateMeta) $ - defField "pagetitle" (stringifyHTML $ docTitle meta) $ + defField "pagetitle" (stringifyHTML (docTitle meta)) $ defField "idprefix" (writerIdentifierPrefix opts) $ -- these should maybe be set in pandoc.hs defField "slidy-url" - ("http://www.w3.org/Talks/Tools/Slidy2" :: String) $ + ("https://www.w3.org/Talks/Tools/Slidy2" :: String) $ defField "slideous-url" ("slideous" :: String) $ defField "revealjs-url" ("reveal.js" :: String) $ defField "s5-url" ("s5/default" :: String) $ - defField "html5" (writerHtml5 opts) $ + defField "html5" (stHtml5 st) metadata return (thebody, context) @@ -200,44 +333,52 @@ pandocToHtml opts (Pandoc meta blocks) = do prefixedId :: WriterOptions -> String -> Attribute prefixedId opts s = case s of - "" -> mempty - _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s + "" -> mempty + _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s -toList :: (Html -> Html) -> WriterOptions -> ([Html] -> Html) +toList :: PandocMonad m + => (Html -> Html) + -> WriterOptions + -> [Html] + -> StateT WriterState m Html toList listop opts items = do - if (writerIncremental opts) - then if (writerSlideVariant opts /= RevealJsSlides) - then (listop $ mconcat items) ! A.class_ "incremental" - else listop $ mconcat $ map (! A.class_ "fragment") items - else listop $ mconcat items + slideVariant <- gets stSlideVariant + return $ + if writerIncremental opts + then if slideVariant /= RevealJsSlides + then listop (mconcat items) ! A.class_ "incremental" + else listop $ mconcat $ map (! A.class_ "fragment") items + else listop $ mconcat items -unordList :: WriterOptions -> [Html] -> Html +unordList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html unordList opts = toList H.ul opts . toListItems opts -ordList :: WriterOptions -> [Html] -> Html +ordList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html ordList opts = toList H.ol opts . toListItems opts -defList :: WriterOptions -> [Html] -> Html +defList :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html defList opts items = toList H.dl opts (items ++ [nl opts]) -- | Construct table of contents from list of elements. -tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html) +tableOfContents :: PandocMonad m => WriterOptions -> [Element] -> StateT WriterState m (Maybe Html) tableOfContents _ [] = return Nothing tableOfContents opts sects = do - let opts' = opts { writerIgnoreNotes = True } - contents <- mapM (elementToListItem opts') sects + contents <- mapM (elementToListItem opts) sects let tocList = catMaybes contents - return $ if null tocList - then Nothing - else Just $ unordList opts tocList + if null tocList + then return Nothing + else Just <$> unordList opts tocList -- | Convert section number to string showSecNum :: [Int] -> String -showSecNum = concat . intersperse "." . map show +showSecNum = intercalate "." . map show -- | Converts an Element to a list item for a table of contents, -- retrieving the appropriate identifier from state. -elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html) +elementToListItem :: PandocMonad m => WriterOptions -> Element -> StateT WriterState m (Maybe Html) -- Don't include the empty headers created in slide shows -- shows when an hrule is used to separate slides without a new title: elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing @@ -249,45 +390,52 @@ elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs) then (H.span ! A.class_ "toc-section-number" $ toHtml $ showSecNum num') >> preEscapedString " " else mempty - txt <- liftM (sectnum >>) $ inlineListToHtml opts headerText + txt <- liftM (sectnum >>) $ inlineListToHtml opts $ walk deNote headerText subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes - let subList = if null subHeads - then mempty - else unordList opts subHeads + subList <- if null subHeads + then return mempty + else unordList opts subHeads -- in reveal.js, we need #/apples, not #apples: - let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides] + slideVariant <- gets stSlideVariant + let revealSlash = ['/' | slideVariant== RevealJsSlides] return $ Just $ if null id' - then (H.a $ toHtml txt) >> subList + then H.a (toHtml txt) >> subList else (H.a ! A.href (toValue $ "#" ++ revealSlash ++ writerIdentifierPrefix opts ++ id') $ toHtml txt) >> subList elementToListItem _ _ = return Nothing -- | Convert an Element to Html. -elementToHtml :: Int -> WriterOptions -> Element -> State WriterState Html +elementToHtml :: PandocMonad m => Int -> WriterOptions -> Element -> StateT WriterState m Html elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elements) = do - let slide = writerSlideVariant opts /= NoSlides && level <= slideLevel + slideVariant <- gets stSlideVariant + let slide = slideVariant /= NoSlides && level <= slideLevel let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0) modify $ \st -> st{stSecNum = num'} -- update section number + html5 <- gets stHtml5 let titleSlide = slide && level < slideLevel header' <- if title' == [Str "\0"] -- marker for hrule then return mempty else do modify (\st -> st{ stElement = True}) + let level' = if level <= slideLevel && + slideVariant == SlidySlides + then 1 -- see #3566 + else level res <- blockToHtml opts - (Header level (id',classes,keyvals) title') + (Header level' (id',classes,keyvals) title') modify (\st -> st{ stElement = False}) return res - let isSec (Sec _ _ _ _ _) = True - isSec (Blk _) = False + let isSec Sec{} = True + isSec (Blk _) = False let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] isPause _ = False - let fragmentClass = case writerSlideVariant opts of - RevealJsSlides -> "fragment" - _ -> "incremental" + let fragmentClass = case slideVariant of + RevealJsSlides -> "fragment" + _ -> "incremental" let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\"" ++ fragmentClass ++ "\">")) : (xs ++ [Blk (RawBlock (Format "html") "</div>")]) @@ -299,45 +447,51 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen [] -> [] (x:xs) -> x ++ concatMap inDiv xs let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] - let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ + let classes' = ["title-slide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && - not (writerHtml5 opts) ] ++ + not html5 ] ++ ["level" ++ show level | slide || writerSectionDivs opts ] ++ classes - let secttag = if writerHtml5 opts + let secttag = if html5 then H5.section else H.div let attr = (id',classes',keyvals) - return $ if titleSlide - then (if writerSlideVariant opts == RevealJsSlides - then H5.section - else id) $ mconcat $ - (addAttrs opts attr $ secttag $ header') : innerContents - else if writerSectionDivs opts || slide - then addAttrs opts attr - $ secttag $ inNl $ header' : innerContents - else mconcat $ intersperse (nl opts) - $ addAttrs opts attr header' : innerContents + if titleSlide + then do + t <- addAttrs opts attr $ + secttag header' + return $ + (if slideVariant == RevealJsSlides + then H5.section + else id) $ mconcat $ t : innerContents + else if writerSectionDivs opts || slide + then addAttrs opts attr + $ secttag $ inNl $ header' : innerContents + else do + t <- addAttrs opts attr header' + return $ mconcat $ intersperse (nl opts) (t : innerContents) -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. -footnoteSection :: WriterOptions -> [Html] -> Html -footnoteSection opts notes = - if null notes - then mempty - else nl opts >> (container - $ nl opts >> hrtag >> nl opts >> - H.ol (mconcat notes >> nl opts) >> nl opts) - where container x = if writerHtml5 opts - then H5.section ! A.class_ "footnotes" $ x - else if writerSlideVariant opts /= NoSlides - then H.div ! A.class_ "footnotes slide" $ x - else H.div ! A.class_ "footnotes" $ x - hrtag = if writerHtml5 opts then H5.hr else H.hr +footnoteSection :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html +footnoteSection opts notes = do + html5 <- gets stHtml5 + slideVariant <- gets stSlideVariant + let hrtag = if html5 then H5.hr else H.hr + let container x + | html5 = H5.section ! A.class_ "footnotes" $ x + | slideVariant /= NoSlides = H.div ! A.class_ "footnotes slide" $ x + | otherwise = H.div ! A.class_ "footnotes" $ x + return $ + if null notes + then mempty + else nl opts >> container (nl opts >> hrtag >> nl opts >> + H.ol (mconcat notes >> nl opts) >> nl opts) -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: String -> Maybe (String, String) -parseMailto s = do +parseMailto s = case break (==':') s of (xs,':':addr) | map toLower xs == "mailto" -> do let (name', rest) = span (/='@') addr @@ -346,10 +500,12 @@ parseMailto s = do _ -> fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. -obfuscateLink :: WriterOptions -> Attr -> Html -> String -> Html +obfuscateLink :: PandocMonad m + => WriterOptions -> Attr -> Html -> String + -> StateT WriterState m Html obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = addAttrs opts attr $ H.a ! A.href (toValue s) $ txt -obfuscateLink opts attr (renderHtml -> txt) s = +obfuscateLink opts attr (TL.unpack . renderHtml -> txt) s = let meth = writerEmailObfuscation opts s' = map toLower (take 7 s) ++ drop 7 s in case parseMailto s' of @@ -361,20 +517,25 @@ obfuscateLink opts attr (renderHtml -> txt) s = then ("e", name' ++ " at " ++ domain') else ("'" ++ obfuscateString txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") + (_, classNames, _) = attr + classNamesStr = concatMap (' ':) classNames in case meth of ReferenceObfuscation -> -- need to use preEscapedString or &'s are escaped to & in URL - preEscapedString $ "<a href=\"" ++ (obfuscateString s') - ++ "\" class=\"email\">" ++ (obfuscateString txt) ++ "</a>" + return $ + preEscapedString $ "<a href=\"" ++ obfuscateString s' + ++ "\" class=\"email\">" ++ obfuscateString txt ++ "</a>" JavascriptObfuscation -> + return $ (H.script ! A.type_ "text/javascript" $ preEscapedString ("\n<!--\nh='" ++ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ obfuscateString name' ++ "';e=n+a+h;\n" ++ - "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail\">'+" ++ + "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail" ++ + classNamesStr ++ "\">'+" ++ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >> H.noscript (preEscapedString $ obfuscateString altText) - _ -> error $ "Unknown obfuscation method: " ++ show meth + _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth _ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email -- | Obfuscate character as entity. @@ -388,36 +549,71 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . fromEntities -addAttrs :: WriterOptions -> Attr -> Html -> Html -addAttrs opts attr h = foldl (!) h (attrsToHtml opts attr) +-- | Create HTML tag with attributes. +tagWithAttributes :: WriterOptions + -> Bool -- ^ True for HTML5 + -> Bool -- ^ True if self-closing tag + -> Text -- ^ Tag text + -> Attr -- ^ Pandoc style tag attributes + -> Text +tagWithAttributes opts html5 selfClosing tagname attr = + let mktag = (TL.toStrict . renderHtml <$> evalStateT + (addAttrs opts attr (customLeaf (textTag tagname) selfClosing)) + defaultWriterState{ stHtml5 = html5 }) + in case runPure mktag of + Left _ -> mempty + Right t -> t -toAttrs :: [(String, String)] -> [Attribute] -toAttrs kvs = map (\(x,y) -> customAttribute (fromString x) (toValue y)) kvs +addAttrs :: PandocMonad m + => WriterOptions -> Attr -> Html -> StateT WriterState m Html +addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr -attrsToHtml :: WriterOptions -> Attr -> [Attribute] -attrsToHtml opts (id',classes',keyvals) = - [prefixedId opts id' | not (null id')] ++ - [A.class_ (toValue $ unwords classes') | not (null classes')] ++ toAttrs keyvals +toAttrs :: PandocMonad m + => [(String, String)] -> StateT WriterState m [Attribute] +toAttrs kvs = do + html5 <- gets stHtml5 + return $ map (\(x,y) -> + customAttribute + (fromString (if not html5 || x `Set.member` html5Attributes + || "data-" `isPrefixOf` x + then x + else "data-" ++ x)) (toValue y)) kvs -imgAttrsToHtml :: WriterOptions -> Attr -> [Attribute] -imgAttrsToHtml opts attr = - attrsToHtml opts (ident,cls,kvs') ++ - toAttrs (dimensionsToAttrList opts attr) +attrsToHtml :: PandocMonad m + => WriterOptions -> Attr -> StateT WriterState m [Attribute] +attrsToHtml opts (id',classes',keyvals) = do + attrs <- toAttrs keyvals + return $ + [prefixedId opts id' | not (null id')] ++ + [A.class_ (toValue $ unwords classes') | not (null classes')] ++ attrs + +imgAttrsToHtml :: PandocMonad m + => WriterOptions -> Attr -> StateT WriterState m [Attribute] +imgAttrsToHtml opts attr = do + attrs <- attrsToHtml opts (ident,cls,kvs') + dimattrs <- toAttrs (dimensionsToAttrList attr) + return $ attrs ++ dimattrs where (ident,cls,kvs) = attr kvs' = filter isNotDim kvs isNotDim ("width", _) = False isNotDim ("height", _) = False - isNotDim _ = True + isNotDim _ = True -dimensionsToAttrList :: WriterOptions -> Attr -> [(String, String)] -dimensionsToAttrList opts attr = (go Width) ++ (go Height) +dimensionsToAttrList :: Attr -> [(String, String)] +dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height where - go dir = case (dimension dir attr) of - (Just (Percent a)) -> [("style", show dir ++ ":" ++ show (Percent a))] - (Just dim) -> [(show dir, showInPixel opts dim)] - _ -> [] - + consolidateStyles :: [(String, String)] -> [(String, String)] + consolidateStyles xs = + case partition isStyle xs of + ([], _) -> xs + (ss, rest) -> ("style", intercalate ";" $ map snd ss) : rest + isStyle ("style", _) = True + isStyle _ = False + go dir = case dimension dir attr of + (Just (Pixel a)) -> [(show dir, show a)] + (Just x) -> [("style", show dir ++ ":" ++ show x)] + Nothing -> [] imageExts :: [String] imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", @@ -427,71 +623,121 @@ imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", treatAsImage :: FilePath -> Bool treatAsImage fp = - let path = case uriPath `fmap` parseURIReference fp of - Nothing -> fp - Just up -> up + let path = maybe fp uriPath (parseURIReference fp) ext = map toLower $ drop 1 $ takeExtension path in null ext || ext `elem` imageExts --- | Convert Pandoc block element to HTML. -blockToHtml :: WriterOptions -> Block -> State WriterState Html -blockToHtml _ Null = return mempty -blockToHtml opts (Plain lst) = inlineListToHtml opts lst --- title beginning with fig: indicates that the image is a figure -blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do +figure :: PandocMonad m + => WriterOptions -> Attr -> [Inline] -> (String, String) + -> StateT WriterState m Html +figure opts attr txt (s,tit) = do img <- inlineToHtml opts (Image attr txt (s,tit)) - let tocapt = if writerHtml5 opts + html5 <- gets stHtml5 + let tocapt = if html5 then H5.figcaption else H.p ! A.class_ "caption" capt <- if null txt then return mempty else tocapt `fmap` inlineListToHtml opts txt - return $ if writerHtml5 opts + return $ if html5 then H5.figure $ mconcat [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat [nl opts, img, nl opts, capt, nl opts] + +-- | Convert Pandoc block element to HTML. +blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html +blockToHtml _ Null = return mempty +blockToHtml opts (Plain lst) = inlineListToHtml opts lst +blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) + | "stretch" `elem` classes = do + slideVariant <- gets stSlideVariant + case slideVariant of + RevealJsSlides -> + -- a "stretched" image in reveal.js must be a direct child + -- of the slide container + inlineToHtml opts (Image attr txt (src, tit)) + _ -> figure opts attr txt (src, tit) +-- title beginning with fig: indicates that the image is a figure +blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = + figure opts attr txt (s,tit) blockToHtml opts (Para lst) | isEmptyRaw lst = return mempty + | null lst && not (isEnabled Ext_empty_paragraphs opts) = return mempty | otherwise = do contents <- inlineListToHtml opts lst return $ H.p contents where - isEmptyRaw [RawInline f _] = f /= (Format "html") - isEmptyRaw _ = False + isEmptyRaw [RawInline f _] = f `notElem` [Format "html", + Format "html4", Format "html5"] + isEmptyRaw _ = False blockToHtml opts (LineBlock lns) = if writerWrapText opts == WrapNone then blockToHtml opts $ linesToPara lns else do - let lf = preEscapedString "\n" - htmlLines <- mconcat . intersperse lf <$> mapM (inlineListToHtml opts) lns - return $ H.div ! A.style "white-space: pre-line;" $ htmlLines -blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do + htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns + return $ H.div ! A.class_ "line-block" $ htmlLines +blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do + html5 <- gets stHtml5 + slideVariant <- gets stSlideVariant + let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++ + [("style", "width:" ++ w ++ ";") + | ("width",w) <- kvs', "column" `elem` classes] let speakerNotes = "notes" `elem` classes -- we don't want incremental output inside speaker notes, see #1394 - let opts' = if speakerNotes then opts{ writerIncremental = False } else opts - contents <- blockListToHtml opts' bs + let opts' = if | speakerNotes -> opts{ writerIncremental = False } + | "incremental" `elem` classes -> opts{ writerIncremental = True } + | "nonincremental" `elem` classes -> opts{ writerIncremental = False } + | otherwise -> opts + -- we remove "incremental" and "nonincremental" if we're in a + -- slide presentaiton format. + classes' = case slideVariant of + NoSlides -> classes + _ -> filter (\k -> k /= "incremental" && k /= "nonincremental") classes + contents <- if "columns" `elem` classes' + then -- we don't use blockListToHtml because it inserts + -- a newline between the column divs, which throws + -- off widths! see #4028 + mconcat <$> mapM (blockToHtml opts) bs + else blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts - let (divtag, classes') = if writerHtml5 opts && "section" `elem` classes - then (H5.section, filter (/= "section") classes) - else (H.div, classes) - return $ - if speakerNotes - then case writerSlideVariant opts of - RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' - DZSlides -> (addAttrs opts' attr $ H5.div $ contents') - ! (H5.customAttribute "role" "note") - NoSlides -> addAttrs opts' attr $ H.div $ contents' - _ -> mempty - else addAttrs opts (ident, classes', kvs) $ divtag $ contents' -blockToHtml opts (RawBlock f str) - | f == Format "html" = return $ preEscapedString str - | (f == Format "latex" || f == Format "tex") && - allowsMathEnvironments (writerHTMLMathMethod opts) && - isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str] - | otherwise = return mempty -blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr + let (divtag, classes'') = if html5 && "section" `elem` classes' + then (H5.section, filter (/= "section") classes') + else (H.div, classes') + if speakerNotes + then case slideVariant of + RevealJsSlides -> addAttrs opts' attr $ + H5.aside contents' + DZSlides -> do + t <- addAttrs opts' attr $ + H5.div contents' + return $ t ! H5.customAttribute "role" "note" + NoSlides -> addAttrs opts' attr $ + H.div contents' + _ -> return mempty + else addAttrs opts (ident, classes'', kvs) $ + divtag contents' +blockToHtml opts (RawBlock f str) = do + ishtml <- isRawHtml f + if ishtml + then return $ preEscapedString str + else if (f == Format "latex" || f == Format "tex") && + allowsMathEnvironments (writerHTMLMathMethod opts) && + isMathEnvironment str + then blockToHtml opts $ Plain [Math DisplayMath str] + else do + report $ BlockNotRendered (RawBlock f str) + return mempty +blockToHtml _ HorizontalRule = do + html5 <- gets stHtml5 + return $ if html5 then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do + id'' <- if null id' + then do + modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 } + codeblocknum <- gets stCodeBlockNum + return ("cb" ++ show codeblocknum) + else return id' let tolhs = isEnabled Ext_literate_haskell opts && any (\c -> map toLower c == "haskell") classes && any (\c -> map toLower c == "literate") classes @@ -503,19 +749,24 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do adjCode = if tolhs then unlines . map ("> " ++) . lines $ rawCode else rawCode - hlCode = if writerHighlight opts -- check highlighting options - then highlight formatHtmlBlock (id',classes',keyvals) adjCode - else Nothing + hlCode = if isJust (writerHighlightStyle opts) + then highlight (writerSyntaxMap opts) formatHtmlBlock + (id'',classes',keyvals) adjCode + else Left "" case hlCode of - Nothing -> return $ addAttrs opts (id',classes,keyvals) - $ H.pre $ H.code $ toHtml adjCode - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (addAttrs opts (id',[],keyvals) h) -blockToHtml opts (BlockQuote blocks) = + Left msg -> do + unless (null msg) $ + report $ CouldNotHighlight msg + addAttrs opts (id',classes,keyvals) + $ H.pre $ H.code $ toHtml adjCode + Right h -> modify (\st -> st{ stHighlighting = True }) >> + addAttrs opts (id'',[],keyvals) h +blockToHtml opts (BlockQuote blocks) = do -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental - if writerSlideVariant opts /= NoSlides + slideVariant <- gets stSlideVariant + if slideVariant /= NoSlides then let inc = not (writerIncremental opts) in case blocks of [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) @@ -541,7 +792,7 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do $ showSecNum secnum) >> strToHtml " " >> contents else contents inElement <- gets stElement - return $ (if inElement then id else addAttrs opts attr) + (if inElement then return else addAttrs opts attr) $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' @@ -552,20 +803,17 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do _ -> H.p contents' blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst - return $ unordList opts contents + unordList opts contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst + html5 <- gets stHtml5 let numstyle' = case numstyle of Example -> "decimal" _ -> camelCaseToHyphenated $ show numstyle - let attribs = (if startnum /= 1 - then [A.start $ toValue startnum] - else []) ++ - (if numstyle == Example - then [A.class_ "example"] - else []) ++ + let attribs = [A.start $ toValue startnum | startnum /= 1] ++ + [A.class_ "example" | numstyle == Example] ++ (if numstyle /= DefaultStyle - then if writerHtml5 opts + then if html5 then [A.type_ $ case numstyle of Decimal -> "1" @@ -577,23 +825,25 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do else [A.style $ toValue $ "list-style-type: " ++ numstyle'] else []) - return $ foldl (!) (ordList opts contents) attribs + l <- ordList opts contents + return $ foldl (!) l attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- if null term then return mempty else liftM H.dt $ inlineListToHtml opts term - defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . + defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) . blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : intersperse (nl opts) defs') lst - return $ defList opts contents + defList opts contents blockToHtml opts (Table capt aligns widths headers rows') = do captionDoc <- if null capt then return mempty else do cs <- inlineListToHtml opts capt return $ H.caption cs >> nl opts + html5 <- gets stHtml5 let percent w = show (truncate (100*w) :: Integer) ++ "%" let coltags = if all (== 0.0) widths then mempty @@ -601,7 +851,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do H.colgroup $ do nl opts mapM_ (\w -> do - if writerHtml5 opts + if html5 then H.col ! A.style (toValue $ "width: " ++ percent w) else H.col ! A.width (toValue $ percent w) @@ -624,18 +874,19 @@ blockToHtml opts (Table capt aligns widths headers rows') = do else tbl ! A.style (toValue $ "width:" ++ show (round (totalWidth * 100) :: Int) ++ "%;") -tableRowToHtml :: WriterOptions +tableRowToHtml :: PandocMonad m + => WriterOptions -> [Alignment] -> Int -> [[Block]] - -> State WriterState Html + -> StateT WriterState m Html tableRowToHtml opts aligns rownum cols' = do let mkcell = if rownum == 0 then H.th else H.td let rowclass = case rownum of - 0 -> "header" + 0 -> "header" x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith + _ -> "even" + cols'' <- zipWithM (\alignment item -> tableItemToHtml opts mkcell alignment item) aligns cols' return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'') @@ -648,21 +899,23 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "" -tableItemToHtml :: WriterOptions +tableItemToHtml :: PandocMonad m + => WriterOptions -> (Html -> Html) -> Alignment -> [Block] - -> State WriterState Html + -> StateT WriterState m Html tableItemToHtml opts tag' align' item = do contents <- blockListToHtml opts item + html5 <- gets stHtml5 let alignStr = alignmentToString align' - let attribs = if writerHtml5 opts + let attribs = if html5 then A.style (toValue $ "text-align: " ++ alignStr ++ ";") else A.align (toValue alignStr) let tag'' = if null alignStr then tag' else tag' ! attribs - return $ (tag'' $ contents) >> nl opts + return $ tag'' contents >> nl opts toListItems :: WriterOptions -> [Html] -> [Html] toListItems opts items = map (toListItem opts) items ++ [nl opts] @@ -670,12 +923,16 @@ toListItems opts items = map (toListItem opts) items ++ [nl opts] toListItem :: WriterOptions -> Html -> Html toListItem opts item = nl opts >> H.li item -blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html +blockListToHtml :: PandocMonad m + => WriterOptions -> [Block] -> StateT WriterState m Html blockListToHtml opts lst = - fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst + (mconcat . intersperse (nl opts) . filter nonempty) + <$> mapM (blockToHtml opts) lst + where nonempty (Empty _) = False + nonempty _ = True -- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html +inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . mconcat @@ -684,9 +941,9 @@ annotateMML :: XML.Element -> String -> XML.Element annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)]) where cs = case elChildren e of - [] -> unode "mrow" () + [] -> unode "mrow" () [x] -> x - xs -> unode "mrow" xs + xs -> unode "mrow" xs math childs = XML.Element q as [XML.Elem childs] l where (XML.Element q as _ l) = e @@ -694,27 +951,29 @@ annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, -- | Convert Pandoc inline element to HTML. -inlineToHtml :: WriterOptions -> Inline -> State WriterState Html -inlineToHtml opts inline = +inlineToHtml :: PandocMonad m + => WriterOptions -> Inline -> StateT WriterState m Html +inlineToHtml opts inline = do + html5 <- gets stHtml5 case inline of (Str str) -> return $ strToHtml str - (Space) -> return $ strToHtml " " - (SoftBreak) -> return $ case writerWrapText opts of - WrapNone -> preEscapedString " " - WrapAuto -> preEscapedString " " - WrapPreserve -> preEscapedString "\n" - (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br) + Space -> return $ strToHtml " " + SoftBreak -> return $ case writerWrapText opts of + WrapNone -> preEscapedString " " + WrapAuto -> preEscapedString " " + WrapPreserve -> preEscapedString "\n" + LineBreak -> return $ (if html5 then H5.br else H.br) <> strToHtml "\n" (Span (id',classes,kvs) ils) -> inlineListToHtml opts ils >>= - return . addAttrs opts attr' . H.span + addAttrs opts attr' . H.span where attr' = (id',classes',kvs') classes' = filter (`notElem` ["csl-no-emph", "csl-no-strong", "csl-no-smallcaps"]) classes kvs' = if null styles then kvs - else (("style", concat styles) : kvs) + else ("style", concat styles) : kvs styles = ["font-style:normal;" | "csl-no-emph" `elem` classes] ++ ["font-weight:normal;" @@ -724,20 +983,23 @@ inlineToHtml opts inline = (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong (Code attr str) -> case hlCode of - Nothing -> return - $ addAttrs opts attr - $ H.code $ strToHtml str - Just h -> do + Left msg -> do + unless (null msg) $ + report $ CouldNotHighlight msg + addAttrs opts attr $ H.code $ strToHtml str + Right h -> do modify $ \st -> st{ stHighlighting = True } - return $ addAttrs opts (id',[],keyvals) h + addAttrs opts (id',[],keyvals) h where (id',_,keyvals) = attr - hlCode = if writerHighlight opts - then highlight formatHtmlInline attr str - else Nothing + hlCode = if isJust (writerHighlightStyle opts) + then highlight + (writerSyntaxMap opts) + formatHtmlInline attr str + else Left "" (Strikeout lst) -> inlineListToHtml opts lst >>= return . H.del (SmallCaps lst) -> inlineListToHtml opts lst >>= - return . (H.span ! A.style "font-variant: small-caps;") + return . (H.span ! A.class_ "smallcaps") (Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup (Subscript lst) -> inlineListToHtml opts lst >>= return . H.sub (Quoted quoteType lst) -> @@ -768,15 +1030,15 @@ inlineToHtml opts inline = JsMath _ -> do let m = preEscapedString str return $ case t of - InlineMath -> H.span ! A.class_ mathClass $ m + InlineMath -> H.span ! A.class_ mathClass $ m DisplayMath -> H.div ! A.class_ mathClass $ m WebTeX url -> do - let imtag = if writerHtml5 opts then H5.img else H.img + let imtag = if html5 then H5.img else H.img let m = imtag ! A.style "vertical-align:middle" ! A.src (toValue $ url ++ urlEncode str) ! A.alt (toValue str) ! A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if html5 then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag @@ -784,103 +1046,124 @@ inlineToHtml opts inline = return $ case t of InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" - MathML _ -> do - let dt = if t == InlineMath - then DisplayInline - else DisplayBlock + MathML -> do let conf = useShortEmptyTags (const False) defaultConfigPP - case writeMathML dt <$> readTeX str of + res <- lift $ convertMath writeMathML t str + case res of Right r -> return $ preEscapedString $ ppcElement conf (annotateMML r str) - Left _ -> inlineListToHtml opts - (texMathToInlines t str) >>= - return . (H.span ! A.class_ mathClass) + Left il -> (H.span ! A.class_ mathClass) <$> + inlineToHtml opts il MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" - KaTeX _ _ -> return $ H.span ! A.class_ mathClass $ - toHtml (case t of - InlineMath -> str - DisplayMath -> "\\displaystyle " ++ str) + KaTeX _ -> return $ H.span ! A.class_ mathClass $ toHtml $ + case t of + InlineMath -> "\\(" ++ str ++ "\\)" + DisplayMath -> "\\[" ++ str ++ "\\]" PlainMath -> do - x <- inlineListToHtml opts (texMathToInlines t str) + x <- lift (texMathToInlines t str) >>= inlineListToHtml opts let m = H.span ! A.class_ mathClass $ x - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if html5 then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag - (RawInline f str) - | f == Format "html" -> return $ preEscapedString str - | otherwise -> return mempty + (RawInline f str) -> do + ishtml <- isRawHtml f + if ishtml + then return $ preEscapedString str + else if (f == Format "latex" || f == Format "tex") && + "\\begin" `isPrefixOf` str && + allowsMathEnvironments (writerHTMLMathMethod opts) && + isMathEnvironment str + then inlineToHtml opts $ Math DisplayMath str + else do + report $ InlineNotRendered inline + return mempty (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts attr linkText s - (Link attr txt (s,tit)) -> do + obfuscateLink opts attr linkText s + (Link (ident,classes,kvs) txt (s,tit)) -> do linkText <- inlineListToHtml opts txt + slideVariant <- gets stSlideVariant let s' = case s of - '#':xs | writerSlideVariant opts == - RevealJsSlides -> '#':'/':xs - _ -> s + '#':xs -> let prefix = if slideVariant == RevealJsSlides + then "/" + else writerIdentifierPrefix opts + in '#' : prefix ++ xs + _ -> s let link = H.a ! A.href (toValue s') $ linkText - let link' = if txt == [Str (unEscapeString s)] - then link ! A.class_ "uri" - else link - let link'' = addAttrs opts attr link' + let attr = if txt == [Str (unEscapeString s)] + then (ident, "uri" : classes, kvs) + else (ident, classes, kvs) + link' <- addAttrs opts attr link return $ if null tit - then link'' - else link'' ! A.title (toValue tit) + then link' + else link' ! A.title (toValue tit) (Image attr txt (s,tit)) | treatAsImage s -> do let alternate' = stringify txt - let attributes = [A.src $ toValue s] ++ - [A.title $ toValue tit | not (null tit)] ++ - [A.alt $ toValue alternate' | not (null txt)] ++ - imgAttrsToHtml opts attr - let tag = if writerHtml5 opts then H5.img else H.img + slideVariant <- gets stSlideVariant + let isReveal = slideVariant == RevealJsSlides + attrs <- imgAttrsToHtml opts attr + let attributes = + -- reveal.js uses data-src for lazy loading + (if isReveal + then customAttribute "data-src" $ toValue s + else A.src $ toValue s) : + [A.title $ toValue tit | not (null tit)] ++ + [A.alt $ toValue alternate' | not (null txt)] ++ + attrs + let tag = if html5 then H5.img else H.img return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image attr _ (s,tit)) -> do - let attributes = [A.src $ toValue s] ++ - [A.title $ toValue tit | not (null tit)] ++ - imgAttrsToHtml opts attr + slideVariant <- gets stSlideVariant + let isReveal = slideVariant == RevealJsSlides + attrs <- imgAttrsToHtml opts attr + let attributes = + (if isReveal + then customAttribute "data-src" $ toValue s + else A.src $ toValue s) : + [A.title $ toValue tit | not (null tit)] ++ + attrs return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl - (Note contents) - | writerIgnoreNotes opts -> return mempty - | otherwise -> do + (Note contents) -> do notes <- gets stNotes - let number = (length notes) + 1 + let number = length notes + 1 let ref = show number htmlContents <- blockListToNote opts ref contents + epubVersion <- gets stEPUBVersion -- push contents onto front of notes - modify $ \st -> st {stNotes = (htmlContents:notes)} - let revealSlash = ['/' | writerSlideVariant opts - == RevealJsSlides] + modify $ \st -> st {stNotes = htmlContents:notes} + slideVariant <- gets stSlideVariant + let revealSlash = ['/' | slideVariant == RevealJsSlides] let link = H.a ! A.href (toValue $ "#" ++ revealSlash ++ writerIdentifierPrefix opts ++ "fn" ++ ref) - ! A.class_ "footnoteRef" + ! A.class_ "footnote-ref" ! prefixedId opts ("fnref" ++ ref) - $ (if isJust (writerEpubVersion opts) + $ (if isJust epubVersion then id else H.sup) $ toHtml ref - return $ case writerEpubVersion opts of + return $ case epubVersion of Just EPUB3 -> link ! customAttribute "epub:type" "noteref" _ -> link (Cite cits il)-> do contents <- inlineListToHtml opts il let citationIds = unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents - return $ if writerHtml5 opts + return $ if html5 then result ! customAttribute "data-cites" (toValue citationIds) else result -blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html +blockListToNote :: PandocMonad m => WriterOptions -> String -> [Block] -> StateT WriterState m Html blockListToNote opts ref blocks = -- If last block is Para or Plain, include the backlink at the end of -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [Link nullAttr [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])] + let backlink = [Link ("",["footnote-back"],[]) [Str "↩"] ("#" ++ "fnref" ++ ref,[])] blocks' = if null blocks then [] else let lastBlock = last blocks @@ -893,23 +1176,13 @@ blockListToNote opts ref blocks = _ -> otherBlocks ++ [lastBlock, Plain backlink] in do contents <- blockListToHtml opts blocks' - let noteItem = H.li ! (prefixedId opts ("fn" ++ ref)) $ contents - let noteItem' = case writerEpubVersion opts of + let noteItem = H.li ! prefixedId opts ("fn" ++ ref) $ contents + epubVersion <- gets stEPUBVersion + let noteItem' = case epubVersion of Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote" _ -> noteItem return $ nl opts >> noteItem' --- Javascript snippet to render all KaTeX elements -renderKaTeX :: String -renderKaTeX = unlines [ - "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");" - , "for (var i=0; i < mathElements.length; i++)" - , "{" - , " var texText = mathElements[i].firstChild" - , " katex.render(texText.data, mathElements[i])" - , "}}" - ] - isMathEnvironment :: String -> Bool isMathEnvironment s = "\\begin{" `isPrefixOf` s && envName `elem` mathmlenvs @@ -944,6 +1217,219 @@ isMathEnvironment s = "\\begin{" `isPrefixOf` s && allowsMathEnvironments :: HTMLMathMethod -> Bool allowsMathEnvironments (MathJax _) = True -allowsMathEnvironments (MathML _) = True +allowsMathEnvironments MathML = True allowsMathEnvironments (WebTeX _) = True allowsMathEnvironments _ = False + +isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool +isRawHtml f = do + html5 <- gets stHtml5 + return $ f == Format "html" || + ((html5 && f == Format "html5") || f == Format "html4") + +html5Attributes :: Set.Set String +html5Attributes = Set.fromList + [ "abbr" + , "accept" + , "accept-charset" + , "accesskey" + , "action" + , "allowfullscreen" + , "allowpaymentrequest" + , "allowusermedia" + , "alt" + , "as" + , "async" + , "autocomplete" + , "autofocus" + , "autoplay" + , "charset" + , "checked" + , "cite" + , "class" + , "color" + , "cols" + , "colspan" + , "content" + , "contenteditable" + , "controls" + , "coords" + , "crossorigin" + , "data" + , "datetime" + , "default" + , "defer" + , "dir" + , "dirname" + , "disabled" + , "download" + , "draggable" + , "enctype" + , "for" + , "form" + , "formaction" + , "formenctype" + , "formmethod" + , "formnovalidate" + , "formtarget" + , "headers" + , "height" + , "hidden" + , "high" + , "href" + , "hreflang" + , "http-equiv" + , "id" + , "inputmode" + , "integrity" + , "is" + , "ismap" + , "itemid" + , "itemprop" + , "itemref" + , "itemscope" + , "itemtype" + , "kind" + , "label" + , "lang" + , "list" + , "loop" + , "low" + , "manifest" + , "max" + , "maxlength" + , "media" + , "method" + , "min" + , "minlength" + , "multiple" + , "muted" + , "name" + , "nomodule" + , "nonce" + , "novalidate" + , "onabort" + , "onafterprint" + , "onauxclick" + , "onbeforeprint" + , "onbeforeunload" + , "onblur" + , "oncancel" + , "oncanplay" + , "oncanplaythrough" + , "onchange" + , "onclick" + , "onclose" + , "oncontextmenu" + , "oncopy" + , "oncuechange" + , "oncut" + , "ondblclick" + , "ondrag" + , "ondragend" + , "ondragenter" + , "ondragexit" + , "ondragleave" + , "ondragover" + , "ondragstart" + , "ondrop" + , "ondurationchange" + , "onemptied" + , "onended" + , "onerror" + , "onfocus" + , "onhashchange" + , "oninput" + , "oninvalid" + , "onkeydown" + , "onkeypress" + , "onkeyup" + , "onlanguagechange" + , "onload" + , "onloadeddata" + , "onloadedmetadata" + , "onloadend" + , "onloadstart" + , "onmessage" + , "onmessageerror" + , "onmousedown" + , "onmouseenter" + , "onmouseleave" + , "onmousemove" + , "onmouseout" + , "onmouseover" + , "onmouseup" + , "onoffline" + , "ononline" + , "onpagehide" + , "onpageshow" + , "onpaste" + , "onpause" + , "onplay" + , "onplaying" + , "onpopstate" + , "onprogress" + , "onratechange" + , "onrejectionhandled" + , "onreset" + , "onresize" + , "onscroll" + , "onsecuritypolicyviolation" + , "onseeked" + , "onseeking" + , "onselect" + , "onstalled" + , "onstorage" + , "onsubmit" + , "onsuspend" + , "ontimeupdate" + , "ontoggle" + , "onunhandledrejection" + , "onunload" + , "onvolumechange" + , "onwaiting" + , "onwheel" + , "open" + , "optimum" + , "pattern" + , "ping" + , "placeholder" + , "playsinline" + , "poster" + , "preload" + , "readonly" + , "referrerpolicy" + , "rel" + , "required" + , "reversed" + , "rows" + , "rowspan" + , "sandbox" + , "scope" + , "selected" + , "shape" + , "size" + , "sizes" + , "slot" + , "span" + , "spellcheck" + , "src" + , "srcdoc" + , "srclang" + , "srcset" + , "start" + , "step" + , "style" + , "tabindex" + , "target" + , "title" + , "translate" + , "type" + , "typemustmatch" + , "updateviacache" + , "usemap" + , "value" + , "width" + , "workertype" + , "wrap" + ] diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 29fdafe15..688c1f390 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} {- -Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2014-2015, 2017-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Haddock - Copyright : Copyright (C) 2014 John MacFarlane + Copyright : Copyright (C) 2014-2015,2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,17 +33,19 @@ Conversion of 'Pandoc' documents to haddock markup. Haddock: <http://www.haskell.org/haddock/doc/html/> -} module Text.Pandoc.Writers.Haddock (writeHaddock) where +import Control.Monad.State.Strict +import Data.Default +import Data.List (intersperse, transpose) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Logging import Text.Pandoc.Options -import Data.List ( intersperse, transpose ) import Text.Pandoc.Pretty -import Control.Monad.State -import Text.Pandoc.Readers.TeXMath (texMathToInlines) -import Network.URI (isURI) -import Data.Default +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Math (texMathToInlines) +import Text.Pandoc.Writers.Shared type Notes = [[Block]] data WriterState = WriterState { stNotes :: Notes } @@ -49,13 +53,14 @@ instance Default WriterState where def = WriterState{ stNotes = [] } -- | Convert Pandoc to Haddock. -writeHaddock :: WriterOptions -> Pandoc -> String +writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeHaddock opts document = - evalState (pandocToHaddock opts{ + evalStateT (pandocToHaddock opts{ writerWrapText = writerWrapText opts } document) def -- | Return haddock representation of document. -pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String +pandocToHaddock :: PandocMonad m + => WriterOptions -> Pandoc -> StateT WriterState m Text pandocToHaddock opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts @@ -63,22 +68,22 @@ pandocToHaddock opts (Pandoc meta blocks) = do body <- blockListToHaddock opts blocks st <- get notes' <- notesToHaddock opts (reverse $ stNotes st) - let render' :: Doc -> String + let render' :: Doc -> Text render' = render colwidth let main = render' $ body <> (if isEmpty notes' then empty else blankline <> notes') metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToHaddock opts) - (fmap (render colwidth) . inlineListToHaddock opts) + (fmap render' . blockListToHaddock opts) + (fmap render' . inlineListToHaddock opts) meta - let context = defField "body" main - $ metadata + let context = defField "body" main metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return haddock representation of notes. -notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToHaddock :: PandocMonad m + => WriterOptions -> [[Block]] -> StateT WriterState m Doc notesToHaddock opts notes = if null notes then return empty @@ -92,9 +97,10 @@ escapeString = escapeStringUsing haddockEscapes where haddockEscapes = backslashEscapes "\\/'`\"@<" -- | Convert Pandoc block element to haddock. -blockToHaddock :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc +blockToHaddock :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> StateT WriterState m Doc blockToHaddock _ Null = return empty blockToHaddock opts (Div _ ils) = do contents <- blockListToHaddock opts ils @@ -110,10 +116,12 @@ blockToHaddock opts (Para inlines) = (<> blankline) `fmap` blockToHaddock opts (Plain inlines) blockToHaddock opts (LineBlock lns) = blockToHaddock opts $ linesToPara lns -blockToHaddock _ (RawBlock f str) - | f == "haddock" = do +blockToHaddock _ b@(RawBlock f str) + | f == "haddock" = return $ text str <> text "\n" - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToHaddock opts HorizontalRule = return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline blockToHaddock opts (Header level (ident,_,_) inlines) = do @@ -141,16 +149,16 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do isPlainBlock _ = False let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows) (nst,tbl) <- case True of - _ | isSimple -> fmap (nest 2,) $ + _ | isSimple -> (nest 2,) <$> pandocTable opts (all null headers) aligns widths rawHeaders rawRows - | not hasBlocks -> fmap (nest 2,) $ + | not hasBlocks -> (nest 2,) <$> pandocTable opts (all null headers) aligns widths rawHeaders rawRows - | otherwise -> fmap (id,) $ - gridTable opts (all null headers) aligns widths - rawHeaders rawRows - return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline + | otherwise -> (id,) <$> + gridTable opts blockListToHaddock + (all null headers) aligns widths headers rows + return $ prefixed "> " (nst $ tbl $$ blankline $$ caption'') $$ blankline blockToHaddock opts (BulletList items) = do contents <- mapM (bulletListItemToHaddock opts) items return $ cat contents <> blankline @@ -160,15 +168,15 @@ blockToHaddock opts (OrderedList (start,_,delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $ - zip markers' items + contents <- zipWithM (orderedListItemToHaddock opts) markers' items return $ cat contents <> blankline blockToHaddock opts (DefinitionList items) = do contents <- mapM (definitionListItemToHaddock opts) items return $ cat contents <> blankline -pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> State WriterState Doc +pandocTable :: PandocMonad m + => WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> StateT WriterState m Doc pandocTable opts headless aligns widths rawHeaders rawRows = do let isSimple = all (==0) widths let alignHeader alignment = case alignment of @@ -184,18 +192,17 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do (floor . (fromIntegral (writerColumns opts) *)) widths let makeRow = hcat . intersperse (lblock 1 (text " ")) . - (zipWith3 alignHeader aligns widthsInChars) + zipWith3 alignHeader aligns widthsInChars let rows' = map makeRow rawRows let head' = makeRow rawHeaders let maxRowHeight = maximum $ map height (head':rows') let underline = cat $ intersperse (text " ") $ map (\width -> text (replicate width '-')) widthsInChars - let border = if maxRowHeight > 1 - then text (replicate (sum widthsInChars + - length widthsInChars - 1) '-') - else if headless - then underline - else empty + let border + | maxRowHeight > 1 = text (replicate (sum widthsInChars + + length widthsInChars - 1) '-') + | headless = underline + | otherwise = empty let head'' = if headless then empty else border <> cr <> head' @@ -207,35 +214,9 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do else border return $ head'' $$ underline $$ body $$ bottom -gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> State WriterState Doc -gridTable opts headless _aligns widths headers' rawRows = do - let numcols = length headers' - let widths' = if all (==0) widths - then replicate numcols (1.0 / fromIntegral numcols) - else widths - let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths' - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = chomp $ hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map (makeRow . map chomp) rawRows - let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '+' - let body = vcat $ intersperse (border '-') rows' - let head'' = if headless - then empty - else head' $$ border '=' - return $ border '-' $$ head'' $$ body $$ border '-' - -- | Convert bullet list item (list of blocks) to haddock -bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToHaddock :: PandocMonad m + => WriterOptions -> [Block] -> StateT WriterState m Doc bulletListItemToHaddock opts items = do contents <- blockListToHaddock opts items let sps = replicate (writerTabStop opts - 2) ' ' @@ -250,22 +231,24 @@ bulletListItemToHaddock opts items = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to haddock -orderedListItemToHaddock :: WriterOptions -- ^ options - -> String -- ^ list item marker - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc +orderedListItemToHaddock :: PandocMonad m + => WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> StateT WriterState m Doc orderedListItemToHaddock opts marker items = do contents <- blockListToHaddock opts items let sps = case length marker - writerTabStop opts of n | n > 0 -> text $ replicate n ' ' - _ -> text " " + _ -> text " " let start = text marker <> sps return $ hang (writerTabStop opts) start $ contents <> cr -- | Convert definition list item (label, list of blocks) to haddock -definitionListItemToHaddock :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState Doc +definitionListItemToHaddock :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> StateT WriterState m Doc definitionListItemToHaddock opts (label, defs) = do labelText <- inlineListToHaddock opts label defs' <- mapM (mapM (blockToHaddock opts)) defs @@ -273,19 +256,22 @@ definitionListItemToHaddock opts (label, defs) = do return $ nowrap (brackets labelText) <> cr <> contents <> cr -- | Convert list of Pandoc block elements to haddock -blockListToHaddock :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc +blockListToHaddock :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> StateT WriterState m Doc blockListToHaddock opts blocks = mapM (blockToHaddock opts) blocks >>= return . cat -- | Convert list of Pandoc inline elements to haddock. -inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToHaddock :: PandocMonad m + => WriterOptions -> [Inline] -> StateT WriterState m Doc inlineListToHaddock opts lst = mapM (inlineToHaddock opts) lst >>= return . cat -- | Convert Pandoc inline element to haddock. -inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc +inlineToHaddock :: PandocMonad m + => WriterOptions -> Inline -> StateT WriterState m Doc inlineToHaddock opts (Span (ident,_,_) ils) = do contents <- inlineListToHaddock opts ils if not (null ident) && null ils @@ -315,18 +301,20 @@ inlineToHaddock opts (Quoted DoubleQuote lst) = do return $ "“" <> contents <> "”" inlineToHaddock _ (Code _ str) = return $ "@" <> text (escapeString str) <> "@" -inlineToHaddock _ (Str str) = do +inlineToHaddock _ (Str str) = return $ text $ escapeString str inlineToHaddock opts (Math mt str) = do let adjust x = case mt of DisplayMath -> cr <> x <> cr InlineMath -> x - adjust `fmap` (inlineListToHaddock opts $ texMathToInlines mt str) -inlineToHaddock _ (RawInline f str) + adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts) +inlineToHaddock _ il@(RawInline f str) | f == "haddock" = return $ text str - | otherwise = return empty + | otherwise = do + report $ InlineNotRendered il + return empty -- no line break in haddock (see above on CodeBlock) -inlineToHaddock _ (LineBreak) = return cr +inlineToHaddock _ LineBreak = return cr inlineToHaddock opts SoftBreak = case writerWrapText opts of WrapAuto -> return space @@ -339,7 +327,7 @@ inlineToHaddock _ (Link _ txt (src, _)) = do let useAuto = isURI src && case txt of [Str s] | escapeURI s == src -> True - _ -> False + _ -> False return $ nowrap $ "<" <> text src <> (if useAuto then empty else space <> linktext) <> ">" inlineToHaddock opts (Image attr alternate (source, tit)) = do diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 8f0d21cf5..a5d851e40 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -1,8 +1,10 @@ -{-# LANGUAGE OverloadedStrings, FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Writers.ICML - Copyright : Copyright (C) 2013-2016 github.com/mb21 + Copyright : Copyright (C) 2013-2018 github.com/mb21 License : GNU GPL, version 2 or above Stability : alpha @@ -14,20 +16,25 @@ InCopy is the companion word-processor to Adobe InDesign and ICML documents can into InDesign with File -> Place. -} module Text.Pandoc.Writers.ICML (writeICML) where +import Control.Monad.Except (catchError) +import Control.Monad.State.Strict +import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix) +import Data.Maybe (fromMaybe) +import qualified Data.Set as Set +import Data.Text as Text (breakOnAll, pack) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Readers.TeXMath (texMathToInlines) -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Shared (linesToPara, splitBy, fetchItem, warn) +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Data.List (isPrefixOf, isInfixOf, stripPrefix, intersperse) -import Data.Text as Text (breakOnAll, pack) -import Control.Monad.State -import Network.URI (isURI) -import qualified Data.Set as Set +import Text.Pandoc.Shared (isURI, linesToPara, splitBy) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Math (texMathToInlines) +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML type Style = [String] type Hyperlink = [(Int, String)] @@ -40,7 +47,7 @@ data WriterState = WriterState{ , maxListDepth :: Int } -type WS a = StateT WriterState IO a +type WS m = StateT WriterState m defaultWriterState :: WriterState defaultWriterState = WriterState{ @@ -121,13 +128,13 @@ subListParName = "subParagraph" footnoteName = "Footnote" citeName = "Cite" - -- | Convert Pandoc document to string in ICML format. -writeICML :: WriterOptions -> Pandoc -> IO String +writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeICML opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + render' :: Doc -> Text render' = render colwidth renderMeta f s = liftM (render' . fst) $ runStateT (f opts [] s) defaultWriterState metadata <- metaToJSON opts @@ -139,18 +146,15 @@ writeICML opts (Pandoc meta blocks) = do context = defField "body" main $ defField "charStyles" (render' $ charStylesToDoc st) $ defField "parStyles" (render' $ parStylesToDoc st) - $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) - $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Auxilary functions for parStylesToDoc and charStylesToDoc. contains :: String -> (String, (String, String)) -> [(String, String)] contains s rule = - if isInfixOf (fst rule) s - then [snd rule] - else [] + [snd rule | (fst rule) `isInfixOf` s] -- | The monospaced font to use as default. monospacedFont :: Doc @@ -174,7 +178,7 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st where makeStyle s = let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) - attrs = concat $ map (contains s) $ [ + attrs = concatMap (contains s) [ (defListTermName, ("BulletsAndNumberingListType", "BulletList")) , (defListTermName, ("FontStyle", "Bold")) , (tableHeaderName, ("FontStyle", "Bold")) @@ -200,9 +204,9 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st where numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] | otherwise = [] - listType | isOrderedList && (not $ isInfixOf subListParName s) + listType | isOrderedList && not (subListParName `isInfixOf` s) = [("BulletsAndNumberingListType", "NumberedList")] - | isBulletList && (not $ isInfixOf subListParName s) + | isBulletList && not (subListParName `isInfixOf` s) = [("BulletsAndNumberingListType", "BulletList")] | otherwise = [] indent = [("LeftIndent", show indt)] @@ -210,9 +214,9 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st nBlockQuotes = countSubStrs blockQuoteName s nDefLists = countSubStrs defListDefName s indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) - props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm) + props = inTags True "Properties" [] (basedOn $$ tabList $$ numbForm) where - font = if isInfixOf codeBlockName s + font = if codeBlockName `isInfixOf` s then monospacedFont else empty basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font @@ -239,7 +243,7 @@ charStylesToDoc :: WriterState -> Doc charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st where makeStyle s = - let attrs = concat $ map (contains s) [ + let attrs = concatMap (contains s) [ (strikeoutName, ("StrikeThru", "true")) , (superscriptName, ("Position", "Superscript")) , (subscriptName, ("Position", "Subscript")) @@ -253,7 +257,7 @@ charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font where font = - if isInfixOf codeName s + if codeName `isInfixOf` s then monospacedFont else empty in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props @@ -273,23 +277,22 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs hyp (ident, url) = hdest $$ hlink where hdest = selfClosingTag "HyperlinkURLDestination" - [("Self", "HyperlinkURLDestination/"++(escapeColons url)), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 + [("Self", "HyperlinkURLDestination/"++escapeColons url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] $ inTags True "Properties" [] $ inTags False "BorderColor" [("type","enumeration")] (text "Black") - $$ (inTags False "Destination" [("type","object")] - $ text $ "HyperlinkURLDestination/"++(escapeColons (escapeStringForXML url))) -- HyperlinkURLDestination with more than one colon crashes CS6 + $$ inTags False "Destination" [("type","object")] (text $ "HyperlinkURLDestination/"++escapeColons (escapeStringForXML url)) -- HyperlinkURLDestination with more than one colon crashes CS6 -- | Convert a list of Pandoc blocks to ICML. -blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc +blocksToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc blocksToICML opts style lst = do docs <- mapM (blockToICML opts style) lst return $ intersperseBrs docs -- | Convert a Pandoc block element to ICML. -blockToICML :: WriterOptions -> Style -> Block -> WS Doc +blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m Doc blockToICML opts style (Plain lst) = parStyle opts style lst -- title beginning with fig: indicates that the image is a figure blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do @@ -299,10 +302,12 @@ blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst blockToICML opts style (LineBlock lns) = blockToICML opts style $ linesToPara lns -blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] -blockToICML _ _ (RawBlock f str) +blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) [Str str] +blockToICML _ _ b@(RawBlock f str) | f == Format "icml" = return $ text str - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst @@ -343,11 +348,10 @@ blockToICML opts style (Table caption aligns widths headers rows) = then rows else headers:rows cells <- rowsToICML tabl (0::Int) - let colWidths w = if w > 0 - then [("SingleColumnWidth",show $ 500 * w)] - else [] - let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup) - let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths + let colWidths w = + [("SingleColumnWidth",show $ 500 * w) | w > 0] + let tupToDoc tup = selfClosingTag "Column" $ ("Name",show $ fst tup) : colWidths (snd tup) + let colDescs = vcat $ zipWith (curry tupToDoc) [0..nrCols-1] widths let tableDoc = return $ inTags True "Table" [ ("AppliedTableStyle","TableStyle/Table") , ("HeaderRowCount", nrHeaders) @@ -359,7 +363,7 @@ blockToICML opts style (Div _ lst) = blocksToICML opts style lst blockToICML _ _ Null = return empty -- | Convert a list of lists of blocks to ICML list items. -listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc +listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m Doc listItemsToICML _ _ _ _ [] = return empty listItemsToICML opts listType style attribs (first:rest) = do st <- get @@ -374,18 +378,17 @@ listItemsToICML opts listType style attribs (first:rest) = do return $ intersperseBrs docs -- | Convert a list of blocks to ICML list items. -listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc +listItemToICML :: PandocMonad m => WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS m Doc listItemToICML opts style isFirst attribs item = let makeNumbStart (Just (beginsWith, numbStl, _)) = let doN DefaultStyle = [] - doN LowerRoman = [lowerRomanName] - doN UpperRoman = [upperRomanName] - doN LowerAlpha = [lowerAlphaName] - doN UpperAlpha = [upperAlphaName] - doN _ = [] - bw = if beginsWith > 1 - then [beginsWithName ++ show beginsWith] - else [] + doN LowerRoman = [lowerRomanName] + doN UpperRoman = [upperRomanName] + doN LowerAlpha = [lowerAlphaName] + doN UpperAlpha = [upperAlphaName] + doN _ = [] + bw = + [beginsWithName ++ show beginsWith | beginsWith > 1] in doN numbStl ++ bw makeNumbStart Nothing = [] stl = if isFirst @@ -394,26 +397,26 @@ listItemToICML opts style isFirst attribs item = stl' = makeNumbStart attribs ++ stl in if length item > 1 then do - let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst + let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ Str "\t":lst insertTab block = blockToICML opts style block f <- blockToICML opts stl' $ head item r <- mapM insertTab $ tail item return $ intersperseBrs (f : r) else blocksToICML opts stl' item -definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc +definitionListItemToICML :: PandocMonad m => WriterOptions -> Style -> ([Inline],[[Block]]) -> WS m Doc definitionListItemToICML opts style (term,defs) = do term' <- parStyle opts (defListTermName:style) term defs' <- mapM (blocksToICML opts (defListDefName:style)) defs - return $ intersperseBrs $ (term' : defs') + return $ intersperseBrs (term' : defs') -- | Convert a list of inline elements to ICML. -inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc +inlinesToICML :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst) -- | Convert an inline element to ICML. -inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc +inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m Doc inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst @@ -433,17 +436,20 @@ inlineToICML opts style SoftBreak = WrapPreserve -> charStyle style cr inlineToICML _ style LineBreak = charStyle style $ text lineSeparator inlineToICML opts style (Math mt str) = - cat <$> mapM (inlineToICML opts style) (texMathToInlines mt str) -inlineToICML _ _ (RawInline f str) + lift (texMathToInlines mt str) >>= + (fmap cat . mapM (inlineToICML opts style)) +inlineToICML _ _ il@(RawInline f str) | f == Format "icml" = return $ text str - | otherwise = return empty + | otherwise = do + report $ InlineNotRendered il + return empty inlineToICML opts style (Link _ lst (url, title)) = do content <- inlinesToICML opts (linkName:style) lst state $ \st -> let ident = if null $ links st then 1::Int - else 1 + (fst $ head $ links st) - newst = st{ links = (ident, url):(links st) } + else 1 + fst (head $ links st) + newst = st{ links = (ident, url):links st } cont = inTags True "HyperlinkTextSource" [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content in (cont, newst) @@ -452,9 +458,9 @@ inlineToICML opts style (Note lst) = footnoteToICML opts style lst inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst -- | Convert a list of block elements to an ICML footnote. -footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc +footnoteToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc footnoteToICML opts style lst = - let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls + let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ Str "\t":ls insertTab block = blockToICML opts (footnoteName:style) block in do contents <- mapM insertTab lst @@ -466,24 +472,24 @@ footnoteToICML opts style lst = -- | Auxiliary function to merge Space elements into the adjacent Strs. mergeSpaces :: [Inline] -> [Inline] -mergeSpaces ((Str s):(x:((Str s'):xs))) | isSp x = +mergeSpaces (Str s:(x:(Str s':xs))) | isSp x = mergeSpaces $ Str(s++" "++s') : xs -mergeSpaces (x:((Str s):xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs -mergeSpaces ((Str s):(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs -mergeSpaces (x:xs) = x : (mergeSpaces xs) +mergeSpaces (x:(Str s:xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs +mergeSpaces (Str s:(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs +mergeSpaces (x:xs) = x : mergeSpaces xs mergeSpaces [] = [] isSp :: Inline -> Bool -isSp Space = True +isSp Space = True isSp SoftBreak = True -isSp _ = False +isSp _ = False -- | Intersperse line breaks intersperseBrs :: [Doc] -> Doc intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isEmpty) -- | Wrap a list of inline elements in an ICML Paragraph Style -parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc +parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc parStyle opts style lst = let slipIn x y = if null y then x @@ -498,7 +504,7 @@ parStyle opts style lst = begins = filter (isPrefixOf beginsWithName) style in if null begins then ats - else let i = maybe "" id $ stripPrefix beginsWithName $ head begins + else let i = fromMaybe "" $ stripPrefix beginsWithName $ head begins in ("NumberingStartAt", i) : ats else [attrs] in do @@ -507,16 +513,16 @@ parStyle opts style lst = state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st }) -- | Wrap a Doc in an ICML Character Style. -charStyle :: Style -> Doc -> WS Doc +charStyle :: PandocMonad m => Style -> Doc -> WS m Doc charStyle style content = let (stlStr, attrs) = styleToStrAttr style doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content - in do + in state $ \st -> - let styles = if null stlStr - then st - else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } - in (doc, styles) + let styles = if null stlStr + then st + else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } + in (doc, styles) -- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. styleToStrAttr :: Style -> (String, [(String, String)]) @@ -529,20 +535,18 @@ styleToStrAttr style = in (stlStr, attrs) -- | Assemble an ICML Image. -imageICML :: WriterOptions -> Style -> Attr -> Target -> WS Doc +imageICML :: PandocMonad m => WriterOptions -> Style -> Attr -> Target -> WS m Doc imageICML opts style attr (src, _) = do - res <- liftIO $ fetchItem (writerSourceURL opts) src - imgS <- case res of - Left (_) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - return def - Right (img, _) -> do - case imageSize img of + imgS <- catchError + (do (img, _) <- P.fetchItem src + case imageSize opts img of Right size -> return size Left msg -> do - warn $ "Could not determine image size in `" ++ - src ++ "': " ++ msg - return def + report $ CouldNotDetermineImageSize src msg + return def) + (\e -> do + report $ CouldNotFetchResource src (show e) + return def) let (ow, oh) = sizeInPoints imgS (imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS hw = showFl $ ow / 2 @@ -571,6 +575,5 @@ imageICML opts style attr (src, _) = do ] doc = inTags True "CharacterStyleRange" attrs $ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"), - ("ItemTransform", scale++" "++hw++" -"++hh)] - $ (props $$ image) + ("ItemTransform", scale++" "++hw++" -"++hh)] (props $$ image) state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs new file mode 100644 index 000000000..639961acd --- /dev/null +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -0,0 +1,455 @@ +{-# LANGUAGE OverloadedStrings #-} + +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Writers.JATS + Copyright : Copyright (C) 2017-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to JATS XML. +Reference: +https://jats.nlm.nih.gov/publishing/tag-library/1.1d3/element/mml-math.html +-} +module Text.Pandoc.Writers.JATS ( writeJATS ) where +import Control.Monad.Reader +import Data.Char (toLower) +import Data.Generics (everywhere, mkT) +import Data.List (isSuffixOf, partition) +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.Logging +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML +import Text.TeXMath +import qualified Text.XML.Light as Xml + +data JATSVersion = JATS1_1 + deriving (Eq, Show) + +type JATS = ReaderT JATSVersion + +writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeJATS opts d = + runReaderT (docToJATS opts d) JATS1_1 + +-- | Convert Pandoc document to string in JATS format. +docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text +docToJATS opts (Pandoc meta blocks) = do + let isBackBlock (Div ("refs",_,_) _) = True + isBackBlock _ = False + let (backblocks, bodyblocks) = partition isBackBlock blocks + let elements = hierarchicalize bodyblocks + let backElements = hierarchicalize backblocks + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + let render' :: Doc -> Text + render' = render colwidth + let opts' = if maybe False (("/book>" `isSuffixOf`) . trimr) + (writerTemplate opts) && + TopLevelDefault == writerTopLevelDivision opts + then opts{ writerTopLevelDivision = TopLevelChapter } + else opts + -- The numbering here follows LaTeX's internal numbering + let startLvl = case writerTopLevelDivision opts' of + TopLevelPart -> -1 + TopLevelChapter -> 0 + TopLevelSection -> 1 + TopLevelDefault -> 1 + metadata <- metaToJSON opts + (fmap (render' . vcat) . + mapM (elementToJATS opts' startLvl) . + hierarchicalize) + (fmap render' . inlinesToJATS opts') + meta + main <- (render' . vcat) <$> + mapM (elementToJATS opts' startLvl) elements + back <- (render' . vcat) <$> + mapM (elementToJATS opts' startLvl) backElements + let context = defField "body" main + $ defField "back" back + $ defField "mathml" (case writerHTMLMathMethod opts of + MathML -> True + _ -> False) metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +-- | Convert an Element to JATS. +elementToJATS :: PandocMonad m => WriterOptions -> Int -> Element -> JATS m Doc +elementToJATS opts _ (Blk block) = blockToJATS opts block +elementToJATS opts lvl (Sec _ _num (id',_,kvs) title elements) = do + let idAttr = [("id", writerIdentifierPrefix opts ++ id') | not (null id')] + let otherAttrs = ["sec-type", "specific-use"] + let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs] + contents <- mapM (elementToJATS opts (lvl + 1)) elements + title' <- inlinesToJATS opts title + return $ inTags True "sec" attribs $ + inTagsSimple "title" title' $$ vcat contents + +-- | Convert a list of Pandoc blocks to JATS. +blocksToJATS :: PandocMonad m => WriterOptions -> [Block] -> JATS m Doc +blocksToJATS opts = fmap vcat . mapM (blockToJATS opts) + +-- | Auxiliary function to convert Plain block to Para. +plainToPara :: Block -> Block +plainToPara (Plain x) = Para x +plainToPara x = x + +-- | Convert a list of pairs of terms and definitions into a list of +-- JATS varlistentrys. +deflistItemsToJATS :: PandocMonad m + => WriterOptions -> [([Inline],[[Block]])] -> JATS m Doc +deflistItemsToJATS opts items = + vcat <$> mapM (uncurry (deflistItemToJATS opts)) items + +-- | Convert a term and a list of blocks into a JATS varlistentry. +deflistItemToJATS :: PandocMonad m + => WriterOptions -> [Inline] -> [[Block]] -> JATS m Doc +deflistItemToJATS opts term defs = do + term' <- inlinesToJATS opts term + def' <- blocksToJATS opts $ concatMap (map plainToPara) defs + return $ inTagsIndented "def-item" $ + inTagsIndented "term" term' $$ + inTagsIndented "def" def' + +-- | Convert a list of lists of blocks to a list of JATS list items. +listItemsToJATS :: PandocMonad m + => WriterOptions -> Maybe [String] -> [[Block]] -> JATS m Doc +listItemsToJATS opts markers items = + case markers of + Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items + Just ms -> vcat <$> zipWithM (listItemToJATS opts) (map Just ms) items + +-- | Convert a list of blocks into a JATS list item. +listItemToJATS :: PandocMonad m + => WriterOptions -> Maybe String -> [Block] -> JATS m Doc +listItemToJATS opts mbmarker item = do + contents <- blocksToJATS opts item + return $ inTagsIndented "list-item" $ + maybe empty (\lbl -> inTagsIndented "label" (text lbl)) mbmarker + $$ contents + +imageMimeType :: String -> [(String, String)] -> (String, String) +imageMimeType src kvs = + let mbMT = getMimeType src + maintype = fromMaybe "image" $ + lookup "mimetype" kvs `mplus` + (takeWhile (/='/') <$> mbMT) + subtype = fromMaybe "" $ + lookup "mime-subtype" kvs `mplus` + ((drop 1 . dropWhile (/='/')) <$> mbMT) + in (maintype, subtype) + +languageFor :: [String] -> String +languageFor classes = + case langs of + (l:_) -> escapeStringForXML l + [] -> "" + where isLang l = map toLower l `elem` map (map toLower) languages + langsFrom s = if isLang s + then [s] + else languagesByExtension . map toLower $ s + langs = concatMap langsFrom classes + +codeAttr :: Attr -> (String, [(String, String)]) +codeAttr (ident,classes,kvs) = (lang, attr) + where + attr = [("id",ident) | not (null ident)] ++ + [("language",lang) | not (null lang)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["code-type", + "code-version", "executable", + "language-version", "orientation", + "platforms", "position", "specific-use"]] + lang = languageFor classes + +-- | Convert a Pandoc block element to JATS. +blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m Doc +blockToJATS _ Null = return empty +-- Bibliography reference: +blockToJATS opts (Div ('r':'e':'f':'-':_,_,_) [Para lst]) = + inlinesToJATS opts lst +blockToJATS opts (Div ("refs",_,_) xs) = do + contents <- blocksToJATS opts xs + return $ inTagsIndented "ref-list" contents +blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do + contents <- blocksToJATS opts bs + let attr = [("id", ident) | not (null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", + "content-type", "orientation", "position"]] + return $ inTags True cls attr contents +blockToJATS opts (Div (ident,_,kvs) bs) = do + contents <- blocksToJATS opts bs + let attr = [("id", ident) | not (null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", + "content-type", "orientation", "position"]] + return $ inTags True "boxed-text" attr contents +blockToJATS opts (Header _ _ title) = do + title' <- inlinesToJATS opts title + return $ inTagsSimple "title" title' +-- No Plain, everything needs to be in a block-level tag +blockToJATS opts (Plain lst) = blockToJATS opts (Para lst) +-- title beginning with fig: indicates that the image is a figure +blockToJATS opts (Para [Image (ident,_,kvs) txt + (src,'f':'i':'g':':':tit)]) = do + alt <- inlinesToJATS opts txt + let (maintype, subtype) = imageMimeType src kvs + let capt = if null txt + then empty + else inTagsSimple "caption" alt + let attr = [("id", ident) | not (null ident)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation", + "position", "specific-use"]] + let graphicattr = [("mimetype",maintype), + ("mime-subtype",subtype), + ("xlink:href",src), -- do we need to URL escape this? + ("xlink:title",tit)] + return $ inTags True "fig" attr $ + capt $$ selfClosingTag "graphic" graphicattr +blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do + let (maintype, subtype) = imageMimeType src kvs + let attr = [("id", ident) | not (null ident)] ++ + [("mimetype", maintype), + ("mime-subtype", subtype), + ("xlink:href", src)] ++ + [("xlink:title", tit) | not (null tit)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift", + "content-type", "specific-use", "xlink:actuate", + "xlink:href", "xlink:role", "xlink:show", + "xlink:type"]] + return $ selfClosingTag "graphic" attr +blockToJATS opts (Para lst) = + inTagsIndented "p" <$> inlinesToJATS opts lst +blockToJATS opts (LineBlock lns) = + blockToJATS opts $ linesToPara lns +blockToJATS opts (BlockQuote blocks) = + inTagsIndented "disp-quote" <$> blocksToJATS opts blocks +blockToJATS _ (CodeBlock a str) = return $ + inTags False tag attr (flush (text (escapeStringForXML str))) + where (lang, attr) = codeAttr a + tag = if null lang then "preformat" else "code" +blockToJATS _ (BulletList []) = return empty +blockToJATS opts (BulletList lst) = + inTags True "list" [("list-type", "bullet")] <$> + listItemsToJATS opts Nothing lst +blockToJATS _ (OrderedList _ []) = return empty +blockToJATS opts (OrderedList (start, numstyle, delimstyle) items) = do + let listType = case numstyle of + DefaultStyle -> "order" + Decimal -> "order" + Example -> "order" + UpperAlpha -> "alpha-upper" + LowerAlpha -> "alpha-lower" + UpperRoman -> "roman-upper" + LowerRoman -> "roman-lower" + let simpleList = start == 1 && (delimstyle == DefaultDelim || + delimstyle == Period) + let markers = if simpleList + then Nothing + else Just $ + orderedListMarkers (start, numstyle, delimstyle) + inTags True "list" [("list-type", listType)] <$> + listItemsToJATS opts markers items +blockToJATS opts (DefinitionList lst) = + inTags True "def-list" [] <$> deflistItemsToJATS opts lst +blockToJATS _ b@(RawBlock f str) + | f == "jats" = return $ text str -- raw XML block + | otherwise = do + report $ BlockNotRendered b + return empty +blockToJATS _ HorizontalRule = return empty -- not semantic +blockToJATS opts (Table [] aligns widths headers rows) = do + let percent w = show (truncate (100*w) :: Integer) ++ "*" + let coltags = vcat $ zipWith (\w al -> selfClosingTag "col" + ([("width", percent w) | w > 0] ++ + [("align", alignmentToString al)])) widths aligns + thead <- if all null headers + then return empty + else inTagsIndented "thead" <$> tableRowToJATS opts True headers + tbody <- (inTagsIndented "tbody" . vcat) <$> + mapM (tableRowToJATS opts False) rows + return $ inTags True "table" [] $ coltags $$ thead $$ tbody +blockToJATS opts (Table caption aligns widths headers rows) = do + captionDoc <- inTagsIndented "caption" <$> blockToJATS opts (Para caption) + tbl <- blockToJATS opts (Table [] aligns widths headers rows) + return $ inTags True "table-wrap" [] $ captionDoc $$ tbl + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToJATS :: PandocMonad m + => WriterOptions + -> Bool + -> [[Block]] + -> JATS m Doc +tableRowToJATS opts isHeader cols = + (inTagsIndented "tr" . vcat) <$> mapM (tableItemToJATS opts isHeader) cols + +tableItemToJATS :: PandocMonad m + => WriterOptions + -> Bool + -> [Block] + -> JATS m Doc +tableItemToJATS opts isHeader [Plain item] = + inTags True (if isHeader then "th" else "td") [] <$> + inlinesToJATS opts item +tableItemToJATS opts isHeader item = + (inTags True (if isHeader then "th" else "td") [] . vcat) <$> + mapM (blockToJATS opts) item + +-- | Convert a list of inline elements to JATS. +inlinesToJATS :: PandocMonad m => WriterOptions -> [Inline] -> JATS m Doc +inlinesToJATS opts lst = hcat <$> mapM (inlineToJATS opts) lst + +-- | Convert an inline element to JATS. +inlineToJATS :: PandocMonad m => WriterOptions -> Inline -> JATS m Doc +inlineToJATS _ (Str str) = return $ text $ escapeStringForXML str +inlineToJATS opts (Emph lst) = + inTagsSimple "italic" <$> inlinesToJATS opts lst +inlineToJATS opts (Strong lst) = + inTags False "bold" [("role", "strong")] <$> inlinesToJATS opts lst +inlineToJATS opts (Strikeout lst) = + inTagsSimple "strike" <$> inlinesToJATS opts lst +inlineToJATS opts (Superscript lst) = + inTagsSimple "sup" <$> inlinesToJATS opts lst +inlineToJATS opts (Subscript lst) = + inTagsSimple "sub" <$> inlinesToJATS opts lst +inlineToJATS opts (SmallCaps lst) = + inTags False "sc" [("role", "smallcaps")] <$> + inlinesToJATS opts lst +inlineToJATS opts (Quoted SingleQuote lst) = do + contents <- inlinesToJATS opts lst + return $ char '‘' <> contents <> char '’' +inlineToJATS opts (Quoted DoubleQuote lst) = do + contents <- inlinesToJATS opts lst + return $ char '“' <> contents <> char '”' +inlineToJATS _ (Code a str) = + return $ inTags False tag attr $ text (escapeStringForXML str) + where (lang, attr) = codeAttr a + tag = if null lang then "monospace" else "code" +inlineToJATS _ il@(RawInline f x) + | f == "jats" = return $ text x + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToJATS _ LineBreak = return $ selfClosingTag "break" [] +inlineToJATS _ Space = return space +inlineToJATS opts SoftBreak + | writerWrapText opts == WrapPreserve = return cr + | otherwise = return space +inlineToJATS opts (Note contents) = + -- TODO technically only <p> tags are allowed inside + inTagsIndented "fn" <$> blocksToJATS opts contents +inlineToJATS opts (Cite _ lst) = + -- TODO revisit this after examining the jats.csl pipeline + inlinesToJATS opts lst +inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils +inlineToJATS opts (Span (ident,_,kvs) ils) = do + contents <- inlinesToJATS opts ils + let attr = [("id",ident) | not (null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs + , k `elem` ["content-type", "rationale", + "rid", "specific-use"]] + return $ selfClosingTag "milestone-start" attr <> contents <> + selfClosingTag "milestone-end" [] +inlineToJATS _ (Math t str) = do + let addPref (Xml.Attr q v) + | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v + | otherwise = Xml.Attr q v + let fixNS' e = e{ Xml.elName = + (Xml.elName e){ Xml.qPrefix = Just "mml" } } + let fixNS = everywhere (mkT fixNS') . + (\e -> e{ Xml.elAttribs = map addPref (Xml.elAttribs e) }) + let conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP + res <- convertMath writeMathML t str + let tagtype = case t of + DisplayMath -> "disp-formula" + InlineMath -> "inline-formula" + let rawtex = inTagsSimple "tex-math" + $ text "<![CDATA[" <> + text str <> + text "]]>" + return $ inTagsSimple tagtype $ + case res of + Right r -> inTagsSimple "alternatives" $ + cr <> rawtex $$ + text (Xml.ppcElement conf $ fixNS r) + Left _ -> rawtex +inlineToJATS _ (Link _attr [Str t] ('m':'a':'i':'l':'t':'o':':':email, _)) + | escapeURI t == email = + return $ inTagsSimple "email" $ text (escapeStringForXML email) +inlineToJATS opts (Link (ident,_,kvs) txt ('#':src, _)) = do + let attr = [("id", ident) | not (null ident)] ++ + [("alt", stringify txt) | not (null txt)] ++ + [("rid", src)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] + if null txt + then return $ selfClosingTag "xref" attr + else do + contents <- inlinesToJATS opts txt + return $ inTags False "xref" attr contents +inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do + let attr = [("id", ident) | not (null ident)] ++ + [("ext-link-type", "uri"), + ("xlink:href", src)] ++ + [("xlink:title", tit) | not (null tit)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["assigning-authority", + "specific-use", "xlink:actuate", + "xlink:role", "xlink:show", + "xlink:type"]] + contents <- inlinesToJATS opts txt + return $ inTags False "ext-link" attr contents +inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do + let mbMT = getMimeType src + let maintype = fromMaybe "image" $ + lookup "mimetype" kvs `mplus` + (takeWhile (/='/') <$> mbMT) + let subtype = fromMaybe "" $ + lookup "mime-subtype" kvs `mplus` + ((drop 1 . dropWhile (/='/')) <$> mbMT) + let attr = [("id", ident) | not (null ident)] ++ + [("mimetype", maintype), + ("mime-subtype", subtype), + ("xlink:href", src)] ++ + [("xlink:title", tit) | not (null tit)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift", + "content-type", "specific-use", "xlink:actuate", + "xlink:href", "xlink:role", "xlink:show", + "xlink:type"]] + return $ selfClosingTag "inline-graphic" attr diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 88934eb44..f61c878e5 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, - PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,37 +30,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into LaTeX. -} -module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where +module Text.Pandoc.Writers.LaTeX ( + writeLaTeX + , writeBeamer + ) where +import Control.Applicative ((<|>)) +import Control.Monad.State.Strict +import Data.Aeson (FromJSON, object, (.=)) +import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord, + toLower) +import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy, + stripPrefix, (\\)) +import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI (unEscapeString) +import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import Text.Pandoc.Class (PandocMonad, report, toLang) import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, + styleToLaTeX, toListingsLanguage) +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Templates -import Text.Printf ( printf ) -import Network.URI ( isURI, unEscapeString ) -import Data.Aeson (object, (.=), FromJSON) -import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, - nub, nubBy, foldl' ) -import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, - ord, isAlphaNum ) -import Data.Maybe ( fromMaybe, isJust, catMaybes ) -import qualified Data.Text as T -import Control.Applicative ((<|>)) -import Control.Monad.State -import qualified Text.Parsec as P import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize +import Text.Pandoc.Shared import Text.Pandoc.Slides -import Text.Pandoc.Highlighting (highlight, styleToLaTeX, - formatLaTeXInline, formatLaTeXBlock, - toListingsLanguage) +import Text.Pandoc.Templates +import Text.Pandoc.Walk +import Text.Pandoc.Writers.Shared +import qualified Text.Parsec as P +import Text.Printf (printf) data WriterState = WriterState { stInNote :: Bool -- true if we're in a note , stInQuote :: Bool -- true if in a blockquote , stInMinipage :: Bool -- true if in minipage , stInHeading :: Bool -- true if in a section heading + , stInItem :: Bool -- true if in \item[..] , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter @@ -74,49 +82,75 @@ data WriterState = , stHighlighting :: Bool -- true if document has highlighted code , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit , stInternalLinks :: [String] -- list of internal link targets - , stUsesEuro :: Bool -- true if euro symbol used + , stBeamer :: Bool -- produce beamer + , stEmptyLine :: Bool -- true if no content on line } +startingState :: WriterOptions -> WriterState +startingState options = WriterState { + stInNote = False + , stInQuote = False + , stInMinipage = False + , stInHeading = False + , stInItem = False + , stNotes = [] + , stOLLevel = 1 + , stOptions = options + , stVerbInNote = False + , stTable = False + , stStrikeout = False + , stUrl = False + , stGraphics = False + , stLHS = False + , stBook = case writerTopLevelDivision options of + TopLevelPart -> True + TopLevelChapter -> True + _ -> False + , stCsquotes = False + , stHighlighting = False + , stIncremental = writerIncremental options + , stInternalLinks = [] + , stBeamer = False + , stEmptyLine = True } + -- | Convert Pandoc to LaTeX. -writeLaTeX :: WriterOptions -> Pandoc -> String +writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeLaTeX options document = - evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, stInQuote = False, - stInMinipage = False, stInHeading = False, - stNotes = [], stOLLevel = 1, - stOptions = options, stVerbInNote = False, - stTable = False, stStrikeout = False, - stUrl = False, stGraphics = False, - stLHS = False, - stBook = (case writerTopLevelDivision options of - TopLevelPart -> True - TopLevelChapter -> True - _ -> False), - stCsquotes = False, stHighlighting = False, - stIncremental = writerIncremental options, - stInternalLinks = [], stUsesEuro = False } - -pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String + evalStateT (pandocToLaTeX options document) $ + startingState options + +-- | Convert Pandoc to LaTeX Beamer. +writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeBeamer options document = + evalStateT (pandocToLaTeX options document) $ + (startingState options){ stBeamer = True } + +type LW m = StateT WriterState m + +pandocToLaTeX :: PandocMonad m + => WriterOptions -> Pandoc -> LW m Text pandocToLaTeX options (Pandoc meta blocks) = do -- Strip off final 'references' header if --natbib or --biblatex let method = writerCiteMethod options let blocks' = if method == Biblatex || method == Natbib then case reverse blocks of - (Div (_,["references"],_) _):xs -> reverse xs - _ -> blocks + Div (_,["references"],_) _:xs -> reverse xs + _ -> blocks else blocks -- see if there are internal links - let isInternalLink (Link _ _ ('#':xs,_)) = [xs] - isInternalLink _ = [] + let isInternalLink (Link _ _ ('#':xs,_)) = [xs] + isInternalLink _ = [] modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } - let template = maybe "" id $ writerTemplate options + let template = fromMaybe "" $ writerTemplate options -- set stBook depending on documentclass let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) + (fmap render' . blockListToLaTeX) + (fmap render' . inlineListToLaTeX) meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] let documentClass = case P.parse pDocumentClass "template" template of @@ -143,25 +177,38 @@ pandocToLaTeX options (Pandoc meta blocks) = do else case last blocks' of Header 1 _ il -> (init blocks', il) _ -> (blocks', []) - blocks''' <- if writerBeamer options + beamer <- gets stBeamer + blocks''' <- if beamer then toSlides blocks'' else return blocks'' body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''' - (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader - let main = render colwidth $ vsep body + (biblioTitle :: Text) <- render' <$> inlineListToLaTeX lastHeader + let main = render' $ vsep body st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta - let docLangs = nub $ query (extract "lang") blocks + docLangs <- catMaybes <$> + mapM (toLang . Just) (ordNub (query (extract "lang") blocks)) let hasStringValue x = isJust (getField x metadata :: Maybe String) - let geometryFromMargins = intercalate [','] $ catMaybes $ - map (\(x,y) -> + let geometryFromMargins = intercalate [','] $ mapMaybe (\(x,y) -> ((x ++ "=") ++) <$> getField y metadata) [("lmargin","margin-left") ,("rmargin","margin-right") ,("tmargin","margin-top") ,("bmargin","margin-bottom") ] + let toPolyObj lang = object [ "name" .= T.pack name + , "options" .= T.pack opts ] + where + (name, opts) = toPolyglossia lang + mblang <- toLang $ case getLang options meta of + Just l -> Just l + Nothing | null docLangs -> Nothing + | otherwise -> Just "en" + -- we need a default here since lang is used in template conditionals + + let dirs = query (extract "dir") blocks + let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if stBook st @@ -170,7 +217,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "body" main $ defField "title-meta" titleMeta $ defField "author-meta" (intercalate "; " authorsMeta) $ - defField "documentclass" (if writerBeamer options + defField "documentclass" (if beamer then ("beamer" :: String) else if stBook st then "book" @@ -183,12 +230,14 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "lhs" (stLHS st) $ defField "graphics" (stGraphics st) $ defField "book-class" (stBook st) $ - defField "euro" (stUsesEuro st) $ defField "listings" (writerListings options || stLHS st) $ - defField "beamer" (writerBeamer options) $ + defField "beamer" beamer $ (if stHighlighting st - then defField "highlighting-macros" (styleToLaTeX - $ writerHighlightStyle options ) + then case writerHighlightStyle options of + Just sty -> + defField "highlighting-macros" + (styleToLaTeX sty) + Nothing -> id else id) $ (case writerCiteMethod options of Natbib -> defField "biblio-title" biblioTitle . @@ -196,26 +245,26 @@ pandocToLaTeX options (Pandoc meta blocks) = do Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ - -- set lang to something so polyglossia/babel is included - defField "lang" (if null docLangs then ""::String else "en") $ - defField "otherlangs" docLangs $ defField "colorlinks" (any hasStringValue ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $ - defField "dir" (if (null $ query (extract "dir") blocks) - then ""::String - else "ltr") $ + (if null dirs + then id + else defField "dir" ("ltr" :: String)) $ defField "section-titles" True $ defField "geometry" geometryFromMargins $ + (case getField "papersize" metadata of + -- uppercase a4, a5, etc. + Just (('A':d:ds) :: String) + | all isDigit (d:ds) -> resetField "papersize" + (('a':d:ds) :: String) + _ -> id) metadata - let toPolyObj lang = object [ "name" .= T.pack name - , "options" .= T.pack opts ] - where - (name, opts) = toPolyglossia lang - let lang = maybe [] (splitBy (=='-')) $ getField "lang" context - otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context let context' = - defField "babel-lang" (toBabel lang) - $ defField "babel-otherlangs" (map toBabel otherlangs) + -- note: lang is used in some conditionals in the template, + -- so we need to set it if we have any babel/polyglossia: + maybe id (defField "lang" . renderLang) mblang + $ maybe id (defField "babel-lang" . toBabel) mblang + $ defField "babel-otherlangs" (map toBabel docLangs) $ defField "babel-newcommands" (concatMap (\(poly, babel) -> -- \textspanish and \textgalician are already used by babel -- save them as \oritext... and let babel use that @@ -227,31 +276,30 @@ pandocToLaTeX options (Pandoc meta blocks) = do "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++ "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" ++ poly ++ "}{##2}}}\n" - else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" - ++ babel ++ "}{#2}}\n" ++ - "\\newenvironment{" ++ poly ++ "}[2][]{\\begin{otherlanguage}{" - ++ babel ++ "}}{\\end{otherlanguage}}\n" + else (if poly == "latin" -- see #4161 + then "\\providecommand{\\textlatin}{}\n\\renewcommand" + else "\\newcommand") ++ "{\\text" ++ poly ++ + "}[2][]{\\foreignlanguage{" ++ babel ++ "}{#2}}\n" ++ + "\\newenvironment{" ++ poly ++ + "}[2][]{\\begin{otherlanguage}{" ++ + babel ++ "}}{\\end{otherlanguage}}\n" ) -- eliminate duplicates that have same polyglossia name $ nubBy (\a b -> fst a == fst b) -- find polyglossia and babel names of languages used in the document - $ map (\l -> - let lng = splitBy (=='-') l - in (fst $ toPolyglossia lng, toBabel lng) - ) - docLangs ) - $ defField "polyglossia-lang" (toPolyObj lang) - $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs) - $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of - Just "rtl" -> True - _ -> False) - $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' + $ map (\l -> (fst $ toPolyglossia l, toBabel l)) docLangs + ) + $ maybe id (defField "polyglossia-lang" . toPolyObj) mblang + $ defField "polyglossia-otherlangs" (map toPolyObj docLangs) + $ + defField "latex-dir-rtl" + (getField "dir" context == Just ("rtl" :: String)) context + case writerTemplate options of + Nothing -> return main + Just tpl -> renderTemplate' tpl context' -- | Convert Elements to LaTeX -elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc +elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc elementToLaTeX _ (Blk block) = blockToLaTeX block elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do modify $ \s -> s{stInHeading = True} @@ -266,18 +314,15 @@ data StringContext = TextString deriving (Eq) -- escape things as needed for LaTeX -stringToLaTeX :: StringContext -> String -> State WriterState String +stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && ctx == TextString + let ligatures = isEnabled Ext_smart opts && ctx == TextString let isUrl = ctx == URLString - when (x == '€') $ - modify $ \st -> st{ stUsesEuro = True } return $ case x of - '€' -> "\\euro{}" ++ rest '{' -> "\\{" ++ rest '}' -> "\\}" ++ rest '`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest @@ -311,26 +356,30 @@ stringToLaTeX ctx (x:xs) = do '\x2013' | ligatures -> "--" ++ rest _ -> x : rest -toLabel :: String -> State WriterState String +toLabel :: PandocMonad m => String -> LW m String toLabel z = go `fmap` stringToLaTeX URLString z where go [] = "" go (x:xs) | (isLetter x || isDigit x) && isAscii x = x:go xs - | elem x ("_-+=:;." :: String) = x:go xs + | x `elem` ("_-+=:;." :: String) = x:go xs | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc inCmd cmd contents = char '\\' <> text cmd <> braces contents -toSlides :: [Block] -> State WriterState [Block] +toSlides :: PandocMonad m => [Block] -> LW m [Block] toSlides bs = do opts <- gets stOptions let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts let bs' = prepSlides slideLevel bs - concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') + concat `fmap` mapM (elementToBeamer slideLevel) (hierarchicalize bs') -elementToBeamer :: Int -> Element -> State WriterState [Block] +elementToBeamer :: PandocMonad m => Int -> Element -> LW m [Block] +elementToBeamer _slideLevel (Blk (Div attr bs)) = do + -- make sure we support "blocks" inside divs + bs' <- concat `fmap` mapM (elementToBeamer 0) (hierarchicalize bs) + return [Div attr bs'] elementToBeamer _slideLevel (Blk b) = return [b] elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) | lvl > slideLevel = do @@ -340,7 +389,7 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) : bs ++ [RawBlock "latex" "\\end{block}"] | lvl < slideLevel = do bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts - return $ (Header lvl (ident,classes,kvs) tit) : bs + return $ Header lvl (ident,classes,kvs) tit : bs | otherwise = do -- lvl == slideLevel -- note: [fragile] is required or verbatim breaks let hasCodeBlock (CodeBlock _ _) = [True] @@ -349,19 +398,28 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) hasCode _ = [] let fragile = "fragile" `elem` classes || not (null $ query hasCodeBlock elts ++ query hasCode elts) - let frameoptions = ["allowdisplaybreaks", "allowframebreaks", + let frameoptions = ["allowdisplaybreaks", "allowframebreaks", "fragile", "b", "c", "t", "environment", "label", "plain", "shrink", "standout"] - let optionslist = ["fragile" | fragile] ++ + let optionslist = ["fragile" | fragile && isNothing (lookup "fragile" kvs)] ++ [k | k <- classes, k `elem` frameoptions] ++ [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions] let options = if null optionslist then "" else "[" ++ intercalate "," optionslist ++ "]" - let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) : - if tit == [Str "\0"] -- marker for hrule - then [] - else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"] + let latex = RawInline (Format "latex") + slideTitle <- + if tit == [Str "\0"] -- marker for hrule + then return [] + else + if null ident + then return $ latex "{" : tit ++ [latex "}"] + else do + ref <- toLabel ident + return $ latex ("{%\n\\protect\\hypertarget{" ++ + ref ++ "}{%\n") : tit ++ [latex "}}"] + let slideStart = Para $ + RawInline "latex" ("\\begin{frame}" ++ options) : slideTitle let slideEnd = RawBlock "latex" "\\end{frame}" -- now carve up slide into blocks if there are sections inside bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts @@ -376,43 +434,89 @@ isListBlock _ = False isLineBreakOrSpace :: Inline -> Bool isLineBreakOrSpace LineBreak = True isLineBreakOrSpace SoftBreak = True -isLineBreakOrSpace Space = True -isLineBreakOrSpace _ = False +isLineBreakOrSpace Space = True +isLineBreakOrSpace _ = False -- | Convert Pandoc block element to LaTeX. -blockToLaTeX :: Block -- ^ Block to convert - -> State WriterState Doc +blockToLaTeX :: PandocMonad m + => Block -- ^ Block to convert + -> LW m Doc blockToLaTeX Null = return empty -blockToLaTeX (Div (identifier,classes,kvs) bs) = do - beamer <- writerBeamer `fmap` gets stOptions - ref <- toLabel identifier - let linkAnchor = if null identifier - then empty - else "\\hypertarget" <> braces (text ref) <> - braces empty - let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir - let wrapDir = case lookup "dir" kvs of - Just "rtl" -> align "RTL" - Just "ltr" -> align "LTR" - _ -> id - wrapLang txt = case lookup "lang" kvs of - Just lng -> let (l, o) = toPolyglossiaEnv lng - ops = if null o - then "" - else brackets $ text o - in inCmd "begin" (text l) <> ops - $$ blankline <> txt <> blankline - $$ inCmd "end" (text l) - Nothing -> txt - wrapNotes txt = if beamer && "notes" `elem` classes +blockToLaTeX (Div (identifier,classes,kvs) bs) + | "incremental" `elem` classes = do + let classes' = filter ("incremental"/=) classes + beamer <- gets stBeamer + if beamer + then do oldIncremental <- gets stIncremental + modify $ \s -> s{ stIncremental = True } + result <- blockToLaTeX $ Div (identifier,classes',kvs) bs + modify $ \s -> s{ stIncremental = oldIncremental } + return result + else blockToLaTeX $ Div (identifier,classes',kvs) bs + | "nonincremental" `elem` classes = do + let classes' = filter ("nonincremental"/=) classes + beamer <- gets stBeamer + if beamer + then do oldIncremental <- gets stIncremental + modify $ \s -> s{ stIncremental = False } + result <- blockToLaTeX $ Div (identifier,classes',kvs) bs + modify $ \s -> s{ stIncremental = oldIncremental } + return result + else blockToLaTeX $ Div (identifier,classes',kvs) bs + | otherwise = do + beamer <- gets stBeamer + linkAnchor' <- hypertarget True identifier empty + -- see #2704 for the motivation for adding \leavevmode: + let linkAnchor = + case bs of + Para _ : _ + | not (isEmpty linkAnchor') + -> "\\leavevmode" <> linkAnchor' <> "%" + _ -> linkAnchor' + let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + lang <- toLang $ lookup "lang" kvs + let wrapColumns = if "columns" `elem` classes + then \contents -> + inCmd "begin" "columns" <> brackets "T" + $$ contents + $$ inCmd "end" "columns" + else id + wrapColumn = if "column" `elem` classes + then \contents -> + let fromPct xs = + case reverse xs of + '%':ds -> '0':'.': reverse ds + _ -> xs + w = maybe "0.48" fromPct (lookup "width" kvs) + in inCmd "begin" "column" <> + braces (text w <> "\\textwidth") + $$ contents + $$ inCmd "end" "column" + else id + wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "RTL" + Just "ltr" -> align "LTR" + _ -> id + wrapLang txt = case lang of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if null o + then "" + else brackets $ text o + in inCmd "begin" (text l) <> ops + $$ blankline <> txt <> blankline + $$ inCmd "end" (text l) + Nothing -> txt + wrapNotes txt = if beamer && "notes" `elem` classes then "\\note" <> braces txt -- speaker notes else linkAnchor $$ txt - fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs + (wrapColumns . wrapColumn . wrapDir . wrapLang . wrapNotes) + <$> blockListToLaTeX bs blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do inNote <- gets stInNote + inMinipage <- gets stInMinipage modify $ \st -> st{ stInMinipage = True, stNotes = [] } capt <- inlineListToLaTeX txt notes <- gets stNotes @@ -426,25 +530,26 @@ blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = d let footnotes = notesToLaTeX notes lab <- labelFor ident let caption = "\\caption" <> captForLof <> braces capt <> lab - figure <- hypertarget ident (cr <> - "\\begin{figure}" $$ "\\centering" $$ img $$ - caption $$ "\\end{figure}" <> cr) - return $ if inNote - -- can't have figures in notes + innards <- hypertarget True ident $ + "\\centering" $$ img $$ caption <> cr + let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}" + return $ if inNote || inMinipage + -- can't have figures in notes or minipage (here, table cell) + -- http://www.tex.ac.uk/FAQ-ouparmd.html then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" else figure $$ footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer if beamer then blockToLaTeX (RawBlock "latex" "\\pause") else inlineListToLaTeX [Str ".",Space,Str ".",Space,Str "."] blockToLaTeX (Para lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -blockToLaTeX (LineBlock lns) = do +blockToLaTeX (LineBlock lns) = blockToLaTeX $ linesToPara lns blockToLaTeX (BlockQuote lst) = do - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer case lst of [b] | beamer && isListBlock b -> do oldIncremental <- gets stIncremental @@ -460,11 +565,11 @@ blockToLaTeX (BlockQuote lst) = do return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions - ref <- toLabel identifier - let linkAnchor = if null identifier + lab <- labelFor identifier + linkAnchor' <- hypertarget True identifier lab + let linkAnchor = if isEmpty linkAnchor' then empty - else "\\hypertarget" <> braces (text ref) <> - braces ("\\label" <> braces (text ref)) + else linkAnchor' <> "%" let lhsCodeBlock = do modify $ \s -> s{ stLHS = True } return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ @@ -479,6 +584,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do text str $$ text ("\\end{" ++ env ++ "}")) <> cr let listingsCodeBlock = do st <- get + ref <- toLabel identifier let params = if writerListings (stOptions st) then (case getListingsLanguage classes of Just l -> [ "language=" ++ mbBraced l ] @@ -495,9 +601,6 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do else [ "label=" ++ ref ]) else [] - mbBraced x = if not (all isAlphaNum x) - then "{" <> x <> "}" - else x printParams | null params = empty | otherwise = brackets $ hcat (intersperse ", " @@ -505,24 +608,34 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ "\\end{lstlisting}") $$ cr let highlightedCodeBlock = - case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of - Nothing -> rawCodeBlock - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (flush $ linkAnchor $$ text (T.unpack h)) + case highlight (writerSyntaxMap opts) + formatLaTeXBlock ("",classes,keyvalAttr) str of + Left msg -> do + unless (null msg) $ + report $ CouldNotHighlight msg + rawCodeBlock + Right h -> do + st <- get + when (stInNote st) $ modify (\s -> s{ stVerbInNote = True }) + modify (\s -> s{ stHighlighting = True }) + return (flush $ linkAnchor $$ text (T.unpack h)) case () of _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && - "literate" `elem` classes -> lhsCodeBlock - | writerListings opts -> listingsCodeBlock - | writerHighlight opts && not (null classes) -> highlightedCodeBlock - | otherwise -> rawCodeBlock -blockToLaTeX (RawBlock f x) + "literate" `elem` classes -> lhsCodeBlock + | writerListings opts -> listingsCodeBlock + | not (null classes) && isJust (writerHighlightStyle opts) + -> highlightedCodeBlock + | otherwise -> rawCodeBlock +blockToLaTeX b@(RawBlock f x) | f == Format "latex" || f == Format "tex" = return $ text x - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToLaTeX (BulletList []) = return empty -- otherwise latex error blockToLaTeX (BulletList lst) = do incremental <- gets stIncremental - beamer <- writerBeamer `fmap` gets stOptions + beamer <- gets stBeamer let inc = if beamer && incremental then "[<+->]" else "" items <- mapM listItemToLaTeX lst let spacing = if isTightList lst @@ -538,6 +651,7 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do put $ st {stOLLevel = oldlevel + 1} items <- mapM listItemToLaTeX lst modify (\s -> s {stOLLevel = oldlevel}) + let beamer = stBeamer st let tostyle x = case numstyle of Decimal -> "\\arabic" <> braces x UpperRoman -> "\\Roman" <> braces x @@ -547,15 +661,24 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do Example -> "\\arabic" <> braces x DefaultStyle -> "\\arabic" <> braces x let todelim x = case numdelim of - OneParen -> x <> ")" - TwoParens -> parens x - Period -> x <> "." - _ -> x <> "." + OneParen -> x <> ")" + TwoParens -> parens x + Period -> x <> "." + _ -> x <> "." + let exemplar = case numstyle of + Decimal -> "1" + UpperRoman -> "I" + LowerRoman -> "i" + UpperAlpha -> "A" + LowerAlpha -> "a" + Example -> "1" + DefaultStyle -> "1" let enum = text $ "enum" ++ map toLower (toRomanNumeral oldlevel) - let stylecommand = if numstyle == DefaultStyle && numdelim == DefaultDelim - then empty - else "\\def" <> "\\label" <> enum <> - braces (todelim $ tostyle enum) + let stylecommand + | numstyle == DefaultStyle && numdelim == DefaultDelim = empty + | beamer = brackets (todelim exemplar) + | otherwise = "\\def" <> "\\label" <> enum <> + braces (todelim $ tostyle enum) let resetcounter = if start == 1 || oldlevel > 4 then empty else "\\setcounter" <> braces enum <> @@ -579,7 +702,8 @@ blockToLaTeX (DefinitionList lst) = do else empty return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ "\\end{description}" -blockToLaTeX HorizontalRule = return $ +blockToLaTeX HorizontalRule = + return "\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}" blockToLaTeX (Header level (id',classes,_) lst) = do modify $ \s -> s{stInHeading = True} @@ -587,33 +711,34 @@ blockToLaTeX (Header level (id',classes,_) lst) = do modify $ \s -> s{stInHeading = False} return hdr blockToLaTeX (Table caption aligns widths heads rows) = do - headers <- if all null heads - then return empty - else do - contents <- (tableRowToLaTeX True aligns widths) heads - return ("\\toprule" $$ contents $$ "\\midrule") - let endhead = if all null heads - then empty - else text "\\endhead" - let endfirsthead = if all null heads - then empty - else text "\\endfirsthead" + let toHeaders hs = do contents <- tableRowToLaTeX True aligns widths hs + return ("\\toprule" $$ contents $$ "\\midrule") + let removeNote (Note _) = Span ("", [], []) [] + removeNote x = x captionText <- inlineListToLaTeX caption + firsthead <- if isEmpty captionText || all null heads + then return empty + else ($$ text "\\endfirsthead") <$> toHeaders heads + head' <- if all null heads + then return "\\toprule" + -- avoid duplicate notes in head and firsthead: + else toHeaders (if isEmpty firsthead + then heads + else walk removeNote heads) let capt = if isEmpty captionText then empty - else text "\\caption" <> braces captionText <> "\\tabularnewline" - $$ headers - $$ endfirsthead + else text "\\caption" <> + braces captionText <> "\\tabularnewline" rows' <- mapM (tableRowToLaTeX False aligns widths) rows - let colDescriptors = text $ concat $ map toColDescriptor aligns + let colDescriptors = text $ concatMap toColDescriptor aligns modify $ \s -> s{ stTable = True } return $ "\\begin{longtable}[]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end $$ capt - $$ (if all null heads then "\\toprule" else empty) - $$ headers - $$ endhead + $$ firsthead + $$ head' + $$ "\\endhead" $$ vcat rows' $$ "\\bottomrule" $$ "\\end{longtable}" @@ -626,14 +751,16 @@ toColDescriptor align = AlignCenter -> "c" AlignDefault -> "l" -blockListToLaTeX :: [Block] -> State WriterState Doc -blockListToLaTeX lst = vsep `fmap` mapM blockToLaTeX lst +blockListToLaTeX :: PandocMonad m => [Block] -> LW m Doc +blockListToLaTeX lst = + vsep `fmap` mapM (\b -> setEmptyLine True >> blockToLaTeX b) lst -tableRowToLaTeX :: Bool +tableRowToLaTeX :: PandocMonad m + => Bool -> [Alignment] -> [Double] -> [[Block]] - -> State WriterState Doc + -> LW m Doc tableRowToLaTeX header aligns widths cols = do -- scale factor compensates for extra space between columns -- so the whole table isn't larger than columnwidth @@ -643,9 +770,9 @@ tableRowToLaTeX header aligns widths cols = do isSimple [] = True isSimple _ = False -- simple tables have to have simple cells: - let widths' = if not (all isSimple cols) + let widths' = if all (== 0) widths && not (all isSimple cols) then replicate (length aligns) - (0.97 / fromIntegral (length aligns)) + (scaleFactor / fromIntegral (length aligns)) else map (scaleFactor *) widths cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols return $ hsep (intersperse "&" cells) <> "\\tabularnewline" @@ -672,10 +799,10 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of -- math breaks in simple tables. displayMathToInline :: Inline -> Inline displayMathToInline (Math DisplayMath x) = Math InlineMath x -displayMathToInline x = x +displayMathToInline x = x -tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) - -> State WriterState Doc +tableCellToLaTeX :: PandocMonad m => Bool -> (Double, Alignment, [Block]) + -> LW m Doc tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks tableCellToLaTeX header (width, align, blocks) = do @@ -691,7 +818,7 @@ tableCellToLaTeX header (width, align, blocks) = do AlignDefault -> "\\raggedright" return $ ("\\begin{minipage}" <> valign <> braces (text (printf "%.2f\\columnwidth" width)) <> - (halign <> "\\strut" <> cr <> cellContents <> "\\strut" <> cr) <> + (halign <> cr <> cellContents <> "\\strut" <> cr) <> "\\end{minipage}") $$ notesToLaTeX notes @@ -708,19 +835,22 @@ notesToLaTeX ns = (case length ns of $ map (\x -> "\\footnotetext" <> braces x) $ reverse ns) -listItemToLaTeX :: [Block] -> State WriterState Doc +listItemToLaTeX :: PandocMonad m => [Block] -> LW m Doc listItemToLaTeX lst -- we need to put some text before a header if it's the first -- element in an item. This will look ugly in LaTeX regardless, but -- this will keep the typesetter from throwing an error. - | ((Header _ _ _) :_) <- lst = - blockListToLaTeX lst >>= return . (text "\\item ~" $$) . (nest 2) + | (Header{} :_) <- lst = + blockListToLaTeX lst >>= return . (text "\\item ~" $$) . nest 2 | otherwise = blockListToLaTeX lst >>= return . (text "\\item" $$) . - (nest 2) + nest 2 -defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc +defListItemToLaTeX :: PandocMonad m => ([Inline], [[Block]]) -> LW m Doc defListItemToLaTeX (term, defs) = do + -- needed to turn off 'listings' because it breaks inside \item[...]: + modify $ \s -> s{stInItem = True} term' <- inlineListToLaTeX term + modify $ \s -> s{stInItem = False} -- put braces around term if it contains an internal link, -- since otherwise we get bad bracket interactions: \item[\hyperref[..] let isInternalLink (Link _ _ ('#':_,_)) = True @@ -730,32 +860,33 @@ defListItemToLaTeX (term, defs) = do else term' def' <- liftM vsep $ mapM blockListToLaTeX defs return $ case defs of - (((Header _ _ _) : _) : _) -> + ((Header{} : _) : _) -> "\\item" <> brackets term'' <> " ~ " $$ def' _ -> "\\item" <> brackets term'' $$ def' -- | Craft the section header, inserting the secton reference, if supplied. -sectionHeader :: Bool -- True for unnumbered +sectionHeader :: PandocMonad m + => Bool -- True for unnumbered -> [Char] -> Int -> [Inline] - -> State WriterState Doc + -> LW m Doc sectionHeader unnumbered ident level lst = do txt <- inlineListToLaTeX lst plain <- stringToLaTeX TextString $ concatMap stringify lst - let removeInvalidInline (Note _) = [] + let removeInvalidInline (Note _) = [] removeInvalidInline (Span (id', _, _) _) | not (null id') = [] - removeInvalidInline (Image _ _ _) = [] - removeInvalidInline x = [x] + removeInvalidInline Image{} = [] + removeInvalidInline x = [x] let lstNoNotes = foldr (mappend . (\x -> walkM removeInvalidInline x)) mempty lst txtNoNotes <- inlineListToLaTeX lstNoNotes -- footnotes in sections don't work (except for starred variants) -- unless you specify an optional argument: -- \section[mysec]{mysec\footnote{blah}} - optional <- if unnumbered || lstNoNotes == lst || lstNoNotes == [] + optional <- if unnumbered || lstNoNotes == lst || null lstNoNotes then return empty - else do + else return $ brackets txtNoNotes let contents = if render Nothing txt == plain then braces txt @@ -767,7 +898,8 @@ sectionHeader unnumbered ident level lst = do let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault then TopLevelChapter else writerTopLevelDivision opts - let level' = if writerBeamer opts && + beamer <- gets stBeamer + let level' = if beamer && topLevelDivision `elem` [TopLevelPart, TopLevelChapter] -- beamer has parts but no chapters then if level == 1 then -1 else level - 1 @@ -794,7 +926,8 @@ sectionHeader unnumbered ident level lst = do lab <- labelFor ident let star = if unnumbered && level' < 4 then text "*" else empty let stuffing = star <> optional <> contents - stuffing' <- hypertarget ident $ text ('\\':sectionType) <> stuffing <> lab + stuffing' <- hypertarget True ident $ + text ('\\':sectionType) <> stuffing <> lab return $ if level' > 5 then txt else prefix $$ stuffing' @@ -804,28 +937,28 @@ sectionHeader unnumbered ident level lst = do braces txtNoNotes else empty -hypertarget :: String -> Doc -> State WriterState Doc -hypertarget ident x = do +hypertarget :: PandocMonad m => Bool -> String -> Doc -> LW m Doc +hypertarget _ "" x = return x +hypertarget addnewline ident x = do ref <- text `fmap` toLabel ident - internalLinks <- gets stInternalLinks - return $ - if ident `elem` internalLinks - then text "\\hypertarget" + return $ text "\\hypertarget" <> braces ref - <> braces x - else x + <> braces ((if addnewline && not (isEmpty x) + then ("%" <> cr) + else empty) <> x) -labelFor :: String -> State WriterState Doc +labelFor :: PandocMonad m => String -> LW m Doc labelFor "" = return empty labelFor ident = do ref <- text `fmap` toLabel ident return $ text "\\label" <> braces ref -- | Convert list of inline elements to LaTeX. -inlineListToLaTeX :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListToLaTeX :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> LW m Doc inlineListToLaTeX lst = - mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst) + mapM inlineToLaTeX (fixLineInitialSpaces lst) >>= return . hcat -- nonbreaking spaces (~) in LaTeX don't work after line breaks, -- so we turn nbsps after hard breaks to \hspace commands. @@ -837,43 +970,35 @@ inlineListToLaTeX lst = fixNbsps s = let (ys,zs) = span (=='\160') s in replicate (length ys) hspace ++ [Str zs] hspace = RawInline "latex" "\\hspace*{0.333em}" - -- linebreaks after blank lines cause problems: - fixBreaks [] = [] - fixBreaks ys@(LineBreak : LineBreak : _) = - case span (== LineBreak) ys of - (lbs, rest) -> RawInline "latex" - ("\\\\[" ++ show (length lbs) ++ - "\\baselineskip]") : fixBreaks rest - fixBreaks (y:ys) = y : fixBreaks ys isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True -isQuoted _ = False +isQuoted _ = False -- | Convert inline element to LaTeX -inlineToLaTeX :: Inline -- ^ Inline to convert - -> State WriterState Doc +inlineToLaTeX :: PandocMonad m + => Inline -- ^ Inline to convert + -> LW m Doc inlineToLaTeX (Span (id',classes,kvs) ils) = do - ref <- toLabel id' - let linkAnchor = if null id' - then empty - else "\\protect\\hypertarget" <> braces (text ref) <> - braces empty + linkAnchor <- hypertarget False id' empty + lang <- toLang $ lookup "lang" kvs let cmds = ["textup" | "csl-no-emph" `elem` classes] ++ ["textnormal" | "csl-no-strong" `elem` classes || "csl-no-smallcaps" `elem` classes] ++ ["RL" | ("dir", "rtl") `elem` kvs] ++ ["LR" | ("dir", "ltr") `elem` kvs] ++ - (case lookup "lang" kvs of - Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng + (case lang of + Just lng -> let (l, o) = toPolyglossia lng ops = if null o then "" else ("[" ++ o ++ "]") in ["text" ++ l ++ ops] Nothing -> []) contents <- inlineListToLaTeX ils - return $ linkAnchor <> - if null cmds - then braces contents - else foldr inCmd contents cmds + return $ (if null id' + then empty + else "\\protect" <> linkAnchor) <> + (if null cmds + then braces contents + else foldr inCmd contents cmds) inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = @@ -886,7 +1011,7 @@ inlineToLaTeX (Strikeout lst) = do return $ inCmd "sout" contents inlineToLaTeX (Superscript lst) = inlineListToLaTeX lst >>= return . inCmd "textsuperscript" -inlineToLaTeX (Subscript lst) = do +inlineToLaTeX (Subscript lst) = inlineListToLaTeX lst >>= return . inCmd "textsubscript" inlineToLaTeX (SmallCaps lst) = inlineListToLaTeX lst >>= return . inCmd "textsc" @@ -901,26 +1026,39 @@ inlineToLaTeX (Cite cits lst) = do inlineToLaTeX (Code (_,classes,_) str) = do opts <- gets stOptions inHeading <- gets stInHeading + inItem <- gets stInItem + let listingsCode = do + let listingsopt = case getListingsLanguage classes of + Just l -> "[language=" ++ mbBraced l ++ "]" + Nothing -> "" + inNote <- gets stInNote + when inNote $ modify $ \s -> s{ stVerbInNote = True } + let chr = case "!\"&'()*,-./:;?@_" \\ str of + (c:_) -> c + [] -> '!' + let str' = escapeStringUsing (backslashEscapes "\\{}%~_&") str + -- we always put lstinline in a dummy 'passthrough' command + -- (defined in the default template) so that we don't have + -- to change the way we escape characters depending on whether + -- the lstinline is inside another command. See #1629: + return $ text $ "\\passthrough{\\lstinline" ++ listingsopt ++ [chr] ++ str' ++ [chr] ++ "}" + let rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) + $ stringToLaTeX CodeString str + where escapeSpaces = concatMap + (\c -> if c == ' ' then "\\ " else [c]) + let highlightCode = + case highlight (writerSyntaxMap opts) + formatLaTeXInline ("",classes,[]) str of + Left msg -> do + unless (null msg) $ report $ CouldNotHighlight msg + rawCode + Right h -> modify (\st -> st{ stHighlighting = True }) >> + return (text (T.unpack h)) case () of - _ | writerListings opts && not inHeading -> listingsCode - | writerHighlight opts && not (null classes) -> highlightCode + _ | writerListings opts && not (inHeading || inItem) -> listingsCode + | isJust (writerHighlightStyle opts) && not (null classes) + -> highlightCode | otherwise -> rawCode - where listingsCode = do - inNote <- gets stInNote - when inNote $ modify $ \s -> s{ stVerbInNote = True } - let chr = case "!\"&'()*,-./:;?@_" \\ str of - (c:_) -> c - [] -> '!' - return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] - highlightCode = do - case highlight formatLaTeXInline ("",classes,[]) str of - Nothing -> rawCode - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (text (T.unpack h)) - rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) - $ stringToLaTeX CodeString str - where - escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c]) inlineToLaTeX (Quoted qt lst) = do contents <- inlineListToLaTeX lst csquotes <- liftM stCsquotes get @@ -928,32 +1066,43 @@ inlineToLaTeX (Quoted qt lst) = do if csquotes then return $ "\\enquote" <> braces contents else do - let s1 = if (not (null lst)) && (isQuoted (head lst)) + let s1 = if not (null lst) && isQuoted (head lst) then "\\," else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) + let s2 = if not (null lst) && isQuoted (last lst) then "\\," else empty let inner = s1 <> contents <> s2 return $ case qt of DoubleQuote -> - if writerTeXLigatures opts + if isEnabled Ext_smart opts then text "``" <> inner <> text "''" else char '\x201C' <> inner <> char '\x201D' SingleQuote -> - if writerTeXLigatures opts + if isEnabled Ext_smart opts then char '`' <> inner <> char '\'' else char '\x2018' <> inner <> char '\x2019' -inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str -inlineToLaTeX (Math InlineMath str) = +inlineToLaTeX (Str str) = do + setEmptyLine False + liftM text $ stringToLaTeX TextString str +inlineToLaTeX (Math InlineMath str) = do + setEmptyLine False return $ "\\(" <> text str <> "\\)" -inlineToLaTeX (Math DisplayMath str) = +inlineToLaTeX (Math DisplayMath str) = do + setEmptyLine False return $ "\\[" <> text str <> "\\]" -inlineToLaTeX (RawInline f str) +inlineToLaTeX il@(RawInline f str) | f == Format "latex" || f == Format "tex" - = return $ text str - | otherwise = return empty -inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr + = do + setEmptyLine False + return $ text str + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToLaTeX LineBreak = do + emptyLine <- gets stEmptyLine + setEmptyLine True + return $ (if emptyLine then "~" else "") <> "\\\\" <> cr inlineToLaTeX SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of @@ -982,19 +1131,32 @@ inlineToLaTeX (Link _ txt (src, _)) = src' <- stringToLaTeX URLString (escapeURI src) return $ text ("\\href{" ++ src' ++ "}{") <> contents <> char '}' +inlineToLaTeX il@(Image _ _ ('d':'a':'t':'a':':':_, _)) = do + report $ InlineNotRendered il + return empty inlineToLaTeX (Image attr _ (source, _)) = do + setEmptyLine False modify $ \s -> s{ stGraphics = True } opts <- gets stOptions let showDim dir = let d = text (show dir) <> "=" - in case (dimension dir attr) of + in case dimension dir attr of Just (Pixel a) -> [d <> text (showInInch opts (Pixel a)) <> "in"] Just (Percent a) -> - [d <> text (showFl (a / 100)) <> "\\textwidth"] + [d <> text (showFl (a / 100)) <> + case dir of + Width -> "\\textwidth" + Height -> "\\textheight" + ] Just dim -> [d <> text (show dim)] Nothing -> - [] + case dir of + Width | isJust (dimension Height attr) -> + [d <> "\\textwidth"] + Height | isJust (dimension Width attr) -> + [d <> "\\textheight"] + _ -> [] dimList = showDim Width ++ showDim Height dims = if null dimList then empty @@ -1008,6 +1170,7 @@ inlineToLaTeX (Image attr _ (source, _)) = do (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <> dims <> braces (text source'') inlineToLaTeX (Note contents) = do + setEmptyLine False inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents @@ -1016,9 +1179,9 @@ inlineToLaTeX (Note contents) = do (CodeBlock _ _ : _) -> cr _ -> empty let noteContents = nest 2 contents' <> optnl - opts <- gets stOptions + beamer <- gets stBeamer -- in beamer slides, display footnote from current overlay forward - let beamerMark = if writerBeamer opts + let beamerMark = if beamer then text "<.->" else empty modify $ \st -> st{ stNotes = noteContents : stNotes st } @@ -1035,8 +1198,12 @@ protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs where ltx = RawInline (Format "latex") protectCode (x : xs) = x : protectCode xs -citationsToNatbib :: [Citation] -> State WriterState Doc -citationsToNatbib (one:[]) +setEmptyLine :: PandocMonad m => Bool -> LW m () +setEmptyLine b = modify $ \st -> st{ stEmptyLine = b } + +citationsToNatbib :: PandocMonad m => [Citation] -> LW m Doc +citationsToNatbib + [one] = citeCommand c p s k where Citation { citationId = k @@ -1046,8 +1213,8 @@ citationsToNatbib (one:[]) } = one c = case m of - AuthorInText -> "citet" - SuppressAuthor -> "citeyearpar" + AuthorInText -> "citet" + SuppressAuthor -> "citeyearpar" NormalCitation -> "citep" citationsToNatbib cits @@ -1056,9 +1223,11 @@ citationsToNatbib cits where noPrefix = all (null . citationPrefix) noSuffix = all (null . citationSuffix) - ismode m = all (((==) m) . citationMode) - p = citationPrefix $ head $ cits - s = citationSuffix $ last $ cits + ismode m = all ((==) m . citationMode) + p = citationPrefix $ + head cits + s = citationSuffix $ + last cits ks = intercalate ", " $ map citationId cits citationsToNatbib (c:cs) | citationMode c == AuthorInText = do @@ -1082,17 +1251,20 @@ citationsToNatbib cits = do SuppressAuthor -> citeCommand "citeyear" p s k NormalCitation -> citeCommand "citealp" p s k -citeCommand :: String -> [Inline] -> [Inline] -> String -> State WriterState Doc +citeCommand :: PandocMonad m + => String -> [Inline] -> [Inline] -> String -> LW m Doc citeCommand c p s k = do args <- citeArguments p s k return $ text ("\\" ++ c) <> args -citeArguments :: [Inline] -> [Inline] -> String -> State WriterState Doc +citeArguments :: PandocMonad m + => [Inline] -> [Inline] -> String -> LW m Doc citeArguments p s k = do let s' = case s of - (Str (x:[]) : r) | isPunctuation x -> dropWhile (== Space) r + (Str + [x] : r) | isPunctuation x -> dropWhile (== Space) r (Str (x:xs) : r) | isPunctuation x -> Str xs : r - _ -> s + _ -> s pdoc <- inlineListToLaTeX p sdoc <- inlineListToLaTeX s' let optargs = case (isEmpty pdoc, isEmpty sdoc) of @@ -1101,8 +1273,9 @@ citeArguments p s k = do (_ , _ ) -> brackets pdoc <> brackets sdoc return $ optargs <> braces (text k) -citationsToBiblatex :: [Citation] -> State WriterState Doc -citationsToBiblatex (one:[]) +citationsToBiblatex :: PandocMonad m => [Citation] -> LW m Doc +citationsToBiblatex + [one] = citeCommand cmd p s k where Citation { citationId = k @@ -1133,15 +1306,20 @@ citationsToBiblatex _ = return empty -- Determine listings language from list of class attributes. getListingsLanguage :: [String] -> Maybe String -getListingsLanguage [] = Nothing -getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs +getListingsLanguage xs + = foldr ((<|>) . toListingsLanguage) Nothing xs + +mbBraced :: String -> String +mbBraced x = if not (all isAlphaNum x) + then "{" <> x <> "}" + else x -- Extract a key from divs and spans extract :: String -> Block -> [String] extract key (Div attr _) = lookKey key attr -extract key (Plain ils) = concatMap (extractInline key) ils -extract key (Para ils) = concatMap (extractInline key) ils -extract key (Header _ _ ils) = concatMap (extractInline key) ils +extract key (Plain ils) = query (extractInline key) ils +extract key (Para ils) = query (extractInline key) ils +extract key (Header _ _ ils) = query (extractInline key) ils extract _ _ = [] -- Extract a key from spans @@ -1154,85 +1332,95 @@ lookKey :: String -> Attr -> [String] lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs -- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: String -> (String, String) +toPolyglossiaEnv :: Lang -> (String, String) toPolyglossiaEnv l = - case toPolyglossia $ (splitBy (=='-')) l of + case toPolyglossia l of ("arabic", o) -> ("Arabic", o) x -> x -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: [String] -> (String, String) -toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria") -toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya") -toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco") -toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania") -toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia") -toPolyglossia ("de":"1901":_) = ("german", "spelling=old") -toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old") -toPolyglossia ("de":"AT":_) = ("german", "variant=austrian") -toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old") -toPolyglossia ("de":"CH":_) = ("german", "variant=swiss") -toPolyglossia ("de":_) = ("german", "") -toPolyglossia ("dsb":_) = ("lsorbian", "") -toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly") -toPolyglossia ("en":"AU":_) = ("english", "variant=australian") -toPolyglossia ("en":"CA":_) = ("english", "variant=canadian") -toPolyglossia ("en":"GB":_) = ("english", "variant=british") -toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand") -toPolyglossia ("en":"UK":_) = ("english", "variant=british") -toPolyglossia ("en":"US":_) = ("english", "variant=american") -toPolyglossia ("grc":_) = ("greek", "variant=ancient") -toPolyglossia ("hsb":_) = ("usorbian", "") -toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic") -toPolyglossia ("sl":_) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") +toPolyglossia :: Lang -> (String, String) +toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ "AT" vars) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ "CH" vars) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Babel language string. -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: [String] -> String -toBabel ("de":"1901":_) = "german" -toBabel ("de":"AT":"1901":_) = "austrian" -toBabel ("de":"AT":_) = "naustrian" -toBabel ("de":"CH":"1901":_) = "swissgerman" -toBabel ("de":"CH":_) = "nswissgerman" -toBabel ("de":_) = "ngerman" -toBabel ("dsb":_) = "lowersorbian" -toBabel ("el":"polyton":_) = "polutonikogreek" -toBabel ("en":"AU":_) = "australian" -toBabel ("en":"CA":_) = "canadian" -toBabel ("en":"GB":_) = "british" -toBabel ("en":"NZ":_) = "newzealand" -toBabel ("en":"UK":_) = "british" -toBabel ("en":"US":_) = "american" -toBabel ("fr":"CA":_) = "canadien" -toBabel ("fra":"aca":_) = "acadian" -toBabel ("grc":_) = "polutonikogreek" -toBabel ("hsb":_) = "uppersorbian" -toBabel ("la":"x":"classic":_) = "classiclatin" -toBabel ("sl":_) = "slovene" -toBabel x = commonFromBcp47 x +toBabel :: Lang -> String +toBabel (Lang "de" _ "AT" vars) + | "1901" `elem` vars = "austrian" + | otherwise = "naustrian" +toBabel (Lang "de" _ "CH" vars) + | "1901" `elem` vars = "swissgerman" + | otherwise = "nswissgerman" +toBabel (Lang "de" _ _ vars) + | "1901" `elem` vars = "german" + | otherwise = "ngerman" +toBabel (Lang "dsb" _ _ _) = "lowersorbian" +toBabel (Lang "el" _ _ vars) + | "polyton" `elem` vars = "polutonikogreek" +toBabel (Lang "en" _ "AU" _) = "australian" +toBabel (Lang "en" _ "CA" _) = "canadian" +toBabel (Lang "en" _ "GB" _) = "british" +toBabel (Lang "en" _ "NZ" _) = "newzealand" +toBabel (Lang "en" _ "UK" _) = "british" +toBabel (Lang "en" _ "US" _) = "american" +toBabel (Lang "fr" _ "CA" _) = "canadien" +toBabel (Lang "fra" _ _ vars) + | "aca" `elem` vars = "acadian" +toBabel (Lang "grc" _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars) + | "x-classic" `elem` vars = "classiclatin" +toBabel (Lang "sl" _ _ _) = "slovene" +toBabel x = commonFromBcp47 x -- Takes a list of the constituents of a BCP 47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: [String] -> String -commonFromBcp47 [] = "" -commonFromBcp47 ("pt":"BR":_) = "brazil" +commonFromBcp47 :: Lang -> String +commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil" -- Note: documentation says "brazilian" works too, but it doesn't seem to work -- on some systems. See #2953. -commonFromBcp47 ("sr":"Cyrl":_) = "serbianc" -commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin" -commonFromBcp47 x = fromIso $ head x +commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" +commonFromBcp47 (Lang "zh" "Latn" _ vars) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _) = fromIso l where fromIso "af" = "afrikaans" fromIso "am" = "amharic" @@ -1316,10 +1504,6 @@ commonFromBcp47 x = fromIso $ head x fromIso "vi" = "vietnamese" fromIso _ = "" -deNote :: Inline -> Inline -deNote (Note _) = RawInline (Format "latex") "" -deNote x = x - pDocumentOptions :: P.Parsec String () [String] pDocumentOptions = do P.char '[' diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 98b08b08b..1be955fe3 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2015 John MacFarlane + Copyright : Copyright (C) 2007-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,79 +30,95 @@ Conversion of 'Pandoc' documents to groff man page format. -} module Text.Pandoc.Writers.Man ( writeMan) where +import Control.Monad.State.Strict +import Data.List (intercalate, intersperse, sort, stripPrefix) +import qualified Data.Map as Map +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Builder (deleteMeta) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Templates -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Readers.TeXMath -import Text.Printf ( printf ) -import Data.List ( stripPrefix, intersperse, intercalate ) -import Data.Maybe (fromMaybe) import Text.Pandoc.Pretty -import Text.Pandoc.Builder (deleteMeta) -import Control.Monad.State +import Text.Pandoc.Shared +import Text.Pandoc.Templates +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) type Notes = [[Block]] -data WriterState = WriterState { stNotes :: Notes - , stHasTables :: Bool } +data WriterState = WriterState { stNotes :: Notes + , stFontFeatures :: Map.Map Char Bool + , stHasTables :: Bool } + +defaultWriterState :: WriterState +defaultWriterState = WriterState { stNotes = [] + , stFontFeatures = Map.fromList [ + ('I',False) + , ('B',False) + , ('C',False) + ] + , stHasTables = False } -- | Convert Pandoc to Man. -writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False) +writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeMan opts document = + evalStateT (pandocToMan opts document) defaultWriterState -- | Return groff man representation of document. -pandocToMan :: WriterOptions -> Pandoc -> State WriterState String +pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m Text pandocToMan opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let render' = render colwidth + let render' :: Doc -> Text + render' = render colwidth titleText <- inlineListToMan opts $ docTitle meta let title' = render' titleText let setFieldsFromTitle = - case break (== ' ') title' of - (cmdName, rest) -> case break (=='(') cmdName of - (xs, '(':ys) | not (null ys) && - last ys == ')' -> + case T.break (== ' ') title' of + (cmdName, rest) -> case T.break (=='(') cmdName of + (xs, ys) | "(" `T.isPrefixOf` ys + && ")" `T.isSuffixOf` ys -> defField "title" xs . - defField "section" (init ys) . - case splitBy (=='|') rest of + defField "section" (T.init $ T.drop 1 ys) . + case T.splitOn "|" rest of (ft:hds) -> - defField "footer" (trim ft) . + defField "footer" (T.strip ft) . defField "header" - (trim $ concat hds) + (T.strip $ mconcat hds) [] -> id _ -> defField "title" title' metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToMan opts) - (fmap (render colwidth) . inlineListToMan opts) + (fmap render' . blockListToMan opts) + (fmap render' . inlineListToMan opts) $ deleteMeta "title" meta body <- blockListToMan opts blocks - notes <- liftM stNotes get + notes <- gets stNotes notes' <- notesToMan opts (reverse notes) let main = render' $ body $$ notes' $$ text "" - hasTables <- liftM stHasTables get + hasTables <- gets stHasTables let context = defField "body" main $ setFieldsFromTitle $ defField "has-tables" hasTables $ defField "hyphenate" True - $ defField "pandoc-version" pandocVersion - $ metadata + $ defField "pandoc-version" pandocVersion metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return man representation of notes. -notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc notesToMan opts notes = if null notes then return empty - else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= + else zipWithM (noteToMan opts) [1..] notes >>= return . (text ".SH NOTES" $$) . vcat -- | Return man representation of a note. -noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToMan :: PandocMonad m => WriterOptions -> Int -> [Block] -> StateT WriterState m Doc noteToMan opts num note = do contents <- blockListToMan opts note let marker = cr <> text ".SS " <> brackets (text (show num)) @@ -123,11 +140,11 @@ escapeString = escapeStringUsing manEscapes -- | Escape a literal (code) section for Man. escapeCode :: String -> String -escapeCode = concat . intersperse "\n" . map escapeLine . lines where +escapeCode = intercalate "\n" . map escapeLine . lines where escapeLine codeline = case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of a@('.':_) -> "\\&" ++ a - b -> b + b -> b -- We split inline lists into sentences, and print one sentence per -- line. groff/troff treats the line-ending period differently. @@ -139,8 +156,8 @@ breakSentence [] = ([],[]) breakSentence xs = let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True - isSentenceEndInline (LineBreak) = True - isSentenceEndInline _ = False + isSentenceEndInline LineBreak = True + isSentenceEndInline _ = False (as, bs) = break isSentenceEndInline xs in case bs of [] -> (as, []) @@ -160,9 +177,10 @@ splitSentences xs = in if null rest then [sent] else sent : splitSentences rest -- | Convert Pandoc block element to man. -blockToMan :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc +blockToMan :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> StateT WriterState m Doc blockToMan _ Null = return empty blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = @@ -173,9 +191,11 @@ blockToMan opts (Para inlines) = do return $ text ".PP" $$ contents blockToMan opts (LineBlock lns) = blockToMan opts $ linesToPara lns -blockToMan _ (RawBlock f str) +blockToMan _ b@(RawBlock f str) | f == Format "man" = return $ text str - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" blockToMan opts (Header level _ inlines) = do contents <- inlineListToMan opts inlines @@ -205,12 +225,12 @@ blockToMan opts (Table caption alignments widths headers rows) = then repeat "" else map (printf "w(%0.1fn)" . (70 *)) widths -- 78n default width - 8n indent = 70n - let coldescriptions = text $ intercalate " " + let coldescriptions = text $ unwords (zipWith (\align width -> aligncode align ++ width) alignments iwidths) ++ "." colheadings <- mapM (blockListToMan opts) headers let makeRow cols = text "T{" $$ - (vcat $ intersperse (text "T}@T{") cols) $$ + vcat (intersperse (text "T}@T{") cols) $$ text "T}" let colheadings' = if all null headers then empty @@ -227,7 +247,8 @@ blockToMan opts (BulletList items) = do return (vcat contents) blockToMan opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs - let indent = 1 + (maximum $ map length markers) + let indent = 1 + + maximum (map length markers) contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ zip markers items return (vcat contents) @@ -236,11 +257,11 @@ blockToMan opts (DefinitionList items) = do return (vcat contents) -- | Convert bullet list item (list of blocks) to man. -bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToMan :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Doc bulletListItemToMan _ [] = return empty -bulletListItemToMan opts ((Para first):rest) = - bulletListItemToMan opts ((Plain first):rest) -bulletListItemToMan opts ((Plain first):rest) = do +bulletListItemToMan opts (Para first:rest) = + bulletListItemToMan opts (Plain first:rest) +bulletListItemToMan opts (Plain first:rest) = do first' <- blockToMan opts (Plain first) rest' <- blockListToMan opts rest let first'' = text ".IP \\[bu] 2" $$ first' @@ -254,14 +275,15 @@ bulletListItemToMan opts (first:rest) = do return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" -- | Convert ordered list item (a list of blocks) to man. -orderedListItemToMan :: WriterOptions -- ^ options - -> String -- ^ order marker for list item - -> Int -- ^ number of spaces to indent - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc +orderedListItemToMan :: PandocMonad m + => WriterOptions -- ^ options + -> String -- ^ order marker for list item + -> Int -- ^ number of spaces to indent + -> [Block] -- ^ list item (list of blocks) + -> StateT WriterState m Doc orderedListItemToMan _ _ _ [] = return empty -orderedListItemToMan opts num indent ((Para first):rest) = - orderedListItemToMan opts num indent ((Plain first):rest) +orderedListItemToMan opts num indent (Para first:rest) = + orderedListItemToMan opts num indent (Plain first:rest) orderedListItemToMan opts num indent (first:rest) = do first' <- blockToMan opts first rest' <- blockListToMan opts rest @@ -273,44 +295,47 @@ orderedListItemToMan opts num indent (first:rest) = do return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to man. -definitionListItemToMan :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState Doc +definitionListItemToMan :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> StateT WriterState m Doc definitionListItemToMan opts (label, defs) = do labelText <- inlineListToMan opts label contents <- if null defs then return empty - else liftM vcat $ forM defs $ \blocks -> do - let (first, rest) = case blocks of - ((Para x):y) -> (Plain x,y) - (x:y) -> (x,y) - [] -> error "blocks is null" - rest' <- liftM vcat $ - mapM (\item -> blockToMan opts item) rest - first' <- blockToMan opts first - return $ first' $$ text ".RS" $$ rest' $$ text ".RE" + else liftM vcat $ forM defs $ \blocks -> + case blocks of + (x:xs) -> do + first' <- blockToMan opts $ + case x of + Para y -> Plain y + _ -> x + rest' <- liftM vcat $ mapM + (\item -> blockToMan opts item) xs + return $ first' $$ + text ".RS" $$ rest' $$ text ".RE" + [] -> return empty return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents -- | Convert list of Pandoc block elements to man. -blockListToMan :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc +blockListToMan :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> StateT WriterState m Doc blockListToMan opts blocks = mapM (blockToMan opts) blocks >>= (return . vcat) -- | Convert list of Pandoc inline elements to man. -inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. -inlineToMan :: WriterOptions -> Inline -> State WriterState Doc +inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc inlineToMan opts (Span _ ils) = inlineListToMan opts ils -inlineToMan opts (Emph lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[I]" <> contents <> text "\\f[]" -inlineToMan opts (Strong lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[B]" <> contents <> text "\\f[]" +inlineToMan opts (Emph lst) = + withFontFeature 'I' (inlineListToMan opts lst) +inlineToMan opts (Strong lst) = + withFontFeature 'B' (inlineListToMan opts lst) inlineToMan opts (Strikeout lst) = do contents <- inlineListToMan opts lst return $ text "[STRIKEOUT:" <> contents <> char ']' @@ -330,22 +355,26 @@ inlineToMan opts (Quoted DoubleQuote lst) = do inlineToMan opts (Cite _ lst) = inlineListToMan opts lst inlineToMan _ (Code _ str) = - return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" + withFontFeature 'C' (return (text $ escapeCode str)) inlineToMan _ (Str str@('.':_)) = return $ afterBreak "\\&" <> text (escapeString str) inlineToMan _ (Str str) = return $ text $ escapeString str inlineToMan opts (Math InlineMath str) = - inlineListToMan opts $ texMathToInlines InlineMath str + lift (texMathToInlines InlineMath str) >>= inlineListToMan opts inlineToMan opts (Math DisplayMath str) = do - contents <- inlineListToMan opts $ texMathToInlines DisplayMath str + contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts return $ cr <> text ".RS" $$ contents $$ text ".RE" -inlineToMan _ (RawInline f str) +inlineToMan _ il@(RawInline f str) | f == Format "man" = return $ text str - | otherwise = return empty -inlineToMan _ (LineBreak) = return $ + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToMan _ LineBreak = return $ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr inlineToMan _ SoftBreak = return space inlineToMan _ Space = return space +inlineToMan opts (Link _ txt ('#':_, _)) = + inlineListToMan opts txt -- skip internal links inlineToMan opts (Link _ txt (src, _)) = do linktext <- inlineListToMan opts txt let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) @@ -355,7 +384,7 @@ inlineToMan opts (Link _ txt (src, _)) = do char '<' <> text srcSuffix <> char '>' _ -> linktext <> text " (" <> text src <> char ')' inlineToMan opts (Image attr alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || + let txt = if null alternate || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks then [Str "image"] else alternate @@ -364,6 +393,24 @@ inlineToMan opts (Image attr alternate (source, tit)) = do inlineToMan _ (Note contents) = do -- add to notes in state modify $ \st -> st{ stNotes = contents : stNotes st } - notes <- liftM stNotes get - let ref = show $ (length notes) + notes <- gets stNotes + let ref = show (length notes) return $ char '[' <> text ref <> char ']' + +fontChange :: PandocMonad m => StateT WriterState m Doc +fontChange = do + features <- gets stFontFeatures + let filling = sort [c | (c,True) <- Map.toList features] + return $ text $ "\\f[" ++ filling ++ "]" + +withFontFeature :: PandocMonad m + => Char + -> StateT WriterState m Doc + -> StateT WriterState m Doc +withFontFeature c action = do + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + begin <- fontChange + d <- action + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + end <- fontChange + return $ begin <> d <> end diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index e3bb3eea0..cdd8f3b66 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,9 @@ -{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables, MultiWayIf #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,47 +34,50 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text. Markdown: <http://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) -import Data.Maybe (fromMaybe) -import Data.Monoid (Any(..)) -import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy ) -import Data.Char ( isSpace, isPunctuation, ord, chr ) -import Data.Ord ( comparing ) -import Text.Pandoc.Pretty import Control.Monad.Reader -import Control.Monad.State -import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (texMathToInlines) -import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) -import Network.URI (isURI) +import Control.Monad.State.Strict +import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum) import Data.Default -import Data.Yaml (Value(Object,String,Array,Bool,Number)) import qualified Data.HashMap.Strict as H -import qualified Data.Vector as V -import qualified Data.Text as T +import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose) +import qualified Data.Map as M +import Data.Maybe (fromMaybe) +import Data.Monoid (Any (..)) +import Data.Ord (comparing) import qualified Data.Set as Set -import Network.HTTP ( urlEncode ) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Vector as V +import Data.Yaml (Value (Array, Bool, Number, Object, String)) +import Network.HTTP (urlEncode) +import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) +import Text.Pandoc.Pretty +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Walk +import Text.Pandoc.Writers.HTML (writeHtml5String) +import Text.Pandoc.Writers.Math (texMathToInlines) +import Text.Pandoc.Writers.Shared type Notes = [[Block]] -type Ref = ([Inline], Target, Attr) +type Ref = (Doc, Target, Attr) type Refs = [Ref] -type MD = ReaderT WriterEnv (State WriterState) +type MD m = ReaderT WriterEnv (StateT WriterState m) -evalMD :: MD a -> WriterEnv -> WriterState -> a -evalMD md env st = evalState (runReaderT md env) st +evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a +evalMD md env st = evalStateT (runReaderT md env) st -data WriterEnv = WriterEnv { envInList :: Bool - , envPlain :: Bool +data WriterEnv = WriterEnv { envInList :: Bool + , envPlain :: Bool , envRefShortcutable :: Bool , envBlockLevel :: Int - , envEscapeSpaces :: Bool + , envEscapeSpaces :: Bool } instance Default WriterEnv @@ -82,21 +88,26 @@ instance Default WriterEnv , envEscapeSpaces = False } -data WriterState = WriterState { stNotes :: Notes - , stRefs :: Refs - , stIds :: Set.Set String +data WriterState = WriterState { stNotes :: Notes + , stRefs :: Refs + , stKeys :: M.Map Key + (M.Map (Target, Attr) Int) + , stLastIdx :: Int + , stIds :: Set.Set String , stNoteNum :: Int } instance Default WriterState where def = WriterState{ stNotes = [] , stRefs = [] + , stKeys = M.empty + , stLastIdx = 0 , stIds = Set.empty , stNoteNum = 1 } -- | Convert Pandoc to Markdown. -writeMarkdown :: WriterOptions -> Pandoc -> String +writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMarkdown opts document = evalMD (pandocToMarkdown opts{ writerWrapText = if isEnabled Ext_hard_line_breaks opts @@ -106,7 +117,7 @@ writeMarkdown opts document = -- | Convert Pandoc to plain text (like markdown, but without links, -- pictures, or inline formatting). -writePlain :: WriterOptions -> Pandoc -> String +writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m Text writePlain opts document = evalMD (pandocToMarkdown opts document) def{ envPlain = True } def @@ -155,8 +166,7 @@ jsonToYaml (Object hashmap) = | otherwise -> (k' <> ":") $$ x (k', Object _, x) -> (k' <> ":") $$ nest 2 x (_, String "", _) -> empty - (k', _, x) | k == "meta-json" -> empty - | otherwise -> k' <> ":" <> space <> hang 2 "" x) + (k', _, x) -> k' <> ":" <> space <> hang 2 "" x) $ sortBy (comparing fst) $ H.toList hashmap jsonToYaml (Array vec) = vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec @@ -171,15 +181,17 @@ jsonToYaml (Number n) = text $ show n jsonToYaml _ = empty -- | Return markdown representation of document. -pandocToMarkdown :: WriterOptions -> Pandoc -> MD String +pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m Text pandocToMarkdown opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing isPlain <- asks envPlain - metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToMarkdown opts) - (fmap (render colwidth) . inlineListToMarkdown opts) + let render' :: Doc -> Text + render' = render colwidth . chomp + metadata <- metaToJSON' + (fmap render' . blockListToMarkdown opts) + (fmap render' . blockToMarkdown opts . Plain) meta let title' = maybe empty text $ getField "title" metadata let authors' = maybe [] (map text) $ getField "author" metadata @@ -196,49 +208,51 @@ pandocToMarkdown opts (Pandoc meta blocks) = do | otherwise -> empty Nothing -> empty let headerBlocks = filter isHeaderBlock blocks - let toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks - else empty + toc <- if writerTableOfContents opts + then render' <$> tableOfContents opts headerBlocks + else return "" -- Strip off final 'references' header if markdown citations enabled let blocks' = if isEnabled Ext_citations opts then case reverse blocks of (Div (_,["references"],_) _):xs -> reverse xs - _ -> blocks + _ -> blocks else blocks body <- blockListToMarkdown opts blocks' notesAndRefs' <- notesAndRefs opts - let render' :: Doc -> String - render' = render colwidth let main = render' $ body <> notesAndRefs' - let context = defField "toc" (render' toc) + let context = -- for backwards compatibility we populate toc + -- with the contents of the toc, rather than a + -- boolean: + defField "toc" toc + $ defField "table-of-contents" toc $ defField "body" main $ (if isNullMeta meta then id else defField "titleblock" (render' titleblock)) - $ metadata + $ addVariablesToJSON opts metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return markdown representation of reference key table. -refsToMarkdown :: WriterOptions -> Refs -> MD Doc +refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat -- | Return markdown representation of a reference key. -keyToMarkdown :: WriterOptions +keyToMarkdown :: PandocMonad m + => WriterOptions -> Ref - -> MD Doc -keyToMarkdown opts (label, (src, tit), attr) = do - label' <- inlineListToMarkdown opts label + -> MD m Doc +keyToMarkdown opts (label', (src, tit), attr) = do let tit' = if null tit then empty else space <> "\"" <> text tit <> "\"" return $ nest 2 $ hang 2 ("[" <> label' <> "]:" <> space) (text src <> tit') - <> linkAttributes opts attr + <+> linkAttributes opts attr -- | Return markdown representation of notes. -notesToMarkdown :: WriterOptions -> [[Block]] -> MD Doc +notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc notesToMarkdown opts notes = do n <- gets stNoteNum notes' <- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [n..] notes) @@ -246,7 +260,7 @@ notesToMarkdown opts notes = do return $ vsep notes' -- | Return markdown representation of a note. -noteToMarkdown :: WriterOptions -> Int -> [Block] -> MD Doc +noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m Doc noteToMarkdown opts num blocks = do contents <- blockListToMarkdown opts blocks let num' = text $ writerIdentifierPrefix opts ++ show num @@ -256,45 +270,65 @@ noteToMarkdown opts num blocks = do let markerSize = 4 + offset num' let spacer = case writerTabStop opts - markerSize of n | n > 0 -> text $ replicate n ' ' - _ -> text " " + _ -> text " " return $ if isEnabled Ext_footnotes opts then hang (writerTabStop opts) (marker <> spacer) contents else marker <> spacer <> contents -- | Escape special characters for Markdown. escapeString :: WriterOptions -> String -> String -escapeString opts = escapeStringUsing markdownEscapes - where markdownEscapes = ('<', "<") : ('>', ">") : - backslashEscapes specialChars - specialChars = - (if isEnabled Ext_superscript opts - then ('^':) - else id) . - (if isEnabled Ext_subscript opts - then ('~':) - else id) . - (if isEnabled Ext_tex_math_dollars opts - then ('$':) - else id) $ - "\\`*_[]#" +escapeString _ [] = [] +escapeString opts (c:cs) = + case c of + '<' | isEnabled Ext_all_symbols_escapable opts -> + '\\' : '<' : escapeString opts cs + | otherwise -> "<" ++ escapeString opts cs + '>' | isEnabled Ext_all_symbols_escapable opts -> + '\\' : '>' : escapeString opts cs + | otherwise -> ">" ++ escapeString opts cs + '@' | isEnabled Ext_citations opts -> + case cs of + (d:_) + | isAlphaNum d || d == '_' + -> '\\':'@':escapeString opts cs + _ -> '@':escapeString opts cs + _ | c `elem` ['\\','`','*','_','[',']','#'] -> + '\\':c:escapeString opts cs + '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':escapeString opts cs + '^' | isEnabled Ext_superscript opts -> '\\':'^':escapeString opts cs + '~' | isEnabled Ext_subscript opts -> '\\':'~':escapeString opts cs + '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':escapeString opts cs + '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs + '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs + '-' | isEnabled Ext_smart opts -> + case cs of + '-':_ -> '\\':'-':escapeString opts cs + _ -> '-':escapeString opts cs + '.' | isEnabled Ext_smart opts -> + case cs of + '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest + _ -> '.':escapeString opts cs + _ -> c : escapeString opts cs -- | Construct table of contents from list of header blocks. -tableOfContents :: WriterOptions -> [Block] -> Doc -tableOfContents opts headers = - let opts' = opts { writerIgnoreNotes = True } - contents = BulletList $ map (elementToListItem opts) $ hierarchicalize headers - in evalMD (blockToMarkdown opts' contents) def def +tableOfContents :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc +tableOfContents opts headers = do + contents <- BulletList <$> mapM (elementToListItem opts) (hierarchicalize headers) + blockToMarkdown opts contents -- | Converts an Element to a list item for a table of contents, -elementToListItem :: WriterOptions -> Element -> [Block] +elementToListItem :: PandocMonad m => WriterOptions -> Element -> MD m [Block] elementToListItem opts (Sec lev _nums (ident,_,_) headerText subsecs) - = Plain headerLink : - [ BulletList (map (elementToListItem opts) subsecs) | - not (null subsecs) && lev < writerTOCDepth opts ] - where headerLink = if null ident - then headerText - else [Link nullAttr headerText ('#':ident, "")] -elementToListItem _ (Blk _) = [] + = do isPlain <- asks envPlain + let headerLink = if null ident || isPlain + then walk deNote headerText + else [Link nullAttr (walk deNote headerText) + ('#':ident, "")] + listContents <- if null subsecs || lev >= writerTOCDepth opts + then return [] + else mapM (elementToListItem opts) subsecs + return [Plain headerLink, BulletList listContents] +elementToListItem _ (Blk _) = return [] attrsToMarkdown :: Attr -> Doc attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] @@ -331,10 +365,10 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker beginsWithOrderedListMarker :: String -> Bool beginsWithOrderedListMarker str = case runParser olMarker defaultParserState "para start" (take 10 str) of - Left _ -> False - Right _ -> True + Left _ -> False + Right _ -> True -notesAndRefs :: WriterOptions -> MD Doc +notesAndRefs :: PandocMonad m => WriterOptions -> MD m Doc notesAndRefs opts = do notes' <- reverse <$> gets stNotes >>= notesToMarkdown opts modify $ \s -> s { stNotes = [] } @@ -345,16 +379,17 @@ notesAndRefs opts = do if | writerReferenceLocation opts == EndOfDocument -> empty | isEmpty notes' && isEmpty refs' -> empty | otherwise -> blankline - + return $ (if isEmpty notes' then empty else blankline <> notes') <> (if isEmpty refs' then empty else blankline <> refs') <> endSpacing -- | Convert Pandoc block element to markdown. -blockToMarkdown :: WriterOptions -- ^ Options +blockToMarkdown :: PandocMonad m + => WriterOptions -- ^ Options -> Block -- ^ Block element - -> MD Doc + -> MD m Doc blockToMarkdown opts blk = local (\env -> env {envBlockLevel = envBlockLevel env + 1}) $ do doc <- blockToMarkdown' opts blk @@ -363,17 +398,26 @@ blockToMarkdown opts blk = then notesAndRefs opts >>= (\d -> return $ doc <> d) else return doc -blockToMarkdown' :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> MD Doc +blockToMarkdown' :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> MD m Doc blockToMarkdown' _ Null = return empty blockToMarkdown' opts (Div attrs ils) = do contents <- blockListToMarkdown opts ils - return $ if isEnabled Ext_raw_html opts && - isEnabled Ext_markdown_in_html_blocks opts - then tagWithAttrs "div" attrs <> blankline <> - contents <> blankline <> "</div>" <> blankline - else contents <> blankline + return $ + case () of + _ | isEnabled Ext_fenced_divs opts && + attrs /= nullAttr -> + nowrap (text ":::" <+> attrsToMarkdown attrs) $$ + chomp contents $$ + text ":::" <> blankline + | isEnabled Ext_native_divs opts || + (isEnabled Ext_raw_html opts && + isEnabled Ext_markdown_in_html_blocks opts) -> + tagWithAttrs "div" attrs <> blankline <> + contents <> blankline <> "</div>" <> blankline + | otherwise -> contents <> blankline blockToMarkdown' opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines -- escape if para starts with ordered list marker @@ -382,13 +426,28 @@ blockToMarkdown' opts (Plain inlines) = do then Just $ writerColumns opts else Nothing let rendered = render colwidth contents - let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs - | otherwise = x : escapeDelimiter xs - escapeDelimiter [] = [] - let contents' = if isEnabled Ext_all_symbols_escapable opts && - not isPlain && beginsWithOrderedListMarker rendered - then text $ escapeDelimiter rendered - else contents + let escapeMarker (x:xs) | x `elem` (".()" :: String) = '\\':x:xs + | otherwise = x : escapeMarker xs + escapeMarker [] = [] + let contents' = + case rendered of + '%':_ | isEnabled Ext_pandoc_title_block opts && + isEnabled Ext_all_symbols_escapable opts -> + "\\" <> contents + '+':s:_ | not isPlain && isSpace s -> "\\" <> contents + '*':s:_ | not isPlain && isSpace s -> "\\" <> contents + '-':s:_ | not isPlain && isSpace s -> "\\" <> contents + '+':[] | not isPlain -> "\\" <> contents + '*':[] | not isPlain -> "\\" <> contents + '-':[] | not isPlain -> "\\" <> contents + '|':_ | (isEnabled Ext_line_blocks opts || + isEnabled Ext_pipe_tables opts) + && isEnabled Ext_all_symbols_escapable opts + -> "\\" <> contents + _ | not isPlain && beginsWithOrderedListMarker rendered + && isEnabled Ext_all_symbols_escapable opts + -> text $ escapeMarker rendered + | otherwise -> contents return $ contents' <> cr -- title beginning with fig: indicates figure blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = @@ -401,9 +460,11 @@ blockToMarkdown' opts (LineBlock lns) = mdLines <- mapM (inlineListToMarkdown opts) lns return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline else blockToMarkdown opts $ linesToPara lns -blockToMarkdown' opts (RawBlock f str) - | f == "markdown" = return $ text str <> text "\n" - | f == "html" && isEnabled Ext_raw_html opts = do +blockToMarkdown' opts b@(RawBlock f str) + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + = return $ text str <> text "\n" + | f `elem` ["html", "html5", "html4"] && isEnabled Ext_raw_html opts = do plain <- asks envPlain return $ if plain then empty @@ -415,7 +476,9 @@ blockToMarkdown' opts (RawBlock f str) return $ if plain then empty else text str <> text "\n" - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToMarkdown' opts HorizontalRule = do return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline blockToMarkdown' opts (Header level attr inlines) = do @@ -442,6 +505,8 @@ blockToMarkdown' opts (Header level attr inlines) = do space <> attrsToMarkdown attr | otherwise -> empty contents <- inlineListToMarkdown opts $ + -- ensure no newlines; see #3736 + walk lineBreakToSpace $ if level == 1 && plain then capitalize inlines else inlines @@ -497,39 +562,70 @@ blockToMarkdown' opts (BlockQuote blocks) = do contents <- blockListToMarkdown opts blocks return $ (prefixed leader contents) <> blankline blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do + let numcols = maximum (length aligns : length widths : + map length (headers:rows)) caption' <- inlineListToMarkdown opts caption let caption'' = if null caption || not (isEnabled Ext_table_captions opts) - then empty - else blankline <> ": " <> caption' <> blankline - rawHeaders <- mapM (blockListToMarkdown opts) headers - rawRows <- mapM (mapM (blockListToMarkdown opts)) rows + then blankline + else blankline $$ (": " <> caption') $$ blankline let isLineBreak LineBreak = Any True isLineBreak _ = Any False - let isSimple = all (==0) widths && - not ( getAny (query isLineBreak (headers:rows)) ) + let hasLineBreak = getAny . query isLineBreak + let isSimpleCell [Plain ils] = not (hasLineBreak ils) + isSimpleCell [Para ils ] = not (hasLineBreak ils) + isSimpleCell [] = True + isSimpleCell _ = False + let hasSimpleCells = all isSimpleCell (concat (headers:rows)) + let isSimple = hasSimpleCells && all (==0) widths let isPlainBlock (Plain _) = True isPlainBlock _ = False let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows) - (nst,tbl) <- case True of - _ | isSimple && - isEnabled Ext_simple_tables opts -> fmap (nest 2,) $ - pandocTable opts (all null headers) aligns widths - rawHeaders rawRows - | isSimple && - isEnabled Ext_pipe_tables opts -> fmap (id,) $ - pipeTable (all null headers) aligns rawHeaders rawRows - | not hasBlocks && - isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $ - pandocTable opts (all null headers) aligns widths - rawHeaders rawRows - | isEnabled Ext_grid_tables opts -> fmap (id,) $ - gridTable opts (all null headers) aligns widths - rawHeaders rawRows - | isEnabled Ext_raw_html opts -> fmap (id,) $ - return $ text $ writeHtmlString def - $ Pandoc nullMeta [t] - | otherwise -> return $ (id, text "[TABLE]") - return $ nst $ tbl $$ blankline $$ caption'' $$ blankline + let padRow r = case numcols - length r of + x | x > 0 -> r ++ replicate x empty + | otherwise -> r + let aligns' = case numcols - length aligns of + x | x > 0 -> aligns ++ replicate x AlignDefault + | otherwise -> aligns + let widths' = case numcols - length widths of + x | x > 0 -> widths ++ replicate x 0.0 + | otherwise -> widths + (nst,tbl) <- + case True of + _ | isSimple && + isEnabled Ext_simple_tables opts -> do + rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers + rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts)) + rows + (nest 2,) <$> pandocTable opts False (all null headers) + aligns' widths' rawHeaders rawRows + | isSimple && + isEnabled Ext_pipe_tables opts -> do + rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers + rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts)) + rows + (id,) <$> pipeTable (all null headers) aligns' rawHeaders rawRows + | not hasBlocks && + isEnabled Ext_multiline_tables opts -> do + rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers + rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts)) + rows + (nest 2,) <$> pandocTable opts True (all null headers) + aligns' widths' rawHeaders rawRows + | isEnabled Ext_grid_tables opts && + writerColumns opts >= 8 * numcols -> (id,) <$> + gridTable opts blockListToMarkdown + (all null headers) aligns' widths' headers rows + | isEnabled Ext_raw_html opts -> fmap (id,) $ + (text . T.unpack) <$> + (writeHtml5String def $ Pandoc nullMeta [t]) + | hasSimpleCells && + isEnabled Ext_pipe_tables opts -> do + rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers + rawRows <- mapM (fmap padRow . mapM (blockListToMarkdown opts)) + rows + (id,) <$> pipeTable (all null headers) aligns' rawHeaders rawRows + | otherwise -> return $ (id, text "[TABLE]") + return $ nst $ tbl $$ caption'' $$ blankline blockToMarkdown' opts (BulletList items) = do contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ cat contents <> blankline @@ -550,7 +646,7 @@ blockToMarkdown' opts (DefinitionList items) = do contents <- inList $ mapM (definitionListItemToMarkdown opts) items return $ cat contents <> blankline -inList :: MD a -> MD a +inList :: Monad m => MD m a -> MD m a inList p = local (\env -> env {envInList = True}) p addMarkdownAttribute :: String -> String @@ -562,7 +658,7 @@ addMarkdownAttribute s = x /= "markdown"] _ -> s -pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD Doc +pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD m Doc pipeTable headless aligns rawHeaders rawRows = do let sp = text " " let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty @@ -590,9 +686,10 @@ pipeTable headless aligns rawHeaders rawRows = do let body = vcat $ map torow rawRows return $ header $$ border $$ body -pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> MD Doc -pandocTable opts headless aligns widths rawHeaders rawRows = do +pandocTable :: PandocMonad m + => WriterOptions -> Bool -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> MD m Doc +pandocTable opts multiline headless aligns widths rawHeaders rawRows = do let isSimple = all (==0) widths let alignHeader alignment = case alignment of AlignLeft -> lblock @@ -609,23 +706,21 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do let minNumChars = (+ 2) . maximum . map minOffset let columns = transpose (rawHeaders : rawRows) -- minimal column width without wrapping a single word - let noWordWrapWidth - | writerWrapText opts == WrapAuto - = fromIntegral $ maximum (map minNumChars columns) - | otherwise = fromIntegral $ maximum (map numChars columns) - let relWidth w = floor $ max (fromIntegral (writerColumns opts) * w) - (noWordWrapWidth * w / minimum widths) + let relWidth w col = + max (floor $ fromIntegral (writerColumns opts - 1) * w) + (if writerWrapText opts == WrapAuto + then minNumChars col + else numChars col) let widthsInChars | isSimple = map numChars columns - | otherwise = map relWidth widths + | otherwise = zipWith relWidth widths columns let makeRow = hcat . intersperse (lblock 1 (text " ")) . (zipWith3 alignHeader aligns widthsInChars) let rows' = map makeRow rawRows let head' = makeRow rawHeaders - let maxRowHeight = maximum $ map height (head':rows') let underline = cat $ intersperse (text " ") $ map (\width -> text (replicate width '-')) widthsInChars - let border = if maxRowHeight > 1 + let border = if multiline then text (replicate (sum widthsInChars + length widthsInChars - 1) '-') else if headless @@ -634,7 +729,7 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do let head'' = if headless then empty else border <> cr <> head' - let body = if maxRowHeight > 1 + let body = if multiline then vsep rows' else vcat rows' let bottom = if headless @@ -642,62 +737,15 @@ pandocTable opts headless aligns widths rawHeaders rawRows = do else border return $ head'' $$ underline $$ body $$ bottom -gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> MD Doc -gridTable opts headless aligns widths headers' rawRows = do - let numcols = length headers' - let widths' = if all (==0) widths - then replicate numcols (1.0 / fromIntegral numcols) - else widths - let widthsInChars = map - ((\x -> x - 3) . floor . (fromIntegral (writerColumns opts) *)) widths' - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (1 : map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = chomp $ hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map (makeRow . map chomp) rawRows - let borderpart ch align widthInChars = - let widthInChars' = if widthInChars < 1 then 1 else widthInChars - in (if (align == AlignLeft || align == AlignCenter) - then char ':' - else char ch) <> - text (replicate widthInChars' ch) <> - (if (align == AlignRight || align == AlignCenter) - then char ':' - else char ch) - let border ch aligns' widthsInChars' = - char '+' <> - hcat (intersperse (char '+') (zipWith (borderpart ch) - aligns' widthsInChars')) <> char '+' - let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars) - rows' - let head'' = if headless - then empty - else head' $$ border '=' aligns widthsInChars - if headless - then return $ - border '-' aligns widthsInChars $$ - body $$ - border '-' (repeat AlignDefault) widthsInChars - else return $ - border '-' (repeat AlignDefault) widthsInChars $$ - head'' $$ - body $$ - border '-' (repeat AlignDefault) widthsInChars - itemEndsWithTightList :: [Block] -> Bool itemEndsWithTightList bs = case bs of [Plain _, BulletList xs] -> isTightList xs [Plain _, OrderedList _ xs] -> isTightList xs - _ -> False + _ -> False -- | Convert bullet list item (list of blocks) to markdown. -bulletListItemToMarkdown :: WriterOptions -> [Block] -> MD Doc +bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc bulletListItemToMarkdown opts bs = do contents <- blockListToMarkdown opts bs let sps = replicate (writerTabStop opts - 2) ' ' @@ -709,15 +757,16 @@ bulletListItemToMarkdown opts bs = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to markdown. -orderedListItemToMarkdown :: WriterOptions -- ^ options +orderedListItemToMarkdown :: PandocMonad m + => WriterOptions -- ^ options -> String -- ^ list item marker -> [Block] -- ^ list item (list of blocks) - -> MD Doc + -> MD m Doc orderedListItemToMarkdown opts marker bs = do contents <- blockListToMarkdown opts bs let sps = case length marker - writerTabStop opts of n | n > 0 -> text $ replicate n ' ' - _ -> text " " + _ -> text " " let start = text marker <> sps -- remove trailing blank line if item ends with a tight list let contents' = if itemEndsWithTightList bs @@ -726,9 +775,10 @@ orderedListItemToMarkdown opts marker bs = do return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert definition list item (label, list of blocks) to markdown. -definitionListItemToMarkdown :: WriterOptions +definitionListItemToMarkdown :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> MD Doc + -> MD m Doc definitionListItemToMarkdown opts (label, defs) = do labelText <- inlineListToMarkdown opts label defs' <- mapM (mapM (blockToMarkdown opts)) defs @@ -739,7 +789,7 @@ definitionListItemToMarkdown opts (label, defs) = do let leader = if isPlain then " " else ": " let sps = case writerTabStop opts - 3 of n | n > 0 -> text $ replicate n ' ' - _ -> text " " + _ -> text " " if isEnabled Ext_compact_definition_lists opts then do let contents = vcat $ map (\d -> hang tabStop (leader <> sps) @@ -750,7 +800,7 @@ definitionListItemToMarkdown opts (label, defs) = do $ vcat d <> cr) defs' let isTight = case defs of ((Plain _ : _): _) -> True - _ -> False + _ -> False return $ blankline <> nowrap labelText <> (if isTight then cr else blankline) <> contents <> blankline else do @@ -758,75 +808,129 @@ definitionListItemToMarkdown opts (label, defs) = do vsep (map vsep defs') <> blankline -- | Convert list of Pandoc block elements to markdown. -blockListToMarkdown :: WriterOptions -- ^ Options +blockListToMarkdown :: PandocMonad m + => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> MD Doc -blockListToMarkdown opts blocks = + -> MD m Doc +blockListToMarkdown opts blocks = do + inlist <- asks envInList + isPlain <- asks envPlain + -- a) insert comment between list and indented code block, or the + -- code block will be treated as a list continuation paragraph + -- b) change Plain to Para unless it's followed by a RawBlock + -- or has a list as its parent (#3487) + let fixBlocks (b : CodeBlock attr x : rest) + | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) + && isListBlock b = b : commentSep : CodeBlock attr x : + fixBlocks rest + fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (Plain ils : bs@(RawBlock{}:_)) = + Plain ils : fixBlocks bs + fixBlocks (Plain ils : bs) | inlist = + Plain ils : fixBlocks bs + fixBlocks (Plain ils : bs) = + Para ils : fixBlocks bs + fixBlocks (x : xs) = x : fixBlocks xs + fixBlocks [] = [] + isListBlock (BulletList _) = True + isListBlock (OrderedList _ _) = True + isListBlock (DefinitionList _) = True + isListBlock _ = False + commentSep = if isPlain + then Null + else if isEnabled Ext_raw_html opts + then RawBlock "html" "<!-- -->\n" + else RawBlock "markdown" " \n" mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat - -- insert comment between list and indented code block, or the - -- code block will be treated as a list continuation paragraph - where fixBlocks (b : CodeBlock attr x : rest) - | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) - && isListBlock b = b : commentSep : CodeBlock attr x : - fixBlocks rest - fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (x : xs) = x : fixBlocks xs - fixBlocks [] = [] - isListBlock (BulletList _) = True - isListBlock (OrderedList _ _) = True - isListBlock (DefinitionList _) = True - isListBlock _ = False - commentSep = if isEnabled Ext_raw_html opts - then RawBlock "html" "<!-- -->\n" - else RawBlock "markdown" " " + +getKey :: Doc -> Key +getKey = toKey . render Nothing -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. -getReference :: Attr -> [Inline] -> Target -> MD [Inline] +getReference :: PandocMonad m => Attr -> Doc -> Target -> MD m Doc getReference attr label target = do - st <- get - case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of + refs <- gets stRefs + case find (\(_,t,a) -> t == target && a == attr) refs of Just (ref, _, _) -> return ref Nothing -> do - let label' = case find (\(l,_,_) -> l == label) (stRefs st) of - Just _ -> -- label is used; generate numerical label - case find (\n -> notElem [Str (show n)] - (map (\(l,_,_) -> l) (stRefs st))) - [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" - Nothing -> label - modify (\s -> s{ stRefs = (label', target, attr) : stRefs st }) - return label' + keys <- gets stKeys + case M.lookup (getKey label) keys of + Nothing -> do -- no other refs with this label + (lab', idx) <- if isEmpty label + then do + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + return (text (show i), i) + else return (label, 0) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) idx mempty) + (stKeys s) }) + return lab' + + Just km -> do -- we have refs with this label + case M.lookup (target, attr) km of + Just i -> do + let lab' = label <> if i == 0 + then mempty + else text (show i) + -- make sure it's in stRefs; it may be + -- a duplicate that was printed in a previous + -- block: + when ((lab', target, attr) `notElem` refs) $ + modify (\s -> s{ + stRefs = (lab', target, attr) : refs }) + return lab' + Nothing -> do -- but this one is to a new target + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + let lab' = text (show i) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) i km) + (stKeys s) }) + return lab' -- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: WriterOptions -> [Inline] -> MD Doc +inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc inlineListToMarkdown opts lst = do inlist <- asks envInList go (if inlist then avoidBadWrapsInList lst else lst) where go [] = return empty go (i:is) = case i of (Link _ _ _) -> case is of - -- If a link is followed by another link or '[' we don't shortcut - (Link _ _ _):_ -> unshortcutable - Space:(Link _ _ _):_ -> unshortcutable - Space:(Str('[':_)):_ -> unshortcutable - Space:(RawInline _ ('[':_)):_ -> unshortcutable - Space:(Cite _ _):_ -> unshortcutable - SoftBreak:(Link _ _ _):_ -> unshortcutable - SoftBreak:(Str('[':_)):_ -> unshortcutable + -- If a link is followed by another link, or '[', '(' or ':' + -- then we don't shortcut + (Link _ _ _):_ -> unshortcutable + Space:(Link _ _ _):_ -> unshortcutable + Space:(Str('[':_)):_ -> unshortcutable + Space:(RawInline _ ('[':_)):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + SoftBreak:(Link _ _ _):_ -> unshortcutable + SoftBreak:(Str('[':_)):_ -> unshortcutable SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable - SoftBreak:(Cite _ _):_ -> unshortcutable - (Cite _ _):_ -> unshortcutable - Str ('[':_):_ -> unshortcutable - (RawInline _ ('[':_)):_ -> unshortcutable - (RawInline _ (' ':'[':_)):_ -> unshortcutable - _ -> shortcutable + SoftBreak:(Cite _ _):_ -> unshortcutable + LineBreak:(Link _ _ _):_ -> unshortcutable + LineBreak:(Str('[':_)):_ -> unshortcutable + LineBreak:(RawInline _ ('[':_)):_ -> unshortcutable + LineBreak:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str ('[':_):_ -> unshortcutable + Str ('(':_):_ -> unshortcutable + Str (':':_):_ -> unshortcutable + (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ ('(':_)):_ -> unshortcutable + (RawInline _ (':':_)):_ -> unshortcutable + (RawInline _ (' ':'[':_)):_ -> unshortcutable + _ -> shortcutable _ -> shortcutable where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) unshortcutable = do @@ -836,9 +940,9 @@ inlineListToMarkdown opts lst = do fmap (iMark <>) (go is) isSp :: Inline -> Bool -isSp Space = True +isSp Space = True isSp SoftBreak = True -isSp _ = False +isSp _ = False avoidBadWrapsInList :: [Inline] -> [Inline] avoidBadWrapsInList [] = [] @@ -857,7 +961,7 @@ avoidBadWrapsInList (s:Str cs:[]) avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs isOrderedListMarker :: String -> Bool -isOrderedListMarker xs = (last xs `elem` ['.',')']) && +isOrderedListMarker xs = not (null xs) && (last xs `elem` ['.',')']) && isRight (runParser (anyOrderedListMarker >> eof) defaultParserState "" xs) @@ -866,27 +970,30 @@ isRight (Right _) = True isRight (Left _) = False -- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: WriterOptions -> Inline -> MD Doc +inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc inlineToMarkdown opts (Span attrs ils) = do plain <- asks envPlain contents <- inlineListToMarkdown opts ils return $ case plain of True -> contents - False | isEnabled Ext_bracketed_spans opts -> - "[" <> contents <> "]" <> - if attrs == nullAttr - then "{}" - else linkAttributes opts attrs + False | attrs == nullAttr -> contents + | isEnabled Ext_bracketed_spans opts -> + let attrs' = if attrs /= nullAttr + then attrsToMarkdown attrs + else empty + in "[" <> contents <> "]" <> attrs' | isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts -> tagWithAttrs "span" attrs <> contents <> text "</span>" | otherwise -> contents +inlineToMarkdown _ (Emph []) = return empty inlineToMarkdown opts (Emph lst) = do plain <- asks envPlain contents <- inlineListToMarkdown opts lst return $ if plain then "_" <> contents <> "_" else "*" <> contents <> "*" +inlineToMarkdown _ (Strong []) = return empty inlineToMarkdown opts (Strong lst) = do plain <- asks envPlain if plain @@ -894,6 +1001,7 @@ inlineToMarkdown opts (Strong lst) = do else do contents <- inlineListToMarkdown opts lst return $ "**" <> contents <> "**" +inlineToMarkdown _ (Strikeout []) = return empty inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst return $ if isEnabled Ext_strikeout opts @@ -901,6 +1009,7 @@ inlineToMarkdown opts (Strikeout lst) = do else if isEnabled Ext_raw_html opts then "<s>" <> contents <> "</s>" else contents +inlineToMarkdown _ (Superscript []) = return empty inlineToMarkdown opts (Superscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do contents <- inlineListToMarkdown opts lst @@ -908,14 +1017,12 @@ inlineToMarkdown opts (Superscript lst) = then "^" <> contents <> "^" else if isEnabled Ext_raw_html opts then "<sup>" <> contents <> "</sup>" - else case (render Nothing contents) of - ds | all (\d -> d >= '0' && d <= '9') ds - -> text (map toSuperscript ds) - _ -> contents - where toSuperscript '1' = '\x00B9' - toSuperscript '2' = '\x00B2' - toSuperscript '3' = '\x00B3' - toSuperscript c = chr (0x2070 + (ord c - 48)) + else + let rendered = render Nothing contents + in case mapM toSuperscript rendered of + Just r -> text r + Nothing -> text $ "^(" ++ rendered ++ ")" +inlineToMarkdown _ (Subscript []) = return empty inlineToMarkdown opts (Subscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do contents <- inlineListToMarkdown opts lst @@ -923,27 +1030,27 @@ inlineToMarkdown opts (Subscript lst) = then "~" <> contents <> "~" else if isEnabled Ext_raw_html opts then "<sub>" <> contents <> "</sub>" - else case (render Nothing contents) of - ds | all (\d -> d >= '0' && d <= '9') ds - -> text (map toSubscript ds) - _ -> contents - where toSubscript c = chr (0x2080 + (ord c - 48)) + else + let rendered = render Nothing contents + in case mapM toSubscript rendered of + Just r -> text r + Nothing -> text $ "_(" ++ rendered ++ ")" inlineToMarkdown opts (SmallCaps lst) = do plain <- asks envPlain if not plain && (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) - then do - contents <- inlineListToMarkdown opts lst - return $ tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) - <> contents <> text "</span>" + then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst) else inlineListToMarkdown opts $ capitalize lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ "‘" <> contents <> "’" + return $ if isEnabled Ext_smart opts + then "'" <> contents <> "'" + else "‘" <> contents <> "’" inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ "“" <> contents <> "”" + return $ if isEnabled Ext_smart opts + then "\"" <> contents <> "\"" + else "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = do let tickGroups = filter (\s -> '`' `elem` s) $ group str let longest = if null tickGroups @@ -960,14 +1067,17 @@ inlineToMarkdown opts (Code attr str) = do else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown opts (Str str) = do isPlain <- asks envPlain - if isPlain - then return $ text str - else return $ text $ escapeString opts str + let str' = (if isEnabled Ext_smart opts + then unsmartify opts + else id) $ + if isPlain + then str + else escapeString opts str + return $ text str' inlineToMarkdown opts (Math InlineMath str) = case writerHTMLMathMethod opts of - WebTeX url -> - inlineToMarkdown opts (Image nullAttr [Str str] - (url ++ urlEncode str, str)) + WebTeX url -> inlineToMarkdown opts + (Image nullAttr [Str str] (url ++ urlEncode str, str)) _ | isEnabled Ext_tex_math_dollars opts -> return $ "$" <> text str <> "$" | isEnabled Ext_tex_math_single_backslash opts -> @@ -976,9 +1086,9 @@ inlineToMarkdown opts (Math InlineMath str) = return $ "\\\\(" <> text str <> "\\\\)" | otherwise -> do plain <- asks envPlain - inlineListToMarkdown opts $ - (if plain then makeMathPlainer else id) $ - texMathToInlines InlineMath str + texMathToInlines InlineMath str >>= + inlineListToMarkdown opts . + (if plain then makeMathPlainer else id) inlineToMarkdown opts (Math DisplayMath str) = case writerHTMLMathMethod opts of WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` @@ -991,15 +1101,19 @@ inlineToMarkdown opts (Math DisplayMath str) = | isEnabled Ext_tex_math_double_backslash opts -> return $ "\\\\[" <> text str <> "\\\\]" | otherwise -> (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (texMathToInlines DisplayMath str) -inlineToMarkdown opts (RawInline f str) = do + (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) +inlineToMarkdown opts il@(RawInline f str) = do plain <- asks envPlain - if not plain && - ( f == "markdown" || + if (plain && f == "plain") || (not plain && + ( f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] || (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) || - (isEnabled Ext_raw_html opts && f == "html") ) + (isEnabled Ext_raw_html opts && f `elem` ["html", "html4", "html5"]) + )) then return $ text str - else return empty + else do + report $ InlineNotRendered il + return empty inlineToMarkdown opts (LineBreak) = do plain <- asks envPlain if plain || isEnabled Ext_hard_line_breaks opts @@ -1052,7 +1166,8 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - return $ text $ trim $ writeHtmlString def $ Pandoc nullMeta [Plain [lnk]] + (text . T.unpack . T.strip) <$> + writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt @@ -1063,20 +1178,20 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True - _ -> False + _ -> False let useRefLinks = writerReferenceLinks opts && not useAuto shortcutable <- asks envRefShortcutable let useShortcutRefLinks = shortcutable && isEnabled Ext_shortcut_reference_links opts - ref <- if useRefLinks then getReference attr txt (src, tit) else return [] - reftext <- inlineListToMarkdown opts ref + reftext <- if useRefLinks then getReference attr linktext (src, tit) + else return empty return $ if useAuto then if plain then text srcSuffix else "<" <> text srcSuffix <> ">" else if useRefLinks then let first = "[" <> linktext <> "]" - second = if txt == ref + second = if getKey linktext == getKey reftext then if useShortcutRefLinks then "" else "[]" @@ -1091,7 +1206,8 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - return $ text $ trim $ writeHtmlString def $ Pandoc nullMeta [Plain [img]] + (text . T.unpack . T.strip) <$> + writeHtml5String def (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain let txt = if null alternate || alternate == [Str source] @@ -1114,4 +1230,36 @@ makeMathPlainer :: [Inline] -> [Inline] makeMathPlainer = walk go where go (Emph xs) = Span nullAttr xs - go x = x + go x = x + +toSuperscript :: Char -> Maybe Char +toSuperscript '1' = Just '\x00B9' +toSuperscript '2' = Just '\x00B2' +toSuperscript '3' = Just '\x00B3' +toSuperscript '+' = Just '\x207A' +toSuperscript '-' = Just '\x207B' +toSuperscript '=' = Just '\x207C' +toSuperscript '(' = Just '\x207D' +toSuperscript ')' = Just '\x207E' +toSuperscript c + | c >= '0' && c <= '9' = + Just $ chr (0x2070 + (ord c - 48)) + | isSpace c = Just c + | otherwise = Nothing + +toSubscript :: Char -> Maybe Char +toSubscript '+' = Just '\x208A' +toSubscript '-' = Just '\x208B' +toSubscript '=' = Just '\x208C' +toSubscript '(' = Just '\x208D' +toSubscript ')' = Just '\x208E' +toSubscript c + | c >= '0' && c <= '9' = + Just $ chr (0x2080 + (ord c - 48)) + | isSpace c = Just c + | otherwise = Nothing + +lineBreakToSpace :: Inline -> Inline +lineBreakToSpace LineBreak = Space +lineBreakToSpace SoftBreak = Space +lineBreakToSpace x = x diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs new file mode 100644 index 000000000..477f5a0b1 --- /dev/null +++ b/src/Text/Pandoc/Writers/Math.hs @@ -0,0 +1,56 @@ +module Text.Pandoc.Writers.Math + ( texMathToInlines + , convertMath + , defaultMathJaxURL + , defaultKaTeXURL + ) +where + +import Text.Pandoc.Class +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.TeXMath (DisplayType (..), Exp, readTeX, writePandoc) + +-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. +-- Defaults to raw formula between @$@ or @$$@ characters if entire formula +-- can't be converted. +texMathToInlines :: PandocMonad m + => MathType + -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> m [Inline] +texMathToInlines mt inp = do + res <- convertMath writePandoc mt inp + case res of + Right (Just ils) -> return ils + Right Nothing -> do + report $ CouldNotConvertTeXMath inp "" + return [mkFallback mt inp] + Left il -> return [il] + +mkFallback :: MathType -> String -> Inline +mkFallback mt str = Str (delim ++ str ++ delim) + where delim = case mt of + DisplayMath -> "$$" + InlineMath -> "$" + +-- | Converts a raw TeX math formula using a writer function, +-- issuing a warning and producing a fallback (a raw string) +-- on failure. +convertMath :: PandocMonad m + => (DisplayType -> [Exp] -> a) -> MathType -> String + -> m (Either Inline a) +convertMath writer mt str = + case writer dt <$> readTeX str of + Right r -> return (Right r) + Left e -> do + report $ CouldNotConvertTeXMath str e + return (Left $ mkFallback mt str) + where dt = case mt of + DisplayMath -> DisplayBlock + InlineMath -> DisplayInline + +defaultMathJaxURL :: String +defaultMathJaxURL = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/" + +defaultKaTeXURL :: String +defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.8.3/" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 78d4651e7..2470d9200 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,41 +30,44 @@ Conversion of 'Pandoc' documents to MediaWiki markup. MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki> -} module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.List (intercalate) +import qualified Data.Set as Set +import Data.Text (Text, pack) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared import Text.Pandoc.Pretty (render) -import Text.Pandoc.ImageSize +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intersect, intercalate ) -import Network.URI ( isURI ) -import Control.Monad.Reader -import Control.Monad.State +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML (escapeStringForXML) data WriterState = WriterState { - stNotes :: Bool -- True if there are notes - , stOptions :: WriterOptions -- writer options + stNotes :: Bool -- True if there are notes + , stOptions :: WriterOptions -- writer options } data WriterReader = WriterReader { - options :: WriterOptions -- Writer options - , listLevel :: String -- String at beginning of list items, e.g. "**" - , useTags :: Bool -- True if we should use HTML tags because we're in a complex list + options :: WriterOptions -- Writer options + , listLevel :: String -- String at beginning of list items, e.g. "**" + , useTags :: Bool -- True if we should use HTML tags because we're in a complex list } -type MediaWikiWriter = ReaderT WriterReader (State WriterState) +type MediaWikiWriter m = ReaderT WriterReader (StateT WriterState m) -- | Convert Pandoc to MediaWiki. -writeMediaWiki :: WriterOptions -> Pandoc -> String +writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeMediaWiki opts document = let initialState = WriterState { stNotes = False, stOptions = opts } env = WriterReader { options = opts, listLevel = [], useTags = False } - in evalState (runReaderT (pandocToMediaWiki document) env) initialState + in evalStateT (runReaderT (pandocToMediaWiki document) env) initialState -- | Return MediaWiki representation of document. -pandocToMediaWiki :: Pandoc -> MediaWikiWriter String +pandocToMediaWiki :: PandocMonad m => Pandoc -> MediaWikiWriter m Text pandocToMediaWiki (Pandoc meta blocks) = do opts <- asks options metadata <- metaToJSON opts @@ -79,8 +82,8 @@ pandocToMediaWiki (Pandoc meta blocks) = do let main = body ++ notes let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata - return $ case writerTemplate opts of - Nothing -> main + pack <$> case writerTemplate opts of + Nothing -> return main Just tpl -> renderTemplate' tpl context -- | Escape special characters for MediaWiki. @@ -88,8 +91,9 @@ escapeString :: String -> String escapeString = escapeStringForXML -- | Convert Pandoc block element to MediaWiki. -blockToMediaWiki :: Block -- ^ Block element - -> MediaWikiWriter String +blockToMediaWiki :: PandocMonad m + => Block -- ^ Block element + -> MediaWikiWriter m String blockToMediaWiki Null = return "" @@ -123,10 +127,10 @@ blockToMediaWiki (Para inlines) = do blockToMediaWiki (LineBlock lns) = blockToMediaWiki $ linesToPara lns -blockToMediaWiki (RawBlock f str) +blockToMediaWiki b@(RawBlock f str) | f == Format "mediawiki" = return str | f == Format "html" = return str - | otherwise = return "" + | otherwise = "" <$ report (BlockNotRendered b) blockToMediaWiki HorizontalRule = return "\n-----\n" @@ -136,23 +140,15 @@ blockToMediaWiki (Header level _ inlines) = do return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" blockToMediaWiki (CodeBlock (_,classes,_) str) = do - let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", - "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", - "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", - "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5", - "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", - "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", - "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", - "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", - "visualfoxpro", "winbatch", "xml", "xpp", "z80"] + let at = Set.fromList classes `Set.intersection` highlightingLangs return $ - if null at - then "<pre" ++ (if null classes - then ">" - else " class=\"" ++ unwords classes ++ "\">") ++ - escapeString str ++ "</pre>" - else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>" - -- note: no escape! + case Set.toList at of + [] -> "<pre" ++ (if null classes + then ">" + else " class=\"" ++ unwords classes ++ "\">") ++ + escapeString str ++ "</pre>" + (l:_) -> "<source lang=\"" ++ l ++ "\">" ++ str ++ "</source>" + -- note: no escape! even for <! blockToMediaWiki (BlockQuote blocks) = do contents <- blockListToMediaWiki blocks @@ -218,7 +214,7 @@ listAttribsToString (startnum, numstyle, _) = else "") -- | Convert bullet or ordered list item (list of blocks) to MediaWiki. -listItemToMediaWiki :: [Block] -> MediaWikiWriter String +listItemToMediaWiki :: PandocMonad m => [Block] -> MediaWikiWriter m String listItemToMediaWiki items = do contents <- blockListToMediaWiki items tags <- asks useTags @@ -229,8 +225,9 @@ listItemToMediaWiki items = do return $ marker ++ " " ++ contents -- | Convert definition list item (label, list of blocks) to MediaWiki. -definitionListItemToMediaWiki :: ([Inline],[[Block]]) - -> MediaWikiWriter String +definitionListItemToMediaWiki :: PandocMonad m + => ([Inline],[[Block]]) + -> MediaWikiWriter m String definitionListItemToMediaWiki (label, items) = do labelText <- inlineListToMediaWiki label contents <- mapM blockListToMediaWiki items @@ -259,18 +256,18 @@ isSimpleListItem :: [Block] -> Bool isSimpleListItem [] = True isSimpleListItem [x] = case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - DefinitionList _ -> isSimpleList x - _ -> False + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + DefinitionList _ -> isSimpleList x + _ -> False isSimpleListItem [x, y] | isPlainOrPara x = case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - DefinitionList _ -> isSimpleList y - _ -> False + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + DefinitionList _ -> isSimpleList y + _ -> False isSimpleListItem _ = False isPlainOrPara :: Block -> Bool @@ -284,20 +281,22 @@ vcat = intercalate "\n" -- Auxiliary functions for tables: -tableRowToMediaWiki :: Bool +tableRowToMediaWiki :: PandocMonad m + => Bool -> [Alignment] -> [Double] -> (Int, [[Block]]) - -> MediaWikiWriter String + -> MediaWikiWriter m String tableRowToMediaWiki headless alignments widths (rownum, cells) = do cells' <- mapM (tableCellToMediaWiki headless rownum) $ zip3 alignments widths cells return $ unlines cells' -tableCellToMediaWiki :: Bool +tableCellToMediaWiki :: PandocMonad m + => Bool -> Int -> (Alignment, Double, [Block]) - -> MediaWikiWriter String + -> MediaWikiWriter m String tableCellToMediaWiki headless rownum (alignment, width, bs) = do contents <- blockListToMediaWiki bs let marker = if rownum == 1 && not headless then "!" else "|" @@ -322,13 +321,13 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -imageToMediaWiki :: Attr -> MediaWikiWriter String +imageToMediaWiki :: PandocMonad m => Attr -> MediaWikiWriter m String imageToMediaWiki attr = do opts <- gets stOptions let (_, cls, _) = attr toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim + checkPct maybeDim = maybeDim go (Just w) Nothing = '|':w ++ "px" go (Just w) (Just h) = '|':w ++ "x" ++ h ++ "px" go Nothing (Just h) = "|x" ++ h ++ "px" @@ -340,18 +339,19 @@ imageToMediaWiki attr = do return $ dims ++ classes -- | Convert list of Pandoc block elements to MediaWiki. -blockListToMediaWiki :: [Block] -- ^ List of block elements - -> MediaWikiWriter String +blockListToMediaWiki :: PandocMonad m + => [Block] -- ^ List of block elements + -> MediaWikiWriter m String blockListToMediaWiki blocks = fmap vcat $ mapM blockToMediaWiki blocks -- | Convert list of Pandoc inline elements to MediaWiki. -inlineListToMediaWiki :: [Inline] -> MediaWikiWriter String +inlineListToMediaWiki :: PandocMonad m => [Inline] -> MediaWikiWriter m String inlineListToMediaWiki lst = fmap concat $ mapM inlineToMediaWiki lst -- | Convert Pandoc inline element to MediaWiki. -inlineToMediaWiki :: Inline -> MediaWikiWriter String +inlineToMediaWiki :: PandocMonad m => Inline -> MediaWikiWriter m String inlineToMediaWiki (Span attrs ils) = do contents <- inlineListToMediaWiki ils @@ -394,22 +394,28 @@ inlineToMediaWiki (Code _ str) = inlineToMediaWiki (Str str) = return $ escapeString str -inlineToMediaWiki (Math _ str) = return $ "<math>" ++ str ++ "</math>" - -- note: str should NOT be escaped +inlineToMediaWiki (Math mt str) = return $ + "<math display=\"" ++ + (if mt == DisplayMath then "block" else "inline") ++ + "\">" ++ str ++ "</math>" + -- note: str should NOT be escaped -inlineToMediaWiki (RawInline f str) +inlineToMediaWiki il@(RawInline f str) | f == Format "mediawiki" = return str | f == Format "html" = return str - | otherwise = return "" + | otherwise = "" <$ report (InlineNotRendered il) -inlineToMediaWiki (LineBreak) = return "<br />\n" +inlineToMediaWiki LineBreak = return "<br />\n" inlineToMediaWiki SoftBreak = do wrapText <- gets (writerWrapText . stOptions) + listlevel <- asks listLevel case wrapText of WrapAuto -> return " " WrapNone -> return " " - WrapPreserve -> return "\n" + WrapPreserve -> if null listlevel + then return "\n" + else return " " inlineToMediaWiki Space = return " " @@ -437,5 +443,636 @@ inlineToMediaWiki (Image attr alt (source, tit)) = do inlineToMediaWiki (Note contents) = do contents' <- blockListToMediaWiki contents modify (\s -> s { stNotes = True }) - return $ "<ref>" ++ contents' ++ "</ref>" - -- note - may not work for notes with multiple blocks + return $ "<ref>" ++ stripTrailingNewlines contents' ++ "</ref>" + -- note - does not work for notes with multiple blocks + +highlightingLangs :: Set.Set String +highlightingLangs = Set.fromList [ + "abap", + "abl", + "abnf", + "aconf", + "actionscript", + "actionscript3", + "ada", + "ada2005", + "ada95", + "adl", + "agda", + "ahk", + "alloy", + "ambienttalk", + "ambienttalk/2", + "antlr", + "antlr-actionscript", + "antlr-as", + "antlr-c#", + "antlr-cpp", + "antlr-csharp", + "antlr-java", + "antlr-objc", + "antlr-perl", + "antlr-python", + "antlr-rb", + "antlr-ruby", + "apache", + "apacheconf", + "apl", + "applescript", + "arduino", + "arexx", + "as", + "as3", + "asm", + "aspectj", + "aspx-cs", + "aspx-vb", + "asy", + "asymptote", + "at", + "autohotkey", + "autoit", + "awk", + "b3d", + "basemake", + "bash", + "basic", + "bat", + "batch", + "bbcode", + "because", + "befunge", + "bf", + "blitzbasic", + "blitzmax", + "bmax", + "bnf", + "boo", + "boogie", + "bplus", + "brainfuck", + "bro", + "bsdmake", + "bugs", + "c", + "c#", + "c++", + "c++-objdumb", + "c-objdump", + "ca65", + "cadl", + "camkes", + "cbmbas", + "ceylon", + "cf3", + "cfc", + "cfengine3", + "cfg", + "cfm", + "cfs", + "chai", + "chaiscript", + "chapel", + "cheetah", + "chpl", + "cirru", + "cl", + "clay", + "clipper", + "clj", + "cljs", + "clojure", + "clojurescript", + "cmake", + "cobol", + "cobolfree", + "coffee", + "coffee-script", + "coffeescript", + "common-lisp", + "componentpascal", + "console", + "control", + "coq", + "cp", + "cpp", + "cpp-objdump", + "cpsa", + "crmsh", + "croc", + "cry", + "cryptol", + "csh", + "csharp", + "csound", + "csound-csd", + "csound-document", + "csound-orc", + "csound-sco", + "csound-score", + "css", + "css+django", + "css+erb", + "css+genshi", + "css+genshitext", + "css+jinja", + "css+lasso", + "css+mako", + "css+mozpreproc", + "css+myghty", + "css+php", + "css+ruby", + "css+smarty", + "cu", + "cucumber", + "cuda", + "cxx-objdump", + "cypher", + "cython", + "d", + "d-objdump", + "dart", + "debcontrol", + "debsources", + "delphi", + "dg", + "diff", + "django", + "docker", + "dockerfile", + "dosbatch", + "doscon", + "dosini", + "dpatch", + "dtd", + "duby", + "duel", + "dylan", + "dylan-console", + "dylan-lid", + "dylan-repl", + "earl-grey", + "earlgrey", + "easytrieve", + "ebnf", + "ec", + "ecl", + "eg", + "eiffel", + "elisp", + "elixir", + "elm", + "emacs", + "erb", + "erl", + "erlang", + "evoque", + "ex", + "exs", + "ezhil", + "factor", + "fan", + "fancy", + "felix", + "fish", + "fishshell", + "flx", + "fortran", + "fortranfixed", + "foxpro", + "fsharp", + "fy", + "gap", + "gas", + "gawk", + "genshi", + "genshitext", + "gherkin", + "glsl", + "gnuplot", + "go", + "golo", + "gooddata-cl", + "gosu", + "groff", + "groovy", + "gst", + "haml", + "handlebars", + "haskell", + "haxe", + "haxeml", + "hexdump", + "hs", + "html", + "html+cheetah", + "html+django", + "html+erb", + "html+evoque", + "html+genshi", + "html+handlebars", + "html+jinja", + "html+kid", + "html+lasso", + "html+mako", + "html+myghty", + "html+php", + "html+ruby", + "html+smarty", + "html+spitfire", + "html+twig", + "html+velocity", + "htmlcheetah", + "htmldjango", + "http", + "hx", + "hxml", + "hxsl", + "hy", + "hybris", + "hylang", + "i6", + "i6t", + "i7", + "idl", + "idl4", + "idr", + "idris", + "iex", + "igor", + "igorpro", + "ik", + "inform6", + "inform7", + "ini", + "io", + "ioke", + "irb", + "irc", + "isabelle", + "j", + "jade", + "jags", + "jasmin", + "jasminxt", + "java", + "javascript", + "javascript+cheetah", + "javascript+django", + "javascript+erb", + "javascript+genshi", + "javascript+genshitext", + "javascript+jinja", + "javascript+lasso", + "javascript+mako", + "javascript+mozpreproc", + "javascript+myghty", + "javascript+php", + "javascript+ruby", + "javascript+smarty", + "javascript+spitfire", + "jbst", + "jcl", + "jinja", + "jl", + "jlcon", + "jproperties", + "js", + "js+cheetah", + "js+django", + "js+erb", + "js+genshi", + "js+genshitext", + "js+jinja", + "js+lasso", + "js+mako", + "js+myghty", + "js+php", + "js+ruby", + "js+smarty", + "js+spitfire", + "json", + "json-ld", + "jsonld", + "jsonml+bst", + "jsp", + "julia", + "kal", + "kconfig", + "kernel-config", + "kid", + "koka", + "kotlin", + "ksh", + "lagda", + "lasso", + "lassoscript", + "latex", + "lcry", + "lcryptol", + "lean", + "less", + "lhaskell", + "lhs", + "lid", + "lidr", + "lidris", + "lighttpd", + "lighty", + "limbo", + "linux-config", + "liquid", + "lisp", + "literate-agda", + "literate-cryptol", + "literate-haskell", + "literate-idris", + "live-script", + "livescript", + "llvm", + "logos", + "logtalk", + "lsl", + "lua", + "m2", + "make", + "makefile", + "mako", + "man", + "maql", + "mask", + "mason", + "mathematica", + "matlab", + "matlabsession", + "mawk", + "menuconfig", + "mf", + "minid", + "mma", + "modelica", + "modula2", + "moin", + "monkey", + "moo", + "moocode", + "moon", + "moonscript", + "mozhashpreproc", + "mozpercentpreproc", + "mq4", + "mq5", + "mql", + "mql4", + "mql5", + "msc", + "mscgen", + "mupad", + "mxml", + "myghty", + "mysql", + "nasm", + "nawk", + "nb", + "nemerle", + "nesc", + "newlisp", + "newspeak", + "nginx", + "nim", + "nimrod", + "nit", + "nix", + "nixos", + "nroff", + "nsh", + "nsi", + "nsis", + "numpy", + "obj-c", + "obj-c++", + "obj-j", + "objc", + "objc++", + "objdump", + "objdump-nasm", + "objective-c", + "objective-c++", + "objective-j", + "objectivec", + "objectivec++", + "objectivej", + "objectpascal", + "objj", + "ocaml", + "octave", + "odin", + "ooc", + "opa", + "openbugs", + "openedge", + "pacmanconf", + "pan", + "parasail", + "pas", + "pascal", + "pawn", + "pcmk", + "perl", + "perl6", + "php", + "php3", + "php4", + "php5", + "pig", + "pike", + "pkgconfig", + "pl", + "pl6", + "plpgsql", + "po", + "posh", + "postgres", + "postgres-console", + "postgresql", + "postgresql-console", + "postscr", + "postscript", + "pot", + "pov", + "powershell", + "praat", + "progress", + "prolog", + "properties", + "proto", + "protobuf", + "ps1", + "ps1con", + "psm1", + "psql", + "puppet", + "py", + "py3", + "py3tb", + "pycon", + "pypy", + "pypylog", + "pyrex", + "pytb", + "python", + "python3", + "pyx", + "qbasic", + "qbs", + "qml", + "qvt", + "qvto", + "r", + "racket", + "ragel", + "ragel-c", + "ragel-cpp", + "ragel-d", + "ragel-em", + "ragel-java", + "ragel-objc", + "ragel-rb", + "ragel-ruby", + "raw", + "rb", + "rbcon", + "rconsole", + "rd", + "rebol", + "red", + "red/system", + "redcode", + "registry", + "resource", + "resourcebundle", + "rest", + "restructuredtext", + "rexx", + "rhtml", + "rkt", + "roboconf-graph", + "roboconf-instances", + "robotframework", + "rout", + "rql", + "rsl", + "rst", + "rts", + "ruby", + "rust", + "s", + "sage", + "salt", + "sass", + "sc", + "scala", + "scaml", + "scheme", + "scilab", + "scm", + "scss", + "sh", + "shell", + "shell-session", + "shen", + "slim", + "sls", + "smali", + "smalltalk", + "smarty", + "sml", + "snobol", + "sources.list", + "sourceslist", + "sp", + "sparql", + "spec", + "spitfire", + "splus", + "sql", + "sqlite3", + "squeak", + "squid", + "squid.conf", + "squidconf", + "ssp", + "st", + "stan", + "supercollider", + "sv", + "swift", + "swig", + "systemverilog", + "tads3", + "tap", + "tcl", + "tcsh", + "tcshcon", + "tea", + "termcap", + "terminfo", + "terraform", + "tex", + "text", + "tf", + "thrift", + "todotxt", + "trac-wiki", + "trafficscript", + "treetop", + "ts", + "turtle", + "twig", + "typescript", + "udiff", + "urbiscript", + "v", + "vala", + "vapi", + "vb.net", + "vbnet", + "vctreestatus", + "velocity", + "verilog", + "vfp", + "vgl", + "vhdl", + "vim", + "winbatch", + "winbugs", + "x10", + "xbase", + "xml", + "xml+cheetah", + "xml+django", + "xml+erb", + "xml+evoque", + "xml+genshi", + "xml+jinja", + "xml+kid", + "xml+lasso", + "xml+mako", + "xml+myghty", + "xml+php", + "xml+ruby", + "xml+smarty", + "xml+spitfire", + "xml+velocity", + "xq", + "xql", + "xqm", + "xquery", + "xqy", + "xslt", + "xten", + "xtend", + "xul+mozpreproc", + "yaml", + "yaml+jinja", + "zephir" ] diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs new file mode 100644 index 000000000..83d80cd4a --- /dev/null +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -0,0 +1,639 @@ +{- +Copyright (C) 2007-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Ms + Copyright : Copyright (C) 2007-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to groff ms format. + +TODO: + +[ ] use base URL to construct absolute URLs from relative ones for external + links +[ ] is there a better way to do strikeout? +[ ] tight/loose list distinction +-} + +module Text.Pandoc.Writers.Ms ( writeMs ) where +import Control.Monad.State.Strict +import Data.Char (isLower, isUpper, toUpper) +import Data.List (intercalate, intersperse, sort) +import qualified Data.Map as Map +import Data.Maybe (catMaybes, fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI (escapeURIString, isAllowedInURI) +import Skylighting +import System.FilePath (takeExtension) +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Highlighting +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Shared +import Text.Pandoc.Templates +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) +import Text.TeXMath (writeEqn) + +data WriterState = WriterState { stHasInlineMath :: Bool + , stFirstPara :: Bool + , stNotes :: [Note] + , stSmallCaps :: Bool + , stHighlighting :: Bool + , stFontFeatures :: Map.Map Char Bool + } + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ stHasInlineMath = False + , stFirstPara = True + , stNotes = [] + , stSmallCaps = False + , stHighlighting = False + , stFontFeatures = Map.fromList [ + ('I',False) + , ('B',False) + , ('C',False) + ] + } + +type Note = [Block] + +type MS = StateT WriterState + +-- | Convert Pandoc to Ms. +writeMs :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeMs opts document = + evalStateT (pandocToMs opts document) defaultWriterState + +-- | Return groff ms representation of document. +pandocToMs :: PandocMonad m => WriterOptions -> Pandoc -> MS m Text +pandocToMs opts (Pandoc meta blocks) = do + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + let render' :: Doc -> Text + render' = render colwidth + metadata <- metaToJSON opts + (fmap render' . blockListToMs opts) + (fmap render' . inlineListToMs' opts) + meta + body <- blockListToMs opts blocks + let main = render' body + hasInlineMath <- gets stHasInlineMath + let titleMeta = (escapeString . stringify) $ docTitle meta + let authorsMeta = map (escapeString . stringify) $ docAuthors meta + hasHighlighting <- gets stHighlighting + let highlightingMacros = if hasHighlighting + then case writerHighlightStyle opts of + Nothing -> mempty + Just sty -> render' $ styleToMs sty + else mempty + + let context = defField "body" main + $ defField "has-inline-math" hasInlineMath + $ defField "hyphenate" True + $ defField "pandoc-version" pandocVersion + $ defField "toc" (writerTableOfContents opts) + $ defField "title-meta" titleMeta + $ defField "author-meta" (intercalate "; " authorsMeta) + $ defField "highlighting-macros" highlightingMacros metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +-- | Association list of characters to escape. +msEscapes :: Map.Map Char String +msEscapes = Map.fromList + [ ('\160', "\\~") + , ('\'', "\\[aq]") + , ('`', "\\`") + , ('\8217', "'") + , ('"', "\\[dq]") + , ('\x2014', "\\[em]") + , ('\x2013', "\\[en]") + , ('\x2026', "\\&...") + , ('~', "\\[ti]") + , ('^', "\\[ha]") + , ('-', "\\-") + , ('@', "\\@") + , ('\\', "\\\\") + ] + +escapeChar :: Char -> String +escapeChar c = fromMaybe [c] (Map.lookup c msEscapes) + +-- | Escape | character, used to mark inline math, inside math. +escapeBar :: String -> String +escapeBar = concatMap go + where go '|' = "\\[u007C]" + go c = [c] + +-- | Escape special characters for Ms. +escapeString :: String -> String +escapeString = concatMap escapeChar + +escapeUri :: String -> String +escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) + +toSmallCaps :: String -> String +toSmallCaps [] = [] +toSmallCaps (c:cs) + | isLower c = let (lowers,rest) = span isLower (c:cs) + in "\\s-2" ++ escapeString (map toUpper lowers) ++ + "\\s0" ++ toSmallCaps rest + | isUpper c = let (uppers,rest) = span isUpper (c:cs) + in escapeString uppers ++ toSmallCaps rest + | otherwise = escapeChar c ++ toSmallCaps cs + +-- | Escape a literal (code) section for Ms. +escapeCode :: String -> String +escapeCode = intercalate "\n" . map escapeLine . lines + where escapeCodeChar ' ' = "\\ " + escapeCodeChar '\t' = "\\\t" + escapeCodeChar c = escapeChar c + escapeLine codeline = + case concatMap escapeCodeChar codeline of + a@('.':_) -> "\\&" ++ a + b -> b + +-- We split inline lists into sentences, and print one sentence per +-- line. groff/troff treats the line-ending period differently. +-- See http://code.google.com/p/pandoc/issues/detail?id=148. + +-- | Returns the first sentence in a list of inlines, and the rest. +breakSentence :: [Inline] -> ([Inline], [Inline]) +breakSentence [] = ([],[]) +breakSentence xs = + let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True + isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True + isSentenceEndInline LineBreak = True + isSentenceEndInline _ = False + (as, bs) = break isSentenceEndInline xs + in case bs of + [] -> (as, []) + [c] -> (as ++ [c], []) + (c:Space:cs) -> (as ++ [c], cs) + (c:SoftBreak:cs) -> (as ++ [c], cs) + (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) + (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) + (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) + (c:cs) -> (as ++ [c] ++ ds, es) + where (ds, es) = breakSentence cs + +-- | Split a list of inlines into sentences. +splitSentences :: [Inline] -> [[Inline]] +splitSentences xs = + let (sent, rest) = breakSentence xs + in if null rest then [sent] else sent : splitSentences rest + +blockToMs :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> MS m Doc +blockToMs _ Null = return empty +blockToMs opts (Div _ bs) = do + setFirstPara + res <- blockListToMs opts bs + setFirstPara + return res +blockToMs opts (Plain inlines) = + liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines +blockToMs opts (Para [Image attr alt (src,_tit)]) + | let ext = takeExtension src in (ext == ".ps" || ext == ".eps") = do + let (mbW,mbH) = (inPoints opts <$> dimension Width attr, + inPoints opts <$> dimension Height attr) + let sizeAttrs = case (mbW, mbH) of + (Just wp, Nothing) -> space <> doubleQuotes + (text (show (floor wp :: Int) ++ "p")) + (Just wp, Just hp) -> space <> doubleQuotes + (text (show (floor wp :: Int) ++ "p")) <> + space <> + doubleQuotes (text (show (floor hp :: Int))) + _ -> empty + capt <- inlineListToMs' opts alt + return $ nowrap (text ".PSPIC -C " <> + doubleQuotes (text (escapeString src)) <> + sizeAttrs) $$ + text ".ce 1000" $$ + capt $$ + text ".ce 0" +blockToMs opts (Para inlines) = do + firstPara <- gets stFirstPara + resetFirstPara + contents <- liftM vcat $ mapM (inlineListToMs' opts) $ + splitSentences inlines + return $ text (if firstPara then ".LP" else ".PP") $$ contents +blockToMs _ b@(RawBlock f str) + | f == Format "ms" = return $ text str + | otherwise = do + report $ BlockNotRendered b + return empty +blockToMs _ HorizontalRule = do + resetFirstPara + return $ text ".HLINE" +blockToMs opts (Header level (ident,classes,_) inlines) = do + setFirstPara + contents <- inlineListToMs' opts $ map breakToSpace inlines + let (heading, secnum) = if writerNumberSections opts && + "unnumbered" `notElem` classes + then (".NH", "\\*[SN]") + else (".SH", "") + let anchor = if null ident + then empty + else nowrap $ + text ".pdfhref M " <> doubleQuotes (text ident) + let bookmark = text ".pdfhref O " <> text (show level ++ " ") <> + doubleQuotes (text $ secnum ++ + (if null secnum + then "" + else " ") ++ + escapeString (stringify inlines)) + let backlink = nowrap (text ".pdfhref L -D " <> + doubleQuotes (text ident) <> space <> text "\\") <> cr <> + text " -- " + let tocEntry = if writerTableOfContents opts && + level <= writerTOCDepth opts + then text ".XS" + $$ backlink <> doubleQuotes ( + nowrap (text (replicate level '\t') <> + (if null secnum + then empty + else text secnum <> text "\\~\\~") + <> contents)) + $$ text ".XE" + else empty + modify $ \st -> st{ stFirstPara = True } + return $ (text heading <> space <> text (show level)) $$ + contents $$ + bookmark $$ + anchor $$ + tocEntry +blockToMs opts (CodeBlock attr str) = do + hlCode <- highlightCode opts attr str + setFirstPara + return $ + text ".IP" $$ + text ".nf" $$ + text "\\f[C]" $$ + hlCode $$ + text "\\f[]" $$ + text ".fi" +blockToMs opts (LineBlock ls) = do + resetFirstPara + blockToMs opts $ Para $ intercalate [LineBreak] ls +blockToMs opts (BlockQuote blocks) = do + setFirstPara + contents <- blockListToMs opts blocks + setFirstPara + return $ text ".RS" $$ contents $$ text ".RE" +blockToMs opts (Table caption alignments widths headers rows) = + let aligncode AlignLeft = "l" + aligncode AlignRight = "r" + aligncode AlignCenter = "c" + aligncode AlignDefault = "l" + in do + caption' <- inlineListToMs' opts caption + let iwidths = if all (== 0) widths + then repeat "" + else map (printf "w(%0.1fn)" . (70 *)) widths + -- 78n default width - 8n indent = 70n + let coldescriptions = text $ unwords + (zipWith (\align width -> aligncode align ++ width) + alignments iwidths) ++ "." + colheadings <- mapM (blockListToMs opts) headers + let makeRow cols = text "T{" $$ + vcat (intersperse (text "T}\tT{") cols) $$ + text "T}" + let colheadings' = if all null headers + then empty + else makeRow colheadings $$ char '_' + body <- mapM (\row -> do + cols <- mapM (blockListToMs opts) row + return $ makeRow cols) rows + setFirstPara + return $ text ".PP" $$ caption' $$ + text ".TS" $$ text "delim(@@) tab(\t);" $$ coldescriptions $$ + colheadings' $$ vcat body $$ text ".TE" + +blockToMs opts (BulletList items) = do + contents <- mapM (bulletListItemToMs opts) items + setFirstPara + return (vcat contents) +blockToMs opts (OrderedList attribs items) = do + let markers = take (length items) $ orderedListMarkers attribs + let indent = 2 + + maximum (map length markers) + contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $ + zip markers items + setFirstPara + return (vcat contents) +blockToMs opts (DefinitionList items) = do + contents <- mapM (definitionListItemToMs opts) items + setFirstPara + return (vcat contents) + +-- | Convert bullet list item (list of blocks) to ms. +bulletListItemToMs :: PandocMonad m => WriterOptions -> [Block] -> MS m Doc +bulletListItemToMs _ [] = return empty +bulletListItemToMs opts (Para first:rest) = + bulletListItemToMs opts (Plain first:rest) +bulletListItemToMs opts (Plain first:rest) = do + first' <- blockToMs opts (Plain first) + rest' <- blockListToMs opts rest + let first'' = text ".IP \\[bu] 3" $$ first' + let rest'' = if null rest + then empty + else text ".RS 3" $$ rest' $$ text ".RE" + return (first'' $$ rest'') +bulletListItemToMs opts (first:rest) = do + first' <- blockToMs opts first + rest' <- blockListToMs opts rest + return $ text "\\[bu] .RS 3" $$ first' $$ rest' $$ text ".RE" + +-- | Convert ordered list item (a list of blocks) to ms. +orderedListItemToMs :: PandocMonad m + => WriterOptions -- ^ options + -> String -- ^ order marker for list item + -> Int -- ^ number of spaces to indent + -> [Block] -- ^ list item (list of blocks) + -> MS m Doc +orderedListItemToMs _ _ _ [] = return empty +orderedListItemToMs opts num indent (Para first:rest) = + orderedListItemToMs opts num indent (Plain first:rest) +orderedListItemToMs opts num indent (first:rest) = do + first' <- blockToMs opts first + rest' <- blockListToMs opts rest + let num' = printf ("%" ++ show (indent - 1) ++ "s") num + let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' + let rest'' = if null rest + then empty + else text ".RS " <> text (show indent) $$ + rest' $$ text ".RE" + return $ first'' $$ rest'' + +-- | Convert definition list item (label, list of blocks) to ms. +definitionListItemToMs :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> MS m Doc +definitionListItemToMs opts (label, defs) = do + labelText <- inlineListToMs' opts $ map breakToSpace label + contents <- if null defs + then return empty + else liftM vcat $ forM defs $ \blocks -> do + let (first, rest) = case blocks of + (Para x:y) -> (Plain x,y) + (x:y) -> (x,y) + [] -> (Plain [], []) + -- should not happen + rest' <- liftM vcat $ + mapM (\item -> blockToMs opts item) rest + first' <- blockToMs opts first + return $ first' $$ text ".RS" $$ rest' $$ text ".RE" + return $ nowrap (text ".IP " <> doubleQuotes labelText) $$ contents + +-- | Convert list of Pandoc block elements to ms. +blockListToMs :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> MS m Doc +blockListToMs opts blocks = + mapM (blockToMs opts) blocks >>= (return . vcat) + +-- | Convert list of Pandoc inline elements to ms. +inlineListToMs :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc +-- if list starts with ., insert a zero-width character \& so it +-- won't be interpreted as markup if it falls at the beginning of a line. +inlineListToMs opts lst = hcat <$> mapM (inlineToMs opts) lst + +-- This version to be used when there is no further inline content; +-- forces a note at the end. +inlineListToMs' :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc +inlineListToMs' opts lst = do + x <- hcat <$> mapM (inlineToMs opts) lst + y <- handleNotes opts empty + return $ x <> y + +-- | Convert Pandoc inline element to ms. +inlineToMs :: PandocMonad m => WriterOptions -> Inline -> MS m Doc +inlineToMs opts (Span _ ils) = inlineListToMs opts ils +inlineToMs opts (Emph lst) = + withFontFeature 'I' (inlineListToMs opts lst) +inlineToMs opts (Strong lst) = + withFontFeature 'B' (inlineListToMs opts lst) +inlineToMs opts (Strikeout lst) = do + contents <- inlineListToMs opts lst + -- we use grey color instead of strikeout, which seems quite + -- hard to do in groff for arbitrary bits of text + return $ text "\\m[strikecolor]" <> contents <> text "\\m[]" +inlineToMs opts (Superscript lst) = do + contents <- inlineListToMs opts lst + return $ text "\\*{" <> contents <> text "\\*}" +inlineToMs opts (Subscript lst) = do + contents <- inlineListToMs opts lst + return $ text "\\*<" <> contents <> text "\\*>" +inlineToMs opts (SmallCaps lst) = do + -- see https://lists.gnu.org/archive/html/groff/2015-01/msg00016.html + modify $ \st -> st{ stSmallCaps = not (stSmallCaps st) } + res <- inlineListToMs opts lst + modify $ \st -> st{ stSmallCaps = not (stSmallCaps st) } + return res +inlineToMs opts (Quoted SingleQuote lst) = do + contents <- inlineListToMs opts lst + return $ char '`' <> contents <> char '\'' +inlineToMs opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMs opts lst + return $ text "\\[lq]" <> contents <> text "\\[rq]" +inlineToMs opts (Cite _ lst) = + inlineListToMs opts lst +inlineToMs opts (Code attr str) = do + hlCode <- highlightCode opts attr str + withFontFeature 'C' (return hlCode) +inlineToMs _ (Str str) = do + let shim = case str of + '.':_ -> afterBreak "\\&" + _ -> empty + smallcaps <- gets stSmallCaps + if smallcaps + then return $ shim <> text (toSmallCaps str) + else return $ shim <> text (escapeString str) +inlineToMs opts (Math InlineMath str) = do + modify $ \st -> st{ stHasInlineMath = True } + res <- convertMath writeEqn InlineMath str + case res of + Left il -> inlineToMs opts il + Right r -> return $ text "@" <> text (escapeBar r) <> text "@" +inlineToMs opts (Math DisplayMath str) = do + res <- convertMath writeEqn InlineMath str + case res of + Left il -> do + contents <- inlineToMs opts il + return $ cr <> text ".RS" $$ contents $$ text ".RE" + Right r -> return $ + cr <> text ".EQ" $$ text (escapeBar r) $$ text ".EN" +inlineToMs _ il@(RawInline f str) + | f == Format "ms" = return $ text str + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToMs _ LineBreak = return $ cr <> text ".br" <> cr +inlineToMs opts SoftBreak = + handleNotes opts $ + case writerWrapText opts of + WrapAuto -> space + WrapNone -> space + WrapPreserve -> cr +inlineToMs opts Space = handleNotes opts space +inlineToMs opts (Link _ txt ('#':ident, _)) = do + -- internal link + contents <- inlineListToMs' opts $ map breakToSpace txt + return $ text "\\c" <> cr <> nowrap (text ".pdfhref L -D " <> + doubleQuotes (text ident) <> text " -A " <> + doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> + text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" +inlineToMs opts (Link _ txt (src, _)) = do + -- external link + contents <- inlineListToMs' opts $ map breakToSpace txt + return $ text "\\c" <> cr <> nowrap (text ".pdfhref W -D " <> + doubleQuotes (text (escapeUri src)) <> text " -A " <> + doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> + text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" +inlineToMs _ (Image _ alternate (_, _)) = + return $ char '[' <> text "IMAGE: " <> + text (escapeString (stringify alternate)) <> char ']' +inlineToMs _ (Note contents) = do + modify $ \st -> st{ stNotes = contents : stNotes st } + return $ text "\\**" + +handleNotes :: PandocMonad m => WriterOptions -> Doc -> MS m Doc +handleNotes opts fallback = do + notes <- gets stNotes + if null notes + then return fallback + else do + modify $ \st -> st{ stNotes = [] } + vcat <$> mapM (handleNote opts) notes + +handleNote :: PandocMonad m => WriterOptions -> Note -> MS m Doc +handleNote opts bs = do + -- don't start with Paragraph or we'll get a spurious blank + -- line after the note ref: + let bs' = case bs of + (Para ils : rest) -> Plain ils : rest + _ -> bs + contents <- blockListToMs opts bs' + return $ cr <> text ".FS" $$ contents $$ text ".FE" <> cr + +fontChange :: PandocMonad m => MS m Doc +fontChange = do + features <- gets stFontFeatures + let filling = sort [c | (c,True) <- Map.toList features] + return $ text $ "\\f[" ++ filling ++ "]" + +withFontFeature :: PandocMonad m => Char -> MS m Doc -> MS m Doc +withFontFeature c action = do + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + begin <- fontChange + d <- action + modify $ \st -> st{ stFontFeatures = Map.adjust not c $ stFontFeatures st } + end <- fontChange + return $ begin <> d <> end + +setFirstPara :: PandocMonad m => MS m () +setFirstPara = modify $ \st -> st{ stFirstPara = True } + +resetFirstPara :: PandocMonad m => MS m () +resetFirstPara = modify $ \st -> st{ stFirstPara = False } + +breakToSpace :: Inline -> Inline +breakToSpace SoftBreak = Space +breakToSpace LineBreak = Space +breakToSpace x = x + +-- Highlighting + +styleToMs :: Style -> Doc +styleToMs sty = vcat $ colordefs ++ map (toMacro sty) alltoktypes + where alltoktypes = enumFromTo KeywordTok NormalTok + colordefs = map toColorDef allcolors + toColorDef c = text (".defcolor " ++ + hexColor c ++ " rgb #" ++ hexColor c) + allcolors = catMaybes $ ordNub $ + [defaultColor sty, backgroundColor sty, + lineNumberColor sty, lineNumberBackgroundColor sty] ++ + concatMap (colorsForToken. snd) (Map.toList (tokenStyles sty)) + colorsForToken ts = [tokenColor ts, tokenBackground ts] + +hexColor :: Color -> String +hexColor (RGB r g b) = printf "%02x%02x%02x" r g b + +toMacro :: Style -> TokenType -> Doc +toMacro sty toktype = + nowrap (text ".ds " <> text (show toktype) <> text " " <> + setbg <> setcolor <> setfont <> + text "\\\\$1" <> + resetfont <> resetcolor <> resetbg) + where setcolor = maybe empty fgcol tokCol + resetcolor = maybe empty (const $ text "\\\\m[]") tokCol + setbg = empty -- maybe empty bgcol tokBg + resetbg = empty -- maybe empty (const $ text "\\\\M[]") tokBg + fgcol c = text $ "\\\\m[" ++ hexColor c ++ "]" + -- bgcol c = text $ "\\\\M[" ++ hexColor c ++ "]" + setfont = if tokBold || tokItalic + then text $ "\\\\f[C" ++ ['B' | tokBold] ++ + ['I' | tokItalic] ++ "]" + else empty + resetfont = if tokBold || tokItalic + then text "\\\\f[C]" + else empty + tokSty = Map.lookup toktype (tokenStyles sty) + tokCol = (tokSty >>= tokenColor) `mplus` defaultColor sty + -- tokBg = (tokSty >>= tokenBackground) `mplus` backgroundColor sty + tokBold = fromMaybe False (tokenBold <$> tokSty) + tokItalic = fromMaybe False (tokenItalic <$> tokSty) + -- tokUnderline = fromMaybe False (tokSty >>= tokUnderline) + -- lnColor = lineNumberColor sty + -- lnBkgColor = lineNumberBackgroundColor sty + +msFormatter :: FormatOptions -> [SourceLine] -> Doc +msFormatter _fmtopts = + vcat . map fmtLine + where fmtLine = hcat . map fmtToken + fmtToken (toktype, tok) = text "\\*" <> + brackets (text (show toktype) <> text " \"" + <> text (escapeCode (T.unpack tok)) <> text "\"") + +highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m Doc +highlightCode opts attr str = + case highlight (writerSyntaxMap opts) msFormatter attr str of + Left msg -> do + unless (null msg) $ report $ CouldNotHighlight msg + return $ text (escapeCode str) + Right h -> do + modify (\st -> st{ stHighlighting = True }) + return h diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs new file mode 100644 index 000000000..ad67e489d --- /dev/null +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -0,0 +1,408 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2017-2018 Alexander Krotov <ilabdsf@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Muse + Copyright : Copyright (C) 2017-2018 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov <ilabdsf@gmail.com> + Stability : stable + Portability : portable + +Conversion of 'Pandoc' documents to Muse. + +This module is mostly intended for <https://amusewiki.org/ Amusewiki> markup support, +as described by <https://amusewiki.org/library/manual Text::Amuse markup manual>. +Original <https://www.gnu.org/software/emacs-muse/ Emacs Muse> markup support +is a secondary goal. + +Where Text::Amuse markup +<https://metacpan.org/pod/Text::Amuse#DIFFERENCES-WITH-THE-ORIGINAL-EMACS-MUSE-MARKUP differs> +from <https://www.gnu.org/software/emacs-muse/manual/ Emacs Muse markup>, +Text::Amuse markup is supported. +For example, native tables are always used instead of Org Mode tables. +However, @\<literal style="html">@ tag is used for HTML raw blocks +even though it is supported only in Emacs Muse. +-} +module Text.Pandoc.Writers.Muse (writeMuse) where +import Control.Monad.State.Strict +import Data.Text (Text) +import Data.List (intersperse, transpose, isInfixOf) +import System.FilePath (takeExtension) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import qualified Data.Set as Set + +type Notes = [[Block]] +data WriterState = + WriterState { stNotes :: Notes + , stOptions :: WriterOptions + , stTopLevel :: Bool + , stInsideBlock :: Bool + , stIds :: Set.Set String + } + +-- | Convert Pandoc to Muse. +writeMuse :: PandocMonad m + => WriterOptions + -> Pandoc + -> m Text +writeMuse opts document = + let st = WriterState { stNotes = [] + , stOptions = opts + , stTopLevel = True + , stInsideBlock = False + , stIds = Set.empty + } + in evalStateT (pandocToMuse document) st + +-- | Return Muse representation of document. +pandocToMuse :: PandocMonad m + => Pandoc + -> StateT WriterState m Text +pandocToMuse (Pandoc meta blocks) = do + opts <- gets stOptions + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + let render' :: Doc -> Text + render' = render Nothing + metadata <- metaToJSON opts + (fmap render' . blockListToMuse) + (fmap render' . inlineListToMuse) + meta + body <- blockListToMuse blocks + notes <- liftM (reverse . stNotes) get >>= notesToMuse + let main = render colwidth $ body $+$ notes + let context = defField "body" main metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context + +-- | Helper function for flatBlockListToMuse +-- | Render all blocks and insert blank lines between the first two +catWithBlankLines :: PandocMonad m + => [Block] -- ^ List of block elements + -> Int -- ^ Number of blank lines + -> StateT WriterState m Doc +catWithBlankLines (b : bs) n = do + b' <- blockToMuse b + bs' <- flatBlockListToMuse bs + return $ b' <> blanklines n <> bs' +catWithBlankLines _ _ = error "Expected at least one block" + +-- | Convert list of Pandoc block elements to Muse +-- | without setting stTopLevel. +flatBlockListToMuse :: PandocMonad m + => [Block] -- ^ List of block elements + -> StateT WriterState m Doc +flatBlockListToMuse bs@(BulletList _ : BulletList _ : _) = catWithBlankLines bs 2 +flatBlockListToMuse bs@(OrderedList (_, style1, _) _ : OrderedList (_, style2, _) _ : _) = + catWithBlankLines bs (if style1' == style2' then 2 else 0) + where + style1' = normalizeStyle style1 + style2' = normalizeStyle style2 + normalizeStyle DefaultStyle = Decimal + normalizeStyle s = s +flatBlockListToMuse bs@(DefinitionList _ : DefinitionList _ : _) = catWithBlankLines bs 2 +flatBlockListToMuse bs@(_ : _) = catWithBlankLines bs 0 +flatBlockListToMuse [] = return mempty + +-- | Convert list of Pandoc block elements to Muse. +blockListToMuse :: PandocMonad m + => [Block] -- ^ List of block elements + -> StateT WriterState m Doc +blockListToMuse blocks = do + oldState <- get + modify $ \s -> s { stTopLevel = not $ stInsideBlock s + , stInsideBlock = True + } + result <- flatBlockListToMuse blocks + modify $ \s -> s { stTopLevel = stTopLevel oldState + , stInsideBlock = stInsideBlock oldState + } + return result + +-- | Convert Pandoc block element to Muse. +blockToMuse :: PandocMonad m + => Block -- ^ Block element + -> StateT WriterState m Doc +blockToMuse (Plain inlines) = inlineListToMuse inlines +blockToMuse (Para inlines) = do + contents <- inlineListToMuse inlines + return $ contents <> blankline +blockToMuse (LineBlock lns) = do + lns' <- mapM inlineListToMuse lns + return $ nowrap $ vcat (map (text "> " <>) lns') <> blankline +blockToMuse (CodeBlock (_,_,_) str) = + return $ "<example>" $$ text str $$ "</example>" $$ blankline +blockToMuse (RawBlock (Format format) str) = + return $ blankline $$ "<literal style=\"" <> text format <> "\">" $$ + text str $$ "</literal>" $$ blankline +blockToMuse (BlockQuote blocks) = do + contents <- flatBlockListToMuse blocks + return $ blankline + <> "<quote>" + $$ nest 0 contents -- nest 0 to remove trailing blank lines + $$ "</quote>" + <> blankline +blockToMuse (OrderedList (start, style, _) items) = do + let markers = take (length items) $ orderedListMarkers + (start, style, Period) + let maxMarkerLength = maximum $ map length markers + let markers' = map (\m -> let s = maxMarkerLength - length m + in m ++ replicate s ' ') markers + contents <- zipWithM orderedListItemToMuse markers' items + -- ensure that sublists have preceding blank line + topLevel <- gets stTopLevel + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline + where orderedListItemToMuse :: PandocMonad m + => String -- ^ marker for list item + -> [Block] -- ^ list item (list of blocks) + -> StateT WriterState m Doc + orderedListItemToMuse marker item = do + contents <- blockListToMuse item + return $ hang (length marker + 1) (text marker <> space) contents +blockToMuse (BulletList items) = do + contents <- mapM bulletListItemToMuse items + -- ensure that sublists have preceding blank line + topLevel <- gets stTopLevel + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline + where bulletListItemToMuse :: PandocMonad m + => [Block] + -> StateT WriterState m Doc + bulletListItemToMuse item = do + contents <- blockListToMuse item + return $ hang 2 "- " contents +blockToMuse (DefinitionList items) = do + contents <- mapM definitionListItemToMuse items + -- ensure that sublists have preceding blank line + topLevel <- gets stTopLevel + return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline + where definitionListItemToMuse :: PandocMonad m + => ([Inline], [[Block]]) + -> StateT WriterState m Doc + definitionListItemToMuse (label, defs) = do + label' <- inlineListToMuse label + contents <- liftM vcat $ mapM descriptionToMuse defs + let ind = offset label' + return $ hang ind label' contents + descriptionToMuse :: PandocMonad m + => [Block] + -> StateT WriterState m Doc + descriptionToMuse desc = hang 4 " :: " <$> blockListToMuse desc +blockToMuse (Header level (ident,_,_) inlines) = do + opts <- gets stOptions + contents <- inlineListToMuse inlines + + ids <- gets stIds + let autoId = uniqueIdent inlines ids + modify $ \st -> st{ stIds = Set.insert autoId ids } + + let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) + then empty + else "#" <> text ident <> cr + let header' = text $ replicate level '*' + return $ blankline <> nowrap (header' <> space <> contents) + $$ attr' <> blankline +-- https://www.gnu.org/software/emacs-muse/manual/muse.html#Horizontal-Rules-and-Anchors +blockToMuse HorizontalRule = return $ blankline $$ "----" $$ blankline +blockToMuse (Table caption _ _ headers rows) = do + caption' <- inlineListToMuse caption + headers' <- mapM blockListToMuse headers + rows' <- mapM (mapM blockListToMuse) rows + let noHeaders = all null headers + + let numChars = maximum . map offset + -- FIXME: width is not being used. + let widthsInChars = + map numChars $ transpose (headers' : rows') + -- FIXME: Muse doesn't allow blocks with height more than 1. + let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks + where h = maximum (1 : map height blocks) + sep' = lblock (length sep) $ vcat (replicate h (text sep)) + let makeRow sep = (" " <>) . hpipeBlocks sep . zipWith lblock widthsInChars + let head' = makeRow " || " headers' + let rowSeparator = if noHeaders then " | " else " | " + rows'' <- mapM (\row -> do cols <- mapM blockListToMuse row + return $ makeRow rowSeparator cols) rows + let body = vcat rows'' + return $ (if noHeaders then empty else head') + $$ body + $$ (if null caption then empty else " |+ " <> caption' <> " +|") + $$ blankline +blockToMuse (Div _ bs) = flatBlockListToMuse bs +blockToMuse Null = return empty + +-- | Return Muse representation of notes. +notesToMuse :: PandocMonad m + => Notes + -> StateT WriterState m Doc +notesToMuse notes = liftM vsep (zipWithM noteToMuse [1 ..] notes) + +-- | Return Muse representation of a note. +noteToMuse :: PandocMonad m + => Int + -> [Block] + -> StateT WriterState m Doc +noteToMuse num note = do + contents <- blockListToMuse note + let marker = "[" ++ show num ++ "] " + return $ hang (length marker) (text marker) contents + +-- | Escape special characters for Muse. +escapeString :: String -> String +escapeString s = + "<verbatim>" ++ + substitute "</verbatim>" "<</verbatim><verbatim>/verbatim>" s ++ + "</verbatim>" + +-- | Escape special characters for Muse if needed. +conditionalEscapeString :: String -> String +conditionalEscapeString s = + if any (`elem` ("#*<=>[]|" :: String)) s || + "::" `isInfixOf` s || + "----" `isInfixOf` s || + "~~" `isInfixOf` s + then escapeString s + else s + +normalizeInlineList :: [Inline] -> [Inline] +normalizeInlineList (x : Str "" : xs) + = normalizeInlineList (x:xs) +normalizeInlineList (Str x1 : Str x2 : xs) + = normalizeInlineList $ Str (x1 ++ x2) : xs +normalizeInlineList (Emph x1 : Emph x2 : ils) + = normalizeInlineList $ Emph (x1 ++ x2) : ils +normalizeInlineList (Strong x1 : Strong x2 : ils) + = normalizeInlineList $ Strong (x1 ++ x2) : ils +normalizeInlineList (Strikeout x1 : Strikeout x2 : ils) + = normalizeInlineList $ Strikeout (x1 ++ x2) : ils +normalizeInlineList (Superscript x1 : Superscript x2 : ils) + = normalizeInlineList $ Superscript (x1 ++ x2) : ils +normalizeInlineList (Subscript x1 : Subscript x2 : ils) + = normalizeInlineList $ Subscript (x1 ++ x2) : ils +normalizeInlineList (SmallCaps x1 : SmallCaps x2 : ils) + = normalizeInlineList $ SmallCaps (x1 ++ x2) : ils +normalizeInlineList (Code _ x1 : Code _ x2 : ils) + = normalizeInlineList $ Code nullAttr (x1 ++ x2) : ils +normalizeInlineList (RawInline f1 x1 : RawInline f2 x2 : ils) | f1 == f2 + = normalizeInlineList $ RawInline f1 (x1 ++ x2) : ils +normalizeInlineList (Span a1 x1 : Span a2 x2 : ils) | a1 == a2 + = normalizeInlineList $ Span a1 (x1 ++ x2) : ils +normalizeInlineList (x:xs) = x : normalizeInlineList xs +normalizeInlineList [] = [] + +fixNotes :: [Inline] -> [Inline] +fixNotes [] = [] +fixNotes (Space : n@Note{} : rest) = Str " " : n : fixNotes rest +fixNotes (SoftBreak : n@Note{} : rest) = Str " " : n : fixNotes rest +fixNotes (x:xs) = x : fixNotes xs + +-- | Convert list of Pandoc inline elements to Muse. +inlineListToMuse :: PandocMonad m + => [Inline] + -> StateT WriterState m Doc +inlineListToMuse lst = hcat <$> mapM inlineToMuse (fixNotes $ normalizeInlineList lst) + +-- | Convert Pandoc inline element to Muse. +inlineToMuse :: PandocMonad m + => Inline + -> StateT WriterState m Doc +inlineToMuse (Str str) = return $ text $ conditionalEscapeString str +inlineToMuse (Emph lst) = do + contents <- inlineListToMuse lst + return $ "<em>" <> contents <> "</em>" +inlineToMuse (Strong lst) = do + contents <- inlineListToMuse lst + return $ "<strong>" <> contents <> "</strong>" +inlineToMuse (Strikeout lst) = do + contents <- inlineListToMuse lst + return $ "<del>" <> contents <> "</del>" +inlineToMuse (Superscript lst) = do + contents <- inlineListToMuse lst + return $ "<sup>" <> contents <> "</sup>" +inlineToMuse (Subscript lst) = do + contents <- inlineListToMuse lst + return $ "<sub>" <> contents <> "</sub>" +inlineToMuse (SmallCaps lst) = inlineListToMuse lst +inlineToMuse (Quoted SingleQuote lst) = do + contents <- inlineListToMuse lst + return $ "‘" <> contents <> "’" +inlineToMuse (Quoted DoubleQuote lst) = do + contents <- inlineListToMuse lst + return $ "“" <> contents <> "”" +-- Amusewiki does not support <cite> tag, +-- and Emacs Muse citation support is limited +-- (https://www.gnu.org/software/emacs-muse/manual/html_node/Citations.html#Citation) +-- so just fallback to expanding inlines. +inlineToMuse (Cite _ lst) = inlineListToMuse lst +inlineToMuse (Code _ str) = return $ + "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>" +inlineToMuse (Math t str) = + lift (texMathToInlines t str) >>= inlineListToMuse +inlineToMuse (RawInline (Format f) str) = + return $ "<literal style=\"" <> text f <> "\">" <> text str <> "</literal>" +inlineToMuse LineBreak = return $ "<br>" <> cr +inlineToMuse Space = return space +inlineToMuse SoftBreak = do + wrapText <- gets $ writerWrapText . stOptions + return $ if wrapText == WrapPreserve then cr else space +inlineToMuse (Link _ txt (src, _)) = + case txt of + [Str x] | escapeURI x == src -> + return $ "[[" <> text (escapeLink x) <> "]]" + _ -> do contents <- inlineListToMuse txt + return $ "[[" <> text (escapeLink src) <> "][" <> contents <> "]]" + where escapeLink lnk = if isImageUrl lnk then "URL:" ++ lnk else lnk + -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el + imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] + isImageUrl = (`elem` imageExtensions) . takeExtension +inlineToMuse (Image attr alt (source,'f':'i':'g':':':title)) = + inlineToMuse (Image attr alt (source,title)) +inlineToMuse (Image attr inlines (source, title)) = do + opts <- gets stOptions + alt <- inlineListToMuse inlines + let title' = if null title + then if null inlines + then "" + else "[" <> alt <> "]" + else "[" <> text title <> "]" + let width = case dimension Width attr of + Just (Percent x) | isEnabled Ext_amuse opts -> " " ++ show (round x :: Integer) + _ -> "" + return $ "[[" <> text (source ++ width) <> "]" <> title' <> "]" +inlineToMuse (Note contents) = do + -- add to notes in state + notes <- gets stNotes + modify $ \st -> st { stNotes = contents:notes } + let ref = show $ length notes + 1 + return $ "[" <> text ref <> "]" +inlineToMuse (Span (_,name:_,_) inlines) = do + contents <- inlineListToMuse inlines + return $ "<class name=\"" <> text name <> "\">" <> contents <> "</class>" +inlineToMuse (Span _ lst) = inlineListToMuse lst diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 87e23aeeb..f852bad96 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,14 +30,17 @@ Conversion of a 'Pandoc' document to a string representation. -} module Text.Pandoc.Writers.Native ( writeNative ) where -import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) -import Data.List ( intersperse ) +import Data.List (intersperse) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition +import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.Pandoc.Pretty prettyList :: [Doc] -> Doc prettyList ds = - "[" <> (cat $ intersperse (cr <> ",") $ map (nest 1) ds) <> "]" + "[" <> + cat (intersperse (cr <> ",") $ map (nest 1) ds) <> "]" -- | Prettyprint Pandoc block element. prettyBlock :: Block -> Doc @@ -47,12 +50,12 @@ prettyBlock (BlockQuote blocks) = "BlockQuote" $$ prettyList (map prettyBlock blocks) prettyBlock (OrderedList attribs blockLists) = "OrderedList" <> space <> text (show attribs) $$ - (prettyList $ map (prettyList . map prettyBlock) blockLists) + prettyList (map (prettyList . map prettyBlock) blockLists) prettyBlock (BulletList blockLists) = "BulletList" $$ - (prettyList $ map (prettyList . map prettyBlock) blockLists) + prettyList (map (prettyList . map prettyBlock) blockLists) prettyBlock (DefinitionList items) = "DefinitionList" $$ - (prettyList $ map deflistitem items) + prettyList (map deflistitem items) where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <> nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")" prettyBlock (Table caption aligns widths header rows) = @@ -66,8 +69,8 @@ prettyBlock (Div attr blocks) = prettyBlock block = text $ show block -- | Prettyprint Pandoc document. -writeNative :: WriterOptions -> Pandoc -> String -writeNative opts (Pandoc meta blocks) = +writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeNative opts (Pandoc meta blocks) = return $ let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index ce4d456a3..63a3f915a 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2015 John MacFarlane + Copyright : Copyright (C) 2008-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,49 +29,70 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where -import Data.IORef -import Data.List ( isPrefixOf ) -import Data.Maybe ( fromMaybe ) -import Text.XML.Light.Output -import Text.TeXMath -import qualified Data.ByteString.Lazy as B -import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip -import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) -import Text.Pandoc.Shared ( stringify, fetchItem', warn, - getDefaultReferenceODT ) -import Text.Pandoc.ImageSize -import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) +import Control.Monad.Except (catchError) +import Control.Monad.State.Strict +import qualified Data.ByteString.Lazy as B +import Data.Generics (everywhere', mkT) +import Data.List (isPrefixOf) +import Data.Maybe (fromMaybe) +import qualified Data.Text.Lazy as TL +import System.FilePath (takeDirectory, takeExtension, (<.>)) +import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import Text.Pandoc.Class (PandocMonad, report, toLang) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging +import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) +import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) +import Text.Pandoc.Pretty +import Text.Pandoc.Shared (stringify) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk -import Text.Pandoc.Writers.Shared ( fixDisplayMath ) -import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) -import Control.Monad (liftM) +import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) +import Text.Pandoc.Writers.Shared (fixDisplayMath) import Text.Pandoc.XML -import Text.Pandoc.Pretty -import qualified Control.Exception as E -import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension, takeDirectory, (<.>)) +import Text.TeXMath +import Text.XML.Light + +newtype ODTState = ODTState { stEntries :: [Entry] + } + +type O m = StateT ODTState m -- | Produce an ODT file from a Pandoc document. -writeODT :: WriterOptions -- ^ Writer options +writeODT :: PandocMonad m + => WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO B.ByteString -writeODT opts doc@(Pandoc meta _) = do - let datadir = writerUserDataDir opts + -> m B.ByteString +writeODT opts doc = + let initState = ODTState{ stEntries = [] + } + in + evalStateT (pandocToODT opts doc) initState + +-- | Produce an ODT file from a Pandoc document. +pandocToODT :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> O m B.ByteString +pandocToODT opts doc@(Pandoc meta _) = do let title = docTitle meta + lang <- toLang (getLang opts meta) refArchive <- - case writerReferenceODT opts of - Just f -> liftM toArchive $ B.readFile f - Nothing -> getDefaultReferenceODT datadir + case writerReferenceDoc opts of + Just f -> liftM toArchive $ lift $ P.readFileLazy f + Nothing -> lift $ (toArchive . B.fromStrict) <$> + P.readDataFile "reference.odt" -- handle formulas and pictures - picEntriesRef <- newIORef ([] :: [Entry]) - doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc - let newContents = writeOpenDocument opts{writerWrapText = WrapNone} doc' - epochtime <- floor `fmap` getPOSIXTime + -- picEntriesRef <- P.newIORef ([] :: [Entry]) + doc' <- walkM (transformPicMath opts) $ walk fixDisplayMath doc + newContents <- lift $ writeOpenDocument opts{writerWrapText = WrapNone} doc' + epochtime <- floor `fmap` lift P.getPOSIXTime let contentEntry = toEntry "content.xml" epochtime - $ fromStringLazy newContents - picEntries <- readIORef picEntriesRef + $ fromTextLazy $ TL.fromStrict newContents + picEntries <- gets stEntries let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive @@ -90,14 +111,13 @@ writeODT opts doc@(Pandoc meta _) = do $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" $$ - ( inTags True "manifest:manifest" + (inTags True "manifest:manifest" [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0") - ,("manifest:version","1.2")] - $ ( selfClosingTag "manifest:file-entry" + ,("manifest:version","1.2")] ( selfClosingTag "manifest:file-entry" [("manifest:media-type","application/vnd.oasis.opendocument.text") ,("manifest:full-path","/")] - $$ vcat ( map toFileEntry $ files ) - $$ vcat ( map toFileEntry $ formulas ) + $$ vcat ( map toFileEntry files ) + $$ vcat ( map toFileEntry formulas ) ) ) let archive' = addEntryToArchive manifestEntry archive @@ -105,88 +125,138 @@ writeODT opts doc@(Pandoc meta _) = do $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" $$ - ( inTags True "office:document-meta" + (inTags True "office:document-meta" [("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0") ,("xmlns:xlink","http://www.w3.org/1999/xlink") ,("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:meta","urn:oasis:names:tc:opendocument:xmlns:meta:1.0") ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") - ,("office:version","1.2")] - $ ( inTagsSimple "office:meta" - $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title)) - ) + ,("office:version","1.2")] ( inTagsSimple "office:meta" $ + ( inTagsSimple "dc:title" + (text $ escapeStringForXML (stringify title)) + $$ + case lang of + Just l -> inTagsSimple "dc:language" + (text (escapeStringForXML (renderLang l))) + Nothing -> empty + ) ) ) -- make sure mimetype is first let mimetypeEntry = toEntry "mimetype" epochtime $ fromStringLazy "application/vnd.oasis.opendocument.text" - let archive'' = addEntryToArchive mimetypeEntry + archive'' <- updateStyleWithLang lang + $ addEntryToArchive mimetypeEntry $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' +updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive +updateStyleWithLang Nothing arch = return arch +updateStyleWithLang (Just lang) arch = do + epochtime <- floor `fmap` lift P.getPOSIXTime + return arch{ zEntries = [if eRelativePath e == "styles.xml" + then case parseXMLDoc + (toStringLazy (fromEntry e)) of + Nothing -> e + Just d -> + toEntry "styles.xml" epochtime + ( fromStringLazy + . ppTopElement + . addLang lang $ d ) + else e + | e <- zEntries arch] } + +addLang :: Lang -> Element -> Element +addLang lang = everywhere' (mkT updateLangAttr) + where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) + = Attr n (langLanguage lang) + updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) + = Attr n (langRegion lang) + updateLangAttr x = x + -- | transform both Image and Math elements -transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPicMath opts entriesRef (Image attr@(id', cls, _) lab (src,t)) = do - res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src - case res of - Left (_ :: E.SomeException) -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - return $ Emph lab - Right (img, mbMimeType) -> do - (ptX, ptY) <- case imageSize img of +transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline +transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError + (do (img, mbMimeType) <- P.fetchItem src + (ptX, ptY) <- case imageSize opts img of Right s -> return $ sizeInPoints s Left msg -> do - warn $ "Could not determine image size in `" ++ - src ++ "': " ++ msg + report $ CouldNotDetermineImageSize src msg return (100, 100) let dims = case (getDim Width, getDim Height) of (Just w, Just h) -> [("width", show w), ("height", show h)] - (Just w@(Percent _), Nothing) -> [("width", show w), ("style:rel-height", "scale")] - (Nothing, Just h@(Percent _)) -> [("style:rel-width", "scale"), ("height", show h)] + (Just w@(Percent p), Nothing) -> [("width", show w), ("height", show (p / ratio) ++ "%")] + (Nothing, Just h@(Percent p)) -> [("width", show (p * ratio) ++ "%"), ("height", show h)] (Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")] (Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)] _ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")] where ratio = ptX / ptY - getDim dir = case (dimension dir attr) of + getDim dir = case dimension dir attr of Just (Percent i) -> Just $ Percent i Just dim -> Just $ Inch $ inInch opts dim Nothing -> Nothing let newattr = (id', cls, dims) - entries <- readIORef entriesRef + entries <- gets stEntries let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) (mbMimeType >>= extensionFromMimeType) let newsrc = "Pictures/" ++ show (length entries) <.> extension let toLazy = B.fromChunks . (:[]) - epochtime <- floor `fmap` getPOSIXTime + epochtime <- floor `fmap` lift P.getPOSIXTime let entry = toEntry newsrc epochtime $ toLazy img - modifyIORef entriesRef (entry:) - return $ Image newattr lab (newsrc, t) -transformPicMath _ entriesRef (Math t math) = do - entries <- readIORef entriesRef + modify $ \st -> st{ stEntries = entry : entries } + return $ Image newattr lab (newsrc, t)) + (\e -> do + report $ CouldNotFetchResource src (show e) + return $ Emph lab) + +transformPicMath _ (Math t math) = do + entries <- gets stEntries let dt = if t == InlineMath then DisplayInline else DisplayBlock case writeMathML dt <$> readTeX math of Left _ -> return $ Math t math Right r -> do let conf = useShortEmptyTags (const False) defaultConfigPP let mathml = ppcTopElement conf r - epochtime <- floor `fmap` getPOSIXTime + epochtime <- floor `fmap` (lift P.getPOSIXTime) let dirname = "Formula-" ++ show (length entries) ++ "/" let fname = dirname ++ "content.xml" let entry = toEntry fname epochtime (fromStringLazy mathml) - modifyIORef entriesRef (entry:) + let fname' = dirname ++ "settings.xml" + let entry' = toEntry fname' epochtime $ documentSettings (t == InlineMath) + modify $ \st -> st{ stEntries = entry' : (entry : entries) } return $ RawInline (Format "opendocument") $ render Nothing $ - inTags False "draw:frame" [("text:anchor-type", - if t == DisplayMath - then "paragraph" - else "as-char") - ,("style:vertical-pos", "middle") - ,("style:vertical-rel", "text")] $ + inTags False "draw:frame" (if t == DisplayMath + then [("draw:style-name","fr2") + -- `draw:frame` does not support either + -- `style:vertical-pos` or `style:vertical-rel`, + -- therefore those attributes must go into the + -- `style:style` element + ,("text:anchor-type","paragraph")] + else [("draw:style-name","fr1") + ,("text:anchor-type","as-char")]) $ selfClosingTag "draw:object" [("xlink:href", dirname) , ("xlink:type", "simple") , ("xlink:show", "embed") , ("xlink:actuate", "onLoad")] -transformPicMath _ _ x = return x +transformPicMath _ x = return x + +documentSettings :: Bool -> B.ByteString +documentSettings isTextMode = fromStringLazy $ render Nothing + $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + $$ + (inTags True "office:document-settings" + [("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0") + ,("xmlns:xlink","http://www.w3.org/1999/xlink") + ,("xmlns:config","urn:oasis:names:tc:opendocument:xmlns:config:1.0") + ,("xmlns:ooo","http://openoffice.org/2004/office") + ,("office:version","1.2")] $ + inTagsSimple "office:settings" $ + inTags False "config:config-item-set" + [("config:name", "ooo:configuration-settings")] $ + inTags False "config:config-item" [("config:name", "IsTextMode") + ,("config:type", "boolean")] $ + text $ if isTextMode then "true" else "false") diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs new file mode 100644 index 000000000..30d8d72dd --- /dev/null +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -0,0 +1,108 @@ +{- +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.OOXML + Copyright : Copyright (C) 2012-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions common to OOXML writers (Docx and Powerpoint) +-} +module Text.Pandoc.Writers.OOXML ( mknode + , nodename + , toLazy + , renderXml + , parseXml + , elemToNameSpaces + , elemName + , isElem + , NameSpaces + , fitToPage + ) where + +import Codec.Archive.Zip +import Control.Monad.Reader +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Lazy.Char8 as BL8 +import Data.Maybe (mapMaybe) +import Data.Monoid ((<>)) +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.XML.Light as XML + +mknode :: Node t => String -> [(String,String)] -> t -> Element +mknode s attrs = + add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) + +nodename :: String -> QName +nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } + where (name, prefix) = case break (==':') s of + (xs,[]) -> (xs, Nothing) + (ys, _:zs) -> (zs, Just ys) + +toLazy :: B.ByteString -> BL.ByteString +toLazy = BL.fromChunks . (:[]) + +renderXml :: Element -> BL.ByteString +renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> + UTF8.fromStringLazy (showElement elt) + +parseXml :: (PandocMonad m) => Archive -> Archive -> String -> m Element +parseXml refArchive distArchive relpath = + case findEntryByPath relpath refArchive `mplus` + findEntryByPath relpath distArchive of + Nothing -> fail $ relpath ++ " missing in reference file" + Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of + Nothing -> fail $ relpath ++ " corrupt in reference file" + Just d -> return d + +-- Copied from Util + +attrToNSPair :: XML.Attr -> Maybe (String, String) +attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = + QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + let ns' = ns ++ elemToNameSpaces element + in qName (elName element) == name && + qURI (elName element) == lookup prefix ns' + +type NameSpaces = [(String, String)] + +-- | Scales the image to fit the page +-- sizes are passed in emu +fitToPage :: (Double, Double) -> Integer -> (Integer, Integer) +fitToPage (x, y) pageWidth + -- Fixes width to the page width and scales the height + | x > fromIntegral pageWidth = + (pageWidth, floor $ (fromIntegral pageWidth / x) * y) + | otherwise = (floor x, floor y) diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 20c2c5cbc..29e1bc80c 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013-2015 John MacFarlane + Copyright : Copyright (C) 2013-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,40 +29,45 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to OPML XML. -} module Text.Pandoc.Writers.OPML ( writeOPML) where +import Control.Monad.Except (throwError) +import Data.Text (Text, unpack) +import qualified Data.Text as T +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Compat.Time import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Shared +import Text.Pandoc.Error import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Writers.Markdown (writeMarkdown) -import Text.Pandoc.Pretty -import Text.Pandoc.Compat.Time -import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML -- | Convert Pandoc document to string in OPML format. -writeOPML :: WriterOptions -> Pandoc -> String -writeOPML opts (Pandoc meta blocks) = +writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeOPML opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta - Just metadata = metaToJSON opts - (Just . writeMarkdown def . Pandoc nullMeta) - (Just . trimr . writeMarkdown def . Pandoc nullMeta . - (\ils -> [Plain ils])) - meta' - main = render colwidth $ vcat (map (elementToOPML opts) elements) - context = defField "body" main metadata - in case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + metadata <- metaToJSON opts + (writeMarkdown def . Pandoc nullMeta) + (\ils -> T.stripEnd <$> writeMarkdown def (Pandoc nullMeta [Plain ils])) + meta' + main <- (render colwidth . vcat) <$> mapM (elementToOPML opts) elements + let context = defField "body" main metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -writeHtmlInlines :: [Inline] -> String -writeHtmlInlines ils = trim $ writeHtmlString def - $ Pandoc nullMeta [Plain ils] + +writeHtmlInlines :: PandocMonad m => [Inline] -> m Text +writeHtmlInlines ils = + T.strip <$> writeHtml5String def (Pandoc nullMeta [Plain ils]) -- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT showDateTimeRFC822 :: UTCTime -> String @@ -75,20 +80,27 @@ convertDate ils = maybe "" showDateTimeRFC822 $ #else parseTime #endif - defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils) + defaultTimeLocale "%F" =<< normalizeDate (stringify ils) -- | Convert an Element to OPML. -elementToOPML :: WriterOptions -> Element -> Doc -elementToOPML _ (Blk _) = empty -elementToOPML opts (Sec _ _num _ title elements) = - let isBlk (Blk _) = True - isBlk _ = False - fromBlk (Blk x) = x - fromBlk _ = error "fromBlk called on non-block" +elementToOPML :: PandocMonad m => WriterOptions -> Element -> m Doc +elementToOPML _ (Blk _) = return empty +elementToOPML opts (Sec _ _num _ title elements) = do + let isBlk :: Element -> Bool + isBlk (Blk _) = True + isBlk _ = False + + fromBlk :: PandocMonad m => Element -> m Block + fromBlk (Blk x) = return x + fromBlk _ = throwError $ PandocSomeError "fromBlk called on non-block" + (blocks, rest) = span isBlk elements - attrs = [("text", writeHtmlInlines title)] ++ - [("_note", writeMarkdown def (Pandoc nullMeta - (map fromBlk blocks))) - | not (null blocks)] - in inTags True "outline" attrs $ - vcat (map (elementToOPML opts) rest) + htmlIls <- writeHtmlInlines title + md <- if null blocks + then return mempty + else do blks <- mapM fromBlk blocks + writeMarkdown def $ Pandoc nullMeta blks + let attrs = ("text", unpack htmlIls) : + [("_note", unpack md) | not (null blocks)] + o <- mapM (elementToOPML opts) rest + return $ inTags True "outline" attrs $ vcat o diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 8f0e037c5..17edc0cbd 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2008-2015 Andrea Rossato <andrea.rossato@ing.unitn.it> +Copyright (C) 2008-2018 Andrea Rossato <andrea.rossato@ing.unitn.it> and John MacFarlane. This program is free software; you can redistribute it and/or modify @@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008-2015 Andrea Rossato and John MacFarlane + Copyright : Copyright (C) 2008-2018 Andrea Rossato and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> @@ -30,32 +32,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to OpenDocument XML. -} module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where +import Control.Arrow ((***), (>>>)) +import Control.Monad.State.Strict hiding (when) +import Data.Char (chr) +import Data.List (sortBy) +import qualified Data.Map as Map +import Data.Ord (comparing) +import qualified Data.Set as Set +import Data.Text (Text) +import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.XML +import Text.Pandoc.Pretty import Text.Pandoc.Shared (linesToPara) import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Pretty -import Text.Printf ( printf ) -import Control.Arrow ( (***), (>>>) ) -import Control.Monad.State hiding ( when ) -import Data.Char (chr) -import qualified Data.Set as Set -import qualified Data.Map as Map +import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Data.List (sortBy) -import Data.Ord (comparing) +import Text.Pandoc.XML +import Text.Printf (printf) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block plainToPara (Plain x) = Para x -plainToPara x = x +plainToPara x = x -- -- OpenDocument writer -- +type OD m = StateT WriterState m + data WriterState = WriterState { stNotes :: [Doc] , stTableStyles :: [Doc] @@ -88,46 +96,45 @@ defaultWriterState = when :: Bool -> Doc -> Doc when p a = if p then a else empty -addTableStyle :: Doc -> State WriterState () +addTableStyle :: PandocMonad m => Doc -> OD m () addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s } -addNote :: Doc -> State WriterState () +addNote :: PandocMonad m => Doc -> OD m () addNote i = modify $ \s -> s { stNotes = i : stNotes s } -addParaStyle :: Doc -> State WriterState () +addParaStyle :: PandocMonad m => Doc -> OD m () addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } -addTextStyle :: Set.Set TextStyle -> (String, Doc) -> State WriterState () +addTextStyle :: PandocMonad m => Set.Set TextStyle -> (String, Doc) -> OD m () addTextStyle attrs i = modify $ \s -> s { stTextStyles = Map.insert attrs i (stTextStyles s) } -addTextStyleAttr :: TextStyle -> State WriterState () +addTextStyleAttr :: PandocMonad m => TextStyle -> OD m () addTextStyleAttr t = modify $ \s -> s { stTextStyleAttr = Set.insert t (stTextStyleAttr s) } -increaseIndent :: State WriterState () +increaseIndent :: PandocMonad m => OD m () increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } -resetIndent :: State WriterState () -resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 } +resetIndent :: PandocMonad m => OD m () +resetIndent = modify $ \s -> s { stIndentPara = stIndentPara s - 1 } -inTightList :: State WriterState a -> State WriterState a +inTightList :: PandocMonad m => OD m a -> OD m a inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r -> modify (\s -> s { stTight = False }) >> return r -setInDefinitionList :: Bool -> State WriterState () +setInDefinitionList :: PandocMonad m => Bool -> OD m () setInDefinitionList b = modify $ \s -> s { stInDefinition = b } -setFirstPara :: State WriterState () +setFirstPara :: PandocMonad m => OD m () setFirstPara = modify $ \s -> s { stFirstPara = True } -inParagraphTags :: Doc -> State WriterState Doc -inParagraphTags d | isEmpty d = return empty +inParagraphTags :: PandocMonad m => Doc -> OD m Doc inParagraphTags d = do b <- gets stFirstPara a <- if b then do modify $ \st -> st { stFirstPara = False } - return $ [("text:style-name", "First_20_paragraph")] + return [("text:style-name", "First_20_paragraph")] else return [("text:style-name", "Text_20_body")] return $ inTags False "text:p" a d @@ -137,7 +144,7 @@ inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] inSpanTags :: String -> Doc -> Doc inSpanTags s = inTags False "text:span" [("text:style-name",s)] -withTextStyle :: TextStyle -> State WriterState a -> State WriterState a +withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a withTextStyle s f = do oldTextStyleAttr <- gets stTextStyleAttr addTextStyleAttr s @@ -145,7 +152,7 @@ withTextStyle s f = do modify $ \st -> st{ stTextStyleAttr = oldTextStyleAttr } return res -inTextStyle :: Doc -> State WriterState Doc +inTextStyle :: PandocMonad m => Doc -> OD m Doc inTextStyle d = do at <- gets stTextStyleAttr if Set.null at @@ -161,12 +168,30 @@ inTextStyle d = do inTags False "style:style" [("style:name", styleName) ,("style:family", "text")] - $ selfClosingTag "style:text-properties" - (concatMap textStyleAttr (Set.toList at))) + $ selfClosingTag "style:text-properties" + (concatMap textStyleAttr (Set.toList at))) return $ inTags False "text:span" [("text:style-name",styleName)] d -inHeaderTags :: Int -> Doc -> State WriterState Doc +formulaStyles :: [Doc] +formulaStyles = [formulaStyle InlineMath, formulaStyle DisplayMath] + +formulaStyle :: MathType -> Doc +formulaStyle mt = inTags False "style:style" + [("style:name", if mt == InlineMath then "fr1" else "fr2") + ,("style:family", "graphic") + ,("style:parent-style-name", "Formula")] + $ selfClosingTag "style:graphic-properties" $ if mt == InlineMath then + [("style:vertical-pos", "middle") + ,("style:vertical-rel", "text")] + else + [("style:vertical-pos", "middle") + ,("style:vertical-rel", "paragraph-content") + ,("style:horizontal-pos", "center") + ,("style:horizontal-rel", "paragraph-content") + ,("style:wrap", "none")] + +inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc inHeaderTags i d = return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) , ("text:outline-level", show i)] d @@ -189,64 +214,67 @@ handleSpaces s rm [] = empty -- | Convert Pandoc document to string in OpenDocument format. -writeOpenDocument :: WriterOptions -> Pandoc -> String -writeOpenDocument opts (Pandoc meta blocks) = +writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeOpenDocument opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text render' = render colwidth - ((body, metadata),s) = flip runState + ((body, metadata),s) <- flip runStateT defaultWriterState $ do m <- metaToJSON opts - (fmap (render colwidth) . blocksToOpenDocument opts) - (fmap (render colwidth) . inlinesToOpenDocument opts) + (fmap render' . blocksToOpenDocument opts) + (fmap render' . inlinesToOpenDocument opts) meta b <- render' `fmap` blocksToOpenDocument opts blocks return (b, m) - styles = stTableStyles s ++ stParaStyles s ++ - map snd (reverse $ sortBy (comparing fst) $ - Map.elems (stTextStyles s)) + let styles = stTableStyles s ++ stParaStyles s ++ formulaStyles ++ + map snd (sortBy (flip (comparing fst)) ( + Map.elems (stTextStyles s))) listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) - listStyles = map listStyle (stListStyles s) - automaticStyles = vcat $ reverse $ styles ++ listStyles - context = defField "body" body - $ defField "automatic-styles" (render' automaticStyles) - $ metadata - in case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl context - -withParagraphStyle :: WriterOptions -> String -> [Block] -> State WriterState Doc + let listStyles = map listStyle (stListStyles s) + let automaticStyles = vcat $ reverse $ styles ++ listStyles + let context = defField "body" body + $ defField "toc" (writerTableOfContents opts) + $defField "automatic-styles" (render' automaticStyles) metadata + case writerTemplate opts of + Nothing -> return body + Just tpl -> renderTemplate' tpl context + +withParagraphStyle :: PandocMonad m + => WriterOptions -> String -> [Block] -> OD m Doc withParagraphStyle o s (b:bs) | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b where go i = (<>) i <$> withParagraphStyle o s bs withParagraphStyle _ _ [] = return empty -inPreformattedTags :: String -> State WriterState Doc +inPreformattedTags :: PandocMonad m => String -> OD m Doc inPreformattedTags s = do n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")] return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s -orderedListToOpenDocument :: WriterOptions -> Int -> [[Block]] -> State WriterState Doc +orderedListToOpenDocument :: PandocMonad m + => WriterOptions -> Int -> [[Block]] -> OD m Doc orderedListToOpenDocument o pn bs = vcat . map (inTagsIndented "text:list-item") <$> mapM (orderedItemToOpenDocument o pn . map plainToPara) bs -orderedItemToOpenDocument :: WriterOptions -> Int -> [Block] -> State WriterState Doc -orderedItemToOpenDocument o n (b:bs) - | OrderedList a l <- b = newLevel a l - | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l - | otherwise = go =<< blockToOpenDocument o b - where - go i = ($$) i <$> orderedItemToOpenDocument o n bs - newLevel a l = do - nn <- length <$> gets stParaStyles - ls <- head <$> gets stListStyles - modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) } - inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l -orderedItemToOpenDocument _ _ [] = return empty +orderedItemToOpenDocument :: PandocMonad m + => WriterOptions -> Int -> [Block] -> OD m Doc +orderedItemToOpenDocument o n bs = vcat <$> mapM go bs + where go (OrderedList a l) = newLevel a l + go (Para l) = inParagraphTagsWithStyle ("P" ++ show n) <$> + inlinesToOpenDocument o l + go b = blockToOpenDocument o b + newLevel a l = do + nn <- length <$> gets stParaStyles + ls <- head <$> gets stListStyles + modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : + drop 1 (stListStyles s) } + inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l isTightList :: [[Block]] -> Bool isTightList [] = False @@ -254,7 +282,8 @@ isTightList (b:_) | Plain {} : _ <- b = True | otherwise = False -newOrderedListStyle :: Bool -> ListAttributes -> State WriterState (Int,Int) +newOrderedListStyle :: PandocMonad m + => Bool -> ListAttributes -> OD m (Int,Int) newOrderedListStyle b a = do ln <- (+) 1 . length <$> gets stListStyles let nbs = orderedListLevelStyle a (ln, []) @@ -262,7 +291,8 @@ newOrderedListStyle b a = do modify $ \s -> s { stListStyles = nbs : stListStyles s } return (ln,pn) -bulletListToOpenDocument :: WriterOptions -> [[Block]] -> State WriterState Doc +bulletListToOpenDocument :: PandocMonad m + => WriterOptions -> [[Block]] -> OD m Doc bulletListToOpenDocument o b = do ln <- (+) 1 . length <$> gets stListStyles (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln @@ -270,48 +300,53 @@ bulletListToOpenDocument o b = do is <- listItemsToOpenDocument ("P" ++ show pn) o b return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is -listItemsToOpenDocument :: String -> WriterOptions -> [[Block]] -> State WriterState Doc +listItemsToOpenDocument :: PandocMonad m + => String -> WriterOptions -> [[Block]] -> OD m Doc listItemsToOpenDocument s o is = vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is -deflistItemToOpenDocument :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState Doc +deflistItemToOpenDocument :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) -> OD m Doc deflistItemToOpenDocument o (t,d) = do let ts = if isTightList d then "Definition_20_Term_20_Tight" else "Definition_20_Term" ds = if isTightList d then "Definition_20_Definition_20_Tight" else "Definition_20_Definition" t' <- withParagraphStyle o ts [Para t] - d' <- liftM vcat $ mapM (withParagraphStyle o ds . (map plainToPara)) d + d' <- liftM vcat $ mapM (withParagraphStyle o ds . map plainToPara) d return $ t' $$ d' -inBlockQuote :: WriterOptions -> Int -> [Block] -> State WriterState Doc +inBlockQuote :: PandocMonad m + => WriterOptions -> Int -> [Block] -> OD m Doc inBlockQuote o i (b:bs) | BlockQuote l <- b = do increaseIndent ni <- paraStyle [("style:parent-style-name","Quotations")] go =<< inBlockQuote o ni (map plainToPara l) - | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l - | otherwise = do go =<< blockToOpenDocument o b + | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l + | otherwise = go =<< blockToOpenDocument o b where go block = ($$) block <$> inBlockQuote o i bs inBlockQuote _ _ [] = resetIndent >> return empty -- | Convert a list of Pandoc blocks to OpenDocument. -blocksToOpenDocument :: WriterOptions -> [Block] -> State WriterState Doc +blocksToOpenDocument :: PandocMonad m => WriterOptions -> [Block] -> OD m Doc blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. -blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc +blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m Doc blockToOpenDocument o bs | Plain b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs = figure attr c s t - | Para b <- bs = if null b + | Para b <- bs = if null b && + not (isEnabled Ext_empty_paragraphs o) then return empty else inParagraphTags =<< inlinesToOpenDocument o b | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b - | Div _ xs <- bs = blocksToOpenDocument o xs + | Div attr xs <- bs = withLangFromAttr attr + (blocksToOpenDocument o xs) | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b @@ -324,7 +359,9 @@ blockToOpenDocument o bs [ ("text:style-name", "Horizontal_20_Line") ]) | RawBlock f s <- bs = if f == Format "opendocument" then return $ text s - else return empty + else do + report $ BlockNotRendered bs + return empty | Null <- bs = return empty | otherwise = return empty where @@ -370,17 +407,23 @@ blockToOpenDocument o bs captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] return $ imageDoc $$ captionDoc -colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc +colHeadsToOpenDocument :: PandocMonad m + => WriterOptions -> String -> [String] -> [[Block]] + -> OD m Doc colHeadsToOpenDocument o tn ns hs = inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o tn) (zip ns hs) -tableRowToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc +tableRowToOpenDocument :: PandocMonad m + => WriterOptions -> String -> [String] -> [[Block]] + -> OD m Doc tableRowToOpenDocument o tn ns cs = inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o tn) (zip ns cs) -tableItemToOpenDocument :: WriterOptions -> String -> (String,[Block])-> State WriterState Doc +tableItemToOpenDocument :: PandocMonad m + => WriterOptions -> String -> (String,[Block]) + -> OD m Doc tableItemToOpenDocument o tn (n,i) = let a = [ ("table:style-name" , tn ++ ".A1" ) , ("office:value-type", "string" ) @@ -389,10 +432,10 @@ tableItemToOpenDocument o tn (n,i) = withParagraphStyle o n (map plainToPara i) -- | Convert a list of inline elements to OpenDocument. -inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc +inlinesToOpenDocument :: PandocMonad m => WriterOptions -> [Inline] -> OD m Doc inlinesToOpenDocument o l = hcat <$> toChunks o l -toChunks :: WriterOptions -> [Inline] -> State WriterState [Doc] +toChunks :: PandocMonad m => WriterOptions -> [Inline] -> OD m [Doc] toChunks _ [] = return [] toChunks o (x : xs) | isChunkable x = do @@ -407,21 +450,21 @@ toChunks o (x : xs) where (ys, zs) = span isChunkable xs isChunkable :: Inline -> Bool -isChunkable (Str _) = True -isChunkable Space = True +isChunkable (Str _) = True +isChunkable Space = True isChunkable SoftBreak = True -isChunkable _ = False +isChunkable _ = False -- | Convert an inline element to OpenDocument. -inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc +inlineToOpenDocument :: PandocMonad m => WriterOptions -> Inline -> OD m Doc inlineToOpenDocument o ils = case ils of Space -> return space SoftBreak | writerWrapText o == WrapPreserve -> return $ preformatted "\n" - | otherwise -> return $ space - Span _ xs -> inlinesToOpenDocument o xs + | otherwise ->return space + Span attr xs -> withLangFromAttr attr (inlinesToOpenDocument o xs) LineBreak -> return $ selfClosingTag "text:line-break" [] Str s -> return $ handleSpaces $ escapeStringForXML s Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l @@ -432,11 +475,14 @@ inlineToOpenDocument o ils SmallCaps l -> withTextStyle SmallC $ inlinesToOpenDocument o l Quoted t l -> inQuotes t <$> inlinesToOpenDocument o l Code _ s -> inlinedCode $ preformatted s - Math t s -> inlinesToOpenDocument o (texMathToInlines t s) + Math t s -> lift (texMathToInlines t s) >>= + inlinesToOpenDocument o Cite _ l -> inlinesToOpenDocument o l RawInline f s -> if f == Format "opendocument" then return $ text s - else return empty + else do + report $ InlineNotRendered ils + return empty Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l Image attr _ (s,t) -> mkImg attr s t Note l -> mkNote l @@ -453,8 +499,6 @@ inlineToOpenDocument o ils let getDims [] = [] getDims (("width", w) :xs) = ("svg:width", w) : getDims xs getDims (("height", h):xs) = ("svg:height", h) : getDims xs - getDims (x@("style:rel-width", _) :xs) = x : getDims xs - getDims (x@("style:rel-height", _):xs) = x : getDims xs getDims (_:xs) = getDims xs return $ inTags False "draw:frame" (("draw:name", "img" ++ show id') : getDims kvs) $ @@ -473,18 +517,18 @@ inlineToOpenDocument o ils addNote nn return nn -bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc])) -bulletListStyle l = - let doStyles i = inTags True "text:list-level-style-bullet" - [ ("text:level" , show (i + 1) ) - , ("text:style-name" , "Bullet_20_Symbols") - , ("style:num-suffix", "." ) - , ("text:bullet-char", [bulletList !! i] ) - ] (listLevelStyle (1 + i)) - bulletList = map chr $ cycle [8226,8227,8259] - listElStyle = map doStyles [0..9] - in do pn <- paraListStyle l - return (pn, (l, listElStyle)) +bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc])) +bulletListStyle l = do + let doStyles i = inTags True "text:list-level-style-bullet" + [ ("text:level" , show (i + 1) ) + , ("text:style-name" , "Bullet_20_Symbols") + , ("style:num-suffix", "." ) + , ("text:bullet-char", [bulletList !! i] ) + ] (listLevelStyle (1 + i)) + bulletList = map chr $ cycle [8226,9702,9642] + listElStyle = map doStyles [0..9] + pn <- paraListStyle l + return (pn, (l, listElStyle)) orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc]) orderedListLevelStyle (s,n, d) (l,ls) = @@ -494,11 +538,11 @@ orderedListLevelStyle (s,n, d) (l,ls) = ,("style:num-suffix", ")")] _ -> [("style:num-suffix", ".")] format = case n of - UpperAlpha -> "A" - LowerAlpha -> "a" - UpperRoman -> "I" - LowerRoman -> "i" - _ -> "1" + UpperAlpha -> "A" + LowerAlpha -> "a" + UpperRoman -> "I" + LowerRoman -> "i" + _ -> "1" listStyle = inTags True "text:list-level-style-number" ([ ("text:level" , show $ 1 + length ls ) , ("text:style-name" , "Numbering_20_Symbols") @@ -509,10 +553,10 @@ orderedListLevelStyle (s,n, d) (l,ls) = listLevelStyle :: Int -> Doc listLevelStyle i = - let indent = show (0.25 * fromIntegral i :: Double) in + let indent = show (0.4 * fromIntegral (i - 1) :: Double) in selfClosingTag "style:list-level-properties" [ ("text:space-before" , indent ++ "in") - , ("text:min-label-width", "0.25in")] + , ("text:min-label-width", "0.4in")] tableStyle :: Int -> [(Char,Double)] -> Doc tableStyle num wcs = @@ -529,7 +573,7 @@ tableStyle num wcs = [ ("style:name" , tableId ++ "." ++ [c]) , ("style:family", "table-column" )] $ selfClosingTag "style:table-column-properties" - [("style:rel-column-width", printf "%d*" $ (floor $ w * 65535 :: Integer))] + [("style:rel-column-width", printf "%d*" (floor $ w * 65535 :: Integer))] cellStyle = inTags True "style:style" [ ("style:name" , tableId ++ ".A1") , ("style:family", "table-cell" )] $ @@ -538,31 +582,33 @@ tableStyle num wcs = columnStyles = map colStyle wcs in table $$ vcat columnStyles $$ cellStyle -paraStyle :: [(String,String)] -> State WriterState Int +paraStyle :: PandocMonad m => [(String,String)] -> OD m Int paraStyle attrs = do pn <- (+) 1 . length <$> gets stParaStyles - i <- (*) 0.5 . fromIntegral <$> gets stIndentPara :: State WriterState Double + i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara b <- gets stInDefinition t <- gets stTight let styleAttr = [ ("style:name" , "P" ++ show pn) , ("style:family" , "paragraph" )] - indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i + indentVal = flip (++) "in" . show $ if b then max 0.5 i else i tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] - indent = if (i /= 0 || b) + indent = if i /= 0 || b then [ ("fo:margin-left" , indentVal) , ("fo:margin-right" , "0in" ) , ("fo:text-indent" , "0in" ) , ("style:auto-text-indent" , "false" )] else [] attributes = indent ++ tight - paraProps = when (not $ null attributes) $ - selfClosingTag "style:paragraph-properties" attributes + paraProps = if null attributes + then mempty + else selfClosingTag + "style:paragraph-properties" attributes addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps return pn -paraListStyle :: Int -> State WriterState Int +paraListStyle :: PandocMonad m => Int -> OD m Int paraListStyle l = paraStyle [("style:parent-style-name","Text_20_body") ,("style:list-style-name", "L" ++ show l )] @@ -582,7 +628,14 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre +data TextStyle = Italic + | Bold + | Strike + | Sub + | Sup + | SmallC + | Pre + | Language Lang deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] @@ -600,4 +653,18 @@ textStyleAttr s | Pre <- s = [("style:font-name" ,"Courier New") ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] + | Language lang <- s + = [("fo:language" ,langLanguage lang) + ,("fo:country" ,langRegion lang)] | otherwise = [] + +withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a +withLangFromAttr (_,_,kvs) action = + case lookup "lang" kvs of + Nothing -> action + Just l -> + case parseBCP47 l of + Right lang -> withTextStyle (Language lang) action + Left _ -> do + report $ InvalidLang l + action diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 4302459cc..2307204a5 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -1,8 +1,8 @@ {-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2010-2015 Puneeth Chaganti <punchagan@gmail.com> - Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>, - and John MacFarlane <jgm@berkeley.edu> + 2010-2018 John MacFarlane <jgm@berkeley.edu> + 2016-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,10 +21,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org - Copyright : Copyright (C) 2010-2015 Puneeth Chaganti and John MacFarlane + Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> + 2010-2018 John MacFarlane <jgm@berkeley.edu> + 2016-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> License : GNU GPL, version 2 or above - Maintainer : Puneeth Chaganti <punchagan@gmail.com> + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha Portability : portable @@ -32,64 +34,67 @@ Conversion of 'Pandoc' documents to Emacs Org-Mode. Org-Mode: <http://orgmode.org> -} -module Text.Pandoc.Writers.Org ( writeOrg) where +module Text.Pandoc.Writers.Org (writeOrg) where +import Control.Monad.State.Strict +import Data.Char (isAlphaNum, toLower) +import Data.List (intersect, intersperse, isPrefixOf, partition, transpose) +import Data.Text (Text) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared import Text.Pandoc.Pretty +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Data.Char ( isAlphaNum, toLower ) -import Data.List ( isPrefixOf, intersect, intersperse, partition, transpose ) -import Control.Monad.State +import Text.Pandoc.Writers.Shared data WriterState = - WriterState { stNotes :: [[Block]] - , stLinks :: Bool - , stImages :: Bool - , stHasMath :: Bool - , stOptions :: WriterOptions + WriterState { stNotes :: [[Block]] + , stHasMath :: Bool + , stOptions :: WriterOptions } +type Org = StateT WriterState + -- | Convert Pandoc to Org. -writeOrg :: WriterOptions -> Pandoc -> String -writeOrg opts document = - let st = WriterState { stNotes = [], stLinks = False, - stImages = False, stHasMath = False, +writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeOrg opts document = do + let st = WriterState { stNotes = [], + stHasMath = False, stOptions = opts } - in evalState (pandocToOrg document) st + evalStateT (pandocToOrg document) st -- | Return Org representation of document. -pandocToOrg :: Pandoc -> State WriterState String +pandocToOrg :: PandocMonad m => Pandoc -> Org m Text pandocToOrg (Pandoc meta blocks) = do - opts <- liftM stOptions get + opts <- gets stOptions let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToOrg) - (fmap (render colwidth) . inlineListToOrg) + (fmap render' . blockListToOrg) + (fmap render' . inlineListToOrg) meta body <- blockListToOrg blocks - notes <- liftM (reverse . stNotes) get >>= notesToOrg - -- note that the notes may contain refs, so we do them first - hasMath <- liftM stHasMath get - let main = render colwidth $ foldl ($+$) empty $ [body, notes] + notes <- gets (reverse . stNotes) >>= notesToOrg + hasMath <- gets stHasMath + let main = render colwidth . foldl ($+$) empty $ [body, notes] let context = defField "body" main - $ defField "math" hasMath + . defField "math" hasMath $ metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Return Org representation of notes. -notesToOrg :: [[Block]] -> State WriterState Doc +notesToOrg :: PandocMonad m => [[Block]] -> Org m Doc notesToOrg notes = - mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= - return . vsep + vsep <$> zipWithM noteToOrg [1..] notes -- | Return Org representation of a note. -noteToOrg :: Int -> [Block] -> State WriterState Doc +noteToOrg :: PandocMonad m => Int -> [Block] -> Org m Doc noteToOrg num note = do contents <- blockListToOrg note let marker = "[fn:" ++ show num ++ "] " @@ -109,8 +114,9 @@ isRawFormat f = f == Format "latex" || f == Format "tex" || f == Format "org" -- | Convert Pandoc block element to Org. -blockToOrg :: Block -- ^ Block element - -> State WriterState Doc +blockToOrg :: PandocMonad m + => Block -- ^ Block element + -> Org m Doc blockToOrg Null = return empty blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do contents <- blockListToOrg bs @@ -123,36 +129,25 @@ blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do blankline $$ contents $$ blankline $$ drawerEndTag $$ blankline -blockToOrg (Div attrs bs) = do +blockToOrg (Div (ident, classes, kv) bs) = do contents <- blockListToOrg bs + -- if one class looks like the name of a greater block then output as such: + -- The ID, if present, is added via the #+NAME keyword; other classes and + -- key-value pairs are kept as #+ATTR_HTML attributes. let isGreaterBlockClass = (`elem` ["center", "quote"]) . map toLower - return $ case attrs of - ("", [], []) -> - -- nullAttr, treat contents as if it wasn't wrapped - blankline $$ contents $$ blankline - (ident, [], []) -> - -- only an id: add id as an anchor, unwrap the rest - blankline $$ "<<" <> text ident <> ">>" $$ contents $$ blankline - (ident, classes, kv) -> - -- if one class looks like the name of a greater block then output as - -- such: The ID, if present, is added via the #+NAME keyword; other - -- classes and key-value pairs are kept as #+ATTR_HTML attributes. - let - (blockTypeCand, classes') = partition isGreaterBlockClass classes - in case blockTypeCand of - (blockType:classes'') -> - blankline $$ attrHtml (ident, classes'' <> classes', kv) $$ - "#+BEGIN_" <> text blockType $$ contents $$ - "#+END_" <> text blockType $$ blankline - _ -> - -- fallback: wrap in div tags - let - startTag = tagWithAttrs "div" attrs - endTag = text "</div>" - in blankline $$ "#+BEGIN_HTML" $$ - nest 2 startTag $$ "#+END_HTML" $$ blankline $$ - contents $$ blankline $$ "#+BEGIN_HTML" $$ - nest 2 endTag $$ "#+END_HTML" $$ blankline + (blockTypeCand, classes') = partition isGreaterBlockClass classes + return $ case blockTypeCand of + (blockType:classes'') -> + blankline $$ attrHtml (ident, classes'' <> classes', kv) $$ + "#+BEGIN_" <> text blockType $$ contents $$ + "#+END_" <> text blockType $$ blankline + _ -> + -- fallback with id: add id as an anchor if present, discard classes and + -- key-value pairs, unwrap the content. + let contents' = if not (null ident) + then "<<" <> text ident <> ">>" $$ contents + else contents + in blankline $$ contents' $$ blankline blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure blockToOrg (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do @@ -167,20 +162,22 @@ blockToOrg (Para inlines) = do blockToOrg (LineBlock lns) = do let splitStanza [] = [] splitStanza xs = case break (== mempty) xs of - (l, []) -> l : [] + (l, []) -> [l] (l, _:r) -> l : splitStanza r let joinWithLinefeeds = nowrap . mconcat . intersperse cr let joinWithBlankLines = mconcat . intersperse blankline - let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls - contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns) + let prettifyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls + contents <- joinWithBlankLines <$> mapM prettifyStanza (splitStanza lns) return $ blankline $$ "#+BEGIN_VERSE" $$ nest 2 contents $$ "#+END_VERSE" <> blankline blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | isRawFormat f = - return $ text str -blockToOrg (RawBlock _ _) = return empty +blockToOrg b@(RawBlock f str) + | isRawFormat f = return $ text str + | otherwise = do + report $ BlockNotRendered b + return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToOrg (Header level attr inlines) = do contents <- inlineListToOrg inlines @@ -190,7 +187,7 @@ blockToOrg (Header level attr inlines) = do else cr <> nest (level + 1) (propertiesDrawer attr) return $ headerStr <> " " <> contents <> drawerStr <> blankline blockToOrg (CodeBlock (_,classes,_) str) = do - opts <- stOptions <$> get + opts <- gets stOptions let tabstop = writerTabStop opts let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers let (beg, end) = case at of @@ -205,7 +202,7 @@ blockToOrg (Table caption' _ _ headers rows) = do caption'' <- inlineListToOrg caption' let caption = if null caption' then empty - else ("#+CAPTION: " <> caption'') + else "#+CAPTION: " <> caption'' headers' <- mapM blockListToOrg headers rawRows <- mapM (mapM blockListToOrg) rows let numChars = maximum . map offset @@ -215,16 +212,16 @@ blockToOrg (Table caption' _ _ headers rows) = do -- FIXME: Org doesn't allow blocks with height more than 1. let hpipeBlocks blocks = hcat [beg, middle, end] where h = maximum (1 : map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") + sep' = lblock 3 $ vcat (replicate h (text " | ")) + beg = lblock 2 $ vcat (replicate h (text "| ")) + end = lblock 2 $ vcat (replicate h (text " |")) middle = hcat $ intersperse sep' blocks let makeRow = hpipeBlocks . zipWith lblock widthsInChars let head' = makeRow headers' rows' <- mapM (\row -> do cols <- mapM blockListToOrg row return $ makeRow cols) rows let border ch = char '|' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ + (hcat . intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '|' let body = vcat rows' @@ -245,8 +242,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $ - zip markers' items + contents <- zipWithM orderedListItemToOrg markers' items -- ensure that sublists have preceding blank line return $ blankline $$ vcat contents $$ blankline blockToOrg (DefinitionList items) = do @@ -254,25 +250,27 @@ blockToOrg (DefinitionList items) = do return $ vcat contents $$ blankline -- | Convert bullet list item (list of blocks) to Org. -bulletListItemToOrg :: [Block] -> State WriterState Doc +bulletListItemToOrg :: PandocMonad m => [Block] -> Org m Doc bulletListItemToOrg items = do contents <- blockListToOrg items - return $ hang 3 "- " (contents <> cr) + return $ hang 2 "- " (contents <> cr) -- | Convert ordered list item (a list of blocks) to Org. -orderedListItemToOrg :: String -- ^ marker for list item +orderedListItemToOrg :: PandocMonad m + => String -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc + -> Org m Doc orderedListItemToOrg marker items = do contents <- blockListToOrg items return $ hang (length marker + 1) (text marker <> space) (contents <> cr) -- | Convert defintion list item (label, list of blocks) to Org. -definitionListItemToOrg :: ([Inline], [[Block]]) -> State WriterState Doc +definitionListItemToOrg :: PandocMonad m + => ([Inline], [[Block]]) -> Org m Doc definitionListItemToOrg (label, defs) = do label' <- inlineListToOrg label - contents <- liftM vcat $ mapM blockListToOrg defs - return $ hang 3 "- " $ label' <> " :: " <> (contents <> cr) + contents <- vcat <$> mapM blockListToOrg defs + return . hang 2 "- " $ label' <> " :: " <> (contents <> cr) -- | Convert list of key/value pairs to Org :PROPERTIES: drawer. propertiesDrawer :: Attr -> Doc @@ -280,8 +278,8 @@ propertiesDrawer (ident, classes, kv) = let drawerStart = text ":PROPERTIES:" drawerEnd = text ":END:" - kv' = if (classes == mempty) then kv else ("CLASS", unwords classes):kv - kv'' = if (ident == mempty) then kv' else ("CUSTOM_ID", ident):kv' + kv' = if classes == mempty then kv else ("CLASS", unwords classes):kv + kv'' = if ident == mempty then kv' else ("CUSTOM_ID", ident):kv' properties = vcat $ map kvToOrgProperty kv'' in drawerStart <> cr <> properties <> cr <> drawerEnd @@ -294,23 +292,37 @@ attrHtml :: Attr -> Doc attrHtml ("" , [] , []) = mempty attrHtml (ident, classes, kvs) = let - name = if (null ident) then mempty else "#+NAME: " <> text ident <> cr + name = if null ident then mempty else "#+NAME: " <> text ident <> cr keyword = "#+ATTR_HTML" classKv = ("class", unwords classes) kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs) in name <> keyword <> ": " <> text (unwords kvStrings) <> cr -- | Convert list of Pandoc block elements to Org. -blockListToOrg :: [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat +blockListToOrg :: PandocMonad m + => [Block] -- ^ List of block elements + -> Org m Doc +blockListToOrg blocks = vcat <$> mapM blockToOrg blocks -- | Convert list of Pandoc inline elements to Org. -inlineListToOrg :: [Inline] -> State WriterState Doc -inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat +inlineListToOrg :: PandocMonad m + => [Inline] + -> Org m Doc +inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst) + where fixMarkers [] = [] -- prevent note refs and list markers from wrapping, see #4171 + fixMarkers (Space : x : rest) | shouldFix x = + Str " " : x : fixMarkers rest + fixMarkers (SoftBreak : x : rest) | shouldFix x = + Str " " : x : fixMarkers rest + fixMarkers (x : rest) = x : fixMarkers rest + + shouldFix Note{} = True -- Prevent footnotes + shouldFix (Str "-") = True -- Prevent bullet list items + -- TODO: prevent ordered list items + shouldFix _ = False -- | Convert Pandoc inline element to Org. -inlineToOrg :: Inline -> State WriterState Doc +inlineToOrg :: PandocMonad m => Inline -> Org m Doc inlineToOrg (Span (uid, [], []) []) = return $ "<<" <> text uid <> ">>" inlineToOrg (Span _ lst) = @@ -339,50 +351,48 @@ inlineToOrg (Quoted DoubleQuote lst) = do return $ "\"" <> contents <> "\"" inlineToOrg (Cite _ lst) = inlineListToOrg lst inlineToOrg (Code _ str) = return $ "=" <> text str <> "=" -inlineToOrg (Str str) = return $ text $ escapeString str +inlineToOrg (Str str) = return . text $ escapeString str inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath then "$" <> text str <> "$" else "$$" <> text str <> "$$" -inlineToOrg (RawInline f@(Format f') str) = - return $ if isRawFormat f - then text str - else "@@" <> text f' <> ":" <> text str <> "@@" -inlineToOrg (LineBreak) = return (text "\\\\" <> cr) +inlineToOrg il@(RawInline f str) + | isRawFormat f = return $ text str + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToOrg LineBreak = return (text "\\\\" <> cr) inlineToOrg Space = return space inlineToOrg SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of - WrapPreserve -> return cr - WrapAuto -> return space - WrapNone -> return space -inlineToOrg (Link _ txt (src, _)) = do + WrapPreserve -> return cr + WrapAuto -> return space + WrapNone -> return space +inlineToOrg (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink - do modify $ \s -> s{ stLinks = True } - return $ "[[" <> text (orgPath x) <> "]]" + return $ "[[" <> text (orgPath x) <> "]]" _ -> do contents <- inlineListToOrg txt - modify $ \s -> s{ stLinks = True } return $ "[[" <> text (orgPath src) <> "][" <> contents <> "]]" -inlineToOrg (Image _ _ (source, _)) = do - modify $ \s -> s{ stImages = True } +inlineToOrg (Image _ _ (source, _)) = return $ "[[" <> text (orgPath source) <> "]]" inlineToOrg (Note contents) = do -- add to notes in state - notes <- get >>= (return . stNotes) + notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 + let ref = show $ length notes + 1 return $ "[fn:" <> text ref <> "]" orgPath :: String -> String orgPath src = case src of - [] -> mempty -- wiki link - ('#':xs) -> xs -- internal link - _ | isUrl src -> src - _ | isFilePath src -> src - _ -> "file:" <> src + [] -> mempty -- wiki link + ('#':_) -> src -- internal link + _ | isUrl src -> src + _ | isFilePath src -> src + _ -> "file:" <> src where isFilePath :: String -> Bool isFilePath cs = any (`isPrefixOf` cs) ["/", "./", "../", "file:"] diff --git a/src/Text/Pandoc/Writers/Powerpoint.hs b/src/Text/Pandoc/Writers/Powerpoint.hs new file mode 100644 index 000000000..645a4cb86 --- /dev/null +++ b/src/Text/Pandoc/Writers/Powerpoint.hs @@ -0,0 +1,63 @@ + + +{- +Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Powerpoint + Copyright : Copyright (C) 2017-2018 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to powerpoint (pptx). -} + +{- +This is a wrapper around two modules: + + - Text.Pandoc.Writers.Powerpoint.Presentation (which converts a + pandoc document into a Presentation datatype), and + + - Text.Pandoc.Writers.Powerpoint.Output (which converts a + Presentation into a zip archive, which can be output). +-} + +module Text.Pandoc.Writers.Powerpoint (writePowerpoint) where + +import Codec.Archive.Zip +import Text.Pandoc.Definition +import Text.Pandoc.Walk +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Options (WriterOptions) +import Text.Pandoc.Writers.Shared (fixDisplayMath) +import Text.Pandoc.Writers.Powerpoint.Presentation (documentToPresentation) +import Text.Pandoc.Writers.Powerpoint.Output (presentationToArchive) +import qualified Data.ByteString.Lazy as BL + +writePowerpoint :: (PandocMonad m) + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m BL.ByteString +writePowerpoint opts (Pandoc meta blks) = do + let blks' = walk fixDisplayMath blks + let (pres, logMsgs) = documentToPresentation opts (Pandoc meta blks') + mapM_ report logMsgs + archv <- presentationToArchive opts pres + return $ fromArchive archv diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs new file mode 100644 index 000000000..410b6c20c --- /dev/null +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -0,0 +1,1834 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Powerpoint.Output + Copyright : Copyright (C) 2017-2018 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Conversion of Presentation datatype (defined in +Text.Pandoc.Writers.Powerpoint.Presentation) to a zip archive. +-} + +module Text.Pandoc.Writers.Powerpoint.Output ( presentationToArchive + ) where + +import Control.Monad.Except (throwError, catchError) +import Control.Monad.Reader +import Control.Monad.State +import Codec.Archive.Zip +import Data.Char (toUpper) +import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) +import Data.Default +import Text.Pandoc.Compat.Time (formatTime, defaultTimeLocale) +import Data.Time.Clock (UTCTime) +import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) +import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) +import Text.XML.Light +import Text.Pandoc.Definition +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Error (PandocError(..)) +import qualified Text.Pandoc.Class as P +import Text.Pandoc.Options +import Text.Pandoc.MIME +import qualified Data.ByteString.Lazy as BL +import Text.Pandoc.Writers.OOXML +import qualified Data.Map as M +import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, isJust, maybeToList, catMaybes) +import Text.Pandoc.ImageSize +import Control.Applicative ((<|>)) +import System.FilePath.Glob +import Text.TeXMath +import Text.Pandoc.Writers.Math (convertMath) +import Text.Pandoc.Writers.Powerpoint.Presentation +import Skylighting (fromColor) + +-- This populates the global ids map with images already in the +-- template, so the ids won't be used by images introduced by the +-- user. +initialGlobalIds :: Archive -> Archive -> M.Map FilePath Int +initialGlobalIds refArchive distArchive = + let archiveFiles = filesInArchive refArchive `union` filesInArchive distArchive + mediaPaths = filter (isPrefixOf "ppt/media/image") archiveFiles + + go :: FilePath -> Maybe (FilePath, Int) + go fp = do + s <- stripPrefix "ppt/media/image" $ fst $ splitExtension fp + (n, _) <- listToMaybe $ reads s + return (fp, n) + in + M.fromList $ mapMaybe go mediaPaths + +getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer) +getPresentationSize refArchive distArchive = do + entry <- findEntryByPath "ppt/presentation.xml" refArchive `mplus` + findEntryByPath "ppt/presentation.xml" distArchive + presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry + let ns = elemToNameSpaces presElement + sldSize <- findChild (elemName ns "p" "sldSz") presElement + cxS <- findAttr (QName "cx" Nothing Nothing) sldSize + cyS <- findAttr (QName "cy" Nothing Nothing) sldSize + (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String) + (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String) + return (cx `div` 12700, cy `div` 12700) + +data WriterEnv = WriterEnv { envRefArchive :: Archive + , envDistArchive :: Archive + , envUTCTime :: UTCTime + , envOpts :: WriterOptions + , envPresentationSize :: (Integer, Integer) + , envSlideHasHeader :: Bool + , envInList :: Bool + , envInNoteSlide :: Bool + , envCurSlideId :: Int + -- the difference between the number at + -- the end of the slide file name and + -- the rId number + , envSlideIdOffset :: Int + , envContentType :: ContentType + , envSlideIdMap :: M.Map SlideId Int + -- maps the slide number to the + -- corresponding notes id number. If there + -- are no notes for a slide, there will be + -- no entry in the map for it. + , envSpeakerNotesIdMap :: M.Map Int Int + } + deriving (Show) + +instance Default WriterEnv where + def = WriterEnv { envRefArchive = emptyArchive + , envDistArchive = emptyArchive + , envUTCTime = posixSecondsToUTCTime 0 + , envOpts = def + , envPresentationSize = (720, 540) + , envSlideHasHeader = False + , envInList = False + , envInNoteSlide = False + , envCurSlideId = 1 + , envSlideIdOffset = 1 + , envContentType = NormalContent + , envSlideIdMap = mempty + , envSpeakerNotesIdMap = mempty + } + +data ContentType = NormalContent + | TwoColumnLeftContent + | TwoColumnRightContent + deriving (Show, Eq) + +data MediaInfo = MediaInfo { mInfoFilePath :: FilePath + , mInfoLocalId :: Int + , mInfoGlobalId :: Int + , mInfoMimeType :: Maybe MimeType + , mInfoExt :: Maybe String + , mInfoCaption :: Bool + } deriving (Show, Eq) + +data WriterState = WriterState { stLinkIds :: M.Map Int (M.Map Int LinkTarget) + -- (FP, Local ID, Global ID, Maybe Mime) + , stMediaIds :: M.Map Int [MediaInfo] + , stMediaGlobalIds :: M.Map FilePath Int + } deriving (Show, Eq) + +instance Default WriterState where + def = WriterState { stLinkIds = mempty + , stMediaIds = mempty + , stMediaGlobalIds = mempty + } + +type P m = ReaderT WriterEnv (StateT WriterState m) + +runP :: Monad m => WriterEnv -> WriterState -> P m a -> m a +runP env st p = evalStateT (runReaderT p env) st + +-------------------------------------------------------------------- + +copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive +copyFileToArchive arch fp = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + case findEntryByPath fp refArchive `mplus` findEntryByPath fp distArchive of + Nothing -> fail $ fp ++ " missing in reference file" + Just e -> return $ addEntryToArchive e arch + +alwaysInheritedPatterns :: [Pattern] +alwaysInheritedPatterns = + map compile [ "docProps/app.xml" + , "ppt/slideLayouts/slideLayout*.xml" + , "ppt/slideLayouts/_rels/slideLayout*.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/theme/theme1.xml" + , "ppt/theme/_rels/theme1.xml.rels" + , "ppt/presProps.xml" + , "ppt/viewProps.xml" + , "ppt/tableStyles.xml" + , "ppt/media/image*" + ] + +-- We only look for these under special conditions +contingentInheritedPatterns :: Presentation -> [Pattern] +contingentInheritedPatterns pres = [] ++ + if presHasSpeakerNotes pres + then map compile [ "ppt/notesMasters/notesMaster*.xml" + , "ppt/notesMasters/_rels/notesMaster*.xml.rels" + , "ppt/theme/theme2.xml" + , "ppt/theme/_rels/theme2.xml.rels" + ] + else [] + +inheritedPatterns :: Presentation -> [Pattern] +inheritedPatterns pres = + alwaysInheritedPatterns ++ contingentInheritedPatterns pres + +patternToFilePaths :: PandocMonad m => Pattern -> P m [FilePath] +patternToFilePaths pat = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + + let archiveFiles = filesInArchive refArchive `union` filesInArchive distArchive + return $ filter (match pat) archiveFiles + +patternsToFilePaths :: PandocMonad m => [Pattern] -> P m [FilePath] +patternsToFilePaths pats = concat <$> mapM patternToFilePaths pats + +-- Here are the files we'll require to make a Powerpoint document. If +-- any of these are missing, we should error out of our build. +requiredFiles :: [FilePath] +requiredFiles = [ "docProps/app.xml" + , "ppt/presProps.xml" + , "ppt/slideLayouts/slideLayout1.xml" + , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" + , "ppt/slideLayouts/slideLayout2.xml" + , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" + , "ppt/slideLayouts/slideLayout3.xml" + , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" + , "ppt/slideLayouts/slideLayout4.xml" + , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/theme/theme1.xml" + , "ppt/viewProps.xml" + , "ppt/tableStyles.xml" + ] + +presentationToArchiveP :: PandocMonad m => Presentation -> P m Archive +presentationToArchiveP p@(Presentation docProps slides) = do + filePaths <- patternsToFilePaths $ inheritedPatterns p + + -- make sure all required files are available: + let missingFiles = filter (\fp -> not (fp `elem` filePaths)) requiredFiles + unless (null missingFiles) + (throwError $ + PandocSomeError $ + "The following required files are missing:\n" ++ + (unlines $ map (" " ++) missingFiles) + ) + + newArch' <- foldM copyFileToArchive emptyArchive filePaths + -- we make a docProps/core.xml entry out of the presentation docprops + docPropsEntry <- docPropsToEntry docProps + -- we make this ourself in case there's something unexpected in the + -- one in the reference doc. + relsEntry <- topLevelRelsEntry + -- presentation entry and rels. We have to do the rels first to make + -- sure we know the correct offset for the rIds. + presEntry <- presentationToPresEntry p + presRelsEntry <- presentationToRelsEntry p + slideEntries <- mapM slideToEntry slides + slideRelEntries <- mapM slideToSlideRelEntry slides + spkNotesEntries <- catMaybes <$> mapM slideToSpeakerNotesEntry slides + spkNotesRelEntries <- catMaybes <$> mapM slideToSpeakerNotesRelEntry slides + -- These have to come after everything, because they need the info + -- built up in the state. + mediaEntries <- makeMediaEntries + contentTypesEntry <- presentationToContentTypes p >>= contentTypesToEntry + -- fold everything into our inherited archive and return it. + return $ foldr addEntryToArchive newArch' $ + slideEntries ++ + slideRelEntries ++ + spkNotesEntries ++ + spkNotesRelEntries ++ + mediaEntries ++ + [contentTypesEntry, docPropsEntry, relsEntry, presEntry, presRelsEntry] + +makeSlideIdMap :: Presentation -> M.Map SlideId Int +makeSlideIdMap (Presentation _ slides) = + M.fromList $ (map slideId slides) `zip` [1..] + +makeSpeakerNotesMap :: Presentation -> M.Map Int Int +makeSpeakerNotesMap (Presentation _ slides) = + M.fromList $ (mapMaybe f $ slides `zip` [1..]) `zip` [1..] + where f (Slide _ _ Nothing, _) = Nothing + f (Slide _ _ (Just _), n) = Just n + +presentationToArchive :: PandocMonad m => WriterOptions -> Presentation -> m Archive +presentationToArchive opts pres = do + distArchive <- (toArchive . BL.fromStrict) <$> + P.readDefaultDataFile "reference.pptx" + refArchive <- case writerReferenceDoc opts of + Just f -> toArchive <$> P.readFileLazy f + Nothing -> (toArchive . BL.fromStrict) <$> + P.readDataFile "reference.pptx" + + utctime <- P.getCurrentTime + + presSize <- case getPresentationSize refArchive distArchive of + Just sz -> return sz + Nothing -> throwError $ + PandocSomeError $ + "Could not determine presentation size" + + let env = def { envRefArchive = refArchive + , envDistArchive = distArchive + , envUTCTime = utctime + , envOpts = opts + , envPresentationSize = presSize + , envSlideIdMap = makeSlideIdMap pres + , envSpeakerNotesIdMap = makeSpeakerNotesMap pres + } + + let st = def { stMediaGlobalIds = initialGlobalIds refArchive distArchive + } + + runP env st $ presentationToArchiveP pres + + + +-------------------------------------------------- + +-- Check to see if the presentation has speaker notes. This will +-- influence whether we import the notesMaster template. +presHasSpeakerNotes :: Presentation -> Bool +presHasSpeakerNotes (Presentation _ slides) = any isJust $ map slideSpeakerNotes slides + +curSlideHasSpeakerNotes :: PandocMonad m => P m Bool +curSlideHasSpeakerNotes = do + sldId <- asks envCurSlideId + notesIdMap <- asks envSpeakerNotesIdMap + return $ isJust $ M.lookup sldId notesIdMap + +-------------------------------------------------- + +getLayout :: PandocMonad m => Layout -> P m Element +getLayout layout = do + let layoutpath = case layout of + (MetadataSlide _ _ _ _) -> "ppt/slideLayouts/slideLayout1.xml" + (TitleSlide _) -> "ppt/slideLayouts/slideLayout3.xml" + (ContentSlide _ _) -> "ppt/slideLayouts/slideLayout2.xml" + (TwoColumnSlide _ _ _) -> "ppt/slideLayouts/slideLayout4.xml" + distArchive <- asks envDistArchive + root <- case findEntryByPath layoutpath distArchive of + Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of + Just element -> return $ element + Nothing -> throwError $ + PandocSomeError $ + layoutpath ++ " corrupt in reference file" + Nothing -> throwError $ + PandocSomeError $ + layoutpath ++ " missing in reference file" + return root + +shapeHasId :: NameSpaces -> String -> Element -> Bool +shapeHasId ns ident element + | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element + , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr + , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = + nm == ident + | otherwise = False + +-- The content shape in slideLayout2 (Title/Content) has id=3 In +-- slideLayout4 (two column) the left column is id=3, and the right +-- column is id=4. +getContentShape :: PandocMonad m => NameSpaces -> Element -> P m Element +getContentShape ns spTreeElem + | isElem ns "p" "spTree" spTreeElem = do + contentType <- asks envContentType + let idx = case contentType of + NormalContent -> "1" + TwoColumnLeftContent -> "1" + TwoColumnRightContent -> "2" + case getShapeByPlaceHolderIndex ns spTreeElem idx of + Just e -> return e + Nothing -> throwError $ + PandocSomeError $ + "Could not find shape for Powerpoint content" +getContentShape _ _ = throwError $ + PandocSomeError $ + "Attempted to find content on non shapeTree" + +getShapeDimensions :: NameSpaces + -> Element + -> Maybe ((Integer, Integer), (Integer, Integer)) +getShapeDimensions ns element + | isElem ns "p" "sp" element = do + spPr <- findChild (elemName ns "p" "spPr") element + xfrm <- findChild (elemName ns "a" "xfrm") spPr + off <- findChild (elemName ns "a" "off") xfrm + xS <- findAttr (QName "x" Nothing Nothing) off + yS <- findAttr (QName "y" Nothing Nothing) off + ext <- findChild (elemName ns "a" "ext") xfrm + cxS <- findAttr (QName "cx" Nothing Nothing) ext + cyS <- findAttr (QName "cy" Nothing Nothing) ext + (x, _) <- listToMaybe $ reads xS + (y, _) <- listToMaybe $ reads yS + (cx, _) <- listToMaybe $ reads cxS + (cy, _) <- listToMaybe $ reads cyS + return $ ((x `div` 12700, y `div` 12700), (cx `div` 12700, cy `div` 12700)) + | otherwise = Nothing + + +getMasterShapeDimensionsById :: String + -> Element + -> Maybe ((Integer, Integer), (Integer, Integer)) +getMasterShapeDimensionsById ident master = do + let ns = elemToNameSpaces master + cSld <- findChild (elemName ns "p" "cSld") master + spTree <- findChild (elemName ns "p" "spTree") cSld + sp <- filterChild (\e -> (isElem ns "p" "sp" e) && (shapeHasId ns ident e)) spTree + getShapeDimensions ns sp + +getContentShapeSize :: PandocMonad m + => NameSpaces + -> Element + -> Element + -> P m ((Integer, Integer), (Integer, Integer)) +getContentShapeSize ns layout master + | isElem ns "p" "sldLayout" layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + sp <- getContentShape ns spTree + case getShapeDimensions ns sp of + Just sz -> return sz + Nothing -> do let mbSz = + findChild (elemName ns "p" "nvSpPr") sp >>= + findChild (elemName ns "p" "cNvPr") >>= + findAttr (QName "id" Nothing Nothing) >>= + flip getMasterShapeDimensionsById master + case mbSz of + Just sz' -> return sz' + Nothing -> throwError $ + PandocSomeError $ + "Couldn't find necessary content shape size" +getContentShapeSize _ _ _ = throwError $ + PandocSomeError $ + "Attempted to find content shape size in non-layout" + +replaceNamedChildren :: NameSpaces + -> String + -> String + -> [Element] + -> Element + -> Element +replaceNamedChildren ns prefix name newKids element = + element { elContent = concat $ fun True $ elContent element } + where + fun :: Bool -> [Content] -> [[Content]] + fun _ [] = [] + fun switch ((Elem e) : conts) | isElem ns prefix name e = + if switch + then (map Elem $ newKids) : fun False conts + else fun False conts + fun switch (cont : conts) = [cont] : fun switch conts + +---------------------------------------------------------------- + +registerLink :: PandocMonad m => LinkTarget -> P m Int +registerLink link = do + curSlideId <- asks envCurSlideId + linkReg <- gets stLinkIds + mediaReg <- gets stMediaIds + hasSpeakerNotes <- curSlideHasSpeakerNotes + let maxLinkId = case M.lookup curSlideId linkReg of + Just mp -> case M.keys mp of + [] -> if hasSpeakerNotes then 2 else 1 + ks -> maximum ks + Nothing -> if hasSpeakerNotes then 2 else 1 + maxMediaId = case M.lookup curSlideId mediaReg of + Just [] -> if hasSpeakerNotes then 2 else 1 + Just mInfos -> maximum $ map mInfoLocalId mInfos + Nothing -> if hasSpeakerNotes then 2 else 1 + maxId = max maxLinkId maxMediaId + slideLinks = case M.lookup curSlideId linkReg of + Just mp -> M.insert (maxId + 1) link mp + Nothing -> M.singleton (maxId + 1) link + modify $ \st -> st{ stLinkIds = M.insert curSlideId slideLinks linkReg} + return $ maxId + 1 + +registerMedia :: PandocMonad m => FilePath -> [ParaElem] -> P m MediaInfo +registerMedia fp caption = do + curSlideId <- asks envCurSlideId + linkReg <- gets stLinkIds + mediaReg <- gets stMediaIds + globalIds <- gets stMediaGlobalIds + hasSpeakerNotes <- curSlideHasSpeakerNotes + let maxLinkId = case M.lookup curSlideId linkReg of + Just mp -> case M.keys mp of + [] -> if hasSpeakerNotes then 2 else 1 + ks -> maximum ks + Nothing -> if hasSpeakerNotes then 2 else 1 + maxMediaId = case M.lookup curSlideId mediaReg of + Just [] -> if hasSpeakerNotes then 2 else 1 + Just mInfos -> maximum $ map mInfoLocalId mInfos + Nothing -> if hasSpeakerNotes then 2 else 1 + maxLocalId = max maxLinkId maxMediaId + + maxGlobalId = case M.elems globalIds of + [] -> 0 + ids -> maximum ids + + (imgBytes, mbMt) <- P.fetchItem fp + let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ '.':x)) + <|> + case imageType imgBytes of + Just Png -> Just ".png" + Just Jpeg -> Just ".jpeg" + Just Gif -> Just ".gif" + Just Pdf -> Just ".pdf" + Just Eps -> Just ".eps" + Just Svg -> Just ".svg" + Just Emf -> Just ".emf" + Nothing -> Nothing + + let newGlobalId = case M.lookup fp globalIds of + Just ident -> ident + Nothing -> maxGlobalId + 1 + + let newGlobalIds = M.insert fp newGlobalId globalIds + + let mediaInfo = MediaInfo { mInfoFilePath = fp + , mInfoLocalId = maxLocalId + 1 + , mInfoGlobalId = newGlobalId + , mInfoMimeType = mbMt + , mInfoExt = imgExt + , mInfoCaption = (not . null) caption + } + + let slideMediaInfos = case M.lookup curSlideId mediaReg of + Just minfos -> mediaInfo : minfos + Nothing -> [mediaInfo] + + + modify $ \st -> st{ stMediaIds = M.insert curSlideId slideMediaInfos mediaReg + , stMediaGlobalIds = newGlobalIds + } + return mediaInfo + +makeMediaEntry :: PandocMonad m => MediaInfo -> P m Entry +makeMediaEntry mInfo = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> asks envUTCTime + (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo) + let ext = case mInfoExt mInfo of + Just e -> e + Nothing -> "" + let fp = "ppt/media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext + return $ toEntry fp epochtime $ BL.fromStrict imgBytes + +makeMediaEntries :: PandocMonad m => P m [Entry] +makeMediaEntries = do + mediaInfos <- gets stMediaIds + let allInfos = mconcat $ M.elems mediaInfos + mapM makeMediaEntry allInfos + +-- -- | Scales the image to fit the page +-- -- sizes are passed in emu +-- fitToPage' :: (Double, Double) -- image size in emu +-- -> Integer -- pageWidth +-- -> Integer -- pageHeight +-- -> (Integer, Integer) -- imagesize +-- fitToPage' (x, y) pageWidth pageHeight +-- -- Fixes width to the page width and scales the height +-- | x <= fromIntegral pageWidth && y <= fromIntegral pageHeight = +-- (floor x, floor y) +-- | x / fromIntegral pageWidth > y / fromIntegral pageWidth = +-- (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y) +-- | otherwise = +-- (floor $ ((fromIntegral pageHeight) / y) * x, pageHeight) + +-- positionImage :: (Double, Double) -> Integer -> Integer -> (Integer, Integer) +-- positionImage (x, y) pageWidth pageHeight = +-- let (x', y') = fitToPage' (x, y) pageWidth pageHeight +-- in +-- ((pageWidth - x') `div` 2, (pageHeight - y') `div` 2) + +getMaster :: PandocMonad m => P m Element +getMaster = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + parseXml refArchive distArchive "ppt/slideMasters/slideMaster1.xml" + +-- We want to get the header dimensions, so we can make sure that the +-- image goes underneath it. We only use this in a content slide if it +-- has a header. + +-- getHeaderSize :: PandocMonad m => P m ((Integer, Integer), (Integer, Integer)) +-- getHeaderSize = do +-- master <- getMaster +-- let ns = elemToNameSpaces master +-- sps = [master] >>= +-- findChildren (elemName ns "p" "cSld") >>= +-- findChildren (elemName ns "p" "spTree") >>= +-- findChildren (elemName ns "p" "sp") +-- mbXfrm = +-- listToMaybe (filter (shapeHasName ns "Title Placeholder 1") sps) >>= +-- findChild (elemName ns "p" "spPr") >>= +-- findChild (elemName ns "a" "xfrm") +-- xoff = mbXfrm >>= +-- findChild (elemName ns "a" "off") >>= +-- findAttr (QName "x" Nothing Nothing) >>= +-- (listToMaybe . (\s -> reads s :: [(Integer, String)])) +-- yoff = mbXfrm >>= +-- findChild (elemName ns "a" "off") >>= +-- findAttr (QName "y" Nothing Nothing) >>= +-- (listToMaybe . (\s -> reads s :: [(Integer, String)])) +-- xext = mbXfrm >>= +-- findChild (elemName ns "a" "ext") >>= +-- findAttr (QName "cx" Nothing Nothing) >>= +-- (listToMaybe . (\s -> reads s :: [(Integer, String)])) +-- yext = mbXfrm >>= +-- findChild (elemName ns "a" "ext") >>= +-- findAttr (QName "cy" Nothing Nothing) >>= +-- (listToMaybe . (\s -> reads s :: [(Integer, String)])) +-- off = case xoff of +-- Just (xoff', _) | Just (yoff',_) <- yoff -> (xoff', yoff') +-- _ -> (1043490, 1027664) +-- ext = case xext of +-- Just (xext', _) | Just (yext',_) <- yext -> (xext', yext') +-- _ -> (7024744, 1143000) +-- return $ (off, ext) + +-- Hard-coded for now +-- captionPosition :: ((Integer, Integer), (Integer, Integer)) +-- captionPosition = ((457200, 6061972), (8229600, 527087)) + +captionHeight :: Integer +captionHeight = 40 + +createCaption :: PandocMonad m + => ((Integer, Integer), (Integer, Integer)) + -> [ParaElem] + -> P m Element +createCaption contentShapeDimensions paraElements = do + let para = Paragraph def{pPropAlign = Just AlgnCenter} paraElements + elements <- mapM paragraphToElement [para] + let ((x, y), (cx, cy)) = contentShapeDimensions + let txBody = mknode "p:txBody" [] $ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + return $ + mknode "p:sp" [] [ mknode "p:nvSpPr" [] + [ mknode "p:cNvPr" [("id","1"), ("name","TextBox 3")] () + , mknode "p:cNvSpPr" [("txBox", "1")] () + , mknode "p:nvPr" [] () + ] + , mknode "p:spPr" [] + [ mknode "a:xfrm" [] + [ mknode "a:off" [("x", show $ 12700 * x), + ("y", show $ 12700 * (y + cy - captionHeight))] () + , mknode "a:ext" [("cx", show $ 12700 * cx), + ("cy", show $ 12700 * captionHeight)] () + ] + , mknode "a:prstGeom" [("prst", "rect")] + [ mknode "a:avLst" [] () + ] + , mknode "a:noFill" [] () + ] + , txBody + ] + +makePicElements :: PandocMonad m + => Element + -> PicProps + -> MediaInfo + -> [ParaElem] + -> P m [Element] +makePicElements layout picProps mInfo alt = do + opts <- asks envOpts + (pageWidth, pageHeight) <- asks envPresentationSize + -- hasHeader <- asks envSlideHasHeader + let hasCaption = mInfoCaption mInfo + (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo) + let (pxX, pxY) = case imageSize opts imgBytes of + Right sz -> sizeInPixels $ sz + Left _ -> sizeInPixels $ def + master <- getMaster + let ns = elemToNameSpaces layout + ((x, y), (cx, cytmp)) <- getContentShapeSize ns layout master + `catchError` + (\_ -> return ((0, 0), (pageWidth, pageHeight))) + + let cy = if hasCaption then cytmp - captionHeight else cytmp + + let imgRatio = fromIntegral pxX / fromIntegral pxY :: Double + boxRatio = fromIntegral cx / fromIntegral cy :: Double + (dimX, dimY) = if imgRatio > boxRatio + then (fromIntegral cx, fromIntegral cx / imgRatio) + else (fromIntegral cy * imgRatio, fromIntegral cy) + + (dimX', dimY') = (round dimX * 12700, round dimY * 12700) :: (Integer, Integer) + (xoff, yoff) = (fromIntegral x + (fromIntegral cx - dimX) / 2, + fromIntegral y + (fromIntegral cy - dimY) / 2) + (xoff', yoff') = (round xoff * 12700, round yoff * 12700) :: (Integer, Integer) + + let cNvPicPr = mknode "p:cNvPicPr" [] $ + mknode "a:picLocks" [("noGrp","1") + ,("noChangeAspect","1")] () + -- cNvPr will contain the link information so we do that separately, + -- and register the link if necessary. + let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")] + cNvPr <- case picPropLink picProps of + Just link -> do idNum <- registerLink link + return $ mknode "p:cNvPr" cNvPrAttr $ + mknode "a:hlinkClick" [("r:id", "rId" ++ show idNum)] () + Nothing -> return $ mknode "p:cNvPr" cNvPrAttr () + let nvPicPr = mknode "p:nvPicPr" [] + [ cNvPr + , cNvPicPr + , mknode "p:nvPr" [] ()] + let blipFill = mknode "p:blipFill" [] + [ mknode "a:blip" [("r:embed", "rId" ++ (show $ mInfoLocalId mInfo))] () + , mknode "a:stretch" [] $ + mknode "a:fillRect" [] () ] + let xfrm = mknode "a:xfrm" [] + [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] () + , mknode "a:ext" [("cx",show dimX') + ,("cy",show dimY')] () ] + let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ + mknode "a:avLst" [] () + let ln = mknode "a:ln" [("w","9525")] + [ mknode "a:noFill" [] () + , mknode "a:headEnd" [] () + , mknode "a:tailEnd" [] () ] + let spPr = mknode "p:spPr" [("bwMode","auto")] + [xfrm, prstGeom, mknode "a:noFill" [] (), ln] + + let picShape = mknode "p:pic" [] + [ nvPicPr + , blipFill + , spPr ] + + -- And now, maybe create the caption: + if hasCaption + then do cap <- createCaption ((x, y), (cx, cytmp)) alt + return [picShape, cap] + else return [picShape] + + +paraElemToElement :: PandocMonad m => ParaElem -> P m Element +paraElemToElement Break = return $ mknode "a:br" [] () +paraElemToElement (Run rpr s) = do + let sizeAttrs = case rPropForceSize rpr of + Just n -> [("sz", (show $ n * 100))] + Nothing -> if rPropCode rpr + -- hardcoded size for code for now + then [("sz", "1800")] + else [] + attrs = sizeAttrs ++ + (if rPropBold rpr then [("b", "1")] else []) ++ + (if rPropItalics rpr then [("i", "1")] else []) ++ + (if rPropUnderline rpr then [("u", "sng")] else []) ++ + (case rStrikethrough rpr of + Just NoStrike -> [("strike", "noStrike")] + Just SingleStrike -> [("strike", "sngStrike")] + Just DoubleStrike -> [("strike", "dblStrike")] + Nothing -> []) ++ + (case rBaseline rpr of + Just n -> [("baseline", show n)] + Nothing -> []) ++ + (case rCap rpr of + Just NoCapitals -> [("cap", "none")] + Just SmallCapitals -> [("cap", "small")] + Just AllCapitals -> [("cap", "all")] + Nothing -> []) ++ + [] + linkProps <- case rLink rpr of + Just link -> do + idNum <- registerLink link + -- first we have to make sure that if it's an + -- anchor, it's in the anchor map. If not, there's + -- no link. + return $ case link of + InternalTarget _ -> + let linkAttrs = + [ ("r:id", "rId" ++ show idNum) + , ("action", "ppaction://hlinksldjump") + ] + in [mknode "a:hlinkClick" linkAttrs ()] + -- external + ExternalTarget _ -> + let linkAttrs = + [ ("r:id", "rId" ++ show idNum) + ] + in [mknode "a:hlinkClick" linkAttrs ()] + Nothing -> return [] + let colorContents = case rSolidFill rpr of + Just color -> + case fromColor color of + '#':hx -> [mknode "a:solidFill" [] + [mknode "a:srgbClr" [("val", map toUpper hx)] ()] + ] + _ -> [] + Nothing -> [] + let codeContents = if rPropCode rpr + then [mknode "a:latin" [("typeface", "Courier")] ()] + else [] + let propContents = linkProps ++ colorContents ++ codeContents + return $ mknode "a:r" [] [ mknode "a:rPr" attrs $ propContents + , mknode "a:t" [] s + ] +paraElemToElement (MathElem mathType texStr) = do + res <- convertMath writeOMML mathType (unTeXString texStr) + case res of + Right r -> return $ mknode "a14:m" [] $ addMathInfo r + Left (Str s) -> paraElemToElement (Run def s) + Left _ -> throwError $ PandocShouldNeverHappenError "non-string math fallback" + +-- This is a bit of a kludge -- really requires adding an option to +-- TeXMath, but since that's a different package, we'll do this one +-- step at a time. +addMathInfo :: Element -> Element +addMathInfo element = + let mathspace = Attr { attrKey = (QName "m" Nothing (Just "xmlns")) + , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" + } + in add_attr mathspace element + +-- We look through the element to see if it contains an a14:m +-- element. If so, we surround it. This is a bit ugly, but it seems +-- more dependable than looking through shapes for math. Plus this is +-- an xml implementation detail, so it seems to make sense to do it at +-- the xml level. +surroundWithMathAlternate :: Element -> Element +surroundWithMathAlternate element = + case findElement (QName "m" Nothing (Just "a14")) element of + Just _ -> + mknode "mc:AlternateContent" + [("xmlns:mc", "http://schemas.openxmlformats.org/markup-compatibility/2006") + ] [ mknode "mc:Choice" + [ ("xmlns:a14", "http://schemas.microsoft.com/office/drawing/2010/main") + , ("Requires", "a14")] [ element ] + ] + Nothing -> element + +paragraphToElement :: PandocMonad m => Paragraph -> P m Element +paragraphToElement par = do + let + attrs = [("lvl", show $ pPropLevel $ paraProps par)] ++ + (case pPropMarginLeft (paraProps par) of + Just px -> [("marL", show $ 12700 * px), ("indent", "0")] + Nothing -> [] + ) ++ + (case pPropAlign (paraProps par) of + Just AlgnLeft -> [("algn", "l")] + Just AlgnRight -> [("algn", "r")] + Just AlgnCenter -> [("algn", "ctr")] + Nothing -> [] + ) + props = [] ++ + (case pPropSpaceBefore $ paraProps par of + Just px -> [mknode "a:spcBef" [] [ + mknode "a:spcPts" [("val", show $ 100 * px)] () + ] + ] + Nothing -> [] + ) ++ + (case pPropBullet $ paraProps par of + Just Bullet -> [] + Just (AutoNumbering attrs') -> + [mknode "a:buAutoNum" [("type", autoNumberingToType attrs')] ()] + Nothing -> [mknode "a:buNone" [] ()] + ) + paras <- mapM paraElemToElement (paraElems par) + return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] ++ paras + +shapeToElement :: PandocMonad m => Element -> Shape -> P m Element +shapeToElement layout (TextBox paras) + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + sp <- getContentShape ns spTree + elements <- mapM paragraphToElement paras + let txBody = mknode "p:txBody" [] $ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + emptySpPr = mknode "p:spPr" [] () + return $ + surroundWithMathAlternate $ + replaceNamedChildren ns "p" "txBody" [txBody] $ + replaceNamedChildren ns "p" "spPr" [emptySpPr] $ + sp +-- GraphicFrame and Pic should never reach this. +shapeToElement _ _ = return $ mknode "p:sp" [] () + +shapeToElements :: PandocMonad m => Element -> Shape -> P m [Element] +shapeToElements layout (Pic picProps fp alt) = do + mInfo <- registerMedia fp alt + case mInfoExt mInfo of + Just _ -> do + makePicElements layout picProps mInfo alt + Nothing -> shapeToElements layout $ TextBox [Paragraph def alt] +shapeToElements layout (GraphicFrame tbls cptn) = + graphicFrameToElements layout tbls cptn +shapeToElements layout shp = do + element <- shapeToElement layout shp + return [element] + +shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Element] +shapesToElements layout shps = do + concat <$> mapM (shapeToElements layout) shps + +graphicFrameToElements :: PandocMonad m => Element -> [Graphic] -> [ParaElem] -> P m [Element] +graphicFrameToElements layout tbls caption = do + -- get the sizing + master <- getMaster + (pageWidth, pageHeight) <- asks envPresentationSize + let ns = elemToNameSpaces layout + ((x, y), (cx, cytmp)) <- getContentShapeSize ns layout master + `catchError` + (\_ -> return ((0, 0), (pageWidth, pageHeight))) + + let cy = if (not $ null caption) then cytmp - captionHeight else cytmp + + elements <- mapM (graphicToElement cx) tbls + let graphicFrameElts = + mknode "p:graphicFrame" [] $ + [ mknode "p:nvGraphicFramePr" [] $ + [ mknode "p:cNvPr" [("id", "6"), ("name", "Content Placeholder 5")] () + , mknode "p:cNvGraphicFramePr" [] $ + [mknode "a:graphicFrameLocks" [("noGrp", "1")] ()] + , mknode "p:nvPr" [] $ + [mknode "p:ph" [("idx", "1")] ()] + ] + , mknode "p:xfrm" [] $ + [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] () + , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] () + ] + ] ++ elements + + if (not $ null caption) + then do capElt <- createCaption ((x, y), (cx, cytmp)) caption + return [graphicFrameElts, capElt] + else return [graphicFrameElts] + +getDefaultTableStyle :: PandocMonad m => P m (Maybe String) +getDefaultTableStyle = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml" + return $ findAttr (QName "def" Nothing Nothing) tblStyleLst + +graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element +graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do + let colWidths = if null hdrCells + then case rows of + r : _ | not (null r) -> replicate (length r) $ + (tableWidth `div` (toInteger $ length r)) + -- satisfy the compiler. This is the same as + -- saying that rows is empty, but the compiler + -- won't understand that `[]` exhausts the + -- alternatives. + _ -> [] + else replicate (length hdrCells) $ + (tableWidth `div` (toInteger $ length hdrCells)) + + let cellToOpenXML paras = + do elements <- mapM paragraphToElement paras + let elements' = if null elements + then [mknode "a:p" [] [mknode "a:endParaRPr" [] ()]] + else elements + return $ + [mknode "a:txBody" [] $ + ([ mknode "a:bodyPr" [] () + , mknode "a:lstStyle" [] ()] + ++ elements')] + headers' <- mapM cellToOpenXML hdrCells + rows' <- mapM (mapM cellToOpenXML) rows + let borderProps = mknode "a:tcPr" [] () + let emptyCell = [mknode "a:p" [] [mknode "a:pPr" [] ()]] + let mkcell border contents = mknode "a:tc" [] + $ (if null contents + then emptyCell + else contents) ++ [ borderProps | border ] + let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells + + let mkgridcol w = mknode "a:gridCol" + [("w", show ((12700 * w) :: Integer))] () + let hasHeader = not (all null hdrCells) + + mbDefTblStyle <- getDefaultTableStyle + let tblPrElt = mknode "a:tblPr" + [ ("firstRow", if tblPrFirstRow tblPr then "1" else "0") + , ("bandRow", if tblPrBandRow tblPr then "1" else "0") + ] (case mbDefTblStyle of + Nothing -> [] + Just sty -> [mknode "a:tableStyleId" [] sty]) + + return $ mknode "a:graphic" [] $ + [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] $ + [mknode "a:tbl" [] $ + [ tblPrElt + , mknode "a:tblGrid" [] (if all (==0) colWidths + then [] + else map mkgridcol colWidths) + ] + ++ [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' + ] + ] + +getShapeByPlaceHolderType :: NameSpaces -> Element -> String -> Maybe Element +getShapeByPlaceHolderType ns spTreeElem phType + | isElem ns "p" "spTree" spTreeElem = + let findPhType element = isElem ns "p" "sp" element && + Just phType == (Just element >>= + findChild (elemName ns "p" "nvSpPr") >>= + findChild (elemName ns "p" "nvPr") >>= + findChild (elemName ns "p" "ph") >>= + findAttr (QName "type" Nothing Nothing)) + in + filterChild findPhType spTreeElem + | otherwise = Nothing + +getShapeByPlaceHolderIndex :: NameSpaces -> Element -> String -> Maybe Element +getShapeByPlaceHolderIndex ns spTreeElem phIdx + | isElem ns "p" "spTree" spTreeElem = + let findPhType element = isElem ns "p" "sp" element && + Just phIdx == (Just element >>= + findChild (elemName ns "p" "nvSpPr") >>= + findChild (elemName ns "p" "nvPr") >>= + findChild (elemName ns "p" "ph") >>= + findAttr (QName "idx" Nothing Nothing)) + in + filterChild findPhType spTreeElem + | otherwise = Nothing + + +nonBodyTextToElement :: PandocMonad m => Element -> String -> [ParaElem] -> P m Element +nonBodyTextToElement layout phType paraElements + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld + , Just sp <- getShapeByPlaceHolderType ns spTree phType = do + let hdrPara = Paragraph def paraElements + element <- paragraphToElement hdrPara + let txBody = mknode "p:txBody" [] $ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ + [element] + return $ replaceNamedChildren ns "p" "txBody" [txBody] sp + -- XXX: TODO + | otherwise = return $ mknode "p:sp" [] () + +contentToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> P m Element +contentToElement layout hdrShape shapes + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + element <- nonBodyTextToElement layout "title" hdrShape + let hdrShapeElements = if null hdrShape + then [] + else [element] + contentElements <- local + (\env -> env {envContentType = NormalContent}) + (shapesToElements layout shapes) + return $ + replaceNamedChildren ns "p" "sp" + (hdrShapeElements ++ contentElements) + spTree +contentToElement _ _ _ = return $ mknode "p:sp" [] () + +twoColumnToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> [Shape] -> P m Element +twoColumnToElement layout hdrShape shapesL shapesR + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + element <- nonBodyTextToElement layout "title" hdrShape + let hdrShapeElements = if null hdrShape + then [] + else [element] + contentElementsL <- local + (\env -> env {envContentType =TwoColumnLeftContent}) + (shapesToElements layout shapesL) + contentElementsR <- local + (\env -> env {envContentType =TwoColumnRightContent}) + (shapesToElements layout shapesR) + -- let contentElementsL' = map (setIdx ns "1") contentElementsL + -- contentElementsR' = map (setIdx ns "2") contentElementsR + return $ + replaceNamedChildren ns "p" "sp" + (hdrShapeElements ++ contentElementsL ++ contentElementsR) + spTree +twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] () + + +titleToElement :: PandocMonad m => Element -> [ParaElem] -> P m Element +titleToElement layout titleElems + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + element <- nonBodyTextToElement layout "title" titleElems + let titleShapeElements = if null titleElems + then [] + else [element] + return $ replaceNamedChildren ns "p" "sp" titleShapeElements spTree +titleToElement _ _ = return $ mknode "p:sp" [] () + +metadataToElement :: PandocMonad m => Element -> [ParaElem] -> [ParaElem] -> [[ParaElem]] -> [ParaElem] -> P m Element +metadataToElement layout titleElems subtitleElems authorsElems dateElems + | ns <- elemToNameSpaces layout + , Just cSld <- findChild (elemName ns "p" "cSld") layout + , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do + titleShapeElements <- if null titleElems + then return [] + else sequence [nonBodyTextToElement layout "ctrTitle" titleElems] + let combinedAuthorElems = intercalate [Break] authorsElems + subtitleAndAuthorElems = intercalate [Break, Break] [subtitleElems, combinedAuthorElems] + subtitleShapeElements <- if null subtitleAndAuthorElems + then return [] + else sequence [nonBodyTextToElement layout "subTitle" subtitleAndAuthorElems] + dateShapeElements <- if null dateElems + then return [] + else sequence [nonBodyTextToElement layout "dt" dateElems] + return $ replaceNamedChildren ns "p" "sp" + (titleShapeElements ++ subtitleShapeElements ++ dateShapeElements) + spTree +metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] () + +slideToElement :: PandocMonad m => Slide -> P m Element +slideToElement (Slide _ l@(ContentSlide hdrElems shapes) _ )= do + layout <- getLayout l + spTree <- local (\env -> if null hdrElems + then env + else env{envSlideHasHeader=True}) $ + contentToElement layout hdrElems shapes + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] [spTree]] +slideToElement (Slide _ l@(TwoColumnSlide hdrElems shapesL shapesR) _) = do + layout <- getLayout l + spTree <- local (\env -> if null hdrElems + then env + else env{envSlideHasHeader=True}) $ + twoColumnToElement layout hdrElems shapesL shapesR + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] [spTree]] +slideToElement (Slide _ l@(TitleSlide hdrElems) _) = do + layout <- getLayout l + spTree <- titleToElement layout hdrElems + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] [spTree]] +slideToElement (Slide _ l@(MetadataSlide titleElems subtitleElems authorElems dateElems) _) = do + layout <- getLayout l + spTree <- metadataToElement layout titleElems subtitleElems authorElems dateElems + return $ mknode "p:sld" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main"), + ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"), + ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [mknode "p:cSld" [] [spTree]] + + +-------------------------------------------------------------------- +-- Notes: + +getNotesMaster :: PandocMonad m => P m Element +getNotesMaster = do + let notesMasterPath = "ppt/notesMasters/notesMaster1.xml" + distArchive <- asks envDistArchive + root <- case findEntryByPath notesMasterPath distArchive of + Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of + Just element -> return $ element + Nothing -> throwError $ + PandocSomeError $ + notesMasterPath ++ " corrupt in reference file" + Nothing -> throwError $ + PandocSomeError $ + notesMasterPath ++ " missing in reference file" + return root + +getSlideNumberFieldId :: PandocMonad m => Element -> P m String +getSlideNumberFieldId notesMaster + | ns <- elemToNameSpaces notesMaster + , Just cSld <- findChild (elemName ns "p" "cSld") notesMaster + , Just spTree <- findChild (elemName ns "p" "spTree") cSld + , Just sp <- getShapeByPlaceHolderType ns spTree "sldNum" + , Just txBody <- findChild (elemName ns "p" "txBody") sp + , Just p <- findChild (elemName ns "a" "p") txBody + , Just fld <- findChild (elemName ns "a" "fld") p + , Just fldId <- findAttr (QName "id" Nothing Nothing) fld = + return fldId + | otherwise = throwError $ + PandocSomeError $ + "No field id for slide numbers in notesMaster.xml" + +speakerNotesSlideImage :: Element +speakerNotesSlideImage = + mknode "p:sp" [] $ + [ mknode "p:nvSpPr" [] $ + [ mknode "p:cNvPr" [ ("id", "2") + , ("name", "Slide Image Placeholder 1") + ] () + , mknode "p:cNvSpPr" [] $ + [ mknode "a:spLocks" [ ("noGrp", "1") + , ("noRot", "1") + , ("noChangeAspect", "1") + ] () + ] + , mknode "p:nvPr" [] $ + [ mknode "p:ph" [("type", "sldImg")] ()] + ] + , mknode "p:spPr" [] () + ] + +-- we want to wipe links from the speaker notes in the +-- paragraphs. Powerpoint doesn't allow you to input them, and it +-- would provide extra complications. +removeParaLinks :: Paragraph -> Paragraph +removeParaLinks paragraph = paragraph{paraElems = map f (paraElems paragraph)} + where f (Run rProps s) = Run rProps{rLink=Nothing} s + f pe = pe + +-- put an empty paragraph between paragraphs for more expected spacing. +spaceParas :: [Paragraph] -> [Paragraph] +spaceParas = intersperse (Paragraph def []) + +speakerNotesBody :: PandocMonad m => [Paragraph] -> P m Element +speakerNotesBody paras = do + elements <- mapM paragraphToElement $ spaceParas $ map removeParaLinks paras + let txBody = mknode "p:txBody" [] $ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + return $ + mknode "p:sp" [] $ + [ mknode "p:nvSpPr" [] $ + [ mknode "p:cNvPr" [ ("id", "3") + , ("name", "Notes Placeholder 2") + ] () + , mknode "p:cNvSpPr" [] $ + [ mknode "a:spLocks" [("noGrp", "1")] ()] + , mknode "p:nvPr" [] $ + [ mknode "p:ph" [("type", "body"), ("idx", "1")] ()] + ] + , mknode "p:spPr" [] () + , txBody + ] + +speakerNotesSlideNumber :: Int -> String -> Element +speakerNotesSlideNumber pgNum fieldId = + mknode "p:sp" [] $ + [ mknode "p:nvSpPr" [] $ + [ mknode "p:cNvPr" [ ("id", "4") + , ("name", "Slide Number Placeholder 3") + ] () + , mknode "p:cNvSpPr" [] $ + [ mknode "a:spLocks" [("noGrp", "1")] ()] + , mknode "p:nvPr" [] $ + [ mknode "p:ph" [ ("type", "sldNum") + , ("sz", "quarter") + , ("idx", "10") + ] () + ] + ] + , mknode "p:spPr" [] () + , mknode "p:txBody" [] $ + [ mknode "a:bodyPr" [] () + , mknode "a:lstStyle" [] () + , mknode "a:p" [] $ + [ mknode "a:fld" [ ("id", fieldId) + , ("type", "slidenum") + ] + [ mknode "a:rPr" [("lang", "en-US")] () + , mknode "a:t" [] (show pgNum) + ] + , mknode "a:endParaRPr" [("lang", "en-US")] () + ] + ] + ] + +slideToSpeakerNotesElement :: PandocMonad m => Slide -> P m (Maybe Element) +slideToSpeakerNotesElement slide + | Slide _ _ mbNotes <- slide + , Just (SpeakerNotes paras) <- mbNotes = do + master <- getNotesMaster + fieldId <- getSlideNumberFieldId master + num <- slideNum slide + let imgShape = speakerNotesSlideImage + sldNumShape = speakerNotesSlideNumber num fieldId + bodyShape <- speakerNotesBody paras + return $ Just $ + mknode "p:notes" + [ ("xmlns:a", "http://schemas.openxmlformats.org/drawingml/2006/main") + , ("xmlns:r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships") + , ("xmlns:p", "http://schemas.openxmlformats.org/presentationml/2006/main") + ] [ mknode "p:cSld" [] + [ mknode "p:spTree" [] + [ mknode "p:nvGrpSpPr" [] + [ mknode "p:cNvPr" [("id", "1"), ("name", "")] () + , mknode "p:cNvGrpSpPr" [] () + , mknode "p:nvPr" [] () + ] + , mknode "p:grpSpPr" [] + [ mknode "a:xfrm" [] + [ mknode "a:off" [("x", "0"), ("y", "0")] () + , mknode "a:ext" [("cx", "0"), ("cy", "0")] () + , mknode "a:chOff" [("x", "0"), ("y", "0")] () + , mknode "a:chExt" [("cx", "0"), ("cy", "0")] () + ] + ] + , imgShape + , bodyShape + , sldNumShape + ] + ] + ] +slideToSpeakerNotesElement _ = return Nothing + +----------------------------------------------------------------------- + +getSlideIdNum :: PandocMonad m => SlideId -> P m Int +getSlideIdNum sldId = do + slideIdMap <- asks envSlideIdMap + case M.lookup sldId slideIdMap of + Just n -> return n + Nothing -> throwError $ + PandocShouldNeverHappenError $ + "Slide Id " ++ (show sldId) ++ " not found." + +slideNum :: PandocMonad m => Slide -> P m Int +slideNum slide = getSlideIdNum $ slideId slide + +idNumToFilePath :: Int -> FilePath +idNumToFilePath idNum = "slide" ++ (show $ idNum) ++ ".xml" + +slideToFilePath :: PandocMonad m => Slide -> P m FilePath +slideToFilePath slide = do + idNum <- slideNum slide + return $ "slide" ++ (show $ idNum) ++ ".xml" + +slideToRelId :: PandocMonad m => Slide -> P m String +slideToRelId slide = do + n <- slideNum slide + offset <- asks envSlideIdOffset + return $ "rId" ++ (show $ n + offset) + + +data Relationship = Relationship { relId :: Int + , relType :: MimeType + , relTarget :: FilePath + } deriving (Show, Eq) + +elementToRel :: Element -> Maybe Relationship +elementToRel element + | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing = + do rId <- findAttr (QName "Id" Nothing Nothing) element + numStr <- stripPrefix "rId" rId + num <- case reads numStr :: [(Int, String)] of + (n, _) : _ -> Just n + [] -> Nothing + type' <- findAttr (QName "Type" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element + return $ Relationship num type' target + | otherwise = Nothing + +slideToPresRel :: PandocMonad m => Slide -> P m Relationship +slideToPresRel slide = do + idNum <- slideNum slide + n <- asks envSlideIdOffset + let rId = idNum + n + fp = "slides/" ++ idNumToFilePath idNum + return $ Relationship { relId = rId + , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" + , relTarget = fp + } + +getRels :: PandocMonad m => P m [Relationship] +getRels = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + relsElem <- parseXml refArchive distArchive "ppt/_rels/presentation.xml.rels" + let globalNS = "http://schemas.openxmlformats.org/package/2006/relationships" + let relElems = findChildren (QName "Relationship" (Just globalNS) Nothing) relsElem + return $ mapMaybe elementToRel relElems + +presentationToRels :: PandocMonad m => Presentation -> P m [Relationship] +presentationToRels pres@(Presentation _ slides) = do + mySlideRels <- mapM slideToPresRel slides + let notesMasterRels = + if presHasSpeakerNotes pres + then [Relationship { relId = length mySlideRels + 2 + , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster" + , relTarget = "notesMasters/notesMaster1.xml" + }] + else [] + insertedRels = mySlideRels ++ notesMasterRels + rels <- getRels + -- we remove the slide rels and the notesmaster (if it's + -- there). We'll put these back in ourselves, if necessary. + let relsWeKeep = filter + (\r -> relType r /= "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" && + relType r /= "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") + rels + -- We want to make room for the slides in the id space. The slides + -- will start at Id2 (since Id1 is for the slide master). There are + -- two slides in the data file, but that might change in the future, + -- so we will do this: + -- + -- 1. We look to see what the minimum relWithoutSlide id (greater than 1) is. + -- 2. We add the difference between this and the number of slides to + -- all relWithoutSlide rels (unless they're 1) + -- 3. If we have a notesmaster slide, we make space for that as well. + + let minRelNotOne = case filter (1<) $ map relId relsWeKeep of + [] -> 0 -- doesn't matter in this case, since + -- there will be nothing to map the + -- function over + l -> minimum l + + modifyRelNum :: Int -> Int + modifyRelNum 1 = 1 + modifyRelNum n = n - minRelNotOne + 2 + length insertedRels + + relsWeKeep' = map (\r -> r{relId = modifyRelNum $ relId r}) relsWeKeep + + return $ insertedRels ++ relsWeKeep' + +-- We make this ourselves, in case there's a thumbnail in the one from +-- the template. +topLevelRels :: [Relationship] +topLevelRels = + [ Relationship { relId = 1 + , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" + , relTarget = "ppt/presentation.xml" + } + , Relationship { relId = 2 + , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" + , relTarget = "docProps/core.xml" + } + , Relationship { relId = 3 + , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/extended-properties" + , relTarget = "docProps/app.xml" + } + ] + +topLevelRelsEntry :: PandocMonad m => P m Entry +topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels + +relToElement :: Relationship -> Element +relToElement rel = mknode "Relationship" [ ("Id", "rId" ++ (show $ relId rel)) + , ("Type", relType rel) + , ("Target", relTarget rel) ] () + +relsToElement :: [Relationship] -> Element +relsToElement rels = mknode "Relationships" + [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] + (map relToElement rels) + +presentationToRelsEntry :: PandocMonad m => Presentation -> P m Entry +presentationToRelsEntry pres = do + rels <- presentationToRels pres + elemToEntry "ppt/_rels/presentation.xml.rels" $ relsToElement rels + +elemToEntry :: PandocMonad m => FilePath -> Element -> P m Entry +elemToEntry fp element = do + epochtime <- (floor . utcTimeToPOSIXSeconds) <$> asks envUTCTime + return $ toEntry fp epochtime $ renderXml element + +slideToEntry :: PandocMonad m => Slide -> P m Entry +slideToEntry slide = do + idNum <- slideNum slide + local (\env -> env{envCurSlideId = idNum}) $ do + element <- slideToElement slide + elemToEntry ("ppt/slides/" ++ idNumToFilePath idNum) element + +slideToSpeakerNotesEntry :: PandocMonad m => Slide -> P m (Maybe Entry) +slideToSpeakerNotesEntry slide = do + idNum <- slideNum slide + local (\env -> env{envCurSlideId = idNum}) $ do + mbElement <- slideToSpeakerNotesElement slide + mbNotesIdNum <- do mp <- asks envSpeakerNotesIdMap + return $ M.lookup idNum mp + case mbElement of + Just element | Just notesIdNum <- mbNotesIdNum -> + Just <$> + elemToEntry + ("ppt/notesSlides/notesSlide" ++ show notesIdNum ++ ".xml") + element + _ -> return Nothing + +slideToSpeakerNotesRelElement :: PandocMonad m => Slide -> P m (Maybe Element) +slideToSpeakerNotesRelElement slide + | Slide _ _ mbNotes <- slide + , Just _ <- mbNotes = do + idNum <- slideNum slide + return $ Just $ + mknode "Relationships" + [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] + [ mknode "Relationship" [ ("Id", "rId2") + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") + , ("Target", "../slides/slide" ++ show idNum ++ ".xml") + ] () + , mknode "Relationship" [ ("Id", "rId1") + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") + , ("Target", "../notesMasters/notesMaster1.xml") + ] () + ] +slideToSpeakerNotesRelElement _ = return Nothing + +slideToSpeakerNotesRelEntry :: PandocMonad m => Slide -> P m (Maybe Entry) +slideToSpeakerNotesRelEntry slide = do + idNum <- slideNum slide + mbElement <- slideToSpeakerNotesRelElement slide + mp <- asks envSpeakerNotesIdMap + let mbNotesIdNum = M.lookup idNum mp + case mbElement of + Just element | Just notesIdNum <- mbNotesIdNum -> + Just <$> + elemToEntry + ("ppt/notesSlides/_rels/notesSlide" ++ show notesIdNum ++ ".xml.rels") + element + _ -> return Nothing + +slideToSlideRelEntry :: PandocMonad m => Slide -> P m Entry +slideToSlideRelEntry slide = do + idNum <- slideNum slide + element <- slideToSlideRelElement slide + elemToEntry ("ppt/slides/_rels/" ++ idNumToFilePath idNum ++ ".rels") element + +linkRelElement :: PandocMonad m => Int -> LinkTarget -> P m Element +linkRelElement rIdNum (InternalTarget targetId) = do + targetIdNum <- getSlideIdNum targetId + return $ + mknode "Relationship" [ ("Id", "rId" ++ show rIdNum) + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") + , ("Target", "slide" ++ show targetIdNum ++ ".xml") + ] () +linkRelElement rIdNum (ExternalTarget (url, _)) = do + return $ + mknode "Relationship" [ ("Id", "rId" ++ show rIdNum) + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") + , ("Target", url) + , ("TargetMode", "External") + ] () + +linkRelElements :: PandocMonad m => M.Map Int LinkTarget -> P m [Element] +linkRelElements mp = mapM (\(n, lnk) -> linkRelElement n lnk) (M.toList mp) + +mediaRelElement :: MediaInfo -> Element +mediaRelElement mInfo = + let ext = case mInfoExt mInfo of + Just e -> e + Nothing -> "" + in + mknode "Relationship" [ ("Id", "rId" ++ (show $ mInfoLocalId mInfo)) + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image") + , ("Target", "../media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext) + ] () + +speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element) +speakerNotesSlideRelElement slide = do + idNum <- slideNum slide + mp <- asks envSpeakerNotesIdMap + return $ case M.lookup idNum mp of + Nothing -> Nothing + Just n -> + let target = "../notesSlides/notesSlide" ++ show n ++ ".xml" + in Just $ + mknode "Relationship" [ ("Id", "rId2") + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide") + , ("Target", target) + ] () + +slideToSlideRelElement :: PandocMonad m => Slide -> P m Element +slideToSlideRelElement slide = do + idNum <- slideNum slide + let target = case slide of + (Slide _ (MetadataSlide _ _ _ _) _) -> "../slideLayouts/slideLayout1.xml" + (Slide _ (TitleSlide _) _) -> "../slideLayouts/slideLayout3.xml" + (Slide _ (ContentSlide _ _) _) -> "../slideLayouts/slideLayout2.xml" + (Slide _ (TwoColumnSlide _ _ _) _) -> "../slideLayouts/slideLayout4.xml" + + speakerNotesRels <- maybeToList <$> speakerNotesSlideRelElement slide + + linkIds <- gets stLinkIds + mediaIds <- gets stMediaIds + + linkRels <- case M.lookup idNum linkIds of + Just mp -> linkRelElements mp + Nothing -> return [] + let mediaRels = case M.lookup idNum mediaIds of + Just mInfos -> map mediaRelElement mInfos + Nothing -> [] + + return $ + mknode "Relationships" + [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] + ([mknode "Relationship" [ ("Id", "rId1") + , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout") + , ("Target", target)] () + ] ++ speakerNotesRels ++ linkRels ++ mediaRels) + +slideToSldIdElement :: PandocMonad m => Slide -> P m Element +slideToSldIdElement slide = do + n <- slideNum slide + let id' = show $ n + 255 + rId <- slideToRelId slide + return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () + +presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element +presentationToSldIdLst (Presentation _ slides) = do + ids <- mapM slideToSldIdElement slides + return $ mknode "p:sldIdLst" [] ids + +presentationToPresentationElement :: PandocMonad m => Presentation -> P m Element +presentationToPresentationElement pres@(Presentation _ slds) = do + refArchive <- asks envRefArchive + distArchive <- asks envDistArchive + element <- parseXml refArchive distArchive "ppt/presentation.xml" + sldIdLst <- presentationToSldIdLst pres + + let modifySldIdLst :: Content -> Content + modifySldIdLst (Elem e) = case elName e of + (QName "sldIdLst" _ _) -> Elem sldIdLst + _ -> Elem e + modifySldIdLst ct = ct + + notesMasterRId = length slds + 2 + + notesMasterElem = mknode "p:notesMasterIdLst" [] + [ mknode + "p:NotesMasterId" + [("r:id", "rId" ++ show notesMasterRId)] + () + ] + + -- if there's a notesMasterIdLst in the presentation.xml file, + -- we want to remove it. We then want to put our own, if + -- necessary, after the slideMasterIdLst element. + + removeNotesMaster' :: Content -> [Content] + removeNotesMaster' (Elem e) = case elName e of + (QName "notesMasterIdLst" _ _) -> [] + _ -> [Elem e] + removeNotesMaster' ct = [ct] + + removeNotesMaster :: [Content] -> [Content] + removeNotesMaster = concatMap removeNotesMaster' + + insertNotesMaster' :: Content -> [Content] + insertNotesMaster' (Elem e) = case elName e of + (QName "sldMasterIdLst" _ _) -> [Elem e, Elem notesMasterElem] + _ -> [Elem e] + insertNotesMaster' ct = [ct] + + insertNotesMaster :: [Content] -> [Content] + insertNotesMaster = if presHasSpeakerNotes pres + then concatMap insertNotesMaster' + else id + + newContent = insertNotesMaster $ + removeNotesMaster $ + map modifySldIdLst $ + elContent element + + return $ element{elContent = newContent} + +presentationToPresEntry :: PandocMonad m => Presentation -> P m Entry +presentationToPresEntry pres = presentationToPresentationElement pres >>= + elemToEntry "ppt/presentation.xml" + +-- adapted from the Docx writer +docPropsElement :: PandocMonad m => DocProps -> P m Element +docPropsElement docProps = do + utctime <- asks envUTCTime + let keywords = case dcKeywords docProps of + Just xs -> intercalate "," xs + Nothing -> "" + return $ + mknode "cp:coreProperties" + [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") + ,("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:dcterms","http://purl.org/dc/terms/") + ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") + ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] + $ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps) + : (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps) + : (mknode "cp:keywords" [] keywords) + : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x + ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + +docPropsToEntry :: PandocMonad m => DocProps -> P m Entry +docPropsToEntry docProps = docPropsElement docProps >>= + elemToEntry "docProps/core.xml" + + +defaultContentTypeToElem :: DefaultContentType -> Element +defaultContentTypeToElem dct = + mknode "Default" + [("Extension", defContentTypesExt dct), + ("ContentType", defContentTypesType dct)] + () + +overrideContentTypeToElem :: OverrideContentType -> Element +overrideContentTypeToElem oct = + mknode "Override" + [("PartName", overrideContentTypesPart oct), + ("ContentType", overrideContentTypesType oct)] + () + +contentTypesToElement :: ContentTypes -> Element +contentTypesToElement ct = + let ns = "http://schemas.openxmlformats.org/package/2006/content-types" + in + mknode "Types" [("xmlns", ns)] $ + (map defaultContentTypeToElem $ contentTypesDefaults ct) ++ + (map overrideContentTypeToElem $ contentTypesOverrides ct) + +data DefaultContentType = DefaultContentType + { defContentTypesExt :: String + , defContentTypesType:: MimeType + } + deriving (Show, Eq) + +data OverrideContentType = OverrideContentType + { overrideContentTypesPart :: FilePath + , overrideContentTypesType :: MimeType + } + deriving (Show, Eq) + +data ContentTypes = ContentTypes { contentTypesDefaults :: [DefaultContentType] + , contentTypesOverrides :: [OverrideContentType] + } + deriving (Show, Eq) + +contentTypesToEntry :: PandocMonad m => ContentTypes -> P m Entry +contentTypesToEntry ct = elemToEntry "[Content_Types].xml" $ contentTypesToElement ct + +pathToOverride :: FilePath -> Maybe OverrideContentType +pathToOverride fp = OverrideContentType ("/" ++ fp) <$> (getContentType fp) + +mediaFileContentType :: FilePath -> Maybe DefaultContentType +mediaFileContentType fp = case takeExtension fp of + '.' : ext -> Just $ + DefaultContentType { defContentTypesExt = ext + , defContentTypesType = + case getMimeType fp of + Just mt -> mt + Nothing -> "application/octet-stream" + } + _ -> Nothing + +mediaContentType :: MediaInfo -> Maybe DefaultContentType +mediaContentType mInfo + | Just ('.' : ext) <- mInfoExt mInfo = + Just $ DefaultContentType { defContentTypesExt = ext + , defContentTypesType = + case mInfoMimeType mInfo of + Just mt -> mt + Nothing -> "application/octet-stream" + } + | otherwise = Nothing + +getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath] +getSpeakerNotesFilePaths = do + mp <- asks envSpeakerNotesIdMap + let notesIdNums = M.elems mp + return $ map (\n -> "ppt/notesSlides/notesSlide" ++ show n ++ ".xml") notesIdNums + +presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes +presentationToContentTypes p@(Presentation _ slides) = do + mediaInfos <- (mconcat . M.elems) <$> gets stMediaIds + filePaths <- patternsToFilePaths $ inheritedPatterns p + let mediaFps = filter (match (compile "ppt/media/image*")) filePaths + let defaults = [ DefaultContentType "xml" "application/xml" + , DefaultContentType "rels" "application/vnd.openxmlformats-package.relationships+xml" + ] + mediaDefaults = nub $ + (mapMaybe mediaContentType $ mediaInfos) ++ + (mapMaybe mediaFileContentType $ mediaFps) + + inheritedOverrides = mapMaybe pathToOverride filePaths + docPropsOverride = mapMaybe pathToOverride ["docProps/core.xml"] + presOverride = mapMaybe pathToOverride ["ppt/presentation.xml"] + relativePaths <- mapM slideToFilePath slides + let slideOverrides = mapMaybe + (\fp -> pathToOverride $ "ppt/slides/" ++ fp) + relativePaths + speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths + return $ ContentTypes + (defaults ++ mediaDefaults) + (inheritedOverrides ++ docPropsOverride ++ presOverride ++ slideOverrides ++ speakerNotesOverrides) + +presML :: String +presML = "application/vnd.openxmlformats-officedocument.presentationml" + +noPresML :: String +noPresML = "application/vnd.openxmlformats-officedocument" + +getContentType :: FilePath -> Maybe MimeType +getContentType fp + | fp == "ppt/presentation.xml" = Just $ presML ++ ".presentation.main+xml" + | fp == "ppt/presProps.xml" = Just $ presML ++ ".presProps+xml" + | fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml" + | fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml" + | fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml" + | fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml" + | "ppt" : "slideMasters" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ presML ++ ".slideMaster+xml" + | "ppt" : "slides" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ presML ++ ".slide+xml" + | "ppt" : "notesMasters" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ presML ++ ".notesMaster+xml" + | "ppt" : "notesSlides" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ presML ++ ".notesSlide+xml" + | "ppt" : "theme" : f : [] <- splitDirectories fp + , (_, ".xml") <- splitExtension f = + Just $ noPresML ++ ".theme+xml" + | "ppt" : "slideLayouts" : _ : [] <- splitDirectories fp= + Just $ presML ++ ".slideLayout+xml" + | otherwise = Nothing + +autoNumberingToType :: ListAttributes -> String +autoNumberingToType (_, numStyle, numDelim) = + typeString ++ delimString + where + typeString = case numStyle of + Decimal -> "arabic" + UpperAlpha -> "alphaUc" + LowerAlpha -> "alphaLc" + UpperRoman -> "romanUc" + LowerRoman -> "romanLc" + _ -> "arabic" + delimString = case numDelim of + Period -> "Period" + OneParen -> "ParenR" + TwoParens -> "ParenBoth" + _ -> "Period" diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs new file mode 100644 index 000000000..396469edd --- /dev/null +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -0,0 +1,987 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2017-2018 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Powerpoint.Presentation + Copyright : Copyright (C) 2017-2018 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Definition of Presentation datatype, modeling a MS Powerpoint (pptx) +document, and functions for converting a Pandoc document to +Presentation. +-} + +module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation + , Presentation(..) + , DocProps(..) + , Slide(..) + , Layout(..) + , SpeakerNotes(..) + , SlideId(..) + , Shape(..) + , Graphic(..) + , BulletType(..) + , Algnment(..) + , Paragraph(..) + , ParaElem(..) + , ParaProps(..) + , RunProps(..) + , TableProps(..) + , Strikethrough(..) + , Capitals(..) + , PicProps(..) + , URL + , TeXString(..) + , LinkTarget(..) + ) where + + +import Control.Monad.Reader +import Control.Monad.State +import Data.List (intercalate) +import Data.Default +import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Slides (getSlideLevel) +import Text.Pandoc.Options +import Text.Pandoc.Logging +import Text.Pandoc.Walk +import Text.Pandoc.Compat.Time (UTCTime) +import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element" +import Text.Pandoc.Writers.Shared (metaValueToInlines) +import qualified Data.Map as M +import qualified Data.Set as S +import Data.Maybe (maybeToList, fromMaybe) +import Text.Pandoc.Highlighting +import qualified Data.Text as T +import Control.Applicative ((<|>)) +import Skylighting + +data WriterEnv = WriterEnv { envMetadata :: Meta + , envRunProps :: RunProps + , envParaProps :: ParaProps + , envSlideLevel :: Int + , envOpts :: WriterOptions + , envSlideHasHeader :: Bool + , envInList :: Bool + , envInNoteSlide :: Bool + , envCurSlideId :: SlideId + , envInSpeakerNotes :: Bool + } + deriving (Show) + +instance Default WriterEnv where + def = WriterEnv { envMetadata = mempty + , envRunProps = def + , envParaProps = def + , envSlideLevel = 2 + , envOpts = def + , envSlideHasHeader = False + , envInList = False + , envInNoteSlide = False + , envCurSlideId = SlideId "Default" + , envInSpeakerNotes = False + } + + +data WriterState = WriterState { stNoteIds :: M.Map Int [Block] + -- associate anchors with slide id + , stAnchorMap :: M.Map String SlideId + , stSlideIdSet :: S.Set SlideId + , stLog :: [LogMessage] + , stSpeakerNotesMap :: M.Map SlideId [[Paragraph]] + } deriving (Show, Eq) + +instance Default WriterState where + def = WriterState { stNoteIds = mempty + , stAnchorMap = mempty + -- we reserve this s + , stSlideIdSet = reservedSlideIds + , stLog = [] + , stSpeakerNotesMap = mempty + } + +metadataSlideId :: SlideId +metadataSlideId = SlideId "Metadata" + +tocSlideId :: SlideId +tocSlideId = SlideId "TOC" + +endNotesSlideId :: SlideId +endNotesSlideId = SlideId "EndNotes" + +reservedSlideIds :: S.Set SlideId +reservedSlideIds = S.fromList [ metadataSlideId + , tocSlideId + , endNotesSlideId + ] + +uniqueSlideId' :: Integer -> S.Set SlideId -> String -> SlideId +uniqueSlideId' n idSet s = + let s' = if n == 0 then s else s ++ "-" ++ show n + in if SlideId s' `S.member` idSet + then uniqueSlideId' (n+1) idSet s + else SlideId s' + +uniqueSlideId :: S.Set SlideId -> String -> SlideId +uniqueSlideId = uniqueSlideId' 0 + +runUniqueSlideId :: String -> Pres SlideId +runUniqueSlideId s = do + idSet <- gets stSlideIdSet + let sldId = uniqueSlideId idSet s + modify $ \st -> st{stSlideIdSet = S.insert sldId idSet} + return sldId + +addLogMessage :: LogMessage -> Pres () +addLogMessage msg = modify $ \st -> st{stLog = msg : stLog st} + +type Pres = ReaderT WriterEnv (State WriterState) + +runPres :: WriterEnv -> WriterState -> Pres a -> (a, [LogMessage]) +runPres env st p = (pres, reverse $ stLog finalSt) + where (pres, finalSt) = runState (runReaderT p env) st + +-- GHC 7.8 will still complain about concat <$> mapM unless we specify +-- Functor. We can get rid of this when we stop supporting GHC 7.8. +concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] +concatMapM f xs = liftM concat (mapM f xs) + +type Pixels = Integer + +data Presentation = Presentation DocProps [Slide] + deriving (Show) + +data DocProps = DocProps { dcTitle :: Maybe String + , dcSubject :: Maybe String + , dcCreator :: Maybe String + , dcKeywords :: Maybe [String] + , dcCreated :: Maybe UTCTime + } deriving (Show, Eq) + + +data Slide = Slide { slideId :: SlideId + , slideLayout :: Layout + , slideSpeakerNotes :: Maybe SpeakerNotes + } deriving (Show, Eq) + +newtype SlideId = SlideId String + deriving (Show, Eq, Ord) + +-- In theory you could have anything on a notes slide but it seems +-- designed mainly for one textbox, so we'll just put in the contents +-- of that textbox, to avoid other shapes that won't work as well. +newtype SpeakerNotes = SpeakerNotes {fromSpeakerNotes :: [Paragraph]} + deriving (Show, Eq) + +data Layout = MetadataSlide { metadataSlideTitle :: [ParaElem] + , metadataSlideSubtitle :: [ParaElem] + , metadataSlideAuthors :: [[ParaElem]] + , metadataSlideDate :: [ParaElem] + } + | TitleSlide { titleSlideHeader :: [ParaElem]} + | ContentSlide { contentSlideHeader :: [ParaElem] + , contentSlideContent :: [Shape] + } + | TwoColumnSlide { twoColumnSlideHeader :: [ParaElem] + , twoColumnSlideLeft :: [Shape] + , twoColumnSlideRight :: [Shape] + } + deriving (Show, Eq) + +data Shape = Pic PicProps FilePath [ParaElem] + | GraphicFrame [Graphic] [ParaElem] + | TextBox [Paragraph] + deriving (Show, Eq) + +type Cell = [Paragraph] + +data TableProps = TableProps { tblPrFirstRow :: Bool + , tblPrBandRow :: Bool + } deriving (Show, Eq) + +data Graphic = Tbl TableProps [Cell] [[Cell]] + deriving (Show, Eq) + + +data Paragraph = Paragraph { paraProps :: ParaProps + , paraElems :: [ParaElem] + } deriving (Show, Eq) + +data BulletType = Bullet + | AutoNumbering ListAttributes + deriving (Show, Eq) + +data Algnment = AlgnLeft | AlgnRight | AlgnCenter + deriving (Show, Eq) + +data ParaProps = ParaProps { pPropMarginLeft :: Maybe Pixels + , pPropMarginRight :: Maybe Pixels + , pPropLevel :: Int + , pPropBullet :: Maybe BulletType + , pPropAlign :: Maybe Algnment + , pPropSpaceBefore :: Maybe Pixels + } deriving (Show, Eq) + +instance Default ParaProps where + def = ParaProps { pPropMarginLeft = Just 0 + , pPropMarginRight = Just 0 + , pPropLevel = 0 + , pPropBullet = Nothing + , pPropAlign = Nothing + , pPropSpaceBefore = Nothing + } + +newtype TeXString = TeXString {unTeXString :: String} + deriving (Eq, Show) + +data ParaElem = Break + | Run RunProps String + -- It would be more elegant to have native TeXMath + -- Expressions here, but this allows us to use + -- `convertmath` from T.P.Writers.Math. Will perhaps + -- revisit in the future. + | MathElem MathType TeXString + deriving (Show, Eq) + +data Strikethrough = NoStrike | SingleStrike | DoubleStrike + deriving (Show, Eq) + +data Capitals = NoCapitals | SmallCapitals | AllCapitals + deriving (Show, Eq) + +type URL = String + +data LinkTarget = ExternalTarget (URL, String) + | InternalTarget SlideId + deriving (Show, Eq) + +data RunProps = RunProps { rPropBold :: Bool + , rPropItalics :: Bool + , rStrikethrough :: Maybe Strikethrough + , rBaseline :: Maybe Int + , rCap :: Maybe Capitals + , rLink :: Maybe LinkTarget + , rPropCode :: Bool + , rPropBlockQuote :: Bool + , rPropForceSize :: Maybe Pixels + , rSolidFill :: Maybe Color + -- TODO: Make a full underline data type with + -- the different options. + , rPropUnderline :: Bool + } deriving (Show, Eq) + +instance Default RunProps where + def = RunProps { rPropBold = False + , rPropItalics = False + , rStrikethrough = Nothing + , rBaseline = Nothing + , rCap = Nothing + , rLink = Nothing + , rPropCode = False + , rPropBlockQuote = False + , rPropForceSize = Nothing + , rSolidFill = Nothing + , rPropUnderline = False + } + +data PicProps = PicProps { picPropLink :: Maybe LinkTarget + , picWidth :: Maybe Dimension + , picHeight :: Maybe Dimension + } deriving (Show, Eq) + +instance Default PicProps where + def = PicProps { picPropLink = Nothing + , picWidth = Nothing + , picHeight = Nothing + } + +-------------------------------------------------- + +inlinesToParElems :: [Inline] -> Pres [ParaElem] +inlinesToParElems ils = concatMapM inlineToParElems ils + +inlineToParElems :: Inline -> Pres [ParaElem] +inlineToParElems (Str s) = do + pr <- asks envRunProps + return [Run pr s] +inlineToParElems (Emph ils) = + local (\r -> r{envRunProps = (envRunProps r){rPropItalics=True}}) $ + inlinesToParElems ils +inlineToParElems (Strong ils) = + local (\r -> r{envRunProps = (envRunProps r){rPropBold=True}}) $ + inlinesToParElems ils +inlineToParElems (Strikeout ils) = + local (\r -> r{envRunProps = (envRunProps r){rStrikethrough=Just SingleStrike}}) $ + inlinesToParElems ils +inlineToParElems (Superscript ils) = + local (\r -> r{envRunProps = (envRunProps r){rBaseline=Just 30000}}) $ + inlinesToParElems ils +inlineToParElems (Subscript ils) = + local (\r -> r{envRunProps = (envRunProps r){rBaseline=Just (-25000)}}) $ + inlinesToParElems ils +inlineToParElems (SmallCaps ils) = + local (\r -> r{envRunProps = (envRunProps r){rCap = Just SmallCapitals}}) $ + inlinesToParElems ils +inlineToParElems Space = inlineToParElems (Str " ") +inlineToParElems SoftBreak = inlineToParElems (Str " ") +inlineToParElems LineBreak = return [Break] +inlineToParElems (Link _ ils (url, title)) = + local (\r ->r{envRunProps = (envRunProps r){rLink = Just $ ExternalTarget (url, title)}}) $ + inlinesToParElems ils +inlineToParElems (Code _ str) = + local (\r ->r{envRunProps = (envRunProps r){rPropCode = True}}) $ + inlineToParElems $ Str str +inlineToParElems (Math mathtype str) = + return [MathElem mathtype (TeXString str)] +-- We ignore notes if we're in a speaker notes div. Otherwise this +-- would add an entry to the endnotes slide, which would put speaker +-- notes in the public presentation. In the future, we can entertain a +-- way of adding a speakernotes-specific note that would just add +-- paragraphs to the bottom of the notes page. +inlineToParElems (Note blks) = do + inSpNotes <- asks envInSpeakerNotes + if inSpNotes + then return [] + else do + notes <- gets stNoteIds + let maxNoteId = case M.keys notes of + [] -> 0 + lst -> maximum lst + curNoteId = maxNoteId + 1 + modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes } + local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $ + inlineToParElems $ Superscript [Str $ show curNoteId] +inlineToParElems (Span _ ils) = concatMapM inlineToParElems ils +inlineToParElems (RawInline _ _) = return [] +inlineToParElems _ = return [] + +isListType :: Block -> Bool +isListType (OrderedList _ _) = True +isListType (BulletList _) = True +isListType (DefinitionList _) = True +isListType _ = False + +registerAnchorId :: String -> Pres () +registerAnchorId anchor = do + anchorMap <- gets stAnchorMap + sldId <- asks envCurSlideId + unless (null anchor) $ + modify $ \st -> st {stAnchorMap = M.insert anchor sldId anchorMap} + +-- Currently hardcoded, until I figure out how to make it dynamic. +blockQuoteSize :: Pixels +blockQuoteSize = 20 + +noteSize :: Pixels +noteSize = 18 + +blockToParagraphs :: Block -> Pres [Paragraph] +blockToParagraphs (Plain ils) = do + parElems <- inlinesToParElems ils + pProps <- asks envParaProps + return [Paragraph pProps parElems] +blockToParagraphs (Para ils) = do + parElems <- inlinesToParElems ils + pProps <- asks envParaProps + return [Paragraph pProps parElems] +blockToParagraphs (LineBlock ilsList) = do + parElems <- inlinesToParElems $ intercalate [LineBreak] ilsList + pProps <- asks envParaProps + return [Paragraph pProps parElems] +-- TODO: work out the attributes +blockToParagraphs (CodeBlock attr str) = + local (\r -> r{ envParaProps = def{pPropMarginLeft = Just 100} + , envRunProps = (envRunProps r){rPropCode = True}}) $ do + mbSty <- writerHighlightStyle <$> asks envOpts + synMap <- writerSyntaxMap <$> asks envOpts + case mbSty of + Just sty -> + case highlight synMap (formatSourceLines sty) attr str of + Right pElems -> do pProps <- asks envParaProps + return [Paragraph pProps pElems] + Left _ -> blockToParagraphs $ Para [Str str] + Nothing -> blockToParagraphs $ Para [Str str] +-- We can't yet do incremental lists, but we should render a +-- (BlockQuote List) as a list to maintain compatibility with other +-- formats. +blockToParagraphs (BlockQuote (blk : blks)) | isListType blk = do + ps <- blockToParagraphs blk + ps' <- blockToParagraphs $ BlockQuote blks + return $ ps ++ ps' +blockToParagraphs (BlockQuote blks) = + local (\r -> r{ envParaProps = (envParaProps r){pPropMarginLeft = Just 100} + , envRunProps = (envRunProps r){rPropForceSize = Just blockQuoteSize}})$ + concatMapM blockToParagraphs blks +-- TODO: work out the format +blockToParagraphs (RawBlock _ _) = return [] +blockToParagraphs (Header _ (ident, _, _) ils) = do + -- Note that this function only deals with content blocks, so it + -- will only touch headers that are above the current slide level -- + -- slides at or below the slidelevel will be taken care of by + -- `blocksToSlide'`. We have the register anchors in both of them. + registerAnchorId ident + -- we set the subeader to bold + parElems <- local (\e->e{envRunProps = (envRunProps e){rPropBold=True}}) $ + inlinesToParElems ils + -- and give it a bit of space before it. + return [Paragraph def{pPropSpaceBefore = Just 30} parElems] +blockToParagraphs (BulletList blksLst) = do + pProps <- asks envParaProps + let lvl = pPropLevel pProps + local (\env -> env{ envInList = True + , envParaProps = pProps{ pPropLevel = lvl + 1 + , pPropBullet = Just Bullet + , pPropMarginLeft = Nothing + }}) $ + concatMapM multiParBullet blksLst +blockToParagraphs (OrderedList listAttr blksLst) = do + pProps <- asks envParaProps + let lvl = pPropLevel pProps + local (\env -> env{ envInList = True + , envParaProps = pProps{ pPropLevel = lvl + 1 + , pPropBullet = Just (AutoNumbering listAttr) + , pPropMarginLeft = Nothing + }}) $ + concatMapM multiParBullet blksLst +blockToParagraphs (DefinitionList entries) = do + let go :: ([Inline], [[Block]]) -> Pres [Paragraph] + go (ils, blksLst) = do + term <-blockToParagraphs $ Para [Strong ils] + -- For now, we'll treat each definition term as a + -- blockquote. We can extend this further later. + definition <- concatMapM (blockToParagraphs . BlockQuote) blksLst + return $ term ++ definition + concatMapM go entries +blockToParagraphs (Div (_, "notes" : [], _) blks) = + local (\env -> env{envInSpeakerNotes=True}) $ do + sldId <- asks envCurSlideId + spkNotesMap <- gets stSpeakerNotesMap + paras <- concatMapM blockToParagraphs blks + let spkNotesMap' = case M.lookup sldId spkNotesMap of + Just lst -> M.insert sldId (paras : lst) spkNotesMap + Nothing -> M.insert sldId [paras] spkNotesMap + modify $ \st -> st{stSpeakerNotesMap = spkNotesMap'} + return [] +blockToParagraphs (Div _ blks) = concatMapM blockToParagraphs blks +blockToParagraphs blk = do + addLogMessage $ BlockNotRendered blk + return [] + +-- Make sure the bullet env gets turned off after the first para. +multiParBullet :: [Block] -> Pres [Paragraph] +multiParBullet [] = return [] +multiParBullet (b:bs) = do + pProps <- asks envParaProps + p <- blockToParagraphs b + ps <- local (\env -> env{envParaProps = pProps{pPropBullet = Nothing}}) $ + concatMapM blockToParagraphs bs + return $ p ++ ps + +cellToParagraphs :: Alignment -> TableCell -> Pres [Paragraph] +cellToParagraphs algn tblCell = do + paras <- mapM blockToParagraphs tblCell + let alignment = case algn of + AlignLeft -> Just AlgnLeft + AlignRight -> Just AlgnRight + AlignCenter -> Just AlgnCenter + AlignDefault -> Nothing + paras' = map (map (\p -> p{paraProps = (paraProps p){pPropAlign = alignment}})) paras + return $ concat paras' + +rowToParagraphs :: [Alignment] -> [TableCell] -> Pres [[Paragraph]] +rowToParagraphs algns tblCells = do + -- We have to make sure we have the right number of alignments + let pairs = zip (algns ++ repeat AlignDefault) tblCells + mapM (uncurry cellToParagraphs) pairs + +withAttr :: Attr -> Shape -> Shape +withAttr attr (Pic picPr url caption) = + let picPr' = picPr { picWidth = dimension Width attr + , picHeight = dimension Height attr + } + in + Pic picPr' url caption +withAttr _ sp = sp + +blockToShape :: Block -> Pres Shape +blockToShape (Plain (il:_)) | Image attr ils (url, _) <- il = + (withAttr attr . Pic def url) <$> inlinesToParElems ils +blockToShape (Para (il:_)) | Image attr ils (url, _) <- il = + (withAttr attr . Pic def url) <$> inlinesToParElems ils +blockToShape (Plain (il:_)) | Link _ (il':_) target <- il + , Image attr ils (url, _) <- il' = + (withAttr attr . Pic def {picPropLink = Just $ ExternalTarget target} url) <$> + inlinesToParElems ils +blockToShape (Para (il:_)) | Link _ (il':_) target <- il + , Image attr ils (url, _) <- il' = + (withAttr attr . Pic def{picPropLink = Just $ ExternalTarget target} url) <$> + inlinesToParElems ils +blockToShape (Table caption algn _ hdrCells rows) = do + caption' <- inlinesToParElems caption + hdrCells' <- rowToParagraphs algn hdrCells + rows' <- mapM (rowToParagraphs algn) rows + let tblPr = if null hdrCells + then TableProps { tblPrFirstRow = False + , tblPrBandRow = True + } + else TableProps { tblPrFirstRow = True + , tblPrBandRow = True + } + + return $ GraphicFrame [Tbl tblPr hdrCells' rows'] caption' +blockToShape blk = do paras <- blockToParagraphs blk + let paras' = map (\par -> par{paraElems = combineParaElems $ paraElems par}) paras + return $ TextBox paras' + +combineShapes :: [Shape] -> [Shape] +combineShapes [] = [] +combineShapes[s] = [s] +combineShapes (pic@(Pic{}) : ss) = pic : combineShapes ss +combineShapes (TextBox [] : ss) = combineShapes ss +combineShapes (s : TextBox [] : ss) = combineShapes (s : ss) +combineShapes (TextBox (p:ps) : TextBox (p':ps') : ss) = + combineShapes $ TextBox ((p:ps) ++ (p':ps')) : ss +combineShapes (s:ss) = s : combineShapes ss + +blocksToShapes :: [Block] -> Pres [Shape] +blocksToShapes blks = combineShapes <$> mapM blockToShape blks + +isImage :: Inline -> Bool +isImage (Image{}) = True +isImage (Link _ (Image _ _ _ : _) _) = True +isImage _ = False + +splitBlocks' :: [Block] -> [[Block]] -> [Block] -> Pres [[Block]] +splitBlocks' cur acc [] = return $ acc ++ (if null cur then [] else [cur]) +splitBlocks' cur acc (HorizontalRule : blks) = + splitBlocks' [] (acc ++ (if null cur then [] else [cur])) blks +splitBlocks' cur acc (h@(Header n _ _) : blks) = do + slideLevel <- asks envSlideLevel + case compare n slideLevel of + LT -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[h]]) blks + EQ -> splitBlocks' [h] (acc ++ (if null cur then [] else [cur])) blks + GT -> splitBlocks' (cur ++ [h]) acc blks +-- `blockToParagraphs` treats Plain and Para the same, so we can save +-- some code duplication by treating them the same here. +splitBlocks' cur acc (Plain ils : blks) = splitBlocks' cur acc (Para ils : blks) +splitBlocks' cur acc (Para (il:ils) : blks) | isImage il = do + slideLevel <- asks envSlideLevel + case cur of + [(Header n _ _)] | n == slideLevel -> + splitBlocks' [] + (acc ++ [cur ++ [Para [il]]]) + (if null ils then blks else Para ils : blks) + _ -> splitBlocks' [] + (acc ++ (if null cur then [] else [cur]) ++ [[Para [il]]]) + (if null ils then blks else Para ils : blks) +splitBlocks' cur acc (tbl@(Table{}) : blks) = do + slideLevel <- asks envSlideLevel + case cur of + [(Header n _ _)] | n == slideLevel -> + splitBlocks' [] (acc ++ [cur ++ [tbl]]) blks + _ -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[tbl]]) blks +splitBlocks' cur acc (d@(Div (_, classes, _) _): blks) | "columns" `elem` classes = do + slideLevel <- asks envSlideLevel + case cur of + [(Header n _ _)] | n == slideLevel -> + splitBlocks' [] (acc ++ [cur ++ [d]]) blks + _ -> splitBlocks' [] (acc ++ (if null cur then [] else [cur]) ++ [[d]]) blks +splitBlocks' cur acc (blk : blks) = splitBlocks' (cur ++ [blk]) acc blks + +splitBlocks :: [Block] -> Pres [[Block]] +splitBlocks = splitBlocks' [] [] + +getSpeakerNotes :: Pres (Maybe SpeakerNotes) +getSpeakerNotes = do + sldId <- asks envCurSlideId + spkNtsMap <- gets stSpeakerNotesMap + return $ (SpeakerNotes . concat . reverse) <$> (M.lookup sldId spkNtsMap) + +blocksToSlide' :: Int -> [Block] -> Pres Slide +blocksToSlide' lvl (Header n (ident, _, _) ils : blks) + | n < lvl = do + registerAnchorId ident + sldId <- asks envCurSlideId + hdr <- inlinesToParElems ils + return $ Slide sldId TitleSlide {titleSlideHeader = hdr} Nothing + | n == lvl = do + registerAnchorId ident + hdr <- inlinesToParElems ils + -- Now get the slide without the header, and then add the header + -- in. + slide <- blocksToSlide' lvl blks + let layout = case slideLayout slide of + ContentSlide _ cont -> ContentSlide hdr cont + TwoColumnSlide _ contL contR -> TwoColumnSlide hdr contL contR + layout' -> layout' + return $ slide{slideLayout = layout} +blocksToSlide' _ (blk : blks) + | Div (_, classes, _) divBlks <- blk + , "columns" `elem` classes + , Div (_, clsL, _) blksL : Div (_, clsR, _) blksR : remaining <- divBlks + , "column" `elem` clsL, "column" `elem` clsR = do + unless (null blks) + (mapM (addLogMessage . BlockNotRendered) blks >> return ()) + unless (null remaining) + (mapM (addLogMessage . BlockNotRendered) remaining >> return ()) + mbSplitBlksL <- splitBlocks blksL + mbSplitBlksR <- splitBlocks blksR + let blksL' = case mbSplitBlksL of + bs : _ -> bs + [] -> [] + let blksR' = case mbSplitBlksR of + bs : _ -> bs + [] -> [] + shapesL <- blocksToShapes blksL' + shapesR <- blocksToShapes blksR' + sldId <- asks envCurSlideId + return $ Slide + sldId + TwoColumnSlide { twoColumnSlideHeader = [] + , twoColumnSlideLeft = shapesL + , twoColumnSlideRight = shapesR + } + Nothing +blocksToSlide' _ (blk : blks) = do + inNoteSlide <- asks envInNoteSlide + shapes <- if inNoteSlide + then forceFontSize noteSize $ blocksToShapes (blk : blks) + else blocksToShapes (blk : blks) + sldId <- asks envCurSlideId + return $ + Slide + sldId + ContentSlide { contentSlideHeader = [] + , contentSlideContent = shapes + } + Nothing +blocksToSlide' _ [] = do + sldId <- asks envCurSlideId + return $ + Slide + sldId + ContentSlide { contentSlideHeader = [] + , contentSlideContent = [] + } + Nothing + +blocksToSlide :: [Block] -> Pres Slide +blocksToSlide blks = do + slideLevel <- asks envSlideLevel + sld <- blocksToSlide' slideLevel blks + spkNotes <- getSpeakerNotes + return $ sld{slideSpeakerNotes = spkNotes} + +makeNoteEntry :: Int -> [Block] -> [Block] +makeNoteEntry n blks = + let enum = Str (show n ++ ".") + in + case blks of + (Para ils : blks') -> (Para $ enum : Space : ils) : blks' + _ -> Para [enum] : blks + +forceFontSize :: Pixels -> Pres a -> Pres a +forceFontSize px x = do + rpr <- asks envRunProps + local (\r -> r {envRunProps = rpr{rPropForceSize = Just px}}) x + +-- We leave these as blocks because we will want to include them in +-- the TOC. +makeEndNotesSlideBlocks :: Pres [Block] +makeEndNotesSlideBlocks = do + noteIds <- gets stNoteIds + slideLevel <- asks envSlideLevel + meta <- asks envMetadata + -- Get identifiers so we can give the notes section a unique ident. + anchorSet <- M.keysSet <$> gets stAnchorMap + if M.null noteIds + then return [] + else do let title = case lookupMeta "notes-title" meta of + Just val -> metaValueToInlines val + Nothing -> [Str "Notes"] + ident = Shared.uniqueIdent title anchorSet + hdr = Header slideLevel (ident, [], []) title + blks <- return $ + concatMap (\(n, bs) -> makeNoteEntry n bs) $ + M.toList noteIds + return $ hdr : blks + +getMetaSlide :: Pres (Maybe Slide) +getMetaSlide = do + meta <- asks envMetadata + title <- inlinesToParElems $ docTitle meta + subtitle <- inlinesToParElems $ + case lookupMeta "subtitle" meta of + Just (MetaString s) -> [Str s] + Just (MetaInlines ils) -> ils + Just (MetaBlocks [Plain ils]) -> ils + Just (MetaBlocks [Para ils]) -> ils + _ -> [] + authors <- mapM inlinesToParElems $ docAuthors meta + date <- inlinesToParElems $ docDate meta + if null title && null subtitle && null authors && null date + then return Nothing + else return $ + Just $ + Slide + metadataSlideId + MetadataSlide { metadataSlideTitle = title + , metadataSlideSubtitle = subtitle + , metadataSlideAuthors = authors + , metadataSlideDate = date + } + Nothing + +-- adapted from the markdown writer +elementToListItem :: Shared.Element -> Pres [Block] +elementToListItem (Shared.Sec lev _nums (ident,_,_) headerText subsecs) = do + opts <- asks envOpts + let headerLink = if null ident + then walk Shared.deNote headerText + else [Link nullAttr (walk Shared.deNote headerText) + ('#':ident, "")] + listContents <- if null subsecs || lev >= writerTOCDepth opts + then return [] + else mapM elementToListItem subsecs + return [Plain headerLink, BulletList listContents] +elementToListItem (Shared.Blk _) = return [] + +makeTOCSlide :: [Block] -> Pres Slide +makeTOCSlide blks = local (\env -> env{envCurSlideId = tocSlideId}) $ do + contents <- BulletList <$> mapM elementToListItem (Shared.hierarchicalize blks) + meta <- asks envMetadata + slideLevel <- asks envSlideLevel + let tocTitle = case lookupMeta "toc-title" meta of + Just val -> metaValueToInlines val + Nothing -> [Str "Table of Contents"] + hdr = Header slideLevel nullAttr tocTitle + sld <- blocksToSlide [hdr, contents] + return sld + +combineParaElems' :: Maybe ParaElem -> [ParaElem] -> [ParaElem] +combineParaElems' mbPElem [] = maybeToList mbPElem +combineParaElems' Nothing (pElem : pElems) = + combineParaElems' (Just pElem) pElems +combineParaElems' (Just pElem') (pElem : pElems) + | Run rPr' s' <- pElem' + , Run rPr s <- pElem + , rPr == rPr' = + combineParaElems' (Just $ Run rPr' $ s' ++ s) pElems + | otherwise = + pElem' : combineParaElems' (Just pElem) pElems + +combineParaElems :: [ParaElem] -> [ParaElem] +combineParaElems = combineParaElems' Nothing + +applyToParagraph :: Monad m => (ParaElem -> m ParaElem) -> Paragraph -> m Paragraph +applyToParagraph f para = do + paraElems' <- mapM f $ paraElems para + return $ para {paraElems = paraElems'} + +applyToShape :: Monad m => (ParaElem -> m ParaElem) -> Shape -> m Shape +applyToShape f (Pic pPr fp pes) = do + pes' <- mapM f pes + return $ Pic pPr fp pes' +applyToShape f (GraphicFrame gfx pes) = do + pes' <- mapM f pes + return $ GraphicFrame gfx pes' +applyToShape f (TextBox paras) = do + paras' <- mapM (applyToParagraph f) paras + return $ TextBox paras' + +applyToLayout :: Monad m => (ParaElem -> m ParaElem) -> Layout -> m Layout +applyToLayout f (MetadataSlide title subtitle authors date) = do + title' <- mapM f title + subtitle' <- mapM f subtitle + authors' <- mapM (mapM f) authors + date' <- mapM f date + return $ MetadataSlide title' subtitle' authors' date' +applyToLayout f (TitleSlide title) = do + title' <- mapM f title + return $ TitleSlide title' +applyToLayout f (ContentSlide hdr content) = do + hdr' <- mapM f hdr + content' <- mapM (applyToShape f) content + return $ ContentSlide hdr' content' +applyToLayout f (TwoColumnSlide hdr contentL contentR) = do + hdr' <- mapM f hdr + contentL' <- mapM (applyToShape f) contentL + contentR' <- mapM (applyToShape f) contentR + return $ TwoColumnSlide hdr' contentL' contentR' + +applyToSlide :: Monad m => (ParaElem -> m ParaElem) -> Slide -> m Slide +applyToSlide f slide = do + layout' <- applyToLayout f $ slideLayout slide + mbNotes' <- case slideSpeakerNotes slide of + Just (SpeakerNotes notes) -> (Just . SpeakerNotes) <$> + mapM (applyToParagraph f) notes + Nothing -> return Nothing + return slide{slideLayout = layout', slideSpeakerNotes = mbNotes'} + +replaceAnchor :: ParaElem -> Pres ParaElem +replaceAnchor (Run rProps s) + | Just (ExternalTarget ('#':anchor, _)) <- rLink rProps = do + anchorMap <- gets stAnchorMap + -- If the anchor is not in the anchormap, we just remove the + -- link. + let rProps' = case M.lookup anchor anchorMap of + Just n -> rProps{rLink = Just $ InternalTarget n} + Nothing -> rProps{rLink = Nothing} + return $ Run rProps' s +replaceAnchor pe = return pe + +emptyParaElem :: ParaElem -> Bool +emptyParaElem (Run _ s) = + null $ Shared.trim s +emptyParaElem (MathElem _ ts) = + null $ Shared.trim $ unTeXString ts +emptyParaElem _ = False + +emptyParagraph :: Paragraph -> Bool +emptyParagraph para = all emptyParaElem $ paraElems para + + +emptyShape :: Shape -> Bool +emptyShape (TextBox paras) = all emptyParagraph $ paras +emptyShape _ = False + +emptyLayout :: Layout -> Bool +emptyLayout layout = case layout of + MetadataSlide title subtitle authors date -> + all emptyParaElem title && + all emptyParaElem subtitle && + all (all emptyParaElem) authors && + all emptyParaElem date + TitleSlide hdr -> all emptyParaElem hdr + ContentSlide hdr shapes -> + all emptyParaElem hdr && + all emptyShape shapes + TwoColumnSlide hdr shapes1 shapes2 -> + all emptyParaElem hdr && + all emptyShape shapes1 && + all emptyShape shapes2 + +emptySlide :: Slide -> Bool +emptySlide (Slide _ layout Nothing) = emptyLayout layout +emptySlide _ = False + +blocksToPresentationSlides :: [Block] -> Pres [Slide] +blocksToPresentationSlides blks = do + opts <- asks envOpts + metadataslides <- maybeToList <$> getMetaSlide + -- As far as I can tell, if we want to have a variable-length toc in + -- the future, we'll have to make it twice. Once to get the length, + -- and a second time to include the notes slide. We can't make the + -- notes slide before the body slides because we need to know if + -- there are notes, and we can't make either before the toc slide, + -- because we need to know its length to get slide numbers right. + -- + -- For now, though, since the TOC slide is only length 1, if it + -- exists, we'll just get the length, and then come back to make the + -- slide later + blksLst <- splitBlocks blks + bodySlideIds <- mapM + (\n -> runUniqueSlideId $ "BodySlide" ++ show n) + (take (length blksLst) [1..] :: [Integer]) + bodyslides <- mapM + (\(bs, ident) -> + local (\st -> st{envCurSlideId = ident}) (blocksToSlide bs)) + (zip blksLst bodySlideIds) + endNotesSlideBlocks <- makeEndNotesSlideBlocks + -- now we come back and make the real toc... + tocSlides <- if writerTableOfContents opts + then do toc <- makeTOCSlide $ blks ++ endNotesSlideBlocks + return [toc] + else return [] + -- ... and the notes slide. We test to see if the blocks are empty, + -- because we don't want to make an empty slide. + endNotesSlides <- if null endNotesSlideBlocks + then return [] + else do endNotesSlide <- local + (\env -> env { envCurSlideId = endNotesSlideId + , envInNoteSlide = True + }) + (blocksToSlide endNotesSlideBlocks) + return [endNotesSlide] + + let slides = metadataslides ++ tocSlides ++ bodyslides ++ endNotesSlides + slides' = filter (not . emptySlide) slides + mapM (applyToSlide replaceAnchor) slides' + +metaToDocProps :: Meta -> DocProps +metaToDocProps meta = + let keywords = case lookupMeta "keywords" meta of + Just (MetaList xs) -> Just $ map Shared.stringify xs + _ -> Nothing + + authors = case map Shared.stringify $ docAuthors meta of + [] -> Nothing + ss -> Just $ intercalate ";" ss + in + DocProps{ dcTitle = Shared.stringify <$> lookupMeta "title" meta + , dcSubject = Shared.stringify <$> lookupMeta "subject" meta + , dcCreator = authors + , dcKeywords = keywords + , dcCreated = Nothing + } + +documentToPresentation :: WriterOptions + -> Pandoc + -> (Presentation, [LogMessage]) +documentToPresentation opts (Pandoc meta blks) = + let env = def { envOpts = opts + , envMetadata = meta + , envSlideLevel = fromMaybe (getSlideLevel blks) (writerSlideLevel opts) + } + (presSlides, msgs) = runPres env def $ blocksToPresentationSlides blks + docProps = metaToDocProps meta + in + (Presentation docProps presSlides, msgs) + +-- -------------------------------------------------------------- + +applyTokStyToRunProps :: TokenStyle -> RunProps -> RunProps +applyTokStyToRunProps tokSty rProps = + rProps{ rSolidFill = tokenColor tokSty <|> rSolidFill rProps + , rPropBold = tokenBold tokSty || rPropBold rProps + , rPropItalics = tokenItalic tokSty || rPropItalics rProps + , rPropUnderline = tokenUnderline tokSty || rPropUnderline rProps + } + +formatToken :: Style -> Token -> ParaElem +formatToken sty (tokType, txt) = + let rProps = def{rPropCode = True, rSolidFill = defaultColor sty} + rProps' = case M.lookup tokType (tokenStyles sty) of + Just tokSty -> applyTokStyToRunProps tokSty rProps + Nothing -> rProps + in + Run rProps' $ T.unpack txt + +formatSourceLine :: Style -> FormatOptions -> SourceLine -> [ParaElem] +formatSourceLine sty _ srcLn = map (formatToken sty) srcLn + +formatSourceLines :: Style -> FormatOptions -> [SourceLine] -> [ParaElem] +formatSourceLines sty opts srcLns = intercalate [Break] $ + map (formatSourceLine sty opts) srcLns diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 064434483..95cb46643 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,19 +31,21 @@ Conversion of 'Pandoc' documents to reStructuredText. reStructuredText: <http://docutils.sourceforge.net/rst.html> -} module Text.Pandoc.Writers.RST ( writeRST ) where +import Control.Monad.State.Strict +import Data.Char (isSpace, toLower) +import Data.List (isPrefixOf, stripPrefix) +import Data.Maybe (fromMaybe) +import Data.Text (Text, stripEnd) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options +import Text.Pandoc.Pretty import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.ImageSize import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Builder (deleteMeta) -import Data.Maybe (fromMaybe) -import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose ) -import Network.URI (isURI) -import Text.Pandoc.Pretty -import Control.Monad.State -import Data.Char (isSpace, toLower) +import Text.Pandoc.Writers.Shared type Refs = [([Inline], Target)] @@ -57,95 +59,99 @@ data WriterState = , stTopLevel :: Bool } +type RST = StateT WriterState + -- | Convert Pandoc to RST. -writeRST :: WriterOptions -> Pandoc -> String -writeRST opts document = +writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeRST opts document = do let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, stHasRawTeX = False, stOptions = opts, - stTopLevel = True} - in evalState (pandocToRST document) st + stTopLevel = True } + evalStateT (pandocToRST document) st -- | Return RST representation of document. -pandocToRST :: Pandoc -> State WriterState String +pandocToRST :: PandocMonad m => Pandoc -> RST m Text pandocToRST (Pandoc meta blocks) = do - opts <- liftM stOptions get + opts <- gets stOptions let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + let render' :: Doc -> Text + render' = render colwidth let subtit = case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> xs - _ -> [] + _ -> [] title <- titleToRST (docTitle meta) subtit metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToRST) - (fmap (trimr . render colwidth) . inlineListToRST) - $ deleteMeta "title" $ deleteMeta "subtitle" meta + (fmap render' . blockListToRST) + (fmap (stripEnd . render') . inlineListToRST) + $ B.deleteMeta "title" $ B.deleteMeta "subtitle" meta body <- blockListToRST' True $ case writerTemplate opts of Just _ -> normalizeHeadings 1 blocks Nothing -> blocks - notes <- liftM (reverse . stNotes) get >>= notesToRST + notes <- gets (reverse . stNotes) >>= notesToRST -- note that the notes may contain refs, so we do them first - refs <- liftM (reverse . stLinks) get >>= refsToRST - pics <- liftM (reverse . stImages) get >>= pictRefsToRST - hasMath <- liftM stHasMath get - rawTeX <- liftM stHasRawTeX get - let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] + refs <- gets (reverse . stLinks) >>= refsToRST + pics <- gets (reverse . stImages) >>= pictRefsToRST + hasMath <- gets stHasMath + rawTeX <- gets stHasRawTeX + let main = render' $ foldl ($+$) empty [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) $ defField "toc-depth" (show $ writerTOCDepth opts) $ defField "math" hasMath $ defField "title" (render Nothing title :: String) $ defField "math" hasMath - $ defField "rawtex" rawTeX - $ metadata + $ defField "rawtex" rawTeX metadata case writerTemplate opts of Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context where normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' where (cont,bs') = break (headerLtEq l) bs headerLtEq level (Header l' _ _) = l' <= level - headerLtEq _ _ = False + headerLtEq _ _ = False normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs normalizeHeadings _ [] = [] -- | Return RST representation of reference key table. -refsToRST :: Refs -> State WriterState Doc +refsToRST :: PandocMonad m => Refs -> RST m Doc refsToRST refs = mapM keyToRST refs >>= return . vcat -- | Return RST representation of a reference key. -keyToRST :: ([Inline], (String, String)) - -> State WriterState Doc +keyToRST :: PandocMonad m => ([Inline], (String, String)) -> RST m Doc keyToRST (label, (src, _)) = do label' <- inlineListToRST label - let label'' = if ':' `elem` ((render Nothing label') :: String) + let label'' = if ':' `elem` (render Nothing label' :: String) then char '`' <> label' <> char '`' else label' return $ nowrap $ ".. _" <> label'' <> ": " <> text src -- | Return RST representation of notes. -notesToRST :: [[Block]] -> State WriterState Doc +notesToRST :: PandocMonad m => [[Block]] -> RST m Doc notesToRST notes = - mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= + zipWithM noteToRST [1..] notes >>= return . vsep -- | Return RST representation of a note. -noteToRST :: Int -> [Block] -> State WriterState Doc +noteToRST :: PandocMonad m => Int -> [Block] -> RST m Doc noteToRST num note = do contents <- blockListToRST note let marker = ".. [" <> text (show num) <> "]" return $ nowrap $ marker $$ nest 3 contents -- | Return RST representation of picture reference table. -pictRefsToRST :: [([Inline], (Attr, String, String, Maybe String))] - -> State WriterState Doc +pictRefsToRST :: PandocMonad m + => [([Inline], (Attr, String, String, Maybe String))] + -> RST m Doc pictRefsToRST refs = mapM pictToRST refs >>= return . vcat -- | Return RST representation of a picture substitution reference. -pictToRST :: ([Inline], (Attr, String, String, Maybe String)) - -> State WriterState Doc +pictToRST :: PandocMonad m + => ([Inline], (Attr, String, String, Maybe String)) + -> RST m Doc pictToRST (label, (attr, src, _, mbtarget)) = do label' <- inlineListToRST label dims <- imageDimsToRST attr @@ -160,10 +166,27 @@ pictToRST (label, (attr, src, _, mbtarget)) = do Just t -> " :target: " <> text t -- | Escape special characters for RST. -escapeString :: String -> String -escapeString = escapeStringUsing (backslashEscapes "`\\|*_") +escapeString :: WriterOptions -> String -> String +escapeString = escapeString' True + where + escapeString' _ _ [] = [] + escapeString' firstChar opts (c:cs) = + case c of + _ | c `elem` ['\\','`','*','_','|'] && + (firstChar || null cs) -> '\\':c:escapeString' False opts cs + '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString' False opts cs + '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs + '-' | isEnabled Ext_smart opts -> + case cs of + '-':_ -> '\\':'-':escapeString' False opts cs + _ -> '-':escapeString' False opts cs + '.' | isEnabled Ext_smart opts -> + case cs of + '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest + _ -> '.':escapeString' False opts cs + _ -> c : escapeString' False opts cs -titleToRST :: [Inline] -> [Inline] -> State WriterState Doc +titleToRST :: PandocMonad m => [Inline] -> [Inline] -> RST m Doc titleToRST [] _ = return empty titleToRST tit subtit = do title <- inlineListToRST tit @@ -179,8 +202,9 @@ bordered contents c = border = text (replicate len c) -- | Convert Pandoc block element to RST. -blockToRST :: Block -- ^ Block element - -> State WriterState Doc +blockToRST :: PandocMonad m + => Block -- ^ Block element + -> RST m Doc blockToRST Null = return empty blockToRST (Div attr bs) = do contents <- blockListToRST bs @@ -200,7 +224,7 @@ blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do else ":figclass: " <> text (unwords cls) return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline blockToRST (Para inlines) - | LineBreak `elem` inlines = do -- use line block if LineBreaks + | LineBreak `elem` inlines = linesToLineBlock $ splitBy (==LineBreak) inlines | otherwise = do contents <- inlineListToRST inlines @@ -211,17 +235,22 @@ blockToRST (RawBlock f@(Format f') str) | f == "rst" = return $ text str | otherwise = return $ blankline <> ".. raw:: " <> text (map toLower f') $+$ - (nest 3 $ text str) $$ blankline + nest 3 (text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToRST (Header level (name,classes,_) inlines) = do contents <- inlineListToRST inlines + -- we calculate the id that would be used by auto_identifiers + -- so we know whether to print an explicit identifier + let autoId = uniqueIdent inlines mempty isTopLevel <- gets stTopLevel if isTopLevel then do let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) let border = text $ replicate (offset contents) headerChar - return $ nowrap $ contents $$ border $$ blankline + let anchor | null name || name == autoId = empty + | otherwise = ".. _" <> text name <> ":" $$ blankline + return $ nowrap $ anchor $$ contents $$ border $$ blankline else do let rub = "rubric:: " <> contents let name' | null name = empty @@ -230,7 +259,7 @@ blockToRST (Header level (name,classes,_) inlines) = do | otherwise = ":class: " <> text (unwords classes) return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline blockToRST (CodeBlock (_,classes,kvs) str) = do - opts <- stOptions <$> get + opts <- gets stOptions let tabstop = writerTabStop opts let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs let numberlines = if "numberLines" `elem` classes @@ -246,59 +275,38 @@ blockToRST (CodeBlock (_,classes,kvs) str) = do (lang:_) -> (".. code:: " <> text lang) $$ numberlines) $+$ nest tabstop (text str) $$ blankline blockToRST (BlockQuote blocks) = do - tabstop <- get >>= (return . writerTabStop . stOptions) + tabstop <- gets $ writerTabStop . stOptions contents <- blockListToRST blocks - return $ (nest tabstop contents) <> blankline -blockToRST (Table caption _ widths headers rows) = do + return $ nest tabstop contents <> blankline +blockToRST (Table caption aligns widths headers rows) = do caption' <- inlineListToRST caption - let caption'' = if null caption - then empty - else blankline <> text "Table: " <> caption' - headers' <- mapM blockListToRST headers - rawRows <- mapM (mapM blockListToRST) rows - -- let isSimpleCell [Plain _] = True - -- isSimpleCell [Para _] = True - -- isSimpleCell [] = True - -- isSimpleCell _ = False - -- let isSimple = all (==0) widths && all (all isSimpleCell) rows - let numChars = maximum . map offset - opts <- get >>= return . stOptions - let widthsInChars = - if all (== 0) widths - then map ((+2) . numChars) $ transpose (headers' : rawRows) - else map (floor . (fromIntegral (writerColumns opts) *)) widths - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = height (hcat blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map makeRow rawRows - let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '+' - let body = vcat $ intersperse (border '-') rows' - let head'' = if all null headers - then empty - else head' $$ border '=' - return $ border '-' $$ head'' $$ body $$ border '-' $$ caption'' $$ blankline + let blocksToDoc opts bs = do + oldOpts <- gets stOptions + modify $ \st -> st{ stOptions = opts } + result <- blockListToRST bs + modify $ \st -> st{ stOptions = oldOpts } + return result + opts <- gets stOptions + tbl <- gridTable opts blocksToDoc (all null headers) + (map (const AlignDefault) aligns) widths + headers rows + return $ if null caption + then tbl $$ blankline + else (".. table:: " <> caption') $$ blankline $$ nest 3 tbl $$ + blankline blockToRST (BulletList items) = do contents <- mapM bulletListItemToRST items -- ensure that sublists have preceding blank line return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (OrderedList (start, style', delim) items) = do let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim - then take (length items) $ repeat "#." + then replicate (length items) "#." else take (length items) $ orderedListMarkers (start, style', delim) let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ - zip markers' items + contents <- zipWithM orderedListItemToRST markers' items -- ensure that sublists have preceding blank line return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (DefinitionList items) = do @@ -307,51 +315,69 @@ blockToRST (DefinitionList items) = do return $ blankline $$ chomp (vcat contents) $$ blankline -- | Convert bullet list item (list of blocks) to RST. -bulletListItemToRST :: [Block] -> State WriterState Doc +bulletListItemToRST :: PandocMonad m => [Block] -> RST m Doc bulletListItemToRST items = do contents <- blockListToRST items return $ hang 3 "- " $ contents <> cr -- | Convert ordered list item (a list of blocks) to RST. -orderedListItemToRST :: String -- ^ marker for list item +orderedListItemToRST :: PandocMonad m + => String -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc + -> RST m Doc orderedListItemToRST marker items = do contents <- blockListToRST items let marker' = marker ++ " " return $ hang (length marker') (text marker') $ contents <> cr -- | Convert defintion list item (label, list of blocks) to RST. -definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc +definitionListItemToRST :: PandocMonad m => ([Inline], [[Block]]) -> RST m Doc definitionListItemToRST (label, defs) = do label' <- inlineListToRST label contents <- liftM vcat $ mapM blockListToRST defs - tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $$ nest tabstop (nestle contents <> cr) + tabstop <- gets $ writerTabStop . stOptions + return $ nowrap label' $$ nest tabstop (nestle contents <> cr) -- | Format a list of lines as line block. -linesToLineBlock :: [[Inline]] -> State WriterState Doc +linesToLineBlock :: PandocMonad m => [[Inline]] -> RST m Doc linesToLineBlock inlineLines = do lns <- mapM inlineListToRST inlineLines - return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline + return $ + vcat (map (hang 2 (text "| ")) lns) <> blankline -- | Convert list of Pandoc block elements to RST. -blockListToRST' :: Bool +blockListToRST' :: PandocMonad m + => Bool -> [Block] -- ^ List of block elements - -> State WriterState Doc + -> RST m Doc blockListToRST' topLevel blocks = do + -- insert comment between list and quoted blocks, see #4248 and #3675 + let fixBlocks (b1:b2@(BlockQuote _):bs) + | toClose b1 = b1 : commentSep : b2 : fixBlocks bs + where + toClose Plain{} = False + toClose Header{} = False + toClose LineBlock{} = False + toClose HorizontalRule = False + toClose (Para [Image _ _ (_,'f':'i':'g':':':_)]) = True + toClose Para{} = False + toClose _ = True + commentSep = RawBlock "rst" "..\n\n" + fixBlocks (b:bs) = b : fixBlocks bs + fixBlocks [] = [] tl <- gets stTopLevel modify (\s->s{stTopLevel=topLevel}) - res <- vcat `fmap` mapM blockToRST blocks + res <- vcat `fmap` mapM blockToRST (fixBlocks blocks) modify (\s->s{stTopLevel=tl}) return res -blockListToRST :: [Block] -- ^ List of block elements - -> State WriterState Doc +blockListToRST :: PandocMonad m + => [Block] -- ^ List of block elements + -> RST m Doc blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. -inlineListToRST :: [Inline] -> State WriterState Doc +inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc inlineListToRST lst = mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat @@ -362,7 +388,7 @@ inlineListToRST lst = removeSpaceAfterDisplayMath [] = [] insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed insertBS (x:y:z:zs) - | isComplex y && (surroundComplex x z) = + | isComplex y && surroundComplex x z = x : y : insertBS (z : zs) insertBS (x:y:zs) | isComplex x && not (okAfterComplex y) = @@ -396,23 +422,28 @@ inlineListToRST lst = okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) okBeforeComplex _ = False isComplex :: Inline -> Bool - isComplex (Emph _) = True - isComplex (Strong _) = True - isComplex (SmallCaps _) = True - isComplex (Strikeout _) = True + isComplex (Emph _) = True + isComplex (Strong _) = True + isComplex (SmallCaps _) = True + isComplex (Strikeout _) = True isComplex (Superscript _) = True - isComplex (Subscript _) = True - isComplex (Link _ _ _) = True - isComplex (Image _ _ _) = True - isComplex (Code _ _) = True - isComplex (Math _ _) = True - isComplex (Cite _ (x:_)) = isComplex x - isComplex (Span _ (x:_)) = isComplex x - isComplex _ = False + isComplex (Subscript _) = True + isComplex Link{} = True + isComplex Image{} = True + isComplex (Code _ _) = True + isComplex (Math _ _) = True + isComplex (Cite _ (x:_)) = isComplex x + isComplex (Span _ (x:_)) = isComplex x + isComplex _ = False -- | Convert Pandoc inline element to RST. -inlineToRST :: Inline -> State WriterState Doc -inlineToRST (Span _ ils) = inlineListToRST ils +inlineToRST :: PandocMonad m => Inline -> RST m Doc +inlineToRST (Span (_,_,kvs) ils) = do + contents <- inlineListToRST ils + return $ + case lookup "role" kvs of + Just role -> ":" <> text role <> ":`" <> contents <> "`" + Nothing -> contents inlineToRST (Emph lst) = do contents <- inlineListToRST lst return $ "*" <> contents <> "*" @@ -431,14 +462,33 @@ inlineToRST (Subscript lst) = do inlineToRST (SmallCaps lst) = inlineListToRST lst inlineToRST (Quoted SingleQuote lst) = do contents <- inlineListToRST lst - return $ "‘" <> contents <> "’" + opts <- gets stOptions + if isEnabled Ext_smart opts + then return $ "'" <> contents <> "'" + else return $ "‘" <> contents <> "’" inlineToRST (Quoted DoubleQuote lst) = do contents <- inlineListToRST lst - return $ "“" <> contents <> "”" + opts <- gets stOptions + if isEnabled Ext_smart opts + then return $ "\"" <> contents <> "\"" + else return $ "“" <> contents <> "”" inlineToRST (Cite _ lst) = inlineListToRST lst -inlineToRST (Code _ str) = return $ "``" <> text str <> "``" -inlineToRST (Str str) = return $ text $ escapeString str +inlineToRST (Code _ str) = do + opts <- gets stOptions + -- we trim the string because the delimiters must adjoin a + -- non-space character; see #3496 + -- we use :literal: when the code contains backticks, since + -- :literal: allows backslash-escapes; see #3974 + return $ if '`' `elem` str + then ":literal:`" <> text (escapeString opts (trim str)) <> "`" + else "``" <> text (trim str) <> "``" +inlineToRST (Str str) = do + opts <- gets stOptions + return $ text $ + (if isEnabled Ext_smart opts + then unsmartify opts + else id) $ escapeString opts str inlineToRST (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath @@ -447,20 +497,20 @@ inlineToRST (Math t str) = do then blankline $$ ".. math::" $$ blankline $$ nest 3 (text str) $$ blankline else blankline $$ (".. math:: " <> text str) $$ blankline -inlineToRST (RawInline f x) +inlineToRST il@(RawInline f x) | f == "rst" = return $ text x | f == "latex" || f == "tex" = do modify $ \st -> st{ stHasRawTeX = True } return $ ":raw-latex:`" <> text x <> "`" - | otherwise = return empty -inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) + | otherwise = empty <$ report (InlineNotRendered il) +inlineToRST LineBreak = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space inlineToRST SoftBreak = do - wrapText <- gets (writerWrapText . stOptions) + wrapText <- gets $ writerWrapText . stOptions case wrapText of - WrapPreserve -> return cr - WrapAuto -> return space - WrapNone -> return space + WrapPreserve -> return cr + WrapAuto -> return space + WrapNone -> return space -- autolink inlineToRST (Link _ [Str str] (src, _)) | isURI src && @@ -473,15 +523,15 @@ inlineToRST (Link _ [Image attr alt (imgsrc,imgtit)] (src, _tit)) = do label <- registerImage attr alt (imgsrc,imgtit) (Just src) return $ "|" <> label <> "|" inlineToRST (Link _ txt (src, tit)) = do - useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions - linktext <- inlineListToRST $ normalizeSpaces txt + useReferenceLinks <- gets $ writerReferenceLinks . stOptions + linktext <- inlineListToRST $ B.toList . B.trimInlines . B.fromList $ txt if useReferenceLinks - then do refs <- get >>= return . stLinks + then do refs <- gets stLinks case lookup txt refs of Just (src',tit') -> if src == src' && tit == tit' then return $ "`" <> linktext <> "`_" - else do -- duplicate label, use non-reference link + else return $ "`" <> linktext <> " <" <> text src <> ">`__" Nothing -> do modify $ \st -> st { stLinks = (txt,(src,tit)):refs } @@ -494,12 +544,12 @@ inlineToRST (Note contents) = do -- add to notes in state notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 + let ref = show $ length notes + 1 return $ " [" <> text ref <> "]_" -registerImage :: Attr -> [Inline] -> Target -> Maybe String -> State WriterState Doc +registerImage :: PandocMonad m => Attr -> [Inline] -> Target -> Maybe String -> RST m Doc registerImage attr alt (src,tit) mbtarget = do - pics <- get >>= return . stImages + pics <- gets stImages txt <- case lookup alt pics of Just (a,s,t,mbt) | (a,s,t,mbt) == (attr,src,tit,mbtarget) -> return alt @@ -512,14 +562,14 @@ registerImage attr alt (src,tit) mbtarget = do return alt' inlineListToRST txt -imageDimsToRST :: Attr -> State WriterState Doc +imageDimsToRST :: PandocMonad m => Attr -> RST m Doc imageDimsToRST attr = do let (ident, _, _) = attr name = if null ident then empty else ":name: " <> text ident showDim dir = let cols d = ":" <> text (show dir) <> ": " <> text (show d) - in case (dimension dir attr) of + in case dimension dir attr of Just (Percent a) -> case dir of Height -> empty diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 8f942b4d0..7006b58d1 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -27,103 +28,125 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to RTF (rich text format). -} -module Text.Pandoc.Writers.RTF ( writeRTF, writeRTFWithEmbeddedImages ) where +module Text.Pandoc.Writers.RTF ( writeRTF + ) where +import Control.Monad.Except (catchError, throwError) +import Control.Monad +import qualified Data.ByteString as B +import Data.Char (chr, isDigit, ord) +import Data.List (intercalate, isSuffixOf) +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk -import Data.List ( isSuffixOf, intercalate ) -import Data.Char ( ord, chr, isDigit ) -import qualified Data.ByteString as B -import qualified Data.Map as M -import Text.Printf ( printf ) -import Text.Pandoc.ImageSize +import Text.Pandoc.Writers.Math +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) -- | Convert Image inlines into a raw RTF embedded image, read from a file, -- or a MediaBag, or the internet. -- If file not found or filetype not jpeg or png, leave the inline unchanged. -rtfEmbedImage :: WriterOptions -> Inline -> IO Inline -rtfEmbedImage opts x@(Image attr _ (src,_)) = do - result <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src - case result of - Right (imgdata, Just mime) - | mime == "image/jpeg" || mime == "image/png" -> do - let bytes = map (printf "%02x") $ B.unpack imgdata - let filetype = case mime of - "image/jpeg" -> "\\jpegblip" - "image/png" -> "\\pngblip" - _ -> error "Unknown file type" - sizeSpec <- case imageSize imgdata of - Left msg -> do - warn $ "Could not determine image size in `" ++ - src ++ "': " ++ msg - return "" - Right sz -> return $ "\\picw" ++ show xpx ++ - "\\pich" ++ show ypx ++ - "\\picwgoal" ++ show (floor (xpt * 20) :: Integer) - ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer) - -- twip = 1/1440in = 1/20pt - where (xpx, ypx) = sizeInPixels sz - (xpt, ypt) = desiredSizeInPoints opts attr sz - let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++ - concat bytes ++ "}" - return $ if B.null imgdata - then x - else RawInline (Format "rtf") raw - _ -> return x +rtfEmbedImage :: PandocMonad m => WriterOptions -> Inline -> m Inline +rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError + (do result <- P.fetchItem src + case result of + (imgdata, Just mime) + | mime == "image/jpeg" || mime == "image/png" -> do + let bytes = map (printf "%02x") $ B.unpack imgdata + filetype <- + case mime of + "image/jpeg" -> return "\\jpegblip" + "image/png" -> return "\\pngblip" + _ -> throwError $ + PandocShouldNeverHappenError $ + "Unknown file type " ++ mime + sizeSpec <- + case imageSize opts imgdata of + Left msg -> do + report $ CouldNotDetermineImageSize src msg + return "" + Right sz -> return $ "\\picw" ++ show xpx ++ + "\\pich" ++ show ypx ++ + "\\picwgoal" ++ show (floor (xpt * 20) :: Integer) + ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer) + -- twip = 1/1440in = 1/20pt + where (xpx, ypx) = sizeInPixels sz + (xpt, ypt) = desiredSizeInPoints opts attr sz + let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++ + concat bytes ++ "}" + if B.null imgdata + then do + report $ CouldNotFetchResource src "image contained no data" + return x + else return $ RawInline (Format "rtf") raw + | otherwise -> do + report $ CouldNotFetchResource src "image is not a jpeg or png" + return x + (_, Nothing) -> do + report $ CouldNotDetermineMimeType src + return x) + (\e -> do + report $ CouldNotFetchResource src (show e) + return x) rtfEmbedImage _ x = return x --- | Convert Pandoc to a string in rich text format, with --- images embedded as encoded binary data. -writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String -writeRTFWithEmbeddedImages options doc = - writeRTF options `fmap` walkM (rtfEmbedImage options) doc - -- | Convert Pandoc to a string in rich text format. -writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc meta@(Meta metamap) blocks) = +writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeRTF options doc = do + -- handle images + Pandoc meta@(Meta metamap) blocks <- walkM (rtfEmbedImage options) doc let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta - toPlain (MetaBlocks [Para ils]) = MetaInlines ils - toPlain x = x - -- adjust title, author, date so we don't get para inside para - meta' = Meta $ M.adjust toPlain "title" + let toPlain (MetaBlocks [Para ils]) = MetaInlines ils + toPlain x = x + -- adjust title, author, date so we don't get para inside para + let meta' = Meta $ M.adjust toPlain "title" . M.adjust toPlain "author" . M.adjust toPlain "date" $ metamap - Just metadata = metaToJSON options - (Just . concatMap (blockToRTF 0 AlignDefault)) - (Just . inlineListToRTF) + metadata <- metaToJSON options + (fmap concat . mapM (blockToRTF 0 AlignDefault)) + inlinesToRTF meta' - body = concatMap (blockToRTF 0 AlignDefault) blocks - isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options - isTOCHeader _ = False - context = defField "body" body + body <- blocksToRTF 0 AlignDefault blocks + let isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options + isTOCHeader _ = False + toc <- tableOfContents $ filter isTOCHeader blocks + let context = defField "body" body $ defField "spacer" spacer - $ (if writerTableOfContents options - then defField "toc" - (tableOfContents $ filter isTOCHeader blocks) - else id) - $ metadata - in case writerTemplate options of + $(if writerTableOfContents options + then defField "table-of-contents" toc + -- for backwards compatibility, + -- we populate toc with the contents + -- of the toc rather than a boolean: + . defField "toc" toc + else id) metadata + T.pack <$> + case writerTemplate options of Just tpl -> renderTemplate' tpl context - Nothing -> case reverse body of + Nothing -> return $ + case reverse body of ('\n':_) -> body _ -> body ++ "\n" -- | Construct table of contents from list of header blocks. -tableOfContents :: [Block] -> String -tableOfContents headers = - let contentsTree = hierarchicalize headers - in concatMap (blockToRTF 0 AlignDefault) $ - [Header 1 nullAttr [Str "Contents"], - BulletList (map elementToListItem contentsTree)] +tableOfContents :: PandocMonad m => [Block] -> m String +tableOfContents headers = do + let contents = map elementToListItem $ hierarchicalize headers + blocksToRTF 0 AlignDefault + [Header 1 nullAttr [Str "Contents"], BulletList contents] elementToListItem :: Element -> [Block] elementToListItem (Blk _) = [] -elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++ +elementToListItem (Sec _ _ _ sectext subsecs) = Plain sectext : if null subsecs then [] else [BulletList (map elementToListItem subsecs)] @@ -140,11 +163,11 @@ handleUnicode (c:cs) = lower = r + 0xDC00 in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs else enc c ++ handleUnicode cs - else c:(handleUnicode cs) + else c:handleUnicode cs where surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff) || (0xe000 <= ord x && ord x <= 0xffff) ) - enc x = '\\':'u':(show (ord x)) ++ "?" + enc x = '\\':'u':show (ord x) ++ "?" -- | Escape special characters. escapeSpecial :: String -> String @@ -175,13 +198,13 @@ rtfParSpaced :: Int -- ^ space after (in twips) -> String rtfParSpaced spaceAfter indent firstLineIndent alignment content = let alignString = case alignment of - AlignLeft -> "\\ql " - AlignRight -> "\\qr " - AlignCenter -> "\\qc " + AlignLeft -> "\\ql " + AlignRight -> "\\qr " + AlignCenter -> "\\qc " AlignDefault -> "\\ql " in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ - " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" + "\\f0 \\sa" ++ show spaceAfter ++ " \\li" ++ show indent ++ + " \\fi" ++ show firstLineIndent ++ " " ++ content ++ "\\par}\n" -- | Default paragraph. rtfPar :: Int -- ^ block indent (in twips) @@ -221,143 +244,180 @@ orderedMarkers indent (start, style, delim) = _ -> orderedListMarkers (start, LowerAlpha, Period) else orderedListMarkers (start, style, delim) +blocksToRTF :: PandocMonad m + => Int + -> Alignment + -> [Block] + -> m String +blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align) + -- | Convert Pandoc block element to RTF. -blockToRTF :: Int -- ^ indent level +blockToRTF :: PandocMonad m + => Int -- ^ indent level -> Alignment -- ^ alignment -> Block -- ^ block to convert - -> String -blockToRTF _ _ Null = "" + -> m String +blockToRTF _ _ Null = return "" blockToRTF indent alignment (Div _ bs) = - concatMap (blockToRTF indent alignment) bs + blocksToRTF indent alignment bs blockToRTF indent alignment (Plain lst) = - rtfCompact indent 0 alignment $ inlineListToRTF lst + rtfCompact indent 0 alignment <$> inlinesToRTF lst blockToRTF indent alignment (Para lst) = - rtfPar indent 0 alignment $ inlineListToRTF lst + rtfPar indent 0 alignment <$> inlinesToRTF lst blockToRTF indent alignment (LineBlock lns) = blockToRTF indent alignment $ linesToPara lns blockToRTF indent alignment (BlockQuote lst) = - concatMap (blockToRTF (indent + indentIncrement) alignment) lst + blocksToRTF (indent + indentIncrement) alignment lst blockToRTF indent _ (CodeBlock _ str) = - rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawBlock f str) - | f == Format "rtf" = str - | otherwise = "" -blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ - concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ - zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ - concatMap (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule = + return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ codeStringToRTF str) +blockToRTF _ _ b@(RawBlock f str) + | f == Format "rtf" = return str + | otherwise = do + report $ BlockNotRendered b + return "" +blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$> + mapM (listItemToRTF alignment indent (bulletMarker indent)) lst +blockToRTF indent alignment (OrderedList attribs lst) = + (spaceAtEnd . concat) <$> + zipWithM (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst +blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$> + mapM (definitionListItemToRTF alignment indent) lst +blockToRTF indent _ HorizontalRule = return $ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF indent alignment (Header level _ lst) = rtfPar indent 0 alignment $ - "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst -blockToRTF indent alignment (Table caption aligns sizes headers rows) = - (if all null headers - then "" - else tableRowToRTF True indent aligns sizes headers) ++ - concatMap (tableRowToRTF False indent aligns sizes) rows ++ - rtfPar indent 0 alignment (inlineListToRTF caption) +blockToRTF indent alignment (Header level _ lst) = do + contents <- inlinesToRTF lst + return $ rtfPar indent 0 alignment $ + "\\b \\fs" ++ show (40 - (level * 4)) ++ " " ++ contents +blockToRTF indent alignment (Table caption aligns sizes headers rows) = do + caption' <- inlinesToRTF caption + header' <- if all null headers + then return "" + else tableRowToRTF True indent aligns sizes headers + rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows + return $ header' ++ rows' ++ rtfPar indent 0 alignment caption' -tableRowToRTF :: Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> String -tableRowToRTF header indent aligns sizes' cols = +tableRowToRTF :: PandocMonad m + => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String +tableRowToRTF header indent aligns sizes' cols = do let totalTwips = 6 * 1440 -- 6 inches - sizes = if all (== 0) sizes' - then take (length cols) $ repeat (1.0 / fromIntegral (length cols)) + let sizes = if all (== 0) sizes' + then replicate (length cols) (1.0 / fromIntegral (length cols)) else sizes' - columns = concat $ zipWith (tableItemToRTF indent) aligns cols - rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) + columns <- concat <$> + zipWithM (tableItemToRTF indent) aligns cols + let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) (0 :: Integer) sizes - cellDefs = map (\edge -> (if header + let cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs" else "") ++ "\\cellx" ++ show edge) rightEdges - start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ + let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ "\\trkeep\\intbl\n{\n" - end = "}\n\\intbl\\row}\n" - in start ++ columns ++ end + let end = "}\n\\intbl\\row}\n" + return $ start ++ columns ++ end -tableItemToRTF :: Int -> Alignment -> [Block] -> String -tableItemToRTF indent alignment item = - let contents = concatMap (blockToRTF indent alignment) item - in "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" +tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String +tableItemToRTF indent alignment item = do + contents <- blocksToRTF indent alignment item + return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. spaceAtEnd :: String -> String spaceAtEnd str = - if isSuffixOf "\\par}\n" str - then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" + if "\\par}\n" `isSuffixOf` str + then take (length str - 6) str ++ "\\sa180\\par}\n" else str -- | Convert list item (list of blocks) to RTF. -listItemToRTF :: Alignment -- ^ alignment +listItemToRTF :: PandocMonad m + => Alignment -- ^ alignment -> Int -- ^ indent level -> String -- ^ list start marker -> [Block] -- ^ list item (list of blocks) - -> [Char] -listItemToRTF alignment indent marker [] = - rtfCompact (indent + listIncrement) (0 - listIncrement) alignment - (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF alignment indent marker list = - let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list - listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++ - show listIncrement ++ "\\tab" - insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = + -> m String +listItemToRTF alignment indent marker [] = return $ + rtfCompact (indent + listIncrement) (negate listIncrement) alignment + (marker ++ "\\tx" ++ show listIncrement ++ "\\tab ") +listItemToRTF alignment indent marker list = do + (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list + let listMarker = "\\fi" ++ show (negate listIncrement) ++ " " ++ marker ++ + "\\tx" ++ show listIncrement ++ "\\tab" + let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = listMarker ++ dropWhile isDigit xs insertListMarker ('\\':'f':'i':d:xs) | isDigit d = listMarker ++ dropWhile isDigit xs insertListMarker (x:xs) = x : insertListMarker xs insertListMarker [] = [] - -- insert the list marker into the (processed) first block - in insertListMarker first ++ concat rest + -- insert the list marker into the (processed) first block + return $ insertListMarker first ++ concat rest -- | Convert definition list item (label, list of blocks) to RTF. -definitionListItemToRTF :: Alignment -- ^ alignment +definitionListItemToRTF :: PandocMonad m + => Alignment -- ^ alignment -> Int -- ^ indent level -> ([Inline],[[Block]]) -- ^ list item (list of blocks) - -> [Char] -definitionListItemToRTF alignment indent (label, defs) = - let labelText = blockToRTF indent alignment (Plain label) - itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) $ - concat defs - in labelText ++ itemsText + -> m String +definitionListItemToRTF alignment indent (label, defs) = do + labelText <- blockToRTF indent alignment (Plain label) + itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs) + return $ labelText ++ itemsText -- | Convert list of inline items to RTF. -inlineListToRTF :: [Inline] -- ^ list of inlines to convert - -> String -inlineListToRTF lst = concatMap inlineToRTF lst +inlinesToRTF :: PandocMonad m + => [Inline] -- ^ list of inlines to convert + -> m String +inlinesToRTF lst = concat <$> mapM inlineToRTF lst -- | Convert inline item to RTF. -inlineToRTF :: Inline -- ^ inline to convert - -> String -inlineToRTF (Span _ lst) = inlineListToRTF lst -inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Quoted SingleQuote lst) = - "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) = - "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" -inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" -inlineToRTF (Str str) = stringToRTF str -inlineToRTF (Math t str) = inlineListToRTF $ texMathToInlines t str -inlineToRTF (Cite _ lst) = inlineListToRTF lst -inlineToRTF (RawInline f str) - | f == Format "rtf" = str - | otherwise = "" -inlineToRTF (LineBreak) = "\\line " -inlineToRTF SoftBreak = " " -inlineToRTF Space = " " -inlineToRTF (Link _ text (src, _)) = - "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ - "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" +inlineToRTF :: PandocMonad m + => Inline -- ^ inline to convert + -> m String +inlineToRTF (Span _ lst) = inlinesToRTF lst +inlineToRTF (Emph lst) = do + contents <- inlinesToRTF lst + return $ "{\\i " ++ contents ++ "}" +inlineToRTF (Strong lst) = do + contents <- inlinesToRTF lst + return $ "{\\b " ++ contents ++ "}" +inlineToRTF (Strikeout lst) = do + contents <- inlinesToRTF lst + return $ "{\\strike " ++ contents ++ "}" +inlineToRTF (Superscript lst) = do + contents <- inlinesToRTF lst + return $ "{\\super " ++ contents ++ "}" +inlineToRTF (Subscript lst) = do + contents <- inlinesToRTF lst + return $ "{\\sub " ++ contents ++ "}" +inlineToRTF (SmallCaps lst) = do + contents <- inlinesToRTF lst + return $ "{\\scaps " ++ contents ++ "}" +inlineToRTF (Quoted SingleQuote lst) = do + contents <- inlinesToRTF lst + return $ "\\u8216'" ++ contents ++ "\\u8217'" +inlineToRTF (Quoted DoubleQuote lst) = do + contents <- inlinesToRTF lst + return $ "\\u8220\"" ++ contents ++ "\\u8221\"" +inlineToRTF (Code _ str) = return $ "{\\f1 " ++ codeStringToRTF str ++ "}" +inlineToRTF (Str str) = return $ stringToRTF str +inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF +inlineToRTF (Cite _ lst) = inlinesToRTF lst +inlineToRTF il@(RawInline f str) + | f == Format "rtf" = return str + | otherwise = do + return $ InlineNotRendered il + return "" +inlineToRTF LineBreak = return "\\line " +inlineToRTF SoftBreak = return " " +inlineToRTF Space = return " " +inlineToRTF (Link _ text (src, _)) = do + contents <- inlinesToRTF text + return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ codeStringToRTF src ++ + "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n" inlineToRTF (Image _ _ (source, _)) = - "{\\cf1 [image: " ++ source ++ "]\\cf0}" -inlineToRTF (Note contents) = - "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" + return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF (Note contents) = do + body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents + return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + body ++ "}" diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 845d22077..ae4cc5cc5 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013-2015 John MacFarlane + Copyright : Copyright (C) 2013-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,62 +30,91 @@ Shared utility functions for pandoc writers. -} module Text.Pandoc.Writers.Shared ( metaToJSON + , metaToJSON' + , addVariablesToJSON , getField , setField + , resetField , defField , tagWithAttrs , fixDisplayMath + , unsmartify + , gridTable + , metaValueToInlines ) where -import Text.Pandoc.Definition -import Text.Pandoc.Pretty -import Text.Pandoc.XML (escapeStringForXML) -import Control.Monad (liftM) -import Text.Pandoc.Options (WriterOptions(..)) +import Control.Monad (zipWithM) +import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object), + encode, fromJSON) import qualified Data.HashMap.Strict as H +import Data.List (groupBy, intersperse, transpose) import qualified Data.Map as M +import Data.Maybe (isJust) import qualified Data.Text as T -import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..), encode) -import Text.Pandoc.UTF8 (toStringLazy) import qualified Data.Traversable as Traversable -import Data.List ( groupBy ) -import Data.Maybe ( isJust ) +import qualified Text.Pandoc.Builder as Builder +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Pretty +import Text.Pandoc.Walk (query) +import Text.Pandoc.UTF8 (toStringLazy) +import Text.Pandoc.XML (escapeStringForXML) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. -- Variables overwrite metadata fields with the same names. -- If multiple variables are set with the same name, a list is --- assigned. -metaToJSON :: Monad m +-- assigned. Does nothing if 'writerTemplate' is Nothing. +metaToJSON :: (Functor m, Monad m, ToJSON a) => WriterOptions - -> ([Block] -> m String) - -> ([Inline] -> m String) + -> ([Block] -> m a) + -> ([Inline] -> m a) -> Meta -> m Value -metaToJSON opts blockWriter inlineWriter (Meta metamap) - | isJust (writerTemplate opts) = do - let baseContext = foldl (\acc (x,y) -> setField x y acc) (Object H.empty) - $ writerVariables opts - renderedMap <- Traversable.mapM - (metaValueToJSON blockWriter inlineWriter) - metamap - let metadata = M.foldWithKey defField baseContext renderedMap - return $ defField "meta-json" (toStringLazy $ encode metadata) metadata +metaToJSON opts blockWriter inlineWriter meta + | isJust (writerTemplate opts) = + addVariablesToJSON opts <$> metaToJSON' blockWriter inlineWriter meta | otherwise = return (Object H.empty) -metaValueToJSON :: Monad m - => ([Block] -> m String) - -> ([Inline] -> m String) +-- | Like 'metaToJSON', but does not include variables and is +-- not sensitive to 'writerTemplate'. +metaToJSON' :: (Functor m, Monad m, ToJSON a) + => ([Block] -> m a) + -> ([Inline] -> m a) + -> Meta + -> m Value +metaToJSON' blockWriter inlineWriter (Meta metamap) = do + renderedMap <- Traversable.mapM + (metaValueToJSON blockWriter inlineWriter) + metamap + return $ M.foldrWithKey defField (Object H.empty) renderedMap + +-- | Add variables to JSON object, replacing any existing values. +-- Also include @meta-json@, a field containing a string representation +-- of the original JSON object itself, prior to addition of variables. +addVariablesToJSON :: WriterOptions -> Value -> Value +addVariablesToJSON opts metadata = + foldl (\acc (x,y) -> setField x y acc) + (defField "meta-json" (toStringLazy $ encode metadata) (Object mempty)) + (writerVariables opts) + `combineMetadata` metadata + where combineMetadata (Object o1) (Object o2) = Object $ H.union o1 o2 + combineMetadata x _ = x + +metaValueToJSON :: (Functor m, Monad m, ToJSON a) + => ([Block] -> m a) + -> ([Inline] -> m a) -> MetaValue -> m Value -metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $ +metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = toJSON <$> Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap -metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $ +metaValueToJSON blockWriter inlineWriter (MetaList xs) = toJSON <$> Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs metaValueToJSON _ _ (MetaBool b) = return $ toJSON b -metaValueToJSON _ _ (MetaString s) = return $ toJSON s -metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs -metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs +metaValueToJSON _ inlineWriter (MetaString s) = toJSON <$> + inlineWriter (Builder.toList (Builder.text s)) +metaValueToJSON blockWriter _ (MetaBlocks bs) = toJSON <$> blockWriter bs +metaValueToJSON _ inlineWriter (MetaInlines is) = toJSON <$> inlineWriter is -- | Retrieve a field value from a JSON object. getField :: FromJSON a @@ -111,10 +140,22 @@ setField field val (Object hashmap) = Object $ H.insertWith combine (T.pack field) (toJSON val) hashmap where combine newval oldval = case fromJSON oldval of - Success xs -> toJSON $ xs ++ [newval] - _ -> toJSON [oldval, newval] + Success xs -> toJSON $ xs ++ [newval] + _ -> toJSON [oldval, newval] setField _ _ x = x +resetField :: ToJSON a + => String + -> a + -> Value + -> Value +-- | Reset a field of a JSON object. If the field already has a value, +-- the new value replaces it. +-- This is a utility function to be used in preparing template contexts. +resetField field val (Object hashmap) = + Object $ H.insert (T.pack field) (toJSON val) hashmap +resetField _ _ x = x + defField :: ToJSON a => String -> a @@ -148,22 +189,131 @@ isDisplayMath _ = False stripLeadingTrailingSpace :: [Inline] -> [Inline] stripLeadingTrailingSpace = go . reverse . go . reverse - where go (Space:xs) = xs + where go (Space:xs) = xs go (SoftBreak:xs) = xs - go xs = xs + go xs = xs -- Put display math in its own block (for ODT/DOCX). fixDisplayMath :: Block -> Block fixDisplayMath (Plain lst) | any isDisplayMath lst && not (all isDisplayMath lst) = -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ + Div ("",["math"],[]) $ + map Plain $ + filter (not . null) $ + map stripLeadingTrailingSpace $ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || not (isDisplayMath x || isDisplayMath y)) lst fixDisplayMath (Para lst) | any isDisplayMath lst && not (all isDisplayMath lst) = -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ + Div ("",["math"],[]) $ + map Para $ + filter (not . null) $ + map stripLeadingTrailingSpace $ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || not (isDisplayMath x || isDisplayMath y)) lst fixDisplayMath x = x + +unsmartify :: WriterOptions -> String -> String +unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs +unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs +unsmartify opts ('\8211':xs) + | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs + | otherwise = "--" ++ unsmartify opts xs +unsmartify opts ('\8212':xs) + | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs + | otherwise = "---" ++ unsmartify opts xs +unsmartify opts ('\8220':xs) = '"' : unsmartify opts xs +unsmartify opts ('\8221':xs) = '"' : unsmartify opts xs +unsmartify opts ('\8216':xs) = '\'' : unsmartify opts xs +unsmartify opts (x:xs) = x : unsmartify opts xs +unsmartify _ [] = [] + +gridTable :: Monad m + => WriterOptions + -> (WriterOptions -> [Block] -> m Doc) + -> Bool -- ^ headless + -> [Alignment] + -> [Double] + -> [[Block]] + -> [[[Block]]] + -> m Doc +gridTable opts blocksToDoc headless aligns widths headers rows = do + let numcols = maximum (length aligns : length widths : + map length (headers:rows)) + let handleGivenWidths widths' = do + let widthsInChars' = map ( + (\x -> if x < 1 then 1 else x) . + (\x -> x - 3) . floor . + (fromIntegral (writerColumns opts) *) + ) widths' + rawHeaders' <- zipWithM blocksToDoc + (map (\w -> opts{writerColumns = + min (w - 2) (writerColumns opts)}) widthsInChars') + headers + rawRows' <- mapM + (\cs -> zipWithM blocksToDoc + (map (\w -> opts{writerColumns = + min (w - 2) (writerColumns opts)}) widthsInChars') + cs) + rows + return (widthsInChars', rawHeaders', rawRows') + let handleZeroWidths = do + rawHeaders' <- mapM (blocksToDoc opts) headers + rawRows' <- mapM (mapM (blocksToDoc opts)) rows + let numChars [] = 0 + numChars xs = maximum . map offset $ xs + let widthsInChars' = + map numChars $ transpose (rawHeaders' : rawRows') + if sum widthsInChars' > writerColumns opts + then -- use even widths + handleGivenWidths + (replicate numcols (1.0 / fromIntegral numcols) :: [Double]) + else return (widthsInChars', rawHeaders', rawRows') + (widthsInChars, rawHeaders, rawRows) <- if all (== 0) widths + then handleZeroWidths + else handleGivenWidths widths + let hpipeBlocks blocks = hcat [beg, middle, end] + where h = maximum (1 : map height blocks) + sep' = lblock 3 $ vcat (replicate h (text " | ")) + beg = lblock 2 $ vcat (replicate h (text "| ")) + end = lblock 2 $ vcat (replicate h (text " |")) + middle = chomp $ hcat $ intersperse sep' blocks + let makeRow = hpipeBlocks . zipWith lblock widthsInChars + let head' = makeRow rawHeaders + let rows' = map (makeRow . map chomp) rawRows + let borderpart ch align widthInChars = + (if align == AlignLeft || align == AlignCenter + then char ':' + else char ch) <> + text (replicate widthInChars ch) <> + (if align == AlignRight || align == AlignCenter + then char ':' + else char ch) + let border ch aligns' widthsInChars' = + char '+' <> + hcat (intersperse (char '+') (zipWith (borderpart ch) + aligns' widthsInChars')) <> char '+' + let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars) + rows' + let head'' = if headless + then empty + else head' $$ border '=' aligns widthsInChars + if headless + then return $ + border '-' aligns widthsInChars $$ + body $$ + border '-' (repeat AlignDefault) widthsInChars + else return $ + border '-' (repeat AlignDefault) widthsInChars $$ + head'' $$ + body $$ + border '-' (repeat AlignDefault) widthsInChars + +metaValueToInlines :: MetaValue -> [Inline] +metaValueToInlines (MetaString s) = [Str s] +metaValueToInlines (MetaInlines ils) = ils +metaValueToInlines (MetaBlocks bs) = query return bs +metaValueToInlines (MetaBool b) = [Str $ show b] +metaValueToInlines _ = [] diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index 9bd23ac3b..4936c743e 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE OverloadedStrings, PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,63 +30,67 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to Docbook XML. -} module Text.Pandoc.Writers.TEI (writeTEI) where +import Data.Char (toLower) +import Data.List (isPrefixOf, stripPrefix) +import Data.Text (Text) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') -import Data.List ( stripPrefix, isPrefixOf ) -import Data.Char ( toLower ) -import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML -- | Convert list of authors to a docbook <author> section -authorToTEI :: WriterOptions -> [Inline] -> B.Inlines -authorToTEI opts name' = - let name = render Nothing $ inlinesToTEI opts name' - colwidth = if writerWrapText opts == WrapAuto +authorToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m B.Inlines +authorToTEI opts name' = do + name <- render Nothing <$> inlinesToTEI opts name' + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - in B.rawInline "tei" $ render colwidth $ + return $ B.rawInline "tei" $ render colwidth $ inTagsSimple "author" (text $ escapeStringForXML name) -- | Convert Pandoc document to string in Docbook format. -writeTEI :: WriterOptions -> Pandoc -> String -writeTEI opts (Pandoc meta blocks) = +writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeTEI opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing + render' :: Doc -> Text render' = render colwidth startLvl = case writerTopLevelDivision opts of TopLevelPart -> -1 TopLevelChapter -> 0 TopLevelSection -> 1 TopLevelDefault -> 1 - auths' = map (authorToTEI opts) $ docAuthors meta - meta' = B.setMeta "author" auths' meta - Just metadata = metaToJSON opts - (Just . render colwidth . (vcat . - (map (elementToTEI opts startLvl)) . hierarchicalize)) - (Just . render colwidth . inlinesToTEI opts) + auths' <- mapM (authorToTEI opts) $ docAuthors meta + let meta' = B.setMeta "author" auths' meta + metadata <- metaToJSON opts + (fmap (render' . vcat) . + mapM (elementToTEI opts startLvl) . hierarchicalize) + (fmap render' . inlinesToTEI opts) meta' - main = render' $ vcat (map (elementToTEI opts startLvl) elements) - context = defField "body" main - $ defField "mathml" (case writerHTMLMathMethod opts of - MathML _ -> True - _ -> False) - $ metadata - in case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context + main <- (render' . vcat) <$> mapM (elementToTEI opts startLvl) elements + let context = defField "body" main + $ + defField "mathml" (case writerHTMLMathMethod opts of + MathML -> True + _ -> False) metadata + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to TEI. -elementToTEI :: WriterOptions -> Int -> Element -> Doc +elementToTEI :: PandocMonad m => WriterOptions -> Int -> Element -> m Doc elementToTEI opts _ (Blk block) = blockToTEI opts block -elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) = +elementToTEI opts lvl (Sec _ _num attr title elements) = do -- TEI doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] @@ -96,14 +101,14 @@ elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) = | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "level" ++ show n | otherwise -> "section" - in inTags True "div" [("type", divType) | not (null id')] $ --- ("id", writerIdentifierPrefix opts ++ id') | not (null id')] $ - inTagsSimple "head" (inlinesToTEI opts title) $$ - vcat (map (elementToTEI opts (lvl + 1)) elements') + contents <- vcat <$> mapM (elementToTEI opts (lvl + 1)) elements' + titleContents <- inlinesToTEI opts title + return $ inTags True "div" (("type", divType) : idFromAttr opts attr) $ + inTagsSimple "head" titleContents $$ contents -- | Convert a list of Pandoc blocks to TEI. -blocksToTEI :: WriterOptions -> [Block] -> Doc -blocksToTEI opts = vcat . map (blockToTEI opts) +blocksToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc +blocksToTEI opts bs = vcat <$> mapM (blockToTEI opts) bs -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -112,48 +117,54 @@ plainToPara x = x -- | Convert a list of pairs of terms and definitions into a TEI -- list with labels and items. -deflistItemsToTEI :: WriterOptions -> [([Inline],[[Block]])] -> Doc +deflistItemsToTEI :: PandocMonad m + => WriterOptions -> [([Inline],[[Block]])] -> m Doc deflistItemsToTEI opts items = - vcat $ map (\(term, defs) -> deflistItemToTEI opts term defs) items + vcat <$> mapM (uncurry (deflistItemToTEI opts)) items -- | Convert a term and a list of blocks into a TEI varlistentry. -deflistItemToTEI :: WriterOptions -> [Inline] -> [[Block]] -> Doc -deflistItemToTEI opts term defs = - let def' = concatMap (map plainToPara) defs - in inTagsIndented "label" (inlinesToTEI opts term) $$ - inTagsIndented "item" (blocksToTEI opts def') +deflistItemToTEI :: PandocMonad m + => WriterOptions -> [Inline] -> [[Block]] -> m Doc +deflistItemToTEI opts term defs = do + term' <- inlinesToTEI opts term + defs' <- blocksToTEI opts $ concatMap (map plainToPara) defs + return $ inTagsIndented "label" term' $$ + inTagsIndented "item" defs' -- | Convert a list of lists of blocks to a list of TEI list items. -listItemsToTEI :: WriterOptions -> [[Block]] -> Doc -listItemsToTEI opts items = vcat $ map (listItemToTEI opts) items +listItemsToTEI :: PandocMonad m => WriterOptions -> [[Block]] -> m Doc +listItemsToTEI opts items = vcat <$> mapM (listItemToTEI opts) items -- | Convert a list of blocks into a TEI list item. -listItemToTEI :: WriterOptions -> [Block] -> Doc +listItemToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc listItemToTEI opts item = - inTagsIndented "item" $ blocksToTEI opts $ map plainToPara item + inTagsIndented "item" <$> blocksToTEI opts (map plainToPara item) -imageToTEI :: WriterOptions -> Attr -> String -> Doc -imageToTEI _ attr src = selfClosingTag "graphic" $ - ("url", src) : idAndRole attr ++ dims +imageToTEI :: PandocMonad m => WriterOptions -> Attr -> String -> m Doc +imageToTEI opts attr src = return $ selfClosingTag "graphic" $ + ("url", src) : idFromAttr opts attr ++ dims where - dims = go Width "width" ++ go Height "depth" - go dir dstr = case (dimension dir attr) of + dims = go Width "width" ++ go Height "height" + go dir dstr = case dimension dir attr of Just a -> [(dstr, show a)] Nothing -> [] -- | Convert a Pandoc block element to TEI. -blockToTEI :: WriterOptions -> Block -> Doc -blockToTEI _ Null = empty +blockToTEI :: PandocMonad m => WriterOptions -> Block -> m Doc +blockToTEI _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: -blockToTEI opts (Div (ident,_,_) [Para lst]) = - let attribs = [("id", ident) | not (null ident)] in - inTags False "p" attribs $ inlinesToTEI opts lst +blockToTEI opts (Div attr [Para lst]) = do + let attribs = idFromAttr opts attr + inTags False "p" attribs <$> inlinesToTEI opts lst blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs -blockToTEI _ (Header _ _ _) = empty -- should not occur after hierarchicalize +blockToTEI _ h@Header{} = do + -- should not occur after hierarchicalize, except inside lists/blockquotes + report $ BlockNotRendered h + return empty -- For TEI simple, text must be within containing block element, so --- we use plainToPara to ensure that Plain text ends up contained by --- something. +-- we use treat as Para to ensure that Plain text ends up contained by +-- something: blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst -- title beginning with fig: indicates that the image is a figure --blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = @@ -168,13 +179,13 @@ blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst -- (imageToTEI opts attr src)) $$ -- inTagsSimple "textobject" (inTagsSimple "phrase" alt)) blockToTEI opts (Para lst) = - inTags False "p" [] $ inlinesToTEI opts lst + inTags False "p" [] <$> inlinesToTEI opts lst blockToTEI opts (LineBlock lns) = blockToTEI opts $ linesToPara lns blockToTEI opts (BlockQuote blocks) = - inTagsIndented "quote" $ blocksToTEI opts blocks + inTagsIndented "quote" <$> blocksToTEI opts blocks blockToTEI _ (CodeBlock (_,classes,_) str) = - text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <> + return $ text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <> flush (text (escapeStringForXML str) <> cr <> text "</ab>") where lang = if null langs then "" @@ -184,11 +195,11 @@ blockToTEI _ (CodeBlock (_,classes,_) str) = then [s] else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes -blockToTEI opts (BulletList lst) = +blockToTEI opts (BulletList lst) = do let attribs = [("type", "unordered")] - in inTags True "list" attribs $ listItemsToTEI opts lst -blockToTEI _ (OrderedList _ []) = empty -blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = + inTags True "list" attribs <$> listItemsToTEI opts lst +blockToTEI _ (OrderedList _ []) = return empty +blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = do let attribs = case numstyle of DefaultStyle -> [] Decimal -> [("type", "ordered:arabic")] @@ -197,127 +208,138 @@ blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("type", "ordered:loweralpha")] UpperRoman -> [("type", "ordered:upperroman")] LowerRoman -> [("type", "ordered:lowerroman")] - items = if start == 1 - then listItemsToTEI opts (first:rest) - else (inTags True "item" [("n",show start)] - (blocksToTEI opts $ map plainToPara first)) $$ - listItemsToTEI opts rest - in inTags True "list" attribs items -blockToTEI opts (DefinitionList lst) = + items <- if start == 1 + then listItemsToTEI opts (first:rest) + else do + fi <- blocksToTEI opts $ map plainToPara first + re <- listItemsToTEI opts rest + return $ inTags True "item" [("n",show start)] fi $$ re + return $ inTags True "list" attribs items +blockToTEI opts (DefinitionList lst) = do let attribs = [("type", "definition")] - in inTags True "list" attribs $ deflistItemsToTEI opts lst -blockToTEI _ (RawBlock f str) - | f == "tei" = text str -- raw TEI block (should such a thing exist). --- | f == "html" = text str -- allow html for backwards compatibility - | otherwise = empty -blockToTEI _ HorizontalRule = - selfClosingTag "milestone" [("unit","undefined"), ("type","separator"),("rendition","line")] + inTags True "list" attribs <$> deflistItemsToTEI opts lst +blockToTEI _ b@(RawBlock f str) + | f == "tei" = return $ text str + -- raw TEI block (should such a thing exist). + | otherwise = do + report $ BlockNotRendered b + return empty +blockToTEI _ HorizontalRule = return $ + selfClosingTag "milestone" [("unit","undefined") + ,("type","separator") + ,("rendition","line")] -- | TEI Tables -- TEI Simple's tables are composed of cells and rows; other -- table info in the AST is here lossily discard. -blockToTEI opts (Table _ _ _ headers rows) = - let - headers' = tableHeadersToTEI opts headers --- headers' = if all null headers --- then return empty --- else tableRowToTEI opts headers - in - inTags True "table" [] $ - vcat $ [headers'] <> map (tableRowToTEI opts) rows +blockToTEI opts (Table _ _ _ headers rows) = do + headers' <- tableHeadersToTEI opts headers + rows' <- mapM (tableRowToTEI opts) rows + return $ inTags True "table" [] $ headers' $$ vcat rows' -tableRowToTEI :: WriterOptions - -> [[Block]] - -> Doc +tableRowToTEI :: PandocMonad m + => WriterOptions + -> [[Block]] + -> m Doc tableRowToTEI opts cols = - inTagsIndented "row" $ vcat $ map (tableItemToTEI opts) cols + (inTagsIndented "row" . vcat) <$> mapM (tableItemToTEI opts) cols -tableHeadersToTEI :: WriterOptions +tableHeadersToTEI :: PandocMonad m + => WriterOptions -> [[Block]] - -> Doc + -> m Doc tableHeadersToTEI opts cols = - inTags True "row" [("role","label")] $ vcat $ map (tableItemToTEI opts) cols + (inTags True "row" [("role","label")] . vcat) <$> + mapM (tableItemToTEI opts) cols -tableItemToTEI :: WriterOptions - -> [Block] - -> Doc +tableItemToTEI :: PandocMonad m + => WriterOptions + -> [Block] + -> m Doc tableItemToTEI opts item = - inTags False "cell" [] $ vcat $ map (blockToTEI opts) item + (inTags False "cell" [] . vcat) <$> mapM (blockToTEI opts) item -- | Convert a list of inline elements to TEI. -inlinesToTEI :: WriterOptions -> [Inline] -> Doc -inlinesToTEI opts lst = hcat $ map (inlineToTEI opts) lst +inlinesToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m Doc +inlinesToTEI opts lst = hcat <$> mapM (inlineToTEI opts) lst -- | Convert an inline element to TEI. -inlineToTEI :: WriterOptions -> Inline -> Doc -inlineToTEI _ (Str str) = text $ escapeStringForXML str +inlineToTEI :: PandocMonad m => WriterOptions -> Inline -> m Doc +inlineToTEI _ (Str str) = return $ text $ escapeStringForXML str inlineToTEI opts (Emph lst) = - inTags False "hi" [("rendition","simple:italic")] $ inlinesToTEI opts lst + inTags False "hi" [("rendition","simple:italic")] <$> inlinesToTEI opts lst inlineToTEI opts (Strong lst) = - inTags False "hi" [("rendition", "simple:bold")] $ inlinesToTEI opts lst + inTags False "hi" [("rendition", "simple:bold")] <$> inlinesToTEI opts lst inlineToTEI opts (Strikeout lst) = - inTags False "hi" [("rendition", "simple:strikethrough")] $ + inTags False "hi" [("rendition", "simple:strikethrough")] <$> inlinesToTEI opts lst inlineToTEI opts (Superscript lst) = - inTags False "hi" [("rendition", "simple:superscript")] $ inlinesToTEI opts lst + inTags False "hi" [("rendition", "simple:superscript")] <$> + inlinesToTEI opts lst inlineToTEI opts (Subscript lst) = - inTags False "hi" [("rendition", "simple:subscript")] $ inlinesToTEI opts lst + inTags False "hi" [("rendition", "simple:subscript")] <$> + inlinesToTEI opts lst inlineToTEI opts (SmallCaps lst) = - inTags False "hi" [("rendition", "simple:smallcaps")] $ - inlinesToTEI opts lst + inTags False "hi" [("rendition", "simple:smallcaps")] <$> + inlinesToTEI opts lst inlineToTEI opts (Quoted _ lst) = - inTagsSimple "quote" $ inlinesToTEI opts lst + inTagsSimple "quote" <$> inlinesToTEI opts lst inlineToTEI opts (Cite _ lst) = inlinesToTEI opts lst inlineToTEI opts (Span _ ils) = inlinesToTEI opts ils -inlineToTEI _ (Code _ str) = +inlineToTEI _ (Code _ str) = return $ inTags False "seg" [("type","code")] $ text (escapeStringForXML str) -- Distinguish display from inline math by wrapping the former in a "figure." -inlineToTEI _ (Math t str) = +inlineToTEI _ (Math t str) = return $ case t of InlineMath -> inTags False "formula" [("notation","TeX")] $ - text (str) + text str DisplayMath -> inTags True "figure" [("type","math")] $ - inTags False "formula" [("notation","TeX")] $ text (str) + inTags False "formula" [("notation","TeX")] $ text str -inlineToTEI _ (RawInline f x) | f == "tei" = text x - | otherwise = empty -inlineToTEI _ LineBreak = selfClosingTag "lb" [] -inlineToTEI _ Space = space +inlineToTEI _ il@(RawInline f x) | f == "tei" = return $ text x + | otherwise = empty <$ + report (InlineNotRendered il) +inlineToTEI _ LineBreak = return $ selfClosingTag "lb" [] +inlineToTEI _ Space = + return space -- because we use \n for LineBreak, we can't do soft breaks: -inlineToTEI _ SoftBreak = space +inlineToTEI _ SoftBreak = + return space inlineToTEI opts (Link attr txt (src, _)) - | Just email <- stripPrefix "mailto:" src = + | Just email <- stripPrefix "mailto:" src = do let emailLink = text $ - escapeStringForXML $ email - in case txt of - [Str s] | escapeURI s == email -> emailLink - _ -> inlinesToTEI opts txt <+> - char '(' <> emailLink <> char ')' + escapeStringForXML email + case txt of + [Str s] | escapeURI s == email -> + return emailLink + _ -> do + linktext <- inlinesToTEI opts txt + return $ linktext <+> char '(' <> emailLink <> char ')' | otherwise = - (if isPrefixOf "#" src - then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr - else inTags False "ref" $ ("target", src) : idAndRole attr ) $ + (if "#" `isPrefixOf` src + then inTags False "ref" $ ("target", drop 1 src) + : idFromAttr opts attr + else inTags False "ref" $ ("target", src) + : idFromAttr opts attr ) <$> inlinesToTEI opts txt -inlineToTEI opts (Image attr description (src, tit)) = +inlineToTEI opts (Image attr description (src, tit)) = do let titleDoc = if null tit then empty - else inTags False "figDesc" [] (text $ escapeStringForXML tit) - imageDesc = if null description - then empty - else inTags False "head" [] (inlinesToTEI opts description) - in inTagsIndented "figure" $ imageDesc $$ - imageToTEI opts attr src $$ titleDoc + else inTags False "figDesc" [] + (text $ escapeStringForXML tit) + imageDesc <- if null description + then return empty + else inTags False "head" [] + <$> inlinesToTEI opts description + img <- imageToTEI opts attr src + return $ inTagsIndented "figure" $ imageDesc $$ img $$ titleDoc inlineToTEI opts (Note contents) = - inTagsIndented "note" $ blocksToTEI opts contents + inTagsIndented "note" <$> blocksToTEI opts contents -idAndRole :: Attr -> [(String, String)] -idAndRole (id',cls,_) = ident ++ role - where - ident = if null id' - then [] - else [("id", id')] - role = if null cls - then [] - else [("role", unwords cls)] +idFromAttr :: WriterOptions -> Attr -> [(String, String)] +idFromAttr opts (id',_,_) = + if null id' + then [] + else [("xml:id", writerIdentifierPrefix opts ++ id')] diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index f2b9aa15f..bf434642e 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2008-2015 John MacFarlane and Peter Wang +Copyright (C) 2008-2018 John MacFarlane + 2012 Peter Wang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2015 John MacFarlane and Peter Wang + Copyright : Copyright (C) 2008-2018 John MacFarlane + 2012 Peter Wang License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,21 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' format into Texinfo. -} module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where +import Control.Monad.Except (throwError) +import Control.Monad.State.Strict +import Data.Char (chr, ord) +import Data.List (maximumBy, transpose) +import Data.Ord (comparing) +import qualified Data.Set as Set +import Data.Text (Text) +import Network.URI (unEscapeString) +import System.FilePath +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options +import Text.Pandoc.Pretty import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Printf ( printf ) -import Data.List ( transpose, maximumBy ) -import Data.Ord ( comparing ) -import Data.Char ( chr, ord ) -import Control.Monad.State -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Network.URI ( isURI, unEscapeString ) -import System.FilePath -import qualified Data.Set as Set +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout @@ -59,10 +66,12 @@ data WriterState = - generated .texi files don't work when run through texi2dvi -} +type TI m = StateT WriterState m + -- | Convert Pandoc to Texinfo. -writeTexinfo :: WriterOptions -> Pandoc -> String +writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTexinfo options document = - evalState (pandocToTexinfo options $ wrapTop document) $ + evalStateT (pandocToTexinfo options $ wrapTop document) WriterState { stStrikeout = False, stSuperscript = False, stEscapeComma = False, stSubscript = False, stIdentifiers = Set.empty, stOptions = options} @@ -72,16 +81,18 @@ wrapTop :: Pandoc -> Pandoc wrapTop (Pandoc meta blocks) = Pandoc meta (Header 0 nullAttr (docTitle meta) : blocks) -pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState String +pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m Text pandocToTexinfo options (Pandoc meta blocks) = do let titlePage = not $ all null $ docTitle meta : docDate meta : docAuthors meta let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing + let render' :: Doc -> Text + render' = render colwidth metadata <- metaToJSON options - (fmap (render colwidth) . blockListToTexinfo) - (fmap (render colwidth) . inlineListToTexinfo) + (fmap render' . blockListToTexinfo) + (fmap render' . inlineListToTexinfo) meta main <- blockListToTexinfo blocks st <- get @@ -91,11 +102,11 @@ pandocToTexinfo options (Pandoc meta blocks) = do $ defField "titlepage" titlePage $ defField "subscript" (stSubscript st) $ defField "superscript" (stSuperscript st) - $ defField "strikeout" (stStrikeout st) - $ metadata + $ + defField "strikeout" (stStrikeout st) metadata case writerTemplate options of Nothing -> return body - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context -- | Escape things as needed for Texinfo. stringToTexinfo :: String -> String @@ -110,7 +121,7 @@ stringToTexinfo = escapeStringUsing texinfoEscapes , ('\x2019', "'") ] -escapeCommas :: State WriterState Doc -> State WriterState Doc +escapeCommas :: PandocMonad m => TI m Doc -> TI m Doc escapeCommas parser = do oldEscapeComma <- gets stEscapeComma modify $ \st -> st{ stEscapeComma = True } @@ -123,8 +134,9 @@ inCmd :: String -> Doc -> Doc inCmd cmd contents = char '@' <> text cmd <> braces contents -- | Convert Pandoc block element to Texinfo. -blockToTexinfo :: Block -- ^ Block to convert - -> State WriterState Doc +blockToTexinfo :: PandocMonad m + => Block -- ^ Block to convert + -> TI m Doc blockToTexinfo Null = return empty @@ -154,17 +166,19 @@ blockToTexinfo (BlockQuote lst) = do contents $$ text "@end quotation" -blockToTexinfo (CodeBlock _ str) = do +blockToTexinfo (CodeBlock _ str) = return $ blankline $$ - text "@verbatim" $$ - flush (text str) $$ - text "@end verbatim" <> blankline + text "@verbatim" $$ + flush (text str) $$ + text "@end verbatim" <> blankline -blockToTexinfo (RawBlock f str) +blockToTexinfo b@(RawBlock f str) | f == "texinfo" = return $ text str | f == "latex" || f == "tex" = return $ text "@tex" $$ text str $$ text "@end tex" - | otherwise = return empty + | otherwise = do + report $ BlockNotRendered b + return empty blockToTexinfo (BulletList lst) = do items <- mapM listItemToTexinfo lst @@ -204,7 +218,7 @@ blockToTexinfo HorizontalRule = text "@bigskip@hrule@bigskip" $$ text "@end iftex" $$ text "@ifnottex" $$ - text (take 72 $ repeat '-') $$ + text (replicate 72 '-') $$ text "@end ifnottex" blockToTexinfo (Header 0 _ lst) = do @@ -214,23 +228,27 @@ blockToTexinfo (Header 0 _ lst) = do return $ text "@node Top" $$ text "@top " <> txt <> blankline -blockToTexinfo (Header level _ lst) = do - node <- inlineListForNode lst - txt <- inlineListToTexinfo lst - idsUsed <- gets stIdentifiers - let id' = uniqueIdent lst idsUsed - modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } - return $ if (level > 0) && (level <= 4) - then blankline <> text "@node " <> node $$ - text (seccmd level) <> txt $$ - text "@anchor" <> braces (text $ '#':id') - else txt - where - seccmd 1 = "@chapter " - seccmd 2 = "@section " - seccmd 3 = "@subsection " - seccmd 4 = "@subsubsection " - seccmd _ = error "illegal seccmd level" +blockToTexinfo (Header level _ lst) + | level < 1 || level > 4 = blockToTexinfo (Para lst) + | otherwise = do + node <- inlineListForNode lst + txt <- inlineListToTexinfo lst + idsUsed <- gets stIdentifiers + let id' = uniqueIdent lst idsUsed + modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } + sec <- seccmd level + return $ if (level > 0) && (level <= 4) + then blankline <> text "@node " <> node $$ + text sec <> txt $$ + text "@anchor" <> braces (text $ '#':id') + else txt + where + seccmd :: PandocMonad m => Int -> TI m String + seccmd 1 = return "@chapter " + seccmd 2 = return "@section " + seccmd 3 = return "@subsection " + seccmd 4 = return "@subsubsection " + seccmd _ = throwError $ PandocSomeError "illegal seccmd level" blockToTexinfo (Table caption aligns widths heads rows) = do headers <- if all null heads @@ -256,28 +274,32 @@ blockToTexinfo (Table caption aligns widths heads rows) = do inCmd "caption" captionText $$ text "@end float" -tableHeadToTexinfo :: [Alignment] +tableHeadToTexinfo :: PandocMonad m + => [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem " -tableRowToTexinfo :: [Alignment] +tableRowToTexinfo :: PandocMonad m + => [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableRowToTexinfo = tableAnyRowToTexinfo "@item " -tableAnyRowToTexinfo :: String +tableAnyRowToTexinfo :: PandocMonad m + => String -> [Alignment] -> [[Block]] - -> State WriterState Doc + -> TI m Doc tableAnyRowToTexinfo itemtype aligns cols = zipWithM alignedBlock aligns cols >>= return . (text itemtype $$) . foldl (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty -alignedBlock :: Alignment +alignedBlock :: PandocMonad m + => Alignment -> [Block] - -> State WriterState Doc + -> TI m Doc -- XXX @flushleft and @flushright text won't get word wrapped. Since word -- wrapping is more important than alignment, we ignore the alignment. alignedBlock _ = blockListToTexinfo @@ -292,8 +314,9 @@ alignedBlock _ col = blockListToTexinfo col -} -- | Convert Pandoc block elements to Texinfo. -blockListToTexinfo :: [Block] - -> State WriterState Doc +blockListToTexinfo :: PandocMonad m + => [Block] + -> TI m Doc blockListToTexinfo [] = return empty blockListToTexinfo (x:xs) = do x' <- blockToTexinfo x @@ -316,8 +339,8 @@ blockListToTexinfo (x:xs) = do Para _ -> do xs' <- blockListToTexinfo xs case xs of - ((CodeBlock _ _):_) -> return $ x' $$ xs' - _ -> return $ x' $+$ xs' + (CodeBlock _ _:_) -> return $ x' $$ xs' + _ -> return $ x' $+$ xs' _ -> do xs' <- blockListToTexinfo xs return $ x' $$ xs' @@ -335,15 +358,17 @@ collectNodes level (x:xs) = _ -> collectNodes level xs -makeMenuLine :: Block - -> State WriterState Doc +makeMenuLine :: PandocMonad m + => Block + -> TI m Doc makeMenuLine (Header _ _ lst) = do txt <- inlineListForNode lst return $ text "* " <> txt <> text "::" -makeMenuLine _ = error "makeMenuLine called with non-Header block" +makeMenuLine _ = throwError $ PandocSomeError "makeMenuLine called with non-Header block" -listItemToTexinfo :: [Block] - -> State WriterState Doc +listItemToTexinfo :: PandocMonad m + => [Block] + -> TI m Doc listItemToTexinfo lst = do contents <- blockListToTexinfo lst let spacer = case reverse lst of @@ -351,8 +376,9 @@ listItemToTexinfo lst = do _ -> empty return $ text "@item" $$ contents <> spacer -defListItemToTexinfo :: ([Inline], [[Block]]) - -> State WriterState Doc +defListItemToTexinfo :: PandocMonad m + => ([Inline], [[Block]]) + -> TI m Doc defListItemToTexinfo (term, defs) = do term' <- inlineListToTexinfo term let defToTexinfo bs = do d <- blockListToTexinfo bs @@ -363,13 +389,15 @@ defListItemToTexinfo (term, defs) = do return $ text "@item " <> term' $+$ vcat defs' -- | Convert list of inline elements to Texinfo. -inlineListToTexinfo :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListToTexinfo :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> TI m Doc inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat -- | Convert list of inline elements to Texinfo acceptable for a node name. -inlineListForNode :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc +inlineListForNode :: PandocMonad m + => [Inline] -- ^ Inlines to convert + -> TI m Doc inlineListForNode = return . text . stringToTexinfo . filter (not . disallowedInNode) . stringify @@ -378,8 +406,9 @@ disallowedInNode :: Char -> Bool disallowedInNode c = c `elem` (".,:()" :: String) -- | Convert inline element to Texinfo -inlineToTexinfo :: Inline -- ^ Inline to convert - -> State WriterState Doc +inlineToTexinfo :: PandocMonad m + => Inline -- ^ Inline to convert + -> TI m Doc inlineToTexinfo (Span _ lst) = inlineListToTexinfo lst @@ -408,7 +437,7 @@ inlineToTexinfo (Subscript lst) = do inlineToTexinfo (SmallCaps lst) = inlineListToTexinfo lst >>= return . inCmd "sc" -inlineToTexinfo (Code _ str) = do +inlineToTexinfo (Code _ str) = return $ text $ "@code{" ++ stringToTexinfo str ++ "}" inlineToTexinfo (Quoted SingleQuote lst) = do @@ -423,12 +452,14 @@ inlineToTexinfo (Cite _ lst) = inlineListToTexinfo lst inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str -inlineToTexinfo (RawInline f str) +inlineToTexinfo il@(RawInline f str) | f == "latex" || f == "tex" = return $ text "@tex" $$ text str $$ text "@end tex" | f == "texinfo" = return $ text str - | otherwise = return empty -inlineToTexinfo (LineBreak) = return $ text "@*" <> cr + | otherwise = do + report $ InlineNotRendered il + return empty +inlineToTexinfo LineBreak = return $ text "@*" <> cr inlineToTexinfo SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of @@ -441,10 +472,10 @@ inlineToTexinfo (Link _ txt (src@('#':_), _)) = do contents <- escapeCommas $ inlineListToTexinfo txt return $ text "@ref" <> braces (text (stringToTexinfo src) <> text "," <> contents) -inlineToTexinfo (Link _ txt (src, _)) = do +inlineToTexinfo (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink - do return $ text $ "@url{" ++ x ++ "}" + return $ text $ "@url{" ++ x ++ "}" _ -> do contents <- escapeCommas $ inlineListToTexinfo txt let src1 = stringToTexinfo src return $ text ("@uref{" ++ src1 ++ ",") <> contents <> @@ -453,7 +484,7 @@ inlineToTexinfo (Link _ txt (src, _)) = do inlineToTexinfo (Image attr alternate (source, _)) = do content <- escapeCommas $ inlineListToTexinfo alternate opts <- gets stOptions - let showDim dim = case (dimension dim attr) of + let showDim dim = case dimension dim attr of (Just (Pixel a)) -> showInInch opts (Pixel a) ++ "in" (Just (Percent _)) -> "" (Just d) -> show d diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index f73876fd2..f46eb43bc 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,17 +30,20 @@ Conversion of 'Pandoc' documents to Textile markup. Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual> -} module Text.Pandoc.Writers.Textile ( writeTextile ) where +import Control.Monad.State.Strict +import Data.Char (isSpace) +import Data.List (intercalate) +import Data.Text (Text, pack) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition +import Text.Pandoc.ImageSize +import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared import Text.Pandoc.Pretty (render) -import Text.Pandoc.ImageSize -import Text.Pandoc.Writers.Shared +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intercalate ) -import Control.Monad.State -import Data.Char ( isSpace ) +import Text.Pandoc.Writers.Shared +import Text.Pandoc.XML (escapeStringForXML) data WriterState = WriterState { stNotes :: [String] -- Footnotes @@ -49,29 +52,34 @@ data WriterState = WriterState { , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list } +type TW = StateT WriterState + -- | Convert Pandoc to Textile. -writeTextile :: WriterOptions -> Pandoc -> String +writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTextile opts document = - evalState (pandocToTextile opts document) - WriterState { stNotes = [], stListLevel = [], stStartNum = Nothing, + evalStateT (pandocToTextile opts document) + WriterState { stNotes = [], + stListLevel = [], + stStartNum = Nothing, stUseTags = False } -- | Return Textile representation of document. -pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String +pandocToTextile :: PandocMonad m + => WriterOptions -> Pandoc -> TW m Text pandocToTextile opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (blockListToTextile opts) (inlineListToTextile opts) meta body <- blockListToTextile opts blocks - notes <- liftM (unlines . reverse . stNotes) get - let main = body ++ if null notes then "" else ("\n\n" ++ notes) + notes <- gets $ unlines . reverse . stNotes + let main = pack $ body ++ if null notes then "" else ("\n\n" ++ notes) let context = defField "body" main metadata case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context + Nothing -> return main + Just tpl -> renderTemplate' tpl context -withUseTags :: State WriterState a -> State WriterState a +withUseTags :: PandocMonad m => TW m a -> TW m a withUseTags action = do - oldUseTags <- liftM stUseTags get + oldUseTags <- gets stUseTags modify $ \s -> s { stUseTags = True } result <- action modify $ \s -> s { stUseTags = oldUseTags } @@ -101,9 +109,10 @@ escapeStringForTextile :: String -> String escapeStringForTextile = concatMap escapeCharForTextile -- | Convert Pandoc block element to Textile. -blockToTextile :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState String +blockToTextile :: PandocMonad m + => WriterOptions -- ^ Options + -> Block -- ^ Block element + -> TW m String blockToTextile _ Null = return "" @@ -123,8 +132,8 @@ blockToTextile opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do return $ im ++ "\n" ++ capt blockToTextile opts (Para inlines) = do - useTags <- liftM stUseTags get - listLevel <- liftM stListLevel get + useTags <- gets stUseTags + listLevel <- gets stListLevel contents <- inlineListToTextile opts inlines return $ if useTags then "<p>" ++ contents ++ "</p>" @@ -133,9 +142,11 @@ blockToTextile opts (Para inlines) = do blockToTextile opts (LineBlock lns) = blockToTextile opts $ linesToPara lns -blockToTextile _ (RawBlock f str) +blockToTextile _ b@(RawBlock f str) | f == Format "html" || f == Format "textile" = return str - | otherwise = return "" + | otherwise = do + report $ BlockNotRendered b + return "" blockToTextile _ HorizontalRule = return "<hr />\n" @@ -211,7 +222,7 @@ blockToTextile opts (Table capt aligns widths headers rows') = do "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n" blockToTextile opts x@(BulletList items) = do - oldUseTags <- liftM stUseTags get + oldUseTags <- gets stUseTags let useTags = oldUseTags || not (isSimpleList x) if useTags then do @@ -219,13 +230,13 @@ blockToTextile opts x@(BulletList items) = do return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n" else do modify $ \s -> s { stListLevel = stListLevel s ++ "*" } - level <- get >>= return . length . stListLevel + level <- gets $ length . stListLevel contents <- mapM (listItemToTextile opts) items modify $ \s -> s { stListLevel = init (stListLevel s) } return $ vcat contents ++ (if level > 1 then "" else "\n") blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do - oldUseTags <- liftM stUseTags get + oldUseTags <- gets stUseTags let useTags = oldUseTags || not (isSimpleList x) if useTags then do @@ -237,7 +248,7 @@ blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do , stStartNum = if start > 1 then Just start else Nothing } - level <- get >>= return . length . stListLevel + level <- gets $ length . stListLevel contents <- mapM (listItemToTextile opts) items modify $ \s -> s { stListLevel = init (stListLevel s), stStartNum = Nothing } @@ -261,10 +272,11 @@ listAttribsToString (startnum, numstyle, _) = else "") -- | Convert bullet or ordered list item (list of blocks) to Textile. -listItemToTextile :: WriterOptions -> [Block] -> State WriterState String +listItemToTextile :: PandocMonad m + => WriterOptions -> [Block] -> TW m String listItemToTextile opts items = do contents <- blockListToTextile opts items - useTags <- get >>= return . stUseTags + useTags <- gets stUseTags if useTags then return $ "<li>" ++ contents ++ "</li>" else do @@ -277,14 +289,15 @@ listItemToTextile opts items = do Nothing -> return $ marker ++ " " ++ contents -- | Convert definition list item (label, list of blocks) to Textile. -definitionListItemToTextile :: WriterOptions +definitionListItemToTextile :: PandocMonad m + => WriterOptions -> ([Inline],[[Block]]) - -> State WriterState String + -> TW m String definitionListItemToTextile opts (label, items) = do labelText <- inlineListToTextile opts label contents <- mapM (blockListToTextile opts) items return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents) + intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents) -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -301,16 +314,16 @@ isSimpleListItem :: [Block] -> Bool isSimpleListItem [] = True isSimpleListItem [x] = case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - _ -> False + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + _ -> False isSimpleListItem [x, y] | isPlainOrPara x = case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - _ -> False + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + _ -> False isSimpleListItem _ = False isPlainOrPara :: Block -> Bool @@ -325,18 +338,19 @@ vcat = intercalate "\n" -- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki, -- and Textile writers, and should be abstracted out.) -tableRowToTextile :: WriterOptions - -> [String] - -> Int - -> [[Block]] - -> State WriterState String +tableRowToTextile :: PandocMonad m + => WriterOptions + -> [String] + -> Int + -> [[Block]] + -> TW m String tableRowToTextile opts alignStrings rownum cols' = do let celltype = if rownum == 0 then "th" else "td" let rowclass = case rownum of - 0 -> "header" + 0 -> "header" x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith + _ -> "even" + cols'' <- zipWithM (\alignment item -> tableItemToTextile opts celltype alignment item) alignStrings cols' return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" @@ -348,11 +362,12 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableItemToTextile :: WriterOptions - -> String - -> String - -> [Block] - -> State WriterState String +tableItemToTextile :: PandocMonad m + => WriterOptions + -> String + -> String + -> [Block] + -> TW m String tableItemToTextile opts celltype align' item = do let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ x ++ "</" ++ celltype ++ ">" @@ -360,19 +375,21 @@ tableItemToTextile opts celltype align' item = do return $ mkcell contents -- | Convert list of Pandoc block elements to Textile. -blockListToTextile :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState String +blockListToTextile :: PandocMonad m + => WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> TW m String blockListToTextile opts blocks = mapM (blockToTextile opts) blocks >>= return . vcat -- | Convert list of Pandoc inline elements to Textile. -inlineListToTextile :: WriterOptions -> [Inline] -> State WriterState String +inlineListToTextile :: PandocMonad m + => WriterOptions -> [Inline] -> TW m String inlineListToTextile opts lst = mapM (inlineToTextile opts) lst >>= return . concat -- | Convert Pandoc inline element to Textile. -inlineToTextile :: WriterOptions -> Inline -> State WriterState String +inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m String inlineToTextile opts (Span _ lst) = inlineListToTextile opts lst @@ -429,13 +446,15 @@ inlineToTextile _ (Str str) = return $ escapeStringForTextile str inlineToTextile _ (Math _ str) = return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>" -inlineToTextile opts (RawInline f str) +inlineToTextile opts il@(RawInline f str) | f == Format "html" || f == Format "textile" = return str | (f == Format "latex" || f == Format "tex") && isEnabled Ext_raw_tex opts = return str - | otherwise = return "" + | otherwise = do + report $ InlineNotRendered il + return "" -inlineToTextile _ (LineBreak) = return "\n" +inlineToTextile _ LineBreak = return "\n" inlineToTextile _ SoftBreak = return " " @@ -464,7 +483,7 @@ inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do then "" else "(" ++ unwords cls ++ ")" showDim dir = let toCss str = Just $ show dir ++ ":" ++ str ++ ";" - in case (dimension dir attr) of + in case dimension dir attr of Just (Percent a) -> toCss $ show (Percent a) Just dim -> toCss $ showInPixel opts dim ++ "px" Nothing -> Nothing @@ -476,7 +495,7 @@ inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do return $ "!" ++ classes ++ styles ++ source ++ txt ++ "!" inlineToTextile opts (Note contents) = do - curNotes <- liftM stNotes get + curNotes <- gets stNotes let newnum = length curNotes + 1 contents' <- blockListToTextile opts contents let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n" diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 423928c8a..dec1f9d4a 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -1,5 +1,6 @@ {- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2018 John MacFarlane <jgm@berkeley.edu> + 2017-2018 Alex Ivkin This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,11 +19,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ZimWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane, 2016 Alex Ivkin + Copyright : Copyright (C) 2008-2018 John MacFarlane, 2017-2018 Alex Ivkin License : GNU GPL, version 2 or above Maintainer : Alex Ivkin <alex@ivkin.net> - Stability : alpha + Stability : beta Portability : portable Conversion of 'Pandoc' documents to ZimWiki markup. @@ -31,48 +32,53 @@ http://zim-wiki.org/manual/Help/Wiki_Syntax.html -} module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where +import Control.Monad (zipWithM) +import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) +import Data.Default (Default (..)) +import Data.List (intercalate, isInfixOf, isPrefixOf, transpose) +import qualified Data.Map as Map +import Data.Maybe (fromMaybe) +import Data.Text (Text, breakOnAll, pack) +import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition -import Text.Pandoc.Options ( WriterOptions(writerTableOfContents, writerTemplate, writerWrapText), WrapOption(..) ) -import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting, trimr - , substitute ) -import Text.Pandoc.Writers.Shared ( defField, metaToJSON ) import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Data.List ( intercalate, isPrefixOf, transpose, isInfixOf ) -import Data.Text ( breakOnAll, pack ) -import Data.Default (Default(..)) -import Network.URI ( isURI ) -import Control.Monad ( zipWithM ) -import Control.Monad.State ( modify, State, get, evalState ) ---import Control.Monad.Reader ( ReaderT, runReaderT, ask, local ) +import Text.Pandoc.Logging +import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) +import Text.Pandoc.Shared (escapeURI, isURI, linesToPara, removeFormatting, + substitute, trimr) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared (defField, metaToJSON) data WriterState = WriterState { - stItemNum :: Int, - stIndent :: String -- Indent after the marker at the beginning of list items + stItemNum :: Int, + stIndent :: String, -- Indent after the marker at the beginning of list items + stInTable :: Bool, -- Inside a table + stInLink :: Bool -- Inside a link description } instance Default WriterState where - def = WriterState { stItemNum = 1, stIndent = "" } + def = WriterState { stItemNum = 1, stIndent = "", stInTable = False, stInLink = False } + +type ZW = StateT WriterState -- | Convert Pandoc to ZimWiki. -writeZimWiki :: WriterOptions -> Pandoc -> String -writeZimWiki opts document = evalState (pandocToZimWiki opts document) (WriterState 1 "") +writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeZimWiki opts document = evalStateT (pandocToZimWiki opts document) def -- | Return ZimWiki representation of document. -pandocToZimWiki :: WriterOptions -> Pandoc -> State WriterState String +pandocToZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> ZW m Text pandocToZimWiki opts (Pandoc meta blocks) = do metadata <- metaToJSON opts (fmap trimr . blockListToZimWiki opts) (inlineListToZimWiki opts) meta - body <- blockListToZimWiki opts blocks + body <- pack <$> blockListToZimWiki opts blocks --let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n" let main = body let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) - $ metadata + $ defField "toc" (writerTableOfContents opts) metadata case writerTemplate opts of - Just tpl -> return $ renderTemplate' tpl context + Just tpl -> renderTemplate' tpl context Nothing -> return main -- | Escape special characters for ZimWiki. @@ -83,7 +89,7 @@ escapeString = substitute "__" "''__''" . substitute "//" "''//''" -- | Convert Pandoc block element to ZimWiki. -blockToZimWiki :: WriterOptions -> Block -> State WriterState String +blockToZimWiki :: PandocMonad m => WriterOptions -> Block -> ZW m String blockToZimWiki _ Null = return "" @@ -107,18 +113,20 @@ blockToZimWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n" blockToZimWiki opts (Para inlines) = do - indent <- stIndent <$> get - -- useTags <- stUseTags <$> get + indent <- gets stIndent + -- useTags <- gets stUseTags contents <- inlineListToZimWiki opts inlines return $ contents ++ if null indent then "\n" else "" -blockToZimWiki opts (LineBlock lns) = do +blockToZimWiki opts (LineBlock lns) = blockToZimWiki opts $ linesToPara lns -blockToZimWiki opts (RawBlock f str) +blockToZimWiki opts b@(RawBlock f str) | f == Format "zimwiki" = return str - | f == Format "html" = do cont <- indentFromHTML opts str; return cont - | otherwise = return "" + | f == Format "html" = indentFromHTML opts str + | otherwise = do + report $ BlockNotRendered b + return "" blockToZimWiki _ HorizontalRule = return "\n----\n" @@ -128,9 +136,13 @@ blockToZimWiki opts (Header level _ inlines) = do return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" blockToZimWiki _ (CodeBlock (_,classes,_) str) = do + -- Remap languages into the gtksourceview2 convention that ZimWiki source code plugin is using + let langal = [("javascript", "js"), ("bash", "sh"), ("winbatch", "dosbatch")] + let langmap = Map.fromList langal return $ case classes of - [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- no lang block is a quote block - (x:_) -> "{{{code: lang=\"" ++ x ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec + [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- turn no lang block into a quote block + (x:_) -> "{{{code: lang=\"" ++ + fromMaybe x (Map.lookup x langmap) ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec blockToZimWiki opts (BlockQuote blocks) = do contents <- blockListToZimWiki opts blocks @@ -143,12 +155,12 @@ blockToZimWiki opts (Table capt aligns _ headers rows) = do c <- inlineListToZimWiki opts capt return $ "" ++ c ++ "\n" headers' <- if all null headers - then zipWithM (tableItemToZimWiki opts) aligns (rows !! 0) - else zipWithM (tableItemToZimWiki opts) aligns headers + then zipWithM (tableItemToZimWiki opts) aligns (head rows) + else mapM (inlineListToZimWiki opts . removeFormatting)headers -- emphasis, links etc. are not allowed in table headers rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows let widths = map (maximum . map length) $ transpose (headers':rows') let padTo (width, al) s = - case (width - length s) of + case width - length s of x | x > 0 -> if al == AlignLeft || al == AlignDefault then s ++ replicate x ' ' @@ -157,63 +169,63 @@ blockToZimWiki opts (Table capt aligns _ headers rows) = do else replicate (x `div` 2) ' ' ++ s ++ replicate (x - x `div` 2) ' ' | otherwise -> s - let borderCell (width, al) _ = - if al == AlignLeft - then ":"++ replicate (width-1) '-' - else if al == AlignDefault - then replicate width '-' - else if al == AlignRight - then replicate (width-1) '-' ++ ":" - else ":" ++ replicate (width-2) '-' ++ ":" + let borderCell (width, al) _ + | al == AlignLeft = ":"++ replicate (width-1) '-' + | al == AlignDefault = replicate width '-' + | al == AlignRight = replicate (width-1) '-' ++ ":" + | otherwise = ":" ++ replicate (width-2) '-' ++ ":" let underheader = "|" ++ intercalate "|" (zipWith borderCell (zip widths aligns) headers') ++ "|" - let renderRow sep cells = sep ++ intercalate sep (zipWith padTo (zip widths aligns) cells) ++ sep + let renderRow cells = "|" ++ intercalate "|" (zipWith padTo (zip widths aligns) cells) ++ "|" return $ captionDoc ++ - (if null headers' then "" else renderRow "|" headers' ++ "\n") ++ underheader ++ "\n" ++ - unlines (map (renderRow "|") rows') + (if null headers' then "" else renderRow headers' ++ "\n") ++ underheader ++ "\n" ++ + unlines (map renderRow rows') blockToZimWiki opts (BulletList items) = do - indent <- stIndent <$> get + indent <- gets stIndent modify $ \s -> s { stIndent = stIndent s ++ "\t" } - contents <- (mapM (listItemToZimWiki opts) items) + contents <- mapM (listItemToZimWiki opts) items modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) } return $ vcat contents ++ if null indent then "\n" else "" blockToZimWiki opts (OrderedList _ items) = do - indent <- stIndent <$> get + indent <- gets stIndent modify $ \s -> s { stIndent = stIndent s ++ "\t", stItemNum = 1 } - contents <- (mapM (orderedListItemToZimWiki opts) items) + contents <- mapM (orderedListItemToZimWiki opts) items modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) } return $ vcat contents ++ if null indent then "\n" else "" blockToZimWiki opts (DefinitionList items) = do - contents <- (mapM (definitionListItemToZimWiki opts) items) + contents <- mapM (definitionListItemToZimWiki opts) items return $ vcat contents -definitionListItemToZimWiki :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState String +definitionListItemToZimWiki :: PandocMonad m + => WriterOptions + -> ([Inline],[[Block]]) + -> ZW m String definitionListItemToZimWiki opts (label, items) = do labelText <- inlineListToZimWiki opts label contents <- mapM (blockListToZimWiki opts) items - indent <- stIndent <$> get + indent <- gets stIndent return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents -- Auxiliary functions for lists: -indentFromHTML :: WriterOptions -> String -> State WriterState String +indentFromHTML :: PandocMonad m => WriterOptions -> String -> ZW m String indentFromHTML _ str = do - indent <- stIndent <$> get - itemnum <- stItemNum <$> get - if isInfixOf "<li>" str then return $ indent ++ show itemnum ++ "." - else if isInfixOf "</li>" str then return "\n" - else if isInfixOf "<li value=" str then do + indent <- gets stIndent + itemnum <- gets stItemNum + if "<li>" `isInfixOf` str then return $ indent ++ show itemnum ++ "." + else if "</li>" `isInfixOf` str then return "\n" + else if "<li value=" `isInfixOf` str then do -- poor man's cut let val = drop 10 $ reverse $ drop 1 $ reverse str --let val = take ((length valls) - 2) valls modify $ \s -> s { stItemNum = read val } return "" - else if isInfixOf "<ol>" str then do + else if "<ol>" `isInfixOf` str then do let olcount=countSubStrs "<ol>" str modify $ \s -> s { stIndent = stIndent s ++ replicate olcount '\t', stItemNum = 1 } return "" - else if isInfixOf "</ol>" str then do + else if "</ol>" `isInfixOf` str then do let olcount=countSubStrs "/<ol>" str modify $ \s -> s{ stIndent = drop olcount (stIndent s) } return "" @@ -230,23 +242,25 @@ vcat :: [String] -> String vcat = intercalate "\n" -- | Convert bullet list item (list of blocks) to ZimWiki. -listItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String +listItemToZimWiki :: PandocMonad m => WriterOptions -> [Block] -> ZW m String listItemToZimWiki opts items = do contents <- blockListToZimWiki opts items - indent <- stIndent <$> get + indent <- gets stIndent return $ indent ++ "* " ++ contents -- | Convert ordered list item (list of blocks) to ZimWiki. -orderedListItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String +orderedListItemToZimWiki :: PandocMonad m + => WriterOptions -> [Block] -> ZW m String orderedListItemToZimWiki opts items = do contents <- blockListToZimWiki opts items - indent <- stIndent <$> get - itemnum <- stItemNum <$> get + indent <- gets stIndent + itemnum <- gets stItemNum --modify $ \s -> s { stItemNum = itemnum + 1 } -- this is not strictly necessary for zim as zim does its own renumbering return $ indent ++ show itemnum ++ ". " ++ contents -- Auxiliary functions for tables: -tableItemToZimWiki :: WriterOptions -> Alignment -> [Block] -> State WriterState String +tableItemToZimWiki :: PandocMonad m + => WriterOptions -> Alignment -> [Block] -> ZW m String tableItemToZimWiki opts align' item = do let mkcell x = (if align' == AlignRight || align' == AlignCenter then " " @@ -254,19 +268,24 @@ tableItemToZimWiki opts align' item = do (if align' == AlignLeft || align' == AlignCenter then " " else "") - contents <- blockListToZimWiki opts item -- local (\s -> s { stBackSlashLB = True }) $ + modify $ \s -> s { stInTable = True } + contents <- blockListToZimWiki opts item + modify $ \s -> s { stInTable = False } return $ mkcell contents -- | Convert list of Pandoc block elements to ZimWiki. -blockListToZimWiki :: WriterOptions -> [Block] -> State WriterState String +blockListToZimWiki :: PandocMonad m + => WriterOptions -> [Block] -> ZW m String blockListToZimWiki opts blocks = vcat <$> mapM (blockToZimWiki opts) blocks -- | Convert list of Pandoc inline elements to ZimWiki. -inlineListToZimWiki :: WriterOptions -> [Inline] -> State WriterState String -inlineListToZimWiki opts lst = concat <$> (mapM (inlineToZimWiki opts) lst) +inlineListToZimWiki :: PandocMonad m + => WriterOptions -> [Inline] -> ZW m String +inlineListToZimWiki opts lst = concat <$> mapM (inlineToZimWiki opts) lst -- | Convert Pandoc inline element to ZimWiki. -inlineToZimWiki :: WriterOptions -> Inline -> State WriterState String +inlineToZimWiki :: PandocMonad m + => WriterOptions -> Inline -> ZW m String inlineToZimWiki opts (Emph lst) = do contents <- inlineListToZimWiki opts lst @@ -304,7 +323,15 @@ inlineToZimWiki opts (Cite _ lst) = inlineListToZimWiki opts lst inlineToZimWiki _ (Code _ str) = return $ "''" ++ str ++ "''" -inlineToZimWiki _ (Str str) = return $ escapeString str +inlineToZimWiki _ (Str str) = do + inTable <- gets stInTable + inLink <- gets stInLink + if inTable + then return $ substitute "|" "\\|" . escapeString $ str + else + if inLink + then return str + else return $ escapeString str inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped where delim = case mathType of @@ -312,12 +339,18 @@ inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note InlineMath -> "$" -- | f == Format "html" = return $ "<html>" ++ str ++ "</html>" -inlineToZimWiki opts (RawInline f str) +inlineToZimWiki opts il@(RawInline f str) | f == Format "zimwiki" = return str - | f == Format "html" = do cont <- indentFromHTML opts str; return cont - | otherwise = return "" + | f == Format "html" = indentFromHTML opts str + | otherwise = do + report $ InlineNotRendered il + return "" -inlineToZimWiki _ (LineBreak) = return "\n" -- was \\\\ +inlineToZimWiki _ LineBreak = do + inTable <- gets stInTable + if inTable + then return "\\n" + else return "\n" inlineToZimWiki opts SoftBreak = case writerWrapText opts of @@ -328,37 +361,45 @@ inlineToZimWiki opts SoftBreak = inlineToZimWiki _ Space = return " " inlineToZimWiki opts (Link _ txt (src, _)) = do - label <- inlineListToZimWiki opts txt + inTable <- gets stInTable + modify $ \s -> s { stInLink = True } + label <- inlineListToZimWiki opts $ removeFormatting txt -- zim does not allow formatting in link text, it takes the text verbatim, no need to escape it + modify $ \s -> s { stInLink = False } + let label'= if inTable + then "" -- no label is allowed in a table + else "|"++label case txt of [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" | escapeURI s == src -> return src _ -> if isURI src - then return $ "[[" ++ src ++ "|" ++ label ++ "]]" - else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" + then return $ "[[" ++ src ++ label' ++ "]]" + else return $ "[[" ++ src' ++ label' ++ "]]" where src' = case src of '/':xs -> xs -- with leading / it's a _ -> src -- link to a help page inlineToZimWiki opts (Image attr alt (source, tit)) = do alt' <- inlineListToZimWiki opts alt - let txt = case (tit, alt) of - ("", []) -> "" - ("", _ ) -> "|" ++ alt' - (_ , _ ) -> "|" ++ tit + inTable <- gets stInTable + let txt = case (tit, alt, inTable) of + ("",[], _) -> "" + ("", _, False ) -> "|" ++ alt' + (_ , _, False ) -> "|" ++ tit + (_ , _, True ) -> "" -- Relative links fail isURI and receive a colon prefix = if isURI source then "" else ":" return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}" inlineToZimWiki opts (Note contents) = do + -- no concept of notes in zim wiki, use a text block contents' <- blockListToZimWiki opts contents - return $ "((" ++ contents' ++ "))" - -- note - may not work for notes with multiple blocks + return $ " **{Note:** " ++ trimr contents' ++ "**}**" imageDims :: WriterOptions -> Attr -> String imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) where toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim + checkPct maybeDim = maybeDim go (Just w) Nothing = "?" ++ w go (Just w) (Just h) = "?" ++ w ++ "x" ++ h go Nothing (Just h) = "?0x" ++ h diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index e105aee91..62874f0b9 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -36,18 +36,20 @@ module Text.Pandoc.XML ( escapeCharForXML, toEntities, fromEntities ) where -import Text.Pandoc.Pretty -import Data.Char (ord, isAscii, isSpace) +import Data.Char (isAscii, isSpace, ord) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Pretty -- | Escape one character as needed for XML. escapeCharForXML :: Char -> String escapeCharForXML x = case x of - '&' -> "&" - '<' -> "<" - '>' -> ">" - '"' -> """ - c -> [c] + '&' -> "&" + '<' -> "<" + '>' -> ">" + '"' -> """ + c -> [c] -- | Escape string as needed for XML. Entity references are not preserved. escapeStringForXML :: String -> String @@ -91,11 +93,10 @@ inTagsIndented :: String -> Doc -> Doc inTagsIndented tagType = inTags True tagType [] -- | Escape all non-ascii characters using numerical entities. -toEntities :: String -> String -toEntities [] = "" -toEntities (c:cs) - | isAscii c = c : toEntities cs - | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs +toEntities :: Text -> Text +toEntities = T.concatMap go + where go c | isAscii c = T.singleton c + | otherwise = T.pack ("&#" ++ show (ord c) ++ ";") -- Unescapes XML entities fromEntities :: String -> String @@ -108,8 +109,8 @@ fromEntities ('&':xs) = (zs, ys) -> (zs,ys) ent' = case ent of '#':'X':ys -> '#':'x':ys -- workaround tagsoup bug - '#':_ -> ent - _ -> ent ++ ";" + '#':_ -> ent + _ -> ent ++ ";" fromEntities (x:xs) = x : fromEntities xs fromEntities [] = [] |