diff options
-rw-r--r-- | pandoc.cabal | 11 | ||||
-rw-r--r-- | pandoc.hs | 858 | ||||
-rw-r--r-- | src/Text/Pandoc/App.hs | 720 | ||||
-rw-r--r-- | src/Text/Pandoc/Highlighting.hs | 14 |
4 files changed, 826 insertions, 777 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index 9ed1eb47d..bb95c5453 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -297,6 +297,10 @@ Library cmark >= 0.5 && < 0.6, doctemplates >= 0.1 && < 0.2, free >= 4 + if os(windows) + Cpp-options: -D_WINDOWS + else + Build-Depends: unix >= 2.4 && < 2.8 if flag(old-locale) Build-Depends: old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5 @@ -330,6 +334,7 @@ Library Other-Modules: Prelude Exposed-Modules: Text.Pandoc, + Text.Pandoc.App, Text.Pandoc.Options, Text.Pandoc.Extensions, Text.Pandoc.Pretty, @@ -386,6 +391,7 @@ Library Text.Pandoc.Templates, Text.Pandoc.XML, Text.Pandoc.SelfContained, + Text.Pandoc.Highlighting, Text.Pandoc.Process, Text.Pandoc.MIME, Text.Pandoc.Class @@ -421,7 +427,6 @@ Library Text.Pandoc.UUID, Text.Pandoc.ImageSize, Text.Pandoc.Slides, - Text.Pandoc.Highlighting, Text.Pandoc.Compat.Time, Paths_pandoc @@ -448,10 +453,6 @@ Executable pandoc Build-Depends: network >= 2 && < 2.6 Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind Ghc-Prof-Options: -fprof-auto-exported -rtsopts -with-rtsopts=-K16m - if os(windows) - Cpp-options: -D_WINDOWS - else - Build-Depends: unix >= 2.4 && < 2.8 Default-Language: Haskell98 Other-Extensions: PatternGuards, OverloadedStrings, @@ -30,51 +30,31 @@ Parses command-line options and calls the appropriate readers and writers. -} module Main where +import Text.Pandoc.App (defaultOpts, convertWithOpts, Opt(..)) import Text.Pandoc -import Text.Pandoc.Builder (setMeta) -import Text.Pandoc.PDF (makePDF) -import Text.Pandoc.Walk (walk) -import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, - safeRead, headerShift, err, openURL ) -import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag ) -import Text.Pandoc.XML ( toEntities ) -import Text.Pandoc.SelfContained ( makeSelfContained ) -import Text.Pandoc.Process (pipeProcess) -import Skylighting ( defaultSyntaxMap, Syntax(..), Style, tango, pygments, - espresso, zenburn, kate, haddock, breezeDark, monochrome ) -import System.Environment ( getArgs, getProgName, getEnvironment ) -import System.Exit ( ExitCode (..), exitSuccess ) -import System.FilePath +import Text.Pandoc.Shared +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Highlighting (highlightingStyles) +import Skylighting (Syntax(..), defaultSyntaxMap) +import Data.List (sort, intercalate) +import Data.Char (toUpper) import System.Console.GetOpt -import Data.Char ( toLower, toUpper ) -import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort ) -import System.Directory ( getAppUserDataDirectory, findExecutable, - doesFileExist, Permissions(..), getPermissions ) -import System.IO ( stdout, stderr ) -import System.IO.Error ( isDoesNotExistError ) -import qualified Control.Exception as E -import Control.Exception.Extensible ( throwIO ) +import Control.Monad import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad (when, unless, (>=>)) -import Data.Maybe (fromMaybe, isNothing, isJust) -import Data.Foldable (foldrM) -import Network.URI (parseURI, isURI, URI(..)) -import qualified Data.ByteString.Lazy as B +import System.Environment +import System.FilePath (takeBaseName) +import Data.Maybe (fromMaybe) +import Text.Pandoc.Shared (err) +import qualified Data.Text as T +import qualified Control.Exception as E import qualified Data.ByteString as BS -import Data.Aeson (eitherDecode', encode) import qualified Data.Map as M -import Data.Yaml (decode) -import qualified Data.Yaml as Yaml -import qualified Data.Text as T +import System.Exit +import System.IO (stdout) import Control.Applicative ((<|>)) +import System.Directory (getAppUserDataDirectory) import Paths_pandoc (getDataDir) import Text.Printf (printf) -#ifndef _WINDOWS -import System.Posix.Terminal (queryTerminal) -import System.Posix.IO (stdOutput) -#endif -import Control.Monad.Trans -import Text.Pandoc.Class (withMediaBag, PandocIO, getLog, setVerbosity) main :: IO () main = do @@ -95,549 +75,6 @@ main = do opts <- foldl (>>=) (return defaultOpts) actions convertWithOpts opts args -addDeprecationNote :: String -> [String] -> [String] -addDeprecationNote "--smart" = - (("--smart has been removed. Use +smart or -smart extension instead.\n" ++ - "For example: pandoc -f markdown+smart -t markdown-smart.") :) -addDeprecationNote "-S" = addDeprecationNote "--smart" -addDeprecationNote "--old-dashes" = - ("--old-dashes has been removed. Use +old_dashes extension instead." :) -addDeprecationNote "--no-wrap" = - ("--no-wrap has been removed. Use --wrap=none instead." :) -addDeprecationNote "--chapters" = - ("--chapters has been removed. Use --top-level-division=chapter instead." :) -addDeprecationNote "--reference-docx" = - ("--reference-docx has been removed. Use --reference-doc instead." :) -addDeprecationNote "--reference-odt" = - ("--reference-odt has been removed. Use --reference-doc instead." :) -addDeprecationNote x = - (("Unknown option " ++ x ++ ".") :) - -convertWithOpts :: Opt -> [FilePath] -> IO () -convertWithOpts opts args = do - let outputFile = optOutputFile opts - let filters = optFilters opts - let verbosity = optVerbosity opts - - when (optDumpArgs opts) $ - do UTF8.hPutStrLn stdout outputFile - mapM_ (UTF8.hPutStrLn stdout) args - exitSuccess - - epubStylesheet <- case optEpubStylesheet opts of - Nothing -> return Nothing - Just fp -> Just <$> UTF8.readFile fp - - epubMetadata <- case optEpubMetadata opts of - Nothing -> return Nothing - Just fp -> Just <$> UTF8.readFile fp - - let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" - let mathMethod = - case (optKaTeXJS opts, optKaTeXStylesheet opts) of - (Nothing, _) -> optHTMLMathMethod opts - (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) - - - -- --bibliography implies -F pandoc-citeproc for backwards compatibility: - let needsCiteproc = isJust (M.lookup "bibliography" (optMetadata opts)) && - optCiteMethod opts `notElem` [Natbib, Biblatex] && - "pandoc-citeproc" `notElem` map takeBaseName filters - let filters' = if needsCiteproc then "pandoc-citeproc" : filters - else filters - - let sources = case args of - [] -> ["-"] - xs | optIgnoreArgs opts -> ["-"] - | otherwise -> xs - - datadir <- case optDataDir opts of - Nothing -> E.catch - (Just <$> getAppUserDataDirectory "pandoc") - (\e -> let _ = (e :: E.SomeException) - in return Nothing) - Just _ -> return $ optDataDir opts - - -- assign reader and writer based on options and filenames - let readerName = case optReader opts of - Nothing -> defaultReaderName - (if any isURI sources - then "html" - else "markdown") sources - Just x -> map toLower x - - let writerName = case optWriter opts of - Nothing -> defaultWriterName outputFile - Just x -> map toLower x - let format = takeWhile (`notElem` ['+','-']) - $ takeFileName writerName -- in case path to lua script - - let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" - - let laTeXOutput = format `elem` ["latex", "beamer"] - let conTeXtOutput = format == "context" - let html5Output = format == "html5" || format == "html" - - -- disabling the custom writer for now - writer <- if ".lua" `isSuffixOf` format - -- note: use non-lowercased version writerName - then error "custom writers disabled for now" - else case getWriter writerName of - Left e -> err 9 $ - if format == "pdf" - then e ++ - "\nTo create a pdf with pandoc, use " ++ - "the latex or beamer writer and specify\n" ++ - "an output file with .pdf extension " ++ - "(pandoc -t latex -o filename.pdf)." - else e - Right w -> return (w :: Writer PandocIO) - - -- TODO: we have to get the input and the output into the state for - -- the sake of the text2tags reader. - reader <- case getReader readerName of - Right r -> return (r :: Reader PandocIO) - Left e -> err 7 e' - where e' = case readerName of - "pdf" -> e ++ - "\nPandoc can convert to PDF, but not from PDF." - "doc" -> e ++ - "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." - _ -> e - - let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput - - templ <- case optTemplate opts of - _ | not standalone -> return Nothing - Nothing -> do - deftemp <- getDefaultTemplate datadir format - case deftemp of - Left e -> throwIO e - Right t -> return (Just t) - Just tp -> do - -- strip off extensions - let tp' = case takeExtension tp of - "" -> tp <.> format - _ -> tp - Just <$> E.catch (UTF8.readFile tp') - (\e -> if isDoesNotExistError e - then E.catch - (readDataFileUTF8 datadir - ("templates" </> tp')) - (\e' -> let _ = (e' :: E.SomeException) - in throwIO e') - else throwIO e) - - let addStringAsVariable varname s vars = return $ (varname, s) : vars - - let addContentsAsVariable varname fp vars = do - s <- UTF8.readFile fp - return $ (varname, s) : vars - - -- note: this reverses the list constructed in option parsing, - -- which in turn was reversed from the command-line order, - -- so we end up with the correct order in the variable list: - let withList _ [] vars = return vars - withList f (x:xs) vars = f x vars >>= withList f xs - - variables <- return (optVariables opts) - >>= - withList (addContentsAsVariable "include-before") - (optIncludeBeforeBody opts) - >>= - withList (addContentsAsVariable "include-after") - (optIncludeAfterBody opts) - >>= - withList (addContentsAsVariable "header-includes") - (optIncludeInHeader opts) - >>= - withList (addStringAsVariable "css") (optCss opts) - >>= - maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts) - >>= - maybe return (addStringAsVariable "epub-cover-image") - (optEpubCoverImage opts) - >>= - (\vars -> case mathMethod of - LaTeXMathML Nothing -> do - s <- readDataFileUTF8 datadir "LaTeXMathML.js" - return $ ("mathml-script", s) : vars - _ -> return vars) - >>= - (\vars -> if format == "dzslides" - then do - dztempl <- readDataFileUTF8 datadir - ("dzslides" </> "template.html") - let dzline = "<!-- {{{{ dzslides core" - let dzcore = unlines - $ dropWhile (not . (dzline `isPrefixOf`)) - $ lines dztempl - return $ ("dzslides-core", dzcore) : vars - else return vars) - - let sourceURL = case sources of - [] -> Nothing - (x:_) -> case parseURI x of - Just u - | uriScheme u `elem` ["http:","https:"] -> - Just $ show u{ uriQuery = "", - uriFragment = "" } - _ -> Nothing - - let readerOpts = def{ readerStandalone = standalone - , readerParseRaw = optParseRaw opts - , readerColumns = optColumns opts - , readerTabStop = optTabStop opts - , readerIndentedCodeClasses = optIndentedCodeClasses opts - , readerApplyMacros = not laTeXOutput - , readerDefaultImageExtension = - optDefaultImageExtension opts - , readerTrackChanges = optTrackChanges opts - } - - highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts - - let writerOptions = def { writerTemplate = templ, - writerVariables = variables, - writerTabStop = optTabStop opts, - writerTableOfContents = optTableOfContents opts, - writerHTMLMathMethod = mathMethod, - writerIncremental = optIncremental opts, - writerCiteMethod = optCiteMethod opts, - writerNumberSections = optNumberSections opts, - writerNumberOffset = optNumberOffset opts, - writerSectionDivs = optSectionDivs opts, - writerReferenceLinks = optReferenceLinks opts, - writerReferenceLocation = optReferenceLocation opts, - writerDpi = optDpi opts, - writerWrapText = optWrapText opts, - writerColumns = optColumns opts, - writerEmailObfuscation = optEmailObfuscation opts, - writerIdentifierPrefix = optIdentifierPrefix opts, - writerSourceURL = sourceURL, - writerUserDataDir = datadir, - writerHtmlQTags = optHtmlQTags opts, - writerTopLevelDivision = optTopLevelDivision opts, - writerListings = optListings opts, - writerSlideLevel = optSlideLevel opts, - writerHighlightStyle = highlightStyle, - writerSetextHeaders = optSetextHeaders opts, - writerEpubMetadata = epubMetadata, - writerEpubStylesheet = epubStylesheet, - writerEpubFonts = optEpubFonts opts, - writerEpubChapterLevel = optEpubChapterLevel opts, - writerTOCDepth = optTOCDepth opts, - writerReferenceDoc = optReferenceDoc opts, - writerLaTeXArgs = optLaTeXEngineArgs opts - } - - -#ifdef _WINDOWS - let istty = True -#else - istty <- queryTerminal stdOutput -#endif - when (istty && not (isTextFormat format) && outputFile == "-") $ - err 5 $ "Cannot write " ++ format ++ " output to stdout.\n" ++ - "Specify an output file using the -o option." - - - let transforms = case optBaseHeaderLevel opts of - x | x > 1 -> [headerShift (x - 1)] - | otherwise -> [] - - let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" - then 0 - else optTabStop opts) - - readSources :: MonadIO m => [FilePath] -> m String - readSources srcs = convertTabs . intercalate "\n" <$> - mapM readSource srcs - - let runIO' :: PandocIO a -> IO a - runIO' f = do - (res, reports) <- runIOorExplode $ do - setVerbosity verbosity - x <- f - rs <- getLog - return (x, rs) - let isWarning (WARNING, _) = True - isWarning _ = False - when (optFailIfWarnings opts && any isWarning reports) $ - err 3 "Failing because there were warnings." - return res - - let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag) - sourceToDoc sources' = - case reader of - StringReader r - | optFileScope opts || readerName == "json" -> do - pairs <- mapM - (readSource >=> withMediaBag . r readerOpts) sources - return (mconcat (map fst pairs), mconcat (map snd pairs)) - | otherwise -> - readSources sources' >>= withMediaBag . r readerOpts - ByteStringReader r -> do - pairs <- mapM (readFile' >=> - withMediaBag . r readerOpts) sources - return (mconcat (map fst pairs), mconcat (map snd pairs)) - - runIO' $ do - (doc, media) <- sourceToDoc sources - doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=> - adjustMetadata (optMetadata opts) >=> - applyTransforms transforms >=> - applyFilters datadir filters' [format]) doc - - case writer of - -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile - ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile - StringWriter f - | pdfOutput -> do - -- make sure writer is latex or beamer or context or html5 - unless (laTeXOutput || conTeXtOutput || html5Output) $ - err 47 $ "cannot produce pdf output with " ++ format ++ - " writer" - - let pdfprog = case () of - _ | conTeXtOutput -> "context" - _ | html5Output -> "wkhtmltopdf" - _ -> optLaTeXEngine opts - -- check for pdf creating program - mbPdfProg <- liftIO $ findExecutable pdfprog - when (isNothing mbPdfProg) $ - err 41 $ pdfprog ++ " not found. " ++ - pdfprog ++ " is needed for pdf output." - - res <- makePDF pdfprog f writerOptions verbosity media doc' - case res of - Right pdf -> writeFnBinary outputFile pdf - Left err' -> liftIO $ do - B.hPutStr stderr err' - B.hPut stderr $ B.pack [10] - err 43 "Error producing PDF" - | otherwise -> do - let htmlFormat = format `elem` - ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] - selfcontain = if optSelfContained opts && htmlFormat - then makeSelfContained writerOptions media - else return - handleEntities = if htmlFormat && optAscii opts - then toEntities - else id - output <- f writerOptions doc' - selfcontain (output ++ ['\n' | not standalone]) >>= - writerFn outputFile . handleEntities - -type Transform = Pandoc -> Pandoc - -copyrightMessage :: String -copyrightMessage = intercalate "\n" [ - "", - "Copyright (C) 2006-2016 John MacFarlane", - "Web: http://pandoc.org", - "This is free software; see the source for copying conditions.", - "There is no warranty, not even for merchantability or fitness", - "for a particular purpose." ] - -compileInfo :: String -compileInfo = - "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++ - VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting - --- | Converts a list of strings into a single string with the items printed as --- comma separated words in lines with a maximum line length. -wrapWords :: Int -> Int -> [String] -> String -wrapWords indent c = wrap' (c - indent) (c - indent) - where - wrap' _ _ [] = "" - wrap' cols remaining (x:xs) - | remaining == cols = - x ++ wrap' cols (remaining - length x) xs - | (length x + 1) > remaining = - ",\n" ++ replicate indent ' ' ++ x ++ - wrap' cols (cols - length x) xs - | otherwise = - ", " ++ x ++ - wrap' cols (remaining - length x - 2) xs - -isTextFormat :: String -> Bool -isTextFormat s = s `notElem` ["odt","docx","epub","epub3"] - -externalFilter :: MonadIO m => FilePath -> [String] -> Pandoc -> m Pandoc -externalFilter f args' d = liftIO $ do - exists <- doesFileExist f - isExecutable <- if exists - then executable <$> getPermissions f - else return True - let (f', args'') = if exists - then case map toLower (takeExtension f) of - _ | isExecutable -> ("." </> f, args') - ".py" -> ("python", f:args') - ".hs" -> ("runhaskell", f:args') - ".pl" -> ("perl", f:args') - ".rb" -> ("ruby", f:args') - ".php" -> ("php", f:args') - ".js" -> ("node", f:args') - _ -> (f, args') - else (f, args') - unless (exists && isExecutable) $ do - mbExe <- findExecutable f' - when (isNothing mbExe) $ - err 83 $ "Error running filter " ++ f ++ ":\n" ++ - "Could not find executable '" ++ f' ++ "'." - env <- getEnvironment - let env' = Just $ ("PANDOC_VERSION", pandocVersion) : env - (exitcode, outbs) <- E.handle filterException $ - pipeProcess env' f' args'' $ encode d - case exitcode of - ExitSuccess -> return $ either error id $ eitherDecode' outbs - ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++ - "Filter returned error status " ++ show ec - where filterException :: E.SomeException -> IO a - filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ - show e - -highlightingStyles :: [(String, Style)] -highlightingStyles = - [("pygments", pygments), - ("tango", tango), - ("espresso", espresso), - ("zenburn", zenburn), - ("kate", kate), - ("monochrome", monochrome), - ("breezedark", breezeDark), - ("haddock", haddock)] - --- | Data structure for command line options. -data Opt = Opt - { optTabStop :: Int -- ^ Number of spaces per tab - , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces - , optStandalone :: Bool -- ^ Include header, footer - , optReader :: Maybe String -- ^ Reader format - , optWriter :: Maybe String -- ^ Writer format - , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX - , optTableOfContents :: Bool -- ^ Include table of contents - , optBaseHeaderLevel :: Int -- ^ Base header level - , optTemplate :: Maybe FilePath -- ^ Custom template - , optVariables :: [(String,String)] -- ^ Template variables to set - , optMetadata :: M.Map String MetaValue -- ^ Metadata fields to set - , optOutputFile :: FilePath -- ^ Name of output file - , optNumberSections :: Bool -- ^ Number sections in LaTeX - , optNumberOffset :: [Int] -- ^ Starting number for sections - , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 - , optSelfContained :: Bool -- ^ Make HTML accessible offline - , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML - , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code - , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions - , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math - , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc - , optEpubStylesheet :: Maybe FilePath -- ^ EPUB stylesheet - , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata - , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed - , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters - , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub - , optTOCDepth :: Int -- ^ Number of levels to include in TOC - , optDumpArgs :: Bool -- ^ Output command-line arguments - , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments - , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output - , optFailIfWarnings :: Bool -- ^ Fail on warnings - , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output - , optDpi :: Int -- ^ Dpi - , optWrapText :: WrapOption -- ^ Options for wrapping text - , optColumns :: Int -- ^ Line length in characters - , optFilters :: [FilePath] -- ^ Filters to apply - , optEmailObfuscation :: ObfuscationMethod - , optIdentifierPrefix :: String - , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks - , optDataDir :: Maybe FilePath - , optCiteMethod :: CiteMethod -- ^ Method to output cites - , optListings :: Bool -- ^ Use listings package for code blocks - , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf - , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine - , optSlideLevel :: Maybe Int -- ^ Header level that creates slides - , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 - , optAscii :: Bool -- ^ Use ascii characters only in html - , optDefaultImageExtension :: String -- ^ Default image extension - , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media - , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. - , optFileScope :: Bool -- ^ Parse input files before combining - , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX - , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX - , optTitlePrefix :: Maybe String -- ^ Prefix for title - , optCss :: [FilePath] -- ^ CSS files to link to - , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before - , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body - , optIncludeInHeader :: [FilePath] -- ^ Files to include in header - } - --- | Defaults for command-line options. -defaultOpts :: Opt -defaultOpts = Opt - { optTabStop = 4 - , optPreserveTabs = False - , optStandalone = False - , optReader = Nothing - , optWriter = Nothing - , optParseRaw = False - , optTableOfContents = False - , optBaseHeaderLevel = 1 - , optTemplate = Nothing - , optVariables = [] - , optMetadata = M.empty - , optOutputFile = "-" -- "-" means stdout - , optNumberSections = False - , optNumberOffset = [0,0,0,0,0,0] - , optSectionDivs = False - , optIncremental = False - , optSelfContained = False - , optHtmlQTags = False - , optHighlightStyle = Just "pygments" - , optTopLevelDivision = TopLevelDefault - , optHTMLMathMethod = PlainMath - , optReferenceDoc = Nothing - , optEpubStylesheet = Nothing - , optEpubMetadata = Nothing - , optEpubFonts = [] - , optEpubChapterLevel = 1 - , optEpubCoverImage = Nothing - , optTOCDepth = 3 - , optDumpArgs = False - , optIgnoreArgs = False - , optVerbosity = WARNING - , optFailIfWarnings = False - , optReferenceLinks = False - , optReferenceLocation = EndOfDocument - , optDpi = 96 - , optWrapText = WrapAuto - , optColumns = 72 - , optFilters = [] - , optEmailObfuscation = NoObfuscation - , optIdentifierPrefix = "" - , optIndentedCodeClasses = [] - , optDataDir = Nothing - , optCiteMethod = Citeproc - , optListings = False - , optLaTeXEngine = "pdflatex" - , optLaTeXEngineArgs = [] - , optSlideLevel = Nothing - , optSetextHeaders = True - , optAscii = False - , optDefaultImageExtension = "" - , optExtractMedia = Nothing - , optTrackChanges = AcceptChanges - , optFileScope = False - , optKaTeXStylesheet = Nothing - , optKaTeXJS = Nothing - , optTitlePrefix = Nothing - , optCss = [] - , optIncludeBeforeBody = [] - , optIncludeAfterBody = [] - , optIncludeInHeader = [] - } - -- | A list of functions, each transforming the options data structure -- in response to a command-line option. options :: [OptDescr (Opt -> IO Opt)] @@ -751,21 +188,16 @@ options = , Option "M" ["metadata"] (ReqArg (\arg opt -> do - let (key,val) = case break (`elem` ":=") arg of - (k,_:v) -> (k, readMetaValue v) - (k,_) -> (k, MetaBool True) - return opt{ optMetadata = addMetadata key val - $ optMetadata opt }) + let (key, val) = splitField arg + return opt{ optMetadata = (key, val) : optMetadata opt }) "KEY[:VALUE]") "" , Option "V" ["variable"] (ReqArg (\arg opt -> do - let (key,val) = case break (`elem` ":=") arg of - (k,_:v) -> (k,v) - (k,_) -> (k,"true") - return opt{ optVariables = (key,val) : optVariables opt }) + let (key, val) = splitField arg + return opt{ optVariables = (key, val) : optVariables opt }) "KEY[:VALUE]") "" @@ -1066,29 +498,24 @@ options = , Option "" ["bibliography"] (ReqArg - (\arg opt -> return opt{ optMetadata = addMetadata - "bibliography" (readMetaValue arg) - $ optMetadata opt - }) + (\arg opt -> return opt{ optMetadata = + ("bibliography", arg) : optMetadata opt }) "FILE") "" , Option "" ["csl"] (ReqArg (\arg opt -> - return opt{ optMetadata = addMetadata "csl" - (readMetaValue arg) - $ optMetadata opt }) + return opt{ optMetadata = + ("csl", arg) : optMetadata opt }) "FILE") "" , Option "" ["citation-abbreviations"] (ReqArg (\arg opt -> - return opt{ optMetadata = addMetadata - "citation-abbreviations" - (readMetaValue arg) - $ optMetadata opt }) + return opt{ optMetadata = + ("citation-abbreviations", arg): optMetadata opt }) "FILE") "" @@ -1207,8 +634,8 @@ options = map ("--" ++) longs let allopts = unwords (concatMap optnames options) UTF8.hPutStrLn stdout $ printf tpl allopts - (unwords (map fst (readers :: [(String, Reader PandocIO)]))) - (unwords (map fst (writers :: [(String, Writer PandocIO)]))) + (unwords readers'names) + (unwords writers'names) (unwords $ map fst highlightingStyles) ddir exitSuccess )) @@ -1217,7 +644,6 @@ options = , Option "" ["list-input-formats"] (NoArg (\_ -> do - let readers'names = sort (map fst (readers :: [(String, Reader PandocIO)])) mapM_ (UTF8.hPutStrLn stdout) readers'names exitSuccess )) "" @@ -1225,7 +651,6 @@ options = , Option "" ["list-output-formats"] (NoArg (\_ -> do - let writers'names = sort (map fst (writers :: [(String, Writer PandocIO)])) mapM_ (UTF8.hPutStrLn stdout) writers'names exitSuccess )) "" @@ -1286,179 +711,70 @@ options = ] -addMetadata :: String -> MetaValue -> M.Map String MetaValue - -> M.Map String MetaValue -addMetadata k v m = case M.lookup k m of - Nothing -> M.insert k v m - Just (MetaList xs) -> M.insert k - (MetaList (xs ++ [v])) m - Just x -> M.insert k (MetaList [v, x]) m - -readMetaValue :: String -> MetaValue -readMetaValue s = case decode (UTF8.fromString s) of - Just (Yaml.String t) -> MetaString $ T.unpack t - Just (Yaml.Bool b) -> MetaBool b - _ -> MetaString s - -- Returns usage message usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]") --- Determine default reader based on source file extensions -defaultReaderName :: String -> [FilePath] -> String -defaultReaderName fallback [] = fallback -defaultReaderName fallback (x:xs) = - case takeExtension (map toLower x) of - ".xhtml" -> "html" - ".html" -> "html" - ".htm" -> "html" - ".md" -> "markdown" - ".markdown" -> "markdown" - ".tex" -> "latex" - ".latex" -> "latex" - ".ltx" -> "latex" - ".rst" -> "rst" - ".org" -> "org" - ".lhs" -> "markdown+lhs" - ".db" -> "docbook" - ".opml" -> "opml" - ".wiki" -> "mediawiki" - ".dokuwiki" -> "dokuwiki" - ".textile" -> "textile" - ".native" -> "native" - ".json" -> "json" - ".docx" -> "docx" - ".t2t" -> "t2t" - ".epub" -> "epub" - ".odt" -> "odt" - ".pdf" -> "pdf" -- so we get an "unknown reader" error - ".doc" -> "doc" -- so we get an "unknown reader" error - _ -> defaultReaderName fallback xs - --- Returns True if extension of first source is .lhs -lhsExtension :: [FilePath] -> Bool -lhsExtension (x:_) = takeExtension x == ".lhs" -lhsExtension _ = False - --- Determine default writer based on output file extension -defaultWriterName :: FilePath -> String -defaultWriterName "-" = "html" -- no output file -defaultWriterName x = - case takeExtension (map toLower x) of - "" -> "markdown" -- empty extension - ".tex" -> "latex" - ".latex" -> "latex" - ".ltx" -> "latex" - ".context" -> "context" - ".ctx" -> "context" - ".rtf" -> "rtf" - ".rst" -> "rst" - ".s5" -> "s5" - ".native" -> "native" - ".json" -> "json" - ".txt" -> "markdown" - ".text" -> "markdown" - ".md" -> "markdown" - ".markdown" -> "markdown" - ".textile" -> "textile" - ".lhs" -> "markdown+lhs" - ".texi" -> "texinfo" - ".texinfo" -> "texinfo" - ".db" -> "docbook" - ".odt" -> "odt" - ".docx" -> "docx" - ".epub" -> "epub" - ".org" -> "org" - ".asciidoc" -> "asciidoc" - ".adoc" -> "asciidoc" - ".pdf" -> "latex" - ".fb2" -> "fb2" - ".opml" -> "opml" - ".icml" -> "icml" - ".tei.xml" -> "tei" - ".tei" -> "tei" - ['.',y] | y `elem` ['1'..'9'] -> "man" - _ -> "html" - --- Transformations of a Pandoc document post-parsing: - -extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc -extractMedia media dir d = - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - extractMediaBag True dir media - return $ walk (adjustImagePath dir fps) d - -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) -adjustImagePath _ _ x = x - -adjustMetadata :: Monad m => M.Map String MetaValue -> Pandoc -> m Pandoc -adjustMetadata metadata d = return $ M.foldWithKey setMeta d metadata - -applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc -applyTransforms transforms d = return $ foldr ($) d transforms - - -- First we check to see if a filter is found. If not, and if it's - -- not an absolute path, we check to see whether it's in `userdir/filters`. - -- If not, we leave it unchanged. -expandFilterPath :: MonadIO m => Maybe FilePath -> FilePath -> m FilePath -expandFilterPath mbDatadir fp = liftIO $ do - fpExists <- doesFileExist fp - if fpExists - then return fp - else case mbDatadir of - Just datadir | isRelative fp -> do - let filterPath = (datadir </> "filters" </> fp) - filterPathExists <- doesFileExist filterPath - if filterPathExists - then return filterPath - else return fp - _ -> return fp - -applyFilters :: MonadIO m - => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc -applyFilters mbDatadir filters args d = do - expandedFilters <- mapM (expandFilterPath mbDatadir) filters - foldrM ($) d $ map (flip externalFilter args) expandedFilters +copyrightMessage :: String +copyrightMessage = intercalate "\n" [ + "", + "Copyright (C) 2006-2016 John MacFarlane", + "Web: http://pandoc.org", + "This is free software; see the source for copying conditions.", + "There is no warranty, not even for merchantability or fitness", + "for a particular purpose." ] + +compileInfo :: String +compileInfo = + "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++ + VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting + +-- | Converts a list of strings into a single string with the items printed as +-- comma separated words in lines with a maximum line length. +wrapWords :: Int -> Int -> [String] -> String +wrapWords indent c = wrap' (c - indent) (c - indent) + where + wrap' _ _ [] = "" + wrap' cols remaining (x:xs) + | remaining == cols = + x ++ wrap' cols (remaining - length x) xs + | (length x + 1) > remaining = + ",\n" ++ replicate indent ' ' ++ x ++ + wrap' cols (cols - length x) xs + | otherwise = + ", " ++ x ++ + wrap' cols (remaining - length x - 2) xs + +addDeprecationNote :: String -> [String] -> [String] +addDeprecationNote "--smart" = + (("--smart has been removed. Use +smart or -smart extension instead.\n" ++ + "For example: pandoc -f markdown+smart -t markdown-smart.") :) +addDeprecationNote "-S" = addDeprecationNote "--smart" +addDeprecationNote "--old-dashes" = + ("--old-dashes has been removed. Use +old_dashes extension instead." :) +addDeprecationNote "--no-wrap" = + ("--no-wrap has been removed. Use --wrap=none instead." :) +addDeprecationNote "--chapters" = + ("--chapters has been removed. Use --top-level-division=chapter instead." :) +addDeprecationNote "--reference-docx" = + ("--reference-docx has been removed. Use --reference-doc instead." :) +addDeprecationNote "--reference-odt" = + ("--reference-odt has been removed. Use --reference-doc instead." :) +addDeprecationNote x = + (("Unknown option " ++ x ++ ".") :) uppercaseFirstLetter :: String -> String uppercaseFirstLetter (c:cs) = toUpper c : cs uppercaseFirstLetter [] = [] -readSource :: MonadIO m => FilePath -> m String -readSource "-" = liftIO UTF8.getContents -readSource src = case parseURI src of - Just u | uriScheme u `elem` ["http:","https:"] -> - readURI src - | uriScheme u == "file:" -> - liftIO $ UTF8.readFile (uriPath u) - _ -> liftIO $ UTF8.readFile src - -readURI :: MonadIO m => FilePath -> m String -readURI src = do - res <- liftIO $ openURL src - case res of - Left e -> liftIO $ throwIO e - Right (bs,_) -> return $ UTF8.toString bs - -readFile' :: MonadIO m => FilePath -> m B.ByteString -readFile' "-" = liftIO $ B.getContents -readFile' f = liftIO $ B.readFile f - -writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m () -writeFnBinary "-" = liftIO . B.putStr -writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f) - -writerFn :: MonadIO m => FilePath -> String -> m () -writerFn "-" = liftIO . UTF8.putStr -writerFn f = liftIO . UTF8.writeFile f - -lookupHighlightStyle :: Maybe String -> IO (Maybe Style) -lookupHighlightStyle Nothing = return Nothing -lookupHighlightStyle (Just s) = - case lookup (map toLower s) highlightingStyles of - Just sty -> return (Just sty) - Nothing -> err 68 $ "Unknown highlight-style " ++ s +readers'names :: [String] +readers'names = sort (map fst (readers :: [(String, Reader PandocIO)])) + +writers'names :: [String] +writers'names = sort (map fst (writers :: [(String, Writer PandocIO)])) + +splitField :: String -> (String, String) +splitField s = + case break (`elem` ":=") s of + (k,_:v) -> (k,v) + (k,[]) -> (k,"true") diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs new file mode 100644 index 000000000..e51a45395 --- /dev/null +++ b/src/Text/Pandoc/App.hs @@ -0,0 +1,720 @@ +{-# LANGUAGE CPP, TupleSections, ScopedTypeVariables, PatternGuards #-} +{- +Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.App + Copyright : Copyright (C) 2006-2016 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Does a pandoc conversion based on command-line options. +-} +module Text.Pandoc.App ( + convertWithOpts + , Opt(..) + , defaultOpts + ) where +import Text.Pandoc +import Text.Pandoc.Builder (setMeta) +import Text.Pandoc.PDF (makePDF) +import Text.Pandoc.Walk (walk) +import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, + headerShift, err, openURL ) +import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag ) +import Text.Pandoc.XML ( toEntities ) +import Text.Pandoc.Highlighting (highlightingStyles) +import Text.Pandoc.SelfContained ( makeSelfContained ) +import Text.Pandoc.Process (pipeProcess) +import Skylighting ( Style ) +import System.Environment ( getEnvironment ) +import System.Exit ( ExitCode (..), exitSuccess ) +import System.FilePath +import Data.Char ( toLower ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import System.Directory ( getAppUserDataDirectory, findExecutable, + doesFileExist, Permissions(..), getPermissions ) +import System.IO ( stdout, stderr ) +import System.IO.Error ( isDoesNotExistError ) +import qualified Control.Exception as E +import Control.Exception.Extensible ( throwIO ) +import qualified Text.Pandoc.UTF8 as UTF8 +import Control.Monad (when, unless, (>=>)) +import Data.Maybe (fromMaybe, isNothing, isJust) +import Data.Foldable (foldrM) +import Network.URI (parseURI, isURI, URI(..)) +import qualified Data.ByteString.Lazy as B +import Data.Aeson (eitherDecode', encode) +import Data.Yaml (decode) +import qualified Data.Yaml as Yaml +import qualified Data.Text as T +#ifndef _WINDOWS +import System.Posix.Terminal (queryTerminal) +import System.Posix.IO (stdOutput) +#endif +import Control.Monad.Trans +import Text.Pandoc.Class (withMediaBag, PandocIO, getLog, setVerbosity) + +convertWithOpts :: Opt -> [FilePath] -> IO () +convertWithOpts opts args = do + let outputFile = optOutputFile opts + let filters = optFilters opts + let verbosity = optVerbosity opts + + when (optDumpArgs opts) $ + do UTF8.hPutStrLn stdout outputFile + mapM_ (UTF8.hPutStrLn stdout) args + exitSuccess + + epubStylesheet <- case optEpubStylesheet opts of + Nothing -> return Nothing + Just fp -> Just <$> UTF8.readFile fp + + epubMetadata <- case optEpubMetadata opts of + Nothing -> return Nothing + Just fp -> Just <$> UTF8.readFile fp + + let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" + let mathMethod = + case (optKaTeXJS opts, optKaTeXStylesheet opts) of + (Nothing, _) -> optHTMLMathMethod opts + (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) + + + -- --bibliography implies -F pandoc-citeproc for backwards compatibility: + let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) && + optCiteMethod opts `notElem` [Natbib, Biblatex] && + "pandoc-citeproc" `notElem` map takeBaseName filters + let filters' = if needsCiteproc then "pandoc-citeproc" : filters + else filters + + let sources = case args of + [] -> ["-"] + xs | optIgnoreArgs opts -> ["-"] + | otherwise -> xs + + datadir <- case optDataDir opts of + Nothing -> E.catch + (Just <$> getAppUserDataDirectory "pandoc") + (\e -> let _ = (e :: E.SomeException) + in return Nothing) + Just _ -> return $ optDataDir opts + + -- assign reader and writer based on options and filenames + let readerName = case optReader opts of + Nothing -> defaultReaderName + (if any isURI sources + then "html" + else "markdown") sources + Just x -> map toLower x + + let writerName = case optWriter opts of + Nothing -> defaultWriterName outputFile + Just x -> map toLower x + let format = takeWhile (`notElem` ['+','-']) + $ takeFileName writerName -- in case path to lua script + + let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" + + let laTeXOutput = format `elem` ["latex", "beamer"] + let conTeXtOutput = format == "context" + let html5Output = format == "html5" || format == "html" + + -- disabling the custom writer for now + writer <- if ".lua" `isSuffixOf` format + -- note: use non-lowercased version writerName + then error "custom writers disabled for now" + else case getWriter writerName of + Left e -> err 9 $ + if format == "pdf" + then e ++ + "\nTo create a pdf with pandoc, use " ++ + "the latex or beamer writer and specify\n" ++ + "an output file with .pdf extension " ++ + "(pandoc -t latex -o filename.pdf)." + else e + Right w -> return (w :: Writer PandocIO) + + -- TODO: we have to get the input and the output into the state for + -- the sake of the text2tags reader. + reader <- case getReader readerName of + Right r -> return (r :: Reader PandocIO) + Left e -> err 7 e' + where e' = case readerName of + "pdf" -> e ++ + "\nPandoc can convert to PDF, but not from PDF." + "doc" -> e ++ + "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." + _ -> e + + let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput + + templ <- case optTemplate opts of + _ | not standalone -> return Nothing + Nothing -> do + deftemp <- getDefaultTemplate datadir format + case deftemp of + Left e -> throwIO e + Right t -> return (Just t) + Just tp -> do + -- strip off extensions + let tp' = case takeExtension tp of + "" -> tp <.> format + _ -> tp + Just <$> E.catch (UTF8.readFile tp') + (\e -> if isDoesNotExistError e + then E.catch + (readDataFileUTF8 datadir + ("templates" </> tp')) + (\e' -> let _ = (e' :: E.SomeException) + in throwIO e') + else throwIO e) + + let addStringAsVariable varname s vars = return $ (varname, s) : vars + + let addContentsAsVariable varname fp vars = do + s <- UTF8.readFile fp + return $ (varname, s) : vars + + -- note: this reverses the list constructed in option parsing, + -- which in turn was reversed from the command-line order, + -- so we end up with the correct order in the variable list: + let withList _ [] vars = return vars + withList f (x:xs) vars = f x vars >>= withList f xs + + variables <- return (optVariables opts) + >>= + withList (addContentsAsVariable "include-before") + (optIncludeBeforeBody opts) + >>= + withList (addContentsAsVariable "include-after") + (optIncludeAfterBody opts) + >>= + withList (addContentsAsVariable "header-includes") + (optIncludeInHeader opts) + >>= + withList (addStringAsVariable "css") (optCss opts) + >>= + maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts) + >>= + maybe return (addStringAsVariable "epub-cover-image") + (optEpubCoverImage opts) + >>= + (\vars -> case mathMethod of + LaTeXMathML Nothing -> do + s <- readDataFileUTF8 datadir "LaTeXMathML.js" + return $ ("mathml-script", s) : vars + _ -> return vars) + >>= + (\vars -> if format == "dzslides" + then do + dztempl <- readDataFileUTF8 datadir + ("dzslides" </> "template.html") + let dzline = "<!-- {{{{ dzslides core" + let dzcore = unlines + $ dropWhile (not . (dzline `isPrefixOf`)) + $ lines dztempl + return $ ("dzslides-core", dzcore) : vars + else return vars) + + let sourceURL = case sources of + [] -> Nothing + (x:_) -> case parseURI x of + Just u + | uriScheme u `elem` ["http:","https:"] -> + Just $ show u{ uriQuery = "", + uriFragment = "" } + _ -> Nothing + + let readerOpts = def{ readerStandalone = standalone + , readerParseRaw = optParseRaw opts + , readerColumns = optColumns opts + , readerTabStop = optTabStop opts + , readerIndentedCodeClasses = optIndentedCodeClasses opts + , readerApplyMacros = not laTeXOutput + , readerDefaultImageExtension = + optDefaultImageExtension opts + , readerTrackChanges = optTrackChanges opts + } + + highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts + + let writerOptions = def { writerTemplate = templ, + writerVariables = variables, + writerTabStop = optTabStop opts, + writerTableOfContents = optTableOfContents opts, + writerHTMLMathMethod = mathMethod, + writerIncremental = optIncremental opts, + writerCiteMethod = optCiteMethod opts, + writerNumberSections = optNumberSections opts, + writerNumberOffset = optNumberOffset opts, + writerSectionDivs = optSectionDivs opts, + writerReferenceLinks = optReferenceLinks opts, + writerReferenceLocation = optReferenceLocation opts, + writerDpi = optDpi opts, + writerWrapText = optWrapText opts, + writerColumns = optColumns opts, + writerEmailObfuscation = optEmailObfuscation opts, + writerIdentifierPrefix = optIdentifierPrefix opts, + writerSourceURL = sourceURL, + writerUserDataDir = datadir, + writerHtmlQTags = optHtmlQTags opts, + writerTopLevelDivision = optTopLevelDivision opts, + writerListings = optListings opts, + writerSlideLevel = optSlideLevel opts, + writerHighlightStyle = highlightStyle, + writerSetextHeaders = optSetextHeaders opts, + writerEpubMetadata = epubMetadata, + writerEpubStylesheet = epubStylesheet, + writerEpubFonts = optEpubFonts opts, + writerEpubChapterLevel = optEpubChapterLevel opts, + writerTOCDepth = optTOCDepth opts, + writerReferenceDoc = optReferenceDoc opts, + writerLaTeXArgs = optLaTeXEngineArgs opts + } + + +#ifdef _WINDOWS + let istty = True +#else + istty <- queryTerminal stdOutput +#endif + when (istty && not (isTextFormat format) && outputFile == "-") $ + err 5 $ "Cannot write " ++ format ++ " output to stdout.\n" ++ + "Specify an output file using the -o option." + + + let transforms = case optBaseHeaderLevel opts of + x | x > 1 -> [headerShift (x - 1)] + | otherwise -> [] + + let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" + then 0 + else optTabStop opts) + + readSources :: MonadIO m => [FilePath] -> m String + readSources srcs = convertTabs . intercalate "\n" <$> + mapM readSource srcs + + let runIO' :: PandocIO a -> IO a + runIO' f = do + (res, reports) <- runIOorExplode $ do + setVerbosity verbosity + x <- f + rs <- getLog + return (x, rs) + let isWarning (WARNING, _) = True + isWarning _ = False + when (optFailIfWarnings opts && any isWarning reports) $ + err 3 "Failing because there were warnings." + return res + + let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag) + sourceToDoc sources' = + case reader of + StringReader r + | optFileScope opts || readerName == "json" -> do + pairs <- mapM + (readSource >=> withMediaBag . r readerOpts) sources + return (mconcat (map fst pairs), mconcat (map snd pairs)) + | otherwise -> + readSources sources' >>= withMediaBag . r readerOpts + ByteStringReader r -> do + pairs <- mapM (readFile' >=> + withMediaBag . r readerOpts) sources + return (mconcat (map fst pairs), mconcat (map snd pairs)) + + runIO' $ do + (doc, media) <- sourceToDoc sources + doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=> + return . flip (foldr addMetadata) (optMetadata opts) >=> + applyTransforms transforms >=> + applyFilters datadir filters' [format]) doc + + case writer of + -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile + ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile + StringWriter f + | pdfOutput -> do + -- make sure writer is latex or beamer or context or html5 + unless (laTeXOutput || conTeXtOutput || html5Output) $ + err 47 $ "cannot produce pdf output with " ++ format ++ + " writer" + + let pdfprog = case () of + _ | conTeXtOutput -> "context" + _ | html5Output -> "wkhtmltopdf" + _ -> optLaTeXEngine opts + -- check for pdf creating program + mbPdfProg <- liftIO $ findExecutable pdfprog + when (isNothing mbPdfProg) $ + err 41 $ pdfprog ++ " not found. " ++ + pdfprog ++ " is needed for pdf output." + + res <- makePDF pdfprog f writerOptions verbosity media doc' + case res of + Right pdf -> writeFnBinary outputFile pdf + Left err' -> liftIO $ do + B.hPutStr stderr err' + B.hPut stderr $ B.pack [10] + err 43 "Error producing PDF" + | otherwise -> do + let htmlFormat = format `elem` + ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] + selfcontain = if optSelfContained opts && htmlFormat + then makeSelfContained writerOptions media + else return + handleEntities = if htmlFormat && optAscii opts + then toEntities + else id + output <- f writerOptions doc' + selfcontain (output ++ ['\n' | not standalone]) >>= + writerFn outputFile . handleEntities + +type Transform = Pandoc -> Pandoc + +isTextFormat :: String -> Bool +isTextFormat s = s `notElem` ["odt","docx","epub","epub3"] + +externalFilter :: MonadIO m => FilePath -> [String] -> Pandoc -> m Pandoc +externalFilter f args' d = liftIO $ do + exists <- doesFileExist f + isExecutable <- if exists + then executable <$> getPermissions f + else return True + let (f', args'') = if exists + then case map toLower (takeExtension f) of + _ | isExecutable -> ("." </> f, args') + ".py" -> ("python", f:args') + ".hs" -> ("runhaskell", f:args') + ".pl" -> ("perl", f:args') + ".rb" -> ("ruby", f:args') + ".php" -> ("php", f:args') + ".js" -> ("node", f:args') + _ -> (f, args') + else (f, args') + unless (exists && isExecutable) $ do + mbExe <- findExecutable f' + when (isNothing mbExe) $ + err 83 $ "Error running filter " ++ f ++ ":\n" ++ + "Could not find executable '" ++ f' ++ "'." + env <- getEnvironment + let env' = Just $ ("PANDOC_VERSION", pandocVersion) : env + (exitcode, outbs) <- E.handle filterException $ + pipeProcess env' f' args'' $ encode d + case exitcode of + ExitSuccess -> return $ either error id $ eitherDecode' outbs + ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++ + "Filter returned error status " ++ show ec + where filterException :: E.SomeException -> IO a + filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ + show e + +-- | Data structure for command line options. +data Opt = Opt + { optTabStop :: Int -- ^ Number of spaces per tab + , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces + , optStandalone :: Bool -- ^ Include header, footer + , optReader :: Maybe String -- ^ Reader format + , optWriter :: Maybe String -- ^ Writer format + , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX + , optTableOfContents :: Bool -- ^ Include table of contents + , optBaseHeaderLevel :: Int -- ^ Base header level + , optTemplate :: Maybe FilePath -- ^ Custom template + , optVariables :: [(String,String)] -- ^ Template variables to set + , optMetadata :: [(String, String)] -- ^ Metadata fields to set + , optOutputFile :: FilePath -- ^ Name of output file + , optNumberSections :: Bool -- ^ Number sections in LaTeX + , optNumberOffset :: [Int] -- ^ Starting number for sections + , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML + , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 + , optSelfContained :: Bool -- ^ Make HTML accessible offline + , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML + , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code + , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions + , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math + , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc + , optEpubStylesheet :: Maybe FilePath -- ^ EPUB stylesheet + , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata + , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed + , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters + , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub + , optTOCDepth :: Int -- ^ Number of levels to include in TOC + , optDumpArgs :: Bool -- ^ Output command-line arguments + , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments + , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output + , optFailIfWarnings :: Bool -- ^ Fail on warnings + , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output + , optDpi :: Int -- ^ Dpi + , optWrapText :: WrapOption -- ^ Options for wrapping text + , optColumns :: Int -- ^ Line length in characters + , optFilters :: [FilePath] -- ^ Filters to apply + , optEmailObfuscation :: ObfuscationMethod + , optIdentifierPrefix :: String + , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks + , optDataDir :: Maybe FilePath + , optCiteMethod :: CiteMethod -- ^ Method to output cites + , optListings :: Bool -- ^ Use listings package for code blocks + , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf + , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine + , optSlideLevel :: Maybe Int -- ^ Header level that creates slides + , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 + , optAscii :: Bool -- ^ Use ascii characters only in html + , optDefaultImageExtension :: String -- ^ Default image extension + , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media + , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. + , optFileScope :: Bool -- ^ Parse input files before combining + , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX + , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX + , optTitlePrefix :: Maybe String -- ^ Prefix for title + , optCss :: [FilePath] -- ^ CSS files to link to + , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before + , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body + , optIncludeInHeader :: [FilePath] -- ^ Files to include in header + } + +-- | Defaults for command-line options. +defaultOpts :: Opt +defaultOpts = Opt + { optTabStop = 4 + , optPreserveTabs = False + , optStandalone = False + , optReader = Nothing + , optWriter = Nothing + , optParseRaw = False + , optTableOfContents = False + , optBaseHeaderLevel = 1 + , optTemplate = Nothing + , optVariables = [] + , optMetadata = [] + , optOutputFile = "-" -- "-" means stdout + , optNumberSections = False + , optNumberOffset = [0,0,0,0,0,0] + , optSectionDivs = False + , optIncremental = False + , optSelfContained = False + , optHtmlQTags = False + , optHighlightStyle = Just "pygments" + , optTopLevelDivision = TopLevelDefault + , optHTMLMathMethod = PlainMath + , optReferenceDoc = Nothing + , optEpubStylesheet = Nothing + , optEpubMetadata = Nothing + , optEpubFonts = [] + , optEpubChapterLevel = 1 + , optEpubCoverImage = Nothing + , optTOCDepth = 3 + , optDumpArgs = False + , optIgnoreArgs = False + , optVerbosity = WARNING + , optFailIfWarnings = False + , optReferenceLinks = False + , optReferenceLocation = EndOfDocument + , optDpi = 96 + , optWrapText = WrapAuto + , optColumns = 72 + , optFilters = [] + , optEmailObfuscation = NoObfuscation + , optIdentifierPrefix = "" + , optIndentedCodeClasses = [] + , optDataDir = Nothing + , optCiteMethod = Citeproc + , optListings = False + , optLaTeXEngine = "pdflatex" + , optLaTeXEngineArgs = [] + , optSlideLevel = Nothing + , optSetextHeaders = True + , optAscii = False + , optDefaultImageExtension = "" + , optExtractMedia = Nothing + , optTrackChanges = AcceptChanges + , optFileScope = False + , optKaTeXStylesheet = Nothing + , optKaTeXJS = Nothing + , optTitlePrefix = Nothing + , optCss = [] + , optIncludeBeforeBody = [] + , optIncludeAfterBody = [] + , optIncludeInHeader = [] + } + +addMetadata :: (String, String) -> Pandoc -> Pandoc +addMetadata (k, v) (Pandoc meta bs) = Pandoc meta' bs + where meta' = case lookupMeta k meta of + Nothing -> setMeta k v' meta + Just (MetaList xs) -> + setMeta k (MetaList (xs ++ [v'])) meta + Just x -> setMeta k (MetaList [x, v']) meta + v' = readMetaValue v + +readMetaValue :: String -> MetaValue +readMetaValue s = case decode (UTF8.fromString s) of + Just (Yaml.String t) -> MetaString $ T.unpack t + Just (Yaml.Bool b) -> MetaBool b + _ -> MetaString s + +-- Determine default reader based on source file extensions +defaultReaderName :: String -> [FilePath] -> String +defaultReaderName fallback [] = fallback +defaultReaderName fallback (x:xs) = + case takeExtension (map toLower x) of + ".xhtml" -> "html" + ".html" -> "html" + ".htm" -> "html" + ".md" -> "markdown" + ".markdown" -> "markdown" + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".rst" -> "rst" + ".org" -> "org" + ".lhs" -> "markdown+lhs" + ".db" -> "docbook" + ".opml" -> "opml" + ".wiki" -> "mediawiki" + ".dokuwiki" -> "dokuwiki" + ".textile" -> "textile" + ".native" -> "native" + ".json" -> "json" + ".docx" -> "docx" + ".t2t" -> "t2t" + ".epub" -> "epub" + ".odt" -> "odt" + ".pdf" -> "pdf" -- so we get an "unknown reader" error + ".doc" -> "doc" -- so we get an "unknown reader" error + _ -> defaultReaderName fallback xs + +-- Determine default writer based on output file extension +defaultWriterName :: FilePath -> String +defaultWriterName "-" = "html" -- no output file +defaultWriterName x = + case takeExtension (map toLower x) of + "" -> "markdown" -- empty extension + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".context" -> "context" + ".ctx" -> "context" + ".rtf" -> "rtf" + ".rst" -> "rst" + ".s5" -> "s5" + ".native" -> "native" + ".json" -> "json" + ".txt" -> "markdown" + ".text" -> "markdown" + ".md" -> "markdown" + ".markdown" -> "markdown" + ".textile" -> "textile" + ".lhs" -> "markdown+lhs" + ".texi" -> "texinfo" + ".texinfo" -> "texinfo" + ".db" -> "docbook" + ".odt" -> "odt" + ".docx" -> "docx" + ".epub" -> "epub" + ".org" -> "org" + ".asciidoc" -> "asciidoc" + ".adoc" -> "asciidoc" + ".pdf" -> "latex" + ".fb2" -> "fb2" + ".opml" -> "opml" + ".icml" -> "icml" + ".tei.xml" -> "tei" + ".tei" -> "tei" + ['.',y] | y `elem` ['1'..'9'] -> "man" + _ -> "html" + +-- Transformations of a Pandoc document post-parsing: + +extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc +extractMedia media dir d = + case [fp | (fp, _, _) <- mediaDirectory media] of + [] -> return d + fps -> do + extractMediaBag True dir media + return $ walk (adjustImagePath dir fps) d + +adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline +adjustImagePath dir paths (Image attr lab (src, tit)) + | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) +adjustImagePath _ _ x = x + +applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc +applyTransforms transforms d = return $ foldr ($) d transforms + + -- First we check to see if a filter is found. If not, and if it's + -- not an absolute path, we check to see whether it's in `userdir/filters`. + -- If not, we leave it unchanged. +expandFilterPath :: MonadIO m => Maybe FilePath -> FilePath -> m FilePath +expandFilterPath mbDatadir fp = liftIO $ do + fpExists <- doesFileExist fp + if fpExists + then return fp + else case mbDatadir of + Just datadir | isRelative fp -> do + let filterPath = (datadir </> "filters" </> fp) + filterPathExists <- doesFileExist filterPath + if filterPathExists + then return filterPath + else return fp + _ -> return fp + +applyFilters :: MonadIO m + => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc +applyFilters mbDatadir filters args d = do + expandedFilters <- mapM (expandFilterPath mbDatadir) filters + foldrM ($) d $ map (flip externalFilter args) expandedFilters + +readSource :: MonadIO m => FilePath -> m String +readSource "-" = liftIO UTF8.getContents +readSource src = case parseURI src of + Just u | uriScheme u `elem` ["http:","https:"] -> + readURI src + | uriScheme u == "file:" -> + liftIO $ UTF8.readFile (uriPath u) + _ -> liftIO $ UTF8.readFile src + +readURI :: MonadIO m => FilePath -> m String +readURI src = do + res <- liftIO $ openURL src + case res of + Left e -> liftIO $ throwIO e + Right (bs,_) -> return $ UTF8.toString bs + +readFile' :: MonadIO m => FilePath -> m B.ByteString +readFile' "-" = liftIO $ B.getContents +readFile' f = liftIO $ B.readFile f + +writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m () +writeFnBinary "-" = liftIO . B.putStr +writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f) + +writerFn :: MonadIO m => FilePath -> String -> m () +writerFn "-" = liftIO . UTF8.putStr +writerFn f = liftIO . UTF8.writeFile f + +lookupHighlightStyle :: Maybe String -> IO (Maybe Style) +lookupHighlightStyle Nothing = return Nothing +lookupHighlightStyle (Just s) = + case lookup (map toLower s) highlightingStyles of + Just sty -> return (Just sty) + Nothing -> err 68 $ "Unknown highlight-style " ++ s diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 896682389..df060915c 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -28,7 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Exports functions for syntax highlighting. -} -module Text.Pandoc.Highlighting ( languages +module Text.Pandoc.Highlighting ( highlightingStyles + , languages , languagesByExtension , highlight , formatLaTeXInline @@ -57,6 +58,17 @@ import qualified Data.Map as M import Control.Monad import qualified Data.Text as T +highlightingStyles :: [(String, Style)] +highlightingStyles = + [("pygments", pygments), + ("tango", tango), + ("espresso", espresso), + ("zenburn", zenburn), + ("kate", kate), + ("monochrome", monochrome), + ("breezedark", breezeDark), + ("haddock", haddock)] + languages :: [String] languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap] |