summary refs log tree commit diff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r-- src/Text/Pandoc/App.hs 720
-rw-r--r-- src/Text/Pandoc/Highlighting.hs 14
2 files changed, 733 insertions, 1 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
new file mode 100644
index 000000000..e51a45395
--- /dev/null
+++ b/src/Text/Pandoc/App.hs
@@ -0,0 +1,720 @@
+{-# LANGUAGE CPP, TupleSections, ScopedTypeVariables, PatternGuards #-}
+{-
+Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.App
+ Copyright : Copyright (C) 2006-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Does a pandoc conversion based on command-line options.
+-}
+module Text.Pandoc.App (
+ convertWithOpts
+ , Opt(..)
+ , defaultOpts
+ ) where
+import Text.Pandoc
+import Text.Pandoc.Builder (setMeta)
+import Text.Pandoc.PDF (makePDF)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8,
+ headerShift, err, openURL )
+import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag )
+import Text.Pandoc.XML ( toEntities )
+import Text.Pandoc.Highlighting (highlightingStyles)
+import Text.Pandoc.SelfContained ( makeSelfContained )
+import Text.Pandoc.Process (pipeProcess)
+import Skylighting ( Style )
+import System.Environment ( getEnvironment )
+import System.Exit ( ExitCode (..), exitSuccess )
+import System.FilePath
+import Data.Char ( toLower )
+import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import System.Directory ( getAppUserDataDirectory, findExecutable,
+ doesFileExist, Permissions(..), getPermissions )
+import System.IO ( stdout, stderr )
+import System.IO.Error ( isDoesNotExistError )
+import qualified Control.Exception as E
+import Control.Exception.Extensible ( throwIO )
+import qualified Text.Pandoc.UTF8 as UTF8
+import Control.Monad (when, unless, (>=>))
+import Data.Maybe (fromMaybe, isNothing, isJust)
+import Data.Foldable (foldrM)
+import Network.URI (parseURI, isURI, URI(..))
+import qualified Data.ByteString.Lazy as B
+import Data.Aeson (eitherDecode', encode)
+import Data.Yaml (decode)
+import qualified Data.Yaml as Yaml
+import qualified Data.Text as T
+#ifndef _WINDOWS
+import System.Posix.Terminal (queryTerminal)
+import System.Posix.IO (stdOutput)
+#endif
+import Control.Monad.Trans
+import Text.Pandoc.Class (withMediaBag, PandocIO, getLog, setVerbosity)
+
+-- | Run one pandoc conversion as described by the command-line options
+-- @opts@. @args@ are the input paths or URLs; when there are none, or
+-- when 'optIgnoreArgs' is set, input is read from @"-"@ (stdin). Output
+-- goes to 'optOutputFile', where @"-"@ means stdout.
+--
+-- Failures detected here are reported through 'err' with these codes:
+-- 9 unknown writer, 7 unknown reader, 5 non-text output to a terminal,
+-- 3 warnings while 'optFailIfWarnings' is set, 47 writer unsuitable for
+-- PDF, 41 PDF program not found, 43 PDF production failed.
+convertWithOpts :: Opt -> [FilePath] -> IO ()
+convertWithOpts opts args = do
+ let outputFile = optOutputFile opts
+ let filters = optFilters opts
+ let verbosity = optVerbosity opts
+
+ -- with optDumpArgs, only print the output file and the arguments, then stop
+ when (optDumpArgs opts) $
+ do UTF8.hPutStrLn stdout outputFile
+ mapM_ (UTF8.hPutStrLn stdout) args
+ exitSuccess
+
+ epubStylesheet <- case optEpubStylesheet opts of
+ Nothing -> return Nothing
+ Just fp -> Just <$> UTF8.readFile fp
+
+ epubMetadata <- case optEpubMetadata opts of
+ Nothing -> return Nothing
+ Just fp -> Just <$> UTF8.readFile fp
+
+ -- stylesheet used when a KaTeX script is given without a stylesheet
+ let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css"
+ let mathMethod =
+ case (optKaTeXJS opts, optKaTeXStylesheet opts) of
+ (Nothing, _) -> optHTMLMathMethod opts
+ (Just js, ss) -> KaTeX js (fromMaybe csscdn ss)
+
+
+ -- --bibliography implies -F pandoc-citeproc for backwards compatibility:
+ let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) &&
+ optCiteMethod opts `notElem` [Natbib, Biblatex] &&
+ "pandoc-citeproc" `notElem` map takeBaseName filters
+ let filters' = if needsCiteproc then "pandoc-citeproc" : filters
+ else filters
+
+ -- no arguments, or arguments ignored: read from "-"
+ let sources = case args of
+ [] -> ["-"]
+ xs | optIgnoreArgs opts -> ["-"]
+ | otherwise -> xs
+
+ -- an explicit data directory wins; otherwise try the per-user "pandoc"
+ -- application directory, and carry on without one if that lookup throws
+ datadir <- case optDataDir opts of
+ Nothing -> E.catch
+ (Just <$> getAppUserDataDirectory "pandoc")
+ (\e -> let _ = (e :: E.SomeException)
+ in return Nothing)
+ Just _ -> return $ optDataDir opts
+
+ -- assign reader and writer based on options and filenames
+ let readerName = case optReader opts of
+ Nothing -> defaultReaderName
+ (if any isURI sources
+ then "html"
+ else "markdown") sources
+ Just x -> map toLower x
+
+ let writerName = case optWriter opts of
+ Nothing -> defaultWriterName outputFile
+ Just x -> map toLower x
+ let format = takeWhile (`notElem` ['+','-'])
+ $ takeFileName writerName -- in case path to lua script
+
+ let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
+
+ let laTeXOutput = format `elem` ["latex", "beamer"]
+ let conTeXtOutput = format == "context"
+ let html5Output = format == "html5" || format == "html"
+
+ -- disabling the custom writer for now
+ writer <- if ".lua" `isSuffixOf` format
+ -- note: use non-lowercased version writerName
+ then error "custom writers disabled for now"
+ else case getWriter writerName of
+ Left e -> err 9 $
+ if format == "pdf"
+ then e ++
+ "\nTo create a pdf with pandoc, use " ++
+ "the latex or beamer writer and specify\n" ++
+ "an output file with .pdf extension " ++
+ "(pandoc -t latex -o filename.pdf)."
+ else e
+ Right w -> return (w :: Writer PandocIO)
+
+ -- TODO: we have to get the input and the output into the state for
+ -- the sake of the txt2tags (t2t) reader.
+ reader <- case getReader readerName of
+ Right r -> return (r :: Reader PandocIO)
+ Left e -> err 7 e'
+ where e' = case readerName of
+ "pdf" -> e ++
+ "\nPandoc can convert to PDF, but not from PDF."
+ "doc" -> e ++
+ "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc."
+ _ -> e
+
+ -- non-text formats and PDF output are always produced standalone
+ let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
+
+ templ <- case optTemplate opts of
+ _ | not standalone -> return Nothing
+ Nothing -> do
+ deftemp <- getDefaultTemplate datadir format
+ case deftemp of
+ Left e -> throwIO e
+ Right t -> return (Just t)
+ Just tp -> do
+ -- if the template path has no extension, use the format name as one
+ let tp' = case takeExtension tp of
+ "" -> tp <.> format
+ _ -> tp
+ -- try the path as given first; if that file does not exist,
+ -- look under "templates" in the data files
+ Just <$> E.catch (UTF8.readFile tp')
+ (\e -> if isDoesNotExistError e
+ then E.catch
+ (readDataFileUTF8 datadir
+ ("templates" </> tp'))
+ (\e' -> let _ = (e' :: E.SomeException)
+ in throwIO e')
+ else throwIO e)
+
+ let addStringAsVariable varname s vars = return $ (varname, s) : vars
+
+ let addContentsAsVariable varname fp vars = do
+ s <- UTF8.readFile fp
+ return $ (varname, s) : vars
+
+ -- note: this reverses the list constructed in option parsing,
+ -- which in turn was reversed from the command-line order,
+ -- so we end up with the correct order in the variable list:
+ let withList _ [] vars = return vars
+ withList f (x:xs) vars = f x vars >>= withList f xs
+
+ variables <- return (optVariables opts)
+ >>=
+ withList (addContentsAsVariable "include-before")
+ (optIncludeBeforeBody opts)
+ >>=
+ withList (addContentsAsVariable "include-after")
+ (optIncludeAfterBody opts)
+ >>=
+ withList (addContentsAsVariable "header-includes")
+ (optIncludeInHeader opts)
+ >>=
+ withList (addStringAsVariable "css") (optCss opts)
+ >>=
+ maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts)
+ >>=
+ maybe return (addStringAsVariable "epub-cover-image")
+ (optEpubCoverImage opts)
+ >>=
+ (\vars -> case mathMethod of
+ LaTeXMathML Nothing -> do
+ s <- readDataFileUTF8 datadir "LaTeXMathML.js"
+ return $ ("mathml-script", s) : vars
+ _ -> return vars)
+ >>=
+ (\vars -> if format == "dzslides"
+ then do
+ dztempl <- readDataFileUTF8 datadir
+ ("dzslides" </> "template.html")
+ let dzline = "<!-- {{{{ dzslides core"
+ let dzcore = unlines
+ $ dropWhile (not . (dzline `isPrefixOf`))
+ $ lines dztempl
+ return $ ("dzslides-core", dzcore) : vars
+ else return vars)
+
+ -- only the first source counts, and only if it is an http(s) URL;
+ -- its query and fragment are dropped
+ let sourceURL = case sources of
+ [] -> Nothing
+ (x:_) -> case parseURI x of
+ Just u
+ | uriScheme u `elem` ["http:","https:"] ->
+ Just $ show u{ uriQuery = "",
+ uriFragment = "" }
+ _ -> Nothing
+
+ let readerOpts = def{ readerStandalone = standalone
+ , readerParseRaw = optParseRaw opts
+ , readerColumns = optColumns opts
+ , readerTabStop = optTabStop opts
+ , readerIndentedCodeClasses = optIndentedCodeClasses opts
+ , readerApplyMacros = not laTeXOutput
+ , readerDefaultImageExtension =
+ optDefaultImageExtension opts
+ , readerTrackChanges = optTrackChanges opts
+ }
+
+ highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts
+
+ let writerOptions = def { writerTemplate = templ,
+ writerVariables = variables,
+ writerTabStop = optTabStop opts,
+ writerTableOfContents = optTableOfContents opts,
+ writerHTMLMathMethod = mathMethod,
+ writerIncremental = optIncremental opts,
+ writerCiteMethod = optCiteMethod opts,
+ writerNumberSections = optNumberSections opts,
+ writerNumberOffset = optNumberOffset opts,
+ writerSectionDivs = optSectionDivs opts,
+ writerReferenceLinks = optReferenceLinks opts,
+ writerReferenceLocation = optReferenceLocation opts,
+ writerDpi = optDpi opts,
+ writerWrapText = optWrapText opts,
+ writerColumns = optColumns opts,
+ writerEmailObfuscation = optEmailObfuscation opts,
+ writerIdentifierPrefix = optIdentifierPrefix opts,
+ writerSourceURL = sourceURL,
+ writerUserDataDir = datadir,
+ writerHtmlQTags = optHtmlQTags opts,
+ writerTopLevelDivision = optTopLevelDivision opts,
+ writerListings = optListings opts,
+ writerSlideLevel = optSlideLevel opts,
+ writerHighlightStyle = highlightStyle,
+ writerSetextHeaders = optSetextHeaders opts,
+ writerEpubMetadata = epubMetadata,
+ writerEpubStylesheet = epubStylesheet,
+ writerEpubFonts = optEpubFonts opts,
+ writerEpubChapterLevel = optEpubChapterLevel opts,
+ writerTOCDepth = optTOCDepth opts,
+ writerReferenceDoc = optReferenceDoc opts,
+ writerLaTeXArgs = optLaTeXEngineArgs opts
+ }
+
+
+ -- Windows builds do not query the terminal and assume stdout is a tty
+#ifdef _WINDOWS
+ let istty = True
+#else
+ istty <- queryTerminal stdOutput
+#endif
+ when (istty && not (isTextFormat format) && outputFile == "-") $
+ err 5 $ "Cannot write " ++ format ++ " output to stdout.\n" ++
+ "Specify an output file using the -o option."
+
+
+ -- a base header level above 1 adds a header shift of (level - 1)
+ let transforms = case optBaseHeaderLevel opts of
+ x | x > 1 -> [headerShift (x - 1)]
+ | otherwise -> []
+
+ -- tabFilter gets 0 when tabs are to be preserved or the reader is t2t
+ -- (presumably 0 disables tab conversion; confirm against tabFilter)
+ let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t"
+ then 0
+ else optTabStop opts)
+
+ -- read all sources, join them with newlines, then filter tabs
+ readSources :: MonadIO m => [FilePath] -> m String
+ readSources srcs = convertTabs . intercalate "\n" <$>
+ mapM readSource srcs
+
+ -- run a PandocIO action with the requested verbosity; afterwards fail
+ -- with code 3 if warnings were logged and optFailIfWarnings is set
+ let runIO' :: PandocIO a -> IO a
+ runIO' f = do
+ (res, reports) <- runIOorExplode $ do
+ setVerbosity verbosity
+ x <- f
+ rs <- getLog
+ return (x, rs)
+ let isWarning (WARNING, _) = True
+ isWarning _ = False
+ when (optFailIfWarnings opts && any isWarning reports) $
+ err 3 "Failing because there were warnings."
+ return res
+
+ -- Parse the given sources into one document plus its media bag.
+ -- String readers parse each source separately (and concatenate the
+ -- results) under optFileScope or for JSON input, otherwise they parse
+ -- the concatenated text; bytestring readers always parse per source.
+ -- Every branch reads the sources' argument, not the captured list.
+ let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag)
+ sourceToDoc sources' =
+ case reader of
+ StringReader r
+ | optFileScope opts || readerName == "json" -> do
+ pairs <- mapM
+ (readSource >=> withMediaBag . r readerOpts) sources'
+ return (mconcat (map fst pairs), mconcat (map snd pairs))
+ | otherwise ->
+ readSources sources' >>= withMediaBag . r readerOpts
+ ByteStringReader r -> do
+ pairs <- mapM (readFile' >=>
+ withMediaBag . r readerOpts) sources'
+ return (mconcat (map fst pairs), mconcat (map snd pairs))
+
+ runIO' $ do
+ (doc, media) <- sourceToDoc sources
+ -- post-parse pipeline: extract media, add command-line metadata,
+ -- apply the header transforms, then run the external filters
+ doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=>
+ return . flip (foldr addMetadata) (optMetadata opts) >=>
+ applyTransforms transforms >=>
+ applyFilters datadir filters' [format]) doc
+
+ case writer of
+ -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile
+ ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile
+ StringWriter f
+ | pdfOutput -> do
+ -- make sure writer is latex or beamer or context or html5
+ unless (laTeXOutput || conTeXtOutput || html5Output) $
+ err 47 $ "cannot produce pdf output with " ++ format ++
+ " writer"
+
+ let pdfprog = case () of
+ _ | conTeXtOutput -> "context"
+ _ | html5Output -> "wkhtmltopdf"
+ _ -> optLaTeXEngine opts
+ -- check for pdf creating program
+ mbPdfProg <- liftIO $ findExecutable pdfprog
+ when (isNothing mbPdfProg) $
+ err 41 $ pdfprog ++ " not found. " ++
+ pdfprog ++ " is needed for pdf output."
+
+ res <- makePDF pdfprog f writerOptions verbosity media doc'
+ case res of
+ Right pdf -> writeFnBinary outputFile pdf
+ Left err' -> liftIO $ do
+ B.hPutStr stderr err'
+ B.hPut stderr $ B.pack [10]
+ err 43 "Error producing PDF"
+ | otherwise -> do
+ let htmlFormat = format `elem`
+ ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"]
+ selfcontain = if optSelfContained opts && htmlFormat
+ then makeSelfContained writerOptions media
+ else return
+ handleEntities = if htmlFormat && optAscii opts
+ then toEntities
+ else id
+ output <- f writerOptions doc'
+ -- fragments (non-standalone output) get a trailing newline
+ selfcontain (output ++ ['\n' | not standalone]) >>=
+ writerFn outputFile . handleEntities
+
+-- | A pure document-to-document transformation.
+type Transform = Pandoc -> Pandoc
+
+-- | True unless the format name is one of odt, docx, epub or epub3.
+isTextFormat :: String -> Bool
+isTextFormat fmt = not (fmt `elem` nonTextFormats)
+  where nonTextFormats = ["odt","docx","epub","epub3"]
+
+-- | Run the external filter @f@ on a document.
+--
+-- The document is JSON-encoded and handed to the filter process through
+-- 'pipeProcess'; the process output is decoded back into a document.
+-- @args'@ are passed to the filter as arguments, and @PANDOC_VERSION@ is
+-- added to its environment.
+--
+-- If @f@ exists as a file but is not executable, an interpreter is chosen
+-- from its extension (.py, .hs, .pl, .rb, .php, .js). Otherwise @f@ is run
+-- directly (as @./f@ when it is an existing executable file).
+--
+-- Every failure is reported through @err 83@: program not found, an
+-- exception while running it, a non-zero exit status, or output that
+-- cannot be decoded as a document.
+externalFilter :: MonadIO m => FilePath -> [String] -> Pandoc -> m Pandoc
+externalFilter f args' d = liftIO $ do
+  exists <- doesFileExist f
+  isExecutable <- if exists
+                     then executable <$> getPermissions f
+                     else return True
+  let (f', args'') = if exists
+                        then case map toLower (takeExtension f) of
+                                  _ | isExecutable -> ("." </> f, args')
+                                  ".py"  -> ("python", f:args')
+                                  ".hs"  -> ("runhaskell", f:args')
+                                  ".pl"  -> ("perl", f:args')
+                                  ".rb"  -> ("ruby", f:args')
+                                  ".php" -> ("php", f:args')
+                                  ".js"  -> ("node", f:args')
+                                  _      -> (f, args')
+                        else (f, args')
+  -- an existing executable file is run as is; anything else (an
+  -- interpreter, or a bare program name) must be found on the search path
+  unless (exists && isExecutable) $ do
+    mbExe <- findExecutable f'
+    when (isNothing mbExe) $
+      err 83 $ "Error running filter " ++ f ++ ":\n" ++
+               "Could not find executable '" ++ f' ++ "'."
+  env <- getEnvironment
+  let env' = Just $ ("PANDOC_VERSION", pandocVersion) : env
+  (exitcode, outbs) <- E.handle filterException $
+                          pipeProcess env' f' args'' $ encode d
+  case exitcode of
+       ExitSuccess    ->
+         -- decode here, inside IO, so that malformed filter output is
+         -- reported as a filter error instead of surfacing later as a
+         -- lazily raised 'error'
+         case eitherDecode' outbs of
+              Right d'  -> return d'
+              Left msg  -> err 83 $ "Error running filter " ++ f ++ "\n" ++
+                                    "Could not parse filter output:\n" ++ msg
+       ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++
+                                  "Filter returned error status " ++ show ec
+ where filterException :: E.SomeException -> IO a
+       filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++
+                                     show e
+
+-- | Data structure for command line options.
+data Opt = Opt
+ { optTabStop :: Int -- ^ Number of spaces per tab
+ , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces
+ , optStandalone :: Bool -- ^ Include header, footer
+ , optReader :: Maybe String -- ^ Reader format
+ , optWriter :: Maybe String -- ^ Writer format
+ , optParseRaw :: Bool -- ^ Parse unconvertible HTML and TeX
+ , optTableOfContents :: Bool -- ^ Include table of contents
+ , optBaseHeaderLevel :: Int -- ^ Base header level
+ , optTemplate :: Maybe FilePath -- ^ Custom template
+ , optVariables :: [(String,String)] -- ^ Template variables to set
+ , optMetadata :: [(String, String)] -- ^ Metadata fields to set
+ , optOutputFile :: FilePath -- ^ Name of output file
+ , optNumberSections :: Bool -- ^ Number sections in LaTeX
+ , optNumberOffset :: [Int] -- ^ Starting number for sections
+ , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML
+ , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5
+ , optSelfContained :: Bool -- ^ Make HTML accessible offline
+ , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML
+ , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code
+ , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions
+ , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
+ , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc
+ , optEpubStylesheet :: Maybe FilePath -- ^ EPUB stylesheet
+ , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata
+ , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed
+ , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters
+ , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub
+ , optTOCDepth :: Int -- ^ Number of levels to include in TOC
+ , optDumpArgs :: Bool -- ^ Output command-line arguments
+ , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
+ , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output
+ , optFailIfWarnings :: Bool -- ^ Fail on warnings
+ , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output
+ , optDpi :: Int -- ^ Dpi
+ , optWrapText :: WrapOption -- ^ Options for wrapping text
+ , optColumns :: Int -- ^ Line length in characters
+ , optFilters :: [FilePath] -- ^ Filters to apply
+ , optEmailObfuscation :: ObfuscationMethod -- ^ Passed to the writer as 'writerEmailObfuscation'
+ , optIdentifierPrefix :: String -- ^ Passed to the writer as 'writerIdentifierPrefix'
+ , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
+ , optDataDir :: Maybe FilePath -- ^ User data directory; if Nothing, the application user data directory for "pandoc" is tried
+ , optCiteMethod :: CiteMethod -- ^ Method to output cites
+ , optListings :: Bool -- ^ Use listings package for code blocks
+ , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf
+ , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine
+ , optSlideLevel :: Maybe Int -- ^ Header level that creates slides
+ , optSetextHeaders :: Bool -- ^ Use setext headers for markdown level 1-2
+ , optAscii :: Bool -- ^ Use ascii characters only in html
+ , optDefaultImageExtension :: String -- ^ Default image extension
+ , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media
+ , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes.
+ , optFileScope :: Bool -- ^ Parse input files before combining
+ , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX
+ , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX
+ , optTitlePrefix :: Maybe String -- ^ Prefix for title
+ , optCss :: [FilePath] -- ^ CSS files to link to
+ , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before body
+ , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body
+ , optIncludeInHeader :: [FilePath] -- ^ Files to include in header
+ }
+
+-- | Defaults for command-line options: a value for every field of 'Opt'.
+-- Output goes to @"-"@ (stdout), no reader or writer is preselected,
+-- and no filters, variables or metadata are set.
+defaultOpts :: Opt
+defaultOpts = Opt
+ { optTabStop = 4
+ , optPreserveTabs = False
+ , optStandalone = False
+ , optReader = Nothing
+ , optWriter = Nothing
+ , optParseRaw = False
+ , optTableOfContents = False
+ , optBaseHeaderLevel = 1
+ , optTemplate = Nothing
+ , optVariables = []
+ , optMetadata = []
+ , optOutputFile = "-" -- "-" means stdout
+ , optNumberSections = False
+ , optNumberOffset = [0,0,0,0,0,0]
+ , optSectionDivs = False
+ , optIncremental = False
+ , optSelfContained = False
+ , optHtmlQTags = False
+ , optHighlightStyle = Just "pygments"
+ , optTopLevelDivision = TopLevelDefault
+ , optHTMLMathMethod = PlainMath
+ , optReferenceDoc = Nothing
+ , optEpubStylesheet = Nothing
+ , optEpubMetadata = Nothing
+ , optEpubFonts = []
+ , optEpubChapterLevel = 1
+ , optEpubCoverImage = Nothing
+ , optTOCDepth = 3
+ , optDumpArgs = False
+ , optIgnoreArgs = False
+ , optVerbosity = WARNING
+ , optFailIfWarnings = False
+ , optReferenceLinks = False
+ , optReferenceLocation = EndOfDocument
+ , optDpi = 96
+ , optWrapText = WrapAuto
+ , optColumns = 72
+ , optFilters = []
+ , optEmailObfuscation = NoObfuscation
+ , optIdentifierPrefix = ""
+ , optIndentedCodeClasses = []
+ , optDataDir = Nothing
+ , optCiteMethod = Citeproc
+ , optListings = False
+ , optLaTeXEngine = "pdflatex"
+ , optLaTeXEngineArgs = []
+ , optSlideLevel = Nothing
+ , optSetextHeaders = True
+ , optAscii = False
+ , optDefaultImageExtension = ""
+ , optExtractMedia = Nothing
+ , optTrackChanges = AcceptChanges
+ , optFileScope = False
+ , optKaTeXStylesheet = Nothing
+ , optKaTeXJS = Nothing
+ , optTitlePrefix = Nothing
+ , optCss = []
+ , optIncludeBeforeBody = []
+ , optIncludeAfterBody = []
+ , optIncludeInHeader = []
+ }
+
+-- | Add one key/value pair to a document's metadata. The value string is
+-- interpreted by 'readMetaValue'. A key that is not yet present is set
+-- directly; a key holding a list gets the value appended; any other
+-- existing value is turned into a two-element list.
+addMetadata :: (String, String) -> Pandoc -> Pandoc
+addMetadata (key, raw) (Pandoc meta blocks) =
+  Pandoc (merge (lookupMeta key meta)) blocks
+  where
+    newVal = readMetaValue raw
+    merge Nothing                = setMeta key newVal meta
+    merge (Just (MetaList olds)) = setMeta key (MetaList (olds ++ [newVal])) meta
+    merge (Just old)             = setMeta key (MetaList [old, newVal]) meta
+
+-- | Interpret a metadata value given as a string. The string is decoded
+-- as YAML: a YAML string or boolean yields the corresponding meta value,
+-- and anything else (including a decode failure) keeps the raw string.
+readMetaValue :: String -> MetaValue
+readMetaValue s = fromMaybe (MetaString s) (decode (UTF8.fromString s) >>= scalar)
+  where
+    scalar (Yaml.String t) = Just (MetaString (T.unpack t))
+    scalar (Yaml.Bool b)   = Just (MetaBool b)
+    scalar _               = Nothing
+
+-- Determine default reader based on source file extensions: the first
+-- source whose (lower-cased) extension is recognized decides; if none
+-- is recognized, the fallback is used.
+defaultReaderName :: String -> [FilePath] -> String
+defaultReaderName fallback = foldr pick fallback
+  where
+    pick src rest =
+      fromMaybe rest (lookup (takeExtension (map toLower src)) readersByExt)
+    readersByExt =
+      [ (".xhtml",    "html")
+      , (".html",     "html")
+      , (".htm",      "html")
+      , (".md",       "markdown")
+      , (".markdown", "markdown")
+      , (".tex",      "latex")
+      , (".latex",    "latex")
+      , (".ltx",      "latex")
+      , (".rst",      "rst")
+      , (".org",      "org")
+      , (".lhs",      "markdown+lhs")
+      , (".db",       "docbook")
+      , (".opml",     "opml")
+      , (".wiki",     "mediawiki")
+      , (".dokuwiki", "dokuwiki")
+      , (".textile",  "textile")
+      , (".native",   "native")
+      , (".json",     "json")
+      , (".docx",     "docx")
+      , (".t2t",      "t2t")
+      , (".epub",     "epub")
+      , (".odt",      "odt")
+      , (".pdf",      "pdf") -- so we get an "unknown reader" error
+      , (".doc",      "doc") -- so we get an "unknown reader" error
+      ]
+
+-- Determine default writer based on output file extension.
+-- "-" (no output file) and unrecognized extensions give "html"; a name
+-- without extension gives "markdown". The match is case-insensitive.
+defaultWriterName :: FilePath -> String
+defaultWriterName "-" = "html" -- no output file
+defaultWriterName x
+  -- takeExtension only yields the last extension (".xml"), so the double
+  -- extension has to be recognized on the whole name
+  | ".tei.xml" `isSuffixOf` lowered = "tei"
+  | otherwise =
+      case takeExtension lowered of
+        ""          -> "markdown" -- empty extension
+        ".tex"      -> "latex"
+        ".latex"    -> "latex"
+        ".ltx"      -> "latex"
+        ".context"  -> "context"
+        ".ctx"      -> "context"
+        ".rtf"      -> "rtf"
+        ".rst"      -> "rst"
+        ".s5"       -> "s5"
+        ".native"   -> "native"
+        ".json"     -> "json"
+        ".txt"      -> "markdown"
+        ".text"     -> "markdown"
+        ".md"       -> "markdown"
+        ".markdown" -> "markdown"
+        ".textile"  -> "textile"
+        ".lhs"      -> "markdown+lhs"
+        ".texi"     -> "texinfo"
+        ".texinfo"  -> "texinfo"
+        ".db"       -> "docbook"
+        ".odt"      -> "odt"
+        ".docx"     -> "docx"
+        ".epub"     -> "epub"
+        ".org"      -> "org"
+        ".asciidoc" -> "asciidoc"
+        ".adoc"     -> "asciidoc"
+        ".pdf"      -> "latex"
+        ".fb2"      -> "fb2"
+        ".opml"     -> "opml"
+        ".icml"     -> "icml"
+        ".tei"      -> "tei"
+        ['.',y] | y `elem` ['1'..'9'] -> "man"
+        _           -> "html"
+  where lowered = map toLower x
+
+-- Transformations of a Pandoc document post-parsing:
+
+-- | If the media bag has any entries, extract it under @dir@ and walk
+-- the document with 'adjustImagePath' for the extracted paths; with an
+-- empty media bag the document is returned unchanged.
+extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc
+extractMedia media dir d
+  | null mediaPaths = return d
+  | otherwise = do
+      extractMediaBag True dir media
+      return $ walk (adjustImagePath dir mediaPaths) d
+  where mediaPaths = [fp | (fp, _, _) <- mediaDirectory media]
+
+-- | Prefix the source of an image with @dir@ and a slash when that source
+-- is one of @paths@; every other inline is returned unchanged.
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths inline =
+  case inline of
+    Image attr lab (src, tit)
+      | src `elem` paths -> Image attr lab (dir ++ "/" ++ src, tit)
+    _ -> inline
+
+-- | Apply a list of transforms to a document. The transforms are
+-- composed right to left, so the last one in the list runs first.
+applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
+applyTransforms transforms d = return (composed d)
+  where composed = foldr (.) id transforms
+
+-- | Resolve the path of a filter. A path that exists as a file is kept.
+-- Otherwise, if a data directory is known and the path is relative, the
+-- same path under @filters@ in the data directory is used when it
+-- exists. In every other case the path is returned unchanged.
+expandFilterPath :: MonadIO m => Maybe FilePath -> FilePath -> m FilePath
+expandFilterPath mbDatadir fp = liftIO $ do
+  foundAsGiven <- doesFileExist fp
+  case mbDatadir of
+    Just datadir | not foundAsGiven && isRelative fp -> do
+      let candidate = datadir </> "filters" </> fp
+      foundInDatadir <- doesFileExist candidate
+      return $ if foundInDatadir then candidate else fp
+    _ -> return fp
+
+-- | Resolve every filter path with 'expandFilterPath', then run the
+-- filters on the document through 'externalFilter', each with the same
+-- argument list. The fold is a right fold, so the last filter in the
+-- list is run first.
+applyFilters :: MonadIO m
+             => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc
+applyFilters mbDatadir filters args d = do
+  resolved <- mapM (expandFilterPath mbDatadir) filters
+  foldrM (\fltr doc -> externalFilter fltr args doc) d resolved
+
+-- | Read one input as a string: @"-"@ is stdin, http and https URIs are
+-- fetched with 'readURI', a @file:@ URI is read from its path
+-- component, and anything else is read as a local file.
+readSource :: MonadIO m => FilePath -> m String
+readSource src
+  | src == "-" = liftIO UTF8.getContents
+  | Just u <- parsed, uriScheme u `elem` ["http:","https:"] = readURI src
+  | Just u <- parsed, uriScheme u == "file:" = liftIO $ UTF8.readFile (uriPath u)
+  | otherwise = liftIO $ UTF8.readFile src
+  where parsed = parseURI src
+
+-- | Fetch a URL with 'openURL' and return its body as a string; a
+-- failure reported by 'openURL' is rethrown.
+readURI :: MonadIO m => FilePath -> m String
+readURI src =
+  liftIO (openURL src) >>= either (liftIO . throwIO) (return . UTF8.toString . fst)
+
+-- | Read an input as a lazy bytestring; @"-"@ means stdin.
+readFile' :: MonadIO m => FilePath -> m B.ByteString
+readFile' path = liftIO $ case path of
+  "-" -> B.getContents
+  _   -> B.readFile path
+
+-- | Write a lazy bytestring to the output; @"-"@ means stdout. Any
+-- other name is passed through 'UTF8.encodePath' and written as a file.
+writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m ()
+writeFnBinary path bytes = liftIO $ case path of
+  "-" -> B.putStr bytes
+  _   -> B.writeFile (UTF8.encodePath path) bytes
+
+-- | Write a string to the output; @"-"@ means stdout, any other name
+-- is written as a file.
+writerFn :: MonadIO m => FilePath -> String -> m ()
+writerFn path text = liftIO $ case path of
+  "-" -> UTF8.putStr text
+  _   -> UTF8.writeFile path text
+
+-- | Look a highlighting style up by name in 'highlightingStyles'; the
+-- name is lower-cased first. 'Nothing' stays 'Nothing'. An unknown name
+-- is reported through @err 68@.
+lookupHighlightStyle :: Maybe String -> IO (Maybe Style)
+lookupHighlightStyle = maybe (return Nothing) resolve
+  where
+    resolve s =
+      maybe (err 68 $ "Unknown highlight-style " ++ s)
+            (return . Just)
+            (lookup (map toLower s) highlightingStyles)
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 896682389..df060915c 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -28,7 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Exports functions for syntax highlighting.
-}
-module Text.Pandoc.Highlighting ( languages
+module Text.Pandoc.Highlighting ( highlightingStyles
+ , languages
, languagesByExtension
, highlight
, formatLaTeXInline
@@ -57,6 +58,17 @@ import qualified Data.Map as M
import Control.Monad
import qualified Data.Text as T
+-- | Highlighting styles paired with their names; all names are
+-- lower-case.
+highlightingStyles :: [(String, Style)]
+highlightingStyles =
+  [ ("pygments",   pygments)
+  , ("tango",      tango)
+  , ("espresso",   espresso)
+  , ("zenburn",    zenburn)
+  , ("kate",       kate)
+  , ("monochrome", monochrome)
+  , ("breezedark", breezeDark)
+  , ("haddock",    haddock)
+  ]
+
languages :: [String]
languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap]