summaryrefslogtreecommitdiff
path: root/src/pandoc.hs
diff options
context:
space:
mode:
authordr@jones.dk <dr@jones.dk>2010-03-22 12:40:10 +0100
committerdr@jones.dk <dr@jones.dk>2010-03-22 12:40:10 +0100
commit96d4f941026a8eca3ba211facdc8ce66b2ab38bb (patch)
treeaae68ec157e85fe9590d1dd5216fc6b7916e08d3 /src/pandoc.hs
parent789d0772d8b5d9c066fb8624bd51576cbde5e30b (diff)
Imported Upstream version 1.5.0.1
Diffstat (limited to 'src/pandoc.hs')
-rw-r--r--src/pandoc.hs318
1 files changed, 224 insertions, 94 deletions
diff --git a/src/pandoc.hs b/src/pandoc.hs
index c2cc9b75e..7e1bfc284 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE CPP #-}
{-
Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
@@ -32,7 +31,8 @@ writers.
module Main where
import Text.Pandoc
import Text.Pandoc.ODT
-import Text.Pandoc.Shared ( HTMLMathMethod (..), tabFilter, ObfuscationMethod (..) )
+import Text.Pandoc.Writers.S5 (s5HeaderIncludes)
+import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile )
#ifdef _HIGHLIGHTING
import Text.Pandoc.Highlighting ( languages )
#endif
@@ -41,19 +41,29 @@ import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath
import System.Console.GetOpt
import Data.Maybe ( fromMaybe )
-import Data.Char ( toLower )
+import Data.Char ( toLower, isDigit )
import Data.List ( intercalate, isSuffixOf )
-import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
+import System.Directory ( getAppUserDataDirectory )
import System.IO ( stdout, stderr )
+-- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv
+-- So we use System.IO.UTF8 only if we have an earlier version
+#if MIN_VERSION_base(4,2,0)
+import System.IO ( hPutStr, hPutStrLn )
+#else
+import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
import System.IO.UTF8
+#endif
#ifdef _CITEPROC
import Text.CSL
import Text.Pandoc.Biblio
#endif
-import Control.Monad (when, unless)
+import Control.Monad (when, unless, liftM)
+import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
+import Network.URI (parseURI, isURI)
+import Data.ByteString.Lazy.UTF8 (toString)
copyrightMessage :: String
-copyrightMessage = "\nCopyright (C) 2006-8 John MacFarlane\n" ++
+copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++
"Web: http://johnmacfarlane.net/pandoc\n" ++
"This is free software; see the source for copying conditions. There is no\n" ++
"warranty, not even for merchantability or fitness for a particular purpose."
@@ -96,26 +106,27 @@ readers = [("native" , readPandoc)
readPandoc :: ParserState -> String -> Pandoc
readPandoc _ = read
--- | Association list of formats and pairs of writers and default headers.
-writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ]
-writers = [("native" , (writeDoc, ""))
- ,("html" , (writeHtmlString, ""))
- ,("html+lhs" , (writeHtmlString, ""))
- ,("s5" , (writeS5String, defaultS5Header))
- ,("docbook" , (writeDocbook, defaultDocbookHeader))
- ,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader))
- ,("odt" , (writeOpenDocument, defaultOpenDocumentHeader))
- ,("latex" , (writeLaTeX, defaultLaTeXHeader))
- ,("latex+lhs" , (writeLaTeX, defaultLaTeXHeader))
- ,("context" , (writeConTeXt, defaultConTeXtHeader))
- ,("texinfo" , (writeTexinfo, ""))
- ,("man" , (writeMan, ""))
- ,("markdown" , (writeMarkdown, ""))
- ,("markdown+lhs" , (writeMarkdown, ""))
- ,("rst" , (writeRST, ""))
- ,("rst+lhs" , (writeRST, ""))
- ,("mediawiki" , (writeMediaWiki, ""))
- ,("rtf" , (writeRTF, defaultRTFHeader))
+-- | Association list of formats and writers.
+writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
+writers = [("native" , writeDoc)
+ ,("html" , writeHtmlString)
+ ,("html+lhs" , writeHtmlString)
+ ,("s5" , writeS5String)
+ ,("docbook" , writeDocbook)
+ ,("opendocument" , writeOpenDocument)
+ ,("odt" , writeOpenDocument)
+ ,("latex" , writeLaTeX)
+ ,("latex+lhs" , writeLaTeX)
+ ,("context" , writeConTeXt)
+ ,("texinfo" , writeTexinfo)
+ ,("man" , writeMan)
+ ,("markdown" , writeMarkdown)
+ ,("markdown+lhs" , writeMarkdown)
+ ,("plain" , writePlain)
+ ,("rst" , writeRST)
+ ,("rst+lhs" , writeRST)
+ ,("mediawiki" , writeMediaWiki)
+ ,("rtf" , writeRTF)
]
isNonTextOutput :: String -> Bool
@@ -125,6 +136,12 @@ isNonTextOutput = (`elem` ["odt"])
writeDoc :: WriterOptions -> Pandoc -> String
writeDoc _ = prettyPandoc
+headerShift :: Int -> Pandoc -> Pandoc
+headerShift n = processWith shift
+ where shift :: Block -> Block
+ shift (Header level inner) = Header (level + n) inner
+ shift x = x
+
-- | Data structure for command line options.
data Opt = Opt
{ optTabStop :: Int -- ^ Number of spaces per tab
@@ -133,18 +150,19 @@ data Opt = Opt
, optReader :: String -- ^ Reader format
, optWriter :: String -- ^ Writer format
, optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX
- , optCSS :: [String] -- ^ CSS file to link to
, optTableOfContents :: Bool -- ^ Include table of contents
- , optIncludeInHeader :: String -- ^ File to include in header
- , optIncludeBeforeBody :: String -- ^ File to include at top of body
- , optIncludeAfterBody :: String -- ^ File to include at end of body
- , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT"
- , optTitlePrefix :: String -- ^ Optional prefix for HTML title
+ , optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply
+ , optTemplate :: String -- ^ Custom template
+ , optVariables :: [(String,String)] -- ^ Template variables to set
+ , optBefore :: [String] -- ^ Texts to include before body
+ , optAfter :: [String] -- ^ Texts to include after body
, optOutputFile :: String -- ^ Name of output file
, optNumberSections :: Bool -- ^ Number sections in LaTeX
, optIncremental :: Bool -- ^ Use incremental lists in S5
+ , optXeTeX :: Bool -- ^ Format latex for xetex
, optSmart :: Bool -- ^ Use smart typography
, optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
+ , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
, optStrict :: Bool -- ^ Use strict markdown syntax
@@ -155,6 +173,7 @@ data Opt = Opt
, optEmailObfuscation :: ObfuscationMethod
, optIdentifierPrefix :: String
, optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
+ , optDataDir :: Maybe FilePath
#ifdef _CITEPROC
, optBiblioFile :: String
, optBiblioFormat :: String
@@ -171,18 +190,19 @@ defaultOpts = Opt
, optReader = "" -- null for default reader
, optWriter = "" -- null for default writer
, optParseRaw = False
- , optCSS = []
, optTableOfContents = False
- , optIncludeInHeader = ""
- , optIncludeBeforeBody = ""
- , optIncludeAfterBody = ""
- , optCustomHeader = "DEFAULT"
- , optTitlePrefix = ""
+ , optTransforms = []
+ , optTemplate = ""
+ , optVariables = []
+ , optBefore = []
+ , optAfter = []
, optOutputFile = "-" -- "-" means stdout
, optNumberSections = False
, optIncremental = False
+ , optXeTeX = False
, optSmart = False
, optHTMLMathMethod = PlainMath
+ , optReferenceODT = Nothing
, optDumpArgs = False
, optIgnoreArgs = False
, optStrict = False
@@ -193,6 +213,7 @@ defaultOpts = Opt
, optEmailObfuscation = JavascriptObfuscation
, optIdentifierPrefix = ""
, optIndentedCodeClasses = []
+ , optDataDir = Nothing
#ifdef _CITEPROC
, optBiblioFile = []
, optBiblioFormat = []
@@ -260,11 +281,18 @@ options =
, Option "m" ["latexmathml", "asciimathml"]
(OptArg
- (\arg opt -> return opt { optHTMLMathMethod =
- LaTeXMathML arg })
+ (\arg opt ->
+ return opt { optHTMLMathMethod = LaTeXMathML arg })
"URL")
"" -- "Use LaTeXMathML script in html output"
+ , Option "" ["mathml"]
+ (OptArg
+ (\arg opt ->
+ return opt { optHTMLMathMethod = MathML arg })
+ "URL")
+ "" -- "Use mathml for HTML math"
+
, Option "" ["mimetex"]
(OptArg
(\arg opt -> return opt { optHTMLMathMethod = MimeTeX
@@ -288,6 +316,11 @@ options =
(\opt -> return opt { optIncremental = True }))
"" -- "Make list items display incrementally in S5"
+ , Option "" ["xetex"]
+ (NoArg
+ (\opt -> return opt { optXeTeX = True }))
+ "" -- "Format latex for processing by XeTeX"
+
, Option "N" ["number-sections"]
(NoArg
(\opt -> return opt { optNumberSections = True }))
@@ -334,21 +367,60 @@ options =
(\opt -> return opt { optTableOfContents = True }))
"" -- "Include table of contents"
+ , Option "" ["base-header-level"]
+ (ReqArg
+ (\arg opt -> do
+ if all isDigit arg && (read arg :: Int) >= 1
+ then do
+ let oldTransforms = optTransforms opt
+ let shift = read arg - 1
+ return opt{ optTransforms =
+ headerShift shift : oldTransforms }
+ else do
+ hPutStrLn stderr $ "base-header-level must be a number >= 1"
+ exitWith $ ExitFailure 19)
+ "LEVEL")
+ "" -- "Headers base level"
+
+ , Option "" ["template"]
+ (ReqArg
+ (\arg opt -> do
+ text <- readFile arg
+ return opt{ optTemplate = text,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "Use custom template"
+
+ , Option "V" ["variable"]
+ (ReqArg
+ (\arg opt ->
+ case break (`elem` ":=") arg of
+ (k,_:v) -> do
+ let newvars = optVariables opt ++ [(k,v)]
+ return opt{ optVariables = newvars }
+ _ -> do
+ hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)"
+ exitWith $ ExitFailure 17)
+ "FILENAME")
+ "" -- "Use custom template"
+
, Option "c" ["css"]
(ReqArg
(\arg opt -> do
- let old = optCSS opt
- return opt { optCSS = old ++ [arg],
+ -- add new link to end, so it is included in proper order
+ let newvars = optVariables opt ++ [("css",arg)]
+ return opt { optVariables = newvars,
optStandalone = True })
- "CSS")
+ "URL")
"" -- "Link to CSS style sheet"
, Option "H" ["include-in-header"]
(ReqArg
(\arg opt -> do
- let old = optIncludeInHeader opt
text <- readFile arg
- return opt { optIncludeInHeader = old ++ text,
+ -- add new ones to end, so they're included in order specified
+ let newvars = optVariables opt ++ [("header-includes",text)]
+ return opt { optVariables = newvars,
optStandalone = True })
"FILENAME")
"" -- "File to include at end of header (implies -s)"
@@ -356,18 +428,22 @@ options =
, Option "B" ["include-before-body"]
(ReqArg
(\arg opt -> do
- let old = optIncludeBeforeBody opt
text <- readFile arg
- return opt { optIncludeBeforeBody = old ++ text })
+ -- add new ones to end, so they're included in order specified
+ let newvars = optVariables opt ++ [("include-before",text)]
+ return opt { optVariables = newvars,
+ optStandalone = True })
"FILENAME")
"" -- "File to include before document body"
, Option "A" ["include-after-body"]
(ReqArg
(\arg opt -> do
- let old = optIncludeAfterBody opt
text <- readFile arg
- return opt { optIncludeAfterBody = old ++ text })
+ -- add new ones to end, so they're included in order specified
+ let newvars = optVariables opt ++ [("include-after",text)]
+ return opt { optVariables = newvars,
+ optStandalone = True })
"FILENAME")
"" -- "File to include after document body"
@@ -375,28 +451,38 @@ options =
(ReqArg
(\arg opt -> do
text <- readFile arg
- return opt { optCustomHeader = text,
- optStandalone = True })
+ let newVars = ("legacy-header", text) : optVariables opt
+ return opt { optVariables = newVars
+ , optStandalone = True })
"FILENAME")
"" -- "File to use for custom header (implies -s)"
, Option "T" ["title-prefix"]
(ReqArg
- (\arg opt -> return opt { optTitlePrefix = arg,
- optStandalone = True })
+ (\arg opt -> do
+ let newvars = ("title-prefix", arg) : optVariables opt
+ return opt { optVariables = newvars,
+ optStandalone = True })
"STRING")
"" -- "String to prefix to HTML window title"
- , Option "D" ["print-default-header"]
+ , Option "" ["reference-odt"]
+ (ReqArg
+ (\arg opt -> do
+ return opt { optReferenceODT = Just arg })
+ "FILENAME")
+ "" -- "Path of custom reference.odt"
+
+ , Option "D" ["print-default-template"]
(ReqArg
(\arg _ -> do
- let header = case (lookup arg writers) of
- Just (_, h) -> h
- Nothing -> error ("Unknown reader: " ++ arg)
- hPutStr stdout header
+ templ <- getDefaultTemplate Nothing arg
+ case templ of
+ Right t -> hPutStr stdout t
+ Left e -> error $ show e
exitWith ExitSuccess)
"FORMAT")
- "" -- "Print default header for FORMAT"
+ "" -- "Print default template for FORMAT"
#ifdef _CITEPROC
, Option "" ["biblio"]
(ReqArg
@@ -414,6 +500,12 @@ options =
"FILENAME")
""
#endif
+ , Option "" ["data-dir"]
+ (ReqArg
+ (\arg opt -> return opt { optDataDir = Just arg })
+ "DIRECTORY") -- "Directory containing pandoc data files."
+ ""
+
, Option "" ["dump-args"]
(NoArg
(\opt -> return opt { optDumpArgs = True }))
@@ -450,9 +542,9 @@ usageMessage programName = usageInfo
(intercalate ", " $ map fst writers) ++ "\nOptions:")
-- Determine default reader based on source file extensions
-defaultReaderName :: [FilePath] -> String
-defaultReaderName [] = "markdown"
-defaultReaderName (x:xs) =
+defaultReaderName :: String -> [FilePath] -> String
+defaultReaderName fallback [] = fallback
+defaultReaderName fallback (x:xs) =
case takeExtension (map toLower x) of
".xhtml" -> "html"
".html" -> "html"
@@ -463,7 +555,7 @@ defaultReaderName (x:xs) =
".rst" -> "rst"
".lhs" -> "markdown+lhs"
".native" -> "native"
- _ -> defaultReaderName xs
+ _ -> defaultReaderName fallback xs
-- Returns True if extension of first source is .lhs
lhsExtension :: [FilePath] -> Bool
@@ -510,8 +602,8 @@ main = do
unless (null errors) $
do name <- getProgName
- mapM_ (\e -> hPutStrLn stderr e) errors
- hPutStr stderr (usageMessage name options)
+ mapM_ (\e -> hPutStr stderr (name ++ ": ") >> hPutStr stderr e) errors
+ hPutStrLn stderr $ "Try " ++ name ++ " --help for more information."
exitWith $ ExitFailure 2
let defaultOpts' = if compatMode
@@ -529,18 +621,19 @@ main = do
, optReader = readerName
, optWriter = writerName
, optParseRaw = parseRaw
- , optCSS = css
+ , optVariables = variables
+ , optBefore = befores
+ , optAfter = afters
, optTableOfContents = toc
- , optIncludeInHeader = includeHeader
- , optIncludeBeforeBody = includeBefore
- , optIncludeAfterBody = includeAfter
- , optCustomHeader = customHeader
- , optTitlePrefix = titlePrefix
+ , optTransforms = transforms
+ , optTemplate = template
, optOutputFile = outputFile
, optNumberSections = numberSections
, optIncremental = incremental
+ , optXeTeX = xetex
, optSmart = smart
, optHTMLMathMethod = mathMethod
+ , optReferenceODT = referenceODT
, optDumpArgs = dumpArgs
, optIgnoreArgs = ignoreArgs
, optStrict = strict
@@ -550,6 +643,7 @@ main = do
, optEmailObfuscation = obfuscationMethod
, optIdentifierPrefix = idPrefix
, optIndentedCodeClasses = codeBlockClasses
+ , optDataDir = mbDataDir
#ifdef _CITEPROC
, optBiblioFile = biblioFile
, optBiblioFormat = biblioFormat
@@ -562,11 +656,27 @@ main = do
mapM_ (\arg -> hPutStrLn stdout arg) args
exitWith ExitSuccess
+ -- warn about deprecated options
+ case lookup "legacy-header" variables of
+ Just _ -> hPutStrLn stderr $
+ "Warning: The -C/--custom-header is deprecated.\n" ++
+ "Please transition to using --template instead."
+ Nothing -> return ()
+
let sources = if ignoreArgs then [] else args
+ datadir <- case mbDataDir of
+ Nothing -> catch
+ (liftM Just $ getAppUserDataDirectory "pandoc")
+ (const $ return Nothing)
+ Just _ -> return mbDataDir
+
-- assign reader and writer based on options and filenames
let readerName' = if null readerName
- then defaultReaderName sources
+ then let fallback = if any isURI sources
+ then "html"
+ else "markdown"
+ in defaultReaderName fallback sources
else readerName
let writerName' = if null writerName
@@ -577,9 +687,14 @@ main = do
Just r -> return r
Nothing -> error ("Unknown reader: " ++ readerName')
- (writer, defaultHeader) <- case (lookup writerName' writers) of
- Just (w,h) -> return (w, h)
- Nothing -> error ("Unknown writer: " ++ writerName')
+ writer <- case (lookup writerName' writers) of
+ Just r -> return r
+ Nothing -> error ("Unknown writer: " ++ writerName')
+
+ templ <- getDefaultTemplate datadir writerName'
+ let defaultTemplate = case templ of
+ Right t -> t
+ Left e -> error (show e)
environment <- getEnvironment
let columns = case lookup "COLUMNS" environment of
@@ -592,6 +707,21 @@ main = do
refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat
#endif
+ variables' <- if writerName' == "s5" && standalone'
+ then do
+ inc <- s5HeaderIncludes datadir
+ return $ ("header-includes", inc) : variables
+ else return variables
+
+ variables'' <- case mathMethod of
+ LaTeXMathML Nothing -> do
+ s <- readDataFile datadir $ "data" </> "LaTeXMathML.js"
+ return $ ("mathml-script", s) : variables'
+ MathML Nothing -> do
+ s <- readDataFile datadir $ "data"</>"MathMLinHTML.js"
+ return $ ("mathml-script", s) : variables'
+ _ -> return variables'
+
let startParserState =
defaultParserState { stateParseRaw = parseRaw,
stateTabStop = tabStop,
@@ -607,28 +737,22 @@ main = do
stateColumns = columns,
stateStrict = strict,
stateIndentedCodeClasses = codeBlockClasses }
- let csslink = if null css
- then ""
- else concatMap
- (\f -> "<link rel=\"stylesheet\" href=\"" ++
- f ++ "\" type=\"text/css\" media=\"all\" />\n")
- css
- let header = (if customHeader == "DEFAULT"
- then defaultHeader
- else customHeader) ++ csslink ++ includeHeader
let writerOptions = WriterOptions { writerStandalone = standalone',
- writerHeader = header,
- writerTitlePrefix = titlePrefix,
+ writerTemplate = if null template
+ then defaultTemplate
+ else template,
+ writerVariables = variables'',
+ writerIncludeBefore = concat befores,
+ writerIncludeAfter = concat afters,
writerTabStop = tabStop,
writerTableOfContents = toc &&
writerName' /= "s5",
writerHTMLMathMethod = mathMethod,
writerS5 = (writerName' == "s5"),
+ writerXeTeX = xetex,
writerIgnoreNotes = False,
writerIncremental = incremental,
writerNumberSections = numberSections,
- writerIncludeBefore = includeBefore,
- writerIncludeAfter = includeAfter,
writerStrictMarkdown = strict,
writerReferenceLinks = referenceLinks,
writerWrapText = wrap,
@@ -651,23 +775,29 @@ main = do
let readSources [] = mapM readSource ["-"]
readSources srcs = mapM readSource srcs
readSource "-" = getContents
- readSource src = readFile src
+ readSource src = case parseURI src of
+ Just u -> readURI u
+ Nothing -> readFile src
+ readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>=
+ return . toString -- treat all as UTF8
let convertTabs = tabFilter (if preserveTabs then 0 else tabStop)
doc <- fmap (reader startParserState . convertTabs . intercalate "\n") (readSources sources)
- doc' <- do
+ let doc' = foldr ($) doc transforms
+
+ doc'' <- do
#ifdef _CITEPROC
- processBiblio cslFile refs doc
+ processBiblio cslFile refs doc'
#else
- return doc
+ return doc'
#endif
- let writerOutput = writer writerOptions doc' ++ "\n"
+ let writerOutput = writer writerOptions doc'' ++ "\n"
case writerName' of
- "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative writerOutput
+ "odt" -> saveOpenDocumentAsODT datadir outputFile sourceDirRelative referenceODT writerOutput
_ -> if outputFile == "-"
then putStr writerOutput
else writeFile outputFile writerOutput