diff options
author | dr@jones.dk <dr@jones.dk> | 2010-03-22 12:40:10 +0100 |
---|---|---|
committer | dr@jones.dk <dr@jones.dk> | 2010-03-22 12:40:10 +0100 |
commit | 96d4f941026a8eca3ba211facdc8ce66b2ab38bb (patch) | |
tree | aae68ec157e85fe9590d1dd5216fc6b7916e08d3 /src/pandoc.hs | |
parent | 789d0772d8b5d9c066fb8624bd51576cbde5e30b (diff) |
Imported Upstream version 1.5.0.1
Diffstat (limited to 'src/pandoc.hs')
-rw-r--r-- | src/pandoc.hs | 318 |
1 files changed, 224 insertions, 94 deletions
diff --git a/src/pandoc.hs b/src/pandoc.hs index c2cc9b75e..7e1bfc284 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {- Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> @@ -32,7 +31,8 @@ writers. module Main where import Text.Pandoc import Text.Pandoc.ODT -import Text.Pandoc.Shared ( HTMLMathMethod (..), tabFilter, ObfuscationMethod (..) ) +import Text.Pandoc.Writers.S5 (s5HeaderIncludes) +import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile ) #ifdef _HIGHLIGHTING import Text.Pandoc.Highlighting ( languages ) #endif @@ -41,19 +41,29 @@ import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt import Data.Maybe ( fromMaybe ) -import Data.Char ( toLower ) +import Data.Char ( toLower, isDigit ) import Data.List ( intercalate, isSuffixOf ) -import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) +import System.Directory ( getAppUserDataDirectory ) import System.IO ( stdout, stderr ) +-- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv +-- So we use System.IO.UTF8 only if we have an earlier version +#if MIN_VERSION_base(4,2,0) +import System.IO ( hPutStr, hPutStrLn ) +#else +import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) import System.IO.UTF8 +#endif #ifdef _CITEPROC import Text.CSL import Text.Pandoc.Biblio #endif -import Control.Monad (when, unless) +import Control.Monad (when, unless, liftM) +import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) +import Network.URI (parseURI, isURI) +import Data.ByteString.Lazy.UTF8 (toString) copyrightMessage :: String -copyrightMessage = "\nCopyright (C) 2006-8 John MacFarlane\n" ++ +copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++ "Web: http://johnmacfarlane.net/pandoc\n" ++ "This is free software; see the source for copying conditions. There is no\n" ++ "warranty, not even for merchantability or fitness for a particular purpose." @@ -96,26 +106,27 @@ readers = [("native" , readPandoc) readPandoc :: ParserState -> String -> Pandoc readPandoc _ = read --- | Association list of formats and pairs of writers and default headers. -writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ] -writers = [("native" , (writeDoc, "")) - ,("html" , (writeHtmlString, "")) - ,("html+lhs" , (writeHtmlString, "")) - ,("s5" , (writeS5String, defaultS5Header)) - ,("docbook" , (writeDocbook, defaultDocbookHeader)) - ,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader)) - ,("odt" , (writeOpenDocument, defaultOpenDocumentHeader)) - ,("latex" , (writeLaTeX, defaultLaTeXHeader)) - ,("latex+lhs" , (writeLaTeX, defaultLaTeXHeader)) - ,("context" , (writeConTeXt, defaultConTeXtHeader)) - ,("texinfo" , (writeTexinfo, "")) - ,("man" , (writeMan, "")) - ,("markdown" , (writeMarkdown, "")) - ,("markdown+lhs" , (writeMarkdown, "")) - ,("rst" , (writeRST, "")) - ,("rst+lhs" , (writeRST, "")) - ,("mediawiki" , (writeMediaWiki, "")) - ,("rtf" , (writeRTF, defaultRTFHeader)) +-- | Association list of formats and writers. +writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] +writers = [("native" , writeDoc) + ,("html" , writeHtmlString) + ,("html+lhs" , writeHtmlString) + ,("s5" , writeS5String) + ,("docbook" , writeDocbook) + ,("opendocument" , writeOpenDocument) + ,("odt" , writeOpenDocument) + ,("latex" , writeLaTeX) + ,("latex+lhs" , writeLaTeX) + ,("context" , writeConTeXt) + ,("texinfo" , writeTexinfo) + ,("man" , writeMan) + ,("markdown" , writeMarkdown) + ,("markdown+lhs" , writeMarkdown) + ,("plain" , writePlain) + ,("rst" , writeRST) + ,("rst+lhs" , writeRST) + ,("mediawiki" , writeMediaWiki) + ,("rtf" , writeRTF) ] isNonTextOutput :: String -> Bool @@ -125,6 +136,12 @@ isNonTextOutput = (`elem` ["odt"]) writeDoc :: WriterOptions -> Pandoc -> String writeDoc _ = prettyPandoc +headerShift :: Int -> Pandoc -> Pandoc +headerShift n = processWith shift + where shift :: Block -> Block + shift (Header level inner) = Header (level + n) inner + shift x = x + -- | Data structure for command line options. data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab @@ -133,18 +150,19 @@ data Opt = Opt , optReader :: String -- ^ Reader format , optWriter :: String -- ^ Writer format , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX - , optCSS :: [String] -- ^ CSS file to link to , optTableOfContents :: Bool -- ^ Include table of contents - , optIncludeInHeader :: String -- ^ File to include in header - , optIncludeBeforeBody :: String -- ^ File to include at top of body - , optIncludeAfterBody :: String -- ^ File to include at end of body - , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" - , optTitlePrefix :: String -- ^ Optional prefix for HTML title + , optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply + , optTemplate :: String -- ^ Custom template + , optVariables :: [(String,String)] -- ^ Template variables to set + , optBefore :: [String] -- ^ Texts to include before body + , optAfter :: [String] -- ^ Texts to include after body , optOutputFile :: String -- ^ Name of output file , optNumberSections :: Bool -- ^ Number sections in LaTeX , optIncremental :: Bool -- ^ Use incremental lists in S5 + , optXeTeX :: Bool -- ^ Format latex for xetex , optSmart :: Bool -- ^ Use smart typography , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math + , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optStrict :: Bool -- ^ Use strict markdown syntax @@ -155,6 +173,7 @@ data Opt = Opt , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: String , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks + , optDataDir :: Maybe FilePath #ifdef _CITEPROC , optBiblioFile :: String , optBiblioFormat :: String @@ -171,18 +190,19 @@ defaultOpts = Opt , optReader = "" -- null for default reader , optWriter = "" -- null for default writer , optParseRaw = False - , optCSS = [] , optTableOfContents = False - , optIncludeInHeader = "" - , optIncludeBeforeBody = "" - , optIncludeAfterBody = "" - , optCustomHeader = "DEFAULT" - , optTitlePrefix = "" + , optTransforms = [] + , optTemplate = "" + , optVariables = [] + , optBefore = [] + , optAfter = [] , optOutputFile = "-" -- "-" means stdout , optNumberSections = False , optIncremental = False + , optXeTeX = False , optSmart = False , optHTMLMathMethod = PlainMath + , optReferenceODT = Nothing , optDumpArgs = False , optIgnoreArgs = False , optStrict = False @@ -193,6 +213,7 @@ defaultOpts = Opt , optEmailObfuscation = JavascriptObfuscation , optIdentifierPrefix = "" , optIndentedCodeClasses = [] + , optDataDir = Nothing #ifdef _CITEPROC , optBiblioFile = [] , optBiblioFormat = [] @@ -260,11 +281,18 @@ options = , Option "m" ["latexmathml", "asciimathml"] (OptArg - (\arg opt -> return opt { optHTMLMathMethod = - LaTeXMathML arg }) + (\arg opt -> + return opt { optHTMLMathMethod = LaTeXMathML arg }) "URL") "" -- "Use LaTeXMathML script in html output" + , Option "" ["mathml"] + (OptArg + (\arg opt -> + return opt { optHTMLMathMethod = MathML arg }) + "URL") + "" -- "Use mathml for HTML math" + , Option "" ["mimetex"] (OptArg (\arg opt -> return opt { optHTMLMathMethod = MimeTeX @@ -288,6 +316,11 @@ options = (\opt -> return opt { optIncremental = True })) "" -- "Make list items display incrementally in S5" + , Option "" ["xetex"] + (NoArg + (\opt -> return opt { optXeTeX = True })) + "" -- "Format latex for processing by XeTeX" + , Option "N" ["number-sections"] (NoArg (\opt -> return opt { optNumberSections = True })) @@ -334,21 +367,60 @@ options = (\opt -> return opt { optTableOfContents = True })) "" -- "Include table of contents" + , Option "" ["base-header-level"] + (ReqArg + (\arg opt -> do + if all isDigit arg && (read arg :: Int) >= 1 + then do + let oldTransforms = optTransforms opt + let shift = read arg - 1 + return opt{ optTransforms = + headerShift shift : oldTransforms } + else do + hPutStrLn stderr $ "base-header-level must be a number >= 1" + exitWith $ ExitFailure 19) + "LEVEL") + "" -- "Headers base level" + + , Option "" ["template"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt{ optTemplate = text, + optStandalone = True }) + "FILENAME") + "" -- "Use custom template" + + , Option "V" ["variable"] + (ReqArg + (\arg opt -> + case break (`elem` ":=") arg of + (k,_:v) -> do + let newvars = optVariables opt ++ [(k,v)] + return opt{ optVariables = newvars } + _ -> do + hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)" + exitWith $ ExitFailure 17) + "FILENAME") + "" -- "Use custom template" + , Option "c" ["css"] (ReqArg (\arg opt -> do - let old = optCSS opt - return opt { optCSS = old ++ [arg], + -- add new link to end, so it is included in proper order + let newvars = optVariables opt ++ [("css",arg)] + return opt { optVariables = newvars, optStandalone = True }) - "CSS") + "URL") "" -- "Link to CSS style sheet" , Option "H" ["include-in-header"] (ReqArg (\arg opt -> do - let old = optIncludeInHeader opt text <- readFile arg - return opt { optIncludeInHeader = old ++ text, + -- add new ones to end, so they're included in order specified + let newvars = optVariables opt ++ [("header-includes",text)] + return opt { optVariables = newvars, optStandalone = True }) "FILENAME") "" -- "File to include at end of header (implies -s)" @@ -356,18 +428,22 @@ options = , Option "B" ["include-before-body"] (ReqArg (\arg opt -> do - let old = optIncludeBeforeBody opt text <- readFile arg - return opt { optIncludeBeforeBody = old ++ text }) + -- add new ones to end, so they're included in order specified + let newvars = optVariables opt ++ [("include-before",text)] + return opt { optVariables = newvars, + optStandalone = True }) "FILENAME") "" -- "File to include before document body" , Option "A" ["include-after-body"] (ReqArg (\arg opt -> do - let old = optIncludeAfterBody opt text <- readFile arg - return opt { optIncludeAfterBody = old ++ text }) + -- add new ones to end, so they're included in order specified + let newvars = optVariables opt ++ [("include-after",text)] + return opt { optVariables = newvars, + optStandalone = True }) "FILENAME") "" -- "File to include after document body" @@ -375,28 +451,38 @@ options = (ReqArg (\arg opt -> do text <- readFile arg - return opt { optCustomHeader = text, - optStandalone = True }) + let newVars = ("legacy-header", text) : optVariables opt + return opt { optVariables = newVars + , optStandalone = True }) "FILENAME") "" -- "File to use for custom header (implies -s)" , Option "T" ["title-prefix"] (ReqArg - (\arg opt -> return opt { optTitlePrefix = arg, - optStandalone = True }) + (\arg opt -> do + let newvars = ("title-prefix", arg) : optVariables opt + return opt { optVariables = newvars, + optStandalone = True }) "STRING") "" -- "String to prefix to HTML window title" - , Option "D" ["print-default-header"] + , Option "" ["reference-odt"] + (ReqArg + (\arg opt -> do + return opt { optReferenceODT = Just arg }) + "FILENAME") + "" -- "Path of custom reference.odt" + + , Option "D" ["print-default-template"] (ReqArg (\arg _ -> do - let header = case (lookup arg writers) of - Just (_, h) -> h - Nothing -> error ("Unknown reader: " ++ arg) - hPutStr stdout header + templ <- getDefaultTemplate Nothing arg + case templ of + Right t -> hPutStr stdout t + Left e -> error $ show e exitWith ExitSuccess) "FORMAT") - "" -- "Print default header for FORMAT" + "" -- "Print default template for FORMAT" #ifdef _CITEPROC , Option "" ["biblio"] (ReqArg @@ -414,6 +500,12 @@ options = "FILENAME") "" #endif + , Option "" ["data-dir"] + (ReqArg + (\arg opt -> return opt { optDataDir = Just arg }) + "DIRECTORY") -- "Directory containing pandoc data files." + "" + , Option "" ["dump-args"] (NoArg (\opt -> return opt { optDumpArgs = True })) @@ -450,9 +542,9 @@ usageMessage programName = usageInfo (intercalate ", " $ map fst writers) ++ "\nOptions:") -- Determine default reader based on source file extensions -defaultReaderName :: [FilePath] -> String -defaultReaderName [] = "markdown" -defaultReaderName (x:xs) = +defaultReaderName :: String -> [FilePath] -> String +defaultReaderName fallback [] = fallback +defaultReaderName fallback (x:xs) = case takeExtension (map toLower x) of ".xhtml" -> "html" ".html" -> "html" @@ -463,7 +555,7 @@ defaultReaderName (x:xs) = ".rst" -> "rst" ".lhs" -> "markdown+lhs" ".native" -> "native" - _ -> defaultReaderName xs + _ -> defaultReaderName fallback xs -- Returns True if extension of first source is .lhs lhsExtension :: [FilePath] -> Bool @@ -510,8 +602,8 @@ main = do unless (null errors) $ do name <- getProgName - mapM_ (\e -> hPutStrLn stderr e) errors - hPutStr stderr (usageMessage name options) + mapM_ (\e -> hPutStr stderr (name ++ ": ") >> hPutStr stderr e) errors + hPutStrLn stderr $ "Try " ++ name ++ " --help for more information." exitWith $ ExitFailure 2 let defaultOpts' = if compatMode @@ -529,18 +621,19 @@ main = do , optReader = readerName , optWriter = writerName , optParseRaw = parseRaw - , optCSS = css + , optVariables = variables + , optBefore = befores + , optAfter = afters , optTableOfContents = toc - , optIncludeInHeader = includeHeader - , optIncludeBeforeBody = includeBefore - , optIncludeAfterBody = includeAfter - , optCustomHeader = customHeader - , optTitlePrefix = titlePrefix + , optTransforms = transforms + , optTemplate = template , optOutputFile = outputFile , optNumberSections = numberSections , optIncremental = incremental + , optXeTeX = xetex , optSmart = smart , optHTMLMathMethod = mathMethod + , optReferenceODT = referenceODT , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs , optStrict = strict @@ -550,6 +643,7 @@ main = do , optEmailObfuscation = obfuscationMethod , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses + , optDataDir = mbDataDir #ifdef _CITEPROC , optBiblioFile = biblioFile , optBiblioFormat = biblioFormat @@ -562,11 +656,27 @@ main = do mapM_ (\arg -> hPutStrLn stdout arg) args exitWith ExitSuccess + -- warn about deprecated options + case lookup "legacy-header" variables of + Just _ -> hPutStrLn stderr $ + "Warning: The -C/--custom-header is deprecated.\n" ++ + "Please transition to using --template instead." + Nothing -> return () + let sources = if ignoreArgs then [] else args + datadir <- case mbDataDir of + Nothing -> catch + (liftM Just $ getAppUserDataDirectory "pandoc") + (const $ return Nothing) + Just _ -> return mbDataDir + -- assign reader and writer based on options and filenames let readerName' = if null readerName - then defaultReaderName sources + then let fallback = if any isURI sources + then "html" + else "markdown" + in defaultReaderName fallback sources else readerName let writerName' = if null writerName @@ -577,9 +687,14 @@ main = do Just r -> return r Nothing -> error ("Unknown reader: " ++ readerName') - (writer, defaultHeader) <- case (lookup writerName' writers) of - Just (w,h) -> return (w, h) - Nothing -> error ("Unknown writer: " ++ writerName') + writer <- case (lookup writerName' writers) of + Just r -> return r + Nothing -> error ("Unknown writer: " ++ writerName') + + templ <- getDefaultTemplate datadir writerName' + let defaultTemplate = case templ of + Right t -> t + Left e -> error (show e) environment <- getEnvironment let columns = case lookup "COLUMNS" environment of @@ -592,6 +707,21 @@ main = do refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat #endif + variables' <- if writerName' == "s5" && standalone' + then do + inc <- s5HeaderIncludes datadir + return $ ("header-includes", inc) : variables + else return variables + + variables'' <- case mathMethod of + LaTeXMathML Nothing -> do + s <- readDataFile datadir $ "data" </> "LaTeXMathML.js" + return $ ("mathml-script", s) : variables' + MathML Nothing -> do + s <- readDataFile datadir $ "data"</>"MathMLinHTML.js" + return $ ("mathml-script", s) : variables' + _ -> return variables' + let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, @@ -607,28 +737,22 @@ main = do stateColumns = columns, stateStrict = strict, stateIndentedCodeClasses = codeBlockClasses } - let csslink = if null css - then "" - else concatMap - (\f -> "<link rel=\"stylesheet\" href=\"" ++ - f ++ "\" type=\"text/css\" media=\"all\" />\n") - css - let header = (if customHeader == "DEFAULT" - then defaultHeader - else customHeader) ++ csslink ++ includeHeader let writerOptions = WriterOptions { writerStandalone = standalone', - writerHeader = header, - writerTitlePrefix = titlePrefix, + writerTemplate = if null template + then defaultTemplate + else template, + writerVariables = variables'', + writerIncludeBefore = concat befores, + writerIncludeAfter = concat afters, writerTabStop = tabStop, writerTableOfContents = toc && writerName' /= "s5", writerHTMLMathMethod = mathMethod, writerS5 = (writerName' == "s5"), + writerXeTeX = xetex, writerIgnoreNotes = False, writerIncremental = incremental, writerNumberSections = numberSections, - writerIncludeBefore = includeBefore, - writerIncludeAfter = includeAfter, writerStrictMarkdown = strict, writerReferenceLinks = referenceLinks, writerWrapText = wrap, @@ -651,23 +775,29 @@ main = do let readSources [] = mapM readSource ["-"] readSources srcs = mapM readSource srcs readSource "-" = getContents - readSource src = readFile src + readSource src = case parseURI src of + Just u -> readURI u + Nothing -> readFile src + readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>= + return . toString -- treat all as UTF8 let convertTabs = tabFilter (if preserveTabs then 0 else tabStop) doc <- fmap (reader startParserState . convertTabs . intercalate "\n") (readSources sources) - doc' <- do + let doc' = foldr ($) doc transforms + + doc'' <- do #ifdef _CITEPROC - processBiblio cslFile refs doc + processBiblio cslFile refs doc' #else - return doc + return doc' #endif - let writerOutput = writer writerOptions doc' ++ "\n" + let writerOutput = writer writerOptions doc'' ++ "\n" case writerName' of - "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative writerOutput + "odt" -> saveOpenDocumentAsODT datadir outputFile sourceDirRelative referenceODT writerOutput _ -> if outputFile == "-" then putStr writerOutput else writeFile outputFile writerOutput |