diff options
author | dr@jones.dk <dr@jones.dk> | 2011-02-04 00:01:35 +0100 |
---|---|---|
committer | dr@jones.dk <dr@jones.dk> | 2011-02-04 00:01:35 +0100 |
commit | 91179df4907bec919e0884019da785be1ceb01b3 (patch) | |
tree | 2a6655fb4ec4655c554ea17ad074859d707b7709 /src/pandoc.hs | |
parent | 1f6b4aee268fefc72c84bd305b10d4f9103901eb (diff) |
Imported Upstream version 1.8.0.1
Diffstat (limited to 'src/pandoc.hs')
-rw-r--r-- | src/pandoc.hs | 327 |
1 files changed, 168 insertions, 159 deletions
diff --git a/src/pandoc.hs b/src/pandoc.hs index ef38c0332..c0f457449 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -32,29 +32,27 @@ module Main where import Text.Pandoc import Text.Pandoc.S5 (s5HeaderIncludes) import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, - headerShift ) + headerShift, findDataFile, normalize ) #ifdef _HIGHLIGHTING import Text.Pandoc.Highlighting ( languages ) #endif -import System.Environment ( getArgs, getProgName, getEnvironment ) +import System.Environment ( getArgs, getProgName ) import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt -import Data.Char ( toLower, isDigit ) -import Data.List ( intercalate, isSuffixOf ) -import System.Directory ( getAppUserDataDirectory ) +import Data.Char ( toLower ) +import Data.List ( intercalate, isSuffixOf, isPrefixOf ) +import System.Directory ( getAppUserDataDirectory, doesFileExist ) import System.IO ( stdout, stderr ) import qualified Text.Pandoc.UTF8 as UTF8 -#ifdef _CITEPROC import Text.CSL import Text.Pandoc.Biblio -#endif import Control.Monad (when, unless, liftM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) -import Network.URI (parseURI, isURI) +import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 (toString, fromString) -import Codec.Binary.UTF8.String (decodeString) +import Data.ByteString.Lazy.UTF8 (toString ) +import Codec.Binary.UTF8.String (decodeString, encodeString) copyrightMessage :: String copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++ @@ -64,9 +62,7 @@ copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++ compileInfo :: String compileInfo = -#ifdef _CITEPROC "\nCompiled with citeproc support." ++ -#endif #ifdef _HIGHLIGHTING "\nCompiled with syntax highlighting support for:\n" ++ wrapWords 78 languages ++ @@ -84,47 +80,6 @@ wrapWords c = wrap' c c where then ",\n" ++ x ++ wrap' cols (cols - length x) xs else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs --- | Association list of formats and readers. -readers :: [(String, ParserState -> String -> Pandoc)] -readers = [("native" , readPandoc) - ,("markdown" , readMarkdown) - ,("markdown+lhs" , readMarkdown) - ,("rst" , readRST) - ,("rst+lhs" , readRST) - ,("html" , readHtml) - ,("latex" , readLaTeX) - ,("latex+lhs" , readLaTeX) - ] - --- | Reader for native Pandoc format. -readPandoc :: ParserState -> String -> Pandoc -readPandoc _ = read - --- | Association list of formats and writers. -writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] -writers = [("native" , writeNative) - ,("html" , writeHtmlString) - ,("html+lhs" , writeHtmlString) - ,("s5" , writeHtmlString) - ,("slidy" , writeHtmlString) - ,("docbook" , writeDocbook) - ,("opendocument" , writeOpenDocument) - ,("odt" , \_ _ -> "") - ,("epub" , \_ _ -> "") - ,("latex" , writeLaTeX) - ,("latex+lhs" , writeLaTeX) - ,("context" , writeConTeXt) - ,("texinfo" , writeTexinfo) - ,("man" , writeMan) - ,("markdown" , writeMarkdown) - ,("markdown+lhs" , writeMarkdown) - ,("plain" , writePlain) - ,("rst" , writeRST) - ,("rst+lhs" , writeRST) - ,("mediawiki" , writeMediaWiki) - ,("rtf" , writeRTF) - ] - isNonTextOutput :: String -> Bool isNonTextOutput = (`elem` ["odt","epub"]) @@ -147,6 +102,8 @@ data Opt = Opt , optOffline :: Bool -- ^ Make slideshow accessible offline , optXeTeX :: Bool -- ^ Format latex for xetex , optSmart :: Bool -- ^ Use smart typography + , optHtml5 :: Bool -- ^ Produce HTML5 in HTML + , optChapters :: Bool -- ^ Use chapter for top-level sects , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet @@ -156,17 +113,16 @@ data Opt = Opt , optStrict :: Bool -- ^ Use strict markdown syntax , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optWrapText :: Bool -- ^ Wrap text - , optSanitizeHTML :: Bool -- ^ Sanitize HTML + , optColumns :: Int -- ^ Line length in characters , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: String , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath -#ifdef _CITEPROC - , optBiblioFile :: String - , optBiblioFormat :: String - , optCslFile :: String -#endif + , optCiteMethod :: CiteMethod -- ^ Method to output cites + , optBibliography :: [String] + , optCslFile :: FilePath + , optListings :: Bool -- ^ Use listings package for code blocks } -- | Defaults for command-line options. @@ -189,6 +145,8 @@ defaultOpts = Opt , optOffline = False , optXeTeX = False , optSmart = False + , optHtml5 = False + , optChapters = False , optHTMLMathMethod = PlainMath , optReferenceODT = Nothing , optEPUBStylesheet = Nothing @@ -198,17 +156,16 @@ defaultOpts = Opt , optStrict = False , optReferenceLinks = False , optWrapText = True - , optSanitizeHTML = False + , optColumns = 72 , optPlugins = [] , optEmailObfuscation = JavascriptObfuscation , optIdentifierPrefix = "" , optIndentedCodeClasses = [] , optDataDir = Nothing -#ifdef _CITEPROC - , optBiblioFile = [] - , optBiblioFormat = [] - , optCslFile = [] -#endif + , optCiteMethod = Citeproc + , optBibliography = [] + , optCslFile = "" + , optListings = False } -- | A list of functions, each transforming the options data structure @@ -219,13 +176,13 @@ options = (ReqArg (\arg opt -> return opt { optReader = map toLower arg }) "FORMAT") - "" -- ("(" ++ (intercalate ", " $ map fst readers) ++ ")") + "" , Option "tw" ["to","write"] (ReqArg (\arg opt -> return opt { optWriter = map toLower arg }) "FORMAT") - "" -- ("(" ++ (intercalate ", " $ map fst writers) ++ ")") + "" , Option "s" ["standalone"] (NoArg @@ -245,8 +202,14 @@ options = , Option "" ["tab-stop"] (ReqArg - (\arg opt -> return opt { optTabStop = (read arg) } ) - "TABSTOP") + (\arg opt -> + case reads arg of + [(t,"")] | t > 0 -> return opt { optTabStop = t } + _ -> do + UTF8.hPutStrLn stderr $ + "tab-stop must be a number greater than 0" + exitWith $ ExitFailure 31) + "NUMBER") "" -- "Tab stop (default 4)" , Option "" ["strict"] @@ -254,6 +217,12 @@ options = (\opt -> return opt { optStrict = True } )) "" -- "Disable markdown syntax extensions" + , Option "" ["normalize"] + (NoArg + (\opt -> return opt { optTransforms = + normalize : optTransforms opt } )) + "" -- "Normalize the Pandoc AST" + , Option "" ["reference-links"] (NoArg (\opt -> return opt { optReferenceLinks = True } )) @@ -269,6 +238,11 @@ options = (\opt -> return opt { optSmart = True })) "" -- "Use smart quotes, dashes, and ellipses" + , Option "5" ["html5"] + (NoArg + (\opt -> return opt { optHtml5 = True })) + "" -- "Produce HTML5 in HTML output" + , Option "m" ["latexmathml", "asciimathml"] (OptArg (\arg opt -> @@ -309,6 +283,12 @@ options = "URL") "" -- "Use jsMath for HTML math" + , Option "" ["mathjax"] + (ReqArg + (\arg opt -> return opt { optHTMLMathMethod = MathJax arg}) + "URL") + "" -- "Use MathJax for HTML math" + , Option "" ["gladtex"] (NoArg (\opt -> return opt { optHTMLMathMethod = GladTeX })) @@ -330,11 +310,21 @@ options = (\opt -> return opt { optXeTeX = True })) "" -- "Format latex for processing by XeTeX" + , Option "" ["chapters"] + (NoArg + (\opt -> return opt { optChapters = True })) + "" -- "Use chapter for top-level sections in LaTeX, DocBook" + , Option "N" ["number-sections"] (NoArg (\opt -> return opt { optNumberSections = True })) "" -- "Number sections in LaTeX" + , Option "" ["listings"] + (NoArg + (\opt -> return opt { optListings = True })) + "" -- "Use listings package for LaTeX code blocks" + , Option "" ["section-divs"] (NoArg (\opt -> return opt { optSectionDivs = True })) @@ -345,10 +335,17 @@ options = (\opt -> return opt { optWrapText = False })) "" -- "Do not wrap text in output" - , Option "" ["sanitize-html"] - (NoArg - (\opt -> return opt { optSanitizeHTML = True })) - "" -- "Sanitize HTML" + , Option "" ["columns"] + (ReqArg + (\arg opt -> + case reads arg of + [(t,"")] | t > 0 -> return opt { optColumns = t } + _ -> do + UTF8.hPutStrLn stderr $ + "columns must be a number greater than 0" + exitWith $ ExitFailure 33) + "NUMBER") + "" -- "Length of line in characters" , Option "" ["email-obfuscation"] (ReqArg @@ -383,17 +380,18 @@ options = , Option "" ["base-header-level"] (ReqArg - (\arg opt -> do - if all isDigit arg && (read arg :: Int) >= 1 - then do - let oldTransforms = optTransforms opt - let shift = read arg - 1 - return opt{ optTransforms = - headerShift shift : oldTransforms } - else do - UTF8.hPutStrLn stderr $ "base-header-level must be a number >= 1" - exitWith $ ExitFailure 19) - "LEVEL") + (\arg opt -> + case reads arg of + [(t,"")] | t > 0 -> do + let oldTransforms = optTransforms opt + let shift = t - 1 + return opt{ optTransforms = + headerShift shift : oldTransforms } + _ -> do + UTF8.hPutStrLn stderr $ + "base-header-level must be a number > 0" + exitWith $ ExitFailure 19) + "NUMBER") "" -- "Headers base level" , Option "" ["template"] @@ -415,7 +413,7 @@ options = _ -> do UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)" exitWith $ ExitFailure 17) - "FILENAME") + "KEY:VALUE") "" -- "Use custom template" , Option "c" ["css"] @@ -461,16 +459,6 @@ options = "FILENAME") "" -- "File to include after document body" - , Option "C" ["custom-header"] - (ReqArg - (\arg opt -> do - text <- UTF8.readFile arg - let newVars = ("legacy-header", text) : optVariables opt - return opt { optVariables = newVars - , optStandalone = True }) - "FILENAME") - "" -- "File to use for custom header (implies -s)" - , Option "T" ["title-prefix"] (ReqArg (\arg opt -> do @@ -513,23 +501,29 @@ options = exitWith ExitSuccess) "FORMAT") "" -- "Print default template for FORMAT" -#ifdef _CITEPROC - , Option "" ["biblio"] + + , Option "" ["bibliography"] (ReqArg - (\arg opt -> return opt { optBiblioFile = arg} ) + (\arg opt -> return opt { optBibliography = (optBibliography opt) ++ [arg] }) "FILENAME") "" - , Option "" ["biblio-format"] - (ReqArg - (\arg opt -> return opt { optBiblioFormat = arg} ) - "STRING") - "" + , Option "" ["csl"] (ReqArg - (\arg opt -> return opt { optCslFile = arg} ) + (\arg opt -> return opt { optCslFile = arg }) "FILENAME") "" -#endif + + , Option "" ["natbib"] + (NoArg + (\opt -> return opt { optCiteMethod = Natbib })) + "" -- "Use natbib cite commands in LaTeX output" + + , Option "" ["biblatex"] + (NoArg + (\opt -> return opt { optCiteMethod = Biblatex })) + "" -- "Use biblatex cite commands in LaTeX output" + , Option "" ["data-dir"] (ReqArg (\arg opt -> return opt { optDataDir = Just arg }) @@ -569,7 +563,7 @@ usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ (intercalate ", " $ map fst readers) ++ "\nOutput formats: " ++ - (intercalate ", " $ map fst writers) ++ "\nOptions:") + (intercalate ", " $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:") -- Determine default reader based on source file extensions defaultReaderName :: String -> [FilePath] -> String @@ -584,7 +578,9 @@ defaultReaderName fallback (x:xs) = ".ltx" -> "latex" ".rst" -> "rst" ".lhs" -> "markdown+lhs" + ".textile" -> "textile" ".native" -> "native" + ".json" -> "json" _ -> defaultReaderName fallback xs -- Returns True if extension of first source is .lhs @@ -607,16 +603,19 @@ defaultWriterName x = ".rst" -> "rst" ".s5" -> "s5" ".native" -> "native" + ".json" -> "json" ".txt" -> "markdown" ".text" -> "markdown" ".md" -> "markdown" ".markdown" -> "markdown" + ".textile" -> "textile" ".lhs" -> "markdown+lhs" ".texi" -> "texinfo" ".texinfo" -> "texinfo" ".db" -> "docbook" ".odt" -> "odt" ".epub" -> "epub" + ".org" -> "org" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -663,6 +662,8 @@ main = do , optOffline = offline , optXeTeX = xetex , optSmart = smart + , optHtml5 = html5 + , optChapters = chapters , optHTMLMathMethod = mathMethod , optReferenceODT = referenceODT , optEPUBStylesheet = epubStylesheet @@ -672,16 +673,15 @@ main = do , optStrict = strict , optReferenceLinks = referenceLinks , optWrapText = wrap - , optSanitizeHTML = sanitize + , optColumns = columns , optEmailObfuscation = obfuscationMethod , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses , optDataDir = mbDataDir -#ifdef _CITEPROC - , optBiblioFile = biblioFile - , optBiblioFormat = biblioFormat - , optCslFile = cslFile -#endif + , optBibliography = reffiles + , optCslFile = cslfile + , optCiteMethod = citeMethod + , optListings = listings } = opts when dumpArgs $ @@ -689,13 +689,6 @@ main = do mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args exitWith ExitSuccess - -- warn about deprecated options - case lookup "legacy-header" variables of - Just _ -> UTF8.hPutStrLn stderr $ - "Warning: The -C/--custom-header is deprecated.\n" ++ - "Please transition to using --template instead." - Nothing -> return () - let sources = if ignoreArgs then [] else args datadir <- case mbDataDir of @@ -720,30 +713,13 @@ main = do Just r -> return r Nothing -> error ("Unknown reader: " ++ readerName') - let writer = case lookup writerName' writers of - Just _ | writerName' == "epub" -> writeEPUB epubStylesheet - Just _ | writerName' == "odt" -> writeODT referenceODT - Just r -> \o -> - return . fromString . r o - Nothing -> error $ "Unknown writer: " ++ - writerName' - templ <- getDefaultTemplate datadir writerName' let defaultTemplate = case templ of Right t -> t Left e -> error (show e) - environment <- getEnvironment - let columns = case lookup "COLUMNS" environment of - Just cols -> read cols - Nothing -> stateColumns defaultParserState - let standalone' = standalone || isNonTextOutput writerName' -#ifdef _CITEPROC - refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat -#endif - variables' <- case (writerName', standalone', offline) of ("s5", True, True) -> do inc <- s5HeaderIncludes datadir @@ -766,6 +742,11 @@ main = do return $ ("mathml-script", s) : variables' _ -> return variables' + refs <- mapM (\f -> catch (readBiblioFile f) $ \e -> do + UTF8.hPutStrLn stderr $ "Error reading bibliography `" ++ f ++ "'" + UTF8.hPutStrLn stderr $ show e + exitWith (ExitFailure 23)) reffiles >>= \rs -> return $ concat rs + let sourceDir = if null sources then "." else takeDirectory (head sources) @@ -778,18 +759,16 @@ main = do let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, - stateSanitizeHTML = sanitize, stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' || lhsExtension sources, stateStandalone = standalone', -#ifdef _CITEPROC - stateCitations = map citeKey refs, -#endif + stateCitations = map refId refs, stateSmart = smart || writerName' `elem` - ["latex", "context", "man"], + ["latex", "context", "latex+lhs", "man"], stateColumns = columns, stateStrict = strict, - stateIndentedCodeClasses = codeBlockClasses } + stateIndentedCodeClasses = codeBlockClasses, + stateApplyMacros = writerName' `notElem` ["latex", "latex+lhs"] } let writerOptions = WriterOptions { writerStandalone = standalone', writerTemplate = if null template @@ -804,12 +783,15 @@ main = do writerSlideVariant = slideVariant, writerIncremental = incremental, writerXeTeX = xetex, + writerCiteMethod = citeMethod, + writerBiblioFiles = reffiles, writerIgnoreNotes = False, writerNumberSections = numberSections, writerSectionDivs = sectionDivs, writerStrictMarkdown = strict, writerReferenceLinks = referenceLinks, writerWrapText = wrap, + writerColumns = columns, writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' || lhsExtension [outputFile], writerEmailObfuscation = if strict @@ -817,7 +799,11 @@ main = do else obfuscationMethod, writerIdentifierPrefix = idPrefix, writerSourceDirectory = sourceDir, - writerUserDataDir = datadir } + writerUserDataDir = datadir, + writerHtml5 = html5 && + "html" `isPrefixOf` writerName', + writerChapters = chapters, + writerListings = listings } when (isNonTextOutput writerName' && outputFile == "-") $ do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++ @@ -828,8 +814,9 @@ main = do readSources srcs = mapM readSource srcs readSource "-" = UTF8.getContents readSource src = case parseURI src of - Just u -> readURI u - Nothing -> UTF8.readFile src + Just u | uriScheme u `elem` ["http:","https:"] -> + readURI u + _ -> UTF8.readFile src readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>= return . toString -- treat all as UTF8 @@ -837,17 +824,39 @@ main = do doc <- fmap (reader startParserState . convertTabs . intercalate "\n") (readSources sources) - let doc' = foldr ($) doc transforms - - doc'' <- do -#ifdef _CITEPROC - processBiblio cslFile refs doc' -#else - return doc' -#endif - - writerOutput <- writer writerOptions doc'' - - if outputFile == "-" - then B.putStr writerOutput - else B.writeFile outputFile writerOutput + let doc0 = foldr ($) doc transforms + + doc1 <- if writerName' == "rtf" + then bottomUpM rtfEmbedImage doc0 + else return doc0 + + doc2 <- do + if citeMethod == Citeproc && not (null refs) + then do + csldir <- getAppUserDataDirectory "csl" + cslfile' <- if null cslfile + then findDataFile datadir "default.csl" + else do + ex <- doesFileExist cslfile + if ex + then return cslfile + else findDataFile datadir $ + replaceDirectory + (replaceExtension cslfile "csl") + csldir + processBiblio cslfile' refs doc1 + else return doc1 + + case lookup writerName' writers of + Nothing | writerName' == "epub" -> + writeEPUB epubStylesheet writerOptions doc2 + >>= B.writeFile (encodeString outputFile) + Nothing | writerName' == "odt" -> + writeODT referenceODT writerOptions doc2 + >>= B.writeFile (encodeString outputFile) + Just r -> writerFn outputFile result + where writerFn "-" = UTF8.putStr + writerFn f = UTF8.writeFile f + result = r writerOptions doc2 ++ + ['\n' | not standalone'] + Nothing -> error $ "Unknown writer: " ++ writerName' |