diff options
Diffstat (limited to 'src/Text/Pandoc/PDF.hs')
-rw-r--r-- | src/Text/Pandoc/PDF.hs | 351 |
1 files changed, 216 insertions, 135 deletions
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 9faff1816..5f41d6c55 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -1,6 +1,8 @@ -{-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012-2016 John MacFarlane + Copyright : Copyright (C) 2012-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,49 +32,60 @@ Conversion of LaTeX documents to PDF. -} module Text.Pandoc.PDF ( makePDF ) where +import qualified Codec.Picture as JP +import qualified Control.Exception as E +import Control.Monad (unless, when) +import Control.Monad.Trans (MonadIO (..)) +import qualified Data.ByteString as BS import Data.ByteString.Lazy (ByteString) -import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BC -import qualified Data.ByteString as BS +import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) -import System.Exit (ExitCode (..)) -import System.FilePath -import System.IO (stderr, stdout) -import System.IO.Temp (withTempFile) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.IO as TextIO import System.Directory -import Data.Digest.Pure.SHA (showDigest, sha1) import System.Environment -import Control.Monad (unless, when, (<=<)) -import qualified Control.Exception as E -import Data.List (isInfixOf) -import Data.Maybe (fromMaybe) -import qualified Text.Pandoc.UTF8 as UTF8 +import System.Exit (ExitCode (..)) +import System.FilePath +import System.IO (stdout) +import System.IO.Temp (withTempDirectory, withTempFile) +#if MIN_VERSION_base(4,8,3) +import System.IO.Error (IOError, isDoesNotExistError) +#else +import System.IO.Error (isDoesNotExistError) +#endif import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError (PandocPDFProgramNotFoundError)) +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..)) +import Text.Pandoc.Process (pipeProcess) +import Text.Pandoc.Shared (inDirectory, stringify, withTempDir) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk (walkM) -import Text.Pandoc.Shared (fetchItem', warn, withTempDir, inDirectory, - stringify) import Text.Pandoc.Writers.Shared (getField, metaToJSON) -import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..)) -import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) -import Text.Pandoc.Process (pipeProcess) -import qualified Data.ByteString.Lazy as BL -import qualified Codec.Picture as JP #ifdef _WINDOWS import Data.List (intercalate) #endif +import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getCommonState, + getVerbosity, putCommonState, report, runIO, + runIOorExplode, setVerbosity) +import Text.Pandoc.Logging #ifdef _WINDOWS changePathSeparators :: FilePath -> FilePath changePathSeparators = intercalate "/" . splitDirectories #endif -makePDF :: String -- ^ pdf creator (pdflatex, lualatex, - -- xelatex, context, wkhtmltopdf) - -> (WriterOptions -> Pandoc -> String) -- ^ writer +makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex, + -- wkhtmltopdf, weasyprint, prince, context, pdfroff) + -> [String] -- ^ arguments to pass to pdf creator + -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer -> WriterOptions -- ^ options -> Pandoc -- ^ document - -> IO (Either ByteString ByteString) -makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do + -> PandocIO (Either ByteString ByteString) +makePDF "wkhtmltopdf" pdfargs writer opts doc@(Pandoc meta _) = do let mathArgs = case writerHTMLMathMethod opts of -- with MathJax, wait til all math is rendered: MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });", @@ -80,8 +93,7 @@ makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do _ -> [] meta' <- metaToJSON opts (return . stringify) (return . stringify) meta let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd - let args = mathArgs ++ - concatMap toArgs + let args = pdfargs ++ mathArgs ++ concatMap toArgs [("page-size", getField "papersize" meta') ,("title", getField "title" meta') ,("margin-bottom", fromMaybe (Just "1.2in") @@ -92,55 +104,61 @@ makePDF "wkhtmltopdf" writer opts doc@(Pandoc meta _) = do (getField "margin-right" meta')) ,("margin-left", fromMaybe (Just "1.25in") (getField "margin-left" meta')) + ,("footer-html", fromMaybe Nothing + (getField "footer-html" meta')) + ,("header-html", fromMaybe Nothing + (getField "header-html" meta')) ] - let source = writer opts doc - html2pdf (writerVerbose opts) args source -makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do - doc' <- handleImages opts tmpdir doc - let source = writer opts doc' - args = writerLaTeXArgs opts - case takeBaseName program of - "context" -> context2pdf (writerVerbose opts) tmpdir source - prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] - -> tex2pdf' (writerVerbose opts) args tmpdir program source - _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program + source <- writer opts doc + verbosity <- getVerbosity + liftIO $ html2pdf verbosity "wkhtmltopdf" args source +makePDF "weasyprint" pdfargs writer opts doc = do + source <- writer opts doc + verbosity <- getVerbosity + liftIO $ html2pdf verbosity "weasyprint" pdfargs source +makePDF "prince" pdfargs writer opts doc = do + source <- writer opts doc + verbosity <- getVerbosity + liftIO $ html2pdf verbosity "prince" pdfargs source +makePDF "pdfroff" pdfargs writer opts doc = do + source <- writer opts doc + let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i", + "--no-toc-relocation"] ++ pdfargs + verbosity <- getVerbosity + liftIO $ ms2pdf verbosity args source +makePDF program pdfargs writer opts doc = do + let withTemp = if takeBaseName program == "context" + then withTempDirectory "." + else withTempDir + commonState <- getCommonState + verbosity <- getVerbosity + liftIO $ withTemp "tex2pdf." $ \tmpdir -> do + source <- runIOorExplode $ do + putCommonState commonState + doc' <- handleImages tmpdir doc + writer opts doc' + case takeBaseName program of + "context" -> context2pdf verbosity tmpdir source + prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] + -> tex2pdf' verbosity pdfargs tmpdir program source + _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program -handleImages :: WriterOptions - -> FilePath -- ^ temp dir to store images +handleImages :: FilePath -- ^ temp dir to store images -> Pandoc -- ^ document - -> IO Pandoc -handleImages opts tmpdir = walkM (convertImages tmpdir) <=< walkM (handleImage' opts tmpdir) - -handleImage' :: WriterOptions - -> FilePath - -> Inline - -> IO Inline -handleImage' opts tmpdir (Image attr ils (src,tit)) = do - exists <- doesFileExist src - if exists - then return $ Image attr ils (src,tit) - else do - res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src - case res of - Right (contents, Just mime) -> do - let ext = fromMaybe (takeExtension src) $ - extensionFromMimeType mime - let basename = showDigest $ sha1 $ BL.fromChunks [contents] - let fname = tmpdir </> basename <.> ext - BS.writeFile fname contents - return $ Image attr ils (fname,tit) - _ -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - -- return alt text - return $ Emph ils -handleImage' _ _ x = return x + -> PandocIO Pandoc +handleImages tmpdir doc = + fillMediaBag doc >>= + extractMedia tmpdir >>= + walkM (convertImages tmpdir) -convertImages :: FilePath -> Inline -> IO Inline +convertImages :: FilePath -> Inline -> PandocIO Inline convertImages tmpdir (Image attr ils (src, tit)) = do - img <- convertImage tmpdir src + img <- liftIO $ convertImage tmpdir src newPath <- case img of - Left e -> src <$ warn e + Left e -> do + report $ CouldNotConvertImage src e + return src Right fp -> return fp return (Image attr ils (newPath, tit)) convertImages _ x = return x @@ -152,29 +170,43 @@ convertImage tmpdir fname = Just "image/png" -> doNothing Just "image/jpeg" -> doNothing Just "application/pdf" -> doNothing + Just "image/svg+xml" -> E.catch (do + (exit, _) <- pipeProcess Nothing "rsvg-convert" + ["-f","pdf","-a","-o",pdfOut,fname] BL.empty + if exit == ExitSuccess + then return $ Right pdfOut + else return $ Left "conversion from SVG failed") + (\(e :: E.SomeException) -> return $ Left $ + "check that rsvg2pdf is in path.\n" ++ + show e) _ -> JP.readImage fname >>= \res -> case res of - Left _ -> return $ Left $ "Unable to convert `" ++ - fname ++ "' for use with pdflatex." + Left e -> return $ Left e Right img -> - E.catch (Right fileOut <$ JP.savePngImage fileOut img) $ + E.catch (Right pngOut <$ JP.savePngImage pngOut img) $ \(e :: E.SomeException) -> return (Left (show e)) where - fileOut = replaceDirectory (replaceExtension fname ".png") tmpdir + pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir + pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir mime = getMimeType fname doNothing = return (Right fname) -tex2pdf' :: Bool -- ^ Verbose output +tex2pdf' :: Verbosity -- ^ Verbosity level -> [String] -- ^ Arguments to the latex-engine -> FilePath -- ^ temp directory for output -> String -- ^ tex program - -> String -- ^ tex source + -> Text -- ^ tex source -> IO (Either ByteString ByteString) -tex2pdf' verbose args tmpDir program source = do - let numruns = if "\\tableofcontents" `isInfixOf` source +tex2pdf' verbosity args tmpDir program source = do + let numruns = if "\\tableofcontents" `T.isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source + (exit, log', mbPdf) <- E.catch + (runTeXProgram verbosity program args 1 numruns tmpDir source) + (\(e :: IOError) -> if isDoesNotExistError e + then E.throwIO $ + PandocPDFProgramNotFoundError program + else E.throwIO e) case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -182,11 +214,26 @@ tex2pdf' verbose args tmpDir program source = do case logmsg of x | "! Package inputenc Error" `BC.isPrefixOf` x && program /= "xelatex" - -> "\nTry running pandoc with --latex-engine=xelatex." + -> "\nTry running pandoc with --pdf-engine=xelatex." _ -> "" return $ Left $ logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left "" - (ExitSuccess, Just pdf) -> return $ Right pdf + (ExitSuccess, Just pdf) -> do + missingCharacterWarnings verbosity log' + return $ Right pdf + +missingCharacterWarnings :: Verbosity -> ByteString -> IO () +missingCharacterWarnings verbosity log' = do + let ls = BC.lines log' + let isMissingCharacterWarning = BC.isPrefixOf "Missing character: " + let warnings = [ UTF8.toStringLazy (BC.drop 19 l) + | l <- ls + , isMissingCharacterWarning l + ] + runIO $ do + setVerbosity verbosity + mapM_ (report . MissingCharacter) warnings + return () -- parsing output @@ -212,12 +259,12 @@ extractConTeXtMsg log' = do -- Run a TeX program on an input bytestring and return (exit code, -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. -runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String - -> IO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram verbose program args runNumber numRuns tmpDir source = do +runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath + -> Text -> IO (ExitCode, ByteString, Maybe ByteString) +runTeXProgram verbosity program args runNumber numRuns tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file - unless exists $ UTF8.writeFile file source + unless exists $ BS.writeFile file $ UTF8.fromText source #ifdef _WINDOWS -- note: we want / even on Windows, for TexLive let tmpDir' = changePathSeparators tmpDir @@ -234,7 +281,7 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - when (verbose && runNumber == 1) $ do + when (verbosity >= INFO && runNumber == 1) $ do putStrLn "[makePDF] temp dir:" putStrLn tmpDir' putStrLn "[makePDF] Command line:" @@ -244,16 +291,15 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do mapM_ print env'' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file' ++ ":" - B.readFile file' >>= B.putStr + BL.readFile file' >>= BL.putStr putStr "\n" - (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty - when verbose $ do + (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty + when (verbosity >= INFO) $ do putStrLn $ "[makePDF] Run #" ++ show runNumber - B.hPutStr stdout out - B.hPutStr stderr err + BL.hPutStr stdout out putStr "\n" if runNumber <= numRuns - then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source + then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source else do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir pdfExists <- doesFileExist pdfFile @@ -261,36 +307,75 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do -- We read PDF as a strict bytestring to make sure that the -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing - return (exit, out <> err, pdf) + -- Note that some things like Missing character warnings + -- appear in the log but not on stderr, so we prefer the log: + let logFile = replaceExtension file ".log" + logExists <- doesFileExist logFile + log' <- if logExists + then BL.readFile logFile + else return out + return (exit, log', pdf) + +ms2pdf :: Verbosity + -> [String] + -> Text + -> IO (Either ByteString ByteString) +ms2pdf verbosity args source = do + env' <- getEnvironment + when (verbosity >= INFO) $ do + putStrLn "[makePDF] Command line:" + putStrLn $ "pdfroff " ++ " " ++ unwords (map show args) + putStr "\n" + putStrLn "[makePDF] Environment:" + mapM_ print env' + putStr "\n" + putStrLn "[makePDF] Contents:\n" + putStr $ T.unpack source + putStr "\n" + (exit, out) <- E.catch + (pipeProcess (Just env') "pdfroff" args + (BL.fromStrict $ UTF8.fromText source)) + (\(e :: IOError) -> if isDoesNotExistError e + then E.throwIO $ + PandocPDFProgramNotFoundError "pdfroff" + else E.throwIO e) + when (verbosity >= INFO) $ do + BL.hPutStr stdout out + putStr "\n" + return $ case exit of + ExitFailure _ -> Left out + ExitSuccess -> Right out -html2pdf :: Bool -- ^ Verbose output - -> [String] -- ^ Args to wkhtmltopdf - -> String -- ^ HTML5 source +html2pdf :: Verbosity -- ^ Verbosity level + -> String -- ^ Program (wkhtmltopdf, weasyprint or prince) + -> [String] -- ^ Args to program + -> Text -- ^ HTML5 source -> IO (Either ByteString ByteString) -html2pdf verbose args source = do - file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp +html2pdf verbosity program args source = do pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp - UTF8.writeFile file source - let programArgs = args ++ [file, pdfFile] + let pdfFileArgName = ["-o" | program == "prince"] + let programArgs = args ++ ["-"] ++ pdfFileArgName ++ [pdfFile] env' <- getEnvironment - when verbose $ do + when (verbosity >= INFO) $ do putStrLn "[makePDF] Command line:" - putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs) + putStrLn $ program ++ " " ++ unwords (map show programArgs) putStr "\n" putStrLn "[makePDF] Environment:" mapM_ print env' putStr "\n" - putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr + putStrLn "[makePDF] Contents of intermediate HTML:" + TextIO.putStr source putStr "\n" - (exit, out, err) <- pipeProcess (Just env') "wkhtmltopdf" - programArgs BL.empty - removeFile file - when verbose $ do - B.hPutStr stdout out - B.hPutStr stderr err + (exit, out) <- E.catch + (pipeProcess (Just env') program programArgs $ BL.fromStrict $ UTF8.fromText source) + (\(e :: IOError) -> if isDoesNotExistError e + then E.throwIO $ + PandocPDFProgramNotFoundError program + else E.throwIO e) + when (verbosity >= INFO) $ do + BL.hPutStr stdout out putStr "\n" pdfExists <- doesFileExist pdfFile mbPdf <- if pdfExists @@ -298,23 +383,22 @@ html2pdf verbose args source = do -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. then do - res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + res <- (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile removeFile pdfFile return res else return Nothing - let log' = out <> err return $ case (exit, mbPdf) of - (ExitFailure _, _) -> Left log' + (ExitFailure _, _) -> Left out (ExitSuccess, Nothing) -> Left "" (ExitSuccess, Just pdf) -> Right pdf -context2pdf :: Bool -- ^ Verbose output +context2pdf :: Verbosity -- ^ Verbosity level -> FilePath -- ^ temp directory for output - -> String -- ^ ConTeXt source + -> Text -- ^ ConTeXt source -> IO (Either ByteString ByteString) -context2pdf verbose tmpDir source = inDirectory tmpDir $ do +context2pdf verbosity tmpDir source = inDirectory tmpDir $ do let file = "input.tex" - UTF8.writeFile file source + BS.writeFile file $ UTF8.fromText source #ifdef _WINDOWS -- note: we want / even on Windows, for TexLive let tmpDir' = changePathSeparators tmpDir @@ -323,27 +407,26 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do #endif let programArgs = "--batchmode" : [file] env' <- getEnvironment - let sep = [searchPathSeparator] - let texinputs = maybe (".." ++ sep) ((".." ++ sep) ++) - $ lookup "TEXINPUTS" env' - let env'' = ("TEXINPUTS", texinputs) : - [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - when verbose $ do + when (verbosity >= INFO) $ do putStrLn "[makePDF] temp dir:" putStrLn tmpDir' putStrLn "[makePDF] Command line:" putStrLn $ "context" ++ " " ++ unwords (map show programArgs) putStr "\n" putStrLn "[makePDF] Environment:" - mapM_ print env'' + mapM_ print env' putStr "\n" putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr + BL.readFile file >>= BL.putStr putStr "\n" - (exit, out, err) <- pipeProcess (Just env'') "context" programArgs BL.empty - when verbose $ do - B.hPutStr stdout out - B.hPutStr stderr err + (exit, out) <- E.catch + (pipeProcess (Just env') "context" programArgs BL.empty) + (\(e :: IOError) -> if isDoesNotExistError e + then E.throwIO $ + PandocPDFProgramNotFoundError "context" + else E.throwIO e) + when (verbosity >= INFO) $ do + BL.hPutStr stdout out putStr "\n" let pdfFile = replaceExtension file ".pdf" pdfExists <- doesFileExist pdfFile @@ -351,13 +434,11 @@ context2pdf verbose tmpDir source = inDirectory tmpDir $ do -- We read PDF as a strict bytestring to make sure that the -- temp directory is removed on Windows. -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing - let log' = out <> err case (exit, mbPdf) of (ExitFailure _, _) -> do - let logmsg = extractConTeXtMsg log' + let logmsg = extractConTeXtMsg out return $ Left logmsg (ExitSuccess, Nothing) -> return $ Left "" (ExitSuccess, Just pdf) -> return $ Right pdf - |