summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2015-12-21 17:22:12 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2015-12-21 17:22:12 -0800
commit0a768f1cc568722973302554265efa414d097da3 (patch)
treed1ed3391ec43848ce1ff163977c4064f5329893c
parent32d27896cde18de187be3f8a9e7f9c4ef79b58b3 (diff)
Added preliminary support for PDF creation via wkhtmltopdf.
To use this: pandoc -t html5 -o result.pdf (and add `--mathjax` if you have math.)
-rw-r--r--README11
-rw-r--r--pandoc.hs28
-rw-r--r--src/Text/Pandoc/PDF.hs61
3 files changed, 79 insertions, 21 deletions
diff --git a/README b/README
index ebc8f58d4..ea6226d38 100644
--- a/README
+++ b/README
@@ -24,7 +24,7 @@ markup], [Haddock markup], [OPML], [Emacs Org mode], [DocBook],
[FictionBook2], [Textile], [groff man] pages, [Emacs Org mode],
[AsciiDoc], [InDesign ICML], and [Slidy], [Slideous], [DZSlides],
[reveal.js] or [S5] HTML slide shows. It can also produce [PDF] output
-on systems where LaTeX or ConTeXt is installed.
+on systems where LaTeX, ConTeXt, or `wkhtmltopdf` is installed.
Pandoc's enhanced version of Markdown includes syntax for [footnotes],
[tables], flexible [ordered lists], [definition lists], [fenced code blocks],
@@ -189,12 +189,13 @@ any header file. The [`natbib`], [`biblatex`], [`bibtex`], and [`biber`]
packages can optionally be used for [citation rendering]. These are
included with all recent versions of [TeX Live].
-Alternatively, pandoc can use ConTeXt to create a PDF.
+Alternatively, pandoc can use ConTeXt or `wkhtmltopdf` to create a PDF.
To do this, specify an output file with a `.pdf` extension,
-as before, but add `-t context` to the command line.
+as before, but add `-t context` or `-t html5` to the command line.
-PDF output can be controlled using [variables for LaTeX]
-or [variables for ConTeXt].
+PDF output can be controlled using [variables for LaTeX] (if
+LaTeX is used), [variables for ConTeXt] (if ConTeXt is used),
+and `--css` (if `wkhtmltopdf` is used).
[`amsfonts`]: https://ctan.org/pkg/amsfonts
[`amsmath`]: https://ctan.org/pkg/amsmath
diff --git a/pandoc.hs b/pandoc.hs
index cfb9adc1c..a4906382a 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -1168,6 +1168,7 @@ convertWithOpts opts args = do
let laTeXOutput = "latex" `isPrefixOf` writerName' ||
"beamer" `isPrefixOf` writerName'
let conTeXtOutput = "context" `isPrefixOf` writerName'
+ let html5Output = "html5" `isPrefixOf` writerName'
let laTeXInput = "latex" `isPrefixOf` readerName' ||
"beamer" `isPrefixOf` readerName'
@@ -1367,27 +1368,28 @@ convertWithOpts opts args = do
IOByteStringWriter f -> f writerOptions doc' >>= writeBinary
PureStringWriter f
| pdfOutput -> do
- -- make sure writer is latex or beamer
- unless (laTeXOutput || conTeXtOutput) $
+ -- make sure writer is latex or beamer or context or html5
+ unless (laTeXOutput || conTeXtOutput || html5Output) $
err 47 $ "cannot produce pdf output with " ++ writerName' ++
" writer"
- let texprog = if conTeXtOutput
- then "context"
- else latexEngine
- -- check for latex program
- mbLatex <- findExecutable texprog
- when (isNothing mbLatex) $
- err 41 $ texprog ++ " not found. " ++
- texprog ++ " is needed for pdf output."
-
- res <- makePDF texprog f writerOptions doc'
+ let pdfprog = case () of
+ _ | conTeXtOutput -> "context"
+ _ | html5Output -> "wkhtmltopdf"
+ _ -> latexEngine
+ -- check for pdf creating program
+ mbPdfProg <- findExecutable pdfprog
+ when (isNothing mbPdfProg) $
+ err 41 $ pdfprog ++ " not found. " ++
+ pdfprog ++ " is needed for pdf output."
+
+ res <- makePDF pdfprog f writerOptions doc'
case res of
Right pdf -> writeBinary pdf
Left err' -> do
B.hPutStr stderr err'
B.hPut stderr $ B.pack [10]
- err 43 "Error producing PDF from TeX source"
+ err 43 "Error producing PDF"
| otherwise -> selfcontain (f writerOptions doc' ++
['\n' | not standalone'])
>>= writerFn outputFile . handleEntities
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 0e533ede8..72b563044 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -38,6 +38,7 @@ import Text.Pandoc.Compat.Monoid ((<>))
import System.Exit (ExitCode (..))
import System.FilePath
import System.IO (stderr, stdout)
+import System.IO.Temp (withTempFile)
import System.Directory
import Data.Digest.Pure.SHA (showDigest, sha1)
import System.Environment
@@ -49,7 +50,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walkM)
import Text.Pandoc.Shared (fetchItem', warn, withTempDir, inDirectory)
-import Text.Pandoc.Options (WriterOptions(..))
+import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
import Text.Pandoc.Process (pipeProcess)
import qualified Data.ByteString.Lazy as BL
@@ -63,18 +64,29 @@ changePathSeparators :: FilePath -> FilePath
changePathSeparators = intercalate "/" . splitDirectories
#endif
-makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
+makePDF :: String -- ^ pdf creator (pdflatex, lualatex,
+ -- xelatex, context, wkhtmltopdf)
-> (WriterOptions -> Pandoc -> String) -- ^ writer
-> WriterOptions -- ^ options
-> Pandoc -- ^ document
-> IO (Either ByteString ByteString)
+makePDF "wkhtmltopdf" writer opts doc =
+ wkhtml2pdf (writerVerbose opts) args source
+ where args = case writerHTMLMathMethod opts of
+ -- with MathJax, wait til all math is rendered:
+ MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });",
+ "--window-status", "mathjax_loaded"]
+ _ -> []
+ source = writer opts doc
makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
doc' <- handleImages opts tmpdir doc
let source = writer opts doc'
args = writerLaTeXArgs opts
case program of
"context" -> context2pdf (writerVerbose opts) tmpdir source
- _ -> tex2pdf' (writerVerbose opts) args tmpdir program source
+ _ | program `elem` ["pdflatex", "lualatex", "xelatex"]
+ -> tex2pdf' (writerVerbose opts) args tmpdir program source
+ _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
handleImages :: WriterOptions
-> FilePath -- ^ temp dir to store images
@@ -235,6 +247,49 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do
else return Nothing
return (exit, out <> err, pdf)
+wkhtml2pdf :: Bool -- ^ Verbose output
+ -> [String] -- ^ Args to wkhtmltopdf
+ -> String -- ^ HTML5 source
+ -> IO (Either ByteString ByteString)
+wkhtml2pdf verbose args source = do
+ file <- withTempFile "." "wkhtml2pdf.html" $ \fp _ -> return fp
+ pdfFile <- withTempFile "." "wkhtml2pdf.pdf" $ \fp _ -> return fp
+ UTF8.writeFile file source
+ let programArgs = args ++ [file, pdfFile]
+ env' <- getEnvironment
+ when verbose $ do
+ putStrLn "[makePDF] Command line:"
+ putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs)
+ putStr "\n"
+ putStrLn "[makePDF] Environment:"
+ mapM_ print env'
+ putStr "\n"
+ putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
+ B.readFile file >>= B.putStr
+ putStr "\n"
+ (exit, out, err) <- pipeProcess (Just env') "wkhtmltopdf"
+ programArgs BL.empty
+ removeFile file
+ when verbose $ do
+ B.hPutStr stdout out
+ B.hPutStr stderr err
+ putStr "\n"
+ pdfExists <- doesFileExist pdfFile
+ mbPdf <- if pdfExists
+ -- We read PDF as a strict bytestring to make sure that the
+ -- temp directory is removed on Windows.
+ -- See https://github.com/jgm/pandoc/issues/1192.
+ then do
+ res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ removeFile pdfFile
+ return res
+ else return Nothing
+ let log' = out <> err
+ return $ case (exit, mbPdf) of
+ (ExitFailure _, _) -> Left log'
+ (ExitSuccess, Nothing) -> Left ""
+ (ExitSuccess, Just pdf) -> Right pdf
+
context2pdf :: Bool -- ^ Verbose output
-> FilePath -- ^ temp directory for output
-> String -- ^ ConTeXt source