summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Main.hs106
-rw-r--r--README27
-rw-r--r--Text/Pandoc/PDF.hs202
-rw-r--r--pandoc.cabal1
4 files changed, 176 insertions, 160 deletions
diff --git a/Main.hs b/Main.hs
index 2be8c2699..5fbffed8e 100644
--- a/Main.hs
+++ b/Main.hs
@@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Main
Copyright : Copyright (C) 2006-8 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley@edu>
- Stability : alpha
+ Stability : alpha
Portability : portable
Parses command-line options and calls the appropriate readers and
@@ -32,6 +32,7 @@ writers.
module Main where
import Text.Pandoc
import Text.Pandoc.ODT
+import Text.Pandoc.PDF
import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) )
import Text.Pandoc.Highlighting ( languages )
import System.Environment ( getArgs, getProgName, getEnvironment )
@@ -98,7 +99,7 @@ readers = [("native" , readPandoc)
-- | Reader for native Pandoc format.
readPandoc :: ParserState -> String -> Pandoc
readPandoc _ input = read input
-
+
-- | Association list of formats and pairs of writers and default headers.
writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ]
writers = [("native" , (writeDoc, ""))
@@ -108,6 +109,7 @@ writers = [("native" , (writeDoc, ""))
,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader))
,("odt" , (writeOpenDocument, defaultOpenDocumentHeader))
,("latex" , (writeLaTeX, defaultLaTeXHeader))
+ ,("pdf" , (writeLaTeX, defaultLaTeXHeader))
,("context" , (writeConTeXt, defaultConTeXtHeader))
,("texinfo" , (writeTexinfo, ""))
,("man" , (writeMan, ""))
@@ -118,8 +120,7 @@ writers = [("native" , (writeDoc, ""))
]
isNonTextOutput :: String -> Bool
-isNonTextOutput "odt" = True
-isNonTextOutput _ = False
+isNonTextOutput = (`elem` ["odt", "pdf"])
-- | Writer for Pandoc native format.
writeDoc :: WriterOptions -> Pandoc -> String
@@ -205,7 +206,7 @@ options =
(\arg opt -> return opt { optWriter = map toLower arg })
"FORMAT")
"" -- ("(" ++ (joinWithSep ", " $ map fst writers) ++ ")")
-
+
, Option "s" ["standalone"]
(NoArg
(\opt -> return opt { optStandalone = True }))
@@ -250,7 +251,7 @@ options =
, Option "m" ["asciimathml"]
(OptArg
- (\arg opt -> return opt { optHTMLMathMethod =
+ (\arg opt -> return opt { optHTMLMathMethod =
ASCIIMathML arg })
"URL")
"" -- "Use ASCIIMathML script in html output"
@@ -290,13 +291,13 @@ options =
, Option "" ["toc", "table-of-contents"]
(NoArg
(\opt -> return opt { optTableOfContents = True }))
- "" -- "Include table of contents"
+ "" -- "Include table of contents"
, Option "c" ["css"]
(ReqArg
- (\arg opt -> do
+ (\arg opt -> do
let old = optCSS opt
- return opt { optCSS = old ++ [arg],
+ return opt { optCSS = old ++ [arg],
optStandalone = True })
"CSS")
"" -- "Link to CSS style sheet"
@@ -340,17 +341,17 @@ options =
, Option "T" ["title-prefix"]
(ReqArg
- (\arg opt -> return opt { optTitlePrefix = arg,
+ (\arg opt -> return opt { optTitlePrefix = arg,
optStandalone = True })
"STRING")
"" -- "String to prefix to HTML window title"
-
+
, Option "D" ["print-default-header"]
(ReqArg
(\arg _ -> do
let header = case (lookup arg writers) of
Just (_, h) -> h
- Nothing -> error ("Unknown reader: " ++ arg)
+ Nothing -> error ("Unknown reader: " ++ arg)
hPutStr stdout header
exitWith ExitSuccess)
"FORMAT")
@@ -376,7 +377,7 @@ options =
(NoArg
(\opt -> return opt { optIgnoreArgs = True }))
"" -- "Ignore command-line arguments."
-
+
, Option "v" ["version"]
(NoArg
(\_ -> do
@@ -398,15 +399,15 @@ options =
-- Returns usage message
usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
usageMessage programName opts = usageInfo
- (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
- (joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++
+ (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
+ (joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++
(joinWithSep ", " $ map fst writers) ++ "\nOptions:")
opts
-
+
-- Determine default reader based on source file extensions
defaultReaderName :: [FilePath] -> String
defaultReaderName [] = "markdown"
-defaultReaderName (x:xs) =
+defaultReaderName (x:xs) =
case takeExtension (map toLower x) of
".xhtml" -> "html"
".html" -> "html"
@@ -441,6 +442,7 @@ defaultWriterName x =
".texinfo" -> "texinfo"
".db" -> "docbook"
".odt" -> "odt"
+ ".pdf" -> "pdf"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
@@ -464,7 +466,7 @@ main = do
else
return ()
- let defaultOpts' = if compatMode
+ let defaultOpts' = if compatMode
then defaultOpts { optReader = "markdown"
, optWriter = "html"
, optStrict = True }
@@ -513,11 +515,11 @@ main = do
let sources = if ignoreArgs then [] else args
-- assign reader and writer based on options and filenames
- let readerName' = if null readerName
+ let readerName' = if null readerName
then defaultReaderName sources
else readerName
- let writerName' = if null writerName
+ let writerName' = if null writerName
then defaultWriterName outputFile
else writerName
@@ -539,74 +541,76 @@ main = do
-- remove DOS line endings
tabFilter _ ('\r':'\n':xs) = '\n':(tabFilter tabStop xs)
tabFilter _ ('\r':xs) = '\n':(tabFilter tabStop xs)
- tabFilter spsToNextStop ('\t':xs) =
+ tabFilter spsToNextStop ('\t':xs) =
if preserveTabs
- then '\t':(tabFilter tabStop xs)
- else replicate spsToNextStop ' ' ++ tabFilter tabStop xs
- tabFilter 1 (x:xs) =
+ then '\t':(tabFilter tabStop xs)
+ else replicate spsToNextStop ' ' ++ tabFilter tabStop xs
+ tabFilter 1 (x:xs) =
x:(tabFilter tabStop xs)
- tabFilter spsToNextStop (x:xs) =
+ tabFilter spsToNextStop (x:xs) =
x:(tabFilter (spsToNextStop - 1) xs)
- let standalone' = (standalone && not strict) || writerName' == "odt"
+ let standalone' = (standalone && not strict) || isNonTextOutput writerName'
#ifdef _CITEPROC
refs <- if null modsFile then return [] else readModsColletionFile modsFile
#endif
- let startParserState =
+ let startParserState =
defaultParserState { stateParseRaw = parseRaw,
- stateTabStop = tabStop,
+ stateTabStop = tabStop,
stateSanitizeHTML = sanitize,
stateStandalone = standalone',
#ifdef _CITEPROC
stateCitations = map citeKey refs,
#endif
- stateSmart = smart || writerName' `elem`
+ stateSmart = smart || writerName' `elem`
["latex", "context"],
stateColumns = columns,
stateStrict = strict }
let csslink = if null css
then ""
- else concatMap
+ else concatMap
(\f -> "<link rel=\"stylesheet\" href=\"" ++
f ++ "\" type=\"text/css\" media=\"all\" />\n")
css
- let header = (if (customHeader == "DEFAULT")
+ let header = (if (customHeader == "DEFAULT")
then defaultHeader
else customHeader) ++ csslink ++ includeHeader
let writerOptions = WriterOptions { writerStandalone = standalone',
- writerHeader = header,
+ writerHeader = header,
writerTitlePrefix = titlePrefix,
- writerTabStop = tabStop,
+ writerTabStop = tabStop,
writerTableOfContents = toc &&
(not strict) &&
writerName' /= "s5",
writerHTMLMathMethod = mathMethod,
writerS5 = (writerName' == "s5"),
writerIgnoreNotes = False,
- writerIncremental = incremental,
+ writerIncremental = incremental,
writerNumberSections = numberSections,
- writerIncludeBefore = includeBefore,
+ writerIncludeBefore = includeBefore,
writerIncludeAfter = includeAfter,
writerStrictMarkdown = strict,
writerReferenceLinks = referenceLinks,
writerWrapText = wrap }
- let writeOutput = if writerName' == "odt"
- then if outputFile == "-"
- then \_ -> do
- hPutStrLn stderr ("Error: Cannot write " ++ writerName ++
- " output to stdout.\n" ++
- "Specify an output file using the -o option.")
- exitWith $ ExitFailure 5
- else let sourceDirRelative = if null sources
- then ""
- else takeDirectory (head sources)
- in saveOpenDocumentAsODT outputFile sourceDirRelative
- else if outputFile == "-"
- then putStrLn
- else writeFile outputFile . (++ "\n")
+ if isNonTextOutput writerName' && outputFile == "-"
+ then do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
+ "Specify an output file using the -o option.")
+ exitWith $ ExitFailure 5
+ else return ()
+
+ let sourceDirRelative = if null sources
+ then ""
+ else takeDirectory (head sources)
+
+ let writeOutput = case writerName' of
+ "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative
+ "pdf" -> saveLaTeXAsPDF outputFile sourceDirRelative
+ _ -> if outputFile == "-"
+ then putStrLn
+ else writeFile outputFile . (++ "\n")
fmap (reader startParserState . tabFilter tabStop . joinWithSep "\n")
(readSources sources) >>=
@@ -615,7 +619,7 @@ main = do
#endif
writeOutput . writer writerOptions
- where
+ where
readSources [] = mapM readSource ["-"]
readSources sources = mapM readSource sources
readSource "-" = getContents
diff --git a/README b/README
index 62837543c..3cc3c2b58 100644
--- a/README
+++ b/README
@@ -82,11 +82,28 @@ To convert `hello.html` from html to markdown:
pandoc -f html -t markdown hello.html
Supported output formats include `markdown`, `latex`, `context`
-(ConTeXt), `html`, `rtf` (rich text format), `rst` (reStructuredText),
-`docbook` (DocBook XML), `opendocument` (OpenDocument XML), `odt`
-(OpenOffice text document), `texinfo`, (GNU Texinfo), `mediawiki`
-(MediaWiki markup), `man` (groff man), and `s5` (which produces an
-HTML file that acts like powerpoint).
+(ConTeXt), `pdf`, `html`, `rtf` (rich text format), `rst`
+(reStructuredText), `docbook` (DocBook XML), `opendocument`
+(OpenDocument XML), `odt` (OpenOffice text document), `texinfo`, (GNU
+Texinfo), `mediawiki` (MediaWiki markup), `man` (groff man), and `s5`
+(which produces an HTML file that acts like powerpoint).
+
+Notes:
+
+- For `odt` output, you must have `zip` in the path. If you
+ don't have it installed, you can get the free [Info-ZIP].
+
+- For `pdf` output, you must have `pdflatex` and `bibtex` in the
+ path. You should also have the following LaTeX packages installed:
+ `unicode`, `fancyhdr` (if you have verbatim text in footnotes),
+ `graphicx` (if you use images), `array` (if you use tables), and
+ `ulem` (if you use strikeout text). If they are not already included
+ in your LaTeX distribution, you can get them from [CTAN]. A full
+ [TeX Live] or [MacTeX] distribution will have all of these packages.
+
+ [Info-ZIP]: http://www.info-zip.org/Zip.html
+ [TeX Live]: http://www.tug.org/texlive/
+ [MacTeX]: http://www.tug.org/mactex/
Supported input formats include `markdown`, `html`, `latex`, and `rst`.
Note that the `rst` reader only parses a subset of reStructuredText
diff --git a/Text/Pandoc/PDF.hs b/Text/Pandoc/PDF.hs
index 5b900bf03..fa1e6dcf5 100644
--- a/Text/Pandoc/PDF.hs
+++ b/Text/Pandoc/PDF.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE TemplateHaskell, CPP #-}
+{-# LANGUAGE CPP #-}
{-
Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.ODT
+ Module : Text.Pandoc.PDF
Copyright : Copyright (C) 2006-7 John MacFarlane
License : GNU GPL, version 2 or above
@@ -26,119 +26,113 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
-Functions for producing an ODT file from OpenDocument XML.
+Functions for producing a PDF file from LaTeX.
-}
-module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
-import Text.Pandoc.TH ( binaryContentsOf )
-import Data.Maybe ( fromJust )
-import Data.List ( partition, intersperse )
+module Text.Pandoc.PDF ( saveLaTeXAsPDF ) where
+import Data.List ( isInfixOf )
import System.Directory
-import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories )
+import System.FilePath ( (</>), (<.>), takeBaseName )
import System.Process ( runProcess, waitForProcess )
import System.Exit
-import Text.XML.Light
-import Text.XML.Light.Cursor
+import System.Environment ( getEnvironment )
import Text.Pandoc.Shared ( withTempDir )
-import Network.URI ( isURI )
-import qualified Data.ByteString as B ( writeFile, pack )
-import Data.ByteString.Internal ( c2w )
-import Prelude hiding ( writeFile, readFile )
-import System.IO ( stderr )
+import Prelude hiding ( writeFile, readFile, putStrLn )
+import System.IO ( stderr, openFile, IOMode (..) )
#ifdef _UTF8STRING
import System.IO.UTF8
#else
import Text.Pandoc.UTF8
#endif
--- | Produce an ODT file from OpenDocument XML.
-saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced.
- -> FilePath -- ^ Relative directory of source file.
- -> String -- ^ OpenDocument XML contents.
- -> IO ()
-saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do
- let zipCmd = "zip"
- -- check for zip in path:
- zipPathMaybe <- findExecutable zipCmd
- let zipPath = case zipPathMaybe of
- Nothing -> error $ "The '" ++ zipCmd ++
- "' command, which is needed to build an ODT file, was not found.\n" ++
- "It can be obtained from http://www.info-zip.org/Zip.html\n" ++
- "Debian (and Debian-based) linux: apt-get install zip\n" ++
- "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm"
+-- | Produce an PDF file from LaTeX.
+saveLaTeXAsPDF :: FilePath -- ^ Pathname of PDF file to be produced.
+ -> FilePath -- ^ Relative directory of source file.
+ -> String -- ^ LaTeX document.
+ -> IO ()
+saveLaTeXAsPDF destinationPDFPath sourceDirRelative latex = do
+ -- check for pdflatex and bibtex in path:
+ latexPathMaybe <- findExecutable "pdflatex"
+ bibtexPathMaybe <- findExecutable "bibtex"
+ let latexPath = case latexPathMaybe of
+ Nothing -> error $ "The 'pdflatex' command, which is needed to build an PDF file, was not found."
Just x -> x
- withTempDir "pandoc-odt" $ \tempDir -> do
- let tempODT = tempDir </> "reference.odt"
- B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" </> "reference.odt")
- xml' <- handlePictures tempODT sourceDirRelative xml
- writeFile (tempDir </> "content.xml") xml'
- ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"]
- (Just tempDir) Nothing Nothing Nothing (Just stderr)
- ec <- waitForProcess ph -- requires compilation with -threaded
- case ec of
- ExitSuccess -> copyFile tempODT destinationODTPath
- _ -> error "Error creating ODT." >> exitWith ec
-
--- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in
--- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes
--- to point to the new locations in Pictures/. Return modified XML.
-handlePictures :: FilePath -- ^ Path of ODT file in temp directory
- -> FilePath -- ^ Directory (relative) containing source file
- -> String -- ^ OpenDocument XML string
- -> IO String -- ^ Modified XML
-handlePictures tempODT sourceDirRelative xml = do
- let parsed = case parseXMLDoc xml of
- Nothing -> error "Could not parse OpenDocument XML."
- Just x -> x
- let cursor = case (fromForest $ elContent parsed) of
- Nothing -> error "ODT appears empty"
- Just x -> x
- cursor' <- scanPictures tempODT sourceDirRelative cursor
- let modified = parsed { elContent = toForest $ root cursor' }
- return $ showTopElement modified
-
-scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor
-scanPictures tempODT sourceDirRelative cursor = do
- cursor' <- handleTree tempODT sourceDirRelative cursor
- case right cursor' of
- Just n -> scanPictures tempODT sourceDirRelative n
- Nothing -> return cursor'
-
-handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor
-handleTree tempODT sourceDirRelative cursor = do
- case firstChild cursor of
- Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor
- Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent
+ let bibtexPath = case bibtexPathMaybe of
+ Nothing -> error $ "The 'bibtex' command, which is needed to build an PDF file, was not found."
+ Just x -> x
+ sourceDirAbsolute <- getCurrentDirectory >>= return . (</> sourceDirRelative) >>= canonicalizePath
+ withTempDir "pandoc-pdf" $ \tempDir -> do
+ env <- getEnvironment
+ let env' = ("TEXINPUTS", ".:" ++ sourceDirAbsolute ++ ":") : env
+ let baseName = "input"
+ writeFile (tempDir </> baseName <.> "tex") latex
+ let runLatex = runProgram latexPath ["-interaction=nonstopmode", baseName] tempDir env'
+ let runBibtex = runProgram bibtexPath [baseName] tempDir env'
+ messages1 <- runLatex
+ let logPath = tempDir </> baseName <.> "log"
+ tocExists <- doesFileExist (tempDir </> baseName <.> "toc")
+ logContents <- readFile logPath
+ let undefinedRefs = "There were undefined references" `isInfixOf` logContents
+ let needsBibtex = "itation" `isInfixOf` logContents
+ if needsBibtex
+ then runBibtex >>= hPutStr stderr . unlines
+ else return ()
+ if tocExists || undefinedRefs
+ then do
+ messages2 <- runLatex
+ logContents' <- readFile logPath
+ let stillUndefinedRefs = "There were undefined references" `isInfixOf` logContents'
+ if stillUndefinedRefs
+ then runLatex >>= hPutStr stderr . unlines
+ else hPutStr stderr $ unlines messages2
+ else
+ hPutStr stderr $ unlines messages1
+ let pdfPath = tempDir </> baseName <.> "pdf"
+ pdfExists <- doesFileExist pdfPath
+ if pdfExists
+ then copyFile pdfPath destinationPDFPath
+ else error "The PDF could not be created."
--- | If content is an image link, handle it appropriately.
--- Otherwise, handle children if any.
-handleContent :: FilePath -> FilePath -> Content -> IO Content
-handleContent tempODT sourceDirRelative content@(Elem el) = do
- if qName (elName el) == "image"
- then do
- let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el
- let href = case hrefs of
- [] -> error $ "No href found in " ++ show el
- [x] -> x
- _ -> error $ "Multiple hrefs found in " ++ show el
- if isURI $ attrVal href
- then return content
- else do -- treat as filename
- let oldLoc = sourceDirRelative </> attrVal href
- fileExists <- doesFileExist oldLoc
- if fileExists
- then do
- let pref = take 230 $ concat $ intersperse "_" $
- splitDirectories $ takeDirectory $ attrVal href
- let newLoc = "Pictures" </> pref ++ "_" ++ (takeFileName $ attrVal href)
- let tempDir = takeDirectory tempODT
- createDirectoryIfMissing False $ tempDir </> takeDirectory newLoc
- copyFile oldLoc $ tempDir </> newLoc
- let newAttrs = (href { attrVal = newLoc }) : rest
- return $ Elem (el { elAttribs = newAttrs })
- else do
- hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring."
- return content
- else return content
-
-handleContent _ _ c = return c -- not Element
+runProgram :: FilePath -- ^ pathname of executable
+ -> [String] -- ^ arguments
+ -> FilePath -- ^ working directory
+ -> [(String, String)] -- ^ environment
+ -> IO [String]
+runProgram cmdPath arguments workingDir env = do
+ let runOutputPath = workingDir </> "output" <.> "tmp"
+ runOutput <- openFile runOutputPath WriteMode
+ ph <- runProcess cmdPath arguments (Just workingDir) (Just env) Nothing (Just runOutput) (Just runOutput)
+ ec <- waitForProcess ph -- requires compilation with -threaded
+ case ec of
+ ExitSuccess -> return []
+ _ -> do
+ output <- readFile runOutputPath
+ if (takeBaseName cmdPath) == "bibtex"
+ then return $! lines output
+ else do
+ return $!
+ (if "`ucs.sty' not found" `isInfixOf` output
+ then ["Please install the 'unicode' package from CTAN:",
+ " http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"]
+ else []) ++
+ (if "`ulem.sty' not found" `isInfixOf` output
+ then ["Please install the 'ulem' package from CTAN:",
+ " http://www.ctan.org/tex-archive/macros/latex/contrib/misc/"]
+ else []) ++
+ (if "`graphicx.sty' not found" `isInfixOf` output
+ then ["Please install the 'graphicx' package from CTAN:",
+ " http://www.ctan.org/tex-archive/macros/latex/required/graphics/"]
+ else []) ++
+ (if "`fancyhdr.sty' not found" `isInfixOf` output
+ then ["Please install the 'fancyhdr' package from CTAN:",
+ " http://www.ctan.org/tex-archive/macros/latex/contrib/fancyhdr/"]
+ else []) ++
+ (if "`array.sty' not found" `isInfixOf` output
+ then ["Please install the 'array' package from CTAN:",
+ " http://www.ctan.org/tex-archive/macros/latex/required/tools/"]
+ else []) ++
+ (filter isUseful $ lines output)
+ where isUseful ln = take 1 ln == "!" ||
+ take 2 ln == "l." ||
+ "Error" `isInfixOf` ln ||
+ "error" `isInfixOf` ln
diff --git a/pandoc.cabal b/pandoc.cabal
index 1c39a2208..1871275c8 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -165,6 +165,7 @@ Library
Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared,
Text.Pandoc.ODT,
+ Text.Pandoc.PDF,
Text.Pandoc.ASCIIMathML,
Text.Pandoc.DefaultHeaders,
Text.Pandoc.Highlighting,