summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2008-07-31 23:16:02 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2008-07-31 23:16:02 +0000
commitd072ad4b66b5cdf5a8f811ae1f78460e34270d58 (patch)
tree6f77dde12c4d5824434e0961f71c9f38f19be4cd
parent504a61a97b9dbd0b55b2b06fd2c1f547d71a1fa1 (diff)
Added 'odt' output option to pandoc:
Not a writer, but a module that inserts the output of the OpenDocument writer into an ODT archive. This replaces markdown2odt. + Added odt output option to Main.hs. + Added default for .odt output file. + Changed defaults so that .xml and .sgml aren't automatically DocBook. + Added odt writer to Text.Pandoc exports. + Added Text.Pandoc.ODT and included in pandoc.cabal. + Added reference.odt as data-file in pandoc.cabal. + Handle picture links in OpenDocument files using xml library. + Removed markdown2odt and references from Makefile, README, man. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1345 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r--Main.hs46
-rw-r--r--Makefile7
-rw-r--r--README26
-rw-r--r--Text/Pandoc/ODT.hs141
-rw-r--r--Text/Pandoc/Shared.hs26
-rw-r--r--man/man1/markdown2odt.1.md52
-rw-r--r--pandoc.cabal15
-rw-r--r--wrappers/markdown2odt.in41
8 files changed, 210 insertions, 144 deletions
diff --git a/Main.hs b/Main.hs
index 87ae17ce6..423cf35dc 100644
--- a/Main.hs
+++ b/Main.hs
@@ -31,11 +31,12 @@ writers.
module Main where
import Text.Pandoc
import Text.Pandoc.UTF8
+import Text.Pandoc.ODT
import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) )
import Text.Pandoc.Highlighting ( languages )
import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) )
-import System.FilePath ( takeExtension )
+import System.FilePath ( takeExtension, takeDirectory )
import System.Console.GetOpt
import System.IO
import Data.Maybe ( fromMaybe )
@@ -82,6 +83,7 @@ writers = [("native" , (writeDoc, ""))
,("s5" , (writeS5String, defaultS5Header))
,("docbook" , (writeDocbook, defaultDocbookHeader))
,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader))
+ ,("odt" , (writeOpenDocument, defaultOpenDocumentHeader))
,("latex" , (writeLaTeX, defaultLaTeXHeader))
,("context" , (writeConTeXt, defaultConTeXtHeader))
,("texinfo" , (writeTexinfo, ""))
@@ -92,6 +94,10 @@ writers = [("native" , (writeDoc, ""))
,("rtf" , (writeRTF, defaultRTFHeader))
]
+isNonTextOutput :: String -> Bool
+isNonTextOutput "odt" = True
+isNonTextOutput _ = False
+
-- | Writer for Pandoc native format.
writeDoc :: WriterOptions -> Pandoc -> String
writeDoc _ = prettyPandoc
@@ -392,8 +398,7 @@ defaultWriterName x =
".texi" -> "texinfo"
".texinfo" -> "texinfo"
".db" -> "docbook"
- ".xml" -> "docbook"
- ".sgml" -> "docbook"
+ ".odt" -> "odt"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
@@ -478,10 +483,6 @@ main = do
Just (w,h) -> return (w, h)
Nothing -> error ("Unknown writer: " ++ writerName')
- output <- if (outputFile == "-")
- then return stdout
- else openFile outputFile WriteMode
-
environment <- getEnvironment
let columns = case lookup "COLUMNS" environment of
Just cols -> read cols
@@ -501,11 +502,13 @@ main = do
tabFilter spsToNextStop (x:xs) =
x:(tabFilter (spsToNextStop - 1) xs)
+ let standalone' = (standalone && not strict) || writerName' == "odt"
+
let startParserState =
defaultParserState { stateParseRaw = parseRaw,
stateTabStop = tabStop,
stateSanitizeHTML = sanitize,
- stateStandalone = standalone && (not strict),
+ stateStandalone = standalone',
stateSmart = smart || writerName' `elem`
["latex", "context"],
stateColumns = columns,
@@ -519,16 +522,15 @@ main = do
let header = (if (customHeader == "DEFAULT")
then defaultHeader
else customHeader) ++ csslink ++ includeHeader
- let writerOptions = WriterOptions { writerStandalone = standalone &&
- (not strict),
+ let writerOptions = WriterOptions { writerStandalone = standalone',
writerHeader = header,
writerTitlePrefix = titlePrefix,
writerTabStop = tabStop,
writerTableOfContents = toc &&
(not strict) &&
- writerName/="s5",
+ writerName' /= "s5",
writerHTMLMathMethod = mathMethod,
- writerS5 = (writerName=="s5"),
+ writerS5 = (writerName' == "s5"),
writerIgnoreNotes = False,
writerIncremental = incremental,
writerNumberSections = numberSections,
@@ -538,11 +540,25 @@ main = do
writerReferenceLinks = referenceLinks,
writerWrapText = wrap }
- (readSources sources) >>= (hPutStrLn output . toUTF8 .
+ let writeOutput = if writerName' == "odt"
+ then if outputFile == "-"
+ then \_ -> do
+ hPutStrLn stderr ("Error: Cannot write " ++ writerName ++
+ " output to stdout.\n" ++
+ "Specify an output file using the -o option.")
+ exitWith $ ExitFailure 5
+ else let sourceDirRelative = if null sources
+ then ""
+ else takeDirectory (head sources)
+ in saveOpenDocumentAsODT outputFile sourceDirRelative
+ else if outputFile == "-"
+ then putStrLn
+ else writeFile outputFile . (++ "\n")
+
+ (readSources sources) >>= writeOutput . toUTF8 .
(writer writerOptions) .
(reader startParserState) . tabFilter tabStop .
- fromUTF8 . (joinWithSep "\n")) >>
- hClose output
+ fromUTF8 . (joinWithSep "\n")
where
readSources [] = mapM readSource ["-"]
diff --git a/Makefile b/Makefile
index 53dad731a..bc11f6a79 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,7 @@ EXECSBASE := $(shell sed -ne 's/^[Ee]xecutable:\{0,1\}[[:space:]]*//p' $(CABAL))
#-------------------------------------------------------------------------------
# Install targets
#-------------------------------------------------------------------------------
-WRAPPERS := html2markdown markdown2pdf hsmarkdown markdown2odt
+WRAPPERS := html2markdown markdown2pdf hsmarkdown
# Add .exe extensions if we're running Windows/Cygwin.
EXTENSION := $(shell uname | tr '[:upper:]' '[:lower:]' | \
sed -ne 's/^cygwin.*$$/\.exe/p')
@@ -114,11 +114,6 @@ $(ODTREF): $(addprefix $(ODTSTYLES)/, layout-cache meta.xml styles.xml content.x
cd $(ODTSTYLES) ; \
zip -9 -r $(notdir $@) * -x $(notdir $@)
-ODTREFSH=$(SRCDIR)/wrappers/odtref.sh
-cleanup_files+=$(ODTREFSH)
-$(ODTREFSH): $(ODTREF)
- echo "REFERENCEODT='$(PREFIX)/share/$(DATADIR)/$(notdir $(ODTREF))'" > $@
-
.PHONY: wrappers
wrappers: $(WRAPPERS)
cleanup_files+=$(WRAPPERS)
diff --git a/README b/README
index 1fbdf199b..ef001e5b3 100644
--- a/README
+++ b/README
@@ -119,7 +119,7 @@ then convert the output back to the local encoding.
Shell scripts
=============
-Four shell scripts, `markdown2pdf`, `markdown2odt`, `html2markdown`, and
+Three shell scripts, `markdown2pdf`, `html2markdown`, and
`hsmarkdown`, are included in the standard Pandoc installation. (They
are not included in the Windows binary package, as they require a POSIX
shell, but they may be used in Windows under Cygwin.)
@@ -149,27 +149,7 @@ shell, but they may be used in Windows under Cygwin.)
included in your LaTeX distribution, you can get them from
[CTAN].
-2. `markdown2odt` produces an ODT file from markdown-formatted
- text, using `pandoc` and `pdflatex`. (ODT is "OpenDocument
- Text," the default format for the OpenOffice.org Writer.)
- The default behavior of `markdown2odt` is to create a file with the
- same base name as the first argument and the extension `odt`; thus,
- for example,
-
- markdown2odt sample.txt endnotes.txt
-
- will produce `sample.odt`. (If `sample.odt` exists already,
- it will be backed up before being overwritten.) An output file
- name can be specified explicitly using the `-o` option:
-
- markdown2odt -o book.odt chap1 chap2
-
- If no input file is specified, input will be taken from STDIN.
- All of `pandoc`'s options will work with `markdown2odt` as well.
-
- `markdown2odt` requires `zip`, which must be in the path.
-
-3. `html2markdown` grabs a web page from a file or URL and converts
+2. `html2markdown` grabs a web page from a file or URL and converts
it to markdown-formatted text, using `tidy` and `pandoc`.
All of `pandoc`'s options will work with `html2markdown` as well.
@@ -198,7 +178,7 @@ shell, but they may be used in Windows under Cygwin.)
It uses [`iconv`] for character encoding conversions; if `iconv`
is absent, it will still work, but it will treat everything as UTF-8.
-4. `hsmarkdown` is designed to be used as a drop-in replacement for
+3. `hsmarkdown` is designed to be used as a drop-in replacement for
`Markdown.pl`. It forces `pandoc` to convert from markdown to
HTML, and to use the `--strict` flag for maximal compliance with
official markdown syntax. (All of Pandoc's syntax extensions and
diff --git a/Text/Pandoc/ODT.hs b/Text/Pandoc/ODT.hs
new file mode 100644
index 000000000..f388515fb
--- /dev/null
+++ b/Text/Pandoc/ODT.hs
@@ -0,0 +1,141 @@
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.ODT
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for producing an ODT file from OpenDocument XML.
+-}
+module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
+import Data.Maybe ( fromJust )
+import Data.List ( partition, intersperse )
+import Prelude hiding ( writeFile, readFile )
+import System.IO.UTF8
+import System.IO ( stderr )
+import System.Directory
+import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories )
+import System.Process ( runCommand, waitForProcess )
+import System.Exit
+import Text.XML.Light
+import Text.XML.Light.Cursor
+import Text.Pandoc.Shared ( withTempDir )
+import Network.URI ( isURI )
+import Paths_pandoc
+
+-- | Produce an ODT file from OpenDocument XML.
+saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced.
+ -> FilePath -- ^ Relative directory of source file.
+ -> String -- ^ OpenDocument XML contents.
+ -> IO ()
+saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do
+ let zipCmd = "zip"
+ -- check for zip in path:
+ maybeZipPath <- findExecutable zipCmd
+ let zipPath = case maybeZipPath of
+ Just p -> p
+ Nothing -> error $ "The '" ++ zipCmd ++ "' command, which is needed to build an ODT file, " ++
+ "was not found.\n" ++
+ "It can be obtained from http://www.info-zip.org/Zip.html\n" ++
+ "Debian (and Debian-based) linux: apt-get install zip\n" ++
+ "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm"
+ referenceODTPath <- getDataFileName "reference.odt"
+ withTempDir "pandoc-odt" $ \tempDir -> do
+ let tempODT = tempDir </> "reference.odt"
+ copyFile referenceODTPath tempODT
+ createDirectory $ tempDir </> "Pictures"
+ xml' <- handlePictures tempODT sourceDirRelative xml
+ writeFile (tempDir </> "content.xml") xml'
+ oldDir <- getCurrentDirectory
+ setCurrentDirectory tempDir
+ let zipCmdLine = zipPath ++ " -9 -q -r " ++ tempODT ++ " " ++ "content.xml Pictures"
+ ec <- runCommand zipCmdLine >>= waitForProcess -- this requires compilation with -threaded
+ setCurrentDirectory oldDir
+ case ec of
+ ExitSuccess -> copyFile tempODT destinationODTPath
+ _ -> error "Error creating ODT." >> exitWith ec
+
+
+-- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in
+-- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes
+-- to point to the new locations in Pictures/. Return modified XML.
+handlePictures :: FilePath -- ^ Path of ODT file in temp directory
+ -> FilePath -- ^ Directory (relative) containing source file
+ -> String -- ^ OpenDocument XML string
+ -> IO String -- ^ Modified XML
+handlePictures tempODT sourceDirRelative xml = do
+ let parsed = case parseXMLDoc xml of
+ Nothing -> error "Could not parse OpenDocument XML."
+ Just x -> x
+ let cursor = case (fromForest $ elContent parsed) of
+ Nothing -> error "ODT appears empty"
+ Just x -> x
+ cursor' <- scanPictures tempODT sourceDirRelative cursor
+ let modified = parsed { elContent = toForest $ root cursor' }
+ putStrLn $ showTopElement modified
+ return $ showTopElement modified
+
+scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor
+scanPictures tempODT sourceDirRelative cursor = do
+ cursor' <- handleTree tempODT sourceDirRelative cursor
+ case right cursor' of
+ Just n -> scanPictures tempODT sourceDirRelative n
+ Nothing -> return cursor'
+
+handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor
+handleTree tempODT sourceDirRelative cursor = do
+ case firstChild cursor of
+ Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor
+ Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent
+
+-- | If content is an image link, handle it appropriately.
+-- Otherwise, handle children if any.
+handleContent :: FilePath -> FilePath -> Content -> IO Content
+handleContent tempODT sourceDirRelative content@(Elem el) = do
+ if qName (elName el) == "image"
+ then do
+ let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el
+ let href = case hrefs of
+ [] -> error $ "No href found in " ++ show el
+ [x] -> x
+ _ -> error $ "Multiple hrefs found in " ++ show el
+ if isURI $ attrVal href
+ then return content
+ else do -- treat as filename
+ let oldLoc = sourceDirRelative </> attrVal href
+ fileExists <- doesFileExist oldLoc
+ if fileExists
+ then do
+ let pref = take 230 $ concat $ intersperse "_" $
+ splitDirectories $ takeDirectory $ attrVal href
+ let newLoc = "Pictures" </> pref ++ "_" ++ (takeFileName $ attrVal href)
+ let tempDir = takeDirectory tempODT
+ copyFile oldLoc $ tempDir </> newLoc
+ let newAttrs = (href { attrVal = newLoc }) : rest
+ return $ Elem (el { elAttribs = newAttrs })
+ else do
+ hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring."
+ return content
+ else return content
+
+handleContent _ _ c = return c -- not Element
diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs
index 05289d6a6..3c202db0f 100644
--- a/Text/Pandoc/Shared.hs
+++ b/Text/Pandoc/Shared.hs
@@ -98,7 +98,9 @@ module Text.Pandoc.Shared (
-- * Writer options
HTMLMathMethod (..),
WriterOptions (..),
- defaultWriterOptions
+ defaultWriterOptions,
+ -- * File handling
+ withTempDir
) where
import Text.Pandoc.Definition
@@ -110,6 +112,9 @@ import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
import Data.List ( find, isPrefixOf )
import Control.Monad ( join )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
+import System.FilePath ( (</>), (<.>) )
+import System.IO.Error ( catch, ioError, isAlreadyExistsError )
+import System.Directory
--
-- List processing
@@ -900,3 +905,22 @@ defaultWriterOptions =
, writerReferenceLinks = False
, writerWrapText = True
}
+
+-- | Perform a function in a temporary directory and clean up.
+withTempDir :: FilePath -> (FilePath -> IO a) -> IO a
+withTempDir baseName func = do
+ tempDir <- createTempDir 0 baseName
+ result <- catch (func tempDir) $ \e -> removeDirectoryRecursive tempDir >> ioError e
+ removeDirectoryRecursive tempDir
+ return result
+
+-- | Create a temporary directory with a unique name.
+createTempDir :: Integer -> FilePath -> IO FilePath
+createTempDir num baseName = do
+ sysTempDir <- getTemporaryDirectory
+ let dirName = sysTempDir </> baseName <.> show num
+ catch (createDirectory dirName >> return dirName) $
+ \e -> if isAlreadyExistsError e
+ then createTempDir (num + 1) baseName
+ else ioError e
+
diff --git a/man/man1/markdown2odt.1.md b/man/man1/markdown2odt.1.md
deleted file mode 100644
index 9325237f4..000000000
--- a/man/man1/markdown2odt.1.md
+++ /dev/null
@@ -1,52 +0,0 @@
-% MARKDOWN2ODT(1) Pandoc User Manuals
-% John MacFarlane and Recai Oktas
-% March 14, 2008
-
-# NAME
-
-markdown2odt - converts markdown-formatted text to ODT
-
-# SYNOPSIS
-
-markdown2odt [*options*] [*input-file*]...
-
-# DESCRIPTION
-
-`markdown2odt` converts *input-file* (or text from standard
-input) from markdown-formatted plain text to ODT (OpenDocument
-Text) format. If no output filename is specified (using the `-o`
-option), the name of the output file is derived from the input file;
-thus, for example, if the input file is *hello.txt*, the output file
-will be *hello.odt*. If the input is read from STDIN and no output
-filename is specified, the output file will be named *stdin.odt*. If
-multiple input files are specified, they will be concatenated before
-conversion, and the name of the output file will be derived from the
-first input file.
-
-Input is assumed to be in the UTF-8 character encoding. If your
-local character encoding is not UTF-8, you should pipe input
-through `iconv`:
-
- iconv -t utf-8 input.txt | markdown2odt
-
-# OPTIONS
-
-`markdown2odt` is a wrapper around `pandoc`, so all of
-`pandoc`'s options can be used with `markdown2odt` as well.
-See `pandoc`(1) for a complete list.
-The following options are most relevant:
-
--o *FILE*, \--output=*FILE*
-: Write output to *FILE*.
-
-\--strict
-: Use strict markdown syntax, with no extensions or variants.
-
--S, \--smart
-: Use smart quotes, dashes, and ellipses. (This option is significant
- only when the input format is `markdown`. It is selected automatically
- when the output format is `latex` or `context`.)
-
-# SEE ALSO
-
-`pandoc`(1)
diff --git a/pandoc.cabal b/pandoc.cabal
index 679806acb..d0d788499 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -57,6 +57,7 @@ Extra-Source-Files: README, INSTALL, COPYRIGHT, COPYING,
Extra-Tmp-Files: Text/Pandoc/ASCIIMathML.hs,
Text/Pandoc/DefaultHeaders.hs,
Text/Pandoc/Writers/S5.hs
+Data-Files: odt-styles/reference.odt
Flag splitBase
Description: Choose the new, smaller, split-up base package.
Default: True
@@ -72,7 +73,8 @@ Library
if flag(highlighting)
Build-depends: highlighting-kate
cpp-options: -DHIGHLIGHTING
- Build-Depends: parsec < 3, xhtml, mtl, network, filepath
+ Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, xml,
+ bytestring, binary
Hs-Source-Dirs: .
Exposed-Modules: Text.Pandoc,
Text.Pandoc.Blocks,
@@ -80,6 +82,7 @@ Library
Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared,
Text.Pandoc.UTF8,
+ Text.Pandoc.ODT,
Text.Pandoc.ASCIIMathML,
Text.Pandoc.DefaultHeaders,
Text.Pandoc.Highlighting,
@@ -101,11 +104,11 @@ Library
Text.Pandoc.Writers.RTF,
Text.Pandoc.Writers.S5
Other-Modules: Text.Pandoc.XML
- Ghc-Options: -O2 -Wall
+ Ghc-Options: -O2 -Wall -threaded
Ghc-Prof-Options: -auto-all
Executable pandoc
- Hs-Source-Dirs: .
- Main-Is: Main.hs
- Ghc-Options: -O2 -Wall
- Ghc-Prof-Options: -auto-all
+ Hs-Source-Dirs: .
+ Main-Is: Main.hs
+ Ghc-Options: -O2 -Wall -threaded
+ Ghc-Prof-Options: -auto-all
diff --git a/wrappers/markdown2odt.in b/wrappers/markdown2odt.in
deleted file mode 100644
index e5b3f0212..000000000
--- a/wrappers/markdown2odt.in
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/sh -e
-
-REQUIRED="zip"
-SYNOPSIS="converts markdown-formatted text to ODT."
-
-### odtref.sh
-
-### common.sh
-
-### tempdir.sh
-
-if [ "$OUTPUT" = "-" ]; then
- firstinfile="$(echo $ARGS | sed -ne '1p')"
- firstinfilebase="${firstinfile%.*}"
- destname="${firstinfilebase:-stdin}.odt"
-else
- destname="$OUTPUT"
-fi
-
-(
- cp $REFERENCEODT $THIS_TEMPDIR/new.odt
- pandoc -s -r markdown -w opendocument "$@" -o $THIS_TEMPDIR/content.xml
- zip -9 -j $THIS_TEMPDIR/new.odt $THIS_TEMPDIR/content.xml
-) || exit $?
-
-is_target_exists=
-if [ -f "$destname" ]; then
- is_target_exists=1
- mv "$destname" "$destname~"
-fi
-
-mv -f $THIS_TEMPDIR/new.odt "$destname"
-
-errn "Created $destname"
-[ -z "$is_target_exists" ] || {
- errn " (previous file has been backed up as $destname~)"
-}
-
-err .
-
-