summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Main.hs53
-rw-r--r--README170
-rw-r--r--Text/Pandoc/Definition.hs25
-rw-r--r--Text/Pandoc/Plugins.hs69
-rw-r--r--man/man1/pandoc.1.md8
-rw-r--r--pandoc.cabal9
-rw-r--r--plugins/CapitalizeEmphasisPlugin.hs14
-rw-r--r--plugins/DotPlugin.hs30
-rw-r--r--plugins/IncludeFilePlugin.hs19
-rw-r--r--plugins/ListLinksPlugin.hs15
-rw-r--r--plugins/WordPressPlugin.hs10
11 files changed, 398 insertions, 24 deletions
diff --git a/Main.hs b/Main.hs
index 0e0b4ba26..26057ff58 100644
--- a/Main.hs
+++ b/Main.hs
@@ -32,22 +32,24 @@ writers.
module Main where
import Text.Pandoc
import Text.Pandoc.ODT
-import Text.Pandoc.Shared ( HTMLMathMethod (..) )
+import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy )
import Text.Pandoc.Highlighting ( languages )
import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) )
-import System.FilePath ( takeExtension, takeDirectory )
+import System.FilePath
import System.Console.GetOpt
import Data.Maybe ( fromMaybe )
import Data.Char ( toLower )
import Data.List ( intercalate, isSuffixOf )
-import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
+import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
import System.IO ( stdout, stderr )
import System.IO.UTF8
#ifdef _CITEPROC
import Text.CSL
import Text.Pandoc.Biblio
#endif
+import Text.Pandoc.Plugins (getPlugin)
+import Control.Monad (foldM)
copyrightMessage :: String
copyrightMessage = "\nCopyright (C) 2006-8 John MacFarlane\n" ++
@@ -153,6 +155,7 @@ data Opt = Opt
, optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, optWrapText :: Bool -- ^ Wrap text
, optSanitizeHTML :: Bool -- ^ Sanitize HTML
+ , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
#ifdef _CITEPROC
, optBiblioFile :: String
, optBiblioFormat :: String
@@ -187,6 +190,7 @@ defaultOpts = Opt
, optReferenceLinks = False
, optWrapText = True
, optSanitizeHTML = False
+ , optPlugins = []
#ifdef _CITEPROC
, optBiblioFile = []
, optBiblioFormat = []
@@ -348,6 +352,15 @@ options =
"FILENAME")
"" -- "File to use for custom header (implies -s)"
+ , Option "P" ["plugins"]
+ (ReqArg
+ (\arg opt -> do
+ let pluginModules = splitBy ',' arg
+ plugins <- mapM getPlugin pluginModules
+ return opt { optPlugins = plugins })
+ "MODULE[,MODULE...]")
+ "" -- "Haskell modules"
+
, Option "T" ["title-prefix"]
(ReqArg
(\arg opt -> return opt { optTitlePrefix = arg,
@@ -519,6 +532,7 @@ main = do
, optReferenceLinks = referenceLinks
, optWrapText = wrap
, optSanitizeHTML = sanitize
+ , optPlugins = plugins
#ifdef _CITEPROC
, optBiblioFile = biblioFile
, optBiblioFormat = biblioFormat
@@ -630,21 +644,26 @@ main = do
then ""
else takeDirectory (head sources)
- let writeOutput = case writerName' of
- "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative
- _ -> if outputFile == "-"
- then putStrLn
- else writeFile outputFile . (++ "\n")
+ let readSources [] = mapM readSource ["-"]
+ readSources srcs = mapM readSource srcs
+ readSource "-" = getContents
+ readSource src = readFile src
+
+ doc <- fmap (reader startParserState . tabFilter tabStop . intercalate "\n") (readSources sources)
- fmap (reader startParserState . tabFilter tabStop . intercalate "\n")
- (readSources sources) >>=
+ doc' <- do
#ifdef _CITEPROC
- processBiblio cslFile refs >>=
+ processBiblio cslFile refs doc
+#else
+ return doc
#endif
- writeOutput . writer writerOptions
- where
- readSources [] = mapM readSource ["-"]
- readSources sources = mapM readSource sources
- readSource "-" = getContents
- readSource source = readFile source
+ doc'' <- foldM (flip ($)) doc' plugins
+
+ let writerOutput = writer writerOptions doc'' ++ "\n"
+
+ case writerName' of
+ "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative writerOutput
+ _ -> if outputFile == "-"
+ then putStr writerOutput
+ else writeFile outputFile writerOutput
diff --git a/README b/README
index 86c52ac2b..4746ffae0 100644
--- a/README
+++ b/README
@@ -309,6 +309,14 @@ For further documentation, see the `pandoc(1)` man page.
repeatedly to include multiple files. They will be included in the
order specified.
+`-P` *MODULE[,MODULE...]*, `--plugins`*=MODULE[,MODULE...]*
+: specifies plugins to load, by module name or source file pathname.
+ Plugins should export a function `transform` of type `a -> a`
+ or `a -> IO a`, where `a` is `Inline`, `Block`, `Pandoc`,
+ `[Inline]`, or `[Block]`. This function will be used to transform
+ the pandoc document after it is parsed by the reader and before it
+ is written out by the writer. (See below on [Plugins](#plugins).)
+
`-T` or `--title-prefix` *string*
: includes *string* as a prefix at the beginning of the title that
appears in the HTML header (but not in the title as it appears at
@@ -1108,3 +1116,165 @@ ordinary HTML (without bird tracks).
writes HTML with the Haskell code in bird tracks, so it can be copied
and pasted as literate Haskell source.
+Plugins
+=======
+
+Pandoc's plugin system allows users to modify pandoc's behavior by writing
+short Haskell programs. A plugin is a Haskell module that exports a function
+`transform`, of type `a -> a` or `a -> IO a`, where `a` is `Pandoc`,
+`Block`, `Inline`, `[Block]`, or `[Inline]`. The `transform` function will
+be used to transform the pandoc document generated by the reader, before
+it is transformed by the writer.
+
+An example will help make this clearer. Suppose we want to use pandoc with
+the WordPress blog engine. WordPress provides support for LaTeX math, but
+instead of `$e = mc^2$`, WordPress wants `$LaTeX e = mc^2$`. Prior to plugins,
+there was no good way to make pandoc do this. We could have tried using
+regex replacements on the markdown input or HTML output, but this would have
+been error-prone: we'd have to make sure we weren't capturing non-math text
+between dollar signs (for example, text inside a code block). Besides,
+pandoc's markdown reader has already identified the math bits; why not
+make use of that? By writing a plugin, we can:
+
+~~~ {.haskell}
+-- WordPressPlugin.hs
+module WordPressPlugin (transform) where
+import Text.Pandoc
+
+transform :: Inline -> Inline
+transform (Math x y) = Math x $ "LaTeX " ++ y
+transform x = x
+~~~
+
+This is a Haskell program, but a very short one. The lines
+
+~~~ {.haskell}
+module WordPressPlugin (transform) where
+import Text.Pandoc
+~~~
+
+just define the name of the module (`WordPressPlugin`), the names of any
+exported functions (for a plugin, this will always just be `transform`),
+and the modules that will be used in the program itself (`Text.Pandoc`).
+The real meat of the program is the three-line definition of `transform`:
+
+~~~ {.haskell}
+transform :: Inline -> Inline
+transform (Math x y) = Math x $ "LaTeX " ++ y
+transform x = x
+~~~
+
+The first line defines the type of the function: it is a function that
+takes an `Inline` element and returns an `Inline` element. (For the definition
+of `Inline`, see the module `Text.Pandoc.Definition`.) The next line says
+that when the input matches the pattern `Math x y`, the string `LaTeX `
+should be inserted at the beginning of `y`. (`x` just specifies whether the
+math element is inline or display math, so we leave it alone.) The last
+line says, in effect, that the `transform` function has no effect on any
+other kind of `Inline` element -- it just passes it through. When the plugin
+is applied, this transformation will be used on every `Inline` element in
+the document, and `LaTeX ` will be inserted where needed in math elements.
+
+To use this plugin, we just specify the module (or alternatively the filename)
+with the `--plugins` option:
+
+ % echo "Hello, $e=mc^2$." | pandoc -m --plugins=WordPressPlugin.hs
+ <p
+ >Hello, <span class="LaTeX"
+ >$LaTeX e=mc^2$</span
+ >.</p
+ >
+
+Let's look at a more complex example, involving IO. Suppose we want to include
+some graphviz diagrams in our document. Of course, we could use a Makefile to
+generate the diagrams, then use regular images in our document. But wouldn't it
+be nicer just to include the graphviz code in the document itself, perhaps in
+a specially marked delimited code block?
+
+ ~~~ {.dot name="diagram1"}
+ digraph G {Hello->World}
+ ~~~
+
+This can be accomplished by a plugin:
+
+~~~ {.haskell}
+-- DotPlugin.hs
+module DotPlugin (transform) where
+import Text.Pandoc
+import Text.Pandoc.Shared
+import System.Process (readProcess)
+import Data.Char (ord)
+-- from the utf8-string package on HackageDB:
+import Data.ByteString.Lazy.UTF8 (fromString)
+-- from the SHA package on HackageDB:
+import Data.Digest.Pure.SHA
+
+transform :: Block -> IO Block
+transform (CodeBlock (id, classes, namevals) contents) | "dot" `elem` classes = do
+ let (name, outfile) = case lookup "name" namevals of
+ Just fn -> ([Str fn], fn ++ ".png")
+ Nothing -> ([], uniqueName contents ++ ".png")
+ result <- readProcess "dot" ["-Tpng"] contents
+ writeFile outfile result
+ return $ Para [Image name (outfile, "")]
+transform x = return x
+
+-- | Generate a unique filename given the file's contents.
+uniqueName :: String -> String
+uniqueName = showDigest . sha1 . fromString
+~~~
+
+The heart of this plugin is the `transform` function, which converts a `Block`
+to a `Block`. Again, there are two clauses, one for code blocks that are marked
+with the "dot" class, one for all other blocks. Code blocks with ".dot" are
+replaced with links to an image file; this file is generated by running
+`dot -Tpng` on the contents of the code block.
+
+Because `transform` performs file reads and writes, it needs to be in the
+IO monad, hence the type: `Block -> IO Block`.
+
+One more example. Suppose we want emphasized text to be CAPITALIZED
+instead of italicized. We could use a plugin:
+
+~~~ {.haskell}
+module CapitalizeEmphasisPlugin (transform) where
+import Text.Pandoc
+import Data.Char (toUpper)
+
+transform :: [Inline] -> [Inline]
+transform (Emph xs : ys) = processIn capStr xs ++ transform ys
+transform (x : ys) = x : transform ys
+transform [] = []
+
+capStr :: Inline -> Inline
+capStr (Str x) = Str (map toUpper x)
+capStr x = x
+~~~
+
+Here `transform` converts a whole list of `Inline` elements to another
+such list. The key clause is
+
+~~~ {.haskell}
+transform (Emph xs : ys) = processIn capStr xs ++ transform ys
+~~~
+
+This applies the `capStr` function recursively to all inlines in the
+list of emphasized inlines and puts the transformed list in place
+of the original. `capStr` is a simple `Inline` transformation that
+capitalizes `Str` elements and leaves everything else alone. The
+function `processIn`, defined in `Text.Pandoc.Definition`, uses some
+`Data.Generics` magic to apply its argument (here `capStr`) to every
+`Inline` element in a list, including elements that are deeply buried in
+other elements. Thus
+
+ processIn captStr [Str "one", Strong [Str "two", Space]] ==>
+ [Str "ONE", Strong [Str "TWO", Space]]
+
+There are other sample plugins in the `plugins` subdirectory of the
+pandoc source code.
+
+**Note:** Do not attempt to use plugins when running pandoc in the
+directory containing pandoc's source code. The interpreter will try to
+load the files directly from the source code, rather than reading the compiled
+versions, and pandoc will hang.
+
diff --git a/Text/Pandoc/Definition.hs b/Text/Pandoc/Definition.hs
index 3c783b1b9..ed58f6b75 100644
--- a/Text/Pandoc/Definition.hs
+++ b/Text/Pandoc/Definition.hs
@@ -128,10 +128,23 @@ data Inline
| Note [Block] -- ^ Footnote or endnote
deriving (Show, Eq, Read, Typeable, Data)
--- | Applies a transformation to matching elements in a Pandoc document.
-processPandoc :: Typeable a => (a -> a) -> Pandoc -> Pandoc
-processPandoc f = everywhere (mkT f)
+-- | Applies a transformation on @a@s to matching elements in a @b@.
+processIn :: (Data a, Data b) => (a -> a) -> b -> b
+processIn f = everywhere (mkT f)
+
+-- | Like 'processIn', but with monadic transformations.
+processInM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b
+processInM f = everywhereM (mkM f)
+
+-- | Runs a query on matching @a@ elements in a @c@.
+queryIn :: (Data a, Data c) => (a -> [b]) -> c -> [b]
+queryIn f = everything (++) ([] `mkQ` f)
+
+{-# DEPRECATED processPandoc "Use processIn instead" #-}
+processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc
+processPandoc = processIn
+
+{-# DEPRECATED queryPandoc "Use queryIn instead" #-}
+queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b]
+queryPandoc = queryIn
--- | Runs a query on matching elements in a Pandoc document.
-queryPandoc :: Typeable a => (a -> [b]) -> Pandoc -> [b]
-queryPandoc f = everything (++) ([] `mkQ` f)
diff --git a/Text/Pandoc/Plugins.hs b/Text/Pandoc/Plugins.hs
new file mode 100644
index 000000000..5ba333faa
--- /dev/null
+++ b/Text/Pandoc/Plugins.hs
@@ -0,0 +1,69 @@
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Pluigns
+ Copyright : Copyright (C) 2008 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Support for plugins.
+-}
+
+module Text.Pandoc.Plugins (getPlugin)
+where
+
+import Language.Haskell.Interpreter
+import Text.Pandoc
+import Control.Monad (unless, liftM)
+import Control.Monad.Error (throwError)
+import Data.List (isInfixOf)
+
+-- | Returns the function named @transform@ in the specified
+-- module. The module may be identified either by module name
+-- or by path name. The @transform@ function should have type
+-- @a -> a@ or @a -> IO a@, where @a@ is an instance of 'Data':
+-- for example, @Pandoc -> Pandoc@, @Inline -> IO Inline@,
+-- @Block -> Block@, or @[Inline] -> IO [Inline]@.
+getPlugin :: String -> IO (Pandoc -> IO Pandoc)
+getPlugin modsrc = do
+ res <- runInterpreter (evaluatePlugin modsrc)
+ case res of
+ Right func -> return func
+ Left (WontCompile xs) -> error $ "WontCompile error for plugin '" ++ modsrc ++ "'\n" ++ unlines (map errMsg xs)
+ Left (NotAllowed x) -> error $ "NotAllowed error for plugin '" ++ modsrc ++ "'\n" ++ x
+ Left (UnknownError x) -> error $ "UnknownError for plugin '" ++ modsrc ++ "'\n" ++ x
+ Left (GhcException x) -> error $ "GhcException for plugin '" ++ modsrc ++ "'\n" ++ x
+
+evaluatePlugin :: String -> Interpreter (Pandoc -> IO Pandoc)
+evaluatePlugin modsrc = do
+ set [installedModulesInScope := False]
+ loadModules [modsrc]
+ modnames <- getLoadedModules
+ setTopLevelModules modnames
+ setImports ["Prelude", "Text.Pandoc", "Text.Pandoc.Definition"]
+ exports <- liftM concat $ mapM getModuleExports modnames
+ unless ((Fun "transform") `elem` exports) $
+ throwError $ UnknownError $ "The plugin module must define a function 'transform'."
+ transformType <- typeOf "transform"
+ if "-> IO" `isInfixOf` transformType
+ then interpret "processInM transform" (as :: Pandoc -> IO Pandoc)
+ else interpret "return . (processIn transform)" (as :: Pandoc -> IO Pandoc)
diff --git a/man/man1/pandoc.1.md b/man/man1/pandoc.1.md
index 91c3a8537..a418e608b 100644
--- a/man/man1/pandoc.1.md
+++ b/man/man1/pandoc.1.md
@@ -173,6 +173,14 @@ to Pandoc. Or use `html2markdown`(1), a wrapper around `pandoc`.
`context`, `docbook`, `man`, `markdown`, `opendocument`,
`rst`, `rtf`).
+-P *MODULE[,MODULE...]*, \--plugins=*MODULE[,MODULE...]*
+: Specify plugins to load, by module name or source file pathname.
+ Plugins should export a function `transform` of type `a -> a`
+ or `a -> IO a`, where `a` is `Inline`, `Block`, `Pandoc`,
+ `[Inline]`, or `[Block]`. This function will be used to transform
+ the pandoc document after it is parsed by the reader and before it
+ is written out by the writer.
+
-T *STRING*, \--title-prefix=*STRING*
: Specify *STRING* as a prefix to the HTML window title.
diff --git a/pandoc.cabal b/pandoc.cabal
index 20968719d..8ff5d8b87 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -70,6 +70,12 @@ Extra-Source-Files:
data/odt-styles/Thumbnails/thumbnail.png,
data/odt-styles/styles.xml,
data/odt-styles/mimetype,
+ -- sample plugins
+ plugins/WordPressPlugin.hs,
+ plugins/CapitalizeEmphasisPlugin.hs,
+ plugins/IncludeFilePlugin.hs,
+ plugins/DotPlugin.hs,
+ plugins/ListLinksPlugin.hs,
-- tests
tests/bodybg.gif,
tests/writer.latex,
@@ -139,7 +145,7 @@ Library
mtl >= 1.1, network >= 2, filepath >= 1.1,
process >= 1, directory >= 1, template-haskell >= 2.2,
bytestring >= 0.9, zip-archive >= 0.1.1,
- utf8-string >= 0.3, old-time >= 1
+ utf8-string >= 0.3, old-time >= 1, hint >= 0.3.0.0
if impl(ghc >= 6.10)
Build-depends: base >= 4, syb
else
@@ -158,6 +164,7 @@ Library
Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared,
Text.Pandoc.ODT,
+ Text.Pandoc.Plugins,
Text.Pandoc.LaTeXMathML,
Text.Pandoc.DefaultHeaders,
Text.Pandoc.Highlighting,
diff --git a/plugins/CapitalizeEmphasisPlugin.hs b/plugins/CapitalizeEmphasisPlugin.hs
new file mode 100644
index 000000000..31cb4403a
--- /dev/null
+++ b/plugins/CapitalizeEmphasisPlugin.hs
@@ -0,0 +1,14 @@
+module CapitalizeEmphasisPlugin (transform) where
+import Text.Pandoc
+import Data.Char (toUpper)
+
+-- This plugin changes emphasized text into CAPITALIZED TEXT.
+
+transform :: [Inline] -> [Inline]
+transform (Emph x : ys) = processIn capStr x ++ transform ys
+transform (x : ys) = x : transform ys
+transform [] = []
+
+capStr :: Inline -> Inline
+capStr (Str x) = Str (map toUpper x)
+capStr x = x
diff --git a/plugins/DotPlugin.hs b/plugins/DotPlugin.hs
new file mode 100644
index 000000000..db1a02e1c
--- /dev/null
+++ b/plugins/DotPlugin.hs
@@ -0,0 +1,30 @@
+module DotPlugin (transform) where
+import Text.Pandoc
+import Text.Pandoc.Shared
+import System.Process (readProcess)
+import Data.Char (ord)
+-- from the utf8-string package on HackageDB:
+import Data.ByteString.Lazy.UTF8 (fromString)
+-- from the SHA package on HackageDB:
+import Data.Digest.Pure.SHA
+
+-- This plugin allows you to include a graphviz "dot" diagram
+-- in a document like this:
+--
+-- ~~~ {.dot name="diagram1"}
+-- digraph G {Hello->World}
+-- ~~~
+
+transform :: Block -> IO Block
+transform (CodeBlock (id, classes, namevals) contents) | "dot" `elem` classes = do
+ let (name, outfile) = case lookup "name" namevals of
+ Just fn -> ([Str fn], fn ++ ".png")
+ Nothing -> ([], uniqueName contents ++ ".png")
+ result <- readProcess "dot" ["-Tpng"] contents
+ writeFile outfile result
+ return $ Para [Image name (outfile, "")]
+transform x = return x
+
+-- | Generate a unique filename given the file's contents.
+uniqueName :: String -> String
+uniqueName = showDigest . sha1 . fromString
diff --git a/plugins/IncludeFilePlugin.hs b/plugins/IncludeFilePlugin.hs
new file mode 100644
index 000000000..40a8ce34d
--- /dev/null
+++ b/plugins/IncludeFilePlugin.hs
@@ -0,0 +1,19 @@
+module IncludeFilePlugin (transform) where
+import Text.Pandoc
+import Text.Pandoc.Shared
+import Control.Monad
+
+-- This plugin allows you to include the contents of an
+-- external file in a delimited code block like this:
+--
+-- ~~~ {include="filename"}
+-- ~~~
+--
+-- Trailing newlines are trimmed.
+
+transform :: Block -> IO Block
+transform cb@(CodeBlock (id, classes, namevals) contents) =
+ case lookup "include" namevals of
+ Just f -> return . (CodeBlock (id, classes, namevals) . stripTrailingNewlines) =<< readFile f
+ Nothing -> return cb
+transform x = return x
diff --git a/plugins/ListLinksPlugin.hs b/plugins/ListLinksPlugin.hs
new file mode 100644
index 000000000..88c1553b1
--- /dev/null
+++ b/plugins/ListLinksPlugin.hs
@@ -0,0 +1,15 @@
+module ListLinksPlugin (transform) where
+import Text.Pandoc
+
+-- This plugin returns an empty document and prints a list
+-- of the URLs linked to in the source document.
+
+transform :: Pandoc -> IO Pandoc
+transform p = do
+ let urls = queryIn findURLs p
+ putStrLn $ unlines urls
+ return $ Pandoc (Meta [] [] []) []
+
+findURLs :: Inline -> [String]
+findURLs (Link label (url, title)) = [url]
+findURLs x = []
diff --git a/plugins/WordPressPlugin.hs b/plugins/WordPressPlugin.hs
new file mode 100644
index 000000000..85b7ca72b
--- /dev/null
+++ b/plugins/WordPressPlugin.hs
@@ -0,0 +1,10 @@
+module WordPressPlugin (transform) where
+import Text.Pandoc
+
+-- This plugin (when used with -m) prints LaTeX math in the
+-- format required by WordPress blogs. $e=mc^2$ becomes
+-- $LaTeX e=mc^2$.
+
+transform :: Inline -> Inline
+transform (Math x y) = Math x $ "LaTeX " ++ y
+transform x = x