From 85dacbb282f34dc45a810d7c7b5a2d639b70e1f5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Jul 2013 22:52:15 -0700 Subject: Added `--filter` option. This makes it easier to use JSON filters. Instead of doing pandoc -t json | ./filter | pandoc -f json you can just do pandoc --filter ./filter --- README | 8 ++++++++ pandoc.cabal | 1 + pandoc.hs | 25 +++++++++++++++++++++---- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/README b/README index d9b003344..83873a92e 100644 --- a/README +++ b/README @@ -259,6 +259,14 @@ Reader options require different kinds of images. Currently this option only affects the markdown and LaTeX readers. +`--filter=`*PATH* +: Specify an executable to be used as a filter transforming the + Pandoc AST after the input is parsed and before the output is + written. The executable should read JSON from stdin and write + JSON to stdout. The JSON must be formatted like pandoc's own + JSON input and output. Filters may be most easily created in Haskell, + using the utility function `toJsonFilter` from `Text.Pandoc`. + `--normalize` : Normalize the document after reading: merge adjacent `Str` or `Emph` elements, for example, and remove repeated `Space`s. diff --git a/pandoc.cabal b/pandoc.cabal index cd04a2faf..c47273c57 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -360,6 +360,7 @@ Executable pandoc extensible-exceptions >= 0.1 && < 0.2, highlighting-kate >= 0.5.5 && < 0.6, HTTP >= 4000.0.5 && < 4000.3, + process >= 1 && < 1.2, citeproc-hs >= 0.3.7 && < 0.4 if impl(ghc >= 7.0.1) Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind diff --git a/pandoc.hs b/pandoc.hs index 79bade221..94d206103 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -43,6 +43,7 @@ import System.Environment ( getArgs, getProgName ) import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt +import System.Process (readProcess) import Data.Char ( toLower ) import Data.List ( intercalate, isPrefixOf, sort ) import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) @@ -53,6 +54,7 @@ import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.CSL as CSL import Control.Monad (when, unless, liftM) +import Data.Foldable (foldrM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B @@ -86,6 +88,12 @@ wrapWords indent c = wrap' (c - indent) (c - indent) isTextFormat :: String -> Bool isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"] +externalFilter :: FilePath -> Pandoc -> IO Pandoc +externalFilter f d = E.catch + (readJSON def `fmap` readProcess f [] (writeJSON def d)) + (\e -> let _ = (e :: E.SomeException) + in err 83 $ "Error running filter `" ++ f ++ "'") + -- | Data structure for command line options. data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab @@ -272,6 +280,13 @@ options = "STRING") "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" + , Option "" ["filter"] + (ReqArg + (\arg opt -> return opt { optPlugins = externalFilter arg : + optPlugins opt }) + "PROGRAM") + "" -- "External JSON filter" + , Option "" ["normalize"] (NoArg (\opt -> return opt { optTransforms = @@ -876,6 +891,7 @@ main = do , optReferenceLinks = referenceLinks , optWrapText = wrap , optColumns = columns + , optPlugins = plugins , optEmailObfuscation = obfuscationMethod , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses @@ -1099,6 +1115,7 @@ main = do reader readerOpts let doc0 = foldr ($) doc transforms + doc1 <- foldrM ($) doc0 plugins let writeBinary :: B.ByteString -> IO () writeBinary = B.writeFile (UTF8.encodePath outputFile) @@ -1109,15 +1126,15 @@ main = do case getWriter writerName' of Left e -> err 9 e - Right (IOStringWriter f) -> f writerOptions doc0 >>= writerFn outputFile - Right (IOByteStringWriter f) -> f writerOptions doc0 >>= writeBinary + Right (IOStringWriter f) -> f writerOptions doc1 >>= writerFn outputFile + Right (IOByteStringWriter f) -> f writerOptions doc1 >>= writeBinary Right (PureStringWriter f) | pdfOutput -> do - res <- makePDF latexEngine f writerOptions doc0 + res <- makePDF latexEngine f writerOptions doc1 case res of Right pdf -> writeBinary pdf Left err' -> err 43 $ UTF8.toStringLazy err' - | otherwise -> selfcontain (f writerOptions doc0 ++ + | otherwise -> selfcontain (f writerOptions doc1 ++ ['\n' | not standalone']) >>= writerFn outputFile . handleEntities where htmlFormat = writerName' `elem` -- cgit v1.2.3