From 6dd24184765800bdedc1d28a87f9564f7f44d4f4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 31 Jul 2014 12:00:21 -0700 Subject: New module, Text.Pandoc.MediaBag. Moved `MediaBag` definition and functions from Shared: `lookupMedia`, `mediaDirectory`, `insertMedia`, `extractMediaBag`. Removed `emptyMediaBag`; use `mempty` instead, since `MediaBag` is a Monoid. --- src/Text/Pandoc.hs | 3 +- src/Text/Pandoc/MediaBag.hs | 107 +++++++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Options.hs | 5 +- src/Text/Pandoc/Readers/Docx.hs | 4 +- src/Text/Pandoc/SelfContained.hs | 4 +- src/Text/Pandoc/Shared.hs | 79 ++--------------------------- src/Text/Pandoc/Writers/EPUB.hs | 3 +- 7 files changed, 123 insertions(+), 82 deletions(-) create mode 100644 src/Text/Pandoc/MediaBag.hs (limited to 'src') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 11553383c..77eb3e82f 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -160,7 +160,8 @@ import Text.Pandoc.Writers.Haddock import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn, MediaBag) +import Text.Pandoc.Shared (safeRead, warn) +import Text.Pandoc.MediaBag (MediaBag) import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs new file mode 100644 index 000000000..667089f55 --- /dev/null +++ b/src/Text/Pandoc/MediaBag.hs @@ -0,0 +1,107 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{- +Copyright (C) 2014 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.MediaBag + Copyright : Copyright (C) 2014 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Definition of a MediaBag object to hold binary resources, and an +interface for interacting with it. +-} +module Text.Pandoc.MediaBag ( + MediaBag, + lookupMedia, + insertMedia, + mediaDirectory, + extractMediaBag + ) where +import System.FilePath +import System.Directory (createDirectoryIfMissing) +import qualified Data.Map as M +import qualified Data.ByteString.Lazy as BL +import Data.Monoid (Monoid) +import Control.Monad (when, MonadPlus(..)) +import Text.Pandoc.MIME (getMimeType) +import qualified Text.Pandoc.UTF8 as UTF8 +import Data.Maybe (fromMaybe) +import System.IO (stderr) + +-- | A container for a collection of binary resources, with names and +-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' +-- can be used for an empty 'MediaBag', and '<>' can be used to append +-- two 'MediaBag's. +newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString)) + deriving (Monoid) + +instance Show MediaBag where + show bag = "MediaBag " ++ show (mediaDirectory bag) + +-- | Insert a media item into a 'MediaBag', replacing any existing +-- value with the same name. +insertMedia :: FilePath -- ^ relative path and canonical name of resource + -> Maybe String -- ^ mime type (Nothing = determine from extension) + -> BL.ByteString -- ^ contents of resource + -> MediaBag + -> MediaBag +insertMedia fp mbMime contents (MediaBag mediamap) = + MediaBag (M.insert fp (mime, contents) mediamap) + where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback) + fallback = case takeExtension fp of + ".gz" -> getMimeType $ dropExtension fp + _ -> getMimeType fp + +-- | Lookup a media item in a 'MediaBag', returning mime type and contents. +lookupMedia :: FilePath + -> MediaBag + -> Maybe (String, BL.ByteString) +lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap + +-- | Get a list of the file paths stored in a 'MediaBag', with +-- their corresponding mime types and the lengths in bytes of the contents. +mediaDirectory :: MediaBag -> [(String, String, Int)] +mediaDirectory (MediaBag mediamap) = + M.foldWithKey (\fp (mime,contents) -> + ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap + +-- | Extract contents of MediaBag to a given directory. Print informational +-- messages if 'verbose' is true. +extractMediaBag :: Bool + -> FilePath + -> MediaBag + -> IO () +extractMediaBag verbose dir (MediaBag mediamap) = do + sequence_ $ M.foldWithKey + (\fp (_ ,contents) -> + ((writeMedia verbose dir (fp, contents)):)) [] mediamap + +writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () +writeMedia verbose dir (subpath, bs) = do + -- we join and split to convert a/b/c to a\b\c on Windows; + -- in zip containers all paths use / + let fullpath = dir joinPath (splitPath subpath) + createDirectoryIfMissing True $ takeDirectory fullpath + when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath + BL.writeFile fullpath bs + + diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index bf6b3d910..85a6a3096 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -49,7 +49,8 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Default import Text.Pandoc.Highlighting (Style, pygments) -import Text.Pandoc.Shared (MediaBag, emptyMediaBag) +import Text.Pandoc.MediaBag (MediaBag) +import Data.Monoid -- | Individually selectable syntax extensions. data Extension = @@ -358,7 +359,7 @@ instance Default WriterOptions where , writerTOCDepth = 3 , writerReferenceODT = Nothing , writerReferenceDocx = Nothing - , writerMediaBag = emptyMediaBag + , writerMediaBag = mempty } -- | Returns True if the given extension is enabled. diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 2fb4da2d9..7a89c0b04 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,8 +84,10 @@ import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Readers.Docx.TexChar import Text.Pandoc.Shared +import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.Maybe (mapMaybe, fromMaybe) import Data.List (delete, isPrefixOf, (\\), intercalate, intersect) +import Data.Monoid import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import Control.Monad.Reader @@ -108,7 +110,7 @@ data DState = DState { docxAnchorMap :: M.Map String String defaultDState :: DState defaultDState = DState { docxAnchorMap = M.empty - , docxMediaBag = emptyMediaBag + , docxMediaBag = mempty , docxInHeaderBlock = False , docxInTexSubscript = False} diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 77f8b6530..adb2c0014 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -41,8 +41,8 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, ()) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, - MediaBag, lookupMedia) +import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index deab1abc0..d5769c1ab 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,5 +1,5 @@ {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, - FlexibleContexts, ScopedTypeVariables, GeneralizedNewtypeDeriving #-} + FlexibleContexts, ScopedTypeVariables #-} {- Copyright (C) 2006-2014 John MacFarlane @@ -48,13 +48,6 @@ module Text.Pandoc.Shared ( toRomanNumeral, escapeURI, tabFilter, - -- * Media Handling - MediaBag, - emptyMediaBag, - lookupMedia, - insertMedia, - mediaDirectory, - extractMediaBag, -- * Date/time normalizeDate, -- * Pandoc block and inline list processing @@ -97,6 +90,7 @@ module Text.Pandoc.Shared ( import Text.Pandoc.Definition import Text.Pandoc.Walk +import Text.Pandoc.MediaBag (MediaBag, lookupMedia) import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 @@ -106,18 +100,16 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha, isLetter, isDigit, isSpace ) import Data.List ( find, isPrefixOf, intercalate ) import qualified Data.Map as M -import Data.Maybe ( fromMaybe ) import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, unEscapeString, parseURIReference ) import qualified Data.Set as Set import System.Directory import Text.Pandoc.MIME (getMimeType) -import System.FilePath ( (), takeExtension, dropExtension, takeDirectory, - splitPath, joinPath ) +import System.FilePath ( (), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E -import Control.Monad (msum, unless, MonadPlus(..), when) +import Control.Monad (msum, unless) import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) import Data.Time @@ -127,7 +119,6 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 -import qualified Data.ByteString.Lazy as BL import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) @@ -294,68 +285,6 @@ tabFilter tabStop = x : go (spsToNextStop - 1) xs in go tabStop ---- ---- Media handling ---- - --- | A container for a collection of binary resources, with names and --- mime types. -newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString)) - deriving (Monoid) - -instance Show MediaBag where - show bag = "MediaBag " ++ show (mediaDirectory bag) - -emptyMediaBag :: MediaBag -emptyMediaBag = MediaBag M.empty - --- | Insert a media item into a 'MediaBag', replacing any existing --- value with the same name. -insertMedia :: FilePath -- ^ relative path and canonical name of resource - -> Maybe String -- ^ mime type (Nothing = determine from extension) - -> BL.ByteString -- ^ contents of resource - -> MediaBag - -> MediaBag -insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert fp (mime, contents) mediamap) - where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback) - fallback = case takeExtension fp of - ".gz" -> getMimeType $ dropExtension fp - _ -> getMimeType fp - --- | Lookup a media item in a 'MediaBag', returning mime type and contents. -lookupMedia :: FilePath - -> MediaBag - -> Maybe (String, BL.ByteString) -lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap - --- | Get a list of the file paths stored in a 'MediaBag', with --- their corresponding mime types and the lengths in bytes of the contents. -mediaDirectory :: MediaBag -> [(String, String, Int)] -mediaDirectory (MediaBag mediamap) = - M.foldWithKey (\fp (mime,contents) -> - ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap - --- | Extract contents of MediaBag to a given directory. Print informational --- messages if 'verbose' is true. -extractMediaBag :: Bool - -> FilePath - -> MediaBag - -> IO () -extractMediaBag verbose dir (MediaBag mediamap) = do - sequence_ $ M.foldWithKey - (\fp (_ ,contents) -> - ((writeMedia verbose dir (fp, contents)):)) [] mediamap - -writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () -writeMedia verbose dir (subpath, bs) = do - -- we join and split to convert a/b/c to a\b\c on Windows; - -- in zip containers all paths use / - let fullpath = dir joinPath (splitPath subpath) - createDirectoryIfMissing True $ takeDirectory fullpath - when verbose $ warn $ "extracting " ++ fullpath - BL.writeFile fullpath bs - -- -- Date/time -- diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 62dd70e73..770b6f244 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -61,6 +61,7 @@ import Text.Pandoc.MIME (getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup +import Data.Monoid -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -793,7 +794,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do - raw <- makeSelfContained emptyMediaBag Nothing $ writeHtmlInline opts x + raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x return $ RawInline (Format "html") raw transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do -- cgit v1.2.3