summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2014-07-31 11:04:40 -0700
committerJohn MacFarlane <fiddlosopher@gmail.com>2014-07-31 11:05:35 -0700
commit00662faefbca0b9889d3d79dbb2985350356d18a (patch)
treeb0f6c39c3c91bc247c970297b4afa5a630d6d410
parent6e96f8b4783d46e2b9e245bf3144f023c5296a38 (diff)
Made MediaBag a newtype, and added mime type information to media.
Shared now exports functions for interacting with a MediaBag: - `emptyMediaBag` - `lookuMedia` - `insertMedia` - `mediaDirectory` - `extractMediaBag`
-rw-r--r--pandoc.hs26
-rw-r--r--src/Text/Pandoc/Options.hs5
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs4
-rw-r--r--src/Text/Pandoc/SelfContained.hs26
-rw-r--r--src/Text/Pandoc/Shared.hs80
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs2
-rw-r--r--tests/Tests/Readers/Docx.hs5
7 files changed, 98 insertions, 50 deletions
diff --git a/pandoc.hs b/pandoc.hs
index b3da9b8b1..64128221c 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -37,7 +37,8 @@ import Text.Pandoc.Walk (walk)
import Text.Pandoc.Readers.LaTeX (handleIncludes)
import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,
safeRead, headerShift, normalize, err, warn,
- openURL )
+ openURL, mediaDirectory, extractMediaBag,
+ emptyMediaBag )
import Text.Pandoc.XML ( toEntities )
import Text.Pandoc.SelfContained ( makeSelfContained )
import Text.Pandoc.Process (pipeProcess)
@@ -50,8 +51,7 @@ import System.Console.GetOpt
import Data.Char ( toLower )
import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort )
import System.Directory ( getAppUserDataDirectory, findExecutable,
- doesFileExist, Permissions(..), getPermissions,
- createDirectoryIfMissing )
+ doesFileExist, Permissions(..), getPermissions )
import System.IO ( stdout, stderr )
import System.IO.Error ( isDoesNotExistError )
import qualified Control.Exception as E
@@ -1206,15 +1206,6 @@ main = do
then handleIncludes
else return
- let writeMedia :: FilePath -> (FilePath, B.ByteString) -> IO ()
- writeMedia dir (subpath, bs) = do
- -- we join and split to convert a/b/c to a\b\c on Windows;
- -- in zip containers all paths use /
- let fullpath = dir </> joinPath (splitPath subpath)
- createDirectoryIfMissing True $ takeDirectory fullpath
- warn $ "extracting " ++ fullpath
- B.writeFile fullpath bs
-
let adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
adjustImagePath dir paths (Image lab (src, tit))
| src `elem` paths = Image lab (dir ++ "/" ++ src, tit)
@@ -1226,13 +1217,16 @@ main = do
inp <- readSources sources >>=
handleIncludes' . convertTabs . intercalate "\n"
d <- r readerOpts inp
- return (d, M.empty)
+ return (d, emptyMediaBag)
ByteStringReader r -> do
(d, media) <- readFiles sources >>= r readerOpts
d' <- case mbExtractMedia of
- Just dir | not (M.null media) -> do
- mapM_ (writeMedia dir) $ M.toList media
- return $ walk (adjustImagePath dir (M.keys media)) d
+ Just dir -> do
+ case [fp | (fp, _, _) <- mediaDirectory media] of
+ [] -> return d
+ fps -> do
+ extractMediaBag True dir media
+ return $ walk (adjustImagePath dir fps) d
_ -> return d
return (d', media)
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 2de1a9e35..bf6b3d910 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -49,8 +49,7 @@ import Data.Set (Set)
import qualified Data.Set as Set
import Data.Default
import Text.Pandoc.Highlighting (Style, pygments)
-import Text.Pandoc.Shared (MediaBag)
-import qualified Data.Map as M
+import Text.Pandoc.Shared (MediaBag, emptyMediaBag)
-- | Individually selectable syntax extensions.
data Extension =
@@ -359,7 +358,7 @@ instance Default WriterOptions where
, writerTOCDepth = 3
, writerReferenceODT = Nothing
, writerReferenceDocx = Nothing
- , writerMediaBag = M.empty
+ , writerMediaBag = emptyMediaBag
}
-- | Returns True if the given extension is enabled.
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index f5fb6565a..2fb4da2d9 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -108,7 +108,7 @@ data DState = DState { docxAnchorMap :: M.Map String String
defaultDState :: DState
defaultDState = DState { docxAnchorMap = M.empty
- , docxMediaBag = M.empty
+ , docxMediaBag = emptyMediaBag
, docxInHeaderBlock = False
, docxInTexSubscript = False}
@@ -369,7 +369,7 @@ parPartToInlines (BookMark _ anchor) =
return [Span (newAnchor, ["anchor"], []) []]
parPartToInlines (Drawing fp bs) = do
mediaBag <- gets docxMediaBag
- modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag}
+ modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
return [Image [] (fp, "")]
parPartToInlines (InternalHyperLink anchor runs) = do
ils <- concatMapM runToInlines runs
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 777da3551..77f8b6530 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -41,11 +41,11 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, MediaBag)
+import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err,
+ MediaBag, lookupMedia)
import Text.Pandoc.UTF8 (toString, fromString)
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
-import qualified Data.Map as M
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
@@ -110,16 +110,20 @@ getItem media userdata f =
-- this is needed for things like cmunrm.eot?#iefix,
-- which is used to get old versions of IE to work with web fonts.
let f' = takeWhile (\c -> c /= '?' && c /= '#') f
- let mime = case takeExtension f' of
- ".gz" -> getMimeType $ dropExtension f'
- x -> getMimeType x
+ let mbMime = case takeExtension f' of
+ ".gz" -> getMimeType $ dropExtension f'
+ x -> getMimeType x
exists <- doesFileExist f'
- cont <- if exists
- then B.readFile f'
- else case M.lookup f media of
- Just bs -> return $ BS.concat $ L.toChunks bs
- Nothing -> readDataFile userdata f'
- return (cont, mime)
+ if exists
+ then do
+ cont <- B.readFile f'
+ return (cont, mbMime)
+ else case lookupMedia f media of
+ Just (mime,bs) -> return (BS.concat $ L.toChunks bs,
+ Just mime)
+ Nothing -> do
+ cont <- readDataFile userdata f'
+ return (cont, mbMime)
where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
getRaw :: MediaBag -> Maybe FilePath -> String -> String
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index ee48f5bc1..deab1abc0 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,5 +1,5 @@
{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
- FlexibleContexts, ScopedTypeVariables #-}
+ FlexibleContexts, ScopedTypeVariables, GeneralizedNewtypeDeriving #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -50,7 +50,11 @@ module Text.Pandoc.Shared (
tabFilter,
-- * Media Handling
MediaBag,
+ emptyMediaBag,
+ lookupMedia,
insertMedia,
+ mediaDirectory,
+ extractMediaBag,
-- * Date/time
normalizeDate,
-- * Pandoc block and inline list processing
@@ -102,16 +106,18 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
import qualified Data.Map as M
+import Data.Maybe ( fromMaybe )
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
unEscapeString, parseURIReference )
import qualified Data.Set as Set
import System.Directory
import Text.Pandoc.MIME (getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension )
+import System.FilePath ( (</>), takeExtension, dropExtension, takeDirectory,
+ splitPath, joinPath )
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
-import Control.Monad (msum, unless)
+import Control.Monad (msum, unless, MonadPlus(..), when)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
@@ -292,15 +298,63 @@ tabFilter tabStop =
--- Media handling
---
--- | A map of media paths to their binary representations.
-type MediaBag = M.Map String BL.ByteString
+-- | A container for a collection of binary resources, with names and
+-- mime types.
+newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString))
+ deriving (Monoid)
--- | Insert a media item into a `MediaBag`
-insertMedia :: FilePath
- -> BL.ByteString
+instance Show MediaBag where
+ show bag = "MediaBag " ++ show (mediaDirectory bag)
+
+emptyMediaBag :: MediaBag
+emptyMediaBag = MediaBag M.empty
+
+-- | Insert a media item into a 'MediaBag', replacing any existing
+-- value with the same name.
+insertMedia :: FilePath -- ^ relative path and canonical name of resource
+ -> Maybe String -- ^ mime type (Nothing = determine from extension)
+ -> BL.ByteString -- ^ contents of resource
+ -> MediaBag
-> MediaBag
+insertMedia fp mbMime contents (MediaBag mediamap) =
+ MediaBag (M.insert fp (mime, contents) mediamap)
+ where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback)
+ fallback = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ _ -> getMimeType fp
+
+-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+lookupMedia :: FilePath
-> MediaBag
-insertMedia = M.insert
+ -> Maybe (String, BL.ByteString)
+lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
+
+-- | Get a list of the file paths stored in a 'MediaBag', with
+-- their corresponding mime types and the lengths in bytes of the contents.
+mediaDirectory :: MediaBag -> [(String, String, Int)]
+mediaDirectory (MediaBag mediamap) =
+ M.foldWithKey (\fp (mime,contents) ->
+ ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+
+-- | Extract contents of MediaBag to a given directory. Print informational
+-- messages if 'verbose' is true.
+extractMediaBag :: Bool
+ -> FilePath
+ -> MediaBag
+ -> IO ()
+extractMediaBag verbose dir (MediaBag mediamap) = do
+ sequence_ $ M.foldWithKey
+ (\fp (_ ,contents) ->
+ ((writeMedia verbose dir (fp, contents)):)) [] mediamap
+
+writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
+writeMedia verbose dir (subpath, bs) = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> joinPath (splitPath subpath)
+ createDirectoryIfMissing True $ takeDirectory fullpath
+ when verbose $ warn $ "extracting " ++ fullpath
+ BL.writeFile fullpath bs
--
-- Date/time
@@ -803,13 +857,9 @@ fetchItem sourceURL s
fetchItem' :: MediaBag -> Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
fetchItem' media sourceURL s = do
- case M.lookup s media of
+ case lookupMedia s media of
Nothing -> fetchItem sourceURL s
- Just bs -> do
- let mime = case takeExtension s of
- ".gz" -> getMimeType $ dropExtension s
- x -> getMimeType x
- return $ Right (BS.concat $ toChunks bs, mime)
+ Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 1f222b8b8..62dd70e73 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -793,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
- raw <- makeSelfContained M.empty Nothing $ writeHtmlInline opts x
+ raw <- makeSelfContained emptyMediaBag Nothing $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
transformInline opts mediaRef (RawInline fmt raw)
| fmt == Format "html" = do
diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index 494669fd5..0eae20e22 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -12,6 +12,7 @@ import qualified Data.ByteString.Base64 as B64
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Writers.Native (writeNative)
import qualified Data.Map as M
+import Text.Pandoc.Shared (lookupMedia)
-- We define a wrapper around pandoc that doesn't normalize in the
-- tests. Since we do our own normalization, we want to make sure
@@ -60,8 +61,8 @@ testCompareMediaIO name docxFile mediaPath mediaFile = do
df <- B.readFile docxFile
mf <- B.readFile mediaFile
let (_, mb) = readDocx def df
- dBytes = case M.lookup mediaPath mb of
- Just bs -> bs
+ dBytes = case lookupMedia mediaPath mb of
+ Just (_,bs) -> bs
Nothing -> error "Media file not found"
d64 = B8.unpack $ B64.encode $ BS.concat $ B.toChunks dBytes
m64 = B8.unpack $ B64.encode $ BS.concat $ B.toChunks mf