summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-05-18 13:38:19 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2017-05-18 13:38:19 +0200
commit0f6458c0c13380969ccac82d54a0e68a3ec76200 (patch)
tree5cd2ff7ec1f5cfaa4873753c02339200a5d8a72d
parent818d5c2f354cd4896659493452722c030ae7c766 (diff)
Don't double extract images from docx.
This fixes a regression that was introduced when `--extract-media` was generalized to work with any input format. We were getting two versions of each image extracted from a docx, one with a hash, one with the original filename, though only the hash one was used. This patch restores the original behavior (using the original filename). Pointed out in comments on #3674. Thanks to @laperouse.
-rw-r--r--src/Text/Pandoc/Class.hs23
1 files changed, 14 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 4ef56ec33..8b2adc507 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -345,19 +345,24 @@ withPaths (p:ps) action fp =
catchError (action (p </> fp))
(\_ -> withPaths ps action fp)
--- | Traverse tree, filling media bag.
+-- | Traverse tree, filling media bag for any images that
+-- aren't already in the media bag.
fillMediaBag :: PandocMonad m => Maybe String -> Pandoc -> m Pandoc
fillMediaBag sourceURL d = walkM handleImage d
where handleImage :: PandocMonad m => Inline -> m Inline
handleImage (Image attr lab (src, tit)) = catchError
- (do (bs, mt) <- fetchItem sourceURL src
- let ext = fromMaybe (takeExtension src)
- (mt >>= extensionFromMimeType)
- let bs' = BL.fromChunks [bs]
- let basename = showDigest $ sha1 bs'
- let fname = basename <.> ext
- insertMedia fname mt bs'
- return $ Image attr lab (fname, tit))
+ (do mediabag <- getMediaBag
+ case lookupMedia src mediabag of
+ Just (_, _) -> return $ Image attr lab (src, tit)
+ Nothing -> do
+ (bs, mt) <- downloadOrRead sourceURL src
+ let ext = fromMaybe (takeExtension src)
+ (mt >>= extensionFromMimeType)
+ let bs' = BL.fromChunks [bs]
+ let basename = showDigest $ sha1 bs'
+ let fname = basename <.> ext
+ insertMedia fname mt bs'
+ return $ Image attr lab (fname, tit))
(\e -> do
case e of
PandocResourceNotFound _ -> do