From 0f6458c0c13380969ccac82d54a0e68a3ec76200 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 May 2017 13:38:19 +0200 Subject: Don't double extract images from docx. This fixes a regression that was introduced when `--extract-media` was generalized to work with any input format. We were getting two versions of each image extracted from a docx, one with a hash, one with the original filename, though only the hash one was used. This patch restores the original behavior (using the original filename). Pointed out in comments on #3674. Thanks to @laperouse. --- src/Text/Pandoc/Class.hs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 4ef56ec33..8b2adc507 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -345,19 +345,24 @@ withPaths (p:ps) action fp = catchError (action (p fp)) (\_ -> withPaths ps action fp) --- | Traverse tree, filling media bag. +-- | Traverse tree, filling media bag for any images that +-- aren't already in the media bag. fillMediaBag :: PandocMonad m => Maybe String -> Pandoc -> m Pandoc fillMediaBag sourceURL d = walkM handleImage d where handleImage :: PandocMonad m => Inline -> m Inline handleImage (Image attr lab (src, tit)) = catchError - (do (bs, mt) <- fetchItem sourceURL src - let ext = fromMaybe (takeExtension src) - (mt >>= extensionFromMimeType) - let bs' = BL.fromChunks [bs] - let basename = showDigest $ sha1 bs' - let fname = basename <.> ext - insertMedia fname mt bs' - return $ Image attr lab (fname, tit)) + (do mediabag <- getMediaBag + case lookupMedia src mediabag of + Just (_, _) -> return $ Image attr lab (src, tit) + Nothing -> do + (bs, mt) <- downloadOrRead sourceURL src + let ext = fromMaybe (takeExtension src) + (mt >>= extensionFromMimeType) + let bs' = BL.fromChunks [bs] + let basename = showDigest $ sha1 bs' + let fname = basename <.> ext + insertMedia fname mt bs' + return $ Image attr lab (fname, tit)) (\e -> do case e of PandocResourceNotFound _ -> do -- cgit v1.2.3