From 0fa753b99963d01508d311598a55ac44f0f7a198 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 13 May 2015 14:52:51 -0700 Subject: EPUB writer: Properly handle image URLs without an extension. We now look at the mime type from the server and attach an appropriate extension. Closes #1855. --- src/Text/Pandoc/Writers/EPUB.hs | 78 ++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 37c285dc2..a1a0878e9 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to EPUB. module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) import qualified Data.Map as M -import Data.Maybe ( fromMaybe ) +import Data.Maybe ( fromMaybe, catMaybes ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) @@ -60,14 +60,14 @@ import Text.Pandoc.Walk (walk, walkM) import Data.Default import Text.Pandoc.Writers.Markdown (writePlain) import Control.Monad.State (modify, get, execState, State, put, evalState) -import Control.Monad (foldM, mplus, liftM, when) +import Control.Monad (mplus, liftM, when) import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) -import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) @@ -378,17 +378,7 @@ writeEPUB opts doc@(Pandoc meta _) = do mediaRef <- newIORef [] Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= walkM (transformBlock opts' mediaRef) - pics <- readIORef mediaRef - let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem' (writerMediaBag opts') - (writerSourceURL opts') oldsrc - case res of - Left _ -> do - warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." - return entries - Right (img,_) -> return $ - (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries - picEntries <- foldM readPicEntry [] pics + picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef -- handle fonts let matchingGlob f = do @@ -794,59 +784,75 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media +transformTag :: WriterOptions + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Tag String -> IO (Tag String) -transformTag mediaRef tag@(TagOpen name attr) +transformTag opts mediaRef tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef mediaRef src - newposter <- modifyMediaRef mediaRef poster + newsrc <- modifyMediaRef opts mediaRef src + newposter <- modifyMediaRef opts mediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ tag = return tag - -modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath -modifyMediaRef _ "" = return "" -modifyMediaRef mediaRef oldsrc = do +transformTag _ _ tag = return tag + +modifyMediaRef :: WriterOptions + -> IORef [(FilePath, (FilePath, Maybe Entry))] + -> FilePath + -> IO FilePath +modifyMediaRef _ _ "" = return "" +modifyMediaRef opts mediaRef oldsrc = do media <- readIORef mediaRef case lookup oldsrc media of - Just n -> return n - Nothing -> do - let new = "media/file" ++ show (length media) ++ - takeExtension (takeWhile (/='?') oldsrc) -- remove query - modifyIORef mediaRef ( (oldsrc, new): ) + Just (n,_) -> return n + Nothing -> do + res <- fetchItem' (writerMediaBag opts) + (writerSourceURL opts) oldsrc + (new, mbEntry) <- + case res of + Left _ -> do + warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." + return (oldsrc, Nothing) + Right (img,mbMime) -> do + let new = "media/file" ++ show (length media) ++ + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) + epochtime <- floor `fmap` getPOSIXTime + let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img + return (new, Just entry) + modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): ) return new transformBlock :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Block -> IO Block -transformBlock _ mediaRef (RawBlock fmt raw) +transformBlock opts mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag mediaRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b transformInline :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline _ mediaRef (Image lab (src,tit)) = do - newsrc <- modifyMediaRef mediaRef src +transformInline opts mediaRef (Image lab (src,tit)) = do + newsrc <- modifyMediaRef opts mediaRef src return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw -transformInline _ mediaRef (RawInline fmt raw) +transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag mediaRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x -- cgit v1.2.3