summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2015-05-13 14:52:51 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2015-05-13 14:52:51 -0700
commit0fa753b99963d01508d311598a55ac44f0f7a198 (patch)
tree448ecbce1e8a1ef43b1b6b2c89776fd17ea63794 /src
parent16133ed1ac664d2fddfad383ce16026e1234ebf9 (diff)
EPUB writer: Properly handle image URLs without an extension.
We now look at the mime type from the server and attach an appropriate extension. Closes #1855.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs78
1 files changed, 42 insertions, 36 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 37c285dc2..a1a0878e9 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to EPUB.
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
import qualified Data.Map as M
-import Data.Maybe ( fromMaybe )
+import Data.Maybe ( fromMaybe, catMaybes )
import Data.List ( isPrefixOf, isInfixOf, intercalate )
import System.Environment ( getEnv )
import Text.Printf (printf)
@@ -60,14 +60,14 @@ import Text.Pandoc.Walk (walk, walkM)
import Data.Default
import Text.Pandoc.Writers.Markdown (writePlain)
import Control.Monad.State (modify, get, execState, State, put, evalState)
-import Control.Monad (foldM, mplus, liftM, when)
+import Control.Monad (mplus, liftM, when)
import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
, strContent, lookupAttr, Node(..), QName(..), parseXML
, onlyElems, node, ppElement)
import Text.Pandoc.UUID (getRandomUUID)
import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
import Data.Char ( toLower, isDigit, isAlphaNum )
-import Text.Pandoc.MIME (MimeType, getMimeType)
+import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
@@ -378,17 +378,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
mediaRef <- newIORef []
Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>=
walkM (transformBlock opts' mediaRef)
- pics <- readIORef mediaRef
- let readPicEntry entries (oldsrc, newsrc) = do
- res <- fetchItem' (writerMediaBag opts')
- (writerSourceURL opts') oldsrc
- case res of
- Left _ -> do
- warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
- return entries
- Right (img,_) -> return $
- (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries
- picEntries <- foldM readPicEntry [] pics
+ picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef
-- handle fonts
let matchingGlob f = do
@@ -794,59 +784,75 @@ metadataElement version md currentTime =
showDateTimeISO8601 :: UTCTime -> String
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
-transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
+transformTag :: WriterOptions
+ -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
-> Tag String
-> IO (Tag String)
-transformTag mediaRef tag@(TagOpen name attr)
+transformTag opts mediaRef tag@(TagOpen name attr)
| name `elem` ["video", "source", "img", "audio"] = do
let src = fromAttrib "src" tag
let poster = fromAttrib "poster" tag
- newsrc <- modifyMediaRef mediaRef src
- newposter <- modifyMediaRef mediaRef poster
+ newsrc <- modifyMediaRef opts mediaRef src
+ newposter <- modifyMediaRef opts mediaRef poster
let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
[("src", newsrc) | not (null newsrc)] ++
[("poster", newposter) | not (null newposter)]
return $ TagOpen name attr'
-transformTag _ tag = return tag
-
-modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath
-modifyMediaRef _ "" = return ""
-modifyMediaRef mediaRef oldsrc = do
+transformTag _ _ tag = return tag
+
+modifyMediaRef :: WriterOptions
+ -> IORef [(FilePath, (FilePath, Maybe Entry))]
+ -> FilePath
+ -> IO FilePath
+modifyMediaRef _ _ "" = return ""
+modifyMediaRef opts mediaRef oldsrc = do
media <- readIORef mediaRef
case lookup oldsrc media of
- Just n -> return n
- Nothing -> do
- let new = "media/file" ++ show (length media) ++
- takeExtension (takeWhile (/='?') oldsrc) -- remove query
- modifyIORef mediaRef ( (oldsrc, new): )
+ Just (n,_) -> return n
+ Nothing -> do
+ res <- fetchItem' (writerMediaBag opts)
+ (writerSourceURL opts) oldsrc
+ (new, mbEntry) <-
+ case res of
+ Left _ -> do
+ warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
+ return (oldsrc, Nothing)
+ Right (img,mbMime) -> do
+ let new = "media/file" ++ show (length media) ++
+ fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
+ (('.':) <$> (mbMime >>= extensionFromMimeType))
+ epochtime <- floor `fmap` getPOSIXTime
+ let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img
+ return (new, Just entry)
+ modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): )
return new
transformBlock :: WriterOptions
- -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
+ -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
-> Block
-> IO Block
-transformBlock _ mediaRef (RawBlock fmt raw)
+transformBlock opts mediaRef (RawBlock fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
- tags' <- mapM (transformTag mediaRef) tags
+ tags' <- mapM (transformTag opts mediaRef) tags
return $ RawBlock fmt (renderTags' tags')
transformBlock _ _ b = return b
transformInline :: WriterOptions
- -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
+ -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media
-> Inline
-> IO Inline
-transformInline _ mediaRef (Image lab (src,tit)) = do
- newsrc <- modifyMediaRef mediaRef src
+transformInline opts mediaRef (Image lab (src,tit)) = do
+ newsrc <- modifyMediaRef opts mediaRef src
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
raw <- makeSelfContained opts $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
-transformInline _ mediaRef (RawInline fmt raw)
+transformInline opts mediaRef (RawInline fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
- tags' <- mapM (transformTag mediaRef) tags
+ tags' <- mapM (transformTag opts mediaRef) tags
return $ RawInline fmt (renderTags' tags')
transformInline _ _ x = return x