diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/EPUB.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 683 |
1 files changed, 398 insertions, 285 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 00bf4a81c..7b4853a24 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,6 +1,9 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010-2015 John MacFarlane + Copyright : Copyright (C) 2010-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,47 +31,47 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to EPUB. -} -module Text.Pandoc.Writers.EPUB ( writeEPUB ) where -import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) -import qualified Data.Map as M -import qualified Data.Set as Set -import Data.Maybe ( fromMaybe, catMaybes ) -import Data.List ( isPrefixOf, isInfixOf, intercalate ) -import System.Environment ( getEnv ) -import Text.Printf (printf) -import System.FilePath ( takeExtension, takeFileName ) -import System.FilePath.Glob ( namesMatching ) -import Network.HTTP ( urlEncode ) +module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where +import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive, + fromArchive, fromEntry, toEntry) +import Control.Monad (mplus, unless, when, zipWithM) +import Control.Monad.Except (catchError, throwError) +import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get, + gets, lift, modify, put) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import qualified Text.Pandoc.UTF8 as UTF8 -import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) -import Data.Time.Clock.POSIX ( getPOSIXTime ) -import Text.Pandoc.Compat.Time -import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim - , normalizeDate, readDataFile, stringify, warn - , hierarchicalize, fetchItem' ) -import qualified Text.Pandoc.Shared as S (Element(..)) +import Data.Char (isAlphaNum, isAscii, isDigit, toLower) +import Data.List (intercalate, isInfixOf, isPrefixOf) +import qualified Data.Map as M +import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) +import qualified Data.Set as Set +import qualified Data.Text as TS +import qualified Data.Text.Lazy as TL +import Network.HTTP (urlEncode) +import System.FilePath (takeExtension, takeFileName, makeRelative) +import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags) import Text.Pandoc.Builder (fromList, setMeta) -import Text.Pandoc.Options ( WriterOptions(..) - , WrapOption(..) - , HTMLMathMethod(..) - , EPUBVersion(..) - , ObfuscationMethod(NoObfuscation) ) +import Text.Pandoc.Class (PandocMonad, report) +import qualified Text.Pandoc.Class as P +import Text.Pandoc.Compat.Time import Text.Pandoc.Definition -import Text.Pandoc.Walk (walk, walkM, query) -import Control.Monad.State (modify, get, State, put, evalState) -import Control.Monad (mplus, liftM, when) -import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs - , strContent, lookupAttr, Node(..), QName(..), parseXML - , onlyElems, node, ppElement) -import Text.Pandoc.UUID (getRandomUUID) -import Text.Pandoc.Writers.HTML ( writeHtml ) -import Data.Char ( toLower, isDigit, isAlphaNum ) -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import qualified Control.Exception as E -import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType) +import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), + ObfuscationMethod (NoObfuscation), WrapOption (..), + WriterOptions (..)) +import Text.Pandoc.Shared (hierarchicalize, normalizeDate, renderTags', + safeRead, stringify, trim, uniqueIdent) +import qualified Text.Pandoc.Shared as S (Element (..)) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.UUID (getUUID) +import Text.Pandoc.Walk (query, walk, walkM) +import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) +import Text.Printf (printf) +import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), + add_attrs, lookupAttr, node, onlyElems, parseXML, + ppElement, showElement, strContent, unode, unqual) -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -76,51 +79,55 @@ import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] +data EPUBState = EPUBState { + stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] + , stEpubSubdir :: String + } + +type E m = StateT EPUBState m + data EPUBMetadata = EPUBMetadata{ - epubIdentifier :: [Identifier] - , epubTitle :: [Title] - , epubDate :: [Date] - , epubLanguage :: String - , epubCreator :: [Creator] - , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubCoverImage :: Maybe String - , epubStylesheet :: Maybe Stylesheet - , epubPageDirection :: Maybe ProgressionDirection + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: [Date] + , epubLanguage :: String + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [String] + , epubDescription :: Maybe String + , epubType :: Maybe String + , epubFormat :: Maybe String + , epubPublisher :: Maybe String + , epubSource :: Maybe String + , epubRelation :: Maybe String + , epubCoverage :: Maybe String + , epubRights :: Maybe String + , epubCoverImage :: Maybe String + , epubStylesheets :: [FilePath] + , epubPageDirection :: Maybe ProgressionDirection + , epubIbooksFields :: [(String, String)] } deriving Show -data Stylesheet = StylesheetPath FilePath - | StylesheetContents String - deriving Show - data Date = Date{ - dateText :: String - , dateEvent :: Maybe String + dateText :: String + , dateEvent :: Maybe String } deriving Show data Creator = Creator{ - creatorText :: String - , creatorRole :: Maybe String - , creatorFileAs :: Maybe String + creatorText :: String + , creatorRole :: Maybe String + , creatorFileAs :: Maybe String } deriving Show data Identifier = Identifier{ - identifierText :: String - , identifierScheme :: Maybe String + identifierText :: String + , identifierScheme :: Maybe String } deriving Show data Title = Title{ - titleText :: String - , titleFileAs :: Maybe String - , titleType :: Maybe String + titleText :: String + , titleFileAs :: Maybe String + , titleType :: Maybe String } deriving Show data ProgressionDirection = LTR | RTL deriving Show @@ -143,15 +150,29 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x -getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata +mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry +mkEntry path content = do + epubSubdir <- gets stEpubSubdir + let addEpubSubdir :: Entry -> Entry + addEpubSubdir e = e{ eRelativePath = + (if null epubSubdir + then "" + else epubSubdir ++ "/") ++ eRelativePath e } + epochtime <- floor <$> lift P.getPOSIXTime + return $ + (if path == "mimetype" || "META-INF" `isPrefixOf` path + then id + else addEpubSubdir) $ toEntry path epochtime content + +getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta - let elts = onlyElems $ parseXML $ writerEpubMetadata opts + let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts let md' = foldr addMetadataFromXML md elts let addIdentifier m = if null (epubIdentifier m) then do - randomId <- fmap show getRandomUUID + randomId <- (show . getUUID) <$> lift P.newStdGen return $ m{ epubIdentifier = [Identifier randomId Nothing] } else return m let addLanguage m = @@ -159,16 +180,19 @@ getEPUBMetadata opts meta = do then case lookup "lang" (writerVariables opts) of Just x -> return m{ epubLanguage = x } Nothing -> do - localeLang <- E.catch (liftM - (map (\c -> if c == '_' then '-' else c) . - takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: E.SomeException) in return "en-US") + mLang <- lift $ P.lookupEnv "LANG" + let localeLang = + case mLang of + Just lang -> + map (\c -> if c == '_' then '-' else c) $ + takeWhile (/='.') lang + Nothing -> "en-US" return m{ epubLanguage = localeLang } else return m let fixDate m = if null (epubDate m) then do - currentTime <- getCurrentTime + currentTime <- lift P.getCurrentTime return $ m{ epubDate = [ Date{ dateText = showDateTimeISO8601 currentTime , dateEvent = Nothing } ] } @@ -225,12 +249,16 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md addMetadataFromXML _ md = md metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = s +metaValueToString (MetaString s) = s metaValueToString (MetaInlines ils) = stringify ils -metaValueToString (MetaBlocks bs) = stringify bs -metaValueToString (MetaBool True) = "true" -metaValueToString (MetaBool False) = "false" -metaValueToString _ = "" +metaValueToString (MetaBlocks bs) = stringify bs +metaValueToString (MetaBool True) = "true" +metaValueToString (MetaBool False) = "false" +metaValueToString _ = "" + +metaValueToPaths:: MetaValue -> [FilePath] +metaValueToPaths (MetaList xs) = map metaValueToString xs +metaValueToPaths x = [metaValueToString x] getList :: String -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = @@ -267,19 +295,18 @@ getCreator s meta = getList s meta handleMetaValue getDate :: String -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = - Date{ dateText = maybe "" id $ + Date{ dateText = fromMaybe "" $ M.lookup "text" m >>= normalizeDate' . metaValueToString , dateEvent = metaValueToString <$> M.lookup "event" m } - handleMetaValue mv = Date { dateText = maybe "" - id $ normalizeDate' $ metaValueToString mv + handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } simpleList :: String -> Meta -> [String] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs - Just x -> [metaValueToString x] - Nothing -> [] + Just x -> [metaValueToString x] + Nothing -> [] metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata metadataFromMeta opts meta = EPUBMetadata{ @@ -299,8 +326,9 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverage = coverage , epubRights = rights , epubCoverImage = coverImage - , epubStylesheet = stylesheet + , epubStylesheets = stylesheets , epubPageDirection = pageDirection + , epubIbooksFields = ibooksFields } where identifiers = getIdentifier meta titles = getTitle meta @@ -320,70 +348,125 @@ metadataFromMeta opts meta = EPUBMetadata{ rights = metaValueToString <$> lookupMeta "rights" meta coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) - stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` - ((StylesheetPath . metaValueToString) <$> - lookupMeta "stylesheet" meta) + stylesheets = fromMaybe [] + (metaValueToPaths <$> lookupMeta "stylesheet" meta) ++ + [f | ("css",f) <- writerVariables opts] pageDirection = case map toLower . metaValueToString <$> lookupMeta "page-progression-direction" meta of Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing + ibooksFields = case lookupMeta "ibooks" meta of + Just (MetaMap mp) + -> M.toList $ M.map metaValueToString mp + _ -> [] + +-- | Produce an EPUB2 file from a Pandoc document. +writeEPUB2 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB2 = writeEPUB EPUB2 + +-- | Produce an EPUB3 file from a Pandoc document. +writeEPUB3 :: PandocMonad m + => WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> m B.ByteString +writeEPUB3 = writeEPUB EPUB3 -- | Produce an EPUB file from a Pandoc document. -writeEPUB :: WriterOptions -- ^ Writer options +writeEPUB :: PandocMonad m + => EPUBVersion + -> WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert - -> IO B.ByteString -writeEPUB opts doc@(Pandoc meta _) = do - let version = fromMaybe EPUB2 (writerEpubVersion opts) + -> m B.ByteString +writeEPUB epubVersion opts doc = do + let epubSubdir = writerEpubSubdirectory opts + -- sanity check on epubSubdir + unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + throwError $ PandocEpubSubdirectoryError epubSubdir + let initState = EPUBState { stMediaPaths = [], stEpubSubdir = epubSubdir } + evalStateT (pandocToEPUB epubVersion opts doc) initState + +pandocToEPUB :: PandocMonad m + => EPUBVersion + -> WriterOptions + -> Pandoc + -> E m B.ByteString +pandocToEPUB version opts doc@(Pandoc meta _) = do + epubSubdir <- gets stEpubSubdir let epub3 = version == EPUB3 - epochtime <- floor `fmap` getPOSIXTime - let mkEntry path content = toEntry path epochtime content + let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) . + writeHtmlStringForEPUB version o + metadata <- getEPUBMetadata opts meta + + -- stylesheet + stylesheets <- case epubStylesheets metadata of + [] -> (\x -> [B.fromChunks [x]]) <$> + P.readDataFile "epub.css" + fs -> mapM P.readFileLazy fs + stylesheetEntries <- zipWithM + (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs) + stylesheets [(1 :: Int)..] + let vars = ("epub3", if epub3 then "true" else "false") - : ("css", "stylesheet.css") - : writerVariables opts + : [(x,y) | (x,y) <- writerVariables opts, x /= "css"] + + let cssvars useprefix = map (\e -> ("css", + (if useprefix + then "../" + else "") + ++ makeRelative epubSubdir (eRelativePath e))) + stylesheetEntries + let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerSectionDivs = True - , writerHtml5 = epub3 , writerVariables = vars , writerHTMLMathMethod = if epub3 - then MathML Nothing + then MathML else writerHTMLMathMethod opts , writerWrapText = WrapAuto } - metadata <- getEPUBMetadata opts' meta -- cover page (cpgEntry, cpicEntry) <- case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do - let coverImage = "media/" ++ takeFileName img - let cpContent = renderHtml $ writeHtml - opts'{ writerVariables = ("coverpage","true"):vars } - (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) - imgContent <- B.readFile img - return ( [mkEntry "cover.xhtml" cpContent] - , [mkEntry coverImage imgContent] ) + let coverImage = takeFileName img + cpContent <- lift $ writeHtml + opts'{ writerVariables = + ("coverpage","true"): + cssvars True ++ vars } + (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"../media/" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) + imgContent <- lift $ P.readFileLazy img + coverEntry <- mkEntry "text/cover.xhtml" cpContent + coverImageEntry <- mkEntry ("media/" ++ coverImage) + imgContent + return ( [ coverEntry ] + , [ coverImageEntry ] ) -- title page - let tpContent = renderHtml $ writeHtml opts'{ - writerVariables = ("titlepage","true"):vars } - (Pandoc meta []) - let tpEntry = mkEntry "title_page.xhtml" tpContent + tpContent <- lift $ writeHtml opts'{ + writerVariables = ("titlepage","true"): + cssvars True ++ vars } + (Pandoc meta []) + tpEntry <- mkEntry "text/title_page.xhtml" tpContent -- handle pictures - mediaRef <- newIORef [] - Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= - walkM (transformBlock opts' mediaRef) - picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef - + -- mediaRef <- P.newIORef [] + Pandoc _ blocks <- walkM (transformInline opts') doc >>= + walkM transformBlock + picEntries <- (mapMaybe (snd . snd)) <$> gets stMediaPaths -- handle fonts let matchingGlob f = do - xs <- namesMatching f + xs <- lift $ P.glob f when (null xs) $ - warn $ f ++ " did not match any font files." + report $ CouldNotFetchResource f "glob did not match any font files" return xs - let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f + let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< + lift (P.readFileLazy f) fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') fontEntries <- mapM mkFontEntry fontFiles @@ -420,7 +503,7 @@ writeEPUB opts doc@(Pandoc meta _) = do mbnum <- if "unnumbered" `elem` classes then return Nothing else case splitAt (n - 1) nums of - (ks, (m:_)) -> do + (ks, m:_) -> do let nums' = ks ++ [m+1] put nums' return $ Just (ks ++ [m]) @@ -467,77 +550,93 @@ writeEPUB opts doc@(Pandoc meta _) = do Chapter mbnum $ walk fixInternalReferences bs) chapters' - let chapToEntry :: Int -> Chapter -> Entry - chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) - $ renderHtml - $ writeHtml opts'{ writerNumberOffset = - fromMaybe [] mbnum } - $ case bs of - (Header _ _ xs : _) -> - -- remove notes or we get doubled footnotes - Pandoc (setMeta "title" (walk removeNote $ fromList xs) - nullMeta) bs - _ -> - Pandoc nullMeta bs - - let chapterEntries = zipWith chapToEntry [1..] chapters + let chapToEntry num (Chapter mbnum bs) = + mkEntry ("text/" ++ showChapter num) =<< + writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum + , writerVariables = cssvars True ++ vars } + (case bs of + (Header _ _ xs : _) -> + -- remove notes or we get doubled footnotes + Pandoc (setMeta "title" (walk removeNote $ fromList xs) + nullMeta) bs + _ -> Pandoc nullMeta bs) + + chapterEntries <- zipWithM chapToEntry [1..] chapters -- incredibly inefficient (TODO): let containsMathML ent = epub3 && - "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + "<math" `isInfixOf` + B8.unpack (fromEntry ent) let containsSVG ent = epub3 && - "<svg" `isInfixOf` (B8.unpack $ fromEntry ent) + "<svg" `isInfixOf` + B8.unpack (fromEntry ent) let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] -- contents.opf let chapterNode ent = unode "item" ! - ([("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), + ([("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), ("media-type", "application/xhtml+xml")] ++ case props ent of - [] -> [] - xs -> [("properties", unwords xs)]) + [] -> [] + xs -> [("properties", unwords xs)]) $ () + let chapterRefNode ent = unode "itemref" ! - [("idref", toId $ eRelativePath ent)] $ () + [("idref", toId $ makeRelative epubSubdir + $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! - [("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", fromMaybe "application/octet-stream" + [("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), + ("media-type", + fromMaybe "application/octet-stream" $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! - [("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () + [("id", toId $ makeRelative epubSubdir + $ eRelativePath ent), + ("href", makeRelative epubSubdir + $ eRelativePath ent), + ("media-type", fromMaybe "" $ + getMimeType $ eRelativePath ent)] $ () let plainTitle = case docTitle' meta of [] -> case epubTitle metadata of - [] -> "UNTITLED" + [] -> "UNTITLED" (x:_) -> titleText x x -> stringify x let tocTitle = fromMaybe plainTitle $ metaValueToString <$> lookupMeta "toc-title" meta - let uuid = case epubIdentifier metadata of - (x:_) -> identifierText x -- use first identifier as UUID - [] -> error "epubIdentifier is null" -- shouldn't happen - currentTime <- getCurrentTime + uuid <- case epubIdentifier metadata of + (x:_) -> return $ identifierText x -- use first identifier as UUID + [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen + currentTime <- lift P.getCurrentTime let contentsData = UTF8.fromStringLazy $ ppTopElement $ - unode "package" ! [("version", case version of - EPUB2 -> "2.0" - EPUB3 -> "3.0") - ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","epub-id-1")] $ + unode "package" ! + ([("version", case version of + EPUB2 -> "2.0" + EPUB3 -> "3.0") + ,("xmlns","http://www.idpf.org/2007/opf") + ,("unique-identifier","epub-id-1") + ] ++ + [("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/") | version == EPUB3]) $ [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () - , unode "item" ! [("id","style"), ("href","stylesheet.css") - ,("media-type","text/css")] $ () , unode "item" ! ([("id","nav") ,("href","nav.xhtml") ,("media-type","application/xhtml+xml")] ++ [("properties","nav") | epub3 ]) $ () ] ++ + [ unode "item" ! [("id","style"), ("href",fp) + ,("media-type","text/css")] $ () | + fp <- map + (makeRelative epubSubdir . eRelativePath) + stylesheetEntries ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ (case cpicEntry of [] -> [] @@ -546,7 +645,8 @@ writeEPUB opts doc@(Pandoc meta _) = do (pictureNode x)]) ++ map pictureNode picEntries ++ map fontNode fontEntries - , unode "spine" ! ([("toc","ncx")] ++ progressionDirection) $ + , unode "spine" ! ( + ("toc","ncx") : progressionDirection) $ case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! @@ -565,48 +665,54 @@ writeEPUB opts doc@(Pandoc meta _) = do ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! - [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing + [("type","cover") + ,("title","Cover") + ,("href","text/cover.xhtml")] $ () + | isJust (epubCoverImage metadata) ] ] - let contentsEntry = mkEntry "content.opf" contentsData + contentsEntry <- mkEntry "content.opf" contentsData -- toc.ncx let secs = hierarchicalize blocks' let tocLevel = writerTOCDepth opts - let navPointNode :: (Int -> String -> String -> [Element] -> Element) - -> S.Element -> State Int Element + let navPointNode :: PandocMonad m + => (Int -> [Inline] -> String -> [Element] -> Element) + -> S.Element -> StateT Int m Element navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do n <- get modify (+1) let showNums :: [Int] -> String showNums = intercalate "." . map show - let tit' = stringify ils let tit = if writerNumberSections opts && not (null nums) - then showNums nums ++ " " ++ tit' - else tit' - let src = case lookup ident reftable of - Just x -> x - Nothing -> error (ident ++ " not found in reftable") + then Span ("", ["section-header-number"], []) + [Str (showNums nums)] : Space : ils + else ils + src <- case lookup ident reftable of + Just x -> return x + Nothing -> throwError $ PandocSomeError $ ident ++ " not found in reftable" let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel - isSec _ = False + isSec _ = False let subsecs = filter isSec children subs <- mapM (navPointNode formatter) subsecs return $ formatter n tit src subs - navPointNode _ (S.Blk _) = error "navPointNode encountered Blk" + navPointNode _ (S.Blk _) = throwError $ PandocSomeError "navPointNode encountered Blk" - let navMapFormatter :: Int -> String -> String -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" tit - , unode "content" ! [("src", src)] $ () + [ unode "navLabel" $ unode "text" $ stringify tit + , unode "content" ! [("src", "text/" ++ src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) - , unode "content" ! [("src","title_page.xhtml")] $ () ] + , unode "content" ! [("src", "text/title_page.xhtml")] + $ () ] + navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1 let tocData = UTF8.fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ @@ -623,34 +729,48 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> [] Just img -> [unode "meta" ! [("name","cover"), ("content", toId img)] $ ()] - , unode "docTitle" $ unode "text" $ plainTitle + , unode "docTitle" $ unode "text" plainTitle , unode "navMap" $ - tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 + tpNode : navMap ] - let tocEntry = mkEntry "toc.ncx" tocData + tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> String -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ - (unode "a" ! [("href",src)] - $ tit) + (unode "a" ! + [("href", "text/" ++ src)] + $ titElements) : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] + where titElements = parseXML titRendered + titRendered = case P.runPure + (writeHtmlStringForEPUB version + opts{ writerTemplate = Nothing } + (Pandoc nullMeta + [Plain $ walk delink tit])) of + Left _ -> TS.pack $ stringify tit + Right x -> x + -- can't have a element inside a... + delink (Link _ ils _) = Span ("", [], []) ils + delink x = x let navtag = if epub3 then "nav" else "div" - let navBlocks = [RawBlock (Format "html") $ ppElement $ + tocBlocks <- lift $ evalStateT (mapM (navPointNode navXhtmlFormatter) secs) 1 + let navBlocks = [RawBlock (Format "html") + $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle - , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] + , unode "ol" ! [("class","toc")] $ tocBlocks ]] let landmarks = if epub3 then [RawBlock (Format "html") $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("hidden","hidden")] $ [ unode "ol" $ [ unode "li" - [ unode "a" ! [("href", "cover.xhtml") + [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ "Cover"] | epubCoverImage metadata /= Nothing @@ -664,52 +784,50 @@ writeEPUB opts doc@(Pandoc meta _) = do ] ] else [] - let navData = renderHtml $ writeHtml - opts'{ writerVariables = ("navpage","true"):vars } + navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"): + cssvars False ++ vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) (navBlocks ++ landmarks)) - let navEntry = mkEntry "nav.xhtml" navData + navEntry <- mkEntry "nav.xhtml" navData -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip" + mimetypeEntry <- mkEntry "mimetype" $ + UTF8.fromStringLazy "application/epub+zip" -- container.xml let containerData = UTF8.fromStringLazy $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ - unode "rootfile" ! [("full-path","content.opf") + unode "rootfile" ! [("full-path", + (if null epubSubdir + then "" + else epubSubdir ++ "/") ++ "content.opf") ,("media-type","application/oebps-package+xml")] $ () - let containerEntry = mkEntry "META-INF/container.xml" containerData + containerEntry <- mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml let apple = UTF8.fromStringLazy $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ unode "option" ! [("name","specified-fonts")] $ "true" - let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple - - -- stylesheet - stylesheet <- case epubStylesheet metadata of - Just (StylesheetPath fp) -> UTF8.readFile fp - Just (StylesheetContents s) -> return s - Nothing -> UTF8.toString `fmap` - readDataFile (writerUserDataDir opts) "epub.css" - let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet + appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- construct archive - let archive = foldr addEntryToArchive emptyArchive - (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry : - contentsEntry : tocEntry : navEntry : - (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) + let archive = foldr addEntryToArchive emptyArchive $ + [mimetypeEntry, containerEntry, appleEntry, + contentsEntry, tocEntry, navEntry, tpEntry] ++ + stylesheetEntries ++ picEntries ++ cpicEntry ++ + cpgEntry ++ chapterEntries ++ fontEntries return $ fromArchive archive metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element metadataElement version md currentTime = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes - where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes + ++ languageNodes ++ ibooksNodes ++ creatorNodes ++ contributorNodes ++ subjectNodes ++ descriptionNodes ++ typeNodes ++ formatNodes ++ publisherNodes ++ sourceNodes ++ relationNodes @@ -728,6 +846,8 @@ metadataElement version md currentTime = [] -> [] (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] + ibooksNodes = map ibooksNode (epubIbooksFields md) + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ epubCreator md @@ -747,7 +867,7 @@ metadataElement version md currentTime = ("content",toId img)] $ ()]) $ epubCoverImage md modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ - (showDateTimeISO8601 currentTime) | version == EPUB3 ] + showDateTimeISO8601 currentTime | version == EPUB3 ] dcTag n s = unode ("dc:" ++ n) s dcTag' n s = [dcTag n s] toIdentifierNode id' (Identifier txt scheme) @@ -791,99 +911,92 @@ metadataElement version md currentTime = (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] - schemeToOnix "ISBN-10" = "02" - schemeToOnix "GTIN-13" = "03" - schemeToOnix "UPC" = "04" - schemeToOnix "ISMN-10" = "05" - schemeToOnix "DOI" = "06" - schemeToOnix "LCCN" = "13" - schemeToOnix "GTIN-14" = "14" - schemeToOnix "ISBN-13" = "15" + schemeToOnix "ISBN-10" = "02" + schemeToOnix "GTIN-13" = "03" + schemeToOnix "UPC" = "04" + schemeToOnix "ISMN-10" = "05" + schemeToOnix "DOI" = "06" + schemeToOnix "LCCN" = "13" + schemeToOnix "GTIN-14" = "14" + schemeToOnix "ISBN-13" = "15" schemeToOnix "Legal deposit number" = "17" - schemeToOnix "URN" = "22" - schemeToOnix "OCLC" = "23" - schemeToOnix "ISMN-13" = "25" - schemeToOnix "ISBN-A" = "26" - schemeToOnix "JP" = "27" - schemeToOnix "OLCC" = "28" - schemeToOnix _ = "01" + schemeToOnix "URN" = "22" + schemeToOnix "OCLC" = "23" + schemeToOnix "ISMN-13" = "25" + schemeToOnix "ISBN-A" = "26" + schemeToOnix "JP" = "27" + schemeToOnix "OLCC" = "28" + schemeToOnix _ = "01" showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media - -> Tag String - -> IO (Tag String) -transformTag opts mediaRef tag@(TagOpen name attr) +transformTag :: PandocMonad m + => Tag String + -> E m (Tag String) +transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && - lookup "data-external" attr == Nothing = do + isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef opts mediaRef src - newposter <- modifyMediaRef opts mediaRef poster + newsrc <- modifyMediaRef src + newposter <- modifyMediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", newsrc) | not (null newsrc)] ++ - [("poster", newposter) | not (null newposter)] + [("src", "../" ++ newsrc) | not (null newsrc)] ++ + [("poster", "../" ++ newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ _ tag = return tag - -modifyMediaRef :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] - -> FilePath - -> IO FilePath -modifyMediaRef _ _ "" = return "" -modifyMediaRef opts mediaRef oldsrc = do - media <- readIORef mediaRef +transformTag tag = return tag + +modifyMediaRef :: PandocMonad m + => FilePath + -> E m FilePath +modifyMediaRef "" = return "" +modifyMediaRef oldsrc = do + media <- gets stMediaPaths case lookup oldsrc media of Just (n,_) -> return n - Nothing -> do - res <- fetchItem' (writerMediaBag opts) - (writerSourceURL opts) oldsrc - (new, mbEntry) <- - case res of - Left _ -> do - warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." - return (oldsrc, Nothing) - Right (img,mbMime) -> do - let new = "media/file" ++ show (length media) ++ - fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) - epochtime <- floor `fmap` getPOSIXTime - let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img - return (new, Just entry) - modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): ) - return new - -transformBlock :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media - -> Block - -> IO Block -transformBlock opts mediaRef (RawBlock fmt raw) + Nothing -> catchError + (do (img, mbMime) <- P.fetchItem oldsrc + let new = "media/file" ++ show (length media) ++ + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) + entry <- mkEntry new (B.fromChunks . (:[]) $ img) + modify $ \st -> st{ stMediaPaths = + (oldsrc, (new, Just entry)):media} + return new) + (\e -> do + report $ CouldNotFetchResource oldsrc (show e) + return oldsrc) + +transformBlock :: PandocMonad m + => Block + -> E m Block +transformBlock (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM transformTag tags return $ RawBlock fmt (renderTags' tags') -transformBlock _ _ b = return b +transformBlock b = return b -transformInline :: WriterOptions - -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media +transformInline :: PandocMonad m + => WriterOptions -> Inline - -> IO Inline -transformInline opts mediaRef (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef opts mediaRef src - return $ Image attr lab (newsrc, tit) -transformInline opts mediaRef (x@(Math t m)) + -> E m Inline +transformInline _opts (Image attr lab (src,tit)) = do + newsrc <- modifyMediaRef src + return $ Image attr lab ("../" ++ newsrc, tit) +transformInline opts (x@(Math t m)) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef opts mediaRef (url ++ urlEncode m) + newsrc <- modifyMediaRef (url ++ urlEncode m) let mathclass = if t == DisplayMath then "display" else "inline" - return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")] -transformInline opts mediaRef (RawInline fmt raw) + return $ Span ("",["math",mathclass],[]) + [Image nullAttr [x] ("../" ++ newsrc, "")] +transformInline _opts (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM transformTag tags return $ RawInline fmt (renderTags' tags') -transformInline _ _ x = return x +transformInline _ x = return x (!) :: (t -> Element) -> [(String, String)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) @@ -898,8 +1011,8 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . let (ds,ys) = break (==';') xs rest = drop 1 ys in case safeRead ('\'':'\\':ds ++ "'") of - Just x -> x : unEntity rest - Nothing -> '&':'#':unEntity xs + Just x -> x : unEntity rest + Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs mediaTypeOf :: FilePath -> Maybe MimeType @@ -907,7 +1020,7 @@ mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y - _ -> Nothing + _ -> Nothing -- Returns filename for chapter number. showChapter :: Int -> String |