diff options
author | Kiwamu Okabe <kiwamu@debian.or.jp> | 2012-02-28 01:49:16 +0900 |
---|---|---|
committer | Kiwamu Okabe <kiwamu@debian.or.jp> | 2012-02-28 01:49:16 +0900 |
commit | b27a86d9758512d85d052a9a46917a54eb3da035 (patch) | |
tree | 43171a5211b250025cc85e6994a6a516aad5668f /src/Text/Pandoc/Writers/EPUB.hs | |
parent | 6ac52ff209b172129452cd464d840b3c73c85c01 (diff) |
Imported Upstream version 1.9.1.1
Diffstat (limited to 'src/Text/Pandoc/Writers/EPUB.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 177 |
1 files changed, 112 insertions, 65 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 9fc393fed..67048348e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -32,30 +32,31 @@ import Data.IORef import Data.Maybe ( fromMaybe, isNothing ) import Data.List ( findIndices, isPrefixOf ) import System.Environment ( getEnv ) -import System.FilePath ( (</>), takeBaseName, takeExtension ) +import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 ( fromString ) import Codec.Archive.Zip -import System.Time +import Data.Time.Clock.POSIX import Text.Pandoc.Shared hiding ( Element ) import Text.Pandoc.Definition import Text.Pandoc.Generic -import Control.Monad (liftM) +import Control.Monad.State import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower ) -import System.Directory ( copyFile ) import Network.URI ( unEscapeString ) +import Text.Pandoc.MIME (getMimeType) -- | Produce an EPUB file from a Pandoc document. writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line + -> [FilePath] -- ^ Paths to fonts to embed -> WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString -writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do - (TOD epochtime _) <- getClockTime +writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do + epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerStandalone = True @@ -64,17 +65,24 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do let vars = writerVariables opts' let mbCoverImage = lookup "epub-cover-image" vars + titlePageTemplate <- readDataFile (writerUserDataDir opts) + $ "templates" </> "epub-titlepage" <.> "html" + + coverImageTemplate <- readDataFile (writerUserDataDir opts) + $ "templates" </> "epub-coverimage" <.> "html" + + pageTemplate <- readDataFile (writerUserDataDir opts) + $ "templates" </> "epub-page" <.> "html" + -- cover page (cpgEntry, cpicEntry) <- case mbCoverImage of Nothing -> return ([],[]) Just img -> do let coverImage = "cover-image" ++ takeExtension img - copyFile img coverImage let cpContent = fromString $ writeHtmlString - opts'{writerTemplate = pageTemplate - ,writerVariables = - ("coverimage",coverImage):vars} + opts'{writerTemplate = coverImageTemplate, + writerVariables = ("coverimage",coverImage):vars} (Pandoc meta []) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] @@ -82,37 +90,47 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do -- title page let tpContent = fromString $ writeHtmlString - opts'{writerTemplate = pageTemplate - ,writerVariables = ("titlepage","yes"):vars} + opts'{writerTemplate = titlePageTemplate} (Pandoc meta []) let tpEntry = mkEntry "title_page.xhtml" tpContent -- handle pictures picsRef <- newIORef [] - Pandoc _ blocks <- liftM (bottomUp transformBlock) $ bottomUpM + Pandoc _ blocks <- bottomUpM (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc pics <- readIORef picsRef let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e -> return e{ eRelativePath = newsrc } picEntries <- mapM readPicEntry pics + -- handle fonts + let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f + fontEntries <- mapM mkFontEntry fonts + -- body pages let isH1 (Header 1 _) = True isH1 _ = False - let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks - let chunks = splitByIndices h1Indices blocks + -- internal reference IDs change when we chunk the file, + -- so the next two lines fix that: + let reftable = correlateRefs blocks + let blocks' = replaceRefs reftable blocks + let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks' + let chunks = splitByIndices h1Indices blocks' let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys titleize xs = Pandoc meta xs - let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate } let chapters = map titleize chunks + let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate } let chapterToEntry :: Int -> Pandoc -> Entry chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $ fromString $ chapToHtml chap let chapterEntries = zipWith chapterToEntry [1..] chapters -- contents.opf - lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang") - (\_ -> return "en-US") + localeLang <- catch (liftM (takeWhile (/='.')) $ getEnv "LANG") + (\_ -> return "en-US") + let lang = case lookup "lang" (writerVariables opts') of + Just x -> x + Nothing -> localeLang uuid <- getRandomUUID let chapterNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), @@ -125,17 +143,22 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do ("href", eRelativePath ent), ("media-type", fromMaybe "application/octet-stream" $ imageTypeOf $ eRelativePath ent)] $ () + let fontNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () let plainify t = removeTrailingSpace $ writePlain opts'{ writerStandalone = False } $ Pandoc meta [Plain t] let plainTitle = plainify $ docTitle meta let plainAuthors = map plainify $ docAuthors meta + let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta let contentsData = fromString $ ppTopElement $ unode "package" ! [("version","2.0") ,("xmlns","http://www.idpf.org/2007/opf") ,("unique-identifier","BookId")] $ [ metadataElement (writerEPUBMetadata opts') - uuid lang plainTitle plainAuthors mbCoverImage + uuid lang plainTitle plainAuthors plainDate mbCoverImage , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () @@ -143,7 +166,8 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do ,("media-type","text/css")] $ () ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ - map pictureNode (cpicEntry ++ picEntries) + map pictureNode (cpicEntry ++ picEntries) ++ + map fontNode fontEntries , unode "spine" ! [("toc","ncx")] $ case mbCoverImage of Nothing -> [] @@ -197,6 +221,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do ,("media-type","application/oebps-package+xml")] $ () let containerEntry = mkEntry "META-INF/container.xml" containerData + -- com.apple.ibooks.display-options.xml + let apple = fromString $ ppTopElement $ + unode "display_options" $ + unode "platform" ! [("name","*")] $ + unode "option" ! [("name","specified-fonts")] $ "true" + let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple + -- stylesheet stylesheet <- case mbStylesheet of Just s -> return s @@ -205,13 +236,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do -- construct archive let archive = foldr addEntryToArchive emptyArchive - (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry : + (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry : contentsEntry : tocEntry : - (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries) ) + (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries) ) return $ fromArchive archive -metadataElement :: String -> UUID -> String -> String -> [String] -> Maybe a -> Element -metadataElement metadataXML uuid lang title authors mbCoverImage = +metadataElement :: String -> UUID -> String -> String -> [String] -> String -> Maybe a -> Element +metadataElement metadataXML uuid lang title authors date mbCoverImage = let userNodes = parseXML metadataXML elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ @@ -227,6 +258,7 @@ metadataElement metadataXML uuid lang title authors mbCoverImage = [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | not (elt `contains` "identifier") ] ++ [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] ++ + [ unode "dc:date" date | not (elt `contains` "date") ] ++ [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () | not (isNothing mbCoverImage) ] in elt{ elContent = elContent elt ++ map Elem newNodes } @@ -263,20 +295,24 @@ transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do "</ops:switch>" result = if "<math" `isPrefixOf` mathml then inOps else mathml return $ RawInline "html" result : xs -transformInlines _ _ _ (RawInline _ _ : xs) = return $ Str "" : xs -transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs transformInlines _ _ _ xs = return xs -transformBlock :: Block -> Block -transformBlock (RawBlock _ _) = Null -transformBlock x = x - (!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) -- | Version of 'ppTopElement' that specifies UTF-8 encoding. ppTopElement :: Element -> String -ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement +ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement + -- unEntity removes numeric entities introduced by ppElement + -- (kindlegen seems to choke on these). + where unEntity [] = "" + unEntity ('&':'#':xs) = + let (ds,ys) = break (==';') xs + rest = drop 1 ys + in case reads ('\'':'\\':ds ++ "'") of + ((x,_):_) -> x : unEntity rest + _ -> '&':'#':unEntity xs + unEntity (x:xs) = x : unEntity xs imageTypeOf :: FilePath -> Maybe String imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of @@ -288,38 +324,49 @@ imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of "svg" -> Just "image/svg+xml" _ -> Nothing -pageTemplate :: String -pageTemplate = unlines - [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" - , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" - , "<html xmlns=\"http://www.w3.org/1999/xhtml\">" - , "<head>" - , "<title>$title$</title>" - , "$if(coverimage)$" - , "<style type=\"text/css\">img{ max-width: 100%; }</style>" - , "$endif$" - , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />" - , "</head>" - , "<body>" - , "$if(coverimage)$" - , "<div id=\"cover-image\">" - , "<img src=\"$coverimage$\" alt=\"$title$\" />" - , "</div>" - , "$else$" - , "$if(titlepage)$" - , "<h1 class=\"title\">$title$</h1>" - , "$for(author)$" - , "<h2 class=\"author\">$author$</h2>" - , "$endfor$" - , "$else$" - , "<h1>$title$</h1>" - , "$if(toc)$" - , "$toc$" - , "$endif$" - , "$endif$" - , "$body$" - , "$endif$" - , "</body>" - , "</html>" - ] +data IdentState = IdentState{ + chapterNumber :: Int, + runningIdents :: [String], + chapterIdents :: [String], + identTable :: [(String,String)] + } deriving (Read, Show) + +-- Go through a block list and construct a table +-- correlating the automatically constructed references +-- that would be used in a normal pandoc document with +-- new URLs to be used in the EPUB. For example, what +-- was "header-1" might turn into "ch6.xhtml#header". +correlateRefs :: [Block] -> [(String,String)] +correlateRefs bs = identTable $ execState (mapM_ go bs) + IdentState{ chapterNumber = 0 + , runningIdents = [] + , chapterIdents = [] + , identTable = [] } + where go :: Block -> State IdentState () + go (Header n ils) = do + when (n == 1) $ + modify $ \s -> s{ chapterNumber = chapterNumber s + 1 + , chapterIdents = [] } + st <- get + let runningid = uniqueIdent ils (runningIdents st) + let chapid = if n == 1 + then Nothing + else Just $ uniqueIdent ils (chapterIdents st) + modify $ \s -> s{ runningIdents = runningid : runningIdents st + , chapterIdents = maybe (chapterIdents st) + (: chapterIdents st) chapid + , identTable = (runningid, "ch" ++ show (chapterNumber st) ++ + ".xhtml" ++ maybe "" ('#':) chapid) : identTable st + } + go _ = return () + +-- Replace internal link references using the table produced +-- by correlateRefs. +replaceRefs :: [(String,String)] -> [Block] -> [Block] +replaceRefs refTable = bottomUp replaceOneRef + where replaceOneRef x@(Link lab ('#':xs,tit)) = + case lookup xs refTable of + Just url -> Link lab (url,tit) + Nothing -> x + replaceOneRef x = x |