summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/EPUB.hs
diff options
context:
space:
mode:
authorKiwamu Okabe <kiwamu@debian.or.jp>2012-02-28 01:49:16 +0900
committerKiwamu Okabe <kiwamu@debian.or.jp>2012-02-28 01:49:16 +0900
commitb27a86d9758512d85d052a9a46917a54eb3da035 (patch)
tree43171a5211b250025cc85e6994a6a516aad5668f /src/Text/Pandoc/Writers/EPUB.hs
parent6ac52ff209b172129452cd464d840b3c73c85c01 (diff)
Imported Upstream version 1.9.1.1
Diffstat (limited to 'src/Text/Pandoc/Writers/EPUB.hs')
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs177
1 files changed, 112 insertions, 65 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 9fc393fed..67048348e 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -32,30 +32,31 @@ import Data.IORef
import Data.Maybe ( fromMaybe, isNothing )
import Data.List ( findIndices, isPrefixOf )
import System.Environment ( getEnv )
-import System.FilePath ( (</>), takeBaseName, takeExtension )
+import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
import Codec.Archive.Zip
-import System.Time
+import Data.Time.Clock.POSIX
import Text.Pandoc.Shared hiding ( Element )
import Text.Pandoc.Definition
import Text.Pandoc.Generic
-import Control.Monad (liftM)
+import Control.Monad.State
import Text.XML.Light hiding (ppTopElement)
import Text.Pandoc.UUID
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.Markdown ( writePlain )
import Data.Char ( toLower )
-import System.Directory ( copyFile )
import Network.URI ( unEscapeString )
+import Text.Pandoc.MIME (getMimeType)
-- | Produce an EPUB file from a Pandoc document.
writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line
+ -> [FilePath] -- ^ Paths to fonts to embed
-> WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO B.ByteString
-writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
- (TOD epochtime _) <- getClockTime
+writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do
+ epochtime <- floor `fmap` getPOSIXTime
let mkEntry path content = toEntry path epochtime content
let opts' = opts{ writerEmailObfuscation = NoObfuscation
, writerStandalone = True
@@ -64,17 +65,24 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
let vars = writerVariables opts'
let mbCoverImage = lookup "epub-cover-image" vars
+ titlePageTemplate <- readDataFile (writerUserDataDir opts)
+ $ "templates" </> "epub-titlepage" <.> "html"
+
+ coverImageTemplate <- readDataFile (writerUserDataDir opts)
+ $ "templates" </> "epub-coverimage" <.> "html"
+
+ pageTemplate <- readDataFile (writerUserDataDir opts)
+ $ "templates" </> "epub-page" <.> "html"
+
-- cover page
(cpgEntry, cpicEntry) <-
case mbCoverImage of
Nothing -> return ([],[])
Just img -> do
let coverImage = "cover-image" ++ takeExtension img
- copyFile img coverImage
let cpContent = fromString $ writeHtmlString
- opts'{writerTemplate = pageTemplate
- ,writerVariables =
- ("coverimage",coverImage):vars}
+ opts'{writerTemplate = coverImageTemplate,
+ writerVariables = ("coverimage",coverImage):vars}
(Pandoc meta [])
imgContent <- B.readFile img
return ( [mkEntry "cover.xhtml" cpContent]
@@ -82,37 +90,47 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
-- title page
let tpContent = fromString $ writeHtmlString
- opts'{writerTemplate = pageTemplate
- ,writerVariables = ("titlepage","yes"):vars}
+ opts'{writerTemplate = titlePageTemplate}
(Pandoc meta [])
let tpEntry = mkEntry "title_page.xhtml" tpContent
-- handle pictures
picsRef <- newIORef []
- Pandoc _ blocks <- liftM (bottomUp transformBlock) $ bottomUpM
+ Pandoc _ blocks <- bottomUpM
(transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc
pics <- readIORef picsRef
let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e ->
return e{ eRelativePath = newsrc }
picEntries <- mapM readPicEntry pics
+ -- handle fonts
+ let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
+ fontEntries <- mapM mkFontEntry fonts
+
-- body pages
let isH1 (Header 1 _) = True
isH1 _ = False
- let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks
- let chunks = splitByIndices h1Indices blocks
+ -- internal reference IDs change when we chunk the file,
+ -- so the next two lines fix that:
+ let reftable = correlateRefs blocks
+ let blocks' = replaceRefs reftable blocks
+ let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks'
+ let chunks = splitByIndices h1Indices blocks'
let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys
titleize xs = Pandoc meta xs
- let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate }
let chapters = map titleize chunks
+ let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate }
let chapterToEntry :: Int -> Pandoc -> Entry
chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $
fromString $ chapToHtml chap
let chapterEntries = zipWith chapterToEntry [1..] chapters
-- contents.opf
- lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang")
- (\_ -> return "en-US")
+ localeLang <- catch (liftM (takeWhile (/='.')) $ getEnv "LANG")
+ (\_ -> return "en-US")
+ let lang = case lookup "lang" (writerVariables opts') of
+ Just x -> x
+ Nothing -> localeLang
uuid <- getRandomUUID
let chapterNode ent = unode "item" !
[("id", takeBaseName $ eRelativePath ent),
@@ -125,17 +143,22 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
("href", eRelativePath ent),
("media-type", fromMaybe "application/octet-stream"
$ imageTypeOf $ eRelativePath ent)] $ ()
+ let fontNode ent = unode "item" !
+ [("id", takeBaseName $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ ()
let plainify t = removeTrailingSpace $
writePlain opts'{ writerStandalone = False } $
Pandoc meta [Plain t]
let plainTitle = plainify $ docTitle meta
let plainAuthors = map plainify $ docAuthors meta
+ let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta
let contentsData = fromString $ ppTopElement $
unode "package" ! [("version","2.0")
,("xmlns","http://www.idpf.org/2007/opf")
,("unique-identifier","BookId")] $
[ metadataElement (writerEPUBMetadata opts')
- uuid lang plainTitle plainAuthors mbCoverImage
+ uuid lang plainTitle plainAuthors plainDate mbCoverImage
, unode "manifest" $
[ unode "item" ! [("id","ncx"), ("href","toc.ncx")
,("media-type","application/x-dtbncx+xml")] $ ()
@@ -143,7 +166,8 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
,("media-type","text/css")] $ ()
] ++
map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
- map pictureNode (cpicEntry ++ picEntries)
+ map pictureNode (cpicEntry ++ picEntries) ++
+ map fontNode fontEntries
, unode "spine" ! [("toc","ncx")] $
case mbCoverImage of
Nothing -> []
@@ -197,6 +221,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
,("media-type","application/oebps-package+xml")] $ ()
let containerEntry = mkEntry "META-INF/container.xml" containerData
+ -- com.apple.ibooks.display-options.xml
+ let apple = fromString $ ppTopElement $
+ unode "display_options" $
+ unode "platform" ! [("name","*")] $
+ unode "option" ! [("name","specified-fonts")] $ "true"
+ let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
+
-- stylesheet
stylesheet <- case mbStylesheet of
Just s -> return s
@@ -205,13 +236,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
-- construct archive
let archive = foldr addEntryToArchive emptyArchive
- (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry :
+ (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
contentsEntry : tocEntry :
- (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries) )
+ (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries) )
return $ fromArchive archive
-metadataElement :: String -> UUID -> String -> String -> [String] -> Maybe a -> Element
-metadataElement metadataXML uuid lang title authors mbCoverImage =
+metadataElement :: String -> UUID -> String -> String -> [String] -> String -> Maybe a -> Element
+metadataElement metadataXML uuid lang title authors date mbCoverImage =
let userNodes = parseXML metadataXML
elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:opf","http://www.idpf.org/2007/opf")] $
@@ -227,6 +258,7 @@ metadataElement metadataXML uuid lang title authors mbCoverImage =
[ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
not (elt `contains` "identifier") ] ++
[ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] ++
+ [ unode "dc:date" date | not (elt `contains` "date") ] ++
[ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
not (isNothing mbCoverImage) ]
in elt{ elContent = elContent elt ++ map Elem newNodes }
@@ -263,20 +295,24 @@ transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
"</ops:switch>"
result = if "<math" `isPrefixOf` mathml then inOps else mathml
return $ RawInline "html" result : xs
-transformInlines _ _ _ (RawInline _ _ : xs) = return $ Str "" : xs
-transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs
transformInlines _ _ _ xs = return xs
-transformBlock :: Block -> Block
-transformBlock (RawBlock _ _) = Null
-transformBlock x = x
-
(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
+ -- unEntity removes numeric entities introduced by ppElement
+ -- (kindlegen seems to choke on these).
+ where unEntity [] = ""
+ unEntity ('&':'#':xs) =
+ let (ds,ys) = break (==';') xs
+ rest = drop 1 ys
+ in case reads ('\'':'\\':ds ++ "'") of
+ ((x,_):_) -> x : unEntity rest
+ _ -> '&':'#':unEntity xs
+ unEntity (x:xs) = x : unEntity xs
imageTypeOf :: FilePath -> Maybe String
imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
@@ -288,38 +324,49 @@ imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
"svg" -> Just "image/svg+xml"
_ -> Nothing
-pageTemplate :: String
-pageTemplate = unlines
- [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
- , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
- , "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
- , "<head>"
- , "<title>$title$</title>"
- , "$if(coverimage)$"
- , "<style type=\"text/css\">img{ max-width: 100%; }</style>"
- , "$endif$"
- , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />"
- , "</head>"
- , "<body>"
- , "$if(coverimage)$"
- , "<div id=\"cover-image\">"
- , "<img src=\"$coverimage$\" alt=\"$title$\" />"
- , "</div>"
- , "$else$"
- , "$if(titlepage)$"
- , "<h1 class=\"title\">$title$</h1>"
- , "$for(author)$"
- , "<h2 class=\"author\">$author$</h2>"
- , "$endfor$"
- , "$else$"
- , "<h1>$title$</h1>"
- , "$if(toc)$"
- , "$toc$"
- , "$endif$"
- , "$endif$"
- , "$body$"
- , "$endif$"
- , "</body>"
- , "</html>"
- ]
+data IdentState = IdentState{
+ chapterNumber :: Int,
+ runningIdents :: [String],
+ chapterIdents :: [String],
+ identTable :: [(String,String)]
+ } deriving (Read, Show)
+
+-- Go through a block list and construct a table
+-- correlating the automatically constructed references
+-- that would be used in a normal pandoc document with
+-- new URLs to be used in the EPUB. For example, what
+-- was "header-1" might turn into "ch6.xhtml#header".
+correlateRefs :: [Block] -> [(String,String)]
+correlateRefs bs = identTable $ execState (mapM_ go bs)
+ IdentState{ chapterNumber = 0
+ , runningIdents = []
+ , chapterIdents = []
+ , identTable = [] }
+ where go :: Block -> State IdentState ()
+ go (Header n ils) = do
+ when (n == 1) $
+ modify $ \s -> s{ chapterNumber = chapterNumber s + 1
+ , chapterIdents = [] }
+ st <- get
+ let runningid = uniqueIdent ils (runningIdents st)
+ let chapid = if n == 1
+ then Nothing
+ else Just $ uniqueIdent ils (chapterIdents st)
+ modify $ \s -> s{ runningIdents = runningid : runningIdents st
+ , chapterIdents = maybe (chapterIdents st)
+ (: chapterIdents st) chapid
+ , identTable = (runningid, "ch" ++ show (chapterNumber st) ++
+ ".xhtml" ++ maybe "" ('#':) chapid) : identTable st
+ }
+ go _ = return ()
+
+-- Replace internal link references using the table produced
+-- by correlateRefs.
+replaceRefs :: [(String,String)] -> [Block] -> [Block]
+replaceRefs refTable = bottomUp replaceOneRef
+ where replaceOneRef x@(Link lab ('#':xs,tit)) =
+ case lookup xs refTable of
+ Just url -> Link lab (url,tit)
+ Nothing -> x
+ replaceOneRef x = x