summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/SelfContained.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-09-30 16:07:47 -0500
committerJohn MacFarlane <jgm@berkeley.edu>2017-09-30 16:11:20 -0500
commitf3a80034fff41a8b0c13519fa13bed794db1b8d2 (patch)
tree8355bd3c88b87b960171a65aad05b36e6822e8a4 /src/Text/Pandoc/SelfContained.hs
parent9b7d652ab7a0f4cdd86efd92f43f1b20724e8982 (diff)
Removed writerSourceURL, add source URL to common state.
Removed `writerSourceURL` from `WriterOptions` (API change). Added `stSourceURL` to `CommonState`. It is set automatically by `setInputFiles`. Text.Pandoc.Class now exports `setInputFiles`, `setOutputFile`. The type of `getInputFiles` has changed; it now returns `[FilePath]` instead of `Maybe [FilePath]`. Functions in Class that formerly took the source URL as a parameter now have one fewer parameter (`fetchItem`, `downloadOrRead`, `setMediaResource`, `fillMediaBag`). Removed `WriterOptions` parameter from `makeSelfContained` in `SelfContained`.
Diffstat (limited to 'src/Text/Pandoc/SelfContained.hs')
-rw-r--r--src/Text/Pandoc/SelfContained.hs110
1 files changed, 54 insertions, 56 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 55df147b6..787ea1954 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -42,10 +42,11 @@ import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy as L
import Data.Char (isAlphaNum, isAscii, toLower)
import Data.List (isPrefixOf)
-import Network.URI (URI (..), escapeURIString, parseURI)
+import Network.URI (escapeURIString)
import System.FilePath (takeDirectory, takeExtension, (</>))
import Text.HTML.TagSoup
-import Text.Pandoc.Class (PandocMonad (..), fetchItem, report)
+import Text.Pandoc.Class (PandocMonad (..), fetchItem, report,
+ getInputFiles, setInputFiles)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.MIME (MimeType)
@@ -68,29 +69,29 @@ makeDataURI (mime, raw) =
then mime ++ ";charset=utf-8"
else mime -- mime type already has charset
-convertTags :: PandocMonad m => Maybe String -> [Tag String] -> m [Tag String]
-convertTags _ [] = return []
-convertTags sourceURL (t@TagOpen{}:ts)
- | fromAttrib "data-external" t == "1" = (t:) <$> convertTags sourceURL ts
-convertTags sourceURL (t@(TagOpen tagname as):ts)
+convertTags :: PandocMonad m => [Tag String] -> m [Tag String]
+convertTags [] = return []
+convertTags (t@TagOpen{}:ts)
+ | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts
+convertTags (t@(TagOpen tagname as):ts)
| tagname `elem`
["img", "embed", "video", "input", "audio", "source", "track"] = do
as' <- mapM processAttribute as
- rest <- convertTags sourceURL ts
+ rest <- convertTags ts
return $ TagOpen tagname as' : rest
where processAttribute (x,y) =
if x == "src" || x == "data-src" || x == "href" || x == "poster"
then do
- enc <- getDataURI sourceURL (fromAttrib "type" t) y
+ enc <- getDataURI (fromAttrib "type" t) y
return (x, enc)
else return (x,y)
-convertTags sourceURL (t@(TagOpen "script" as):TagClose "script":ts) =
+convertTags (t@(TagOpen "script" as):TagClose "script":ts) =
case fromAttrib "src" t of
- [] -> (t:) <$> convertTags sourceURL ts
+ [] -> (t:) <$> convertTags ts
src -> do
let typeAttr = fromAttrib "type" t
- res <- getData sourceURL typeAttr src
- rest <- convertTags sourceURL ts
+ res <- getData typeAttr src
+ rest <- convertTags ts
case res of
Left dataUri -> return $ TagOpen "script"
(("src",dataUri) : [(x,y) | (x,y) <- as, x /= "src"]) :
@@ -110,21 +111,21 @@ convertTags sourceURL (t@(TagOpen "script" as):TagClose "script":ts) =
(("src",makeDataURI (mime, bs)) :
[(x,y) | (x,y) <- as, x /= "src"]) :
TagClose "script" : rest
-convertTags sourceURL (t@(TagOpen "link" as):ts) =
+convertTags (t@(TagOpen "link" as):ts) =
case fromAttrib "href" t of
- [] -> (t:) <$> convertTags sourceURL ts
+ [] -> (t:) <$> convertTags ts
src -> do
- res <- getData sourceURL (fromAttrib "type" t) src
+ res <- getData (fromAttrib "type" t) src
case res of
Left dataUri -> do
- rest <- convertTags sourceURL ts
+ rest <- convertTags ts
return $ TagOpen "link"
(("href",dataUri) : [(x,y) | (x,y) <- as, x /= "href"]) :
rest
Right (mime, bs)
| "text/css" `isPrefixOf` mime
&& not ("</" `B.isInfixOf` bs) -> do
- rest <- convertTags sourceURL $
+ rest <- convertTags $
dropWhile (==TagClose "link") ts
return $
TagOpen "style" [("type", mime)]
@@ -132,16 +133,16 @@ convertTags sourceURL (t@(TagOpen "link" as):ts) =
: TagClose "style"
: rest
| otherwise -> do
- rest <- convertTags sourceURL ts
+ rest <- convertTags ts
return $ TagOpen "link"
(("href",makeDataURI (mime, bs)) :
[(x,y) | (x,y) <- as, x /= "href"]) : rest
-convertTags sourceURL (t:ts) = (t:) <$> convertTags sourceURL ts
+convertTags (t:ts) = (t:) <$> convertTags ts
cssURLs :: PandocMonad m
- => Maybe String -> FilePath -> ByteString -> m ByteString
-cssURLs sourceURL d orig = do
- res <- runParserT (parseCSSUrls sourceURL d) () "css" orig
+ => FilePath -> ByteString -> m ByteString
+cssURLs d orig = do
+ res <- runParserT (parseCSSUrls d) () "css" orig
case res of
Left e -> do
report $ CouldNotParseCSS (show e)
@@ -149,17 +150,16 @@ cssURLs sourceURL d orig = do
Right bs -> return bs
parseCSSUrls :: PandocMonad m
- => Maybe String -> FilePath -> ParsecT ByteString () m ByteString
-parseCSSUrls sourceURL d = B.concat <$> P.many
- (pCSSWhite <|> pCSSComment <|> pCSSImport sourceURL d <|>
- pCSSUrl sourceURL d <|> pCSSOther)
+ => FilePath -> ParsecT ByteString () m ByteString
+parseCSSUrls d = B.concat <$> P.many
+ (pCSSWhite <|> pCSSComment <|> pCSSImport d <|> pCSSUrl d <|> pCSSOther)
-pCSSImport :: PandocMonad m => Maybe String -> FilePath
- -> ParsecT ByteString () m ByteString
-pCSSImport sourceURL d = P.try $ do
+pCSSImport :: PandocMonad m
+ => FilePath -> ParsecT ByteString () m ByteString
+pCSSImport d = P.try $ do
P.string "@import"
P.spaces
- res <- (pQuoted <|> pUrl) >>= handleCSSUrl sourceURL d
+ res <- (pQuoted <|> pUrl) >>= handleCSSUrl d
P.spaces
P.char ';'
P.spaces
@@ -184,9 +184,9 @@ pCSSOther = do
(B.singleton <$> P.char '/')
pCSSUrl :: PandocMonad m
- => Maybe String -> FilePath -> ParsecT ByteString () m ByteString
-pCSSUrl sourceURL d = P.try $ do
- res <- pUrl >>= handleCSSUrl sourceURL d
+ => FilePath -> ParsecT ByteString () m ByteString
+pCSSUrl d = P.try $ do
+ res <- pUrl >>= handleCSSUrl d
case res of
Left b -> return b
Right (mt,b) -> do
@@ -215,41 +215,41 @@ pUrl = P.try $ do
return (url, fallback)
handleCSSUrl :: PandocMonad m
- => Maybe String -> FilePath -> (String, ByteString)
+ => FilePath -> (String, ByteString)
-> ParsecT ByteString () m
(Either ByteString (MimeType, ByteString))
-handleCSSUrl sourceURL d (url, fallback) = do
+handleCSSUrl d (url, fallback) = do
-- pipes are used in URLs provided by Google Code fonts
-- but parseURI doesn't like them, so we escape them:
case escapeURIString (/='|') (trim url) of
'#':_ -> return $ Left fallback
'd':'a':'t':'a':':':_ -> return $ Left fallback
u -> do let url' = if isURI u then u else d </> u
- res <- lift $ getData sourceURL "" url'
+ res <- lift $ getData "" url'
case res of
Left uri -> return $ Left (B.pack $ "url(" ++ uri ++ ")")
Right (mt, raw) -> do
-- note that the downloaded CSS may
-- itself contain url(...).
b <- if "text/css" `isPrefixOf` mt
- then cssURLs sourceURL d raw
+ then cssURLs d raw
else return raw
return $ Right (mt, b)
-getDataURI :: PandocMonad m => Maybe String -> MimeType -> String -> m String
-getDataURI sourceURL mimetype src = do
- res <- getData sourceURL mimetype src
+getDataURI :: PandocMonad m => MimeType -> String -> m String
+getDataURI mimetype src = do
+ res <- getData mimetype src
case res of
Left uri -> return uri
Right x -> return $ makeDataURI x
getData :: PandocMonad m
- => Maybe String -> MimeType -> String
+ => MimeType -> String
-> m (Either String (MimeType, ByteString))
-getData _ _ src@('d':'a':'t':'a':':':_) = return $ Left src-- already data: uri
-getData sourceURL mimetype src = do
+getData _ src@('d':'a':'t':'a':':':_) = return $ Left src-- already data: uri
+getData mimetype src = do
let ext = map toLower $ takeExtension src
- (raw, respMime) <- fetchItem sourceURL src
+ (raw, respMime) <- fetchItem src
let raw' = if ext == ".gz"
then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
$ [raw]
@@ -259,15 +259,13 @@ getData sourceURL mimetype src = do
$ "Could not determine mime type for `" ++ src ++ "'"
(x, Nothing) -> return x
(_, Just x ) -> return x
- let cssSourceURL = case parseURI src of
- Just u
- | uriScheme u `elem` ["http:","https:"] ->
- Just $ show u{ uriPath = "",
- uriQuery = "",
- uriFragment = "" }
- _ -> Nothing
result <- if "text/css" `isPrefixOf` mime
- then cssURLs cssSourceURL (takeDirectory src) raw'
+ then do
+ oldInputs <- getInputFiles
+ setInputFiles [src]
+ res <- cssURLs (takeDirectory src) raw'
+ setInputFiles oldInputs
+ return res
else return raw'
return $ Right (mime, result)
@@ -275,8 +273,8 @@ getData sourceURL mimetype src = do
-- | Convert HTML into self-contained HTML, incorporating images,
-- scripts, and CSS using data: URIs.
-makeSelfContained :: PandocMonad m => WriterOptions -> String -> m String
-makeSelfContained opts inp = do
+makeSelfContained :: PandocMonad m => String -> m String
+makeSelfContained inp = do
let tags = parseTags inp
- out' <- convertTags (writerSourceURL opts) tags
+ out' <- convertTags tags
return $ renderTags' out'