From f3a80034fff41a8b0c13519fa13bed794db1b8d2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 30 Sep 2017 16:07:47 -0500 Subject: Removed writerSourceURL, add source URL to common state. Removed `writerSourceURL` from `WriterOptions` (API change). Added `stSourceURL` to `CommonState`. It is set automatically by `setInputFiles`. Text.Pandoc.Class now exports `setInputFiles`, `setOutputFile`. The type of `getInputFiles` has changed; it now returns `[FilePath]` instead of `Maybe [FilePath]`. Functions in Class that formerly took the source URL as a parameter now have one fewer parameter (`fetchItem`, `downloadOrRead`, `setMediaResource`, `fillMediaBag`). Removed `WriterOptions` parameter from `makeSelfContained` in `SelfContained`. --- src/Text/Pandoc/SelfContained.hs | 110 +++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 56 deletions(-) (limited to 'src/Text/Pandoc/SelfContained.hs') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 55df147b6..787ea1954 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -42,10 +42,11 @@ import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as L import Data.Char (isAlphaNum, isAscii, toLower) import Data.List (isPrefixOf) -import Network.URI (URI (..), escapeURIString, parseURI) +import Network.URI (escapeURIString) import System.FilePath (takeDirectory, takeExtension, ()) import Text.HTML.TagSoup -import Text.Pandoc.Class (PandocMonad (..), fetchItem, report) +import Text.Pandoc.Class (PandocMonad (..), fetchItem, report, + getInputFiles, setInputFiles) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType) @@ -68,29 +69,29 @@ makeDataURI (mime, raw) = then mime ++ ";charset=utf-8" else mime -- mime type already has charset -convertTags :: PandocMonad m => Maybe String -> [Tag String] -> m [Tag String] -convertTags _ [] = return [] -convertTags sourceURL (t@TagOpen{}:ts) - | fromAttrib "data-external" t == "1" = (t:) <$> convertTags sourceURL ts -convertTags sourceURL (t@(TagOpen tagname as):ts) +convertTags :: PandocMonad m => [Tag String] -> m [Tag String] +convertTags [] = return [] +convertTags (t@TagOpen{}:ts) + | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts +convertTags (t@(TagOpen tagname as):ts) | tagname `elem` ["img", "embed", "video", "input", "audio", "source", "track"] = do as' <- mapM processAttribute as - rest <- convertTags sourceURL ts + rest <- convertTags ts return $ TagOpen tagname as' : rest where processAttribute (x,y) = if x == "src" || x == "data-src" || x == "href" || x == "poster" then do - enc <- getDataURI sourceURL (fromAttrib "type" t) y + enc <- getDataURI (fromAttrib "type" t) y return (x, enc) else return (x,y) -convertTags sourceURL (t@(TagOpen "script" as):TagClose "script":ts) = +convertTags (t@(TagOpen "script" as):TagClose "script":ts) = case fromAttrib "src" t of - [] -> (t:) <$> convertTags sourceURL ts + [] -> (t:) <$> convertTags ts src -> do let typeAttr = fromAttrib "type" t - res <- getData sourceURL typeAttr src - rest <- convertTags sourceURL ts + res <- getData typeAttr src + rest <- convertTags ts case res of Left dataUri -> return $ TagOpen "script" (("src",dataUri) : [(x,y) | (x,y) <- as, x /= "src"]) : @@ -110,21 +111,21 @@ convertTags sourceURL (t@(TagOpen "script" as):TagClose "script":ts) = (("src",makeDataURI (mime, bs)) : [(x,y) | (x,y) <- as, x /= "src"]) : TagClose "script" : rest -convertTags sourceURL (t@(TagOpen "link" as):ts) = +convertTags (t@(TagOpen "link" as):ts) = case fromAttrib "href" t of - [] -> (t:) <$> convertTags sourceURL ts + [] -> (t:) <$> convertTags ts src -> do - res <- getData sourceURL (fromAttrib "type" t) src + res <- getData (fromAttrib "type" t) src case res of Left dataUri -> do - rest <- convertTags sourceURL ts + rest <- convertTags ts return $ TagOpen "link" (("href",dataUri) : [(x,y) | (x,y) <- as, x /= "href"]) : rest Right (mime, bs) | "text/css" `isPrefixOf` mime && not (" do - rest <- convertTags sourceURL $ + rest <- convertTags $ dropWhile (==TagClose "link") ts return $ TagOpen "style" [("type", mime)] @@ -132,16 +133,16 @@ convertTags sourceURL (t@(TagOpen "link" as):ts) = : TagClose "style" : rest | otherwise -> do - rest <- convertTags sourceURL ts + rest <- convertTags ts return $ TagOpen "link" (("href",makeDataURI (mime, bs)) : [(x,y) | (x,y) <- as, x /= "href"]) : rest -convertTags sourceURL (t:ts) = (t:) <$> convertTags sourceURL ts +convertTags (t:ts) = (t:) <$> convertTags ts cssURLs :: PandocMonad m - => Maybe String -> FilePath -> ByteString -> m ByteString -cssURLs sourceURL d orig = do - res <- runParserT (parseCSSUrls sourceURL d) () "css" orig + => FilePath -> ByteString -> m ByteString +cssURLs d orig = do + res <- runParserT (parseCSSUrls d) () "css" orig case res of Left e -> do report $ CouldNotParseCSS (show e) @@ -149,17 +150,16 @@ cssURLs sourceURL d orig = do Right bs -> return bs parseCSSUrls :: PandocMonad m - => Maybe String -> FilePath -> ParsecT ByteString () m ByteString -parseCSSUrls sourceURL d = B.concat <$> P.many - (pCSSWhite <|> pCSSComment <|> pCSSImport sourceURL d <|> - pCSSUrl sourceURL d <|> pCSSOther) + => FilePath -> ParsecT ByteString () m ByteString +parseCSSUrls d = B.concat <$> P.many + (pCSSWhite <|> pCSSComment <|> pCSSImport d <|> pCSSUrl d <|> pCSSOther) -pCSSImport :: PandocMonad m => Maybe String -> FilePath - -> ParsecT ByteString () m ByteString -pCSSImport sourceURL d = P.try $ do +pCSSImport :: PandocMonad m + => FilePath -> ParsecT ByteString () m ByteString +pCSSImport d = P.try $ do P.string "@import" P.spaces - res <- (pQuoted <|> pUrl) >>= handleCSSUrl sourceURL d + res <- (pQuoted <|> pUrl) >>= handleCSSUrl d P.spaces P.char ';' P.spaces @@ -184,9 +184,9 @@ pCSSOther = do (B.singleton <$> P.char '/') pCSSUrl :: PandocMonad m - => Maybe String -> FilePath -> ParsecT ByteString () m ByteString -pCSSUrl sourceURL d = P.try $ do - res <- pUrl >>= handleCSSUrl sourceURL d + => FilePath -> ParsecT ByteString () m ByteString +pCSSUrl d = P.try $ do + res <- pUrl >>= handleCSSUrl d case res of Left b -> return b Right (mt,b) -> do @@ -215,41 +215,41 @@ pUrl = P.try $ do return (url, fallback) handleCSSUrl :: PandocMonad m - => Maybe String -> FilePath -> (String, ByteString) + => FilePath -> (String, ByteString) -> ParsecT ByteString () m (Either ByteString (MimeType, ByteString)) -handleCSSUrl sourceURL d (url, fallback) = do +handleCSSUrl d (url, fallback) = do -- pipes are used in URLs provided by Google Code fonts -- but parseURI doesn't like them, so we escape them: case escapeURIString (/='|') (trim url) of '#':_ -> return $ Left fallback 'd':'a':'t':'a':':':_ -> return $ Left fallback u -> do let url' = if isURI u then u else d u - res <- lift $ getData sourceURL "" url' + res <- lift $ getData "" url' case res of Left uri -> return $ Left (B.pack $ "url(" ++ uri ++ ")") Right (mt, raw) -> do -- note that the downloaded CSS may -- itself contain url(...). b <- if "text/css" `isPrefixOf` mt - then cssURLs sourceURL d raw + then cssURLs d raw else return raw return $ Right (mt, b) -getDataURI :: PandocMonad m => Maybe String -> MimeType -> String -> m String -getDataURI sourceURL mimetype src = do - res <- getData sourceURL mimetype src +getDataURI :: PandocMonad m => MimeType -> String -> m String +getDataURI mimetype src = do + res <- getData mimetype src case res of Left uri -> return uri Right x -> return $ makeDataURI x getData :: PandocMonad m - => Maybe String -> MimeType -> String + => MimeType -> String -> m (Either String (MimeType, ByteString)) -getData _ _ src@('d':'a':'t':'a':':':_) = return $ Left src-- already data: uri -getData sourceURL mimetype src = do +getData _ src@('d':'a':'t':'a':':':_) = return $ Left src-- already data: uri +getData mimetype src = do let ext = map toLower $ takeExtension src - (raw, respMime) <- fetchItem sourceURL src + (raw, respMime) <- fetchItem src let raw' = if ext == ".gz" then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks $ [raw] @@ -259,15 +259,13 @@ getData sourceURL mimetype src = do $ "Could not determine mime type for `" ++ src ++ "'" (x, Nothing) -> return x (_, Just x ) -> return x - let cssSourceURL = case parseURI src of - Just u - | uriScheme u `elem` ["http:","https:"] -> - Just $ show u{ uriPath = "", - uriQuery = "", - uriFragment = "" } - _ -> Nothing result <- if "text/css" `isPrefixOf` mime - then cssURLs cssSourceURL (takeDirectory src) raw' + then do + oldInputs <- getInputFiles + setInputFiles [src] + res <- cssURLs (takeDirectory src) raw' + setInputFiles oldInputs + return res else return raw' return $ Right (mime, result) @@ -275,8 +273,8 @@ getData sourceURL mimetype src = do -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. -makeSelfContained :: PandocMonad m => WriterOptions -> String -> m String -makeSelfContained opts inp = do +makeSelfContained :: PandocMonad m => String -> m String +makeSelfContained inp = do let tags = parseTags inp - out' <- convertTags (writerSourceURL opts) tags + out' <- convertTags tags return $ renderTags' out' -- cgit v1.2.3