From 436a585c3bbf890d3a5df25cbc577376ee2fa5a1 Mon Sep 17 00:00:00 2001 From: Sergey Astanin Date: Sun, 22 Apr 2012 21:04:12 +0200 Subject: FB2: support images embedded as data URIs + tests. --- src/Tests/Old.hs | 1 + src/Text/Pandoc/Writers/FB2.hs | 70 ++++++++++++++++++++++++++++++++++-------- tests/fb2.images-embedded.fb2 | 2 ++ tests/fb2.images-embedded.html | 14 +++++++++ 4 files changed, 75 insertions(+), 12 deletions(-) create mode 100644 tests/fb2.images-embedded.fb2 create mode 100644 tests/fb2.images-embedded.html diff --git a/src/Tests/Old.hs b/src/Tests/Old.hs index c6952520c..0400b0324 100644 --- a/src/Tests/Old.hs +++ b/src/Tests/Old.hs @@ -111,6 +111,7 @@ tests = [ testGroup "markdown" [ fb2WriterTest "basic" [] "fb2.basic.markdown" "fb2.basic.fb2" , fb2WriterTest "titles" [] "fb2.titles.markdown" "fb2.titles.fb2" , fb2WriterTest "images" [] "fb2.images.markdown" "fb2.images.fb2" + , fb2WriterTest "images-embedded" [] "fb2.images-embedded.html" "fb2.images-embedded.fb2" , fb2WriterTest "tables" [] "tables.native" "tables.fb2" , fb2WriterTest "math" [] "fb2.math.markdown" "fb2.math.fb2" , fb2WriterTest "math-webtex" ["--webtex"] "fb2.math.markdown" "fb2.math-webtex.fb2" diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 924ffd819..0fbfb3968 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -28,8 +28,8 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad.State (StateT, evalStateT, get, modify) import Control.Monad.State (liftM, liftM2, liftIO) import Data.ByteString.Base64 (encode) -import Data.Char (toUpper, toLower, isSpace) -import Data.List (intersperse, intercalate) +import Data.Char (toUpper, toLower, isSpace, isAscii, isControl) +import Data.List (intersperse, intercalate, isPrefixOf) import Data.Either (lefts, rights) import Network.Browser (browse, request, setAllowRedirects, setOutHandler) import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody) @@ -220,9 +220,15 @@ fetchImages links = do fetchImage :: String -> String -> IO (Either String Content) fetchImage href link = do mbimg <- - if isURI link - then fetchURL link - else do + case (isURI link, readDataURI link) of + (True, Just (mime,_,True,base64)) -> + let mime' = map toLower mime + in if mime' == "image/png" || mime' == "image/jpeg" + then return (Just (mime',base64)) + else return Nothing + (True, Just _) -> return Nothing -- not base64-encoded + (True, Nothing) -> fetchURL link + (False, _) -> do d <- nothingOnError $ B.readFile (unEscapeString link) let t = case map toLower (takeExtension link) of ".png" -> Just "image/png" @@ -230,15 +236,13 @@ fetchImage href link = do ".jpeg" -> Just "image/jpeg" ".jpe" -> Just "image/jpeg" _ -> Nothing -- only PNG and JPEG are supported in FB2 - return $ liftM2 (,) t d + return $ liftM2 (,) t (liftM (toStr . encode) d) case mbimg of Just (imgtype, imgdata) -> do - let encdata = encode imgdata - let encstr = map (toEnum . fromEnum) . B.unpack $ encdata return . Right $ el "binary" ( [uattr "id" href , uattr "content-type" imgtype] - , txt encstr ) + , txt imgdata ) _ -> return (Left ('#':href)) where nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString)) @@ -246,8 +250,45 @@ fetchImage href link = do omnihandler :: E.SomeException -> IO (Maybe B.ByteString) omnihandler _ = return Nothing +-- | Extract mime type and encoded data from the Data URI. +readDataURI :: String -- ^ URI + -> Maybe (String,String,Bool,String) + -- ^ Maybe (mime,charset,isBase64,data) +readDataURI uri = + let prefix = "data:" + in if not (prefix `isPrefixOf` uri) + then Nothing + else + let rest = drop (length prefix) uri + meta = takeWhile (/= ',') rest -- without trailing ',' + uridata = drop (length meta + 1) rest + parts = split (== ';') meta + (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts + in Just (mime,cs,enc,uridata) + where + upd str m@(mime,cs,enc) + | isMimeType str = (str,cs,enc) + | "charset=" `isPrefixOf` str = (mime,drop (length "charset=") str,enc) + | str == "base64" = (mime,cs,True) + | otherwise = m + +-- Without parameters like ;charset=...; see RFC 2045, 5.1 +isMimeType :: String -> Bool +isMimeType s = + case split (=='/') s of + [mtype,msubtype] -> + ((map toLower mtype) `elem` types + || "x-" `isPrefixOf` (map toLower mtype)) + && all valid mtype + && all valid msubtype + _ -> False + where + types = ["text","image","audio","video","application","message","multipart"] + valid c = isAscii c && not (isControl c) && not (isSpace c) && + c `notElem` "()<>@,;:\\\"/[]?=" + -- | Fetch URL, return its Content-Type and binary data on success. -fetchURL :: String -> IO (Maybe (String, B.ByteString)) +fetchURL :: String -> IO (Maybe (String, String)) fetchURL url = do flip catchIO_ (return Nothing) $ do r <- browse $ do @@ -255,10 +296,15 @@ fetchURL url = do setAllowRedirects True liftM snd . request . getRequest $ url let content_type = lookupHeader HdrContentType (getHeaders r) - content <- liftM (Just . toBS) . getResponseBody $ Right r + content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r return $ liftM2 (,) content_type content where - toBS = B.pack . map (toEnum . fromEnum) + +toBS :: String -> B.ByteString +toBS = B.pack . map (toEnum . fromEnum) + +toStr :: B.ByteString -> String +toStr = map (toEnum . fromEnum) . B.unpack footnoteID :: Int -> String footnoteID i = "n" ++ (show i) diff --git a/tests/fb2.images-embedded.fb2 b/tests/fb2.images-embedded.fb2 new file mode 100644 index 000000000..373eda7ff --- /dev/null +++ b/tests/fb2.images-embedded.fb2 @@ -0,0 +1,2 @@ + +pandoc<p />

This image was embedded using data URI scheme

This image was embedded using data URI scheme

iVBORw0KGgoAAAANSUhEUgAAADAAAAAgCAIAAADbtmxLAAABmGlDQ1BpY2MAAHjapdG/axMBGMbxTy4tldJSwSAiHW4ootKCqIOrVShIkRIrJNUluUvaQi4Nd1dEXAQHF4cOXVRcLOLirJv4BygIggqCi7sUBRcpcbiDgtBBfOGF5/315eV9qR7qRUk2EpL087S+MB82mivh2BeBcUcddrwVZYOLS0uLDrRfH1Xgw1wvSjL/ZpNxJ4uohFiKBmlOZYC7t/JBTmUXtWitFROMYDZtNFcIzqDWLvRl1FYL3UAtXa5fIughXC30A4TtQr9AGK2lCcFbzMRJPybYxWScxDFVGE16m1G5ZwUTnf71aziPaXUsYB4h2tjEOnrIMVfG/QJyAC/GtKvYKFlRqQe4jbTkrGKtZM+WvZvI0CnjbtnfKb1XMtBoroR//yzrnjtbbDRxhdFvw+HP04w9Zu/+cPj76XC4t0P1M2+29+c3trnwnerWfm7mCVP3ePl6P9d+xqstjn0dtNIWitMG3S4/njPZ5Mh7xm/8b734Z1m384nlOyy+4+EjTnSZunkyzsP1ft5J+63eKWT1hXn4AzDofghlJQBJAAAACXBIWXMAAAsSAAALEgHS3X78AAACInpUWHRSYXcgcHJvZmlsZSB0eXBlIGV4aWYAAHjahVRJtiQhCNx7ijpCMIhyHNPU9/oGffxeaNY3p/5VC5IAQkAhtL9/evh8Pp+PiwaNKZubAYDuugNcMH4ZIAN6A6ATo68kdAA8VQ1DkoEIq2EILiBiALIQDTISWvz3SSQNJHwnilWTGgC/ZMSa1Fc8TDznZH4rgWOtRrwQKGh8VyNZ8bAY9Ccj1EGXUI0JwNE3n3itxrzis7Sq1TgBiNMwesKo1TjfcdZqXBaiWRpLrcbbEjBLU63G9QGv1bit+CSKWi2W8+3QLDluF/wIBgBEaNQWNSjFzHj7/zgOP92EBap3v2BqlNi2pEbGVi0yBNwkcRPiTVxIJDVLRgJxEXGLRgAgLBASkiIIwlIEYgyBJCnC4lKExN6yGSW6SD961nvvQaxhBZq4rbptbX1HlJPSokN37t9m9957a5utDux7Xwk06WnWWOJ2yqgkPqW4e2urnmNPK0HMtq0Hkkc7ZbSXUleHiNMIoGy7r/ppEwAIV+Amv1rS/3ghgCz23ns+m/HrASdJMWT2chsBiS2z73fcLGd+3E8hZ05nQ81zzOW2n8Saj1VzwTMHZ+g6xcPg5ozLASM7Z/hl9kaPnPFQmrcyvm8lFKbrAQwAoegtYFy34rEXRSFP/qEo4tmQ0wywlwPyG5G/BJQXvF5wOR4k7m9HjlupR/y6Mp42RjhWxm+Oh99BvMrwD3UCiGvkpxuRAAAACXZwQWcAAAAwAAAAIACELJ4GAAALGklEQVRYw11YW48dV1b+1tq7qs6t+/TldPsSx3bbcRwncWY0A4LMCOYFXpgHJCR4QvwAnpAQj/wB/gR/ACR4QUKDECMUEjLOZew4zsRxuu122+52n9Pnfuqy9/p4qNNtD1tLpVKpap/vrMu31rdl/nAgIjSpF0kAJAEHGJwAAIQheqeARVqomCQZGUkjSYLmTAyAkCJiESRVNYQAaL3h8k3SzPj6MjEzsr7SqyoAcRABQItRVZ14kqifmNEkTZNiUZiZ934xm4WE3mva8GYGAA5UJ8YYIyCipiKk1ZuTBAhQBGZQFRKvTAgBaQBIeOekBqpa+0gBxFgAlmhqhixJzUKMRVEUx/3RWmel3z9xzrVX2+tuRb3WnrDKVFUEgKlCVWI055WMZ76AieipX0AYzYwQUZqZqJD0tKCqFJAxhKiqSZKooirj06fPMt/odlezhngFYar+0e7jxWLR6/Vc7ubzpNnM4BlDSDLvvQ8hnAYFIlCFRUDEjASgIGFGEcBIoSrMCFAVhgiKTxJPkgJVSeBBhKqaTCb5Ip4cD/efPP/g9q2soSvdTp7nAuv2Vk/2BoHFyWgRYxVjbDezRiMDMicNxqCqqhIZVZxZVK3zaRkgXYaMdeDMoMoapSpo8GSE1t6ihaCqPs3WV/VwPpxOp8fHRw++cUmq7XYzSbLV1W5vu3fw7Emn0xwcD+ZzF8vKQtlI09R7J6SXOldgEDEwAmdguMyYV/lEVWEkARExEkoVBxHWxeUSFefK6fTJ48dlme/sXN7a3vjVnf8NIQ6Hk1ajOZ+Nf/3ZZ9Uij0V+fedqM0v2nz5ut7LtCz1GC2WVJqmry1KgZKLqlbo0qLPanJrAVKgSRX/LPElxQF0smiDy6Ojo7pd3qXLjxo2r1y8fnwxORpN8Nt/u9fLF5OTFcRVKDWE0HJZF2O71IsOzZ0+e7D3tdDo7168miU9SJ6IhBIGD2KuKMpHaPwoFXkXq9Aak+/u//TsFRCEgEAljjFtbW+UiHPb765sbJ8M+Y9HtdAbDgWs0pSge7O1u9Tar+Xwwmy0m03I0XpgbjydZlk2n43armaVOJIqQDEIIIKBABK/iBYuAnT4HwJrx1CkhRosAIRBBo93ImunO25dv/eBmb231+rUrRTH/we1ba+udhw/uSoZGOzt38Vx/1D85folYLhazqpg3mj5ruNWVTqPhxcEsAOa8ihJiAhMEQVChCgXmhKcWT41O6IGq5kCjqPOTk9F4PF7rrlvKRtQHjx/d2Lk+mgy+evrwhzs3Xuzv0tmbb5z79LNPzm9utdorEoNkzPPx+x+812o10tR7DyCqUwCMQUUIUqm2JOolbb/GjmbwoDGS9GZBnYM4IS0E5+TChS1Nk7Z28uPhW29euvPRf2fN1pdffNE1vXnzxsGgf/B036duvbce5tXG2uZkfnL79q3t8+sA4Rws0CJIAOI8LADUusLqeC0rTl4RJgyg1Fe+fB7yUrxTQoRWVoExbWTjl6P5fD7dO6yIF4P+bDK/92T3hzffLWGffPq5c7K1uf7hj3+PrLqbqxffuDQc9VdWVtY31846l1CFqJlalk8gr/UxAIzLLrZkbVK5WHgHhFIYmS+K6ZhFkY/Hi/5gPOhLUzvnu73L59KVxjyffXH314vFIsbYaTXeuXFtcHx4sPd9S+3l84PMwTsyBrMgQhUIgrEUREWUU4O8uhcLdW45BsWpiVRgoSyYTxYn/ee7e9/f/2Z8eOwYm8pko4EktDvpzOZXL5z3Dd3b2/vi/r1G0795cat//GxrvWPTaW+lvdVdXUkTCaVagAUrF6wKVSAGxIAQJEZYiVjSSrBSq4SVWCFWKINYpQzK4FHmhqjOgbF/+Hx0dJyXtv/46ZuXtrrd1V6ruRhNJqPZlfbq5999/Dt/+LNf/tf/nEyH585t73338OrVS3E8Pnl+MJnOi7i4fG1no7cJ79Sh7vPISwCwZYCERN3TjQCFZFxGE3FJV97iXCGIKBZlcyV799zbIdhsNnvyaP/x3r6Z+Tx02i6Kv/7WO8bFX/3ZT37xyZ2yf9Ta2n73vZ1vf/Xlv338+R/97MOVjc63X99fbXduvfuONBJTAlDxiBGAWKQZSVn2BZ4lDYwAlDQzkF7LEmYQyUSTdkvTzNuiZLz25vlOt3N0cPTwxcH7W2/9+NbN0Uef3Pvs6w/+9KcraePb3+z+6NKF+cHLvvN/89d/2SQsTSdr3clwMh0M1nvr0ki4yI0iQgBSu8dYOwxm4BIEyTrTEY1mHrNpZD3KiMIhlLEokljKWueyytsX3ji/2rqzuzt4Obh95eLd3+ztf7Xv1e2sp8OXw73h8Od//FNNtIh5Rqx322tZWoWimo1c7hQi4mgBxtPpA3VBkXQ0M8BOh5VoS3D8j3+CACqsR4EYzUyIalqaCqCxyIsB/uFf//lPfv93R/2jf/z4m0Zmf/6j977af/wXH/7B6ubK+dVV2+g6D0Yzg4OoA0kRrYoiEXcarFfzq7IueJKsYZktAXl7MRURiARakiQ08xAACHCGybj/i3v3ZoNyDa3/vPPg59cuf/fi4GKjPRmH99cuHT5+Ouv3ipV5b2ve2lxRdfPJLMa4ttKxEEl68ZH52bh4xkDhbLI2OYNiBpL+cHfmnKvfq2nf+eWXi2L+6PgQh42nk+GVc1uf7h9+f1T8ZOftXz76flu7L07Gs7FttCYvknH74PiDt24kjWa/PyvLElupJ2OMZFXBROS3hnoz8sxBS9+YYemhrw9L55xCFCYSalgR7Ehy/+DFnChC0kq3m37j9nr2L7uPnrw8UBb//t2emGsnybW8OW9M3uhc+fbprNWW0Qwh6MwKAQGEEIhlpccYSVLl/wkPM4unqEj6+7MyUZeIpj6pRxOJjDGO5qNpbNLsWTiSyt/dex5mR1Ui+9Phxe72w+moI1mmViSbG7xQNNpHlrhKBnkkcbIwi5WZGegpgMYYq6oCoKoiamZAfB1TWCoq+o/2DxzEiU/TNHXeBCGEqqqKGKGIDPOFTedDY8x8M5HEIRlXi5D7TtrsVpv5cDbZyB69eL7a6IjIfD6NNBHJy1A3tcRJCOV4Nmq1OiqZUgFznonzZVWIECoqXjWpijJGevVipFkZqipYKt6JE1XXjj5tJHk+n41jptrtrifqptNpq9WaTMcqyXon9VlqxHgyi5GjWa6qeZ5DJcsyM6OomVm04WQwnY3XIGkCQGEhzktjORoN8yp3zqVpI0lSUNXB59OhiChE1RdaSK0XSTNqoSSbmQe8F8Bi4jRN00WRr3TY7jSzJAEAQZIkCBUQo1aqmtSTHmJkrGApQtu7lodKKXDQGCyaaJo4mgeAEEPM667sd59/dyoQpUYDUQBCpZiqqmrNIgCEIKlgWS1OTl4OYl9gRoo40VgrRhGpP0G9qMZgFieTKgaKOFXQgolaCE7MBOrgnEO0aKWfLI7rWRaAnC4ATv3ZpiLiVQFYoCmzLAuhHI5eMsInIBkNUXTZUAERAVlvFSrziZoZFxOaaK3HAJOw1K9ArQXKMl/MJ7Ld6Z0l/1m86qMCGEXEe7+EW0+g4iFljCHxLQGcWq0NUp/Ur52K+Vc3ohrrFrs8PzhlJgBiKt7MYoxVKIpiIRudtdc9QSxheTPnHIAYCaNzTlXNzKg+YWRQpgKQRjGIB8Pyb0BJnv1ikFoYOwBEBKAEqFQlaRalFtqkc440n3IZrNp7Z4LExMpgy4SAVBaFVp9zhBCIaBQnDqhPSQgKIAJPksal+AG8ogpRnQCIUVRFoSEE9QIaLNCpqiPFQUK0/wPxadi/ncvxsAAAACV0RVh0ZGF0ZTpjcmVhdGUAMjAxMS0wMi0yOFQwMjo1NTowMiswMTowMGbLlncAAAAldEVYdGRhdGU6bW9kaWZ5ADIwMTEtMDItMjhUMDI6NTU6MDIrMDE6MDAXli7LAAAAEXRFWHRqcGVnOmNvbG9yc3BhY2UAMix1VZ8AAAAgdEVYdGpwZWc6c2FtcGxpbmctZmFjdG9yADF4MSwxeDEsMXgx6ZX8cAAAAABJRU5ErkJggg==
\ No newline at end of file diff --git a/tests/fb2.images-embedded.html b/tests/fb2.images-embedded.html new file mode 100644 index 000000000..19c8f7c7a --- /dev/null +++ b/tests/fb2.images-embedded.html @@ -0,0 +1,14 @@ + + + + + + + + + +
+This image was embedded using data URI scheme

This image was embedded using data URI scheme

+
+ + -- cgit v1.2.3