summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Astanin <s.astanin@gmail.com>2012-04-22 21:04:12 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2012-07-22 11:13:14 -0700
commit436a585c3bbf890d3a5df25cbc577376ee2fa5a1 (patch)
tree0c185be2f0477e25e515f91d74fdacb099cda69d
parenta9429e951cc86abd79efbd12b22d25b5203828db (diff)
FB2: support images embedded as data URIs + tests.
-rw-r--r--src/Tests/Old.hs1
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs70
-rw-r--r--tests/fb2.images-embedded.fb22
-rw-r--r--tests/fb2.images-embedded.html14
4 files changed, 75 insertions, 12 deletions
diff --git a/src/Tests/Old.hs b/src/Tests/Old.hs
index c6952520c..0400b0324 100644
--- a/src/Tests/Old.hs
+++ b/src/Tests/Old.hs
@@ -111,6 +111,7 @@ tests = [ testGroup "markdown"
[ fb2WriterTest "basic" [] "fb2.basic.markdown" "fb2.basic.fb2"
, fb2WriterTest "titles" [] "fb2.titles.markdown" "fb2.titles.fb2"
, fb2WriterTest "images" [] "fb2.images.markdown" "fb2.images.fb2"
+ , fb2WriterTest "images-embedded" [] "fb2.images-embedded.html" "fb2.images-embedded.fb2"
, fb2WriterTest "tables" [] "tables.native" "tables.fb2"
, fb2WriterTest "math" [] "fb2.math.markdown" "fb2.math.fb2"
, fb2WriterTest "math-webtex" ["--webtex"] "fb2.math.markdown" "fb2.math-webtex.fb2"
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 924ffd819..0fbfb3968 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -28,8 +28,8 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where
import Control.Monad.State (StateT, evalStateT, get, modify)
import Control.Monad.State (liftM, liftM2, liftIO)
import Data.ByteString.Base64 (encode)
-import Data.Char (toUpper, toLower, isSpace)
-import Data.List (intersperse, intercalate)
+import Data.Char (toUpper, toLower, isSpace, isAscii, isControl)
+import Data.List (intersperse, intercalate, isPrefixOf)
import Data.Either (lefts, rights)
import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody)
@@ -220,9 +220,15 @@ fetchImages links = do
fetchImage :: String -> String -> IO (Either String Content)
fetchImage href link = do
mbimg <-
- if isURI link
- then fetchURL link
- else do
+ case (isURI link, readDataURI link) of
+ (True, Just (mime,_,True,base64)) ->
+ let mime' = map toLower mime
+ in if mime' == "image/png" || mime' == "image/jpeg"
+ then return (Just (mime',base64))
+ else return Nothing
+ (True, Just _) -> return Nothing -- not base64-encoded
+ (True, Nothing) -> fetchURL link
+ (False, _) -> do
d <- nothingOnError $ B.readFile (unEscapeString link)
let t = case map toLower (takeExtension link) of
".png" -> Just "image/png"
@@ -230,15 +236,13 @@ fetchImage href link = do
".jpeg" -> Just "image/jpeg"
".jpe" -> Just "image/jpeg"
_ -> Nothing -- only PNG and JPEG are supported in FB2
- return $ liftM2 (,) t d
+ return $ liftM2 (,) t (liftM (toStr . encode) d)
case mbimg of
Just (imgtype, imgdata) -> do
- let encdata = encode imgdata
- let encstr = map (toEnum . fromEnum) . B.unpack $ encdata
return . Right $ el "binary"
( [uattr "id" href
, uattr "content-type" imgtype]
- , txt encstr )
+ , txt imgdata )
_ -> return (Left ('#':href))
where
nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString))
@@ -246,8 +250,45 @@ fetchImage href link = do
omnihandler :: E.SomeException -> IO (Maybe B.ByteString)
omnihandler _ = return Nothing
+-- | Extract mime type and encoded data from the Data URI.
+readDataURI :: String -- ^ URI
+ -> Maybe (String,String,Bool,String)
+ -- ^ Maybe (mime,charset,isBase64,data)
+readDataURI uri =
+ let prefix = "data:"
+ in if not (prefix `isPrefixOf` uri)
+ then Nothing
+ else
+ let rest = drop (length prefix) uri
+ meta = takeWhile (/= ',') rest -- without trailing ','
+ uridata = drop (length meta + 1) rest
+ parts = split (== ';') meta
+ (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts
+ in Just (mime,cs,enc,uridata)
+ where
+ upd str m@(mime,cs,enc)
+ | isMimeType str = (str,cs,enc)
+ | "charset=" `isPrefixOf` str = (mime,drop (length "charset=") str,enc)
+ | str == "base64" = (mime,cs,True)
+ | otherwise = m
+
+-- Without parameters like ;charset=...; see RFC 2045, 5.1
+isMimeType :: String -> Bool
+isMimeType s =
+ case split (=='/') s of
+ [mtype,msubtype] ->
+ ((map toLower mtype) `elem` types
+ || "x-" `isPrefixOf` (map toLower mtype))
+ && all valid mtype
+ && all valid msubtype
+ _ -> False
+ where
+ types = ["text","image","audio","video","application","message","multipart"]
+ valid c = isAscii c && not (isControl c) && not (isSpace c) &&
+ c `notElem` "()<>@,;:\\\"/[]?="
+
-- | Fetch URL, return its Content-Type and binary data on success.
-fetchURL :: String -> IO (Maybe (String, B.ByteString))
+fetchURL :: String -> IO (Maybe (String, String))
fetchURL url = do
flip catchIO_ (return Nothing) $ do
r <- browse $ do
@@ -255,10 +296,15 @@ fetchURL url = do
setAllowRedirects True
liftM snd . request . getRequest $ url
let content_type = lookupHeader HdrContentType (getHeaders r)
- content <- liftM (Just . toBS) . getResponseBody $ Right r
+ content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r
return $ liftM2 (,) content_type content
where
- toBS = B.pack . map (toEnum . fromEnum)
+
+toBS :: String -> B.ByteString
+toBS = B.pack . map (toEnum . fromEnum)
+
+toStr :: B.ByteString -> String
+toStr = map (toEnum . fromEnum) . B.unpack
footnoteID :: Int -> String
footnoteID i = "n" ++ (show i)
diff --git a/tests/fb2.images-embedded.fb2 b/tests/fb2.images-embedded.fb2
new file mode 100644
index 000000000..373eda7ff
--- /dev/null
+++ b/tests/fb2.images-embedded.fb2
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink"><description><title-info /><document-info><program-used>pandoc</program-used></document-info></description><body><title><p /></title><annotation><p></p></annotation><section><image l:href="#image1" l:type="imageType" alt="This image was embedded using data URI scheme" /><p>This image was embedded using data URI scheme</p></section></body><binary id="image1" content-type="image/png">iVBORw0KGgoAAAANSUhEUgAAADAAAAAgCAIAAADbtmxLAAABmGlDQ1BpY2MAAHjapdG/axMBGMbxTy4tldJSwSAiHW4ootKCqIOrVShIkRIrJNUluUvaQi4Nd1dEXAQHF4cOXVRcLOLirJv4BygIggqCi7sUBRcpcbiDgtBBfOGF5/315eV9qR7qRUk2EpL087S+MB82mivh2BeBcUcddrwVZYOLS0uLDrRfH1Xgw1wvSjL/ZpNxJ4uohFiKBmlOZYC7t/JBTmUXtWitFROMYDZtNFcIzqDWLvRl1FYL3UAtXa5fIughXC30A4TtQr9AGK2lCcFbzMRJPybYxWScxDFVGE16m1G5ZwUTnf71aziPaXUsYB4h2tjEOnrIMVfG/QJyAC/GtKvYKFlRqQe4jbTkrGKtZM+WvZvI0CnjbtnfKb1XMtBoroR//yzrnjtbbDRxhdFvw+HP04w9Zu/+cPj76XC4t0P1M2+29+c3trnwnerWfm7mCVP3ePl6P9d+xqstjn0dtNIWitMG3S4/njPZ5Mh7xm/8b734Z1m384nlOyy+4+EjTnSZunkyzsP1ft5J+63eKWT1hXn4AzDofghlJQBJAAAACXBIWXMAAAsSAAALEgHS3X78AAACInpUWHRSYXcgcHJvZmlsZSB0eXBlIGV4aWYAAHjahVRJtiQhCNx7ijpCMIhyHNPU9/oGffxeaNY3p/5VC5IAQkAhtL9/evh8Pp+PiwaNKZubAYDuugNcMH4ZIAN6A6ATo68kdAA8VQ1DkoEIq2EILiBiALIQDTISWvz3SSQNJHwnilWTGgC/ZMSa1Fc8TDznZH4rgWOtRrwQKGh8VyNZ8bAY9Ccj1EGXUI0JwNE3n3itxrzis7Sq1TgBiNMwesKo1TjfcdZqXBaiWRpLrcbbEjBLU63G9QGv1bit+CSKWi2W8+3QLDluF/wIBgBEaNQWNSjFzHj7/zgOP92EBap3v2BqlNi2pEbGVi0yBNwkcRPiTVxIJDVLRgJxEXGLRgAgLBASkiIIwlIEYgyBJCnC4lKExN6yGSW6SD961nvvQaxhBZq4rbptbX1HlJPSokN37t9m9957a5utDux7Xwk06WnWWOJ2yqgkPqW4e2urnmNPK0HMtq0Hkkc7ZbSXUleHiNMIoGy7r/ppEwAIV+Amv1rS/3ghgCz23ns+m/HrASdJMWT2chsBiS2z73fcLGd+3E8hZ05nQ81zzOW2n8Saj1VzwTMHZ+g6xcPg5ozLASM7Z/hl9kaPnPFQmrcyvm8lFKbrAQwAoegtYFy34rEXRSFP/qEo4tmQ0wywlwPyG5G/BJQXvF5wOR4k7m9HjlupR/y6Mp42RjhWxm+Oh99BvMrwD3UCiGvkpxuRAAAACXZwQWcAAAAwAAAAIACELJ4GAAALGklEQVRYw11YW48dV1b+1tq7qs6t+/TldPsSx3bbcRwncWY0A4LMCOYFXpgHJCR4QvwAnpAQj/wB/gR/ACR4QUKDECMUEjLOZew4zsRxuu122+52n9Pnfuqy9/p4qNNtD1tLpVKpap/vrMu31rdl/nAgIjSpF0kAJAEHGJwAAIQheqeARVqomCQZGUkjSYLmTAyAkCJiESRVNYQAaL3h8k3SzPj6MjEzsr7SqyoAcRABQItRVZ14kqifmNEkTZNiUZiZ934xm4WE3mva8GYGAA5UJ8YYIyCipiKk1ZuTBAhQBGZQFRKvTAgBaQBIeOekBqpa+0gBxFgAlmhqhixJzUKMRVEUx/3RWmel3z9xzrVX2+tuRb3WnrDKVFUEgKlCVWI055WMZ76AieipX0AYzYwQUZqZqJD0tKCqFJAxhKiqSZKooirj06fPMt/odlezhngFYar+0e7jxWLR6/Vc7ubzpNnM4BlDSDLvvQ8hnAYFIlCFRUDEjASgIGFGEcBIoSrMCFAVhgiKTxJPkgJVSeBBhKqaTCb5Ip4cD/efPP/g9q2soSvdTp7nAuv2Vk/2BoHFyWgRYxVjbDezRiMDMicNxqCqqhIZVZxZVK3zaRkgXYaMdeDMoMoapSpo8GSE1t6ihaCqPs3WV/VwPpxOp8fHRw++cUmq7XYzSbLV1W5vu3fw7Emn0xwcD+ZzF8vKQtlI09R7J6SXOldgEDEwAmdguMyYV/lEVWEkARExEkoVBxHWxeUSFefK6fTJ48dlme/sXN7a3vjVnf8NIQ6Hk1ajOZ+Nf/3ZZ9Uij0V+fedqM0v2nz5ut7LtCz1GC2WVJqmry1KgZKLqlbo0qLPanJrAVKgSRX/LPElxQF0smiDy6Ojo7pd3qXLjxo2r1y8fnwxORpN8Nt/u9fLF5OTFcRVKDWE0HJZF2O71IsOzZ0+e7D3tdDo7168miU9SJ6IhBIGD2KuKMpHaPwoFXkXq9Aak+/u//TsFRCEgEAljjFtbW+UiHPb765sbJ8M+Y9HtdAbDgWs0pSge7O1u9Tar+Xwwmy0m03I0XpgbjydZlk2n43armaVOJIqQDEIIIKBABK/iBYuAnT4HwJrx1CkhRosAIRBBo93ImunO25dv/eBmb231+rUrRTH/we1ba+udhw/uSoZGOzt38Vx/1D85folYLhazqpg3mj5ruNWVTqPhxcEsAOa8ihJiAhMEQVChCgXmhKcWT41O6IGq5kCjqPOTk9F4PF7rrlvKRtQHjx/d2Lk+mgy+evrwhzs3Xuzv0tmbb5z79LNPzm9utdorEoNkzPPx+x+812o10tR7DyCqUwCMQUUIUqm2JOolbb/GjmbwoDGS9GZBnYM4IS0E5+TChS1Nk7Z28uPhW29euvPRf2fN1pdffNE1vXnzxsGgf/B036duvbce5tXG2uZkfnL79q3t8+sA4Rws0CJIAOI8LADUusLqeC0rTl4RJgyg1Fe+fB7yUrxTQoRWVoExbWTjl6P5fD7dO6yIF4P+bDK/92T3hzffLWGffPq5c7K1uf7hj3+PrLqbqxffuDQc9VdWVtY31846l1CFqJlalk8gr/UxAIzLLrZkbVK5WHgHhFIYmS+K6ZhFkY/Hi/5gPOhLUzvnu73L59KVxjyffXH314vFIsbYaTXeuXFtcHx4sPd9S+3l84PMwTsyBrMgQhUIgrEUREWUU4O8uhcLdW45BsWpiVRgoSyYTxYn/ee7e9/f/2Z8eOwYm8pko4EktDvpzOZXL5z3Dd3b2/vi/r1G0795cat//GxrvWPTaW+lvdVdXUkTCaVagAUrF6wKVSAGxIAQJEZYiVjSSrBSq4SVWCFWKINYpQzK4FHmhqjOgbF/+Hx0dJyXtv/46ZuXtrrd1V6ruRhNJqPZlfbq5999/Dt/+LNf/tf/nEyH585t73338OrVS3E8Pnl+MJnOi7i4fG1no7cJ79Sh7vPISwCwZYCERN3TjQCFZFxGE3FJV97iXCGIKBZlcyV799zbIdhsNnvyaP/x3r6Z+Tx02i6Kv/7WO8bFX/3ZT37xyZ2yf9Ta2n73vZ1vf/Xlv338+R/97MOVjc63X99fbXduvfuONBJTAlDxiBGAWKQZSVn2BZ4lDYwAlDQzkF7LEmYQyUSTdkvTzNuiZLz25vlOt3N0cPTwxcH7W2/9+NbN0Uef3Pvs6w/+9KcraePb3+z+6NKF+cHLvvN/89d/2SQsTSdr3clwMh0M1nvr0ki4yI0iQgBSu8dYOwxm4BIEyTrTEY1mHrNpZD3KiMIhlLEokljKWueyytsX3ji/2rqzuzt4Obh95eLd3+ztf7Xv1e2sp8OXw73h8Od//FNNtIh5Rqx322tZWoWimo1c7hQi4mgBxtPpA3VBkXQ0M8BOh5VoS3D8j3+CACqsR4EYzUyIalqaCqCxyIsB/uFf//lPfv93R/2jf/z4m0Zmf/6j977af/wXH/7B6ubK+dVV2+g6D0Yzg4OoA0kRrYoiEXcarFfzq7IueJKsYZktAXl7MRURiARakiQ08xAACHCGybj/i3v3ZoNyDa3/vPPg59cuf/fi4GKjPRmH99cuHT5+Ouv3ipV5b2ve2lxRdfPJLMa4ttKxEEl68ZH52bh4xkDhbLI2OYNiBpL+cHfmnKvfq2nf+eWXi2L+6PgQh42nk+GVc1uf7h9+f1T8ZOftXz76flu7L07Gs7FttCYvknH74PiDt24kjWa/PyvLElupJ2OMZFXBROS3hnoz8sxBS9+YYemhrw9L55xCFCYSalgR7Ehy/+DFnChC0kq3m37j9nr2L7uPnrw8UBb//t2emGsnybW8OW9M3uhc+fbprNWW0Qwh6MwKAQGEEIhlpccYSVLl/wkPM4unqEj6+7MyUZeIpj6pRxOJjDGO5qNpbNLsWTiSyt/dex5mR1Ui+9Phxe72w+moI1mmViSbG7xQNNpHlrhKBnkkcbIwi5WZGegpgMYYq6oCoKoiamZAfB1TWCoq+o/2DxzEiU/TNHXeBCGEqqqKGKGIDPOFTedDY8x8M5HEIRlXi5D7TtrsVpv5cDbZyB69eL7a6IjIfD6NNBHJy1A3tcRJCOV4Nmq1OiqZUgFznonzZVWIECoqXjWpijJGevVipFkZqipYKt6JE1XXjj5tJHk+n41jptrtrifqptNpq9WaTMcqyXon9VlqxHgyi5GjWa6qeZ5DJcsyM6OomVm04WQwnY3XIGkCQGEhzktjORoN8yp3zqVpI0lSUNXB59OhiChE1RdaSK0XSTNqoSSbmQe8F8Bi4jRN00WRr3TY7jSzJAEAQZIkCBUQo1aqmtSTHmJkrGApQtu7lodKKXDQGCyaaJo4mgeAEEPM667sd59/dyoQpUYDUQBCpZiqqmrNIgCEIKlgWS1OTl4OYl9gRoo40VgrRhGpP0G9qMZgFieTKgaKOFXQgolaCE7MBOrgnEO0aKWfLI7rWRaAnC4ATv3ZpiLiVQFYoCmzLAuhHI5eMsInIBkNUXTZUAERAVlvFSrziZoZFxOaaK3HAJOw1K9ArQXKMl/MJ7Ld6Z0l/1m86qMCGEXEe7+EW0+g4iFljCHxLQGcWq0NUp/Ur52K+Vc3ohrrFrs8PzhlJgBiKt7MYoxVKIpiIRudtdc9QSxheTPnHIAYCaNzTlXNzKg+YWRQpgKQRjGIB8Pyb0BJnv1ikFoYOwBEBKAEqFQlaRalFtqkc440n3IZrNp7Z4LExMpgy4SAVBaFVp9zhBCIaBQnDqhPSQgKIAJPksal+AG8ogpRnQCIUVRFoSEE9QIaLNCpqiPFQUK0/wPxadi/ncvxsAAAACV0RVh0ZGF0ZTpjcmVhdGUAMjAxMS0wMi0yOFQwMjo1NTowMiswMTowMGbLlncAAAAldEVYdGRhdGU6bW9kaWZ5ADIwMTEtMDItMjhUMDI6NTU6MDIrMDE6MDAXli7LAAAAEXRFWHRqcGVnOmNvbG9yc3BhY2UAMix1VZ8AAAAgdEVYdGpwZWc6c2FtcGxpbmctZmFjdG9yADF4MSwxeDEsMXgx6ZX8cAAAAABJRU5ErkJggg==</binary></FictionBook> \ No newline at end of file
diff --git a/tests/fb2.images-embedded.html b/tests/fb2.images-embedded.html
new file mode 100644
index 000000000..19c8f7c7a
--- /dev/null
+++ b/tests/fb2.images-embedded.html
@@ -0,0 +1,14 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+ <meta http-equiv="Content-Style-Type" content="text/css" />
+ <meta name="generator" content="pandoc" />
+ <title></title>
+</head>
+<body>
+<div class="figure">
+<img src="" alt="This image was embedded using data URI scheme" /><p class="caption">This image was embedded using data URI scheme</p>
+</div>
+</body>
+</html>