author     Joey Hess <id@joeyh.name>    2014-12-03 14:02:29 -0400
committer  Joey Hess <id@joeyh.name>    2014-12-03 14:10:52 -0400
commit     911ba8d972e4e7b151385d30c198598e1a0dfaca (patch)
tree       03c2af0f2819bf7f165151033f700daf9f3aac79 /Remote
parent     c994d73fcf88a2dcd10314d6111964425dc5fc32 (diff)
parent     748e403fedf6e1c1f27114149e1cce61725ef36e (diff)
Merge branch 's3-aws'
Diffstat (limited to 'Remote')
-rw-r--r--  Remote/Helper/AWS.hs    38
-rw-r--r--  Remote/Helper/Http.hs   52
-rw-r--r--  Remote/S3.hs           526
3 files changed, 411 insertions, 205 deletions
diff --git a/Remote/Helper/AWS.hs b/Remote/Helper/AWS.hs
index 9b3643bc23..d27f2aad17 100644
--- a/Remote/Helper/AWS.hs
+++ b/Remote/Helper/AWS.hs
@@ -1,6 +1,6 @@
{- Amazon Web Services common infrastructure.
-
- - Copyright 2011,2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -12,8 +12,14 @@ module Remote.Helper.AWS where
import Common.Annex
import Creds
+import qualified Aws
+import qualified Aws.S3 as S3
import qualified Data.Map as M
+import qualified Data.ByteString as B
+import qualified Data.Text as T
+import Data.Text.Encoding (encodeUtf8)
import Data.Text (Text)
+import Data.IORef
creds :: UUID -> CredPairStorage
creds u = CredPairStorage
@@ -22,6 +28,13 @@ creds u = CredPairStorage
, credPairRemoteKey = Just "s3creds"
}
+genCredentials :: CredPair -> IO Aws.Credentials
+genCredentials (keyid, secret) = Aws.Credentials
+ <$> pure (encodeUtf8 (T.pack keyid))
+ <*> pure (encodeUtf8 (T.pack secret))
+ <*> newIORef []
+ <*> pure Nothing
+
data Service = S3 | Glacier
deriving (Eq)
@@ -33,9 +46,10 @@ regionMap = M.fromList . regionInfo
defaultRegion :: Service -> Region
defaultRegion = snd . Prelude.head . regionInfo
-{- S3 and Glacier use different names for some regions. Ie, "us-east-1"
- - cannot be used with S3, while "US" cannot be used with Glacier. Dunno why.
- - Also, Glacier is not yet available in all regions. -}
+data ServiceRegion = BothRegion Region | S3Region Region | GlacierRegion Region
+
+{- The "US" and "EU" names are used as location constraints when creating a
+ - S3 bucket. -}
regionInfo :: Service -> [(Text, Region)]
regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $
filter (matchingService . snd) $
@@ -45,9 +59,7 @@ regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $
[ ("US East (N. Virginia)", [S3Region "US", GlacierRegion "us-east-1"])
, ("US West (Oregon)", [BothRegion "us-west-2"])
, ("US West (N. California)", [BothRegion "us-west-1"])
- -- Requires AWS4-HMAC-SHA256 which S3 library does not
- -- currently support.
- -- , ("EU (Frankfurt)", [BothRegion "eu-central-1"])
+ , ("EU (Frankfurt)", [BothRegion "eu-central-1"])
, ("EU (Ireland)", [S3Region "EU", GlacierRegion "eu-west-1"])
, ("Asia Pacific (Singapore)", [S3Region "ap-southeast-1"])
, ("Asia Pacific (Tokyo)", [BothRegion "ap-northeast-1"])
@@ -63,4 +75,14 @@ regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $
matchingService (S3Region _) = service == S3
matchingService (GlacierRegion _) = service == Glacier
-data ServiceRegion = BothRegion Region | S3Region Region | GlacierRegion Region
+s3HostName :: Region -> B.ByteString
+s3HostName "US" = "s3.amazonaws.com"
+s3HostName "EU" = "s3-eu-west-1.amazonaws.com"
+s3HostName r = encodeUtf8 $ T.concat ["s3-", r, ".amazonaws.com"]
+
+s3DefaultHost :: String
+s3DefaultHost = "s3.amazonaws.com"
+
+mkLocationConstraint :: Region -> S3.LocationConstraint
+mkLocationConstraint "US" = S3.locationUsClassic
+mkLocationConstraint r = r
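
The helpers added above are meant to be combined when opening an S3 connection: genCredentials turns a stored credential pair into aws-package credentials, s3HostName picks a region-appropriate endpoint, and mkLocationConstraint maps the legacy "US" name onto the classic location constraint used when creating a bucket. A minimal sketch of that wiring, not part of the patch (mkS3Config is a hypothetical name, and plain HTTP on the standard endpoint is assumed):

import qualified Aws
import qualified Aws.S3 as S3
import qualified Data.Text as T
import Remote.Helper.AWS (genCredentials, s3HostName)

-- Hypothetical helper: build the aws and S3 configurations for a region
-- from a (keyid, secret) pair, mirroring what Remote/S3.hs does below.
mkS3Config :: (String, String) -> T.Text -> IO (Aws.Configuration, S3.S3Configuration Aws.NormalQuery)
mkS3Config credpair region = do
    creds <- genCredentials credpair
    let awscfg = Aws.Configuration Aws.Timestamp creds (Aws.defaultLog Aws.Error)
    let s3cfg = S3.s3 Aws.HTTP (s3HostName region) False
    return (awscfg, s3cfg)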
diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs
index f1d576d1c9..6ce5bacb82 100644
--- a/Remote/Helper/Http.hs
+++ b/Remote/Helper/Http.hs
@@ -5,13 +5,15 @@
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE BangPatterns #-}
+
module Remote.Helper.Http where
import Common.Annex
import Types.StoreRetrieve
import Utility.Metered
import Remote.Helper.Special
-import Network.HTTP.Client (RequestBody(..), Response, responseStatus, responseBody, BodyReader)
+import Network.HTTP.Client (RequestBody(..), Response, responseStatus, responseBody, BodyReader, NeedsPopper)
import Network.HTTP.Types
import qualified Data.ByteString.Lazy as L
@@ -24,17 +26,45 @@ import Control.Concurrent
-- Implemented as a fileStorer, so that the content can be streamed
-- from the file in constant space.
httpStorer :: (Key -> RequestBody -> Annex Bool) -> Storer
-httpStorer a = fileStorer $ \k f m -> do
- size <- liftIO $ (fromIntegral . fileSize <$> getFileStatus f :: IO Integer)
- let streamer sink = withMeteredFile f m $ \b -> do
- mvar <- newMVar $ L.toChunks b
- let getnextchunk = modifyMVar mvar $ pure . pop
- sink getnextchunk
- let body = RequestBodyStream (fromInteger size) streamer
- a k body
+httpStorer a = fileStorer $ \k f m -> a k =<< liftIO (httpBodyStorer f m)
+
+-- Reads the file and generates a streaming request body that will update
+-- the meter as it's sent.
+httpBodyStorer :: FilePath -> MeterUpdate -> IO RequestBody
+httpBodyStorer src m = do
+ size <- fromIntegral . fileSize <$> getFileStatus src :: IO Integer
+ let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
+ return $ RequestBodyStream (fromInteger size) streamer
+
+byteStringPopper :: L.ByteString -> NeedsPopper () -> IO ()
+byteStringPopper b sink = do
+ mvar <- newMVar $ L.toChunks b
+ let getnextchunk = modifyMVar mvar $ \v ->
+ case v of
+ [] -> return ([], S.empty)
+ (c:cs) -> return (cs, c)
+ sink getnextchunk
+
+{- Makes a Popper that streams a given number of chunks of a given
+ - size from the handle, updating the meter as the chunks are read. -}
+handlePopper :: Integer -> Int -> MeterUpdate -> Handle -> NeedsPopper () -> IO ()
+handlePopper numchunks chunksize meterupdate h sink = do
+ mvar <- newMVar zeroBytesProcessed
+ let getnextchunk = do
+ sent <- takeMVar mvar
+ if sent >= target
+ then do
+ putMVar mvar sent
+ return S.empty
+ else do
+ b <- S.hGet h chunksize
+ let !sent' = addBytesProcessed sent chunksize
+ putMVar mvar sent'
+ meterupdate sent'
+ return b
+ sink getnextchunk
where
- pop [] = ([], S.empty)
- pop (c:cs) = (cs, c)
+ target = toBytesProcessed (numchunks * fromIntegral chunksize)
-- Reads the http body and stores it to the specified file, updating the
-- meter as it goes.
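
As a usage sketch (not part of this patch), handlePopper slots straight into http-client's RequestBodyStream; streaming the next part of an already-open file could look like the following, assuming the same 32 KiB read size used by the callers in Remote/S3.hs (partBody is a hypothetical name):

import Network.HTTP.Client (RequestBody(..))
import Remote.Helper.Http (handlePopper)
import Utility.Metered (MeterUpdate)
import System.IO (Handle)

-- Hypothetical wrapper: send the next partsize bytes from an open handle
-- as a streaming request body, updating the meter as chunks are read.
partBody :: Integer -> MeterUpdate -> Handle -> RequestBody
partBody partsize meter h = RequestBodyStream (fromIntegral partsize) popper
  where
    chunksize = 32768 :: Int
    numchunks = ceiling (fromIntegral partsize / fromIntegral chunksize :: Double)
    popper = handlePopper numchunks chunksize meter h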
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 281aaf6f3a..844d87902d 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -1,22 +1,31 @@
{- S3 remotes
-
- - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE CPP #-}
+
module Remote.S3 (remote, iaHost, configIA, iaItemUrl) where
-import Network.AWS.AWSConnection
-import Network.AWS.S3Object hiding (getStorageClass)
-import Network.AWS.S3Bucket hiding (size)
-import Network.AWS.AWSResult
+import qualified Aws as AWS
+import qualified Aws.Core as AWS
+import qualified Aws.S3 as S3
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString as S
import qualified Data.Map as M
import Data.Char
import Network.Socket (HostName)
+import Network.HTTP.Conduit (Manager, newManager, closeManager)
+import Network.HTTP.Client (defaultManagerSettings, managerResponseTimeout, responseStatus, responseBody, RequestBody(..))
+import Network.HTTP.Types
+import Control.Monad.Trans.Resource
+import Control.Monad.Catch
+import Data.Conduit
import Common.Annex
import Types.Remote
@@ -25,13 +34,15 @@ import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
+import Remote.Helper.Http
import qualified Remote.Helper.AWS as AWS
import Creds
-import Utility.Metered
import Annex.UUID
import Logs.Web
+import Utility.Metered
+import Utility.DataUnits
-type Bucket = String
+type BucketName = String
remote :: RemoteType
remote = RemoteType {
@@ -42,13 +53,16 @@ remote = RemoteType {
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
-gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
+gen r u c gc = do
+ cst <- remoteCost gc expensiveRemoteCost
+ info <- extractS3Info c
+ return $ new cst info
where
- new cst = Just $ specialRemote c
- (prepareStore this)
- (prepareRetrieve this)
- (simplyPrepare $ remove this c)
- (simplyPrepare $ checkKey this)
+ new cst info = Just $ specialRemote c
+ (prepareS3 this info $ store this)
+ (prepareS3 this info retrieve)
+ (prepareS3 this info remove)
+ (prepareS3 this info $ checkKey this)
this
where
this = Remote {
@@ -73,10 +87,11 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
remotetype = remote,
mkUnavailable = gen r u (M.insert "host" "!dne!" c) gc,
getInfo = includeCredsInfo c (AWS.creds u) $ catMaybes
- [ Just ("bucket", fromMaybe "unknown" (getBucket c))
+ [ Just ("bucket", fromMaybe "unknown" (getBucketName c))
, if configIA c
- then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucket c)
+ then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c)
else Nothing
+ , Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
]
}
@@ -92,8 +107,8 @@ s3Setup' u mcreds c = if configIA c then archiveorg else defaulthost
defaults = M.fromList
[ ("datacenter", T.unpack $ AWS.defaultRegion AWS.S3)
, ("storageclass", "STANDARD")
- , ("host", defaultAmazonS3Host)
- , ("port", show defaultAmazonS3Port)
+ , ("host", AWS.s3DefaultHost)
+ , ("port", "80")
, ("bucket", defbucket)
]
@@ -113,52 +128,114 @@ s3Setup' u mcreds c = if configIA c then archiveorg else defaulthost
c' <- setRemoteCredPair noEncryptionUsed c (AWS.creds u) mcreds
-- Ensure user enters a valid bucket name, since
-- this determines the name of the archive.org item.
- let bucket = replace " " "-" $ map toLower $
+ let validbucket = replace " " "-" $ map toLower $
fromMaybe (error "specify bucket=") $
- getBucket c'
+ getBucketName c'
let archiveconfig =
- -- hS3 does not pass through x-archive-* headers
+ -- IA accepts x-amz-* as an alias for x-archive-*
M.mapKeys (replace "x-archive-" "x-amz-") $
-- encryption does not make sense here
M.insert "encryption" "none" $
- M.insert "bucket" bucket $
+ M.insert "bucket" validbucket $
M.union c' $
-- special constraints on key names
- M.insert "mungekeys" "ia" $
- -- bucket created only when files are uploaded
- M.insert "x-amz-auto-make-bucket" "1" defaults
- writeUUIDFile archiveconfig u
+ M.insert "mungekeys" "ia" defaults
+ info <- extractS3Info archiveconfig
+ withS3Handle archiveconfig u info $
+ writeUUIDFile archiveconfig u
use archiveconfig
-prepareStore :: Remote -> Preparer Storer
-prepareStore r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
- fileStorer $ \k src p -> do
- ok <- s3Bool =<< liftIO (store (conn, bucket) r k p src)
-
- -- Store public URL to item in Internet Archive.
- when (ok && configIA (config r) && not (isChunkKey k)) $
- setUrlPresent k (iaKeyUrl r k)
-
- return ok
-
-store :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> IO (AWSResult ())
-store (conn, bucket) r k p file = do
- size <- (fromIntegral . fileSize <$> getFileStatus file) :: IO Integer
- withMeteredFile file p $ \content -> do
- -- size is provided to S3 so the whole content
- -- does not need to be buffered to calculate it
- let object = S3Object
- bucket (bucketFile r k) ""
- (("Content-Length", show size) : getXheaders (config r))
- content
- sendObject conn $
- setStorageClass (getStorageClass $ config r) object
-
-prepareRetrieve :: Remote -> Preparer Retriever
-prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
- byteRetriever $ \k sink ->
- liftIO (getObject conn $ bucketKey r bucket k)
- >>= either s3Error (sink . obj_data)
+-- Sets up an http connection manager for the S3 endpoint, which allows
+-- http connections to be reused across calls to the helper.
+prepareS3 :: Remote -> S3Info -> (S3Handle -> helper) -> Preparer helper
+prepareS3 r info = resourcePrepare $ const $
+ withS3Handle (config r) (uuid r) info
+
+store :: Remote -> S3Handle -> Storer
+store r h = fileStorer $ \k f p -> do
+ case partSize (hinfo h) of
+ Just partsz | partsz > 0 -> do
+ fsz <- fromIntegral . fileSize <$> liftIO (getFileStatus f)
+ if fsz > partsz
+ then multipartupload fsz partsz k f p
+ else singlepartupload k f p
+ _ -> singlepartupload k f p
+ -- Store public URL to item in Internet Archive.
+ when (isIA (hinfo h) && not (isChunkKey k)) $
+ setUrlPresent k (iaKeyUrl r k)
+ return True
+ where
+ singlepartupload k f p = do
+ rbody <- liftIO $ httpBodyStorer f p
+ void $ sendS3Handle h $ putObject h (bucketObject (hinfo h) k) rbody
+ multipartupload fsz partsz k f p = do
+#if MIN_VERSION_aws(0,10,6)
+ let info = hinfo h
+ let object = bucketObject info k
+
+ let startreq = (S3.postInitiateMultipartUpload (bucket info) object)
+ { S3.imuStorageClass = Just (storageClass info)
+ , S3.imuMetadata = metaHeaders info
+ , S3.imuAutoMakeBucket = isIA info
+ , S3.imuExpires = Nothing -- TODO set some reasonable expiry
+ }
+ uploadid <- S3.imurUploadId <$> sendS3Handle h startreq
+
+ -- The actual part size will be an even multiple of the
+ -- 32k chunk size that hGetUntilMetered uses.
+ let partsz' = (partsz `div` toInteger defaultChunkSize) * toInteger defaultChunkSize
+
+ -- Send parts of the file, taking care to stream each part
+ -- w/o buffering in memory, since the parts can be large.
+ etags <- bracketIO (openBinaryFile f ReadMode) hClose $ \fh -> do
+ let sendparts meter etags partnum = do
+ pos <- liftIO $ hTell fh
+ if pos >= fsz
+ then return (reverse etags)
+ else do
+ -- Calculate size of part that will
+ -- be read.
+ let sz = if fsz - pos < partsz'
+ then fsz - pos
+ else partsz'
+ let p' = offsetMeterUpdate p (toBytesProcessed pos)
+ let numchunks = ceiling (fromIntegral sz / fromIntegral defaultChunkSize :: Double)
+ let popper = handlePopper numchunks defaultChunkSize p' fh
+ let req = S3.uploadPart (bucket info) object partnum uploadid $
+ RequestBodyStream (fromIntegral sz) popper
+ S3.UploadPartResponse _ etag <- sendS3Handle h req
+ sendparts (offsetMeterUpdate meter (toBytesProcessed sz)) (etag:etags) (partnum + 1)
+ sendparts p [] 1
+
+ void $ sendS3Handle h $ S3.postCompleteMultipartUpload
+ (bucket info) object uploadid (zip [1..] etags)
+#else
+ warning $ "Cannot do multipart upload (partsize " ++ show partsz ++ ") of large file (" ++ show fsz ++ "); built with too old a version of the aws library."
+ singlepartupload k f p
+#endif
+
+{- Implemented as a fileRetriever that uses conduit to stream the chunks
+ - out to the file. Would be better to implement a byteRetriever, but
+ - that is difficult. -}
+retrieve :: S3Handle -> Retriever
+retrieve h = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
+ (fr, fh) <- allocate (openFile f WriteMode) hClose
+ let req = S3.getObject (bucket info) (bucketObject info k)
+ S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
+ responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed
+ release fr
+ where
+ info = hinfo h
+ sinkprogressfile fh meterupdate sofar = do
+ mbs <- await
+ case mbs of
+ Nothing -> return ()
+ Just bs -> do
+ let sofar' = addBytesProcessed sofar (S.length bs)
+ liftIO $ do
+ void $ meterupdate sofar'
+ S.hPut fh bs
+ sinkprogressfile fh meterupdate sofar'
retrieveCheap :: Key -> FilePath -> Annex Bool
retrieveCheap _ _ = return False
@@ -166,186 +243,263 @@ retrieveCheap _ _ = return False
{- Internet Archive doesn't easily allow removing content.
- While it may remove the file, there are generally other files
- derived from it that it does not remove. -}
-remove :: Remote -> RemoteConfig -> Remover
-remove r c k
- | configIA c = do
+remove :: S3Handle -> Remover
+remove h k
+ | isIA info = do
warning "Cannot remove content from the Internet Archive"
return False
- | otherwise = remove' r k
-
-remove' :: Remote -> Key -> Annex Bool
-remove' r k = s3Action r False $ \(conn, bucket) ->
- s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k)
-
-checkKey :: Remote -> CheckPresent
-checkKey r k = s3Action r noconn $ \(conn, bucket) -> do
- showAction $ "checking " ++ name r
- res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k
- case res of
- Right _ -> return True
- Left (AWSError _ _) -> return False
- Left e -> s3Error e
+ | otherwise = do
+ res <- tryNonAsync $ sendS3Handle h $
+ S3.DeleteObject (bucketObject info k) (bucket info)
+ return $ either (const False) (const True) res
where
- noconn = error "S3 not configured"
-
-s3Warning :: ReqError -> Annex Bool
-s3Warning e = do
- warning $ prettyReqError e
- return False
-
-s3Error :: ReqError -> a
-s3Error e = error $ prettyReqError e
-
-s3Bool :: AWSResult () -> Annex Bool
-s3Bool (Right _) = return True
-s3Bool (Left e) = s3Warning e
-
-s3Action :: Remote -> a -> ((AWSConnection, Bucket) -> Annex a) -> Annex a
-s3Action r noconn action = do
- let bucket = M.lookup "bucket" $ config r
- conn <- s3Connection (config r) (uuid r)
- case (bucket, conn) of
- (Just b, Just c) -> action (c, b)
- _ -> return noconn
-
-bucketFile :: Remote -> Key -> FilePath
-bucketFile r = munge . key2file
- where
- munge s = case M.lookup "mungekeys" c of
- Just "ia" -> iaMunge $ filePrefix c ++ s
- _ -> filePrefix c ++ s
- c = config r
-
-filePrefix :: RemoteConfig -> String
-filePrefix = M.findWithDefault "" "fileprefix"
-
-bucketKey :: Remote -> Bucket -> Key -> S3Object
-bucketKey r bucket k = S3Object bucket (bucketFile r k) "" [] L.empty
+ info = hinfo h
-{- Internet Archive limits filenames to a subset of ascii,
- - with no whitespace. Other characters are xml entity
- - encoded. -}
-iaMunge :: String -> String
-iaMunge = (>>= munge)
+checkKey :: Remote -> S3Handle -> CheckPresent
+checkKey r h k = do
+ showAction $ "checking " ++ name r
+#if MIN_VERSION_aws(0,10,0)
+ rsp <- go
+ return (isJust $ S3.horMetadata rsp)
+#else
+ catchMissingException $ do
+ void go
+ return True
+#endif
where
- munge c
- | isAsciiUpper c || isAsciiLower c || isNumber c = [c]
- | c `elem` "_-.\"" = [c]
- | isSpace c = []
- | otherwise = "&" ++ show (ord c) ++ ";"
+ go = sendS3Handle h $
+ S3.headObject (bucket (hinfo h)) (bucketObject (hinfo h) k)
+
+#if ! MIN_VERSION_aws(0,10,0)
+ {- Catch exception headObject returns when an object is not present
+ - in the bucket, and returns False. All other exceptions indicate a
+ - check error and are let through. -}
+ catchMissingException :: Annex Bool -> Annex Bool
+ catchMissingException a = catchJust missing a (const $ return False)
+ where
+ missing :: AWS.HeaderException -> Maybe ()
+ missing e
+ | AWS.headerErrorMessage e == "ETag missing" = Just ()
+ | otherwise = Nothing
+#endif
{- Generate the bucket if it does not already exist, including creating the
- UUID file within the bucket.
-
- - To check if the bucket exists, ask for its location. However, some ACLs
- - can allow read/write to buckets, but not querying location, so first
- - check if the UUID file already exists and we can skip doing anything.
+ - Some ACLs can allow read/write to buckets, but not querying them,
+ - so first check if the UUID file already exists and we can skip doing
+ - anything.
-}
genBucket :: RemoteConfig -> UUID -> Annex ()
genBucket c u = do
- conn <- s3ConnectionRequired c u
showAction "checking bucket"
- unlessM ((== Right True) <$> checkUUIDFile c u conn) $ do
- loc <- liftIO $ getBucketLocation conn bucket
- case loc of
- Right _ -> writeUUIDFile c u
- Left err@(NetworkError _) -> s3Error err
- Left (AWSError _ _) -> do
- showAction $ "creating bucket in " ++ datacenter
- res <- liftIO $ createBucketIn conn bucket datacenter
- case res of
- Right _ -> writeUUIDFile c u
- Left err -> s3Error err
+ info <- extractS3Info c
+ withS3Handle c u info $ \h ->
+ go h =<< checkUUIDFile c u h
where
- bucket = fromJust $ getBucket c
+ go _ (Right True) = noop
+ go h _ = do
+ v <- tryNonAsync $ sendS3Handle h (S3.getBucket $ bucket $ hinfo h)
+ case v of
+ Right _ -> noop
+ Left _ -> do
+ showAction $ "creating bucket in " ++ datacenter
+ void $ sendS3Handle h $
+ S3.PutBucket (bucket $ hinfo h) Nothing $
+ AWS.mkLocationConstraint $
+ T.pack datacenter
+ writeUUIDFile c u h
+
datacenter = fromJust $ M.lookup "datacenter" c
{- Writes the UUID to an annex-uuid file within the bucket.
-
- If the file already exists in the bucket, it must match.
-
- - Note that IA items do not get created by createBucketIn.
- - Rather, they are created the first time a file is stored in them.
- - So this also takes care of that.
+ - Note that IA buckets can only be created by having a file
+ - stored in them. So this also takes care of that.
-}
-writeUUIDFile :: RemoteConfig -> UUID -> Annex ()
-writeUUIDFile c u = do
- conn <- s3ConnectionRequired c u
- v <- checkUUIDFile c u conn
+writeUUIDFile :: RemoteConfig -> UUID -> S3Handle -> Annex ()
+writeUUIDFile c u h = do
+ v <- checkUUIDFile c u h
case v of
- Left e -> error e
- Right True -> return ()
- Right False -> do
- let object = setStorageClass (getStorageClass c) (mkobject uuidb)
- either s3Error return =<< liftIO (sendObject conn object)
+ Right True -> noop
+ _ -> void $ sendS3Handle h mkobject
where
- file = uuidFile c
+ file = T.pack $ uuidFile c
uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u]
- bucket = fromJust $ getBucket c
- mkobject = S3Object bucket file "" (getXheaders c)
+ mkobject = putObject h file (RequestBodyLBS uuidb)
-{- Checks if the UUID file exists in the bucket and has the specified UUID already. -}
-checkUUIDFile :: RemoteConfig -> UUID -> AWSConnection -> Annex (Either String Bool)
-checkUUIDFile c u conn = check <$> liftIO (tryNonAsync $ getObject conn $ mkobject L.empty)
+{- Checks if the UUID file exists in the bucket
+ - and has the specified UUID already. -}
+checkUUIDFile :: RemoteConfig -> UUID -> S3Handle -> Annex (Either SomeException Bool)
+checkUUIDFile c u h = tryNonAsync $ check <$> get
where
- check (Right (Right o))
- | obj_data o == uuidb = Right True
- | otherwise = Left $ "This bucket is already in use by a different S3 special remote, with UUID: " ++ show (obj_data o)
- check _ = Right False
-
+ get = liftIO
+ . runResourceT
+ . either (pure . Left) (Right <$$> AWS.loadToMemory)
+ =<< tryS3 (sendS3Handle h (S3.getObject (bucket (hinfo h)) file))
+ check (Right (S3.GetObjectMemoryResponse _meta rsp)) =
+ responseStatus rsp == ok200 && responseBody rsp == uuidb
+ check (Left _S3Error) = False
+
+ file = T.pack $ uuidFile c
uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u]
- bucket = fromJust $ getBucket c
- file = uuidFile c
-
- mkobject = S3Object bucket file "" (getXheaders c)
uuidFile :: RemoteConfig -> FilePath
-uuidFile c = filePrefix c ++ "annex-uuid"
-
-s3ConnectionRequired :: RemoteConfig -> UUID -> Annex AWSConnection
-s3ConnectionRequired c u =
- maybe (error "Cannot connect to S3") return =<< s3Connection c u
-
-s3Connection :: RemoteConfig -> UUID -> Annex (Maybe AWSConnection)
-s3Connection c u = go =<< getRemoteCredPairFor "S3" c (AWS.creds u)
+uuidFile c = getFilePrefix c ++ "annex-uuid"
+
+putObject :: S3Handle -> T.Text -> RequestBody -> S3.PutObject
+putObject h file rbody = (S3.putObject (bucket info) file rbody)
+ { S3.poStorageClass = Just (storageClass info)
+ , S3.poMetadata = metaHeaders info
+ , S3.poAutoMakeBucket = isIA info
+ }
+ where
+ info = hinfo h
+
+data S3Handle = S3Handle
+ { hmanager :: Manager
+ , hawscfg :: AWS.Configuration
+ , hs3cfg :: S3.S3Configuration AWS.NormalQuery
+ , hinfo :: S3Info
+ }
+
+{- Sends a request to S3 and gets back the response.
+ -
+ - Note that pureAws's use of ResourceT is bypassed here;
+ - the response should be fully processed while the S3Handle
+ - is still open, eg within a call to withS3Handle.
+ -}
+sendS3Handle
+ :: (AWS.Transaction req res, AWS.ServiceConfiguration req ~ S3.S3Configuration)
+ => S3Handle
+ -> req
+ -> Annex res
+sendS3Handle h r = liftIO $ runResourceT $ sendS3Handle' h r
+
+sendS3Handle'
+ :: (AWS.Transaction r a, AWS.ServiceConfiguration r ~ S3.S3Configuration)
+ => S3Handle
+ -> r
+ -> ResourceT IO a
+sendS3Handle' h = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h)
+
+withS3Handle :: RemoteConfig -> UUID -> S3Info -> (S3Handle -> Annex a) -> Annex a
+withS3Handle c u info a = do
+ creds <- getRemoteCredPairFor "S3" c (AWS.creds u)
+ awscreds <- liftIO $ AWS.genCredentials $ fromMaybe nocreds creds
+ let awscfg = AWS.Configuration AWS.Timestamp awscreds (AWS.defaultLog AWS.Error)
+ bracketIO (newManager httpcfg) closeManager $ \mgr ->
+ a $ S3Handle mgr awscfg s3cfg info
where
- go Nothing = return Nothing
- go (Just (ak, sk)) = return $ Just $ AWSConnection host port ak sk
+ s3cfg = s3Configuration c
+ httpcfg = defaultManagerSettings
+ { managerResponseTimeout = Nothing }
+ nocreds = error "Cannot use S3 without credentials configured"
+s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery
+s3Configuration c = (S3.s3 proto endpoint False) { S3.s3Port = port }
+ where
+ proto
+ | port == 443 = AWS.HTTPS
+ | otherwise = AWS.HTTP
host = fromJust $ M.lookup "host" c
+ datacenter = fromJust $ M.lookup "datacenter" c
+ -- When the default S3 host is configured, connect directly to
+ -- the S3 endpoint for the configured datacenter.
+ -- When another host is configured, it's used as-is.
+ endpoint
+ | host == AWS.s3DefaultHost = AWS.s3HostName $ T.pack datacenter
+ | otherwise = T.encodeUtf8 $ T.pack host
port = let s = fromJust $ M.lookup "port" c in
case reads s of
[(p, _)] -> p
_ -> error $ "bad S3 port value: " ++ s
-getBucket :: RemoteConfig -> Maybe Bucket
-getBucket = M.lookup "bucket"
+tryS3 :: Annex a -> Annex (Either S3.S3Error a)
+tryS3 a = (Right <$> a) `catch` (pure . Left)
+
+data S3Info = S3Info
+ { bucket :: S3.Bucket
+ , storageClass :: S3.StorageClass
+ , bucketObject :: Key -> T.Text
+ , metaHeaders :: [(T.Text, T.Text)]
+ , partSize :: Maybe Integer
+ , isIA :: Bool
+ }
+
+extractS3Info :: RemoteConfig -> Annex S3Info
+extractS3Info c = do
+ b <- maybe
+ (error "S3 bucket not configured")
+ (return . T.pack)
+ (getBucketName c)
+ return $ S3Info
+ { bucket = b
+ , storageClass = getStorageClass c
+ , bucketObject = T.pack . getBucketObject c
+ , metaHeaders = getMetaHeaders c
+ , partSize = getPartSize c
+ , isIA = configIA c
+ }
+
+getBucketName :: RemoteConfig -> Maybe BucketName
+getBucketName = M.lookup "bucket"
-getStorageClass :: RemoteConfig -> StorageClass
-getStorageClass c = case fromJust $ M.lookup "storageclass" c of
- "REDUCED_REDUNDANCY" -> REDUCED_REDUNDANCY
- _ -> STANDARD
-
-getXheaders :: RemoteConfig -> [(String, String)]
-getXheaders = filter isxheader . M.assocs
+getStorageClass :: RemoteConfig -> S3.StorageClass
+getStorageClass c = case M.lookup "storageclass" c of
+ Just "REDUCED_REDUNDANCY" -> S3.ReducedRedundancy
+ _ -> S3.Standard
+
+getPartSize :: RemoteConfig -> Maybe Integer
+getPartSize c = readSize dataUnits =<< M.lookup "partsize" c
+
+getMetaHeaders :: RemoteConfig -> [(T.Text, T.Text)]
+getMetaHeaders = map munge . filter ismetaheader . M.assocs
+ where
+ ismetaheader (h, _) = metaprefix `isPrefixOf` h
+ metaprefix = "x-amz-meta-"
+ metaprefixlen = length metaprefix
+ munge (k, v) = (T.pack $ drop metaprefixlen k, T.pack v)
+
+getFilePrefix :: RemoteConfig -> String
+getFilePrefix = M.findWithDefault "" "fileprefix"
+
+getBucketObject :: RemoteConfig -> Key -> FilePath
+getBucketObject c = munge . key2file
+ where
+ munge s = case M.lookup "mungekeys" c of
+ Just "ia" -> iaMunge $ getFilePrefix c ++ s
+ _ -> getFilePrefix c ++ s
+
+{- Internet Archive limits filenames to a subset of ascii,
+ - with no whitespace. Other characters are xml entity
+ - encoded. -}
+iaMunge :: String -> String
+iaMunge = (>>= munge)
where
- isxheader (h, _) = "x-amz-" `isPrefixOf` h
+ munge c
+ | isAsciiUpper c || isAsciiLower c || isNumber c = [c]
+ | c `elem` "_-.\"" = [c]
+ | isSpace c = []
+ | otherwise = "&" ++ show (ord c) ++ ";"
+
+configIA :: RemoteConfig -> Bool
+configIA = maybe False isIAHost . M.lookup "host"
{- Hostname to use for archive.org S3. -}
iaHost :: HostName
iaHost = "s3.us.archive.org"
-configIA :: RemoteConfig -> Bool
-configIA c = maybe False isIAHost (M.lookup "host" c)
-
isIAHost :: HostName -> Bool
isIAHost h = ".archive.org" `isSuffixOf` map toLower h
-iaItemUrl :: Bucket -> URLString
-iaItemUrl bucket = "http://archive.org/details/" ++ bucket
+iaItemUrl :: BucketName -> URLString
+iaItemUrl b = "http://archive.org/details/" ++ b
iaKeyUrl :: Remote -> Key -> URLString
-iaKeyUrl r k = "http://archive.org/download/" ++ bucket ++ "/" ++ bucketFile r k
+iaKeyUrl r k = "http://archive.org/download/" ++ b ++ "/" ++ getBucketObject (config r) k
where
- bucket = fromMaybe "" $ getBucket $ config r
+ b = fromMaybe "" $ getBucketName $ config r
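
Taken together, the new plumbing in Remote/S3.hs is used roughly as in this sketch (headKeyExample is a hypothetical name, and S3.horMetadata assumes aws >= 0.10, as in checkKey above):

import qualified Aws.S3 as S3

-- Hypothetical example: open an S3Handle for a remote and issue a single
-- headObject request, mirroring what checkKey does.
headKeyExample :: Remote -> Key -> Annex Bool
headKeyExample r k = do
    info <- extractS3Info (config r)
    withS3Handle (config r) (uuid r) info $ \h -> do
        rsp <- sendS3Handle h $
            S3.headObject (bucket info) (bucketObject info k)
        return (isJust (S3.horMetadata rsp))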