summaryrefslogtreecommitdiff
path: root/src/s3ql/backends/s3.py
blob: 9793a962d44c8280c102359d20ee16a0adfd444a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
'''
s3.py - this file is part of S3QL (http://s3ql.googlecode.com)

Copyright (C) Nikolaus Rath <Nikolaus@rath.org>

This program can be distributed under the terms of the GNU GPLv3.
'''

from __future__ import division, print_function, absolute_import
from . import s3c
from s3ql.backends.common import retry
from s3ql.common import QuietError
import xml.etree.cElementTree as ElementTree
import logging
import re

log = logging.getLogger("backend.s3")

# Pylint goes berserk with false positives
#pylint: disable=E1002,E1101


# Regions known to provide read-after-write consistency for newly
# created objects, see
# http://docs.amazonwebservices.com/AmazonS3/2006-03-01/dev/LocationSelection.html
GOOD_REGIONS = ('EU', 'us-west-1', 'us-west-2', 'ap-southeast-1',
                'ap-northeast-1', 'sa-east-1')

# Regions without any consistency guarantees for new objects
BAD_REGIONS = ('us-standard',)
              
class Bucket(s3c.Bucket):
    """A bucket stored in Amazon S3

    This class uses standard HTTP connections to connect to S3.

    The bucket guarantees get after create consistency, i.e. a newly created
    object will be immediately retrievable. Additional consistency guarantees
    may or may not be available and can be queried for with instance methods.
    """

    def __init__(self, storage_url, login, password):
        super(Bucket, self).__init__(storage_url, login, password)

        # Determine the bucket's region once, so consistency-related
        # methods (is_list_create_consistent) can answer without a request.
        self.region = self._get_region()
        if self.region in BAD_REGIONS:
            log.warn('Warning: bucket provides insufficient consistency guarantees!')
        elif self.region not in GOOD_REGIONS:
            # Bug fix: the %s placeholder was never filled in -- pass the
            # region as a lazy logging argument.
            log.warn('Unknown region: %s - please file a bug report.', self.region)

    @staticmethod
    def _parse_storage_url(storage_url):
        '''Parse *storage_url* into *(hostname, port, bucket_name, prefix)*

        Accepts s3:// and s3s:// URLs. Raises `QuietError` if the URL
        is not well formed.
        '''
        hit = re.match(r'^s3s?://([^/]+)(?:/(.*))?$', storage_url)
        if not hit:
            raise QuietError('Invalid storage URL')

        bucket_name = hit.group(1)
        # Virtual-hosted-style addressing: <bucket>.s3.amazonaws.com
        hostname = '%s.s3.amazonaws.com' % bucket_name
        prefix = hit.group(2) or ''
        return (hostname, 80, bucket_name, prefix)

    @retry
    def _get_region(self):
        '''Return bucket region

        Queries the S3 "location" sub-resource. An empty response body
        means the bucket lives in the classic US region, which we report
        as 'us-standard'.
        '''
        # Fixed malformed docstring opener ('''' -> ''') that leaked a
        # stray quote into the docstring text.
        log.debug('_get_region()')
        resp = self._do_request('GET', '/', subres='location')

        region = ElementTree.parse(resp).getroot().text

        if not region:
            region = 'us-standard'

        return region

    def __str__(self):
        return 's3://%s/%s' % (self.bucket_name, self.prefix)

    def is_get_consistent(self):
        '''If True, objects retrievals are guaranteed to be up-to-date

        If this method returns True, then creating, deleting, or overwriting an
        object is guaranteed to be immediately reflected in subsequent object
        retrieval attempts.
        '''

        # S3 (at the time of writing) never guarantees read-after-update
        # consistency, regardless of region.
        return False

    def is_list_create_consistent(self):
        '''If True, new objects are guaranteed to show up in object listings

        If this method returns True, creation of objects will immediately be
        reflected when retrieving the list of available objects.
        '''

        return self.region in GOOD_REGIONS