diff options
Diffstat (limited to 'src/s3ql/inode_cache.py')
-rw-r--r-- | src/s3ql/inode_cache.py | 286 |
1 file changed, 286 insertions, 0 deletions
diff --git a/src/s3ql/inode_cache.py b/src/s3ql/inode_cache.py new file mode 100644 index 0000000..149947f --- /dev/null +++ b/src/s3ql/inode_cache.py @@ -0,0 +1,286 @@ +''' +inode_cache.py - this file is part of S3QL (http://s3ql.googlecode.com) + +Copyright (C) 2008-2010 Nikolaus Rath <Nikolaus@rath.org> + +This program can be distributed under the terms of the GNU LGPL. +''' + +from __future__ import division, print_function, absolute_import + +import time +import logging +from random import randint +import apsw +from .database import NoSuchRowError + +__all__ = [ 'InodeCache', 'OutOfInodesError' ] +log = logging.getLogger('inode_cache') + +CACHE_SIZE = 100 +ATTRIBUTES = ('mode', 'refcount', 'uid', 'gid', 'size', 'locked', + 'rdev', 'target', 'atime', 'mtime', 'ctime', 'id') +ATTRIBUTE_STR = ', '.join(ATTRIBUTES) +UPDATE_ATTRS = ('mode', 'refcount', 'uid', 'gid', 'size', 'locked', + 'rdev', 'target', 'atime', 'mtime', 'ctime') +UPDATE_STR = ', '.join('%s=?' % x for x in UPDATE_ATTRS) +TIMEZONE = time.timezone + +class _Inode(object): + '''An inode with its attributes''' + + __slots__ = ATTRIBUTES + ('dirty',) + + def __init__(self): + super(_Inode, self).__init__() + self.dirty = False + + # This allows access to all st_* attributes, even if they're + # not defined in the table + def __getattr__(self, key): + if key == 'st_nlink': + return self.refcount + + elif key == 'st_blocks': + return self.size // 512 + + elif key == 'st_ino': + return self.id + + # Timeout, can effectively be infinite since attribute changes + # are only triggered by the kernel's own requests + elif key == 'attr_timeout' or key == 'entry_timeout': + return 3600 + + # We want our blocksize for IO as large as possible to get large + # write requests + elif key == 'st_blksize': + return 128 * 1024 + + # Our inodes are already unique + elif key == 'generation': + return 1 + + elif key.startswith('st_'): + return getattr(self, key[3:]) + + def __eq__(self, other): + if not isinstance(other, 
_Inode): + return NotImplemented + + for attr in ATTRIBUTES: + if getattr(self, attr) != getattr(other, attr): + return False + + return True + + + def copy(self): + copy = _Inode() + + for attr in ATTRIBUTES: + setattr(copy, attr, getattr(self, attr)) + + return copy + + def __setattr__(self, name, value): + if name != 'dirty': + object.__setattr__(self, 'dirty', True) + object.__setattr__(self, name, value) + + +class InodeCache(object): + ''' + This class maps the `inode` SQL table to a dict, caching the rows. + + If the cache is full and a row is not in the cache, the least-recently + retrieved row is deleted from the cache. This means that accessing + cached rows will *not* change the order of their expiration. + + Attributes: + ----------- + :attrs: inode indexed dict holding the attributes + :cached_rows: list of the inodes that are in cache + :pos: position of the most recently retrieved inode in + 'cached_rows'. + + Notes + ----- + + Callers should keep in mind that the changes of the returned inode + object will only be written to the database if the inode is still + in the cache when its attributes are updated: it is possible for + the caller to keep a reference to an inode when that + inode has already been expired from the InodeCache. Modifications + to this inode object will be lost(!). + + Callers should therefore use the returned inode objects only + as long as they can guarantee that no other calls to InodeCache + are made that may result in expiration of inodes from the cache. + + Moreover, the caller must make sure that he does not call + InodeCache methods while a database transaction is active that + may be rolled back. This would rollback database updates + performed by InodeCache, which are generally for inodes that + are expired from the cache and therefore *not* directly related + to the effects of the current method call. 
+ ''' + + def __init__(self, db): + self.attrs = dict() + self.cached_rows = list() + self.db = db + + # Fill the cache with dummy data, so that we don't have to + # check if the cache is full or not (it will always be full) + for _ in xrange(CACHE_SIZE): + self.cached_rows.append(None) + + self.pos = 0 + + + def __delitem__(self, inode): + if self.db.execute('DELETE FROM inodes WHERE id=?', (inode,)) != 1: + raise KeyError('No such inode') + try: + del self.attrs[inode] + except KeyError: + pass + + def __getitem__(self, id_): + try: + return self.attrs[id_] + except KeyError: + try: + inode = self.getattr(id_) + except NoSuchRowError: + raise KeyError('No such inode: %d' % id_) + + old_id = self.cached_rows[self.pos] + self.cached_rows[self.pos] = id_ + self.pos = (self.pos + 1) % CACHE_SIZE + if old_id is not None: + try: + old_inode = self.attrs[old_id] + except KeyError: + # We may have deleted that inode + pass + else: + del self.attrs[old_id] + self.setattr(old_inode) + self.attrs[id_] = inode + return inode + + def getattr(self, id_): + attrs = self.db.get_row("SELECT %s FROM inodes WHERE id=? " % ATTRIBUTE_STR, + (id_,)) + inode = _Inode() + + for (i, id_) in enumerate(ATTRIBUTES): + setattr(inode, id_, attrs[i]) + + # Convert to local time + # Pylint does not detect the attributes + #pylint: disable=E1101 + inode.atime += TIMEZONE + inode.mtime += TIMEZONE + inode.ctime += TIMEZONE + + inode.dirty = False + + return inode + + def create_inode(self, **kw): + + inode = _Inode() + + for (key, val) in kw.iteritems(): + setattr(inode, key, val) + + for i in ('atime', 'ctime', 'mtime'): + kw[i] -= TIMEZONE + + init_attrs = [ x for x in ATTRIBUTES if x in kw ] + + # We want to restrict inodes to 2^32, and we do not want to immediately + # reuse deleted inodes (so that the lack of generation numbers isn't too + # likely to cause problems with NFS) + sql = ('INSERT INTO inodes (id, %s) VALUES(?, %s)' + % (', '.join(init_attrs), ','.join('?' 
for _ in init_attrs))) + bindings = [ kw[x] for x in init_attrs ] + for _ in range(100): + # _Inode.id is not explicitly defined + #pylint: disable-msg=W0201 + inode.id = randint(0, 2 ** 32 - 1) + try: + self.db.execute(sql, [inode.id] + bindings) + except apsw.ConstraintError: + pass + else: + break + else: + raise OutOfInodesError() + + + return self[inode.id] + + + def setattr(self, inode): + if not inode.dirty: + return + inode.dirty = False + inode = inode.copy() + + inode.atime -= TIMEZONE + inode.mtime -= TIMEZONE + inode.ctime -= TIMEZONE + + self.db.execute("UPDATE inodes SET %s WHERE id=?" % UPDATE_STR, + [ getattr(inode, x) for x in UPDATE_ATTRS ] + [inode.id]) + + def flush_id(self, id_): + if id_ in self.attrs: + self.setattr(self.attrs[id_]) + + def destroy(self): + '''Finalize cache''' + + for i in xrange(len(self.cached_rows)): + id_ = self.cached_rows[i] + self.cached_rows[i] = None + if id_ is not None: + try: + inode = self.attrs[id_] + except KeyError: + # We may have deleted that inode + pass + else: + del self.attrs[id_] + self.setattr(inode) + + def flush(self): + '''Flush all entries to database''' + + # We don't want to use dict.itervalues() since + # the dict may change while we iterate + for i in xrange(len(self.cached_rows)): + id_ = self.cached_rows[i] + if id_ is not None: + try: + inode = self.attrs[id_] + except KeyError: + # We may have deleted that inode + pass + else: + self.setattr(inode) + + def __del__(self): + if self.attrs: + raise RuntimeError('InodeCache instance was destroyed without calling close()') + + + +class OutOfInodesError(Exception): + + def __str__(self): + return 'Could not find free rowid in inode table' |