path: root/src/s3ql/inode_cache.py
Diffstat (limited to 'src/s3ql/inode_cache.py')
-rw-r--r--  src/s3ql/inode_cache.py  286
1 file changed, 286 insertions, 0 deletions
diff --git a/src/s3ql/inode_cache.py b/src/s3ql/inode_cache.py
new file mode 100644
index 0000000..149947f
--- /dev/null
+++ b/src/s3ql/inode_cache.py
@@ -0,0 +1,286 @@
+'''
+inode_cache.py - this file is part of S3QL (http://s3ql.googlecode.com)
+
+Copyright (C) 2008-2010 Nikolaus Rath <Nikolaus@rath.org>
+
+This program can be distributed under the terms of the GNU LGPL.
+'''
+
+from __future__ import division, print_function, absolute_import
+
+import time
+import logging
+from random import randint
+import apsw
+from .database import NoSuchRowError
+
+__all__ = [ 'InodeCache', 'OutOfInodesError' ]
+log = logging.getLogger('inode_cache')
+
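+# Number of rows kept in memory; InodeCache pre-fills all slots in
+# __init__, so the cache is always considered full.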
+CACHE_SIZE = 100
+ATTRIBUTES = ('mode', 'refcount', 'uid', 'gid', 'size', 'locked',
+ 'rdev', 'target', 'atime', 'mtime', 'ctime', 'id')
+ATTRIBUTE_STR = ', '.join(ATTRIBUTES)
+UPDATE_ATTRS = ('mode', 'refcount', 'uid', 'gid', 'size', 'locked',
+ 'rdev', 'target', 'atime', 'mtime', 'ctime')
+UPDATE_STR = ', '.join('%s=?' % x for x in UPDATE_ATTRS)
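+# time.timezone is the offset of the local (non-DST) timezone in
+# seconds west of UTC; getattr() and setattr() below use it to shift
+# timestamps between database and in-memory representations.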
+TIMEZONE = time.timezone
+
+class _Inode(object):
+ '''An inode with its attributes'''
+
+ __slots__ = ATTRIBUTES + ('dirty',)
+
+ def __init__(self):
+ super(_Inode, self).__init__()
+ self.dirty = False
+
+ # This allows access to all st_* attributes, even if they're
+ # not defined in the table
+ def __getattr__(self, key):
+ if key == 'st_nlink':
+ return self.refcount
+
+ elif key == 'st_blocks':
+ return self.size // 512
+
+ elif key == 'st_ino':
+ return self.id
+
+ # Timeout, can effectively be infinite since attribute changes
+ # are only triggered by the kernel's own requests
+ elif key == 'attr_timeout' or key == 'entry_timeout':
+ return 3600
+
+ # We want our blocksize for IO as large as possible to get large
+ # write requests
+ elif key == 'st_blksize':
+ return 128 * 1024
+
+ # Our inodes are already unique
+ elif key == 'generation':
+ return 1
+
+        elif key.startswith('st_'):
+            return getattr(self, key[3:])
+        else:
+            # Raise instead of implicitly returning None, which would
+            # break hasattr() for unknown attributes
+            raise AttributeError(key)
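+    # E.g. 'inode.st_mtime' resolves to 'inode.mtime' and 'inode.st_uid'
+    # to 'inode.uid' via the fallthrough above, so stat(2)-style
+    # attribute names can be used directly by the FUSE frontend.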
+
+ def __eq__(self, other):
+ if not isinstance(other, _Inode):
+ return NotImplemented
+
+ for attr in ATTRIBUTES:
+ if getattr(self, attr) != getattr(other, attr):
+ return False
+
+ return True
+
+
+ def copy(self):
+ copy = _Inode()
+
+ for attr in ATTRIBUTES:
+ setattr(copy, attr, getattr(self, attr))
+
+ return copy
+
+ def __setattr__(self, name, value):
+ if name != 'dirty':
+ object.__setattr__(self, 'dirty', True)
+ object.__setattr__(self, name, value)
+
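+# A short illustration of the dirty tracking above (sketch only, not
+# part of the module): any attribute assignment marks the inode dirty,
+# and InodeCache.setattr() below skips the UPDATE for clean inodes.
+#
+#   inode = _Inode()
+#   assert inode.dirty is False   # fresh from __init__
+#   inode.size = 4096             # __setattr__ flips the dirty flag
+#   assert inode.dirty is True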
+
+class InodeCache(object):
+    '''
+    This class maps the `inodes` SQL table to a dict, caching the rows.
+
+    If the cache is full and a row is not in the cache, the least-recently
+    retrieved row is evicted. Since the eviction order depends only on
+    when a row was retrieved, accessing cached rows does *not* change
+    the order of their expiration.
+
+    Attributes:
+    -----------
+    :attrs: dict of cached inode objects, indexed by inode id
+    :cached_rows: list of the inode ids that are in the cache
+    :pos: index of the slot in 'cached_rows' that will be
+          overwritten (i.e. expired) on the next cache miss.
+
+ Notes
+ -----
+
+    Callers should keep in mind that changes to a returned inode
+    object are only written to the database if the inode is still in
+    the cache when its attributes are updated: it is possible for the
+    caller to keep a reference to an inode after that inode has
+    already been expired from the InodeCache. Modifications to such an
+    inode object will be lost(!).
+
+    Callers should therefore use the returned inode objects only as
+    long as they can guarantee that no other calls to InodeCache are
+    made that may result in expiration of inodes from the cache.
+
+    Moreover, callers must make sure not to call InodeCache methods
+    while a database transaction is active that may be rolled back.
+    Such a rollback would also undo database updates performed by
+    InodeCache, which are generally for inodes that have been expired
+    from the cache and are therefore *not* directly related to the
+    effects of the current method call.
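+
+    A typical access pattern (a minimal sketch; 'db' is the database
+    connection passed to __init__ and 'id_' an existing inode id):
+
+        cache = InodeCache(db)
+        inode = cache[id_]          # load row, possibly expiring another
+        inode.mtime += 1            # assignment marks the inode dirty
+        cache.flush_id(id_)         # write the dirty attributes back
+        cache.destroy()             # flush everything when done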
+ '''
+
+ def __init__(self, db):
+ self.attrs = dict()
+ self.cached_rows = list()
+ self.db = db
+
+ # Fill the cache with dummy data, so that we don't have to
+ # check if the cache is full or not (it will always be full)
+ for _ in xrange(CACHE_SIZE):
+ self.cached_rows.append(None)
+
+ self.pos = 0
+
+
+ def __delitem__(self, inode):
+ if self.db.execute('DELETE FROM inodes WHERE id=?', (inode,)) != 1:
+ raise KeyError('No such inode')
+ try:
+ del self.attrs[inode]
+ except KeyError:
+ pass
+
+ def __getitem__(self, id_):
+ try:
+ return self.attrs[id_]
+ except KeyError:
+ try:
+ inode = self.getattr(id_)
+ except NoSuchRowError:
+ raise KeyError('No such inode: %d' % id_)
+
+ old_id = self.cached_rows[self.pos]
+ self.cached_rows[self.pos] = id_
+ self.pos = (self.pos + 1) % CACHE_SIZE
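+            # The round-robin slot we just took may still hold an older
+            # inode; write its attributes back before expiring it.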
+ if old_id is not None:
+ try:
+ old_inode = self.attrs[old_id]
+ except KeyError:
+ # We may have deleted that inode
+ pass
+ else:
+ del self.attrs[old_id]
+ self.setattr(old_inode)
+ self.attrs[id_] = inode
+ return inode
+
+ def getattr(self, id_):
+ attrs = self.db.get_row("SELECT %s FROM inodes WHERE id=? " % ATTRIBUTE_STR,
+ (id_,))
+ inode = _Inode()
+
+        for (i, name) in enumerate(ATTRIBUTES):
+            setattr(inode, name, attrs[i])
+
+ # Convert to local time
+ # Pylint does not detect the attributes
+ #pylint: disable=E1101
+ inode.atime += TIMEZONE
+ inode.mtime += TIMEZONE
+ inode.ctime += TIMEZONE
+
+ inode.dirty = False
+
+ return inode
+
+ def create_inode(self, **kw):
+
+ inode = _Inode()
+
+ for (key, val) in kw.iteritems():
+ setattr(inode, key, val)
+
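+        # Shift timestamps to their database representation; getattr()
+        # adds TIMEZONE back when the row is read, so the inode that
+        # create_inode() returns carries the values passed in here.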
+ for i in ('atime', 'ctime', 'mtime'):
+ kw[i] -= TIMEZONE
+
+ init_attrs = [ x for x in ATTRIBUTES if x in kw ]
+
+ # We want to restrict inodes to 2^32, and we do not want to immediately
+ # reuse deleted inodes (so that the lack of generation numbers isn't too
+ # likely to cause problems with NFS)
+ sql = ('INSERT INTO inodes (id, %s) VALUES(?, %s)'
+ % (', '.join(init_attrs), ','.join('?' for _ in init_attrs)))
+ bindings = [ kw[x] for x in init_attrs ]
+ for _ in range(100):
+ # _Inode.id is not explicitly defined
+ #pylint: disable-msg=W0201
+ inode.id = randint(0, 2 ** 32 - 1)
+ try:
+ self.db.execute(sql, [inode.id] + bindings)
+ except apsw.ConstraintError:
+ pass
+ else:
+ break
+ else:
+ raise OutOfInodesError()
+
+
+ return self[inode.id]
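+    # A hypothetical call (illustrative values; 'atime', 'mtime' and
+    # 'ctime' are mandatory, other columns are only included in the
+    # INSERT when given):
+    #
+    #   now = time.time()
+    #   inode = cache.create_inode(mode=0644, refcount=1, uid=0, gid=0,
+    #                              size=0, atime=now, mtime=now,
+    #                              ctime=now)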
+
+
+ def setattr(self, inode):
+ if not inode.dirty:
+ return
+ inode.dirty = False
+ inode = inode.copy()
+
+ inode.atime -= TIMEZONE
+ inode.mtime -= TIMEZONE
+ inode.ctime -= TIMEZONE
+
+ self.db.execute("UPDATE inodes SET %s WHERE id=?" % UPDATE_STR,
+ [ getattr(inode, x) for x in UPDATE_ATTRS ] + [inode.id])
+
+ def flush_id(self, id_):
+ if id_ in self.attrs:
+ self.setattr(self.attrs[id_])
+
+ def destroy(self):
+ '''Finalize cache'''
+
+ for i in xrange(len(self.cached_rows)):
+ id_ = self.cached_rows[i]
+ self.cached_rows[i] = None
+ if id_ is not None:
+ try:
+ inode = self.attrs[id_]
+ except KeyError:
+ # We may have deleted that inode
+ pass
+ else:
+ del self.attrs[id_]
+ self.setattr(inode)
+
+ def flush(self):
+ '''Flush all entries to database'''
+
+ # We don't want to use dict.itervalues() since
+ # the dict may change while we iterate
+ for i in xrange(len(self.cached_rows)):
+ id_ = self.cached_rows[i]
+ if id_ is not None:
+ try:
+ inode = self.attrs[id_]
+ except KeyError:
+ # We may have deleted that inode
+ pass
+ else:
+ self.setattr(inode)
+
+ def __del__(self):
+ if self.attrs:
+            raise RuntimeError('InodeCache instance was destroyed without calling destroy()')
+
+
+class OutOfInodesError(Exception):
+
+ def __str__(self):
+ return 'Could not find free rowid in inode table'