1 files changed, 643 insertions, 229 deletions
diff --git a/hgsubversion/maps.py b/hgsubversion/maps.py
index a3eb700..3fc6a5c 100644
--- a/hgsubversion/maps.py
+++ b/hgsubversion/maps.py
@@ -1,92 +1,236 @@
 ''' Module for self-contained maps. '''
 
+import collections
+import contextlib
 import errno
 import os
+import re
+import sqlite3
+import sys
+import weakref
+from mercurial import error
 from mercurial import util as hgutil
 from mercurial.node import bin, hex, nullid
 
-import svncommands
+import subprocess
 import util
 
-class AuthorMap(dict):
-    '''A mapping from Subversion-style authors to Mercurial-style
-    authors, and back. The data is stored persistently on disk.
-
-    If the 'hgsubversion.defaultauthors' configuration option is set to false,
-    attempting to obtain an unknown author will fail with an Abort.
+class BaseMap(dict):
+    '''A base class for the different types of mappings: author, branch, and
+    tags.'''
+    def __init__(self, ui, filepath):
+        super(BaseMap, self).__init__()
+        self._ui = ui
 
-    If the 'hgsubversion.caseignoreauthors' configuration option is set to true,
-    the userid from Subversion is always compared lowercase.
-    '''
+        self._commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
+        self.syntaxes = ('re', 'glob')
 
-    def __init__(self, meta):
-        '''Initialise a new AuthorMap.
+        self._filepath = filepath
+        self.load(filepath)
 
-        The ui argument is used to print diagnostic messages.
+        # Append mappings specified from the commandline. A little
+        # magic here: our name in the config mapping is the same as
+        # the class name lowercased.
+        clmap = util.configpath(self._ui, self.mapname())
+        if clmap:
+            self.load(clmap)
 
-        The path argument is the location of the backing store,
-        typically .hg/svn/authors.
+    @classmethod
+    def mapname(cls):
+        return cls.__name__.lower()
+
+    def _findkey(self, key):
+        '''Takes a string (or compiled regex) and finds the first stored key
+        that matches it; an exact pattern match wins over a regex search'''
+        if not key:
+            return None
+
+        # compile a new regex key if we're given a string; can't use
+        # hgutil.compilere since we need regex.sub
+        k = key
+        if isinstance(key, str):
+            k = re.compile(re.escape(key))
+
+        # preference goes to matching the exact pattern, i.e. 'foo' should
+        # first match 'foo' before trying regexes
+        for regex in self:
+            if regex.pattern == k.pattern:
+                return regex
+
+        # if key isn't a string, then we are done; nothing matches
+        if not isinstance(key, str):
+            return None
+
+        # now we test the regex; the above loop will be faster and is
+        # equivalent to not having regexes (i.e. just doing string compares)
+        for regex in self:
+            if regex.search(key):
+                return regex
+        return None
+
+    def get(self, key, default=None):
+        '''Similar to dict.get, except we use our own matcher, _findkey.'''
+        if self._findkey(key):
+            return self[key]
+        return default
+
+    def __getitem__(self, key):
+        '''Similar to dict.__getitem__, except we use our own matcher, _findkey.
+        If the key is a string, we use the matching regex to map its value.
         '''
-        self.meta = meta
-        self.defaulthost = ''
-        if meta.defaulthost:
-            self.defaulthost = '@%s' % meta.defaulthost.lstrip('@')
+        k = self._findkey(key)
+        val = super(BaseMap, self).__getitem__(k)
 
-        self.super = super(AuthorMap, self)
-        self.super.__init__()
-        self.load(self.meta.authors_file)
+        # if key is a string then we can transform it using our regex, else we
+        # don't have enough information, so we just return the val
+        if isinstance(key, str):
+            val = k.sub(val, key)
 
-        # append authors specified from the commandline
-        clmap = util.configpath(self.meta.ui, 'authormap')
-        if clmap:
-            self.load(clmap)
+        return val
 
-    def load(self, path):
-        ''' Load mappings from a file at the specified path. '''
+    def __setitem__(self, key, value):
+        '''Similar to dict.__setitem__, except we compile the string into a regex, if
+        need be.
+        '''
+        # try to find the regex already in the map
+        k = self._findkey(key)
+        # if we found one, then use it
+        if k:
+            key = k
+        # else make a new regex
+        if isinstance(key, str):
+            key = re.compile(re.escape(key))
+        super(BaseMap, self).__setitem__(key, value)
+
+    def __contains__(self, key):
+        '''Similar to dict.__contains__, except we use our own matcher, _findkey.'''
+        return self._findkey(key) is not None
 
+    def load(self, path):
+        '''Load mappings from a file at the specified path.'''
         path = os.path.expandvars(path)
         if not os.path.exists(path):
             return
 
         writing = False
-        if path != self.meta.authors_file:
-            writing = open(self.meta.authors_file, 'a')
+        mapfile = self._filepath
+        if path != mapfile:
+            writing = open(mapfile, 'a')
 
-        self.meta.ui.debug('reading authormap from %s\n' % path)
+        self._ui.debug('reading %s from %s\n' % (self.mapname() , path))
         f = open(path, 'r')
-        for number, line_org in enumerate(f):
+        syntax = ''
+        for number, line in enumerate(f):
 
-            line = line_org.split('#')[0]
-            if not line.strip():
+            if writing:
+                writing.write(line)
+
+            # strip out comments
+            if "#" in line:
+                # remove comments prefixed by an even number of escapes
+                line = self._commentre.sub(r'\1', line)
+                # fixup properly escaped comments that survived the above
+                line = line.replace("\\#", "#")
+            line = line.rstrip()
+            if not line:
                 continue
 
+            if line.startswith('syntax:'):
+                s = line[7:].strip()
+                syntax = ''
+                if s in self.syntaxes:
+                    syntax = s
+                continue
+            pat = syntax
+            for s in self.syntaxes:
+                if line.startswith(s + ':'):
+                    pat = s
+                    line = line[len(s) + 1:]
+                    break
+
+            # split on the first '='
             try:
                 src, dst = line.split('=', 1)
             except (IndexError, ValueError):
-                msg = 'ignoring line %i in author map %s: %s\n'
-                self.meta.ui.status(msg % (number, path, line.rstrip()))
+                msg = 'ignoring line %i in %s %s: %s\n'
+                self._ui.status(msg % (number, self.mapname(), path,
+                                           line.rstrip()))
                 continue
 
             src = src.strip()
             dst = dst.strip()
 
-            if self.meta.caseignoreauthors:
-                src = src.lower()
-
-            if writing:
-                if not src in self:
-                    self.meta.ui.debug('adding author %s to author map\n' % src)
-                elif dst != self[src]:
-                    msg = 'overriding author: "%s" to "%s" (%s)\n'
-                    self.meta.ui.status(msg % (self[src], dst, src))
-                writing.write(line_org)
-
+            if pat != 're':
+                src = re.escape(src)
+            if pat == 'glob':
+                src = src.replace('\\*', '.*')
+            src = re.compile(src)
+
+            if src not in self:
+                self._ui.debug('adding %s to %s\n' % (src, self.mapname()))
+            elif dst != self[src]:
+                msg = 'overriding %s: "%s" to "%s" (%s)\n'
+                self._ui.status(msg % (self.mapname(), self[src], dst, src))
             self[src] = dst
 
         f.close()
         if writing:
             writing.close()
 
+class AuthorMap(BaseMap):
+    '''A mapping from Subversion-style authors to Mercurial-style
+    authors, and back. The data is stored persistently on disk.
+
+    If the 'hgsubversion.defaultauthors' configuration option is set to false,
+    attempting to obtain an unknown author will fail with an Abort.
+
+    If the 'hgsubversion.caseignoreauthors' configuration option is set to true,
+    the userid from Subversion is always compared lowercase.
+    '''
+
+    def __init__(self, ui, filepath, defaulthost, caseignoreauthors,
+                 mapauthorscmd, defaultauthors):
+        '''Initialise a new AuthorMap.
+
+        The ui argument is used to print diagnostic messages.
+
+        The filepath argument is the location of the backing store,
+        typically .hg/svn/authors.
+        '''
+        if defaulthost:
+            self.defaulthost = '@%s' % defaulthost.lstrip('@')
+        else:
+            self.defaulthost = ''
+        self._caseignoreauthors = caseignoreauthors
+        self._mapauthorscmd = mapauthorscmd
+        self._defaulthost = defaulthost
+        self._defaultauthors = defaultauthors
+
+        super(AuthorMap, self).__init__(ui, filepath)
+
+    def _lowercase(self, key):
+        '''Lowercase a str or regex key when the caseignoreauthors setting
+        is enabled; otherwise return the key unchanged.'''
+        k = key
+        if self._caseignoreauthors:
+            if isinstance(key, str):
+                k = key.lower()
+            else:
+                k = re.compile(key.pattern.lower())
+        return k
+
+    def __setitem__(self, key, value):
+        '''Similar to dict.__setitem__, except we check caseignoreauthors to
+        use lowercase string or not
+        '''
+        super(AuthorMap, self).__setitem__(self._lowercase(key), value)
+
+    def __contains__(self, key):
+        '''Similar to dict.__contains__, except we check caseignoreauthors to
+        use lowercase string or not
+        '''
+        return super(AuthorMap, self).__contains__(self._lowercase(key))
+
     def __getitem__(self, author):
         ''' Similar to dict.__getitem__, except in case of an unknown author.
         In such cases, a new value is generated and added to the dictionary
@@ -94,20 +238,34 @@ class AuthorMap(dict):
         if author is None:
             author = '(no author)'
 
+        if not isinstance(author, str):
+            return super(AuthorMap, self).__getitem__(author)
+
         search_author = author
-        if self.meta.caseignoreauthors:
+        if self._caseignoreauthors:
             search_author = author.lower()
 
+        result = None
         if search_author in self:
-            result = self.super.__getitem__(search_author)
-        elif self.meta.defaultauthors:
-            self[author] = result = '%s%s' % (author, self.defaulthost)
-            msg = 'substituting author "%s" for default "%s"\n'
-            self.meta.ui.debug(msg % (author, result))
-        else:
-            msg = 'author %s has no entry in the author map!'
-            raise hgutil.Abort(msg % author)
-        self.meta.ui.debug('mapping author "%s" to "%s"\n' % (author, result))
+            result = super(AuthorMap, self).__getitem__(search_author)
+        elif self._mapauthorscmd:
+            cmd = self._mapauthorscmd % author
+            process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
+            output, err = process.communicate()
+            retcode = process.poll()
+            if retcode:
+                msg = 'map author command "%s" exited with error'
+                raise hgutil.Abort(msg % cmd)
+            self[author] = result = output.strip()
+        if not result:
+            if self._defaultauthors:
+                self[author] = result = '%s%s' % (author, self.defaulthost)
+                msg = 'substituting author "%s" for default "%s"\n'
+                self._ui.debug(msg % (author, result))
+            else:
+                msg = 'author %s has no entry in the author map!'
+                raise hgutil.Abort(msg % author)
+        self._ui.debug('mapping author "%s" to "%s"\n' % (author, result))
         return result
 
     def reverselookup(self, author):
@@ -127,24 +285,22 @@ class Tags(dict):
     """
     VERSION = 2
 
-    def __init__(self, meta, endrev=None):
+    def __init__(self, ui, filepath, endrev=None):
         dict.__init__(self)
-        self.meta = meta
+        self._filepath = filepath
+        self._ui = ui
         self.endrev = endrev
-        if os.path.isfile(self.meta.tagfile):
+        if os.path.isfile(self._filepath):
             self._load()
         else:
             self._write()
 
     def _load(self):
-        f = open(self.meta.tagfile)
+        f = open(self._filepath)
         ver = int(f.readline())
         if ver < self.VERSION:
-            self.meta.ui.status('tag map outdated, running rebuildmeta...\n')
-            f.close()
-            os.unlink(self.meta.tagfile)
-            svncommands.rebuildmeta(self.meta.ui, self.meta.repo, ())
-            return
+            raise error.Abort(
+                'tag map outdated, please run `hg svn rebuildmeta`')
         elif ver != self.VERSION:
             raise hgutil.Abort('tagmap too new -- please upgrade')
         for l in f:
@@ -160,7 +316,7 @@ class Tags(dict):
 
     def _write(self):
         assert self.endrev is None
-        f = open(self.meta.tagfile, 'w')
+        f = open(self._filepath, 'w')
         f.write('%s\n' % self.VERSION)
         f.close()
 
@@ -181,7 +337,7 @@ class Tags(dict):
         if not tag:
             raise hgutil.Abort('tag cannot be empty')
         ha, revision = info
-        f = open(self.meta.tagfile, 'a')
+        f = open(self._filepath, 'a')
         f.write('%s %s %s\n' % (hex(ha), revision, tag))
         f.close()
         dict.__setitem__(self, tag, ha)
@@ -191,44 +347,97 @@ class RevMap(dict):
 
     VERSION = 1
 
-    def __init__(self, meta):
+    lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulled_file,
+                                   default=0, deserializer=int)
+
+    def __init__(self, revmap_path, lastpulled_path):
         dict.__init__(self)
-        self.meta = meta
+        self._filepath = revmap_path
+        self._lastpulled_file = lastpulled_path
         self._hashes = None
+        # disable iteration to have a consistent interface with SqliteRevMap
+        # it's less about performance since RevMap needs iteration internally
+        self._allowiter = False
 
-        if os.path.isfile(self.meta.revmap_file):
+        self.firstpulled = 0
+        if os.path.isfile(self._filepath):
             self._load()
         else:
             self._write()
 
     def hashes(self):
         if self._hashes is None:
-            self._hashes = dict((v, k) for (k, v) in self.iteritems())
+            self._hashes = dict((v, k) for (k, v) in self._origiteritems())
         return self._hashes
 
-    def branchedits(self, branch, rev):
-        check = lambda x: x[0][1] == branch and x[0][0] < rev.revnum
-        return sorted(filter(check, self.iteritems()), reverse=True)
+    def branchedits(self, branch, revnum):
+        check = lambda x: x[0][1] == branch and x[0][0] < revnum
+        return sorted(filter(check, self._origiteritems()), reverse=True)
 
-    @classmethod
-    def readmapfile(cls, path, missingok=True):
+    def branchmaxrevnum(self, branch, maxrevnum):
+        result = 0
+        for num, br in self._origiterkeys():
+            if br == branch and num <= maxrevnum and num > result:
+                result = num
+        return result
+
+    @property
+    def lasthash(self):
+        lines = list(self._readmapfile())
+        if not lines:
+            return None
+        return bin(lines[-1].split(' ', 2)[1])
+
+    def revhashes(self, revnum):
+        for key, value in self._origiteritems():
+            if key[0] == revnum:
+                yield value
+
+    def clear(self):
+        self._write()
+        dict.clear(self)
+        self._hashes = None
+
+    def batchset(self, items, lastpulled):
+        '''Set items in batches
+
+        items is an array of (rev num, branch, binary hash)
+
+        For performance reasons, internal in-memory state is not updated.
+        To get an up-to-date RevMap, reconstruct the object.
+        '''
+        with open(self._filepath, 'a') as f:
+            f.write(''.join('%s %s %s\n' % (revnum, hex(binhash), br or '')
+                            for revnum, br, binhash in items))
+        self.lastpulled = lastpulled
+
+    def _readmapfile(self):
+        path = self._filepath
         try:
             f = open(path)
         except IOError, err:
-            if not missingok or err.errno != errno.ENOENT:
+            if err.errno != errno.ENOENT:
                 raise
             return iter([])
         ver = int(f.readline())
-        if ver != cls.VERSION:
+        if ver == SqliteRevMap.VERSION:
+            revmap = SqliteRevMap(self._filepath, self._lastpulled_file)
+            tmppath = '%s.tmp' % self._filepath
+            revmap.exportrevmapv1(tmppath)
+            os.rename(tmppath, self._filepath)
+            hgutil.unlinkpath(revmap._dbpath)
+            hgutil.unlinkpath(revmap._rowcountpath, ignoremissing=True)
+            return self._readmapfile()
+        if ver != self.VERSION:
             raise hgutil.Abort('revmap too new -- please upgrade')
         return f
 
     @util.gcdisable
     def _load(self):
-        lastpulled = self.meta.lastpulled
-        firstpulled = self.meta.firstpulled
+        lastpulled = self.lastpulled
+        firstpulled = self.firstpulled
         setitem = dict.__setitem__
-        for l in self.readmapfile(self.meta.revmap_file):
+        for l in self._readmapfile():
             revnum, ha, branch = l.split(' ', 2)
             if branch == '\n':
                 branch = None
@@ -240,34 +449,349 @@ class RevMap(dict):
             if revnum < firstpulled or not firstpulled:
                 firstpulled = revnum
             setitem(self, (revnum, branch), bin(ha))
-        self.meta.lastpulled = lastpulled
-        self.meta.firstpulled = firstpulled
+        if self.lastpulled != lastpulled:
+            self.lastpulled = lastpulled
+        self.firstpulled = firstpulled
 
     def _write(self):
-        f = open(self.meta.revmap_file, 'w')
-        f.write('%s\n' % self.VERSION)
-        f.close()
+        with open(self._filepath, 'w') as f:
+            f.write('%s\n' % self.VERSION)
 
     def __setitem__(self, key, ha):
         revnum, branch = key
-        f = open(self.meta.revmap_file, 'a')
         b = branch or ''
-        f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n')
-        f.close()
-        if revnum > self.meta.lastpulled or not self.meta.lastpulled:
-            self.meta.lastpulled = revnum
-        if revnum < self.meta.firstpulled or not self.meta.firstpulled:
-            self.meta.firstpulled = revnum
+        with open(self._filepath, 'a') as f:
+            f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n')
+        if revnum > self.lastpulled or not self.lastpulled:
+            self.lastpulled = revnum
+        if revnum < self.firstpulled or not self.firstpulled:
+            self.firstpulled = revnum
         dict.__setitem__(self, (revnum, branch), ha)
         if self._hashes is not None:
             self._hashes[ha] = (revnum, branch)
 
+    @classmethod
+    def _wrapitermethods(cls):
+        def wrap(orig):
+            def wrapper(self, *args, **kwds):
+                if not self._allowiter:
+                    raise NotImplementedError(
+                        'Iteration methods on RevMap are disabled ' +
+                        'to avoid performance issues on SqliteRevMap')
+                return orig(self, *args, **kwds)
+            return wrapper
+        methodre = re.compile(r'^_*(?:iter|view)?(?:keys|items|values)?_*$')
+        for name in filter(methodre.match, dir(cls)):
+            orig = getattr(cls, name)
+            setattr(cls, '_orig%s' % name, orig)
+            setattr(cls, name, wrap(orig))
+
+RevMap._wrapitermethods()
+
+
+class SqliteRevMap(collections.MutableMapping):
+    """RevMap backed by sqlite3.
+
+    It tries to address performance issues for a very large rev map.
+    As such iteration is unavailable for both the map itself and the
+    reverse map (self.hashes).
+
+    It migrates from the old RevMap upon first use. Then it will bump the
+    version of revmap so RevMap no longer works. The real database is a
+    separate file which has a ".db" suffix.
+    """
+
+    VERSION = 2
+
+    TABLESCHEMA = [
+        '''CREATE TABLE IF NOT EXISTS revmap (
+               rev INTEGER NOT NULL,
+               branch TEXT NOT NULL DEFAULT '',
+               hash BLOB NOT NULL)''',
+    ]
+
+    INDEXSCHEMA = [
+        'CREATE UNIQUE INDEX IF NOT EXISTS revbranch ON revmap (rev,branch);',
+        'CREATE INDEX IF NOT EXISTS hash ON revmap (hash);',
+    ]
+
+    # "bytes" in Python 2 will get truncated at '\0' when storing as sqlite
+    # blobs. "buffer" does not have this issue. Python 3 does not have "buffer"
+    # but "bytes" won't get truncated.
+    sqlblobtype = bytes if sys.version_info >= (3, 0) else buffer
+
+    class ReverseRevMap(object):
+        # collections.Mapping is not suitable since we don't want 2/3 of
+        # its required interfaces: __iter__, __len__.
+        def __init__(self, revmap):
+            self.revmap = weakref.proxy(revmap)  # no ref cycle with owner
+            self._cache = {}
+
+        def get(self, key, default=None):
+            if key not in self._cache:
+                result = None
+                for row in self.revmap._query(
+                    'SELECT rev, branch FROM revmap WHERE hash=?',
+                    (SqliteRevMap.sqlblobtype(key),)):
+                    result = (row[0], row[1] or None)
+                    break
+                self._cache[key] = result  # misses are cached as None
+            return self._cache[key] or default
+
+        def __contains__(self, key):
+            return self.get(key) is not None
+
+        def __getitem__(self, key):
+            dummy = self._cache  # unique sentinel, returned only on a miss
+            item = self.get(key, dummy)
+            if item is dummy:
+                raise KeyError(key)
+            else:
+                return item
+
+        def keys(self):
+            for row in self.revmap._query('SELECT hash FROM revmap'):
+                yield bytes(row[0])
+
+    lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulledpath,
+                                   default=0, deserializer=int)
+    rowcount = util.fileproperty('_rowcount', lambda x: x._rowcountpath,
+                                 default=0, deserializer=int)
+
+    def __init__(self, revmap_path, lastpulled_path, sqlitepragmas=None):
+        self._filepath = revmap_path
+        self._dbpath = revmap_path + '.db'
+        self._rowcountpath = self._dbpath + '.rowcount'
+        self._lastpulledpath = lastpulled_path
+
+        self._db = None
+        self._hashes = None
+        self._sqlitepragmas = sqlitepragmas
+        self.firstpulled = 0
+        self._updatefirstlastpulled()
+        # __iter__ is expensive and thus disabled by default
+        # it should only be enabled for testing
+        self._allowiter = False
+
+    def hashes(self):
+        if self._hashes is None:
+            self._hashes = self.ReverseRevMap(self)
+        return self._hashes
+
+    def branchedits(self, branch, revnum):
+        return [((r[0], r[1] or None), bytes(r[2])) for r in
+                self._query('SELECT rev, branch, hash FROM revmap ' +
+                                'WHERE rev < ? AND branch = ? ' +
+                                'ORDER BY rev DESC, branch DESC',
+                                (revnum, branch or ''))]
+
+    def branchmaxrevnum(self, branch, maxrev):
+        for row in self._query('SELECT rev FROM revmap ' +
+                               'WHERE rev <= ? AND branch = ? ' +
+                               'ORDER By rev DESC LIMIT 1',
+                               (maxrev, branch or '')):
+            return row[0]
+        return 0
+
+    @property
+    def lasthash(self):
+        for row in self._query('SELECT hash FROM revmap ORDER BY rev DESC'):
+            return bytes(row[0])
+        return None
+
+    def revhashes(self, revnum):
+        for row in self._query('SELECT hash FROM revmap WHERE rev = ?',
+                               (revnum,)):
+            yield bytes(row[0])
+
+    def clear(self):
+        hgutil.unlinkpath(self._filepath, ignoremissing=True)
+        hgutil.unlinkpath(self._dbpath, ignoremissing=True)
+        hgutil.unlinkpath(self._rowcountpath, ignoremissing=True)
+        self._db = None  # reopened on demand by _transaction()
+        self._hashes = None  # rebuilt lazily by hashes()
+        self._firstpull = None  # NOTE(review): _firstpull/_lastpull are not
+        self._lastpull = None   # read anywhere visible here; confirm intent
+
+    def batchset(self, items, lastpulled):
+        with self._transaction():
+            self._insert(items)
+        self.lastpulled = lastpulled
+
+    def __getitem__(self, key):
+        for row in self._querybykey('SELECT hash', key):
+            return bytes(row[0])
+        raise KeyError(key)
+
+    def __iter__(self):
+        if not self._allowiter:
+            raise NotImplementedError(
+                'SqliteRevMap.__iter__ is not implemented intentionally ' +
+                'to avoid performance issues')
+        # collect result to avoid nested transaction issues
+        rows = []
+        for row in self._query('SELECT rev, branch FROM revmap'):
+            rows.append((row[0], row[1] or None))
+        return iter(rows)
+
+    def __len__(self):
+        # rowcount is faster than "SELECT COUNT(1)". the latter is not O(1)
+        return self.rowcount
+
+    def __setitem__(self, key, binha):
+        revnum, branch = key
+        with self._transaction():
+            self._insert([(revnum, branch, binha)])
+        if revnum < self.firstpulled or not self.firstpulled:
+            self.firstpulled = revnum
+        if revnum > self.lastpulled or not self.lastpulled:
+            self.lastpulled = revnum
+        if self._hashes is not None:
+            self._hashes._cache[binha] = key
+
+    def __delitem__(self, key):
+        with self._transaction() as db:  # DELETE yields no rows: use rowcount
+            ndel = db.execute('DELETE FROM revmap WHERE rev=? AND branch=?',
+                              (key[0], key[1] or '')).rowcount
+        if ndel < 1:
+            raise KeyError(key)
+        self.rowcount = max(self.rowcount - ndel, 0)  # _hashes not updated
+
+    @contextlib.contextmanager
+    def _transaction(self, mode='IMMEDIATE'):
+        if self._db is None:
+            self._opendb()
+        with self._db as db:
+            # wait indefinitely for database lock
+            while True:
+                try:
+                    db.execute('BEGIN %s' % mode)
+                    break
+                except sqlite3.OperationalError as ex:
+                    if str(ex) != 'database is locked':
+                        raise
+            yield db
+
+    def _query(self, sql, params=()):
+        with self._transaction() as db:
+            cur = db.execute(sql, params)
+            try:
+                for row in cur:
+                    yield row
+            finally:
+                cur.close()
+
+    def _querybykey(self, prefix, key):
+        revnum, branch = key
+        return self._query(
+            '%s FROM revmap WHERE rev=? AND branch=?'
+            % prefix, (revnum, branch or ''))
+
+    def _insert(self, rows):
+        # convert to a safe type so '\0' does not truncate the blob
+        if rows and type(rows[0][-1]) is not self.sqlblobtype:
+            rows = [(r, b, self.sqlblobtype(h)) for r, b, h in rows]
+        self._db.executemany(
+            'INSERT OR REPLACE INTO revmap (rev, branch, hash) ' +
+            'VALUES (?, ?, ?)', rows)
+        # If REPLACE happens, rowcount can be wrong. But it is only used to
+        # calculate how many revisions pulled, and during pull we don't
+        # replace rows. So it is fine.
+        self.rowcount += len(rows)
+
+    def _opendb(self):
+        '''Open the database and make sure the table is created on demand.'''
+        version = None
+        try:
+            version = int(open(self._filepath).read(2))
+        except (ValueError, IOError):
+            pass
+        if version and version not in [RevMap.VERSION, self.VERSION]:
+            raise error.Abort('revmap too new -- please upgrade')
+
+        if self._db:
+            self._db.close()
+
+        # if version mismatch, the database is considered invalid
+        if version != self.VERSION:
+            hgutil.unlinkpath(self._dbpath, ignoremissing=True)
+
+        self._db = sqlite3.connect(self._dbpath)
+        self._db.text_factory = bytes
+
+        # cache size affects random accessing (e.g. index building)
+        # performance greatly. default is 2MB (2000 KB), we want to have
+        # a big enough cache that can hold the entire map.
+        cachesize = 2000
+        for path, ratio in [(self._filepath, 1.7), (self._dbpath, 1)]:
+            if os.path.exists(path):
+                cachesize += os.stat(path).st_size * ratio // 1000
+        self._db.execute('PRAGMA cache_size=%d' % (-cachesize))
+
+        # PRAGMA statements provided by the user
+        for pragma in (self._sqlitepragmas or []):
+            # drop malicious ones
+            if re.match(r'\A\w+=\w+\Z', pragma):
+                self._db.execute('PRAGMA %s' % pragma)
+
+        # disable auto-commit. everything is inside a transaction
+        self._db.isolation_level = 'DEFERRED'
+
+        with self._transaction('EXCLUSIVE'):
+            map(self._db.execute, self.TABLESCHEMA)
+            if version == RevMap.VERSION:
+                self.rowcount = 0
+                self._importrevmapv1()
+            elif not self.rowcount:
+                self.rowcount = self._db.execute(
+                    'SELECT COUNT(1) FROM revmap').fetchone()[0]
+
+            # "bulk insert; then create index" is about 2.4x as fast as
+            # "create index; then bulk insert" on a large repo
+            map(self._db.execute, self.INDEXSCHEMA)
+
+        # write a dummy rev map file with just the version number
+        if version != self.VERSION:
+            f = open(self._filepath, 'w')
+            f.write('%s\n' % self.VERSION)
+            f.close()
+
+    def _updatefirstlastpulled(self):
+        sql = 'SELECT rev FROM revmap ORDER BY rev %s LIMIT 1'
+        for row in self._query(sql % 'ASC'):
+            self.firstpulled = row[0]
+        for row in self._query(sql % 'DESC'):
+            if row[0] > self.lastpulled:
+                self.lastpulled = row[0]
+
+    @util.gcdisable
+    def _importrevmapv1(self):
+        with open(self._filepath, 'r') as f:
+            # 1st line is version
+            assert(int(f.readline())) == RevMap.VERSION
+            data = {}
+            for line in f:
+                revnum, ha, branch = line[:-1].split(' ', 2)
+                # ignore malicious lines
+                if len(ha) != 40:
+                    continue
+                data[revnum, branch or None] = bin(ha)
+            self._insert([(r, b, h) for (r, b), h in data.iteritems()])
+
+    @util.gcdisable
+    def exportrevmapv1(self, path):
+        with open(path, 'w') as f:
+            f.write('%s\n' % RevMap.VERSION)
+            for row in self._query('SELECT rev, branch, hash FROM revmap'):
+                rev, br, ha = row
+                f.write('%s %s %s\n' % (rev, hex(ha), br))
+
 
 class FileMap(object):
 
     VERSION = 1
 
-    def __init__(self, meta):
+    def __init__(self, ui, filepath):
         '''Initialise a new FileMap.
 
         The ui argument is used to print diagnostic messages.
@@ -275,16 +799,17 @@ class FileMap(object):
         The path argument is the location of the backing store,
         typically .hg/svn/filemap.
         '''
-        self.meta = meta
+        self._filename = filepath
+        self._ui = ui
         self.include = {}
         self.exclude = {}
-        if os.path.isfile(self.meta.filemap_file):
+        if os.path.isfile(self._filename):
             self._load()
         else:
             self._write()
 
         # append file mapping specified from the commandline
-        clmap = util.configpath(self.meta.ui, 'filemap')
+        clmap = util.configpath(self._ui, 'filemap')
         if clmap:
             self.load(clmap)
 
@@ -326,22 +851,20 @@ class FileMap(object):
         mapping = getattr(self, m)
         if path in mapping:
             msg = 'duplicate %s entry in %s: "%s"\n'
-            self.meta.ui.status(msg % (m, fn, path))
+            self._ui.status(msg % (m, fn, path))
             return
         bits = m.rstrip('e'), path
-        self.meta.ui.debug('%sing %s\n' % bits)
+        self._ui.debug('%sing %s\n' % bits)
         # respect rule order
         mapping[path] = len(self)
-        if fn != self.meta.filemap_file:
-            f = open(self.meta.filemap_file, 'a')
-            f.write(m + ' ' + path + '\n')
-            f.close()
+        if fn != self._filename:
+            with open(self._filename, 'a') as f:
+                f.write(m + ' ' + path + '\n')
 
     def load(self, fn):
-        self.meta.ui.debug('reading file map from %s\n' % fn)
-        f = open(fn, 'r')
-        self.load_fd(f, fn)
-        f.close()
+        self._ui.debug('reading file map from %s\n' % fn)
+        with open(fn, 'r') as f:  # closed on exit, even if load_fd raises
+            self.load_fd(f, fn)  # fn is reused for messages and add()
 
     def load_fd(self, f, fn):
         for line in f:
@@ -354,26 +877,24 @@ class FileMap(object):
                 if cmd in ('include', 'exclude'):
                     self.add(fn, cmd, path)
                     continue
-                self.meta.ui.warn('unknown filemap command %s\n' % cmd)
+                self._ui.warn('unknown filemap command %s\n' % cmd)
             except IndexError:
                 msg = 'ignoring bad line in filemap %s: %s\n'
-                self.meta.ui.warn(msg % (fn, line.rstrip()))
+                self._ui.warn(msg % (fn, line.rstrip()))
 
     def _load(self):
-        self.meta.ui.debug('reading in-repo file map from %s\n' % self.meta.filemap_file)
-        f = open(self.meta.filemap_file)
-        ver = int(f.readline())
-        if ver != self.VERSION:
-            raise hgutil.Abort('filemap too new -- please upgrade')
-        self.load_fd(f, self.meta.filemap_file)
-        f.close()
+        self._ui.debug('reading in-repo file map from %s\n' % self._filename)
+        with open(self._filename) as f:
+            ver = int(f.readline())  # first line is the format version
+            if ver != self.VERSION:  # any mismatch aborts, not only newer
+                raise hgutil.Abort('filemap too new -- please upgrade')
+            self.load_fd(f, self._filename)  # parse the remaining lines
 
     def _write(self):
-        f = open(self.meta.filemap_file, 'w')
-        f.write('%s\n' % self.VERSION)
-        f.close()
+        with open(self._filename, 'w') as f:  # 'w' truncates existing file
+            f.write('%s\n' % self.VERSION)  # version line read by _load()
 
-class BranchMap(dict):
+class BranchMap(BaseMap):
     '''Facility for controlled renaming of branch names. Example:
 
     oldname = newname
@@ -383,63 +904,7 @@ class BranchMap(dict):
     changes on other will now be on default (have no branch name set).
     '''
 
-    def __init__(self, meta):
-        self.meta = meta
-        self.super = super(BranchMap, self)
-        self.super.__init__()
-        self.load(self.meta.branchmap_file)
-
-        # append branch mapping specified from the commandline
-        clmap = util.configpath(self.meta.ui, 'branchmap')
-        if clmap:
-            self.load(clmap)
-
-    def load(self, path):
-        '''Load mappings from a file at the specified path.'''
-        if not os.path.exists(path):
-            return
-
-        writing = False
-        if path != self.meta.branchmap_file:
-            writing = open(self.meta.branchmap_file, 'a')
-
-        self.meta.ui.debug('reading branchmap from %s\n' % path)
-        f = open(path, 'r')
-        for number, line in enumerate(f):
-
-            if writing:
-                writing.write(line)
-
-            line = line.split('#')[0]
-            if not line.strip():
-                continue
-
-            try:
-                src, dst = line.split('=', 1)
-            except (IndexError, ValueError):
-                msg = 'ignoring line %i in branch map %s: %s\n'
-                self.meta.ui.status(msg % (number, path, line.rstrip()))
-                continue
-
-            src = src.strip()
-            dst = dst.strip()
-            self.meta.ui.debug('adding branch %s to branch map\n' % src)
-
-            if not dst:
-                # prevent people from assuming such lines are valid
-                raise hgutil.Abort('removing branches is not supported, yet\n'
-                                   '(line %i in branch map %s)'
-                                   % (number, path))
-            elif src in self and dst != self[src]:
-                msg = 'overriding branch: "%s" to "%s" (%s)\n'
-                self.meta.ui.status(msg % (self[src], dst, src))
-            self[src] = dst
-
-        f.close()
-        if writing:
-            writing.close()
-
-class TagMap(dict):
+class TagMap(BaseMap):
     '''Facility for controlled renaming of tags. Example:
 
     oldname = newname
@@ -448,54 +913,3 @@ class TagMap(dict):
         The oldname tag from SVN will be represented as newname in the hg tags;
         the other tag will not be reflected in the hg repository.
     '''
-
-    def __init__(self, meta):
-        self.meta = meta
-        self.super = super(TagMap, self)
-        self.super.__init__()
-        self.load(self.meta.tagmap_file)
-
-        # append tag mapping specified from the commandline
-        clmap = util.configpath(self.meta.ui, 'tagmap')
-        if clmap:
-            self.load(clmap)
-
-    def load(self, path):
-        '''Load mappings from a file at the specified path.'''
-        if not os.path.exists(path):
-            return
-
-        writing = False
-        if path != self.meta.tagmap_file:
-            writing = open(self.meta.tagmap_file, 'a')
-
-        self.meta.ui.debug('reading tag renames from %s\n' % path)
-        f = open(path, 'r')
-        for number, line in enumerate(f):
-
-            if writing:
-                writing.write(line)
-
-            line = line.split('#')[0]
-            if not line.strip():
-                continue
-
-            try:
-                src, dst = line.split('=', 1)
-            except (IndexError, ValueError):
-                msg = 'ignoring line %i in tag renames %s: %s\n'
-                self.meta.ui.status(msg % (number, path, line.rstrip()))
-                continue
-
-            src = src.strip()
-            dst = dst.strip()
-            self.meta.ui.debug('adding tag %s to tag renames\n' % src)
-
-            if src in self and dst != self[src]:
-                msg = 'overriding tag rename: "%s" to "%s" (%s)\n'
-                self.meta.ui.status(msg % (self[src], dst, src))
-            self[src] = dst
-
-        f.close()
-        if writing:
-            writing.close()