summary refs log tree commit diff
path: root/hgsubversion/maps.py
diff options
context:
space:
mode:
Diffstat (limited to 'hgsubversion/maps.py')
-rw-r--r-- hgsubversion/maps.py 872
1 files changed, 643 insertions, 229 deletions
diff --git a/hgsubversion/maps.py b/hgsubversion/maps.py
index a3eb700..3fc6a5c 100644
--- a/hgsubversion/maps.py
+++ b/hgsubversion/maps.py
@@ -1,92 +1,236 @@
''' Module for self-contained maps. '''
+import collections
+import contextlib
import errno
import os
+import re
+import sqlite3
+import sys
+import weakref
+from mercurial import error
from mercurial import util as hgutil
from mercurial.node import bin, hex, nullid
-import svncommands
+import subprocess
import util
-class AuthorMap(dict):
- '''A mapping from Subversion-style authors to Mercurial-style
- authors, and back. The data is stored persistently on disk.
-
- If the 'hgsubversion.defaultauthors' configuration option is set to false,
- attempting to obtain an unknown author will fail with an Abort.
+class BaseMap(dict):
+ '''A base class for the different types of mappings: author, branch, and
+ tags.'''
+ def __init__(self, ui, filepath):
+ super(BaseMap, self).__init__()
+ self._ui = ui
- If the 'hgsubversion.caseignoreauthors' configuration option is set to true,
- the userid from Subversion is always compared lowercase.
- '''
+ self._commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
+ self.syntaxes = ('re', 'glob')
- def __init__(self, meta):
- '''Initialise a new AuthorMap.
+ self._filepath = filepath
+ self.load(filepath)
- The ui argument is used to print diagnostic messages.
+ # Append mappings specified from the commandline. A little
+ # magic here: our name in the config mapping is the same as
+ # the class name lowercased.
+ clmap = util.configpath(self._ui, self.mapname())
+ if clmap:
+ self.load(clmap)
- The path argument is the location of the backing store,
- typically .hg/svn/authors.
+ @classmethod
+ def mapname(cls):
+ return cls.__name__.lower()
+
+ def _findkey(self, key):
+ '''Takes a string and finds the first corresponding key that matches
+ via regex'''
+ if not key:
+ return None
+
+ # compile a new regex key if we're given a string; can't use
+ # hgutil.compilere since we need regex.sub
+ k = key
+ if isinstance(key, str):
+ k = re.compile(re.escape(key))
+
+ # preference goes to matching the exact pattern, i.e. 'foo' should
+ # first match 'foo' before trying regexes
+ for regex in self:
+ if regex.pattern == k.pattern:
+ return regex
+
+ # if key isn't a string, then we are done; nothing matches
+ if not isinstance(key, str):
+ return None
+
+ # now we test the regex; the above loop will be faster and is
+ # equivalent to not having regexes (i.e. just doing string compares)
+ for regex in self:
+ if regex.search(key):
+ return regex
+ return None
+
+ def get(self, key, default=None):
+ '''Similar to dict.get, except we use our own matcher, _findkey.'''
+ if self._findkey(key):
+ return self[key]
+ return default
+
+ def __getitem__(self, key):
+ '''Similar to dict.__getitem__, except we use our own matcher, _findkey. If the key is
+ a string, then we can use our regex matching to map its value.
'''
- self.meta = meta
- self.defaulthost = ''
- if meta.defaulthost:
- self.defaulthost = '@%s' % meta.defaulthost.lstrip('@')
+ k = self._findkey(key)
+ val = super(BaseMap, self).__getitem__(k)
- self.super = super(AuthorMap, self)
- self.super.__init__()
- self.load(self.meta.authors_file)
+ # if key is a string then we can transform it using our regex, else we
+ # don't have enough information, so we just return the val
+ if isinstance(key, str):
+ val = k.sub(val, key)
- # append authors specified from the commandline
- clmap = util.configpath(self.meta.ui, 'authormap')
- if clmap:
- self.load(clmap)
+ return val
- def load(self, path):
- ''' Load mappings from a file at the specified path. '''
+ def __setitem__(self, key, value):
+ '''Similar to dict.__setitem__, except we compile the string into a regex, if
+ need be.
+ '''
+ # try to find the regex already in the map
+ k = self._findkey(key)
+ # if we found one, then use it
+ if k:
+ key = k
+ # else make a new regex
+ if isinstance(key, str):
+ key = re.compile(re.escape(key))
+ super(BaseMap, self).__setitem__(key, value)
+
+ def __contains__(self, key):
+ '''Similar to dict.__contains__, except we use our own matcher, _findkey.'''
+ return self._findkey(key) is not None
+ def load(self, path):
+ '''Load mappings from a file at the specified path.'''
path = os.path.expandvars(path)
if not os.path.exists(path):
return
writing = False
- if path != self.meta.authors_file:
- writing = open(self.meta.authors_file, 'a')
+ mapfile = self._filepath
+ if path != mapfile:
+ writing = open(mapfile, 'a')
- self.meta.ui.debug('reading authormap from %s\n' % path)
+ self._ui.debug('reading %s from %s\n' % (self.mapname() , path))
f = open(path, 'r')
- for number, line_org in enumerate(f):
+ syntax = ''
+ for number, line in enumerate(f):
- line = line_org.split('#')[0]
- if not line.strip():
+ if writing:
+ writing.write(line)
+
+ # strip out comments
+ if "#" in line:
+ # remove comments prefixed by an even number of escapes
+ line = self._commentre.sub(r'\1', line)
+ # fixup properly escaped comments that survived the above
+ line = line.replace("\\#", "#")
+ line = line.rstrip()
+ if not line:
continue
+ if line.startswith('syntax:'):
+ s = line[7:].strip()
+ syntax = ''
+ if s in self.syntaxes:
+ syntax = s
+ continue
+ pat = syntax
+ for s in self.syntaxes:
+ if line.startswith(s + ':'):
+ pat = s
+ line = line[len(s) + 1:]
+ break
+
+ # split on the first '='
try:
src, dst = line.split('=', 1)
except (IndexError, ValueError):
- msg = 'ignoring line %i in author map %s: %s\n'
- self.meta.ui.status(msg % (number, path, line.rstrip()))
+ msg = 'ignoring line %i in %s %s: %s\n'
+ self._ui.status(msg % (number, self.mapname(), path,
+ line.rstrip()))
continue
src = src.strip()
dst = dst.strip()
- if self.meta.caseignoreauthors:
- src = src.lower()
-
- if writing:
- if not src in self:
- self.meta.ui.debug('adding author %s to author map\n' % src)
- elif dst != self[src]:
- msg = 'overriding author: "%s" to "%s" (%s)\n'
- self.meta.ui.status(msg % (self[src], dst, src))
- writing.write(line_org)
-
+ if pat != 're':
+ src = re.escape(src)
+ if pat == 'glob':
+ src = src.replace('\\*', '.*')
+ src = re.compile(src)
+
+ if src not in self:
+ self._ui.debug('adding %s to %s\n' % (src, self.mapname()))
+ elif dst != self[src]:
+ msg = 'overriding %s: "%s" to "%s" (%s)\n'
+ self._ui.status(msg % (self.mapname(), self[src], dst, src))
self[src] = dst
f.close()
if writing:
writing.close()
+class AuthorMap(BaseMap):
+ '''A mapping from Subversion-style authors to Mercurial-style
+ authors, and back. The data is stored persistently on disk.
+
+ If the 'hgsubversion.defaultauthors' configuration option is set to false,
+ attempting to obtain an unknown author will fail with an Abort.
+
+ If the 'hgsubversion.caseignoreauthors' configuration option is set to true,
+ the userid from Subversion is always compared lowercase.
+ '''
+
+ def __init__(self, ui, filepath, defaulthost, caseignoreauthors,
+ mapauthorscmd, defaultauthors):
+ '''Initialise a new AuthorMap.
+
+ The ui argument is used to print diagnostic messages.
+
+ The filepath argument is the location of the backing store,
+ typically .hg/svn/authors.
+ '''
+ if defaulthost:
+ self.defaulthost = '@%s' % defaulthost.lstrip('@')
+ else:
+ self.defaulthost = ''
+ self._caseignoreauthors = caseignoreauthors
+ self._mapauthorscmd = mapauthorscmd
+ self._defaulthost = defaulthost
+ self._defaultauthors = defaultauthors
+
+ super(AuthorMap, self).__init__(ui, filepath)
+
+ def _lowercase(self, key):
+ '''Lowercase a str or regex key when caseignoreauthors is enabled;
+ otherwise return the key unchanged.'''
+ k = key
+ if self._caseignoreauthors:
+ if isinstance(key, str):
+ k = key.lower()
+ else:
+ k = re.compile(key.pattern.lower())
+ return k
+
+ def __setitem__(self, key, value):
+ '''Similar to dict.__setitem__, except we check caseignoreauthors to
+ use lowercase string or not
+ '''
+ super(AuthorMap, self).__setitem__(self._lowercase(key), value)
+
+ def __contains__(self, key):
+ '''Similar to dict.__contains__, except we check caseignoreauthors to
+ use lowercase string or not
+ '''
+ return super(AuthorMap, self).__contains__(self._lowercase(key))
+
def __getitem__(self, author):
''' Similar to dict.__getitem__, except in case of an unknown author.
In such cases, a new value is generated and added to the dictionary
@@ -94,20 +238,34 @@ class AuthorMap(dict):
if author is None:
author = '(no author)'
+ if not isinstance(author, str):
+ return super(AuthorMap, self).__getitem__(author)
+
search_author = author
- if self.meta.caseignoreauthors:
+ if self._caseignoreauthors:
search_author = author.lower()
+ result = None
if search_author in self:
- result = self.super.__getitem__(search_author)
- elif self.meta.defaultauthors:
- self[author] = result = '%s%s' % (author, self.defaulthost)
- msg = 'substituting author "%s" for default "%s"\n'
- self.meta.ui.debug(msg % (author, result))
- else:
- msg = 'author %s has no entry in the author map!'
- raise hgutil.Abort(msg % author)
- self.meta.ui.debug('mapping author "%s" to "%s"\n' % (author, result))
+ result = super(AuthorMap, self).__getitem__(search_author)
+ elif self._mapauthorscmd:
+ cmd = self._mapauthorscmd % author
+ process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
+ output, err = process.communicate()
+ retcode = process.poll()
+ if retcode:
+ msg = 'map author command "%s" exited with error'
+ raise hgutil.Abort(msg % cmd)
+ self[author] = result = output.strip()
+ if not result:
+ if self._defaultauthors:
+ self[author] = result = '%s%s' % (author, self.defaulthost)
+ msg = 'substituting author "%s" for default "%s"\n'
+ self._ui.debug(msg % (author, result))
+ else:
+ msg = 'author %s has no entry in the author map!'
+ raise hgutil.Abort(msg % author)
+ self._ui.debug('mapping author "%s" to "%s"\n' % (author, result))
return result
def reverselookup(self, author):
@@ -127,24 +285,22 @@ class Tags(dict):
"""
VERSION = 2
- def __init__(self, meta, endrev=None):
+ def __init__(self, ui, filepath, endrev=None):
dict.__init__(self)
- self.meta = meta
+ self._filepath = filepath
+ self._ui = ui
self.endrev = endrev
- if os.path.isfile(self.meta.tagfile):
+ if os.path.isfile(self._filepath):
self._load()
else:
self._write()
def _load(self):
- f = open(self.meta.tagfile)
+ f = open(self._filepath)
ver = int(f.readline())
if ver < self.VERSION:
- self.meta.ui.status('tag map outdated, running rebuildmeta...\n')
- f.close()
- os.unlink(self.meta.tagfile)
- svncommands.rebuildmeta(self.meta.ui, self.meta.repo, ())
- return
+ raise error.Abort(
+ 'tag map outdated, please run `hg svn rebuildmeta`')
elif ver != self.VERSION:
raise hgutil.Abort('tagmap too new -- please upgrade')
for l in f:
@@ -160,7 +316,7 @@ class Tags(dict):
def _write(self):
assert self.endrev is None
- f = open(self.meta.tagfile, 'w')
+ f = open(self._filepath, 'w')
f.write('%s\n' % self.VERSION)
f.close()
@@ -181,7 +337,7 @@ class Tags(dict):
if not tag:
raise hgutil.Abort('tag cannot be empty')
ha, revision = info
- f = open(self.meta.tagfile, 'a')
+ f = open(self._filepath, 'a')
f.write('%s %s %s\n' % (hex(ha), revision, tag))
f.close()
dict.__setitem__(self, tag, ha)
@@ -191,44 +347,97 @@ class RevMap(dict):
VERSION = 1
- def __init__(self, meta):
+ lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulled_file,
+ default=0, deserializer=int)
+
+ def __init__(self, revmap_path, lastpulled_path):
dict.__init__(self)
- self.meta = meta
+ self._filepath = revmap_path
+ self._lastpulled_file = lastpulled_path
self._hashes = None
+ # disable iteration to have a consistent interface with SqliteRevMap;
+ # this is less about performance, since RevMap needs iteration internally
+ self._allowiter = False
- if os.path.isfile(self.meta.revmap_file):
+ self.firstpulled = 0
+ if os.path.isfile(self._filepath):
self._load()
else:
self._write()
def hashes(self):
if self._hashes is None:
- self._hashes = dict((v, k) for (k, v) in self.iteritems())
+ self._hashes = dict((v, k) for (k, v) in self._origiteritems())
return self._hashes
- def branchedits(self, branch, rev):
- check = lambda x: x[0][1] == branch and x[0][0] < rev.revnum
- return sorted(filter(check, self.iteritems()), reverse=True)
+ def branchedits(self, branch, revnum):
+ check = lambda x: x[0][1] == branch and x[0][0] < revnum
+ return sorted(filter(check, self._origiteritems()), reverse=True)
- @classmethod
- def readmapfile(cls, path, missingok=True):
+ def branchmaxrevnum(self, branch, maxrevnum):
+ result = 0
+ for num, br in self._origiterkeys():
+ if br == branch and num <= maxrevnum and num > result:
+ result = num
+ return result
+
+ @property
+ def lasthash(self):
+ lines = list(self._readmapfile())
+ if not lines:
+ return None
+ return bin(lines[-1].split(' ', 2)[1])
+
+ def revhashes(self, revnum):
+ for key, value in self._origiteritems():
+ if key[0] == revnum:
+ yield value
+
+ def clear(self):
+ self._write()
+ dict.clear(self)
+ self._hashes = None
+
+ def batchset(self, items, lastpulled):
+ '''Set items in batches
+
+ items is an array of (rev num, branch, binary hash)
+
+ For performance reasons, internal in-memory state is not updated.
+ To get an up-to-date RevMap, reconstruct the object.
+ '''
+ with open(self._filepath, 'a') as f:
+ f.write(''.join('%s %s %s\n' % (revnum, hex(binhash), br or '')
+ for revnum, br, binhash in items))
+ self.lastpulled = lastpulled
+
+ def _readmapfile(self):
+ path = self._filepath
try:
f = open(path)
except IOError, err:
- if not missingok or err.errno != errno.ENOENT:
+ if err.errno != errno.ENOENT:
raise
return iter([])
ver = int(f.readline())
- if ver != cls.VERSION:
+ if ver == SqliteRevMap.VERSION:
+ revmap = SqliteRevMap(self._filepath, self._lastpulled_file)
+ tmppath = '%s.tmp' % self._filepath
+ revmap.exportrevmapv1(tmppath)
+ os.rename(tmppath, self._filepath)
+ hgutil.unlinkpath(revmap._dbpath)
+ hgutil.unlinkpath(revmap._rowcountpath, ignoremissing=True)
+ return self._readmapfile()
+ if ver != self.VERSION:
raise hgutil.Abort('revmap too new -- please upgrade')
return f
@util.gcdisable
def _load(self):
- lastpulled = self.meta.lastpulled
- firstpulled = self.meta.firstpulled
+ lastpulled = self.lastpulled
+ firstpulled = self.firstpulled
setitem = dict.__setitem__
- for l in self.readmapfile(self.meta.revmap_file):
+ for l in self._readmapfile():
revnum, ha, branch = l.split(' ', 2)
if branch == '\n':
branch = None
@@ -240,34 +449,349 @@ class RevMap(dict):
if revnum < firstpulled or not firstpulled:
firstpulled = revnum
setitem(self, (revnum, branch), bin(ha))
- self.meta.lastpulled = lastpulled
- self.meta.firstpulled = firstpulled
+ if self.lastpulled != lastpulled:
+ self.lastpulled = lastpulled
+ self.firstpulled = firstpulled
def _write(self):
- f = open(self.meta.revmap_file, 'w')
- f.write('%s\n' % self.VERSION)
- f.close()
+ with open(self._filepath, 'w') as f:
+ f.write('%s\n' % self.VERSION)
def __setitem__(self, key, ha):
revnum, branch = key
- f = open(self.meta.revmap_file, 'a')
b = branch or ''
- f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n')
- f.close()
- if revnum > self.meta.lastpulled or not self.meta.lastpulled:
- self.meta.lastpulled = revnum
- if revnum < self.meta.firstpulled or not self.meta.firstpulled:
- self.meta.firstpulled = revnum
+ with open(self._filepath, 'a') as f:
+ f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n')
+ if revnum > self.lastpulled or not self.lastpulled:
+ self.lastpulled = revnum
+ if revnum < self.firstpulled or not self.firstpulled:
+ self.firstpulled = revnum
dict.__setitem__(self, (revnum, branch), ha)
if self._hashes is not None:
self._hashes[ha] = (revnum, branch)
+ @classmethod
+ def _wrapitermethods(cls):
+ def wrap(orig):
+ def wrapper(self, *args, **kwds):
+ if not self._allowiter:
+ raise NotImplementedError(
+ 'Iteration methods on RevMap are disabled ' +
+ 'to avoid performance issues on SqliteRevMap')
+ return orig(self, *args, **kwds)
+ return wrapper
+ methodre = re.compile(r'^_*(?:iter|view)?(?:keys|items|values)?_*$')
+ for name in filter(methodre.match, dir(cls)):
+ orig = getattr(cls, name)
+ setattr(cls, '_orig%s' % name, orig)
+ setattr(cls, name, wrap(orig))
+
+RevMap._wrapitermethods()
+
+
+class SqliteRevMap(collections.MutableMapping):
+ """RevMap backed by sqlite3.
+
+ It tries to address performance issues for a very large rev map.
+ As such iteration is unavailable for both the map itself and the
+ reverse map (self.hashes).
+
+ It migrates from the old RevMap upon first use. Then it will bump the
+ version of revmap so RevMap no longer works. The real database is a
+ separate file which has a ".db" suffix.
+ """
+
+ VERSION = 2
+
+ TABLESCHEMA = [
+ '''CREATE TABLE IF NOT EXISTS revmap (
+ rev INTEGER NOT NULL,
+ branch TEXT NOT NULL DEFAULT '',
+ hash BLOB NOT NULL)''',
+ ]
+
+ INDEXSCHEMA = [
+ 'CREATE UNIQUE INDEX IF NOT EXISTS revbranch ON revmap (rev,branch);',
+ 'CREATE INDEX IF NOT EXISTS hash ON revmap (hash);',
+ ]
+
+ # "bytes" in Python 2 will get truncated at '\0' when storing as sqlite
+ # blobs. "buffer" does not have this issue. Python 3 does not have "buffer"
+ # but "bytes" won't get truncated.
+ sqlblobtype = bytes if sys.version_info >= (3, 0) else buffer
+
+ class ReverseRevMap(object):
+ # collections.Mapping is not suitable since we don't want 2/3 of
+ # its required interfaces: __iter__, __len__.
+ def __init__(self, revmap):
+ self.revmap = weakref.proxy(revmap)
+ self._cache = {}
+
+ def get(self, key, default=None):
+ if key not in self._cache:
+ result = None
+ for row in self.revmap._query(
+ 'SELECT rev, branch FROM revmap WHERE hash=?',
+ (SqliteRevMap.sqlblobtype(key),)):
+ result = (row[0], row[1] or None)
+ break
+ self._cache[key] = result
+ return self._cache[key] or default
+
+ def __contains__(self, key):
+ return self.get(key) != None
+
+ def __getitem__(self, key):
+ dummy = self._cache
+ item = self.get(key, dummy)
+ if item == dummy:
+ raise KeyError(key)
+ else:
+ return item
+
+ def keys(self):
+ for row in self.revmap._query('SELECT hash FROM revmap'):
+ yield bytes(row[0])
+
+ lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulledpath,
+ default=0, deserializer=int)
+ rowcount = util.fileproperty('_rowcount', lambda x: x._rowcountpath,
+ default=0, deserializer=int)
+
+ def __init__(self, revmap_path, lastpulled_path, sqlitepragmas=None):
+ self._filepath = revmap_path
+ self._dbpath = revmap_path + '.db'
+ self._rowcountpath = self._dbpath + '.rowcount'
+ self._lastpulledpath = lastpulled_path
+
+ self._db = None
+ self._hashes = None
+ self._sqlitepragmas = sqlitepragmas
+ self.firstpulled = 0
+ self._updatefirstlastpulled()
+ # __iter__ is expensive and thus disabled by default
+ # it should only be enabled for testing
+ self._allowiter = False
+
+ def hashes(self):
+ if self._hashes is None:
+ self._hashes = self.ReverseRevMap(self)
+ return self._hashes
+
+ def branchedits(self, branch, revnum):
+ return [((r[0], r[1] or None), bytes(r[2])) for r in
+ self._query('SELECT rev, branch, hash FROM revmap ' +
+ 'WHERE rev < ? AND branch = ? ' +
+ 'ORDER BY rev DESC, branch DESC',
+ (revnum, branch or ''))]
+
+ def branchmaxrevnum(self, branch, maxrev):
+ for row in self._query('SELECT rev FROM revmap ' +
+ 'WHERE rev <= ? AND branch = ? ' +
+ 'ORDER By rev DESC LIMIT 1',
+ (maxrev, branch or '')):
+ return row[0]
+ return 0
+
+ @property
+ def lasthash(self):
+ for row in self._query('SELECT hash FROM revmap ORDER BY rev DESC'):
+ return bytes(row[0])
+ return None
+
+ def revhashes(self, revnum):
+ for row in self._query('SELECT hash FROM revmap WHERE rev = ?',
+ (revnum,)):
+ yield bytes(row[0])
+
+ def clear(self):
+ hgutil.unlinkpath(self._filepath, ignoremissing=True)
+ hgutil.unlinkpath(self._dbpath, ignoremissing=True)
+ hgutil.unlinkpath(self._rowcountpath, ignoremissing=True)
+ self._db = None
+ self._hashes = None
+ self._firstpull = None
+ self._lastpull = None
+
+ def batchset(self, items, lastpulled):
+ with self._transaction():
+ self._insert(items)
+ self.lastpulled = lastpulled
+
+ def __getitem__(self, key):
+ for row in self._querybykey('SELECT hash', key):
+ return bytes(row[0])
+ raise KeyError(key)
+
+ def __iter__(self):
+ if not self._allowiter:
+ raise NotImplementedError(
+ 'SqliteRevMap.__iter__ is not implemented intentionally ' +
+ 'to avoid performance issues')
+ # collect result to avoid nested transaction issues
+ rows = []
+ for row in self._query('SELECT rev, branch FROM revmap'):
+ rows.append((row[0], row[1] or None))
+ return iter(rows)
+
+ def __len__(self):
+ # rowcount is faster than "SELECT COUNT(1)". the latter is not O(1)
+ return self.rowcount
+
+ def __setitem__(self, key, binha):
+ revnum, branch = key
+ with self._transaction():
+ self._insert([(revnum, branch, binha)])
+ if revnum < self.firstpulled or not self.firstpulled:
+ self.firstpulled = revnum
+ if revnum > self.lastpulled or not self.lastpulled:
+ self.lastpulled = revnum
+ if self._hashes is not None:
+ self._hashes._cache[binha] = key
+
+ def __delitem__(self, key):
+ for row in self._querybykey('DELETE', key):
+ if self.rowcount > 0:
+ self.rowcount -= 1
+ return
+ # For performance reasons, self._hashes is not updated
+ raise KeyError(key)
+
+ @contextlib.contextmanager
+ def _transaction(self, mode='IMMEDIATE'):
+ if self._db is None:
+ self._opendb()
+ with self._db as db:
+ # wait indefinitely for database lock
+ while True:
+ try:
+ db.execute('BEGIN %s' % mode)
+ break
+ except sqlite3.OperationalError as ex:
+ if str(ex) != 'database is locked':
+ raise
+ yield db
+
+ def _query(self, sql, params=()):
+ with self._transaction() as db:
+ cur = db.execute(sql, params)
+ try:
+ for row in cur:
+ yield row
+ finally:
+ cur.close()
+
+ def _querybykey(self, prefix, key):
+ revnum, branch = key
+ return self._query(
+ '%s FROM revmap WHERE rev=? AND branch=?'
+ % prefix, (revnum, branch or ''))
+
+ def _insert(self, rows):
+ # convert to a safe type so '\0' does not truncate the blob
+ if rows and type(rows[0][-1]) is not self.sqlblobtype:
+ rows = [(r, b, self.sqlblobtype(h)) for r, b, h in rows]
+ self._db.executemany(
+ 'INSERT OR REPLACE INTO revmap (rev, branch, hash) ' +
+ 'VALUES (?, ?, ?)', rows)
+ # If REPLACE happens, rowcount can be wrong. But it is only used to
+ # calculate how many revisions pulled, and during pull we don't
+ # replace rows. So it is fine.
+ self.rowcount += len(rows)
+
+ def _opendb(self):
+ '''Open the database and make sure the table is created on demand.'''
+ version = None
+ try:
+ version = int(open(self._filepath).read(2))
+ except (ValueError, IOError):
+ pass
+ if version and version not in [RevMap.VERSION, self.VERSION]:
+ raise error.Abort('revmap too new -- please upgrade')
+
+ if self._db:
+ self._db.close()
+
+ # if version mismatch, the database is considered invalid
+ if version != self.VERSION:
+ hgutil.unlinkpath(self._dbpath, ignoremissing=True)
+
+ self._db = sqlite3.connect(self._dbpath)
+ self._db.text_factory = bytes
+
+ # cache size affects random accessing (e.g. index building)
+ # performance greatly. default is 2MB (2000 KB), we want to have
+ # a big enough cache that can hold the entire map.
+ cachesize = 2000
+ for path, ratio in [(self._filepath, 1.7), (self._dbpath, 1)]:
+ if os.path.exists(path):
+ cachesize += os.stat(path).st_size * ratio // 1000
+ self._db.execute('PRAGMA cache_size=%d' % (-cachesize))
+
+ # PRAGMA statements provided by the user
+ for pragma in (self._sqlitepragmas or []):
+ # drop malicious ones
+ if re.match(r'\A\w+=\w+\Z', pragma):
+ self._db.execute('PRAGMA %s' % pragma)
+
+ # disable auto-commit. everything is inside a transaction
+ self._db.isolation_level = 'DEFERRED'
+
+ with self._transaction('EXCLUSIVE'):
+ map(self._db.execute, self.TABLESCHEMA)
+ if version == RevMap.VERSION:
+ self.rowcount = 0
+ self._importrevmapv1()
+ elif not self.rowcount:
+ self.rowcount = self._db.execute(
+ 'SELECT COUNT(1) FROM revmap').fetchone()[0]
+
+ # "bulk insert; then create index" is about 2.4x as fast as
+ # "create index; then bulk insert" on a large repo
+ map(self._db.execute, self.INDEXSCHEMA)
+
+ # write a dummy rev map file with just the version number
+ if version != self.VERSION:
+ f = open(self._filepath, 'w')
+ f.write('%s\n' % self.VERSION)
+ f.close()
+
+ def _updatefirstlastpulled(self):
+ sql = 'SELECT rev FROM revmap ORDER BY rev %s LIMIT 1'
+ for row in self._query(sql % 'ASC'):
+ self.firstpulled = row[0]
+ for row in self._query(sql % 'DESC'):
+ if row[0] > self.lastpulled:
+ self.lastpulled = row[0]
+
+ @util.gcdisable
+ def _importrevmapv1(self):
+ with open(self._filepath, 'r') as f:
+ # 1st line is version
+ assert(int(f.readline())) == RevMap.VERSION
+ data = {}
+ for line in f:
+ revnum, ha, branch = line[:-1].split(' ', 2)
+ # ignore malformed lines
+ if len(ha) != 40:
+ continue
+ data[revnum, branch or None] = bin(ha)
+ self._insert([(r, b, h) for (r, b), h in data.iteritems()])
+
+ @util.gcdisable
+ def exportrevmapv1(self, path):
+ with open(path, 'w') as f:
+ f.write('%s\n' % RevMap.VERSION)
+ for row in self._query('SELECT rev, branch, hash FROM revmap'):
+ rev, br, ha = row
+ f.write('%s %s %s\n' % (rev, hex(ha), br))
+
class FileMap(object):
VERSION = 1
- def __init__(self, meta):
+ def __init__(self, ui, filepath):
'''Initialise a new FileMap.
The ui argument is used to print diagnostic messages.
@@ -275,16 +799,17 @@ class FileMap(object):
The filepath argument is the location of the backing store,
typically .hg/svn/filemap.
'''
- self.meta = meta
+ self._filename = filepath
+ self._ui = ui
self.include = {}
self.exclude = {}
- if os.path.isfile(self.meta.filemap_file):
+ if os.path.isfile(self._filename):
self._load()
else:
self._write()
# append file mapping specified from the commandline
- clmap = util.configpath(self.meta.ui, 'filemap')
+ clmap = util.configpath(self._ui, 'filemap')
if clmap:
self.load(clmap)
@@ -326,22 +851,20 @@ class FileMap(object):
mapping = getattr(self, m)
if path in mapping:
msg = 'duplicate %s entry in %s: "%s"\n'
- self.meta.ui.status(msg % (m, fn, path))
+ self._ui.status(msg % (m, fn, path))
return
bits = m.rstrip('e'), path
- self.meta.ui.debug('%sing %s\n' % bits)
+ self._ui.debug('%sing %s\n' % bits)
# respect rule order
mapping[path] = len(self)
- if fn != self.meta.filemap_file:
- f = open(self.meta.filemap_file, 'a')
- f.write(m + ' ' + path + '\n')
- f.close()
+ if fn != self._filename:
+ with open(self._filename, 'a') as f:
+ f.write(m + ' ' + path + '\n')
def load(self, fn):
- self.meta.ui.debug('reading file map from %s\n' % fn)
- f = open(fn, 'r')
- self.load_fd(f, fn)
- f.close()
+ self._ui.debug('reading file map from %s\n' % fn)
+ with open(fn, 'r') as f:
+ self.load_fd(f, fn)
def load_fd(self, f, fn):
for line in f:
@@ -354,26 +877,24 @@ class FileMap(object):
if cmd in ('include', 'exclude'):
self.add(fn, cmd, path)
continue
- self.meta.ui.warn('unknown filemap command %s\n' % cmd)
+ self._ui.warn('unknown filemap command %s\n' % cmd)
except IndexError:
msg = 'ignoring bad line in filemap %s: %s\n'
- self.meta.ui.warn(msg % (fn, line.rstrip()))
+ self._ui.warn(msg % (fn, line.rstrip()))
def _load(self):
- self.meta.ui.debug('reading in-repo file map from %s\n' % self.meta.filemap_file)
- f = open(self.meta.filemap_file)
- ver = int(f.readline())
- if ver != self.VERSION:
- raise hgutil.Abort('filemap too new -- please upgrade')
- self.load_fd(f, self.meta.filemap_file)
- f.close()
+ self._ui.debug('reading in-repo file map from %s\n' % self._filename)
+ with open(self._filename) as f:
+ ver = int(f.readline())
+ if ver != self.VERSION:
+ raise hgutil.Abort('filemap too new -- please upgrade')
+ self.load_fd(f, self._filename)
def _write(self):
- f = open(self.meta.filemap_file, 'w')
- f.write('%s\n' % self.VERSION)
- f.close()
+ with open(self._filename, 'w') as f:
+ f.write('%s\n' % self.VERSION)
-class BranchMap(dict):
+class BranchMap(BaseMap):
'''Facility for controlled renaming of branch names. Example:
oldname = newname
@@ -383,63 +904,7 @@ class BranchMap(dict):
changes on other will now be on default (have no branch name set).
'''
- def __init__(self, meta):
- self.meta = meta
- self.super = super(BranchMap, self)
- self.super.__init__()
- self.load(self.meta.branchmap_file)
-
- # append branch mapping specified from the commandline
- clmap = util.configpath(self.meta.ui, 'branchmap')
- if clmap:
- self.load(clmap)
-
- def load(self, path):
- '''Load mappings from a file at the specified path.'''
- if not os.path.exists(path):
- return
-
- writing = False
- if path != self.meta.branchmap_file:
- writing = open(self.meta.branchmap_file, 'a')
-
- self.meta.ui.debug('reading branchmap from %s\n' % path)
- f = open(path, 'r')
- for number, line in enumerate(f):
-
- if writing:
- writing.write(line)
-
- line = line.split('#')[0]
- if not line.strip():
- continue
-
- try:
- src, dst = line.split('=', 1)
- except (IndexError, ValueError):
- msg = 'ignoring line %i in branch map %s: %s\n'
- self.meta.ui.status(msg % (number, path, line.rstrip()))
- continue
-
- src = src.strip()
- dst = dst.strip()
- self.meta.ui.debug('adding branch %s to branch map\n' % src)
-
- if not dst:
- # prevent people from assuming such lines are valid
- raise hgutil.Abort('removing branches is not supported, yet\n'
- '(line %i in branch map %s)'
- % (number, path))
- elif src in self and dst != self[src]:
- msg = 'overriding branch: "%s" to "%s" (%s)\n'
- self.meta.ui.status(msg % (self[src], dst, src))
- self[src] = dst
-
- f.close()
- if writing:
- writing.close()
-
-class TagMap(dict):
+class TagMap(BaseMap):
'''Facility for controlled renaming of tags. Example:
oldname = newname
@@ -448,54 +913,3 @@ class TagMap(dict):
The oldname tag from SVN will be represented as newname in the hg tags;
the other tag will not be reflected in the hg repository.
'''
-
- def __init__(self, meta):
- self.meta = meta
- self.super = super(TagMap, self)
- self.super.__init__()
- self.load(self.meta.tagmap_file)
-
- # append tag mapping specified from the commandline
- clmap = util.configpath(self.meta.ui, 'tagmap')
- if clmap:
- self.load(clmap)
-
- def load(self, path):
- '''Load mappings from a file at the specified path.'''
- if not os.path.exists(path):
- return
-
- writing = False
- if path != self.meta.tagmap_file:
- writing = open(self.meta.tagmap_file, 'a')
-
- self.meta.ui.debug('reading tag renames from %s\n' % path)
- f = open(path, 'r')
- for number, line in enumerate(f):
-
- if writing:
- writing.write(line)
-
- line = line.split('#')[0]
- if not line.strip():
- continue
-
- try:
- src, dst = line.split('=', 1)
- except (IndexError, ValueError):
- msg = 'ignoring line %i in tag renames %s: %s\n'
- self.meta.ui.status(msg % (number, path, line.rstrip()))
- continue
-
- src = src.strip()
- dst = dst.strip()
- self.meta.ui.debug('adding tag %s to tag renames\n' % src)
-
- if src in self and dst != self[src]:
- msg = 'overriding tag rename: "%s" to "%s" (%s)\n'
- self.meta.ui.status(msg % (self[src], dst, src))
- self[src] = dst
-
- f.close()
- if writing:
- writing.close()