diff options
Diffstat (limited to 'hgsubversion/maps.py')
-rw-r--r-- | hgsubversion/maps.py | 872 |
1 files changed, 643 insertions, 229 deletions
diff --git a/hgsubversion/maps.py b/hgsubversion/maps.py index a3eb700..3fc6a5c 100644 --- a/hgsubversion/maps.py +++ b/hgsubversion/maps.py @@ -1,92 +1,236 @@ ''' Module for self-contained maps. ''' +import collections +import contextlib import errno import os +import re +import sqlite3 +import sys +import weakref +from mercurial import error from mercurial import util as hgutil from mercurial.node import bin, hex, nullid -import svncommands +import subprocess import util -class AuthorMap(dict): - '''A mapping from Subversion-style authors to Mercurial-style - authors, and back. The data is stored persistently on disk. - - If the 'hgsubversion.defaultauthors' configuration option is set to false, - attempting to obtain an unknown author will fail with an Abort. +class BaseMap(dict): + '''A base class for the different type of mappings: author, branch, and + tags.''' + def __init__(self, ui, filepath): + super(BaseMap, self).__init__() + self._ui = ui - If the 'hgsubversion.caseignoreauthors' configuration option is set to true, - the userid from Subversion is always compared lowercase. - ''' + self._commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*') + self.syntaxes = ('re', 'glob') - def __init__(self, meta): - '''Initialise a new AuthorMap. + self._filepath = filepath + self.load(filepath) - The ui argument is used to print diagnostic messages. + # Append mappings specified from the commandline. A little + # magic here: our name in the config mapping is the same as + # the class name lowercased. + clmap = util.configpath(self._ui, self.mapname()) + if clmap: + self.load(clmap) - The path argument is the location of the backing store, - typically .hg/svn/authors. + @classmethod + def mapname(cls): + return cls.__name__.lower() + + def _findkey(self, key): + '''Takes a string and finds the first corresponding key that matches + via regex''' + if not key: + return None + + # compile a new regex key if we're given a string; can't use + # hgutil.compilere since we need regex.sub + k = key + if isinstance(key, str): + k = re.compile(re.escape(key)) + + # preference goes to matching the exact pattern, i.e. 'foo' should + # first match 'foo' before trying regexes + for regex in self: + if regex.pattern == k.pattern: + return regex + + # if key isn't a string, then we are done; nothing matches + if not isinstance(key, str): + return None + + # now we test the regex; the above loop will be faster and is + # equivalent to not having regexes (i.e. just doing string compares) + for regex in self: + if regex.search(key): + return regex + return None + + def get(self, key, default=None): + '''Similar to dict.get, except we use our own matcher, _findkey.''' + if self._findkey(key): + return self[key] + return default + + def __getitem__(self, key): + '''Similar to dict.get, except we use our own matcher, _findkey. If the key is + a string, then we can use our regex matching to map its value. ''' - self.meta = meta - self.defaulthost = '' - if meta.defaulthost: - self.defaulthost = '@%s' % meta.defaulthost.lstrip('@') + k = self._findkey(key) + val = super(BaseMap, self).__getitem__(k) - self.super = super(AuthorMap, self) - self.super.__init__() - self.load(self.meta.authors_file) + # if key is a string then we can transform it using our regex, else we + # don't have enough information, so we just return the val + if isinstance(key, str): + val = k.sub(val, key) - # append authors specified from the commandline - clmap = util.configpath(self.meta.ui, 'authormap') - if clmap: - self.load(clmap) + return val - def load(self, path): - ''' Load mappings from a file at the specified path. ''' + def __setitem__(self, key, value): + '''Similar to dict.__setitem__, except we compile the string into a regex, if + need be. + ''' + # try to find the regex already in the map + k = self._findkey(key) + # if we found one, then use it + if k: + key = k + # else make a new regex + if isinstance(key, str): + key = re.compile(re.escape(key)) + super(BaseMap, self).__setitem__(key, value) + + def __contains__(self, key): + '''Similar to dict.get, except we use our own matcher, _findkey.''' + return self._findkey(key) is not None + def load(self, path): + '''Load mappings from a file at the specified path.''' path = os.path.expandvars(path) if not os.path.exists(path): return writing = False - if path != self.meta.authors_file: - writing = open(self.meta.authors_file, 'a') + mapfile = self._filepath + if path != mapfile: + writing = open(mapfile, 'a') - self.meta.ui.debug('reading authormap from %s\n' % path) + self._ui.debug('reading %s from %s\n' % (self.mapname() , path)) f = open(path, 'r') - for number, line_org in enumerate(f): + syntax = '' + for number, line in enumerate(f): - line = line_org.split('#')[0] - if not line.strip(): + if writing: + writing.write(line) + + # strip out comments + if "#" in line: + # remove comments prefixed by an even number of escapes + line = self._commentre.sub(r'\1', line) + # fixup properly escaped comments that survived the above + line = line.replace("\\#", "#") + line = line.rstrip() + if not line: continue + if line.startswith('syntax:'): + s = line[7:].strip() + syntax = '' + if s in self.syntaxes: + syntax = s + continue + pat = syntax + for s in self.syntaxes: + if line.startswith(s + ':'): + pat = s + line = line[len(s) + 1:] + break + + # split on the first '=' try: src, dst = line.split('=', 1) except (IndexError, ValueError): - msg = 'ignoring line %i in author map %s: %s\n' - self.meta.ui.status(msg % (number, path, line.rstrip())) + msg = 'ignoring line %i in %s %s: %s\n' + self._ui.status(msg % (number, self.mapname(), path, + line.rstrip())) continue src = src.strip() dst = dst.strip() - if self.meta.caseignoreauthors: - src = src.lower() - - if writing: - if not src in self: - self.meta.ui.debug('adding author %s to author map\n' % src) - elif dst != self[src]: - msg = 'overriding author: "%s" to "%s" (%s)\n' - self.meta.ui.status(msg % (self[src], dst, src)) - writing.write(line_org) - + if pat != 're': + src = re.escape(src) + if pat == 'glob': + src = src.replace('\\*', '.*') + src = re.compile(src) + + if src not in self: + self._ui.debug('adding %s to %s\n' % (src, self.mapname())) + elif dst != self[src]: + msg = 'overriding %s: "%s" to "%s" (%s)\n' + self._ui.status(msg % (self.mapname(), self[src], dst, src)) self[src] = dst f.close() if writing: writing.close() +class AuthorMap(BaseMap): + '''A mapping from Subversion-style authors to Mercurial-style + authors, and back. The data is stored persistently on disk. + + If the 'hgsubversion.defaultauthors' configuration option is set to false, + attempting to obtain an unknown author will fail with an Abort. + + If the 'hgsubversion.caseignoreauthors' configuration option is set to true, + the userid from Subversion is always compared lowercase. + ''' + + def __init__(self, ui, filepath, defaulthost, caseignoreauthors, + mapauthorscmd, defaultauthors): + '''Initialise a new AuthorMap. + + The ui argument is used to print diagnostic messages. + + The path argument is the location of the backing store, + typically .hg/svn/authors. + ''' + if defaulthost: + self.defaulthost = '@%s' % defaulthost.lstrip('@') + else: + self.defaulthost = '' + self._caseignoreauthors = caseignoreauthors + self._mapauthorscmd = mapauthorscmd + self._defaulthost = defaulthost + self._defaultauthors = defaultauthors + + super(AuthorMap, self).__init__(ui, filepath) + + def _lowercase(self, key): + '''Determine whether or not to lowercase a str or regex using the + meta.caseignoreauthors.''' + k = key + if self._caseignoreauthors: + if isinstance(key, str): + k = key.lower() + else: + k = re.compile(key.pattern.lower()) + return k + + def __setitem__(self, key, value): + '''Similar to dict.__setitem__, except we check caseignoreauthors to + use lowercase string or not + ''' + super(AuthorMap, self).__setitem__(self._lowercase(key), value) + + def __contains__(self, key): + '''Similar to dict.__contains__, except we check caseignoreauthors to + use lowercase string or not + ''' + return super(AuthorMap, self).__contains__(self._lowercase(key)) + def __getitem__(self, author): ''' Similar to dict.__getitem__, except in case of an unknown author. In such cases, a new value is generated and added to the dictionary @@ -94,20 +238,34 @@ class AuthorMap(dict): if author is None: author = '(no author)' + if not isinstance(author, str): + return super(AuthorMap, self).__getitem__(author) + search_author = author - if self.meta.caseignoreauthors: + if self._caseignoreauthors: search_author = author.lower() + result = None if search_author in self: - result = self.super.__getitem__(search_author) - elif self.meta.defaultauthors: - self[author] = result = '%s%s' % (author, self.defaulthost) - msg = 'substituting author "%s" for default "%s"\n' - self.meta.ui.debug(msg % (author, result)) - else: - msg = 'author %s has no entry in the author map!' - raise hgutil.Abort(msg % author) - self.meta.ui.debug('mapping author "%s" to "%s"\n' % (author, result)) + result = super(AuthorMap, self).__getitem__(search_author) + elif self._mapauthorscmd: + cmd = self._mapauthorscmd % author + process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + output, err = process.communicate() + retcode = process.poll() + if retcode: + msg = 'map author command "%s" exited with error' + raise hgutil.Abort(msg % cmd) + self[author] = result = output.strip() + if not result: + if self._defaultauthors: + self[author] = result = '%s%s' % (author, self.defaulthost) + msg = 'substituting author "%s" for default "%s"\n' + self._ui.debug(msg % (author, result)) + else: + msg = 'author %s has no entry in the author map!' + raise hgutil.Abort(msg % author) + self._ui.debug('mapping author "%s" to "%s"\n' % (author, result)) return result def reverselookup(self, author): @@ -127,24 +285,22 @@ class Tags(dict): """ VERSION = 2 - def __init__(self, meta, endrev=None): + def __init__(self, ui, filepath, endrev=None): dict.__init__(self) - self.meta = meta + self._filepath = filepath + self._ui = ui self.endrev = endrev - if os.path.isfile(self.meta.tagfile): + if os.path.isfile(self._filepath): self._load() else: self._write() def _load(self): - f = open(self.meta.tagfile) + f = open(self._filepath) ver = int(f.readline()) if ver < self.VERSION: - self.meta.ui.status('tag map outdated, running rebuildmeta...\n') - f.close() - os.unlink(self.meta.tagfile) - svncommands.rebuildmeta(self.meta.ui, self.meta.repo, ()) - return + raise error.Abort( + 'tag map outdated, please run `hg svn rebuildmeta`') elif ver != self.VERSION: raise hgutil.Abort('tagmap too new -- please upgrade') for l in f: @@ -160,7 +316,7 @@ class Tags(dict): def _write(self): assert self.endrev is None - f = open(self.meta.tagfile, 'w') + f = open(self._filepath, 'w') f.write('%s\n' % self.VERSION) f.close() @@ -181,7 +337,7 @@ class Tags(dict): if not tag: raise hgutil.Abort('tag cannot be empty') ha, revision = info - f = open(self.meta.tagfile, 'a') + f = open(self._filepath, 'a') f.write('%s %s %s\n' % (hex(ha), revision, tag)) f.close() dict.__setitem__(self, tag, ha) @@ -191,44 +347,97 @@ class RevMap(dict): VERSION = 1 - def __init__(self, meta): + lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulled_file, + default=0, deserializer=int) + + def __init__(self, revmap_path, lastpulled_path): dict.__init__(self) - self.meta = meta + self._filepath = revmap_path + self._lastpulled_file = lastpulled_path self._hashes = None + # disable iteration to have a consistent interface with SqliteRevMap + # it's less about performance since RevMap needs iteration internally + self._allowiter = False - if os.path.isfile(self.meta.revmap_file): + self.firstpulled = 0 + if os.path.isfile(self._filepath): self._load() else: self._write() def hashes(self): if self._hashes is None: - self._hashes = dict((v, k) for (k, v) in self.iteritems()) + self._hashes = dict((v, k) for (k, v) in self._origiteritems()) return self._hashes - def branchedits(self, branch, rev): - check = lambda x: x[0][1] == branch and x[0][0] < rev.revnum - return sorted(filter(check, self.iteritems()), reverse=True) + def branchedits(self, branch, revnum): + check = lambda x: x[0][1] == branch and x[0][0] < revnum + return sorted(filter(check, self._origiteritems()), reverse=True) - @classmethod - def readmapfile(cls, path, missingok=True): + def branchmaxrevnum(self, branch, maxrevnum): + result = 0 + for num, br in self._origiterkeys(): + if br == branch and num <= maxrevnum and num > result: + result = num + return result + + @property + def lasthash(self): + lines = list(self._readmapfile()) + if not lines: + return None + return bin(lines[-1].split(' ', 2)[1]) + + def revhashes(self, revnum): + for key, value in self._origiteritems(): + if key[0] == revnum: + yield value + + def clear(self): + self._write() + dict.clear(self) + self._hashes = None + + def batchset(self, items, lastpulled): + '''Set items in batches + + items is an array of (rev num, branch, binary hash) + + For performance reason, internal in-memory state is not updated. + To get an up-to-date RevMap, reconstruct the object. + ''' + with open(self._filepath, 'a') as f: + f.write(''.join('%s %s %s\n' % (revnum, hex(binhash), br or '') + for revnum, br, binhash in items)) + self.lastpulled = lastpulled + + def _readmapfile(self): + path = self._filepath try: f = open(path) except IOError, err: - if not missingok or err.errno != errno.ENOENT: + if err.errno != errno.ENOENT: raise return iter([]) ver = int(f.readline()) - if ver != cls.VERSION: + if ver == SqliteRevMap.VERSION: + revmap = SqliteRevMap(self._filepath, self._lastpulled_file) + tmppath = '%s.tmp' % self._filepath + revmap.exportrevmapv1(tmppath) + os.rename(tmppath, self._filepath) + hgutil.unlinkpath(revmap._dbpath) + hgutil.unlinkpath(revmap._rowcountpath, ignoremissing=True) + return self._readmapfile() + if ver != self.VERSION: raise hgutil.Abort('revmap too new -- please upgrade') return f @util.gcdisable def _load(self): - lastpulled = self.meta.lastpulled - firstpulled = self.meta.firstpulled + lastpulled = self.lastpulled + firstpulled = self.firstpulled setitem = dict.__setitem__ - for l in self.readmapfile(self.meta.revmap_file): + for l in self._readmapfile(): revnum, ha, branch = l.split(' ', 2) if branch == '\n': branch = None @@ -240,34 +449,349 @@ class RevMap(dict): if revnum < firstpulled or not firstpulled: firstpulled = revnum setitem(self, (revnum, branch), bin(ha)) - self.meta.lastpulled = lastpulled - self.meta.firstpulled = firstpulled + if self.lastpulled != lastpulled: + self.lastpulled = lastpulled + self.firstpulled = firstpulled def _write(self): - f = open(self.meta.revmap_file, 'w') - f.write('%s\n' % self.VERSION) - f.close() + with open(self._filepath, 'w') as f: + f.write('%s\n' % self.VERSION) def __setitem__(self, key, ha): revnum, branch = key - f = open(self.meta.revmap_file, 'a') b = branch or '' - f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n') - f.close() - if revnum > self.meta.lastpulled or not self.meta.lastpulled: - self.meta.lastpulled = revnum - if revnum < self.meta.firstpulled or not self.meta.firstpulled: - self.meta.firstpulled = revnum + with open(self._filepath, 'a') as f: + f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n') + if revnum > self.lastpulled or not self.lastpulled: + self.lastpulled = revnum + if revnum < self.firstpulled or not self.firstpulled: + self.firstpulled = revnum dict.__setitem__(self, (revnum, branch), ha) if self._hashes is not None: self._hashes[ha] = (revnum, branch) + @classmethod + def _wrapitermethods(cls): + def wrap(orig): + def wrapper(self, *args, **kwds): + if not self._allowiter: + raise NotImplementedError( + 'Iteration methods on RevMap are disabled ' + + 'to avoid performance issues on SqliteRevMap') + return orig(self, *args, **kwds) + return wrapper + methodre = re.compile(r'^_*(?:iter|view)?(?:keys|items|values)?_*$') + for name in filter(methodre.match, dir(cls)): + orig = getattr(cls, name) + setattr(cls, '_orig%s' % name, orig) + setattr(cls, name, wrap(orig)) + +RevMap._wrapitermethods() + + +class SqliteRevMap(collections.MutableMapping): + """RevMap backed by sqlite3. + + It tries to address performance issues for a very large rev map. + As such iteration is unavailable for both the map itself and the + reverse map (self.hashes). + + It migrates from the old RevMap upon first use. Then it will bump the + version of revmap so RevMap no longer works. The real database is a + separated file which has a ".db" suffix. + """ + + VERSION = 2 + + TABLESCHEMA = [ + '''CREATE TABLE IF NOT EXISTS revmap ( + rev INTEGER NOT NULL, + branch TEXT NOT NULL DEFAULT '', + hash BLOB NOT NULL)''', + ] + + INDEXSCHEMA = [ + 'CREATE UNIQUE INDEX IF NOT EXISTS revbranch ON revmap (rev,branch);', + 'CREATE INDEX IF NOT EXISTS hash ON revmap (hash);', + ] + + # "bytes" in Python 2 will get truncated at '\0' when storing as sqlite + # blobs. "buffer" does not have this issue. Python 3 does not have "buffer" + # but "bytes" won't get truncated. + sqlblobtype = bytes if sys.version_info >= (3, 0) else buffer + + class ReverseRevMap(object): + # collections.Mapping is not suitable since we don't want 2/3 of + # its required interfaces: __iter__, __len__. + def __init__(self, revmap): + self.revmap = weakref.proxy(revmap) + self._cache = {} + + def get(self, key, default=None): + if key not in self._cache: + result = None + for row in self.revmap._query( + 'SELECT rev, branch FROM revmap WHERE hash=?', + (SqliteRevMap.sqlblobtype(key),)): + result = (row[0], row[1] or None) + break + self._cache[key] = result + return self._cache[key] or default + + def __contains__(self, key): + return self.get(key) != None + + def __getitem__(self, key): + dummy = self._cache + item = self.get(key, dummy) + if item == dummy: + raise KeyError(key) + else: + return item + + def keys(self): + for row in self.revmap._query('SELECT hash FROM revmap'): + yield bytes(row[0]) + + lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulledpath, + default=0, deserializer=int) + rowcount = util.fileproperty('_rowcount', lambda x: x._rowcountpath, + default=0, deserializer=int) + + def __init__(self, revmap_path, lastpulled_path, sqlitepragmas=None): + self._filepath = revmap_path + self._dbpath = revmap_path + '.db' + self._rowcountpath = self._dbpath + '.rowcount' + self._lastpulledpath = lastpulled_path + + self._db = None + self._hashes = None + self._sqlitepragmas = sqlitepragmas + self.firstpulled = 0 + self._updatefirstlastpulled() + # __iter__ is expensive and thus disabled by default + # it should only be enabled for testing + self._allowiter = False + + def hashes(self): + if self._hashes is None: + self._hashes = self.ReverseRevMap(self) + return self._hashes + + def branchedits(self, branch, revnum): + return [((r[0], r[1] or None), bytes(r[2])) for r in + self._query('SELECT rev, branch, hash FROM revmap ' + + 'WHERE rev < ? AND branch = ? ' + + 'ORDER BY rev DESC, branch DESC', + (revnum, branch or ''))] + + def branchmaxrevnum(self, branch, maxrev): + for row in self._query('SELECT rev FROM revmap ' + + 'WHERE rev <= ? AND branch = ? ' + + 'ORDER By rev DESC LIMIT 1', + (maxrev, branch or '')): + return row[0] + return 0 + + @property + def lasthash(self): + for row in self._query('SELECT hash FROM revmap ORDER BY rev DESC'): + return bytes(row[0]) + return None + + def revhashes(self, revnum): + for row in self._query('SELECT hash FROM revmap WHERE rev = ?', + (revnum,)): + yield bytes(row[0]) + + def clear(self): + hgutil.unlinkpath(self._filepath, ignoremissing=True) + hgutil.unlinkpath(self._dbpath, ignoremissing=True) + hgutil.unlinkpath(self._rowcountpath, ignoremissing=True) + self._db = None + self._hashes = None + self._firstpull = None + self._lastpull = None + + def batchset(self, items, lastpulled): + with self._transaction(): + self._insert(items) + self.lastpulled = lastpulled + + def __getitem__(self, key): + for row in self._querybykey('SELECT hash', key): + return bytes(row[0]) + raise KeyError(key) + + def __iter__(self): + if not self._allowiter: + raise NotImplementedError( + 'SqliteRevMap.__iter__ is not implemented intentionally ' + + 'to avoid performance issues') + # collect result to avoid nested transaction issues + rows = [] + for row in self._query('SELECT rev, branch FROM revmap'): + rows.append((row[0], row[1] or None)) + return iter(rows) + + def __len__(self): + # rowcount is faster than "SELECT COUNT(1)". the latter is not O(1) + return self.rowcount + + def __setitem__(self, key, binha): + revnum, branch = key + with self._transaction(): + self._insert([(revnum, branch, binha)]) + if revnum < self.firstpulled or not self.firstpulled: + self.firstpulled = revnum + if revnum > self.lastpulled or not self.lastpulled: + self.lastpulled = revnum + if self._hashes is not None: + self._hashes._cache[binha] = key + + def __delitem__(self, key): + for row in self._querybykey('DELETE', key): + if self.rowcount > 0: + self.rowcount -= 1 + return + # For performance reason, self._hashes is not updated + raise KeyError(key) + + @contextlib.contextmanager + def _transaction(self, mode='IMMEDIATE'): + if self._db is None: + self._opendb() + with self._db as db: + # wait indefinitely for database lock + while True: + try: + db.execute('BEGIN %s' % mode) + break + except sqlite3.OperationalError as ex: + if str(ex) != 'database is locked': + raise + yield db + + def _query(self, sql, params=()): + with self._transaction() as db: + cur = db.execute(sql, params) + try: + for row in cur: + yield row + finally: + cur.close() + + def _querybykey(self, prefix, key): + revnum, branch = key + return self._query( + '%s FROM revmap WHERE rev=? AND branch=?' + % prefix, (revnum, branch or '')) + + def _insert(self, rows): + # convert to a safe type so '\0' does not truncate the blob + if rows and type(rows[0][-1]) is not self.sqlblobtype: + rows = [(r, b, self.sqlblobtype(h)) for r, b, h in rows] + self._db.executemany( + 'INSERT OR REPLACE INTO revmap (rev, branch, hash) ' + + 'VALUES (?, ?, ?)', rows) + # If REPLACE happens, rowcount can be wrong. But it is only used to + # calculate how many revisions pulled, and during pull we don't + # replace rows. So it is fine. + self.rowcount += len(rows) + + def _opendb(self): + '''Open the database and make sure the table is created on demand.''' + version = None + try: + version = int(open(self._filepath).read(2)) + except (ValueError, IOError): + pass + if version and version not in [RevMap.VERSION, self.VERSION]: + raise error.Abort('revmap too new -- please upgrade') + + if self._db: + self._db.close() + + # if version mismatch, the database is considered invalid + if version != self.VERSION: + hgutil.unlinkpath(self._dbpath, ignoremissing=True) + + self._db = sqlite3.connect(self._dbpath) + self._db.text_factory = bytes + + # cache size affects random accessing (e.g. index building) + # performance greatly. default is 2MB (2000 KB), we want to have + # a big enough cache that can hold the entire map. + cachesize = 2000 + for path, ratio in [(self._filepath, 1.7), (self._dbpath, 1)]: + if os.path.exists(path): + cachesize += os.stat(path).st_size * ratio // 1000 + self._db.execute('PRAGMA cache_size=%d' % (-cachesize)) + + # PRAGMA statements provided by the user + for pragma in (self._sqlitepragmas or []): + # drop malicious ones + if re.match(r'\A\w+=\w+\Z', pragma): + self._db.execute('PRAGMA %s' % pragma) + + # disable auto-commit. everything is inside a transaction + self._db.isolation_level = 'DEFERRED' + + with self._transaction('EXCLUSIVE'): + map(self._db.execute, self.TABLESCHEMA) + if version == RevMap.VERSION: + self.rowcount = 0 + self._importrevmapv1() + elif not self.rowcount: + self.rowcount = self._db.execute( + 'SELECT COUNT(1) FROM revmap').fetchone()[0] + + # "bulk insert; then create index" is about 2.4x as fast as + # "create index; then bulk insert" on a large repo + map(self._db.execute, self.INDEXSCHEMA) + + # write a dummy rev map file with just the revision number + if version != self.VERSION: + f = open(self._filepath, 'w') + f.write('%s\n' % self.VERSION) + f.close() + + def _updatefirstlastpulled(self): + sql = 'SELECT rev FROM revmap ORDER BY rev %s LIMIT 1' + for row in self._query(sql % 'ASC'): + self.firstpulled = row[0] + for row in self._query(sql % 'DESC'): + if row[0] > self.lastpulled: + self.lastpulled = row[0] + + @util.gcdisable + def _importrevmapv1(self): + with open(self._filepath, 'r') as f: + # 1st line is version + assert(int(f.readline())) == RevMap.VERSION + data = {} + for line in f: + revnum, ha, branch = line[:-1].split(' ', 2) + # ignore malicious lines + if len(ha) != 40: + continue + data[revnum, branch or None] = bin(ha) + self._insert([(r, b, h) for (r, b), h in data.iteritems()]) + + @util.gcdisable + def exportrevmapv1(self, path): + with open(path, 'w') as f: + f.write('%s\n' % RevMap.VERSION) + for row in self._query('SELECT rev, branch, hash FROM revmap'): + rev, br, ha = row + f.write('%s %s %s\n' % (rev, hex(ha), br)) + class FileMap(object): VERSION = 1 - def __init__(self, meta): + def __init__(self, ui, filepath): '''Initialise a new FileMap. The ui argument is used to print diagnostic messages. @@ -275,16 +799,17 @@ class FileMap(object): The path argument is the location of the backing store, typically .hg/svn/filemap. ''' - self.meta = meta + self._filename = filepath + self._ui = ui self.include = {} self.exclude = {} - if os.path.isfile(self.meta.filemap_file): + if os.path.isfile(self._filename): self._load() else: self._write() # append file mapping specified from the commandline - clmap = util.configpath(self.meta.ui, 'filemap') + clmap = util.configpath(self._ui, 'filemap') if clmap: self.load(clmap) @@ -326,22 +851,20 @@ class FileMap(object): mapping = getattr(self, m) if path in mapping: msg = 'duplicate %s entry in %s: "%s"\n' - self.meta.ui.status(msg % (m, fn, path)) + self._ui.status(msg % (m, fn, path)) return bits = m.rstrip('e'), path - self.meta.ui.debug('%sing %s\n' % bits) + self._ui.debug('%sing %s\n' % bits) # respect rule order mapping[path] = len(self) - if fn != self.meta.filemap_file: - f = open(self.meta.filemap_file, 'a') - f.write(m + ' ' + path + '\n') - f.close() + if fn != self._filename: + with open(self._filename, 'a') as f: + f.write(m + ' ' + path + '\n') def load(self, fn): - self.meta.ui.debug('reading file map from %s\n' % fn) - f = open(fn, 'r') - self.load_fd(f, fn) - f.close() + self._ui.debug('reading file map from %s\n' % fn) + with open(fn, 'r') as f: + self.load_fd(f, fn) def load_fd(self, f, fn): for line in f: @@ -354,26 +877,24 @@ class FileMap(object): if cmd in ('include', 'exclude'): self.add(fn, cmd, path) continue - self.meta.ui.warn('unknown filemap command %s\n' % cmd) + self._ui.warn('unknown filemap command %s\n' % cmd) except IndexError: msg = 'ignoring bad line in filemap %s: %s\n' - self.meta.ui.warn(msg % (fn, line.rstrip())) + self._ui.warn(msg % (fn, line.rstrip())) def _load(self): - self.meta.ui.debug('reading in-repo file map from %s\n' % self.meta.filemap_file) - f = open(self.meta.filemap_file) - ver = int(f.readline()) - if ver != self.VERSION: - raise hgutil.Abort('filemap too new -- please upgrade') - self.load_fd(f, self.meta.filemap_file) - f.close() + self._ui.debug('reading in-repo file map from %s\n' % self._filename) + with open(self._filename) as f: + ver = int(f.readline()) + if ver != self.VERSION: + raise hgutil.Abort('filemap too new -- please upgrade') + self.load_fd(f, self._filename) def _write(self): - f = open(self.meta.filemap_file, 'w') - f.write('%s\n' % self.VERSION) - f.close() + with open(self._filename, 'w') as f: + f.write('%s\n' % self.VERSION) -class BranchMap(dict): +class BranchMap(BaseMap): '''Facility for controlled renaming of branch names. Example: oldname = newname @@ -383,63 +904,7 @@ class BranchMap(dict): changes on other will now be on default (have no branch name set). ''' - def __init__(self, meta): - self.meta = meta - self.super = super(BranchMap, self) - self.super.__init__() - self.load(self.meta.branchmap_file) - - # append branch mapping specified from the commandline - clmap = util.configpath(self.meta.ui, 'branchmap') - if clmap: - self.load(clmap) - - def load(self, path): - '''Load mappings from a file at the specified path.''' - if not os.path.exists(path): - return - - writing = False - if path != self.meta.branchmap_file: - writing = open(self.meta.branchmap_file, 'a') - - self.meta.ui.debug('reading branchmap from %s\n' % path) - f = open(path, 'r') - for number, line in enumerate(f): - - if writing: - writing.write(line) - - line = line.split('#')[0] - if not line.strip(): - continue - - try: - src, dst = line.split('=', 1) - except (IndexError, ValueError): - msg = 'ignoring line %i in branch map %s: %s\n' - self.meta.ui.status(msg % (number, path, line.rstrip())) - continue - - src = src.strip() - dst = dst.strip() - self.meta.ui.debug('adding branch %s to branch map\n' % src) - - if not dst: - # prevent people from assuming such lines are valid - raise hgutil.Abort('removing branches is not supported, yet\n' - '(line %i in branch map %s)' - % (number, path)) - elif src in self and dst != self[src]: - msg = 'overriding branch: "%s" to "%s" (%s)\n' - self.meta.ui.status(msg % (self[src], dst, src)) - self[src] = dst - - f.close() - if writing: - writing.close() - -class TagMap(dict): +class TagMap(BaseMap): '''Facility for controlled renaming of tags. Example: oldname = newname @@ -448,54 +913,3 @@ class TagMap(dict): The oldname tag from SVN will be represented as newname in the hg tags; the other tag will not be reflected in the hg repository. ''' - - def __init__(self, meta): - self.meta = meta - self.super = super(TagMap, self) - self.super.__init__() - self.load(self.meta.tagmap_file) - - # append tag mapping specified from the commandline - clmap = util.configpath(self.meta.ui, 'tagmap') - if clmap: - self.load(clmap) - - def load(self, path): - '''Load mappings from a file at the specified path.''' - if not os.path.exists(path): - return - - writing = False - if path != self.meta.tagmap_file: - writing = open(self.meta.tagmap_file, 'a') - - self.meta.ui.debug('reading tag renames from %s\n' % path) - f = open(path, 'r') - for number, line in enumerate(f): - - if writing: - writing.write(line) - - line = line.split('#')[0] - if not line.strip(): - continue - - try: - src, dst = line.split('=', 1) - except (IndexError, ValueError): - msg = 'ignoring line %i in tag renames %s: %s\n' - self.meta.ui.status(msg % (number, path, line.rstrip())) - continue - - src = src.strip() - dst = dst.strip() - self.meta.ui.debug('adding tag %s to tag renames\n' % src) - - if src in self and dst != self[src]: - msg = 'overriding tag rename: "%s" to "%s" (%s)\n' - self.meta.ui.status(msg % (self[src], dst, src)) - self[src] = dst - - f.close() - if writing: - writing.close() |