12 May 2012 14:41
[PATCH] revsets: huge speedups for fromsvn and svnrev
# HG changeset patch
# User Bryan O'Sullivan <bryano@...>
# Date 1336826314 25200
# Node ID 2eb6bd7daf797b82eab8da0219a4c202e9b1ecdf
# Parent  f95c429124f3f0952a06e34c7773b09c9f7a9a81
revsets: huge speedups for fromsvn and svnrev

I have a hgsubversion repo that contains over 300,000 commits.
In that repo, this patch improves performance as follows:

hg --time log -r 'first(fromsvn())'
  Before: 40.3 sec
  After: 0.8 sec

hg --time log -r 'svnrev(350000)'
  Before: 40.3 sec
  After: 0.1 sec

Note: the performance of these revset implementations is very
sensitive to doing as little work as possible per line of the
rev_map file. I originally attempted to hide the file format
details by hoisting the parsing of each line up into
RevMap.readmapfile, but the current less abstract code is
dramatically (10x or more) faster.

If the revmap file is missing, we error out and print a message
describing what to do.

diff -r f95c429124f3 -r 2eb6bd7daf79 hgsubversion/maps.py
--- a/hgsubversion/maps.py Sat May 12 11:58:19 2012 +0200
+++ b/hgsubversion/maps.py Sat May 12 05:38:34 2012 -0700
@@ -1,6 +1,6 @@
 ''' Module for self-contained maps. '''
 
-import os
+import errno, os
 from mercurial import util as hgutil
 from mercurial import node
 
@@ -182,7 +182,8 @@
 
     def __init__(self, repo):
         dict.__init__(self)
-        self.path = os.path.join(repo.path, 'svn', 'rev_map')
+        self.path = self.mappath(repo)
+        self.repo = repo
         self.ypath = os.path.join(repo.path, 'svn', 'lastpulled')
         # TODO(durin42): Consider moving management of the youngest
         # file to svnmeta itself rather than leaving it here.
@@ -212,13 +213,26 @@
         check = lambda x: x[0][1] == branch and x[0][0] < rev.revnum
         return sorted(filter(check, self.iteritems()), reverse=True)
 
-    def _load(self):
-        f = open(self.path)
+    @staticmethod
+    def mappath(repo):
+        return os.path.join(repo.path, 'svn', 'rev_map')
+
+    @classmethod
+    def readmapfile(cls, repo, missingok=True):
+        try:
+            f = open(cls.mappath(repo))
+        except IOError, err:
+            if not missingok or err.errno != errno.ENOENT:
+                raise
+            return iter([])
         ver = int(f.readline())
-        if ver != self.VERSION:
+        if ver != cls.VERSION:
             print 'revmap too new -- please upgrade'
             raise NotImplementedError
-        for l in f:
+        return f
+
+    def _load(self):
+        for l in self.readmapfile(self.repo):
             revnum, ha, branch = l.split(' ', 2)
             if branch == '\n':
                 branch = None
@@ -230,7 +244,6 @@
             if revnum < self.oldest or not self.oldest:
                 self.oldest = revnum
             dict.__setitem__(self, (revnum, branch), node.bin(ha))
-        f.close()
 
     def _write(self):
         f = open(self.path, 'w')
diff -r f95c429124f3 -r 2eb6bd7daf79 hgsubversion/util.py
--- a/hgsubversion/util.py Sat May 12 11:58:19 2012 +0200
+++ b/hgsubversion/util.py Sat May 12 05:38:34 2012 -0700
@@ -1,3 +1,4 @@
+import errno
 import re
 import os
 import urllib
@@ -13,6 +14,8 @@
 except ImportError:
     pass
 
+import maps
+
 ignoredfiles = set(['.hgtags', '.hgsvnexternals', '.hgsub', '.hgsubstate'])
 
 b_re = re.compile(r'^\+\+\+ b\/([^\n]*)', re.MULTILINE)
@@ -279,11 +282,17 @@
     '''
     args = revset.getargs(x, 0, 0, "fromsvn takes no arguments")
 
-    def matches(r):
-        convertinfo = repo[r].extra().get('convert_revision', '')
-        return convertinfo[:4] == 'svn:'
-
-    return [r for r in subset if matches(r)]
+    rev = repo.changelog.rev
+    bin = node.bin
+    try:
+        svnrevs = set(rev(bin(l.split(' ', 2)[1]))
+                      for l in maps.RevMap.readmapfile(repo, missingok=False))
+        return filter(svnrevs.__contains__, subset)
+    except IOError, err:
+        if err.errno != errno.ENOENT:
+            raise
+        raise hgutil.Abort("svn metadata is missing - "
+                           "run 'hg svn rebuildmeta' to reconstruct it")
 
 def revset_svnrev(repo, subset, x):
     '''``svnrev(number)``
@@ -294,17 +303,25 @@
     rev = revset.getstring(args[0],
                            "the argument to svnrev() must be a number")
     try:
-        rev = int(rev)
+        revnum = int(rev)
     except ValueError:
         raise error.ParseError("the argument to svnrev() must be a number")
 
-    def matches(r):
-        convertinfo = repo[r].extra().get('convert_revision', '')
-        if convertinfo[:4] != 'svn:':
-            return False
-        return int(convertinfo[40:].rsplit('@', 1)[-1]) == rev
-
-    return [r for r in subset if matches(r)]
+    rev = rev + ' '
+    revs = []
+    try:
+        for l in maps.RevMap.readmapfile(repo, missingok=False):
+            if l.startswith(rev):
+                n = l.split(' ', 2)[1]
+                r = repo[node.bin(n)].rev()
+                if r in subset:
+                    revs.append(r)
+        return revs
+    except IOError, err:
+        if err.errno != errno.ENOENT:
+            raise
+        raise hgutil.Abort("svn metadata is missing - "
+                           "run 'hg svn rebuildmeta' to reconstruct it")
 
 revsets = {
     'fromsvn': revset_fromsvn,
--
--
You received this message because you are subscribed to the Google Groups "hgsubversion" group.
To post to this group, send email to hgsubversion@...
To unsubscribe from this group, send email to hgsubversion+unsubscribe@...
For more options, visit this group at http://groups.google.com/group/hgsubversion?hl=en.
RSS Feed