Mercurial > public > mercurial-scm > hg-stable
diff mercurial/manifest.py @ 24803:e89f909edffa stable 3.4-rc
merge default into stable for 3.4 freeze
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Thu, 16 Apr 2015 20:57:51 -0500 |
parents | 055b3cbe6c57 |
children | d9832a12a06e |
line wrap: on
line diff
--- a/mercurial/manifest.py Thu Apr 16 22:33:53 2015 +0900 +++ b/mercurial/manifest.py Thu Apr 16 20:57:51 2015 -0500 @@ -8,53 +8,271 @@ from i18n import _ import mdiff, parsers, error, revlog, util import array, struct +import os -class manifestdict(dict): - def __init__(self, mapping=None, flags=None): - if mapping is None: - mapping = {} - if flags is None: - flags = {} - dict.__init__(self, mapping) - self._flags = flags +propertycache = util.propertycache + +def _parsev1(data): + # This method does a little bit of excessive-looking + # precondition checking. This is so that the behavior of this + # class exactly matches its C counterpart to try and help + # prevent surprise breakage for anyone that develops against + # the pure version. + if data and data[-1] != '\n': + raise ValueError('Manifest did not end in a newline.') + prev = None + for l in data.splitlines(): + if prev is not None and prev > l: + raise ValueError('Manifest lines not in sorted order.') + prev = l + f, n = l.split('\0') + if len(n) > 40: + yield f, revlog.bin(n[:40]), n[40:] + else: + yield f, revlog.bin(n), '' + +def _parsev2(data): + metadataend = data.find('\n') + # Just ignore metadata for now + pos = metadataend + 1 + prevf = '' + while pos < len(data): + end = data.find('\n', pos + 1) # +1 to skip stem length byte + if end == -1: + raise ValueError('Manifest ended with incomplete file entry.') + stemlen = ord(data[pos]) + items = data[pos + 1:end].split('\0') + f = prevf[:stemlen] + items[0] + if prevf > f: + raise ValueError('Manifest entries not in sorted order.') + fl = items[1] + # Just ignore metadata (items[2:] for now) + n = data[end + 1:end + 21] + yield f, n, fl + pos = end + 22 + prevf = f + +def _parse(data): + """Generates (path, node, flags) tuples from a manifest text""" + if data.startswith('\0'): + return iter(_parsev2(data)) + else: + return iter(_parsev1(data)) + +def _text(it, usemanifestv2): + """Given an iterator over (path, node, flags) tuples, returns a manifest + text""" + if usemanifestv2: + return _textv2(it) + else: + return _textv1(it) + +def _textv1(it): + files = [] + lines = [] + _hex = revlog.hex + for f, n, fl in it: + files.append(f) + # if this is changed to support newlines in filenames, + # be sure to check the templates/ dir again (especially *-raw.tmpl) + lines.append("%s\0%s%s\n" % (f, _hex(n), fl)) + + _checkforbidden(files) + return ''.join(lines) + +def _textv2(it): + files = [] + lines = ['\0\n'] + prevf = '' + for f, n, fl in it: + files.append(f) + stem = os.path.commonprefix([prevf, f]) + stemlen = min(len(stem), 255) + lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) + prevf = f + _checkforbidden(files) + return ''.join(lines) + +class _lazymanifest(dict): + """This is the pure implementation of lazymanifest. + + It has not been optimized *at all* and is not lazy. + """ + + def __init__(self, data): + dict.__init__(self) + for f, n, fl in _parse(data): + self[f] = n, fl + def __setitem__(self, k, v): - assert v is not None - dict.__setitem__(self, k, v) - def flags(self, f): - return self._flags.get(f, "") - def setflag(self, f, flags): - """Set the flags (symlink, executable) for path f.""" - self._flags[f] = flags + node, flag = v + assert node is not None + if len(node) > 21: + node = node[:21] # match c implementation behavior + dict.__setitem__(self, k, (node, flag)) + + def __iter__(self): + return iter(sorted(dict.keys(self))) + + def iterkeys(self): + return iter(sorted(dict.keys(self))) + + def iterentries(self): + return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) + def copy(self): - return manifestdict(self, dict.copy(self._flags)) - def intersectfiles(self, files): - '''make a new manifestdict with the intersection of self with files + c = _lazymanifest('') + c.update(self) + return c + + def diff(self, m2, clean=False): + '''Finds changes between the current manifest and m2.''' + diff = {} + + for fn, e1 in self.iteritems(): + if fn not in m2: + diff[fn] = e1, (None, '') + else: + e2 = m2[fn] + if e1 != e2: + diff[fn] = e1, e2 + elif clean: + diff[fn] = None + + for fn, e2 in m2.iteritems(): + if fn not in self: + diff[fn] = (None, ''), e2 + + return diff + + def filtercopy(self, filterfn): + c = _lazymanifest('') + for f, n, fl in self.iterentries(): + if filterfn(f): + c[f] = n, fl + return c + + def text(self): + """Get the full data of this manifest as a bytestring.""" + return _textv1(self.iterentries()) + +try: + _lazymanifest = parsers.lazymanifest +except AttributeError: + pass + +class manifestdict(object): + def __init__(self, data=''): + if data.startswith('\0'): + #_lazymanifest can not parse v2 + self._lm = _lazymanifest('') + for f, n, fl in _parsev2(data): + self._lm[f] = n, fl + else: + self._lm = _lazymanifest(data) + + def __getitem__(self, key): + return self._lm[key][0] + + def find(self, key): + return self._lm[key] + + def __len__(self): + return len(self._lm) + + def __setitem__(self, key, node): + self._lm[key] = node, self.flags(key, '') + + def __contains__(self, key): + return key in self._lm + + def __delitem__(self, key): + del self._lm[key] - The algorithm assumes that files is much smaller than self.''' - ret = manifestdict() - for fn in files: - if fn in self: - ret[fn] = self[fn] - flags = self._flags.get(fn, None) - if flags: - ret._flags[fn] = flags - return ret + def __iter__(self): + return self._lm.__iter__() + + def iterkeys(self): + return self._lm.iterkeys() + + def keys(self): + return list(self.iterkeys()) + + def filesnotin(self, m2): + '''Set of files in this manifest that are not in the other''' + files = set(self) + files.difference_update(m2) + return files + + @propertycache + def _dirs(self): + return util.dirs(self) + + def dirs(self): + return self._dirs + + def hasdir(self, dir): + return dir in self._dirs + + def _filesfastpath(self, match): + '''Checks whether we can correctly and quickly iterate over matcher + files instead of over manifest files.''' + files = match.files() + return (len(files) < 100 and (match.isexact() or + (not match.anypats() and util.all(fn in self for fn in files)))) + + def walk(self, match): + '''Generates matching file names. + + Equivalent to manifest.matches(match).iterkeys(), but without creating + an entirely new manifest. + + It also reports nonexistent files by marking them bad with match.bad(). + ''' + if match.always(): + for f in iter(self): + yield f + return + + fset = set(match.files()) + + # avoid the entire walk if we're only looking for specific files + if self._filesfastpath(match): + for fn in sorted(fset): + yield fn + return + + for fn in self: + if fn in fset: + # specified pattern is the exact name + fset.remove(fn) + if match(fn): + yield fn + + # for dirstate.walk, files=['.'] means "walk the whole tree". + # follow that here, too + fset.discard('.') + + for fn in sorted(fset): + if not self.hasdir(fn): + match.bad(fn, None) def matches(self, match): '''generate a new manifest filtered by the match argument''' if match.always(): return self.copy() - files = match.files() - if (match.matchfn == match.exact or - (not match.anypats() and util.all(fn in self for fn in files))): - return self.intersectfiles(files) + if self._filesfastpath(match): + m = manifestdict() + lm = self._lm + for fn in match.files(): + if fn in lm: + m._lm[fn] = lm[fn] + return m - mf = self.copy() - for fn in mf.keys(): - if not match(fn): - del mf[fn] - return mf + m = manifestdict() + m._lm = self._lm.filtercopy(match) + return m def diff(self, m2, clean=False): '''Finds changes between the current manifest and m2. @@ -71,35 +289,37 @@ the nodeid will be None and the flags will be the empty string. ''' - diff = {} + return self._lm.diff(m2._lm, clean) + + def setflag(self, key, flag): + self._lm[key] = self[key], flag - for fn, n1 in self.iteritems(): - fl1 = self._flags.get(fn, '') - n2 = m2.get(fn, None) - fl2 = m2._flags.get(fn, '') - if n2 is None: - fl2 = '' - if n1 != n2 or fl1 != fl2: - diff[fn] = ((n1, fl1), (n2, fl2)) - elif clean: - diff[fn] = None + def get(self, key, default=None): + try: + return self._lm[key][0] + except KeyError: + return default - for fn, n2 in m2.iteritems(): - if fn not in self: - fl2 = m2._flags.get(fn, '') - diff[fn] = ((None, ''), (n2, fl2)) - - return diff + def flags(self, key, default=''): + try: + return self._lm[key][1] + except KeyError: + return default - def text(self): - """Get the full data of this manifest as a bytestring.""" - fl = sorted(self) - _checkforbidden(fl) + def copy(self): + c = manifestdict() + c._lm = self._lm.copy() + return c - hex, flags = revlog.hex, self.flags - # if this is changed to support newlines in filenames, - # be sure to check the templates/ dir again (especially *-raw.tmpl) - return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl) + def iteritems(self): + return (x[:2] for x in self._lm.iterentries()) + + def text(self, usemanifestv2=False): + if usemanifestv2: + return _textv2(self._lm.iterentries()) + else: + # use (probably) native version for v1 + return self._lm.text() def fastdelta(self, base, changes): """Given a base manifest text as an array.array and a list of changes @@ -119,7 +339,8 @@ # bs will either be the index of the item or the insert point start, end = _msearch(addbuf, f, start) if not todelete: - l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f)) + h, fl = self._lm[f] + l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) else: if start == end: # item we want to delete was not found, error out @@ -213,21 +434,363 @@ + content for start, end, content in x) return deltatext, newaddlist -def _parse(lines): - mfdict = manifestdict() - parsers.parse_manifest(mfdict, mfdict._flags, lines) - return mfdict +def _splittopdir(f): + if '/' in f: + dir, subpath = f.split('/', 1) + return dir + '/', subpath + else: + return '', f + +class treemanifest(object): + def __init__(self, dir='', text=''): + self._dir = dir + self._dirs = {} + # Using _lazymanifest here is a little slower than plain old dicts + self._files = {} + self._flags = {} + self.parse(text) + + def _subpath(self, path): + return self._dir + path + + def __len__(self): + size = len(self._files) + for m in self._dirs.values(): + size += m.__len__() + return size + + def _isempty(self): + return (not self._files and (not self._dirs or + util.all(m._isempty() for m in self._dirs.values()))) + + def __str__(self): + return '<treemanifest dir=%s>' % self._dir + + def iteritems(self): + for p, n in sorted(self._dirs.items() + self._files.items()): + if p in self._files: + yield self._subpath(p), n + else: + for f, sn in n.iteritems(): + yield f, sn + + def iterkeys(self): + for p in sorted(self._dirs.keys() + self._files.keys()): + if p in self._files: + yield self._subpath(p) + else: + for f in self._dirs[p].iterkeys(): + yield f + + def keys(self): + return list(self.iterkeys()) + + def __iter__(self): + return self.iterkeys() + + def __contains__(self, f): + if f is None: + return False + dir, subpath = _splittopdir(f) + if dir: + if dir not in self._dirs: + return False + return self._dirs[dir].__contains__(subpath) + else: + return f in self._files + + def get(self, f, default=None): + dir, subpath = _splittopdir(f) + if dir: + if dir not in self._dirs: + return default + return self._dirs[dir].get(subpath, default) + else: + return self._files.get(f, default) + + def __getitem__(self, f): + dir, subpath = _splittopdir(f) + if dir: + return self._dirs[dir].__getitem__(subpath) + else: + return self._files[f] + + def flags(self, f): + dir, subpath = _splittopdir(f) + if dir: + if dir not in self._dirs: + return '' + return self._dirs[dir].flags(subpath) + else: + if f in self._dirs: + return '' + return self._flags.get(f, '') + + def find(self, f): + dir, subpath = _splittopdir(f) + if dir: + return self._dirs[dir].find(subpath) + else: + return self._files[f], self._flags.get(f, '') + + def __delitem__(self, f): + dir, subpath = _splittopdir(f) + if dir: + self._dirs[dir].__delitem__(subpath) + # If the directory is now empty, remove it + if self._dirs[dir]._isempty(): + del self._dirs[dir] + else: + del self._files[f] + if f in self._flags: + del self._flags[f] + + def __setitem__(self, f, n): + assert n is not None + dir, subpath = _splittopdir(f) + if dir: + if dir not in self._dirs: + self._dirs[dir] = treemanifest(self._subpath(dir)) + self._dirs[dir].__setitem__(subpath, n) + else: + self._files[f] = n[:21] # to match manifestdict's behavior + + def setflag(self, f, flags): + """Set the flags (symlink, executable) for path f.""" + dir, subpath = _splittopdir(f) + if dir: + if dir not in self._dirs: + self._dirs[dir] = treemanifest(self._subpath(dir)) + self._dirs[dir].setflag(subpath, flags) + else: + self._flags[f] = flags + + def copy(self): + copy = treemanifest(self._dir) + for d in self._dirs: + copy._dirs[d] = self._dirs[d].copy() + copy._files = dict.copy(self._files) + copy._flags = dict.copy(self._flags) + return copy + + def filesnotin(self, m2): + '''Set of files in this manifest that are not in the other''' + files = set() + def _filesnotin(t1, t2): + for d, m1 in t1._dirs.iteritems(): + if d in t2._dirs: + m2 = t2._dirs[d] + _filesnotin(m1, m2) + else: + files.update(m1.iterkeys()) + + for fn in t1._files.iterkeys(): + if fn not in t2._files: + files.add(t1._subpath(fn)) + + _filesnotin(self, m2) + return files + + @propertycache + def _alldirs(self): + return util.dirs(self) + + def dirs(self): + return self._alldirs + + def hasdir(self, dir): + topdir, subdir = _splittopdir(dir) + if topdir: + if topdir in self._dirs: + return self._dirs[topdir].hasdir(subdir) + return False + return (dir + '/') in self._dirs + + def walk(self, match): + '''Generates matching file names. + + Equivalent to manifest.matches(match).iterkeys(), but without creating + an entirely new manifest. + + It also reports nonexistent files by marking them bad with match.bad(). + ''' + if match.always(): + for f in iter(self): + yield f + return + + fset = set(match.files()) + + for fn in self._walk(match): + if fn in fset: + # specified pattern is the exact name + fset.remove(fn) + yield fn + + # for dirstate.walk, files=['.'] means "walk the whole tree". + # follow that here, too + fset.discard('.') + + for fn in sorted(fset): + if not self.hasdir(fn): + match.bad(fn, None) + + def _walk(self, match, alldirs=False): + '''Recursively generates matching file names for walk(). + + Will visit all subdirectories if alldirs is True, otherwise it will + only visit subdirectories for which match.visitdir is True.''' + + if not alldirs: + # substring to strip trailing slash + visit = match.visitdir(self._dir[:-1] or '.') + if not visit: + return + alldirs = (visit == 'all') + + # yield this dir's files and walk its submanifests + for p in sorted(self._dirs.keys() + self._files.keys()): + if p in self._files: + fullp = self._subpath(p) + if match(fullp): + yield fullp + else: + for f in self._dirs[p]._walk(match, alldirs): + yield f + + def matches(self, match): + '''generate a new manifest filtered by the match argument''' + if match.always(): + return self.copy() + + return self._matches(match) + + def _matches(self, match, alldirs=False): + '''recursively generate a new manifest filtered by the match argument. + + Will visit all subdirectories if alldirs is True, otherwise it will + only visit subdirectories for which match.visitdir is True.''' + + ret = treemanifest(self._dir) + if not alldirs: + # substring to strip trailing slash + visit = match.visitdir(self._dir[:-1] or '.') + if not visit: + return ret + alldirs = (visit == 'all') + + for fn in self._files: + fullp = self._subpath(fn) + if not match(fullp): + continue + ret._files[fn] = self._files[fn] + if fn in self._flags: + ret._flags[fn] = self._flags[fn] + + for dir, subm in self._dirs.iteritems(): + m = subm._matches(match, alldirs) + if not m._isempty(): + ret._dirs[dir] = m + + return ret + + def diff(self, m2, clean=False): + '''Finds changes between the current manifest and m2. + + Args: + m2: the manifest to which this manifest should be compared. + clean: if true, include files unchanged between these manifests + with a None value in the returned dictionary. + + The result is returned as a dict with filename as key and + values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the + nodeid in the current/other manifest and fl1/fl2 is the flag + in the current/other manifest. Where the file does not exist, + the nodeid will be None and the flags will be the empty + string. + ''' + result = {} + emptytree = treemanifest() + def _diff(t1, t2): + for d, m1 in t1._dirs.iteritems(): + m2 = t2._dirs.get(d, emptytree) + _diff(m1, m2) + + for d, m2 in t2._dirs.iteritems(): + if d not in t1._dirs: + _diff(emptytree, m2) + + for fn, n1 in t1._files.iteritems(): + fl1 = t1._flags.get(fn, '') + n2 = t2._files.get(fn, None) + fl2 = t2._flags.get(fn, '') + if n1 != n2 or fl1 != fl2: + result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2)) + elif clean: + result[t1._subpath(fn)] = None + + for fn, n2 in t2._files.iteritems(): + if fn not in t1._files: + fl2 = t2._flags.get(fn, '') + result[t2._subpath(fn)] = ((None, ''), (n2, fl2)) + + _diff(self, m2) + return result + + def parse(self, text): + for f, n, fl in _parse(text): + self[f] = n + if fl: + self.setflag(f, fl) + + def text(self, usemanifestv2=False): + """Get the full data of this manifest as a bytestring.""" + flags = self.flags + return _text(((f, self[f], flags(f)) for f in self.keys()), + usemanifestv2) class manifest(revlog.revlog): def __init__(self, opener): - # we expect to deal with not more than four revs at a time, - # during a commit --amend - self._mancache = util.lrucachedict(4) + # During normal operations, we expect to deal with not more than four + # revs at a time (such as during commit --amend). When rebasing large + # stacks of commits, the number can go up, hence the config knob below. + cachesize = 4 + usetreemanifest = False + usemanifestv2 = False + opts = getattr(opener, 'options', None) + if opts is not None: + cachesize = opts.get('manifestcachesize', cachesize) + usetreemanifest = opts.get('usetreemanifest', usetreemanifest) + usemanifestv2 = opts.get('manifestv2', usemanifestv2) + self._mancache = util.lrucachedict(cachesize) revlog.revlog.__init__(self, opener, "00manifest.i") + self._treeinmem = usetreemanifest + self._treeondisk = usetreemanifest + self._usemanifestv2 = usemanifestv2 + + def _newmanifest(self, data=''): + if self._treeinmem: + return treemanifest('', data) + return manifestdict(data) + + def _slowreaddelta(self, node): + r0 = self.deltaparent(self.rev(node)) + m0 = self.read(self.node(r0)) + m1 = self.read(node) + md = self._newmanifest() + for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems(): + if n1: + md[f] = n1 + if fl1: + md.setflag(f, fl1) + return md def readdelta(self, node): + if self._usemanifestv2 or self._treeondisk: + return self._slowreaddelta(node) r = self.rev(node) - return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r))) + d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r)) + return self._newmanifest(d) def readfast(self, node): '''use the faster of readdelta or read''' @@ -239,31 +802,27 @@ def read(self, node): if node == revlog.nullid: - return manifestdict() # don't upset local cache + return self._newmanifest() # don't upset local cache if node in self._mancache: return self._mancache[node][0] text = self.revision(node) arraytext = array.array('c', text) - mapping = _parse(text) - self._mancache[node] = (mapping, arraytext) - return mapping + m = self._newmanifest(text) + self._mancache[node] = (m, arraytext) + return m def find(self, node, f): '''look up entry for a single file efficiently. return (node, flags) pair if found, (None, None) if not.''' - if node in self._mancache: - mapping = self._mancache[node][0] - return mapping.get(f), mapping.flags(f) - text = self.revision(node) - start, end = _msearch(text, f) - if start == end: + m = self.read(node) + try: + return m.find(f) + except KeyError: return None, None - l = text[start:end] - f, n = l.split('\0') - return revlog.bin(n[:40]), n[40:-1] - def add(self, map, transaction, link, p1, p2, added, removed): - if p1 in self._mancache: + def add(self, m, transaction, link, p1, p2, added, removed): + if (p1 in self._mancache and not self._treeinmem + and not self._usemanifestv2): # If our first parent is in the manifest cache, we can # compute a delta here using properties we know about the # manifest up-front, which may save time later for the @@ -277,19 +836,19 @@ # since the lists are already sorted work.sort() - arraytext, deltatext = map.fastdelta(self._mancache[p1][1], work) + arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work) cachedelta = self.rev(p1), deltatext text = util.buffer(arraytext) + n = self.addrevision(text, transaction, link, p1, p2, cachedelta) else: # The first parent manifest isn't already loaded, so we'll # just encode a fulltext of the manifest and pass that # through to the revlog layer, and let it handle the delta # process. - text = map.text() + text = m.text(self._usemanifestv2) arraytext = array.array('c', text) - cachedelta = None + n = self.addrevision(text, transaction, link, p1, p2) - n = self.addrevision(text, transaction, link, p1, p2, cachedelta) - self._mancache[n] = (map, arraytext) + self._mancache[n] = (m, arraytext) return n