mercurial-scm/hg-stable: mercurial/manifest.py comparison

comparison mercurial/manifest.py @ 24803:e89f909edffa stable 3.4-rc

merge default into stable for 3.4 freeze

author	Matt Mackall <mpm@selenic.com>
date	Thu, 16 Apr 2015 20:57:51 -0500
parents	055b3cbe6c57
children	d9832a12a06e

comparison

equal deleted inserted replaced

-:612ed41ae359
+:e89f909edffa
 # GNU General Public License version 2 or any later version.
 from i18n import _
 import mdiff, parsers, error, revlog, util
 import array, struct
+import os
-class manifestdict(dict):
-def __init__(self, mapping=None, flags=None):
+propertycache = util.propertycache
-if mapping is None:
-mapping = {}
+def _parsev1(data):
-if flags is None:
+# This method does a little bit of excessive-looking
-flags = {}
+# precondition checking. This is so that the behavior of this
-dict.__init__(self, mapping)
+# class exactly matches its C counterpart to try and help
-self._flags = flags
+# prevent surprise breakage for anyone that develops against
+# the pure version.
+if data and data[-1] != '\n':
+raise ValueError('Manifest did not end in a newline.')
+prev = None
+for l in data.splitlines():
+if prev is not None and prev > l:
+raise ValueError('Manifest lines not in sorted order.')
+prev = l
+f, n = l.split('\0')
+if len(n) > 40:
+yield f, revlog.bin(n[:40]), n[40:]
+else:
+yield f, revlog.bin(n), ''
+def _parsev2(data):
+metadataend = data.find('\n')
+# Just ignore metadata for now
+pos = metadataend + 1
+prevf = ''
+while pos < len(data):
+end = data.find('\n', pos + 1) # +1 to skip stem length byte
+if end == -1:
+raise ValueError('Manifest ended with incomplete file entry.')
+stemlen = ord(data[pos])
+items = data[pos + 1:end].split('\0')
+f = prevf[:stemlen] + items[0]
+if prevf > f:
+raise ValueError('Manifest entries not in sorted order.')
+fl = items[1]
+# Just ignore metadata (items[2:] for now)
+n = data[end + 1:end + 21]
+yield f, n, fl
+pos = end + 22
+prevf = f
+def _parse(data):
+"""Generates (path, node, flags) tuples from a manifest text"""
+if data.startswith('\0'):
+return iter(_parsev2(data))
+else:
+return iter(_parsev1(data))
+def _text(it, usemanifestv2):
+"""Given an iterator over (path, node, flags) tuples, returns a manifest
+text"""
+if usemanifestv2:
+return _textv2(it)
+else:
+return _textv1(it)
+def _textv1(it):
+files = []
+lines = []
+_hex = revlog.hex
+for f, n, fl in it:
+files.append(f)
+# if this is changed to support newlines in filenames,
+# be sure to check the templates/ dir again (especially *-raw.tmpl)
+lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
+_checkforbidden(files)
+return ''.join(lines)
+def _textv2(it):
+files = []
+lines = ['\0\n']
+prevf = ''
+for f, n, fl in it:
+files.append(f)
+stem = os.path.commonprefix([prevf, f])
+stemlen = min(len(stem), 255)
+lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
+prevf = f
+_checkforbidden(files)
+return ''.join(lines)
+class _lazymanifest(dict):
+"""This is the pure implementation of lazymanifest.
+It has not been optimized *at all* and is not lazy.
+"""
+def __init__(self, data):
+dict.__init__(self)
+for f, n, fl in _parse(data):
+self[f] = n, fl
 def __setitem__(self, k, v):
-assert v is not None
+node, flag = v
-dict.__setitem__(self, k, v)
+assert node is not None
-def flags(self, f):
+if len(node) > 21:
-return self._flags.get(f, "")
+node = node[:21] # match c implementation behavior
-def setflag(self, f, flags):
+dict.__setitem__(self, k, (node, flag))
-"""Set the flags (symlink, executable) for path f."""
-self._flags[f] = flags
+def __iter__(self):
+return iter(sorted(dict.keys(self)))
+def iterkeys(self):
+return iter(sorted(dict.keys(self)))
+def iterentries(self):
+return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
 def copy(self):
-return manifestdict(self, dict.copy(self._flags))
+c = _lazymanifest('')
-def intersectfiles(self, files):
+c.update(self)
-'''make a new manifestdict with the intersection of self with files
+return c
-The algorithm assumes that files is much smaller than self.'''
+def diff(self, m2, clean=False):
-ret = manifestdict()
+'''Finds changes between the current manifest and m2.'''
-for fn in files:
+diff = {}
-if fn in self:
-ret[fn] = self[fn]
+for fn, e1 in self.iteritems():
-flags = self._flags.get(fn, None)
+if fn not in m2:
-if flags:
+diff[fn] = e1, (None, '')
-ret._flags[fn] = flags
+else:
-return ret
+e2 = m2[fn]
+if e1 != e2:
+diff[fn] = e1, e2
+elif clean:
+diff[fn] = None
+for fn, e2 in m2.iteritems():
+if fn not in self:
+diff[fn] = (None, ''), e2
+return diff
+def filtercopy(self, filterfn):
+c = _lazymanifest('')
+for f, n, fl in self.iterentries():
+if filterfn(f):
+c[f] = n, fl
+return c
+def text(self):
+"""Get the full data of this manifest as a bytestring."""
+return _textv1(self.iterentries())
+try:
+_lazymanifest = parsers.lazymanifest
+except AttributeError:
+pass
+class manifestdict(object):
+def __init__(self, data=''):
+if data.startswith('\0'):
+#_lazymanifest can not parse v2
+self._lm = _lazymanifest('')
+for f, n, fl in _parsev2(data):
+self._lm[f] = n, fl
+else:
+self._lm = _lazymanifest(data)
+def __getitem__(self, key):
+return self._lm[key][0]
+def find(self, key):
+return self._lm[key]
+def __len__(self):
+return len(self._lm)
+def __setitem__(self, key, node):
+self._lm[key] = node, self.flags(key, '')
+def __contains__(self, key):
+return key in self._lm
+def __delitem__(self, key):
+del self._lm[key]
+def __iter__(self):
+return self._lm.__iter__()
+def iterkeys(self):
+return self._lm.iterkeys()
+def keys(self):
+return list(self.iterkeys())
+def filesnotin(self, m2):
+'''Set of files in this manifest that are not in the other'''
+files = set(self)
+files.difference_update(m2)
+return files
+@propertycache
+def _dirs(self):
+return util.dirs(self)
+def dirs(self):
+return self._dirs
+def hasdir(self, dir):
+return dir in self._dirs
+def _filesfastpath(self, match):
+'''Checks whether we can correctly and quickly iterate over matcher
+files instead of over manifest files.'''
+files = match.files()
+return (len(files) < 100 and (match.isexact() or
+(not match.anypats() and util.all(fn in self for fn in files))))
+def walk(self, match):
+'''Generates matching file names.
+Equivalent to manifest.matches(match).iterkeys(), but without creating
+an entirely new manifest.
+It also reports nonexistent files by marking them bad with match.bad().
+'''
+if match.always():
+for f in iter(self):
+yield f
+return
+fset = set(match.files())
+# avoid the entire walk if we're only looking for specific files
+if self._filesfastpath(match):
+for fn in sorted(fset):
+yield fn
+return
+for fn in self:
+if fn in fset:
+# specified pattern is the exact name
+fset.remove(fn)
+if match(fn):
+yield fn
+# for dirstate.walk, files=['.'] means "walk the whole tree".
+# follow that here, too
+fset.discard('.')
+for fn in sorted(fset):
+if not self.hasdir(fn):
+match.bad(fn, None)
 def matches(self, match):
 '''generate a new manifest filtered by the match argument'''
 if match.always():
 return self.copy()
-files = match.files()
+if self._filesfastpath(match):
-if (match.matchfn == match.exact or
+m = manifestdict()
-(not match.anypats() and util.all(fn in self for fn in files))):
+lm = self._lm
-return self.intersectfiles(files)
+for fn in match.files():
+if fn in lm:
-mf = self.copy()
+m._lm[fn] = lm[fn]
-for fn in mf.keys():
+return m
-if not match(fn):
-del mf[fn]
+m = manifestdict()
-return mf
+m._lm = self._lm.filtercopy(match)
+return m
 def diff(self, m2, clean=False):
 '''Finds changes between the current manifest and m2.
 Args:
 nodeid in the current/other manifest and fl1/fl2 is the flag
 in the current/other manifest. Where the file does not exist,
 the nodeid will be None and the flags will be the empty
 string.
 '''
-diff = {}
+return self._lm.diff(m2._lm, clean)
-for fn, n1 in self.iteritems():
+def setflag(self, key, flag):
-fl1 = self._flags.get(fn, '')
+self._lm[key] = self[key], flag
-n2 = m2.get(fn, None)
-fl2 = m2._flags.get(fn, '')
+def get(self, key, default=None):
-if n2 is None:
+try:
-fl2 = ''
+return self._lm[key][0]
-if n1 != n2 or fl1 != fl2:
+except KeyError:
-diff[fn] = ((n1, fl1), (n2, fl2))
+return default
-elif clean:
-diff[fn] = None
+def flags(self, key, default=''):
+try:
-for fn, n2 in m2.iteritems():
+return self._lm[key][1]
-if fn not in self:
+except KeyError:
-fl2 = m2._flags.get(fn, '')
+return default
-diff[fn] = ((None, ''), (n2, fl2))
+def copy(self):
-return diff
+c = manifestdict()
+c._lm = self._lm.copy()
-def text(self):
+return c
-"""Get the full data of this manifest as a bytestring."""
-fl = sorted(self)
+def iteritems(self):
-_checkforbidden(fl)
+return (x[:2] for x in self._lm.iterentries())
-hex, flags = revlog.hex, self.flags
+def text(self, usemanifestv2=False):
-# if this is changed to support newlines in filenames,
+if usemanifestv2:
-# be sure to check the templates/ dir again (especially *-raw.tmpl)
+return _textv2(self._lm.iterentries())
-return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
+else:
+# use (probably) native version for v1
+return self._lm.text()
 def fastdelta(self, base, changes):
 """Given a base manifest text as an array.array and a list of changes
 relative to that text, compute a delta that can be used by revlog.
 """
 # each line and creates the deltas
 for f, todelete in changes:
 # bs will either be the index of the item or the insert point
 start, end = _msearch(addbuf, f, start)
 if not todelete:
-l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
+h, fl = self._lm[f]
+l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
 else:
 if start == end:
 # item we want to delete was not found, error out
 raise AssertionError(
 _("failed to remove %s from manifest") % f)
 deltatext = "".join(struct.pack(">lll", start, end, len(content))
 + content for start, end, content in x)
 return deltatext, newaddlist
-def _parse(lines):
+def _splittopdir(f):
-mfdict = manifestdict()
+if '/' in f:
-parsers.parse_manifest(mfdict, mfdict._flags, lines)
+dir, subpath = f.split('/', 1)
-return mfdict
+return dir + '/', subpath
+else:
+return '', f
+class treemanifest(object):
+def __init__(self, dir='', text=''):
+self._dir = dir
+self._dirs = {}
+# Using _lazymanifest here is a little slower than plain old dicts
+self._files = {}
+self._flags = {}
+self.parse(text)
+def _subpath(self, path):
+return self._dir + path
+def __len__(self):
+size = len(self._files)
+for m in self._dirs.values():
+size += m.__len__()
+return size
+def _isempty(self):
+return (not self._files and (not self._dirs or
+util.all(m._isempty() for m in self._dirs.values())))
+def __str__(self):
+return '<treemanifest dir=%s>' % self._dir
+def iteritems(self):
+for p, n in sorted(self._dirs.items() + self._files.items()):
+if p in self._files:
+yield self._subpath(p), n
+else:
+for f, sn in n.iteritems():
+yield f, sn
+def iterkeys(self):
+for p in sorted(self._dirs.keys() + self._files.keys()):
+if p in self._files:
+yield self._subpath(p)
+else:
+for f in self._dirs[p].iterkeys():
+yield f
+def keys(self):
+return list(self.iterkeys())
+def __iter__(self):
+return self.iterkeys()
+def __contains__(self, f):
+if f is None:
+return False
+dir, subpath = _splittopdir(f)
+if dir:
+if dir not in self._dirs:
+return False
+return self._dirs[dir].__contains__(subpath)
+else:
+return f in self._files
+def get(self, f, default=None):
+dir, subpath = _splittopdir(f)
+if dir:
+if dir not in self._dirs:
+return default
+return self._dirs[dir].get(subpath, default)
+else:
+return self._files.get(f, default)
+def __getitem__(self, f):
+dir, subpath = _splittopdir(f)
+if dir:
+return self._dirs[dir].__getitem__(subpath)
+else:
+return self._files[f]
+def flags(self, f):
+dir, subpath = _splittopdir(f)
+if dir:
+if dir not in self._dirs:
+return ''
+return self._dirs[dir].flags(subpath)
+else:
+if f in self._dirs:
+return ''
+return self._flags.get(f, '')
+def find(self, f):
+dir, subpath = _splittopdir(f)
+if dir:
+return self._dirs[dir].find(subpath)
+else:
+return self._files[f], self._flags.get(f, '')
+def __delitem__(self, f):
+dir, subpath = _splittopdir(f)
+if dir:
+self._dirs[dir].__delitem__(subpath)
+# If the directory is now empty, remove it
+if self._dirs[dir]._isempty():
+del self._dirs[dir]
+else:
+del self._files[f]
+if f in self._flags:
+del self._flags[f]
+def __setitem__(self, f, n):
+assert n is not None
+dir, subpath = _splittopdir(f)
+if dir:
+if dir not in self._dirs:
+self._dirs[dir] = treemanifest(self._subpath(dir))
+self._dirs[dir].__setitem__(subpath, n)
+else:
+self._files[f] = n[:21] # to match manifestdict's behavior
+def setflag(self, f, flags):
+"""Set the flags (symlink, executable) for path f."""
+dir, subpath = _splittopdir(f)
+if dir:
+if dir not in self._dirs:
+self._dirs[dir] = treemanifest(self._subpath(dir))
+self._dirs[dir].setflag(subpath, flags)
+else:
+self._flags[f] = flags
+def copy(self):
+copy = treemanifest(self._dir)
+for d in self._dirs:
+copy._dirs[d] = self._dirs[d].copy()
+copy._files = dict.copy(self._files)
+copy._flags = dict.copy(self._flags)
+return copy
+def filesnotin(self, m2):
+'''Set of files in this manifest that are not in the other'''
+files = set()
+def _filesnotin(t1, t2):
+for d, m1 in t1._dirs.iteritems():
+if d in t2._dirs:
+m2 = t2._dirs[d]
+_filesnotin(m1, m2)
+else:
+files.update(m1.iterkeys())
+for fn in t1._files.iterkeys():
+if fn not in t2._files:
+files.add(t1._subpath(fn))
+_filesnotin(self, m2)
+return files
+@propertycache
+def _alldirs(self):
+return util.dirs(self)
+def dirs(self):
+return self._alldirs
+def hasdir(self, dir):
+topdir, subdir = _splittopdir(dir)
+if topdir:
+if topdir in self._dirs:
+return self._dirs[topdir].hasdir(subdir)
+return False
+return (dir + '/') in self._dirs
+def walk(self, match):
+'''Generates matching file names.
+Equivalent to manifest.matches(match).iterkeys(), but without creating
+an entirely new manifest.
+It also reports nonexistent files by marking them bad with match.bad().
+'''
+if match.always():
+for f in iter(self):
+yield f
+return
+fset = set(match.files())
+for fn in self._walk(match):
+if fn in fset:
+# specified pattern is the exact name
+fset.remove(fn)
+yield fn
+# for dirstate.walk, files=['.'] means "walk the whole tree".
+# follow that here, too
+fset.discard('.')
+for fn in sorted(fset):
+if not self.hasdir(fn):
+match.bad(fn, None)
+def _walk(self, match, alldirs=False):
+'''Recursively generates matching file names for walk().
+Will visit all subdirectories if alldirs is True, otherwise it will
+only visit subdirectories for which match.visitdir is True.'''
+if not alldirs:
+# substring to strip trailing slash
+visit = match.visitdir(self._dir[:-1] or '.')
+if not visit:
+return
+alldirs = (visit == 'all')
+# yield this dir's files and walk its submanifests
+for p in sorted(self._dirs.keys() + self._files.keys()):
+if p in self._files:
+fullp = self._subpath(p)
+if match(fullp):
+yield fullp
+else:
+for f in self._dirs[p]._walk(match, alldirs):
+yield f
+def matches(self, match):
+'''generate a new manifest filtered by the match argument'''
+if match.always():
+return self.copy()
+return self._matches(match)
+def _matches(self, match, alldirs=False):
+'''recursively generate a new manifest filtered by the match argument.
+Will visit all subdirectories if alldirs is True, otherwise it will
+only visit subdirectories for which match.visitdir is True.'''
+ret = treemanifest(self._dir)
+if not alldirs:
+# substring to strip trailing slash
+visit = match.visitdir(self._dir[:-1] or '.')
+if not visit:
+return ret
+alldirs = (visit == 'all')
+for fn in self._files:
+fullp = self._subpath(fn)
+if not match(fullp):
+continue
+ret._files[fn] = self._files[fn]
+if fn in self._flags:
+ret._flags[fn] = self._flags[fn]
+for dir, subm in self._dirs.iteritems():
+m = subm._matches(match, alldirs)
+if not m._isempty():
+ret._dirs[dir] = m
+return ret
+def diff(self, m2, clean=False):
+'''Finds changes between the current manifest and m2.
+Args:
+m2: the manifest to which this manifest should be compared.
+clean: if true, include files unchanged between these manifests
+with a None value in the returned dictionary.
+The result is returned as a dict with filename as key and
+values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
+nodeid in the current/other manifest and fl1/fl2 is the flag
+in the current/other manifest. Where the file does not exist,
+the nodeid will be None and the flags will be the empty
+string.
+'''
+result = {}
+emptytree = treemanifest()
+def _diff(t1, t2):
+for d, m1 in t1._dirs.iteritems():
+m2 = t2._dirs.get(d, emptytree)
+_diff(m1, m2)
+for d, m2 in t2._dirs.iteritems():
+if d not in t1._dirs:
+_diff(emptytree, m2)
+for fn, n1 in t1._files.iteritems():
+fl1 = t1._flags.get(fn, '')
+n2 = t2._files.get(fn, None)
+fl2 = t2._flags.get(fn, '')
+if n1 != n2 or fl1 != fl2:
+result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
+elif clean:
+result[t1._subpath(fn)] = None
+for fn, n2 in t2._files.iteritems():
+if fn not in t1._files:
+fl2 = t2._flags.get(fn, '')
+result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
+_diff(self, m2)
+return result
+def parse(self, text):
+for f, n, fl in _parse(text):
+self[f] = n
+if fl:
+self.setflag(f, fl)
+def text(self, usemanifestv2=False):
+"""Get the full data of this manifest as a bytestring."""
+flags = self.flags
+return _text(((f, self[f], flags(f)) for f in self.keys()),
+usemanifestv2)
 class manifest(revlog.revlog):
 def __init__(self, opener):
-# we expect to deal with not more than four revs at a time,
+# During normal operations, we expect to deal with not more than four
-# during a commit --amend
+# revs at a time (such as during commit --amend). When rebasing large
-self._mancache = util.lrucachedict(4)
+# stacks of commits, the number can go up, hence the config knob below.
+cachesize = 4
+usetreemanifest = False
+usemanifestv2 = False
+opts = getattr(opener, 'options', None)
+if opts is not None:
+cachesize = opts.get('manifestcachesize', cachesize)
+usetreemanifest = opts.get('usetreemanifest', usetreemanifest)
+usemanifestv2 = opts.get('manifestv2', usemanifestv2)
+self._mancache = util.lrucachedict(cachesize)
 revlog.revlog.__init__(self, opener, "00manifest.i")
+self._treeinmem = usetreemanifest
+self._treeondisk = usetreemanifest
+self._usemanifestv2 = usemanifestv2
+def _newmanifest(self, data=''):
+if self._treeinmem:
+return treemanifest('', data)
+return manifestdict(data)
+def _slowreaddelta(self, node):
+r0 = self.deltaparent(self.rev(node))
+m0 = self.read(self.node(r0))
+m1 = self.read(node)
+md = self._newmanifest()
+for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
+if n1:
+md[f] = n1
+if fl1:
+md.setflag(f, fl1)
+return md
 def readdelta(self, node):
+if self._usemanifestv2 or self._treeondisk:
+return self._slowreaddelta(node)
 r = self.rev(node)
-return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
+d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
+return self._newmanifest(d)
 def readfast(self, node):
 '''use the faster of readdelta or read'''
 r = self.rev(node)
 deltaparent = self.deltaparent(r)
 return self.readdelta(node)
 return self.read(node)
 def read(self, node):
 if node == revlog.nullid:
-return manifestdict() # don't upset local cache
+return self._newmanifest() # don't upset local cache
 if node in self._mancache:
 return self._mancache[node][0]
 text = self.revision(node)
 arraytext = array.array('c', text)
-mapping = _parse(text)
+m = self._newmanifest(text)
-self._mancache[node] = (mapping, arraytext)
+self._mancache[node] = (m, arraytext)
-return mapping
+return m
 def find(self, node, f):
 '''look up entry for a single file efficiently.
 return (node, flags) pair if found, (None, None) if not.'''
-if node in self._mancache:
+m = self.read(node)
-mapping = self._mancache[node][0]
+try:
-return mapping.get(f), mapping.flags(f)
+return m.find(f)
-text = self.revision(node)
+except KeyError:
-start, end = _msearch(text, f)
-if start == end:
 return None, None
-l = text[start:end]
-f, n = l.split('\0')
+def add(self, m, transaction, link, p1, p2, added, removed):
-return revlog.bin(n[:40]), n[40:-1]
+if (p1 in self._mancache and not self._treeinmem
+and not self._usemanifestv2):
-def add(self, map, transaction, link, p1, p2, added, removed):
-if p1 in self._mancache:
 # If our first parent is in the manifest cache, we can
 # compute a delta here using properties we know about the
 # manifest up-front, which may save time later for the
 # revlog layer.
 work.extend((x, True) for x in removed)
 # this could use heapq.merge() (from Python 2.6+) or equivalent
 # since the lists are already sorted
 work.sort()
-arraytext, deltatext = map.fastdelta(self._mancache[p1][1], work)
+arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
 cachedelta = self.rev(p1), deltatext
 text = util.buffer(arraytext)
+n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
 else:
 # The first parent manifest isn't already loaded, so we'll
 # just encode a fulltext of the manifest and pass that
 # through to the revlog layer, and let it handle the delta
 # process.
-text = map.text()
+text = m.text(self._usemanifestv2)
 arraytext = array.array('c', text)
-cachedelta = None
+n = self.addrevision(text, transaction, link, p1, p2)
-n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
+self._mancache[n] = (m, arraytext)
-self._mancache[n] = (map, arraytext)
 return n

Mercurial > public > mercurial-scm > hg-stable

comparison mercurial/manifest.py @ 24803:e89f909edffa stable 3.4-rc