Mercurial > public > mercurial-scm > hg-stable
diff mercurial/revset.py @ 30913:1be65deb3d54
smartset: move set classes and related functions from revset module (API)
These classes are pretty large and independent from revset computation.
2961 mercurial/revset.py
973 mercurial/smartset.py
3934 total
revset.prettyformatset() is renamed to smartset.prettyformat(). Smartset
classes are aliased since they are quite common in revset.py.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 16 Oct 2016 17:28:51 +0900 |
parents | 41e31a6f5296 |
children | 11c253997b0e |
line wrap: on
line diff
--- a/mercurial/revset.py Wed Jan 25 22:39:17 2017 +0900 +++ b/mercurial/revset.py Sun Oct 16 17:28:51 2016 +0900 @@ -26,9 +26,15 @@ pycompat, registrar, repoview, + smartset, util, ) +baseset = smartset.baseset +generatorset = smartset.generatorset +spanset = smartset.spanset +fullreposet = smartset.fullreposet + def _revancestors(repo, revs, followfirst): """Like revlog.ancestors(), but supports followfirst.""" if followfirst: @@ -2940,967 +2946,6 @@ funcs.add(tree[1][1]) return funcs -def _formatsetrepr(r): - """Format an optional printable representation of a set - - ======== ================================= - type(r) example - ======== ================================= - tuple ('<not %r>', other) - str '<branch closed>' - callable lambda: '<branch %r>' % sorted(b) - object other - ======== ================================= - """ - if r is None: - return '' - elif isinstance(r, tuple): - return r[0] % r[1:] - elif isinstance(r, str): - return r - elif callable(r): - return r() - else: - return repr(r) - -class abstractsmartset(object): - - def __nonzero__(self): - """True if the smartset is not empty""" - raise NotImplementedError() - - def __contains__(self, rev): - """provide fast membership testing""" - raise NotImplementedError() - - def __iter__(self): - """iterate the set in the order it is supposed to be iterated""" - raise NotImplementedError() - - # Attributes containing a function to perform a fast iteration in a given - # direction. A smartset can have none, one, or both defined. - # - # Default value is None instead of a function returning None to avoid - # initializing an iterator just for testing if a fast method exists. - fastasc = None - fastdesc = None - - def isascending(self): - """True if the set will iterate in ascending order""" - raise NotImplementedError() - - def isdescending(self): - """True if the set will iterate in descending order""" - raise NotImplementedError() - - def istopo(self): - """True if the set will iterate in topographical order""" - raise NotImplementedError() - - def min(self): - """return the minimum element in the set""" - if self.fastasc is None: - v = min(self) - else: - for v in self.fastasc(): - break - else: - raise ValueError('arg is an empty sequence') - self.min = lambda: v - return v - - def max(self): - """return the maximum element in the set""" - if self.fastdesc is None: - return max(self) - else: - for v in self.fastdesc(): - break - else: - raise ValueError('arg is an empty sequence') - self.max = lambda: v - return v - - def first(self): - """return the first element in the set (user iteration perspective) - - Return None if the set is empty""" - raise NotImplementedError() - - def last(self): - """return the last element in the set (user iteration perspective) - - Return None if the set is empty""" - raise NotImplementedError() - - def __len__(self): - """return the length of the smartsets - - This can be expensive on smartset that could be lazy otherwise.""" - raise NotImplementedError() - - def reverse(self): - """reverse the expected iteration order""" - raise NotImplementedError() - - def sort(self, reverse=True): - """get the set to iterate in an ascending or descending order""" - raise NotImplementedError() - - def __and__(self, other): - """Returns a new object with the intersection of the two collections. - - This is part of the mandatory API for smartset.""" - if isinstance(other, fullreposet): - return self - return self.filter(other.__contains__, condrepr=other, cache=False) - - def __add__(self, other): - """Returns a new object with the union of the two collections. - - This is part of the mandatory API for smartset.""" - return addset(self, other) - - def __sub__(self, other): - """Returns a new object with the substraction of the two collections. - - This is part of the mandatory API for smartset.""" - c = other.__contains__ - return self.filter(lambda r: not c(r), condrepr=('<not %r>', other), - cache=False) - - def filter(self, condition, condrepr=None, cache=True): - """Returns this smartset filtered by condition as a new smartset. - - `condition` is a callable which takes a revision number and returns a - boolean. Optional `condrepr` provides a printable representation of - the given `condition`. - - This is part of the mandatory API for smartset.""" - # builtin cannot be cached. but do not needs to - if cache and util.safehasattr(condition, 'func_code'): - condition = util.cachefunc(condition) - return filteredset(self, condition, condrepr) - -class baseset(abstractsmartset): - """Basic data structure that represents a revset and contains the basic - operation that it should be able to perform. - - Every method in this class should be implemented by any smartset class. - """ - def __init__(self, data=(), datarepr=None, istopo=False): - """ - datarepr: a tuple of (format, obj, ...), a function or an object that - provides a printable representation of the given data. - """ - self._ascending = None - self._istopo = istopo - if not isinstance(data, list): - if isinstance(data, set): - self._set = data - # set has no order we pick one for stability purpose - self._ascending = True - data = list(data) - self._list = data - self._datarepr = datarepr - - @util.propertycache - def _set(self): - return set(self._list) - - @util.propertycache - def _asclist(self): - asclist = self._list[:] - asclist.sort() - return asclist - - def __iter__(self): - if self._ascending is None: - return iter(self._list) - elif self._ascending: - return iter(self._asclist) - else: - return reversed(self._asclist) - - def fastasc(self): - return iter(self._asclist) - - def fastdesc(self): - return reversed(self._asclist) - - @util.propertycache - def __contains__(self): - return self._set.__contains__ - - def __nonzero__(self): - return bool(self._list) - - def sort(self, reverse=False): - self._ascending = not bool(reverse) - self._istopo = False - - def reverse(self): - if self._ascending is None: - self._list.reverse() - else: - self._ascending = not self._ascending - self._istopo = False - - def __len__(self): - return len(self._list) - - def isascending(self): - """Returns True if the collection is ascending order, False if not. - - This is part of the mandatory API for smartset.""" - if len(self) <= 1: - return True - return self._ascending is not None and self._ascending - - def isdescending(self): - """Returns True if the collection is descending order, False if not. - - This is part of the mandatory API for smartset.""" - if len(self) <= 1: - return True - return self._ascending is not None and not self._ascending - - def istopo(self): - """Is the collection is in topographical order or not. - - This is part of the mandatory API for smartset.""" - if len(self) <= 1: - return True - return self._istopo - - def first(self): - if self: - if self._ascending is None: - return self._list[0] - elif self._ascending: - return self._asclist[0] - else: - return self._asclist[-1] - return None - - def last(self): - if self: - if self._ascending is None: - return self._list[-1] - elif self._ascending: - return self._asclist[-1] - else: - return self._asclist[0] - return None - - def __repr__(self): - d = {None: '', False: '-', True: '+'}[self._ascending] - s = _formatsetrepr(self._datarepr) - if not s: - l = self._list - # if _list has been built from a set, it might have a different - # order from one python implementation to another. - # We fallback to the sorted version for a stable output. - if self._ascending is not None: - l = self._asclist - s = repr(l) - return '<%s%s %s>' % (type(self).__name__, d, s) - -class filteredset(abstractsmartset): - """Duck type for baseset class which iterates lazily over the revisions in - the subset and contains a function which tests for membership in the - revset - """ - def __init__(self, subset, condition=lambda x: True, condrepr=None): - """ - condition: a function that decide whether a revision in the subset - belongs to the revset or not. - condrepr: a tuple of (format, obj, ...), a function or an object that - provides a printable representation of the given condition. - """ - self._subset = subset - self._condition = condition - self._condrepr = condrepr - - def __contains__(self, x): - return x in self._subset and self._condition(x) - - def __iter__(self): - return self._iterfilter(self._subset) - - def _iterfilter(self, it): - cond = self._condition - for x in it: - if cond(x): - yield x - - @property - def fastasc(self): - it = self._subset.fastasc - if it is None: - return None - return lambda: self._iterfilter(it()) - - @property - def fastdesc(self): - it = self._subset.fastdesc - if it is None: - return None - return lambda: self._iterfilter(it()) - - def __nonzero__(self): - fast = None - candidates = [self.fastasc if self.isascending() else None, - self.fastdesc if self.isdescending() else None, - self.fastasc, - self.fastdesc] - for candidate in candidates: - if candidate is not None: - fast = candidate - break - - if fast is not None: - it = fast() - else: - it = self - - for r in it: - return True - return False - - def __len__(self): - # Basic implementation to be changed in future patches. - # until this gets improved, we use generator expression - # here, since list comprehensions are free to call __len__ again - # causing infinite recursion - l = baseset(r for r in self) - return len(l) - - def sort(self, reverse=False): - self._subset.sort(reverse=reverse) - - def reverse(self): - self._subset.reverse() - - def isascending(self): - return self._subset.isascending() - - def isdescending(self): - return self._subset.isdescending() - - def istopo(self): - return self._subset.istopo() - - def first(self): - for x in self: - return x - return None - - def last(self): - it = None - if self.isascending(): - it = self.fastdesc - elif self.isdescending(): - it = self.fastasc - if it is not None: - for x in it(): - return x - return None #empty case - else: - x = None - for x in self: - pass - return x - - def __repr__(self): - xs = [repr(self._subset)] - s = _formatsetrepr(self._condrepr) - if s: - xs.append(s) - return '<%s %s>' % (type(self).__name__, ', '.join(xs)) - -def _iterordered(ascending, iter1, iter2): - """produce an ordered iteration from two iterators with the same order - - The ascending is used to indicated the iteration direction. - """ - choice = max - if ascending: - choice = min - - val1 = None - val2 = None - try: - # Consume both iterators in an ordered way until one is empty - while True: - if val1 is None: - val1 = next(iter1) - if val2 is None: - val2 = next(iter2) - n = choice(val1, val2) - yield n - if val1 == n: - val1 = None - if val2 == n: - val2 = None - except StopIteration: - # Flush any remaining values and consume the other one - it = iter2 - if val1 is not None: - yield val1 - it = iter1 - elif val2 is not None: - # might have been equality and both are empty - yield val2 - for val in it: - yield val - -class addset(abstractsmartset): - """Represent the addition of two sets - - Wrapper structure for lazily adding two structures without losing much - performance on the __contains__ method - - If the ascending attribute is set, that means the two structures are - ordered in either an ascending or descending way. Therefore, we can add - them maintaining the order by iterating over both at the same time - - >>> xs = baseset([0, 3, 2]) - >>> ys = baseset([5, 2, 4]) - - >>> rs = addset(xs, ys) - >>> bool(rs), 0 in rs, 1 in rs, 5 in rs, rs.first(), rs.last() - (True, True, False, True, 0, 4) - >>> rs = addset(xs, baseset([])) - >>> bool(rs), 0 in rs, 1 in rs, rs.first(), rs.last() - (True, True, False, 0, 2) - >>> rs = addset(baseset([]), baseset([])) - >>> bool(rs), 0 in rs, rs.first(), rs.last() - (False, False, None, None) - - iterate unsorted: - >>> rs = addset(xs, ys) - >>> # (use generator because pypy could call len()) - >>> list(x for x in rs) # without _genlist - [0, 3, 2, 5, 4] - >>> assert not rs._genlist - >>> len(rs) - 5 - >>> [x for x in rs] # with _genlist - [0, 3, 2, 5, 4] - >>> assert rs._genlist - - iterate ascending: - >>> rs = addset(xs, ys, ascending=True) - >>> # (use generator because pypy could call len()) - >>> list(x for x in rs), list(x for x in rs.fastasc()) # without _asclist - ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) - >>> assert not rs._asclist - >>> len(rs) - 5 - >>> [x for x in rs], [x for x in rs.fastasc()] - ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) - >>> assert rs._asclist - - iterate descending: - >>> rs = addset(xs, ys, ascending=False) - >>> # (use generator because pypy could call len()) - >>> list(x for x in rs), list(x for x in rs.fastdesc()) # without _asclist - ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) - >>> assert not rs._asclist - >>> len(rs) - 5 - >>> [x for x in rs], [x for x in rs.fastdesc()] - ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) - >>> assert rs._asclist - - iterate ascending without fastasc: - >>> rs = addset(xs, generatorset(ys), ascending=True) - >>> assert rs.fastasc is None - >>> [x for x in rs] - [0, 2, 3, 4, 5] - - iterate descending without fastdesc: - >>> rs = addset(generatorset(xs), ys, ascending=False) - >>> assert rs.fastdesc is None - >>> [x for x in rs] - [5, 4, 3, 2, 0] - """ - def __init__(self, revs1, revs2, ascending=None): - self._r1 = revs1 - self._r2 = revs2 - self._iter = None - self._ascending = ascending - self._genlist = None - self._asclist = None - - def __len__(self): - return len(self._list) - - def __nonzero__(self): - return bool(self._r1) or bool(self._r2) - - @util.propertycache - def _list(self): - if not self._genlist: - self._genlist = baseset(iter(self)) - return self._genlist - - def __iter__(self): - """Iterate over both collections without repeating elements - - If the ascending attribute is not set, iterate over the first one and - then over the second one checking for membership on the first one so we - dont yield any duplicates. - - If the ascending attribute is set, iterate over both collections at the - same time, yielding only one value at a time in the given order. - """ - if self._ascending is None: - if self._genlist: - return iter(self._genlist) - def arbitraryordergen(): - for r in self._r1: - yield r - inr1 = self._r1.__contains__ - for r in self._r2: - if not inr1(r): - yield r - return arbitraryordergen() - # try to use our own fast iterator if it exists - self._trysetasclist() - if self._ascending: - attr = 'fastasc' - else: - attr = 'fastdesc' - it = getattr(self, attr) - if it is not None: - return it() - # maybe half of the component supports fast - # get iterator for _r1 - iter1 = getattr(self._r1, attr) - if iter1 is None: - # let's avoid side effect (not sure it matters) - iter1 = iter(sorted(self._r1, reverse=not self._ascending)) - else: - iter1 = iter1() - # get iterator for _r2 - iter2 = getattr(self._r2, attr) - if iter2 is None: - # let's avoid side effect (not sure it matters) - iter2 = iter(sorted(self._r2, reverse=not self._ascending)) - else: - iter2 = iter2() - return _iterordered(self._ascending, iter1, iter2) - - def _trysetasclist(self): - """populate the _asclist attribute if possible and necessary""" - if self._genlist is not None and self._asclist is None: - self._asclist = sorted(self._genlist) - - @property - def fastasc(self): - self._trysetasclist() - if self._asclist is not None: - return self._asclist.__iter__ - iter1 = self._r1.fastasc - iter2 = self._r2.fastasc - if None in (iter1, iter2): - return None - return lambda: _iterordered(True, iter1(), iter2()) - - @property - def fastdesc(self): - self._trysetasclist() - if self._asclist is not None: - return self._asclist.__reversed__ - iter1 = self._r1.fastdesc - iter2 = self._r2.fastdesc - if None in (iter1, iter2): - return None - return lambda: _iterordered(False, iter1(), iter2()) - - def __contains__(self, x): - return x in self._r1 or x in self._r2 - - def sort(self, reverse=False): - """Sort the added set - - For this we use the cached list with all the generated values and if we - know they are ascending or descending we can sort them in a smart way. - """ - self._ascending = not reverse - - def isascending(self): - return self._ascending is not None and self._ascending - - def isdescending(self): - return self._ascending is not None and not self._ascending - - def istopo(self): - # not worth the trouble asserting if the two sets combined are still - # in topographical order. Use the sort() predicate to explicitly sort - # again instead. - return False - - def reverse(self): - if self._ascending is None: - self._list.reverse() - else: - self._ascending = not self._ascending - - def first(self): - for x in self: - return x - return None - - def last(self): - self.reverse() - val = self.first() - self.reverse() - return val - - def __repr__(self): - d = {None: '', False: '-', True: '+'}[self._ascending] - return '<%s%s %r, %r>' % (type(self).__name__, d, self._r1, self._r2) - -class generatorset(abstractsmartset): - """Wrap a generator for lazy iteration - - Wrapper structure for generators that provides lazy membership and can - be iterated more than once. - When asked for membership it generates values until either it finds the - requested one or has gone through all the elements in the generator - """ - def __init__(self, gen, iterasc=None): - """ - gen: a generator producing the values for the generatorset. - """ - self._gen = gen - self._asclist = None - self._cache = {} - self._genlist = [] - self._finished = False - self._ascending = True - if iterasc is not None: - if iterasc: - self.fastasc = self._iterator - self.__contains__ = self._asccontains - else: - self.fastdesc = self._iterator - self.__contains__ = self._desccontains - - def __nonzero__(self): - # Do not use 'for r in self' because it will enforce the iteration - # order (default ascending), possibly unrolling a whole descending - # iterator. - if self._genlist: - return True - for r in self._consumegen(): - return True - return False - - def __contains__(self, x): - if x in self._cache: - return self._cache[x] - - # Use new values only, as existing values would be cached. - for l in self._consumegen(): - if l == x: - return True - - self._cache[x] = False - return False - - def _asccontains(self, x): - """version of contains optimised for ascending generator""" - if x in self._cache: - return self._cache[x] - - # Use new values only, as existing values would be cached. - for l in self._consumegen(): - if l == x: - return True - if l > x: - break - - self._cache[x] = False - return False - - def _desccontains(self, x): - """version of contains optimised for descending generator""" - if x in self._cache: - return self._cache[x] - - # Use new values only, as existing values would be cached. - for l in self._consumegen(): - if l == x: - return True - if l < x: - break - - self._cache[x] = False - return False - - def __iter__(self): - if self._ascending: - it = self.fastasc - else: - it = self.fastdesc - if it is not None: - return it() - # we need to consume the iterator - for x in self._consumegen(): - pass - # recall the same code - return iter(self) - - def _iterator(self): - if self._finished: - return iter(self._genlist) - - # We have to use this complex iteration strategy to allow multiple - # iterations at the same time. We need to be able to catch revision - # removed from _consumegen and added to genlist in another instance. - # - # Getting rid of it would provide an about 15% speed up on this - # iteration. - genlist = self._genlist - nextrev = self._consumegen().next - _len = len # cache global lookup - def gen(): - i = 0 - while True: - if i < _len(genlist): - yield genlist[i] - else: - yield nextrev() - i += 1 - return gen() - - def _consumegen(self): - cache = self._cache - genlist = self._genlist.append - for item in self._gen: - cache[item] = True - genlist(item) - yield item - if not self._finished: - self._finished = True - asc = self._genlist[:] - asc.sort() - self._asclist = asc - self.fastasc = asc.__iter__ - self.fastdesc = asc.__reversed__ - - def __len__(self): - for x in self._consumegen(): - pass - return len(self._genlist) - - def sort(self, reverse=False): - self._ascending = not reverse - - def reverse(self): - self._ascending = not self._ascending - - def isascending(self): - return self._ascending - - def isdescending(self): - return not self._ascending - - def istopo(self): - # not worth the trouble asserting if the two sets combined are still - # in topographical order. Use the sort() predicate to explicitly sort - # again instead. - return False - - def first(self): - if self._ascending: - it = self.fastasc - else: - it = self.fastdesc - if it is None: - # we need to consume all and try again - for x in self._consumegen(): - pass - return self.first() - return next(it(), None) - - def last(self): - if self._ascending: - it = self.fastdesc - else: - it = self.fastasc - if it is None: - # we need to consume all and try again - for x in self._consumegen(): - pass - return self.first() - return next(it(), None) - - def __repr__(self): - d = {False: '-', True: '+'}[self._ascending] - return '<%s%s>' % (type(self).__name__, d) - -class spanset(abstractsmartset): - """Duck type for baseset class which represents a range of revisions and - can work lazily and without having all the range in memory - - Note that spanset(x, y) behave almost like xrange(x, y) except for two - notable points: - - when x < y it will be automatically descending, - - revision filtered with this repoview will be skipped. - - """ - def __init__(self, repo, start=0, end=None): - """ - start: first revision included the set - (default to 0) - end: first revision excluded (last+1) - (default to len(repo) - - Spanset will be descending if `end` < `start`. - """ - if end is None: - end = len(repo) - self._ascending = start <= end - if not self._ascending: - start, end = end + 1, start +1 - self._start = start - self._end = end - self._hiddenrevs = repo.changelog.filteredrevs - - def sort(self, reverse=False): - self._ascending = not reverse - - def reverse(self): - self._ascending = not self._ascending - - def istopo(self): - # not worth the trouble asserting if the two sets combined are still - # in topographical order. Use the sort() predicate to explicitly sort - # again instead. - return False - - def _iterfilter(self, iterrange): - s = self._hiddenrevs - for r in iterrange: - if r not in s: - yield r - - def __iter__(self): - if self._ascending: - return self.fastasc() - else: - return self.fastdesc() - - def fastasc(self): - iterrange = xrange(self._start, self._end) - if self._hiddenrevs: - return self._iterfilter(iterrange) - return iter(iterrange) - - def fastdesc(self): - iterrange = xrange(self._end - 1, self._start - 1, -1) - if self._hiddenrevs: - return self._iterfilter(iterrange) - return iter(iterrange) - - def __contains__(self, rev): - hidden = self._hiddenrevs - return ((self._start <= rev < self._end) - and not (hidden and rev in hidden)) - - def __nonzero__(self): - for r in self: - return True - return False - - def __len__(self): - if not self._hiddenrevs: - return abs(self._end - self._start) - else: - count = 0 - start = self._start - end = self._end - for rev in self._hiddenrevs: - if (end < rev <= start) or (start <= rev < end): - count += 1 - return abs(self._end - self._start) - count - - def isascending(self): - return self._ascending - - def isdescending(self): - return not self._ascending - - def first(self): - if self._ascending: - it = self.fastasc - else: - it = self.fastdesc - for x in it(): - return x - return None - - def last(self): - if self._ascending: - it = self.fastdesc - else: - it = self.fastasc - for x in it(): - return x - return None - - def __repr__(self): - d = {False: '-', True: '+'}[self._ascending] - return '<%s%s %d:%d>' % (type(self).__name__, d, - self._start, self._end - 1) - -class fullreposet(spanset): - """a set containing all revisions in the repo - - This class exists to host special optimization and magic to handle virtual - revisions such as "null". - """ - - def __init__(self, repo): - super(fullreposet, self).__init__(repo) - - def __and__(self, other): - """As self contains the whole repo, all of the other set should also be - in self. Therefore `self & other = other`. - - This boldly assumes the other contains valid revs only. - """ - # other not a smartset, make is so - if not util.safehasattr(other, 'isascending'): - # filter out hidden revision - # (this boldly assumes all smartset are pure) - # - # `other` was used with "&", let's assume this is a set like - # object. - other = baseset(other - self._hiddenrevs) - - other.sort(reverse=self.isdescending()) - return other - -def prettyformatset(revs): - lines = [] - rs = repr(revs) - p = 0 - while p < len(rs): - q = rs.find('<', p + 1) - if q < 0: - q = len(rs) - l = rs.count('<', 0, p) - rs.count('>', 0, p) - assert l >= 0 - lines.append((l, rs[p:q].rstrip())) - p = q - return '\n'.join(' ' * l + s for l, s in lines) - def loadpredicate(ui, extname, registrarobj): """Load revset predicates from specified registrarobj """