comparison mercurial/revset.py @ 29347:98535ad46fc0

revset: move groupbranchiter over from graphmod This move is to prepare the adaptation of this function into a toposort predicate.
author Martijn Pieters <mjpieters@fb.com>
date Mon, 13 Jun 2016 18:20:00 +0100
parents 38e0c83c7ee4
children 2188f170f5b6
comparison
equal deleted inserted replaced
29346:38e0c83c7ee4 29347:98535ad46fc0
1885 ctxs.sort(key=_sortkeyfuncs[k], reverse=reverse) 1885 ctxs.sort(key=_sortkeyfuncs[k], reverse=reverse)
1886 except KeyError: 1886 except KeyError:
1887 raise error.ParseError(_("unknown sort key %r") % fk) 1887 raise error.ParseError(_("unknown sort key %r") % fk)
1888 return baseset([c.rev() for c in ctxs]) 1888 return baseset([c.rev() for c in ctxs])
1889 1889
1890 def groupbranchiter(revs, parentsfunc, firstbranch=()):
1891 """Yield revisions from heads to roots one (topo) branch at a time.
1892
1893 This function aims to be used by a graph generator that wishes to minimize
1894 the number of parallel branches and their interleaving.
1895
1896 Example iteration order (numbers show the "true" order in a changelog):
1897
1898 o 4
1899 |
1900 o 1
1901 |
1902 | o 3
1903 | |
1904 | o 2
1905 |/
1906 o 0
1907
1908 Note that the ancestors of merges are understood by the current
1909 algorithm to be on the same branch. This means no reordering will
1910 occur behind a merge.
1911 """
1912
1913 ### Quick summary of the algorithm
1914 #
1915 # This function is based around a "retention" principle. We keep revisions
1916 # in memory until we are ready to emit a whole branch that immediately
1917 # "merges" into an existing one. This reduces the number of parallel
1918 # branches with interleaved revisions.
1919 #
1920 # During iteration revs are split into two groups:
1921 # A) revision already emitted
1922 # B) revision in "retention". They are stored as different subgroups.
1923 #
1924 # for each REV, we do the following logic:
1925 #
1926 # 1) if REV is a parent of (A), we will emit it. If there is a
1927 # retention group ((B) above) that is blocked on REV being
1928 # available, we emit all the revisions out of that retention
1929 # group first.
1930 #
1931 # 2) else, we'll search for a subgroup in (B) awaiting for REV to be
1932 # available, if such subgroup exist, we add REV to it and the subgroup is
1933 # now awaiting for REV.parents() to be available.
1934 #
1935 # 3) finally if no such group existed in (B), we create a new subgroup.
1936 #
1937 #
1938 # To bootstrap the algorithm, we emit the tipmost revision (which
1939 # puts it in group (A) from above).
1940
1941 revs.sort(reverse=True)
1942
1943 # Set of parents of revision that have been emitted. They can be considered
1944 # unblocked as the graph generator is already aware of them so there is no
1945 # need to delay the revisions that reference them.
1946 #
1947 # If someone wants to prioritize a branch over the others, pre-filling this
1948 # set will force all other branches to wait until this branch is ready to be
1949 # emitted.
1950 unblocked = set(firstbranch)
1951
1952 # list of groups waiting to be displayed, each group is defined by:
1953 #
1954 # (revs: lists of revs waiting to be displayed,
1955 # blocked: set of that cannot be displayed before those in 'revs')
1956 #
1957 # The second value ('blocked') correspond to parents of any revision in the
1958 # group ('revs') that is not itself contained in the group. The main idea
1959 # of this algorithm is to delay as much as possible the emission of any
1960 # revision. This means waiting for the moment we are about to display
1961 # these parents to display the revs in a group.
1962 #
1963 # This first implementation is smart until it encounters a merge: it will
1964 # emit revs as soon as any parent is about to be emitted and can grow an
1965 # arbitrary number of revs in 'blocked'. In practice this mean we properly
1966 # retains new branches but gives up on any special ordering for ancestors
1967 # of merges. The implementation can be improved to handle this better.
1968 #
1969 # The first subgroup is special. It corresponds to all the revision that
1970 # were already emitted. The 'revs' lists is expected to be empty and the
1971 # 'blocked' set contains the parents revisions of already emitted revision.
1972 #
1973 # You could pre-seed the <parents> set of groups[0] to a specific
1974 # changesets to select what the first emitted branch should be.
1975 groups = [([], unblocked)]
1976 pendingheap = []
1977 pendingset = set()
1978
1979 heapq.heapify(pendingheap)
1980 heappop = heapq.heappop
1981 heappush = heapq.heappush
1982 for currentrev in revs:
1983 # Heap works with smallest element, we want highest so we invert
1984 if currentrev not in pendingset:
1985 heappush(pendingheap, -currentrev)
1986 pendingset.add(currentrev)
1987 # iterates on pending rev until after the current rev have been
1988 # processed.
1989 rev = None
1990 while rev != currentrev:
1991 rev = -heappop(pendingheap)
1992 pendingset.remove(rev)
1993
1994 # Seek for a subgroup blocked, waiting for the current revision.
1995 matching = [i for i, g in enumerate(groups) if rev in g[1]]
1996
1997 if matching:
1998 # The main idea is to gather together all sets that are blocked
1999 # on the same revision.
2000 #
2001 # Groups are merged when a common blocking ancestor is
2002 # observed. For example, given two groups:
2003 #
2004 # revs [5, 4] waiting for 1
2005 # revs [3, 2] waiting for 1
2006 #
2007 # These two groups will be merged when we process
2008 # 1. In theory, we could have merged the groups when
2009 # we added 2 to the group it is now in (we could have
2010 # noticed the groups were both blocked on 1 then), but
2011 # the way it works now makes the algorithm simpler.
2012 #
2013 # We also always keep the oldest subgroup first. We can
2014 # probably improve the behavior by having the longest set
2015 # first. That way, graph algorithms could minimise the length
2016 # of parallel lines their drawing. This is currently not done.
2017 targetidx = matching.pop(0)
2018 trevs, tparents = groups[targetidx]
2019 for i in matching:
2020 gr = groups[i]
2021 trevs.extend(gr[0])
2022 tparents |= gr[1]
2023 # delete all merged subgroups (except the one we kept)
2024 # (starting from the last subgroup for performance and
2025 # sanity reasons)
2026 for i in reversed(matching):
2027 del groups[i]
2028 else:
2029 # This is a new head. We create a new subgroup for it.
2030 targetidx = len(groups)
2031 groups.append(([], set([rev])))
2032
2033 gr = groups[targetidx]
2034
2035 # We now add the current nodes to this subgroups. This is done
2036 # after the subgroup merging because all elements from a subgroup
2037 # that relied on this rev must precede it.
2038 #
2039 # we also update the <parents> set to include the parents of the
2040 # new nodes.
2041 if rev == currentrev: # only display stuff in rev
2042 gr[0].append(rev)
2043 gr[1].remove(rev)
2044 parents = [p for p in parentsfunc(rev) if p > node.nullrev]
2045 gr[1].update(parents)
2046 for p in parents:
2047 if p not in pendingset:
2048 pendingset.add(p)
2049 heappush(pendingheap, -p)
2050
2051 # Look for a subgroup to display
2052 #
2053 # When unblocked is empty (if clause), we were not waiting for any
2054 # revisions during the first iteration (if no priority was given) or
2055 # if we emitted a whole disconnected set of the graph (reached a
2056 # root). In that case we arbitrarily take the oldest known
2057 # subgroup. The heuristic could probably be better.
2058 #
2059 # Otherwise (elif clause) if the subgroup is blocked on
2060 # a revision we just emitted, we can safely emit it as
2061 # well.
2062 if not unblocked:
2063 if len(groups) > 1: # display other subset
2064 targetidx = 1
2065 gr = groups[1]
2066 elif not gr[1] & unblocked:
2067 gr = None
2068
2069 if gr is not None:
2070 # update the set of awaited revisions with the one from the
2071 # subgroup
2072 unblocked |= gr[1]
2073 # output all revisions in the subgroup
2074 for r in gr[0]:
2075 yield r
2076 # delete the subgroup that you just output
2077 # unless it is groups[0] in which case you just empty it.
2078 if targetidx:
2079 del groups[targetidx]
2080 else:
2081 gr[0][:] = []
2082 # Check if we have some subgroup waiting for revisions we are not going to
2083 # iterate over
2084 for g in groups:
2085 for r in g[0]:
2086 yield r
2087
1890 @predicate('subrepo([pattern])') 2088 @predicate('subrepo([pattern])')
1891 def subrepo(repo, subset, x): 2089 def subrepo(repo, subset, x):
1892 """Changesets that add, modify or remove the given subrepo. If no subrepo 2090 """Changesets that add, modify or remove the given subrepo. If no subrepo
1893 pattern is named, any subrepo changes are returned. 2091 pattern is named, any subrepo changes are returned.
1894 """ 2092 """