Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revset.py @ 29001:923fa9e06ea0 stable
revset: make sort() do dumb multi-pass sorting for multiple keys (issue5218)
Our invert() function was too clever to not take length into account. I could
fix the problem by appending '\xff' as a terminator (opposite to '\0'), but
it turned out to be slower than simple multi-pass sorting.
New implementation is pretty straightforward, which just calls sort() from the
last key. We can do that since Python sort() is guaranteed to be stable. It
doesn't sound nice to call sort() multiple times, but actually it is faster.
That's probably because we have fewer Python codes in hot loop, and can avoid
heavy string and list manipulation.
revset #0: sort(0:10000, 'branch')
0) 0.412753
1) 0.393254
revset #1: sort(0:10000, '-branch')
0) 0.455377
1) 0.389191 85%
revset #2: sort(0:10000, 'date')
0) 0.408082
1) 0.376332 92%
revset #3: sort(0:10000, '-date')
0) 0.406910
1) 0.380498 93%
revset #4: sort(0:10000, 'desc branch user date rev')
0) 0.542996
1) 0.486397 89%
revset #5: sort(0:10000, '-desc -branch -user -date -rev')
0) 0.965032
1) 0.518426 53%
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 23 Apr 2016 16:09:30 +0900 |
parents | 1203159c8928 |
children | ea794f2eb19d |
comparison
equal
deleted
inserted
replaced
29000:2d3837a4bded | 29001:923fa9e06ea0 |
---|---|
1857 # i18n: "sort" is a keyword | 1857 # i18n: "sort" is a keyword |
1858 keys = getstring(l[1], _("sort spec must be a string")) | 1858 keys = getstring(l[1], _("sort spec must be a string")) |
1859 | 1859 |
1860 s = l[0] | 1860 s = l[0] |
1861 keys = keys.split() | 1861 keys = keys.split() |
1862 l = [] | |
1863 def invert(s): | |
1864 return "".join(chr(255 - ord(c)) for c in s) | |
1865 revs = getset(repo, subset, s) | 1862 revs = getset(repo, subset, s) |
1866 if keys == ["rev"]: | 1863 if keys == ["rev"]: |
1867 revs.sort() | 1864 revs.sort() |
1868 return revs | 1865 return revs |
1869 elif keys == ["-rev"]: | 1866 elif keys == ["-rev"]: |
1870 revs.sort(reverse=True) | 1867 revs.sort(reverse=True) |
1871 return revs | 1868 return revs |
1872 for r in revs: | 1869 # sort() is guaranteed to be stable |
1873 c = repo[r] | 1870 ctxs = [repo[r] for r in revs] |
1874 e = [] | 1871 if True: |
1875 for k in keys: | 1872 for k in reversed(keys): |
1876 if k == 'rev': | 1873 if k == 'rev': |
1877 e.append(r) | 1874 ctxs.sort(key=lambda c: c.rev()) |
1878 elif k == '-rev': | 1875 elif k == '-rev': |
1879 e.append(-r) | 1876 ctxs.sort(key=lambda c: c.rev(), reverse=True) |
1880 elif k == 'branch': | 1877 elif k == 'branch': |
1881 e.append(c.branch()) | 1878 ctxs.sort(key=lambda c: c.branch()) |
1882 elif k == '-branch': | 1879 elif k == '-branch': |
1883 e.append(invert(c.branch())) | 1880 ctxs.sort(key=lambda c: c.branch(), reverse=True) |
1884 elif k == 'desc': | 1881 elif k == 'desc': |
1885 e.append(c.description()) | 1882 ctxs.sort(key=lambda c: c.description()) |
1886 elif k == '-desc': | 1883 elif k == '-desc': |
1887 e.append(invert(c.description())) | 1884 ctxs.sort(key=lambda c: c.description(), reverse=True) |
1888 elif k in 'user author': | 1885 elif k in 'user author': |
1889 e.append(c.user()) | 1886 ctxs.sort(key=lambda c: c.user()) |
1890 elif k in '-user -author': | 1887 elif k in '-user -author': |
1891 e.append(invert(c.user())) | 1888 ctxs.sort(key=lambda c: c.user(), reverse=True) |
1892 elif k == 'date': | 1889 elif k == 'date': |
1893 e.append(c.date()[0]) | 1890 ctxs.sort(key=lambda c: c.date()[0]) |
1894 elif k == '-date': | 1891 elif k == '-date': |
1895 e.append(-c.date()[0]) | 1892 ctxs.sort(key=lambda c: c.date()[0], reverse=True) |
1896 else: | 1893 else: |
1897 raise error.ParseError(_("unknown sort key %r") % k) | 1894 raise error.ParseError(_("unknown sort key %r") % k) |
1898 e.append(r) | 1895 return baseset([c.rev() for c in ctxs]) |
1899 l.append(e) | |
1900 l.sort() | |
1901 return baseset([e[-1] for e in l]) | |
1902 | 1896 |
1903 @predicate('subrepo([pattern])') | 1897 @predicate('subrepo([pattern])') |
1904 def subrepo(repo, subset, x): | 1898 def subrepo(repo, subset, x): |
1905 """Changesets that add, modify or remove the given subrepo. If no subrepo | 1899 """Changesets that add, modify or remove the given subrepo. If no subrepo |
1906 pattern is named, any subrepo changes are returned. | 1900 pattern is named, any subrepo changes are returned. |