diff mercurial/revset.py @ 29001:923fa9e06ea0 stable

revset: make sort() do dumb multi-pass sorting for multiple keys (issue5218) Our invert() function was too clever to not take length into account. I could fix the problem by appending '\xff' as a terminator (opposite to '\0'), but it turned out to be slower than simple multi-pass sorting. New implementation is pretty straightforward, which just calls sort() from the last key. We can do that since Python sort() is guaranteed to be stable. It doesn't sound nice to call sort() multiple times, but actually it is faster. That's probably because we have fewer Python codes in hot loop, and can avoid heavy string and list manipulation. revset #0: sort(0:10000, 'branch') 0) 0.412753 1) 0.393254 revset #1: sort(0:10000, '-branch') 0) 0.455377 1) 0.389191 85% revset #2: sort(0:10000, 'date') 0) 0.408082 1) 0.376332 92% revset #3: sort(0:10000, '-date') 0) 0.406910 1) 0.380498 93% revset #4: sort(0:10000, 'desc branch user date rev') 0) 0.542996 1) 0.486397 89% revset #5: sort(0:10000, '-desc -branch -user -date -rev') 0) 0.965032 1) 0.518426 53%
author Yuya Nishihara <yuya@tcha.org>
date Sat, 23 Apr 2016 16:09:30 +0900
parents 1203159c8928
children ea794f2eb19d
line wrap: on
line diff
--- a/mercurial/revset.py	Thu Mar 24 22:55:56 2016 +0900
+++ b/mercurial/revset.py	Sat Apr 23 16:09:30 2016 +0900
@@ -1859,9 +1859,6 @@
 
     s = l[0]
     keys = keys.split()
-    l = []
-    def invert(s):
-        return "".join(chr(255 - ord(c)) for c in s)
     revs = getset(repo, subset, s)
     if keys == ["rev"]:
         revs.sort()
@@ -1869,36 +1866,33 @@
     elif keys == ["-rev"]:
         revs.sort(reverse=True)
         return revs
-    for r in revs:
-        c = repo[r]
-        e = []
-        for k in keys:
+    # sort() is guaranteed to be stable
+    ctxs = [repo[r] for r in revs]
+    if True:
+        for k in reversed(keys):
             if k == 'rev':
-                e.append(r)
+                ctxs.sort(key=lambda c: c.rev())
             elif k == '-rev':
-                e.append(-r)
+                ctxs.sort(key=lambda c: c.rev(), reverse=True)
             elif k == 'branch':
-                e.append(c.branch())
+                ctxs.sort(key=lambda c: c.branch())
             elif k == '-branch':
-                e.append(invert(c.branch()))
+                ctxs.sort(key=lambda c: c.branch(), reverse=True)
             elif k == 'desc':
-                e.append(c.description())
+                ctxs.sort(key=lambda c: c.description())
             elif k == '-desc':
-                e.append(invert(c.description()))
+                ctxs.sort(key=lambda c: c.description(), reverse=True)
             elif k in 'user author':
-                e.append(c.user())
+                ctxs.sort(key=lambda c: c.user())
             elif k in '-user -author':
-                e.append(invert(c.user()))
+                ctxs.sort(key=lambda c: c.user(), reverse=True)
             elif k == 'date':
-                e.append(c.date()[0])
+                ctxs.sort(key=lambda c: c.date()[0])
             elif k == '-date':
-                e.append(-c.date()[0])
+                ctxs.sort(key=lambda c: c.date()[0], reverse=True)
             else:
                 raise error.ParseError(_("unknown sort key %r") % k)
-        e.append(r)
-        l.append(e)
-    l.sort()
-    return baseset([e[-1] for e in l])
+    return baseset([c.rev() for c in ctxs])
 
 @predicate('subrepo([pattern])')
 def subrepo(repo, subset, x):