diff mercurial/revset.py @ 25343:7fbef7932af9

revset: optimize 'or' operation of trivial revisions to a list As seen in issue4565 and issue4624, GUI wrappers and automated scripts are likely to generate a long query that just has numeric revisions joined by 'or'. One reason why is that they allows users to choose arbitrary revisions from a list. Because this use case isn't handled well by smartset, let's optimize it to a plain old list. Benchmarks: 1) reduce nesting of chained 'or' operations 2) optimize to a list (this patch) revset #0: 0 + 1 + 2 + ... + 1000 1) wall 0.483341 comb 0.480000 user 0.480000 sys 0.000000 (best of 20) 2) wall 0.025393 comb 0.020000 user 0.020000 sys 0.000000 (best of 107) revset #1: sort(0 + 1 + 2 + ... + 1000) 1) wall 0.035240 comb 0.040000 user 0.040000 sys 0.000000 (best of 100) 2) wall 0.026432 comb 0.030000 user 0.030000 sys 0.000000 (best of 102) revset #2: first(0 + 1 + 2 + ... + 1000) 1) wall 0.028949 comb 0.030000 user 0.030000 sys 0.000000 (best of 100) 2) wall 0.025503 comb 0.030000 user 0.030000 sys 0.000000 (best of 106)
author Yuya Nishihara <yuya@tcha.org>
date Sun, 17 May 2015 15:11:38 +0900
parents 5dde117269b6
children ceaf04bb14ff
line wrap: on
line diff
--- a/mercurial/revset.py	Fri May 29 21:31:00 2015 +0900
+++ b/mercurial/revset.py	Sun May 17 15:11:38 2015 +0900
@@ -2169,11 +2169,36 @@
             return w, (op, tb, ta)
         return w, (op, ta, tb)
     elif op == 'or':
-        ws, ts = zip(*[optimize(y, False) for y in x[1:]])
+        # fast path for machine-generated expression, that is likely to have
+        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
+        ws, ts, ss = [], [], []
+        def flushss():
+            if not ss:
+                return
+            if len(ss) == 1:
+                w, t = ss[0]
+            else:
+                s = '\0'.join(t[1] for w, t in ss)
+                y = ('func', ('symbol', '_list'), ('string', s))
+                w, t = optimize(y, False)
+            ws.append(w)
+            ts.append(t)
+            del ss[:]
+        for y in x[1:]:
+            w, t = optimize(y, False)
+            if t[0] == 'string' or t[0] == 'symbol':
+                ss.append((w, t))
+                continue
+            flushss()
+            ws.append(w)
+            ts.append(t)
+        flushss()
+        if len(ts) == 1:
+            return ws[0], ts[0] # 'or' operation is fully optimized out
         # we can't reorder trees by weight because it would change the order.
         # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
         #   ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
-        return max(ws), (op,) + ts
+        return max(ws), (op,) + tuple(ts)
     elif op == 'not':
         # Optimize not public() to _notpublic() because we have a fast version
         if x[1] == ('func', ('symbol', 'public'), None):