Mercurial > public > mercurial-scm > hg-stable
diff mercurial/filesetlang.py @ 38904:899b4c74209c
fileset: combine union of basic patterns into single matcher
This appears to improve query performance in a big repository more than I thought.
The less Python we run in a hot loop, the faster the computation.
$ hg files --cwd mozilla-central --time 'set:a* + b* + c* + d* + e*'
(orig) time: real 0.670 secs (user 0.640+0.000 sys 0.030+0.000)
(new) time: real 0.210 secs (user 0.180+0.000 sys 0.020+0.000)
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 21 Jul 2018 17:19:12 +0900 |
parents | 73731fa8d1bd |
children | e79a69af1593 |
line wrap: on
line diff
--- a/mercurial/filesetlang.py Sat Jul 21 17:13:34 2018 +0900
+++ b/mercurial/filesetlang.py Sat Jul 21 17:19:12 2018 +0900
@@ -185,6 +185,21 @@
         return ('minus', ta, tb[1])
     return (op, ta, tb)
 
+def _optimizeunion(xs):
+    # collect string patterns so they can be compiled into a single regexp
+    ws, ts, ss = [], [], []
+    for x in xs:
+        w, t = _optimize(x)
+        if t is not None and t[0] in {'string', 'symbol', 'kindpat'}:
+            ss.append(t)
+            continue
+        ws.append(w)
+        ts.append(t)
+    if ss:
+        ws.append(WEIGHT_CHECK_FILENAME)
+        ts.append(('patterns',) + tuple(ss))
+    return ws, ts
+
 def _optimize(x):
     if x is None:
         return 0, x
@@ -206,7 +221,9 @@
         else:
             return wb, _optimizeandops(op, tb, ta)
     if op == 'or':
-        ws, ts = zip(*(_optimize(y) for y in x[1:]))
+        ws, ts = _optimizeunion(x[1:])
+        if len(ts) == 1:
+            return ws[0], ts[0] # 'or' operation is fully optimized out
         ts = tuple(it[1] for it in sorted(enumerate(ts),
                                           key=lambda it: ws[it[0]]))
         return max(ws), (op,) + ts