Mercurial > public > mercurial-scm > hg
annotate mercurial/minifileset.py @ 38865:899b4c74209c
fileset: combine union of basic patterns into single matcher
This appears to improve query performance in a big repository than I thought.
Writing less Python in a hot loop, faster computation we gain.
$ hg files --cwd mozilla-central --time 'set:a* + b* + c* + d* + e*'
(orig) time: real 0.670 secs (user 0.640+0.000 sys 0.030+0.000)
(new) time: real 0.210 secs (user 0.180+0.000 sys 0.020+0.000)
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 21 Jul 2018 17:19:12 +0900 |
parents | 7e7e2b2ff284 |
children | e79a69af1593 |
rev | line source |
---|---|
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
1 # minifileset.py - a simple language to select files |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
2 # |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
3 # Copyright 2017 Facebook, Inc. |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
4 # |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
6 # GNU General Public License version 2 or any later version. |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
7 |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
8 from __future__ import absolute_import |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
9 |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
10 from .i18n import _ |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
11 from . import ( |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
12 error, |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
13 fileset, |
38805
b9162ea1b815
fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents:
38804
diff
changeset
|
14 filesetlang, |
37876
9c98cb30f4de
minifileset: fix on Python 3
Augie Fackler <augie@google.com>
parents:
35800
diff
changeset
|
15 pycompat, |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
16 ) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
17 |
38687
1500cbe22d53
fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents:
37876
diff
changeset
|
18 def _sizep(x): |
1500cbe22d53
fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents:
37876
diff
changeset
|
19 # i18n: "size" is a keyword |
38805
b9162ea1b815
fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents:
38804
diff
changeset
|
20 expr = filesetlang.getstring(x, _("size requires an expression")) |
38687
1500cbe22d53
fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents:
37876
diff
changeset
|
21 return fileset.sizematcher(expr) |
1500cbe22d53
fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents:
37876
diff
changeset
|
22 |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
23 def _compile(tree): |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
24 if not tree: |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
25 raise error.ParseError(_("missing argument")) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
26 op = tree[0] |
35741
73432eee0ac4
fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents:
35740
diff
changeset
|
27 if op in {'symbol', 'string', 'kindpat'}: |
38805
b9162ea1b815
fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents:
38804
diff
changeset
|
28 name = filesetlang.getpattern(tree, {'path'}, _('invalid file pattern')) |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
29 if name.startswith('**'): # file extension test, ex. "**.tar.gz" |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
30 ext = name[2:] |
37876
9c98cb30f4de
minifileset: fix on Python 3
Augie Fackler <augie@google.com>
parents:
35800
diff
changeset
|
31 for c in pycompat.bytestr(ext): |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
32 if c in '*{}[]?/\\': |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
33 raise error.ParseError(_('reserved character: %s') % c) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
34 return lambda n, s: n.endswith(ext) |
35740
06a757b9e334
minifileset: unify handling of symbol and string patterns
Yuya Nishihara <yuya@tcha.org>
parents:
35691
diff
changeset
|
35 elif name.startswith('path:'): # directory or full path test |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
36 p = name[5:] # prefix |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
37 pl = len(p) |
37876
9c98cb30f4de
minifileset: fix on Python 3
Augie Fackler <augie@google.com>
parents:
35800
diff
changeset
|
38 f = lambda n, s: n.startswith(p) and (len(n) == pl |
9c98cb30f4de
minifileset: fix on Python 3
Augie Fackler <augie@google.com>
parents:
35800
diff
changeset
|
39 or n[pl:pl + 1] == '/') |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
40 return f |
35800
d5288b966e2f
minifileset: note the unsupported file pattern when raising a parse error
Matt Harbison <matt_harbison@yahoo.com>
parents:
35741
diff
changeset
|
41 raise error.ParseError(_("unsupported file pattern: %s") % name, |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
42 hint=_('paths must be prefixed with "path:"')) |
38865
899b4c74209c
fileset: combine union of basic patterns into single matcher
Yuya Nishihara <yuya@tcha.org>
parents:
38829
diff
changeset
|
43 elif op in {'or', 'patterns'}: |
38804
d82c4d42b615
fileset: flatten 'or' nodes to unnest unionmatchers
Yuya Nishihara <yuya@tcha.org>
parents:
38687
diff
changeset
|
44 funcs = [_compile(x) for x in tree[1:]] |
d82c4d42b615
fileset: flatten 'or' nodes to unnest unionmatchers
Yuya Nishihara <yuya@tcha.org>
parents:
38687
diff
changeset
|
45 return lambda n, s: any(f(n, s) for f in funcs) |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
46 elif op == 'and': |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
47 func1 = _compile(tree[1]) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
48 func2 = _compile(tree[2]) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
49 return lambda n, s: func1(n, s) and func2(n, s) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
50 elif op == 'not': |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
51 return lambda n, s: not _compile(tree[1])(n, s) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
52 elif op == 'func': |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
53 symbols = { |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
54 'all': lambda n, s: True, |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
55 'none': lambda n, s: False, |
38687
1500cbe22d53
fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents:
37876
diff
changeset
|
56 'size': lambda n, s: _sizep(tree[2])(s), |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
57 } |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
58 |
38805
b9162ea1b815
fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents:
38804
diff
changeset
|
59 name = filesetlang.getsymbol(tree[1]) |
35691
735f47b41521
fileset: make it robust for bad function calls
Yuya Nishihara <yuya@tcha.org>
parents:
35616
diff
changeset
|
60 if name in symbols: |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
61 return symbols[name] |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
62 |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
63 raise error.UnknownIdentifier(name, symbols.keys()) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
64 elif op == 'minus': # equivalent to 'x and not y' |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
65 func1 = _compile(tree[1]) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
66 func2 = _compile(tree[2]) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
67 return lambda n, s: func1(n, s) and not func2(n, s) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
68 elif op == 'list': |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
69 raise error.ParseError(_("can't use a list in this context"), |
38810
4fe8d1f077b8
help: add quotes to a few commands we point to
Martin von Zweigbergk <martinvonz@google.com>
parents:
38805
diff
changeset
|
70 hint=_('see \'hg help "filesets.x or y"\'')) |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
71 raise error.ProgrammingError('illegal tree: %r' % (tree,)) |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
72 |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
73 def compile(text): |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
74 """generate a function (path, size) -> bool from filter specification. |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
75 |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
76 "text" could contain the operators defined by the fileset language for |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
77 common logic operations, and parenthesis for grouping. The supported path |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
78 tests are '**.extname' for file extension test, and '"path:dir/subdir"' |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
79 for prefix test. The ``size()`` predicate is borrowed from filesets to test |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
80 file size. The predicates ``all()`` and ``none()`` are also supported. |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
81 |
35741
73432eee0ac4
fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents:
35740
diff
changeset
|
82 '(**.php & size(">10MB")) | **.zip | (path:bin & !path:bin/README)' for |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
83 example, will catch all php files whose size is greater than 10 MB, all |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
84 files whose name ends with ".zip", and all files under "bin" in the repo |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
85 root except for "bin/README". |
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
86 """ |
38805
b9162ea1b815
fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents:
38804
diff
changeset
|
87 tree = filesetlang.parse(text) |
38826
6371ab78c3b3
fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents:
38810
diff
changeset
|
88 tree = filesetlang.analyze(tree) |
38829
7e7e2b2ff284
fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents:
38828
diff
changeset
|
89 tree = filesetlang.optimize(tree) |
35616
706aa203b396
fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff
changeset
|
90 return _compile(tree) |