Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/fileset.py @ 38845:b9162ea1b815
fileset: extract language processing part to new module (API)
I'll add a couple more functions that work on the parsed tree.
% wc -l mercurial/fileset*.py
559 mercurial/fileset.py
135 mercurial/filesetlang.py
694 total
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 22 Jul 2018 15:50:45 +0900 |
parents | d82c4d42b615 |
children | 4fe8d1f077b8 |
comparison
equal
deleted
inserted
replaced
38844:d82c4d42b615 | 38845:b9162ea1b815 |
---|---|
11 import re | 11 import re |
12 | 12 |
13 from .i18n import _ | 13 from .i18n import _ |
14 from . import ( | 14 from . import ( |
15 error, | 15 error, |
16 filesetlang, | |
16 match as matchmod, | 17 match as matchmod, |
17 merge, | 18 merge, |
18 parser, | |
19 pycompat, | 19 pycompat, |
20 registrar, | 20 registrar, |
21 scmutil, | 21 scmutil, |
22 util, | 22 util, |
23 ) | 23 ) |
24 from .utils import ( | 24 from .utils import ( |
25 stringutil, | 25 stringutil, |
26 ) | 26 ) |
27 | 27 |
28 elements = { | 28 # helpers for processing parsed tree |
29 # token-type: binding-strength, primary, prefix, infix, suffix | 29 getsymbol = filesetlang.getsymbol |
30 "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None), | 30 getstring = filesetlang.getstring |
31 ":": (15, None, None, ("kindpat", 15), None), | 31 _getkindpat = filesetlang.getkindpat |
32 "-": (5, None, ("negate", 19), ("minus", 5), None), | 32 getpattern = filesetlang.getpattern |
33 "not": (10, None, ("not", 10), None, None), | 33 getargs = filesetlang.getargs |
34 "!": (10, None, ("not", 10), None, None), | |
35 "and": (5, None, None, ("and", 5), None), | |
36 "&": (5, None, None, ("and", 5), None), | |
37 "or": (4, None, None, ("or", 4), None), | |
38 "|": (4, None, None, ("or", 4), None), | |
39 "+": (4, None, None, ("or", 4), None), | |
40 ",": (2, None, None, ("list", 2), None), | |
41 ")": (0, None, None, None, None), | |
42 "symbol": (0, "symbol", None, None, None), | |
43 "string": (0, "string", None, None, None), | |
44 "end": (0, None, None, None, None), | |
45 } | |
46 | |
47 keywords = {'and', 'or', 'not'} | |
48 | |
49 globchars = ".*{}[]?/\\_" | |
50 | |
51 def tokenize(program): | |
52 pos, l = 0, len(program) | |
53 program = pycompat.bytestr(program) | |
54 while pos < l: | |
55 c = program[pos] | |
56 if c.isspace(): # skip inter-token whitespace | |
57 pass | |
58 elif c in "(),-:|&+!": # handle simple operators | |
59 yield (c, None, pos) | |
60 elif (c in '"\'' or c == 'r' and | |
61 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings | |
62 if c == 'r': | |
63 pos += 1 | |
64 c = program[pos] | |
65 decode = lambda x: x | |
66 else: | |
67 decode = parser.unescapestr | |
68 pos += 1 | |
69 s = pos | |
70 while pos < l: # find closing quote | |
71 d = program[pos] | |
72 if d == '\\': # skip over escaped characters | |
73 pos += 2 | |
74 continue | |
75 if d == c: | |
76 yield ('string', decode(program[s:pos]), s) | |
77 break | |
78 pos += 1 | |
79 else: | |
80 raise error.ParseError(_("unterminated string"), s) | |
81 elif c.isalnum() or c in globchars or ord(c) > 127: | |
82 # gather up a symbol/keyword | |
83 s = pos | |
84 pos += 1 | |
85 while pos < l: # find end of symbol | |
86 d = program[pos] | |
87 if not (d.isalnum() or d in globchars or ord(d) > 127): | |
88 break | |
89 pos += 1 | |
90 sym = program[s:pos] | |
91 if sym in keywords: # operator keywords | |
92 yield (sym, None, s) | |
93 else: | |
94 yield ('symbol', sym, s) | |
95 pos -= 1 | |
96 else: | |
97 raise error.ParseError(_("syntax error"), pos) | |
98 pos += 1 | |
99 yield ('end', None, pos) | |
100 | |
101 def parse(expr): | |
102 p = parser.parser(elements) | |
103 tree, pos = p.parse(tokenize(expr)) | |
104 if pos != len(expr): | |
105 raise error.ParseError(_("invalid token"), pos) | |
106 return parser.simplifyinfixops(tree, {'list', 'or'}) | |
107 | |
108 def getsymbol(x): | |
109 if x and x[0] == 'symbol': | |
110 return x[1] | |
111 raise error.ParseError(_('not a symbol')) | |
112 | |
113 def getstring(x, err): | |
114 if x and (x[0] == 'string' or x[0] == 'symbol'): | |
115 return x[1] | |
116 raise error.ParseError(err) | |
117 | |
118 def _getkindpat(x, y, allkinds, err): | |
119 kind = getsymbol(x) | |
120 pat = getstring(y, err) | |
121 if kind not in allkinds: | |
122 raise error.ParseError(_("invalid pattern kind: %s") % kind) | |
123 return '%s:%s' % (kind, pat) | |
124 | |
125 def getpattern(x, allkinds, err): | |
126 if x and x[0] == 'kindpat': | |
127 return _getkindpat(x[1], x[2], allkinds, err) | |
128 return getstring(x, err) | |
129 | |
130 def getlist(x): | |
131 if not x: | |
132 return [] | |
133 if x[0] == 'list': | |
134 return list(x[1:]) | |
135 return [x] | |
136 | |
137 def getargs(x, min, max, err): | |
138 l = getlist(x) | |
139 if len(l) < min or len(l) > max: | |
140 raise error.ParseError(err) | |
141 return l | |
142 | 34 |
143 def getmatch(mctx, x): | 35 def getmatch(mctx, x): |
144 if not x: | 36 if not x: |
145 raise error.ParseError(_("missing argument")) | 37 raise error.ParseError(_("missing argument")) |
146 return methods[x[0]](mctx, *x[1:]) | 38 return methods[x[0]](mctx, *x[1:]) |
190 # symbols are callable like: | 82 # symbols are callable like: |
191 # fun(mctx, x) | 83 # fun(mctx, x) |
192 # with: | 84 # with: |
193 # mctx - current matchctx instance | 85 # mctx - current matchctx instance |
194 # x - argument in tree form | 86 # x - argument in tree form |
195 symbols = {} | 87 symbols = filesetlang.symbols |
196 | 88 |
197 # filesets using matchctx.status() | 89 # filesets using matchctx.status() |
198 _statuscallers = set() | 90 _statuscallers = set() |
199 | 91 |
200 predicate = registrar.filesetpredicate() | 92 predicate = registrar.filesetpredicate() |
633 return True | 525 return True |
634 return False | 526 return False |
635 | 527 |
636 def match(ctx, expr, badfn=None): | 528 def match(ctx, expr, badfn=None): |
637 """Create a matcher for a single fileset expression""" | 529 """Create a matcher for a single fileset expression""" |
638 tree = parse(expr) | 530 tree = filesetlang.parse(expr) |
639 mctx = matchctx(ctx, _buildstatus(ctx, tree), badfn=badfn) | 531 mctx = matchctx(ctx, _buildstatus(ctx, tree), badfn=badfn) |
640 return getmatch(mctx, tree) | 532 return getmatch(mctx, tree) |
641 | 533 |
642 def _buildstatus(ctx, tree, basectx=None): | 534 def _buildstatus(ctx, tree, basectx=None): |
643 # do we need status info? | 535 # do we need status info? |
651 return basectx.status(ctx, listunknown=unknown, listignored=ignored, | 543 return basectx.status(ctx, listunknown=unknown, listignored=ignored, |
652 listclean=True) | 544 listclean=True) |
653 else: | 545 else: |
654 return None | 546 return None |
655 | 547 |
656 def prettyformat(tree): | |
657 return parser.prettyformat(tree, ('string', 'symbol')) | |
658 | |
659 def loadpredicate(ui, extname, registrarobj): | 548 def loadpredicate(ui, extname, registrarobj): |
660 """Load fileset predicates from specified registrarobj | 549 """Load fileset predicates from specified registrarobj |
661 """ | 550 """ |
662 for name, func in registrarobj._table.iteritems(): | 551 for name, func in registrarobj._table.iteritems(): |
663 symbols[name] = func | 552 symbols[name] = func |