Mercurial > public > mercurial-scm > hg
comparison mercurial/revset.py @ 31024:0b8356705de6
revset: split language services to revsetlang module (API)
New revsetlang module hosts parser, tokenizer, and miscellaneous functions
working on parsed tree. It does not include functions for evaluation such as
getset() and match().
2288 mercurial/revset.py
684 mercurial/revsetlang.py
2972 total
get*() functions are aliased since they are common in revset.py.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 19 Feb 2017 18:19:33 +0900 |
parents | 17b5cda5a84a |
children | 0e07855e6054 |
comparison
equal
deleted
inserted
replaced
31023:aea06029919e | 31024:0b8356705de6 |
---|---|
7 | 7 |
8 from __future__ import absolute_import | 8 from __future__ import absolute_import |
9 | 9 |
10 import heapq | 10 import heapq |
11 import re | 11 import re |
12 import string | |
13 | 12 |
14 from .i18n import _ | 13 from .i18n import _ |
15 from . import ( | 14 from . import ( |
16 destutil, | 15 destutil, |
17 encoding, | 16 encoding, |
18 error, | 17 error, |
19 hbisect, | 18 hbisect, |
20 match as matchmod, | 19 match as matchmod, |
21 node, | 20 node, |
22 obsolete as obsmod, | 21 obsolete as obsmod, |
23 parser, | |
24 pathutil, | 22 pathutil, |
25 phases, | 23 phases, |
26 pycompat, | |
27 registrar, | 24 registrar, |
28 repoview, | 25 repoview, |
26 revsetlang, | |
29 smartset, | 27 smartset, |
30 util, | 28 util, |
31 ) | 29 ) |
30 | |
31 # helpers for processing parsed tree | |
32 getsymbol = revsetlang.getsymbol | |
33 getstring = revsetlang.getstring | |
34 getinteger = revsetlang.getinteger | |
35 getlist = revsetlang.getlist | |
36 getrange = revsetlang.getrange | |
37 getargs = revsetlang.getargs | |
38 getargsdict = revsetlang.getargsdict | |
39 | |
40 # constants used as an argument of match() and matchany() | |
41 anyorder = revsetlang.anyorder | |
42 defineorder = revsetlang.defineorder | |
43 followorder = revsetlang.followorder | |
32 | 44 |
33 baseset = smartset.baseset | 45 baseset = smartset.baseset |
34 generatorset = smartset.generatorset | 46 generatorset = smartset.generatorset |
35 spanset = smartset.spanset | 47 spanset = smartset.spanset |
36 fullreposet = smartset.fullreposet | 48 fullreposet = smartset.fullreposet |
150 revs = _reachablerootspure(repo, minroot, roots, heads, includepath) | 162 revs = _reachablerootspure(repo, minroot, roots, heads, includepath) |
151 revs = baseset(revs) | 163 revs = baseset(revs) |
152 revs.sort() | 164 revs.sort() |
153 return revs | 165 return revs |
154 | 166 |
155 elements = { | |
156 # token-type: binding-strength, primary, prefix, infix, suffix | |
157 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None), | |
158 "##": (20, None, None, ("_concat", 20), None), | |
159 "~": (18, None, None, ("ancestor", 18), None), | |
160 "^": (18, None, None, ("parent", 18), "parentpost"), | |
161 "-": (5, None, ("negate", 19), ("minus", 5), None), | |
162 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"), | |
163 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"), | |
164 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"), | |
165 "not": (10, None, ("not", 10), None, None), | |
166 "!": (10, None, ("not", 10), None, None), | |
167 "and": (5, None, None, ("and", 5), None), | |
168 "&": (5, None, None, ("and", 5), None), | |
169 "%": (5, None, None, ("only", 5), "onlypost"), | |
170 "or": (4, None, None, ("or", 4), None), | |
171 "|": (4, None, None, ("or", 4), None), | |
172 "+": (4, None, None, ("or", 4), None), | |
173 "=": (3, None, None, ("keyvalue", 3), None), | |
174 ",": (2, None, None, ("list", 2), None), | |
175 ")": (0, None, None, None, None), | |
176 "symbol": (0, "symbol", None, None, None), | |
177 "string": (0, "string", None, None, None), | |
178 "end": (0, None, None, None, None), | |
179 } | |
180 | |
181 keywords = set(['and', 'or', 'not']) | |
182 | |
183 # default set of valid characters for the initial letter of symbols | |
184 _syminitletters = set( | |
185 string.ascii_letters + | |
186 string.digits + pycompat.sysstr('._@')) | set(map(chr, xrange(128, 256))) | |
187 | |
188 # default set of valid characters for non-initial letters of symbols | |
189 _symletters = _syminitletters | set(pycompat.sysstr('-/')) | |
190 | |
191 def tokenize(program, lookup=None, syminitletters=None, symletters=None): | |
192 ''' | |
193 Parse a revset statement into a stream of tokens | |
194 | |
195 ``syminitletters`` is the set of valid characters for the initial | |
196 letter of symbols. | |
197 | |
198 By default, character ``c`` is recognized as valid for initial | |
199 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``. | |
200 | |
201 ``symletters`` is the set of valid characters for non-initial | |
202 letters of symbols. | |
203 | |
204 By default, character ``c`` is recognized as valid for non-initial | |
205 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``. | |
206 | |
207 Check that @ is a valid unquoted token character (issue3686): | |
208 >>> list(tokenize("@::")) | |
209 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)] | |
210 | |
211 ''' | |
212 if syminitletters is None: | |
213 syminitletters = _syminitletters | |
214 if symletters is None: | |
215 symletters = _symletters | |
216 | |
217 if program and lookup: | |
218 # attempt to parse old-style ranges first to deal with | |
219 # things like old-tag which contain query metacharacters | |
220 parts = program.split(':', 1) | |
221 if all(lookup(sym) for sym in parts if sym): | |
222 if parts[0]: | |
223 yield ('symbol', parts[0], 0) | |
224 if len(parts) > 1: | |
225 s = len(parts[0]) | |
226 yield (':', None, s) | |
227 if parts[1]: | |
228 yield ('symbol', parts[1], s + 1) | |
229 yield ('end', None, len(program)) | |
230 return | |
231 | |
232 pos, l = 0, len(program) | |
233 while pos < l: | |
234 c = program[pos] | |
235 if c.isspace(): # skip inter-token whitespace | |
236 pass | |
237 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully | |
238 yield ('::', None, pos) | |
239 pos += 1 # skip ahead | |
240 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully | |
241 yield ('..', None, pos) | |
242 pos += 1 # skip ahead | |
243 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully | |
244 yield ('##', None, pos) | |
245 pos += 1 # skip ahead | |
246 elif c in "():=,-|&+!~^%": # handle simple operators | |
247 yield (c, None, pos) | |
248 elif (c in '"\'' or c == 'r' and | |
249 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings | |
250 if c == 'r': | |
251 pos += 1 | |
252 c = program[pos] | |
253 decode = lambda x: x | |
254 else: | |
255 decode = parser.unescapestr | |
256 pos += 1 | |
257 s = pos | |
258 while pos < l: # find closing quote | |
259 d = program[pos] | |
260 if d == '\\': # skip over escaped characters | |
261 pos += 2 | |
262 continue | |
263 if d == c: | |
264 yield ('string', decode(program[s:pos]), s) | |
265 break | |
266 pos += 1 | |
267 else: | |
268 raise error.ParseError(_("unterminated string"), s) | |
269 # gather up a symbol/keyword | |
270 elif c in syminitletters: | |
271 s = pos | |
272 pos += 1 | |
273 while pos < l: # find end of symbol | |
274 d = program[pos] | |
275 if d not in symletters: | |
276 break | |
277 if d == '.' and program[pos - 1] == '.': # special case for .. | |
278 pos -= 1 | |
279 break | |
280 pos += 1 | |
281 sym = program[s:pos] | |
282 if sym in keywords: # operator keywords | |
283 yield (sym, None, s) | |
284 elif '-' in sym: | |
285 # some jerk gave us foo-bar-baz, try to check if it's a symbol | |
286 if lookup and lookup(sym): | |
287 # looks like a real symbol | |
288 yield ('symbol', sym, s) | |
289 else: | |
290 # looks like an expression | |
291 parts = sym.split('-') | |
292 for p in parts[:-1]: | |
293 if p: # possible consecutive - | |
294 yield ('symbol', p, s) | |
295 s += len(p) | |
296 yield ('-', None, pos) | |
297 s += 1 | |
298 if parts[-1]: # possible trailing - | |
299 yield ('symbol', parts[-1], s) | |
300 else: | |
301 yield ('symbol', sym, s) | |
302 pos -= 1 | |
303 else: | |
304 raise error.ParseError(_("syntax error in revset '%s'") % | |
305 program, pos) | |
306 pos += 1 | |
307 yield ('end', None, pos) | |
308 | |
309 # helpers | 167 # helpers |
310 | |
311 _notset = object() | |
312 | |
313 def getsymbol(x): | |
314 if x and x[0] == 'symbol': | |
315 return x[1] | |
316 raise error.ParseError(_('not a symbol')) | |
317 | |
318 def getstring(x, err): | |
319 if x and (x[0] == 'string' or x[0] == 'symbol'): | |
320 return x[1] | |
321 raise error.ParseError(err) | |
322 | |
323 def getinteger(x, err, default=_notset): | |
324 if not x and default is not _notset: | |
325 return default | |
326 try: | |
327 return int(getstring(x, err)) | |
328 except ValueError: | |
329 raise error.ParseError(err) | |
330 | |
331 def getlist(x): | |
332 if not x: | |
333 return [] | |
334 if x[0] == 'list': | |
335 return list(x[1:]) | |
336 return [x] | |
337 | |
338 def getrange(x, err): | |
339 if not x: | |
340 raise error.ParseError(err) | |
341 op = x[0] | |
342 if op == 'range': | |
343 return x[1], x[2] | |
344 elif op == 'rangepre': | |
345 return None, x[1] | |
346 elif op == 'rangepost': | |
347 return x[1], None | |
348 elif op == 'rangeall': | |
349 return None, None | |
350 raise error.ParseError(err) | |
351 | |
352 def getargs(x, min, max, err): | |
353 l = getlist(x) | |
354 if len(l) < min or (max >= 0 and len(l) > max): | |
355 raise error.ParseError(err) | |
356 return l | |
357 | |
358 def getargsdict(x, funcname, keys): | |
359 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys), | |
360 keyvaluenode='keyvalue', keynode='symbol') | |
361 | 168 |
362 def getset(repo, subset, x): | 169 def getset(repo, subset, x): |
363 if not x: | 170 if not x: |
364 raise error.ParseError(_("missing argument")) | 171 raise error.ParseError(_("missing argument")) |
365 s = methods[x[0]](repo, subset, *x[1:]) | 172 s = methods[x[0]](repo, subset, *x[1:]) |
2410 "ancestor": ancestorspec, | 2217 "ancestor": ancestorspec, |
2411 "parent": parentspec, | 2218 "parent": parentspec, |
2412 "parentpost": parentpost, | 2219 "parentpost": parentpost, |
2413 } | 2220 } |
2414 | 2221 |
2415 # Constants for ordering requirement, used in _analyze(): | |
2416 # | |
2417 # If 'define', any nested functions and operations can change the ordering of | |
2418 # the entries in the set. If 'follow', any nested functions and operations | |
2419 # should take the ordering specified by the first operand to the '&' operator. | |
2420 # | |
2421 # For instance, | |
2422 # | |
2423 # X & (Y | Z) | |
2424 # ^ ^^^^^^^ | |
2425 # | follow | |
2426 # define | |
2427 # | |
2428 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order | |
2429 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't. | |
2430 # | |
2431 # 'any' means the order doesn't matter. For instance, | |
2432 # | |
2433 # X & !Y | |
2434 # ^ | |
2435 # any | |
2436 # | |
2437 # 'y()' can either enforce its ordering requirement or take the ordering | |
2438 # specified by 'x()' because 'not()' doesn't care the order. | |
2439 # | |
2440 # Transition of ordering requirement: | |
2441 # | |
2442 # 1. starts with 'define' | |
2443 # 2. shifts to 'follow' by 'x & y' | |
2444 # 3. changes back to 'define' on function call 'f(x)' or function-like | |
2445 # operation 'x (f) y' because 'f' may have its own ordering requirement | |
2446 # for 'x' and 'y' (e.g. 'first(x)') | |
2447 # | |
2448 anyorder = 'any' # don't care the order | |
2449 defineorder = 'define' # should define the order | |
2450 followorder = 'follow' # must follow the current order | |
2451 | |
2452 # transition table for 'x & y', from the current expression 'x' to 'y' | |
2453 _tofolloworder = { | |
2454 anyorder: anyorder, | |
2455 defineorder: followorder, | |
2456 followorder: followorder, | |
2457 } | |
2458 | |
2459 def _matchonly(revs, bases): | |
2460 """ | |
2461 >>> f = lambda *args: _matchonly(*map(parse, args)) | |
2462 >>> f('ancestors(A)', 'not ancestors(B)') | |
2463 ('list', ('symbol', 'A'), ('symbol', 'B')) | |
2464 """ | |
2465 if (revs is not None | |
2466 and revs[0] == 'func' | |
2467 and getsymbol(revs[1]) == 'ancestors' | |
2468 and bases is not None | |
2469 and bases[0] == 'not' | |
2470 and bases[1][0] == 'func' | |
2471 and getsymbol(bases[1][1]) == 'ancestors'): | |
2472 return ('list', revs[2], bases[1][2]) | |
2473 | |
2474 def _fixops(x): | |
2475 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be | |
2476 handled well by our simple top-down parser""" | |
2477 if not isinstance(x, tuple): | |
2478 return x | |
2479 | |
2480 op = x[0] | |
2481 if op == 'parent': | |
2482 # x^:y means (x^) : y, not x ^ (:y) | |
2483 # x^: means (x^) :, not x ^ (:) | |
2484 post = ('parentpost', x[1]) | |
2485 if x[2][0] == 'dagrangepre': | |
2486 return _fixops(('dagrange', post, x[2][1])) | |
2487 elif x[2][0] == 'rangepre': | |
2488 return _fixops(('range', post, x[2][1])) | |
2489 elif x[2][0] == 'rangeall': | |
2490 return _fixops(('rangepost', post)) | |
2491 elif op == 'or': | |
2492 # make number of arguments deterministic: | |
2493 # x + y + z -> (or x y z) -> (or (list x y z)) | |
2494 return (op, _fixops(('list',) + x[1:])) | |
2495 | |
2496 return (op,) + tuple(_fixops(y) for y in x[1:]) | |
2497 | |
2498 def _analyze(x, order): | |
2499 if x is None: | |
2500 return x | |
2501 | |
2502 op = x[0] | |
2503 if op == 'minus': | |
2504 return _analyze(('and', x[1], ('not', x[2])), order) | |
2505 elif op == 'only': | |
2506 t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) | |
2507 return _analyze(t, order) | |
2508 elif op == 'onlypost': | |
2509 return _analyze(('func', ('symbol', 'only'), x[1]), order) | |
2510 elif op == 'dagrangepre': | |
2511 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order) | |
2512 elif op == 'dagrangepost': | |
2513 return _analyze(('func', ('symbol', 'descendants'), x[1]), order) | |
2514 elif op == 'negate': | |
2515 s = getstring(x[1], _("can't negate that")) | |
2516 return _analyze(('string', '-' + s), order) | |
2517 elif op in ('string', 'symbol'): | |
2518 return x | |
2519 elif op == 'and': | |
2520 ta = _analyze(x[1], order) | |
2521 tb = _analyze(x[2], _tofolloworder[order]) | |
2522 return (op, ta, tb, order) | |
2523 elif op == 'or': | |
2524 return (op, _analyze(x[1], order), order) | |
2525 elif op == 'not': | |
2526 return (op, _analyze(x[1], anyorder), order) | |
2527 elif op == 'rangeall': | |
2528 return (op, None, order) | |
2529 elif op in ('rangepre', 'rangepost', 'parentpost'): | |
2530 return (op, _analyze(x[1], defineorder), order) | |
2531 elif op == 'group': | |
2532 return _analyze(x[1], order) | |
2533 elif op in ('dagrange', 'range', 'parent', 'ancestor'): | |
2534 ta = _analyze(x[1], defineorder) | |
2535 tb = _analyze(x[2], defineorder) | |
2536 return (op, ta, tb, order) | |
2537 elif op == 'list': | |
2538 return (op,) + tuple(_analyze(y, order) for y in x[1:]) | |
2539 elif op == 'keyvalue': | |
2540 return (op, x[1], _analyze(x[2], order)) | |
2541 elif op == 'func': | |
2542 f = getsymbol(x[1]) | |
2543 d = defineorder | |
2544 if f == 'present': | |
2545 # 'present(set)' is known to return the argument set with no | |
2546 # modification, so forward the current order to its argument | |
2547 d = order | |
2548 return (op, x[1], _analyze(x[2], d), order) | |
2549 raise ValueError('invalid operator %r' % op) | |
2550 | |
2551 def analyze(x, order=defineorder): | |
2552 """Transform raw parsed tree to evaluatable tree which can be fed to | |
2553 optimize() or getset() | |
2554 | |
2555 All pseudo operations should be mapped to real operations or functions | |
2556 defined in methods or symbols table respectively. | |
2557 | |
2558 'order' specifies how the current expression 'x' is ordered (see the | |
2559 constants defined above.) | |
2560 """ | |
2561 return _analyze(x, order) | |
2562 | |
2563 def _optimize(x, small): | |
2564 if x is None: | |
2565 return 0, x | |
2566 | |
2567 smallbonus = 1 | |
2568 if small: | |
2569 smallbonus = .5 | |
2570 | |
2571 op = x[0] | |
2572 if op in ('string', 'symbol'): | |
2573 return smallbonus, x # single revisions are small | |
2574 elif op == 'and': | |
2575 wa, ta = _optimize(x[1], True) | |
2576 wb, tb = _optimize(x[2], True) | |
2577 order = x[3] | |
2578 w = min(wa, wb) | |
2579 | |
2580 # (::x and not ::y)/(not ::y and ::x) have a fast path | |
2581 tm = _matchonly(ta, tb) or _matchonly(tb, ta) | |
2582 if tm: | |
2583 return w, ('func', ('symbol', 'only'), tm, order) | |
2584 | |
2585 if tb is not None and tb[0] == 'not': | |
2586 return wa, ('difference', ta, tb[1], order) | |
2587 | |
2588 if wa > wb: | |
2589 return w, (op, tb, ta, order) | |
2590 return w, (op, ta, tb, order) | |
2591 elif op == 'or': | |
2592 # fast path for machine-generated expression, that is likely to have | |
2593 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()' | |
2594 order = x[2] | |
2595 ws, ts, ss = [], [], [] | |
2596 def flushss(): | |
2597 if not ss: | |
2598 return | |
2599 if len(ss) == 1: | |
2600 w, t = ss[0] | |
2601 else: | |
2602 s = '\0'.join(t[1] for w, t in ss) | |
2603 y = ('func', ('symbol', '_list'), ('string', s), order) | |
2604 w, t = _optimize(y, False) | |
2605 ws.append(w) | |
2606 ts.append(t) | |
2607 del ss[:] | |
2608 for y in getlist(x[1]): | |
2609 w, t = _optimize(y, False) | |
2610 if t is not None and (t[0] == 'string' or t[0] == 'symbol'): | |
2611 ss.append((w, t)) | |
2612 continue | |
2613 flushss() | |
2614 ws.append(w) | |
2615 ts.append(t) | |
2616 flushss() | |
2617 if len(ts) == 1: | |
2618 return ws[0], ts[0] # 'or' operation is fully optimized out | |
2619 # we can't reorder trees by weight because it would change the order. | |
2620 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a") | |
2621 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0])) | |
2622 return max(ws), (op, ('list',) + tuple(ts), order) | |
2623 elif op == 'not': | |
2624 # Optimize not public() to _notpublic() because we have a fast version | |
2625 if x[1][:3] == ('func', ('symbol', 'public'), None): | |
2626 order = x[1][3] | |
2627 newsym = ('func', ('symbol', '_notpublic'), None, order) | |
2628 o = _optimize(newsym, not small) | |
2629 return o[0], o[1] | |
2630 else: | |
2631 o = _optimize(x[1], not small) | |
2632 order = x[2] | |
2633 return o[0], (op, o[1], order) | |
2634 elif op == 'rangeall': | |
2635 return smallbonus, x | |
2636 elif op in ('rangepre', 'rangepost', 'parentpost'): | |
2637 o = _optimize(x[1], small) | |
2638 order = x[2] | |
2639 return o[0], (op, o[1], order) | |
2640 elif op in ('dagrange', 'range', 'parent', 'ancestor'): | |
2641 wa, ta = _optimize(x[1], small) | |
2642 wb, tb = _optimize(x[2], small) | |
2643 order = x[3] | |
2644 return wa + wb, (op, ta, tb, order) | |
2645 elif op == 'list': | |
2646 ws, ts = zip(*(_optimize(y, small) for y in x[1:])) | |
2647 return sum(ws), (op,) + ts | |
2648 elif op == 'keyvalue': | |
2649 w, t = _optimize(x[2], small) | |
2650 return w, (op, x[1], t) | |
2651 elif op == 'func': | |
2652 f = getsymbol(x[1]) | |
2653 wa, ta = _optimize(x[2], small) | |
2654 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep', | |
2655 'keyword', 'outgoing', 'user', 'destination'): | |
2656 w = 10 # slow | |
2657 elif f in ('modifies', 'adds', 'removes'): | |
2658 w = 30 # slower | |
2659 elif f == "contains": | |
2660 w = 100 # very slow | |
2661 elif f == "ancestor": | |
2662 w = 1 * smallbonus | |
2663 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'): | |
2664 w = 0 | |
2665 elif f == "sort": | |
2666 w = 10 # assume most sorts look at changelog | |
2667 else: | |
2668 w = 1 | |
2669 order = x[3] | |
2670 return w + wa, (op, x[1], ta, order) | |
2671 raise ValueError('invalid operator %r' % op) | |
2672 | |
2673 def optimize(tree): | |
2674 """Optimize evaluatable tree | |
2675 | |
2676 All pseudo operations should be transformed beforehand. | |
2677 """ | |
2678 _weight, newtree = _optimize(tree, small=True) | |
2679 return newtree | |
2680 | |
2681 # the set of valid characters for the initial letter of symbols in | |
2682 # alias declarations and definitions | |
2683 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$')) | |
2684 | |
2685 def _parsewith(spec, lookup=None, syminitletters=None): | |
2686 """Generate a parse tree of given spec with given tokenizing options | |
2687 | |
2688 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters) | |
2689 ('func', ('symbol', 'foo'), ('symbol', '$1')) | |
2690 >>> _parsewith('$1') | |
2691 Traceback (most recent call last): | |
2692 ... | |
2693 ParseError: ("syntax error in revset '$1'", 0) | |
2694 >>> _parsewith('foo bar') | |
2695 Traceback (most recent call last): | |
2696 ... | |
2697 ParseError: ('invalid token', 4) | |
2698 """ | |
2699 p = parser.parser(elements) | |
2700 tree, pos = p.parse(tokenize(spec, lookup=lookup, | |
2701 syminitletters=syminitletters)) | |
2702 if pos != len(spec): | |
2703 raise error.ParseError(_('invalid token'), pos) | |
2704 return _fixops(parser.simplifyinfixops(tree, ('list', 'or'))) | |
2705 | |
2706 class _aliasrules(parser.basealiasrules): | |
2707 """Parsing and expansion rule set of revset aliases""" | |
2708 _section = _('revset alias') | |
2709 | |
2710 @staticmethod | |
2711 def _parse(spec): | |
2712 """Parse alias declaration/definition ``spec`` | |
2713 | |
2714 This allows symbol names to use also ``$`` as an initial letter | |
2715 (for backward compatibility), and callers of this function should | |
2716 examine whether ``$`` is used also for unexpected symbols or not. | |
2717 """ | |
2718 return _parsewith(spec, syminitletters=_aliassyminitletters) | |
2719 | |
2720 @staticmethod | |
2721 def _trygetfunc(tree): | |
2722 if tree[0] == 'func' and tree[1][0] == 'symbol': | |
2723 return tree[1][1], getlist(tree[2]) | |
2724 | |
2725 def expandaliases(ui, tree): | |
2726 aliases = _aliasrules.buildmap(ui.configitems('revsetalias')) | |
2727 tree = _aliasrules.expand(aliases, tree) | |
2728 # warn about problematic (but not referred) aliases | |
2729 for name, alias in sorted(aliases.iteritems()): | |
2730 if alias.error and not alias.warned: | |
2731 ui.warn(_('warning: %s\n') % (alias.error)) | |
2732 alias.warned = True | |
2733 return tree | |
2734 | |
2735 def foldconcat(tree): | |
2736 """Fold elements to be concatenated by `##` | |
2737 """ | |
2738 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'): | |
2739 return tree | |
2740 if tree[0] == '_concat': | |
2741 pending = [tree] | |
2742 l = [] | |
2743 while pending: | |
2744 e = pending.pop() | |
2745 if e[0] == '_concat': | |
2746 pending.extend(reversed(e[1:])) | |
2747 elif e[0] in ('string', 'symbol'): | |
2748 l.append(e[1]) | |
2749 else: | |
2750 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0]) | |
2751 raise error.ParseError(msg) | |
2752 return ('string', ''.join(l)) | |
2753 else: | |
2754 return tuple(foldconcat(t) for t in tree) | |
2755 | |
2756 def parse(spec, lookup=None): | |
2757 return _parsewith(spec, lookup=lookup) | |
2758 | |
2759 def posttreebuilthook(tree, repo): | 2222 def posttreebuilthook(tree, repo): |
2760 # hook for extensions to execute code on the optimized tree | 2223 # hook for extensions to execute code on the optimized tree |
2761 pass | 2224 pass |
2762 | 2225 |
2763 def match(ui, spec, repo=None, order=defineorder): | 2226 def match(ui, spec, repo=None, order=defineorder): |
2783 raise error.ParseError(_("empty query")) | 2246 raise error.ParseError(_("empty query")) |
2784 lookup = None | 2247 lookup = None |
2785 if repo: | 2248 if repo: |
2786 lookup = repo.__contains__ | 2249 lookup = repo.__contains__ |
2787 if len(specs) == 1: | 2250 if len(specs) == 1: |
2788 tree = parse(specs[0], lookup) | 2251 tree = revsetlang.parse(specs[0], lookup) |
2789 else: | 2252 else: |
2790 tree = ('or', ('list',) + tuple(parse(s, lookup) for s in specs)) | 2253 tree = ('or', |
2254 ('list',) + tuple(revsetlang.parse(s, lookup) for s in specs)) | |
2791 | 2255 |
2792 if ui: | 2256 if ui: |
2793 tree = expandaliases(ui, tree) | 2257 tree = revsetlang.expandaliases(ui, tree) |
2794 tree = foldconcat(tree) | 2258 tree = revsetlang.foldconcat(tree) |
2795 tree = analyze(tree, order) | 2259 tree = revsetlang.analyze(tree, order) |
2796 tree = optimize(tree) | 2260 tree = revsetlang.optimize(tree) |
2797 posttreebuilthook(tree, repo) | 2261 posttreebuilthook(tree, repo) |
2798 return makematcher(tree) | 2262 return makematcher(tree) |
2799 | 2263 |
2800 def makematcher(tree): | 2264 def makematcher(tree): |
2801 """Create a matcher from an evaluatable tree""" | 2265 """Create a matcher from an evaluatable tree""" |
2807 else: | 2271 else: |
2808 result = getset(repo, baseset(subset), tree) | 2272 result = getset(repo, baseset(subset), tree) |
2809 return result | 2273 return result |
2810 return mfunc | 2274 return mfunc |
2811 | 2275 |
2812 def formatspec(expr, *args): | |
2813 ''' | |
2814 This is a convenience function for using revsets internally, and | |
2815 escapes arguments appropriately. Aliases are intentionally ignored | |
2816 so that intended expression behavior isn't accidentally subverted. | |
2817 | |
2818 Supported arguments: | |
2819 | |
2820 %r = revset expression, parenthesized | |
2821 %d = int(arg), no quoting | |
2822 %s = string(arg), escaped and single-quoted | |
2823 %b = arg.branch(), escaped and single-quoted | |
2824 %n = hex(arg), single-quoted | |
2825 %% = a literal '%' | |
2826 | |
2827 Prefixing the type with 'l' specifies a parenthesized list of that type. | |
2828 | |
2829 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()")) | |
2830 '(10 or 11):: and ((this()) or (that()))' | |
2831 >>> formatspec('%d:: and not %d::', 10, 20) | |
2832 '10:: and not 20::' | |
2833 >>> formatspec('%ld or %ld', [], [1]) | |
2834 "_list('') or 1" | |
2835 >>> formatspec('keyword(%s)', 'foo\\xe9') | |
2836 "keyword('foo\\\\xe9')" | |
2837 >>> b = lambda: 'default' | |
2838 >>> b.branch = b | |
2839 >>> formatspec('branch(%b)', b) | |
2840 "branch('default')" | |
2841 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd']) | |
2842 "root(_list('a\\x00b\\x00c\\x00d'))" | |
2843 ''' | |
2844 | |
2845 def quote(s): | |
2846 return repr(str(s)) | |
2847 | |
2848 def argtype(c, arg): | |
2849 if c == 'd': | |
2850 return str(int(arg)) | |
2851 elif c == 's': | |
2852 return quote(arg) | |
2853 elif c == 'r': | |
2854 parse(arg) # make sure syntax errors are confined | |
2855 return '(%s)' % arg | |
2856 elif c == 'n': | |
2857 return quote(node.hex(arg)) | |
2858 elif c == 'b': | |
2859 return quote(arg.branch()) | |
2860 | |
2861 def listexp(s, t): | |
2862 l = len(s) | |
2863 if l == 0: | |
2864 return "_list('')" | |
2865 elif l == 1: | |
2866 return argtype(t, s[0]) | |
2867 elif t == 'd': | |
2868 return "_intlist('%s')" % "\0".join(str(int(a)) for a in s) | |
2869 elif t == 's': | |
2870 return "_list('%s')" % "\0".join(s) | |
2871 elif t == 'n': | |
2872 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s) | |
2873 elif t == 'b': | |
2874 return "_list('%s')" % "\0".join(a.branch() for a in s) | |
2875 | |
2876 m = l // 2 | |
2877 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t)) | |
2878 | |
2879 ret = '' | |
2880 pos = 0 | |
2881 arg = 0 | |
2882 while pos < len(expr): | |
2883 c = expr[pos] | |
2884 if c == '%': | |
2885 pos += 1 | |
2886 d = expr[pos] | |
2887 if d == '%': | |
2888 ret += d | |
2889 elif d in 'dsnbr': | |
2890 ret += argtype(d, args[arg]) | |
2891 arg += 1 | |
2892 elif d == 'l': | |
2893 # a list of some type | |
2894 pos += 1 | |
2895 d = expr[pos] | |
2896 ret += listexp(list(args[arg]), d) | |
2897 arg += 1 | |
2898 else: | |
2899 raise error.Abort(_('unexpected revspec format character %s') | |
2900 % d) | |
2901 else: | |
2902 ret += c | |
2903 pos += 1 | |
2904 | |
2905 return ret | |
2906 | |
2907 def prettyformat(tree): | |
2908 return parser.prettyformat(tree, ('string', 'symbol')) | |
2909 | |
2910 def depth(tree): | |
2911 if isinstance(tree, tuple): | |
2912 return max(map(depth, tree)) + 1 | |
2913 else: | |
2914 return 0 | |
2915 | |
2916 def funcsused(tree): | |
2917 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'): | |
2918 return set() | |
2919 else: | |
2920 funcs = set() | |
2921 for s in tree[1:]: | |
2922 funcs |= funcsused(s) | |
2923 if tree[0] == 'func': | |
2924 funcs.add(tree[1][1]) | |
2925 return funcs | |
2926 | |
2927 def loadpredicate(ui, extname, registrarobj): | 2276 def loadpredicate(ui, extname, registrarobj): |
2928 """Load revset predicates from specified registrarobj | 2277 """Load revset predicates from specified registrarobj |
2929 """ | 2278 """ |
2930 for name, func in registrarobj._table.iteritems(): | 2279 for name, func in registrarobj._table.iteritems(): |
2931 symbols[name] = func | 2280 symbols[name] = func |