Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revsetlang.py @ 34029:1b28525e6698
revset: remove order information from tree (API)
Keeping `order` in tree makes AST operation harder. And there could be
invalid cases if trees could be generated and compounded freely, like:
SetA(order=define) & SetB(order=define)
^^^^^^ couldn't be satisfied
This patch changes the code to calculate order on the fly, during tree
traversal. Optimization of reordering `and` arguments is preserved by
introducing a new internal operation `flipand`.
.. api::
revset.stringset() now takes 'order' as the last argument.
Differential Revision: https://phab.mercurial-scm.org/D451
author | Jun Wu <quark@fb.com> |
---|---|
date | Sun, 20 Aug 2017 10:55:11 -0700 |
parents | 72b5f4d53c58 |
children | dcfdf4d09663 |
comparison
equal
deleted
inserted
replaced
34028:72b5f4d53c58 | 34029:1b28525e6698 |
---|---|
256 """ | 256 """ |
257 if not _isnamedfunc(x, funcname): | 257 if not _isnamedfunc(x, funcname): |
258 return | 258 return |
259 return x[2] | 259 return x[2] |
260 | 260 |
261 # Constants for ordering requirement, used in _analyze(): | 261 # Constants for ordering requirement, used in getset(): |
262 # | 262 # |
263 # If 'define', any nested functions and operations can change the ordering of | 263 # If 'define', any nested functions and operations can change the ordering of |
264 # the entries in the set. If 'follow', any nested functions and operations | 264 # the entries in the set. If 'follow', any nested functions and operations |
265 # should take the ordering specified by the first operand to the '&' operator. | 265 # should take the ordering specified by the first operand to the '&' operator. |
266 # | 266 # |
280 # ^ | 280 # ^ |
281 # any | 281 # any |
282 # | 282 # |
283 # 'y()' can either enforce its ordering requirement or take the ordering | 283 # 'y()' can either enforce its ordering requirement or take the ordering |
284 # specified by 'x()' because 'not()' doesn't care the order. | 284 # specified by 'x()' because 'not()' doesn't care the order. |
285 # | |
286 # Transition of ordering requirement: | |
287 # | |
288 # 1. starts with 'define' | |
289 # 2. shifts to 'follow' by 'x & y' | |
290 # 3. changes back to 'define' on function call 'f(x)' or function-like | |
291 # operation 'x (f) y' because 'f' may have its own ordering requirement | |
292 # for 'x' and 'y' (e.g. 'first(x)') | |
293 # | |
294 anyorder = 'any' # don't care the order | 285 anyorder = 'any' # don't care the order |
295 defineorder = 'define' # should define the order | 286 defineorder = 'define' # should define the order |
296 followorder = 'follow' # must follow the current order | 287 followorder = 'follow' # must follow the current order |
297 | |
298 # transition table for 'x & y', from the current expression 'x' to 'y' | |
299 _tofolloworder = { | |
300 anyorder: anyorder, | |
301 defineorder: followorder, | |
302 followorder: followorder, | |
303 } | |
304 | 288 |
305 def _matchonly(revs, bases): | 289 def _matchonly(revs, bases): |
306 """ | 290 """ |
307 >>> f = lambda *args: _matchonly(*map(parse, args)) | 291 >>> f = lambda *args: _matchonly(*map(parse, args)) |
308 >>> f('ancestors(A)', 'not ancestors(B)') | 292 >>> f('ancestors(A)', 'not ancestors(B)') |
338 # x#y[z] ternary | 322 # x#y[z] ternary |
339 return _fixops(('relsubscript', x[1][1], x[1][2], x[2])) | 323 return _fixops(('relsubscript', x[1][1], x[1][2], x[2])) |
340 | 324 |
341 return (op,) + tuple(_fixops(y) for y in x[1:]) | 325 return (op,) + tuple(_fixops(y) for y in x[1:]) |
342 | 326 |
343 def _analyze(x, order): | 327 def _analyze(x): |
344 if x is None: | 328 if x is None: |
345 return x | 329 return x |
346 | 330 |
347 op = x[0] | 331 op = x[0] |
348 if op == 'minus': | 332 if op == 'minus': |
349 return _analyze(('and', x[1], ('not', x[2])), order) | 333 return _analyze(('and', x[1], ('not', x[2]))) |
350 elif op == 'only': | 334 elif op == 'only': |
351 t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) | 335 t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) |
352 return _analyze(t, order) | 336 return _analyze(t) |
353 elif op == 'onlypost': | 337 elif op == 'onlypost': |
354 return _analyze(('func', ('symbol', 'only'), x[1]), order) | 338 return _analyze(('func', ('symbol', 'only'), x[1])) |
355 elif op == 'dagrangepre': | 339 elif op == 'dagrangepre': |
356 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order) | 340 return _analyze(('func', ('symbol', 'ancestors'), x[1])) |
357 elif op == 'dagrangepost': | 341 elif op == 'dagrangepost': |
358 return _analyze(('func', ('symbol', 'descendants'), x[1]), order) | 342 return _analyze(('func', ('symbol', 'descendants'), x[1])) |
359 elif op == 'negate': | 343 elif op == 'negate': |
360 s = getstring(x[1], _("can't negate that")) | 344 s = getstring(x[1], _("can't negate that")) |
361 return _analyze(('string', '-' + s), order) | 345 return _analyze(('string', '-' + s)) |
362 elif op in ('string', 'symbol'): | 346 elif op in ('string', 'symbol'): |
363 return x | 347 return x |
364 elif op == 'and': | |
365 ta = _analyze(x[1], order) | |
366 tb = _analyze(x[2], _tofolloworder[order]) | |
367 return (op, ta, tb, order) | |
368 elif op == 'or': | |
369 return (op, _analyze(x[1], order), order) | |
370 elif op == 'not': | |
371 return (op, _analyze(x[1], anyorder), order) | |
372 elif op == 'rangeall': | 348 elif op == 'rangeall': |
373 return (op, None, order) | 349 return (op, None) |
374 elif op in ('rangepre', 'rangepost', 'parentpost'): | 350 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}: |
375 return (op, _analyze(x[1], defineorder), order) | 351 return (op, _analyze(x[1])) |
376 elif op == 'group': | 352 elif op == 'group': |
377 return _analyze(x[1], order) | 353 return _analyze(x[1]) |
378 elif op in ('dagrange', 'range', 'parent', 'ancestor', 'relation', | 354 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation', |
379 'subscript'): | 355 'subscript'}: |
380 ta = _analyze(x[1], defineorder) | 356 ta = _analyze(x[1]) |
381 tb = _analyze(x[2], defineorder) | 357 tb = _analyze(x[2]) |
382 return (op, ta, tb, order) | 358 return (op, ta, tb) |
383 elif op == 'relsubscript': | 359 elif op == 'relsubscript': |
384 ta = _analyze(x[1], defineorder) | 360 ta = _analyze(x[1]) |
385 tb = _analyze(x[2], defineorder) | 361 tb = _analyze(x[2]) |
386 tc = _analyze(x[3], defineorder) | 362 tc = _analyze(x[3]) |
387 return (op, ta, tb, tc, order) | 363 return (op, ta, tb, tc) |
388 elif op == 'list': | 364 elif op == 'list': |
389 return (op,) + tuple(_analyze(y, order) for y in x[1:]) | 365 return (op,) + tuple(_analyze(y) for y in x[1:]) |
390 elif op == 'keyvalue': | 366 elif op == 'keyvalue': |
391 return (op, x[1], _analyze(x[2], order)) | 367 return (op, x[1], _analyze(x[2])) |
392 elif op == 'func': | 368 elif op == 'func': |
393 f = getsymbol(x[1]) | 369 return (op, x[1], _analyze(x[2])) |
394 d = defineorder | |
395 if f == 'present': | |
396 # 'present(set)' is known to return the argument set with no | |
397 # modification, so forward the current order to its argument | |
398 d = order | |
399 return (op, x[1], _analyze(x[2], d), order) | |
400 raise ValueError('invalid operator %r' % op) | 370 raise ValueError('invalid operator %r' % op) |
401 | 371 |
402 def analyze(x, order=defineorder): | 372 def analyze(x): |
403 """Transform raw parsed tree to evaluatable tree which can be fed to | 373 """Transform raw parsed tree to evaluatable tree which can be fed to |
404 optimize() or getset() | 374 optimize() or getset() |
405 | 375 |
406 All pseudo operations should be mapped to real operations or functions | 376 All pseudo operations should be mapped to real operations or functions |
407 defined in methods or symbols table respectively. | 377 defined in methods or symbols table respectively. |
408 | 378 """ |
409 'order' specifies how the current expression 'x' is ordered (see the | 379 return _analyze(x) |
410 constants defined above.) | |
411 """ | |
412 return _analyze(x, order) | |
413 | 380 |
414 def _optimize(x, small): | 381 def _optimize(x, small): |
415 if x is None: | 382 if x is None: |
416 return 0, x | 383 return 0, x |
417 | 384 |
423 if op in ('string', 'symbol'): | 390 if op in ('string', 'symbol'): |
424 return smallbonus, x # single revisions are small | 391 return smallbonus, x # single revisions are small |
425 elif op == 'and': | 392 elif op == 'and': |
426 wa, ta = _optimize(x[1], True) | 393 wa, ta = _optimize(x[1], True) |
427 wb, tb = _optimize(x[2], True) | 394 wb, tb = _optimize(x[2], True) |
428 order = x[3] | |
429 w = min(wa, wb) | 395 w = min(wa, wb) |
430 | 396 |
431 # (::x and not ::y)/(not ::y and ::x) have a fast path | 397 # (::x and not ::y)/(not ::y and ::x) have a fast path |
432 tm = _matchonly(ta, tb) or _matchonly(tb, ta) | 398 tm = _matchonly(ta, tb) or _matchonly(tb, ta) |
433 if tm: | 399 if tm: |
434 return w, ('func', ('symbol', 'only'), tm, order) | 400 return w, ('func', ('symbol', 'only'), tm) |
435 | 401 |
436 if tb is not None and tb[0] == 'not': | 402 if tb is not None and tb[0] == 'not': |
437 return wa, ('difference', ta, tb[1], order) | 403 return wa, ('difference', ta, tb[1]) |
438 | |
439 if wa > wb: | 404 if wa > wb: |
440 return w, (op, tb, ta, order) | 405 return w, ('flipand', tb, ta) |
441 return w, (op, ta, tb, order) | 406 return w, (op, ta, tb) |
442 elif op == 'or': | 407 elif op == 'or': |
443 # fast path for machine-generated expression, that is likely to have | 408 # fast path for machine-generated expression, that is likely to have |
444 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()' | 409 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()' |
445 order = x[2] | |
446 ws, ts, ss = [], [], [] | 410 ws, ts, ss = [], [], [] |
447 def flushss(): | 411 def flushss(): |
448 if not ss: | 412 if not ss: |
449 return | 413 return |
450 if len(ss) == 1: | 414 if len(ss) == 1: |
451 w, t = ss[0] | 415 w, t = ss[0] |
452 else: | 416 else: |
453 s = '\0'.join(t[1] for w, t in ss) | 417 s = '\0'.join(t[1] for w, t in ss) |
454 y = ('func', ('symbol', '_list'), ('string', s), order) | 418 y = ('func', ('symbol', '_list'), ('string', s)) |
455 w, t = _optimize(y, False) | 419 w, t = _optimize(y, False) |
456 ws.append(w) | 420 ws.append(w) |
457 ts.append(t) | 421 ts.append(t) |
458 del ss[:] | 422 del ss[:] |
459 for y in getlist(x[1]): | 423 for y in getlist(x[1]): |
465 ws.append(w) | 429 ws.append(w) |
466 ts.append(t) | 430 ts.append(t) |
467 flushss() | 431 flushss() |
468 if len(ts) == 1: | 432 if len(ts) == 1: |
469 return ws[0], ts[0] # 'or' operation is fully optimized out | 433 return ws[0], ts[0] # 'or' operation is fully optimized out |
470 return max(ws), (op, ('list',) + tuple(ts), order) | 434 return max(ws), (op, ('list',) + tuple(ts)) |
471 elif op == 'not': | 435 elif op == 'not': |
472 # Optimize not public() to _notpublic() because we have a fast version | 436 # Optimize not public() to _notpublic() because we have a fast version |
473 if x[1][:3] == ('func', ('symbol', 'public'), None): | 437 if x[1][:3] == ('func', ('symbol', 'public'), None): |
474 order = x[1][3] | 438 newsym = ('func', ('symbol', '_notpublic'), None) |
475 newsym = ('func', ('symbol', '_notpublic'), None, order) | |
476 o = _optimize(newsym, not small) | 439 o = _optimize(newsym, not small) |
477 return o[0], o[1] | 440 return o[0], o[1] |
478 else: | 441 else: |
479 o = _optimize(x[1], not small) | 442 o = _optimize(x[1], not small) |
480 order = x[2] | 443 return o[0], (op, o[1]) |
481 return o[0], (op, o[1], order) | |
482 elif op == 'rangeall': | 444 elif op == 'rangeall': |
483 return smallbonus, x | 445 return smallbonus, x |
484 elif op in ('rangepre', 'rangepost', 'parentpost'): | 446 elif op in ('rangepre', 'rangepost', 'parentpost'): |
485 o = _optimize(x[1], small) | 447 o = _optimize(x[1], small) |
486 order = x[2] | 448 return o[0], (op, o[1]) |
487 return o[0], (op, o[1], order) | |
488 elif op in ('dagrange', 'range'): | 449 elif op in ('dagrange', 'range'): |
489 wa, ta = _optimize(x[1], small) | 450 wa, ta = _optimize(x[1], small) |
490 wb, tb = _optimize(x[2], small) | 451 wb, tb = _optimize(x[2], small) |
491 order = x[3] | 452 return wa + wb, (op, ta, tb) |
492 return wa + wb, (op, ta, tb, order) | |
493 elif op in ('parent', 'ancestor', 'relation', 'subscript'): | 453 elif op in ('parent', 'ancestor', 'relation', 'subscript'): |
494 w, t = _optimize(x[1], small) | 454 w, t = _optimize(x[1], small) |
495 order = x[3] | 455 return w, (op, t, x[2]) |
496 return w, (op, t, x[2], order) | |
497 elif op == 'relsubscript': | 456 elif op == 'relsubscript': |
498 w, t = _optimize(x[1], small) | 457 w, t = _optimize(x[1], small) |
499 order = x[4] | 458 return w, (op, t, x[2], x[3]) |
500 return w, (op, t, x[2], x[3], order) | |
501 elif op == 'list': | 459 elif op == 'list': |
502 ws, ts = zip(*(_optimize(y, small) for y in x[1:])) | 460 ws, ts = zip(*(_optimize(y, small) for y in x[1:])) |
503 return sum(ws), (op,) + ts | 461 return sum(ws), (op,) + ts |
504 elif op == 'keyvalue': | 462 elif op == 'keyvalue': |
505 w, t = _optimize(x[2], small) | 463 w, t = _optimize(x[2], small) |
520 w = 0 | 478 w = 0 |
521 elif f == "sort": | 479 elif f == "sort": |
522 w = 10 # assume most sorts look at changelog | 480 w = 10 # assume most sorts look at changelog |
523 else: | 481 else: |
524 w = 1 | 482 w = 1 |
525 order = x[3] | 483 return w + wa, (op, x[1], ta) |
526 return w + wa, (op, x[1], ta, order) | |
527 raise ValueError('invalid operator %r' % op) | 484 raise ValueError('invalid operator %r' % op) |
528 | 485 |
529 def optimize(tree): | 486 def optimize(tree): |
530 """Optimize evaluatable tree | 487 """Optimize evaluatable tree |
531 | 488 |