comparison mercurial/revsetlang.py @ 34029:1b28525e6698

revset: remove order information from tree (API) Keeping `order` in tree makes AST operation harder. And there could be invalid cases if trees could be generated and compounded freely, like: SetA(order=define) & SetB(order=define) ^^^^^^ couldn't be satisfied This patch changes the code to calculate order on the fly, during tree traversal. Optimization of reordering `and` arguments is preserved by introducing a new internal operation `flipand`. .. api:: revset.stringset() now takes 'order' as the last argument. Differential Revision: https://phab.mercurial-scm.org/D451
author Jun Wu <quark@fb.com>
date Sun, 20 Aug 2017 10:55:11 -0700
parents 72b5f4d53c58
children dcfdf4d09663
comparison
equal deleted inserted replaced
34028:72b5f4d53c58 34029:1b28525e6698
256 """ 256 """
257 if not _isnamedfunc(x, funcname): 257 if not _isnamedfunc(x, funcname):
258 return 258 return
259 return x[2] 259 return x[2]
260 260
261 # Constants for ordering requirement, used in _analyze(): 261 # Constants for ordering requirement, used in getset():
262 # 262 #
263 # If 'define', any nested functions and operations can change the ordering of 263 # If 'define', any nested functions and operations can change the ordering of
264 # the entries in the set. If 'follow', any nested functions and operations 264 # the entries in the set. If 'follow', any nested functions and operations
265 # should take the ordering specified by the first operand to the '&' operator. 265 # should take the ordering specified by the first operand to the '&' operator.
266 # 266 #
280 # ^ 280 # ^
281 # any 281 # any
282 # 282 #
283 # 'y()' can either enforce its ordering requirement or take the ordering 283 # 'y()' can either enforce its ordering requirement or take the ordering
284 # specified by 'x()' because 'not()' doesn't care the order. 284 # specified by 'x()' because 'not()' doesn't care the order.
285 #
286 # Transition of ordering requirement:
287 #
288 # 1. starts with 'define'
289 # 2. shifts to 'follow' by 'x & y'
290 # 3. changes back to 'define' on function call 'f(x)' or function-like
291 # operation 'x (f) y' because 'f' may have its own ordering requirement
292 # for 'x' and 'y' (e.g. 'first(x)')
293 #
294 anyorder = 'any' # don't care the order 285 anyorder = 'any' # don't care the order
295 defineorder = 'define' # should define the order 286 defineorder = 'define' # should define the order
296 followorder = 'follow' # must follow the current order 287 followorder = 'follow' # must follow the current order
297
298 # transition table for 'x & y', from the current expression 'x' to 'y'
299 _tofolloworder = {
300 anyorder: anyorder,
301 defineorder: followorder,
302 followorder: followorder,
303 }
304 288
305 def _matchonly(revs, bases): 289 def _matchonly(revs, bases):
306 """ 290 """
307 >>> f = lambda *args: _matchonly(*map(parse, args)) 291 >>> f = lambda *args: _matchonly(*map(parse, args))
308 >>> f('ancestors(A)', 'not ancestors(B)') 292 >>> f('ancestors(A)', 'not ancestors(B)')
338 # x#y[z] ternary 322 # x#y[z] ternary
339 return _fixops(('relsubscript', x[1][1], x[1][2], x[2])) 323 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
340 324
341 return (op,) + tuple(_fixops(y) for y in x[1:]) 325 return (op,) + tuple(_fixops(y) for y in x[1:])
342 326
343 def _analyze(x, order): 327 def _analyze(x):
344 if x is None: 328 if x is None:
345 return x 329 return x
346 330
347 op = x[0] 331 op = x[0]
348 if op == 'minus': 332 if op == 'minus':
349 return _analyze(('and', x[1], ('not', x[2])), order) 333 return _analyze(('and', x[1], ('not', x[2])))
350 elif op == 'only': 334 elif op == 'only':
351 t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) 335 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
352 return _analyze(t, order) 336 return _analyze(t)
353 elif op == 'onlypost': 337 elif op == 'onlypost':
354 return _analyze(('func', ('symbol', 'only'), x[1]), order) 338 return _analyze(('func', ('symbol', 'only'), x[1]))
355 elif op == 'dagrangepre': 339 elif op == 'dagrangepre':
356 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order) 340 return _analyze(('func', ('symbol', 'ancestors'), x[1]))
357 elif op == 'dagrangepost': 341 elif op == 'dagrangepost':
358 return _analyze(('func', ('symbol', 'descendants'), x[1]), order) 342 return _analyze(('func', ('symbol', 'descendants'), x[1]))
359 elif op == 'negate': 343 elif op == 'negate':
360 s = getstring(x[1], _("can't negate that")) 344 s = getstring(x[1], _("can't negate that"))
361 return _analyze(('string', '-' + s), order) 345 return _analyze(('string', '-' + s))
362 elif op in ('string', 'symbol'): 346 elif op in ('string', 'symbol'):
363 return x 347 return x
364 elif op == 'and':
365 ta = _analyze(x[1], order)
366 tb = _analyze(x[2], _tofolloworder[order])
367 return (op, ta, tb, order)
368 elif op == 'or':
369 return (op, _analyze(x[1], order), order)
370 elif op == 'not':
371 return (op, _analyze(x[1], anyorder), order)
372 elif op == 'rangeall': 348 elif op == 'rangeall':
373 return (op, None, order) 349 return (op, None)
374 elif op in ('rangepre', 'rangepost', 'parentpost'): 350 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
375 return (op, _analyze(x[1], defineorder), order) 351 return (op, _analyze(x[1]))
376 elif op == 'group': 352 elif op == 'group':
377 return _analyze(x[1], order) 353 return _analyze(x[1])
378 elif op in ('dagrange', 'range', 'parent', 'ancestor', 'relation', 354 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
379 'subscript'): 355 'subscript'}:
380 ta = _analyze(x[1], defineorder) 356 ta = _analyze(x[1])
381 tb = _analyze(x[2], defineorder) 357 tb = _analyze(x[2])
382 return (op, ta, tb, order) 358 return (op, ta, tb)
383 elif op == 'relsubscript': 359 elif op == 'relsubscript':
384 ta = _analyze(x[1], defineorder) 360 ta = _analyze(x[1])
385 tb = _analyze(x[2], defineorder) 361 tb = _analyze(x[2])
386 tc = _analyze(x[3], defineorder) 362 tc = _analyze(x[3])
387 return (op, ta, tb, tc, order) 363 return (op, ta, tb, tc)
388 elif op == 'list': 364 elif op == 'list':
389 return (op,) + tuple(_analyze(y, order) for y in x[1:]) 365 return (op,) + tuple(_analyze(y) for y in x[1:])
390 elif op == 'keyvalue': 366 elif op == 'keyvalue':
391 return (op, x[1], _analyze(x[2], order)) 367 return (op, x[1], _analyze(x[2]))
392 elif op == 'func': 368 elif op == 'func':
393 f = getsymbol(x[1]) 369 return (op, x[1], _analyze(x[2]))
394 d = defineorder
395 if f == 'present':
396 # 'present(set)' is known to return the argument set with no
397 # modification, so forward the current order to its argument
398 d = order
399 return (op, x[1], _analyze(x[2], d), order)
400 raise ValueError('invalid operator %r' % op) 370 raise ValueError('invalid operator %r' % op)
401 371
402 def analyze(x, order=defineorder): 372 def analyze(x):
403 """Transform raw parsed tree to evaluatable tree which can be fed to 373 """Transform raw parsed tree to evaluatable tree which can be fed to
404 optimize() or getset() 374 optimize() or getset()
405 375
406 All pseudo operations should be mapped to real operations or functions 376 All pseudo operations should be mapped to real operations or functions
407 defined in methods or symbols table respectively. 377 defined in methods or symbols table respectively.
408 378 """
409 'order' specifies how the current expression 'x' is ordered (see the 379 return _analyze(x)
410 constants defined above.)
411 """
412 return _analyze(x, order)
413 380
414 def _optimize(x, small): 381 def _optimize(x, small):
415 if x is None: 382 if x is None:
416 return 0, x 383 return 0, x
417 384
423 if op in ('string', 'symbol'): 390 if op in ('string', 'symbol'):
424 return smallbonus, x # single revisions are small 391 return smallbonus, x # single revisions are small
425 elif op == 'and': 392 elif op == 'and':
426 wa, ta = _optimize(x[1], True) 393 wa, ta = _optimize(x[1], True)
427 wb, tb = _optimize(x[2], True) 394 wb, tb = _optimize(x[2], True)
428 order = x[3]
429 w = min(wa, wb) 395 w = min(wa, wb)
430 396
431 # (::x and not ::y)/(not ::y and ::x) have a fast path 397 # (::x and not ::y)/(not ::y and ::x) have a fast path
432 tm = _matchonly(ta, tb) or _matchonly(tb, ta) 398 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
433 if tm: 399 if tm:
434 return w, ('func', ('symbol', 'only'), tm, order) 400 return w, ('func', ('symbol', 'only'), tm)
435 401
436 if tb is not None and tb[0] == 'not': 402 if tb is not None and tb[0] == 'not':
437 return wa, ('difference', ta, tb[1], order) 403 return wa, ('difference', ta, tb[1])
438
439 if wa > wb: 404 if wa > wb:
440 return w, (op, tb, ta, order) 405 return w, ('flipand', tb, ta)
441 return w, (op, ta, tb, order) 406 return w, (op, ta, tb)
442 elif op == 'or': 407 elif op == 'or':
443 # fast path for machine-generated expression, that is likely to have 408 # fast path for machine-generated expression, that is likely to have
444 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()' 409 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
445 order = x[2]
446 ws, ts, ss = [], [], [] 410 ws, ts, ss = [], [], []
447 def flushss(): 411 def flushss():
448 if not ss: 412 if not ss:
449 return 413 return
450 if len(ss) == 1: 414 if len(ss) == 1:
451 w, t = ss[0] 415 w, t = ss[0]
452 else: 416 else:
453 s = '\0'.join(t[1] for w, t in ss) 417 s = '\0'.join(t[1] for w, t in ss)
454 y = ('func', ('symbol', '_list'), ('string', s), order) 418 y = ('func', ('symbol', '_list'), ('string', s))
455 w, t = _optimize(y, False) 419 w, t = _optimize(y, False)
456 ws.append(w) 420 ws.append(w)
457 ts.append(t) 421 ts.append(t)
458 del ss[:] 422 del ss[:]
459 for y in getlist(x[1]): 423 for y in getlist(x[1]):
465 ws.append(w) 429 ws.append(w)
466 ts.append(t) 430 ts.append(t)
467 flushss() 431 flushss()
468 if len(ts) == 1: 432 if len(ts) == 1:
469 return ws[0], ts[0] # 'or' operation is fully optimized out 433 return ws[0], ts[0] # 'or' operation is fully optimized out
470 return max(ws), (op, ('list',) + tuple(ts), order) 434 return max(ws), (op, ('list',) + tuple(ts))
471 elif op == 'not': 435 elif op == 'not':
472 # Optimize not public() to _notpublic() because we have a fast version 436 # Optimize not public() to _notpublic() because we have a fast version
473 if x[1][:3] == ('func', ('symbol', 'public'), None): 437 if x[1][:3] == ('func', ('symbol', 'public'), None):
474 order = x[1][3] 438 newsym = ('func', ('symbol', '_notpublic'), None)
475 newsym = ('func', ('symbol', '_notpublic'), None, order)
476 o = _optimize(newsym, not small) 439 o = _optimize(newsym, not small)
477 return o[0], o[1] 440 return o[0], o[1]
478 else: 441 else:
479 o = _optimize(x[1], not small) 442 o = _optimize(x[1], not small)
480 order = x[2] 443 return o[0], (op, o[1])
481 return o[0], (op, o[1], order)
482 elif op == 'rangeall': 444 elif op == 'rangeall':
483 return smallbonus, x 445 return smallbonus, x
484 elif op in ('rangepre', 'rangepost', 'parentpost'): 446 elif op in ('rangepre', 'rangepost', 'parentpost'):
485 o = _optimize(x[1], small) 447 o = _optimize(x[1], small)
486 order = x[2] 448 return o[0], (op, o[1])
487 return o[0], (op, o[1], order)
488 elif op in ('dagrange', 'range'): 449 elif op in ('dagrange', 'range'):
489 wa, ta = _optimize(x[1], small) 450 wa, ta = _optimize(x[1], small)
490 wb, tb = _optimize(x[2], small) 451 wb, tb = _optimize(x[2], small)
491 order = x[3] 452 return wa + wb, (op, ta, tb)
492 return wa + wb, (op, ta, tb, order)
493 elif op in ('parent', 'ancestor', 'relation', 'subscript'): 453 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
494 w, t = _optimize(x[1], small) 454 w, t = _optimize(x[1], small)
495 order = x[3] 455 return w, (op, t, x[2])
496 return w, (op, t, x[2], order)
497 elif op == 'relsubscript': 456 elif op == 'relsubscript':
498 w, t = _optimize(x[1], small) 457 w, t = _optimize(x[1], small)
499 order = x[4] 458 return w, (op, t, x[2], x[3])
500 return w, (op, t, x[2], x[3], order)
501 elif op == 'list': 459 elif op == 'list':
502 ws, ts = zip(*(_optimize(y, small) for y in x[1:])) 460 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
503 return sum(ws), (op,) + ts 461 return sum(ws), (op,) + ts
504 elif op == 'keyvalue': 462 elif op == 'keyvalue':
505 w, t = _optimize(x[2], small) 463 w, t = _optimize(x[2], small)
520 w = 0 478 w = 0
521 elif f == "sort": 479 elif f == "sort":
522 w = 10 # assume most sorts look at changelog 480 w = 10 # assume most sorts look at changelog
523 else: 481 else:
524 w = 1 482 w = 1
525 order = x[3] 483 return w + wa, (op, x[1], ta)
526 return w + wa, (op, x[1], ta, order)
527 raise ValueError('invalid operator %r' % op) 484 raise ValueError('invalid operator %r' % op)
528 485
529 def optimize(tree): 486 def optimize(tree):
530 """Optimize evaluatable tree 487 """Optimize evaluatable tree
531 488