comparison mercurial/revset.py @ 28689:a14732e08fec

revset: move tagging of alias arguments from tokenization to parsing phase. In short, this patch moves the hack from tokenizedefn() to _relabelaliasargs(), which is called after parsing. This change aims to eliminate the tight dependency on the revset tokenizer. Before this patch, we had to rewrite an alias argument to a pseudo function: "$1" -> "_aliasarg('$1')", i.e. ('symbol', '$1') -> ('function', ('symbol', '_aliasarg'), ('string', '$1')). This was because the tokenizer must generate tokens that are syntactically valid. By moving the process to the parsing phase, we can assign a unique tag to an alias argument: ('symbol', '$1') -> ('_aliasarg', '$1'). Since a new _aliasarg node can never be generated from user input, we no longer have to verify user input at findaliases(). The test for _aliasarg("$1") is removed, as it is syntactically valid and should pass the parsing phase.
author Yuya Nishihara <yuya@tcha.org>
date Sun, 14 Feb 2016 19:48:33 +0900
parents d749b1832d2f
children b56bf98c8afb
comparison
equal deleted inserted replaced
28688:3e0d03c3c594 28689:a14732e08fec
2259 else: 2259 else:
2260 w = 1 2260 w = 1
2261 return w + wa, (op, x[1], ta) 2261 return w + wa, (op, x[1], ta)
2262 return 1, x 2262 return 1, x
2263 2263
2264 _aliasarg = ('func', ('symbol', '_aliasarg'))
2265 def _getaliasarg(tree): 2264 def _getaliasarg(tree):
2266 """If tree matches ('func', ('symbol', '_aliasarg'), ('string', X)) 2265 """If tree matches ('_aliasarg', X) return X, None otherwise"""
2267 return X, None otherwise. 2266 if tree[0] == '_aliasarg':
2268 """ 2267 return tree[1]
2269 if (len(tree) == 3 and tree[:2] == _aliasarg
2270 and tree[2][0] == 'string'):
2271 return tree[2][1]
2272 return None 2268 return None
2273 2269
2274 def _checkaliasarg(tree, known=None): 2270 def _checkaliasarg(tree, known=None):
2275 """Check tree contains no _aliasarg construct or only ones which 2271 """Check tree contains no _aliasarg construct or only ones which
2276 value is in known. Used to avoid alias placeholders injection. 2272 value is in known. Used to avoid alias placeholders injection.
2367 2363
2368 return (decl, None, None, _("invalid format")) 2364 return (decl, None, None, _("invalid format"))
2369 except error.ParseError as inst: 2365 except error.ParseError as inst:
2370 return (decl, None, None, parseerrordetail(inst)) 2366 return (decl, None, None, parseerrordetail(inst))
2371 2367
2368 def _relabelaliasargs(tree, args):
2369 if not isinstance(tree, tuple):
2370 return tree
2371 op = tree[0]
2372 if op != 'symbol':
2373 return (op,) + tuple(_relabelaliasargs(x, args) for x in tree[1:])
2374
2375 assert len(tree) == 2
2376 sym = tree[1]
2377 if sym in args:
2378 op = '_aliasarg'
2379 elif sym.startswith('$'):
2380 raise error.ParseError(_("'$' not for alias arguments"))
2381 return (op, sym)
2382
2372 def _parsealiasdefn(defn, args): 2383 def _parsealiasdefn(defn, args):
2373 """Parse alias definition ``defn`` 2384 """Parse alias definition ``defn``
2374 2385
2375 This function also replaces alias argument references in the 2386 This function marks alias argument references as ``_aliasarg``.
2376 specified definition by ``_aliasarg(ARGNAME)``.
2377 2387
2378 ``args`` is a list of alias argument names, or None if the alias 2388 ``args`` is a list of alias argument names, or None if the alias
2379 is declared as a symbol. 2389 is declared as a symbol.
2380 2390
2381 This returns "tree" as parsing result. 2391 This returns "tree" as parsing result.
2382 2392
2393 >>> def prettyformat(tree):
2394 ... return parser.prettyformat(tree, ('_aliasarg', 'string', 'symbol'))
2383 >>> args = ['$1', '$2', 'foo'] 2395 >>> args = ['$1', '$2', 'foo']
2384 >>> print prettyformat(_parsealiasdefn('$1 or foo', args)) 2396 >>> print prettyformat(_parsealiasdefn('$1 or foo', args))
2385 (or 2397 (or
2386 (func 2398 ('_aliasarg', '$1')
2387 ('symbol', '_aliasarg') 2399 ('_aliasarg', 'foo'))
2388 ('string', '$1'))
2389 (func
2390 ('symbol', '_aliasarg')
2391 ('string', 'foo')))
2392 >>> try: 2400 >>> try:
2393 ... _parsealiasdefn('$1 or $bar', args) 2401 ... _parsealiasdefn('$1 or $bar', args)
2394 ... except error.ParseError, inst: 2402 ... except error.ParseError, inst:
2395 ... print parseerrordetail(inst) 2403 ... print parseerrordetail(inst)
2396 at 6: '$' not for alias arguments 2404 '$' not for alias arguments
2397 >>> args = ['$1', '$10', 'foo'] 2405 >>> args = ['$1', '$10', 'foo']
2398 >>> print prettyformat(_parsealiasdefn('$10 or foobar', args)) 2406 >>> print prettyformat(_parsealiasdefn('$10 or foobar', args))
2399 (or 2407 (or
2400 (func 2408 ('_aliasarg', '$10')
2401 ('symbol', '_aliasarg')
2402 ('string', '$10'))
2403 ('symbol', 'foobar')) 2409 ('symbol', 'foobar'))
2404 >>> print prettyformat(_parsealiasdefn('"$1" or "foo"', args)) 2410 >>> print prettyformat(_parsealiasdefn('"$1" or "foo"', args))
2405 (or 2411 (or
2406 ('string', '$1') 2412 ('string', '$1')
2407 ('string', 'foo')) 2413 ('string', 'foo'))
2408 """ 2414 """
2409 def tokenizedefn(program, lookup=None): 2415 if args:
2410 if args: 2416 args = set(args)
2411 argset = set(args) 2417 else:
2412 else: 2418 args = set()
2413 argset = set()
2414
2415 for t, value, pos in _tokenizealias(program, lookup=lookup):
2416 if t == 'symbol':
2417 if value in argset:
2418 # emulate tokenization of "_aliasarg('ARGNAME')":
2419 # "_aliasarg()" is an unknown symbol only used separate
2420 # alias argument placeholders from regular strings.
2421 yield ('symbol', '_aliasarg', pos)
2422 yield ('(', None, pos)
2423 yield ('string', value, pos)
2424 yield (')', None, pos)
2425 continue
2426 elif value.startswith('$'):
2427 raise error.ParseError(_("'$' not for alias arguments"),
2428 pos)
2429 yield (t, value, pos)
2430 2419
2431 p = parser.parser(elements) 2420 p = parser.parser(elements)
2432 tree, pos = p.parse(tokenizedefn(defn)) 2421 tree, pos = p.parse(_tokenizealias(defn))
2433 if pos != len(defn): 2422 if pos != len(defn):
2434 raise error.ParseError(_('invalid token'), pos) 2423 raise error.ParseError(_('invalid token'), pos)
2435 return parser.simplifyinfixops(tree, ('list', 'or')) 2424 tree = parser.simplifyinfixops(tree, ('list', 'or'))
2425 return _relabelaliasargs(tree, args)
2436 2426
2437 class revsetalias(object): 2427 class revsetalias(object):
2438 # whether own `error` information is already shown or not. 2428 # whether own `error` information is already shown or not.
2439 # this avoids showing same warning multiple times at each `findaliases`. 2429 # this avoids showing same warning multiple times at each `findaliases`.
2440 warned = False 2430 warned = False
2521 result = tuple(_expandaliases(aliases, t, expanding, cache) 2511 result = tuple(_expandaliases(aliases, t, expanding, cache)
2522 for t in tree) 2512 for t in tree)
2523 return result 2513 return result
2524 2514
2525 def findaliases(ui, tree, showwarning=None): 2515 def findaliases(ui, tree, showwarning=None):
2526 _checkaliasarg(tree)
2527 aliases = {} 2516 aliases = {}
2528 for k, v in ui.configitems('revsetalias'): 2517 for k, v in ui.configitems('revsetalias'):
2529 alias = revsetalias(k, v) 2518 alias = revsetalias(k, v)
2530 aliases[alias.name] = alias 2519 aliases[alias.name] = alias
2531 tree = _expandaliases(aliases, tree, [], {}) 2520 tree = _expandaliases(aliases, tree, [], {})