Mercurial > public > mercurial-scm > hg
comparison mercurial/revset.py @ 23845:0a7fd54d4e60
revset: introduce "_parsealiasdecl" to parse alias declarations strictly
This patch introduces "_parsealiasdecl" to parse alias declarations
strictly. For example, "_parsealiasdecl" can detect the problems below,
which the current implementation can't.
- an unclosed parenthesis causes the declaration to be treated as an
"alias symbol", because all declarations not in "func(....)" style are
recognized as "alias symbol".
For example, "foo($1, $2" is treated as an alias symbol.
- alias symbol/function names aren't checked for whether they are
valid as symbols.
For example, "foo bar" can be treated as an alias symbol, but of
course such an invalid symbol can't be referred to in a revset.
- just splitting the argument list by "," overlooks syntax
problems in the declaration.
For example, all of the invalid declarations below are overlooked:
- foo("bar") => taking one argument named as '"bar"'
- foo("unclosed) => taking one argument named as '"unclosed'
- foo(bar::baz) => taking one argument named as 'bar::baz'
- foo(bar($1)) => taking one argument named as 'bar($1)'
To keep this patch simple, the current implementation for alias
declarations is replaced by "_parsealiasdecl" in the subsequent
patch. This patch just introduces it.
This patch defines "_parsealiasdecl" not as a method of the "revsetalias"
class but as a function of the "revset" module, for ease of testing by
doctest.
This patch factors out some helper functions for "tree", because:
- direct access like "if tree[0] == 'func' and len(tree) > 1"
decreases readability
- the subsequent patch (and also existing code paths, in the future) can
use them for readability
This patch also factors out "_tokenizealias", because it can also be
used for parsing alias definitions strictly.
author | FUJIWARA Katsunori <foozy@lares.dti.ne.jp> |
---|---|
date | Sat, 10 Jan 2015 23:18:11 +0900 |
parents | ddf2172e901d |
children | aac4a1a7920e |
comparison
equal
deleted
inserted
replaced
23844:ddf2172e901d | 23845:0a7fd54d4e60 |
---|---|
265 l = getlist(x) | 265 l = getlist(x) |
266 if len(l) < min or (max >= 0 and len(l) > max): | 266 if len(l) < min or (max >= 0 and len(l) > max): |
267 raise error.ParseError(err) | 267 raise error.ParseError(err) |
268 return l | 268 return l |
269 | 269 |
270 def isvalidsymbol(tree): | |
271 """Examine whether specified ``tree`` is valid ``symbol`` or not | |
272 """ | |
273 return tree[0] == 'symbol' and len(tree) > 1 | |
274 | |
275 def getsymbol(tree): | |
276 """Get symbol name from valid ``symbol`` in ``tree`` | |
277 | |
278 This assumes that ``tree`` is already examined by ``isvalidsymbol``. | |
279 """ | |
280 return tree[1] | |
281 | |
282 def isvalidfunc(tree): | |
283 """Examine whether specified ``tree`` is valid ``func`` or not | |
284 """ | |
285 return tree[0] == 'func' and len(tree) > 1 and isvalidsymbol(tree[1]) | |
286 | |
287 def getfuncname(tree): | |
288 """Get function name from valid ``func`` in ``tree`` | |
289 | |
290 This assumes that ``tree`` is already examined by ``isvalidfunc``. | |
291 """ | |
292 return getsymbol(tree[1]) | |
293 | |
294 def getfuncargs(tree): | |
295 """Get list of function arguments from valid ``func`` in ``tree`` | |
296 | |
297 This assumes that ``tree`` is already examined by ``isvalidfunc``. | |
298 """ | |
299 if len(tree) > 2: | |
300 return getlist(tree[2]) | |
301 else: | |
302 return [] | |
303 | |
270 def getset(repo, subset, x): | 304 def getset(repo, subset, x): |
271 if not x: | 305 if not x: |
272 raise error.ParseError(_("missing argument")) | 306 raise error.ParseError(_("missing argument")) |
273 s = methods[x[0]](repo, subset, *x[1:]) | 307 s = methods[x[0]](repo, subset, *x[1:]) |
274 if util.safehasattr(s, 'isascending'): | 308 if util.safehasattr(s, 'isascending'): |
2116 if arg is not None and (not known or arg not in known): | 2150 if arg is not None and (not known or arg not in known): |
2117 raise error.ParseError(_("not a function: %s") % '_aliasarg') | 2151 raise error.ParseError(_("not a function: %s") % '_aliasarg') |
2118 for t in tree: | 2152 for t in tree: |
2119 _checkaliasarg(t, known) | 2153 _checkaliasarg(t, known) |
2120 | 2154 |
2155 # the set of valid characters for the initial letter of symbols in | |
2156 # alias declarations and definitions | |
2157 _aliassyminitletters = set(c for c in [chr(i) for i in xrange(256)] | |
2158 if c.isalnum() or c in '._@$' or ord(c) > 127) | |
2159 | |
2160 def _tokenizealias(program, lookup=None): | |
2161 """Parse alias declaration/definition into a stream of tokens | |
2162 | |
2163 This allows symbol names to use also ``$`` as an initial letter | |
2164 (for backward compatibility), and callers of this function should | |
2165 examine whether ``$`` is used also for unexpected symbols or not. | |
2166 """ | |
2167 return tokenize(program, lookup=lookup, | |
2168 syminitletters=_aliassyminitletters) | |
2169 | |
2170 def _parsealiasdecl(decl): | |
2171 """Parse alias declaration ``decl`` | |
2172 | |
2173 This returns ``(name, tree, args, errorstr)`` tuple: | |
2174 | |
2175 - ``name``: of declared alias (may be ``decl`` itself at error) | |
2176 - ``tree``: parse result (or ``None`` at error) | |
2177 - ``args``: list of alias argument names (or None for symbol declaration) | |
2178 - ``errorstr``: detail about detected error (or None) | |
2179 | |
2180 >>> _parsealiasdecl('foo') | |
2181 ('foo', ('symbol', 'foo'), None, None) | |
2182 >>> _parsealiasdecl('$foo') | |
2183 ('$foo', None, None, "'$' not for alias arguments") | |
2184 >>> _parsealiasdecl('foo::bar') | |
2185 ('foo::bar', None, None, 'invalid format') | |
2186 >>> _parsealiasdecl('foo bar') | |
2187 ('foo bar', None, None, 'at 4: invalid token') | |
2188 >>> _parsealiasdecl('foo()') | |
2189 ('foo', ('func', ('symbol', 'foo')), [], None) | |
2190 >>> _parsealiasdecl('$foo()') | |
2191 ('$foo()', None, None, "'$' not for alias arguments") | |
2192 >>> _parsealiasdecl('foo($1, $2)') | |
2193 ('foo', ('func', ('symbol', 'foo')), ['$1', '$2'], None) | |
2194 >>> _parsealiasdecl('foo(bar_bar, baz.baz)') | |
2195 ('foo', ('func', ('symbol', 'foo')), ['bar_bar', 'baz.baz'], None) | |
2196 >>> _parsealiasdecl('foo($1, $2, nested($1, $2))') | |
2197 ('foo($1, $2, nested($1, $2))', None, None, 'invalid argument list') | |
2198 >>> _parsealiasdecl('foo(bar($1, $2))') | |
2199 ('foo(bar($1, $2))', None, None, 'invalid argument list') | |
2200 >>> _parsealiasdecl('foo("string")') | |
2201 ('foo("string")', None, None, 'invalid argument list') | |
2202 >>> _parsealiasdecl('foo($1, $2') | |
2203 ('foo($1, $2', None, None, 'at 10: unexpected token: end') | |
2204 >>> _parsealiasdecl('foo("string') | |
2205 ('foo("string', None, None, 'at 5: unterminated string') | |
2206 """ | |
2207 p = parser.parser(_tokenizealias, elements) | |
2208 try: | |
2209 tree, pos = p.parse(decl) | |
2210 if (pos != len(decl)): | |
2211 raise error.ParseError(_('invalid token'), pos) | |
2212 | |
2213 if isvalidsymbol(tree): | |
2214 # "name = ...." style | |
2215 name = getsymbol(tree) | |
2216 if name.startswith('$'): | |
2217 return (decl, None, None, _("'$' not for alias arguments")) | |
2218 return (name, ('symbol', name), None, None) | |
2219 | |
2220 if isvalidfunc(tree): | |
2221 # "name(arg, ....) = ...." style | |
2222 name = getfuncname(tree) | |
2223 if name.startswith('$'): | |
2224 return (decl, None, None, _("'$' not for alias arguments")) | |
2225 args = [] | |
2226 for arg in getfuncargs(tree): | |
2227 if not isvalidsymbol(arg): | |
2228 return (decl, None, None, _("invalid argument list")) | |
2229 args.append(getsymbol(arg)) | |
2230 return (name, ('func', ('symbol', name)), args, None) | |
2231 | |
2232 return (decl, None, None, _("invalid format")) | |
2233 except error.ParseError, inst: | |
2234 return (decl, None, None, parseerrordetail(inst)) | |
2235 | |
2121 class revsetalias(object): | 2236 class revsetalias(object): |
2122 funcre = re.compile('^([^(]+)\(([^)]+)\)$') | 2237 funcre = re.compile('^([^(]+)\(([^)]+)\)$') |
2123 args = None | 2238 args = None |
2124 | 2239 |
2125 # error message at parsing, or None | 2240 # error message at parsing, or None |