diff -r 9d25bb84cf6c -r 91dbb98b3513 mercurial/revset.py
--- a/mercurial/revset.py Fri Jan 09 18:38:02 2015 +0100
+++ b/mercurial/revset.py Sat Jan 10 23:18:11 2015 +0900
@@ -129,15 +129,39 @@
 
 keywords = set(['and', 'or', 'not'])
 
-def tokenize(program, lookup=None):
+# default set of valid characters for the initial letter of symbols
+_syminitletters = set(c for c in [chr(i) for i in xrange(256)]
+                      if c.isalnum() or c in '._@' or ord(c) > 127)
+
+# default set of valid characters for non-initial letters of symbols
+_symletters = set(c for c in [chr(i) for i in xrange(256)]
+                  if c.isalnum() or c in '-._/@' or ord(c) > 127)
+
+def tokenize(program, lookup=None, syminitletters=None, symletters=None):
     '''
     Parse a revset statement into a stream of tokens
 
+    ``syminitletters`` is the set of valid characters for the initial
+    letter of symbols.
+
+    By default, character ``c`` is recognized as valid for initial
+    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
+
+    ``symletters`` is the set of valid characters for non-initial
+    letters of symbols.
+
+    By default, character ``c`` is recognized as valid for non-initial
+    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
+
     Check that @ is a valid unquoted token character (issue3686):
     >>> list(tokenize("@::"))
     [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
 
     '''
+    if syminitletters is None:
+        syminitletters = _syminitletters
+    if symletters is None:
+        symletters = _symletters
 
     pos, l = 0, len(program)
     while pos < l:
@@ -177,12 +201,12 @@
             else:
                 raise error.ParseError(_("unterminated string"), s)
         # gather up a symbol/keyword
-        elif c.isalnum() or c in '._@' or ord(c) > 127:
+        elif c in syminitletters:
             s = pos
             pos += 1
             while pos < l: # find end of symbol
                 d = program[pos]
-                if not (d.isalnum() or d in "-._/@" or ord(d) > 127):
+                if d not in symletters:
                     break
                 if d == '.' and program[pos - 1] == '.': # special case for ..
                     pos -= 1