Mercurial > public > mercurial-scm > hg-stable
diff mercurial/filesetlang.py @ 38845:b9162ea1b815
fileset: extract language processing part to new module (API)
I'll add a couple more functions that work on the parsed tree.
% wc -l mercurial/fileset*.py
559 mercurial/fileset.py
135 mercurial/filesetlang.py
694 total
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 22 Jul 2018 15:50:45 +0900 |
parents | mercurial/fileset.py@d82c4d42b615 |
children | 6371ab78c3b3 |
line wrap: on
line diff
# filesetlang.py - parser, tokenizer and utility for file set language
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from . import (
    error,
    parser,
    pycompat,
)

# Operator table handed to parser.parser() by parse() below.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
    ":": (15, None, None, ("kindpat", 15), None),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}

# Words that tokenize() reports as operator tokens rather than symbols.
keywords = {'and', 'or', 'not'}

symbols = {}

# Punctuation that tokenize() accepts inside a symbol, in addition to
# alphanumerics and characters whose ordinal is above 127.
globchars = ".*{}[]?/\\_"

def _issymbolchar(ch):
    """Tell whether tokenize() treats ``ch`` as part of a symbol/keyword."""
    return ch.isalnum() or ch in globchars or ord(ch) > 127

def tokenize(program):
    """Generate ``(type, value, position)`` tokens for a fileset expression.

    Simple operators are yielded with their own character as the type and
    ``None`` as the value. Quoted strings are yielded as ``'string'`` tokens
    positioned just after the opening quote; the text of a plain quoted
    string is passed through ``parser.unescapestr`` while the text of an
    ``r'...'``/``r"..."`` string is used verbatim. Any other run of symbol
    characters is yielded either as a keyword token or as a ``'symbol'``
    token. A final ``('end', None, position)`` token closes the stream.

    Raises error.ParseError for an unterminated string or for a character
    that cannot start any token.
    """
    end = len(program)
    program = pycompat.bytestr(program)
    cur = 0
    while cur < end:
        ch = program[cur]

        if ch.isspace():
            # whitespace between tokens is dropped
            cur += 1
            continue

        if ch in "(),-:|&+!":
            # single-character operators
            yield (ch, None, cur)
            cur += 1
            continue

        israw = ch == 'r' and program[cur:cur + 2] in ("r'", 'r"')
        if ch in '"\'' or israw:
            # quoted string, optionally carrying an r prefix
            if israw:
                cur += 1
            quote = program[cur]
            cur += 1
            start = cur
            while cur < end:
                here = program[cur]
                if here == '\\':
                    # a backslash shields the character that follows it,
                    # so a quote right after it does not end the string
                    cur += 2
                    continue
                if here == quote:
                    text = program[start:cur]
                    if not israw:
                        text = parser.unescapestr(text)
                    yield ('string', text, start)
                    break
                cur += 1
            else:
                # ran off the end without meeting the closing quote
                raise error.ParseError(_("unterminated string"), start)
            cur += 1  # step over the closing quote
            continue

        if _issymbolchar(ch):
            # collect the whole symbol/keyword
            start = cur
            cur += 1
            while cur < end and _issymbolchar(program[cur]):
                cur += 1
            word = program[start:cur]
            if word in keywords:
                yield (word, None, start)
            else:
                yield ('symbol', word, start)
            # cur already rests on the first character after the symbol
            continue

        raise error.ParseError(_("syntax error"), cur)

    yield ('end', None, cur)

def parse(expr):
    """Parse a fileset expression and return its tree.

    The token stream from tokenize() is fed to a parser.parser built from
    ``elements``; the resulting tree is returned after being passed to
    parser.simplifyinfixops with the 'list' and 'or' operators.

    Raises error.ParseError("invalid token") when the position reported by
    the parser differs from the length of ``expr``.
    """
    tree, consumed = parser.parser(elements).parse(tokenize(expr))
    if consumed == len(expr):
        return parser.simplifyinfixops(tree, {'list', 'or'})
    raise error.ParseError(_("invalid token"), consumed)

def getsymbol(x):
    """Return the value of a 'symbol' node, or raise error.ParseError."""
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]

def getstring(x, err):
    """Return the value of a 'string' or 'symbol' node.

    Raises error.ParseError(err) for an empty node or any other node type.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]

def getkindpat(x, y, allkinds, err):
    """Build a ``kind:pattern`` string from a kind node and a pattern node.

    ``x`` must be a symbol node and ``y`` a string or symbol node (``err`` is
    the message used when ``y`` is not); the kind must be one of
    ``allkinds``, otherwise error.ParseError is raised.
    """
    kind = getsymbol(x)
    pat = getstring(y, err)
    if kind in allkinds:
        return '%s:%s' % (kind, pat)
    raise error.ParseError(_("invalid pattern kind: %s") % kind)

def getpattern(x, allkinds, err):
    """Return the pattern string held by a 'kindpat', string or symbol node."""
    iskindpat = x and x[0] == 'kindpat'
    if not iskindpat:
        return getstring(x, err)
    return getkindpat(x[1], x[2], allkinds, err)

def getlist(x):
    """Return the items of node ``x`` as a list.

    An empty node gives ``[]``, a 'list' node gives its children, and any
    other node gives a one-element list containing the node itself.
    """
    if not x:
        return []
    return list(x[1:]) if x[0] == 'list' else [x]

def getargs(x, min, max, err):
    """Return the argument list of ``x``, enforcing its length.

    Raises error.ParseError(err) unless the number of items lies between
    ``min`` and ``max`` inclusive.
    """
    args = getlist(x)
    count = len(args)
    if count < min or count > max:
        raise error.ParseError(err)
    return args

def prettyformat(tree):
    """Format ``tree`` via parser.prettyformat with 'string' and 'symbol'
    passed as its second argument."""
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)