|
1 # filesetlang.py - parser, tokenizer and utility for file set language |
|
2 # |
|
3 # Copyright 2010 Matt Mackall <mpm@selenic.com> |
|
4 # |
|
5 # This software may be used and distributed according to the terms of the |
|
6 # GNU General Public License version 2 or any later version. |
|
7 |
|
8 from __future__ import absolute_import |
|
9 |
|
10 from .i18n import _ |
|
11 from . import ( |
|
12 error, |
|
13 parser, |
|
14 pycompat, |
|
15 ) |
|
16 |
|
# Operator table handed to parser.parser() by parse().
#
# Each key is a token type as produced by tokenize(); each value is a
# 5-tuple describing how that token may be used:
#
#   (binding-strength, primary, prefix, infix, suffix)
#
# A None slot means the token cannot appear in that position.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
    ":": (15, None, None, ("kindpat", 15), None),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}
|
35 |
|
# Words that tokenize() emits as operator tokens instead of as symbols.
keywords = {'and', 'or', 'not'}

# Module-level symbol table; nothing in this part of the file populates it.
symbols = {}

# Non-alphanumeric characters that tokenize() accepts inside a bare symbol,
# so glob-like patterns can be written without quoting.
globchars = ".*{}[]?/\\_"
|
41 |
|
def tokenize(program):
    """Split a fileset expression into tokens.

    This is a generator yielding ``(type, value, pos)`` tuples:

    - simple operators: ``(char, None, pos)`` for each of ``(),-:|&+!``
    - operator keywords: ``(word, None, pos)`` for members of ``keywords``
    - strings: ``('string', text, pos)`` where ``pos`` is the offset of the
      first character after the opening quote; the text of an ``r``-prefixed
      string is returned untouched, any other quoted string is passed
      through ``parser.unescapestr``
    - symbols: ``('symbol', text, pos)`` for a run of alphanumerics,
      ``globchars`` and characters whose ordinal is above 127
    - a final ``('end', None, pos)`` once the input is exhausted

    Raises ``error.ParseError`` for an unterminated string or for a
    character that cannot start any token.
    """
    pos, l = 0, len(program)
    program = pycompat.bytestr(program)
    while pos < l:
        c = program[pos]
        if c.isspace():  # skip inter-token whitespace
            pass
        elif c in "(),-:|&+!":  # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')):  # handle quoted strings
            if c == 'r':
                # raw string: step over the prefix and keep the body verbatim
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l:  # find closing quote
                d = program[pos]
                if d == '\\':  # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end of the input without seeing the quote
                raise error.ParseError(_("unterminated string"), s)
        elif c.isalnum() or c in globchars or ord(c) > 127:
            # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l:  # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in globchars or ord(d) > 127):
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords:  # operator keywords
                yield (sym, None, s)
            else:
                yield ('symbol', sym, s)
            # compensate for the unconditional advance at the loop bottom
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)
|
91 |
|
def parse(expr):
    """Parse a fileset expression and return its tree.

    The expression is tokenized with ``tokenize()`` and parsed using the
    ``elements`` operator table.  The resulting tree is handed to
    ``parser.simplifyinfixops`` for the 'list' and 'or' operators before
    being returned.

    Raises ``error.ParseError`` ("invalid token") when the parser stops
    before reaching the end of ``expr``.
    """
    p = parser.parser(elements)
    tree, pos = p.parse(tokenize(expr))
    if pos != len(expr):
        # the parser finished early, so something was left unconsumed
        raise error.ParseError(_("invalid token"), pos)
    return parser.simplifyinfixops(tree, {'list', 'or'})
|
98 |
|
def getsymbol(x):
    """Return the name carried by a ``('symbol', name)`` node.

    Raises ``error.ParseError`` ("not a symbol") when ``x`` is empty/None
    or is any other kind of node.
    """
    if x and x[0] == 'symbol':
        return x[1]
    raise error.ParseError(_('not a symbol'))
|
103 |
|
def getstring(x, err):
    """Return the text carried by a 'string' or 'symbol' node.

    ``err`` is the message used for the ``error.ParseError`` raised when
    ``x`` is empty/None or is any other kind of node.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    raise error.ParseError(err)
|
108 |
|
def getkindpat(x, y, allkinds, err):
    """Return ``'kind:pat'`` built from the two operands of a kindpat node.

    ``x`` must be a symbol node naming the kind and ``y`` a string or
    symbol node holding the pattern.  ``allkinds`` is the collection of
    accepted kind names and ``err`` the message used when ``y`` is not a
    string-like node.

    Raises ``error.ParseError`` when ``x`` is not a symbol, when ``y`` is
    not a string/symbol, or when the kind is not in ``allkinds``.
    """
    kind = getsymbol(x)
    pat = getstring(y, err)
    if kind not in allkinds:
        raise error.ParseError(_("invalid pattern kind: %s") % kind)
    return '%s:%s' % (kind, pat)
|
115 |
|
def getpattern(x, allkinds, err):
    """Return the pattern text described by node ``x``.

    A ``('kindpat', kind, pat)`` node is rendered as ``'kind:pat'`` via
    ``getkindpat()``; any other node must be a string or symbol, whose
    text is returned as is.  ``err`` is the ``error.ParseError`` message
    used when the node is not string-like.
    """
    if x and x[0] == 'kindpat':
        return getkindpat(x[1], x[2], allkinds, err)
    return getstring(x, err)
|
120 |
|
def getlist(x):
    """Return the items of an argument tree as a Python list.

    - an empty/None tree gives ``[]``
    - a ``('list', a, b, ...)`` node gives ``[a, b, ...]``
    - any other node is treated as a single argument and gives ``[x]``
    """
    if not x:
        return []
    if x[0] == 'list':
        return list(x[1:])
    return [x]
|
127 |
|
def getargs(x, min, max, err):
    """Return the arguments in tree ``x`` after checking how many there are.

    The tree is flattened with ``getlist()``.  ``error.ParseError(err)`` is
    raised unless the number of arguments lies between ``min`` and ``max``
    inclusive.
    """
    l = getlist(x)
    if not (min <= len(l) <= max):
        raise error.ParseError(err)
    return l
|
133 |
|
def prettyformat(tree):
    """Return ``parser.prettyformat()``'s rendering of a parsed tree.

    'string' and 'symbol' are passed as the second argument, the node
    types that hold plain values rather than sub-trees.
    """
    return parser.prettyformat(tree, ('string', 'symbol'))