mercurial/filesetlang.py
changeset 38805 b9162ea1b815
parent 38804 d82c4d42b615
child 38826 6371ab78c3b3
equal deleted inserted replaced
38804:d82c4d42b615 38805:b9162ea1b815
       
     1 # filesetlang.py - parser, tokenizer and utility for file set language
       
     2 #
       
     3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 from __future__ import absolute_import
       
     9 
       
    10 from .i18n import _
       
    11 from . import (
       
    12     error,
       
    13     parser,
       
    14     pycompat,
       
    15 )
       
    16 
       
    17 elements = {
       
    18     # token-type: binding-strength, primary, prefix, infix, suffix
       
    19     "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
       
    20     ":": (15, None, None, ("kindpat", 15), None),
       
    21     "-": (5, None, ("negate", 19), ("minus", 5), None),
       
    22     "not": (10, None, ("not", 10), None, None),
       
    23     "!": (10, None, ("not", 10), None, None),
       
    24     "and": (5, None, None, ("and", 5), None),
       
    25     "&": (5, None, None, ("and", 5), None),
       
    26     "or": (4, None, None, ("or", 4), None),
       
    27     "|": (4, None, None, ("or", 4), None),
       
    28     "+": (4, None, None, ("or", 4), None),
       
    29     ",": (2, None, None, ("list", 2), None),
       
    30     ")": (0, None, None, None, None),
       
    31     "symbol": (0, "symbol", None, None, None),
       
    32     "string": (0, "string", None, None, None),
       
    33     "end": (0, None, None, None, None),
       
    34 }
       
    35 
       
    36 keywords = {'and', 'or', 'not'}
       
    37 
       
    38 symbols = {}
       
    39 
       
    40 globchars = ".*{}[]?/\\_"
       
    41 
       
    42 def tokenize(program):
       
    43     pos, l = 0, len(program)
       
    44     program = pycompat.bytestr(program)
       
    45     while pos < l:
       
    46         c = program[pos]
       
    47         if c.isspace(): # skip inter-token whitespace
       
    48             pass
       
    49         elif c in "(),-:|&+!": # handle simple operators
       
    50             yield (c, None, pos)
       
    51         elif (c in '"\'' or c == 'r' and
       
    52               program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
       
    53             if c == 'r':
       
    54                 pos += 1
       
    55                 c = program[pos]
       
    56                 decode = lambda x: x
       
    57             else:
       
    58                 decode = parser.unescapestr
       
    59             pos += 1
       
    60             s = pos
       
    61             while pos < l: # find closing quote
       
    62                 d = program[pos]
       
    63                 if d == '\\': # skip over escaped characters
       
    64                     pos += 2
       
    65                     continue
       
    66                 if d == c:
       
    67                     yield ('string', decode(program[s:pos]), s)
       
    68                     break
       
    69                 pos += 1
       
    70             else:
       
    71                 raise error.ParseError(_("unterminated string"), s)
       
    72         elif c.isalnum() or c in globchars or ord(c) > 127:
       
    73             # gather up a symbol/keyword
       
    74             s = pos
       
    75             pos += 1
       
    76             while pos < l: # find end of symbol
       
    77                 d = program[pos]
       
    78                 if not (d.isalnum() or d in globchars or ord(d) > 127):
       
    79                     break
       
    80                 pos += 1
       
    81             sym = program[s:pos]
       
    82             if sym in keywords: # operator keywords
       
    83                 yield (sym, None, s)
       
    84             else:
       
    85                 yield ('symbol', sym, s)
       
    86             pos -= 1
       
    87         else:
       
    88             raise error.ParseError(_("syntax error"), pos)
       
    89         pos += 1
       
    90     yield ('end', None, pos)
       
    91 
       
    92 def parse(expr):
       
    93     p = parser.parser(elements)
       
    94     tree, pos = p.parse(tokenize(expr))
       
    95     if pos != len(expr):
       
    96         raise error.ParseError(_("invalid token"), pos)
       
    97     return parser.simplifyinfixops(tree, {'list', 'or'})
       
    98 
       
    99 def getsymbol(x):
       
   100     if x and x[0] == 'symbol':
       
   101         return x[1]
       
   102     raise error.ParseError(_('not a symbol'))
       
   103 
       
   104 def getstring(x, err):
       
   105     if x and (x[0] == 'string' or x[0] == 'symbol'):
       
   106         return x[1]
       
   107     raise error.ParseError(err)
       
   108 
       
   109 def getkindpat(x, y, allkinds, err):
       
   110     kind = getsymbol(x)
       
   111     pat = getstring(y, err)
       
   112     if kind not in allkinds:
       
   113         raise error.ParseError(_("invalid pattern kind: %s") % kind)
       
   114     return '%s:%s' % (kind, pat)
       
   115 
       
   116 def getpattern(x, allkinds, err):
       
   117     if x and x[0] == 'kindpat':
       
   118         return getkindpat(x[1], x[2], allkinds, err)
       
   119     return getstring(x, err)
       
   120 
       
   121 def getlist(x):
       
   122     if not x:
       
   123         return []
       
   124     if x[0] == 'list':
       
   125         return list(x[1:])
       
   126     return [x]
       
   127 
       
   128 def getargs(x, min, max, err):
       
   129     l = getlist(x)
       
   130     if len(l) < min or len(l) > max:
       
   131         raise error.ParseError(err)
       
   132     return l
       
   133 
       
   134 def prettyformat(tree):
       
   135     return parser.prettyformat(tree, ('string', 'symbol'))