Mercurial > public > mercurial-scm > hg
diff mercurial/util.py @ 8570:7fe2012b3bd0
match: move util match functions over
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Sun, 24 May 2009 02:56:14 -0500 |
parents | 4fadac101818 |
children | 573734e7e6d0 |
line wrap: on
line diff
--- a/mercurial/util.py Sun May 24 02:56:14 2009 -0500 +++ b/mercurial/util.py Sun May 24 02:56:14 2009 -0500 @@ -207,67 +207,6 @@ def always(fn): return True def never(fn): return False -def _patsplit(pat, default): - """Split a string into an optional pattern kind prefix and the - actual pattern.""" - for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre': - if pat.startswith(prefix + ':'): return pat.split(':', 1) - return default, pat - -def _globre(pat, head='^', tail='$'): - "convert a glob pattern into a regexp" - i, n = 0, len(pat) - res = '' - group = 0 - def peek(): return i < n and pat[i] - while i < n: - c = pat[i] - i = i+1 - if c == '*': - if peek() == '*': - i += 1 - res += '.*' - else: - res += '[^/]*' - elif c == '?': - res += '.' - elif c == '[': - j = i - if j < n and pat[j] in '!]': - j += 1 - while j < n and pat[j] != ']': - j += 1 - if j >= n: - res += '\\[' - else: - stuff = pat[i:j].replace('\\','\\\\') - i = j + 1 - if stuff[0] == '!': - stuff = '^' + stuff[1:] - elif stuff[0] == '^': - stuff = '\\' + stuff - res = '%s[%s]' % (res, stuff) - elif c == '{': - group += 1 - res += '(?:' - elif c == '}' and group: - res += ')' - group -= 1 - elif c == ',' and group: - res += '|' - elif c == '\\': - p = peek() - if p: - i += 1 - res += re.escape(p) - else: - res += re.escape(c) - else: - res += re.escape(c) - return head + res + tail - -_globchars = set('[{*?') - def pathto(root, n1, n2): '''return the relative path from one place to another. root should use os.sep to separate directories @@ -342,140 +281,6 @@ raise Abort('%s not under root' % myname) -def matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'): - """build a function to match a set of file patterns - - arguments: - canonroot - the canonical root of the tree you're matching against - cwd - the current working directory, if relevant - names - patterns to find - inc - patterns to include - exc - patterns to exclude - dflt_pat - if a pattern in names has no explicit type, assume this one - - a pattern is one of: - 'glob:<glob>' - a glob relative to cwd - 're:<regexp>' - a regular expression - 'path:<path>' - a path relative to canonroot - 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs) - 'relpath:<path>' - a path relative to cwd - 'relre:<regexp>' - a regexp that doesn't have to match the start of a name - '<something>' - one of the cases above, selected by the dflt_pat argument - - returns: - a 3-tuple containing - - list of roots (places where one should start a recursive walk of the fs); - this often matches the explicit non-pattern names passed in, but also - includes the initial part of glob: patterns that has no glob characters - - a bool match(filename) function - - a bool indicating if any patterns were passed in - """ - - # a common case: no patterns at all - if not names and not inc and not exc: - return [], always, False - - def contains_glob(name): - for c in name: - if c in _globchars: return True - return False - - def regex(kind, name, tail): - '''convert a pattern into a regular expression''' - if not name: - return '' - if kind == 're': - return name - elif kind == 'path': - return '^' + re.escape(name) + '(?:/|$)' - elif kind == 'relglob': - return _globre(name, '(?:|.*/)', tail) - elif kind == 'relpath': - return re.escape(name) + '(?:/|$)' - elif kind == 'relre': - if name.startswith('^'): - return name - return '.*' + name - return _globre(name, '', tail) - - def matchfn(pats, tail): - """build a matching function from a set of patterns""" - if not pats: - return - try: - pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats]) - if len(pat) > 20000: - raise OverflowError() - return re.compile(pat).match - except OverflowError: - # We're using a Python with a tiny regex engine and we - # made it explode, so we'll divide the pattern list in two - # until it works - l = len(pats) - if l < 2: - raise - a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail) - return lambda s: a(s) or b(s) - except re.error: - for k, p in pats: - try: - re.compile('(?:%s)' % regex(k, p, tail)) - except re.error: - raise Abort("invalid pattern (%s): %s" % (k, p)) - raise Abort("invalid pattern") - - def globprefix(pat): - '''return the non-glob prefix of a path, e.g. foo/* -> foo''' - root = [] - for p in pat.split('/'): - if contains_glob(p): break - root.append(p) - return '/'.join(root) or '.' - - def normalizepats(names, default): - pats = [] - roots = [] - anypats = False - for kind, name in [_patsplit(p, default) for p in names]: - if kind in ('glob', 'relpath'): - name = canonpath(canonroot, cwd, name) - elif kind in ('relglob', 'path'): - name = normpath(name) - - pats.append((kind, name)) - - if kind in ('glob', 're', 'relglob', 'relre'): - anypats = True - - if kind == 'glob': - root = globprefix(name) - roots.append(root) - elif kind in ('relpath', 'path'): - roots.append(name or '.') - elif kind == 'relglob': - roots.append('.') - return roots, pats, anypats - - roots, pats, anypats = normalizepats(names, dflt_pat) - - patmatch = matchfn(pats, '$') or always - incmatch = always - if inc: - dummy, inckinds, dummy = normalizepats(inc, 'glob') - incmatch = matchfn(inckinds, '(?:/|$)') - excmatch = never - if exc: - dummy, exckinds, dummy = normalizepats(exc, 'glob') - excmatch = matchfn(exckinds, '(?:/|$)') - - if not names and inc and not exc: - # common case: hgignore patterns - match = incmatch - else: - match = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn) - - return (roots, match, (inc or exc or anypats) and True) - _hgexecutable = None def main_is_frozen():