Mercurial > public > mercurial-scm > hg-stable
diff mercurial/match.py @ 16943:8d08a28aa63e
matcher: use re2 bindings if available
There are two sets of Python re2 bindings available on the internet;
this code works with both.
Using re2 can greatly improve "hg status" performance when a .hgignore
file becomes even modestly complex.
Example: "hg status" on a clean tree with 134K files, where "hg
debugignore" reports a regexp 4256 bytes in size.
no .hgignore: 1.76 sec
Python re: 2.79 sec
re2: 1.82 sec
The overhead of regexp matching drops from 1.03 seconds with stock
re to 0.06 with re2.
(For comparison, a git repo with the same contents and .gitignore
file runs "git status -s" in 1.71 seconds, i.e. only slightly faster
than hg with re2.)
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Fri, 01 Jun 2012 15:26:20 -0700 |
parents | 977c80123835 |
children | e95ec38f86b0 |
line wrap: on
line diff
```diff
--- a/mercurial/match.py Thu Jun 07 01:42:50 2012 +0200
+++ b/mercurial/match.py Fri Jun 01 15:26:20 2012 -0700
@@ -9,6 +9,14 @@
 import scmutil, util, fileset
 from i18n import _
 
+def _rematcher(pat):
+    m = util.compilere(pat)
+    try:
+        # slightly faster, provided by facebook's re2 bindings
+        return m.test_match
+    except AttributeError:
+        return m.match
+
 def _expandsets(pats, ctx):
     '''convert set: patterns into a list of files in the given context'''
     fset = set()
@@ -280,7 +288,7 @@
         pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
         if len(pat) > 20000:
             raise OverflowError
-        return pat, re.compile(pat).match
+        return pat, _rematcher(pat)
     except OverflowError:
         # We're using a Python with a tiny regex engine and we
         # made it explode, so we'll divide the pattern list in two
@@ -294,7 +302,7 @@
     except re.error:
         for k, p in pats:
             try:
-                re.compile('(?:%s)' % _regex(k, p, tail))
+                _rematcher('(?:%s)' % _regex(k, p, tail))
             except re.error:
                 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
         raise util.Abort(_("invalid pattern"))
```