diff mercurial/util.py @ 16943:8d08a28aa63e

matcher: use re2 bindings if available There are two sets of Python re2 bindings available on the internet; this code works with both. Using re2 can greatly improve "hg status" performance when a .hgignore file becomes even modestly complex. Example: "hg status" on a clean tree with 134K files, where "hg debugignore" reports a regexp 4256 bytes in size. no .hgignore: 1.76 sec Python re: 2.79 re2: 1.82 The overhead of regexp matching drops from 1.03 seconds with stock re to 0.06 with re2. (For comparison, a git repo with the same contents and .gitignore file runs "git status -s" in 1.71 seconds, i.e. only slightly faster than hg with re2.)
author Bryan O'Sullivan <bryano@fb.com>
date Fri, 01 Jun 2012 15:26:20 -0700
parents 37e081609828
children 0cb55b5c19a3
line wrap: on
line diff
--- a/mercurial/util.py	Thu Jun 07 01:42:50 2012 +0200
+++ b/mercurial/util.py	Fri Jun 01 15:26:20 2012 -0700
@@ -629,6 +629,30 @@
     except OSError:
         return True
 
+try:
+    import re2
+    _re2 = None
+except ImportError:
+    _re2 = False
+
+def compilere(pat):
+    '''Compile a regular expression, using re2 if possible
+
+    For best performance, use only re2-compatible regexp features.'''
+    global _re2
+    if _re2 is None:
+        try:
+            re2.compile
+            _re2 = True
+        except ImportError:
+            _re2 = False
+    if _re2:
+        try:
+            return re2.compile(pat)
+        except re2.error:
+            pass
+    return re.compile(pat)
+
 _fspathcache = {}
 def fspath(name, root):
     '''Get name in the case stored in the filesystem