Mercurial > public > mercurial-scm > hg
comparison mercurial/util.py @ 30773:c390b40fe1d7
util: teach stringmatcher to handle forced case insensitive matches
The 'author' and 'desc' revsets are documented to be case insensitive.
Unfortunately, this was implemented in 'author' by forcing the input to
lowercase, including for regex like '\B'. (Lowercasing turns '\B' into '\b',
which actually inverts the meaning of the sequence.) For backward compatibility,
we will keep treating that as a case insensitive regex, but by using matcher
options instead of brute-force lowercasing.
This doesn't preclude future hypothetical 'icase-literal:' style prefixes that
can be provided by the user. Such user-specified cases can probably be handled
up front by stripping 'icase-', setting the case sensitivity variable, and
letting it drop through the existing code.
author | Matt Harbison <matt_harbison@yahoo.com> |
---|---|
date | Wed, 11 Jan 2017 21:47:19 -0500 |
parents | 7283719e2bfd |
children | 31e1f0d4ab44 |
comparison
equal
deleted
inserted
replaced
30772:b1012cb1bec3 | 30773:c390b40fe1d7 |
---|---|
1994 return lambda x: x >= start and x <= stop | 1994 return lambda x: x >= start and x <= stop |
1995 else: | 1995 else: |
1996 start, stop = lower(date), upper(date) | 1996 start, stop = lower(date), upper(date) |
1997 return lambda x: x >= start and x <= stop | 1997 return lambda x: x >= start and x <= stop |
1998 | 1998 |
1999 def stringmatcher(pattern): | 1999 def stringmatcher(pattern, casesensitive=True): |
2000 """ | 2000 """ |
2001 accepts a string, possibly starting with 're:' or 'literal:' prefix. | 2001 accepts a string, possibly starting with 're:' or 'literal:' prefix. |
2002 returns the matcher name, pattern, and matcher function. | 2002 returns the matcher name, pattern, and matcher function. |
2003 missing or unknown prefixes are treated as literal matches. | 2003 missing or unknown prefixes are treated as literal matches. |
2004 | 2004 |
2005 helper for tests: | 2005 helper for tests: |
2006 >>> def test(pattern, *tests): | 2006 >>> def test(pattern, *tests): |
2007 ... kind, pattern, matcher = stringmatcher(pattern) | 2007 ... kind, pattern, matcher = stringmatcher(pattern) |
2008 ... return (kind, pattern, [bool(matcher(t)) for t in tests]) | 2008 ... return (kind, pattern, [bool(matcher(t)) for t in tests]) |
2009 >>> def itest(pattern, *tests): | |
2010 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False) | |
2011 ... return (kind, pattern, [bool(matcher(t)) for t in tests]) | |
2009 | 2012 |
2010 exact matching (no prefix): | 2013 exact matching (no prefix): |
2011 >>> test('abcdefg', 'abc', 'def', 'abcdefg') | 2014 >>> test('abcdefg', 'abc', 'def', 'abcdefg') |
2012 ('literal', 'abcdefg', [False, False, True]) | 2015 ('literal', 'abcdefg', [False, False, True]) |
2013 | 2016 |
2020 ('literal', 're:foobar', [False, True]) | 2023 ('literal', 're:foobar', [False, True]) |
2021 | 2024 |
2022 unknown prefixes are ignored and treated as literals | 2025 unknown prefixes are ignored and treated as literals |
2023 >>> test('foo:bar', 'foo', 'bar', 'foo:bar') | 2026 >>> test('foo:bar', 'foo', 'bar', 'foo:bar') |
2024 ('literal', 'foo:bar', [False, False, True]) | 2027 ('literal', 'foo:bar', [False, False, True]) |
2028 | |
2029 case insensitive regex matches | |
2030 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar') | |
2031 ('re', 'A.+b', [False, False, True]) | |
2032 | |
2033 case insensitive literal matches | |
2034 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg') | |
2035 ('literal', 'ABCDEFG', [False, False, True]) | |
2025 """ | 2036 """ |
2026 if pattern.startswith('re:'): | 2037 if pattern.startswith('re:'): |
2027 pattern = pattern[3:] | 2038 pattern = pattern[3:] |
2028 try: | 2039 try: |
2029 regex = remod.compile(pattern) | 2040 flags = 0 |
2041 if not casesensitive: | |
2042 flags = remod.I | |
2043 regex = remod.compile(pattern, flags) | |
2030 except remod.error as e: | 2044 except remod.error as e: |
2031 raise error.ParseError(_('invalid regular expression: %s') | 2045 raise error.ParseError(_('invalid regular expression: %s') |
2032 % e) | 2046 % e) |
2033 return 're', pattern, regex.search | 2047 return 're', pattern, regex.search |
2034 elif pattern.startswith('literal:'): | 2048 elif pattern.startswith('literal:'): |
2035 pattern = pattern[8:] | 2049 pattern = pattern[8:] |
2036 return 'literal', pattern, pattern.__eq__ | 2050 |
2051 match = pattern.__eq__ | |
2052 | |
2053 if not casesensitive: | |
2054 ipat = encoding.lower(pattern) | |
2055 match = lambda s: ipat == encoding.lower(s) | |
2056 return 'literal', pattern, match | |
2037 | 2057 |
2038 def shortuser(user): | 2058 def shortuser(user): |
2039 """Return a short representation of a user name or email address.""" | 2059 """Return a short representation of a user name or email address.""" |
2040 f = user.find('@') | 2060 f = user.find('@') |
2041 if f >= 0: | 2061 if f >= 0: |