comparison mercurial/revlog.py @ 49415:5fe7e9eda0f3

revlog: make _partialmatch fail fast on almost-hex inputs Before this change, resolving a revision like [0123456789^] on a large repo can take multiple seconds because: - hg does not realize this is a revset, so it tries various things, including _partialmatch(b"0123456789^") - after the rust lookup fails, it falls back to pure hg - pure hg takes all-but-last chars and converts them to binary, which *succeeds*, so it does the expensive part.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 15 Aug 2022 16:12:41 +0100
parents 2e726c934fcd
children 5846bc8a2855
comparison
equal deleted inserted replaced
49414:3c5d0f879404 49415:5fe7e9eda0f3
233 FILE_TOO_SHORT_MSG = _( 233 FILE_TOO_SHORT_MSG = _(
234 b'cannot read from revlog %s;' 234 b'cannot read from revlog %s;'
235 b' expected %d bytes from offset %d, data size is %d' 235 b' expected %d bytes from offset %d, data size is %d'
236 ) 236 )
237 237
238 hexdigits = b'0123456789abcdefABCDEF'
239
238 240
239 class revlog: 241 class revlog:
240 """ 242 """
241 the underlying revision storage object 243 the underlying revision storage object
242 244
1507 # fast path: for unfiltered changelog, radix tree is accurate 1509 # fast path: for unfiltered changelog, radix tree is accurate
1508 if not getattr(self, 'filteredrevs', None): 1510 if not getattr(self, 'filteredrevs', None):
1509 ambiguous = True 1511 ambiguous = True
1510 # fall through to slow path that filters hidden revisions 1512 # fall through to slow path that filters hidden revisions
1511 except (AttributeError, ValueError): 1513 except (AttributeError, ValueError):
1512 # we are pure python, or key was too short to search radix tree 1514 # we are pure python, or key is not hex
1513 pass 1515 pass
1514 if ambiguous: 1516 if ambiguous:
1515 raise error.AmbiguousPrefixLookupError( 1517 raise error.AmbiguousPrefixLookupError(
1516 id, self.display_id, _(b'ambiguous identifier') 1518 id, self.display_id, _(b'ambiguous identifier')
1517 ) 1519 )
1521 1523
1522 if len(id) <= 40: 1524 if len(id) <= 40:
1523 # hex(node)[:...] 1525 # hex(node)[:...]
1524 l = len(id) // 2 * 2 # grab an even number of digits 1526 l = len(id) // 2 * 2 # grab an even number of digits
1525 try: 1527 try:
1528 # we're dropping the last digit, so let's check that it's hex,
1529 # to avoid the expensive computation below if it's not
1530 if len(id) % 2 > 0:
1531 if not (id[-1] in hexdigits):
1532 return None
1526 prefix = bin(id[:l]) 1533 prefix = bin(id[:l])
1527 except binascii.Error: 1534 except binascii.Error:
1528 pass 1535 pass
1529 else: 1536 else:
1530 nl = [e[7] for e in self.index if e[7].startswith(prefix)] 1537 nl = [e[7] for e in self.index if e[7].startswith(prefix)]