comparison mercurial/branchmap.py @ 51554:82c1a388e86a

branchcache: explicitly track inheritance "state"

We move from a binary "dirty" flag to a three-value "state": "clean", "inherited", "dirty". The "inherited" state means that the branch cache is not only "clean", but also a duplicate of its parent filter's cache.

If a branch cache is "inherited", we can not only skip writing its value to disk, it is also a good idea to delete any stale value on disk, as that would only waste time (and possibly induce bugs) in the future. We only do this during updates tied to a transaction or to an explicit cache update (e.g. `hg debugupdatecache`). Deleting the file when we merely detect a stale cache during a read-only operation seems more dangerous.

We rename `copy` to `inherit_for` to clarify that we associate a stronger semantic with the operation.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sun, 10 Mar 2024 05:10:00 +0100
parents 2e8a88e5809f
children 54f0dd798346
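To make the new life cycle easier to follow before reading the diff, here is a minimal, self-contained sketch of the three-state tracking described above. The constants and the `inherit_for`/`sync_disk` names mirror the change below, but `SketchBranchCache` and its `vfs` argument are simplified stand-ins for illustration only, not the real `branchcache` API.

# Minimal sketch of the clean/inherited/dirty life cycle (illustrative
# stand-in, not the actual mercurial classes).

STATE_CLEAN = 1      # in-memory data matches the file on disk
STATE_INHERITED = 2  # data is a duplicate of the parent filter's cache
STATE_DIRTY = 3      # new data that still has to be written out


class SketchBranchCache:
    def __init__(self, filtername, entries, inherited=False):
        self.filtername = filtername
        self.entries = dict(entries)
        self._state = STATE_INHERITED if inherited else STATE_CLEAN

    def inherit_for(self, filtername):
        # Duplicate the data for another filter level; the duplicate starts
        # out "inherited", so sync_disk() will never write it out as-is.
        return SketchBranchCache(filtername, self.entries, inherited=True)

    def update(self, new_entries):
        # Any real change to the data makes the cache "dirty".
        self.entries.update(new_entries)
        self._state = STATE_DIRTY

    def sync_disk(self, vfs):
        # Dirty caches are written out; inherited caches instead remove any
        # stale per-filter file, because the parent filter's file already
        # holds the same data. Clean caches need nothing.
        if self._state == STATE_DIRTY:
            vfs.write(self.filtername, self.entries)
            self._state = STATE_CLEAN
        elif self._state == STATE_INHERITED:
            vfs.tryunlink(self.filtername)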
comparison

--- a/mercurial/branchmap.py	51553:2e8a88e5809f
+++ b/mercurial/branchmap.py	51554:82c1a388e86a
@@ -83,12 +83,11 @@
         bcache = self._per_filter[repo.filtername]
         assert bcache._filtername == repo.filtername, (
             bcache._filtername,
             repo.filtername,
         )
-        if bcache._dirty:
-            bcache.write(repo)
+        bcache.sync_disk(repo)
 
     def updatecache(self, repo):
         """Update the cache for the given filtered view on a repository"""
         # This can trigger updates for the caches for subsets of the filtered
         # view, e.g. when there is no cache for this filtered view or the cache
@@ -107,11 +106,11 @@
             # the cache for a subset, then extend that to add info on missing
             # revisions.
             subsetname = subsettable.get(filtername)
             if subsetname is not None:
                 subset = repo.filtered(subsetname)
-                bcache = self[subset].copy(repo)
+                bcache = self[subset].inherit_for(repo)
                 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
             else:
                 # nothing to fall back on, start empty.
                 bcache = branchcache(repo)
@@ -158,11 +157,11 @@
             for candidate in (b'base', b'immutable', b'served'):
                 rview = repo.filtered(candidate)
                 if cache.validfor(rview):
                     cache._filtername = candidate
                     self._per_filter[candidate] = cache
-                    cache._dirty = True
+                    cache._state = STATE_DIRTY
                     cache.write(rview)
                     return
 
     def clear(self):
         self._per_filter.clear()
@@ -171,16 +170,15 @@
         unfi = repo.unfiltered()
         for filtername in repoviewutil.get_ordered_subset():
             cache = self._per_filter.get(filtername)
             if cache is None:
                 continue
-            if cache._dirty:
-                if filtername is None:
-                    repo = unfi
-                else:
-                    repo = unfi.filtered(filtername)
-                cache.write(repo)
+            if filtername is None:
+                repo = unfi
+            else:
+                repo = unfi.filtered(filtername)
+            cache.sync_disk(repo)
 
 
 def _unknownnode(node):
     """raises ValueError when branchcache found a node which does not exists"""
     raise ValueError('node %s does not exist' % node.hex())
@@ -412,10 +410,15 @@
             duration,
         )
         return max_rev
 
 
+STATE_CLEAN = 1
+STATE_INHERITED = 2
+STATE_DIRTY = 3
+
+
 class branchcache(_BaseBranchCache):
     """Branchmap info for a local repo or repoview"""
 
     _base_filename = b"branch2"
 
@@ -429,10 +432,11 @@
         tiprev: Optional[int] = nullrev,
         filteredhash: Optional[bytes] = None,
         closednodes: Optional[Set[bytes]] = None,
         hasnode: Optional[Callable[[bytes], bool]] = None,
         verify_node: bool = False,
+        inherited: bool = False,
     ) -> None:
         """hasnode is a function which can be used to verify whether changelog
         has a given node or not. If it's not provided, we assume that every node
         we have exists in changelog"""
         self._filtername = repo.filtername
@@ -440,11 +444,13 @@
             self.tipnode = repo.nullid
         else:
             self.tipnode = tipnode
         self.tiprev = tiprev
         self.filteredhash = filteredhash
-        self._dirty = False
+        self._state = STATE_CLEAN
+        if inherited:
+            self._state = STATE_INHERITED
 
         super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
         # closednodes is a set of nodes that close their branch. If the branch
         # cache has been updated, it may contain nodes that are no longer
         # heads.
@@ -553,11 +559,11 @@
         filename = cls._base_filename
         if repo.filtername:
             filename = b'%s-%s' % (filename, repo.filtername)
         return filename
 
-    def copy(self, repo):
+    def inherit_for(self, repo):
         """return a deep copy of the branchcache object"""
         assert repo.filtername != self._filtername
         other = type(self)(
             repo=repo,
             # we always do a shally copy of self._entries, and the values is
@@ -567,20 +573,37 @@
             tipnode=self.tipnode,
             tiprev=self.tiprev,
             filteredhash=self.filteredhash,
             closednodes=set(self._closednodes),
             verify_node=self._verify_node,
+            inherited=True,
         )
         # also copy information about the current verification state
         other._verifiedbranches = set(self._verifiedbranches)
         return other
+
+    def sync_disk(self, repo):
+        """synchronise the on disk file with the cache state
+
+        If new value specific to this filter level need to be written, the file
+        will be updated, if the state of the branchcache is inherited from a
+        subset, any stalled on disk file will be deleted.
+
+        That method does nothing if there is nothing to do.
+        """
+        if self._state == STATE_DIRTY:
+            self.write(repo)
+        elif self._state == STATE_INHERITED:
+            filename = self._filename(repo)
+            repo.cachevfs.tryunlink(filename)
 
     def write(self, repo):
         assert self._filtername == repo.filtername, (
             self._filtername,
             repo.filtername,
         )
+        assert self._state == STATE_DIRTY, self._state
         tr = repo.currenttransaction()
         if not getattr(tr, 'finalized', True):
             # Avoid premature writing.
             #
             # (The cache warming setup by localrepo will update the file later.)
@@ -595,11 +618,11 @@
                 b'wrote %s with %d labels and %d nodes\n',
                 _branchcachedesc(repo),
                 len(self._entries),
                 nodecount,
             )
-            self._dirty = False
+            self._state = STATE_CLEAN
         except (IOError, OSError, error.Abort) as inst:
             # Abort may be raised by read only opener, so log and continue
             repo.ui.debug(
                 b"couldn't write branch cache: %s\n"
                 % stringutil.forcebytestr(inst)
@@ -705,11 +728,11 @@
         self.tipnode = cl.node(tiprev)
         self.tiprev = tiprev
         self.filteredhash = scmutil.filteredhash(
             repo, self.tiprev, needobsolete=True
         )
-        self._dirty = True
+        self._state = STATE_DIRTY
         self.write(repo)
 
 
 class remotebranchcache(_BaseBranchCache):
     """Branchmap info for a remote connection, should not write locally"""