Mercurial > public > mercurial-scm > hg
comparison mercurial/merge.py @ 35171:b85962350bb3 stable
merge: cache unknown dir checks (issue5716)
As mentioned in D1222, the recent pathconflicts change regresses update
performance in large repositories when many files are being updated.
To mitigate this, we introduce two caches of directories that have
already been found to be either:
- unknown directories, but which are not aliased by files and
so don't need to be checked if they are files again; and
- missing directories, which cannot cause path conflicts, and
cannot contain a file that causes a path conflict.
When checking the paths of a file, testing against these caches means we can
skip tests that involve touching the filesystem.
Differential Revision: https://phab.mercurial-scm.org/D1224
author | Mark Thomas <mbthomas@fb.com> |
---|---|
date | Fri, 24 Nov 2017 12:53:58 -0800 |
parents | 37450a122128 |
children | a92b9f8e11ba |
comparison
equal
deleted
inserted
replaced
35170:c9740b69b9b7 | 35171:b85962350bb3 |
---|---|
651 return (repo.wvfs.audit.check(f) | 651 return (repo.wvfs.audit.check(f) |
652 and repo.wvfs.isfileorlink(f) | 652 and repo.wvfs.isfileorlink(f) |
653 and repo.dirstate.normalize(f) not in repo.dirstate | 653 and repo.dirstate.normalize(f) not in repo.dirstate |
654 and mctx[f2].cmp(wctx[f])) | 654 and mctx[f2].cmp(wctx[f])) |
655 | 655 |
656 def _checkunknowndirs(repo, f): | 656 class _unknowndirschecker(object): |
657 """ | 657 """ |
658 Look for any unknown files or directories that may have a path conflict | 658 Look for any unknown files or directories that may have a path conflict |
659 with a file. If any path prefix of the file exists as a file or link, | 659 with a file. If any path prefix of the file exists as a file or link, |
660 then it conflicts. If the file itself is a directory that contains any | 660 then it conflicts. If the file itself is a directory that contains any |
661 file that is not tracked, then it conflicts. | 661 file that is not tracked, then it conflicts. |
662 | 662 |
663 Returns the shortest path at which a conflict occurs, or None if there is | 663 Returns the shortest path at which a conflict occurs, or None if there is |
664 no conflict. | 664 no conflict. |
665 """ | 665 """ |
666 | 666 def __init__(self): |
667 # Check for path prefixes that exist as unknown files. | 667 # A set of paths known to be good. This prevents repeated checking of |
668 for p in reversed(list(util.finddirs(f))): | 668 # dirs. It will be updated with any new dirs that are checked and found |
669 if (repo.wvfs.audit.check(p) | 669 # to be safe. |
670 and repo.wvfs.isfileorlink(p) | 670 self._unknowndircache = set() |
671 and repo.dirstate.normalize(p) not in repo.dirstate): | 671 |
672 return p | 672 # A set of paths that are known to be absent. This prevents repeated |
673 | 673 # checking of subdirectories that are known not to exist. It will be |
674 # Check if the file conflicts with a directory containing unknown files. | 674 # updated with any new dirs that are checked and found to be absent. |
675 if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): | 675 self._missingdircache = set() |
676 # Does the directory contain any files that are not in the dirstate? | 676 |
677 for p, dirs, files in repo.wvfs.walk(f): | 677 def __call__(self, repo, f): |
678 for fn in files: | 678 # Check for path prefixes that exist as unknown files. |
679 relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) | 679 for p in reversed(list(util.finddirs(f))): |
680 if relf not in repo.dirstate: | 680 if p in self._missingdircache: |
681 return f | 681 return |
682 return None | 682 if p in self._unknowndircache: |
683 continue | |
684 if repo.wvfs.audit.check(p): | |
685 if (repo.wvfs.isfileorlink(p) | |
686 and repo.dirstate.normalize(p) not in repo.dirstate): | |
687 return p | |
688 if not repo.wvfs.lexists(p): | |
689 self._missingdircache.add(p) | |
690 return | |
691 self._unknowndircache.add(p) | |
692 | |
693 # Check if the file conflicts with a directory containing unknown files. | |
694 if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f): | |
695 # Does the directory contain any files that are not in the dirstate? | |
696 for p, dirs, files in repo.wvfs.walk(f): | |
697 for fn in files: | |
698 relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn)) | |
699 if relf not in repo.dirstate: | |
700 return f | |
701 return None | |
683 | 702 |
684 def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce): | 703 def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce): |
685 """ | 704 """ |
686 Considers any actions that care about the presence of conflicting unknown | 705 Considers any actions that care about the presence of conflicting unknown |
687 files. For some actions, the result is to abort; for others, it is to | 706 files. For some actions, the result is to abort; for others, it is to |
699 if config == 'abort': | 718 if config == 'abort': |
700 abortconflicts.update(conflicts) | 719 abortconflicts.update(conflicts) |
701 elif config == 'warn': | 720 elif config == 'warn': |
702 warnconflicts.update(conflicts) | 721 warnconflicts.update(conflicts) |
703 | 722 |
723 checkunknowndirs = _unknowndirschecker() | |
704 for f, (m, args, msg) in actions.iteritems(): | 724 for f, (m, args, msg) in actions.iteritems(): |
705 if m in ('c', 'dc'): | 725 if m in ('c', 'dc'): |
706 if _checkunknownfile(repo, wctx, mctx, f): | 726 if _checkunknownfile(repo, wctx, mctx, f): |
707 fileconflicts.add(f) | 727 fileconflicts.add(f) |
708 elif pathconfig and f not in wctx: | 728 elif pathconfig and f not in wctx: |
709 path = _checkunknowndirs(repo, f) | 729 path = checkunknowndirs(repo, f) |
710 if path is not None: | 730 if path is not None: |
711 pathconflicts.add(path) | 731 pathconflicts.add(path) |
712 elif m == 'dg': | 732 elif m == 'dg': |
713 if _checkunknownfile(repo, wctx, mctx, f, args[0]): | 733 if _checkunknownfile(repo, wctx, mctx, f, args[0]): |
714 fileconflicts.add(f) | 734 fileconflicts.add(f) |