comparison mercurial/changelog.py @ 42406:f385ba70e4af

changelog: optionally store added and removed files in changeset extras As mentioned in an earlier patch, copies._chain() is used a lot in the changeset-centric version of pathcopies(). It is expensive because it needs to look at the manifest in order to filter out copies whose target file has since been removed. I want to store the sets of added and removed files in the changeset in order to speed that up. This patch does the writing part of that. It could easily be a separate config, but it's currently tied to experimental.copies.write-to since that's the only real use case (it will also make the {file_*} template keywords faster, but I doubt that anyone cares enough about those to write extra metadata for them). The new information is stored in the changeset extras. Since they're always subsets of the changeset's "files" list, they're stored as indexes into that list. I've stored the indexes as stringified ints separated by NUL bytes. The size of 00changelog.d for the hg repo increased by 0.28% (compared to the size with only copy information in the changesets, which in turn is 0.17% larger than without copy information). We could store only the delta between the indexes and we could store them in binary, but the chosen format is more readable. We could also have implemented this as a cache outside the changelog. One advantage of doing it that way is that we would get the speedups from the {file_*} template keywords also on old repos. Another advantage is that we can rewrite the cache if we find a bug in how we calculate the set of files. A disadvantage is that it would be more complex. Another is that it would surely use more space. We already write the copy information to the changeset extras, so it seems like a small step to also write these file sets. Differential Revision: https://phab.mercurial-scm.org/D6416
author Martin von Zweigbergk <martinvonz@google.com>
date Tue, 14 May 2019 22:19:51 -0700
parents 2a7109cc5a28
children 602469a91550
comparison
equal deleted inserted replaced
42405:0c72eddb4be5 42406:f385ba70e4af
96 return copies 96 return copies
97 except ValueError: 97 except ValueError:
98 # Perhaps someone had chosen the same key name (e.g. "p1copies") and 98 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
99 # used different syntax for the value. 99 # used different syntax for the value.
100 return None 100 return None
101
102 def encodefileindices(files, subset):
103 subset = set(subset)
104 indices = []
105 for i, f in enumerate(files):
106 if f in subset:
107 indices.append('%d' % i)
108 return '\0'.join(indices)
101 109
102 def stripdesc(desc): 110 def stripdesc(desc):
103 """strip trailing whitespace and leading and trailing empty lines""" 111 """strip trailing whitespace and leading and trailing empty lines"""
104 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n') 112 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
105 113
562 last = text.index("\n\n") 570 last = text.index("\n\n")
563 l = text[:last].split('\n') 571 l = text[:last].split('\n')
564 return l[3:] 572 return l[3:]
565 573
566 def add(self, manifest, files, desc, transaction, p1, p2, 574 def add(self, manifest, files, desc, transaction, p1, p2,
567 user, date=None, extra=None, p1copies=None, p2copies=None): 575 user, date=None, extra=None, p1copies=None, p2copies=None,
576 filesadded=None, filesremoved=None):
568 # Convert to UTF-8 encoded bytestrings as the very first 577 # Convert to UTF-8 encoded bytestrings as the very first
569 # thing: calling any method on a localstr object will turn it 578 # thing: calling any method on a localstr object will turn it
570 # into a str object and the cached UTF-8 string is thus lost. 579 # into a str object and the cached UTF-8 string is thus lost.
571 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc) 580 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
572 581
591 if branch in ("default", ""): 600 if branch in ("default", ""):
592 del extra["branch"] 601 del extra["branch"]
593 elif branch in (".", "null", "tip"): 602 elif branch in (".", "null", "tip"):
594 raise error.StorageError(_('the name \'%s\' is reserved') 603 raise error.StorageError(_('the name \'%s\' is reserved')
595 % branch) 604 % branch)
596 if (p1copies is not None or p2copies is not None) and extra is None: 605 extrasentries = p1copies, p2copies, filesadded, filesremoved
606 if extra is None and any(x is not None for x in extrasentries):
597 extra = {} 607 extra = {}
598 if p1copies is not None: 608 if p1copies is not None:
599 extra['p1copies'] = encodecopies(p1copies) 609 extra['p1copies'] = encodecopies(p1copies)
600 if p2copies is not None: 610 if p2copies is not None:
601 extra['p2copies'] = encodecopies(p2copies) 611 extra['p2copies'] = encodecopies(p2copies)
612 sortedfiles = sorted(files)
613 if filesadded is not None:
614 extra['filesadded'] = encodefileindices(sortedfiles, filesadded)
615 if filesremoved is not None:
616 extra['filesremoved'] = encodefileindices(sortedfiles, filesremoved)
602 617
603 if extra: 618 if extra:
604 extra = encodeextra(extra) 619 extra = encodeextra(extra)
605 parseddate = "%s %s" % (parseddate, extra) 620 parseddate = "%s %s" % (parseddate, extra)
606 l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc] 621 l = [hex(manifest), user, parseddate] + sortedfiles + ["", desc]
607 text = "\n".join(l) 622 text = "\n".join(l)
608 return self.addrevision(text, transaction, len(self), p1, p2) 623 return self.addrevision(text, transaction, len(self), p1, p2)
609 624
610 def branchinfo(self, rev): 625 def branchinfo(self, rev):
611 """return the branch name and open/close state of a revision 626 """return the branch name and open/close state of a revision