Mercurial > public > mercurial-scm > hg
comparison mercurial/manifest.py @ 36373:0147a4730420
cleanup: say goodbye to manifestv2 format
This experiment was a bust: we'd hoped for smaller repository sizes,
but things got larger. Google ended up rolling out tree manifests in a
format that's compatible with the original manifest format, and I
believe Facebook is doing the same. This code was never implemented as
native speedups, so I'm pretty comfortable saying nobody is using the
experimental feature. Let's rip it out.
I noticed this code still kicking around because I was investigating a
repo corruption issue for timeless.
.. bc::
Support for the experimental manifestv2 format has been removed, as
it was never completed and failed to meet expectations.
Differential Revision: https://phab.mercurial-scm.org/D2393
author | Augie Fackler <augie@google.com> |
---|---|
date | Thu, 22 Feb 2018 20:04:42 -0500 |
parents | 5245bac09e6a |
children | 6ff8bd691fb8 |
comparison
equal
deleted
inserted
replaced
36372:b8d0761a85c7 | 36373:0147a4730420 |
---|---|
7 | 7 |
8 from __future__ import absolute_import | 8 from __future__ import absolute_import |
9 | 9 |
10 import heapq | 10 import heapq |
11 import itertools | 11 import itertools |
12 import os | |
13 import struct | 12 import struct |
14 | 13 |
15 from .i18n import _ | 14 from .i18n import _ |
16 from .node import ( | 15 from .node import ( |
17 bin, | 16 bin, |
26 ) | 25 ) |
27 | 26 |
28 parsers = policy.importmod(r'parsers') | 27 parsers = policy.importmod(r'parsers') |
29 propertycache = util.propertycache | 28 propertycache = util.propertycache |
30 | 29 |
31 def _parsev1(data): | 30 def _parse(data): |
32 # This method does a little bit of excessive-looking | 31 # This method does a little bit of excessive-looking |
33 # precondition checking. This is so that the behavior of this | 32 # precondition checking. This is so that the behavior of this |
34 # class exactly matches its C counterpart to try and help | 33 # class exactly matches its C counterpart to try and help |
35 # prevent surprise breakage for anyone that develops against | 34 # prevent surprise breakage for anyone that develops against |
36 # the pure version. | 35 # the pure version. |
45 if len(n) > 40: | 44 if len(n) > 40: |
46 yield f, bin(n[:40]), n[40:] | 45 yield f, bin(n[:40]), n[40:] |
47 else: | 46 else: |
48 yield f, bin(n), '' | 47 yield f, bin(n), '' |
49 | 48 |
50 def _parsev2(data): | 49 def _text(it): |
51 metadataend = data.find('\n') | |
52 # Just ignore metadata for now | |
53 pos = metadataend + 1 | |
54 prevf = '' | |
55 while pos < len(data): | |
56 end = data.find('\n', pos + 1) # +1 to skip stem length byte | |
57 if end == -1: | |
58 raise ValueError('Manifest ended with incomplete file entry.') | |
59 stemlen = ord(data[pos:pos + 1]) | |
60 items = data[pos + 1:end].split('\0') | |
61 f = prevf[:stemlen] + items[0] | |
62 if prevf > f: | |
63 raise ValueError('Manifest entries not in sorted order.') | |
64 fl = items[1] | |
65 # Just ignore metadata (items[2:] for now) | |
66 n = data[end + 1:end + 21] | |
67 yield f, n, fl | |
68 pos = end + 22 | |
69 prevf = f | |
70 | |
71 def _parse(data): | |
72 """Generates (path, node, flags) tuples from a manifest text""" | |
73 if data.startswith('\0'): | |
74 return iter(_parsev2(data)) | |
75 else: | |
76 return iter(_parsev1(data)) | |
77 | |
78 def _text(it, usemanifestv2): | |
79 """Given an iterator over (path, node, flags) tuples, returns a manifest | |
80 text""" | |
81 if usemanifestv2: | |
82 return _textv2(it) | |
83 else: | |
84 return _textv1(it) | |
85 | |
86 def _textv1(it): | |
87 files = [] | 50 files = [] |
88 lines = [] | 51 lines = [] |
89 _hex = revlog.hex | 52 _hex = revlog.hex |
90 for f, n, fl in it: | 53 for f, n, fl in it: |
91 files.append(f) | 54 files.append(f) |
92 # if this is changed to support newlines in filenames, | 55 # if this is changed to support newlines in filenames, |
93 # be sure to check the templates/ dir again (especially *-raw.tmpl) | 56 # be sure to check the templates/ dir again (especially *-raw.tmpl) |
94 lines.append("%s\0%s%s\n" % (f, _hex(n), fl)) | 57 lines.append("%s\0%s%s\n" % (f, _hex(n), fl)) |
95 | 58 |
96 _checkforbidden(files) | |
97 return ''.join(lines) | |
98 | |
99 def _textv2(it): | |
100 files = [] | |
101 lines = ['\0\n'] | |
102 prevf = '' | |
103 for f, n, fl in it: | |
104 files.append(f) | |
105 stem = os.path.commonprefix([prevf, f]) | |
106 stemlen = min(len(stem), 255) | |
107 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) | |
108 prevf = f | |
109 _checkforbidden(files) | 59 _checkforbidden(files) |
110 return ''.join(lines) | 60 return ''.join(lines) |
111 | 61 |
112 class lazymanifestiter(object): | 62 class lazymanifestiter(object): |
113 def __init__(self, lm): | 63 def __init__(self, lm): |
412 except AttributeError: | 362 except AttributeError: |
413 pass | 363 pass |
414 | 364 |
415 class manifestdict(object): | 365 class manifestdict(object): |
416 def __init__(self, data=''): | 366 def __init__(self, data=''): |
417 if data.startswith('\0'): | 367 self._lm = _lazymanifest(data) |
418 #_lazymanifest can not parse v2 | |
419 self._lm = _lazymanifest('') | |
420 for f, n, fl in _parsev2(data): | |
421 self._lm[f] = n, fl | |
422 else: | |
423 self._lm = _lazymanifest(data) | |
424 | 368 |
425 def __getitem__(self, key): | 369 def __getitem__(self, key): |
426 return self._lm[key][0] | 370 return self._lm[key][0] |
427 | 371 |
428 def find(self, key): | 372 def find(self, key): |
587 iteritems = items | 531 iteritems = items |
588 | 532 |
589 def iterentries(self): | 533 def iterentries(self): |
590 return self._lm.iterentries() | 534 return self._lm.iterentries() |
591 | 535 |
592 def text(self, usemanifestv2=False): | 536 def text(self): |
593 if usemanifestv2: | 537 # most likely uses native version |
594 return _textv2(self._lm.iterentries()) | 538 return self._lm.text() |
595 else: | |
596 # use (probably) native version for v1 | |
597 return self._lm.text() | |
598 | 539 |
599 def fastdelta(self, base, changes): | 540 def fastdelta(self, base, changes): |
600 """Given a base manifest text as a bytearray and a list of changes | 541 """Given a base manifest text as a bytearray and a list of changes |
601 relative to that text, compute a delta that can be used by revlog. | 542 relative to that text, compute a delta that can be used by revlog. |
602 """ | 543 """ |
1136 # and should be a little faster. | 1077 # and should be a little faster. |
1137 self._files[f] = n | 1078 self._files[f] = n |
1138 if fl: | 1079 if fl: |
1139 self._flags[f] = fl | 1080 self._flags[f] = fl |
1140 | 1081 |
1141 def text(self, usemanifestv2=False): | 1082 def text(self): |
1142 """Get the full data of this manifest as a bytestring.""" | 1083 """Get the full data of this manifest as a bytestring.""" |
1143 self._load() | 1084 self._load() |
1144 return _text(self.iterentries(), usemanifestv2) | 1085 return _text(self.iterentries()) |
1145 | 1086 |
1146 def dirtext(self, usemanifestv2=False): | 1087 def dirtext(self): |
1147 """Get the full data of this directory as a bytestring. Make sure that | 1088 """Get the full data of this directory as a bytestring. Make sure that |
1148 any submanifests have been written first, so their nodeids are correct. | 1089 any submanifests have been written first, so their nodeids are correct. |
1149 """ | 1090 """ |
1150 self._load() | 1091 self._load() |
1151 flags = self.flags | 1092 flags = self.flags |
1152 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs] | 1093 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs] |
1153 files = [(f, self._files[f], flags(f)) for f in self._files] | 1094 files = [(f, self._files[f], flags(f)) for f in self._files] |
1154 return _text(sorted(dirs + files), usemanifestv2) | 1095 return _text(sorted(dirs + files)) |
1155 | 1096 |
1156 def read(self, gettext, readsubtree): | 1097 def read(self, gettext, readsubtree): |
1157 def _load_for_read(s): | 1098 def _load_for_read(s): |
1158 s.parse(gettext(), readsubtree) | 1099 s.parse(gettext(), readsubtree) |
1159 s._dirty = False | 1100 s._dirty = False |
1206 # During normal operations, we expect to deal with not more than four | 1147 # During normal operations, we expect to deal with not more than four |
1207 # revs at a time (such as during commit --amend). When rebasing large | 1148 # revs at a time (such as during commit --amend). When rebasing large |
1208 # stacks of commits, the number can go up, hence the config knob below. | 1149 # stacks of commits, the number can go up, hence the config knob below. |
1209 cachesize = 4 | 1150 cachesize = 4 |
1210 optiontreemanifest = False | 1151 optiontreemanifest = False |
1211 usemanifestv2 = False | |
1212 opts = getattr(opener, 'options', None) | 1152 opts = getattr(opener, 'options', None) |
1213 if opts is not None: | 1153 if opts is not None: |
1214 cachesize = opts.get('manifestcachesize', cachesize) | 1154 cachesize = opts.get('manifestcachesize', cachesize) |
1215 optiontreemanifest = opts.get('treemanifest', False) | 1155 optiontreemanifest = opts.get('treemanifest', False) |
1216 usemanifestv2 = opts.get('manifestv2', usemanifestv2) | |
1217 | 1156 |
1218 self._treeondisk = optiontreemanifest or treemanifest | 1157 self._treeondisk = optiontreemanifest or treemanifest |
1219 self._usemanifestv2 = usemanifestv2 | |
1220 | 1158 |
1221 self._fulltextcache = util.lrucachedict(cachesize) | 1159 self._fulltextcache = util.lrucachedict(cachesize) |
1222 | 1160 |
1223 if dir: | 1161 if dir: |
1224 assert self._treeondisk, 'opts is %r' % opts | 1162 assert self._treeondisk, 'opts is %r' % opts |
1260 treemanifest=self._treeondisk) | 1198 treemanifest=self._treeondisk) |
1261 self._dirlogcache[d] = mfrevlog | 1199 self._dirlogcache[d] = mfrevlog |
1262 return self._dirlogcache[d] | 1200 return self._dirlogcache[d] |
1263 | 1201 |
1264 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None): | 1202 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None): |
1265 if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta') | 1203 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'): |
1266 and not self._usemanifestv2): | |
1267 # If our first parent is in the manifest cache, we can | 1204 # If our first parent is in the manifest cache, we can |
1268 # compute a delta here using properties we know about the | 1205 # compute a delta here using properties we know about the |
1269 # manifest up-front, which may save time later for the | 1206 # manifest up-front, which may save time later for the |
1270 # revlog layer. | 1207 # revlog layer. |
1271 | 1208 |
1288 m1 = readtree(self._dir, p1) | 1225 m1 = readtree(self._dir, p1) |
1289 m2 = readtree(self._dir, p2) | 1226 m2 = readtree(self._dir, p2) |
1290 n = self._addtree(m, transaction, link, m1, m2, readtree) | 1227 n = self._addtree(m, transaction, link, m1, m2, readtree) |
1291 arraytext = None | 1228 arraytext = None |
1292 else: | 1229 else: |
1293 text = m.text(self._usemanifestv2) | 1230 text = m.text() |
1294 n = self.addrevision(text, transaction, link, p1, p2) | 1231 n = self.addrevision(text, transaction, link, p1, p2) |
1295 arraytext = bytearray(text) | 1232 arraytext = bytearray(text) |
1296 | 1233 |
1297 if arraytext is not None: | 1234 if arraytext is not None: |
1298 self.fulltextcache[n] = arraytext | 1235 self.fulltextcache[n] = arraytext |
1307 def writesubtree(subm, subp1, subp2): | 1244 def writesubtree(subm, subp1, subp2): |
1308 sublog = self.dirlog(subm.dir()) | 1245 sublog = self.dirlog(subm.dir()) |
1309 sublog.add(subm, transaction, link, subp1, subp2, None, None, | 1246 sublog.add(subm, transaction, link, subp1, subp2, None, None, |
1310 readtree=readtree) | 1247 readtree=readtree) |
1311 m.writesubtrees(m1, m2, writesubtree) | 1248 m.writesubtrees(m1, m2, writesubtree) |
1312 text = m.dirtext(self._usemanifestv2) | 1249 text = m.dirtext() |
1313 n = None | 1250 n = None |
1314 if self._dir != '': | 1251 if self._dir != '': |
1315 # Double-check whether contents are unchanged to one parent | 1252 # Double-check whether contents are unchanged to one parent |
1316 if text == m1.dirtext(self._usemanifestv2): | 1253 if text == m1.dirtext(): |
1317 n = m1.node() | 1254 n = m1.node() |
1318 elif text == m2.dirtext(self._usemanifestv2): | 1255 elif text == m2.dirtext(): |
1319 n = m2.node() | 1256 n = m2.node() |
1320 | 1257 |
1321 if not n: | 1258 if not n: |
1322 n = self.addrevision(text, transaction, link, m1.node(), m2.node()) | 1259 n = self.addrevision(text, transaction, link, m1.node(), m2.node()) |
1323 | 1260 |
1491 if the revlog delta is already p1. | 1428 if the revlog delta is already p1. |
1492 | 1429 |
1493 Changing the value of `shallow` has no effect on flat manifests. | 1430 Changing the value of `shallow` has no effect on flat manifests. |
1494 ''' | 1431 ''' |
1495 revlog = self._revlog() | 1432 revlog = self._revlog() |
1496 if revlog._usemanifestv2: | |
1497 # Need to perform a slow delta | |
1498 r0 = revlog.deltaparent(revlog.rev(self._node)) | |
1499 m0 = self._manifestlog[revlog.node(r0)].read() | |
1500 m1 = self.read() | |
1501 md = manifestdict() | |
1502 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems(): | |
1503 if n1: | |
1504 md[f] = n1 | |
1505 if fl1: | |
1506 md.setflag(f, fl1) | |
1507 return md | |
1508 | |
1509 r = revlog.rev(self._node) | 1433 r = revlog.rev(self._node) |
1510 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) | 1434 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) |
1511 return manifestdict(d) | 1435 return manifestdict(d) |
1512 | 1436 |
1513 def find(self, key): | 1437 def find(self, key): |
1606 subdirectory entry will be reported as it appears in the manifest, i.e. | 1530 subdirectory entry will be reported as it appears in the manifest, i.e. |
1607 the subdirectory will be reported among files and distinguished only by | 1531 the subdirectory will be reported among files and distinguished only by |
1608 its 't' flag. | 1532 its 't' flag. |
1609 ''' | 1533 ''' |
1610 revlog = self._revlog() | 1534 revlog = self._revlog() |
1611 if shallow and not revlog._usemanifestv2: | 1535 if shallow: |
1612 r = revlog.rev(self._node) | 1536 r = revlog.rev(self._node) |
1613 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) | 1537 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) |
1614 return manifestdict(d) | 1538 return manifestdict(d) |
1615 else: | 1539 else: |
1616 # Need to perform a slow delta | 1540 # Need to perform a slow delta |