comparison mercurial/manifest.py @ 36373:0147a4730420

cleanup: say goodbye to manifestv2 format This experiment was a bust: we'd hoped for smaller repository sizes, but things got larger. Google ended up rolling out tree manifests in a format that's compatible with the original manifest format, and I believe Facebook is doing the same. This code was never implemented as native speedups, so I'm pretty comfortable saying nobody is using the experimental feature. Let's rip it out. I noticed this code still kicking around because I was investigating a repo corruption issue for timeless. .. bc:: Support for the experimental manifestv2 format has been removed, as it was never completed and failed to meet expectations. Differential Revision: https://phab.mercurial-scm.org/D2393
author Augie Fackler <augie@google.com>
date Thu, 22 Feb 2018 20:04:42 -0500
parents 5245bac09e6a
children 6ff8bd691fb8
comparison
equal deleted inserted replaced
36372:b8d0761a85c7 36373:0147a4730420
7 7
8 from __future__ import absolute_import 8 from __future__ import absolute_import
9 9
10 import heapq 10 import heapq
11 import itertools 11 import itertools
12 import os
13 import struct 12 import struct
14 13
15 from .i18n import _ 14 from .i18n import _
16 from .node import ( 15 from .node import (
17 bin, 16 bin,
26 ) 25 )
27 26
28 parsers = policy.importmod(r'parsers') 27 parsers = policy.importmod(r'parsers')
29 propertycache = util.propertycache 28 propertycache = util.propertycache
30 29
31 def _parsev1(data): 30 def _parse(data):
32 # This method does a little bit of excessive-looking 31 # This method does a little bit of excessive-looking
33 # precondition checking. This is so that the behavior of this 32 # precondition checking. This is so that the behavior of this
34 # class exactly matches its C counterpart to try and help 33 # class exactly matches its C counterpart to try and help
35 # prevent surprise breakage for anyone that develops against 34 # prevent surprise breakage for anyone that develops against
36 # the pure version. 35 # the pure version.
45 if len(n) > 40: 44 if len(n) > 40:
46 yield f, bin(n[:40]), n[40:] 45 yield f, bin(n[:40]), n[40:]
47 else: 46 else:
48 yield f, bin(n), '' 47 yield f, bin(n), ''
49 48
50 def _parsev2(data): 49 def _text(it):
51 metadataend = data.find('\n')
52 # Just ignore metadata for now
53 pos = metadataend + 1
54 prevf = ''
55 while pos < len(data):
56 end = data.find('\n', pos + 1) # +1 to skip stem length byte
57 if end == -1:
58 raise ValueError('Manifest ended with incomplete file entry.')
59 stemlen = ord(data[pos:pos + 1])
60 items = data[pos + 1:end].split('\0')
61 f = prevf[:stemlen] + items[0]
62 if prevf > f:
63 raise ValueError('Manifest entries not in sorted order.')
64 fl = items[1]
65 # Just ignore metadata (items[2:] for now)
66 n = data[end + 1:end + 21]
67 yield f, n, fl
68 pos = end + 22
69 prevf = f
70
71 def _parse(data):
72 """Generates (path, node, flags) tuples from a manifest text"""
73 if data.startswith('\0'):
74 return iter(_parsev2(data))
75 else:
76 return iter(_parsev1(data))
77
78 def _text(it, usemanifestv2):
79 """Given an iterator over (path, node, flags) tuples, returns a manifest
80 text"""
81 if usemanifestv2:
82 return _textv2(it)
83 else:
84 return _textv1(it)
85
86 def _textv1(it):
87 files = [] 50 files = []
88 lines = [] 51 lines = []
89 _hex = revlog.hex 52 _hex = revlog.hex
90 for f, n, fl in it: 53 for f, n, fl in it:
91 files.append(f) 54 files.append(f)
92 # if this is changed to support newlines in filenames, 55 # if this is changed to support newlines in filenames,
93 # be sure to check the templates/ dir again (especially *-raw.tmpl) 56 # be sure to check the templates/ dir again (especially *-raw.tmpl)
94 lines.append("%s\0%s%s\n" % (f, _hex(n), fl)) 57 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
95 58
96 _checkforbidden(files)
97 return ''.join(lines)
98
99 def _textv2(it):
100 files = []
101 lines = ['\0\n']
102 prevf = ''
103 for f, n, fl in it:
104 files.append(f)
105 stem = os.path.commonprefix([prevf, f])
106 stemlen = min(len(stem), 255)
107 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
108 prevf = f
109 _checkforbidden(files) 59 _checkforbidden(files)
110 return ''.join(lines) 60 return ''.join(lines)
111 61
112 class lazymanifestiter(object): 62 class lazymanifestiter(object):
113 def __init__(self, lm): 63 def __init__(self, lm):
412 except AttributeError: 362 except AttributeError:
413 pass 363 pass
414 364
415 class manifestdict(object): 365 class manifestdict(object):
416 def __init__(self, data=''): 366 def __init__(self, data=''):
417 if data.startswith('\0'): 367 self._lm = _lazymanifest(data)
418 #_lazymanifest can not parse v2
419 self._lm = _lazymanifest('')
420 for f, n, fl in _parsev2(data):
421 self._lm[f] = n, fl
422 else:
423 self._lm = _lazymanifest(data)
424 368
425 def __getitem__(self, key): 369 def __getitem__(self, key):
426 return self._lm[key][0] 370 return self._lm[key][0]
427 371
428 def find(self, key): 372 def find(self, key):
587 iteritems = items 531 iteritems = items
588 532
589 def iterentries(self): 533 def iterentries(self):
590 return self._lm.iterentries() 534 return self._lm.iterentries()
591 535
592 def text(self, usemanifestv2=False): 536 def text(self):
593 if usemanifestv2: 537 # most likely uses native version
594 return _textv2(self._lm.iterentries()) 538 return self._lm.text()
595 else:
596 # use (probably) native version for v1
597 return self._lm.text()
598 539
599 def fastdelta(self, base, changes): 540 def fastdelta(self, base, changes):
600 """Given a base manifest text as a bytearray and a list of changes 541 """Given a base manifest text as a bytearray and a list of changes
601 relative to that text, compute a delta that can be used by revlog. 542 relative to that text, compute a delta that can be used by revlog.
602 """ 543 """
1136 # and should be a little faster. 1077 # and should be a little faster.
1137 self._files[f] = n 1078 self._files[f] = n
1138 if fl: 1079 if fl:
1139 self._flags[f] = fl 1080 self._flags[f] = fl
1140 1081
1141 def text(self, usemanifestv2=False): 1082 def text(self):
1142 """Get the full data of this manifest as a bytestring.""" 1083 """Get the full data of this manifest as a bytestring."""
1143 self._load() 1084 self._load()
1144 return _text(self.iterentries(), usemanifestv2) 1085 return _text(self.iterentries())
1145 1086
1146 def dirtext(self, usemanifestv2=False): 1087 def dirtext(self):
1147 """Get the full data of this directory as a bytestring. Make sure that 1088 """Get the full data of this directory as a bytestring. Make sure that
1148 any submanifests have been written first, so their nodeids are correct. 1089 any submanifests have been written first, so their nodeids are correct.
1149 """ 1090 """
1150 self._load() 1091 self._load()
1151 flags = self.flags 1092 flags = self.flags
1152 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs] 1093 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1153 files = [(f, self._files[f], flags(f)) for f in self._files] 1094 files = [(f, self._files[f], flags(f)) for f in self._files]
1154 return _text(sorted(dirs + files), usemanifestv2) 1095 return _text(sorted(dirs + files))
1155 1096
1156 def read(self, gettext, readsubtree): 1097 def read(self, gettext, readsubtree):
1157 def _load_for_read(s): 1098 def _load_for_read(s):
1158 s.parse(gettext(), readsubtree) 1099 s.parse(gettext(), readsubtree)
1159 s._dirty = False 1100 s._dirty = False
1206 # During normal operations, we expect to deal with not more than four 1147 # During normal operations, we expect to deal with not more than four
1207 # revs at a time (such as during commit --amend). When rebasing large 1148 # revs at a time (such as during commit --amend). When rebasing large
1208 # stacks of commits, the number can go up, hence the config knob below. 1149 # stacks of commits, the number can go up, hence the config knob below.
1209 cachesize = 4 1150 cachesize = 4
1210 optiontreemanifest = False 1151 optiontreemanifest = False
1211 usemanifestv2 = False
1212 opts = getattr(opener, 'options', None) 1152 opts = getattr(opener, 'options', None)
1213 if opts is not None: 1153 if opts is not None:
1214 cachesize = opts.get('manifestcachesize', cachesize) 1154 cachesize = opts.get('manifestcachesize', cachesize)
1215 optiontreemanifest = opts.get('treemanifest', False) 1155 optiontreemanifest = opts.get('treemanifest', False)
1216 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
1217 1156
1218 self._treeondisk = optiontreemanifest or treemanifest 1157 self._treeondisk = optiontreemanifest or treemanifest
1219 self._usemanifestv2 = usemanifestv2
1220 1158
1221 self._fulltextcache = util.lrucachedict(cachesize) 1159 self._fulltextcache = util.lrucachedict(cachesize)
1222 1160
1223 if dir: 1161 if dir:
1224 assert self._treeondisk, 'opts is %r' % opts 1162 assert self._treeondisk, 'opts is %r' % opts
1260 treemanifest=self._treeondisk) 1198 treemanifest=self._treeondisk)
1261 self._dirlogcache[d] = mfrevlog 1199 self._dirlogcache[d] = mfrevlog
1262 return self._dirlogcache[d] 1200 return self._dirlogcache[d]
1263 1201
1264 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None): 1202 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1265 if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta') 1203 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
1266 and not self._usemanifestv2):
1267 # If our first parent is in the manifest cache, we can 1204 # If our first parent is in the manifest cache, we can
1268 # compute a delta here using properties we know about the 1205 # compute a delta here using properties we know about the
1269 # manifest up-front, which may save time later for the 1206 # manifest up-front, which may save time later for the
1270 # revlog layer. 1207 # revlog layer.
1271 1208
1288 m1 = readtree(self._dir, p1) 1225 m1 = readtree(self._dir, p1)
1289 m2 = readtree(self._dir, p2) 1226 m2 = readtree(self._dir, p2)
1290 n = self._addtree(m, transaction, link, m1, m2, readtree) 1227 n = self._addtree(m, transaction, link, m1, m2, readtree)
1291 arraytext = None 1228 arraytext = None
1292 else: 1229 else:
1293 text = m.text(self._usemanifestv2) 1230 text = m.text()
1294 n = self.addrevision(text, transaction, link, p1, p2) 1231 n = self.addrevision(text, transaction, link, p1, p2)
1295 arraytext = bytearray(text) 1232 arraytext = bytearray(text)
1296 1233
1297 if arraytext is not None: 1234 if arraytext is not None:
1298 self.fulltextcache[n] = arraytext 1235 self.fulltextcache[n] = arraytext
1307 def writesubtree(subm, subp1, subp2): 1244 def writesubtree(subm, subp1, subp2):
1308 sublog = self.dirlog(subm.dir()) 1245 sublog = self.dirlog(subm.dir())
1309 sublog.add(subm, transaction, link, subp1, subp2, None, None, 1246 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1310 readtree=readtree) 1247 readtree=readtree)
1311 m.writesubtrees(m1, m2, writesubtree) 1248 m.writesubtrees(m1, m2, writesubtree)
1312 text = m.dirtext(self._usemanifestv2) 1249 text = m.dirtext()
1313 n = None 1250 n = None
1314 if self._dir != '': 1251 if self._dir != '':
1315 # Double-check whether contents are unchanged to one parent 1252 # Double-check whether contents are unchanged to one parent
1316 if text == m1.dirtext(self._usemanifestv2): 1253 if text == m1.dirtext():
1317 n = m1.node() 1254 n = m1.node()
1318 elif text == m2.dirtext(self._usemanifestv2): 1255 elif text == m2.dirtext():
1319 n = m2.node() 1256 n = m2.node()
1320 1257
1321 if not n: 1258 if not n:
1322 n = self.addrevision(text, transaction, link, m1.node(), m2.node()) 1259 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1323 1260
1491 if the revlog delta is already p1. 1428 if the revlog delta is already p1.
1492 1429
1493 Changing the value of `shallow` has no effect on flat manifests. 1430 Changing the value of `shallow` has no effect on flat manifests.
1494 ''' 1431 '''
1495 revlog = self._revlog() 1432 revlog = self._revlog()
1496 if revlog._usemanifestv2:
1497 # Need to perform a slow delta
1498 r0 = revlog.deltaparent(revlog.rev(self._node))
1499 m0 = self._manifestlog[revlog.node(r0)].read()
1500 m1 = self.read()
1501 md = manifestdict()
1502 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1503 if n1:
1504 md[f] = n1
1505 if fl1:
1506 md.setflag(f, fl1)
1507 return md
1508
1509 r = revlog.rev(self._node) 1433 r = revlog.rev(self._node)
1510 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) 1434 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1511 return manifestdict(d) 1435 return manifestdict(d)
1512 1436
1513 def find(self, key): 1437 def find(self, key):
1606 subdirectory entry will be reported as it appears in the manifest, i.e. 1530 subdirectory entry will be reported as it appears in the manifest, i.e.
1607 the subdirectory will be reported among files and distinguished only by 1531 the subdirectory will be reported among files and distinguished only by
1608 its 't' flag. 1532 its 't' flag.
1609 ''' 1533 '''
1610 revlog = self._revlog() 1534 revlog = self._revlog()
1611 if shallow and not revlog._usemanifestv2: 1535 if shallow:
1612 r = revlog.rev(self._node) 1536 r = revlog.rev(self._node)
1613 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) 1537 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1614 return manifestdict(d) 1538 return manifestdict(d)
1615 else: 1539 else:
1616 # Need to perform a slow delta 1540 # Need to perform a slow delta