comparison mercurial/manifest.py @ 45118:d0ef8c1dddd4

manifest: tighter manifest parsing and flag use In the manifest line, flags are put directly after the hash, so the parser has been guessing the presence of flags based on the length of the hash. Replace this assumption by an enumeration of the valid flags and remove them from the hash first as they are distinct input values. Consistently handle the expected 256bit length of the SHA1-replacement in the pure Python parser. Check that setting flags will use one of the blessed values. Extend write logic in the C version to handle 256bit hashes as well. Verify that hashes always have exactly the expected length. Since 1070df141718 we should no longer depend on the old extra-byte hack. Differential Revision: https://phab.mercurial-scm.org/D8679
author Joerg Sonnenberger <joerg@bec.de>
date Mon, 06 Jul 2020 03:43:32 +0200
parents 5a80915e99ce
children 19748c73c208
comparison
equal deleted inserted replaced
45117:b1e51ef4e536 45118:d0ef8c1dddd4
119 raise StopIteration 119 raise StopIteration
120 if pos == -1: 120 if pos == -1:
121 self.pos += 1 121 self.pos += 1
122 return data 122 return data
123 zeropos = data.find(b'\x00', pos) 123 zeropos = data.find(b'\x00', pos)
124 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40) 124 nlpos = data.find(b'\n', pos)
125 flags = self.lm._getflags(data, self.pos, zeropos) 125 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
126 raise error.StorageError(b'Invalid manifest line')
127 flags = data[nlpos - 1 : nlpos]
128 if flags in _manifestflags:
129 hlen = nlpos - zeropos - 2
130 else:
131 hlen = nlpos - zeropos - 1
132 flags = b''
133 if hlen not in (40, 64):
134 raise error.StorageError(b'Invalid manifest line')
135 hashval = unhexlify(
136 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
137 )
126 self.pos += 1 138 self.pos += 1
127 return (data[pos:zeropos], hashval, flags) 139 return (data[pos:zeropos], hashval, flags)
128 140
129 __next__ = next 141 __next__ = next
130 142
136 return s 148 return s
137 149
138 150
139 def _cmp(a, b): 151 def _cmp(a, b):
140 return (a > b) - (a < b) 152 return (a > b) - (a < b)
153
154
155 _manifestflags = {b'', b'l', b't', b'x'}
141 156
142 157
143 class _lazymanifest(object): 158 class _lazymanifest(object):
144 """A pure python manifest backed by a byte string. It is supplimented with 159 """A pure python manifest backed by a byte string. It is supplimented with
145 internal lists as it is modified, until it is compacted back to a pure byte 160 internal lists as it is modified, until it is compacted back to a pure byte
249 return (first, False) 264 return (first, False)
250 265
251 def __contains__(self, key): 266 def __contains__(self, key):
252 return self.bsearch(key) != -1 267 return self.bsearch(key) != -1
253 268
254 def _getflags(self, data, needle, pos):
255 start = pos + 41
256 end = data.find(b"\n", start)
257 if end == -1:
258 end = len(data) - 1
259 if start == end:
260 return b''
261 return self.data[start:end]
262
263 def __getitem__(self, key): 269 def __getitem__(self, key):
264 if not isinstance(key, bytes): 270 if not isinstance(key, bytes):
265 raise TypeError(b"getitem: manifest keys must be a bytes.") 271 raise TypeError(b"getitem: manifest keys must be a bytes.")
266 needle = self.bsearch(key) 272 needle = self.bsearch(key)
267 if needle == -1: 273 if needle == -1:
271 return (data[1], data[2]) 277 return (data[1], data[2])
272 zeropos = data.find(b'\x00', pos) 278 zeropos = data.find(b'\x00', pos)
273 nlpos = data.find(b'\n', zeropos) 279 nlpos = data.find(b'\n', zeropos)
274 assert 0 <= needle <= len(self.positions) 280 assert 0 <= needle <= len(self.positions)
275 assert len(self.extrainfo) == len(self.positions) 281 assert len(self.extrainfo) == len(self.positions)
282 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
283 raise error.StorageError(b'Invalid manifest line')
276 hlen = nlpos - zeropos - 1 284 hlen = nlpos - zeropos - 1
277 # Hashes sometimes have an extra byte tucked on the end, so 285 flags = data[nlpos - 1 : nlpos]
278 # detect that. 286 if flags in _manifestflags:
279 if hlen % 2:
280 hlen -= 1 287 hlen -= 1
288 else:
289 flags = b''
290 if hlen not in (40, 64):
291 raise error.StorageError(b'Invalid manifest line')
281 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen) 292 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
282 flags = self._getflags(data, needle, zeropos)
283 return (hashval, flags) 293 return (hashval, flags)
284 294
285 def __delitem__(self, key): 295 def __delitem__(self, key):
286 needle, found = self.bsearch2(key) 296 needle, found = self.bsearch2(key)
287 if not found: 297 if not found:
406 self.hasremovals = False 416 self.hasremovals = False
407 self.extradata = [] 417 self.extradata = []
408 418
409 def _pack(self, d): 419 def _pack(self, d):
410 n = d[1] 420 n = d[1]
411 if len(n) == 21 or len(n) == 33: 421 assert len(n) in (20, 32)
412 n = n[:-1]
413 assert len(n) == 20 or len(n) == 32
414 return d[0] + b'\x00' + hex(n) + d[2] + b'\n' 422 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
415 423
416 def text(self): 424 def text(self):
417 self._compact() 425 self._compact()
418 return self.data 426 return self.data
607 m2 = m2._matches(match) 615 m2 = m2._matches(match)
608 return m1.diff(m2, clean=clean) 616 return m1.diff(m2, clean=clean)
609 return self._lm.diff(m2._lm, clean) 617 return self._lm.diff(m2._lm, clean)
610 618
611 def setflag(self, key, flag): 619 def setflag(self, key, flag):
620 if flag not in _manifestflags:
621 raise TypeError(b"Invalid manifest flag set.")
612 self._lm[key] = self[key], flag 622 self._lm[key] = self[key], flag
613 623
614 def get(self, key, default=None): 624 def get(self, key, default=None):
615 try: 625 try:
616 return self._lm[key][0] 626 return self._lm[key][0]
1047 if dir not in self._dirs: 1057 if dir not in self._dirs:
1048 self._dirs[dir] = treemanifest(self._subpath(dir)) 1058 self._dirs[dir] = treemanifest(self._subpath(dir))
1049 self._dirs[dir].__setitem__(subpath, n) 1059 self._dirs[dir].__setitem__(subpath, n)
1050 else: 1060 else:
1051 # manifest nodes are either 20 bytes or 32 bytes, 1061 # manifest nodes are either 20 bytes or 32 bytes,
1052 # depending on the hash in use. An extra byte is 1062 # depending on the hash in use. Assert this as historically
1053 # occasionally used by hg, but won't ever be 1063 # sometimes extra bytes were added.
1054 # persisted. Trim to 21 or 33 bytes as appropriate. 1064 assert len(n) in (20, 32)
1055 trim = 21 if len(n) < 25 else 33 1065 self._files[f] = n
1056 self._files[f] = n[:trim] # to match manifestdict's behavior
1057 self._dirty = True 1066 self._dirty = True
1058 1067
1059 def _load(self): 1068 def _load(self):
1060 if self._loadfunc is not _noop: 1069 if self._loadfunc is not _noop:
1061 lf, self._loadfunc = self._loadfunc, _noop 1070 lf, self._loadfunc = self._loadfunc, _noop
1064 cf, self._copyfunc = self._copyfunc, _noop 1073 cf, self._copyfunc = self._copyfunc, _noop
1065 cf(self) 1074 cf(self)
1066 1075
1067 def setflag(self, f, flags): 1076 def setflag(self, f, flags):
1068 """Set the flags (symlink, executable) for path f.""" 1077 """Set the flags (symlink, executable) for path f."""
1078 if flags not in _manifestflags:
1079 raise TypeError(b"Invalid manifest flag set.")
1069 self._load() 1080 self._load()
1070 dir, subpath = _splittopdir(f) 1081 dir, subpath = _splittopdir(f)
1071 if dir: 1082 if dir:
1072 self._loadlazy(dir) 1083 self._loadlazy(dir)
1073 if dir not in self._dirs: 1084 if dir not in self._dirs: