Mercurial > public > mercurial-scm > hg
comparison mercurial/manifest.py @ 45118:d0ef8c1dddd4
manifest: tighter manifest parsing and flag use
In the manifest line, flags are put directly after the hash, so the
parser has been guessing the presence of flags based on the length of
the hash. Replace this assumption with an enumeration of the valid flags
and remove them from the hash first, as they are distinct input values.
Consistently handle the expected 256-bit length of the SHA1-replacement
in the pure Python parser. Check that setting flags will use one of the
blessed values.
Extend write logic in the C version to handle 256-bit hashes as well.
Verify that hashes always have exactly the expected length. Since
1070df141718 we should no longer depend on the old extra-byte hack.
Differential Revision: https://phab.mercurial-scm.org/D8679
author | Joerg Sonnenberger <joerg@bec.de> |
---|---|
date | Mon, 06 Jul 2020 03:43:32 +0200 |
parents | 5a80915e99ce |
children | 19748c73c208 |
comparison
equal
deleted
inserted
replaced
45117:b1e51ef4e536 | 45118:d0ef8c1dddd4 |
---|---|
119 raise StopIteration | 119 raise StopIteration |
120 if pos == -1: | 120 if pos == -1: |
121 self.pos += 1 | 121 self.pos += 1 |
122 return data | 122 return data |
123 zeropos = data.find(b'\x00', pos) | 123 zeropos = data.find(b'\x00', pos) |
124 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40) | 124 nlpos = data.find(b'\n', pos) |
125 flags = self.lm._getflags(data, self.pos, zeropos) | 125 if zeropos == -1 or nlpos == -1 or nlpos < zeropos: |
126 raise error.StorageError(b'Invalid manifest line') | |
127 flags = data[nlpos - 1 : nlpos] | |
128 if flags in _manifestflags: | |
129 hlen = nlpos - zeropos - 2 | |
130 else: | |
131 hlen = nlpos - zeropos - 1 | |
132 flags = b'' | |
133 if hlen not in (40, 64): | |
134 raise error.StorageError(b'Invalid manifest line') | |
135 hashval = unhexlify( | |
136 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen | |
137 ) | |
126 self.pos += 1 | 138 self.pos += 1 |
127 return (data[pos:zeropos], hashval, flags) | 139 return (data[pos:zeropos], hashval, flags) |
128 | 140 |
129 __next__ = next | 141 __next__ = next |
130 | 142 |
136 return s | 148 return s |
137 | 149 |
138 | 150 |
139 def _cmp(a, b): | 151 def _cmp(a, b): |
140 return (a > b) - (a < b) | 152 return (a > b) - (a < b) |
153 | |
154 | |
155 _manifestflags = {b'', b'l', b't', b'x'} | |
141 | 156 |
142 | 157 |
143 class _lazymanifest(object): | 158 class _lazymanifest(object): |
144 """A pure python manifest backed by a byte string. It is supplimented with | 159 """A pure python manifest backed by a byte string. It is supplimented with |
145 internal lists as it is modified, until it is compacted back to a pure byte | 160 internal lists as it is modified, until it is compacted back to a pure byte |
249 return (first, False) | 264 return (first, False) |
250 | 265 |
251 def __contains__(self, key): | 266 def __contains__(self, key): |
252 return self.bsearch(key) != -1 | 267 return self.bsearch(key) != -1 |
253 | 268 |
254 def _getflags(self, data, needle, pos): | |
255 start = pos + 41 | |
256 end = data.find(b"\n", start) | |
257 if end == -1: | |
258 end = len(data) - 1 | |
259 if start == end: | |
260 return b'' | |
261 return self.data[start:end] | |
262 | |
263 def __getitem__(self, key): | 269 def __getitem__(self, key): |
264 if not isinstance(key, bytes): | 270 if not isinstance(key, bytes): |
265 raise TypeError(b"getitem: manifest keys must be a bytes.") | 271 raise TypeError(b"getitem: manifest keys must be a bytes.") |
266 needle = self.bsearch(key) | 272 needle = self.bsearch(key) |
267 if needle == -1: | 273 if needle == -1: |
271 return (data[1], data[2]) | 277 return (data[1], data[2]) |
272 zeropos = data.find(b'\x00', pos) | 278 zeropos = data.find(b'\x00', pos) |
273 nlpos = data.find(b'\n', zeropos) | 279 nlpos = data.find(b'\n', zeropos) |
274 assert 0 <= needle <= len(self.positions) | 280 assert 0 <= needle <= len(self.positions) |
275 assert len(self.extrainfo) == len(self.positions) | 281 assert len(self.extrainfo) == len(self.positions) |
282 if zeropos == -1 or nlpos == -1 or nlpos < zeropos: | |
283 raise error.StorageError(b'Invalid manifest line') | |
276 hlen = nlpos - zeropos - 1 | 284 hlen = nlpos - zeropos - 1 |
277 # Hashes sometimes have an extra byte tucked on the end, so | 285 flags = data[nlpos - 1 : nlpos] |
278 # detect that. | 286 if flags in _manifestflags: |
279 if hlen % 2: | |
280 hlen -= 1 | 287 hlen -= 1 |
288 else: | |
289 flags = b'' | |
290 if hlen not in (40, 64): | |
291 raise error.StorageError(b'Invalid manifest line') | |
281 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen) | 292 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen) |
282 flags = self._getflags(data, needle, zeropos) | |
283 return (hashval, flags) | 293 return (hashval, flags) |
284 | 294 |
285 def __delitem__(self, key): | 295 def __delitem__(self, key): |
286 needle, found = self.bsearch2(key) | 296 needle, found = self.bsearch2(key) |
287 if not found: | 297 if not found: |
406 self.hasremovals = False | 416 self.hasremovals = False |
407 self.extradata = [] | 417 self.extradata = [] |
408 | 418 |
409 def _pack(self, d): | 419 def _pack(self, d): |
410 n = d[1] | 420 n = d[1] |
411 if len(n) == 21 or len(n) == 33: | 421 assert len(n) in (20, 32) |
412 n = n[:-1] | |
413 assert len(n) == 20 or len(n) == 32 | |
414 return d[0] + b'\x00' + hex(n) + d[2] + b'\n' | 422 return d[0] + b'\x00' + hex(n) + d[2] + b'\n' |
415 | 423 |
416 def text(self): | 424 def text(self): |
417 self._compact() | 425 self._compact() |
418 return self.data | 426 return self.data |
607 m2 = m2._matches(match) | 615 m2 = m2._matches(match) |
608 return m1.diff(m2, clean=clean) | 616 return m1.diff(m2, clean=clean) |
609 return self._lm.diff(m2._lm, clean) | 617 return self._lm.diff(m2._lm, clean) |
610 | 618 |
611 def setflag(self, key, flag): | 619 def setflag(self, key, flag): |
620 if flag not in _manifestflags: | |
621 raise TypeError(b"Invalid manifest flag set.") | |
612 self._lm[key] = self[key], flag | 622 self._lm[key] = self[key], flag |
613 | 623 |
614 def get(self, key, default=None): | 624 def get(self, key, default=None): |
615 try: | 625 try: |
616 return self._lm[key][0] | 626 return self._lm[key][0] |
1047 if dir not in self._dirs: | 1057 if dir not in self._dirs: |
1048 self._dirs[dir] = treemanifest(self._subpath(dir)) | 1058 self._dirs[dir] = treemanifest(self._subpath(dir)) |
1049 self._dirs[dir].__setitem__(subpath, n) | 1059 self._dirs[dir].__setitem__(subpath, n) |
1050 else: | 1060 else: |
1051 # manifest nodes are either 20 bytes or 32 bytes, | 1061 # manifest nodes are either 20 bytes or 32 bytes, |
1052 # depending on the hash in use. An extra byte is | 1062 # depending on the hash in use. Assert this as historically |
1053 # occasionally used by hg, but won't ever be | 1063 # sometimes extra bytes were added. |
1054 # persisted. Trim to 21 or 33 bytes as appropriate. | 1064 assert len(n) in (20, 32) |
1055 trim = 21 if len(n) < 25 else 33 | 1065 self._files[f] = n |
1056 self._files[f] = n[:trim] # to match manifestdict's behavior | |
1057 self._dirty = True | 1066 self._dirty = True |
1058 | 1067 |
1059 def _load(self): | 1068 def _load(self): |
1060 if self._loadfunc is not _noop: | 1069 if self._loadfunc is not _noop: |
1061 lf, self._loadfunc = self._loadfunc, _noop | 1070 lf, self._loadfunc = self._loadfunc, _noop |
1064 cf, self._copyfunc = self._copyfunc, _noop | 1073 cf, self._copyfunc = self._copyfunc, _noop |
1065 cf(self) | 1074 cf(self) |
1066 | 1075 |
1067 def setflag(self, f, flags): | 1076 def setflag(self, f, flags): |
1068 """Set the flags (symlink, executable) for path f.""" | 1077 """Set the flags (symlink, executable) for path f.""" |
1078 if flags not in _manifestflags: | |
1079 raise TypeError(b"Invalid manifest flag set.") | |
1069 self._load() | 1080 self._load() |
1070 dir, subpath = _splittopdir(f) | 1081 dir, subpath = _splittopdir(f) |
1071 if dir: | 1082 if dir: |
1072 self._loadlazy(dir) | 1083 self._loadlazy(dir) |
1073 if dir not in self._dirs: | 1084 if dir not in self._dirs: |