comparison mercurial/revlog.py @ 47759:d7515d29761d stable 5.9rc0
branching: merge default into stable
This marks the start of the 5.9 freeze.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 21 Jul 2021 22:52:09 +0200 |
parents | 411dc27fd9fd bc8536e09a20 |
children | 60ccc86a12f3 |
47054:29ea3b4c4f62 | 47759:d7515d29761d |
---|---|
1 # revlog.py - storage back-end for mercurial | 1 # revlog.py - storage back-end for mercurial |
2 # coding: utf8 | |
2 # | 3 # |
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> | 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> |
4 # | 5 # |
5 # This software may be used and distributed according to the terms of the | 6 # This software may be used and distributed according to the terms of the |
6 # GNU General Public License version 2 or any later version. | 7 # GNU General Public License version 2 or any later version. |
24 | 25 |
25 # import stuff from node for others to import from revlog | 26 # import stuff from node for others to import from revlog |
26 from .node import ( | 27 from .node import ( |
27 bin, | 28 bin, |
28 hex, | 29 hex, |
29 nullhex, | |
30 nullid, | |
31 nullrev, | 30 nullrev, |
32 sha1nodeconstants, | 31 sha1nodeconstants, |
33 short, | 32 short, |
34 wdirfilenodeids, | |
35 wdirhex, | |
36 wdirid, | |
37 wdirrev, | 33 wdirrev, |
38 ) | 34 ) |
39 from .i18n import _ | 35 from .i18n import _ |
40 from .pycompat import getattr | 36 from .pycompat import getattr |
41 from .revlogutils.constants import ( | 37 from .revlogutils.constants import ( |
38 ALL_KINDS, | |
39 CHANGELOGV2, | |
40 COMP_MODE_DEFAULT, | |
41 COMP_MODE_INLINE, | |
42 COMP_MODE_PLAIN, | |
43 FEATURES_BY_VERSION, | |
42 FLAG_GENERALDELTA, | 44 FLAG_GENERALDELTA, |
43 FLAG_INLINE_DATA, | 45 FLAG_INLINE_DATA, |
44 INDEX_ENTRY_V0, | |
45 INDEX_ENTRY_V1, | |
46 INDEX_ENTRY_V2, | |
47 INDEX_HEADER, | 46 INDEX_HEADER, |
47 KIND_CHANGELOG, | |
48 REVLOGV0, | 48 REVLOGV0, |
49 REVLOGV1, | 49 REVLOGV1, |
50 REVLOGV1_FLAGS, | 50 REVLOGV1_FLAGS, |
51 REVLOGV2, | 51 REVLOGV2, |
52 REVLOGV2_FLAGS, | 52 REVLOGV2_FLAGS, |
53 REVLOG_DEFAULT_FLAGS, | 53 REVLOG_DEFAULT_FLAGS, |
54 REVLOG_DEFAULT_FORMAT, | 54 REVLOG_DEFAULT_FORMAT, |
55 REVLOG_DEFAULT_VERSION, | 55 REVLOG_DEFAULT_VERSION, |
56 SUPPORTED_FLAGS, | |
56 ) | 57 ) |
57 from .revlogutils.flagutil import ( | 58 from .revlogutils.flagutil import ( |
58 REVIDX_DEFAULT_FLAGS, | 59 REVIDX_DEFAULT_FLAGS, |
59 REVIDX_ELLIPSIS, | 60 REVIDX_ELLIPSIS, |
60 REVIDX_EXTSTORED, | 61 REVIDX_EXTSTORED, |
61 REVIDX_FLAGS_ORDER, | 62 REVIDX_FLAGS_ORDER, |
62 REVIDX_HASCOPIESINFO, | 63 REVIDX_HASCOPIESINFO, |
63 REVIDX_ISCENSORED, | 64 REVIDX_ISCENSORED, |
64 REVIDX_RAWTEXT_CHANGING_FLAGS, | 65 REVIDX_RAWTEXT_CHANGING_FLAGS, |
65 REVIDX_SIDEDATA, | |
66 ) | 66 ) |
67 from .thirdparty import attr | 67 from .thirdparty import attr |
68 from . import ( | 68 from . import ( |
69 ancestor, | 69 ancestor, |
70 dagop, | 70 dagop, |
71 error, | 71 error, |
72 mdiff, | 72 mdiff, |
73 policy, | 73 policy, |
74 pycompat, | 74 pycompat, |
75 revlogutils, | |
75 templatefilters, | 76 templatefilters, |
76 util, | 77 util, |
77 ) | 78 ) |
78 from .interfaces import ( | 79 from .interfaces import ( |
79 repository, | 80 repository, |
80 util as interfaceutil, | 81 util as interfaceutil, |
81 ) | 82 ) |
82 from .revlogutils import ( | 83 from .revlogutils import ( |
83 deltas as deltautil, | 84 deltas as deltautil, |
85 docket as docketutil, | |
84 flagutil, | 86 flagutil, |
85 nodemap as nodemaputil, | 87 nodemap as nodemaputil, |
88 randomaccessfile, | |
89 revlogv0, | |
90 rewrite, | |
86 sidedata as sidedatautil, | 91 sidedata as sidedatautil, |
87 ) | 92 ) |
88 from .utils import ( | 93 from .utils import ( |
89 storageutil, | 94 storageutil, |
90 stringutil, | 95 stringutil, |
91 ) | 96 ) |
92 | 97 |
93 # blanked usage of all the names to prevent pyflakes constraints | 98 # blanked usage of all the names to prevent pyflakes constraints |
94 # We need these names available in the module for extensions. | 99 # We need these names available in the module for extensions. |
100 | |
95 REVLOGV0 | 101 REVLOGV0 |
96 REVLOGV1 | 102 REVLOGV1 |
97 REVLOGV2 | 103 REVLOGV2 |
98 FLAG_INLINE_DATA | 104 FLAG_INLINE_DATA |
99 FLAG_GENERALDELTA | 105 FLAG_GENERALDELTA |
102 REVLOG_DEFAULT_VERSION | 108 REVLOG_DEFAULT_VERSION |
103 REVLOGV1_FLAGS | 109 REVLOGV1_FLAGS |
104 REVLOGV2_FLAGS | 110 REVLOGV2_FLAGS |
105 REVIDX_ISCENSORED | 111 REVIDX_ISCENSORED |
106 REVIDX_ELLIPSIS | 112 REVIDX_ELLIPSIS |
107 REVIDX_SIDEDATA | |
108 REVIDX_HASCOPIESINFO | 113 REVIDX_HASCOPIESINFO |
109 REVIDX_EXTSTORED | 114 REVIDX_EXTSTORED |
110 REVIDX_DEFAULT_FLAGS | 115 REVIDX_DEFAULT_FLAGS |
111 REVIDX_FLAGS_ORDER | 116 REVIDX_FLAGS_ORDER |
112 REVIDX_RAWTEXT_CHANGING_FLAGS | 117 REVIDX_RAWTEXT_CHANGING_FLAGS |
119 # Aliased for performance. | 124 # Aliased for performance. |
120 _zlibdecompress = zlib.decompress | 125 _zlibdecompress = zlib.decompress |
121 | 126 |
122 # max size of revlog with inline data | 127 # max size of revlog with inline data |
123 _maxinline = 131072 | 128 _maxinline = 131072 |
124 _chunksize = 1048576 | |
125 | 129 |
126 # Flag processors for REVIDX_ELLIPSIS. | 130 # Flag processors for REVIDX_ELLIPSIS. |
127 def ellipsisreadprocessor(rl, text): | 131 def ellipsisreadprocessor(rl, text): |
128 return text, False | 132 return text, False |
129 | 133 |
139 ellipsisprocessor = ( | 143 ellipsisprocessor = ( |
140 ellipsisreadprocessor, | 144 ellipsisreadprocessor, |
141 ellipsiswriteprocessor, | 145 ellipsiswriteprocessor, |
142 ellipsisrawprocessor, | 146 ellipsisrawprocessor, |
143 ) | 147 ) |
144 | |
145 | |
146 def getoffset(q): | |
147 return int(q >> 16) | |
148 | |
149 | |
150 def gettype(q): | |
151 return int(q & 0xFFFF) | |
152 | |
153 | |
154 def offset_type(offset, type): | |
155 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0: | |
156 raise ValueError(b'unknown revlog index flags') | |
157 return int(int(offset) << 16 | type) | |
158 | 148 |
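
The three helpers removed above implemented the packing scheme for the first index field: a byte offset in the high bits and a 16-bit flag field in the low bits. A minimal standalone sketch of that arithmetic (the flag mask is a stand-in for flagutil.REVIDX_KNOWN_FLAGS, and the values are hypothetical):

    # offset_type() packed an offset and a 16-bit flag field into one int;
    # getoffset()/gettype() inverted it
    KNOWN_FLAGS = 0xFFFF  # stand-in mask, not the real REVIDX_KNOWN_FLAGS

    def offset_type(offset, type):
        if (type & ~KNOWN_FLAGS) != 0:
            raise ValueError('unknown revlog index flags')
        return int(int(offset) << 16 | type)

    packed = offset_type(1048576, 1)
    assert packed >> 16 == 1048576  # getoffset(packed)
    assert packed & 0xFFFF == 1     # gettype(packed)
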
159 | 149 |
160 def _verify_revision(rl, skipflags, state, node): | 150 def _verify_revision(rl, skipflags, state, node): |
161 """Verify the integrity of the given revlog ``node`` while providing a hook | 151 """Verify the integrity of the given revlog ``node`` while providing a hook |
162 point for extensions to influence the operation.""" | 152 point for extensions to influence the operation.""" |
173 # people using pure don't really have performance consideration (and a | 163 # people using pure don't really have performance consideration (and a |
174 # wheelbarrow of other slowness source) | 164 # wheelbarrow of other slowness source) |
175 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr( | 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr( |
176 parsers, 'BaseIndexObject' | 166 parsers, 'BaseIndexObject' |
177 ) | 167 ) |
178 | |
179 | |
180 @attr.s(slots=True, frozen=True) | |
181 class _revisioninfo(object): | |
182 """Information about a revision that allows building its fulltext | |
183 node: expected hash of the revision | |
184 p1, p2: parent revs of the revision | |
185 btext: built text cache consisting of a one-element list | |
186 cachedelta: (baserev, uncompressed_delta) or None | |
187 flags: flags associated to the revision storage | |
188 | |
189 One of btext[0] or cachedelta must be set. | |
190 """ | |
191 | |
192 node = attr.ib() | |
193 p1 = attr.ib() | |
194 p2 = attr.ib() | |
195 btext = attr.ib() | |
196 textlen = attr.ib() | |
197 cachedelta = attr.ib() | |
198 flags = attr.ib() | |
199 | 168 |
200 | 169 |
201 @interfaceutil.implementer(repository.irevisiondelta) | 170 @interfaceutil.implementer(repository.irevisiondelta) |
202 @attr.s(slots=True) | 171 @attr.s(slots=True) |
203 class revlogrevisiondelta(object): | 172 class revlogrevisiondelta(object): |
208 flags = attr.ib() | 177 flags = attr.ib() |
209 baserevisionsize = attr.ib() | 178 baserevisionsize = attr.ib() |
210 revision = attr.ib() | 179 revision = attr.ib() |
211 delta = attr.ib() | 180 delta = attr.ib() |
212 sidedata = attr.ib() | 181 sidedata = attr.ib() |
182 protocol_flags = attr.ib() | |
213 linknode = attr.ib(default=None) | 183 linknode = attr.ib(default=None) |
214 | 184 |
215 | 185 |
216 @interfaceutil.implementer(repository.iverifyproblem) | 186 @interfaceutil.implementer(repository.iverifyproblem) |
217 @attr.s(frozen=True) | 187 @attr.s(frozen=True) |
219 warning = attr.ib(default=None) | 189 warning = attr.ib(default=None) |
220 error = attr.ib(default=None) | 190 error = attr.ib(default=None) |
221 node = attr.ib(default=None) | 191 node = attr.ib(default=None) |
222 | 192 |
223 | 193 |
224 class revlogoldindex(list): | 194 def parse_index_v1(data, inline): |
225 entry_size = INDEX_ENTRY_V0.size | 195 # call the C implementation to parse the index data |
226 | 196 index, cache = parsers.parse_index2(data, inline) |
227 @property | 197 return index, cache |
228 def nodemap(self): | 198 |
229 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]" | 199 |
230 util.nouideprecwarn(msg, b'5.3', stacklevel=2) | 200 def parse_index_v2(data, inline): |
231 return self._nodemap | 201 # call the C implementation to parse the index data |
232 | 202 index, cache = parsers.parse_index2(data, inline, revlogv2=True) |
233 @util.propertycache | 203 return index, cache |
234 def _nodemap(self): | 204 |
235 nodemap = nodemaputil.NodeMap({nullid: nullrev}) | 205 |
236 for r in range(0, len(self)): | 206 def parse_index_cl_v2(data, inline): |
237 n = self[r][7] | 207 # call the C implementation to parse the index data |
238 nodemap[n] = r | 208 assert not inline |
239 return nodemap | 209 from .pure.parsers import parse_index_cl_v2 |
240 | 210 |
241 def has_node(self, node): | 211 index, cache = parse_index_cl_v2(data) |
242 """return True if the node exist in the index""" | 212 return index, cache |
243 return node in self._nodemap | 213 |
244 | 214 |
245 def rev(self, node): | 215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'): |
246 """return a revision for a node | 216 |
247 | 217 def parse_index_v1_nodemap(data, inline): |
248 If the node is unknown, raise a RevlogError""" | 218 index, cache = parsers.parse_index_devel_nodemap(data, inline) |
249 return self._nodemap[node] | 219 return index, cache |
250 | 220 |
251 def get_rev(self, node): | 221 |
252 """return a revision for a node | 222 else: |
253 | 223 parse_index_v1_nodemap = None |
254 If the node is unknown, return None""" | 224 |
255 return self._nodemap.get(node) | 225 |
256 | 226 def parse_index_v1_mixed(data, inline): |
257 def append(self, tup): | 227 index, cache = parse_index_v1(data, inline) |
258 self._nodemap[tup[7]] = len(self) | 228 return rustrevlog.MixedIndex(index), cache |
259 super(revlogoldindex, self).append(tup) | |
260 | |
261 def __delitem__(self, i): | |
262 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None: | |
263 raise ValueError(b"deleting slices only supports a:-1 with step 1") | |
264 for r in pycompat.xrange(i.start, len(self)): | |
265 del self._nodemap[self[r][7]] | |
266 super(revlogoldindex, self).__delitem__(i) | |
267 | |
268 def clearcaches(self): | |
269 self.__dict__.pop('_nodemap', None) | |
270 | |
271 def __getitem__(self, i): | |
272 if i == -1: | |
273 return (0, 0, 0, -1, -1, -1, -1, nullid) | |
274 return list.__getitem__(self, i) | |
275 | |
276 | |
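
For context, the `_nodemap` property of the removed class rebuilds a node-to-revision mapping by scanning slot 7 (the node hash) of each index tuple. A standalone sketch of that scan, using a plain dict in place of nodemaputil.NodeMap:

    # node -> rev mapping as revlogoldindex._nodemap built it; by
    # convention the null node maps to nullrev (-1)
    def build_nodemap(index, nullid=b'\x00' * 20):
        nodemap = {nullid: -1}
        for r, entry in enumerate(index):
            nodemap[entry[7]] = r
        return nodemap
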
277 class revlogoldio(object): | |
278 def parseindex(self, data, inline): | |
279 s = INDEX_ENTRY_V0.size | |
280 index = [] | |
281 nodemap = nodemaputil.NodeMap({nullid: nullrev}) | |
282 n = off = 0 | |
283 l = len(data) | |
284 while off + s <= l: | |
285 cur = data[off : off + s] | |
286 off += s | |
287 e = INDEX_ENTRY_V0.unpack(cur) | |
288 # transform to revlogv1 format | |
289 e2 = ( | |
290 offset_type(e[0], 0), | |
291 e[1], | |
292 -1, | |
293 e[2], | |
294 e[3], | |
295 nodemap.get(e[4], nullrev), | |
296 nodemap.get(e[5], nullrev), | |
297 e[6], | |
298 ) | |
299 index.append(e2) | |
300 nodemap[e[6]] = n | |
301 n += 1 | |
302 | |
303 index = revlogoldindex(index) | |
304 return index, None | |
305 | |
306 def packentry(self, entry, node, version, rev): | |
307 """return the binary representation of an entry | |
308 | |
309 entry: a tuple containing all the values (see index.__getitem__) | |
310 node: a callback to convert a revision to nodeid | |
311 version: the changelog version | |
312 rev: the revision number | |
313 """ | |
314 if gettype(entry[0]): | |
315 raise error.RevlogError( | |
316 _(b'index entry flags need revlog version 1') | |
317 ) | |
318 e2 = ( | |
319 getoffset(entry[0]), | |
320 entry[1], | |
321 entry[3], | |
322 entry[4], | |
323 node(entry[5]), | |
324 node(entry[6]), | |
325 entry[7], | |
326 ) | |
327 return INDEX_ENTRY_V0.pack(*e2) | |
328 | 229 |
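
The removed parseindex walks the raw index data as fixed-width records and converts each one to the v1 tuple shape. A self-contained sketch of the scan itself, using the stdlib struct module; the ">4l20s20s20s" layout (offset, compressed length, base rev, link rev, p1, p2, node) is assumed here to match INDEX_ENTRY_V0:

    import struct

    ENTRY_V0 = struct.Struct(b'>4l20s20s20s')  # assumed v0 entry layout

    def scan_v0(data):
        # walk fixed-size records until fewer than entry-size bytes remain
        entries, off = [], 0
        while off + ENTRY_V0.size <= len(data):
            entries.append(ENTRY_V0.unpack_from(data, off))
            off += ENTRY_V0.size
        return entries
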
329 | 230 |
330 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte | 231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte |
331 # signed integer) | 232 # signed integer) |
332 _maxentrysize = 0x7FFFFFFF | 233 _maxentrysize = 0x7FFFFFFF |
333 | 234 |
334 | 235 FILE_TOO_SHORT_MSG = _( |
335 class revlogio(object): | 236 b'cannot read from revlog %s;' |
336 def parseindex(self, data, inline): | 237 b' expected %d bytes from offset %d, data size is %d' |
337 # call the C implementation to parse the index data | 238 ) |
338 index, cache = parsers.parse_index2(data, inline) | |
339 return index, cache | |
340 | |
341 def packentry(self, entry, node, version, rev): | |
342 p = INDEX_ENTRY_V1.pack(*entry) | |
343 if rev == 0: | |
344 p = INDEX_HEADER.pack(version) + p[4:] | |
345 return p | |
346 | |
347 | |
348 class revlogv2io(object): | |
349 def parseindex(self, data, inline): | |
350 index, cache = parsers.parse_index2(data, inline, revlogv2=True) | |
351 return index, cache | |
352 | |
353 def packentry(self, entry, node, version, rev): | |
354 p = INDEX_ENTRY_V2.pack(*entry) | |
355 if rev == 0: | |
356 p = INDEX_HEADER.pack(version) + p[4:] | |
357 return p | |
358 | |
359 | |
360 NodemapRevlogIO = None | |
361 | |
362 if util.safehasattr(parsers, 'parse_index_devel_nodemap'): | |
363 | |
364 class NodemapRevlogIO(revlogio): | |
365 """A debug oriented IO class that return a PersistentNodeMapIndexObject | |
366 | |
367 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature. | |
368 """ | |
369 | |
370 def parseindex(self, data, inline): | |
371 index, cache = parsers.parse_index_devel_nodemap(data, inline) | |
372 return index, cache | |
373 | |
374 | |
375 class rustrevlogio(revlogio): | |
376 def parseindex(self, data, inline): | |
377 index, cache = super(rustrevlogio, self).parseindex(data, inline) | |
378 return rustrevlog.MixedIndex(index), cache | |
379 | 239 |
380 | 240 |
381 class revlog(object): | 241 class revlog(object): |
382 """ | 242 """ |
383 the underlying revision storage object | 243 the underlying revision storage object |
417 | 277 |
418 `concurrencychecker` is an optional function that receives 3 arguments: a | 278 `concurrencychecker` is an optional function that receives 3 arguments: a |
419 file handle, a filename, and an expected position. It should check whether | 279 file handle, a filename, and an expected position. It should check whether |
420 the current position in the file handle is valid, and log/warn/fail (by | 280 the current position in the file handle is valid, and log/warn/fail (by |
421 raising). | 281 raising). |
282 | |
283 See mercurial/revlogutils/constants.py for details about the content of an |
284 index entry. | |
422 """ | 285 """ |
423 | 286 |
424 _flagserrorclass = error.RevlogError | 287 _flagserrorclass = error.RevlogError |
425 | 288 |
426 def __init__( | 289 def __init__( |
427 self, | 290 self, |
428 opener, | 291 opener, |
429 indexfile, | 292 target, |
430 datafile=None, | 293 radix, |
294 postfix=None, # only exist for `tmpcensored` now | |
431 checkambig=False, | 295 checkambig=False, |
432 mmaplargeindex=False, | 296 mmaplargeindex=False, |
433 censorable=False, | 297 censorable=False, |
434 upperboundcomp=None, | 298 upperboundcomp=None, |
435 persistentnodemap=False, | 299 persistentnodemap=False, |
436 concurrencychecker=None, | 300 concurrencychecker=None, |
301 trypending=False, | |
437 ): | 302 ): |
438 """ | 303 """ |
439 create a revlog object | 304 create a revlog object |
440 | 305 |
441 opener is a function that abstracts the file opening operation | 306 opener is a function that abstracts the file opening operation |
442 and can be used to implement COW semantics or the like. | 307 and can be used to implement COW semantics or the like. |
443 | 308 |
309 `target`: a (KIND, ID) tuple that identifies the content stored in |
310 this revlog. It helps the rest of the code understand what the revlog |
311 is about without having to resort to heuristics and index filename |
312 analysis. Note that this must reliably be set by normal code, but |
313 test, debug, or performance measurement code might not set it to an |
314 accurate value. |
444 """ | 315 """ |
445 self.upperboundcomp = upperboundcomp | 316 self.upperboundcomp = upperboundcomp |
446 self.indexfile = indexfile | 317 |
447 self.datafile = datafile or (indexfile[:-2] + b".d") | 318 self.radix = radix |
448 self.nodemap_file = None | 319 |
320 self._docket_file = None | |
321 self._indexfile = None | |
322 self._datafile = None | |
323 self._sidedatafile = None | |
324 self._nodemap_file = None | |
325 self.postfix = postfix | |
326 self._trypending = trypending | |
327 self.opener = opener | |
449 if persistentnodemap: | 328 if persistentnodemap: |
450 self.nodemap_file = nodemaputil.get_nodemap_file( | 329 self._nodemap_file = nodemaputil.get_nodemap_file(self) |
451 opener, self.indexfile | 330 |
452 ) | 331 assert target[0] in ALL_KINDS |
453 | 332 assert len(target) == 2 |
454 self.opener = opener | 333 self.target = target |
455 # When True, indexfile is opened with checkambig=True at writing, to | 334 # When True, indexfile is opened with checkambig=True at writing, to |
456 # avoid file stat ambiguity. | 335 # avoid file stat ambiguity. |
457 self._checkambig = checkambig | 336 self._checkambig = checkambig |
458 self._mmaplargeindex = mmaplargeindex | 337 self._mmaplargeindex = mmaplargeindex |
459 self._censorable = censorable | 338 self._censorable = censorable |
466 # How much data to read and cache into the raw revlog data cache. | 345 # How much data to read and cache into the raw revlog data cache. |
467 self._chunkcachesize = 65536 | 346 self._chunkcachesize = 65536 |
468 self._maxchainlen = None | 347 self._maxchainlen = None |
469 self._deltabothparents = True | 348 self._deltabothparents = True |
470 self.index = None | 349 self.index = None |
350 self._docket = None | |
471 self._nodemap_docket = None | 351 self._nodemap_docket = None |
472 # Mapping of partial identifiers to full nodes. | 352 # Mapping of partial identifiers to full nodes. |
473 self._pcache = {} | 353 self._pcache = {} |
474 # Mapping of revision integer to full node. | 354 # Mapping of revision integer to full node. |
475 self._compengine = b'zlib' | 355 self._compengine = b'zlib' |
476 self._compengineopts = {} | 356 self._compengineopts = {} |
477 self._maxdeltachainspan = -1 | 357 self._maxdeltachainspan = -1 |
478 self._withsparseread = False | 358 self._withsparseread = False |
479 self._sparserevlog = False | 359 self._sparserevlog = False |
360 self.hassidedata = False | |
480 self._srdensitythreshold = 0.50 | 361 self._srdensitythreshold = 0.50 |
481 self._srmingapsize = 262144 | 362 self._srmingapsize = 262144 |
482 | 363 |
483 # Make copy of flag processors so each revlog instance can support | 364 # Make copy of flag processors so each revlog instance can support |
484 # custom flags. | 365 # custom flags. |
485 self._flagprocessors = dict(flagutil.flagprocessors) | 366 self._flagprocessors = dict(flagutil.flagprocessors) |
486 | 367 |
487 # 2-tuple of file handles being used for active writing. | 368 # 3-tuple of file handles being used for active writing. |
488 self._writinghandles = None | 369 self._writinghandles = None |
370 # prevent nesting of addgroup | |
371 self._adding_group = None | |
489 | 372 |
490 self._loadindex() | 373 self._loadindex() |
491 | 374 |
492 self._concurrencychecker = concurrencychecker | 375 self._concurrencychecker = concurrencychecker |
493 | 376 |
494 def _loadindex(self): | 377 def _init_opts(self): |
378 """process options (from above/config) to setup associated default revlog mode | |
379 | |
380 These values might be affected when actually reading on disk information. | |
381 | |
382 The relevant values are returned for use in _loadindex(). | |
383 | |
384 * newversionflags: | |
385 version header to use if we need to create a new revlog | |
386 | |
387 * mmapindexthreshold: | |
388 minimal index size for start to use mmap | |
389 | |
390 * force_nodemap: | |
391 force the usage of a "development" version of the nodemap code | |
392 """ | |
495 mmapindexthreshold = None | 393 mmapindexthreshold = None |
496 opts = self.opener.options | 394 opts = self.opener.options |
497 | 395 |
498 if b'revlogv2' in opts: | 396 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG: |
499 newversionflags = REVLOGV2 | FLAG_INLINE_DATA | 397 new_header = CHANGELOGV2 |
398 elif b'revlogv2' in opts: | |
399 new_header = REVLOGV2 | |
500 elif b'revlogv1' in opts: | 400 elif b'revlogv1' in opts: |
501 newversionflags = REVLOGV1 | FLAG_INLINE_DATA | 401 new_header = REVLOGV1 | FLAG_INLINE_DATA |
502 if b'generaldelta' in opts: | 402 if b'generaldelta' in opts: |
503 newversionflags |= FLAG_GENERALDELTA | 403 new_header |= FLAG_GENERALDELTA |
504 elif b'revlogv0' in self.opener.options: | 404 elif b'revlogv0' in self.opener.options: |
505 newversionflags = REVLOGV0 | 405 new_header = REVLOGV0 |
506 else: | 406 else: |
507 newversionflags = REVLOG_DEFAULT_VERSION | 407 new_header = REVLOG_DEFAULT_VERSION |
508 | 408 |
509 if b'chunkcachesize' in opts: | 409 if b'chunkcachesize' in opts: |
510 self._chunkcachesize = opts[b'chunkcachesize'] | 410 self._chunkcachesize = opts[b'chunkcachesize'] |
511 if b'maxchainlen' in opts: | 411 if b'maxchainlen' in opts: |
512 self._maxchainlen = opts[b'maxchainlen'] | 412 self._maxchainlen = opts[b'maxchainlen'] |
524 self._compengineopts[b'zstd.level'] = opts[b'zstd.level'] | 424 self._compengineopts[b'zstd.level'] = opts[b'zstd.level'] |
525 if b'maxdeltachainspan' in opts: | 425 if b'maxdeltachainspan' in opts: |
526 self._maxdeltachainspan = opts[b'maxdeltachainspan'] | 426 self._maxdeltachainspan = opts[b'maxdeltachainspan'] |
527 if self._mmaplargeindex and b'mmapindexthreshold' in opts: | 427 if self._mmaplargeindex and b'mmapindexthreshold' in opts: |
528 mmapindexthreshold = opts[b'mmapindexthreshold'] | 428 mmapindexthreshold = opts[b'mmapindexthreshold'] |
529 self.hassidedata = bool(opts.get(b'side-data', False)) | |
530 self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) | 429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) |
531 withsparseread = bool(opts.get(b'with-sparse-read', False)) | 430 withsparseread = bool(opts.get(b'with-sparse-read', False)) |
532 # sparse-revlog forces sparse-read | 431 # sparse-revlog forces sparse-read |
533 self._withsparseread = self._sparserevlog or withsparseread | 432 self._withsparseread = self._sparserevlog or withsparseread |
534 if b'sparse-read-density-threshold' in opts: | 433 if b'sparse-read-density-threshold' in opts: |
552 elif self._chunkcachesize & (self._chunkcachesize - 1): | 451 elif self._chunkcachesize & (self._chunkcachesize - 1): |
553 raise error.RevlogError( | 452 raise error.RevlogError( |
554 _(b'revlog chunk cache size %r is not a power of 2') | 453 _(b'revlog chunk cache size %r is not a power of 2') |
555 % self._chunkcachesize | 454 % self._chunkcachesize |
556 ) | 455 ) |
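
The second branch of this check relies on the classic bit trick: for positive x, `x & (x - 1)` clears the lowest set bit, so the result is zero exactly when x is a power of two. A quick illustration:

    # 65536 passes the power-of-two test, 65537 does not
    for x in (1, 2, 65536, 65537):
        print(x, x & (x - 1) == 0)
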
557 | 456 force_nodemap = opts.get(b'devel-force-nodemap', False) |
558 indexdata = b'' | 457 return new_header, mmapindexthreshold, force_nodemap |
559 self._initempty = True | 458 |
459 def _get_data(self, filepath, mmap_threshold, size=None): | |
460 """return a file content with or without mmap | |
461 | |
462 If the file is missing return the empty string""" | |
560 try: | 463 try: |
561 with self._indexfp() as f: | 464 with self.opener(filepath) as fp: |
562 if ( | 465 if mmap_threshold is not None: |
563 mmapindexthreshold is not None | 466 file_size = self.opener.fstat(fp).st_size |
564 and self.opener.fstat(f).st_size >= mmapindexthreshold | 467 if file_size >= mmap_threshold: |
565 ): | 468 if size is not None: |
566 # TODO: should .close() to release resources without | 469 # avoid potential mmap crash |
567 # relying on Python GC | 470 size = min(file_size, size) |
568 indexdata = util.buffer(util.mmapread(f)) | 471 # TODO: should .close() to release resources without |
472 # relying on Python GC | |
473 if size is None: | |
474 return util.buffer(util.mmapread(fp)) | |
475 else: | |
476 return util.buffer(util.mmapread(fp, size)) | |
477 if size is None: | |
478 return fp.read() | |
569 else: | 479 else: |
570 indexdata = f.read() | 480 return fp.read(size) |
571 if len(indexdata) > 0: | |
572 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0] | |
573 self._initempty = False | |
574 else: | |
575 versionflags = newversionflags | |
576 except IOError as inst: | 481 except IOError as inst: |
577 if inst.errno != errno.ENOENT: | 482 if inst.errno != errno.ENOENT: |
578 raise | 483 raise |
579 | 484 return b'' |
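
The new `_get_data` keeps small files on the plain read path and switches to mmap above the size threshold, clamping the requested size to the real file size. A standalone sketch of that pattern using only the stdlib (plain open/os/mmap stand in for Mercurial's opener and util.mmapread):

    import mmap
    import os

    def read_maybe_mmap(path, threshold, size=None):
        # mmap large files, plain-read small ones
        with open(path, 'rb') as fp:
            file_size = os.fstat(fp.fileno()).st_size
            if threshold is not None and file_size >= threshold:
                length = file_size if size is None else min(file_size, size)
                return mmap.mmap(fp.fileno(), length, access=mmap.ACCESS_READ)
            return fp.read() if size is None else fp.read(size)
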
580 versionflags = newversionflags | 485 |
581 | 486 def _loadindex(self, docket=None): |
582 self.version = versionflags | 487 |
583 | 488 new_header, mmapindexthreshold, force_nodemap = self._init_opts() |
584 flags = versionflags & ~0xFFFF | 489 |
585 fmt = versionflags & 0xFFFF | 490 if self.postfix is not None: |
586 | 491 entry_point = b'%s.i.%s' % (self.radix, self.postfix) |
587 if fmt == REVLOGV0: | 492 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix): |
588 if flags: | 493 entry_point = b'%s.i.a' % self.radix |
589 raise error.RevlogError( | 494 else: |
590 _(b'unknown flags (%#04x) in version %d revlog %s') | 495 entry_point = b'%s.i' % self.radix |
591 % (flags >> 16, fmt, self.indexfile) | 496 |
497 if docket is not None: | |
498 self._docket = docket | |
499 self._docket_file = entry_point | |
500 else: | |
501 entry_data = b'' | |
502 self._initempty = True | |
503 entry_data = self._get_data(entry_point, mmapindexthreshold) | |
504 if len(entry_data) > 0: | |
505 header = INDEX_HEADER.unpack(entry_data[:4])[0] | |
506 self._initempty = False | |
507 else: | |
508 header = new_header | |
509 | |
510 self._format_flags = header & ~0xFFFF | |
511 self._format_version = header & 0xFFFF | |
512 | |
513 supported_flags = SUPPORTED_FLAGS.get(self._format_version) | |
514 if supported_flags is None: | |
515 msg = _(b'unknown version (%d) in revlog %s') | |
516 msg %= (self._format_version, self.display_id) | |
517 raise error.RevlogError(msg) | |
518 elif self._format_flags & ~supported_flags: | |
519 msg = _(b'unknown flags (%#04x) in version %d revlog %s') | |
520 display_flag = self._format_flags >> 16 | |
521 msg %= (display_flag, self._format_version, self.display_id) | |
522 raise error.RevlogError(msg) | |
523 | |
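
A worked example of the header split just above: the low 16 bits of the 4-byte header carry the format version, the high bits carry feature flags (constant values inlined here for illustration):

    REVLOGV1 = 1
    FLAG_INLINE_DATA = 1 << 16
    FLAG_GENERALDELTA = 1 << 17

    header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    assert header & 0xFFFF == REVLOGV1   # _format_version
    assert header & ~0xFFFF == 0x30000   # _format_flags
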
524 features = FEATURES_BY_VERSION[self._format_version] | |
525 self._inline = features[b'inline'](self._format_flags) | |
526 self._generaldelta = features[b'generaldelta'](self._format_flags) | |
527 self.hassidedata = features[b'sidedata'] | |
528 | |
529 if not features[b'docket']: | |
530 self._indexfile = entry_point | |
531 index_data = entry_data | |
532 else: | |
533 self._docket_file = entry_point | |
534 if self._initempty: | |
535 self._docket = docketutil.default_docket(self, header) | |
536 else: | |
537 self._docket = docketutil.parse_docket( | |
538 self, entry_data, use_pending=self._trypending | |
539 ) | |
540 | |
541 if self._docket is not None: | |
542 self._indexfile = self._docket.index_filepath() | |
543 index_data = b'' | |
544 index_size = self._docket.index_end | |
545 if index_size > 0: | |
546 index_data = self._get_data( | |
547 self._indexfile, mmapindexthreshold, size=index_size | |
592 ) | 548 ) |
593 | 549 if len(index_data) < index_size: |
594 self._inline = False | 550 msg = _(b'too few index data for %s: got %d, expected %d') |
595 self._generaldelta = False | 551 msg %= (self.display_id, len(index_data), index_size) |
596 | 552 raise error.RevlogError(msg) |
597 elif fmt == REVLOGV1: | 553 |
598 if flags & ~REVLOGV1_FLAGS: | |
599 raise error.RevlogError( | |
600 _(b'unknown flags (%#04x) in version %d revlog %s') | |
601 % (flags >> 16, fmt, self.indexfile) | |
602 ) | |
603 | |
604 self._inline = versionflags & FLAG_INLINE_DATA | |
605 self._generaldelta = versionflags & FLAG_GENERALDELTA | |
606 | |
607 elif fmt == REVLOGV2: | |
608 if flags & ~REVLOGV2_FLAGS: | |
609 raise error.RevlogError( | |
610 _(b'unknown flags (%#04x) in version %d revlog %s') | |
611 % (flags >> 16, fmt, self.indexfile) | |
612 ) | |
613 | |
614 # There is a bug in the transaction handling when going from an | |
615 # inline revlog to a separate index and data file. Turn it off until | |
616 # it's fixed, since v2 revlogs sometimes get rewritten on exchange. | |
617 # See issue6485 | |
618 self._inline = False | 554 self._inline = False |
619 # generaldelta implied by version 2 revlogs. | 555 # generaldelta implied by version 2 revlogs. |
620 self._generaldelta = True | 556 self._generaldelta = True |
621 | 557 # the logic for persistent nodemap will be dealt with within the |
558 # main docket, so disable it for now. | |
559 self._nodemap_file = None | |
560 | |
561 if self._docket is not None: | |
562 self._datafile = self._docket.data_filepath() | |
563 self._sidedatafile = self._docket.sidedata_filepath() | |
564 elif self.postfix is None: | |
565 self._datafile = b'%s.d' % self.radix | |
622 else: | 566 else: |
623 raise error.RevlogError( | 567 self._datafile = b'%s.d.%s' % (self.radix, self.postfix) |
624 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile) | |
625 ) | |
626 | 568 |
627 self.nodeconstants = sha1nodeconstants | 569 self.nodeconstants = sha1nodeconstants |
628 self.nullid = self.nodeconstants.nullid | 570 self.nullid = self.nodeconstants.nullid |
629 | 571 |
630 # sparse-revlog can't be on without general-delta (issue6056) | 572 # sparse-revlog can't be on without general-delta (issue6056) |
632 self._sparserevlog = False | 574 self._sparserevlog = False |
633 | 575 |
634 self._storedeltachains = True | 576 self._storedeltachains = True |
635 | 577 |
636 devel_nodemap = ( | 578 devel_nodemap = ( |
637 self.nodemap_file | 579 self._nodemap_file |
638 and opts.get(b'devel-force-nodemap', False) | 580 and force_nodemap |
639 and NodemapRevlogIO is not None | 581 and parse_index_v1_nodemap is not None |
640 ) | 582 ) |
641 | 583 |
642 use_rust_index = False | 584 use_rust_index = False |
643 if rustrevlog is not None: | 585 if rustrevlog is not None: |
644 if self.nodemap_file is not None: | 586 if self._nodemap_file is not None: |
645 use_rust_index = True | 587 use_rust_index = True |
646 else: | 588 else: |
647 use_rust_index = self.opener.options.get(b'rust.index') | 589 use_rust_index = self.opener.options.get(b'rust.index') |
648 | 590 |
649 self._io = revlogio() | 591 self._parse_index = parse_index_v1 |
650 if self.version == REVLOGV0: | 592 if self._format_version == REVLOGV0: |
651 self._io = revlogoldio() | 593 self._parse_index = revlogv0.parse_index_v0 |
652 elif fmt == REVLOGV2: | 594 elif self._format_version == REVLOGV2: |
653 self._io = revlogv2io() | 595 self._parse_index = parse_index_v2 |
596 elif self._format_version == CHANGELOGV2: | |
597 self._parse_index = parse_index_cl_v2 | |
654 elif devel_nodemap: | 598 elif devel_nodemap: |
655 self._io = NodemapRevlogIO() | 599 self._parse_index = parse_index_v1_nodemap |
656 elif use_rust_index: | 600 elif use_rust_index: |
657 self._io = rustrevlogio() | 601 self._parse_index = parse_index_v1_mixed |
658 try: | 602 try: |
659 d = self._io.parseindex(indexdata, self._inline) | 603 d = self._parse_index(index_data, self._inline) |
660 index, _chunkcache = d | 604 index, chunkcache = d |
661 use_nodemap = ( | 605 use_nodemap = ( |
662 not self._inline | 606 not self._inline |
663 and self.nodemap_file is not None | 607 and self._nodemap_file is not None |
664 and util.safehasattr(index, 'update_nodemap_data') | 608 and util.safehasattr(index, 'update_nodemap_data') |
665 ) | 609 ) |
666 if use_nodemap: | 610 if use_nodemap: |
667 nodemap_data = nodemaputil.persisted_data(self) | 611 nodemap_data = nodemaputil.persisted_data(self) |
668 if nodemap_data is not None: | 612 if nodemap_data is not None: |
674 # no changelog tampering | 618 # no changelog tampering |
675 self._nodemap_docket = docket | 619 self._nodemap_docket = docket |
676 index.update_nodemap_data(*nodemap_data) | 620 index.update_nodemap_data(*nodemap_data) |
677 except (ValueError, IndexError): | 621 except (ValueError, IndexError): |
678 raise error.RevlogError( | 622 raise error.RevlogError( |
679 _(b"index %s is corrupted") % self.indexfile | 623 _(b"index %s is corrupted") % self.display_id |
680 ) | 624 ) |
681 self.index, self._chunkcache = d | 625 self.index = index |
682 if not self._chunkcache: | 626 self._segmentfile = randomaccessfile.randomaccessfile( |
683 self._chunkclear() | 627 self.opener, |
628 (self._indexfile if self._inline else self._datafile), | |
629 self._chunkcachesize, | |
630 chunkcache, | |
631 ) | |
632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile( | |
633 self.opener, | |
634 self._sidedatafile, | |
635 self._chunkcachesize, | |
636 ) | |
684 # revnum -> (chain-length, sum-delta-length) | 637 # revnum -> (chain-length, sum-delta-length) |
685 self._chaininfocache = util.lrucachedict(500) | 638 self._chaininfocache = util.lrucachedict(500) |
686 # revlog header -> revlog compressor | 639 # revlog header -> revlog compressor |
687 self._decompressors = {} | 640 self._decompressors = {} |
688 | 641 |
689 @util.propertycache | 642 @util.propertycache |
643 def revlog_kind(self): | |
644 return self.target[0] | |
645 | |
646 @util.propertycache | |
647 def display_id(self): | |
648 """The public facing "ID" of the revlog that we use in message""" | |
649 # Maybe we should build a user facing representation of | |
650 # revlog.target instead of using `self.radix` | |
651 return self.radix | |
652 | |
653 def _get_decompressor(self, t): | |
654 try: | |
655 compressor = self._decompressors[t] | |
656 except KeyError: | |
657 try: | |
658 engine = util.compengines.forrevlogheader(t) | |
659 compressor = engine.revlogcompressor(self._compengineopts) | |
660 self._decompressors[t] = compressor | |
661 except KeyError: | |
662 raise error.RevlogError( | |
663 _(b'unknown compression type %s') % binascii.hexlify(t) | |
664 ) | |
665 return compressor | |
666 | |
667 @util.propertycache | |
690 def _compressor(self): | 668 def _compressor(self): |
691 engine = util.compengines[self._compengine] | 669 engine = util.compengines[self._compengine] |
692 return engine.revlogcompressor(self._compengineopts) | 670 return engine.revlogcompressor(self._compengineopts) |
693 | 671 |
694 def _indexfp(self, mode=b'r'): | 672 @util.propertycache |
673 def _decompressor(self): | |
674 """the default decompressor""" | |
675 if self._docket is None: | |
676 return None | |
677 t = self._docket.default_compression_header | |
678 c = self._get_decompressor(t) | |
679 return c.decompress | |
680 | |
681 def _indexfp(self): | |
695 """file object for the revlog's index file""" | 682 """file object for the revlog's index file""" |
696 args = {'mode': mode} | 683 return self.opener(self._indexfile, mode=b"r") |
697 if mode != b'r': | 684 |
698 args['checkambig'] = self._checkambig | 685 def __index_write_fp(self): |
699 if mode == b'w': | 686 # You should not use this directly and use `_writing` instead |
700 args['atomictemp'] = True | 687 try: |
701 return self.opener(self.indexfile, **args) | 688 f = self.opener( |
689 self._indexfile, mode=b"r+", checkambig=self._checkambig | |
690 ) | |
691 if self._docket is None: | |
692 f.seek(0, os.SEEK_END) | |
693 else: | |
694 f.seek(self._docket.index_end, os.SEEK_SET) | |
695 return f | |
696 except IOError as inst: | |
697 if inst.errno != errno.ENOENT: | |
698 raise | |
699 return self.opener( | |
700 self._indexfile, mode=b"w+", checkambig=self._checkambig | |
701 ) | |
702 | |
703 def __index_new_fp(self): | |
704 # You should not use this unless you are upgrading from inline revlog | |
705 return self.opener( | |
706 self._indexfile, | |
707 mode=b"w", | |
708 checkambig=self._checkambig, | |
709 atomictemp=True, | |
710 ) | |
702 | 711 |
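
`__index_write_fp` opens the index for update and seeks to the write position (the docket's trusted end offset when there is one, otherwise the end of file), creating the file only when it does not exist yet. A plain-file sketch of that append-or-create dance, without Mercurial's opener or checkambig handling:

    import errno
    import os

    def open_for_append(path, index_end=None):
        try:
            f = open(path, 'r+b')
            if index_end is None:
                f.seek(0, os.SEEK_END)          # no docket: append at EOF
            else:
                f.seek(index_end, os.SEEK_SET)  # docket: trusted end offset
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return open(path, 'w+b')            # first write: create it
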
703 def _datafp(self, mode=b'r'): | 712 def _datafp(self, mode=b'r'): |
704 """file object for the revlog's data file""" | 713 """file object for the revlog's data file""" |
705 return self.opener(self.datafile, mode=mode) | 714 return self.opener(self._datafile, mode=mode) |
706 | 715 |
707 @contextlib.contextmanager | 716 @contextlib.contextmanager |
708 def _datareadfp(self, existingfp=None): | 717 def _sidedatareadfp(self): |
709 """file object suitable to read data""" | 718 """file object suitable to read sidedata""" |
710 # Use explicit file handle, if given. | 719 if self._writinghandles: |
711 if existingfp is not None: | 720 yield self._writinghandles[2] |
712 yield existingfp | |
713 | |
714 # Use a file handle being actively used for writes, if available. | |
715 # There is some danger to doing this because reads will seek the | |
716 # file. However, _writeentry() performs a SEEK_END before all writes, | |
717 # so we should be safe. | |
718 elif self._writinghandles: | |
719 if self._inline: | |
720 yield self._writinghandles[0] | |
721 else: | |
722 yield self._writinghandles[1] | |
723 | |
724 # Otherwise open a new file handle. | |
725 else: | 721 else: |
726 if self._inline: | 722 with self.opener(self._sidedatafile) as fp: |
727 func = self._indexfp | |
728 else: | |
729 func = self._datafp | |
730 with func() as fp: | |
731 yield fp | 723 yield fp |
732 | 724 |
733 def tiprev(self): | 725 def tiprev(self): |
734 return len(self.index) - 1 | 726 return len(self.index) - 1 |
735 | 727 |
783 ): | 775 ): |
784 return False | 776 return False |
785 return True | 777 return True |
786 | 778 |
787 def update_caches(self, transaction): | 779 def update_caches(self, transaction): |
788 if self.nodemap_file is not None: | 780 if self._nodemap_file is not None: |
789 if transaction is None: | 781 if transaction is None: |
790 nodemaputil.update_persistent_nodemap(self) | 782 nodemaputil.update_persistent_nodemap(self) |
791 else: | 783 else: |
792 nodemaputil.setup_persistent_nodemap(transaction, self) | 784 nodemaputil.setup_persistent_nodemap(transaction, self) |
793 | 785 |
794 def clearcaches(self): | 786 def clearcaches(self): |
795 self._revisioncache = None | 787 self._revisioncache = None |
796 self._chainbasecache.clear() | 788 self._chainbasecache.clear() |
797 self._chunkcache = (0, b'') | 789 self._segmentfile.clear_cache() |
790 self._segmentfile_sidedata.clear_cache() | |
798 self._pcache = {} | 791 self._pcache = {} |
799 self._nodemap_docket = None | 792 self._nodemap_docket = None |
800 self.index.clearcaches() | 793 self.index.clearcaches() |
801 # The python code is the one responsible for validating the docket, we | 794 # The python code is the one responsible for validating the docket, we |
802 # end up having to refresh it here. | 795 # end up having to refresh it here. |
803 use_nodemap = ( | 796 use_nodemap = ( |
804 not self._inline | 797 not self._inline |
805 and self.nodemap_file is not None | 798 and self._nodemap_file is not None |
806 and util.safehasattr(self.index, 'update_nodemap_data') | 799 and util.safehasattr(self.index, 'update_nodemap_data') |
807 ) | 800 ) |
808 if use_nodemap: | 801 if use_nodemap: |
809 nodemap_data = nodemaputil.persisted_data(self) | 802 nodemap_data = nodemaputil.persisted_data(self) |
810 if nodemap_data is not None: | 803 if nodemap_data is not None: |
816 return self.index.rev(node) | 809 return self.index.rev(node) |
817 except TypeError: | 810 except TypeError: |
818 raise | 811 raise |
819 except error.RevlogError: | 812 except error.RevlogError: |
820 # parsers.c radix tree lookup failed | 813 # parsers.c radix tree lookup failed |
821 if node == wdirid or node in wdirfilenodeids: | 814 if ( |
815 node == self.nodeconstants.wdirid | |
816 or node in self.nodeconstants.wdirfilenodeids | |
817 ): | |
822 raise error.WdirUnsupported | 818 raise error.WdirUnsupported |
823 raise error.LookupError(node, self.indexfile, _(b'no node')) | 819 raise error.LookupError(node, self.display_id, _(b'no node')) |
824 | 820 |
825 # Accessors for index entries. | 821 # Accessors for index entries. |
826 | 822 |
827 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes | 823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes |
828 # are flags. | 824 # are flags. |
829 def start(self, rev): | 825 def start(self, rev): |
830 return int(self.index[rev][0] >> 16) | 826 return int(self.index[rev][0] >> 16) |
831 | 827 |
828 def sidedata_cut_off(self, rev): | |
829 sd_cut_off = self.index[rev][8] | |
830 if sd_cut_off != 0: | |
831 return sd_cut_off | |
832 # This is some annoying dance, because entries without sidedata | |
833 # currently use 0 as their offset. (instead of previous-offset + |
834 # previous-size) | |
835 # | |
836 # We should reconsider this sidedata → 0 sidedata_offset policy. |
837 # In the meantime, we need this. | |
838 while 0 <= rev: | |
839 e = self.index[rev] | |
840 if e[9] != 0: | |
841 return e[8] + e[9] | |
842 rev -= 1 | |
843 return 0 | |
844 | |
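
A toy illustration of the backward scan in the new `sidedata_cut_off`: entries without sidedata store offset 0, so the cut-off falls back to the end (offset + length) of the last revision that actually has sidedata. The index tuples are reduced here to the two sidedata slots:

    # per-rev (sidedata offset, sidedata length); revs 0, 2 and 3 have none
    toy_index = [(0, 0), (0, 24), (0, 0), (0, 0)]

    def cut_off(index, rev):
        if index[rev][0] != 0:
            return index[rev][0]
        while rev >= 0:
            off, length = index[rev]
            if length != 0:
                return off + length  # end of the last real sidedata chunk
            rev -= 1
        return 0

    assert cut_off(toy_index, 3) == 24  # rev 1's chunk ends at 0 + 24
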
832 def flags(self, rev): | 845 def flags(self, rev): |
833 return self.index[rev][0] & 0xFFFF | 846 return self.index[rev][0] & 0xFFFF |
834 | 847 |
835 def length(self, rev): | 848 def length(self, rev): |
836 return self.index[rev][1] | 849 return self.index[rev][1] |
837 | 850 |
838 def sidedata_length(self, rev): | 851 def sidedata_length(self, rev): |
839 if self.version & 0xFFFF != REVLOGV2: | 852 if not self.hassidedata: |
840 return 0 | 853 return 0 |
841 return self.index[rev][9] | 854 return self.index[rev][9] |
842 | 855 |
843 def rawsize(self, rev): | 856 def rawsize(self, rev): |
844 """return the length of the uncompressed text for a given revision""" | 857 """return the length of the uncompressed text for a given revision""" |
994 checkrev = self.node | 1007 checkrev = self.node |
995 for r in revs: | 1008 for r in revs: |
996 checkrev(r) | 1009 checkrev(r) |
997 # and we're sure ancestors aren't filtered as well | 1010 # and we're sure ancestors aren't filtered as well |
998 | 1011 |
999 if rustancestor is not None: | 1012 if rustancestor is not None and self.index.rust_ext_compat: |
1000 lazyancestors = rustancestor.LazyAncestors | 1013 lazyancestors = rustancestor.LazyAncestors |
1001 arg = self.index | 1014 arg = self.index |
1002 else: | 1015 else: |
1003 lazyancestors = ancestor.lazyancestors | 1016 lazyancestors = ancestor.lazyancestors |
1004 arg = self._uncheckedparentrevs | 1017 arg = self._uncheckedparentrevs |
1019 | 1032 |
1020 'heads' and 'common' are both lists of node IDs. If heads is | 1033 'heads' and 'common' are both lists of node IDs. If heads is |
1021 not supplied, uses all of the revlog's heads. If common is not | 1034 not supplied, uses all of the revlog's heads. If common is not |
1022 supplied, uses nullid.""" | 1035 supplied, uses nullid.""" |
1023 if common is None: | 1036 if common is None: |
1024 common = [nullid] | 1037 common = [self.nullid] |
1025 if heads is None: | 1038 if heads is None: |
1026 heads = self.heads() | 1039 heads = self.heads() |
1027 | 1040 |
1028 common = [self.rev(n) for n in common] | 1041 common = [self.rev(n) for n in common] |
1029 heads = [self.rev(n) for n in heads] | 1042 heads = [self.rev(n) for n in heads] |
1081 nullrev. | 1094 nullrev. |
1082 """ | 1095 """ |
1083 if common is None: | 1096 if common is None: |
1084 common = [nullrev] | 1097 common = [nullrev] |
1085 | 1098 |
1086 if rustancestor is not None: | 1099 if rustancestor is not None and self.index.rust_ext_compat: |
1087 return rustancestor.MissingAncestors(self.index, common) | 1100 return rustancestor.MissingAncestors(self.index, common) |
1088 return ancestor.incrementalmissingancestors(self.parentrevs, common) | 1101 return ancestor.incrementalmissingancestors(self.parentrevs, common) |
1089 | 1102 |
1090 def findmissingrevs(self, common=None, heads=None): | 1103 def findmissingrevs(self, common=None, heads=None): |
1091 """Return the revision numbers of the ancestors of heads that | 1104 """Return the revision numbers of the ancestors of heads that |
1125 | 1138 |
1126 'heads' and 'common' are both lists of node IDs. If heads is | 1139 'heads' and 'common' are both lists of node IDs. If heads is |
1127 not supplied, uses all of the revlog's heads. If common is not | 1140 not supplied, uses all of the revlog's heads. If common is not |
1128 supplied, uses nullid.""" | 1141 supplied, uses nullid.""" |
1129 if common is None: | 1142 if common is None: |
1130 common = [nullid] | 1143 common = [self.nullid] |
1131 if heads is None: | 1144 if heads is None: |
1132 heads = self.heads() | 1145 heads = self.heads() |
1133 | 1146 |
1134 common = [self.rev(n) for n in common] | 1147 common = [self.rev(n) for n in common] |
1135 heads = [self.rev(n) for n in heads] | 1148 heads = [self.rev(n) for n in heads] |
1163 roots = list(roots) | 1176 roots = list(roots) |
1164 if not roots: | 1177 if not roots: |
1165 return nonodes | 1178 return nonodes |
1166 lowestrev = min([self.rev(n) for n in roots]) | 1179 lowestrev = min([self.rev(n) for n in roots]) |
1167 else: | 1180 else: |
1168 roots = [nullid] # Everybody's a descendant of nullid | 1181 roots = [self.nullid] # Everybody's a descendant of nullid |
1169 lowestrev = nullrev | 1182 lowestrev = nullrev |
1170 if (lowestrev == nullrev) and (heads is None): | 1183 if (lowestrev == nullrev) and (heads is None): |
1171 # We want _all_ the nodes! | 1184 # We want _all_ the nodes! |
1172 return ([self.node(r) for r in self], [nullid], list(self.heads())) | 1185 return ( |
1186 [self.node(r) for r in self], | |
1187 [self.nullid], | |
1188 list(self.heads()), | |
1189 ) | |
1173 if heads is None: | 1190 if heads is None: |
1174 # All nodes are ancestors, so the latest ancestor is the last | 1191 # All nodes are ancestors, so the latest ancestor is the last |
1175 # node. | 1192 # node. |
1176 highestrev = len(self) - 1 | 1193 highestrev = len(self) - 1 |
1177 # Set ancestors to None to signal that every node is an ancestor. | 1194 # Set ancestors to None to signal that every node is an ancestor. |
1193 highestrev = max([self.rev(n) for n in nodestotag]) | 1210 highestrev = max([self.rev(n) for n in nodestotag]) |
1194 while nodestotag: | 1211 while nodestotag: |
1195 # grab a node to tag | 1212 # grab a node to tag |
1196 n = nodestotag.pop() | 1213 n = nodestotag.pop() |
1197 # Never tag nullid | 1214 # Never tag nullid |
1198 if n == nullid: | 1215 if n == self.nullid: |
1199 continue | 1216 continue |
1200 # A node's revision number represents its place in a | 1217 # A node's revision number represents its place in a |
1201 # topologically sorted list of nodes. | 1218 # topologically sorted list of nodes. |
1202 r = self.rev(n) | 1219 r = self.rev(n) |
1203 if r >= lowestrev: | 1220 if r >= lowestrev: |
1205 # If we are possibly a descendant of one of the roots | 1222 # If we are possibly a descendant of one of the roots |
1206 # and we haven't already been marked as an ancestor | 1223 # and we haven't already been marked as an ancestor |
1207 ancestors.add(n) # Mark as ancestor | 1224 ancestors.add(n) # Mark as ancestor |
1208 # Add non-nullid parents to list of nodes to tag. | 1225 # Add non-nullid parents to list of nodes to tag. |
1209 nodestotag.update( | 1226 nodestotag.update( |
1210 [p for p in self.parents(n) if p != nullid] | 1227 [p for p in self.parents(n) if p != self.nullid] |
1211 ) | 1228 ) |
1212 elif n in heads: # We've seen it before, is it a fake head? | 1229 elif n in heads: # We've seen it before, is it a fake head? |
1213 # So it is, real heads should not be the ancestors of | 1230 # So it is, real heads should not be the ancestors of |
1214 # any other heads. | 1231 # any other heads. |
1215 heads.pop(n) | 1232 heads.pop(n) |
1233 return nonodes | 1250 return nonodes |
1234 else: | 1251 else: |
1235 # We are descending from nullid, and don't need to care about | 1252 # We are descending from nullid, and don't need to care about |
1236 # any other roots. | 1253 # any other roots. |
1237 lowestrev = nullrev | 1254 lowestrev = nullrev |
1238 roots = [nullid] | 1255 roots = [self.nullid] |
1239 # Transform our roots list into a set. | 1256 # Transform our roots list into a set. |
1240 descendants = set(roots) | 1257 descendants = set(roots) |
1241 # Also, keep the original roots so we can filter out roots that aren't | 1258 # Also, keep the original roots so we can filter out roots that aren't |
1242 # 'real' roots (i.e. are descended from other roots). | 1259 # 'real' roots (i.e. are descended from other roots). |
1243 roots = descendants.copy() | 1260 roots = descendants.copy() |
1297 if revs is None: | 1314 if revs is None: |
1298 try: | 1315 try: |
1299 return self.index.headrevs() | 1316 return self.index.headrevs() |
1300 except AttributeError: | 1317 except AttributeError: |
1301 return self._headrevs() | 1318 return self._headrevs() |
1302 if rustdagop is not None: | 1319 if rustdagop is not None and self.index.rust_ext_compat: |
1303 return rustdagop.headrevs(self.index, revs) | 1320 return rustdagop.headrevs(self.index, revs) |
1304 return dagop.headrevs(revs, self._uncheckedparentrevs) | 1321 return dagop.headrevs(revs, self._uncheckedparentrevs) |
1305 | 1322 |
1306 def computephases(self, roots): | 1323 def computephases(self, roots): |
1307 return self.index.computephasesmapsets(roots) | 1324 return self.index.computephasesmapsets(roots) |
1327 if stop is specified, it will consider all the revs from stop | 1344 if stop is specified, it will consider all the revs from stop |
1328 as if they had no children | 1345 as if they had no children |
1329 """ | 1346 """ |
1330 if start is None and stop is None: | 1347 if start is None and stop is None: |
1331 if not len(self): | 1348 if not len(self): |
1332 return [nullid] | 1349 return [self.nullid] |
1333 return [self.node(r) for r in self.headrevs()] | 1350 return [self.node(r) for r in self.headrevs()] |
1334 | 1351 |
1335 if start is None: | 1352 if start is None: |
1336 start = nullrev | 1353 start = nullrev |
1337 else: | 1354 else: |
1417 except (AttributeError, OverflowError): | 1434 except (AttributeError, OverflowError): |
1418 ancs = ancestor.ancestors(self.parentrevs, a, b) | 1435 ancs = ancestor.ancestors(self.parentrevs, a, b) |
1419 if ancs: | 1436 if ancs: |
1420 # choose a consistent winner when there's a tie | 1437 # choose a consistent winner when there's a tie |
1421 return min(map(self.node, ancs)) | 1438 return min(map(self.node, ancs)) |
1422 return nullid | 1439 return self.nullid |
1423 | 1440 |
1424 def _match(self, id): | 1441 def _match(self, id): |
1425 if isinstance(id, int): | 1442 if isinstance(id, int): |
1426 # rev | 1443 # rev |
1427 return self.node(id) | 1444 return self.node(id) |
1428 if len(id) == 20: | 1445 if len(id) == self.nodeconstants.nodelen: |
1429 # possibly a binary node | 1446 # possibly a binary node |
1430 # odds of a binary node being all hex in ASCII are 1 in 10**25 | 1447 # odds of a binary node being all hex in ASCII are 1 in 10**25 |
1431 try: | 1448 try: |
1432 node = id | 1449 node = id |
1433 self.rev(node) # quick search the index | 1450 self.rev(node) # quick search the index |
1444 if rev < 0 or rev >= len(self): | 1461 if rev < 0 or rev >= len(self): |
1445 raise ValueError | 1462 raise ValueError |
1446 return self.node(rev) | 1463 return self.node(rev) |
1447 except (ValueError, OverflowError): | 1464 except (ValueError, OverflowError): |
1448 pass | 1465 pass |
1449 if len(id) == 40: | 1466 if len(id) == 2 * self.nodeconstants.nodelen: |
1450 try: | 1467 try: |
1451 # a full hex nodeid? | 1468 # a full hex nodeid? |
1452 node = bin(id) | 1469 node = bin(id) |
1453 self.rev(node) | 1470 self.rev(node) |
1454 return node | 1471 return node |
1455 except (TypeError, error.LookupError): | 1472 except (TypeError, error.LookupError): |
1456 pass | 1473 pass |
1457 | 1474 |
1458 def _partialmatch(self, id): | 1475 def _partialmatch(self, id): |
1459 # we don't care wdirfilenodeids as they should be always full hash | 1476 # we don't care wdirfilenodeids as they should be always full hash |
1460 maybewdir = wdirhex.startswith(id) | 1477 maybewdir = self.nodeconstants.wdirhex.startswith(id) |
1478 ambiguous = False | |
1461 try: | 1479 try: |
1462 partial = self.index.partialmatch(id) | 1480 partial = self.index.partialmatch(id) |
1463 if partial and self.hasnode(partial): | 1481 if partial and self.hasnode(partial): |
1464 if maybewdir: | 1482 if maybewdir: |
1465 # single 'ff...' match in radix tree, ambiguous with wdir | 1483 # single 'ff...' match in radix tree, ambiguous with wdir |
1466 raise error.RevlogError | 1484 ambiguous = True |
1467 return partial | 1485 else: |
1468 if maybewdir: | 1486 return partial |
1487 elif maybewdir: | |
1469 # no 'ff...' match in radix tree, wdir identified | 1488 # no 'ff...' match in radix tree, wdir identified |
1470 raise error.WdirUnsupported | 1489 raise error.WdirUnsupported |
1471 return None | 1490 else: |
1491 return None | |
1472 except error.RevlogError: | 1492 except error.RevlogError: |
1473 # parsers.c radix tree lookup gave multiple matches | 1493 # parsers.c radix tree lookup gave multiple matches |
1474 # fast path: for unfiltered changelog, radix tree is accurate | 1494 # fast path: for unfiltered changelog, radix tree is accurate |
1475 if not getattr(self, 'filteredrevs', None): | 1495 if not getattr(self, 'filteredrevs', None): |
1476 raise error.AmbiguousPrefixLookupError( | 1496 ambiguous = True |
1477 id, self.indexfile, _(b'ambiguous identifier') | |
1478 ) | |
1479 # fall through to slow path that filters hidden revisions | 1497 # fall through to slow path that filters hidden revisions |
1480 except (AttributeError, ValueError): | 1498 except (AttributeError, ValueError): |
1481 # we are pure python, or key was too short to search radix tree | 1499 # we are pure python, or key was too short to search radix tree |
1482 pass | 1500 pass |
1501 if ambiguous: | |
1502 raise error.AmbiguousPrefixLookupError( | |
1503 id, self.display_id, _(b'ambiguous identifier') | |
1504 ) | |
1483 | 1505 |
1484 if id in self._pcache: | 1506 if id in self._pcache: |
1485 return self._pcache[id] | 1507 return self._pcache[id] |
1486 | 1508 |
1487 if len(id) <= 40: | 1509 if len(id) <= 40: |
1491 prefix = bin(id[: l * 2]) | 1513 prefix = bin(id[: l * 2]) |
1492 nl = [e[7] for e in self.index if e[7].startswith(prefix)] | 1514 nl = [e[7] for e in self.index if e[7].startswith(prefix)] |
1493 nl = [ | 1515 nl = [ |
1494 n for n in nl if hex(n).startswith(id) and self.hasnode(n) | 1516 n for n in nl if hex(n).startswith(id) and self.hasnode(n) |
1495 ] | 1517 ] |
1496 if nullhex.startswith(id): | 1518 if self.nodeconstants.nullhex.startswith(id): |
1497 nl.append(nullid) | 1519 nl.append(self.nullid) |
1498 if len(nl) > 0: | 1520 if len(nl) > 0: |
1499 if len(nl) == 1 and not maybewdir: | 1521 if len(nl) == 1 and not maybewdir: |
1500 self._pcache[id] = nl[0] | 1522 self._pcache[id] = nl[0] |
1501 return nl[0] | 1523 return nl[0] |
1502 raise error.AmbiguousPrefixLookupError( | 1524 raise error.AmbiguousPrefixLookupError( |
1503 id, self.indexfile, _(b'ambiguous identifier') | 1525 id, self.display_id, _(b'ambiguous identifier') |
1504 ) | 1526 ) |
1505 if maybewdir: | 1527 if maybewdir: |
1506 raise error.WdirUnsupported | 1528 raise error.WdirUnsupported |
1507 return None | 1529 return None |
1508 except TypeError: | 1530 except TypeError: |
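
The rewrite above defers the ambiguity error so that the slow path, which filters hidden revisions, still gets a chance to run. A toy version of the matching rules themselves, ignoring the radix tree and revlog plumbing (names here are illustrative):

    WDIRHEX = b'f' * 40  # hex of the virtual working-directory node

    def classify(prefix, known_hexes):
        matches = [h for h in known_hexes if h.startswith(prefix)]
        maybewdir = WDIRHEX.startswith(prefix)
        if len(matches) + bool(maybewdir) > 1:
            return 'ambiguous'
        if matches:
            return matches[0]
        return 'wdir' if maybewdir else None
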
1518 return n | 1540 return n |
1519 n = self._partialmatch(id) | 1541 n = self._partialmatch(id) |
1520 if n: | 1542 if n: |
1521 return n | 1543 return n |
1522 | 1544 |
1523 raise error.LookupError(id, self.indexfile, _(b'no match found')) | 1545 raise error.LookupError(id, self.display_id, _(b'no match found')) |
1524 | 1546 |
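The lookup flow above is easier to follow outside the radix-tree plumbing. A toy model of its semantics, assuming 20-byte binary node ids and a hex bytestring prefix (the function and its error types are illustrative, not the real API):

from binascii import hexlify

def resolve_prefix(nodes, prefix):
    # Toy model of _partialmatch: a unique match resolves; an all-'f'
    # prefix can also denote the in-memory working-directory
    # pseudo-node, which a revlog cannot store, hence the extra
    # ambiguity case.
    matches = [n for n in nodes if hexlify(n).startswith(prefix)]
    maybewdir = all(c == ord(b'f') for c in prefix)
    if len(matches) > 1 or (matches and maybewdir):
        raise LookupError('ambiguous identifier')
    if matches:
        return matches[0]
    if maybewdir:
        raise KeyError('wdir pseudo-revision is not stored in a revlog')
    return None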
1525 def shortest(self, node, minlength=1): | 1547 def shortest(self, node, minlength=1): |
1526 """Find the shortest unambiguous prefix that matches node.""" | 1548 """Find the shortest unambiguous prefix that matches node.""" |
1527 | 1549 |
1528 def isvalid(prefix): | 1550 def isvalid(prefix): |
1532 return False | 1554 return False |
1533 except error.WdirUnsupported: | 1555 except error.WdirUnsupported: |
1534 # single 'ff...' match | 1556 # single 'ff...' match |
1535 return True | 1557 return True |
1536 if matchednode is None: | 1558 if matchednode is None: |
1537 raise error.LookupError(node, self.indexfile, _(b'no node')) | 1559 raise error.LookupError(node, self.display_id, _(b'no node')) |
1538 return True | 1560 return True |
1539 | 1561 |
1540 def maybewdir(prefix): | 1562 def maybewdir(prefix): |
1541 return all(c == b'f' for c in pycompat.iterbytestr(prefix)) | 1563 return all(c == b'f' for c in pycompat.iterbytestr(prefix)) |
1542 | 1564 |
1552 if not getattr(self, 'filteredrevs', None): | 1574 if not getattr(self, 'filteredrevs', None): |
1553 try: | 1575 try: |
1554 length = max(self.index.shortest(node), minlength) | 1576 length = max(self.index.shortest(node), minlength) |
1555 return disambiguate(hexnode, length) | 1577 return disambiguate(hexnode, length) |
1556 except error.RevlogError: | 1578 except error.RevlogError: |
1557 if node != wdirid: | 1579 if node != self.nodeconstants.wdirid: |
1558 raise error.LookupError(node, self.indexfile, _(b'no node')) | 1580 raise error.LookupError( |
1581 node, self.display_id, _(b'no node') | |
1582 ) | |
1559 except AttributeError: | 1583 except AttributeError: |
1560 # Fall through to pure code | 1584 # Fall through to pure code |
1561 pass | 1585 pass |
1562 | 1586 |
1563 if node == wdirid: | 1587 if node == self.nodeconstants.wdirid: |
1564 for length in range(minlength, len(hexnode) + 1): | 1588 for length in range(minlength, len(hexnode) + 1): |
1565 prefix = hexnode[:length] | 1589 prefix = hexnode[:length] |
1566 if isvalid(prefix): | 1590 if isvalid(prefix): |
1567 return prefix | 1591 return prefix |
1568 | 1592 |
1576 | 1600 |
1577 returns True if text is different than what is stored. | 1601 returns True if text is different than what is stored. |
1578 """ | 1602 """ |
1579 p1, p2 = self.parents(node) | 1603 p1, p2 = self.parents(node) |
1580 return storageutil.hashrevisionsha1(text, p1, p2) != node | 1604 return storageutil.hashrevisionsha1(text, p1, p2) != node |
1581 | |
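For reference, storageutil.hashrevisionsha1 computes the classic revlog node checked by cmp above: SHA-1 over the two parent nodes in sorted order followed by the text. A minimal sketch for SHA-1 revlogs (helper name hypothetical):

import hashlib

def hashrevisionsha1_sketch(text, p1, p2):
    # Parents are concatenated in sorted order, so the node does not
    # depend on which parent is p1 and which is p2.
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()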
1582 def _cachesegment(self, offset, data): | |
1583 """Add a segment to the revlog cache. | |
1584 | |
1585 Accepts an absolute offset and the data that is at that location. | |
1586 """ | |
1587 o, d = self._chunkcache | |
1588 # try to add to existing cache | |
1589 if o + len(d) == offset and len(d) + len(data) < _chunksize: | |
1590 self._chunkcache = o, d + data | |
1591 else: | |
1592 self._chunkcache = offset, data | |
1593 | |
1594 def _readsegment(self, offset, length, df=None): | |
1595 """Load a segment of raw data from the revlog. | |
1596 | |
1597 Accepts an absolute offset, length to read, and an optional existing | |
1598 file handle to read from. | |
1599 | |
1600 If an existing file handle is passed, it will be seeked and the | |
1601 original seek position will NOT be restored. | |
1602 | |
1603 Returns a str or buffer of raw byte data. | |
1604 | |
1605 Raises if the requested number of bytes could not be read. | |
1606 """ | |
1607 # Cache data both forward and backward around the requested | |
1608 # data, in a fixed size window. This helps speed up operations | |
1609 # involving reading the revlog backwards. | |
1610 cachesize = self._chunkcachesize | |
1611 realoffset = offset & ~(cachesize - 1) | |
1612 reallength = ( | |
1613 (offset + length + cachesize) & ~(cachesize - 1) | |
1614 ) - realoffset | |
1615 with self._datareadfp(df) as df: | |
1616 df.seek(realoffset) | |
1617 d = df.read(reallength) | |
1618 | |
1619 self._cachesegment(realoffset, d) | |
1620 if offset != realoffset or reallength != length: | |
1621 startoffset = offset - realoffset | |
1622 if len(d) - startoffset < length: | |
1623 raise error.RevlogError( | |
1624 _( | |
1625 b'partial read of revlog %s; expected %d bytes from ' | |
1626 b'offset %d, got %d' | |
1627 ) | |
1628 % ( | |
1629 self.indexfile if self._inline else self.datafile, | |
1630 length, | |
1631 realoffset, | |
1632 len(d) - startoffset, | |
1633 ) | |
1634 ) | |
1635 | |
1636 return util.buffer(d, startoffset, length) | |
1637 | |
1638 if len(d) < length: | |
1639 raise error.RevlogError( | |
1640 _( | |
1641 b'partial read of revlog %s; expected %d bytes from offset ' | |
1642 b'%d, got %d' | |
1643 ) | |
1644 % ( | |
1645 self.indexfile if self._inline else self.datafile, | |
1646 length, | |
1647 offset, | |
1648 len(d), | |
1649 ) | |
1650 ) | |
1651 | |
1652 return d | |
1653 | |
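The window rounding in _readsegment above relies on _chunkcachesize being a power of two. The masking arithmetic in isolation (function name hypothetical):

def cache_window(offset, length, cachesize=65536):
    # Round the requested read down/up to cachesize-aligned boundaries
    # so that nearby reads, including backward ones, hit the cache.
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# e.g. cache_window(70000, 100) == (65536, 65536)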
1654 def _getsegment(self, offset, length, df=None): | |
1655 """Obtain a segment of raw data from the revlog. | |
1656 | |
1657 Accepts an absolute offset, length of bytes to obtain, and an | |
1658 optional file handle to the already-opened revlog. If the file | |
1659 handle is used, its original seek position will not be preserved. | |
1660 | |
1661 Requests for data may be returned from a cache. | |
1662 | |
1663 Returns a str or a buffer instance of raw byte data. | |
1664 """ | |
1665 o, d = self._chunkcache | |
1666 l = len(d) | |
1667 | |
1668 # is it in the cache? | |
1669 cachestart = offset - o | |
1670 cacheend = cachestart + length | |
1671 if cachestart >= 0 and cacheend <= l: | |
1672 if cachestart == 0 and cacheend == l: | |
1673 return d # avoid a copy | |
1674 return util.buffer(d, cachestart, cacheend - cachestart) | |
1675 | |
1676 return self._readsegment(offset, length, df=df) | |
1677 | 1605 |
1678 def _getsegmentforrevs(self, startrev, endrev, df=None): | 1606 def _getsegmentforrevs(self, startrev, endrev, df=None): |
1679 """Obtain a segment of raw data corresponding to a range of revisions. | 1607 """Obtain a segment of raw data corresponding to a range of revisions. |
1680 | 1608 |
1681 Accepts the start and end revisions and an optional already-open | 1609 Accepts the start and end revisions and an optional already-open |
1705 if self._inline: | 1633 if self._inline: |
1706 start += (startrev + 1) * self.index.entry_size | 1634 start += (startrev + 1) * self.index.entry_size |
1707 end += (endrev + 1) * self.index.entry_size | 1635 end += (endrev + 1) * self.index.entry_size |
1708 length = end - start | 1636 length = end - start |
1709 | 1637 |
1710 return start, self._getsegment(start, length, df=df) | 1638 return start, self._segmentfile.read_chunk(start, length, df) |
1711 | 1639 |
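In the inline case above, each revision's data is preceded in the .i file by its fixed-size index entry, so physical offsets shift by (rev + 1) * entry_size. Worked out separately (64 is the v1 entry size; the helper name is hypothetical):

def inline_data_span(start, end, startrev, endrev, entry_size=64):
    # Translate logical data offsets into physical offsets inside an
    # inline .i file, where index entries and data are interleaved.
    phys_start = start + (startrev + 1) * entry_size
    phys_end = end + (endrev + 1) * entry_size
    return phys_start, phys_end - phys_start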
1712 def _chunk(self, rev, df=None): | 1640 def _chunk(self, rev, df=None): |
1713 """Obtain a single decompressed chunk for a revision. | 1641 """Obtain a single decompressed chunk for a revision. |
1714 | 1642 |
1715 Accepts an integer revision and an optional already-open file handle | 1643 Accepts an integer revision and an optional already-open file handle |
1716 to be used for reading. If used, the seek position of the file will not | 1644 to be used for reading. If used, the seek position of the file will not |
1717 be preserved. | 1645 be preserved. |
1718 | 1646 |
1719 Returns a str holding uncompressed data for the requested revision. | 1647 Returns a str holding uncompressed data for the requested revision. |
1720 """ | 1648 """ |
1721 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1]) | 1649 compression_mode = self.index[rev][10] |
1650 data = self._getsegmentforrevs(rev, rev, df=df)[1] | |
1651 if compression_mode == COMP_MODE_PLAIN: | |
1652 return data | |
1653 elif compression_mode == COMP_MODE_DEFAULT: | |
1654 return self._decompressor(data) | |
1655 elif compression_mode == COMP_MODE_INLINE: | |
1656 return self.decompress(data) | |
1657 else: | |
1658 msg = b'unknown compression mode %d' | |
1659 msg %= compression_mode | |
1660 raise error.RevlogError(msg) | |
1722 | 1661 |
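The same three-way dispatch recurs in _chunks and _sidedata below; condensed into one helper for readability (a sketch, using the constants imported at the top of the module):

def decompress_by_mode(revlog, data, mode):
    # COMP_MODE_PLAIN: stored verbatim. COMP_MODE_DEFAULT: compressed
    # with the docket's default engine, no per-chunk header.
    # COMP_MODE_INLINE: self-describing first byte, as in revlog v1.
    if mode == COMP_MODE_PLAIN:
        return data
    if mode == COMP_MODE_DEFAULT:
        return revlog._decompressor(data)
    if mode == COMP_MODE_INLINE:
        return revlog.decompress(data)
    raise error.RevlogError(b'unknown compression mode %d' % mode)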
1723 def _chunks(self, revs, df=None, targetsize=None): | 1662 def _chunks(self, revs, df=None, targetsize=None): |
1724 """Obtain decompressed chunks for the specified revisions. | 1663 """Obtain decompressed chunks for the specified revisions. |
1725 | 1664 |
1726 Accepts an iterable of numeric revisions that are assumed to be in | 1665 Accepts an iterable of numeric revisions that are assumed to be in |
1764 # issue4215 - we can't cache a run of chunks greater than | 1703 # issue4215 - we can't cache a run of chunks greater than |
1765 # 2G on Windows | 1704 # 2G on Windows |
1766 return [self._chunk(rev, df=df) for rev in revschunk] | 1705 return [self._chunk(rev, df=df) for rev in revschunk] |
1767 | 1706 |
1768 decomp = self.decompress | 1707 decomp = self.decompress |
1708 # self._decompressor might be None, but will not be used in that case | |
1709 def_decomp = self._decompressor | |
1769 for rev in revschunk: | 1710 for rev in revschunk: |
1770 chunkstart = start(rev) | 1711 chunkstart = start(rev) |
1771 if inline: | 1712 if inline: |
1772 chunkstart += (rev + 1) * iosize | 1713 chunkstart += (rev + 1) * iosize |
1773 chunklength = length(rev) | 1714 chunklength = length(rev) |
1774 ladd(decomp(buffer(data, chunkstart - offset, chunklength))) | 1715 comp_mode = self.index[rev][10] |
1716 c = buffer(data, chunkstart - offset, chunklength) | |
1717 if comp_mode == COMP_MODE_PLAIN: | |
1718 ladd(c) | |
1719 elif comp_mode == COMP_MODE_INLINE: | |
1720 ladd(decomp(c)) | |
1721 elif comp_mode == COMP_MODE_DEFAULT: | |
1722 ladd(def_decomp(c)) | |
1723 else: | |
1724 msg = b'unknown compression mode %d' | |
1725 msg %= comp_mode | |
1726 raise error.RevlogError(msg) | |
1775 | 1727 |
1776 return l | 1728 return l |
1777 | |
1778 def _chunkclear(self): | |
1779 """Clear the raw chunk cache.""" | |
1780 self._chunkcache = (0, b'') | |
1781 | 1729 |
1782 def deltaparent(self, rev): | 1730 def deltaparent(self, rev): |
1783 """return deltaparent of the given revision""" | 1731 """return deltaparent of the given revision""" |
1784 base = self.index[rev][3] | 1732 base = self.index[rev][3] |
1785 if base == rev: | 1733 if base == rev: |
1852 msg = ( | 1800 msg = ( |
1853 b'revlog.revision(..., raw=True) is deprecated, ' | 1801 b'revlog.revision(..., raw=True) is deprecated, ' |
1854 b'use revlog.rawdata(...)' | 1802 b'use revlog.rawdata(...)' |
1855 ) | 1803 ) |
1856 util.nouideprecwarn(msg, b'5.2', stacklevel=2) | 1804 util.nouideprecwarn(msg, b'5.2', stacklevel=2) |
1857 return self._revisiondata(nodeorrev, _df, raw=raw)[0] | 1805 return self._revisiondata(nodeorrev, _df, raw=raw) |
1858 | 1806 |
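Callers hitting the deprecation above migrate as follows (rlog is an illustrative revlog instance):

# before (deprecated since 5.2):
rawtext = rlog.revision(node, raw=True)
# after:
rawtext = rlog.rawdata(node)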
1859 def sidedata(self, nodeorrev, _df=None): | 1807 def sidedata(self, nodeorrev, _df=None): |
1860 """a map of extra data related to the changeset but not part of the hash | 1808 """a map of extra data related to the changeset but not part of the hash |
1861 | 1809 |
1862 This function currently returns a dictionary. However, a more | 1810 This function currently returns a dictionary. However, a more |
1863 advanced mapping object will likely be used in the future for | 1811 advanced mapping object will likely be used in the future for |
1864 more efficient/lazy code. | 1812 more efficient/lazy code. |
1865 """ | 1813 """ |
1866 return self._revisiondata(nodeorrev, _df)[1] | 1814 # deal with <nodeorrev> argument type |
1815 if isinstance(nodeorrev, int): | |
1816 rev = nodeorrev | |
1817 else: | |
1818 rev = self.rev(nodeorrev) | |
1819 return self._sidedata(rev) | |
1867 | 1820 |
1868 def _revisiondata(self, nodeorrev, _df=None, raw=False): | 1821 def _revisiondata(self, nodeorrev, _df=None, raw=False): |
1869 # deal with <nodeorrev> argument type | 1822 # deal with <nodeorrev> argument type |
1870 if isinstance(nodeorrev, int): | 1823 if isinstance(nodeorrev, int): |
1871 rev = nodeorrev | 1824 rev = nodeorrev |
1873 else: | 1826 else: |
1874 node = nodeorrev | 1827 node = nodeorrev |
1875 rev = None | 1828 rev = None |
1876 | 1829 |
1877 # fast path the special `nullid` rev | 1830 # fast path the special `nullid` rev |
1878 if node == nullid: | 1831 if node == self.nullid: |
1879 return b"", {} | 1832 return b"" |
1880 | 1833 |
1881 # ``rawtext`` is the text as stored inside the revlog. Might be the | 1834 # ``rawtext`` is the text as stored inside the revlog. Might be the |
1882 # revision or might need to be processed to retrieve the revision. | 1835 # revision or might need to be processed to retrieve the revision. |
1883 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) | 1836 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) |
1884 | 1837 |
1885 if self.version & 0xFFFF == REVLOGV2: | |
1886 if rev is None: | |
1887 rev = self.rev(node) | |
1888 sidedata = self._sidedata(rev) | |
1889 else: | |
1890 sidedata = {} | |
1891 | |
1892 if raw and validated: | 1838 if raw and validated: |
1893 # if we don't want to process the raw text and that raw | 1839 # if we don't want to process the raw text and that raw |
1894 # text is cached, we can exit early. | 1840 # text is cached, we can exit early. |
1895 return rawtext, sidedata | 1841 return rawtext |
1896 if rev is None: | 1842 if rev is None: |
1897 rev = self.rev(node) | 1843 rev = self.rev(node) |
1898 # the revlog's flag for this revision | 1844 # the revlog's flag for this revision |
1899 # (usually alter its state or content) | 1845 # (usually alter its state or content) |
1900 flags = self.flags(rev) | 1846 flags = self.flags(rev) |
1901 | 1847 |
1902 if validated and flags == REVIDX_DEFAULT_FLAGS: | 1848 if validated and flags == REVIDX_DEFAULT_FLAGS: |
1903 # no extra flags set, no flag processor runs, text = rawtext | 1849 # no extra flags set, no flag processor runs, text = rawtext |
1904 return rawtext, sidedata | 1850 return rawtext |
1905 | 1851 |
1906 if raw: | 1852 if raw: |
1907 validatehash = flagutil.processflagsraw(self, rawtext, flags) | 1853 validatehash = flagutil.processflagsraw(self, rawtext, flags) |
1908 text = rawtext | 1854 text = rawtext |
1909 else: | 1855 else: |
1912 if validatehash: | 1858 if validatehash: |
1913 self.checkhash(text, node, rev=rev) | 1859 self.checkhash(text, node, rev=rev) |
1914 if not validated: | 1860 if not validated: |
1915 self._revisioncache = (node, rev, rawtext) | 1861 self._revisioncache = (node, rev, rawtext) |
1916 | 1862 |
1917 return text, sidedata | 1863 return text |
1918 | 1864 |
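With _revisiondata now returning only the text, callers that previously unpacked a (text, sidedata) pair fetch sidedata separately (illustrative):

# before: text, sidedata = rlog._revisiondata(nodeorrev)
text = rlog.revision(nodeorrev)      # or rlog.rawdata(...) for raw bytes
sidedata = rlog.sidedata(nodeorrev)  # {} when the revision has none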
1919 def _rawtext(self, node, rev, _df=None): | 1865 def _rawtext(self, node, rev, _df=None): |
1920 """return the possibly unvalidated rawtext for a revision | 1866 """return the possibly unvalidated rawtext for a revision |
1921 | 1867 |
1922 returns (rev, rawtext, validated) | 1868 returns (rev, rawtext, validated) |
1968 if self._inline: | 1914 if self._inline: |
1969 sidedata_offset += self.index.entry_size * (1 + rev) | 1915 sidedata_offset += self.index.entry_size * (1 + rev) |
1970 if sidedata_size == 0: | 1916 if sidedata_size == 0: |
1971 return {} | 1917 return {} |
1972 | 1918 |
1973 segment = self._getsegment(sidedata_offset, sidedata_size) | 1919 if self._docket.sidedata_end < sidedata_offset + sidedata_size: |
1920 filename = self._sidedatafile | |
1921 end = self._docket.sidedata_end | |
1922 offset = sidedata_offset | |
1923 length = sidedata_size | |
1924 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end) | |
1925 raise error.RevlogError(m) | |
1926 | |
1927 comp_segment = self._segmentfile_sidedata.read_chunk( | |
1928 sidedata_offset, sidedata_size | |
1929 ) | |
1930 | |
1931 comp = self.index[rev][11] | |
1932 if comp == COMP_MODE_PLAIN: | |
1933 segment = comp_segment | |
1934 elif comp == COMP_MODE_DEFAULT: | |
1935 segment = self._decompressor(comp_segment) | |
1936 elif comp == COMP_MODE_INLINE: | |
1937 segment = self.decompress(comp_segment) | |
1938 else: | |
1939 msg = b'unknown compression mode %d' | |
1940 msg %= comp | |
1941 raise error.RevlogError(msg) | |
1942 | |
1974 sidedata = sidedatautil.deserialize_sidedata(segment) | 1943 sidedata = sidedatautil.deserialize_sidedata(segment) |
1975 return sidedata | 1944 return sidedata |
1976 | 1945 |
1977 def rawdata(self, nodeorrev, _df=None): | 1946 def rawdata(self, nodeorrev, _df=None): |
1978 """return an uncompressed raw data of a given node or revision number. | 1947 """return an uncompressed raw data of a given node or revision number. |
1979 | 1948 |
1980 _df - an existing file handle to read from. (internal-only) | 1949 _df - an existing file handle to read from. (internal-only) |
1981 """ | 1950 """ |
1982 return self._revisiondata(nodeorrev, _df, raw=True)[0] | 1951 return self._revisiondata(nodeorrev, _df, raw=True) |
1983 | 1952 |
1984 def hash(self, text, p1, p2): | 1953 def hash(self, text, p1, p2): |
1985 """Compute a node hash. | 1954 """Compute a node hash. |
1986 | 1955 |
1987 Available as a function so that subclasses can replace the hash | 1956 Available as a function so that subclasses can replace the hash |
2011 revornode = rev | 1980 revornode = rev |
2012 if revornode is None: | 1981 if revornode is None: |
2013 revornode = templatefilters.short(hex(node)) | 1982 revornode = templatefilters.short(hex(node)) |
2014 raise error.RevlogError( | 1983 raise error.RevlogError( |
2015 _(b"integrity check failed on %s:%s") | 1984 _(b"integrity check failed on %s:%s") |
2016 % (self.indexfile, pycompat.bytestr(revornode)) | 1985 % (self.display_id, pycompat.bytestr(revornode)) |
2017 ) | 1986 ) |
2018 except error.RevlogError: | 1987 except error.RevlogError: |
2019 if self._censorable and storageutil.iscensoredtext(text): | 1988 if self._censorable and storageutil.iscensoredtext(text): |
2020 raise error.CensoredNodeError(self.indexfile, node, text) | 1989 raise error.CensoredNodeError(self.display_id, node, text) |
2021 raise | 1990 raise |
2022 | 1991 |
2023 def _enforceinlinesize(self, tr, fp=None): | 1992 def _enforceinlinesize(self, tr): |
2024 """Check if the revlog is too big for inline and convert if so. | 1993 """Check if the revlog is too big for inline and convert if so. |
2025 | 1994 |
2026 This should be called after revisions are added to the revlog. If the | 1995 This should be called after revisions are added to the revlog. If the |
2027 revlog has grown too large to be an inline revlog, it will convert it | 1996 revlog has grown too large to be an inline revlog, it will convert it |
2028 to use multiple index and data files. | 1997 to use multiple index and data files. |
2029 """ | 1998 """ |
2030 tiprev = len(self) - 1 | 1999 tiprev = len(self) - 1 |
2031 if ( | 2000 total_size = self.start(tiprev) + self.length(tiprev) |
2032 not self._inline | 2001 if not self._inline or total_size < _maxinline: |
2033 or (self.start(tiprev) + self.length(tiprev)) < _maxinline | |
2034 ): | |
2035 return | 2002 return |
2036 | 2003 |
2037 troffset = tr.findoffset(self.indexfile) | 2004 troffset = tr.findoffset(self._indexfile) |
2038 if troffset is None: | 2005 if troffset is None: |
2039 raise error.RevlogError( | 2006 raise error.RevlogError( |
2040 _(b"%s not found in the transaction") % self.indexfile | 2007 _(b"%s not found in the transaction") % self._indexfile |
2041 ) | 2008 ) |
2042 trindex = 0 | 2009 trindex = 0 |
2043 tr.add(self.datafile, 0) | 2010 tr.add(self._datafile, 0) |
2044 | 2011 |
2045 if fp: | 2012 existing_handles = False |
2013 if self._writinghandles is not None: | |
2014 existing_handles = True | |
2015 fp = self._writinghandles[0] | |
2046 fp.flush() | 2016 fp.flush() |
2047 fp.close() | 2017 fp.close() |
2048 # We can't use the cached file handle after close(). So prevent | 2018 # We can't use the cached file handle after close(). So prevent |
2049 # its usage. | 2019 # its usage. |
2050 self._writinghandles = None | 2020 self._writinghandles = None |
2051 | 2021 self._segmentfile.writing_handle = None |
2052 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh: | 2022 # No need to deal with sidedata writing handle as it is only |
2053 for r in self: | 2023 # relevant with revlog-v2 which is never inline, not reaching |
2054 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1]) | 2024 # this code |
2055 if troffset <= self.start(r): | 2025 |
2056 trindex = r | 2026 new_dfh = self._datafp(b'w+') |
2057 | 2027 new_dfh.truncate(0) # drop any potentially existing data |
2058 with self._indexfp(b'w') as fp: | 2028 try: |
2059 self.version &= ~FLAG_INLINE_DATA | 2029 with self._indexfp() as read_ifh: |
2060 self._inline = False | 2030 for r in self: |
2061 io = self._io | 2031 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1]) |
2062 for i in self: | 2032 if troffset <= self.start(r) + r * self.index.entry_size: |
2063 e = io.packentry(self.index[i], self.node, self.version, i) | 2033 trindex = r |
2064 fp.write(e) | 2034 new_dfh.flush() |
2065 | 2035 |
2066 # the temp file replaces the real index when we exit the context | 2036 with self.__index_new_fp() as fp: |
2067 # manager | 2037 self._format_flags &= ~FLAG_INLINE_DATA |
2068 | 2038 self._inline = False |
2069 tr.replace(self.indexfile, trindex * self.index.entry_size) | 2039 for i in self: |
2070 nodemaputil.setup_persistent_nodemap(tr, self) | 2040 e = self.index.entry_binary(i) |
2071 self._chunkclear() | 2041 if i == 0 and self._docket is None: |
2042 header = self._format_flags | self._format_version | |
2043 header = self.index.pack_header(header) | |
2044 e = header + e | |
2045 fp.write(e) | |
2046 if self._docket is not None: | |
2047 self._docket.index_end = fp.tell() | |
2048 | |
2049 # There is a small transactional race here. If the rename of | |
2050 # the index fails, we should remove the datafile. It is more | |
2051 # important to ensure that the data file is not truncated | |
2052 # when the index is replaced as otherwise data is lost. | |
2053 tr.replace(self._datafile, self.start(trindex)) | |
2054 | |
2055 # the temp file replaces the real index when we exit the context | |
2056 # manager | |
2057 | |
2058 tr.replace(self._indexfile, trindex * self.index.entry_size) | |
2059 nodemaputil.setup_persistent_nodemap(tr, self) | |
2060 self._segmentfile = randomaccessfile.randomaccessfile( | |
2061 self.opener, | |
2062 self._datafile, | |
2063 self._chunkcachesize, | |
2064 ) | |
2065 | |
2066 if existing_handles: | |
2067 # switched from inline to conventional; reopen the index | |
2068 ifh = self.__index_write_fp() | |
2069 self._writinghandles = (ifh, new_dfh, None) | |
2070 self._segmentfile.writing_handle = new_dfh | |
2071 new_dfh = None | |
2072 # No need to deal with sidedata writing handle as it is only | |
2073 # relevant with revlog-v2 which is never inline, not reaching | |
2074 # this code | |
2075 finally: | |
2076 if new_dfh is not None: | |
2077 new_dfh.close() | |
2072 | 2078 |
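Stripped of handle juggling and docket bookkeeping, the conversion above amounts to the following simplified sketch (assuming no docket; header is the format header with FLAG_INLINE_DATA already cleared; the real code goes through __index_new_fp):

def split_inline_sketch(opener, indexfile, datafile, revlog, header):
    # Copy every revision's data segment out of the inline .i file
    # into a standalone .d file...
    with opener(datafile, b'w+') as dfh, opener(indexfile, b'r') as ifh:
        for r in revlog:
            dfh.write(revlog._getsegmentforrevs(r, r, df=ifh)[1])
    # ...then rewrite the index alone, with the updated header.
    with opener(indexfile, b'w', atomictemp=True) as fp:
        for i in revlog:
            e = revlog.index.entry_binary(i)
            if i == 0:
                e = revlog.index.pack_header(header) + e
            fp.write(e)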
2073 def _nodeduplicatecallback(self, transaction, node): | 2079 def _nodeduplicatecallback(self, transaction, node): |
2074 """called when trying to add a node already stored.""" | 2080 """called when trying to add a node already stored.""" |
2081 | |
2082 @contextlib.contextmanager | |
2083 def reading(self): | |
2084 """Context manager that keeps data and sidedata files open for reading""" | |
2085 with self._segmentfile.reading(): | |
2086 with self._segmentfile_sidedata.reading(): | |
2087 yield | |
2088 | |
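Typical batch-read usage of the new context manager (rlog is illustrative):

# Keep the data and sidedata handles open across many reads instead
# of reopening them for every revision:
with rlog.reading():
    for rev in rlog:
        text = rlog.revision(rev)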
2089 @contextlib.contextmanager | |
2090 def _writing(self, transaction): | |
2091 if self._trypending: | |
2092 msg = b'trying to write in a `trypending` revlog: %s' | |
2093 msg %= self.display_id | |
2094 raise error.ProgrammingError(msg) | |
2095 if self._writinghandles is not None: | |
2096 yield | |
2097 else: | |
2098 ifh = dfh = sdfh = None | |
2099 try: | |
2100 r = len(self) | |
2101 # opening the data file. | |
2102 dsize = 0 | |
2103 if r: | |
2104 dsize = self.end(r - 1) | |
2105 dfh = None | |
2106 if not self._inline: | |
2107 try: | |
2108 dfh = self._datafp(b"r+") | |
2109 if self._docket is None: | |
2110 dfh.seek(0, os.SEEK_END) | |
2111 else: | |
2112 dfh.seek(self._docket.data_end, os.SEEK_SET) | |
2113 except IOError as inst: | |
2114 if inst.errno != errno.ENOENT: | |
2115 raise | |
2116 dfh = self._datafp(b"w+") | |
2117 transaction.add(self._datafile, dsize) | |
2118 if self._sidedatafile is not None: | |
2119 try: | |
2120 sdfh = self.opener(self._sidedatafile, mode=b"r+") | |
2121 dfh.seek(self._docket.sidedata_end, os.SEEK_SET) | |
2122 except IOError as inst: | |
2123 if inst.errno != errno.ENOENT: | |
2124 raise | |
2125 sdfh = self.opener(self._sidedatafile, mode=b"w+") | |
2126 transaction.add( | |
2127 self._sidedatafile, self._docket.sidedata_end | |
2128 ) | |
2129 | |
2130 # opening the index file. | |
2131 isize = r * self.index.entry_size | |
2132 ifh = self.__index_write_fp() | |
2133 if self._inline: | |
2134 transaction.add(self._indexfile, dsize + isize) | |
2135 else: | |
2136 transaction.add(self._indexfile, isize) | |
2137 # exposing all file handle for writing. | |
2138 self._writinghandles = (ifh, dfh, sdfh) | |
2139 self._segmentfile.writing_handle = ifh if self._inline else dfh | |
2140 self._segmentfile_sidedata.writing_handle = sdfh | |
2141 yield | |
2142 if self._docket is not None: | |
2143 self._write_docket(transaction) | |
2144 finally: | |
2145 self._writinghandles = None | |
2146 self._segmentfile.writing_handle = None | |
2147 self._segmentfile_sidedata.writing_handle = None | |
2148 if dfh is not None: | |
2149 dfh.close() | |
2150 if sdfh is not None: | |
2151 sdfh.close() | |
2152 # closing the index file last to avoid exposing references to | |
2153 # potentially unflushed data content. | |
2154 if ifh is not None: | |
2155 ifh.close() | |
2156 | |
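Every mutation path now funnels through _writing; a hedged usage sketch mirroring addrawrevision further down:

# The context opens index/data/sidedata handles, registers them with
# the transaction, and persists the docket (when present) on exit.
with rlog._writing(transaction):
    rlog._addrevision(node, rawtext, transaction, link, p1, p2, flags,
                      cachedelta=None)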
2157 def _write_docket(self, transaction): | |
2158 """write the current docket on disk | |
2159 | |
2160 Exists as a method to help the changelog implement transaction logic | |
2161 | |
2162 We could also imagine using the same transaction logic for all revlogs | |
2163 since dockets are cheap.""" | |
2164 self._docket.write(transaction) | |
2075 | 2165 |
2076 def addrevision( | 2166 def addrevision( |
2077 self, | 2167 self, |
2078 text, | 2168 text, |
2079 transaction, | 2169 transaction, |
2100 deltacomputer - an optional deltacomputer instance shared between | 2190 deltacomputer - an optional deltacomputer instance shared between |
2101 multiple calls | 2191 multiple calls |
2102 """ | 2192 """ |
2103 if link == nullrev: | 2193 if link == nullrev: |
2104 raise error.RevlogError( | 2194 raise error.RevlogError( |
2105 _(b"attempted to add linkrev -1 to %s") % self.indexfile | 2195 _(b"attempted to add linkrev -1 to %s") % self.display_id |
2106 ) | 2196 ) |
2107 | 2197 |
2108 if sidedata is None: | 2198 if sidedata is None: |
2109 sidedata = {} | 2199 sidedata = {} |
2110 elif not self.hassidedata: | 2200 elif sidedata and not self.hassidedata: |
2111 raise error.ProgrammingError( | 2201 raise error.ProgrammingError( |
2112 _(b"trying to add sidedata to a revlog who don't support them") | 2202 _(b"trying to add sidedata to a revlog who don't support them") |
2113 ) | 2203 ) |
2114 | 2204 |
2115 if flags: | 2205 if flags: |
2125 if len(rawtext) > _maxentrysize: | 2215 if len(rawtext) > _maxentrysize: |
2126 raise error.RevlogError( | 2216 raise error.RevlogError( |
2127 _( | 2217 _( |
2128 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB" | 2218 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB" |
2129 ) | 2219 ) |
2130 % (self.indexfile, len(rawtext)) | 2220 % (self.display_id, len(rawtext)) |
2131 ) | 2221 ) |
2132 | 2222 |
2133 node = node or self.hash(rawtext, p1, p2) | 2223 node = node or self.hash(rawtext, p1, p2) |
2134 rev = self.index.get_rev(node) | 2224 rev = self.index.get_rev(node) |
2135 if rev is not None: | 2225 if rev is not None: |
2166 ): | 2256 ): |
2167 """add a raw revision with known flags, node and parents | 2257 """add a raw revision with known flags, node and parents |
2168 useful when reusing a revision not stored in this revlog (ex: received | 2258 useful when reusing a revision not stored in this revlog (ex: received |
2169 over the wire, or read from an external bundle). | 2259 over the wire, or read from an external bundle). |
2170 """ | 2260 """ |
2171 dfh = None | 2261 with self._writing(transaction): |
2172 if not self._inline: | |
2173 dfh = self._datafp(b"a+") | |
2174 ifh = self._indexfp(b"a+") | |
2175 try: | |
2176 return self._addrevision( | 2262 return self._addrevision( |
2177 node, | 2263 node, |
2178 rawtext, | 2264 rawtext, |
2179 transaction, | 2265 transaction, |
2180 link, | 2266 link, |
2181 p1, | 2267 p1, |
2182 p2, | 2268 p2, |
2183 flags, | 2269 flags, |
2184 cachedelta, | 2270 cachedelta, |
2185 ifh, | |
2186 dfh, | |
2187 deltacomputer=deltacomputer, | 2271 deltacomputer=deltacomputer, |
2188 sidedata=sidedata, | 2272 sidedata=sidedata, |
2189 ) | 2273 ) |
2190 finally: | |
2191 if dfh: | |
2192 dfh.close() | |
2193 ifh.close() | |
2194 | 2274 |
2195 def compress(self, data): | 2275 def compress(self, data): |
2196 """Generate a possibly-compressed representation of data.""" | 2276 """Generate a possibly-compressed representation of data.""" |
2197 if not data: | 2277 if not data: |
2198 return b'', data | 2278 return b'', data |
2251 elif t == b'\0': | 2331 elif t == b'\0': |
2252 return data | 2332 return data |
2253 elif t == b'u': | 2333 elif t == b'u': |
2254 return util.buffer(data, 1) | 2334 return util.buffer(data, 1) |
2255 | 2335 |
2256 try: | 2336 compressor = self._get_decompressor(t) |
2257 compressor = self._decompressors[t] | |
2258 except KeyError: | |
2259 try: | |
2260 engine = util.compengines.forrevlogheader(t) | |
2261 compressor = engine.revlogcompressor(self._compengineopts) | |
2262 self._decompressors[t] = compressor | |
2263 except KeyError: | |
2264 raise error.RevlogError( | |
2265 _(b'unknown compression type %s') % binascii.hexlify(t) | |
2266 ) | |
2267 | 2337 |
2268 return compressor.decompress(data) | 2338 return compressor.decompress(data) |
2269 | 2339 |
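For reference, a toy model of the first-byte dispatch performed by decompress (the real lookup for unknown headers goes through util.compengines.forrevlogheader):

import zlib

def decompress_toy(data):
    # '\0' or empty: stored plain; 'u': literal with a one-byte marker;
    # 'x': zlib (0x78 is the zlib header byte); anything else is
    # resolved to a compression engine by its revlog header.
    if not data:
        return data
    t = data[0:1]
    if t == b'\x00':
        return data
    if t == b'u':
        return data[1:]
    if t == b'x':
        return zlib.decompress(data)
    raise ValueError('unknown compression type %r' % t)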
2270 def _addrevision( | 2340 def _addrevision( |
2271 self, | 2341 self, |
2275 link, | 2345 link, |
2276 p1, | 2346 p1, |
2277 p2, | 2347 p2, |
2278 flags, | 2348 flags, |
2279 cachedelta, | 2349 cachedelta, |
2280 ifh, | |
2281 dfh, | |
2282 alwayscache=False, | 2350 alwayscache=False, |
2283 deltacomputer=None, | 2351 deltacomputer=None, |
2284 sidedata=None, | 2352 sidedata=None, |
2285 ): | 2353 ): |
2286 """internal function to add revisions to the log | 2354 """internal function to add revisions to the log |
2294 | 2362 |
2295 invariants: | 2363 invariants: |
2296 - rawtext is optional (can be None); if not set, cachedelta must be set. | 2364 - rawtext is optional (can be None); if not set, cachedelta must be set. |
2297 if both are set, they must correspond to each other. | 2365 if both are set, they must correspond to each other. |
2298 """ | 2366 """ |
2299 if node == nullid: | 2367 if node == self.nullid: |
2300 raise error.RevlogError( | 2368 raise error.RevlogError( |
2301 _(b"%s: attempt to add null revision") % self.indexfile | 2369 _(b"%s: attempt to add null revision") % self.display_id |
2302 ) | 2370 ) |
2303 if node == wdirid or node in wdirfilenodeids: | 2371 if ( |
2372 node == self.nodeconstants.wdirid | |
2373 or node in self.nodeconstants.wdirfilenodeids | |
2374 ): | |
2304 raise error.RevlogError( | 2375 raise error.RevlogError( |
2305 _(b"%s: attempt to add wdir revision") % self.indexfile | 2376 _(b"%s: attempt to add wdir revision") % self.display_id |
2306 ) | 2377 ) |
2378 if self._writinghandles is None: | |
2379 msg = b'adding revision outside `revlog._writing` context' | |
2380 raise error.ProgrammingError(msg) | |
2307 | 2381 |
2308 if self._inline: | 2382 if self._inline: |
2309 fh = ifh | 2383 fh = self._writinghandles[0] |
2310 else: | 2384 else: |
2311 fh = dfh | 2385 fh = self._writinghandles[1] |
2312 | 2386 |
2313 btext = [rawtext] | 2387 btext = [rawtext] |
2314 | 2388 |
2315 curr = len(self) | 2389 curr = len(self) |
2316 prev = curr - 1 | 2390 prev = curr - 1 |
2317 | 2391 |
2318 offset = self._get_data_offset(prev) | 2392 offset = self._get_data_offset(prev) |
2319 | 2393 |
2320 if self._concurrencychecker: | 2394 if self._concurrencychecker: |
2395 ifh, dfh, sdfh = self._writinghandles | |
2396 # XXX no checking for the sidedata file | |
2321 if self._inline: | 2397 if self._inline: |
2322 # offset is "as if" it were in the .d file, so we need to add on | 2398 # offset is "as if" it were in the .d file, so we need to add on |
2323 # the size of the entry metadata. | 2399 # the size of the entry metadata. |
2324 self._concurrencychecker( | 2400 self._concurrencychecker( |
2325 ifh, self.indexfile, offset + curr * self.index.entry_size | 2401 ifh, self._indexfile, offset + curr * self.index.entry_size |
2326 ) | 2402 ) |
2327 else: | 2403 else: |
2328 # Entries in the .i are a consistent size. | 2404 # Entries in the .i are a consistent size. |
2329 self._concurrencychecker( | 2405 self._concurrencychecker( |
2330 ifh, self.indexfile, curr * self.index.entry_size | 2406 ifh, self._indexfile, curr * self.index.entry_size |
2331 ) | 2407 ) |
2332 self._concurrencychecker(dfh, self.datafile, offset) | 2408 self._concurrencychecker(dfh, self._datafile, offset) |
2333 | 2409 |
2334 p1r, p2r = self.rev(p1), self.rev(p2) | 2410 p1r, p2r = self.rev(p1), self.rev(p2) |
2335 | 2411 |
2336 # full versions are inserted when the needed deltas | 2412 # full versions are inserted when the needed deltas |
2337 # become comparable to the uncompressed text | 2413 # become comparable to the uncompressed text |
2346 textlen = len(rawtext) | 2422 textlen = len(rawtext) |
2347 | 2423 |
2348 if deltacomputer is None: | 2424 if deltacomputer is None: |
2349 deltacomputer = deltautil.deltacomputer(self) | 2425 deltacomputer = deltautil.deltacomputer(self) |
2350 | 2426 |
2351 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) | 2427 revinfo = revlogutils.revisioninfo( |
2428 node, | |
2429 p1, | |
2430 p2, | |
2431 btext, | |
2432 textlen, | |
2433 cachedelta, | |
2434 flags, | |
2435 ) | |
2352 | 2436 |
2353 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) | 2437 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) |
2354 | 2438 |
2355 if sidedata: | 2439 compression_mode = COMP_MODE_INLINE |
2440 if self._docket is not None: | |
2441 default_comp = self._docket.default_compression_header | |
2442 r = deltautil.delta_compression(default_comp, deltainfo) | |
2443 compression_mode, deltainfo = r | |
2444 | |
2445 sidedata_compression_mode = COMP_MODE_INLINE | |
2446 if sidedata and self.hassidedata: | |
2447 sidedata_compression_mode = COMP_MODE_PLAIN | |
2356 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) | 2448 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) |
2357 sidedata_offset = offset + deltainfo.deltalen | 2449 sidedata_offset = self._docket.sidedata_end |
2450 h, comp_sidedata = self.compress(serialized_sidedata) | |
2451 if ( | |
2452 h != b'u' | |
2453 and comp_sidedata[0:1] != b'\0' | |
2454 and len(comp_sidedata) < len(serialized_sidedata) | |
2455 ): | |
2456 assert not h | |
2457 if ( | |
2458 comp_sidedata[0:1] | |
2459 == self._docket.default_compression_header | |
2460 ): | |
2461 sidedata_compression_mode = COMP_MODE_DEFAULT | |
2462 serialized_sidedata = comp_sidedata | |
2463 else: | |
2464 sidedata_compression_mode = COMP_MODE_INLINE | |
2465 serialized_sidedata = comp_sidedata | |
2358 else: | 2466 else: |
2359 serialized_sidedata = b"" | 2467 serialized_sidedata = b"" |
2360 # Don't store the offset if the sidedata is empty, that way | 2468 # Don't store the offset if the sidedata is empty, that way |
2361 # we can easily detect empty sidedata, and it will be no different | 2469 # we can easily detect empty sidedata, and it will be no different |
2362 # from sidedata we add manually. | 2470 # from sidedata we add manually. |
2363 sidedata_offset = 0 | 2471 sidedata_offset = 0 |
2364 | 2472 |
2365 e = ( | 2473 e = revlogutils.entry( |
2366 offset_type(offset, flags), | 2474 flags=flags, |
2367 deltainfo.deltalen, | 2475 data_offset=offset, |
2368 textlen, | 2476 data_compressed_length=deltainfo.deltalen, |
2369 deltainfo.base, | 2477 data_uncompressed_length=textlen, |
2370 link, | 2478 data_compression_mode=compression_mode, |
2371 p1r, | 2479 data_delta_base=deltainfo.base, |
2372 p2r, | 2480 link_rev=link, |
2373 node, | 2481 parent_rev_1=p1r, |
2374 sidedata_offset, | 2482 parent_rev_2=p2r, |
2375 len(serialized_sidedata), | 2483 node_id=node, |
2484 sidedata_offset=sidedata_offset, | |
2485 sidedata_compressed_length=len(serialized_sidedata), | |
2486 sidedata_compression_mode=sidedata_compression_mode, | |
2376 ) | 2487 ) |
2377 | 2488 |
2378 if self.version & 0xFFFF != REVLOGV2: | |
2379 e = e[:8] | |
2380 | |
2381 self.index.append(e) | 2489 self.index.append(e) |
2382 entry = self._io.packentry(e, self.node, self.version, curr) | 2490 entry = self.index.entry_binary(curr) |
2491 if curr == 0 and self._docket is None: | |
2492 header = self._format_flags | self._format_version | |
2493 header = self.index.pack_header(header) | |
2494 entry = header + entry | |
2383 self._writeentry( | 2495 self._writeentry( |
2384 transaction, | 2496 transaction, |
2385 ifh, | |
2386 dfh, | |
2387 entry, | 2497 entry, |
2388 deltainfo.data, | 2498 deltainfo.data, |
2389 link, | 2499 link, |
2390 offset, | 2500 offset, |
2391 serialized_sidedata, | 2501 serialized_sidedata, |
2502 sidedata_offset, | |
2392 ) | 2503 ) |
2393 | 2504 |
2394 rawtext = btext[0] | 2505 rawtext = btext[0] |
2395 | 2506 |
2396 if alwayscache and rawtext is None: | 2507 if alwayscache and rawtext is None: |
2408 end of the data file within a transaction, you can have cases where, for | 2519 end of the data file within a transaction, you can have cases where, for |
2409 example, rev `n` does not have sidedata while rev `n - 1` does, leading | 2520 example, rev `n` does not have sidedata while rev `n - 1` does, leading |
2410 to `n - 1`'s sidedata being written after `n`'s data. | 2521 to `n - 1`'s sidedata being written after `n`'s data. |
2411 | 2522 |
2412 TODO cache this in a docket file before getting out of experimental.""" | 2523 TODO cache this in a docket file before getting out of experimental.""" |
2413 if self.version & 0xFFFF != REVLOGV2: | 2524 if self._docket is None: |
2414 return self.end(prev) | 2525 return self.end(prev) |
2415 | 2526 else: |
2416 offset = 0 | 2527 return self._docket.data_end |
2417 for rev, entry in enumerate(self.index): | |
2418 sidedata_end = entry[8] + entry[9] | |
2419 # Sidedata for a previous rev has potentially been written after | |
2420 # this rev's end, so take the max. | |
2421 offset = max(self.end(rev), offset, sidedata_end) | |
2422 return offset | |
2423 | 2528 |
2424 def _writeentry( | 2529 def _writeentry( |
2425 self, transaction, ifh, dfh, entry, data, link, offset, sidedata | 2530 self, transaction, entry, data, link, offset, sidedata, sidedata_offset |
2426 ): | 2531 ): |
2427 # Files opened in a+ mode have inconsistent behavior on various | 2532 # Files opened in a+ mode have inconsistent behavior on various |
2428 # platforms. Windows requires that a file positioning call be made | 2533 # platforms. Windows requires that a file positioning call be made |
2429 # when the file handle transitions between reads and writes. See | 2534 # when the file handle transitions between reads and writes. See |
2430 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other | 2535 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other |
2434 # | 2539 # |
2435 # We work around this issue by inserting a seek() before writing. | 2540 # We work around this issue by inserting a seek() before writing. |
2436 # Note: This is likely not necessary on Python 3. However, because | 2541 # Note: This is likely not necessary on Python 3. However, because |
2437 # the file handle is reused for reads and may be seeked there, we need | 2542 # the file handle is reused for reads and may be seeked there, we need |
2438 # to be careful before changing this. | 2543 # to be careful before changing this. |
2439 ifh.seek(0, os.SEEK_END) | 2544 if self._writinghandles is None: |
2545 msg = b'adding revision outside `revlog._writing` context' | |
2546 raise error.ProgrammingError(msg) | |
2547 ifh, dfh, sdfh = self._writinghandles | |
2548 if self._docket is None: | |
2549 ifh.seek(0, os.SEEK_END) | |
2550 else: | |
2551 ifh.seek(self._docket.index_end, os.SEEK_SET) | |
2440 if dfh: | 2552 if dfh: |
2441 dfh.seek(0, os.SEEK_END) | 2553 if self._docket is None: |
2554 dfh.seek(0, os.SEEK_END) | |
2555 else: | |
2556 dfh.seek(self._docket.data_end, os.SEEK_SET) | |
2557 if sdfh: | |
2558 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET) | |
2442 | 2559 |
2443 curr = len(self) - 1 | 2560 curr = len(self) - 1 |
2444 if not self._inline: | 2561 if not self._inline: |
2445 transaction.add(self.datafile, offset) | 2562 transaction.add(self._datafile, offset) |
2446 transaction.add(self.indexfile, curr * len(entry)) | 2563 if self._sidedatafile: |
2564 transaction.add(self._sidedatafile, sidedata_offset) | |
2565 transaction.add(self._indexfile, curr * len(entry)) | |
2447 if data[0]: | 2566 if data[0]: |
2448 dfh.write(data[0]) | 2567 dfh.write(data[0]) |
2449 dfh.write(data[1]) | 2568 dfh.write(data[1]) |
2450 if sidedata: | 2569 if sidedata: |
2451 dfh.write(sidedata) | 2570 sdfh.write(sidedata) |
2452 ifh.write(entry) | 2571 ifh.write(entry) |
2453 else: | 2572 else: |
2454 offset += curr * self.index.entry_size | 2573 offset += curr * self.index.entry_size |
2455 transaction.add(self.indexfile, offset) | 2574 transaction.add(self._indexfile, offset) |
2456 ifh.write(entry) | 2575 ifh.write(entry) |
2457 ifh.write(data[0]) | 2576 ifh.write(data[0]) |
2458 ifh.write(data[1]) | 2577 ifh.write(data[1]) |
2459 if sidedata: | 2578 assert not sidedata |
2460 ifh.write(sidedata) | 2579 self._enforceinlinesize(transaction) |
2461 self._enforceinlinesize(transaction, ifh) | 2580 if self._docket is not None: |
2581 self._docket.index_end = self._writinghandles[0].tell() | |
2582 self._docket.data_end = self._writinghandles[1].tell() | |
2583 self._docket.sidedata_end = self._writinghandles[2].tell() | |
2584 | |
2462 nodemaputil.setup_persistent_nodemap(transaction, self) | 2585 nodemaputil.setup_persistent_nodemap(transaction, self) |
2463 | 2586 |
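The seek logic above distinguishes docket-backed revlogs from plain ones; in isolation (a sketch, names hypothetical):

import os

def position_for_append(fh, docket_end=None):
    # Without a docket, append at physical EOF. With a docket, seek to
    # its recorded end offset instead: bytes past that point may be
    # leftovers from an interrupted transaction and must be overwritten.
    if docket_end is None:
        fh.seek(0, os.SEEK_END)
    else:
        fh.seek(docket_end, os.SEEK_SET)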
2464 def addgroup( | 2587 def addgroup( |
2465 self, | 2588 self, |
2466 deltas, | 2589 deltas, |
2479 | 2602 |
2480 If ``addrevisioncb`` is defined, it will be called with arguments of | 2603 If ``addrevisioncb`` is defined, it will be called with arguments of |
2481 this revlog and the node that was added. | 2604 this revlog and the node that was added. |
2482 """ | 2605 """ |
2483 | 2606 |
2484 if self._writinghandles: | 2607 if self._adding_group: |
2485 raise error.ProgrammingError(b'cannot nest addgroup() calls') | 2608 raise error.ProgrammingError(b'cannot nest addgroup() calls') |
2486 | 2609 |
2487 r = len(self) | 2610 self._adding_group = True |
2488 end = 0 | |
2489 if r: | |
2490 end = self.end(r - 1) | |
2491 ifh = self._indexfp(b"a+") | |
2492 isize = r * self.index.entry_size | |
2493 if self._inline: | |
2494 transaction.add(self.indexfile, end + isize) | |
2495 dfh = None | |
2496 else: | |
2497 transaction.add(self.indexfile, isize) | |
2498 transaction.add(self.datafile, end) | |
2499 dfh = self._datafp(b"a+") | |
2500 | |
2501 def flush(): | |
2502 if dfh: | |
2503 dfh.flush() | |
2504 ifh.flush() | |
2505 | |
2506 self._writinghandles = (ifh, dfh) | |
2507 empty = True | 2611 empty = True |
2508 | |
2509 try: | 2612 try: |
2510 deltacomputer = deltautil.deltacomputer(self) | 2613 with self._writing(transaction): |
2511 # loop through our set of deltas | 2614 deltacomputer = deltautil.deltacomputer(self) |
2512 for data in deltas: | 2615 # loop through our set of deltas |
2513 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data | 2616 for data in deltas: |
2514 link = linkmapper(linknode) | 2617 ( |
2515 flags = flags or REVIDX_DEFAULT_FLAGS | 2618 node, |
2516 | 2619 p1, |
2517 rev = self.index.get_rev(node) | 2620 p2, |
2518 if rev is not None: | 2621 linknode, |
2519 # this can happen if two branches make the same change | 2622 deltabase, |
2520 self._nodeduplicatecallback(transaction, rev) | 2623 delta, |
2521 if duplicaterevisioncb: | 2624 flags, |
2522 duplicaterevisioncb(self, rev) | 2625 sidedata, |
2626 ) = data | |
2627 link = linkmapper(linknode) | |
2628 flags = flags or REVIDX_DEFAULT_FLAGS | |
2629 | |
2630 rev = self.index.get_rev(node) | |
2631 if rev is not None: | |
2632 # this can happen if two branches make the same change | |
2633 self._nodeduplicatecallback(transaction, rev) | |
2634 if duplicaterevisioncb: | |
2635 duplicaterevisioncb(self, rev) | |
2636 empty = False | |
2637 continue | |
2638 | |
2639 for p in (p1, p2): | |
2640 if not self.index.has_node(p): | |
2641 raise error.LookupError( | |
2642 p, self.radix, _(b'unknown parent') | |
2643 ) | |
2644 | |
2645 if not self.index.has_node(deltabase): | |
2646 raise error.LookupError( | |
2647 deltabase, self.display_id, _(b'unknown delta base') | |
2648 ) | |
2649 | |
2650 baserev = self.rev(deltabase) | |
2651 | |
2652 if baserev != nullrev and self.iscensored(baserev): | |
2653 # if base is censored, delta must be full replacement in a | |
2654 # single patch operation | |
2655 hlen = struct.calcsize(b">lll") | |
2656 oldlen = self.rawsize(baserev) | |
2657 newlen = len(delta) - hlen | |
2658 if delta[:hlen] != mdiff.replacediffheader( | |
2659 oldlen, newlen | |
2660 ): | |
2661 raise error.CensoredBaseError( | |
2662 self.display_id, self.node(baserev) | |
2663 ) | |
2664 | |
2665 if not flags and self._peek_iscensored(baserev, delta): | |
2666 flags |= REVIDX_ISCENSORED | |
2667 | |
2668 # We assume consumers of addrevisioncb will want to retrieve | |
2669 # the added revision, which will require a call to | |
2670 # revision(). revision() will fast path if there is a cache | |
2671 # hit. So, we tell _addrevision() to always cache in this case. | |
2672 # We're only using addgroup() in the context of changegroup | |
2673 # generation so the revision data can always be handled as raw | |
2674 # by the flagprocessor. | |
2675 rev = self._addrevision( | |
2676 node, | |
2677 None, | |
2678 transaction, | |
2679 link, | |
2680 p1, | |
2681 p2, | |
2682 flags, | |
2683 (baserev, delta), | |
2684 alwayscache=alwayscache, | |
2685 deltacomputer=deltacomputer, | |
2686 sidedata=sidedata, | |
2687 ) | |
2688 | |
2689 if addrevisioncb: | |
2690 addrevisioncb(self, rev) | |
2523 empty = False | 2691 empty = False |
2524 continue | |
2525 | |
2526 for p in (p1, p2): | |
2527 if not self.index.has_node(p): | |
2528 raise error.LookupError( | |
2529 p, self.indexfile, _(b'unknown parent') | |
2530 ) | |
2531 | |
2532 if not self.index.has_node(deltabase): | |
2533 raise error.LookupError( | |
2534 deltabase, self.indexfile, _(b'unknown delta base') | |
2535 ) | |
2536 | |
2537 baserev = self.rev(deltabase) | |
2538 | |
2539 if baserev != nullrev and self.iscensored(baserev): | |
2540 # if base is censored, delta must be full replacement in a | |
2541 # single patch operation | |
2542 hlen = struct.calcsize(b">lll") | |
2543 oldlen = self.rawsize(baserev) | |
2544 newlen = len(delta) - hlen | |
2545 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): | |
2546 raise error.CensoredBaseError( | |
2547 self.indexfile, self.node(baserev) | |
2548 ) | |
2549 | |
2550 if not flags and self._peek_iscensored(baserev, delta, flush): | |
2551 flags |= REVIDX_ISCENSORED | |
2552 | |
2553 # We assume consumers of addrevisioncb will want to retrieve | |
2554 # the added revision, which will require a call to | |
2555 # revision(). revision() will fast path if there is a cache | |
2556 # hit. So, we tell _addrevision() to always cache in this case. | |
2557 # We're only using addgroup() in the context of changegroup | |
2558 # generation so the revision data can always be handled as raw | |
2559 # by the flagprocessor. | |
2560 rev = self._addrevision( | |
2561 node, | |
2562 None, | |
2563 transaction, | |
2564 link, | |
2565 p1, | |
2566 p2, | |
2567 flags, | |
2568 (baserev, delta), | |
2569 ifh, | |
2570 dfh, | |
2571 alwayscache=alwayscache, | |
2572 deltacomputer=deltacomputer, | |
2573 sidedata=sidedata, | |
2574 ) | |
2575 | |
2576 if addrevisioncb: | |
2577 addrevisioncb(self, rev) | |
2578 empty = False | |
2579 | |
2580 if not dfh and not self._inline: | |
2581 # addrevision switched from inline to conventional | |
2582 # reopen the index | |
2583 ifh.close() | |
2584 dfh = self._datafp(b"a+") | |
2585 ifh = self._indexfp(b"a+") | |
2586 self._writinghandles = (ifh, dfh) | |
2587 finally: | 2692 finally: |
2588 self._writinghandles = None | 2693 self._adding_group = False |
2589 | |
2590 if dfh: | |
2591 dfh.close() | |
2592 ifh.close() | |
2593 return not empty | 2694 return not empty |
2594 | 2695 |
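The censored-base check inside the loop verifies that the incoming delta is a single full-replacement patch; spelled out below, assuming mdiff.replacediffheader(oldlen, newlen) packs the triple (0, oldlen, newlen), which matches its use here:

import struct

def is_full_replacement(delta, oldlen):
    # A delta applied on top of a censored base must be one patch hunk
    # rewriting the whole old text; the header is (start, end, newlen).
    hlen = struct.calcsize(b">lll")
    newlen = len(delta) - hlen
    return delta[:hlen] == struct.pack(b">lll", 0, oldlen, newlen)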
2595 def iscensored(self, rev): | 2696 def iscensored(self, rev): |
2596 """Check if a file revision is censored.""" | 2697 """Check if a file revision is censored.""" |
2597 if not self._censorable: | 2698 if not self._censorable: |
2598 return False | 2699 return False |
2599 | 2700 |
2600 return self.flags(rev) & REVIDX_ISCENSORED | 2701 return self.flags(rev) & REVIDX_ISCENSORED |
2601 | 2702 |
2602 def _peek_iscensored(self, baserev, delta, flush): | 2703 def _peek_iscensored(self, baserev, delta): |
2603 """Quickly check if a delta produces a censored revision.""" | 2704 """Quickly check if a delta produces a censored revision.""" |
2604 if not self._censorable: | 2705 if not self._censorable: |
2605 return False | 2706 return False |
2606 | 2707 |
2607 return storageutil.deltaiscensored(delta, baserev, self.rawsize) | 2708 return storageutil.deltaiscensored(delta, baserev, self.rawsize) |
2640 rev, _ = self.getstrippoint(minlink) | 2741 rev, _ = self.getstrippoint(minlink) |
2641 if rev == len(self): | 2742 if rev == len(self): |
2642 return | 2743 return |
2643 | 2744 |
2644 # first truncate the files on disk | 2745 # first truncate the files on disk |
2645 end = self.start(rev) | 2746 data_end = self.start(rev) |
2646 if not self._inline: | 2747 if not self._inline: |
2647 transaction.add(self.datafile, end) | 2748 transaction.add(self._datafile, data_end) |
2648 end = rev * self.index.entry_size | 2749 end = rev * self.index.entry_size |
2649 else: | 2750 else: |
2650 end += rev * self.index.entry_size | 2751 end = data_end + (rev * self.index.entry_size) |
2651 | 2752 |
2652 transaction.add(self.indexfile, end) | 2753 if self._sidedatafile: |
2754 sidedata_end = self.sidedata_cut_off(rev) | |
2755 transaction.add(self._sidedatafile, sidedata_end) | |
2756 | |
2757 transaction.add(self._indexfile, end) | |
2758 if self._docket is not None: | |
2759 # XXX we could leverage the docket while stripping. However, it is | |
2760 # not powerful enough at the time of this comment | |
2761 self._docket.index_end = end | |
2762 self._docket.data_end = data_end | |
2763 self._docket.sidedata_end = sidedata_end | |
2764 self._docket.write(transaction, stripping=True) | |
2653 | 2765 |
2654 # then reset internal state in memory to forget those revisions | 2766 # then reset internal state in memory to forget those revisions |
2655 self._revisioncache = None | 2767 self._revisioncache = None |
2656 self._chaininfocache = util.lrucachedict(500) | 2768 self._chaininfocache = util.lrucachedict(500) |
2657 self._chunkclear() | 2769 self._segmentfile.clear_cache() |
2770 self._segmentfile_sidedata.clear_cache() | |
2658 | 2771 |
2659 del self.index[rev:-1] | 2772 del self.index[rev:-1] |
2660 | 2773 |
2661 def checksize(self): | 2774 def checksize(self): |
2662 """Check size of index and data files | 2775 """Check size of index and data files |
2680 if inst.errno != errno.ENOENT: | 2793 if inst.errno != errno.ENOENT: |
2681 raise | 2794 raise |
2682 dd = 0 | 2795 dd = 0 |
2683 | 2796 |
2684 try: | 2797 try: |
2685 f = self.opener(self.indexfile) | 2798 f = self.opener(self._indexfile) |
2686 f.seek(0, io.SEEK_END) | 2799 f.seek(0, io.SEEK_END) |
2687 actual = f.tell() | 2800 actual = f.tell() |
2688 f.close() | 2801 f.close() |
2689 s = self.index.entry_size | 2802 s = self.index.entry_size |
2690 i = max(0, actual // s) | 2803 i = max(0, actual // s) |
2701 di = 0 | 2814 di = 0 |
2702 | 2815 |
2703 return (dd, di) | 2816 return (dd, di) |
2704 | 2817 |
2705 def files(self): | 2818 def files(self): |
2706 res = [self.indexfile] | 2819 res = [self._indexfile] |
2707 if not self._inline: | 2820 if self._docket_file is None: |
2708 res.append(self.datafile) | 2821 if not self._inline: |
2822 res.append(self._datafile) | |
2823 else: | |
2824 res.append(self._docket_file) | |
2825 res.extend(self._docket.old_index_filepaths(include_empty=False)) | |
2826 if self._docket.data_end: | |
2827 res.append(self._datafile) | |
2828 res.extend(self._docket.old_data_filepaths(include_empty=False)) | |
2829 if self._docket.sidedata_end: | |
2830 res.append(self._sidedatafile) | |
2831 res.extend(self._docket.old_sidedata_filepaths(include_empty=False)) | |
2709 return res | 2832 return res |
2710 | 2833 |
2711 def emitrevisions( | 2834 def emitrevisions( |
2712 self, | 2835 self, |
2713 nodes, | 2836 nodes, |
2760 tr, | 2883 tr, |
2761 destrevlog, | 2884 destrevlog, |
2762 addrevisioncb=None, | 2885 addrevisioncb=None, |
2763 deltareuse=DELTAREUSESAMEREVS, | 2886 deltareuse=DELTAREUSESAMEREVS, |
2764 forcedeltabothparents=None, | 2887 forcedeltabothparents=None, |
2765 sidedatacompanion=None, | 2888 sidedata_helpers=None, |
2766 ): | 2889 ): |
2767 """Copy this revlog to another, possibly with format changes. | 2890 """Copy this revlog to another, possibly with format changes. |
2768 | 2891 |
2769 The destination revlog will contain the same revisions and nodes. | 2892 The destination revlog will contain the same revisions and nodes. |
2770 However, it may not be bit-for-bit identical due to e.g. delta encoding | 2893 However, it may not be bit-for-bit identical due to e.g. delta encoding |
2803 | 2926 |
2804 In addition to the delta policy, the ``forcedeltabothparents`` | 2927 In addition to the delta policy, the ``forcedeltabothparents`` |
2805 argument controls whether to force computing deltas against both | 2928 argument controls whether to force computing deltas against both |
2806 parents for merges. By default the revlog's current setting is used. | 2929 parents for merges. By default the revlog's current setting is used. |
2807 | 2930 |
2808 If not None, the `sidedatacompanion` is a callable that accepts two | 2931 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on |
2809 arguments: | 2932 `sidedata_helpers`. |
2810 | |
2811 (srcrevlog, rev) | |
2812 | |
2813 and returns a quintet that controls changes to sidedata content from the | |
2814 old revision to the new clone result: | |
2815 | |
2816 (dropall, filterout, update, new_flags, dropped_flags) | |
2817 | |
2818 * if `dropall` is True, all sidedata should be dropped | |
2819 * `filterout` is a set of sidedata keys that should be dropped | |
2820 * `update` is a mapping of additional/new key -> value | |
2821 * new_flags is a bitfield of new flags that the revision should get | |
2822 * dropped_flags is a bitfield of flags that the revision should no longer have | |
2823 """ | 2933 """ |
2824 if deltareuse not in self.DELTAREUSEALL: | 2934 if deltareuse not in self.DELTAREUSEALL: |
2825 raise ValueError( | 2935 raise ValueError( |
2826 _(b'value for deltareuse invalid: %s') % deltareuse | 2936 _(b'value for deltareuse invalid: %s') % deltareuse |
2827 ) | 2937 ) |
2857 tr, | 2967 tr, |
2858 destrevlog, | 2968 destrevlog, |
2859 addrevisioncb, | 2969 addrevisioncb, |
2860 deltareuse, | 2970 deltareuse, |
2861 forcedeltabothparents, | 2971 forcedeltabothparents, |
2862 sidedatacompanion, | 2972 sidedata_helpers, |
2863 ) | 2973 ) |
2864 | 2974 |
2865 finally: | 2975 finally: |
2866 destrevlog._lazydelta = oldlazydelta | 2976 destrevlog._lazydelta = oldlazydelta |
2867 destrevlog._lazydeltabase = oldlazydeltabase | 2977 destrevlog._lazydeltabase = oldlazydeltabase |
2872 tr, | 2982 tr, |
2873 destrevlog, | 2983 destrevlog, |
2874 addrevisioncb, | 2984 addrevisioncb, |
2875 deltareuse, | 2985 deltareuse, |
2876 forcedeltabothparents, | 2986 forcedeltabothparents, |
2877 sidedatacompanion, | 2987 sidedata_helpers, |
2878 ): | 2988 ): |
2879 """perform the core duty of `revlog.clone` after parameter processing""" | 2989 """perform the core duty of `revlog.clone` after parameter processing""" |
2880 deltacomputer = deltautil.deltacomputer(destrevlog) | 2990 deltacomputer = deltautil.deltacomputer(destrevlog) |
2881 index = self.index | 2991 index = self.index |
2882 for rev in self: | 2992 for rev in self: |
2888 linkrev = entry[4] | 2998 linkrev = entry[4] |
2889 p1 = index[entry[5]][7] | 2999 p1 = index[entry[5]][7] |
2890 p2 = index[entry[6]][7] | 3000 p2 = index[entry[6]][7] |
2891 node = entry[7] | 3001 node = entry[7] |
2892 | 3002 |
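For orientation, these are the index-entry positions used in this method, as
inferred from the surrounding code (the authoritative layout lives with the
index implementation):

    # entry[0]     offset and flags packed together
    # entry[4]     linkrev
    # entry[5]     first parent, as a revision number
    # entry[6]     second parent, as a revision number
    # entry[7]     node
    # entry[8:10]  sidedata offset and length (see rewrite_sidedata below)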
2893 sidedataactions = (False, [], {}, 0, 0) | |
2894 if sidedatacompanion is not None: | |
2895 sidedataactions = sidedatacompanion(self, rev) | |
2896 | |
2897 # (Possibly) reuse the delta from the revlog if allowed and | 3003 # (Possibly) reuse the delta from the revlog if allowed and |
2898 # the revlog chunk is a delta. | 3004 # the revlog chunk is a delta. |
2899 cachedelta = None | 3005 cachedelta = None |
2900 rawtext = None | 3006 rawtext = None |
2901 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD: | 3007 if deltareuse == self.DELTAREUSEFULLADD: |
2902 dropall = sidedataactions[0] | 3008 text = self._revisiondata(rev) |
2903 filterout = sidedataactions[1] | 3009 sidedata = self.sidedata(rev) |
2904 update = sidedataactions[2] | 3010 |
2905 new_flags = sidedataactions[3] | 3011 if sidedata_helpers is not None: |
2906 dropped_flags = sidedataactions[4] | 3012 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers( |
2907 text, sidedata = self._revisiondata(rev) | 3013 self, sidedata_helpers, sidedata, rev |
2908 if dropall: | 3014 ) |
2909 sidedata = {} | 3015 flags = flags | new_flags[0] & ~new_flags[1] |
2910 for key in filterout: | |
2911 sidedata.pop(key, None) | |
2912 sidedata.update(update) | |
2913 if not sidedata: | |
2914 sidedata = None | |
2915 | |
2916 flags |= new_flags | |
2917 flags &= ~dropped_flags | |
2918 | 3016 |
2919 destrevlog.addrevision( | 3017 destrevlog.addrevision( |
2920 text, | 3018 text, |
2921 tr, | 3019 tr, |
2922 linkrev, | 3020 linkrev, |
2932 if destrevlog._lazydelta: | 3030 if destrevlog._lazydelta: |
2933 dp = self.deltaparent(rev) | 3031 dp = self.deltaparent(rev) |
2934 if dp != nullrev: | 3032 if dp != nullrev: |
2935 cachedelta = (dp, bytes(self._chunk(rev))) | 3033 cachedelta = (dp, bytes(self._chunk(rev))) |
2936 | 3034 |
3035 sidedata = None | |
2937 if not cachedelta: | 3036 if not cachedelta: |
2938 rawtext = self.rawdata(rev) | 3037 rawtext = self._revisiondata(rev) |
2939 | 3038 sidedata = self.sidedata(rev) |
2940 ifh = destrevlog.opener( | 3039 if sidedata is None: |
2941 destrevlog.indexfile, b'a+', checkambig=False | 3040 sidedata = self.sidedata(rev) |
2942 ) | 3041 |
2943 dfh = None | 3042 if sidedata_helpers is not None: |
2944 if not destrevlog._inline: | 3043 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers( |
2945 dfh = destrevlog.opener(destrevlog.datafile, b'a+') | 3044 self, sidedata_helpers, sidedata, rev |
2946 try: | 3045 ) |
3046 flags = flags | new_flags[0] & ~new_flags[1] | |
3047 | |
3048 with destrevlog._writing(tr): | |
2947 destrevlog._addrevision( | 3049 destrevlog._addrevision( |
2948 node, | 3050 node, |
2949 rawtext, | 3051 rawtext, |
2950 tr, | 3052 tr, |
2951 linkrev, | 3053 linkrev, |
2952 p1, | 3054 p1, |
2953 p2, | 3055 p2, |
2954 flags, | 3056 flags, |
2955 cachedelta, | 3057 cachedelta, |
2956 ifh, | |
2957 dfh, | |
2958 deltacomputer=deltacomputer, | 3058 deltacomputer=deltacomputer, |
3059 sidedata=sidedata, | |
2959 ) | 3060 ) |
2960 finally: | |
2961 if dfh: | |
2962 dfh.close() | |
2963 ifh.close() | |
2964 | 3061 |
2965 if addrevisioncb: | 3062 if addrevisioncb: |
2966 addrevisioncb(self, rev, node) | 3063 addrevisioncb(self, rev, node) |
2967 | 3064 |
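A note on the flag arithmetic used in both branches above: Python's `&` binds
tighter than `|`, so `flags = flags | new_flags[0] & ~new_flags[1]`
parenthesizes as shown below, meaning the removal mask is applied to the
newly added bits only, not to bits already set in `flags`:

    # Explicitly parenthesized equivalent, with illustrative names for the
    # (flags to add, flags to drop) pair returned by the sidedata helpers.
    to_add, to_drop = new_flags
    flags = flags | (to_add & ~to_drop)  # mask applies to added bits only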
2968 def censorrevision(self, tr, censornode, tombstone=b''): | 3065 def censorrevision(self, tr, censornode, tombstone=b''): |
2969 if (self.version & 0xFFFF) == REVLOGV0: | 3066 if self._format_version == REVLOGV0: |
2970 raise error.RevlogError( | 3067 raise error.RevlogError( |
2971 _(b'cannot censor with version %d revlogs') % self.version | 3068 _(b'cannot censor with version %d revlogs') |
3069 % self._format_version | |
2972 ) | 3070 ) |
2973 | 3071 elif self._format_version == REVLOGV1: |
2974 censorrev = self.rev(censornode) | 3072 rewrite.v1_censor(self, tr, censornode, tombstone) |
2975 tombstone = storageutil.packmeta({b'censored': tombstone}, b'') | 3073 else: |
2976 | 3074 rewrite.v2_censor(self, tr, censornode, tombstone) |
2977 if len(tombstone) > self.rawsize(censorrev): | |
2978 raise error.Abort( | |
2979 _(b'censor tombstone must be no longer than censored data') | |
2980 ) | |
2981 | |
2982 # Rewriting the revlog in place is hard. Our strategy for censoring is | |
2983 # to create a new revlog, copy all revisions to it, then replace the | |
2984 # revlogs on transaction close. | |
2985 | |
2986 newindexfile = self.indexfile + b'.tmpcensored' | |
2987 newdatafile = self.datafile + b'.tmpcensored' | |
2988 | |
2989 # This is a bit dangerous. We could easily have a mismatch of state. | |
2990 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True) | |
2991 newrl.version = self.version | |
2992 newrl._generaldelta = self._generaldelta | |
2993 newrl._io = self._io | |
2994 | |
2995 for rev in self.revs(): | |
2996 node = self.node(rev) | |
2997 p1, p2 = self.parents(node) | |
2998 | |
2999 if rev == censorrev: | |
3000 newrl.addrawrevision( | |
3001 tombstone, | |
3002 tr, | |
3003 self.linkrev(censorrev), | |
3004 p1, | |
3005 p2, | |
3006 censornode, | |
3007 REVIDX_ISCENSORED, | |
3008 ) | |
3009 | |
3010 if newrl.deltaparent(rev) != nullrev: | |
3011 raise error.Abort( | |
3012 _( | |
3013 b'censored revision stored as delta; ' | |
3014 b'cannot censor' | |
3015 ), | |
3016 hint=_( | |
3017 b'censoring of revlogs is not ' | |
3018 b'fully implemented; please report ' | |
3019 b'this bug' | |
3020 ), | |
3021 ) | |
3022 continue | |
3023 | |
3024 if self.iscensored(rev): | |
3025 if self.deltaparent(rev) != nullrev: | |
3026 raise error.Abort( | |
3027 _( | |
3028 b'cannot censor due to censored ' | |
3029 b'revision having delta stored' | |
3030 ) | |
3031 ) | |
3032 rawtext = self._chunk(rev) | |
3033 else: | |
3034 rawtext = self.rawdata(rev) | |
3035 | |
3036 newrl.addrawrevision( | |
3037 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev) | |
3038 ) | |
3039 | |
3040 tr.addbackup(self.indexfile, location=b'store') | |
3041 if not self._inline: | |
3042 tr.addbackup(self.datafile, location=b'store') | |
3043 | |
3044 self.opener.rename(newrl.indexfile, self.indexfile) | |
3045 if not self._inline: | |
3046 self.opener.rename(newrl.datafile, self.datafile) | |
3047 | |
3048 self.clearcaches() | |
3049 self._loadindex() | |
3050 | 3075 |
3051 def verifyintegrity(self, state): | 3076 def verifyintegrity(self, state): |
3052 """Verifies the integrity of the revlog. | 3077 """Verifies the integrity of the revlog. |
3053 | 3078 |
3054 Yields ``revlogproblem`` instances describing problems that are | 3079 Yields ``revlogproblem`` instances describing problems that are |
3058 if dd: | 3083 if dd: |
3059 yield revlogproblem(error=_(b'data length off by %d bytes') % dd) | 3084 yield revlogproblem(error=_(b'data length off by %d bytes') % dd) |
3060 if di: | 3085 if di: |
3061 yield revlogproblem(error=_(b'index contains %d extra bytes') % di) | 3086 yield revlogproblem(error=_(b'index contains %d extra bytes') % di) |
3062 | 3087 |
3063 version = self.version & 0xFFFF | 3088 version = self._format_version |
3064 | 3089 |
3065 # The verifier tells us what version revlog we should be. | 3090 # The verifier tells us what version revlog we should be. |
3066 if version != state[b'expectedversion']: | 3091 if version != state[b'expectedversion']: |
3067 yield revlogproblem( | 3092 yield revlogproblem( |
3068 warning=_(b"warning: '%s' uses revlog format %d; expected %d") | 3093 warning=_(b"warning: '%s' uses revlog format %d; expected %d") |
3069 % (self.indexfile, version, state[b'expectedversion']) | 3094 % (self.display_id, version, state[b'expectedversion']) |
3070 ) | 3095 ) |
3071 | 3096 |
3072 state[b'skipread'] = set() | 3097 state[b'skipread'] = set() |
3073 state[b'safe_renamed'] = set() | 3098 state[b'safe_renamed'] = set() |
3074 | 3099 |
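Since `verifyintegrity` is a generator, callers drain it and inspect each
yielded `revlogproblem`. A sketch of a consumer, assuming an open revlog
`rl`, that `revlogproblem` exposes the `warning`/`error` attributes used
above, and that the elided remainder of the method needs no state keys
beyond those shown (it populates `skipread`/`safe_renamed` itself):

    state = {b'expectedversion': 1}  # revlog version the verifier expects
    problems = list(rl.verifyintegrity(state))
    errors = [p.error for p in problems if p.error is not None]
    warnings = [p.warning for p in problems if p.warning is not None]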
3162 storedsize=False, | 3187 storedsize=False, |
3163 ): | 3188 ): |
3164 d = {} | 3189 d = {} |
3165 | 3190 |
3166 if exclusivefiles: | 3191 if exclusivefiles: |
3167 d[b'exclusivefiles'] = [(self.opener, self.indexfile)] | 3192 d[b'exclusivefiles'] = [(self.opener, self._indexfile)] |
3168 if not self._inline: | 3193 if not self._inline: |
3169 d[b'exclusivefiles'].append((self.opener, self.datafile)) | 3194 d[b'exclusivefiles'].append((self.opener, self._datafile)) |
3170 | 3195 |
3171 if sharedfiles: | 3196 if sharedfiles: |
3172 d[b'sharedfiles'] = [] | 3197 d[b'sharedfiles'] = [] |
3173 | 3198 |
3174 if revisionscount: | 3199 if revisionscount: |
3182 self.opener.stat(path).st_size for path in self.files() | 3207 self.opener.stat(path).st_size for path in self.files() |
3183 ) | 3208 ) |
3184 | 3209 |
3185 return d | 3210 return d |
3186 | 3211 |
3187 def rewrite_sidedata(self, helpers, startrev, endrev): | 3212 def rewrite_sidedata(self, transaction, helpers, startrev, endrev): |
3188 if self.version & 0xFFFF != REVLOGV2: | 3213 if not self.hassidedata: |
3189 return | 3214 return |
3190 # inline revlogs are not yet supported because they suffer from an issue when | 3215 # revlog formats with sidedata support do not support inline |
3191 # rewriting them (since it's not an append-only operation). | |
3192 # See issue6485. | |
3193 assert not self._inline | 3216 assert not self._inline |
3194 if not helpers[1] and not helpers[2]: | 3217 if not helpers[1] and not helpers[2]: |
3195 # Nothing to generate or remove | 3218 # Nothing to generate or remove |
3196 return | 3219 return |
3197 | 3220 |
3198 new_entries = [] | 3221 new_entries = [] |
3199 # append the new sidedata | 3222 # append the new sidedata |
3200 with self._datafp(b'a+') as fp: | 3223 with self._writing(transaction): |
3201 # Maybe this bug still exists, see revlog._writeentry | 3224 ifh, dfh, sdfh = self._writinghandles |
3202 fp.seek(0, os.SEEK_END) | 3225 dfh.seek(self._docket.sidedata_end, os.SEEK_SET) |
3203 current_offset = fp.tell() | 3226 |
3227 current_offset = sdfh.tell() | |
3204 for rev in range(startrev, endrev + 1): | 3228 for rev in range(startrev, endrev + 1): |
3205 entry = self.index[rev] | 3229 entry = self.index[rev] |
3206 new_sidedata = storageutil.run_sidedata_helpers( | 3230 new_sidedata, flags = sidedatautil.run_sidedata_helpers( |
3207 store=self, | 3231 store=self, |
3208 sidedata_helpers=helpers, | 3232 sidedata_helpers=helpers, |
3209 sidedata={}, | 3233 sidedata={}, |
3210 rev=rev, | 3234 rev=rev, |
3211 ) | 3235 ) |
3212 | 3236 |
3213 serialized_sidedata = sidedatautil.serialize_sidedata( | 3237 serialized_sidedata = sidedatautil.serialize_sidedata( |
3214 new_sidedata | 3238 new_sidedata |
3215 ) | 3239 ) |
3240 | |
3241 sidedata_compression_mode = COMP_MODE_INLINE | |
3242 if serialized_sidedata and self.hassidedata: | |
3243 sidedata_compression_mode = COMP_MODE_PLAIN | |
3244 h, comp_sidedata = self.compress(serialized_sidedata) | |
3245 if ( | |
3246 h != b'u' | |
3247 and comp_sidedata[0] != b'\0' | |
3248 and len(comp_sidedata) < len(serialized_sidedata) | |
3249 ): | |
3250 assert not h | |
3251 if ( | |
3252 comp_sidedata[0] | |
3253 == self._docket.default_compression_header | |
3254 ): | |
3255 sidedata_compression_mode = COMP_MODE_DEFAULT | |
3256 serialized_sidedata = comp_sidedata | |
3257 else: | |
3258 sidedata_compression_mode = COMP_MODE_INLINE | |
3259 serialized_sidedata = comp_sidedata | |
3216 if entry[8] != 0 or entry[9] != 0: | 3260 if entry[8] != 0 or entry[9] != 0: |
3217 # rewriting entries that already have sidedata is not | 3261 # rewriting entries that already have sidedata is not |
3218 # supported yet, because it introduces garbage data in the | 3262 # supported yet, because it introduces garbage data in the |
3219 # revlog. | 3263 # revlog. |
3220 msg = b"Rewriting existing sidedata is not supported yet" | 3264 msg = b"rewriting existing sidedata is not supported yet" |
3221 raise error.Abort(msg) | 3265 raise error.Abort(msg) |
3222 entry = entry[:8] | 3266 |
3223 entry += (current_offset, len(serialized_sidedata)) | 3267 # Apply (potential) flags to add and to remove after running |
3224 | 3268 # the sidedata helpers |
3225 fp.write(serialized_sidedata) | 3269 new_offset_flags = entry[0] | flags[0] & ~flags[1] |
3226 new_entries.append(entry) | 3270 entry_update = ( |
3271 current_offset, | |
3272 len(serialized_sidedata), | |
3273 new_offset_flags, | |
3274 sidedata_compression_mode, | |
3275 ) | |
3276 | |
3277 # the sidedata computation might have moved the file cursors around |
3278 sdfh.seek(current_offset, os.SEEK_SET) | |
3279 sdfh.write(serialized_sidedata) | |
3280 new_entries.append(entry_update) | |
3227 current_offset += len(serialized_sidedata) | 3281 current_offset += len(serialized_sidedata) |
3228 | 3282 self._docket.sidedata_end = sdfh.tell() |
3229 # rewrite the new index entries | 3283 |
3230 with self._indexfp(b'w+') as fp: | 3284 # rewrite the new index entries |
3231 fp.seek(startrev * self.index.entry_size) | 3285 ifh.seek(startrev * self.index.entry_size) |
3232 for i, entry in enumerate(new_entries): | 3286 for i, e in enumerate(new_entries): |
3233 rev = startrev + i | 3287 rev = startrev + i |
3234 self.index.replace_sidedata_info(rev, entry[8], entry[9]) | 3288 self.index.replace_sidedata_info(rev, *e) |
3235 packed = self._io.packentry(entry, self.node, self.version, rev) | 3289 packed = self.index.entry_binary(rev) |
3236 fp.write(packed) | 3290 if rev == 0 and self._docket is None: |
3291 header = self._format_flags | self._format_version | |
3292 header = self.index.pack_header(header) | |
3293 packed = header + packed | |
3294 ifh.write(packed) |
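The sidedata compression-mode selection above reduces to one decision: keep
the compressed payload only when it is genuinely smaller and unambiguous on
read, and record whether its engine header matches the docket default. A
simplified restatement, assuming `compress` returns the `(header, data)`
pair used above, a single-byte docket default header, and the COMP_MODE_*
constants imported at the top of this module:

    def pick_sidedata_mode(compress, default_header, raw):
        # Fall back to storing the serialized sidedata uncompressed.
        mode, payload = COMP_MODE_PLAIN, raw
        h, comp = compress(raw)
        # b'u' marks "stored uncompressed"; a leading NUL byte would be
        # ambiguous on read; a larger payload is not worth keeping.
        if h != b'u' and comp[:1] != b'\0' and len(comp) < len(raw):
            if comp[:1] == default_header:
                mode = COMP_MODE_DEFAULT  # engine implied by the docket
            else:
                mode = COMP_MODE_INLINE   # engine header kept in payload
            payload = comp
        return mode, payload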