comparison mercurial/revlog.py @ 47759:d7515d29761d stable 5.9rc0

branching: merge default into stable. This marks the start of the 5.9 freeze.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 21 Jul 2021 22:52:09 +0200
parents 411dc27fd9fd bc8536e09a20
children 60ccc86a12f3
47054:29ea3b4c4f62 47759:d7515d29761d
1 # revlog.py - storage back-end for mercurial 1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # 3 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # 5 #
5 # This software may be used and distributed according to the terms of the 6 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version. 7 # GNU General Public License version 2 or any later version.
24 25
25 # import stuff from node for others to import from revlog 26 # import stuff from node for others to import from revlog
26 from .node import ( 27 from .node import (
27 bin, 28 bin,
28 hex, 29 hex,
29 nullhex,
30 nullid,
31 nullrev, 30 nullrev,
32 sha1nodeconstants, 31 sha1nodeconstants,
33 short, 32 short,
34 wdirfilenodeids,
35 wdirhex,
36 wdirid,
37 wdirrev, 33 wdirrev,
38 ) 34 )
39 from .i18n import _ 35 from .i18n import _
40 from .pycompat import getattr 36 from .pycompat import getattr
41 from .revlogutils.constants import ( 37 from .revlogutils.constants import (
38 ALL_KINDS,
39 CHANGELOGV2,
40 COMP_MODE_DEFAULT,
41 COMP_MODE_INLINE,
42 COMP_MODE_PLAIN,
43 FEATURES_BY_VERSION,
42 FLAG_GENERALDELTA, 44 FLAG_GENERALDELTA,
43 FLAG_INLINE_DATA, 45 FLAG_INLINE_DATA,
44 INDEX_ENTRY_V0,
45 INDEX_ENTRY_V1,
46 INDEX_ENTRY_V2,
47 INDEX_HEADER, 46 INDEX_HEADER,
47 KIND_CHANGELOG,
48 REVLOGV0, 48 REVLOGV0,
49 REVLOGV1, 49 REVLOGV1,
50 REVLOGV1_FLAGS, 50 REVLOGV1_FLAGS,
51 REVLOGV2, 51 REVLOGV2,
52 REVLOGV2_FLAGS, 52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS, 53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT, 54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION, 55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
56 ) 57 )
57 from .revlogutils.flagutil import ( 58 from .revlogutils.flagutil import (
58 REVIDX_DEFAULT_FLAGS, 59 REVIDX_DEFAULT_FLAGS,
59 REVIDX_ELLIPSIS, 60 REVIDX_ELLIPSIS,
60 REVIDX_EXTSTORED, 61 REVIDX_EXTSTORED,
61 REVIDX_FLAGS_ORDER, 62 REVIDX_FLAGS_ORDER,
62 REVIDX_HASCOPIESINFO, 63 REVIDX_HASCOPIESINFO,
63 REVIDX_ISCENSORED, 64 REVIDX_ISCENSORED,
64 REVIDX_RAWTEXT_CHANGING_FLAGS, 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 REVIDX_SIDEDATA,
66 ) 66 )
67 from .thirdparty import attr 67 from .thirdparty import attr
68 from . import ( 68 from . import (
69 ancestor, 69 ancestor,
70 dagop, 70 dagop,
71 error, 71 error,
72 mdiff, 72 mdiff,
73 policy, 73 policy,
74 pycompat, 74 pycompat,
75 revlogutils,
75 templatefilters, 76 templatefilters,
76 util, 77 util,
77 ) 78 )
78 from .interfaces import ( 79 from .interfaces import (
79 repository, 80 repository,
80 util as interfaceutil, 81 util as interfaceutil,
81 ) 82 )
82 from .revlogutils import ( 83 from .revlogutils import (
83 deltas as deltautil, 84 deltas as deltautil,
85 docket as docketutil,
84 flagutil, 86 flagutil,
85 nodemap as nodemaputil, 87 nodemap as nodemaputil,
88 randomaccessfile,
89 revlogv0,
90 rewrite,
86 sidedata as sidedatautil, 91 sidedata as sidedatautil,
87 ) 92 )
88 from .utils import ( 93 from .utils import (
89 storageutil, 94 storageutil,
90 stringutil, 95 stringutil,
91 ) 96 )
92 97
93 # blanked usage of all the names to prevent pyflakes constraints 98 # blanked usage of all the names to prevent pyflakes constraints
94 # We need these names available in the module for extensions. 99 # We need these names available in the module for extensions.
100
95 REVLOGV0 101 REVLOGV0
96 REVLOGV1 102 REVLOGV1
97 REVLOGV2 103 REVLOGV2
98 FLAG_INLINE_DATA 104 FLAG_INLINE_DATA
99 FLAG_GENERALDELTA 105 FLAG_GENERALDELTA
102 REVLOG_DEFAULT_VERSION 108 REVLOG_DEFAULT_VERSION
103 REVLOGV1_FLAGS 109 REVLOGV1_FLAGS
104 REVLOGV2_FLAGS 110 REVLOGV2_FLAGS
105 REVIDX_ISCENSORED 111 REVIDX_ISCENSORED
106 REVIDX_ELLIPSIS 112 REVIDX_ELLIPSIS
107 REVIDX_SIDEDATA
108 REVIDX_HASCOPIESINFO 113 REVIDX_HASCOPIESINFO
109 REVIDX_EXTSTORED 114 REVIDX_EXTSTORED
110 REVIDX_DEFAULT_FLAGS 115 REVIDX_DEFAULT_FLAGS
111 REVIDX_FLAGS_ORDER 116 REVIDX_FLAGS_ORDER
112 REVIDX_RAWTEXT_CHANGING_FLAGS 117 REVIDX_RAWTEXT_CHANGING_FLAGS
119 # Aliased for performance. 124 # Aliased for performance.
120 _zlibdecompress = zlib.decompress 125 _zlibdecompress = zlib.decompress
121 126
122 # max size of revlog with inline data 127 # max size of revlog with inline data
123 _maxinline = 131072 128 _maxinline = 131072
124 _chunksize = 1048576
125 129
126 # Flag processors for REVIDX_ELLIPSIS. 130 # Flag processors for REVIDX_ELLIPSIS.
127 def ellipsisreadprocessor(rl, text): 131 def ellipsisreadprocessor(rl, text):
128 return text, False 132 return text, False
129 133
139 ellipsisprocessor = ( 143 ellipsisprocessor = (
140 ellipsisreadprocessor, 144 ellipsisreadprocessor,
141 ellipsiswriteprocessor, 145 ellipsiswriteprocessor,
142 ellipsisrawprocessor, 146 ellipsisrawprocessor,
143 ) 147 )
144
145
146 def getoffset(q):
147 return int(q >> 16)
148
149
150 def gettype(q):
151 return int(q & 0xFFFF)
152
153
154 def offset_type(offset, type):
155 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
156 raise ValueError(b'unknown revlog index flags')
157 return int(int(offset) << 16 | type)
158 148
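The removed getoffset()/gettype()/offset_type() helpers pack a revision's data offset and its storage flags into the first index-entry field: 48 bits of offset above 16 bits of flags. A minimal sketch of that packing with illustrative values (the surviving accessors start() and flags() further down perform the same shifts inline):

    offset, flags = 1048576, 0x0001           # illustrative values

    packed = (int(offset) << 16) | flags      # what offset_type() built
    assert packed >> 16 == offset             # what getoffset()/start() recover
    assert packed & 0xFFFF == flags           # what gettype()/flags() recover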
159 149
160 def _verify_revision(rl, skipflags, state, node): 150 def _verify_revision(rl, skipflags, state, node):
161 """Verify the integrity of the given revlog ``node`` while providing a hook 151 """Verify the integrity of the given revlog ``node`` while providing a hook
162 point for extensions to influence the operation.""" 152 point for extensions to influence the operation."""
173 # people using pure don't really have performance considerations (and a 163 # people using pure don't really have performance considerations (and a
174 # wheelbarrow of other slowness sources) 164 # wheelbarrow of other slowness sources)
175 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr( 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
176 parsers, 'BaseIndexObject' 166 parsers, 'BaseIndexObject'
177 ) 167 )
178
179
180 @attr.s(slots=True, frozen=True)
181 class _revisioninfo(object):
182 """Information about a revision that allows building its fulltext
183 node: expected hash of the revision
184 p1, p2: parent revs of the revision
185 btext: built text cache consisting of a one-element list
186 cachedelta: (baserev, uncompressed_delta) or None
187 flags: flags associated with the revision storage
188
189 One of btext[0] or cachedelta must be set.
190 """
191
192 node = attr.ib()
193 p1 = attr.ib()
194 p2 = attr.ib()
195 btext = attr.ib()
196 textlen = attr.ib()
197 cachedelta = attr.ib()
198 flags = attr.ib()
199 168
200 169
201 @interfaceutil.implementer(repository.irevisiondelta) 170 @interfaceutil.implementer(repository.irevisiondelta)
202 @attr.s(slots=True) 171 @attr.s(slots=True)
203 class revlogrevisiondelta(object): 172 class revlogrevisiondelta(object):
208 flags = attr.ib() 177 flags = attr.ib()
209 baserevisionsize = attr.ib() 178 baserevisionsize = attr.ib()
210 revision = attr.ib() 179 revision = attr.ib()
211 delta = attr.ib() 180 delta = attr.ib()
212 sidedata = attr.ib() 181 sidedata = attr.ib()
182 protocol_flags = attr.ib()
213 linknode = attr.ib(default=None) 183 linknode = attr.ib(default=None)
214 184
215 185
216 @interfaceutil.implementer(repository.iverifyproblem) 186 @interfaceutil.implementer(repository.iverifyproblem)
217 @attr.s(frozen=True) 187 @attr.s(frozen=True)
219 warning = attr.ib(default=None) 189 warning = attr.ib(default=None)
220 error = attr.ib(default=None) 190 error = attr.ib(default=None)
221 node = attr.ib(default=None) 191 node = attr.ib(default=None)
222 192
223 193
224 class revlogoldindex(list): 194 def parse_index_v1(data, inline):
225 entry_size = INDEX_ENTRY_V0.size 195 # call the C implementation to parse the index data
226 196 index, cache = parsers.parse_index2(data, inline)
227 @property 197 return index, cache
228 def nodemap(self): 198
229 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]" 199
230 util.nouideprecwarn(msg, b'5.3', stacklevel=2) 200 def parse_index_v2(data, inline):
231 return self._nodemap 201 # call the C implementation to parse the index data
232 202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
233 @util.propertycache 203 return index, cache
234 def _nodemap(self): 204
235 nodemap = nodemaputil.NodeMap({nullid: nullrev}) 205
236 for r in range(0, len(self)): 206 def parse_index_cl_v2(data, inline):
237 n = self[r][7] 207 # call the C implementation to parse the index data
238 nodemap[n] = r 208 assert not inline
239 return nodemap 209 from .pure.parsers import parse_index_cl_v2
240 210
241 def has_node(self, node): 211 index, cache = parse_index_cl_v2(data)
242 """return True if the node exist in the index""" 212 return index, cache
243 return node in self._nodemap 213
244 214
245 def rev(self, node): 215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
246 """return a revision for a node 216
247 217 def parse_index_v1_nodemap(data, inline):
248 If the node is unknown, raise a RevlogError""" 218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
249 return self._nodemap[node] 219 return index, cache
250 220
251 def get_rev(self, node): 221
252 """return a revision for a node 222 else:
253 223 parse_index_v1_nodemap = None
254 If the node is unknown, return None""" 224
255 return self._nodemap.get(node) 225
256 226 def parse_index_v1_mixed(data, inline):
257 def append(self, tup): 227 index, cache = parse_index_v1(data, inline)
258 self._nodemap[tup[7]] = len(self) 228 return rustrevlog.MixedIndex(index), cache
259 super(revlogoldindex, self).append(tup)
260
261 def __delitem__(self, i):
262 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
263 raise ValueError(b"deleting slices only supports a:-1 with step 1")
264 for r in pycompat.xrange(i.start, len(self)):
265 del self._nodemap[self[r][7]]
266 super(revlogoldindex, self).__delitem__(i)
267
268 def clearcaches(self):
269 self.__dict__.pop('_nodemap', None)
270
271 def __getitem__(self, i):
272 if i == -1:
273 return (0, 0, 0, -1, -1, -1, -1, nullid)
274 return list.__getitem__(self, i)
275
276
277 class revlogoldio(object):
278 def parseindex(self, data, inline):
279 s = INDEX_ENTRY_V0.size
280 index = []
281 nodemap = nodemaputil.NodeMap({nullid: nullrev})
282 n = off = 0
283 l = len(data)
284 while off + s <= l:
285 cur = data[off : off + s]
286 off += s
287 e = INDEX_ENTRY_V0.unpack(cur)
288 # transform to revlogv1 format
289 e2 = (
290 offset_type(e[0], 0),
291 e[1],
292 -1,
293 e[2],
294 e[3],
295 nodemap.get(e[4], nullrev),
296 nodemap.get(e[5], nullrev),
297 e[6],
298 )
299 index.append(e2)
300 nodemap[e[6]] = n
301 n += 1
302
303 index = revlogoldindex(index)
304 return index, None
305
306 def packentry(self, entry, node, version, rev):
307 """return the binary representation of an entry
308
309 entry: a tuple containing all the values (see index.__getitem__)
310 node: a callback to convert a revision to nodeid
311 version: the changelog version
312 rev: the revision number
313 """
314 if gettype(entry[0]):
315 raise error.RevlogError(
316 _(b'index entry flags need revlog version 1')
317 )
318 e2 = (
319 getoffset(entry[0]),
320 entry[1],
321 entry[3],
322 entry[4],
323 node(entry[5]),
324 node(entry[6]),
325 entry[7],
326 )
327 return INDEX_ENTRY_V0.pack(*e2)
328 229
329 230
330 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte 231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
331 # signed integer) 232 # signed integer)
332 _maxentrysize = 0x7FFFFFFF 233 _maxentrysize = 0x7FFFFFFF
333 234
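Spelling out where the limit above comes from: the index stores each entry's uncompressed length in a 4-byte signed integer, so the largest representable size is 2**31 - 1 bytes (2 GiB):

    _maxentrysize = 0x7FFFFFFF
    assert _maxentrysize == 2**31 - 1 == 2147483647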
334 235 FILE_TOO_SHORT_MSG = _(
335 class revlogio(object): 236 b'cannot read from revlog %s;'
336 def parseindex(self, data, inline): 237 b' expected %d bytes from offset %d, data size is %d'
337 # call the C implementation to parse the index data 238 )
338 index, cache = parsers.parse_index2(data, inline)
339 return index, cache
340
341 def packentry(self, entry, node, version, rev):
342 p = INDEX_ENTRY_V1.pack(*entry)
343 if rev == 0:
344 p = INDEX_HEADER.pack(version) + p[4:]
345 return p
346
347
348 class revlogv2io(object):
349 def parseindex(self, data, inline):
350 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
351 return index, cache
352
353 def packentry(self, entry, node, version, rev):
354 p = INDEX_ENTRY_V2.pack(*entry)
355 if rev == 0:
356 p = INDEX_HEADER.pack(version) + p[4:]
357 return p
358
359
360 NodemapRevlogIO = None
361
362 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
363
364 class NodemapRevlogIO(revlogio):
365 """A debug oriented IO class that return a PersistentNodeMapIndexObject
366
367 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
368 """
369
370 def parseindex(self, data, inline):
371 index, cache = parsers.parse_index_devel_nodemap(data, inline)
372 return index, cache
373
374
375 class rustrevlogio(revlogio):
376 def parseindex(self, data, inline):
377 index, cache = super(rustrevlogio, self).parseindex(data, inline)
378 return rustrevlog.MixedIndex(index), cache
379 239
380 240
381 class revlog(object): 241 class revlog(object):
382 """ 242 """
383 the underlying revision storage object 243 the underlying revision storage object
417 277
418 `concurrencychecker` is an optional function that receives 3 arguments: a 278 `concurrencychecker` is an optional function that receives 3 arguments: a
419 file handle, a filename, and an expected position. It should check whether 279 file handle, a filename, and an expected position. It should check whether
420 the current position in the file handle is valid, and log/warn/fail (by 280 the current position in the file handle is valid, and log/warn/fail (by
421 raising). 281 raising).
282
283 See mercurial/revlogutils/constants.py for details about the content of an
284 index entry.
422 """ 285 """
423 286
424 _flagserrorclass = error.RevlogError 287 _flagserrorclass = error.RevlogError
425 288
426 def __init__( 289 def __init__(
427 self, 290 self,
428 opener, 291 opener,
429 indexfile, 292 target,
430 datafile=None, 293 radix,
294 postfix=None, # only exist for `tmpcensored` now
431 checkambig=False, 295 checkambig=False,
432 mmaplargeindex=False, 296 mmaplargeindex=False,
433 censorable=False, 297 censorable=False,
434 upperboundcomp=None, 298 upperboundcomp=None,
435 persistentnodemap=False, 299 persistentnodemap=False,
436 concurrencychecker=None, 300 concurrencychecker=None,
301 trypending=False,
437 ): 302 ):
438 """ 303 """
439 create a revlog object 304 create a revlog object
440 305
441 opener is a function that abstracts the file opening operation 306 opener is a function that abstracts the file opening operation
442 and can be used to implement COW semantics or the like. 307 and can be used to implement COW semantics or the like.
443 308
309 `target`: a (KIND, ID) tuple that identifies the content stored in
310 this revlog. It helps the rest of the code to understand what the revlog
311 is about without having to resort to heuristics and index filename
312 analysis. Note that this must reliably be set by normal code, but
313 test, debug, or performance measurement code might not set it to an
314 accurate value.
444 """ 315 """
445 self.upperboundcomp = upperboundcomp 316 self.upperboundcomp = upperboundcomp
446 self.indexfile = indexfile 317
447 self.datafile = datafile or (indexfile[:-2] + b".d") 318 self.radix = radix
448 self.nodemap_file = None 319
320 self._docket_file = None
321 self._indexfile = None
322 self._datafile = None
323 self._sidedatafile = None
324 self._nodemap_file = None
325 self.postfix = postfix
326 self._trypending = trypending
327 self.opener = opener
449 if persistentnodemap: 328 if persistentnodemap:
450 self.nodemap_file = nodemaputil.get_nodemap_file( 329 self._nodemap_file = nodemaputil.get_nodemap_file(self)
451 opener, self.indexfile 330
452 ) 331 assert target[0] in ALL_KINDS
453 332 assert len(target) == 2
454 self.opener = opener 333 self.target = target
455 # When True, indexfile is opened with checkambig=True at writing, to 334 # When True, indexfile is opened with checkambig=True at writing, to
456 # avoid file stat ambiguity. 335 # avoid file stat ambiguity.
457 self._checkambig = checkambig 336 self._checkambig = checkambig
458 self._mmaplargeindex = mmaplargeindex 337 self._mmaplargeindex = mmaplargeindex
459 self._censorable = censorable 338 self._censorable = censorable
466 # How much data to read and cache into the raw revlog data cache. 345 # How much data to read and cache into the raw revlog data cache.
467 self._chunkcachesize = 65536 346 self._chunkcachesize = 65536
468 self._maxchainlen = None 347 self._maxchainlen = None
469 self._deltabothparents = True 348 self._deltabothparents = True
470 self.index = None 349 self.index = None
350 self._docket = None
471 self._nodemap_docket = None 351 self._nodemap_docket = None
472 # Mapping of partial identifiers to full nodes. 352 # Mapping of partial identifiers to full nodes.
473 self._pcache = {} 353 self._pcache = {}
474 # Mapping of revision integer to full node. 354 # Mapping of revision integer to full node.
475 self._compengine = b'zlib' 355 self._compengine = b'zlib'
476 self._compengineopts = {} 356 self._compengineopts = {}
477 self._maxdeltachainspan = -1 357 self._maxdeltachainspan = -1
478 self._withsparseread = False 358 self._withsparseread = False
479 self._sparserevlog = False 359 self._sparserevlog = False
360 self.hassidedata = False
480 self._srdensitythreshold = 0.50 361 self._srdensitythreshold = 0.50
481 self._srmingapsize = 262144 362 self._srmingapsize = 262144
482 363
483 # Make copy of flag processors so each revlog instance can support 364 # Make copy of flag processors so each revlog instance can support
484 # custom flags. 365 # custom flags.
485 self._flagprocessors = dict(flagutil.flagprocessors) 366 self._flagprocessors = dict(flagutil.flagprocessors)
486 367
487 # 2-tuple of file handles being used for active writing. 368 # 3-tuple of file handles being used for active writing.
488 self._writinghandles = None 369 self._writinghandles = None
370 # prevent nesting of addgroup
371 self._adding_group = None
489 372
490 self._loadindex() 373 self._loadindex()
491 374
492 self._concurrencychecker = concurrencychecker 375 self._concurrencychecker = concurrencychecker
493 376
494 def _loadindex(self): 377 def _init_opts(self):
378 """process options (from above/config) to setup associated default revlog mode
379
380 These values might be affected when actually reading on-disk information.
381
382 The relevant values are returned for use in _loadindex().
383
384 * newversionflags:
385 version header to use if we need to create a new revlog
386
387 * mmapindexthreshold:
388 minimal index size at which to start using mmap
389
390 * force_nodemap:
391 force the usage of a "development" version of the nodemap code
392 """
495 mmapindexthreshold = None 393 mmapindexthreshold = None
496 opts = self.opener.options 394 opts = self.opener.options
497 395
498 if b'revlogv2' in opts: 396 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
499 newversionflags = REVLOGV2 | FLAG_INLINE_DATA 397 new_header = CHANGELOGV2
398 elif b'revlogv2' in opts:
399 new_header = REVLOGV2
500 elif b'revlogv1' in opts: 400 elif b'revlogv1' in opts:
501 newversionflags = REVLOGV1 | FLAG_INLINE_DATA 401 new_header = REVLOGV1 | FLAG_INLINE_DATA
502 if b'generaldelta' in opts: 402 if b'generaldelta' in opts:
503 newversionflags |= FLAG_GENERALDELTA 403 new_header |= FLAG_GENERALDELTA
504 elif b'revlogv0' in self.opener.options: 404 elif b'revlogv0' in self.opener.options:
505 newversionflags = REVLOGV0 405 new_header = REVLOGV0
506 else: 406 else:
507 newversionflags = REVLOG_DEFAULT_VERSION 407 new_header = REVLOG_DEFAULT_VERSION
508 408
509 if b'chunkcachesize' in opts: 409 if b'chunkcachesize' in opts:
510 self._chunkcachesize = opts[b'chunkcachesize'] 410 self._chunkcachesize = opts[b'chunkcachesize']
511 if b'maxchainlen' in opts: 411 if b'maxchainlen' in opts:
512 self._maxchainlen = opts[b'maxchainlen'] 412 self._maxchainlen = opts[b'maxchainlen']
524 self._compengineopts[b'zstd.level'] = opts[b'zstd.level'] 424 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
525 if b'maxdeltachainspan' in opts: 425 if b'maxdeltachainspan' in opts:
526 self._maxdeltachainspan = opts[b'maxdeltachainspan'] 426 self._maxdeltachainspan = opts[b'maxdeltachainspan']
527 if self._mmaplargeindex and b'mmapindexthreshold' in opts: 427 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
528 mmapindexthreshold = opts[b'mmapindexthreshold'] 428 mmapindexthreshold = opts[b'mmapindexthreshold']
529 self.hassidedata = bool(opts.get(b'side-data', False))
530 self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) 429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
531 withsparseread = bool(opts.get(b'with-sparse-read', False)) 430 withsparseread = bool(opts.get(b'with-sparse-read', False))
532 # sparse-revlog forces sparse-read 431 # sparse-revlog forces sparse-read
533 self._withsparseread = self._sparserevlog or withsparseread 432 self._withsparseread = self._sparserevlog or withsparseread
534 if b'sparse-read-density-threshold' in opts: 433 if b'sparse-read-density-threshold' in opts:
552 elif self._chunkcachesize & (self._chunkcachesize - 1): 451 elif self._chunkcachesize & (self._chunkcachesize - 1):
553 raise error.RevlogError( 452 raise error.RevlogError(
554 _(b'revlog chunk cache size %r is not a power of 2') 453 _(b'revlog chunk cache size %r is not a power of 2')
555 % self._chunkcachesize 454 % self._chunkcachesize
556 ) 455 )
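The validation above uses the classic bit trick: for n > 0, n & (n - 1) clears the lowest set bit, so the result is zero exactly when n is a power of two. For example:

    for n in (1024, 65536):
        assert n & (n - 1) == 0    # accepted chunk cache sizes
    for n in (3, 65535, 100000):
        assert n & (n - 1) != 0    # rejected with a RevlogError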
557 456 force_nodemap = opts.get(b'devel-force-nodemap', False)
558 indexdata = b'' 457 return new_header, mmapindexthreshold, force_nodemap
559 self._initempty = True 458
459 def _get_data(self, filepath, mmap_threshold, size=None):
460 """return a file content with or without mmap
461
462 If the file is missing return the empty string"""
560 try: 463 try:
561 with self._indexfp() as f: 464 with self.opener(filepath) as fp:
562 if ( 465 if mmap_threshold is not None:
563 mmapindexthreshold is not None 466 file_size = self.opener.fstat(fp).st_size
564 and self.opener.fstat(f).st_size >= mmapindexthreshold 467 if file_size >= mmap_threshold:
565 ): 468 if size is not None:
566 # TODO: should .close() to release resources without 469 # avoid potential mmap crash
567 # relying on Python GC 470 size = min(file_size, size)
568 indexdata = util.buffer(util.mmapread(f)) 471 # TODO: should .close() to release resources without
472 # relying on Python GC
473 if size is None:
474 return util.buffer(util.mmapread(fp))
475 else:
476 return util.buffer(util.mmapread(fp, size))
477 if size is None:
478 return fp.read()
569 else: 479 else:
570 indexdata = f.read() 480 return fp.read(size)
571 if len(indexdata) > 0:
572 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
573 self._initempty = False
574 else:
575 versionflags = newversionflags
576 except IOError as inst: 481 except IOError as inst:
577 if inst.errno != errno.ENOENT: 482 if inst.errno != errno.ENOENT:
578 raise 483 raise
579 484 return b''
580 versionflags = newversionflags 485
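A self-contained sketch of the strategy the new _get_data() above implements, assuming a plain filesystem path instead of a Mercurial opener: small files are read into memory, large ones are memory-mapped.

    import mmap
    import os

    def read_maybe_mmap(path, threshold=65536):
        """Return file contents; mmap the file once it exceeds threshold."""
        with open(path, 'rb') as fp:
            if os.fstat(fp.fileno()).st_size >= threshold:
                # zero-copy view backed by the page cache
                m = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
                return memoryview(m)
            return fp.read()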
581 486 def _loadindex(self, docket=None):
582 self.version = versionflags 487
583 488 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
584 flags = versionflags & ~0xFFFF 489
585 fmt = versionflags & 0xFFFF 490 if self.postfix is not None:
586 491 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
587 if fmt == REVLOGV0: 492 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
588 if flags: 493 entry_point = b'%s.i.a' % self.radix
589 raise error.RevlogError( 494 else:
590 _(b'unknown flags (%#04x) in version %d revlog %s') 495 entry_point = b'%s.i' % self.radix
591 % (flags >> 16, fmt, self.indexfile) 496
497 if docket is not None:
498 self._docket = docket
499 self._docket_file = entry_point
500 else:
501 entry_data = b''
502 self._initempty = True
503 entry_data = self._get_data(entry_point, mmapindexthreshold)
504 if len(entry_data) > 0:
505 header = INDEX_HEADER.unpack(entry_data[:4])[0]
506 self._initempty = False
507 else:
508 header = new_header
509
510 self._format_flags = header & ~0xFFFF
511 self._format_version = header & 0xFFFF
512
513 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
514 if supported_flags is None:
515 msg = _(b'unknown version (%d) in revlog %s')
516 msg %= (self._format_version, self.display_id)
517 raise error.RevlogError(msg)
518 elif self._format_flags & ~supported_flags:
519 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
520 display_flag = self._format_flags >> 16
521 msg %= (display_flag, self._format_version, self.display_id)
522 raise error.RevlogError(msg)
523
524 features = FEATURES_BY_VERSION[self._format_version]
525 self._inline = features[b'inline'](self._format_flags)
526 self._generaldelta = features[b'generaldelta'](self._format_flags)
527 self.hassidedata = features[b'sidedata']
528
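A worked example of the header split above: the 4-byte index header packs a 16-bit format version in the low bits and 16 feature-flag bits in the high bits (the flag values below match FLAG_INLINE_DATA and FLAG_GENERALDELTA from revlogutils.constants):

    REVLOGV1 = 1
    FLAG_INLINE_DATA = 1 << 16
    FLAG_GENERALDELTA = 1 << 17

    header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    assert header & 0xFFFF == REVLOGV1                               # _format_version
    assert header & ~0xFFFF == FLAG_INLINE_DATA | FLAG_GENERALDELTA  # _format_flags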
529 if not features[b'docket']:
530 self._indexfile = entry_point
531 index_data = entry_data
532 else:
533 self._docket_file = entry_point
534 if self._initempty:
535 self._docket = docketutil.default_docket(self, header)
536 else:
537 self._docket = docketutil.parse_docket(
538 self, entry_data, use_pending=self._trypending
539 )
540
541 if self._docket is not None:
542 self._indexfile = self._docket.index_filepath()
543 index_data = b''
544 index_size = self._docket.index_end
545 if index_size > 0:
546 index_data = self._get_data(
547 self._indexfile, mmapindexthreshold, size=index_size
592 ) 548 )
593 549 if len(index_data) < index_size:
594 self._inline = False 550 msg = _(b'too few index data for %s: got %d, expected %d')
595 self._generaldelta = False 551 msg %= (self.display_id, len(index_data), index_size)
596 552 raise error.RevlogError(msg)
597 elif fmt == REVLOGV1: 553
598 if flags & ~REVLOGV1_FLAGS:
599 raise error.RevlogError(
600 _(b'unknown flags (%#04x) in version %d revlog %s')
601 % (flags >> 16, fmt, self.indexfile)
602 )
603
604 self._inline = versionflags & FLAG_INLINE_DATA
605 self._generaldelta = versionflags & FLAG_GENERALDELTA
606
607 elif fmt == REVLOGV2:
608 if flags & ~REVLOGV2_FLAGS:
609 raise error.RevlogError(
610 _(b'unknown flags (%#04x) in version %d revlog %s')
611 % (flags >> 16, fmt, self.indexfile)
612 )
613
614 # There is a bug in the transaction handling when going from an
615 # inline revlog to a separate index and data file. Turn it off until
616 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
617 # See issue6485
618 self._inline = False 554 self._inline = False
619 # generaldelta implied by version 2 revlogs. 555 # generaldelta implied by version 2 revlogs.
620 self._generaldelta = True 556 self._generaldelta = True
621 557 # the logic for persistent nodemap will be dealt with within the
558 # main docket, so disable it for now.
559 self._nodemap_file = None
560
561 if self._docket is not None:
562 self._datafile = self._docket.data_filepath()
563 self._sidedatafile = self._docket.sidedata_filepath()
564 elif self.postfix is None:
565 self._datafile = b'%s.d' % self.radix
622 else: 566 else:
623 raise error.RevlogError( 567 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
624 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
625 )
626 568
627 self.nodeconstants = sha1nodeconstants 569 self.nodeconstants = sha1nodeconstants
628 self.nullid = self.nodeconstants.nullid 570 self.nullid = self.nodeconstants.nullid
629 571
630 # sparse-revlog can't be on without general-delta (issue6056) 572 # sparse-revlog can't be on without general-delta (issue6056)
632 self._sparserevlog = False 574 self._sparserevlog = False
633 575
634 self._storedeltachains = True 576 self._storedeltachains = True
635 577
636 devel_nodemap = ( 578 devel_nodemap = (
637 self.nodemap_file 579 self._nodemap_file
638 and opts.get(b'devel-force-nodemap', False) 580 and force_nodemap
639 and NodemapRevlogIO is not None 581 and parse_index_v1_nodemap is not None
640 ) 582 )
641 583
642 use_rust_index = False 584 use_rust_index = False
643 if rustrevlog is not None: 585 if rustrevlog is not None:
644 if self.nodemap_file is not None: 586 if self._nodemap_file is not None:
645 use_rust_index = True 587 use_rust_index = True
646 else: 588 else:
647 use_rust_index = self.opener.options.get(b'rust.index') 589 use_rust_index = self.opener.options.get(b'rust.index')
648 590
649 self._io = revlogio() 591 self._parse_index = parse_index_v1
650 if self.version == REVLOGV0: 592 if self._format_version == REVLOGV0:
651 self._io = revlogoldio() 593 self._parse_index = revlogv0.parse_index_v0
652 elif fmt == REVLOGV2: 594 elif self._format_version == REVLOGV2:
653 self._io = revlogv2io() 595 self._parse_index = parse_index_v2
596 elif self._format_version == CHANGELOGV2:
597 self._parse_index = parse_index_cl_v2
654 elif devel_nodemap: 598 elif devel_nodemap:
655 self._io = NodemapRevlogIO() 599 self._parse_index = parse_index_v1_nodemap
656 elif use_rust_index: 600 elif use_rust_index:
657 self._io = rustrevlogio() 601 self._parse_index = parse_index_v1_mixed
658 try: 602 try:
659 d = self._io.parseindex(indexdata, self._inline) 603 d = self._parse_index(index_data, self._inline)
660 index, _chunkcache = d 604 index, chunkcache = d
661 use_nodemap = ( 605 use_nodemap = (
662 not self._inline 606 not self._inline
663 and self.nodemap_file is not None 607 and self._nodemap_file is not None
664 and util.safehasattr(index, 'update_nodemap_data') 608 and util.safehasattr(index, 'update_nodemap_data')
665 ) 609 )
666 if use_nodemap: 610 if use_nodemap:
667 nodemap_data = nodemaputil.persisted_data(self) 611 nodemap_data = nodemaputil.persisted_data(self)
668 if nodemap_data is not None: 612 if nodemap_data is not None:
674 # no changelog tampering 618 # no changelog tampering
675 self._nodemap_docket = docket 619 self._nodemap_docket = docket
676 index.update_nodemap_data(*nodemap_data) 620 index.update_nodemap_data(*nodemap_data)
677 except (ValueError, IndexError): 621 except (ValueError, IndexError):
678 raise error.RevlogError( 622 raise error.RevlogError(
679 _(b"index %s is corrupted") % self.indexfile 623 _(b"index %s is corrupted") % self.display_id
680 ) 624 )
681 self.index, self._chunkcache = d 625 self.index = index
682 if not self._chunkcache: 626 self._segmentfile = randomaccessfile.randomaccessfile(
683 self._chunkclear() 627 self.opener,
628 (self._indexfile if self._inline else self._datafile),
629 self._chunkcachesize,
630 chunkcache,
631 )
632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
633 self.opener,
634 self._sidedatafile,
635 self._chunkcachesize,
636 )
684 # revnum -> (chain-length, sum-delta-length) 637 # revnum -> (chain-length, sum-delta-length)
685 self._chaininfocache = util.lrucachedict(500) 638 self._chaininfocache = util.lrucachedict(500)
686 # revlog header -> revlog compressor 639 # revlog header -> revlog compressor
687 self._decompressors = {} 640 self._decompressors = {}
688 641
689 @util.propertycache 642 @util.propertycache
643 def revlog_kind(self):
644 return self.target[0]
645
646 @util.propertycache
647 def display_id(self):
648 """The public facing "ID" of the revlog that we use in message"""
649 # Maybe we should build a user facing representation of
650 # revlog.target instead of using `self.radix`
651 return self.radix
652
653 def _get_decompressor(self, t):
654 try:
655 compressor = self._decompressors[t]
656 except KeyError:
657 try:
658 engine = util.compengines.forrevlogheader(t)
659 compressor = engine.revlogcompressor(self._compengineopts)
660 self._decompressors[t] = compressor
661 except KeyError:
662 raise error.RevlogError(
663 _(b'unknown compression type %s') % binascii.hexlify(t)
664 )
665 return compressor
666
667 @util.propertycache
690 def _compressor(self): 668 def _compressor(self):
691 engine = util.compengines[self._compengine] 669 engine = util.compengines[self._compengine]
692 return engine.revlogcompressor(self._compengineopts) 670 return engine.revlogcompressor(self._compengineopts)
693 671
694 def _indexfp(self, mode=b'r'): 672 @util.propertycache
673 def _decompressor(self):
674 """the default decompressor"""
675 if self._docket is None:
676 return None
677 t = self._docket.default_compression_header
678 c = self._get_decompressor(t)
679 return c.decompress
680
681 def _indexfp(self):
695 """file object for the revlog's index file""" 682 """file object for the revlog's index file"""
696 args = {'mode': mode} 683 return self.opener(self._indexfile, mode=b"r")
697 if mode != b'r': 684
698 args['checkambig'] = self._checkambig 685 def __index_write_fp(self):
699 if mode == b'w': 686 # You should not use this directly and use `_writing` instead
700 args['atomictemp'] = True 687 try:
701 return self.opener(self.indexfile, **args) 688 f = self.opener(
689 self._indexfile, mode=b"r+", checkambig=self._checkambig
690 )
691 if self._docket is None:
692 f.seek(0, os.SEEK_END)
693 else:
694 f.seek(self._docket.index_end, os.SEEK_SET)
695 return f
696 except IOError as inst:
697 if inst.errno != errno.ENOENT:
698 raise
699 return self.opener(
700 self._indexfile, mode=b"w+", checkambig=self._checkambig
701 )
702
703 def __index_new_fp(self):
704 # You should not use this unless you are upgrading from inline revlog
705 return self.opener(
706 self._indexfile,
707 mode=b"w",
708 checkambig=self._checkambig,
709 atomictemp=True,
710 )
702 711
703 def _datafp(self, mode=b'r'): 712 def _datafp(self, mode=b'r'):
704 """file object for the revlog's data file""" 713 """file object for the revlog's data file"""
705 return self.opener(self.datafile, mode=mode) 714 return self.opener(self._datafile, mode=mode)
706 715
707 @contextlib.contextmanager 716 @contextlib.contextmanager
708 def _datareadfp(self, existingfp=None): 717 def _sidedatareadfp(self):
709 """file object suitable to read data""" 718 """file object suitable to read sidedata"""
710 # Use explicit file handle, if given. 719 if self._writinghandles:
711 if existingfp is not None: 720 yield self._writinghandles[2]
712 yield existingfp
713
714 # Use a file handle being actively used for writes, if available.
715 # There is some danger to doing this because reads will seek the
716 # file. However, _writeentry() performs a SEEK_END before all writes,
717 # so we should be safe.
718 elif self._writinghandles:
719 if self._inline:
720 yield self._writinghandles[0]
721 else:
722 yield self._writinghandles[1]
723
724 # Otherwise open a new file handle.
725 else: 721 else:
726 if self._inline: 722 with self.opener(self._sidedatafile) as fp:
727 func = self._indexfp
728 else:
729 func = self._datafp
730 with func() as fp:
731 yield fp 723 yield fp
732 724
733 def tiprev(self): 725 def tiprev(self):
734 return len(self.index) - 1 726 return len(self.index) - 1
735 727
783 ): 775 ):
784 return False 776 return False
785 return True 777 return True
786 778
787 def update_caches(self, transaction): 779 def update_caches(self, transaction):
788 if self.nodemap_file is not None: 780 if self._nodemap_file is not None:
789 if transaction is None: 781 if transaction is None:
790 nodemaputil.update_persistent_nodemap(self) 782 nodemaputil.update_persistent_nodemap(self)
791 else: 783 else:
792 nodemaputil.setup_persistent_nodemap(transaction, self) 784 nodemaputil.setup_persistent_nodemap(transaction, self)
793 785
794 def clearcaches(self): 786 def clearcaches(self):
795 self._revisioncache = None 787 self._revisioncache = None
796 self._chainbasecache.clear() 788 self._chainbasecache.clear()
797 self._chunkcache = (0, b'') 789 self._segmentfile.clear_cache()
790 self._segmentfile_sidedata.clear_cache()
798 self._pcache = {} 791 self._pcache = {}
799 self._nodemap_docket = None 792 self._nodemap_docket = None
800 self.index.clearcaches() 793 self.index.clearcaches()
801 # The python code is the one responsible for validating the docket, we 794 # The python code is the one responsible for validating the docket, we
802 # end up having to refresh it here. 795 # end up having to refresh it here.
803 use_nodemap = ( 796 use_nodemap = (
804 not self._inline 797 not self._inline
805 and self.nodemap_file is not None 798 and self._nodemap_file is not None
806 and util.safehasattr(self.index, 'update_nodemap_data') 799 and util.safehasattr(self.index, 'update_nodemap_data')
807 ) 800 )
808 if use_nodemap: 801 if use_nodemap:
809 nodemap_data = nodemaputil.persisted_data(self) 802 nodemap_data = nodemaputil.persisted_data(self)
810 if nodemap_data is not None: 803 if nodemap_data is not None:
816 return self.index.rev(node) 809 return self.index.rev(node)
817 except TypeError: 810 except TypeError:
818 raise 811 raise
819 except error.RevlogError: 812 except error.RevlogError:
820 # parsers.c radix tree lookup failed 813 # parsers.c radix tree lookup failed
821 if node == wdirid or node in wdirfilenodeids: 814 if (
815 node == self.nodeconstants.wdirid
816 or node in self.nodeconstants.wdirfilenodeids
817 ):
822 raise error.WdirUnsupported 818 raise error.WdirUnsupported
823 raise error.LookupError(node, self.indexfile, _(b'no node')) 819 raise error.LookupError(node, self.display_id, _(b'no node'))
824 820
825 # Accessors for index entries. 821 # Accessors for index entries.
826 822
827 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes 823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
828 # are flags. 824 # are flags.
829 def start(self, rev): 825 def start(self, rev):
830 return int(self.index[rev][0] >> 16) 826 return int(self.index[rev][0] >> 16)
831 827
828 def sidedata_cut_off(self, rev):
829 sd_cut_off = self.index[rev][8]
830 if sd_cut_off != 0:
831 return sd_cut_off
832 # This is some annoying dance, because entries without sidedata
833 # currently use 0 as their offset. (instead of previous-offset +
834 # previous-size)
835 #
836 # We should reconsider this sidedata → 0 sidedata_offset policy.
837 # In the meantime, we need this.
838 while 0 <= rev:
839 e = self.index[rev]
840 if e[9] != 0:
841 return e[8] + e[9]
842 rev -= 1
843 return 0
844
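A compact model of the backward scan in sidedata_cut_off(), with illustrative (sidedata_offset, sidedata_size) pairs standing in for entries 8 and 9 of each index row:

    index = [(0, 16), (0, 0), (16, 32), (0, 0), (0, 0)]

    def cut_off(rev):
        while rev >= 0:
            offset, size = index[rev]
            if size != 0:            # nearest revision that has sidedata
                return offset + size
            rev -= 1
        return 0

    assert cut_off(4) == 48   # falls back to rev 2: 16 + 32
    assert cut_off(0) == 16   # rev 0 itself: 0 + 16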
832 def flags(self, rev): 845 def flags(self, rev):
833 return self.index[rev][0] & 0xFFFF 846 return self.index[rev][0] & 0xFFFF
834 847
835 def length(self, rev): 848 def length(self, rev):
836 return self.index[rev][1] 849 return self.index[rev][1]
837 850
838 def sidedata_length(self, rev): 851 def sidedata_length(self, rev):
839 if self.version & 0xFFFF != REVLOGV2: 852 if not self.hassidedata:
840 return 0 853 return 0
841 return self.index[rev][9] 854 return self.index[rev][9]
842 855
843 def rawsize(self, rev): 856 def rawsize(self, rev):
844 """return the length of the uncompressed text for a given revision""" 857 """return the length of the uncompressed text for a given revision"""
994 checkrev = self.node 1007 checkrev = self.node
995 for r in revs: 1008 for r in revs:
996 checkrev(r) 1009 checkrev(r)
997 # and we're sure ancestors aren't filtered as well 1010 # and we're sure ancestors aren't filtered as well
998 1011
999 if rustancestor is not None: 1012 if rustancestor is not None and self.index.rust_ext_compat:
1000 lazyancestors = rustancestor.LazyAncestors 1013 lazyancestors = rustancestor.LazyAncestors
1001 arg = self.index 1014 arg = self.index
1002 else: 1015 else:
1003 lazyancestors = ancestor.lazyancestors 1016 lazyancestors = ancestor.lazyancestors
1004 arg = self._uncheckedparentrevs 1017 arg = self._uncheckedparentrevs
1019 1032
1020 'heads' and 'common' are both lists of node IDs. If heads is 1033 'heads' and 'common' are both lists of node IDs. If heads is
1021 not supplied, uses all of the revlog's heads. If common is not 1034 not supplied, uses all of the revlog's heads. If common is not
1022 supplied, uses nullid.""" 1035 supplied, uses nullid."""
1023 if common is None: 1036 if common is None:
1024 common = [nullid] 1037 common = [self.nullid]
1025 if heads is None: 1038 if heads is None:
1026 heads = self.heads() 1039 heads = self.heads()
1027 1040
1028 common = [self.rev(n) for n in common] 1041 common = [self.rev(n) for n in common]
1029 heads = [self.rev(n) for n in heads] 1042 heads = [self.rev(n) for n in heads]
1081 nullrev. 1094 nullrev.
1082 """ 1095 """
1083 if common is None: 1096 if common is None:
1084 common = [nullrev] 1097 common = [nullrev]
1085 1098
1086 if rustancestor is not None: 1099 if rustancestor is not None and self.index.rust_ext_compat:
1087 return rustancestor.MissingAncestors(self.index, common) 1100 return rustancestor.MissingAncestors(self.index, common)
1088 return ancestor.incrementalmissingancestors(self.parentrevs, common) 1101 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1089 1102
1090 def findmissingrevs(self, common=None, heads=None): 1103 def findmissingrevs(self, common=None, heads=None):
1091 """Return the revision numbers of the ancestors of heads that 1104 """Return the revision numbers of the ancestors of heads that
1125 1138
1126 'heads' and 'common' are both lists of node IDs. If heads is 1139 'heads' and 'common' are both lists of node IDs. If heads is
1127 not supplied, uses all of the revlog's heads. If common is not 1140 not supplied, uses all of the revlog's heads. If common is not
1128 supplied, uses nullid.""" 1141 supplied, uses nullid."""
1129 if common is None: 1142 if common is None:
1130 common = [nullid] 1143 common = [self.nullid]
1131 if heads is None: 1144 if heads is None:
1132 heads = self.heads() 1145 heads = self.heads()
1133 1146
1134 common = [self.rev(n) for n in common] 1147 common = [self.rev(n) for n in common]
1135 heads = [self.rev(n) for n in heads] 1148 heads = [self.rev(n) for n in heads]
1163 roots = list(roots) 1176 roots = list(roots)
1164 if not roots: 1177 if not roots:
1165 return nonodes 1178 return nonodes
1166 lowestrev = min([self.rev(n) for n in roots]) 1179 lowestrev = min([self.rev(n) for n in roots])
1167 else: 1180 else:
1168 roots = [nullid] # Everybody's a descendant of nullid 1181 roots = [self.nullid] # Everybody's a descendant of nullid
1169 lowestrev = nullrev 1182 lowestrev = nullrev
1170 if (lowestrev == nullrev) and (heads is None): 1183 if (lowestrev == nullrev) and (heads is None):
1171 # We want _all_ the nodes! 1184 # We want _all_ the nodes!
1172 return ([self.node(r) for r in self], [nullid], list(self.heads())) 1185 return (
1186 [self.node(r) for r in self],
1187 [self.nullid],
1188 list(self.heads()),
1189 )
1173 if heads is None: 1190 if heads is None:
1174 # All nodes are ancestors, so the latest ancestor is the last 1191 # All nodes are ancestors, so the latest ancestor is the last
1175 # node. 1192 # node.
1176 highestrev = len(self) - 1 1193 highestrev = len(self) - 1
1177 # Set ancestors to None to signal that every node is an ancestor. 1194 # Set ancestors to None to signal that every node is an ancestor.
1193 highestrev = max([self.rev(n) for n in nodestotag]) 1210 highestrev = max([self.rev(n) for n in nodestotag])
1194 while nodestotag: 1211 while nodestotag:
1195 # grab a node to tag 1212 # grab a node to tag
1196 n = nodestotag.pop() 1213 n = nodestotag.pop()
1197 # Never tag nullid 1214 # Never tag nullid
1198 if n == nullid: 1215 if n == self.nullid:
1199 continue 1216 continue
1200 # A node's revision number represents its place in a 1217 # A node's revision number represents its place in a
1201 # topologically sorted list of nodes. 1218 # topologically sorted list of nodes.
1202 r = self.rev(n) 1219 r = self.rev(n)
1203 if r >= lowestrev: 1220 if r >= lowestrev:
1205 # If we are possibly a descendant of one of the roots 1222 # If we are possibly a descendant of one of the roots
1206 # and we haven't already been marked as an ancestor 1223 # and we haven't already been marked as an ancestor
1207 ancestors.add(n) # Mark as ancestor 1224 ancestors.add(n) # Mark as ancestor
1208 # Add non-nullid parents to list of nodes to tag. 1225 # Add non-nullid parents to list of nodes to tag.
1209 nodestotag.update( 1226 nodestotag.update(
1210 [p for p in self.parents(n) if p != nullid] 1227 [p for p in self.parents(n) if p != self.nullid]
1211 ) 1228 )
1212 elif n in heads: # We've seen it before, is it a fake head? 1229 elif n in heads: # We've seen it before, is it a fake head?
1213 # So it is, real heads should not be the ancestors of 1230 # So it is, real heads should not be the ancestors of
1214 # any other heads. 1231 # any other heads.
1215 heads.pop(n) 1232 heads.pop(n)
1233 return nonodes 1250 return nonodes
1234 else: 1251 else:
1235 # We are descending from nullid, and don't need to care about 1252 # We are descending from nullid, and don't need to care about
1236 # any other roots. 1253 # any other roots.
1237 lowestrev = nullrev 1254 lowestrev = nullrev
1238 roots = [nullid] 1255 roots = [self.nullid]
1239 # Transform our roots list into a set. 1256 # Transform our roots list into a set.
1240 descendants = set(roots) 1257 descendants = set(roots)
1241 # Also, keep the original roots so we can filter out roots that aren't 1258 # Also, keep the original roots so we can filter out roots that aren't
1242 # 'real' roots (i.e. are descended from other roots). 1259 # 'real' roots (i.e. are descended from other roots).
1243 roots = descendants.copy() 1260 roots = descendants.copy()
1297 if revs is None: 1314 if revs is None:
1298 try: 1315 try:
1299 return self.index.headrevs() 1316 return self.index.headrevs()
1300 except AttributeError: 1317 except AttributeError:
1301 return self._headrevs() 1318 return self._headrevs()
1302 if rustdagop is not None: 1319 if rustdagop is not None and self.index.rust_ext_compat:
1303 return rustdagop.headrevs(self.index, revs) 1320 return rustdagop.headrevs(self.index, revs)
1304 return dagop.headrevs(revs, self._uncheckedparentrevs) 1321 return dagop.headrevs(revs, self._uncheckedparentrevs)
1305 1322
1306 def computephases(self, roots): 1323 def computephases(self, roots):
1307 return self.index.computephasesmapsets(roots) 1324 return self.index.computephasesmapsets(roots)
1327 if stop is specified, it will consider all the revs from stop 1344 if stop is specified, it will consider all the revs from stop
1328 as if they had no children 1345 as if they had no children
1329 """ 1346 """
1330 if start is None and stop is None: 1347 if start is None and stop is None:
1331 if not len(self): 1348 if not len(self):
1332 return [nullid] 1349 return [self.nullid]
1333 return [self.node(r) for r in self.headrevs()] 1350 return [self.node(r) for r in self.headrevs()]
1334 1351
1335 if start is None: 1352 if start is None:
1336 start = nullrev 1353 start = nullrev
1337 else: 1354 else:
1417 except (AttributeError, OverflowError): 1434 except (AttributeError, OverflowError):
1418 ancs = ancestor.ancestors(self.parentrevs, a, b) 1435 ancs = ancestor.ancestors(self.parentrevs, a, b)
1419 if ancs: 1436 if ancs:
1420 # choose a consistent winner when there's a tie 1437 # choose a consistent winner when there's a tie
1421 return min(map(self.node, ancs)) 1438 return min(map(self.node, ancs))
1422 return nullid 1439 return self.nullid
1423 1440
1424 def _match(self, id): 1441 def _match(self, id):
1425 if isinstance(id, int): 1442 if isinstance(id, int):
1426 # rev 1443 # rev
1427 return self.node(id) 1444 return self.node(id)
1428 if len(id) == 20: 1445 if len(id) == self.nodeconstants.nodelen:
1429 # possibly a binary node 1446 # possibly a binary node
1430 # odds of a binary node being all hex in ASCII are 1 in 10**25 1447 # odds of a binary node being all hex in ASCII are 1 in 10**25
1431 try: 1448 try:
1432 node = id 1449 node = id
1433 self.rev(node) # quick search the index 1450 self.rev(node) # quick search the index
1444 if rev < 0 or rev >= len(self): 1461 if rev < 0 or rev >= len(self):
1445 raise ValueError 1462 raise ValueError
1446 return self.node(rev) 1463 return self.node(rev)
1447 except (ValueError, OverflowError): 1464 except (ValueError, OverflowError):
1448 pass 1465 pass
1449 if len(id) == 40: 1466 if len(id) == 2 * self.nodeconstants.nodelen:
1450 try: 1467 try:
1451 # a full hex nodeid? 1468 # a full hex nodeid?
1452 node = bin(id) 1469 node = bin(id)
1453 self.rev(node) 1470 self.rev(node)
1454 return node 1471 return node
1455 except (TypeError, error.LookupError): 1472 except (TypeError, error.LookupError):
1456 pass 1473 pass
1457 1474
1458 def _partialmatch(self, id): 1475 def _partialmatch(self, id):
1459 # we don't care about wdirfilenodeids as they should always be full hashes 1476
1460 maybewdir = wdirhex.startswith(id) 1477 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1478 ambiguous = False
1461 try: 1479 try:
1462 partial = self.index.partialmatch(id) 1480 partial = self.index.partialmatch(id)
1463 if partial and self.hasnode(partial): 1481 if partial and self.hasnode(partial):
1464 if maybewdir: 1482 if maybewdir:
1465 # single 'ff...' match in radix tree, ambiguous with wdir 1483 # single 'ff...' match in radix tree, ambiguous with wdir
1466 raise error.RevlogError 1484 ambiguous = True
1467 return partial 1485 else:
1468 if maybewdir: 1486 return partial
1487 elif maybewdir:
1469 # no 'ff...' match in radix tree, wdir identified 1488 # no 'ff...' match in radix tree, wdir identified
1470 raise error.WdirUnsupported 1489 raise error.WdirUnsupported
1471 return None 1490 else:
1491 return None
1472 except error.RevlogError: 1492 except error.RevlogError:
1473 # parsers.c radix tree lookup gave multiple matches 1493 # parsers.c radix tree lookup gave multiple matches
1474 # fast path: for unfiltered changelog, radix tree is accurate 1494 # fast path: for unfiltered changelog, radix tree is accurate
1475 if not getattr(self, 'filteredrevs', None): 1495 if not getattr(self, 'filteredrevs', None):
1476 raise error.AmbiguousPrefixLookupError( 1496 ambiguous = True
1477 id, self.indexfile, _(b'ambiguous identifier')
1478 )
1479 # fall through to slow path that filters hidden revisions 1497 # fall through to slow path that filters hidden revisions
1480 except (AttributeError, ValueError): 1498 except (AttributeError, ValueError):
1481 # we are pure python, or key was too short to search radix tree 1499 # we are pure python, or key was too short to search radix tree
1482 pass 1500 pass
1501 if ambiguous:
1502 raise error.AmbiguousPrefixLookupError(
1503 id, self.display_id, _(b'ambiguous identifier')
1504 )
1483 1505
1484 if id in self._pcache: 1506 if id in self._pcache:
1485 return self._pcache[id] 1507 return self._pcache[id]
1486 1508
1487 if len(id) <= 40: 1509 if len(id) <= 40:
1491 prefix = bin(id[: l * 2]) 1513 prefix = bin(id[: l * 2])
1492 nl = [e[7] for e in self.index if e[7].startswith(prefix)] 1514 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1493 nl = [ 1515 nl = [
1494 n for n in nl if hex(n).startswith(id) and self.hasnode(n) 1516 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1495 ] 1517 ]
1496 if nullhex.startswith(id): 1518 if self.nodeconstants.nullhex.startswith(id):
1497 nl.append(nullid) 1519 nl.append(self.nullid)
1498 if len(nl) > 0: 1520 if len(nl) > 0:
1499 if len(nl) == 1 and not maybewdir: 1521 if len(nl) == 1 and not maybewdir:
1500 self._pcache[id] = nl[0] 1522 self._pcache[id] = nl[0]
1501 return nl[0] 1523 return nl[0]
1502 raise error.AmbiguousPrefixLookupError( 1524 raise error.AmbiguousPrefixLookupError(
1503 id, self.indexfile, _(b'ambiguous identifier') 1525 id, self.display_id, _(b'ambiguous identifier')
1504 ) 1526 )
1505 if maybewdir: 1527 if maybewdir:
1506 raise error.WdirUnsupported 1528 raise error.WdirUnsupported
1507 return None 1529 return None
1508 except TypeError: 1530 except TypeError:
1518 return n 1540 return n
1519 n = self._partialmatch(id) 1541 n = self._partialmatch(id)
1520 if n: 1542 if n:
1521 return n 1543 return n
1522 1544
1523 raise error.LookupError(id, self.indexfile, _(b'no match found')) 1545 raise error.LookupError(id, self.display_id, _(b'no match found'))
1524 1546
1525 def shortest(self, node, minlength=1): 1547 def shortest(self, node, minlength=1):
1526 """Find the shortest unambiguous prefix that matches node.""" 1548 """Find the shortest unambiguous prefix that matches node."""
1527 1549
1528 def isvalid(prefix): 1550 def isvalid(prefix):
1532 return False 1554 return False
1533 except error.WdirUnsupported: 1555 except error.WdirUnsupported:
1534 # single 'ff...' match 1556 # single 'ff...' match
1535 return True 1557 return True
1536 if matchednode is None: 1558 if matchednode is None:
1537 raise error.LookupError(node, self.indexfile, _(b'no node')) 1559 raise error.LookupError(node, self.display_id, _(b'no node'))
1538 return True 1560 return True
1539 1561
1540 def maybewdir(prefix): 1562 def maybewdir(prefix):
1541 return all(c == b'f' for c in pycompat.iterbytestr(prefix)) 1563 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1542 1564
1552 if not getattr(self, 'filteredrevs', None): 1574 if not getattr(self, 'filteredrevs', None):
1553 try: 1575 try:
1554 length = max(self.index.shortest(node), minlength) 1576 length = max(self.index.shortest(node), minlength)
1555 return disambiguate(hexnode, length) 1577 return disambiguate(hexnode, length)
1556 except error.RevlogError: 1578 except error.RevlogError:
1557 if node != wdirid: 1579 if node != self.nodeconstants.wdirid:
1558 raise error.LookupError(node, self.indexfile, _(b'no node')) 1580 raise error.LookupError(
1581 node, self.display_id, _(b'no node')
1582 )
1559 except AttributeError: 1583 except AttributeError:
1560 # Fall through to pure code 1584 # Fall through to pure code
1561 pass 1585 pass
1562 1586
1563 if node == wdirid: 1587 if node == self.nodeconstants.wdirid:
1564 for length in range(minlength, len(hexnode) + 1): 1588 for length in range(minlength, len(hexnode) + 1):
1565 prefix = hexnode[:length] 1589 prefix = hexnode[:length]
1566 if isvalid(prefix): 1590 if isvalid(prefix):
1567 return prefix 1591 return prefix
1568 1592
1576 1600
1577 returns True if text is different than what is stored. 1601 returns True if text is different than what is stored.
1578 """ 1602 """
1579 p1, p2 = self.parents(node) 1603 p1, p2 = self.parents(node)
1580 return storageutil.hashrevisionsha1(text, p1, p2) != node 1604 return storageutil.hashrevisionsha1(text, p1, p2) != node
1581
1582 def _cachesegment(self, offset, data):
1583 """Add a segment to the revlog cache.
1584
1585 Accepts an absolute offset and the data that is at that location.
1586 """
1587 o, d = self._chunkcache
1588 # try to add to existing cache
1589 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1590 self._chunkcache = o, d + data
1591 else:
1592 self._chunkcache = offset, data
1593
1594 def _readsegment(self, offset, length, df=None):
1595 """Load a segment of raw data from the revlog.
1596
1597 Accepts an absolute offset, length to read, and an optional existing
1598 file handle to read from.
1599
1600 If an existing file handle is passed, it will be seeked and the
1601 original seek position will NOT be restored.
1602
1603 Returns a str or buffer of raw byte data.
1604
1605 Raises if the requested number of bytes could not be read.
1606 """
1607 # Cache data both forward and backward around the requested
1608 # data, in a fixed size window. This helps speed up operations
1609 # involving reading the revlog backwards.
1610 cachesize = self._chunkcachesize
1611 realoffset = offset & ~(cachesize - 1)
1612 reallength = (
1613 (offset + length + cachesize) & ~(cachesize - 1)
1614 ) - realoffset
1615 with self._datareadfp(df) as df:
1616 df.seek(realoffset)
1617 d = df.read(reallength)
1618
1619 self._cachesegment(realoffset, d)
1620 if offset != realoffset or reallength != length:
1621 startoffset = offset - realoffset
1622 if len(d) - startoffset < length:
1623 raise error.RevlogError(
1624 _(
1625 b'partial read of revlog %s; expected %d bytes from '
1626 b'offset %d, got %d'
1627 )
1628 % (
1629 self.indexfile if self._inline else self.datafile,
1630 length,
1631 realoffset,
1632 len(d) - startoffset,
1633 )
1634 )
1635
1636 return util.buffer(d, startoffset, length)
1637
1638 if len(d) < length:
1639 raise error.RevlogError(
1640 _(
1641 b'partial read of revlog %s; expected %d bytes from offset '
1642 b'%d, got %d'
1643 )
1644 % (
1645 self.indexfile if self._inline else self.datafile,
1646 length,
1647 offset,
1648 len(d),
1649 )
1650 )
1651
1652 return d
1653
1654 def _getsegment(self, offset, length, df=None):
1655 """Obtain a segment of raw data from the revlog.
1656
1657 Accepts an absolute offset, length of bytes to obtain, and an
1658 optional file handle to the already-opened revlog. If the file
1659 handle is used, its original seek position will not be preserved.
1660
1661 Requests for data may be returned from a cache.
1662
1663 Returns a str or a buffer instance of raw byte data.
1664 """
1665 o, d = self._chunkcache
1666 l = len(d)
1667
1668 # is it in the cache?
1669 cachestart = offset - o
1670 cacheend = cachestart + length
1671 if cachestart >= 0 and cacheend <= l:
1672 if cachestart == 0 and cacheend == l:
1673 return d # avoid a copy
1674 return util.buffer(d, cachestart, cacheend - cachestart)
1675
1676 return self._readsegment(offset, length, df=df)
1677 1605
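The removed segment helpers (their role is now played by revlogutils.randomaccessfile) round every read out to a power-of-two window so that scans, notably backward ones, are served from cache. A self-contained sketch of the alignment arithmetic; the 64KiB default is illustrative:

    def cache_window(offset, length, cachesize=65536):
        # cachesize must be a power of two for the mask trick to work
        assert cachesize & (cachesize - 1) == 0
        realoffset = offset & ~(cachesize - 1)
        realend = (offset + length + cachesize) & ~(cachesize - 1)
        return realoffset, realend - realoffset

    # a 100-byte read at offset 70000 is widened to one aligned 64KiB block
    assert cache_window(70000, 100) == (65536, 65536)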
1678 def _getsegmentforrevs(self, startrev, endrev, df=None): 1606 def _getsegmentforrevs(self, startrev, endrev, df=None):
1679 """Obtain a segment of raw data corresponding to a range of revisions. 1607 """Obtain a segment of raw data corresponding to a range of revisions.
1680 1608
1681 Accepts the start and end revisions and an optional already-open 1609 Accepts the start and end revisions and an optional already-open
1705 if self._inline: 1633 if self._inline:
1706 start += (startrev + 1) * self.index.entry_size 1634 start += (startrev + 1) * self.index.entry_size
1707 end += (endrev + 1) * self.index.entry_size 1635 end += (endrev + 1) * self.index.entry_size
1708 length = end - start 1636 length = end - start
1709 1637
1710 return start, self._getsegment(start, length, df=df) 1638 return start, self._segmentfile.read_chunk(start, length, df)
1711 1639
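The `(startrev + 1) * entry_size` shift above exists because an inline revlog interleaves each revision's data right after its index entry in the single ".i" file. A tiny illustration; 64 bytes matches the revlog-v1 entry size, other versions differ:

    def inline_file_offset(data_offset, rev, entry_size=64):
        # rev + 1 index entries precede revision `rev`'s data chunk
        return data_offset + (rev + 1) * entry_size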
1712 def _chunk(self, rev, df=None): 1640 def _chunk(self, rev, df=None):
1713 """Obtain a single decompressed chunk for a revision. 1641 """Obtain a single decompressed chunk for a revision.
1714 1642
1715 Accepts an integer revision and an optional already-open file handle 1643 Accepts an integer revision and an optional already-open file handle
1716 to be used for reading. If used, the seek position of the file will not 1644 to be used for reading. If used, the seek position of the file will not
1717 be preserved. 1645 be preserved.
1718 1646
1719 Returns a str holding uncompressed data for the requested revision. 1647 Returns a str holding uncompressed data for the requested revision.
1720 """ 1648 """
1721 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1]) 1649 compression_mode = self.index[rev][10]
1650 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1651 if compression_mode == COMP_MODE_PLAIN:
1652 return data
1653 elif compression_mode == COMP_MODE_DEFAULT:
1654 return self._decompressor(data)
1655 elif compression_mode == COMP_MODE_INLINE:
1656 return self.decompress(data)
1657 else:
1658 msg = b'unknown compression mode %d'
1659 msg %= compression_mode
1660 raise error.RevlogError(msg)
1722 1661
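The per-revision compression mode introduced here (index field 10) selects one of three decoders: COMP_MODE_PLAIN chunks are stored raw, COMP_MODE_DEFAULT chunks were compressed by the docket's default engine and stored without a header, and COMP_MODE_INLINE chunks carry the legacy self-describing header byte. A hedged sketch of the dispatch; the constant values and decoder callables are illustrative:

    COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

    def decompress_chunk(data, mode, default_decompress, inline_decompress):
        if mode == COMP_MODE_PLAIN:
            return data                      # stored raw, nothing to do
        if mode == COMP_MODE_DEFAULT:
            return default_decompress(data)  # headerless, engine named by docket
        if mode == COMP_MODE_INLINE:
            return inline_decompress(data)   # legacy self-describing header
        raise ValueError('unknown compression mode %d' % mode)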
1723 def _chunks(self, revs, df=None, targetsize=None): 1662 def _chunks(self, revs, df=None, targetsize=None):
1724 """Obtain decompressed chunks for the specified revisions. 1663 """Obtain decompressed chunks for the specified revisions.
1725 1664
1726 Accepts an iterable of numeric revisions that are assumed to be in 1665 Accepts an iterable of numeric revisions that are assumed to be in
1764 # issue4215 - we can't cache a run of chunks greater than 1703 # issue4215 - we can't cache a run of chunks greater than
1765 # 2G on Windows 1704 # 2G on Windows
1766 return [self._chunk(rev, df=df) for rev in revschunk] 1705 return [self._chunk(rev, df=df) for rev in revschunk]
1767 1706
1768 decomp = self.decompress 1707 decomp = self.decompress
1708 # self._decompressor might be None, but will not be used in that case
1709 def_decomp = self._decompressor
1769 for rev in revschunk: 1710 for rev in revschunk:
1770 chunkstart = start(rev) 1711 chunkstart = start(rev)
1771 if inline: 1712 if inline:
1772 chunkstart += (rev + 1) * iosize 1713 chunkstart += (rev + 1) * iosize
1773 chunklength = length(rev) 1714 chunklength = length(rev)
1774 ladd(decomp(buffer(data, chunkstart - offset, chunklength))) 1715 comp_mode = self.index[rev][10]
1716 c = buffer(data, chunkstart - offset, chunklength)
1717 if comp_mode == COMP_MODE_PLAIN:
1718 ladd(c)
1719 elif comp_mode == COMP_MODE_INLINE:
1720 ladd(decomp(c))
1721 elif comp_mode == COMP_MODE_DEFAULT:
1722 ladd(def_decomp(c))
1723 else:
1724 msg = b'unknown compression mode %d'
1725 msg %= comp_mode
1726 raise error.RevlogError(msg)
1775 1727
1776 return l 1728 return l
1777
1778 def _chunkclear(self):
1779 """Clear the raw chunk cache."""
1780 self._chunkcache = (0, b'')
1781 1729
1782 def deltaparent(self, rev): 1730 def deltaparent(self, rev):
1783 """return deltaparent of the given revision""" 1731 """return deltaparent of the given revision"""
1784 base = self.index[rev][3] 1732 base = self.index[rev][3]
1785 if base == rev: 1733 if base == rev:
1852 msg = ( 1800 msg = (
1853 b'revlog.revision(..., raw=True) is deprecated, ' 1801 b'revlog.revision(..., raw=True) is deprecated, '
1854 b'use revlog.rawdata(...)' 1802 b'use revlog.rawdata(...)'
1855 ) 1803 )
1856 util.nouideprecwarn(msg, b'5.2', stacklevel=2) 1804 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1857 return self._revisiondata(nodeorrev, _df, raw=raw)[0] 1805 return self._revisiondata(nodeorrev, _df, raw=raw)
1858 1806
1859 def sidedata(self, nodeorrev, _df=None): 1807 def sidedata(self, nodeorrev, _df=None):
1860 """a map of extra data related to the changeset but not part of the hash 1808 """a map of extra data related to the changeset but not part of the hash
1861 1809
 1862 This function currently returns a dictionary. However, a more 1810 This function currently returns a dictionary. However, a more
 1863 advanced mapping object will likely be used in the future for 1811 advanced mapping object will likely be used in the future for
 1864 more efficient/lazy code. 1812 more efficient/lazy code.
1865 """ 1813 """
1866 return self._revisiondata(nodeorrev, _df)[1] 1814 # deal with <nodeorrev> argument type
1815 if isinstance(nodeorrev, int):
1816 rev = nodeorrev
1817 else:
1818 rev = self.rev(nodeorrev)
1819 return self._sidedata(rev)
1867 1820
1868 def _revisiondata(self, nodeorrev, _df=None, raw=False): 1821 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1869 # deal with <nodeorrev> argument type 1822 # deal with <nodeorrev> argument type
1870 if isinstance(nodeorrev, int): 1823 if isinstance(nodeorrev, int):
1871 rev = nodeorrev 1824 rev = nodeorrev
1873 else: 1826 else:
1874 node = nodeorrev 1827 node = nodeorrev
1875 rev = None 1828 rev = None
1876 1829
1877 # fast path the special `nullid` rev 1830 # fast path the special `nullid` rev
1878 if node == nullid: 1831 if node == self.nullid:
1879 return b"", {} 1832 return b""
1880 1833
1881 # ``rawtext`` is the text as stored inside the revlog. Might be the 1834 # ``rawtext`` is the text as stored inside the revlog. Might be the
1882 # revision or might need to be processed to retrieve the revision. 1835 # revision or might need to be processed to retrieve the revision.
1883 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) 1836 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1884 1837
1885 if self.version & 0xFFFF == REVLOGV2:
1886 if rev is None:
1887 rev = self.rev(node)
1888 sidedata = self._sidedata(rev)
1889 else:
1890 sidedata = {}
1891
1892 if raw and validated: 1838 if raw and validated:
 1893 # if we don't want to process the raw text and the raw 1839 # if we don't want to process the raw text and the raw
 1894 # text is already cached, we can exit early. 1840 # text is already cached, we can exit early.
1895 return rawtext, sidedata 1841 return rawtext
1896 if rev is None: 1842 if rev is None:
1897 rev = self.rev(node) 1843 rev = self.rev(node)
 1898 # the revlog's flags for this revision 1844 # the revlog's flags for this revision
 1899 # (they usually alter its state or content) 1845 # (they usually alter its state or content)
1900 flags = self.flags(rev) 1846 flags = self.flags(rev)
1901 1847
1902 if validated and flags == REVIDX_DEFAULT_FLAGS: 1848 if validated and flags == REVIDX_DEFAULT_FLAGS:
1903 # no extra flags set, no flag processor runs, text = rawtext 1849 # no extra flags set, no flag processor runs, text = rawtext
1904 return rawtext, sidedata 1850 return rawtext
1905 1851
1906 if raw: 1852 if raw:
1907 validatehash = flagutil.processflagsraw(self, rawtext, flags) 1853 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1908 text = rawtext 1854 text = rawtext
1909 else: 1855 else:
1912 if validatehash: 1858 if validatehash:
1913 self.checkhash(text, node, rev=rev) 1859 self.checkhash(text, node, rev=rev)
1914 if not validated: 1860 if not validated:
1915 self._revisioncache = (node, rev, rawtext) 1861 self._revisioncache = (node, rev, rawtext)
1916 1862
1917 return text, sidedata 1863 return text
1918 1864
1919 def _rawtext(self, node, rev, _df=None): 1865 def _rawtext(self, node, rev, _df=None):
1920 """return the possibly unvalidated rawtext for a revision 1866 """return the possibly unvalidated rawtext for a revision
1921 1867
1922 returns (rev, rawtext, validated) 1868 returns (rev, rawtext, validated)
1968 if self._inline: 1914 if self._inline:
1969 sidedata_offset += self.index.entry_size * (1 + rev) 1915 sidedata_offset += self.index.entry_size * (1 + rev)
1970 if sidedata_size == 0: 1916 if sidedata_size == 0:
1971 return {} 1917 return {}
1972 1918
1973 segment = self._getsegment(sidedata_offset, sidedata_size) 1919 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1920 filename = self._sidedatafile
1921 end = self._docket.sidedata_end
1922 offset = sidedata_offset
1923 length = sidedata_size
1924 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1925 raise error.RevlogError(m)
1926
1927 comp_segment = self._segmentfile_sidedata.read_chunk(
1928 sidedata_offset, sidedata_size
1929 )
1930
1931 comp = self.index[rev][11]
1932 if comp == COMP_MODE_PLAIN:
1933 segment = comp_segment
1934 elif comp == COMP_MODE_DEFAULT:
1935 segment = self._decompressor(comp_segment)
1936 elif comp == COMP_MODE_INLINE:
1937 segment = self.decompress(comp_segment)
1938 else:
1939 msg = b'unknown compression mode %d'
1940 msg %= comp
1941 raise error.RevlogError(msg)
1942
1974 sidedata = sidedatautil.deserialize_sidedata(segment) 1943 sidedata = sidedatautil.deserialize_sidedata(segment)
1975 return sidedata 1944 return sidedata
1976 1945
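The new bounds check above treats the docket's sidedata_end as the authority on how much of the sidedata file is valid; bytes past it may be leftovers from an interrupted write. The same guard as a standalone sketch:

    def check_sidedata_bounds(offset, size, sidedata_end):
        if sidedata_end < offset + size:
            raise ValueError(
                'sidedata file too short: %d bytes at offset %d, '
                'valid data ends at %d' % (size, offset, sidedata_end)
            )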
1977 def rawdata(self, nodeorrev, _df=None): 1946 def rawdata(self, nodeorrev, _df=None):
 1978 """return the uncompressed raw data of a given node or revision number. 1947
1979 1948
1980 _df - an existing file handle to read from. (internal-only) 1949 _df - an existing file handle to read from. (internal-only)
1981 """ 1950 """
1982 return self._revisiondata(nodeorrev, _df, raw=True)[0] 1951 return self._revisiondata(nodeorrev, _df, raw=True)
1983 1952
1984 def hash(self, text, p1, p2): 1953 def hash(self, text, p1, p2):
1985 """Compute a node hash. 1954 """Compute a node hash.
1986 1955
1987 Available as a function so that subclasses can replace the hash 1956 Available as a function so that subclasses can replace the hash
2011 revornode = rev 1980 revornode = rev
2012 if revornode is None: 1981 if revornode is None:
2013 revornode = templatefilters.short(hex(node)) 1982 revornode = templatefilters.short(hex(node))
2014 raise error.RevlogError( 1983 raise error.RevlogError(
2015 _(b"integrity check failed on %s:%s") 1984 _(b"integrity check failed on %s:%s")
2016 % (self.indexfile, pycompat.bytestr(revornode)) 1985 % (self.display_id, pycompat.bytestr(revornode))
2017 ) 1986 )
2018 except error.RevlogError: 1987 except error.RevlogError:
2019 if self._censorable and storageutil.iscensoredtext(text): 1988 if self._censorable and storageutil.iscensoredtext(text):
2020 raise error.CensoredNodeError(self.indexfile, node, text) 1989 raise error.CensoredNodeError(self.display_id, node, text)
2021 raise 1990 raise
2022 1991
2023 def _enforceinlinesize(self, tr, fp=None): 1992 def _enforceinlinesize(self, tr):
2024 """Check if the revlog is too big for inline and convert if so. 1993 """Check if the revlog is too big for inline and convert if so.
2025 1994
2026 This should be called after revisions are added to the revlog. If the 1995 This should be called after revisions are added to the revlog. If the
2027 revlog has grown too large to be an inline revlog, it will convert it 1996 revlog has grown too large to be an inline revlog, it will convert it
2028 to use multiple index and data files. 1997 to use multiple index and data files.
2029 """ 1998 """
2030 tiprev = len(self) - 1 1999 tiprev = len(self) - 1
2031 if ( 2000 total_size = self.start(tiprev) + self.length(tiprev)
2032 not self._inline 2001 if not self._inline or total_size < _maxinline:
2033 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2034 ):
2035 return 2002 return
2036 2003
2037 troffset = tr.findoffset(self.indexfile) 2004 troffset = tr.findoffset(self._indexfile)
2038 if troffset is None: 2005 if troffset is None:
2039 raise error.RevlogError( 2006 raise error.RevlogError(
2040 _(b"%s not found in the transaction") % self.indexfile 2007 _(b"%s not found in the transaction") % self._indexfile
2041 ) 2008 )
2042 trindex = 0 2009 trindex = 0
2043 tr.add(self.datafile, 0) 2010 tr.add(self._datafile, 0)
2044 2011
2045 if fp: 2012 existing_handles = False
2013 if self._writinghandles is not None:
2014 existing_handles = True
2015 fp = self._writinghandles[0]
2046 fp.flush() 2016 fp.flush()
2047 fp.close() 2017 fp.close()
2048 # We can't use the cached file handle after close(). So prevent 2018 # We can't use the cached file handle after close(). So prevent
2049 # its usage. 2019 # its usage.
2050 self._writinghandles = None 2020 self._writinghandles = None
2051 2021 self._segmentfile.writing_handle = None
 2052 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh: 2022 # No need to deal with the sidedata writing handle, as it is only
 2053 for r in self: 2023 # relevant to revlog-v2, which is never inline and so never
 2054 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1]) 2024 # reaches this code
2055 if troffset <= self.start(r): 2025
2056 trindex = r 2026 new_dfh = self._datafp(b'w+')
2057 2027 new_dfh.truncate(0) # drop any potentially existing data
2058 with self._indexfp(b'w') as fp: 2028 try:
2059 self.version &= ~FLAG_INLINE_DATA 2029 with self._indexfp() as read_ifh:
2060 self._inline = False 2030 for r in self:
2061 io = self._io 2031 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2062 for i in self: 2032 if troffset <= self.start(r) + r * self.index.entry_size:
2063 e = io.packentry(self.index[i], self.node, self.version, i) 2033 trindex = r
2064 fp.write(e) 2034 new_dfh.flush()
2065 2035
 2066 # the temp file replaces the real index when we exit the context 2036 with self.__index_new_fp() as fp:
2067 # manager 2037 self._format_flags &= ~FLAG_INLINE_DATA
2068 2038 self._inline = False
2069 tr.replace(self.indexfile, trindex * self.index.entry_size) 2039 for i in self:
2070 nodemaputil.setup_persistent_nodemap(tr, self) 2040 e = self.index.entry_binary(i)
2071 self._chunkclear() 2041 if i == 0 and self._docket is None:
2042 header = self._format_flags | self._format_version
2043 header = self.index.pack_header(header)
2044 e = header + e
2045 fp.write(e)
2046 if self._docket is not None:
2047 self._docket.index_end = fp.tell()
2048
2049 # There is a small transactional race here. If the rename of
2050 # the index fails, we should remove the datafile. It is more
2051 # important to ensure that the data file is not truncated
2052 # when the index is replaced as otherwise data is lost.
2053 tr.replace(self._datafile, self.start(trindex))
2054
 2055 # the temp file replaces the real index when we exit the context
2056 # manager
2057
2058 tr.replace(self._indexfile, trindex * self.index.entry_size)
2059 nodemaputil.setup_persistent_nodemap(tr, self)
2060 self._segmentfile = randomaccessfile.randomaccessfile(
2061 self.opener,
2062 self._datafile,
2063 self._chunkcachesize,
2064 )
2065
2066 if existing_handles:
2067 # switched from inline to conventional reopen the index
2068 ifh = self.__index_write_fp()
2069 self._writinghandles = (ifh, new_dfh, None)
2070 self._segmentfile.writing_handle = new_dfh
2071 new_dfh = None
 2072 # No need to deal with the sidedata writing handle, as it is only
 2073 # relevant to revlog-v2, which is never inline and so never
 2074 # reaches this code
2075 finally:
2076 if new_dfh is not None:
2077 new_dfh.close()
2072 2078
2073 def _nodeduplicatecallback(self, transaction, node): 2079 def _nodeduplicatecallback(self, transaction, node):
2074 """called when trying to add a node already stored.""" 2080 """called when trying to add a node already stored."""
2081
2082 @contextlib.contextmanager
2083 def reading(self):
2084 """Context manager that keeps data and sidedata files open for reading"""
2085 with self._segmentfile.reading():
2086 with self._segmentfile_sidedata.reading():
2087 yield
2088
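A hedged usage sketch for the new reading() context manager: wrapping a batch of reads keeps one handle open on each of the data and sidedata files instead of reopening them per chunk. `rlog` stands for any revlog instance:

    def read_all_revisions(rlog):
        # one open/close per backing file for the whole loop
        with rlog.reading():
            return [rlog.revision(r) for r in rlog.revs()]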
2089 @contextlib.contextmanager
2090 def _writing(self, transaction):
2091 if self._trypending:
 2092 msg = b'trying to write to a `trypending` revlog: %s'
2093 msg %= self.display_id
2094 raise error.ProgrammingError(msg)
2095 if self._writinghandles is not None:
2096 yield
2097 else:
2098 ifh = dfh = sdfh = None
2099 try:
2100 r = len(self)
2101 # opening the data file.
2102 dsize = 0
2103 if r:
2104 dsize = self.end(r - 1)
2105 dfh = None
2106 if not self._inline:
2107 try:
2108 dfh = self._datafp(b"r+")
2109 if self._docket is None:
2110 dfh.seek(0, os.SEEK_END)
2111 else:
2112 dfh.seek(self._docket.data_end, os.SEEK_SET)
2113 except IOError as inst:
2114 if inst.errno != errno.ENOENT:
2115 raise
2116 dfh = self._datafp(b"w+")
2117 transaction.add(self._datafile, dsize)
2118 if self._sidedatafile is not None:
2119 try:
2120 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2121 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2122 except IOError as inst:
2123 if inst.errno != errno.ENOENT:
2124 raise
2125 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2126 transaction.add(
2127 self._sidedatafile, self._docket.sidedata_end
2128 )
2129
2130 # opening the index file.
2131 isize = r * self.index.entry_size
2132 ifh = self.__index_write_fp()
2133 if self._inline:
2134 transaction.add(self._indexfile, dsize + isize)
2135 else:
2136 transaction.add(self._indexfile, isize)
 2137 # expose all file handles for writing.
2138 self._writinghandles = (ifh, dfh, sdfh)
2139 self._segmentfile.writing_handle = ifh if self._inline else dfh
2140 self._segmentfile_sidedata.writing_handle = sdfh
2141 yield
2142 if self._docket is not None:
2143 self._write_docket(transaction)
2144 finally:
2145 self._writinghandles = None
2146 self._segmentfile.writing_handle = None
2147 self._segmentfile_sidedata.writing_handle = None
2148 if dfh is not None:
2149 dfh.close()
2150 if sdfh is not None:
2151 sdfh.close()
 2152 # close the index file last to avoid exposing references to
 2153 # potentially unflushed data content.
2154 if ifh is not None:
2155 ifh.close()
2156
2157 def _write_docket(self, transaction):
2158 """write the current docket on disk
2159
 2160 Exists as a method to help the changelog implement its transaction logic.
 2161
 2162 We could also imagine using the same transaction logic for all revlogs,
 2163 since dockets are cheap."""
2164 self._docket.write(transaction)
2075 2165
2076 def addrevision( 2166 def addrevision(
2077 self, 2167 self,
2078 text, 2168 text,
2079 transaction, 2169 transaction,
2100 deltacomputer - an optional deltacomputer instance shared between 2190 deltacomputer - an optional deltacomputer instance shared between
2101 multiple calls 2191 multiple calls
2102 """ 2192 """
2103 if link == nullrev: 2193 if link == nullrev:
2104 raise error.RevlogError( 2194 raise error.RevlogError(
2105 _(b"attempted to add linkrev -1 to %s") % self.indexfile 2195 _(b"attempted to add linkrev -1 to %s") % self.display_id
2106 ) 2196 )
2107 2197
2108 if sidedata is None: 2198 if sidedata is None:
2109 sidedata = {} 2199 sidedata = {}
2110 elif not self.hassidedata: 2200 elif sidedata and not self.hassidedata:
2111 raise error.ProgrammingError( 2201 raise error.ProgrammingError(
 2112 _(b"trying to add sidedata to a revlog that doesn't support them") 2202
2113 ) 2203 )
2114 2204
2115 if flags: 2205 if flags:
2125 if len(rawtext) > _maxentrysize: 2215 if len(rawtext) > _maxentrysize:
2126 raise error.RevlogError( 2216 raise error.RevlogError(
2127 _( 2217 _(
2128 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB" 2218 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2129 ) 2219 )
2130 % (self.indexfile, len(rawtext)) 2220 % (self.display_id, len(rawtext))
2131 ) 2221 )
2132 2222
2133 node = node or self.hash(rawtext, p1, p2) 2223 node = node or self.hash(rawtext, p1, p2)
2134 rev = self.index.get_rev(node) 2224 rev = self.index.get_rev(node)
2135 if rev is not None: 2225 if rev is not None:
2166 ): 2256 ):
2167 """add a raw revision with known flags, node and parents 2257 """add a raw revision with known flags, node and parents
2168 useful when reusing a revision not stored in this revlog (ex: received 2258 useful when reusing a revision not stored in this revlog (ex: received
2169 over wire, or read from an external bundle). 2259 over wire, or read from an external bundle).
2170 """ 2260 """
2171 dfh = None 2261 with self._writing(transaction):
2172 if not self._inline:
2173 dfh = self._datafp(b"a+")
2174 ifh = self._indexfp(b"a+")
2175 try:
2176 return self._addrevision( 2262 return self._addrevision(
2177 node, 2263 node,
2178 rawtext, 2264 rawtext,
2179 transaction, 2265 transaction,
2180 link, 2266 link,
2181 p1, 2267 p1,
2182 p2, 2268 p2,
2183 flags, 2269 flags,
2184 cachedelta, 2270 cachedelta,
2185 ifh,
2186 dfh,
2187 deltacomputer=deltacomputer, 2271 deltacomputer=deltacomputer,
2188 sidedata=sidedata, 2272 sidedata=sidedata,
2189 ) 2273 )
2190 finally:
2191 if dfh:
2192 dfh.close()
2193 ifh.close()
2194 2274
2195 def compress(self, data): 2275 def compress(self, data):
2196 """Generate a possibly-compressed representation of data.""" 2276 """Generate a possibly-compressed representation of data."""
2197 if not data: 2277 if not data:
2198 return b'', data 2278 return b'', data
2251 elif t == b'\0': 2331 elif t == b'\0':
2252 return data 2332 return data
2253 elif t == b'u': 2333 elif t == b'u':
2254 return util.buffer(data, 1) 2334 return util.buffer(data, 1)
2255 2335
2256 try: 2336 compressor = self._get_decompressor(t)
2257 compressor = self._decompressors[t]
2258 except KeyError:
2259 try:
2260 engine = util.compengines.forrevlogheader(t)
2261 compressor = engine.revlogcompressor(self._compengineopts)
2262 self._decompressors[t] = compressor
2263 except KeyError:
2264 raise error.RevlogError(
2265 _(b'unknown compression type %s') % binascii.hexlify(t)
2266 )
2267 2337
2268 return compressor.decompress(data) 2338 return compressor.decompress(data)
2269 2339
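The header byte consumed by decompress() above is worth spelling out: b'u' marks an uncompressed chunk whose marker is stripped, a leading b'\0' means the buffer is used as-is, and b'x' doubles as the zlib stream magic, so the whole buffer feeds zlib directly. A minimal sketch covering only the built-in zlib case; the real code resolves other header bytes through the compression-engine registry:

    import zlib

    def legacy_decompress(data):
        if not data:
            return data
        t = data[:1]
        if t == b'\0':
            return data                   # raw buffer stored as-is
        if t == b'u':
            return data[1:]               # explicit "uncompressed" marker
        if t == b'x':
            return zlib.decompress(data)  # b'x' is the zlib magic byte
        raise ValueError('unknown compression header %r' % t)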
2270 def _addrevision( 2340 def _addrevision(
2271 self, 2341 self,
2275 link, 2345 link,
2276 p1, 2346 p1,
2277 p2, 2347 p2,
2278 flags, 2348 flags,
2279 cachedelta, 2349 cachedelta,
2280 ifh,
2281 dfh,
2282 alwayscache=False, 2350 alwayscache=False,
2283 deltacomputer=None, 2351 deltacomputer=None,
2284 sidedata=None, 2352 sidedata=None,
2285 ): 2353 ):
2286 """internal function to add revisions to the log 2354 """internal function to add revisions to the log
2294 2362
2295 invariants: 2363 invariants:
2296 - rawtext is optional (can be None); if not set, cachedelta must be set. 2364 - rawtext is optional (can be None); if not set, cachedelta must be set.
2297 if both are set, they must correspond to each other. 2365 if both are set, they must correspond to each other.
2298 """ 2366 """
2299 if node == nullid: 2367 if node == self.nullid:
2300 raise error.RevlogError( 2368 raise error.RevlogError(
2301 _(b"%s: attempt to add null revision") % self.indexfile 2369 _(b"%s: attempt to add null revision") % self.display_id
2302 ) 2370 )
2303 if node == wdirid or node in wdirfilenodeids: 2371 if (
2372 node == self.nodeconstants.wdirid
2373 or node in self.nodeconstants.wdirfilenodeids
2374 ):
2304 raise error.RevlogError( 2375 raise error.RevlogError(
2305 _(b"%s: attempt to add wdir revision") % self.indexfile 2376 _(b"%s: attempt to add wdir revision") % self.display_id
2306 ) 2377 )
2378 if self._writinghandles is None:
2379 msg = b'adding revision outside `revlog._writing` context'
2380 raise error.ProgrammingError(msg)
2307 2381
2308 if self._inline: 2382 if self._inline:
2309 fh = ifh 2383 fh = self._writinghandles[0]
2310 else: 2384 else:
2311 fh = dfh 2385 fh = self._writinghandles[1]
2312 2386
2313 btext = [rawtext] 2387 btext = [rawtext]
2314 2388
2315 curr = len(self) 2389 curr = len(self)
2316 prev = curr - 1 2390 prev = curr - 1
2317 2391
2318 offset = self._get_data_offset(prev) 2392 offset = self._get_data_offset(prev)
2319 2393
2320 if self._concurrencychecker: 2394 if self._concurrencychecker:
2395 ifh, dfh, sdfh = self._writinghandles
2396 # XXX no checking for the sidedata file
2321 if self._inline: 2397 if self._inline:
2322 # offset is "as if" it were in the .d file, so we need to add on 2398 # offset is "as if" it were in the .d file, so we need to add on
2323 # the size of the entry metadata. 2399 # the size of the entry metadata.
2324 self._concurrencychecker( 2400 self._concurrencychecker(
2325 ifh, self.indexfile, offset + curr * self.index.entry_size 2401 ifh, self._indexfile, offset + curr * self.index.entry_size
2326 ) 2402 )
2327 else: 2403 else:
2328 # Entries in the .i are a consistent size. 2404 # Entries in the .i are a consistent size.
2329 self._concurrencychecker( 2405 self._concurrencychecker(
2330 ifh, self.indexfile, curr * self.index.entry_size 2406 ifh, self._indexfile, curr * self.index.entry_size
2331 ) 2407 )
2332 self._concurrencychecker(dfh, self.datafile, offset) 2408 self._concurrencychecker(dfh, self._datafile, offset)
2333 2409
2334 p1r, p2r = self.rev(p1), self.rev(p2) 2410 p1r, p2r = self.rev(p1), self.rev(p2)
2335 2411
2336 # full versions are inserted when the needed deltas 2412 # full versions are inserted when the needed deltas
2337 # become comparable to the uncompressed text 2413 # become comparable to the uncompressed text
2346 textlen = len(rawtext) 2422 textlen = len(rawtext)
2347 2423
2348 if deltacomputer is None: 2424 if deltacomputer is None:
2349 deltacomputer = deltautil.deltacomputer(self) 2425 deltacomputer = deltautil.deltacomputer(self)
2350 2426
2351 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) 2427 revinfo = revlogutils.revisioninfo(
2428 node,
2429 p1,
2430 p2,
2431 btext,
2432 textlen,
2433 cachedelta,
2434 flags,
2435 )
2352 2436
2353 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) 2437 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2354 2438
2355 if sidedata: 2439 compression_mode = COMP_MODE_INLINE
2440 if self._docket is not None:
2441 default_comp = self._docket.default_compression_header
2442 r = deltautil.delta_compression(default_comp, deltainfo)
2443 compression_mode, deltainfo = r
2444
2445 sidedata_compression_mode = COMP_MODE_INLINE
2446 if sidedata and self.hassidedata:
2447 sidedata_compression_mode = COMP_MODE_PLAIN
2356 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) 2448 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2357 sidedata_offset = offset + deltainfo.deltalen 2449 sidedata_offset = self._docket.sidedata_end
2450 h, comp_sidedata = self.compress(serialized_sidedata)
2451 if (
2452 h != b'u'
2453 and comp_sidedata[0:1] != b'\0'
2454 and len(comp_sidedata) < len(serialized_sidedata)
2455 ):
2456 assert not h
2457 if (
2458 comp_sidedata[0:1]
2459 == self._docket.default_compression_header
2460 ):
2461 sidedata_compression_mode = COMP_MODE_DEFAULT
2462 serialized_sidedata = comp_sidedata
2463 else:
2464 sidedata_compression_mode = COMP_MODE_INLINE
2465 serialized_sidedata = comp_sidedata
2358 else: 2466 else:
2359 serialized_sidedata = b"" 2467 serialized_sidedata = b""
2360 # Don't store the offset if the sidedata is empty, that way 2468 # Don't store the offset if the sidedata is empty, that way
2361 # we can easily detect empty sidedata and they will be no different 2469 # we can easily detect empty sidedata and they will be no different
2362 # than ones we manually add. 2470 # than ones we manually add.
2363 sidedata_offset = 0 2471 sidedata_offset = 0
2364 2472
2365 e = ( 2473 e = revlogutils.entry(
2366 offset_type(offset, flags), 2474 flags=flags,
2367 deltainfo.deltalen, 2475 data_offset=offset,
2368 textlen, 2476 data_compressed_length=deltainfo.deltalen,
2369 deltainfo.base, 2477 data_uncompressed_length=textlen,
2370 link, 2478 data_compression_mode=compression_mode,
2371 p1r, 2479 data_delta_base=deltainfo.base,
2372 p2r, 2480 link_rev=link,
2373 node, 2481 parent_rev_1=p1r,
2374 sidedata_offset, 2482 parent_rev_2=p2r,
2375 len(serialized_sidedata), 2483 node_id=node,
2484 sidedata_offset=sidedata_offset,
2485 sidedata_compressed_length=len(serialized_sidedata),
2486 sidedata_compression_mode=sidedata_compression_mode,
2376 ) 2487 )
2377 2488
2378 if self.version & 0xFFFF != REVLOGV2:
2379 e = e[:8]
2380
2381 self.index.append(e) 2489 self.index.append(e)
2382 entry = self._io.packentry(e, self.node, self.version, curr) 2490 entry = self.index.entry_binary(curr)
2491 if curr == 0 and self._docket is None:
2492 header = self._format_flags | self._format_version
2493 header = self.index.pack_header(header)
2494 entry = header + entry
2383 self._writeentry( 2495 self._writeentry(
2384 transaction, 2496 transaction,
2385 ifh,
2386 dfh,
2387 entry, 2497 entry,
2388 deltainfo.data, 2498 deltainfo.data,
2389 link, 2499 link,
2390 offset, 2500 offset,
2391 serialized_sidedata, 2501 serialized_sidedata,
2502 sidedata_offset,
2392 ) 2503 )
2393 2504
2394 rawtext = btext[0] 2505 rawtext = btext[0]
2395 2506
2396 if alwayscache and rawtext is None: 2507 if alwayscache and rawtext is None:
2408 end of the data file within a transaction, you can have cases where, for 2519 end of the data file within a transaction, you can have cases where, for
2409 example, rev `n` does not have sidedata while rev `n - 1` does, leading 2520 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2410 to `n - 1`'s sidedata being written after `n`'s data. 2521 to `n - 1`'s sidedata being written after `n`'s data.
2411 2522
2412 TODO cache this in a docket file before getting out of experimental.""" 2523 TODO cache this in a docket file before getting out of experimental."""
2413 if self.version & 0xFFFF != REVLOGV2: 2524 if self._docket is None:
2414 return self.end(prev) 2525 return self.end(prev)
2415 2526 else:
2416 offset = 0 2527 return self._docket.data_end
2417 for rev, entry in enumerate(self.index):
2418 sidedata_end = entry[8] + entry[9]
2419 # Sidedata for a previous rev has potentially been written after
2420 # this rev's end, so take the max.
2421 offset = max(self.end(rev), offset, sidedata_end)
2422 return offset
2423 2528
2424 def _writeentry( 2529 def _writeentry(
2425 self, transaction, ifh, dfh, entry, data, link, offset, sidedata 2530 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2426 ): 2531 ):
2427 # Files opened in a+ mode have inconsistent behavior on various 2532 # Files opened in a+ mode have inconsistent behavior on various
2428 # platforms. Windows requires that a file positioning call be made 2533 # platforms. Windows requires that a file positioning call be made
2429 # when the file handle transitions between reads and writes. See 2534 # when the file handle transitions between reads and writes. See
2430 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other 2535 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2434 # 2539 #
2435 # We work around this issue by inserting a seek() before writing. 2540 # We work around this issue by inserting a seek() before writing.
2436 # Note: This is likely not necessary on Python 3. However, because 2541 # Note: This is likely not necessary on Python 3. However, because
2437 # the file handle is reused for reads and may be seeked there, we need 2542 # the file handle is reused for reads and may be seeked there, we need
2438 # to be careful before changing this. 2543 # to be careful before changing this.
2439 ifh.seek(0, os.SEEK_END) 2544 if self._writinghandles is None:
2545 msg = b'adding revision outside `revlog._writing` context'
2546 raise error.ProgrammingError(msg)
2547 ifh, dfh, sdfh = self._writinghandles
2548 if self._docket is None:
2549 ifh.seek(0, os.SEEK_END)
2550 else:
2551 ifh.seek(self._docket.index_end, os.SEEK_SET)
2440 if dfh: 2552 if dfh:
2441 dfh.seek(0, os.SEEK_END) 2553 if self._docket is None:
2554 dfh.seek(0, os.SEEK_END)
2555 else:
2556 dfh.seek(self._docket.data_end, os.SEEK_SET)
2557 if sdfh:
2558 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2442 2559
2443 curr = len(self) - 1 2560 curr = len(self) - 1
2444 if not self._inline: 2561 if not self._inline:
2445 transaction.add(self.datafile, offset) 2562 transaction.add(self._datafile, offset)
2446 transaction.add(self.indexfile, curr * len(entry)) 2563 if self._sidedatafile:
2564 transaction.add(self._sidedatafile, sidedata_offset)
2565 transaction.add(self._indexfile, curr * len(entry))
2447 if data[0]: 2566 if data[0]:
2448 dfh.write(data[0]) 2567 dfh.write(data[0])
2449 dfh.write(data[1]) 2568 dfh.write(data[1])
2450 if sidedata: 2569 if sidedata:
2451 dfh.write(sidedata) 2570 sdfh.write(sidedata)
2452 ifh.write(entry) 2571 ifh.write(entry)
2453 else: 2572 else:
2454 offset += curr * self.index.entry_size 2573 offset += curr * self.index.entry_size
2455 transaction.add(self.indexfile, offset) 2574 transaction.add(self._indexfile, offset)
2456 ifh.write(entry) 2575 ifh.write(entry)
2457 ifh.write(data[0]) 2576 ifh.write(data[0])
2458 ifh.write(data[1]) 2577 ifh.write(data[1])
2459 if sidedata: 2578 assert not sidedata
2460 ifh.write(sidedata) 2579 self._enforceinlinesize(transaction)
2461 self._enforceinlinesize(transaction, ifh) 2580 if self._docket is not None:
2581 self._docket.index_end = self._writinghandles[0].tell()
2582 self._docket.data_end = self._writinghandles[1].tell()
2583 self._docket.sidedata_end = self._writinghandles[2].tell()
2584
2462 nodemaputil.setup_persistent_nodemap(transaction, self) 2585 nodemaputil.setup_persistent_nodemap(transaction, self)
2463 2586
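The seek logic above follows one rule: without a docket the append position is the physical end of file, while with a docket the recorded *_end offset is authoritative, because the file may contain trailing bytes from a rolled-back transaction. Standalone sketch:

    import os

    def seek_for_append(fh, docket_end=None):
        if docket_end is None:
            fh.seek(0, os.SEEK_END)           # docket-less: trust file size
        else:
            fh.seek(docket_end, os.SEEK_SET)  # docket: trust recorded end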
2464 def addgroup( 2587 def addgroup(
2465 self, 2588 self,
2466 deltas, 2589 deltas,
2479 2602
2480 If ``addrevisioncb`` is defined, it will be called with arguments of 2603 If ``addrevisioncb`` is defined, it will be called with arguments of
2481 this revlog and the node that was added. 2604 this revlog and the node that was added.
2482 """ 2605 """
2483 2606
2484 if self._writinghandles: 2607 if self._adding_group:
2485 raise error.ProgrammingError(b'cannot nest addgroup() calls') 2608 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2486 2609
2487 r = len(self) 2610 self._adding_group = True
2488 end = 0
2489 if r:
2490 end = self.end(r - 1)
2491 ifh = self._indexfp(b"a+")
2492 isize = r * self.index.entry_size
2493 if self._inline:
2494 transaction.add(self.indexfile, end + isize)
2495 dfh = None
2496 else:
2497 transaction.add(self.indexfile, isize)
2498 transaction.add(self.datafile, end)
2499 dfh = self._datafp(b"a+")
2500
2501 def flush():
2502 if dfh:
2503 dfh.flush()
2504 ifh.flush()
2505
2506 self._writinghandles = (ifh, dfh)
2507 empty = True 2611 empty = True
2508
2509 try: 2612 try:
2510 deltacomputer = deltautil.deltacomputer(self) 2613 with self._writing(transaction):
2511 # loop through our set of deltas 2614 deltacomputer = deltautil.deltacomputer(self)
2512 for data in deltas: 2615 # loop through our set of deltas
2513 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data 2616 for data in deltas:
2514 link = linkmapper(linknode) 2617 (
2515 flags = flags or REVIDX_DEFAULT_FLAGS 2618 node,
2516 2619 p1,
2517 rev = self.index.get_rev(node) 2620 p2,
2518 if rev is not None: 2621 linknode,
2519 # this can happen if two branches make the same change 2622 deltabase,
2520 self._nodeduplicatecallback(transaction, rev) 2623 delta,
2521 if duplicaterevisioncb: 2624 flags,
2522 duplicaterevisioncb(self, rev) 2625 sidedata,
2626 ) = data
2627 link = linkmapper(linknode)
2628 flags = flags or REVIDX_DEFAULT_FLAGS
2629
2630 rev = self.index.get_rev(node)
2631 if rev is not None:
2632 # this can happen if two branches make the same change
2633 self._nodeduplicatecallback(transaction, rev)
2634 if duplicaterevisioncb:
2635 duplicaterevisioncb(self, rev)
2636 empty = False
2637 continue
2638
2639 for p in (p1, p2):
2640 if not self.index.has_node(p):
2641 raise error.LookupError(
2642 p, self.radix, _(b'unknown parent')
2643 )
2644
2645 if not self.index.has_node(deltabase):
2646 raise error.LookupError(
2647 deltabase, self.display_id, _(b'unknown delta base')
2648 )
2649
2650 baserev = self.rev(deltabase)
2651
2652 if baserev != nullrev and self.iscensored(baserev):
2653 # if base is censored, delta must be full replacement in a
2654 # single patch operation
2655 hlen = struct.calcsize(b">lll")
2656 oldlen = self.rawsize(baserev)
2657 newlen = len(delta) - hlen
2658 if delta[:hlen] != mdiff.replacediffheader(
2659 oldlen, newlen
2660 ):
2661 raise error.CensoredBaseError(
2662 self.display_id, self.node(baserev)
2663 )
2664
2665 if not flags and self._peek_iscensored(baserev, delta):
2666 flags |= REVIDX_ISCENSORED
2667
2668 # We assume consumers of addrevisioncb will want to retrieve
2669 # the added revision, which will require a call to
2670 # revision(). revision() will fast path if there is a cache
2671 # hit. So, we tell _addrevision() to always cache in this case.
2672 # We're only using addgroup() in the context of changegroup
2673 # generation so the revision data can always be handled as raw
2674 # by the flagprocessor.
2675 rev = self._addrevision(
2676 node,
2677 None,
2678 transaction,
2679 link,
2680 p1,
2681 p2,
2682 flags,
2683 (baserev, delta),
2684 alwayscache=alwayscache,
2685 deltacomputer=deltacomputer,
2686 sidedata=sidedata,
2687 )
2688
2689 if addrevisioncb:
2690 addrevisioncb(self, rev)
2523 empty = False 2691 empty = False
2524 continue
2525
2526 for p in (p1, p2):
2527 if not self.index.has_node(p):
2528 raise error.LookupError(
2529 p, self.indexfile, _(b'unknown parent')
2530 )
2531
2532 if not self.index.has_node(deltabase):
2533 raise error.LookupError(
2534 deltabase, self.indexfile, _(b'unknown delta base')
2535 )
2536
2537 baserev = self.rev(deltabase)
2538
2539 if baserev != nullrev and self.iscensored(baserev):
2540 # if base is censored, delta must be full replacement in a
2541 # single patch operation
2542 hlen = struct.calcsize(b">lll")
2543 oldlen = self.rawsize(baserev)
2544 newlen = len(delta) - hlen
2545 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2546 raise error.CensoredBaseError(
2547 self.indexfile, self.node(baserev)
2548 )
2549
2550 if not flags and self._peek_iscensored(baserev, delta, flush):
2551 flags |= REVIDX_ISCENSORED
2552
2553 # We assume consumers of addrevisioncb will want to retrieve
2554 # the added revision, which will require a call to
2555 # revision(). revision() will fast path if there is a cache
2556 # hit. So, we tell _addrevision() to always cache in this case.
2557 # We're only using addgroup() in the context of changegroup
2558 # generation so the revision data can always be handled as raw
2559 # by the flagprocessor.
2560 rev = self._addrevision(
2561 node,
2562 None,
2563 transaction,
2564 link,
2565 p1,
2566 p2,
2567 flags,
2568 (baserev, delta),
2569 ifh,
2570 dfh,
2571 alwayscache=alwayscache,
2572 deltacomputer=deltacomputer,
2573 sidedata=sidedata,
2574 )
2575
2576 if addrevisioncb:
2577 addrevisioncb(self, rev)
2578 empty = False
2579
2580 if not dfh and not self._inline:
2581 # addrevision switched from inline to conventional
2582 # reopen the index
2583 ifh.close()
2584 dfh = self._datafp(b"a+")
2585 ifh = self._indexfp(b"a+")
2586 self._writinghandles = (ifh, dfh)
2587 finally: 2692 finally:
2588 self._writinghandles = None 2693 self._adding_group = False
2589
2590 if dfh:
2591 dfh.close()
2592 ifh.close()
2593 return not empty 2694 return not empty
2594 2695
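Each element yielded by `deltas` is the 8-tuple unpacked above. A hedged sketch of a compatible producer; the record field names are illustrative, not part of any Mercurial API:

    def make_delta_entries(records):
        for rec in records:
            yield (
                rec['node'],       # node being added
                rec['p1'],         # first parent node
                rec['p2'],         # second parent node
                rec['linknode'],   # changelog node this revision links to
                rec['deltabase'],  # node the delta applies against
                rec['delta'],      # the delta payload itself
                rec.get('flags', 0),
                rec.get('sidedata') or {},
            )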
2595 def iscensored(self, rev): 2696 def iscensored(self, rev):
2596 """Check if a file revision is censored.""" 2697 """Check if a file revision is censored."""
2597 if not self._censorable: 2698 if not self._censorable:
2598 return False 2699 return False
2599 2700
2600 return self.flags(rev) & REVIDX_ISCENSORED 2701 return self.flags(rev) & REVIDX_ISCENSORED
2601 2702
2602 def _peek_iscensored(self, baserev, delta, flush): 2703 def _peek_iscensored(self, baserev, delta):
2603 """Quickly check if a delta produces a censored revision.""" 2704 """Quickly check if a delta produces a censored revision."""
2604 if not self._censorable: 2705 if not self._censorable:
2605 return False 2706 return False
2606 2707
2607 return storageutil.deltaiscensored(delta, baserev, self.rawsize) 2708 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
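The censored-base check in addgroup relies on a structural property of bdiff deltas: a delta whose base is censored must be a single full-replacement hunk, whose header is three big-endian 32-bit integers (start=0, end=oldlen, newlen). Sketch of that header, matching what mdiff.replacediffheader produces:

    import struct

    def replacediffheader(oldlen, newlen):
        # one hunk replacing bytes [0, oldlen) with newlen new bytes
        return struct.pack(b'>lll', 0, oldlen, newlen)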
2640 rev, _ = self.getstrippoint(minlink) 2741 rev, _ = self.getstrippoint(minlink)
2641 if rev == len(self): 2742 if rev == len(self):
2642 return 2743 return
2643 2744
2644 # first truncate the files on disk 2745 # first truncate the files on disk
2645 end = self.start(rev) 2746 data_end = self.start(rev)
2646 if not self._inline: 2747 if not self._inline:
2647 transaction.add(self.datafile, end) 2748 transaction.add(self._datafile, data_end)
2648 end = rev * self.index.entry_size 2749 end = rev * self.index.entry_size
2649 else: 2750 else:
2650 end += rev * self.index.entry_size 2751 end = data_end + (rev * self.index.entry_size)
2651 2752
2652 transaction.add(self.indexfile, end) 2753 if self._sidedatafile:
2754 sidedata_end = self.sidedata_cut_off(rev)
2755 transaction.add(self._sidedatafile, sidedata_end)
2756
2757 transaction.add(self._indexfile, end)
2758 if self._docket is not None:
 2759 # XXX we could leverage the docket while stripping. However, it is
 2760 # not powerful enough at the time of this comment
2761 self._docket.index_end = end
2762 self._docket.data_end = data_end
2763 self._docket.sidedata_end = sidedata_end
2764 self._docket.write(transaction, stripping=True)
2653 2765
2654 # then reset internal state in memory to forget those revisions 2766 # then reset internal state in memory to forget those revisions
2655 self._revisioncache = None 2767 self._revisioncache = None
2656 self._chaininfocache = util.lrucachedict(500) 2768 self._chaininfocache = util.lrucachedict(500)
2657 self._chunkclear() 2769 self._segmentfile.clear_cache()
2770 self._segmentfile_sidedata.clear_cache()
2658 2771
2659 del self.index[rev:-1] 2772 del self.index[rev:-1]
2660 2773
2661 def checksize(self): 2774 def checksize(self):
2662 """Check size of index and data files 2775 """Check size of index and data files
2680 if inst.errno != errno.ENOENT: 2793 if inst.errno != errno.ENOENT:
2681 raise 2794 raise
2682 dd = 0 2795 dd = 0
2683 2796
2684 try: 2797 try:
2685 f = self.opener(self.indexfile) 2798 f = self.opener(self._indexfile)
2686 f.seek(0, io.SEEK_END) 2799 f.seek(0, io.SEEK_END)
2687 actual = f.tell() 2800 actual = f.tell()
2688 f.close() 2801 f.close()
2689 s = self.index.entry_size 2802 s = self.index.entry_size
2690 i = max(0, actual // s) 2803 i = max(0, actual // s)
2701 di = 0 2814 di = 0
2702 2815
2703 return (dd, di) 2816 return (dd, di)
2704 2817
2705 def files(self): 2818 def files(self):
2706 res = [self.indexfile] 2819 res = [self._indexfile]
2707 if not self._inline: 2820 if self._docket_file is None:
2708 res.append(self.datafile) 2821 if not self._inline:
2822 res.append(self._datafile)
2823 else:
2824 res.append(self._docket_file)
2825 res.extend(self._docket.old_index_filepaths(include_empty=False))
2826 if self._docket.data_end:
2827 res.append(self._datafile)
2828 res.extend(self._docket.old_data_filepaths(include_empty=False))
2829 if self._docket.sidedata_end:
2830 res.append(self._sidedatafile)
2831 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2709 return res 2832 return res
2710 2833
2711 def emitrevisions( 2834 def emitrevisions(
2712 self, 2835 self,
2713 nodes, 2836 nodes,
2760 tr, 2883 tr,
2761 destrevlog, 2884 destrevlog,
2762 addrevisioncb=None, 2885 addrevisioncb=None,
2763 deltareuse=DELTAREUSESAMEREVS, 2886 deltareuse=DELTAREUSESAMEREVS,
2764 forcedeltabothparents=None, 2887 forcedeltabothparents=None,
2765 sidedatacompanion=None, 2888 sidedata_helpers=None,
2766 ): 2889 ):
2767 """Copy this revlog to another, possibly with format changes. 2890 """Copy this revlog to another, possibly with format changes.
2768 2891
2769 The destination revlog will contain the same revisions and nodes. 2892 The destination revlog will contain the same revisions and nodes.
2770 However, it may not be bit-for-bit identical due to e.g. delta encoding 2893 However, it may not be bit-for-bit identical due to e.g. delta encoding
2803 2926
2804 In addition to the delta policy, the ``forcedeltabothparents`` 2927 In addition to the delta policy, the ``forcedeltabothparents``
2805 argument controls whether to force compute deltas against both parents 2928 argument controls whether to force compute deltas against both parents
2806 for merges. By default, the current default is used. 2929 for merges. By default, the current default is used.
2807 2930
 2808 If not None, the `sidedatacompanion` is a callable that accepts two 2931 See `revlogutils.sidedata.get_sidedata_helpers` for the doc on
 2809 arguments: 2932 `sidedata_helpers`.
2810
2811 (srcrevlog, rev)
2812
 2813 and returns a quintet that controls changes to sidedata content from the
2814 old revision to the new clone result:
2815
2816 (dropall, filterout, update, new_flags, dropped_flags)
2817
2818 * if `dropall` is True, all sidedata should be dropped
2819 * `filterout` is a set of sidedata keys that should be dropped
 2820 * `update` is a mapping of additional/new key -> value
 2821 * new_flags is a bitfield of new flags that the revision should get
 2822 * dropped_flags is a bitfield of flags that the revision should no longer have
2823 """ 2933 """
2824 if deltareuse not in self.DELTAREUSEALL: 2934 if deltareuse not in self.DELTAREUSEALL:
2825 raise ValueError( 2935 raise ValueError(
2826 _(b'value for deltareuse invalid: %s') % deltareuse 2936 _(b'value for deltareuse invalid: %s') % deltareuse
2827 ) 2937 )
2857 tr, 2967 tr,
2858 destrevlog, 2968 destrevlog,
2859 addrevisioncb, 2969 addrevisioncb,
2860 deltareuse, 2970 deltareuse,
2861 forcedeltabothparents, 2971 forcedeltabothparents,
2862 sidedatacompanion, 2972 sidedata_helpers,
2863 ) 2973 )
2864 2974
2865 finally: 2975 finally:
2866 destrevlog._lazydelta = oldlazydelta 2976 destrevlog._lazydelta = oldlazydelta
2867 destrevlog._lazydeltabase = oldlazydeltabase 2977 destrevlog._lazydeltabase = oldlazydeltabase
2872 tr, 2982 tr,
2873 destrevlog, 2983 destrevlog,
2874 addrevisioncb, 2984 addrevisioncb,
2875 deltareuse, 2985 deltareuse,
2876 forcedeltabothparents, 2986 forcedeltabothparents,
2877 sidedatacompanion, 2987 sidedata_helpers,
2878 ): 2988 ):
2879 """perform the core duty of `revlog.clone` after parameter processing""" 2989 """perform the core duty of `revlog.clone` after parameter processing"""
2880 deltacomputer = deltautil.deltacomputer(destrevlog) 2990 deltacomputer = deltautil.deltacomputer(destrevlog)
2881 index = self.index 2991 index = self.index
2882 for rev in self: 2992 for rev in self:
2888 linkrev = entry[4] 2998 linkrev = entry[4]
2889 p1 = index[entry[5]][7] 2999 p1 = index[entry[5]][7]
2890 p2 = index[entry[6]][7] 3000 p2 = index[entry[6]][7]
2891 node = entry[7] 3001 node = entry[7]
2892 3002
2893 sidedataactions = (False, [], {}, 0, 0)
2894 if sidedatacompanion is not None:
2895 sidedataactions = sidedatacompanion(self, rev)
2896
2897 # (Possibly) reuse the delta from the revlog if allowed and 3003 # (Possibly) reuse the delta from the revlog if allowed and
2898 # the revlog chunk is a delta. 3004 # the revlog chunk is a delta.
2899 cachedelta = None 3005 cachedelta = None
2900 rawtext = None 3006 rawtext = None
2901 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD: 3007 if deltareuse == self.DELTAREUSEFULLADD:
2902 dropall = sidedataactions[0] 3008 text = self._revisiondata(rev)
2903 filterout = sidedataactions[1] 3009 sidedata = self.sidedata(rev)
2904 update = sidedataactions[2] 3010
2905 new_flags = sidedataactions[3] 3011 if sidedata_helpers is not None:
2906 dropped_flags = sidedataactions[4] 3012 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2907 text, sidedata = self._revisiondata(rev) 3013 self, sidedata_helpers, sidedata, rev
2908 if dropall: 3014 )
2909 sidedata = {} 3015 flags = flags | new_flags[0] & ~new_flags[1]
2910 for key in filterout:
2911 sidedata.pop(key, None)
2912 sidedata.update(update)
2913 if not sidedata:
2914 sidedata = None
2915
2916 flags |= new_flags
2917 flags &= ~dropped_flags
2918 3016
2919 destrevlog.addrevision( 3017 destrevlog.addrevision(
2920 text, 3018 text,
2921 tr, 3019 tr,
2922 linkrev, 3020 linkrev,
2932 if destrevlog._lazydelta: 3030 if destrevlog._lazydelta:
2933 dp = self.deltaparent(rev) 3031 dp = self.deltaparent(rev)
2934 if dp != nullrev: 3032 if dp != nullrev:
2935 cachedelta = (dp, bytes(self._chunk(rev))) 3033 cachedelta = (dp, bytes(self._chunk(rev)))
2936 3034
3035 sidedata = None
2937 if not cachedelta: 3036 if not cachedelta:
2938 rawtext = self.rawdata(rev) 3037 rawtext = self._revisiondata(rev)
2939 3038 sidedata = self.sidedata(rev)
2940 ifh = destrevlog.opener( 3039 if sidedata is None:
2941 destrevlog.indexfile, b'a+', checkambig=False 3040 sidedata = self.sidedata(rev)
2942 ) 3041
2943 dfh = None 3042 if sidedata_helpers is not None:
2944 if not destrevlog._inline: 3043 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2945 dfh = destrevlog.opener(destrevlog.datafile, b'a+') 3044 self, sidedata_helpers, sidedata, rev
2946 try: 3045 )
3046 flags = flags | new_flags[0] & ~new_flags[1]
3047
3048 with destrevlog._writing(tr):
2947 destrevlog._addrevision( 3049 destrevlog._addrevision(
2948 node, 3050 node,
2949 rawtext, 3051 rawtext,
2950 tr, 3052 tr,
2951 linkrev, 3053 linkrev,
2952 p1, 3054 p1,
2953 p2, 3055 p2,
2954 flags, 3056 flags,
2955 cachedelta, 3057 cachedelta,
2956 ifh,
2957 dfh,
2958 deltacomputer=deltacomputer, 3058 deltacomputer=deltacomputer,
3059 sidedata=sidedata,
2959 ) 3060 )
2960 finally:
2961 if dfh:
2962 dfh.close()
2963 ifh.close()
2964 3061
2965 if addrevisioncb: 3062 if addrevisioncb:
2966 addrevisioncb(self, rev, node) 3063 addrevisioncb(self, rev, node)
2967 3064
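Note the operator precedence in the flag-combination lines above: `&` binds tighter than `|` in Python, so `flags | new_flags[0] & ~new_flags[1]` reads as flags OR-ed with (flags-to-add masked by flags-to-remove). A toy check with illustrative bit values:

    FLAG_A, FLAG_B = 1 << 0, 1 << 1
    flags = FLAG_A
    to_add, to_remove = FLAG_A | FLAG_B, FLAG_A
    assert flags | to_add & ~to_remove == FLAG_A | FLAG_B
    # note: bits already present in `flags` are never cleared by this form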
2968 def censorrevision(self, tr, censornode, tombstone=b''): 3065 def censorrevision(self, tr, censornode, tombstone=b''):
2969 if (self.version & 0xFFFF) == REVLOGV0: 3066 if self._format_version == REVLOGV0:
2970 raise error.RevlogError( 3067 raise error.RevlogError(
2971 _(b'cannot censor with version %d revlogs') % self.version 3068 _(b'cannot censor with version %d revlogs')
3069 % self._format_version
2972 ) 3070 )
2973 3071 elif self._format_version == REVLOGV1:
2974 censorrev = self.rev(censornode) 3072 rewrite.v1_censor(self, tr, censornode, tombstone)
2975 tombstone = storageutil.packmeta({b'censored': tombstone}, b'') 3073 else:
2976 3074 rewrite.v2_censor(self, tr, censornode, tombstone)
2977 if len(tombstone) > self.rawsize(censorrev):
2978 raise error.Abort(
2979 _(b'censor tombstone must be no longer than censored data')
2980 )
2981
2982 # Rewriting the revlog in place is hard. Our strategy for censoring is
2983 # to create a new revlog, copy all revisions to it, then replace the
2984 # revlogs on transaction close.
2985
2986 newindexfile = self.indexfile + b'.tmpcensored'
2987 newdatafile = self.datafile + b'.tmpcensored'
2988
2989 # This is a bit dangerous. We could easily have a mismatch of state.
2990 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2991 newrl.version = self.version
2992 newrl._generaldelta = self._generaldelta
2993 newrl._io = self._io
2994
2995 for rev in self.revs():
2996 node = self.node(rev)
2997 p1, p2 = self.parents(node)
2998
2999 if rev == censorrev:
3000 newrl.addrawrevision(
3001 tombstone,
3002 tr,
3003 self.linkrev(censorrev),
3004 p1,
3005 p2,
3006 censornode,
3007 REVIDX_ISCENSORED,
3008 )
3009
3010 if newrl.deltaparent(rev) != nullrev:
3011 raise error.Abort(
3012 _(
3013 b'censored revision stored as delta; '
3014 b'cannot censor'
3015 ),
3016 hint=_(
3017 b'censoring of revlogs is not '
3018 b'fully implemented; please report '
3019 b'this bug'
3020 ),
3021 )
3022 continue
3023
3024 if self.iscensored(rev):
3025 if self.deltaparent(rev) != nullrev:
3026 raise error.Abort(
3027 _(
3028 b'cannot censor due to censored '
3029 b'revision having delta stored'
3030 )
3031 )
3032 rawtext = self._chunk(rev)
3033 else:
3034 rawtext = self.rawdata(rev)
3035
3036 newrl.addrawrevision(
3037 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3038 )
3039
3040 tr.addbackup(self.indexfile, location=b'store')
3041 if not self._inline:
3042 tr.addbackup(self.datafile, location=b'store')
3043
3044 self.opener.rename(newrl.indexfile, self.indexfile)
3045 if not self._inline:
3046 self.opener.rename(newrl.datafile, self.datafile)
3047
3048 self.clearcaches()
3049 self._loadindex()
3050 3075
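Censoring now delegates to the version-specific helpers in revlogutils.rewrite. One invariant from the removed in-file implementation remains useful context: the packed tombstone had to be no longer than the data it censors. That guard as a standalone sketch:

    def check_tombstone(packed_tombstone, censored_rawsize):
        if len(packed_tombstone) > censored_rawsize:
            raise ValueError(
                'censor tombstone must be no longer than censored data')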
3051 def verifyintegrity(self, state): 3076 def verifyintegrity(self, state):
3052 """Verifies the integrity of the revlog. 3077 """Verifies the integrity of the revlog.
3053 3078
3054 Yields ``revlogproblem`` instances describing problems that are 3079 Yields ``revlogproblem`` instances describing problems that are
3058 if dd: 3083 if dd:
3059 yield revlogproblem(error=_(b'data length off by %d bytes') % dd) 3084 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3060 if di: 3085 if di:
3061 yield revlogproblem(error=_(b'index contains %d extra bytes') % di) 3086 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3062 3087
3063 version = self.version & 0xFFFF 3088 version = self._format_version
3064 3089
3065 # The verifier tells us what version revlog we should be. 3090 # The verifier tells us what version revlog we should be.
3066 if version != state[b'expectedversion']: 3091 if version != state[b'expectedversion']:
3067 yield revlogproblem( 3092 yield revlogproblem(
3068 warning=_(b"warning: '%s' uses revlog format %d; expected %d") 3093 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3069 % (self.indexfile, version, state[b'expectedversion']) 3094 % (self.display_id, version, state[b'expectedversion'])
3070 ) 3095 )
3071 3096
3072 state[b'skipread'] = set() 3097 state[b'skipread'] = set()
3073 state[b'safe_renamed'] = set() 3098 state[b'safe_renamed'] = set()
3074 3099
3162 storedsize=False, 3187 storedsize=False,
3163 ): 3188 ):
3164 d = {} 3189 d = {}
3165 3190
3166 if exclusivefiles: 3191 if exclusivefiles:
3167 d[b'exclusivefiles'] = [(self.opener, self.indexfile)] 3192 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3168 if not self._inline: 3193 if not self._inline:
3169 d[b'exclusivefiles'].append((self.opener, self.datafile)) 3194 d[b'exclusivefiles'].append((self.opener, self._datafile))
3170 3195
3171 if sharedfiles: 3196 if sharedfiles:
3172 d[b'sharedfiles'] = [] 3197 d[b'sharedfiles'] = []
3173 3198
3174 if revisionscount: 3199 if revisionscount:
3182 self.opener.stat(path).st_size for path in self.files() 3207 self.opener.stat(path).st_size for path in self.files()
3183 ) 3208 )
3184 3209
3185 return d 3210 return d
3186 3211
3187 def rewrite_sidedata(self, helpers, startrev, endrev): 3212 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3188 if self.version & 0xFFFF != REVLOGV2: 3213 if not self.hassidedata:
3189 return 3214 return
 3190 # inline revlogs are not yet supported because they suffer from an issue when 3215 # revlog formats with sidedata support do not support inline revlogs
3191 # rewriting them (since it's not an append-only operation).
3192 # See issue6485.
3193 assert not self._inline 3216 assert not self._inline
3194 if not helpers[1] and not helpers[2]: 3217 if not helpers[1] and not helpers[2]:
3195 # Nothing to generate or remove 3218 # Nothing to generate or remove
3196 return 3219 return
3197 3220
3198 new_entries = [] 3221 new_entries = []
3199 # append the new sidedata 3222 # append the new sidedata
3200 with self._datafp(b'a+') as fp: 3223 with self._writing(transaction):
3201 # Maybe this bug still exists, see revlog._writeentry 3224 ifh, dfh, sdfh = self._writinghandles
3202 fp.seek(0, os.SEEK_END) 3225 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3203 current_offset = fp.tell() 3226
3227 current_offset = sdfh.tell()
3204 for rev in range(startrev, endrev + 1): 3228 for rev in range(startrev, endrev + 1):
3205 entry = self.index[rev] 3229 entry = self.index[rev]
3206 new_sidedata = storageutil.run_sidedata_helpers( 3230 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3207 store=self, 3231 store=self,
3208 sidedata_helpers=helpers, 3232 sidedata_helpers=helpers,
3209 sidedata={}, 3233 sidedata={},
3210 rev=rev, 3234 rev=rev,
3211 ) 3235 )
3212 3236
3213 serialized_sidedata = sidedatautil.serialize_sidedata( 3237 serialized_sidedata = sidedatautil.serialize_sidedata(
3214 new_sidedata 3238 new_sidedata
3215 ) 3239 )
3240
3241 sidedata_compression_mode = COMP_MODE_INLINE
3242 if serialized_sidedata and self.hassidedata:
3243 sidedata_compression_mode = COMP_MODE_PLAIN
3244 h, comp_sidedata = self.compress(serialized_sidedata)
3245 if (
3246 h != b'u'
3247 and comp_sidedata[0] != b'\0'
3248 and len(comp_sidedata) < len(serialized_sidedata)
3249 ):
3250 assert not h
3251 if (
3252 comp_sidedata[0]
3253 == self._docket.default_compression_header
3254 ):
3255 sidedata_compression_mode = COMP_MODE_DEFAULT
3256 serialized_sidedata = comp_sidedata
3257 else:
3258 sidedata_compression_mode = COMP_MODE_INLINE
3259 serialized_sidedata = comp_sidedata
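
The inserted block above makes a three-way choice for each sidedata blob: keep it plain when compression does not pay, record the compression engine inline through its header byte, or flag it as using the docket-wide default engine when the header matches default_compression_header. A toy re-creation of that decision with zlib and invented mode constants:

import zlib

PLAIN, INLINE, DEFAULT = 0, 1, 2  # stand-ins for the COMP_MODE_* values

def pick_storage(raw, default_header=b'x'):
    comp = zlib.compress(raw)
    # Store uncompressed unless compression actually shrinks the blob.
    if len(comp) >= len(raw):
        return PLAIN, raw
    # zlib output begins with b'x'; when that matches the docket-wide
    # default header, the reader can infer the engine from the docket.
    if comp[:1] == default_header:
        return DEFAULT, comp
    return INLINE, comp
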
3216 if entry[8] != 0 or entry[9] != 0: 3260 if entry[8] != 0 or entry[9] != 0:
3217 # rewriting entries that already have sidedata is not 3261 # rewriting entries that already have sidedata is not
3218 # supported yet, because it introduces garbage data in the 3262 # supported yet, because it introduces garbage data in the
3219 # revlog. 3263 # revlog.
3220 msg = b"Rewriting existing sidedata is not supported yet" 3264 msg = b"rewriting existing sidedata is not supported yet"
3221 raise error.Abort(msg) 3265 raise error.Abort(msg)
3222 entry = entry[:8] 3266
3223 entry += (current_offset, len(serialized_sidedata)) 3267 # Apply (potential) flags to add and to remove after running
3224 3268 # the sidedata helpers
3225 fp.write(serialized_sidedata) 3269 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3226 new_entries.append(entry) 3270 entry_update = (
3271 current_offset,
3272 len(serialized_sidedata),
3273 new_offset_flags,
3274 sidedata_compression_mode,
3275 )
3276
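The expression new_offset_flags = entry[0] | flags[0] & ~flags[1] leans on Python precedence: & binds tighter than |, so the removal mask is applied to the helper-requested flags before they are OR-ed onto the existing offset/flags word. A worked example with invented bit values:

existing  = 0b0100  # entry[0] analogue: flags already on the entry
to_add    = 0b0011  # flags[0]: flags the helpers want set
to_remove = 0b0001  # flags[1]: flags the helpers want cleared

# Groups as existing | (to_add & ~to_remove).
new_flags = existing | to_add & ~to_remove
assert new_flags == 0b0110
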
3277 # the sidedata computation might have moved the file cursors around
3278 sdfh.seek(current_offset, os.SEEK_SET)
3279 sdfh.write(serialized_sidedata)
3280 new_entries.append(entry_update)
3227 current_offset += len(serialized_sidedata) 3281 current_offset += len(serialized_sidedata)
3228 3282 self._docket.sidedata_end = sdfh.tell()
3229 # rewrite the new index entries 3283
3230 with self._indexfp(b'w+') as fp: 3284 # rewrite the new index entries
3231 fp.seek(startrev * self.index.entry_size) 3285 ifh.seek(startrev * self.index.entry_size)
3232 for i, entry in enumerate(new_entries): 3286 for i, e in enumerate(new_entries):
3233 rev = startrev + i 3287 rev = startrev + i
3234 self.index.replace_sidedata_info(rev, entry[8], entry[9]) 3288 self.index.replace_sidedata_info(rev, *e)
3235 packed = self._io.packentry(entry, self.node, self.version, rev) 3289 packed = self.index.entry_binary(rev)
3236 fp.write(packed) 3290 if rev == 0 and self._docket is None:
3291 header = self._format_flags | self._format_version
3292 header = self.index.pack_header(header)
3293 packed = header + packed
3294 ifh.write(packed)
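
The tail of the loop covers a v1 quirk: a docket-less revlog stores its 4-byte format header where the first index entry's offset would otherwise live, so rewriting entry 0 means packing the header back onto the front, which is what index.pack_header() does above. A standalone illustration of that header word (the constants are the real v1 values, the helper is hypothetical):

import struct

REVLOGV1 = 1                 # format version number
FLAG_INLINE_DATA = 1 << 16   # v1 feature flag

def pack_v1_header(flags=0):
    # Big-endian 32-bit word: feature flags OR-ed with the version,
    # prefixed to the very first index entry only.
    return struct.pack('>I', flags | REVLOGV1)

assert pack_v1_header(FLAG_INLINE_DATA) == b'\x00\x01\x00\x01'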