comparison mercurial/revlog.py @ 39778:a6b3c4c1019f

revlog: move censor logic out of censor extension The censor extension is doing very low-level things with revlogs. It is fundamentally impossible for this logic to remain in the censor extension while supporting multiple storage backends: we need each storage backend to implement censor in its own storage-specific way. This commit effectively moves the revlog-specific censoring code to be a method of revlogs themselves. We've defined a new API on the file storage interface for censoring an individual node. Even though the current censoring code doesn't use it, the API requires a transaction instance because it logically makes sense for storage backends to require an active transaction (which implies a held write lock) in order to rewrite storage. After this commit, the censor extension has been reduced to boilerplate precondition checking before invoking the generic storage API. I tried to keep the code as similar as possible. But some minor changes were made: * We use self._io instead of instantiating a new revlogio instance. * We compare self.version against REVLOGV0 instead of != REVLOGV1 because presumably all future revlog versions will support censoring. * We use self.opener instead of going through repo.svfs (we don't have a handle on the repo instance from a revlog). * The "revlog." module qualifier is dropped from names such as gettype() and offset_type(), since the code now lives in revlog.py itself. * Replace "flog" with "self". Differential Revision: https://phab.mercurial-scm.org/D4656
author Gregory Szorc <gregory.szorc@gmail.com>
date Tue, 18 Sep 2018 17:51:43 -0700
parents 974592474dee
children 7a9e2d85f475
comparison
equal deleted inserted replaced
39777:b63dee7bd0d9 39778:a6b3c4c1019f
2490 if addrevisioncb: 2490 if addrevisioncb:
2491 addrevisioncb(self, rev, node) 2491 addrevisioncb(self, rev, node)
2492 finally: 2492 finally:
2493 destrevlog._lazydeltabase = oldlazydeltabase 2493 destrevlog._lazydeltabase = oldlazydeltabase
2494 destrevlog._deltabothparents = oldamd 2494 destrevlog._deltabothparents = oldamd
2495
def censorrevision(self, node, tombstone=b''):
    """Replace the stored content of ``node`` with a censor tombstone.

    The index (and, for non-inline revlogs, the data file) is rewritten
    entry by entry into atomic temporary files. Everything before the
    censored revision is copied unchanged, the censored revision is
    replaced by the tombstone padded to the original raw size and flagged
    ``REVIDX_ISCENSORED``, and every later revision gets its data offset
    fixed up.

    ``tombstone`` is the bytes payload stored under the ``censored``
    metadata key.

    Raises ``error.RevlogError`` for version 0 revlogs and ``error.Abort``
    when the packed tombstone is longer than the data it replaces.
    """
    if (self.version & 0xFFFF) == REVLOGV0:
        raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                self.version)

    rev = self.rev(node)
    tombstone = packmeta({b'censored': tombstone}, b'')

    if len(tombstone) > self.rawsize(rev):
        raise error.Abort(_('censor tombstone must be no longer than '
                            'censored data'))

    # Every handle is tracked so the ``finally`` clause can release it: a
    # failure part-way through must neither leak descriptors nor publish
    # a half-written revlog.
    readers = []
    writers = []
    finished = False
    try:
        # Using two files instead of one makes it easy to rewrite
        # entry-by-entry.
        idxread = self.opener(self.indexfile, 'r')
        readers.append(idxread)
        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
        writers.append(idxwrite)
        if self.version & FLAG_INLINE_DATA:
            # Inline revlogs keep index entries and data in one file.
            dataread, datawrite = idxread, idxwrite
        else:
            dataread = self.opener(self.datafile, 'r')
            readers.append(dataread)
            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
            writers.append(datawrite)

        # Copy all revlog data up to the entry to be censored.
        offset = self.start(rev)

        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            When it is given the entry becomes a fulltext (its own delta
            base); otherwise the recorded delta base is kept, because the
            data chunk is copied unchanged.
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
            flags = gettype(offlags)
            if newdata is not None:
                comp, uncomp, nflags = newdata
                flags |= nflags
                base = r
            offlags = offset_type(newoffs, flags)
            e = (offlags, comp, uncomp, base, link, p1, p2, nodeid)
            idxwrite.write(self._io.packentry(e, None, self.version, r))
            # Skip the old entry in the source index.
            idxread.seek(self._io.size, 1)

        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
            """Write the given fulltext with the given data offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = self.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            # Skip the old chunk in the source data.
            dataread.seek(self.length(r), 1)
            return newcomp

        # Rewrite censored entry with (padded) tombstone data. The
        # tombstone is bytes, so the padding must be bytes as well.
        pad = b' ' * (self.rawsize(rev) - len(tombstone))
        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

        # Rewrite all following revisions fixing up offsets and deltas.
        for srev in pycompat.xrange(rev + 1, len(self)):
            if rev in self.parentrevs(srev) or self.deltaparent(srev) == rev:
                # Anything that may be stored as a delta against the
                # censored revision (its children, or any revision whose
                # delta base it is) must be re-added as fulltext, since
                # the base content is being replaced.
                try:
                    revdata = self.revision(srev)
                except error.CensoredNodeError as e:
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = self.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen

        finished = True
    finally:
        for fh in readers:
            fh.close()
        for fh in writers:
            # close() publishes the atomic temp file; discard() drops it
            # so a failed rewrite leaves the existing revlog untouched.
            if finished:
                fh.close()
            else:
                fh.discard()