Mercurial > public > mercurial-scm > hg
diff hgext/censor.py @ 24347:1bcfecbbf569
censor: add censor command to hgext with basic client-side tests
The censor command is a core extension which can replace the contents of a
historical file revision with a censor "tombstone" which can be exchanged
with older clients in place of the real revision data. The command rewrites
the filelog by copying revision-by-revision.
Care must be taken to expand the fulltext of the children of the censored
revision before copying them to the new filelog; they might be stored as
deltas against the uncensored revision, and those deltas will be invalidated.
For more background on the censorship feature design, see:
http://mercurial.selenic.com/wiki/CensorPlan
author | Mike Edgar <adgar@google.com> |
---|---|
date | Sun, 15 Mar 2015 21:52:17 -0400 |
parents | |
children | 5e111acc1170 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgext/censor.py Sun Mar 15 21:52:17 2015 -0400 @@ -0,0 +1,168 @@ +# Copyright (C) 2015 - Mike Edgar <adgar@google.com> +# +# This extension enables removal of file content at a given revision, +# rewriting the data/metadata of successive revisions to preserve revision log +# integrity. + +"""erase file content at a given revision + +The censor command instructs Mercurial to erase all content of a file at a given +revision *without updating the changeset hash.* This allows existing history to +remain valid while preventing future clones/pulls from receiving the erased +data. + +Typical uses for censor are due to security or legal requirements, including:: + + * Passwords, private keys, crytographic material + * Licensed data/code/libraries for which the license has expired + * Personally Identifiable Information or other private data + +Censored file revisions are listed in a tracked file called .hgcensored stored +in the repository root. The censor command adds an entry to the .hgcensored file +in the working directory and commits it (much like ``hg tag`` and .hgtags). The +censored file data is then replaced with a pointer to the new commit, enabling +verification. + +Censored nodes can interrupt mercurial's typical operation whenever the excised +data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, +simply fail when asked to produce censored data. Others, like ``hg verify`` and +``hg update``, must be capable of tolerating censored data to continue to +function in a meaningful way. Such commands only tolerate censored file +revisions if they are allowed by the policy specified by the "censor.allow" +config option. +""" + +from mercurial.node import short +from mercurial import cmdutil, error, filelog, revlog, scmutil, util +from mercurial.i18n import _ + +cmdtable = {} +command = cmdutil.command(cmdtable) +testedwith = 'internal' + +@command('censor', + [('r', 'rev', '', _('censor file from specified revision'), _('REV')), + ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))], + _('-r REV [-t TEXT] [FILE]')) +def censor(ui, repo, path, rev='', tombstone='', **opts): + if not path: + raise util.Abort(_('must specify file path to censor')) + if not rev: + raise util.Abort(_('must specify revision to censor')) + + flog = repo.file(path) + if not len(flog): + raise util.Abort(_('cannot censor file with no history')) + + rev = scmutil.revsingle(repo, rev, rev).rev() + try: + ctx = repo[rev] + except KeyError: + raise util.Abort(_('invalid revision identifier %s') % rev) + + try: + fctx = ctx.filectx(path) + except error.LookupError: + raise util.Abort(_('file does not exist at revision %s') % rev) + + fnode = fctx.filenode() + headctxs = [repo[c] for c in repo.heads()] + heads = [c for c in headctxs if path in c and c.filenode(path) == fnode] + if heads: + headlist = ', '.join([short(c.node()) for c in heads]) + raise util.Abort(_('cannot censor file in heads (%s)') % headlist, + hint=_('clean/delete and commit first')) + + wctx = repo[None] + wp = wctx.parents() + if ctx.node() in [p.node() for p in wp]: + raise util.Abort(_('cannot censor working directory'), + hint=_('clean/delete/update first')) + + flogv = flog.version & 0xFFFF + if flogv != revlog.REVLOGNG: + raise util.Abort( + _('censor does not support revlog version %d') % (flogv,)) + + tombstone = filelog.packmeta({"censored": tombstone}, "") + + crev = fctx.filerev() + + if len(tombstone) > flog.rawsize(crev): + raise util.Abort(_( + 'censor tombstone must be no longer than censored data')) + + # Using two files instead of one makes it easy to rewrite entry-by-entry + idxread = repo.svfs(flog.indexfile, 'r') + idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True) + if flog.version & revlog.REVLOGNGINLINEDATA: + dataread, datawrite = idxread, idxwrite + else: + dataread = repo.svfs(flog.datafile, 'r') + datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True) + + # Copy all revlog data up to the entry to be censored. + rio = revlog.revlogio() + offset = flog.start(crev) + + for chunk in util.filechunkiter(idxread, limit=crev * rio.size): + idxwrite.write(chunk) + for chunk in util.filechunkiter(dataread, limit=offset): + datawrite.write(chunk) + + def rewriteindex(r, newoffs, newdata=None): + """Rewrite the index entry with a new data offset and optional new data. + + The newdata argument, if given, is a tuple of three positive integers: + (new compressed, new uncompressed, added flag bits). + """ + offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r] + flags = revlog.gettype(offlags) + if newdata: + comp, uncomp, nflags = newdata + flags |= nflags + offlags = revlog.offset_type(newoffs, flags) + e = (offlags, comp, uncomp, r, link, p1, p2, nodeid) + idxwrite.write(rio.packentry(e, None, flog.version, r)) + idxread.seek(rio.size, 1) + + def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS): + """Write the given full text to the filelog with the given data offset. + + Returns: + The integer number of data bytes written, for tracking data offsets. + """ + flag, compdata = flog.compress(data) + newcomp = len(flag) + len(compdata) + rewriteindex(r, offs, (newcomp, len(data), nflags)) + datawrite.write(flag) + datawrite.write(compdata) + dataread.seek(flog.length(r), 1) + return newcomp + + # Rewrite censored revlog entry with (padded) tombstone data. + pad = ' ' * (flog.rawsize(crev) - len(tombstone)) + offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED) + + # Rewrite all following filelog revisions fixing up offsets and deltas. + for srev in xrange(crev + 1, len(flog)): + if crev in flog.parentrevs(srev): + # Immediate children of censored node must be re-added as fulltext. + try: + revdata = flog.revision(srev) + except error.CensoredNodeError, e: + revdata = e.tombstone + dlen = rewrite(srev, offset, revdata) + else: + # Copy any other revision data verbatim after fixing up the offset. + rewriteindex(srev, offset) + dlen = flog.length(srev) + for chunk in util.filechunkiter(dataread, limit=dlen): + datawrite.write(chunk) + offset += dlen + + idxread.close() + idxwrite.close() + if dataread is not idxread: + dataread.close() + datawrite.close()