diff hgext/lfs/blobstore.py @ 35481:bb6a80fc969a

lfs: only hardlink between the usercache and local store if the blob verifies This fixes the issue where verify (and other read commands) would propagate corrupt blobs. I originalled coded this to only hardlink if 'verify=True' for store.read(), but then good blobs weren't being linked, and this broke a bunch of tests. (The blob in repo5 that is being corrupted seems to be linked into repo5 in the loop running dumpflog.py prior to it being corrupted, but only if verify=False is handled too.) It's probably better to do a one time extra verification in order to create these files, so that the repo can be copied to a removable drive. Adding the same check to store.write() was only for completeness, but also needs to do a one time extra verification to avoid breaking tests.
author Matt Harbison <matt_harbison@yahoo.com>
date Thu, 21 Dec 2017 14:13:39 -0500
parents 417e8e040102
children 5a73a0446afd
line wrap: on
line diff
--- a/hgext/lfs/blobstore.py	Fri Nov 17 00:06:45 2017 -0500
+++ b/hgext/lfs/blobstore.py	Thu Dec 21 14:13:39 2017 -0500
@@ -111,15 +111,23 @@
         # XXX: should we verify the content of the cache, and hardlink back to
         # the local store on success, but truncate, write and link on failure?
         if not self.cachevfs.exists(oid):
-            self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
-            lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
+            if verify or hashlib.sha256(data).hexdigest() == oid:
+                self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
+                lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
 
     def read(self, oid, verify=True):
         """Read blob from local blobstore."""
         if not self.vfs.exists(oid):
             blob = self._read(self.cachevfs, oid, verify)
-            self.ui.note(_('lfs: found %s in the usercache\n') % oid)
-            lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
+
+            # Even if revlog will verify the content, it needs to be verified
+            # now before making the hardlink to avoid propagating corrupt blobs.
+            # Don't abort if corruption is detected, because `hg verify` will
+            # give more useful info about the corruption- simply don't add the
+            # hardlink.
+            if verify or hashlib.sha256(blob).hexdigest() == oid:
+                self.ui.note(_('lfs: found %s in the usercache\n') % oid)
+                lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
         else:
             self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
             blob = self._read(self.vfs, oid, verify)