--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/packaging/hgpackaging/util.py Thu Mar 07 10:10:04 2019 -0800
@@ -0,0 +1,128 @@
+# util.py - Common packaging utility code.
+#
+# Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+# no-check-code because Python 3 native.
+
+import gzip
+import hashlib
+import pathlib
+import tarfile
+import urllib.request
+import zipfile
+
+
+def hash_path(p: pathlib.Path):
+    """Return the hex-encoded SHA-256 digest of the file at ``p``.
+
+    The file is read in 64 KiB pieces rather than being loaded into
+    memory in one go.
+    """
+    digest = hashlib.sha256()
+
+    with p.open('rb') as source:
+        # A binary read returns b'' at end of file, which ends the loop.
+        for block in iter(lambda: source.read(65536), b''):
+            digest.update(block)
+
+    return digest.hexdigest()
+
+
+class IntegrityError(Exception):
+    """Raised when downloaded content fails its size or SHA-256 check."""
+
+
+def secure_download_stream(url, size, sha256):
+    """Yield the content of a URL in chunks, then verify its integrity.
+
+    ``size`` is the expected number of bytes and ``sha256`` the expected
+    hex digest of the yielded content. Both are checked only after the
+    whole stream has been yielded, so a consumer must treat what it
+    received as unverified until the generator finishes without error.
+
+    Raises IntegrityError if the byte count or the digest does not match.
+    """
+    hasher = hashlib.sha256()
+    received = 0
+
+    with urllib.request.urlopen(url) as response:
+        stream = response
+
+        # A gzip Content-Encoding is undone so that the size and hash
+        # describe the decoded payload. URLs ending in .gz are never
+        # decompressed (presumably because the artifact itself is meant
+        # to be a gzip file).
+        if not url.endswith('.gz'):
+            if response.info().get('Content-Encoding') == 'gzip':
+                stream = gzip.GzipFile(fileobj=response)
+
+        # An empty read marks the end of the stream.
+        for chunk in iter(lambda: stream.read(65536), b''):
+            hasher.update(chunk)
+            received += len(chunk)
+
+            yield chunk
+
+    actual = hasher.hexdigest()
+
+    if received != size:
+        raise IntegrityError('size mismatch on %s: wanted %d; got %d' % (
+            url, size, received))
+
+    if actual != sha256:
+        raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % (
+            url, sha256, actual))
+
+
+def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
+    """Download a URL to a filesystem path, verifying size and SHA-256.
+
+    If ``path`` already exists and matches ``size`` and ``sha256``,
+    nothing is downloaded. If it exists but fails either check, it is
+    deleted and the URL is fetched again.
+
+    Raises IntegrityError if the downloaded content does not match the
+    expected size or digest. Any other error raised while downloading
+    propagates unchanged. In both cases the temporary file is removed.
+    """
+    print('downloading %s to %s' % (url, path))
+
+    if path.exists():
+        # The cheap size comparison runs first; the file is hashed only
+        # when the size already matches.
+        if path.stat().st_size != size:
+            print('existing file size is wrong; removing')
+        elif hash_path(path) != sha256:
+            print('existing file hash is wrong; removing')
+        else:
+            print('%s exists and passes integrity checks' % path)
+            return
+
+        path.unlink()
+
+    # We download to a temporary file and rename at the end so there's
+    # no chance of the final file being partially written or containing
+    # bad data.
+    tmp = path.with_name('%s.tmp' % path.name)
+
+    try:
+        with tmp.open('wb') as fh:
+            for chunk in secure_download_stream(url, size, sha256):
+                fh.write(chunk)
+    except BaseException:
+        # Not only IntegrityError: a dropped connection or Ctrl-C would
+        # otherwise leave a stale, partially written .tmp file behind.
+        try:
+            tmp.unlink()
+        except FileNotFoundError:
+            # tmp.open() itself failed, so there is nothing to remove.
+            pass
+        raise
+
+    # replace() overwrites an existing target on every platform, whereas
+    # rename() raises on Windows if something recreated ``path`` while
+    # the download was in progress.
+    tmp.replace(path)
+    print('successfully downloaded %s' % url)
+
+
+def download_entry(entry: dict, dest_path: pathlib.Path, local_name=None) -> pathlib.Path:
+    """Download the artifact described by ``entry`` into ``dest_path``.
+
+    ``entry`` must provide ``url``, ``size`` and ``sha256`` keys. Unless
+    ``local_name`` is given, the file is named after the text following
+    the last ``/`` in the URL.
+
+    Returns the path of the downloaded file.
+    """
+    source_url = entry['url']
+
+    if not local_name:
+        local_name = source_url[source_url.rindex('/') + 1:]
+
+    target = dest_path / local_name
+    download_to_path(source_url, target, entry['size'], entry['sha256'])
+
+    return target
+
+
+def extract_tar_to_directory(source: pathlib.Path, dest: pathlib.Path):
+    """Unpack every member of the tar archive ``source`` beneath ``dest``."""
+    with tarfile.open(name=source, mode='r') as archive:
+        archive.extractall(path=dest)
+
+
+def extract_zip_to_directory(source: pathlib.Path, dest: pathlib.Path):
+    """Unpack every member of the zip archive ``source`` beneath ``dest``."""
+    with zipfile.ZipFile(source, mode='r') as archive:
+        archive.extractall(path=dest)