view tests/md5sum.py @ 50220:02fe65f74be5

convert: use a priority queue for sorting commits, to make sorting faster To achieve this, we turn commit sorters into classes so they can encapsulate state. This reduces the sorting time from ~30s to ~10s on a 500k-commit prefix of a repo I tried to convert. (and probably reduces the time to sort the whole repo from many tens of minutes to minutes, but I didn't try that again) The date caching gets removed because priority queue already caches the key.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Thu, 23 Feb 2023 23:25:28 +0100
parents 6000f5b25c9b
children 24ee91ba9aa8
line wrap: on
line source

#!/usr/bin/env python3
#
# Based on python's Tools/scripts/md5sum.py
#
# This software may be used and distributed according to the terms
# of the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2, which is
# GPL-compatible.


import hashlib
import os
import sys

try:
    import msvcrt

    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
except ImportError:
    pass

for filename in sys.argv[1:]:
    try:
        fp = open(filename, 'rb')
    except IOError as msg:
        sys.stderr.write('%s: Can\'t open: %s\n' % (filename, msg))
        sys.exit(1)

    m = hashlib.md5()
    try:
        for data in iter(lambda: fp.read(8192), b''):
            m.update(data)
    except IOError as msg:
        sys.stderr.write('%s: I/O error: %s\n' % (filename, msg))
        sys.exit(1)
    sys.stdout.write('%s  %s\n' % (m.hexdigest(), filename))

sys.exit(0)