mercurial-scm/hg: contrib/bdiff-torture.py@d195fa651b51

bdiff: don't check border condition in loop This is pretty much a copy of d500ddae7494, just to a different loop. The condition `p == plast` (`plast == a + len - 1`) was only true on the final iteration of the loop. So it was wasteful to check for it on every iteration. We decrease the iteration count by 1 and add an explicit check for `p == plast` after the loop. Again, we see modest wins. From the mozilla-unified repository: $ perfbdiff -m 3041e4d59df2 ! wall 0.035502 comb 0.040000 user 0.040000 sys 0.000000 (best of 100) ! wall 0.030480 comb 0.030000 user 0.030000 sys 0.000000 (best of 100) $ perfbdiff 0e9928989e9c --alldata --count 100 ! wall 4.097394 comb 4.100000 user 4.100000 sys 0.000000 (best of 3) ! wall 3.597798 comb 3.600000 user 3.600000 sys 0.000000 (best of 3) The 2nd example throws a total of ~3.3GB of data at bdiff. This change increases the throughput from ~811 MB/s to ~924 MB/s.


# Randomized torture test generation for bdiff

from __future__ import absolute_import, print_function
import random
import sys

from mercurial import (
    bdiff,
    mpatch,
)

def reducetest(a, b):
    tries = 0
    reductions = 0
    print("reducing...")
    while tries < 1000:
        a2 = "\n".join(l for l in a.splitlines()
                       if random.randint(0, 100) > 0) + "\n"
        b2 = "\n".join(l for l in b.splitlines()
                       if random.randint(0, 100) > 0) + "\n"
        if a2 == a and b2 == b:
            continue
        if a2 == b2:
            continue
        tries += 1

        try:
            test1(a, b)
        except Exception as inst:
            reductions += 1
            tries = 0
            a = a2
            b = b2

    print("reduced:", reductions, len(a) + len(b),
          repr(a), repr(b))
    try:
        test1(a, b)
    except Exception as inst:
        print("failed:", inst)

    sys.exit(0)

def test1(a, b):
    d = bdiff.bdiff(a, b)
    if not d:
        raise ValueError("empty")
    c = mpatch.patches(a, [d])
    if c != b:
        raise ValueError("bad")

def testwrap(a, b):
    try:
        test1(a, b)
        return
    except Exception as inst:
        pass
    print("exception:", inst)
    reducetest(a, b)

def test(a, b):
    testwrap(a, b)
    testwrap(b, a)

def rndtest(size, noise):
    a = []
    src = "                aaaaaaaabbbbccd"
    for x in xrange(size):
        a.append(src[random.randint(0, len(src) - 1)])

    while True:
        b = [c for c in a if random.randint(0, 99) > noise]
        b2 = []
        for c in b:
            b2.append(c)
            while random.randint(0, 99) < noise:
                b2.append(src[random.randint(0, len(src) - 1)])
        if b2 != a:
            break

    a = "\n".join(a) + "\n"
    b = "\n".join(b2) + "\n"

    test(a, b)

maxvol = 10000
startsize = 2
while True:
    size = startsize
    count = 0
    while size < maxvol:
        print(size)
        volume = 0
        while volume < maxvol:
            rndtest(size, 2)
            volume += size
            count += 2
        size *= 2
    maxvol *= 4
    startsize *= 4

author	Gregory Szorc <gregory.szorc@gmail.com>
	Sun, 20 Nov 2016 16:56:21 -0800
changeset 30461	d195fa651b51
parent 29209	eccfd6500636
child 32202	ded48ad55146
permissions	-rw-r--r--