bdiff: don't check border condition in loop
This is essentially the same change as d500ddae7494, applied to a different loop.
The condition `p == plast` (`plast == a + len - 1`) was only true on
the final iteration of the loop. So it was wasteful to check for it
on every iteration. We decrease the iteration count by 1 and add an
explicit check for `p == plast` after the loop.
Again, we see modest wins.
From the mozilla-unified repository:
$ perfbdiff -m 3041e4d59df2
! wall 0.035502 comb 0.040000 user 0.040000 sys 0.000000 (best of 100)
! wall 0.030480 comb 0.030000 user 0.030000 sys 0.000000 (best of 100)
$ perfbdiff 0e9928989e9c --alldata --count 100
! wall 4.097394 comb 4.100000 user 4.100000 sys 0.000000 (best of 3)
! wall 3.597798 comb 3.600000 user 3.600000 sys 0.000000 (best of 3)
The 2nd example throws a total of ~3.3GB of data at bdiff. This
change increases the throughput from ~811 MB/s to ~924 MB/s.
# Randomized torture test generation for bdiff
from __future__ import absolute_import, print_function
import random
import sys
from mercurial import (
bdiff,
mpatch,
)
def reducetest(a, b):
    """Shrink a failing (a, b) pair, report the result, and exit.

    Candidates are built by randomly dropping lines from ``a`` and ``b``.
    A candidate replaces the current pair only when ``test1`` still raises
    on it, so the current pair keeps reproducing a failure.  The search
    stops after 1000 consecutive candidates that did not fail.  The reduced
    pair is then printed, re-checked, and the process exits via
    ``sys.exit(0)``.
    """
    tries = 0
    reductions = 0
    print("reducing...")
    # NOTE(review): skipped candidates do not count toward ``tries``; if no
    # acceptable candidate can ever be produced, this loop does not end.
    while tries < 1000:
        # randint(0, 100) is inclusive, so each line is dropped with
        # probability 1/101.
        a2 = "\n".join(l for l in a.splitlines()
                       if random.randint(0, 100) > 0) + "\n"
        b2 = "\n".join(l for l in b.splitlines()
                       if random.randint(0, 100) > 0) + "\n"
        if a2 == a and b2 == b:
            # Nothing was dropped; draw another candidate.
            continue
        if a2 == b2:
            # Skip candidates where both sides are the same text.
            continue
        tries += 1

        try:
            # Check the candidate itself.  Re-testing (a, b) would accept
            # every candidate blindly, because (a, b) is already failing.
            test1(a2, b2)
        except Exception:
            reductions += 1
            tries = 0
            a = a2
            b = b2

    print("reduced:", reductions, len(a) + len(b),
          repr(a), repr(b))
    try:
        test1(a, b)
    except Exception as inst:
        print("failed:", inst)

    sys.exit(0)
def test1(a, b):
    """Diff ``a`` against ``b`` and check that patching ``a`` yields ``b``.

    Raises ValueError("empty") when ``bdiff.bdiff`` returns a falsy delta,
    and ValueError("bad") when applying the delta to ``a`` with
    ``mpatch.patches`` does not reproduce ``b``.
    """
    delta = bdiff.bdiff(a, b)
    if not delta:
        raise ValueError("empty")
    if mpatch.patches(a, [delta]) != b:
        raise ValueError("bad")
def testwrap(a, b):
    """Run ``test1(a, b)``; on failure, report it and start reduction.

    Returns normally when ``test1`` does not raise.  Otherwise the
    exception is printed and the pair is handed to ``reducetest``.
    """
    try:
        test1(a, b)
    except Exception as inst:
        # Keep our own reference: Python 3 unbinds the ``as`` name when the
        # handler ends, so reading ``inst`` afterwards raises NameError.
        failure = inst
    else:
        return
    print("exception:", failure)
    reducetest(a, b)
def test(a, b):
    """Check the pair in both directions: a -> b first, then b -> a."""
    for old, new in ((a, b), (b, a)):
        testwrap(old, new)
def rndtest(size, noise):
    """Generate a random text pair and run it through ``test``.

    ``a`` is ``size`` one-character lines drawn from a small, skewed
    alphabet.  ``b`` is derived from ``a``: a line is kept only when
    ``randint(0, 99) > noise``, and after each kept line random lines are
    inserted for as long as ``randint(0, 99) < noise``.  The derivation is
    repeated until ``b`` differs from ``a``.  Both sides are then joined
    with newlines, with a trailing newline, and passed to ``test``.

    ``size`` should be positive: with an empty ``a`` no mutation is
    possible, so the retry loop would never find a differing ``b``.
    """
    src = " aaaaaaaabbbbccd"
    last = len(src) - 1
    # ``range`` works on both Python 2 and 3; ``xrange`` exists only on 2.
    a = [src[random.randint(0, last)] for _ in range(size)]

    while True:
        b = [c for c in a if random.randint(0, 99) > noise]
        b2 = []
        for c in b:
            b2.append(c)
            while random.randint(0, 99) < noise:
                b2.append(src[random.randint(0, last)])
        if b2 != a:
            break

    a = "\n".join(a) + "\n"
    b = "\n".join(b2) + "\n"

    test(a, b)
# Torture driver: runs forever (until a failure makes reducetest exit).
# Each round walks line counts from ``startsize`` upward by doubling, and
# for every size feeds at least ``maxvol`` lines' worth of random tests to
# rndtest with noise level 2.  After a round, both the volume budget and
# the starting size are multiplied by 4.
maxvol = 10000
startsize = 2

while True:
    count = 0
    size = startsize
    while size < maxvol:
        print(size)
        volume = 0
        while volume < maxvol:
            rndtest(size, 2)
            volume, count = volume + size, count + 2
        size = size * 2
    startsize, maxvol = startsize * 4, maxvol * 4