bdiff: don't check border condition in loop
This is pretty much a copy of d500ddae7494, just applied to a different loop.
The condition `p == plast` (`plast == a + len - 1`) was only true on
the final iteration of the loop. So it was wasteful to check for it
on every iteration. We decrease the iteration count by 1 and add an
explicit check for `p == plast` after the loop.
Again, we see modest wins.
From the mozilla-unified repository:
$ perfbdiff -m 3041e4d59df2
! wall 0.035502 comb 0.040000 user 0.040000 sys 0.000000 (best of 100)
! wall 0.030480 comb 0.030000 user 0.030000 sys 0.000000 (best of 100)
$ perfbdiff 0e9928989e9c --alldata --count 100
! wall 4.097394 comb 4.100000 user 4.100000 sys 0.000000 (best of 3)
! wall 3.597798 comb 3.600000 user 3.600000 sys 0.000000 (best of 3)
The 2nd example throws a total of ~3.3GB of data at bdiff. This
change increases the throughput from ~811 MB/s to ~924 MB/s.
from __future__ import absolute_import, print_function
import struct
from mercurial import (
bdiff,
mpatch,
)
def test1(a, b):
    """Round-trip check: diff ``a`` against ``b`` and patch ``a`` with it.

    The delta returned by ``bdiff.bdiff(a, b)`` is applied to ``a`` through
    ``mpatch.patches``.  If the patched text differs from ``b``, a
    diagnostic is printed; on success nothing is printed.
    """
    delta = bdiff.bdiff(a, b)
    # A falsy (empty) delta is never handed to mpatch; ``a`` is used as is.
    patched = mpatch.patches(a, [delta]) if delta else a
    if patched == b:
        return
    print("bad diff+patch result from\n %r to\n %r:" % (a, b))
    print("bdiff: %r" % delta)
    # Only the first 200 items of the patched text are shown.
    print("patched: %r" % patched[:200])
def test(a, b):
    """Print the pair under test, then run ``test1`` in both directions."""
    print("test", repr(a), repr(b))
    # Forward (a -> b) first, then reverse (b -> a).
    for source, target in ((a, b), (b, a)):
        test1(source, target)
# Round-trip cases, kept in their original order (including the repeated
# pair) so that the printed output of the script is unchanged.
_roundtrip_cases = (
    ("a\nc\n\n\n\n", "a\nb\n\n\n"),
    ("a\nb\nc\n", "a\nc\n"),
    ("", ""),
    ("a\nb\nc", "a\nb\nc"),
    ("a\nb\nc\nd\n", "a\nd\n"),
    ("a\nb\nc\nd\n", "a\nc\ne\n"),
    ("a\nb\nc\n", "a\nc\n"),
    ("a\n", "c\na\nb\n"),
    ("a\n", ""),
    ("a\n", "b\nc\n"),
    ("a\n", "c\na\n"),
    ("", "adjfkjdjksdhfksj"),
    ("", "ab"),
    ("", "abc"),
    ("a", "a"),
    ("ab", "ab"),
    ("abc", "abc"),
    ("a\n", "a\n"),
    ("a\nb", "a\nb"),
)
for _left, _right in _roundtrip_cases:
    test(_left, _right)
#issue1295
def showdiff(a, b):
    """Print a readable breakdown of the delta ``bdiff.bdiff(a, b)``.

    The delta is walked record by record.  Each record starts with a
    12-byte header holding three big-endian 32-bit integers (struct format
    ``">lll"``): a start and end offset into ``a`` and the length of the
    replacement data that follows the header.  For every record the
    untouched text of ``a`` before it and the replaced span together with
    its replacement are printed; any text of ``a`` left after the last
    record is printed at the end.
    """
    print('showdiff(\n %r,\n %r):' % (a, b))
    delta = bdiff.bdiff(a, b)
    offset = 0    # read position inside the delta
    consumed = 0  # end offset in ``a`` of the previous record
    while offset < len(delta):
        header = delta[offset:offset + 12]
        start, end, length = struct.unpack(">lll", header)
        offset += 12
        # The text preceding the record is skipped only when the record
        # begins at offset 0 of ``a``.
        if start:
            print('', repr(a[consumed:start]))
        replacement = delta[offset:offset + length]
        print('', start, end, repr(a[start:end]), '->', repr(replacement))
        offset += length
        consumed = end
    # Whatever follows the last record in ``a`` was left unchanged.
    if consumed < len(a):
        print('', repr(a[consumed:]))
# Both of the first two cases diff the same base text against a variant.
_repeated_x = "x\n\nx\n\nx\n\nx\n\nz\n"
for _variant in (
    "x\n\nx\n\ny\n\nx\n\nx\n\nz\n",
    "x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n",
):
    showdiff(_repeated_x, _variant)

# we should pick up abbbc. rather than bc.de as the longest match
showdiff(
    "a\nb\nb\nb\nc\n.\nd\ne\n.\nf\n",
    "a\nb\nb\na\nb\nb\nb\nc\n.\nb\nc\n.\nd\ne\nf\n"
)
print("done")
def testfixws(a, b, allws):
    """Check that ``bdiff.fixws(a, allws)`` yields ``b``.

    Nothing is printed when the result matches; otherwise the inputs, the
    expected value and the actual result are reported.
    """
    actual = bdiff.fixws(a, allws)
    if actual == b:
        return
    print("*** fixws", repr(a), repr(b), allws)
    print("got:")
    print(repr(actual))
# fixws cases: (input, expected result, allws flag).
for _text, _expected, _allws in (
    (" \ta\r b\t\n", "ab\n", 1),
    (" \ta\r b\t\n", " a b\n", 0),
    ("", "", 1),
    ("", "", 0),
):
    testfixws(_text, _expected, _allws)

print("done")

# Labelled showdiff cases: each label is printed right before its diff.
for _label, _before, _after in (
    ("Nice diff for a trivial change:",
     ''.join('<%s\n-\n' % i for i in range(5)),
     ''.join('>%s\n-\n' % i for i in range(5))),
    ("Diff 1 to 3 lines - preference for appending:",
     'a\n', 'a\n' * 3),
    ("Diff 1 to 5 lines - preference for appending:",
     'a\n', 'a\n' * 5),
    ("Diff 3 to 1 lines - preference for removing trailing lines:",
     'a\n' * 3, 'a\n'),
    ("Diff 5 to 1 lines - preference for removing trailing lines:",
     'a\n' * 5, 'a\n'),
):
    print(_label)
    showdiff(_before, _after)