bdiff: don't check border condition in loop
This is pretty much a copy of d500ddae7494, just to a different loop.
The condition `p == plast` (`plast == a + len - 1`) was only true on
the final iteration of the loop. So it was wasteful to check for it
on every iteration. We decrease the iteration count by 1 and add an
explicit check for `p == plast` after the loop.
Again, we see modest wins.
From the mozilla-unified repository:
$ perfbdiff -m 3041e4d59df2
! wall 0.035502 comb 0.040000 user 0.040000 sys 0.000000 (best of 100)
! wall 0.030480 comb 0.030000 user 0.030000 sys 0.000000 (best of 100)
$ perfbdiff 0e9928989e9c --alldata --count 100
! wall 4.097394 comb 4.100000 user 4.100000 sys 0.000000 (best of 3)
! wall 3.597798 comb 3.600000 user 3.600000 sys 0.000000 (best of 3)
The 2nd example throws a total of ~3.3GB of data at bdiff. This
change increases the throughput from ~811 MB/s to ~924 MB/s.
# Read the output of a "svn log --xml" command on stdin, parse it and
# print a subset of attributes common to all svn versions tested by
# hg.
from __future__ import absolute_import
import sys
import xml.dom.minidom
def xmltext(e):
return ''.join(c.data for c
in e.childNodes
if c.nodeType == c.TEXT_NODE)
def parseentry(entry):
e = {}
e['revision'] = entry.getAttribute('revision')
e['author'] = xmltext(entry.getElementsByTagName('author')[0])
e['msg'] = xmltext(entry.getElementsByTagName('msg')[0])
e['paths'] = []
paths = entry.getElementsByTagName('paths')
if paths:
paths = paths[0]
for p in paths.getElementsByTagName('path'):
action = p.getAttribute('action')
path = xmltext(p)
frompath = p.getAttribute('copyfrom-path')
fromrev = p.getAttribute('copyfrom-rev')
e['paths'].append((path, action, frompath, fromrev))
return e
def parselog(data):
entries = []
doc = xml.dom.minidom.parseString(data)
for e in doc.getElementsByTagName('logentry'):
entries.append(parseentry(e))
return entries
def printentries(entries):
fp = sys.stdout
for e in entries:
for k in ('revision', 'author', 'msg'):
fp.write(('%s: %s\n' % (k, e[k])).encode('utf-8'))
for path, action, fpath, frev in sorted(e['paths']):
frominfo = ''
if frev:
frominfo = ' (from %s@%s)' % (fpath, frev)
p = ' %s %s%s\n' % (action, path, frominfo)
fp.write(p.encode('utf-8'))
if __name__ == '__main__':
data = sys.stdin.read()
entries = parselog(data)
printentries(entries)