Mercurial > public > mercurial-scm > hg
diff mercurial/mdiff.py @ 71:47c9a869adee
Add mdiff.patches to speed up applying thousands of patches to the manifest
author | mpm@selenic.com |
---|---|
date | Sat, 14 May 2005 10:27:14 -0800 |
parents | b3e2ddff0159 |
children | 4a6ab4d80dc4 |
line wrap: on
line diff
--- a/mercurial/mdiff.py Sat May 14 10:13:49 2005 -0800 +++ b/mercurial/mdiff.py Sat May 14 10:27:14 2005 -0800 @@ -1,6 +1,7 @@ #!/usr/bin/python -import difflib, struct -from cStringIO import StringIO +import difflib, struct, mmap + +devzero = file("/dev/zero") def unidiff(a, ad, b, bd, fn): if not a and not b: return "" @@ -52,6 +53,74 @@ return "".join(bin) +# This attempts to apply a series of patches in time proportional to +# the total size of the patches, rather than patches * len(text). This +# means rather than shuffling strings around, we shuffle around +# pointers to fragments with fragment lists. +# +# When the fragment lists get too long, we collapse them. To do this +# efficiently, we do all our operations inside a buffer created by +# mmap and simply use memmove. This avoids creating a bunch of large +# temporary string buffers. + +def patches(a, bins): + if not bins: return a + + plens = [len(x) for x in bins] + pl = sum(plens) + bl = len(a) + pl + tl = bl + bl + pl # enough for the patches and two working texts + b1, b2 = 0, bl + m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE) + + # load our original text + m.write(a) + frags = [(len(a), b1)] + + # copy all the patches into our segment so we can memmove from them + pos = b2 + bl + m.seek(pos) + for p in bins: m.write(p) + + def pull(dst, src, l): # pull l bytes from src + while l: + f = src.pop(0) + if f[0] > l: # do we need to split? + src.insert(0, (f[0] - l, f[1] + l)) + dst.append((l, f[1])) + return + dst.append(f) + l -= f[0] + + def collect(buf, list): + start = buf + for l, p in list: + m.move(buf, p, l) + buf += l + return (buf - start, start) + + for plen in plens: + # if our list gets too long, execute it + if len(frags) > 128: + b2, b1 = b1, b2 + frags = [collect(b1, frags)] + + new = [] + end = pos + plen + last = 0 + while pos < end: + p1, p2, l = struct.unpack(">lll", m[pos:pos + 12]) + pull(new, frags, p1 - last) # what didn't change + pull([], frags, p2 - p1) # what got deleted + new.append((l, pos + 12)) # what got added + pos += l + 12 + last = p2 + frags = new + frags # what was left at the end + + t = collect(b2, frags) + + return m[t[1]:t[1] + t[0]] + def patch(a, bin): last = pos = 0 r = []