Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 5451:0a43875677b1
revlog: break up compression of large deltas
Python's zlib apparently makes an internal copy of strings passed to
compress(). To avoid this, compress strings in 1 MiB (2**20-byte) pieces, then
join the pieces at the end if the result would be smaller than the original.
For initial commits of large but compressible files, this cuts peak
memory usage nearly in half.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Thu, 11 Oct 2007 00:46:54 -0500 |
parents | c728424d44c6 |
children | 9d77f2b47eb7 |
comparison legend: equal | deleted | inserted | replaced
5450:c728424d44c6 | 5451:0a43875677b1 |
---|---|
59 | 59 |
60 def compress(text): | 60 def compress(text): |
61 """ generate a possibly-compressed representation of text """ | 61 """ generate a possibly-compressed representation of text """ |
62 if not text: | 62 if not text: |
63 return ("", text) | 63 return ("", text) |
64 if len(text) < 44: | 64 l = len(text) |
65 if l < 44: | |
65 if text[0] == '\0': | 66 if text[0] == '\0': |
66 return ("", text) | 67 return ("", text) |
67 return ('u', text) | 68 return ('u', text) |
68 bin = _compress(text) | 69 elif l > 1000000: |
69 if len(bin) > len(text): | 70 # zlib makes an internal copy, thus doubling memory usage for |
71 # large files, so lets do this in pieces | |
72 z = zlib.compressobj() | |
73 p = [] | |
74 pos = 0 | |
75 while pos < l: | |
76 pos2 = pos + 2**20 | |
77 p.append(z.compress(text[pos:pos2])) | |
78 pos = pos2 | |
79 p.append(z.flush()) | |
80 if sum(map(len, p)) < l: | |
81 bin = "".join(p) | |
82 else: | |
83 bin = _compress(text) | |
84 if len(bin) > l: | |
70 if text[0] == '\0': | 85 if text[0] == '\0': |
71 return ("", text) | 86 return ("", text) |
72 return ('u', text) | 87 return ('u', text) |
73 return ("", bin) | 88 return ("", bin) |
74 | 89 |