Mercurial > public > mercurial-scm > hg
comparison mercurial/revlog.py @ 17128:1028a1c9077a
revlog: make compress a method
This allows an extension to optionally use a new compression type based
on the options applied by the repo to the revlog's opener.
(decompress doesn't need the same treatment, as it can be replaced using
extensions.wrapfunction, and can figure out which compression algorithm
is in use based on the first byte of the compressed payload.)
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Mon, 25 Jun 2012 13:56:13 -0700 |
parents | 0c18aed2fcca |
children | ad1b5e070f16 |
comparison
Legend: equal, deleted, inserted, replaced
17127:9e1616307c4c | 17128:1028a1c9077a |
---|---|
72 l.sort() | 72 l.sort() |
73 s = _sha(l[0]) | 73 s = _sha(l[0]) |
74 s.update(l[1]) | 74 s.update(l[1]) |
75 s.update(text) | 75 s.update(text) |
76 return s.digest() | 76 return s.digest() |
77 | |
78 def compress(text): | |
79 """ generate a possibly-compressed representation of text """ | |
80 if not text: | |
81 return ("", text) | |
82 l = len(text) | |
83 bin = None | |
84 if l < 44: | |
85 pass | |
86 elif l > 1000000: | |
87 # zlib makes an internal copy, thus doubling memory usage for | |
88 # large files, so lets do this in pieces | |
89 z = zlib.compressobj() | |
90 p = [] | |
91 pos = 0 | |
92 while pos < l: | |
93 pos2 = pos + 2**20 | |
94 p.append(z.compress(text[pos:pos2])) | |
95 pos = pos2 | |
96 p.append(z.flush()) | |
97 if sum(map(len, p)) < l: | |
98 bin = "".join(p) | |
99 else: | |
100 bin = _compress(text) | |
101 if bin is None or len(bin) > l: | |
102 if text[0] == '\0': | |
103 return ("", text) | |
104 return ('u', text) | |
105 return ("", bin) | |
106 | 77 |
107 def decompress(bin): | 78 def decompress(bin): |
108 """ decompress the given input """ | 79 """ decompress the given input """ |
109 if not bin: | 80 if not bin: |
110 return bin | 81 return bin |
1006 finally: | 977 finally: |
1007 if dfh: | 978 if dfh: |
1008 dfh.close() | 979 dfh.close() |
1009 ifh.close() | 980 ifh.close() |
1010 | 981 |
982 def compress(self, text): | |
983 """ generate a possibly-compressed representation of text """ | |
984 if not text: | |
985 return ("", text) | |
986 l = len(text) | |
987 bin = None | |
988 if l < 44: | |
989 pass | |
990 elif l > 1000000: | |
991 # zlib makes an internal copy, thus doubling memory usage for | |
992 # large files, so lets do this in pieces | |
993 z = zlib.compressobj() | |
994 p = [] | |
995 pos = 0 | |
996 while pos < l: | |
997 pos2 = pos + 2**20 | |
998 p.append(z.compress(text[pos:pos2])) | |
999 pos = pos2 | |
1000 p.append(z.flush()) | |
1001 if sum(map(len, p)) < l: | |
1002 bin = "".join(p) | |
1003 else: | |
1004 bin = _compress(text) | |
1005 if bin is None or len(bin) > l: | |
1006 if text[0] == '\0': | |
1007 return ("", text) | |
1008 return ('u', text) | |
1009 return ("", bin) | |
1010 | |
1011 def _addrevision(self, node, text, transaction, link, p1, p2, | 1011 def _addrevision(self, node, text, transaction, link, p1, p2, |
1012 cachedelta, ifh, dfh): | 1012 cachedelta, ifh, dfh): |
1013 """internal function to add revisions to the log | 1013 """internal function to add revisions to the log |
1014 | 1014 |
1015 see addrevision for argument descriptions. | 1015 see addrevision for argument descriptions. |
1038 delta = cachedelta[1] | 1038 delta = cachedelta[1] |
1039 else: | 1039 else: |
1040 t = buildtext() | 1040 t = buildtext() |
1041 ptext = self.revision(self.node(rev)) | 1041 ptext = self.revision(self.node(rev)) |
1042 delta = mdiff.textdiff(ptext, t) | 1042 delta = mdiff.textdiff(ptext, t) |
1043 data = compress(delta) | 1043 data = self.compress(delta) |
1044 l = len(data[1]) + len(data[0]) | 1044 l = len(data[1]) + len(data[0]) |
1045 if basecache[0] == rev: | 1045 if basecache[0] == rev: |
1046 chainbase = basecache[1] | 1046 chainbase = basecache[1] |
1047 else: | 1047 else: |
1048 chainbase = self.chainbase(rev) | 1048 chainbase = self.chainbase(rev) |
1082 cachedelta[1]) | 1082 cachedelta[1]) |
1083 else: | 1083 else: |
1084 textlen = len(text) | 1084 textlen = len(text) |
1085 if d is None or dist > textlen * 2: | 1085 if d is None or dist > textlen * 2: |
1086 text = buildtext() | 1086 text = buildtext() |
1087 data = compress(text) | 1087 data = self.compress(text) |
1088 l = len(data[1]) + len(data[0]) | 1088 l = len(data[1]) + len(data[0]) |
1089 base = chainbase = curr | 1089 base = chainbase = curr |
1090 | 1090 |
1091 e = (offset_type(offset, flags), l, textlen, | 1091 e = (offset_type(offset, flags), l, textlen, |
1092 base, link, p1r, p2r, node) | 1092 base, link, p1r, p2r, node) |