comparison mercurial/revlog.py @ 17128:1028a1c9077a

revlog: make compress a method. This allows an extension to optionally use a new compression type based on the options applied by the repo to the revlog's opener. (decompress doesn't need the same treatment, as it can be replaced using extensions.wrapfunction, and can figure out which compression algorithm is in use based on the first byte of the compressed payload.)
author Bryan O'Sullivan <bryano@fb.com>
date Mon, 25 Jun 2012 13:56:13 -0700
parents 0c18aed2fcca
children ad1b5e070f16
comparison
equal deleted inserted replaced
17127:9e1616307c4c 17128:1028a1c9077a
72 l.sort() 72 l.sort()
73 s = _sha(l[0]) 73 s = _sha(l[0])
74 s.update(l[1]) 74 s.update(l[1])
75 s.update(text) 75 s.update(text)
76 return s.digest() 76 return s.digest()
77
def compress(text):
    """Return a (header, data) pair holding text, compressed when it pays off.

    The header is 'u' when text is kept verbatim and does not begin with
    a NUL byte; in every other case (empty text, compressed data, or
    verbatim text that begins with NUL) the header is the empty string.
    """
    if not text:
        return ("", text)

    size = len(text)
    packed = None
    if size > 1000000:
        # zlib makes an internal copy of its input, which would double
        # memory usage for large texts, so feed it 1 MiB slices instead
        compressor = zlib.compressobj()
        chunks = []
        for start in range(0, size, 2 ** 20):
            chunks.append(compressor.compress(text[start:start + 2 ** 20]))
        chunks.append(compressor.flush())
        # only keep the result when it is strictly smaller than the input
        if sum(len(chunk) for chunk in chunks) < size:
            packed = "".join(chunks)
    elif size >= 44:
        # texts shorter than 44 bytes are never run through the compressor
        packed = _compress(text)

    if packed is not None and len(packed) <= size:
        return ("", packed)
    # fall back to the raw text; a leading NUL byte needs no 'u' marker
    if text[0] == '\0':
        return ("", text)
    return ('u', text)
106 77
107 def decompress(bin): 78 def decompress(bin):
108 """ decompress the given input """ 79 """ decompress the given input """
109 if not bin: 80 if not bin:
110 return bin 81 return bin
1006 finally: 977 finally:
1007 if dfh: 978 if dfh:
1008 dfh.close() 979 dfh.close()
1009 ifh.close() 980 ifh.close()
1010 981
def compress(self, text):
    """Return a (header, data) pair holding text, compressed when it pays off.

    The header is 'u' when text is kept verbatim and does not begin with
    a NUL byte; in every other case (empty text, compressed data, or
    verbatim text that begins with NUL) the header is the empty string.
    """
    if not text:
        return ("", text)

    size = len(text)
    packed = None
    if size > 1000000:
        # zlib makes an internal copy of its input, which would double
        # memory usage for large texts, so feed it 1 MiB slices instead
        compressor = zlib.compressobj()
        chunks = []
        for start in range(0, size, 2 ** 20):
            chunks.append(compressor.compress(text[start:start + 2 ** 20]))
        chunks.append(compressor.flush())
        # only keep the result when it is strictly smaller than the input
        if sum(len(chunk) for chunk in chunks) < size:
            packed = "".join(chunks)
    elif size >= 44:
        # texts shorter than 44 bytes are never run through the compressor
        packed = _compress(text)

    if packed is not None and len(packed) <= size:
        return ("", packed)
    # fall back to the raw text; a leading NUL byte needs no 'u' marker
    if text[0] == '\0':
        return ("", text)
    return ('u', text)
1010
1011 def _addrevision(self, node, text, transaction, link, p1, p2, 1011 def _addrevision(self, node, text, transaction, link, p1, p2,
1012 cachedelta, ifh, dfh): 1012 cachedelta, ifh, dfh):
1013 """internal function to add revisions to the log 1013 """internal function to add revisions to the log
1014 1014
1015 see addrevision for argument descriptions. 1015 see addrevision for argument descriptions.
1038 delta = cachedelta[1] 1038 delta = cachedelta[1]
1039 else: 1039 else:
1040 t = buildtext() 1040 t = buildtext()
1041 ptext = self.revision(self.node(rev)) 1041 ptext = self.revision(self.node(rev))
1042 delta = mdiff.textdiff(ptext, t) 1042 delta = mdiff.textdiff(ptext, t)
1043 data = compress(delta) 1043 data = self.compress(delta)
1044 l = len(data[1]) + len(data[0]) 1044 l = len(data[1]) + len(data[0])
1045 if basecache[0] == rev: 1045 if basecache[0] == rev:
1046 chainbase = basecache[1] 1046 chainbase = basecache[1]
1047 else: 1047 else:
1048 chainbase = self.chainbase(rev) 1048 chainbase = self.chainbase(rev)
1082 cachedelta[1]) 1082 cachedelta[1])
1083 else: 1083 else:
1084 textlen = len(text) 1084 textlen = len(text)
1085 if d is None or dist > textlen * 2: 1085 if d is None or dist > textlen * 2:
1086 text = buildtext() 1086 text = buildtext()
1087 data = compress(text) 1087 data = self.compress(text)
1088 l = len(data[1]) + len(data[0]) 1088 l = len(data[1]) + len(data[0])
1089 base = chainbase = curr 1089 base = chainbase = curr
1090 1090
1091 e = (offset_type(offset, flags), l, textlen, 1091 e = (offset_type(offset, flags), l, textlen,
1092 base, link, p1r, p2r, node) 1092 base, link, p1r, p2r, node)