comparison mercurial/revlog.py @ 30793:b6f455a6e4d6
revlog: move decompress() from module to revlog class (API)
Upcoming patches will convert revlogs to use the compression engine
APIs to perform all things compression. The yet-to-be-introduced
APIs support a persistent "compressor" object so the same object
can be reused for multiple compression operations, leading to
better performance. In addition, compression engines like zstd
may wish to tweak compression engine state based on the revlog
(e.g. per-revlog compression dictionaries).
A global and shared decompress() function will shortly no longer
make much sense. So, we move decompress() to be a method of the
revlog class. It joins compress() there.
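For illustration only, a minimal sketch of the reuse pattern described above, using the third-party python-zstandard package rather than the yet-to-be-introduced Mercurial compression engine API; the names below are examples, not revlog code.

# Sketch: one persistent compressor/decompressor pair services many chunks.
# Uses the third-party `zstandard` package; this is not Mercurial's API.
import zstandard as zstd

# Construct the objects once. A per-revlog dictionary could be supplied to
# both constructors via dict_data=... to bias compression toward the data
# seen in that revlog.
cctx = zstd.ZstdCompressor(level=3)
dctx = zstd.ZstdDecompressor()

chunks = [(b'revision data %d' % i) * 50 for i in range(100)]

# The same objects are reused for every chunk, avoiding per-call setup cost.
compressed = [cctx.compress(c) for c in chunks]
restored = [dctx.decompress(c) for c in compressed]
assert restored == chunks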
On the mozilla-unified repo, we can measure the impact of this change
on reading performance:
$ hg perfrevlogchunks -c
! chunk
! wall 1.932573 comb 1.930000 user 1.900000 sys 0.030000 (best of 6)
! wall 1.955183 comb 1.960000 user 1.930000 sys 0.030000 (best of 6)
! chunk batch
! wall 1.787879 comb 1.780000 user 1.770000 sys 0.010000 (best of 6)
! wall 1.774444 comb 1.770000 user 1.750000 sys 0.020000 (best of 6)
"chunk" appeared to become slower but "chunk batch" got faster. Upon
further examination by running both sets multiple times, the numbers
appear to converge across all runs. This tells me that there is no
perceived performance impact to this refactor.
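As a rough sanity check on the numbers quoted above (reading the first wall time of each pair as the pre-patch run, per the description), the deltas are on the order of one percent in opposite directions, well within run-to-run noise; a small illustrative calculation:

# Relative deltas between the two quoted runs of `hg perfrevlogchunks -c`.
before = {'chunk': 1.932573, 'chunk batch': 1.787879}
after = {'chunk': 1.955183, 'chunk batch': 1.774444}

for name in before:
    delta = (after[name] - before[name]) / before[name]
    print('%s: %+.2f%%' % (name, delta * 100))

# chunk: +1.17%
# chunk batch: -0.75%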
author      Gregory Szorc <gregory.szorc@gmail.com>
date        Mon, 02 Jan 2017 13:00:16 -0800
parents     4215dc1b708b
children    78ac56aebab6
30792:4215dc1b708b | 30793:b6f455a6e4d6 |
137 l.sort() | 137 l.sort() |
138 s = hashlib.sha1(l[0]) | 138 s = hashlib.sha1(l[0]) |
139 s.update(l[1]) | 139 s.update(l[1]) |
140 s.update(text) | 140 s.update(text) |
141 return s.digest() | 141 return s.digest() |
142 | |
143 def decompress(bin): | |
144 """ decompress the given input """ | |
145 if not bin: | |
146 return bin | |
147 t = bin[0] | |
148 if t == '\0': | |
149 return bin | |
150 if t == 'x': | |
151 try: | |
152 return _decompress(bin) | |
153 except zlib.error as e: | |
154 raise RevlogError(_("revlog decompress error: %s") % str(e)) | |
155 if t == 'u': | |
156 return util.buffer(bin, 1) | |
157 raise RevlogError(_("unknown compression type %r") % t) | |
158 | 142 |
159 # index v0: | 143 # index v0: |
160 # 4 bytes: offset | 144 # 4 bytes: offset |
161 # 4 bytes: compressed length | 145 # 4 bytes: compressed length |
162 # 4 bytes: base rev | 146 # 4 bytes: base rev |
1177 to be used for reading. If used, the seek position of the file will not | 1161 to be used for reading. If used, the seek position of the file will not |
1178 be preserved. | 1162 be preserved. |
1179 | 1163 |
1180 Returns a str holding uncompressed data for the requested revision. | 1164 Returns a str holding uncompressed data for the requested revision. |
1181 """ | 1165 """ |
1182 return decompress(self._chunkraw(rev, rev, df=df)[1]) | 1166 return self.decompress(self._chunkraw(rev, rev, df=df)[1]) |
1183 | 1167 |
1184 def _chunks(self, revs, df=None): | 1168 def _chunks(self, revs, df=None): |
1185 """Obtain decompressed chunks for the specified revisions. | 1169 """Obtain decompressed chunks for the specified revisions. |
1186 | 1170 |
1187 Accepts an iterable of numeric revisions that are assumed to be in | 1171 Accepts an iterable of numeric revisions that are assumed to be in |
1210 except OverflowError: | 1194 except OverflowError: |
1211 # issue4215 - we can't cache a run of chunks greater than | 1195 # issue4215 - we can't cache a run of chunks greater than |
1212 # 2G on Windows | 1196 # 2G on Windows |
1213 return [self._chunk(rev, df=df) for rev in revs] | 1197 return [self._chunk(rev, df=df) for rev in revs] |
1214 | 1198 |
| 1199 decomp = self.decompress |
1215 for rev in revs: | 1200 for rev in revs: |
1216 chunkstart = start(rev) | 1201 chunkstart = start(rev) |
1217 if inline: | 1202 if inline: |
1218 chunkstart += (rev + 1) * iosize | 1203 chunkstart += (rev + 1) * iosize |
1219 chunklength = length(rev) | 1204 chunklength = length(rev) |
1220 ladd(decompress(buffer(data, chunkstart - offset, chunklength))) | 1205 ladd(decomp(buffer(data, chunkstart - offset, chunklength))) |
1221 | 1206 |
1222 return l | 1207 return l |
1223 | 1208 |
1224 def _chunkclear(self): | 1209 def _chunkclear(self): |
1225 """Clear the raw chunk cache.""" | 1210 """Clear the raw chunk cache.""" |
1506 if bin is None or len(bin) >= l: | 1491 if bin is None or len(bin) >= l: |
1507 if text[0] == '\0': | 1492 if text[0] == '\0': |
1508 return ("", text) | 1493 return ("", text) |
1509 return ('u', text) | 1494 return ('u', text) |
1510 return ("", bin) | 1495 return ("", bin) |
| 1496 |
| 1497 def decompress(self, data): |
| 1498 """Decompress a revlog chunk. |
| 1499 |
| 1500 The chunk is expected to begin with a header identifying the |
| 1501 format type so it can be routed to an appropriate decompressor. |
| 1502 """ |
| 1503 if not data: |
| 1504 return data |
| 1505 t = data[0] |
| 1506 if t == '\0': |
| 1507 return data |
| 1508 if t == 'x': |
| 1509 try: |
| 1510 return _decompress(data) |
| 1511 except zlib.error as e: |
| 1512 raise RevlogError(_('revlog decompress error: %s') % str(e)) |
| 1513 if t == 'u': |
| 1514 return util.buffer(data, 1) |
| 1515 raise RevlogError(_('unknown compression type %r') % t) |
1511 | 1516 |
1512 def _isgooddelta(self, d, textlen): | 1517 def _isgooddelta(self, d, textlen): |
1513 """Returns True if the given delta is good. Good means that it is within | 1518 """Returns True if the given delta is good. Good means that it is within |
1514 the disk span, disk size, and chain length bounds that we know to be | 1519 the disk span, disk size, and chain length bounds that we know to be |
1515 performant.""" | 1520 performant.""" |
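One small detail visible in the _chunks() hunk above: the new code binds self.decompress to a local name (decomp) before the per-revision loop, so the attribute lookup happens once rather than once per chunk. A standalone sketch of that general technique, using a made-up Codec class rather than revlog itself:

# Hypothetical stand-in for an object with a decompress() method; not
# Mercurial code. Demonstrates hoisting an attribute lookup out of a loop.
import timeit

class Codec(object):
    def decompress(self, data):
        return data

codec = Codec()
payload = [b'x'] * 100000

def lookup_each_iteration():
    # codec.decompress is looked up on every pass through the loop.
    return [codec.decompress(d) for d in payload]

def hoisted_binding():
    # One lookup up front; the loop calls the local name directly.
    decomp = codec.decompress
    return [decomp(d) for d in payload]

print('per-iteration lookup:', timeit.timeit(lookup_each_iteration, number=100))
print('hoisted binding:     ', timeit.timeit(hoisted_binding, number=100))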