Mercurial > public > mercurial-scm > hg
comparison mercurial/revlog.py @ 23255:76effa770ff9
revlog: add config variable for limiting delta-chain length
The current heuristic for deciding between storing delta and full texts
is based on the ratio of (sizeofdeltas)/(sizeoffulltext).
In some cases (for example, a manifest for a huge repo) this approach
can result in extremely long delta chains (~30,000) which are very slow to
read. (In the case of a manifest, ~500ms is added to every hg command because of that.)
This commit introduces the "revlog.maxchainlength" configuration variable that will
limit delta chain length.
author | Mateusz Kwapich <mitrandir@fb.com> |
---|---|
date | Thu, 06 Nov 2014 14:20:05 -0800 |
parents | d23834b871ac |
children | 8d47c212b0dd |
comparison
equal
deleted
inserted
replaced
23254:d23834b871ac | 23255:76effa770ff9 |
---|---|
202 self.opener = opener | 202 self.opener = opener |
203 self._cache = None | 203 self._cache = None |
204 self._basecache = None | 204 self._basecache = None |
205 self._chunkcache = (0, '') | 205 self._chunkcache = (0, '') |
206 self._chunkcachesize = 65536 | 206 self._chunkcachesize = 65536 |
207 self._maxchainlen = None | |
207 self.index = [] | 208 self.index = [] |
208 self._pcache = {} | 209 self._pcache = {} |
209 self._nodecache = {nullid: nullrev} | 210 self._nodecache = {nullid: nullrev} |
210 self._nodepos = None | 211 self._nodepos = None |
211 | 212 |
217 v |= REVLOGGENERALDELTA | 218 v |= REVLOGGENERALDELTA |
218 else: | 219 else: |
219 v = 0 | 220 v = 0 |
220 if 'chunkcachesize' in opts: | 221 if 'chunkcachesize' in opts: |
221 self._chunkcachesize = opts['chunkcachesize'] | 222 self._chunkcachesize = opts['chunkcachesize'] |
223 if 'maxchainlen' in opts: | |
224 self._maxchainlen = opts['maxchainlen'] | |
222 | 225 |
223 if self._chunkcachesize <= 0: | 226 if self._chunkcachesize <= 0: |
224 raise RevlogError(_('revlog chunk cache size %r is not greater ' | 227 raise RevlogError(_('revlog chunk cache size %r is not greater ' |
225 'than 0') % self._chunkcachesize) | 228 'than 0') % self._chunkcachesize) |
226 elif self._chunkcachesize & (self._chunkcachesize - 1): | 229 elif self._chunkcachesize & (self._chunkcachesize - 1): |
1214 dist = l + offset - self.start(chainbase) | 1217 dist = l + offset - self.start(chainbase) |
1215 if self._generaldelta: | 1218 if self._generaldelta: |
1216 base = rev | 1219 base = rev |
1217 else: | 1220 else: |
1218 base = chainbase | 1221 base = chainbase |
1219 return dist, l, data, base, chainbase | 1222 chainlen = self.chainlen(rev) + 1 |
1223 return dist, l, data, base, chainbase, chainlen | |
1220 | 1224 |
1221 curr = len(self) | 1225 curr = len(self) |
1222 prev = curr - 1 | 1226 prev = curr - 1 |
1223 base = chainbase = curr | 1227 base = chainbase = curr |
1228 chainlen = None | |
1224 offset = self.end(prev) | 1229 offset = self.end(prev) |
1225 flags = 0 | 1230 flags = 0 |
1226 d = None | 1231 d = None |
1227 if self._basecache is None: | 1232 if self._basecache is None: |
1228 self._basecache = (prev, self.chainbase(prev)) | 1233 self._basecache = (prev, self.chainbase(prev)) |
1238 d = builddelta(p2r) | 1243 d = builddelta(p2r) |
1239 else: | 1244 else: |
1240 d = builddelta(prev) | 1245 d = builddelta(prev) |
1241 else: | 1246 else: |
1242 d = builddelta(prev) | 1247 d = builddelta(prev) |
1243 dist, l, data, base, chainbase = d | 1248 dist, l, data, base, chainbase, chainlen = d |
1244 | 1249 |
1245 # full versions are inserted when the needed deltas | 1250 # full versions are inserted when the needed deltas |
1246 # become comparable to the uncompressed text | 1251 # become comparable to the uncompressed text |
1247 if text is None: | 1252 if text is None: |
1248 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]), | 1253 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]), |
1249 cachedelta[1]) | 1254 cachedelta[1]) |
1250 else: | 1255 else: |
1251 textlen = len(text) | 1256 textlen = len(text) |
1252 if d is None or dist > textlen * 2: | 1257 if (d is None or dist > textlen * 2 or |
1258 self._maxchainlen and chainlen > self._maxchainlen): | |
1253 text = buildtext() | 1259 text = buildtext() |
1254 data = self.compress(text) | 1260 data = self.compress(text) |
1255 l = len(data[1]) + len(data[0]) | 1261 l = len(data[1]) + len(data[0]) |
1256 base = chainbase = curr | 1262 base = chainbase = curr |
1257 | 1263 |