comparison mercurial/revlog.py @ 23255:76effa770ff9

revlog: add config variable for limiting delta-chain length The current heuristic for deciding between storing deltas and full texts is based on the ratio of (sizeofdeltas)/(sizeoffulltext). In some cases (for example, a manifest for a huge repo) this approach can result in extremely long delta chains (~30,000), which are very slow to read. (In the case of a manifest, ~500ms is added to every hg command because of that.) This commit introduces the "revlog.maxchainlength" configuration variable, which will limit delta chain length.
author Mateusz Kwapich <mitrandir@fb.com>
date Thu, 06 Nov 2014 14:20:05 -0800
parents d23834b871ac
children 8d47c212b0dd
comparison
equal deleted inserted replaced
23254:d23834b871ac 23255:76effa770ff9
202 self.opener = opener 202 self.opener = opener
203 self._cache = None 203 self._cache = None
204 self._basecache = None 204 self._basecache = None
205 self._chunkcache = (0, '') 205 self._chunkcache = (0, '')
206 self._chunkcachesize = 65536 206 self._chunkcachesize = 65536
207 self._maxchainlen = None
207 self.index = [] 208 self.index = []
208 self._pcache = {} 209 self._pcache = {}
209 self._nodecache = {nullid: nullrev} 210 self._nodecache = {nullid: nullrev}
210 self._nodepos = None 211 self._nodepos = None
211 212
217 v |= REVLOGGENERALDELTA 218 v |= REVLOGGENERALDELTA
218 else: 219 else:
219 v = 0 220 v = 0
220 if 'chunkcachesize' in opts: 221 if 'chunkcachesize' in opts:
221 self._chunkcachesize = opts['chunkcachesize'] 222 self._chunkcachesize = opts['chunkcachesize']
223 if 'maxchainlen' in opts:
224 self._maxchainlen = opts['maxchainlen']
222 225
223 if self._chunkcachesize <= 0: 226 if self._chunkcachesize <= 0:
224 raise RevlogError(_('revlog chunk cache size %r is not greater ' 227 raise RevlogError(_('revlog chunk cache size %r is not greater '
225 'than 0') % self._chunkcachesize) 228 'than 0') % self._chunkcachesize)
226 elif self._chunkcachesize & (self._chunkcachesize - 1): 229 elif self._chunkcachesize & (self._chunkcachesize - 1):
1214 dist = l + offset - self.start(chainbase) 1217 dist = l + offset - self.start(chainbase)
1215 if self._generaldelta: 1218 if self._generaldelta:
1216 base = rev 1219 base = rev
1217 else: 1220 else:
1218 base = chainbase 1221 base = chainbase
1219 return dist, l, data, base, chainbase 1222 chainlen = self.chainlen(rev) + 1
1223 return dist, l, data, base, chainbase, chainlen
1220 1224
1221 curr = len(self) 1225 curr = len(self)
1222 prev = curr - 1 1226 prev = curr - 1
1223 base = chainbase = curr 1227 base = chainbase = curr
1228 chainlen = None
1224 offset = self.end(prev) 1229 offset = self.end(prev)
1225 flags = 0 1230 flags = 0
1226 d = None 1231 d = None
1227 if self._basecache is None: 1232 if self._basecache is None:
1228 self._basecache = (prev, self.chainbase(prev)) 1233 self._basecache = (prev, self.chainbase(prev))
1238 d = builddelta(p2r) 1243 d = builddelta(p2r)
1239 else: 1244 else:
1240 d = builddelta(prev) 1245 d = builddelta(prev)
1241 else: 1246 else:
1242 d = builddelta(prev) 1247 d = builddelta(prev)
1243 dist, l, data, base, chainbase = d 1248 dist, l, data, base, chainbase, chainlen = d
1244 1249
1245 # full versions are inserted when the needed deltas 1250 # full versions are inserted when the needed deltas
1246 # become comparable to the uncompressed text 1251 # become comparable to the uncompressed text
1247 if text is None: 1252 if text is None:
1248 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]), 1253 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1249 cachedelta[1]) 1254 cachedelta[1])
1250 else: 1255 else:
1251 textlen = len(text) 1256 textlen = len(text)
1252 if d is None or dist > textlen * 2: 1257 if (d is None or dist > textlen * 2 or
1258 self._maxchainlen and chainlen > self._maxchainlen):
1253 text = buildtext() 1259 text = buildtext()
1254 data = self.compress(text) 1260 data = self.compress(text)
1255 l = len(data[1]) + len(data[0]) 1261 l = len(data[1]) + len(data[0])
1256 base = chainbase = curr 1262 base = chainbase = curr
1257 1263