comparison mercurial/revlog.py @ 34296:3c9691728237

revlog: add option to mmap revlog index Following on from Jun Wu's patch last October[1], we have found that using mmap for the revlog index in repos with large revlogs gives a noticeable performance improvement (~110ms on each hg invocation), particularly for commands that don't touch the index very much. This changeset adds this as an option, activated by a new experimental config option so that it can be enabled on a per-repo basis. The configuration option specifies an index size threshold at which Mercurial will switch to using mmap to access the index. If the configuration option is not specified, the default remains to load the full file, which seems to be the best option for smaller repos. Some initial performance numbers for the average of 5 invocations of `hg log -l 5` for different cache states: | Repo: | HG | FB | |---|---|---| | Index size: | 2.3MB | much bigger | | read (warm): | 237ms | 432ms | | mmap (warm): | 227ms | 321ms | | | (-3%) | (-26%) | | read (cold): | 397ms | 696ms | | mmap (cold): | 410ms | 888ms | | | (+3%) | (+28%) | [1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2016-October/088737.html Test Plan: `hg log --config experimental.mmapindex=true` Differential Revision: https://phab.mercurial-scm.org/D477
author Mark Thomas <mbthomas@fb.com>
date Wed, 13 Sep 2017 17:26:26 +0000
parents 1db9abf407c5
children 7891d243d821
comparison
equal deleted inserted replaced
34295:3bb2a9f25fe9 34296:3c9691728237
266 remove data, and can use some simple techniques to avoid the need 266 remove data, and can use some simple techniques to avoid the need
267 for locking while reading. 267 for locking while reading.
268 268
269 If checkambig, indexfile is opened with checkambig=True at 269 If checkambig, indexfile is opened with checkambig=True at
270 writing, to avoid file stat ambiguity. 270 writing, to avoid file stat ambiguity.
271
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 index will be mmapped rather than read if it is larger than the
274 configured threshold.
271 """ 275 """
272 def __init__(self, opener, indexfile, datafile=None, checkambig=False): 276 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
277 mmaplargeindex=False):
273 """ 278 """
274 create a revlog object 279 create a revlog object
275 280
276 opener is a function that abstracts the file opening operation 281 opener is a function that abstracts the file opening operation
277 and can be used to implement COW semantics or the like. 282 and can be used to implement COW semantics or the like.
299 self._nodecache = {nullid: nullrev} 304 self._nodecache = {nullid: nullrev}
300 self._nodepos = None 305 self._nodepos = None
301 self._compengine = 'zlib' 306 self._compengine = 'zlib'
302 self._maxdeltachainspan = -1 307 self._maxdeltachainspan = -1
303 308
309 mmapindexthreshold = None
304 v = REVLOG_DEFAULT_VERSION 310 v = REVLOG_DEFAULT_VERSION
305 opts = getattr(opener, 'options', None) 311 opts = getattr(opener, 'options', None)
306 if opts is not None: 312 if opts is not None:
307 if 'revlogv2' in opts: 313 if 'revlogv2' in opts:
308 # version 2 revlogs always use generaldelta. 314 # version 2 revlogs always use generaldelta.
321 self._lazydeltabase = bool(opts.get('lazydeltabase', False)) 327 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
322 if 'compengine' in opts: 328 if 'compengine' in opts:
323 self._compengine = opts['compengine'] 329 self._compengine = opts['compengine']
324 if 'maxdeltachainspan' in opts: 330 if 'maxdeltachainspan' in opts:
325 self._maxdeltachainspan = opts['maxdeltachainspan'] 331 self._maxdeltachainspan = opts['maxdeltachainspan']
332 if mmaplargeindex and 'mmapindexthreshold' in opts:
333 mmapindexthreshold = opts['mmapindexthreshold']
326 334
327 if self._chunkcachesize <= 0: 335 if self._chunkcachesize <= 0:
328 raise RevlogError(_('revlog chunk cache size %r is not greater ' 336 raise RevlogError(_('revlog chunk cache size %r is not greater '
329 'than 0') % self._chunkcachesize) 337 'than 0') % self._chunkcachesize)
330 elif self._chunkcachesize & (self._chunkcachesize - 1): 338 elif self._chunkcachesize & (self._chunkcachesize - 1):
333 341
334 indexdata = '' 342 indexdata = ''
335 self._initempty = True 343 self._initempty = True
336 try: 344 try:
337 f = self.opener(self.indexfile) 345 f = self.opener(self.indexfile)
338 indexdata = f.read() 346 if (mmapindexthreshold is not None and
347 self.opener.fstat(f).st_size >= mmapindexthreshold):
348 indexdata = util.buffer(util.mmapread(f))
349 else:
350 indexdata = f.read()
339 f.close() 351 f.close()
340 if len(indexdata) > 0: 352 if len(indexdata) > 0:
341 v = versionformat_unpack(indexdata[:4])[0] 353 v = versionformat_unpack(indexdata[:4])[0]
342 self._initempty = False 354 self._initempty = False
343 except IOError as inst: 355 except IOError as inst: