Mercurial > public > mercurial-scm > hg
view tests/test-lrucachedict.py @ 39570:f296c0b366c8
util: lower water mark when removing nodes after cost limit reached
See the inline comment for the reasoning here. This is a pretty
common strategy for garbage collectors, other cache-like primtives.
The performance impact is substantial:
$ hg perflrucachedict --size 4 --gets 1000000 --sets 1000000 --mixed 1000000 --costlimit 100
! inserts w/ cost limit
! wall 1.659181 comb 1.650000 user 1.650000 sys 0.000000 (best of 7)
! wall 1.722122 comb 1.720000 user 1.720000 sys 0.000000 (best of 6)
! mixed w/ cost limit
! wall 1.139955 comb 1.140000 user 1.140000 sys 0.000000 (best of 9)
! wall 1.182513 comb 1.180000 user 1.180000 sys 0.000000 (best of 9)
$ hg perflrucachedict --size 1000 --gets 1000000 --sets 1000000 --mixed 1000000 --costlimit 10000
! inserts
! wall 0.679546 comb 0.680000 user 0.680000 sys 0.000000 (best of 15)
! sets
! wall 0.825147 comb 0.830000 user 0.830000 sys 0.000000 (best of 13)
! inserts w/ cost limit
! wall 25.105273 comb 25.080000 user 25.080000 sys 0.000000 (best of 3)
! wall 1.724397 comb 1.720000 user 1.720000 sys 0.000000 (best of 6)
! mixed
! wall 0.807096 comb 0.810000 user 0.810000 sys 0.000000 (best of 13)
! mixed w/ cost limit
! wall 12.104470 comb 12.070000 user 12.070000 sys 0.000000 (best of 3)
! wall 1.190563 comb 1.190000 user 1.190000 sys 0.000000 (best of 9)
$ hg perflrucachedict --size 1000 --gets 1000000 --sets 1000000 --mixed 1000000 --costlimit 10000 --mixedgetfreq 90
! inserts
! wall 0.711177 comb 0.710000 user 0.710000 sys 0.000000 (best of 14)
! sets
! wall 0.846992 comb 0.850000 user 0.850000 sys 0.000000 (best of 12)
! inserts w/ cost limit
! wall 25.963028 comb 25.960000 user 25.960000 sys 0.000000 (best of 3)
! wall 2.184311 comb 2.180000 user 2.180000 sys 0.000000 (best of 5)
! mixed
! wall 0.728256 comb 0.730000 user 0.730000 sys 0.000000 (best of 14)
! mixed w/ cost limit
! wall 3.174256 comb 3.170000 user 3.170000 sys 0.000000 (best of 4)
! wall 0.773186 comb 0.770000 user 0.770000 sys 0.000000 (best of 13)
$ hg perflrucachedict --size 100000 --gets 1000000 --sets 1000000 --mixed 1000000 --mixedgetfreq 90 --costlimit 5000000
! gets
! wall 1.191368 comb 1.190000 user 1.190000 sys 0.000000 (best of 9)
! wall 1.195304 comb 1.190000 user 1.190000 sys 0.000000 (best of 9)
! inserts
! wall 0.950995 comb 0.950000 user 0.950000 sys 0.000000 (best of 11)
! inserts w/ cost limit
! wall 1.589732 comb 1.590000 user 1.590000 sys 0.000000 (best of 7)
! sets
! wall 1.094941 comb 1.100000 user 1.090000 sys 0.010000 (best of 9)
! mixed
! wall 0.936420 comb 0.940000 user 0.930000 sys 0.010000 (best of 10)
! mixed w/ cost limit
! wall 0.882780 comb 0.870000 user 0.870000 sys 0.000000 (best of 11)
This puts us ~2x slower than caches without cost accounting. And for
read-heavy workloads (the prime use cases for caches), performance is
nearly identical.
In the worst case (pure write workloads with cost accounting enabled),
we're looking at ~1.5us per insert on large caches. That seems "fast
enough."
Differential Revision: https://phab.mercurial-scm.org/D4505
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Thu, 06 Sep 2018 18:04:27 -0700 |
parents | 842cd0bdda75 |
children | 8f2c0d1b454c |
line wrap: on
line source
from __future__ import absolute_import, print_function import unittest import silenttestrunner from mercurial import ( util, ) class testlrucachedict(unittest.TestCase): def testsimple(self): d = util.lrucachedict(4) self.assertEqual(d.capacity, 4) d.insert('a', 'va', cost=2) d['b'] = 'vb' d['c'] = 'vc' d.insert('d', 'vd', cost=42) self.assertEqual(d['a'], 'va') self.assertEqual(d['b'], 'vb') self.assertEqual(d['c'], 'vc') self.assertEqual(d['d'], 'vd') self.assertEqual(d.totalcost, 44) # 'a' should be dropped because it was least recently used. d['e'] = 've' self.assertNotIn('a', d) self.assertIsNone(d.get('a')) self.assertEqual(d.totalcost, 42) self.assertEqual(d['b'], 'vb') self.assertEqual(d['c'], 'vc') self.assertEqual(d['d'], 'vd') self.assertEqual(d['e'], 've') # Replacing item with different cost adjusts totalcost. d.insert('e', 've', cost=4) self.assertEqual(d.totalcost, 46) # Touch entries in some order (both get and set). d['e'] d['c'] = 'vc2' d['d'] d['b'] = 'vb2' # 'e' should be dropped now d['f'] = 'vf' self.assertNotIn('e', d) self.assertEqual(d['b'], 'vb2') self.assertEqual(d['c'], 'vc2') self.assertEqual(d['d'], 'vd') self.assertEqual(d['f'], 'vf') d.clear() for key in ('a', 'b', 'c', 'd', 'e', 'f'): self.assertNotIn(key, d) def testunfull(self): d = util.lrucachedict(4) d['a'] = 1 d['b'] = 2 d['a'] d['b'] for key in ('a', 'b'): self.assertIn(key, d) def testcopypartial(self): d = util.lrucachedict(4) d.insert('a', 'va', cost=4) d.insert('b', 'vb', cost=2) dc = d.copy() self.assertEqual(len(dc), 2) self.assertEqual(dc.totalcost, 6) for key in ('a', 'b'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) self.assertEqual(len(d), 2) for key in ('a', 'b'): self.assertIn(key, d) self.assertEqual(d[key], 'v%s' % key) d['c'] = 'vc' del d['b'] self.assertEqual(d.totalcost, 4) dc = d.copy() self.assertEqual(len(dc), 2) self.assertEqual(dc.totalcost, 4) for key in ('a', 'c'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) def testcopyempty(self): d = util.lrucachedict(4) dc = d.copy() self.assertEqual(len(dc), 0) def testcopyfull(self): d = util.lrucachedict(4) d.insert('a', 'va', cost=42) d['b'] = 'vb' d['c'] = 'vc' d['d'] = 'vd' dc = d.copy() for key in ('a', 'b', 'c', 'd'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) self.assertEqual(d.totalcost, 42) self.assertEqual(dc.totalcost, 42) # 'a' should be dropped because it was least recently used. dc['e'] = 've' self.assertNotIn('a', dc) for key in ('b', 'c', 'd', 'e'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) self.assertEqual(d.totalcost, 42) self.assertEqual(dc.totalcost, 0) # Contents and order of original dict should remain unchanged. dc['b'] = 'vb_new' self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a']) for key in ('a', 'b', 'c', 'd'): self.assertEqual(d[key], 'v%s' % key) d = util.lrucachedict(4, maxcost=42) d.insert('a', 'va', cost=5) d.insert('b', 'vb', cost=4) d.insert('c', 'vc', cost=3) dc = d.copy() self.assertEqual(dc.maxcost, 42) self.assertEqual(len(dc), 3) # Max cost can be lowered as part of copy. dc = d.copy(maxcost=10) self.assertEqual(dc.maxcost, 10) self.assertEqual(len(dc), 2) self.assertEqual(dc.totalcost, 7) self.assertIn('b', dc) self.assertIn('c', dc) def testcopydecreasecapacity(self): d = util.lrucachedict(5) d.insert('a', 'va', cost=4) d.insert('b', 'vb', cost=2) d['c'] = 'vc' d['d'] = 'vd' dc = d.copy(2) self.assertEqual(dc.totalcost, 0) for key in ('a', 'b'): self.assertNotIn(key, dc) for key in ('c', 'd'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) dc.insert('e', 've', cost=7) self.assertEqual(dc.totalcost, 7) self.assertNotIn('c', dc) for key in ('d', 'e'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) # Original should remain unchanged. self.assertEqual(d.totalcost, 6) for key in ('a', 'b', 'c', 'd'): self.assertIn(key, d) self.assertEqual(d[key], 'v%s' % key) def testcopyincreasecapacity(self): d = util.lrucachedict(5) d['a'] = 'va' d['b'] = 'vb' d['c'] = 'vc' d['d'] = 'vd' dc = d.copy(6) for key in ('a', 'b', 'c', 'd'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) dc['e'] = 've' dc['f'] = 'vf' for key in ('a', 'b', 'c', 'd', 'e', 'f'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) dc['g'] = 'vg' self.assertNotIn('a', dc) for key in ('b', 'c', 'd', 'e', 'f', 'g'): self.assertIn(key, dc) self.assertEqual(dc[key], 'v%s' % key) # Original should remain unchanged. for key in ('a', 'b', 'c', 'd'): self.assertIn(key, d) self.assertEqual(d[key], 'v%s' % key) def testpopoldest(self): d = util.lrucachedict(4) d.insert('a', 'va', cost=10) d.insert('b', 'vb', cost=5) self.assertEqual(len(d), 2) self.assertEqual(d.popoldest(), ('a', 'va')) self.assertEqual(len(d), 1) self.assertEqual(d.totalcost, 5) self.assertEqual(d.popoldest(), ('b', 'vb')) self.assertEqual(len(d), 0) self.assertEqual(d.totalcost, 0) self.assertIsNone(d.popoldest()) d['a'] = 'va' d['b'] = 'vb' d['c'] = 'vc' d['d'] = 'vd' self.assertEqual(d.popoldest(), ('a', 'va')) self.assertEqual(len(d), 3) for key in ('b', 'c', 'd'): self.assertEqual(d[key], 'v%s' % key) d['a'] = 'va' self.assertEqual(d.popoldest(), ('b', 'vb')) def testmaxcost(self): # Item cost is zero by default. d = util.lrucachedict(6, maxcost=10) d['a'] = 'va' d['b'] = 'vb' d['c'] = 'vc' d['d'] = 'vd' self.assertEqual(len(d), 4) self.assertEqual(d.totalcost, 0) d.clear() # Insertion to exact cost threshold works without eviction. d.insert('a', 'va', cost=6) d.insert('b', 'vb', cost=4) self.assertEqual(len(d), 2) self.assertEqual(d['a'], 'va') self.assertEqual(d['b'], 'vb') # Inserting a new element with 0 cost works. d['c'] = 'vc' self.assertEqual(len(d), 3) # Inserting a new element with cost putting us above high # water mark evicts oldest single item. d.insert('d', 'vd', cost=1) self.assertEqual(len(d), 3) self.assertEqual(d.totalcost, 5) self.assertNotIn('a', d) for key in ('b', 'c', 'd'): self.assertEqual(d[key], 'v%s' % key) # Inserting a new element with enough room for just itself # evicts all items before. d.insert('e', 've', cost=10) self.assertEqual(len(d), 1) self.assertEqual(d.totalcost, 10) self.assertIn('e', d) # Inserting a new element with cost greater than threshold # still retains that item. d.insert('f', 'vf', cost=11) self.assertEqual(len(d), 1) self.assertEqual(d.totalcost, 11) self.assertIn('f', d) # Inserting a new element will evict the last item since it is # too large. d['g'] = 'vg' self.assertEqual(len(d), 1) self.assertEqual(d.totalcost, 0) self.assertIn('g', d) d.clear() d.insert('a', 'va', cost=7) d.insert('b', 'vb', cost=3) self.assertEqual(len(d), 2) # Replacing a value with smaller cost won't result in eviction. d.insert('b', 'vb2', cost=2) self.assertEqual(len(d), 2) # Replacing a value with a higher cost will evict when threshold # exceeded. d.insert('b', 'vb3', cost=4) self.assertEqual(len(d), 1) self.assertNotIn('a', d) def testmaxcostcomplex(self): d = util.lrucachedict(100, maxcost=100) d.insert('a', 'va', cost=9) d.insert('b', 'vb', cost=21) d.insert('c', 'vc', cost=7) d.insert('d', 'vc', cost=50) self.assertEqual(d.totalcost, 87) # Inserting new element should free multiple elements so we hit # low water mark. d.insert('e', 'vd', cost=25) self.assertEqual(len(d), 2) self.assertNotIn('a', d) self.assertNotIn('b', d) self.assertNotIn('c', d) self.assertIn('d', d) self.assertIn('e', d) if __name__ == '__main__': silenttestrunner.main(__name__)