comparison contrib/shrink-revlog.py @ 10625:add562abc28a

shrink-revlog: remove branchsort algorithm (it behaves poorly)
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Wed, 10 Mar 2010 09:48:15 +0100
parents 432eb853a2c6
children 3fc95c3bc3ba
comparison
equal deleted inserted replaced
10624:432eb853a2c6 10625:add562abc28a
22 import os, tempfile, errno 22 import os, tempfile, errno
23 from mercurial import revlog, transaction, node, util 23 from mercurial import revlog, transaction, node, util
24 from mercurial import changegroup 24 from mercurial import changegroup
25 from mercurial.i18n import _ 25 from mercurial.i18n import _
26 26
27 def toposort_branchsort(ui, rl): 27
28
29 children = {}
30 root = []
31 # build children and roots
32 ui.status(_('reading revs\n'))
33 try:
34 for rev in rl:
35 ui.progress(_('reading'), rev, total=len(rl))
36 children[rev] = []
37 parents = [p for p in rl.parentrevs(rev) if p != node.nullrev]
38 # in case of duplicate parents
39 if len(parents) == 2 and parents[0] == parents[1]:
40 del parents[1]
41 for p in parents:
42 assert p in children
43 children[p].append(rev)
44
45 if len(parents) == 0:
46 root.append(rev)
47 finally:
48 ui.progress(_('reading'), None, total=len(rl))
49
50 # XXX this is a reimplementation of the 'branchsort' topo sort
51 # algorithm in hgext.convert.convcmd... would be nice not to duplicate
52 # the algorithm
53 ui.status(_('sorting revs\n'))
54 visit = root
55 result = []
56
57 # suboptimal: nodes whose predecessor is not first parent
58 suboptimal = 0
59
60 while visit:
61 cur = visit.pop(0)
62 # revlog will compute delta relative to result[-1], so keep track
63 # of nodes where this might result in a large delta
64 parents = rl.parentrevs(cur)
65 if result:
66 if result[-1] != parents[0]:
67 suboptimal += 1
68
69 result.append(cur)
70 if cur not in children:
71 # This only happens if some node's p1 == p2, which can
72 # happen in the manifest in certain circumstances.
73 continue
74 next = []
75 for c in children.pop(cur):
76 parents_unseen = [p for p in rl.parentrevs(c)
77 if p != node.nullrev and p in children]
78 if len(parents_unseen) == 0:
79 next.append(c)
80 visit = next + visit
81 ui.note(_('%d suboptimal nodes\n') % suboptimal)
82 return result
83 28
84 def toposort_reversepostorder(ui, rl): 29 def toposort_reversepostorder(ui, rl):
85 # postorder of the reverse directed graph 30 # postorder of the reverse directed graph
86 31
87 # map rev to list of parent revs (p2 first) 32 # map rev to list of parent revs (p2 first)
238 Rewrites all the entries in some revlog of the current repository 183 Rewrites all the entries in some revlog of the current repository
239 (by default, the manifest log) to save space. 184 (by default, the manifest log) to save space.
240 185
241 Different sort algorithms have different performance 186 Different sort algorithms have different performance
242 characteristics. Use ``--sort`` to select a sort algorithm so you 187 characteristics. Use ``--sort`` to select a sort algorithm so you
243 can determine which works best for your data. The default 188 can determine which works best for your data.
244 algorithm, ``branchsort``, works well for workflows with lots of
245 active (unmerged) branches, but not so well when all branches have
246 been merged and there is only one repository head.
247 """ 189 """
248 190
249 if not repo.local(): 191 if not repo.local():
250 raise util.Abort(_('not a local repository: %s') % repo.root) 192 raise util.Abort(_('not a local repository: %s') % repo.root)
251 193
357 299
358 cmdtable = { 300 cmdtable = {
359 'shrink': (shrink, 301 'shrink': (shrink,
360 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')), 302 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
361 ('n', 'dry-run', None, _('do not shrink, simulate only')), 303 ('n', 'dry-run', None, _('do not shrink, simulate only')),
362 ('', 'sort', 'branchsort', 'name of sort algorithm to use'), 304 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
363 ], 305 ],
364 _('hg shrink [--revlog PATH]')) 306 _('hg shrink [--revlog PATH]'))
365 } 307 }
366 308
367 if __name__ == "__main__": 309 if __name__ == "__main__":