Mercurial > public > mercurial-scm > hg
comparison contrib/shrink-revlog.py @ 10625:add562abc28a
shrink-revlog: remove branchsort algorithm (it behaves poorly)
author | Benoit Boissinot <benoit.boissinot@ens-lyon.org> |
---|---|
date | Wed, 10 Mar 2010 09:48:15 +0100 |
parents | 432eb853a2c6 |
children | 3fc95c3bc3ba |
comparison
equal
deleted
inserted
replaced
10624:432eb853a2c6 | 10625:add562abc28a |
---|---|
22 import os, tempfile, errno | 22 import os, tempfile, errno |
23 from mercurial import revlog, transaction, node, util | 23 from mercurial import revlog, transaction, node, util |
24 from mercurial import changegroup | 24 from mercurial import changegroup |
25 from mercurial.i18n import _ | 25 from mercurial.i18n import _ |
26 | 26 |
27 def toposort_branchsort(ui, rl): | 27 |
28 | |
29 children = {} | |
30 root = [] | |
31 # build children and roots | |
32 ui.status(_('reading revs\n')) | |
33 try: | |
34 for rev in rl: | |
35 ui.progress(_('reading'), rev, total=len(rl)) | |
36 children[rev] = [] | |
37 parents = [p for p in rl.parentrevs(rev) if p != node.nullrev] | |
38 # in case of duplicate parents | |
39 if len(parents) == 2 and parents[0] == parents[1]: | |
40 del parents[1] | |
41 for p in parents: | |
42 assert p in children | |
43 children[p].append(rev) | |
44 | |
45 if len(parents) == 0: | |
46 root.append(rev) | |
47 finally: | |
48 ui.progress(_('reading'), None, total=len(rl)) | |
49 | |
50 # XXX this is a reimplementation of the 'branchsort' topo sort | |
51 # algorithm in hgext.convert.convcmd... would be nice not to duplicate | |
52 # the algorithm | |
53 ui.status(_('sorting revs\n')) | |
54 visit = root | |
55 result = [] | |
56 | |
57 # suboptimal: nodes whose predecessor is not first parent | |
58 suboptimal = 0 | |
59 | |
60 while visit: | |
61 cur = visit.pop(0) | |
62 # revlog will compute delta relative to result[-1], so keep track | |
63 # of nodes where this might result in a large delta | |
64 parents = rl.parentrevs(cur) | |
65 if result: | |
66 if result[-1] != parents[0]: | |
67 suboptimal += 1 | |
68 | |
69 result.append(cur) | |
70 if cur not in children: | |
71 # This only happens if some node's p1 == p2, which can | |
72 # happen in the manifest in certain circumstances. | |
73 continue | |
74 next = [] | |
75 for c in children.pop(cur): | |
76 parents_unseen = [p for p in rl.parentrevs(c) | |
77 if p != node.nullrev and p in children] | |
78 if len(parents_unseen) == 0: | |
79 next.append(c) | |
80 visit = next + visit | |
81 ui.note(_('%d suboptimal nodes\n') % suboptimal) | |
82 return result | |
83 | 28 |
84 def toposort_reversepostorder(ui, rl): | 29 def toposort_reversepostorder(ui, rl): |
85 # postorder of the reverse directed graph | 30 # postorder of the reverse directed graph |
86 | 31 |
87 # map rev to list of parent revs (p2 first) | 32 # map rev to list of parent revs (p2 first) |
238 Rewrites all the entries in some revlog of the current repository | 183 Rewrites all the entries in some revlog of the current repository |
239 (by default, the manifest log) to save space. | 184 (by default, the manifest log) to save space. |
240 | 185 |
241 Different sort algorithms have different performance | 186 Different sort algorithms have different performance |
242 characteristics. Use ``--sort`` to select a sort algorithm so you | 187 characteristics. Use ``--sort`` to select a sort algorithm so you |
243 can determine which works best for your data. The default | 188 can determine which works best for your data. |
244 algorithm, ``branchsort``, works well for workflows with lots of | |
245 active (unmerged) branches, but not so well when all branches have | |
246 been merged and there is only one repository head. | |
247 """ | 189 """ |
248 | 190 |
249 if not repo.local(): | 191 if not repo.local(): |
250 raise util.Abort(_('not a local repository: %s') % repo.root) | 192 raise util.Abort(_('not a local repository: %s') % repo.root) |
251 | 193 |
357 | 299 |
358 cmdtable = { | 300 cmdtable = { |
359 'shrink': (shrink, | 301 'shrink': (shrink, |
360 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')), | 302 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')), |
361 ('n', 'dry-run', None, _('do not shrink, simulate only')), | 303 ('n', 'dry-run', None, _('do not shrink, simulate only')), |
362 ('', 'sort', 'branchsort', 'name of sort algorithm to use'), | 304 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'), |
363 ], | 305 ], |
364 _('hg shrink [--revlog PATH]')) | 306 _('hg shrink [--revlog PATH]')) |
365 } | 307 } |
366 | 308 |
367 if __name__ == "__main__": | 309 if __name__ == "__main__": |