Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlogutils/nodemap.py @ 44350:c577bb4a04d4
nodemap: have some python code writing a nodemap in persistent binary form
This python code aims to be as "simple" as possible. It is a reference
implementation of the data we are going to write on disk (and possibly,
later a way for pure python install to make sure the on disk data are up to
date).
It is not optimized for performance and rebuild the full data structure from
the index every time.
This is a stepping stone toward a persistent nodemap on disk.
Differential Revision: https://phab.mercurial-scm.org/D7834
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 15 Jan 2020 15:47:12 +0100 |
parents | ab595920de0e |
children | 5962fd0d1045 |
comparison
equal
deleted
inserted
replaced
44349:a0ec05d93c8e | 44350:c577bb4a04d4 |
---|---|
5 # | 5 # |
6 # This software may be used and distributed according to the terms of the | 6 # This software may be used and distributed according to the terms of the |
7 # GNU General Public License version 2 or any later version. | 7 # GNU General Public License version 2 or any later version. |
8 | 8 |
9 from __future__ import absolute_import | 9 from __future__ import absolute_import |
10 from .. import error | 10 |
11 import struct | |
12 | |
13 from .. import ( | |
14 error, | |
15 node as nodemod, | |
16 pycompat, | |
17 ) | |
11 | 18 |
12 | 19 |
13 class NodeMap(dict): | 20 class NodeMap(dict): |
14 def __missing__(self, x): | 21 def __missing__(self, x): |
15 raise error.RevlogError(b'unknown node: %s' % x) | 22 raise error.RevlogError(b'unknown node: %s' % x) |
23 | |
24 | |
25 ### Nodemap Trie | |
26 # | |
27 # This is a simple reference implementation to compute and persist a nodemap | |
28 # trie. This reference implementation is write only. The python version of this | |
29 # is not expected to be actually used, since it wont provide performance | |
30 # improvement over existing non-persistent C implementation. | |
31 # | |
32 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each | |
33 # revision can be adressed using its node shortest prefix. | |
34 # | |
35 # The trie is stored as a sequence of block. Each block contains 16 entries | |
36 # (signed 64bit integer, big endian). Each entry can be one of the following: | |
37 # | |
38 # * value >= 0 -> index of sub-block | |
39 # * value == -1 -> no value | |
40 # * value < -1 -> a revision value: rev = -(value+10) | |
41 # | |
42 # The implementation focus on simplicity, not on performance. A Rust | |
43 # implementation should provide a efficient version of the same binary | |
44 # persistence. This reference python implementation is never meant to be | |
45 # extensively use in production. | |
46 | |
47 | |
48 def persistent_data(index): | |
49 """return the persistent binary form for a nodemap for a given index | |
50 """ | |
51 trie = _build_trie(index) | |
52 return _persist_trie(trie) | |
53 | |
54 | |
55 S_BLOCK = struct.Struct(">" + ("l" * 16)) | |
56 | |
57 NO_ENTRY = -1 | |
58 # rev 0 need to be -2 because 0 is used by block, -1 is a special value. | |
59 REV_OFFSET = 2 | |
60 | |
61 | |
62 def _transform_rev(rev): | |
63 """Return the number used to represent the rev in the tree. | |
64 | |
65 (or retrieve a rev number from such representation) | |
66 | |
67 Note that this is an involution, a function equal to its inverse (i.e. | |
68 which gives the identity when applied to itself). | |
69 """ | |
70 return -(rev + REV_OFFSET) | |
71 | |
72 | |
73 def _to_int(hex_digit): | |
74 """turn an hexadecimal digit into a proper integer""" | |
75 return int(hex_digit, 16) | |
76 | |
77 | |
78 def _build_trie(index): | |
79 """build a nodemap trie | |
80 | |
81 The nodemap stores revision number for each unique prefix. | |
82 | |
83 Each block is a dictionary with keys in `[0, 15]`. Values are either | |
84 another block or a revision number. | |
85 """ | |
86 root = {} | |
87 for rev in range(len(index)): | |
88 hex = nodemod.hex(index[rev][7]) | |
89 _insert_into_block(index, 0, root, rev, hex) | |
90 return root | |
91 | |
92 | |
93 def _insert_into_block(index, level, block, current_rev, current_hex): | |
94 """insert a new revision in a block | |
95 | |
96 index: the index we are adding revision for | |
97 level: the depth of the current block in the trie | |
98 block: the block currently being considered | |
99 current_rev: the revision number we are adding | |
100 current_hex: the hexadecimal representation of the of that revision | |
101 """ | |
102 hex_digit = _to_int(current_hex[level : level + 1]) | |
103 entry = block.get(hex_digit) | |
104 if entry is None: | |
105 # no entry, simply store the revision number | |
106 block[hex_digit] = current_rev | |
107 elif isinstance(entry, dict): | |
108 # need to recurse to an underlying block | |
109 _insert_into_block(index, level + 1, entry, current_rev, current_hex) | |
110 else: | |
111 # collision with a previously unique prefix, inserting new | |
112 # vertices to fit both entry. | |
113 other_hex = nodemod.hex(index[entry][7]) | |
114 other_rev = entry | |
115 new = {} | |
116 block[hex_digit] = new | |
117 _insert_into_block(index, level + 1, new, other_rev, other_hex) | |
118 _insert_into_block(index, level + 1, new, current_rev, current_hex) | |
119 | |
120 | |
121 def _persist_trie(root): | |
122 """turn a nodemap trie into persistent binary data | |
123 | |
124 See `_build_trie` for nodemap trie structure""" | |
125 block_map = {} | |
126 chunks = [] | |
127 for tn in _walk_trie(root): | |
128 block_map[id(tn)] = len(chunks) | |
129 chunks.append(_persist_block(tn, block_map)) | |
130 return b''.join(chunks) | |
131 | |
132 | |
133 def _walk_trie(block): | |
134 """yield all the block in a trie | |
135 | |
136 Children blocks are always yield before their parent block. | |
137 """ | |
138 for (_, item) in sorted(block.items()): | |
139 if isinstance(item, dict): | |
140 for sub_block in _walk_trie(item): | |
141 yield sub_block | |
142 yield block | |
143 | |
144 | |
145 def _persist_block(block_node, block_map): | |
146 """produce persistent binary data for a single block | |
147 | |
148 Children block are assumed to be already persisted and present in | |
149 block_map. | |
150 """ | |
151 data = tuple(_to_value(block_node.get(i), block_map) for i in range(16)) | |
152 return S_BLOCK.pack(*data) | |
153 | |
154 | |
155 def _to_value(item, block_map): | |
156 """persist any value as an integer""" | |
157 if item is None: | |
158 return NO_ENTRY | |
159 elif isinstance(item, dict): | |
160 return block_map[id(item)] | |
161 else: | |
162 return _transform_rev(item) |