mercurial/revlogutils/nodemap.py
changeset 44307 c577bb4a04d4
parent 44034 ab595920de0e
child 44308 5962fd0d1045
--- a/mercurial/revlogutils/nodemap.py	Mon Feb 10 17:31:05 2020 -0500
+++ b/mercurial/revlogutils/nodemap.py	Wed Jan 15 15:47:12 2020 +0100
@@ -7,9 +7,156 @@
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
-from .. import error
+
+import struct
+
+from .. import (
+    error,
+    node as nodemod,
+    pycompat,
+)
 
 
 class NodeMap(dict):
     def __missing__(self, x):
         raise error.RevlogError(b'unknown node: %s' % x)
+
+
+### Nodemap Trie
+#
+# This is a simple reference implementation to compute and persist a nodemap
+# trie. This reference implementation is write only. The python version of this
+# is not expected to be actually used, since it wont provide performance
+# improvement over existing non-persistent C implementation.
+#
+# The nodemap is persisted as Trie using 4bits-address/16-entries block. each
+# revision can be adressed using its node shortest prefix.
+#
+# The trie is stored as a sequence of block. Each block contains 16 entries
+# (signed 64bit integer, big endian). Each entry can be one of the following:
+#
+#  * value >=  0 -> index of sub-block
+#  * value == -1 -> no value
+#  * value <  -1 -> a revision value: rev = -(value+10)
+#
+# The implementation focus on simplicity, not on performance. A Rust
+# implementation should provide a efficient version of the same binary
+# persistence. This reference python implementation is never meant to be
+# extensively use in production.
+
+
+def persistent_data(index):
+    """return the persistent binary form for a nodemap for a given index
+    """
+    trie = _build_trie(index)
+    return _persist_trie(trie)
+
+
+S_BLOCK = struct.Struct(">" + ("l" * 16))
+
+NO_ENTRY = -1
+# rev 0 need to be -2 because 0 is used by block, -1 is a special value.
+REV_OFFSET = 2
+
+
+def _transform_rev(rev):
+    """Return the number used to represent the rev in the tree.
+
+    (or retrieve a rev number from such representation)
+
+    Note that this is an involution, a function equal to its inverse (i.e.
+    which gives the identity when applied to itself).
+    """
+    return -(rev + REV_OFFSET)
+
+
+def _to_int(hex_digit):
+    """turn an hexadecimal digit into a proper integer"""
+    return int(hex_digit, 16)
+
+
+def _build_trie(index):
+    """build a nodemap trie
+
+    The nodemap stores revision number for each unique prefix.
+
+    Each block is a dictionary with keys in `[0, 15]`. Values are either
+    another block or a revision number.
+    """
+    root = {}
+    for rev in range(len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
+def _insert_into_block(index, level, block, current_rev, current_hex):
+    """insert a new revision in a block
+
+    index: the index we are adding revision for
+    level: the depth of the current block in the trie
+    block: the block currently being considered
+    current_rev: the revision number we are adding
+    current_hex: the hexadecimal representation of the of that revision
+    """
+    hex_digit = _to_int(current_hex[level : level + 1])
+    entry = block.get(hex_digit)
+    if entry is None:
+        # no entry, simply store the revision number
+        block[hex_digit] = current_rev
+    elif isinstance(entry, dict):
+        # need to recurse to an underlying block
+        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+    else:
+        # collision with a previously unique prefix, inserting new
+        # vertices to fit both entry.
+        other_hex = nodemod.hex(index[entry][7])
+        other_rev = entry
+        new = {}
+        block[hex_digit] = new
+        _insert_into_block(index, level + 1, new, other_rev, other_hex)
+        _insert_into_block(index, level + 1, new, current_rev, current_hex)
+
+
+def _persist_trie(root):
+    """turn a nodemap trie into persistent binary data
+
+    See `_build_trie` for nodemap trie structure"""
+    block_map = {}
+    chunks = []
+    for tn in _walk_trie(root):
+        block_map[id(tn)] = len(chunks)
+        chunks.append(_persist_block(tn, block_map))
+    return b''.join(chunks)
+
+
+def _walk_trie(block):
+    """yield all the block in a trie
+
+    Children blocks are always yield before their parent block.
+    """
+    for (_, item) in sorted(block.items()):
+        if isinstance(item, dict):
+            for sub_block in _walk_trie(item):
+                yield sub_block
+    yield block
+
+
+def _persist_block(block_node, block_map):
+    """produce persistent binary data for a single block
+
+    Children block are assumed to be already persisted and present in
+    block_map.
+    """
+    data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
+    return S_BLOCK.pack(*data)
+
+
+def _to_value(item, block_map):
+    """persist any value as an integer"""
+    if item is None:
+        return NO_ENTRY
+    elif isinstance(item, dict):
+        return block_map[id(item)]
+    else:
+        return _transform_rev(item)