comparison mercurial/revlogutils/nodemap.py @ 44311:2b72c4ff8ed1

nodemap: use an intermediate "docket" file to carry small metadata This intermediate file will make mmapping, transaction and content validation easier. (Most of this usefulness will arrive gradually in later changesets.) In particular it will become very useful to append new data at the end of the raw file instead of rewriting the file on each transaction. See the in-code comments for details. Differential Revision: https://phab.mercurial-scm.org/D7838
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 15 Jan 2020 15:47:50 +0100
parents daad3aace942
children 563dfdfd01a4
comparison
equal deleted inserted replaced
44310:daad3aace942 44311:2b72c4ff8ed1
6 # This software may be used and distributed according to the terms of the 6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version. 7 # GNU General Public License version 2 or any later version.
8 8
9 from __future__ import absolute_import 9 from __future__ import absolute_import
10 10
11 import os
11 import struct 12 import struct
12 13
13 from .. import ( 14 from .. import (
14 error, 15 error,
15 node as nodemod, 16 node as nodemod,
24 25
def persisted_data(revlog):
    """Read the persistent nodemap raw data for a revlog from disk.

    The docket file (``revlog.nodemap_file``) is read first. It starts with
    a one-byte format version, followed by a one-byte uid length and the uid
    itself. The uid selects the raw data file whose content is returned.

    Returns None when the revlog has no nodemap file configured, when the
    docket yields no data, when the docket was written with a different
    on-disk version, or when the docket is too short to contain the header
    and the uid it announces.
    """
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    # A truncated docket would make the unpack below raise struct.error;
    # treat it like an unusable nodemap instead of crashing the reader.
    if len(pdata) < offset + S_HEADER.size:
        return None
    (uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    offset += S_HEADER.size
    uid = pdata[offset : offset + uuid_size]
    if len(uid) != uuid_size:
        # The docket ends before the announced uid does: a partial uid would
        # point at the wrong raw data file.
        return None

    filename = _rawdata_filepath(revlog, uid)
    return revlog.opener.tryread(filename)
30 44
31 45
32 def setup_persistent_nodemap(tr, revlog): 46 def setup_persistent_nodemap(tr, revlog):
33 """Install whatever is needed transaction side to persist a nodemap on disk 47 """Install whatever is needed transaction side to persist a nodemap on disk
34 48
53 ) 67 )
54 if revlog.nodemap_file is None: 68 if revlog.nodemap_file is None:
55 msg = "calling persist nodemap on a revlog without the feature enableb" 69 msg = "calling persist nodemap on a revlog without the feature enableb"
56 raise error.ProgrammingError(msg) 70 raise error.ProgrammingError(msg)
57 data = persistent_data(revlog.index) 71 data = persistent_data(revlog.index)
72 uid = _make_uid()
73 datafile = _rawdata_filepath(revlog, uid)
58 # EXP-TODO: if this is a cache, this should use a cache vfs, not a 74 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
59 # store vfs 75 # store vfs
60 with revlog.opener(revlog.nodemap_file, b'w') as f: 76 with revlog.opener(datafile, b'w') as fd:
61 f.write(data) 77 fd.write(data)
78 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
79 # store vfs
80 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
81 fp.write(_serialize_docket(uid))
62 # EXP-TODO: if the transaction abort, we should remove the new data and 82 # EXP-TODO: if the transaction abort, we should remove the new data and
63 # reinstall the old one. (This will be simpler when the file format get a 83 # reinstall the old one.
64 # bit more advanced) 84
85
86 ### Nodemap docket file
87 #
88 # The nodemap data are stored on disk using 2 files:
89 #
90 # * a raw data file containing a persistent nodemap
91 # (see `Nodemap Trie` section)
92 #
93 # * a small "docket" file containing metadata
94 #
95 # While the nodemap data can be multiple tens of megabytes, the "docket" is
96 # small, so it is easy to update it atomically or to duplicate its content
97 # during a transaction.
98 #
99 # Multiple raw data files can exist at the same time (the currently valid one
100 # and a new one being used by an in-progress transaction). To accommodate
101 # this, the filename hosting the raw data has a variable part. The exact
102 # filename is specified inside the "docket" file.
103 #
104 # The docket file contains information to find, qualify and validate the raw
105 # data. Its content is currently very light, but it will expand as the on disk
106 # nodemap gains the necessary features to be used in production.
107
108 # version 0 is experimental, no BC guarantee, do not use outside of tests.
109 ONDISK_VERSION = 0
110
111 S_VERSION = struct.Struct(">B")
112 S_HEADER = struct.Struct(">B")
113
114 ID_SIZE = 8
115
116
def _make_uid():
    """Build a fresh random identifier.

    `ID_SIZE` bytes are drawn from `os.urandom` and passed through
    `nodemod.hex`, so the identifier is made of ascii characters only."""
    entropy = os.urandom(ID_SIZE)
    return nodemod.hex(entropy)
122
123
def _serialize_docket(uid):
    """Build the serialized bytes of a docket referencing `uid`.

    Layout, in order: the on-disk format version (packed with `S_VERSION`),
    the length of the uid (packed with `S_HEADER`), then the uid itself."""
    pieces = (
        S_VERSION.pack(ONDISK_VERSION),
        S_HEADER.pack(len(uid)),
        uid,
    )
    return b''.join(pieces)
131
132
133 def _rawdata_filepath(revlog, uid):
134 """The (vfs relative) nodemap's rawdata file for a given uid"""
135 prefix = revlog.nodemap_file[:-2]
136 return b"%s-%s.nd" % (prefix, uid)
65 137
66 138
67 ### Nodemap Trie 139 ### Nodemap Trie
68 # 140 #
69 # This is a simple reference implementation to compute and persist a nodemap 141 # This is a simple reference implementation to compute and persist a nodemap