|
1 # rev_cache.py - caching branch information per revision |
|
2 # |
|
3 # This software may be used and distributed according to the terms of the |
|
4 # GNU General Public License version 2 or any later version. |
|
5 from __future__ import annotations |
|
6 |
|
7 import struct |
|
8 |
|
9 from ..node import ( |
|
10 nullrev, |
|
11 ) |
|
12 |
|
13 from .. import ( |
|
14 encoding, |
|
15 error, |
|
16 util, |
|
17 ) |
|
18 |
|
19 from ..utils import ( |
|
20 stringutil, |
|
21 ) |
|
22 |
|
23 calcsize = struct.calcsize |
|
24 pack_into = struct.pack_into |
|
25 unpack_from = struct.unpack_from |
|
26 |
|
27 |
|
28 # Revision branch info cache |
|
29 |
|
30 _rbcversion = b'-v1' |
|
31 _rbcnames = b'rbc-names' + _rbcversion |
|
32 _rbcrevs = b'rbc-revs' + _rbcversion |
|
33 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open] |
|
34 _rbcrecfmt = b'>4sI' |
|
35 _rbcrecsize = calcsize(_rbcrecfmt) |
|
36 _rbcmininc = 64 * _rbcrecsize |
|
37 _rbcnodelen = 4 |
|
38 _rbcbranchidxmask = 0x7FFFFFFF |
|
39 _rbccloseflag = 0x80000000 |
|
40 |
|
41 |
|
42 class rbcrevs: |
|
43 """a byte string consisting of an immutable prefix followed by a mutable suffix""" |
|
44 |
|
45 def __init__(self, revs): |
|
46 self._prefix = revs |
|
47 self._rest = bytearray() |
|
48 |
|
49 def __len__(self): |
|
50 return len(self._prefix) + len(self._rest) |
|
51 |
|
52 def unpack_record(self, rbcrevidx): |
|
53 if rbcrevidx < len(self._prefix): |
|
54 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx) |
|
55 else: |
|
56 return unpack_from( |
|
57 _rbcrecfmt, |
|
58 util.buffer(self._rest), |
|
59 rbcrevidx - len(self._prefix), |
|
60 ) |
|
61 |
|
62 def make_mutable(self): |
|
63 if len(self._prefix) > 0: |
|
64 entirety = bytearray() |
|
65 entirety[:] = self._prefix |
|
66 entirety.extend(self._rest) |
|
67 self._rest = entirety |
|
68 self._prefix = bytearray() |
|
69 |
|
70 def truncate(self, pos): |
|
71 self.make_mutable() |
|
72 del self._rest[pos:] |
|
73 |
|
74 def pack_into(self, rbcrevidx, node, branchidx): |
|
75 if rbcrevidx < len(self._prefix): |
|
76 self.make_mutable() |
|
77 buf = self._rest |
|
78 start_offset = rbcrevidx - len(self._prefix) |
|
79 end_offset = start_offset + _rbcrecsize |
|
80 |
|
81 if len(self._rest) < end_offset: |
|
82 # bytearray doesn't allocate extra space at least in Python 3.7. |
|
83 # When multiple changesets are added in a row, precise resize would |
|
84 # result in quadratic complexity. Overallocate to compensate by |
|
85 # using the classic doubling technique for dynamic arrays instead. |
|
86 # If there was a gap in the map before, less space will be reserved. |
|
87 self._rest.extend(b'\0' * end_offset) |
|
88 return pack_into( |
|
89 _rbcrecfmt, |
|
90 buf, |
|
91 start_offset, |
|
92 node, |
|
93 branchidx, |
|
94 ) |
|
95 |
|
96 def extend(self, extension): |
|
97 return self._rest.extend(extension) |
|
98 |
|
99 def slice(self, begin, end): |
|
100 if begin < len(self._prefix): |
|
101 acc = bytearray() |
|
102 acc[:] = self._prefix[begin:end] |
|
103 acc.extend( |
|
104 self._rest[begin - len(self._prefix) : end - len(self._prefix)] |
|
105 ) |
|
106 return acc |
|
107 return self._rest[begin - len(self._prefix) : end - len(self._prefix)] |
|
108 |
|
109 |
|
110 class revbranchcache: |
|
111 """Persistent cache, mapping from revision number to branch name and close. |
|
112 This is a low level cache, independent of filtering. |
|
113 |
|
114 Branch names are stored in rbc-names in internal encoding separated by 0. |
|
115 rbc-names is append-only, and each branch name is only stored once and will |
|
116 thus have a unique index. |
|
117 |
|
118 The branch info for each revision is stored in rbc-revs as constant size |
|
119 records. The whole file is read into memory, but it is only 'parsed' on |
|
120 demand. The file is usually append-only but will be truncated if repo |
|
121 modification is detected. |
|
122 The record for each revision contains the first 4 bytes of the |
|
123 corresponding node hash, and the record is only used if it still matches. |
|
124 Even a completely trashed rbc-revs fill thus still give the right result |
|
125 while converging towards full recovery ... assuming no incorrectly matching |
|
126 node hashes. |
|
127 The record also contains 4 bytes where 31 bits contains the index of the |
|
128 branch and the last bit indicate that it is a branch close commit. |
|
129 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i |
|
130 and will grow with it but be 1/8th of its size. |
|
131 """ |
|
132 |
|
133 def __init__(self, repo, readonly=True): |
|
134 assert repo.filtername is None |
|
135 self._repo = repo |
|
136 self._names = [] # branch names in local encoding with static index |
|
137 self._rbcrevs = rbcrevs(bytearray()) |
|
138 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen |
|
139 try: |
|
140 bndata = repo.cachevfs.read(_rbcnames) |
|
141 self._rbcsnameslen = len(bndata) # for verification before writing |
|
142 if bndata: |
|
143 self._names = [ |
|
144 encoding.tolocal(bn) for bn in bndata.split(b'\0') |
|
145 ] |
|
146 except (IOError, OSError): |
|
147 if readonly: |
|
148 # don't try to use cache - fall back to the slow path |
|
149 self.branchinfo = self._branchinfo |
|
150 |
|
151 if self._names: |
|
152 try: |
|
153 usemmap = repo.ui.configbool(b'storage', b'revbranchcache.mmap') |
|
154 with repo.cachevfs(_rbcrevs) as fp: |
|
155 if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs): |
|
156 data = util.buffer(util.mmapread(fp)) |
|
157 else: |
|
158 data = fp.read() |
|
159 self._rbcrevs = rbcrevs(data) |
|
160 except (IOError, OSError) as inst: |
|
161 repo.ui.debug( |
|
162 b"couldn't read revision branch cache: %s\n" |
|
163 % stringutil.forcebytestr(inst) |
|
164 ) |
|
165 # remember number of good records on disk |
|
166 self._rbcrevslen = min( |
|
167 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog) |
|
168 ) |
|
169 if self._rbcrevslen == 0: |
|
170 self._names = [] |
|
171 self._rbcnamescount = len(self._names) # number of names read at |
|
172 # _rbcsnameslen |
|
173 |
|
174 def _clear(self): |
|
175 self._rbcsnameslen = 0 |
|
176 del self._names[:] |
|
177 self._rbcnamescount = 0 |
|
178 self._rbcrevslen = len(self._repo.changelog) |
|
179 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize)) |
|
180 util.clearcachedproperty(self, b'_namesreverse') |
|
181 |
|
182 @util.propertycache |
|
183 def _namesreverse(self): |
|
184 return {b: r for r, b in enumerate(self._names)} |
|
185 |
|
186 def branchinfo(self, rev): |
|
187 """Return branch name and close flag for rev, using and updating |
|
188 persistent cache.""" |
|
189 changelog = self._repo.changelog |
|
190 rbcrevidx = rev * _rbcrecsize |
|
191 |
|
192 # avoid negative index, changelog.read(nullrev) is fast without cache |
|
193 if rev == nullrev: |
|
194 return changelog.branchinfo(rev) |
|
195 |
|
196 # if requested rev isn't allocated, grow and cache the rev info |
|
197 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize: |
|
198 return self._branchinfo(rev) |
|
199 |
|
200 # fast path: extract data from cache, use it if node is matching |
|
201 reponode = changelog.node(rev)[:_rbcnodelen] |
|
202 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx) |
|
203 close = bool(branchidx & _rbccloseflag) |
|
204 if close: |
|
205 branchidx &= _rbcbranchidxmask |
|
206 if cachenode == b'\0\0\0\0': |
|
207 pass |
|
208 elif cachenode == reponode: |
|
209 try: |
|
210 return self._names[branchidx], close |
|
211 except IndexError: |
|
212 # recover from invalid reference to unknown branch |
|
213 self._repo.ui.debug( |
|
214 b"referenced branch names not found" |
|
215 b" - rebuilding revision branch cache from scratch\n" |
|
216 ) |
|
217 self._clear() |
|
218 else: |
|
219 # rev/node map has changed, invalidate the cache from here up |
|
220 self._repo.ui.debug( |
|
221 b"history modification detected - truncating " |
|
222 b"revision branch cache to revision %d\n" % rev |
|
223 ) |
|
224 truncate = rbcrevidx + _rbcrecsize |
|
225 self._rbcrevs.truncate(truncate) |
|
226 self._rbcrevslen = min(self._rbcrevslen, truncate) |
|
227 |
|
228 # fall back to slow path and make sure it will be written to disk |
|
229 return self._branchinfo(rev) |
|
230 |
|
231 def _branchinfo(self, rev): |
|
232 """Retrieve branch info from changelog and update _rbcrevs""" |
|
233 changelog = self._repo.changelog |
|
234 b, close = changelog.branchinfo(rev) |
|
235 if b in self._namesreverse: |
|
236 branchidx = self._namesreverse[b] |
|
237 else: |
|
238 branchidx = len(self._names) |
|
239 self._names.append(b) |
|
240 self._namesreverse[b] = branchidx |
|
241 reponode = changelog.node(rev) |
|
242 if close: |
|
243 branchidx |= _rbccloseflag |
|
244 self._setcachedata(rev, reponode, branchidx) |
|
245 return b, close |
|
246 |
|
247 def setdata(self, rev, changelogrevision): |
|
248 """add new data information to the cache""" |
|
249 branch, close = changelogrevision.branchinfo |
|
250 |
|
251 if branch in self._namesreverse: |
|
252 branchidx = self._namesreverse[branch] |
|
253 else: |
|
254 branchidx = len(self._names) |
|
255 self._names.append(branch) |
|
256 self._namesreverse[branch] = branchidx |
|
257 if close: |
|
258 branchidx |= _rbccloseflag |
|
259 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx) |
|
260 # If no cache data were readable (non exists, bad permission, etc) |
|
261 # the cache was bypassing itself by setting: |
|
262 # |
|
263 # self.branchinfo = self._branchinfo |
|
264 # |
|
265 # Since we now have data in the cache, we need to drop this bypassing. |
|
266 if 'branchinfo' in vars(self): |
|
267 del self.branchinfo |
|
268 |
|
269 def _setcachedata(self, rev, node, branchidx): |
|
270 """Writes the node's branch data to the in-memory cache data.""" |
|
271 if rev == nullrev: |
|
272 return |
|
273 rbcrevidx = rev * _rbcrecsize |
|
274 self._rbcrevs.pack_into(rbcrevidx, node, branchidx) |
|
275 self._rbcrevslen = min(self._rbcrevslen, rev) |
|
276 |
|
277 tr = self._repo.currenttransaction() |
|
278 if tr: |
|
279 tr.addfinalize(b'write-revbranchcache', self.write) |
|
280 |
|
281 def write(self, tr=None): |
|
282 """Save branch cache if it is dirty.""" |
|
283 repo = self._repo |
|
284 wlock = None |
|
285 step = b'' |
|
286 try: |
|
287 # write the new names |
|
288 if self._rbcnamescount < len(self._names): |
|
289 wlock = repo.wlock(wait=False) |
|
290 step = b' names' |
|
291 self._writenames(repo) |
|
292 |
|
293 # write the new revs |
|
294 start = self._rbcrevslen * _rbcrecsize |
|
295 if start != len(self._rbcrevs): |
|
296 step = b'' |
|
297 if wlock is None: |
|
298 wlock = repo.wlock(wait=False) |
|
299 self._writerevs(repo, start) |
|
300 |
|
301 except (IOError, OSError, error.Abort, error.LockError) as inst: |
|
302 repo.ui.debug( |
|
303 b"couldn't write revision branch cache%s: %s\n" |
|
304 % (step, stringutil.forcebytestr(inst)) |
|
305 ) |
|
306 finally: |
|
307 if wlock is not None: |
|
308 wlock.release() |
|
309 |
|
310 def _writenames(self, repo): |
|
311 """write the new branch names to revbranchcache""" |
|
312 if self._rbcnamescount != 0: |
|
313 f = repo.cachevfs.open(_rbcnames, b'ab') |
|
314 if f.tell() == self._rbcsnameslen: |
|
315 f.write(b'\0') |
|
316 else: |
|
317 f.close() |
|
318 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames) |
|
319 self._rbcnamescount = 0 |
|
320 self._rbcrevslen = 0 |
|
321 if self._rbcnamescount == 0: |
|
322 # before rewriting names, make sure references are removed |
|
323 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True) |
|
324 f = repo.cachevfs.open(_rbcnames, b'wb') |
|
325 f.write( |
|
326 b'\0'.join( |
|
327 encoding.fromlocal(b) |
|
328 for b in self._names[self._rbcnamescount :] |
|
329 ) |
|
330 ) |
|
331 self._rbcsnameslen = f.tell() |
|
332 f.close() |
|
333 self._rbcnamescount = len(self._names) |
|
334 |
|
335 def _writerevs(self, repo, start): |
|
336 """write the new revs to revbranchcache""" |
|
337 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize) |
|
338 with repo.cachevfs.open(_rbcrevs, b'ab') as f: |
|
339 if f.tell() != start: |
|
340 repo.ui.debug( |
|
341 b"truncating cache/%s to %d\n" % (_rbcrevs, start) |
|
342 ) |
|
343 f.seek(start) |
|
344 if f.tell() != start: |
|
345 start = 0 |
|
346 f.seek(start) |
|
347 f.truncate() |
|
348 end = revs * _rbcrecsize |
|
349 f.write(self._rbcrevs.slice(start, end)) |
|
350 self._rbcrevslen = revs |