comparison mercurial/revlog.py @ 46709:3d740058b467

sidedata: move to new sidedata storage in revlogv2. The current (experimental) sidedata system uses flagprocessors to signify the presence of sidedata and to store/retrieve it from the raw revlog data. This proved to be quite fragile from an exchange perspective and a lot more complex than simply having a dedicated space in the new revlog format. This change does not handle exchange (ironically), so the test for amend - that uses a bundle - is broken. This functionality is split into the next patches. Differential Revision: https://phab.mercurial-scm.org/D9993
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 18 Jan 2021 11:44:51 +0100
parents 913485776542
children 4cd214c9948d
comparison
equal deleted inserted replaced
46708:358737abeeef 46709:3d740058b467
118 _maxinline = 131072 118 _maxinline = 131072
119 _chunksize = 1048576 119 _chunksize = 1048576
120 120
121 # Flag processors for REVIDX_ELLIPSIS. 121 # Flag processors for REVIDX_ELLIPSIS.
122 def ellipsisreadprocessor(rl, text): 122 def ellipsisreadprocessor(rl, text):
123 return text, False, {} 123 return text, False
124 124
125 125
126 def ellipsiswriteprocessor(rl, text, sidedata): 126 def ellipsiswriteprocessor(rl, text):
127 return text, False 127 return text, False
128 128
129 129
130 def ellipsisrawprocessor(rl, text): 130 def ellipsisrawprocessor(rl, text):
131 return False 131 return False
552 if b'maxdeltachainspan' in opts: 552 if b'maxdeltachainspan' in opts:
553 self._maxdeltachainspan = opts[b'maxdeltachainspan'] 553 self._maxdeltachainspan = opts[b'maxdeltachainspan']
554 if self._mmaplargeindex and b'mmapindexthreshold' in opts: 554 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
555 mmapindexthreshold = opts[b'mmapindexthreshold'] 555 mmapindexthreshold = opts[b'mmapindexthreshold']
556 self.hassidedata = bool(opts.get(b'side-data', False)) 556 self.hassidedata = bool(opts.get(b'side-data', False))
557 if self.hassidedata:
558 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
559 self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) 557 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
560 withsparseread = bool(opts.get(b'with-sparse-read', False)) 558 withsparseread = bool(opts.get(b'with-sparse-read', False))
561 # sparse-revlog forces sparse-read 559 # sparse-revlog forces sparse-read
562 self._withsparseread = self._sparserevlog or withsparseread 560 self._withsparseread = self._sparserevlog or withsparseread
563 if b'sparse-read-density-threshold' in opts: 561 if b'sparse-read-density-threshold' in opts:
854 return self.index[rev][0] & 0xFFFF 852 return self.index[rev][0] & 0xFFFF
855 853
856 def length(self, rev): 854 def length(self, rev):
857 return self.index[rev][1] 855 return self.index[rev][1]
858 856
857 def sidedata_length(self, rev):
858 if self.version & 0xFFFF != REVLOGV2:
859 return 0
860 return self.index[rev][9]
861
859 def rawsize(self, rev): 862 def rawsize(self, rev):
860 """return the length of the uncompressed text for a given revision""" 863 """return the length of the uncompressed text for a given revision"""
861 l = self.index[rev][2] 864 l = self.index[rev][2]
862 if l >= 0: 865 if l >= 0:
863 return l 866 return l
915 raise 918 raise
916 919
917 # Derived from index values. 920 # Derived from index values.
918 921
919 def end(self, rev): 922 def end(self, rev):
920 return self.start(rev) + self.length(rev) 923 return self.start(rev) + self.length(rev) + self.sidedata_length(rev)
921 924
922 def parents(self, node): 925 def parents(self, node):
923 i = self.index 926 i = self.index
924 d = i[self.rev(node)] 927 d = i[self.rev(node)]
925 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline 928 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1851 if raw: 1854 if raw:
1852 return text, flagutil.processflagsraw(self, text, flags) 1855 return text, flagutil.processflagsraw(self, text, flags)
1853 elif operation == b'read': 1856 elif operation == b'read':
1854 return flagutil.processflagsread(self, text, flags) 1857 return flagutil.processflagsread(self, text, flags)
1855 else: # write operation 1858 else: # write operation
1856 return flagutil.processflagswrite(self, text, flags, None) 1859 return flagutil.processflagswrite(self, text, flags)
1857 1860
1858 def revision(self, nodeorrev, _df=None, raw=False): 1861 def revision(self, nodeorrev, _df=None, raw=False):
1859 """return an uncompressed revision of a given node or revision 1862 """return an uncompressed revision of a given node or revision
1860 number. 1863 number.
1861 1864
1896 1899
1897 # ``rawtext`` is the text as stored inside the revlog. Might be the 1900 # ``rawtext`` is the text as stored inside the revlog. Might be the
1898 # revision or might need to be processed to retrieve the revision. 1901 # revision or might need to be processed to retrieve the revision.
1899 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) 1902 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1900 1903
1904 if self.version & 0xFFFF == REVLOGV2:
1905 if rev is None:
1906 rev = self.rev(node)
1907 sidedata = self._sidedata(rev)
1908 else:
1909 sidedata = {}
1910
1901 if raw and validated: 1911 if raw and validated:
1902 # if we don't want to process the raw text and that raw 1912 # if we don't want to process the raw text and that raw
1903 # text is cached, we can exit early. 1913 # text is cached, we can exit early.
1904 return rawtext, {} 1914 return rawtext, sidedata
1905 if rev is None: 1915 if rev is None:
1906 rev = self.rev(node) 1916 rev = self.rev(node)
1907 # the revlog's flag for this revision 1917 # the revlog's flag for this revision
1908 # (usually alter its state or content) 1918 # (usually alter its state or content)
1909 flags = self.flags(rev) 1919 flags = self.flags(rev)
1910 1920
1911 if validated and flags == REVIDX_DEFAULT_FLAGS: 1921 if validated and flags == REVIDX_DEFAULT_FLAGS:
1912 # no extra flags set, no flag processor runs, text = rawtext 1922 # no extra flags set, no flag processor runs, text = rawtext
1913 return rawtext, {} 1923 return rawtext, sidedata
1914 1924
1915 sidedata = {}
1916 if raw: 1925 if raw:
1917 validatehash = flagutil.processflagsraw(self, rawtext, flags) 1926 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1918 text = rawtext 1927 text = rawtext
1919 else: 1928 else:
1920 try: 1929 r = flagutil.processflagsread(self, rawtext, flags)
1921 r = flagutil.processflagsread(self, rawtext, flags) 1930 text, validatehash = r
1922 except error.SidedataHashError as exc:
1923 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1924 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1925 raise error.RevlogError(msg)
1926 text, validatehash, sidedata = r
1927 if validatehash: 1931 if validatehash:
1928 self.checkhash(text, node, rev=rev) 1932 self.checkhash(text, node, rev=rev)
1929 if not validated: 1933 if not validated:
1930 self._revisioncache = (node, rev, rawtext) 1934 self._revisioncache = (node, rev, rawtext)
1931 1935
1971 bins = bins[1:] 1975 bins = bins[1:]
1972 1976
1973 rawtext = mdiff.patches(basetext, bins) 1977 rawtext = mdiff.patches(basetext, bins)
1974 del basetext # let us have a chance to free memory early 1978 del basetext # let us have a chance to free memory early
1975 return (rev, rawtext, False) 1979 return (rev, rawtext, False)
1980
1981 def _sidedata(self, rev):
1982 """Return the sidedata for a given revision number."""
1983 index_entry = self.index[rev]
1984 sidedata_offset = index_entry[8]
1985 sidedata_size = index_entry[9]
1986
1987 if self._inline:
1988 sidedata_offset += self._io.size * (1 + rev)
1989 if sidedata_size == 0:
1990 return {}
1991
1992 segment = self._getsegment(sidedata_offset, sidedata_size)
1993 sidedata = sidedatautil.deserialize_sidedata(segment)
1994 return sidedata
1976 1995
1977 def rawdata(self, nodeorrev, _df=None): 1996 def rawdata(self, nodeorrev, _df=None):
1978 """return an uncompressed raw data of a given node or revision number. 1997 """return an uncompressed raw data of a given node or revision number.
1979 1998
1980 _df - an existing file handle to read from. (internal-only) 1999 _df - an existing file handle to read from. (internal-only)
2105 _(b"attempted to add linkrev -1 to %s") % self.indexfile 2124 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2106 ) 2125 )
2107 2126
2108 if sidedata is None: 2127 if sidedata is None:
2109 sidedata = {} 2128 sidedata = {}
2110 flags = flags & ~REVIDX_SIDEDATA
2111 elif not self.hassidedata: 2129 elif not self.hassidedata:
2112 raise error.ProgrammingError( 2130 raise error.ProgrammingError(
2113 _(b"trying to add sidedata to a revlog who don't support them") 2131 _(b"trying to add sidedata to a revlog who don't support them")
2114 ) 2132 )
2115 else:
2116 flags |= REVIDX_SIDEDATA
2117 2133
2118 if flags: 2134 if flags:
2119 node = node or self.hash(text, p1, p2) 2135 node = node or self.hash(text, p1, p2)
2120 2136
2121 rawtext, validatehash = flagutil.processflagswrite( 2137 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2122 self, text, flags, sidedata=sidedata
2123 )
2124 2138
2125 # If the flag processor modifies the revision data, ignore any provided 2139 # If the flag processor modifies the revision data, ignore any provided
2126 # cachedelta. 2140 # cachedelta.
2127 if rawtext != text: 2141 if rawtext != text:
2128 cachedelta = None 2142 cachedelta = None
2151 p2, 2165 p2,
2152 node, 2166 node,
2153 flags, 2167 flags,
2154 cachedelta=cachedelta, 2168 cachedelta=cachedelta,
2155 deltacomputer=deltacomputer, 2169 deltacomputer=deltacomputer,
2170 sidedata=sidedata,
2156 ) 2171 )
2157 2172
2158 def addrawrevision( 2173 def addrawrevision(
2159 self, 2174 self,
2160 rawtext, 2175 rawtext,
2164 p2, 2179 p2,
2165 node, 2180 node,
2166 flags, 2181 flags,
2167 cachedelta=None, 2182 cachedelta=None,
2168 deltacomputer=None, 2183 deltacomputer=None,
2184 sidedata=None,
2169 ): 2185 ):
2170 """add a raw revision with known flags, node and parents 2186 """add a raw revision with known flags, node and parents
2171 useful when reusing a revision not stored in this revlog (ex: received 2187 useful when reusing a revision not stored in this revlog (ex: received
2172 over wire, or read from an external bundle). 2188 over wire, or read from an external bundle).
2173 """ 2189 """
2186 flags, 2202 flags,
2187 cachedelta, 2203 cachedelta,
2188 ifh, 2204 ifh,
2189 dfh, 2205 dfh,
2190 deltacomputer=deltacomputer, 2206 deltacomputer=deltacomputer,
2207 sidedata=sidedata,
2191 ) 2208 )
2192 finally: 2209 finally:
2193 if dfh: 2210 if dfh:
2194 dfh.close() 2211 dfh.close()
2195 ifh.close() 2212 ifh.close()
2279 cachedelta, 2296 cachedelta,
2280 ifh, 2297 ifh,
2281 dfh, 2298 dfh,
2282 alwayscache=False, 2299 alwayscache=False,
2283 deltacomputer=None, 2300 deltacomputer=None,
2301 sidedata=None,
2284 ): 2302 ):
2285 """internal function to add revisions to the log 2303 """internal function to add revisions to the log
2286 2304
2287 see addrevision for argument descriptions. 2305 see addrevision for argument descriptions.
2288 2306
2348 2366
2349 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) 2367 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2350 2368
2351 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) 2369 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2352 2370
2371 if sidedata:
2372 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2373 sidedata_offset = offset + deltainfo.deltalen
2374 else:
2375 serialized_sidedata = b""
2376 # Don't store the offset if the sidedata is empty, that way
2377 # we can easily detect empty sidedata and they will be no different
2378 # than ones we manually add.
2379 sidedata_offset = 0
2380
2353 e = ( 2381 e = (
2354 offset_type(offset, flags), 2382 offset_type(offset, flags),
2355 deltainfo.deltalen, 2383 deltainfo.deltalen,
2356 textlen, 2384 textlen,
2357 deltainfo.base, 2385 deltainfo.base,
2358 link, 2386 link,
2359 p1r, 2387 p1r,
2360 p2r, 2388 p2r,
2361 node, 2389 node,
2362 0, 2390 sidedata_offset,
2363 0, 2391 len(serialized_sidedata),
2364 ) 2392 )
2365 2393
2366 if self.version & 0xFFFF != REVLOGV2: 2394 if self.version & 0xFFFF != REVLOGV2:
2367 e = e[:8] 2395 e = e[:8]
2368 2396
2369 self.index.append(e) 2397 self.index.append(e)
2370
2371 entry = self._io.packentry(e, self.node, self.version, curr) 2398 entry = self._io.packentry(e, self.node, self.version, curr)
2372 self._writeentry( 2399 self._writeentry(
2373 transaction, ifh, dfh, entry, deltainfo.data, link, offset 2400 transaction,
2401 ifh,
2402 dfh,
2403 entry,
2404 deltainfo.data,
2405 link,
2406 offset,
2407 serialized_sidedata,
2374 ) 2408 )
2375 2409
2376 rawtext = btext[0] 2410 rawtext = btext[0]
2377 2411
2378 if alwayscache and rawtext is None: 2412 if alwayscache and rawtext is None:
2381 if type(rawtext) == bytes: # only accept immutable objects 2415 if type(rawtext) == bytes: # only accept immutable objects
2382 self._revisioncache = (node, curr, rawtext) 2416 self._revisioncache = (node, curr, rawtext)
2383 self._chainbasecache[curr] = deltainfo.chainbase 2417 self._chainbasecache[curr] = deltainfo.chainbase
2384 return curr 2418 return curr
2385 2419
2386 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset): 2420 def _writeentry(
2421 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2422 ):
2387 # Files opened in a+ mode have inconsistent behavior on various 2423 # Files opened in a+ mode have inconsistent behavior on various
2388 # platforms. Windows requires that a file positioning call be made 2424 # platforms. Windows requires that a file positioning call be made
2389 # when the file handle transitions between reads and writes. See 2425 # when the file handle transitions between reads and writes. See
2390 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other 2426 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2391 # platforms, Python or the platform itself can be buggy. Some versions 2427 # platforms, Python or the platform itself can be buggy. Some versions
2405 transaction.add(self.datafile, offset) 2441 transaction.add(self.datafile, offset)
2406 transaction.add(self.indexfile, curr * len(entry)) 2442 transaction.add(self.indexfile, curr * len(entry))
2407 if data[0]: 2443 if data[0]:
2408 dfh.write(data[0]) 2444 dfh.write(data[0])
2409 dfh.write(data[1]) 2445 dfh.write(data[1])
2446 if sidedata:
2447 dfh.write(sidedata)
2410 ifh.write(entry) 2448 ifh.write(entry)
2411 else: 2449 else:
2412 offset += curr * self._io.size 2450 offset += curr * self._io.size
2413 transaction.add(self.indexfile, offset) 2451 transaction.add(self.indexfile, offset)
2414 ifh.write(entry) 2452 ifh.write(entry)
2415 ifh.write(data[0]) 2453 ifh.write(data[0])
2416 ifh.write(data[1]) 2454 ifh.write(data[1])
2455 if sidedata:
2456 ifh.write(sidedata)
2417 self._enforceinlinesize(transaction, ifh) 2457 self._enforceinlinesize(transaction, ifh)
2418 nodemaputil.setup_persistent_nodemap(transaction, self) 2458 nodemaputil.setup_persistent_nodemap(transaction, self)
2419 2459
2420 def addgroup( 2460 def addgroup(
2421 self, 2461 self,