Mercurial > public > mercurial-scm > hg
comparison mercurial/revlog.py @ 46709:3d740058b467
sidedata: move to new sidedata storage in revlogv2
The current (experimental) sidedata system uses flagprocessors both to signify
the presence of sidedata and to store/retrieve it from the raw revlog data. This
proved to be quite fragile from an exchange perspective and a lot more complex
than simply having a dedicated space in the new revlog format.
This change does not handle exchange (ironically), so the test for amend - which
uses a bundle - is broken. Exchange support is split out into the next patches.
Differential Revision: https://phab.mercurial-scm.org/D9993
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Mon, 18 Jan 2021 11:44:51 +0100 |
parents | 913485776542 |
children | 4cd214c9948d |
comparison
equal
deleted
inserted
replaced
46708:358737abeeef | 46709:3d740058b467 |
---|---|
118 _maxinline = 131072 | 118 _maxinline = 131072 |
119 _chunksize = 1048576 | 119 _chunksize = 1048576 |
120 | 120 |
121 # Flag processors for REVIDX_ELLIPSIS. | 121 # Flag processors for REVIDX_ELLIPSIS. |
122 def ellipsisreadprocessor(rl, text): | 122 def ellipsisreadprocessor(rl, text): |
123 return text, False, {} | 123 return text, False |
124 | 124 |
125 | 125 |
126 def ellipsiswriteprocessor(rl, text, sidedata): | 126 def ellipsiswriteprocessor(rl, text): |
127 return text, False | 127 return text, False |
128 | 128 |
129 | 129 |
130 def ellipsisrawprocessor(rl, text): | 130 def ellipsisrawprocessor(rl, text): |
131 return False | 131 return False |
552 if b'maxdeltachainspan' in opts: | 552 if b'maxdeltachainspan' in opts: |
553 self._maxdeltachainspan = opts[b'maxdeltachainspan'] | 553 self._maxdeltachainspan = opts[b'maxdeltachainspan'] |
554 if self._mmaplargeindex and b'mmapindexthreshold' in opts: | 554 if self._mmaplargeindex and b'mmapindexthreshold' in opts: |
555 mmapindexthreshold = opts[b'mmapindexthreshold'] | 555 mmapindexthreshold = opts[b'mmapindexthreshold'] |
556 self.hassidedata = bool(opts.get(b'side-data', False)) | 556 self.hassidedata = bool(opts.get(b'side-data', False)) |
557 if self.hassidedata: | |
558 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors | |
559 self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) | 557 self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) |
560 withsparseread = bool(opts.get(b'with-sparse-read', False)) | 558 withsparseread = bool(opts.get(b'with-sparse-read', False)) |
561 # sparse-revlog forces sparse-read | 559 # sparse-revlog forces sparse-read |
562 self._withsparseread = self._sparserevlog or withsparseread | 560 self._withsparseread = self._sparserevlog or withsparseread |
563 if b'sparse-read-density-threshold' in opts: | 561 if b'sparse-read-density-threshold' in opts: |
854 return self.index[rev][0] & 0xFFFF | 852 return self.index[rev][0] & 0xFFFF |
855 | 853 |
856 def length(self, rev): | 854 def length(self, rev): |
857 return self.index[rev][1] | 855 return self.index[rev][1] |
858 | 856 |
857 def sidedata_length(self, rev): | |
858 if self.version & 0xFFFF != REVLOGV2: | |
859 return 0 | |
860 return self.index[rev][9] | |
861 | |
859 def rawsize(self, rev): | 862 def rawsize(self, rev): |
860 """return the length of the uncompressed text for a given revision""" | 863 """return the length of the uncompressed text for a given revision""" |
861 l = self.index[rev][2] | 864 l = self.index[rev][2] |
862 if l >= 0: | 865 if l >= 0: |
863 return l | 866 return l |
915 raise | 918 raise |
916 | 919 |
917 # Derived from index values. | 920 # Derived from index values. |
918 | 921 |
919 def end(self, rev): | 922 def end(self, rev): |
920 return self.start(rev) + self.length(rev) | 923 return self.start(rev) + self.length(rev) + self.sidedata_length(rev) |
921 | 924 |
922 def parents(self, node): | 925 def parents(self, node): |
923 i = self.index | 926 i = self.index |
924 d = i[self.rev(node)] | 927 d = i[self.rev(node)] |
925 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline | 928 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline |
1851 if raw: | 1854 if raw: |
1852 return text, flagutil.processflagsraw(self, text, flags) | 1855 return text, flagutil.processflagsraw(self, text, flags) |
1853 elif operation == b'read': | 1856 elif operation == b'read': |
1854 return flagutil.processflagsread(self, text, flags) | 1857 return flagutil.processflagsread(self, text, flags) |
1855 else: # write operation | 1858 else: # write operation |
1856 return flagutil.processflagswrite(self, text, flags, None) | 1859 return flagutil.processflagswrite(self, text, flags) |
1857 | 1860 |
1858 def revision(self, nodeorrev, _df=None, raw=False): | 1861 def revision(self, nodeorrev, _df=None, raw=False): |
1859 """return an uncompressed revision of a given node or revision | 1862 """return an uncompressed revision of a given node or revision |
1860 number. | 1863 number. |
1861 | 1864 |
1896 | 1899 |
1897 # ``rawtext`` is the text as stored inside the revlog. Might be the | 1900 # ``rawtext`` is the text as stored inside the revlog. Might be the |
1898 # revision or might need to be processed to retrieve the revision. | 1901 # revision or might need to be processed to retrieve the revision. |
1899 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) | 1902 rev, rawtext, validated = self._rawtext(node, rev, _df=_df) |
1900 | 1903 |
1904 if self.version & 0xFFFF == REVLOGV2: | |
1905 if rev is None: | |
1906 rev = self.rev(node) | |
1907 sidedata = self._sidedata(rev) | |
1908 else: | |
1909 sidedata = {} | |
1910 | |
1901 if raw and validated: | 1911 if raw and validated: |
1902 # if we don't want to process the raw text and that raw | 1912 # if we don't want to process the raw text and that raw |
1903 # text is cached, we can exit early. | 1913 # text is cached, we can exit early. |
1904 return rawtext, {} | 1914 return rawtext, sidedata |
1905 if rev is None: | 1915 if rev is None: |
1906 rev = self.rev(node) | 1916 rev = self.rev(node) |
1907 # the revlog's flag for this revision | 1917 # the revlog's flag for this revision |
1908 # (usually alter its state or content) | 1918 # (usually alter its state or content) |
1909 flags = self.flags(rev) | 1919 flags = self.flags(rev) |
1910 | 1920 |
1911 if validated and flags == REVIDX_DEFAULT_FLAGS: | 1921 if validated and flags == REVIDX_DEFAULT_FLAGS: |
1912 # no extra flags set, no flag processor runs, text = rawtext | 1922 # no extra flags set, no flag processor runs, text = rawtext |
1913 return rawtext, {} | 1923 return rawtext, sidedata |
1914 | 1924 |
1915 sidedata = {} | |
1916 if raw: | 1925 if raw: |
1917 validatehash = flagutil.processflagsraw(self, rawtext, flags) | 1926 validatehash = flagutil.processflagsraw(self, rawtext, flags) |
1918 text = rawtext | 1927 text = rawtext |
1919 else: | 1928 else: |
1920 try: | 1929 r = flagutil.processflagsread(self, rawtext, flags) |
1921 r = flagutil.processflagsread(self, rawtext, flags) | 1930 text, validatehash = r |
1922 except error.SidedataHashError as exc: | |
1923 msg = _(b"integrity check failed on %s:%s sidedata key %d") | |
1924 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey) | |
1925 raise error.RevlogError(msg) | |
1926 text, validatehash, sidedata = r | |
1927 if validatehash: | 1931 if validatehash: |
1928 self.checkhash(text, node, rev=rev) | 1932 self.checkhash(text, node, rev=rev) |
1929 if not validated: | 1933 if not validated: |
1930 self._revisioncache = (node, rev, rawtext) | 1934 self._revisioncache = (node, rev, rawtext) |
1931 | 1935 |
1971 bins = bins[1:] | 1975 bins = bins[1:] |
1972 | 1976 |
1973 rawtext = mdiff.patches(basetext, bins) | 1977 rawtext = mdiff.patches(basetext, bins) |
1974 del basetext # let us have a chance to free memory early | 1978 del basetext # let us have a chance to free memory early |
1975 return (rev, rawtext, False) | 1979 return (rev, rawtext, False) |
1980 | |
1981 def _sidedata(self, rev): | |
1982 """Return the sidedata for a given revision number.""" | |
1983 index_entry = self.index[rev] | |
1984 sidedata_offset = index_entry[8] | |
1985 sidedata_size = index_entry[9] | |
1986 | |
1987 if self._inline: | |
1988 sidedata_offset += self._io.size * (1 + rev) | |
1989 if sidedata_size == 0: | |
1990 return {} | |
1991 | |
1992 segment = self._getsegment(sidedata_offset, sidedata_size) | |
1993 sidedata = sidedatautil.deserialize_sidedata(segment) | |
1994 return sidedata | |
1976 | 1995 |
1977 def rawdata(self, nodeorrev, _df=None): | 1996 def rawdata(self, nodeorrev, _df=None): |
1978 """return an uncompressed raw data of a given node or revision number. | 1997 """return an uncompressed raw data of a given node or revision number. |
1979 | 1998 |
1980 _df - an existing file handle to read from. (internal-only) | 1999 _df - an existing file handle to read from. (internal-only) |
2105 _(b"attempted to add linkrev -1 to %s") % self.indexfile | 2124 _(b"attempted to add linkrev -1 to %s") % self.indexfile |
2106 ) | 2125 ) |
2107 | 2126 |
2108 if sidedata is None: | 2127 if sidedata is None: |
2109 sidedata = {} | 2128 sidedata = {} |
2110 flags = flags & ~REVIDX_SIDEDATA | |
2111 elif not self.hassidedata: | 2129 elif not self.hassidedata: |
2112 raise error.ProgrammingError( | 2130 raise error.ProgrammingError( |
2113 _(b"trying to add sidedata to a revlog who don't support them") | 2131 _(b"trying to add sidedata to a revlog who don't support them") |
2114 ) | 2132 ) |
2115 else: | |
2116 flags |= REVIDX_SIDEDATA | |
2117 | 2133 |
2118 if flags: | 2134 if flags: |
2119 node = node or self.hash(text, p1, p2) | 2135 node = node or self.hash(text, p1, p2) |
2120 | 2136 |
2121 rawtext, validatehash = flagutil.processflagswrite( | 2137 rawtext, validatehash = flagutil.processflagswrite(self, text, flags) |
2122 self, text, flags, sidedata=sidedata | |
2123 ) | |
2124 | 2138 |
2125 # If the flag processor modifies the revision data, ignore any provided | 2139 # If the flag processor modifies the revision data, ignore any provided |
2126 # cachedelta. | 2140 # cachedelta. |
2127 if rawtext != text: | 2141 if rawtext != text: |
2128 cachedelta = None | 2142 cachedelta = None |
2151 p2, | 2165 p2, |
2152 node, | 2166 node, |
2153 flags, | 2167 flags, |
2154 cachedelta=cachedelta, | 2168 cachedelta=cachedelta, |
2155 deltacomputer=deltacomputer, | 2169 deltacomputer=deltacomputer, |
2170 sidedata=sidedata, | |
2156 ) | 2171 ) |
2157 | 2172 |
2158 def addrawrevision( | 2173 def addrawrevision( |
2159 self, | 2174 self, |
2160 rawtext, | 2175 rawtext, |
2164 p2, | 2179 p2, |
2165 node, | 2180 node, |
2166 flags, | 2181 flags, |
2167 cachedelta=None, | 2182 cachedelta=None, |
2168 deltacomputer=None, | 2183 deltacomputer=None, |
2184 sidedata=None, | |
2169 ): | 2185 ): |
2170 """add a raw revision with known flags, node and parents | 2186 """add a raw revision with known flags, node and parents |
2171 useful when reusing a revision not stored in this revlog (ex: received | 2187 useful when reusing a revision not stored in this revlog (ex: received |
2172 over wire, or read from an external bundle). | 2188 over wire, or read from an external bundle). |
2173 """ | 2189 """ |
2186 flags, | 2202 flags, |
2187 cachedelta, | 2203 cachedelta, |
2188 ifh, | 2204 ifh, |
2189 dfh, | 2205 dfh, |
2190 deltacomputer=deltacomputer, | 2206 deltacomputer=deltacomputer, |
2207 sidedata=sidedata, | |
2191 ) | 2208 ) |
2192 finally: | 2209 finally: |
2193 if dfh: | 2210 if dfh: |
2194 dfh.close() | 2211 dfh.close() |
2195 ifh.close() | 2212 ifh.close() |
2279 cachedelta, | 2296 cachedelta, |
2280 ifh, | 2297 ifh, |
2281 dfh, | 2298 dfh, |
2282 alwayscache=False, | 2299 alwayscache=False, |
2283 deltacomputer=None, | 2300 deltacomputer=None, |
2301 sidedata=None, | |
2284 ): | 2302 ): |
2285 """internal function to add revisions to the log | 2303 """internal function to add revisions to the log |
2286 | 2304 |
2287 see addrevision for argument descriptions. | 2305 see addrevision for argument descriptions. |
2288 | 2306 |
2348 | 2366 |
2349 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) | 2367 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) |
2350 | 2368 |
2351 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) | 2369 deltainfo = deltacomputer.finddeltainfo(revinfo, fh) |
2352 | 2370 |
2371 if sidedata: | |
2372 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) | |
2373 sidedata_offset = offset + deltainfo.deltalen | |
2374 else: | |
2375 serialized_sidedata = b"" | |
2376 # Don't store the offset if the sidedata is empty, that way | |
2377 # we can easily detect empty sidedata and they will be no different | |
2378 # than ones we manually add. | |
2379 sidedata_offset = 0 | |
2380 | |
2353 e = ( | 2381 e = ( |
2354 offset_type(offset, flags), | 2382 offset_type(offset, flags), |
2355 deltainfo.deltalen, | 2383 deltainfo.deltalen, |
2356 textlen, | 2384 textlen, |
2357 deltainfo.base, | 2385 deltainfo.base, |
2358 link, | 2386 link, |
2359 p1r, | 2387 p1r, |
2360 p2r, | 2388 p2r, |
2361 node, | 2389 node, |
2362 0, | 2390 sidedata_offset, |
2363 0, | 2391 len(serialized_sidedata), |
2364 ) | 2392 ) |
2365 | 2393 |
2366 if self.version & 0xFFFF != REVLOGV2: | 2394 if self.version & 0xFFFF != REVLOGV2: |
2367 e = e[:8] | 2395 e = e[:8] |
2368 | 2396 |
2369 self.index.append(e) | 2397 self.index.append(e) |
2370 | |
2371 entry = self._io.packentry(e, self.node, self.version, curr) | 2398 entry = self._io.packentry(e, self.node, self.version, curr) |
2372 self._writeentry( | 2399 self._writeentry( |
2373 transaction, ifh, dfh, entry, deltainfo.data, link, offset | 2400 transaction, |
2401 ifh, | |
2402 dfh, | |
2403 entry, | |
2404 deltainfo.data, | |
2405 link, | |
2406 offset, | |
2407 serialized_sidedata, | |
2374 ) | 2408 ) |
2375 | 2409 |
2376 rawtext = btext[0] | 2410 rawtext = btext[0] |
2377 | 2411 |
2378 if alwayscache and rawtext is None: | 2412 if alwayscache and rawtext is None: |
2381 if type(rawtext) == bytes: # only accept immutable objects | 2415 if type(rawtext) == bytes: # only accept immutable objects |
2382 self._revisioncache = (node, curr, rawtext) | 2416 self._revisioncache = (node, curr, rawtext) |
2383 self._chainbasecache[curr] = deltainfo.chainbase | 2417 self._chainbasecache[curr] = deltainfo.chainbase |
2384 return curr | 2418 return curr |
2385 | 2419 |
2386 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset): | 2420 def _writeentry( |
2421 self, transaction, ifh, dfh, entry, data, link, offset, sidedata | |
2422 ): | |
2387 # Files opened in a+ mode have inconsistent behavior on various | 2423 # Files opened in a+ mode have inconsistent behavior on various |
2388 # platforms. Windows requires that a file positioning call be made | 2424 # platforms. Windows requires that a file positioning call be made |
2389 # when the file handle transitions between reads and writes. See | 2425 # when the file handle transitions between reads and writes. See |
2390 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other | 2426 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other |
2391 # platforms, Python or the platform itself can be buggy. Some versions | 2427 # platforms, Python or the platform itself can be buggy. Some versions |
2405 transaction.add(self.datafile, offset) | 2441 transaction.add(self.datafile, offset) |
2406 transaction.add(self.indexfile, curr * len(entry)) | 2442 transaction.add(self.indexfile, curr * len(entry)) |
2407 if data[0]: | 2443 if data[0]: |
2408 dfh.write(data[0]) | 2444 dfh.write(data[0]) |
2409 dfh.write(data[1]) | 2445 dfh.write(data[1]) |
2446 if sidedata: | |
2447 dfh.write(sidedata) | |
2410 ifh.write(entry) | 2448 ifh.write(entry) |
2411 else: | 2449 else: |
2412 offset += curr * self._io.size | 2450 offset += curr * self._io.size |
2413 transaction.add(self.indexfile, offset) | 2451 transaction.add(self.indexfile, offset) |
2414 ifh.write(entry) | 2452 ifh.write(entry) |
2415 ifh.write(data[0]) | 2453 ifh.write(data[0]) |
2416 ifh.write(data[1]) | 2454 ifh.write(data[1]) |
2455 if sidedata: | |
2456 ifh.write(sidedata) | |
2417 self._enforceinlinesize(transaction, ifh) | 2457 self._enforceinlinesize(transaction, ifh) |
2418 nodemaputil.setup_persistent_nodemap(transaction, self) | 2458 nodemaputil.setup_persistent_nodemap(transaction, self) |
2419 | 2459 |
2420 def addgroup( | 2460 def addgroup( |
2421 self, | 2461 self, |