comparison mercurial/revlog.py @ 35638:edc9330acac1

revlog: introduce 'deltainfo' to distinguish from 'delta'. A 'delta' is a binary diff between two revisions, as returned by revdiff. A 'deltainfo' is an object storing information about a delta, including the 'delta' itself. Formerly, this information was stored in a 7-position tuple, which was less readable.
author Paul Morelle <paul.morelle@octobus.net>
date Fri, 12 Jan 2018 18:58:44 +0100
parents a7d39f08bc66
children 30f5f33250c8
comparison
equal deleted inserted replaced
35637:a7d39f08bc66 35638:edc9330acac1
31 wdirhex, 31 wdirhex,
32 wdirid, 32 wdirid,
33 wdirrev, 33 wdirrev,
34 ) 34 )
35 from .i18n import _ 35 from .i18n import _
36 from .thirdparty import (
37 attr,
38 )
36 from . import ( 39 from . import (
37 ancestor, 40 ancestor,
38 error, 41 error,
39 mdiff, 42 mdiff,
40 policy, 43 policy,
248 previdx = idx 251 previdx = idx
249 252
250 chunk = _trimchunk(revlog, revs, previdx) 253 chunk = _trimchunk(revlog, revs, previdx)
251 if chunk: 254 if chunk:
252 yield chunk 255 yield chunk
256
257 @attr.s(slots=True, frozen=True)
258 class _deltainfo(object):
259 distance = attr.ib()
260 deltalen = attr.ib()
261 data = attr.ib()
262 base = attr.ib()
263 chainbase = attr.ib()
264 chainlen = attr.ib()
265 compresseddeltalen = attr.ib()
253 266
254 # index v0: 267 # index v0:
255 # 4 bytes: offset 268 # 4 bytes: offset
256 # 4 bytes: compressed length 269 # 4 bytes: compressed length
257 # 4 bytes: base rev 270 # 4 bytes: base rev
1817 except KeyError: 1830 except KeyError:
1818 raise RevlogError(_('unknown compression type %r') % t) 1831 raise RevlogError(_('unknown compression type %r') % t)
1819 1832
1820 return compressor.decompress(data) 1833 return compressor.decompress(data)
1821 1834
1822 def _isgooddelta(self, d, textlen): 1835 def _isgooddeltainfo(self, d, textlen):
1823 """Returns True if the given delta is good. Good means that it is within 1836 """Returns True if the given delta is good. Good means that it is within
1824 the disk span, disk size, and chain length bounds that we know to be 1837 the disk span, disk size, and chain length bounds that we know to be
1825 performant.""" 1838 performant."""
1826 if d is None: 1839 if d is None:
1827 return False 1840 return False
1828 1841
1829 # - 'dist' is the distance from the base revision -- bounding it limits 1842 # - 'd.distance' is the distance from the base revision -- bounding it
1830 # the amount of I/O we need to do. 1843 # limits the amount of I/O we need to do.
1831 # - 'compresseddeltalen' is the sum of the total size of deltas we need 1844 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
1832 # to apply -- bounding it limits the amount of CPU we consume. 1845 # need to apply -- bounding it limits the amount of CPU we consume.
1833 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1834 1846
1835 defaultmax = textlen * 4 1847 defaultmax = textlen * 4
1836 maxdist = self._maxdeltachainspan 1848 maxdist = self._maxdeltachainspan
1837 if not maxdist: 1849 if not maxdist:
1838 maxdist = dist # ensure the conditional pass 1850 maxdist = d.distance # ensure the conditional pass
1839 maxdist = max(maxdist, defaultmax) 1851 maxdist = max(maxdist, defaultmax)
1840 if (dist > maxdist or l > textlen or 1852 if (d.distance > maxdist or d.deltalen > textlen or
1841 compresseddeltalen > textlen * 2 or 1853 d.compresseddeltalen > textlen * 2 or
1842 (self._maxchainlen and chainlen > self._maxchainlen)): 1854 (self._maxchainlen and d.chainlen > self._maxchainlen)):
1843 return False 1855 return False
1844 1856
1845 return True 1857 return True
1846 1858
1847 def _getcandidaterevs(self, p1, p2, cachedelta): 1859 def _getcandidaterevs(self, p1, p2, cachedelta):
1921 # must pass the censored index flag to add censored revisions 1933 # must pass the censored index flag to add censored revisions
1922 if not flags & REVIDX_ISCENSORED: 1934 if not flags & REVIDX_ISCENSORED:
1923 raise 1935 raise
1924 return btext[0] 1936 return btext[0]
1925 1937
1926 def _builddelta(self, node, rev, p1, p2, btext, cachedelta, fh, flags): 1938 def _builddeltainfo(self, node, rev, p1, p2, btext, cachedelta, fh, flags):
1927 # can we use the cached delta? 1939 # can we use the cached delta?
1928 if cachedelta and cachedelta[0] == rev: 1940 if cachedelta and cachedelta[0] == rev:
1929 delta = cachedelta[1] 1941 delta = cachedelta[1]
1930 else: 1942 else:
1931 t = self._buildtext(node, p1, p2, btext, cachedelta, fh, flags) 1943 t = self._buildtext(node, p1, p2, btext, cachedelta, fh, flags)
1947 else: 1959 else:
1948 base = chainbase 1960 base = chainbase
1949 chainlen, compresseddeltalen = self._chaininfo(rev) 1961 chainlen, compresseddeltalen = self._chaininfo(rev)
1950 chainlen += 1 1962 chainlen += 1
1951 compresseddeltalen += deltalen 1963 compresseddeltalen += deltalen
1952 return (dist, deltalen, (header, data), base, 1964 return _deltainfo(dist, deltalen, (header, data), base,
1953 chainbase, chainlen, compresseddeltalen) 1965 chainbase, chainlen, compresseddeltalen)
1954 1966
1955 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, 1967 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1956 cachedelta, ifh, dfh, alwayscache=False): 1968 cachedelta, ifh, dfh, alwayscache=False):
1957 """internal function to add revisions to the log 1969 """internal function to add revisions to the log
1958 1970
1979 btext = [rawtext] 1991 btext = [rawtext]
1980 1992
1981 curr = len(self) 1993 curr = len(self)
1982 prev = curr - 1 1994 prev = curr - 1
1983 offset = self.end(prev) 1995 offset = self.end(prev)
1984 delta = None 1996 deltainfo = None
1985 p1r, p2r = self.rev(p1), self.rev(p2) 1997 p1r, p2r = self.rev(p1), self.rev(p2)
1986 1998
1987 # full versions are inserted when the needed deltas 1999 # full versions are inserted when the needed deltas
1988 # become comparable to the uncompressed text 2000 # become comparable to the uncompressed text
1989 if rawtext is None: 2001 if rawtext is None:
1993 textlen = len(rawtext) 2005 textlen = len(rawtext)
1994 2006
1995 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta): 2007 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
1996 nominateddeltas = [] 2008 nominateddeltas = []
1997 for candidaterev in candidaterevs: 2009 for candidaterev in candidaterevs:
1998 candidatedelta = self._builddelta(node, candidaterev, p1, p2, 2010 candidatedelta = self._builddeltainfo(node, candidaterev, p1,
1999 btext, cachedelta, fh, 2011 p2, btext, cachedelta,
2000 flags) 2012 fh, flags)
2001 if self._isgooddelta(candidatedelta, textlen): 2013 if self._isgooddeltainfo(candidatedelta, textlen):
2002 nominateddeltas.append(candidatedelta) 2014 nominateddeltas.append(candidatedelta)
2003 if nominateddeltas: 2015 if nominateddeltas:
2004 delta = min(nominateddeltas, key=lambda x: x[1]) 2016 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
2005 break 2017 break
2006 2018
2007 if delta is not None: 2019 if deltainfo is not None:
2008 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta 2020 base = deltainfo.base
2021 chainbase = deltainfo.chainbase
2022 data = deltainfo.data
2023 l = deltainfo.deltalen
2009 else: 2024 else:
2010 rawtext = self._buildtext(node, p1, p2, btext, cachedelta, fh, 2025 rawtext = self._buildtext(node, p1, p2, btext, cachedelta, fh,
2011 flags) 2026 flags)
2012 data = self.compress(rawtext) 2027 data = self.compress(rawtext)
2013 l = len(data[1]) + len(data[0]) 2028 l = len(data[1]) + len(data[0])