comparison mercurial/revlog.py @ 34898:1bde8e8e5de0

sparse-read: ignore trailing empty revs in each read chunk An empty entry in the revlog may happen for two reasons: - when the file is empty, and the revlog stores a snapshot; - when there is a merge and both parents were identical. `hg debugindex -m | awk '$3=="0"{print}' | wc -l` gives 1917 such entries in my clone of pypy, and 113 in my clone of mercurial. These empty revisions may be located at the end of a sparse chain, and in some special cases may lead to reading relatively large amounts of data for nothing.
author Paul Morelle <paul.morelle@octobus.net>
date Wed, 18 Oct 2017 15:28:19 +0200
parents 8c9b08a0c48c
children 6226668a7169
comparison
equal deleted inserted replaced
34897:2e350d2a0eca 34898:1bde8e8e5de0
160 s = hashlib.sha1(a) 160 s = hashlib.sha1(a)
161 s.update(b) 161 s.update(b)
162 s.update(text) 162 s.update(text)
163 return s.digest() 163 return s.digest()
164 164
165 def _trimchunk(revlog, revs, startidx, endidx=None):
166 """returns revs[startidx:endidx] without empty trailing revs
167 """
168 length = revlog.length
169
170 if endidx is None:
171 endidx = len(revs)
172
173 # Trim empty revs at the end, but never the very first revision of a chain
174 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
175 endidx -= 1
176
177 return revs[startidx:endidx]
178
165 def _slicechunk(revlog, revs): 179 def _slicechunk(revlog, revs):
166 """slice revs to reduce the amount of unrelated data to be read from disk. 180 """slice revs to reduce the amount of unrelated data to be read from disk.
167 181
168 ``revs`` is sliced into groups that should be read in one time. 182 ``revs`` is sliced into groups that should be read in one time.
169 Assume that revs are sorted. 183 Assume that revs are sorted.
192 prevend = None 206 prevend = None
193 for i, rev in enumerate(revs): 207 for i, rev in enumerate(revs):
194 revstart = start(rev) 208 revstart = start(rev)
195 revlen = length(rev) 209 revlen = length(rev)
196 210
211 # Skip empty revisions to form larger holes
212 if revlen == 0:
213 continue
214
197 if prevend is not None: 215 if prevend is not None:
198 gapsize = revstart - prevend 216 gapsize = revstart - prevend
199 # only consider holes that are large enough 217 # only consider holes that are large enough
200 if gapsize > revlog._srmingapsize: 218 if gapsize > revlog._srmingapsize:
201 heapq.heappush(gapsheap, (-gapsize, i)) 219 heapq.heappush(gapsheap, (-gapsize, i))
220 238
221 # Cut the revs at collected indices 239 # Cut the revs at collected indices
222 previdx = 0 240 previdx = 0
223 while indicesheap: 241 while indicesheap:
224 idx = heapq.heappop(indicesheap) 242 idx = heapq.heappop(indicesheap)
225 yield revs[previdx:idx] 243
244 chunk = _trimchunk(revlog, revs, previdx, idx)
245 if chunk:
246 yield chunk
247
226 previdx = idx 248 previdx = idx
227 yield revs[previdx:] 249
250 chunk = _trimchunk(revlog, revs, previdx)
251 if chunk:
252 yield chunk
228 253
229 # index v0: 254 # index v0:
230 # 4 bytes: offset 255 # 4 bytes: offset
231 # 4 bytes: compressed length 256 # 4 bytes: compressed length
232 # 4 bytes: base rev 257 # 4 bytes: base rev