Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 34898:1bde8e8e5de0
sparse-read: ignore trailing empty revs in each read chunk
An empty entry in the revlog may happen for two reasons:
- when the file is empty, and the revlog stores a snapshot;
- when there is a merge and both parents were identical.
`hg debugindex -m | awk '$3=="0"{print}' | wc -l` reports 1917 such entries
in my clone of pypy, and 113 in my clone of mercurial.
These empty revisions may be located at the end of a sparse chain, and in some
special cases may lead to reading relatively large amounts of data for nothing.
author | Paul Morelle <paul.morelle@octobus.net> |
---|---|
date | Wed, 18 Oct 2017 15:28:19 +0200 |
parents | 8c9b08a0c48c |
children | 6226668a7169 |
comparison
equal
deleted
inserted
replaced
34897:2e350d2a0eca | 34898:1bde8e8e5de0 |
---|---|
160 s = hashlib.sha1(a) | 160 s = hashlib.sha1(a) |
161 s.update(b) | 161 s.update(b) |
162 s.update(text) | 162 s.update(text) |
163 return s.digest() | 163 return s.digest() |
164 | 164 |
165 def _trimchunk(revlog, revs, startidx, endidx=None): | |
166 """returns revs[startidx:endidx] without empty trailing revs | |
167 """ | |
168 length = revlog.length | |
169 | |
170 if endidx is None: | |
171 endidx = len(revs) | |
172 | |
173 # Trim empty revs at the end, but never the very first revision of a chain | |
174 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0: | |
175 endidx -= 1 | |
176 | |
177 return revs[startidx:endidx] | |
178 | |
165 def _slicechunk(revlog, revs): | 179 def _slicechunk(revlog, revs): |
166 """slice revs to reduce the amount of unrelated data to be read from disk. | 180 """slice revs to reduce the amount of unrelated data to be read from disk. |
167 | 181 |
168 ``revs`` is sliced into groups that should be read in one time. | 182 ``revs`` is sliced into groups that should be read in one time. |
169 Assume that revs are sorted. | 183 Assume that revs are sorted. |
192 prevend = None | 206 prevend = None |
193 for i, rev in enumerate(revs): | 207 for i, rev in enumerate(revs): |
194 revstart = start(rev) | 208 revstart = start(rev) |
195 revlen = length(rev) | 209 revlen = length(rev) |
196 | 210 |
211 # Skip empty revisions to form larger holes | |
212 if revlen == 0: | |
213 continue | |
214 | |
197 if prevend is not None: | 215 if prevend is not None: |
198 gapsize = revstart - prevend | 216 gapsize = revstart - prevend |
199 # only consider holes that are large enough | 217 # only consider holes that are large enough |
200 if gapsize > revlog._srmingapsize: | 218 if gapsize > revlog._srmingapsize: |
201 heapq.heappush(gapsheap, (-gapsize, i)) | 219 heapq.heappush(gapsheap, (-gapsize, i)) |
220 | 238 |
221 # Cut the revs at collected indices | 239 # Cut the revs at collected indices |
222 previdx = 0 | 240 previdx = 0 |
223 while indicesheap: | 241 while indicesheap: |
224 idx = heapq.heappop(indicesheap) | 242 idx = heapq.heappop(indicesheap) |
225 yield revs[previdx:idx] | 243 |
244 chunk = _trimchunk(revlog, revs, previdx, idx) | |
245 if chunk: | |
246 yield chunk | |
247 | |
226 previdx = idx | 248 previdx = idx |
227 yield revs[previdx:] | 249 |
250 chunk = _trimchunk(revlog, revs, previdx) | |
251 if chunk: | |
252 yield chunk | |
228 | 253 |
229 # index v0: | 254 # index v0: |
230 # 4 bytes: offset | 255 # 4 bytes: offset |
231 # 4 bytes: compressed length | 256 # 4 bytes: compressed length |
232 # 4 bytes: base rev | 257 # 4 bytes: base rev |