comparison mercurial/revlogutils/deltas.py @ 40642:9c3c697267db

sparse-revlog: rework the way we enforce chunk size limit

We move from an O(N) algorithm to an O(log(N)) algorithm. The previous algorithm traversed the whole delta chain, looking for the exact point where it became too big, which resulted in most of the delta chain being traversed. Instead, we now use a "binary" approach, slicing the chain in two until we have a chunk of the appropriate size.

We still keep the previous algorithm for the snapshots part. There are few of them and they are large bits of data distant from each other, so the previous algorithm should work well in that case.

To take a practical example, restoring manifest revision '59547c40bc4c' from a reference NetBeans repository (using sparse-revlog): the median time of the `slice-sparse-chain` step of `perfrevlogrevision` improves from 1.109 ms to 0.660 ms.
author Boris Feld <boris.feld@octobus.net>
date Fri, 09 Nov 2018 17:58:37 +0100
parents 85b14f0dc334
children fd1d41ccbe38
comparison
40641:85b14f0dc334 40642:9c3c697267db
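
The commit message above swaps a linear walk of the delta chain for a halving search when enforcing the chunk size limit. As a rough, self-contained illustration of that convergence idea only (not the code from this changeset, which additionally handles snapshots and trims empty revisions through _trimchunk), a sketch over a hypothetical pair of start/end offset lists could look like this:

def slice_to_size(starts, ends, revs, targetsize):
    """Yield contiguous sub-lists of `revs` whose on-disk span is at most
    `targetsize` (a single revision larger than the limit is yielded alone)."""
    startidx = 0
    while startidx < len(revs):
        endidx = len(revs)
        startdata = starts[revs[startidx]]
        # halve the candidate window until its span fits under the target;
        # each step discards half of the remaining candidates, hence O(log(N))
        while ends[revs[endidx - 1]] - startdata > targetsize:
            if endidx - startidx <= 1:
                break  # protect against a single chunk larger than the limit
            endidx -= (endidx - startidx) // 2
        yield revs[startidx:endidx]
        startidx = endidx

# Example: revision spans of 5, 5, 2, 0, 15 and 4 bytes with a 15-byte target.
# starts = [0, 5, 10, 12, 12, 27]; ends = [5, 10, 12, 12, 27, 31]
# list(slice_to_size(starts, ends, list(range(6)), 15))
# -> [[0, 1, 2], [3, 4], [5]]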
77 `revlog._srmingapsize`. 77 `revlog._srmingapsize`.
78 78
79 If individual revision chunks are larger than this limit, they will still 79 If individual revision chunks are larger than this limit, they will still
80 be yielded individually. 80 be yielded individually.
81 81
82 >>> revlog = _testrevlog([ 82 >>> data = [
83 ... 5, #00 (5) 83 ... 5, #00 (5)
84 ... 10, #01 (5) 84 ... 10, #01 (5)
85 ... 12, #02 (2) 85 ... 12, #02 (2)
86 ... 12, #03 (empty) 86 ... 12, #03 (empty)
87 ... 27, #04 (15) 87 ... 27, #04 (15)
94 ... 51, #11 (3) 94 ... 51, #11 (3)
95 ... 74, #12 (23) 95 ... 74, #12 (23)
96 ... 85, #13 (11) 96 ... 85, #13 (11)
97 ... 86, #14 (1) 97 ... 86, #14 (1)
98 ... 91, #15 (5) 98 ... 91, #15 (5)
99 ... ]) 99 ... ]
100 >>> revlog = _testrevlog(data, snapshot=range(16))
100 101
101 >>> list(slicechunk(revlog, list(range(16)))) 102 >>> list(slicechunk(revlog, list(range(16))))
102 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]] 103 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
103 >>> list(slicechunk(revlog, [0, 15])) 104 >>> list(slicechunk(revlog, [0, 15]))
104 [[0], [15]] 105 [[0], [15]]
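
The doctests in this hunk build their revlog through the `_testrevlog` helper defined alongside these functions, which now accepts a `snapshot` argument. For experimenting with the slicing behaviour outside Mercurial, a rough stand-in exposing only the methods this changeset relies on (`start`, `end`, `length`, `issnapshot`) might look like the following; it approximates the helper's observable behaviour and is not its actual definition:

class FakeRevlog(object):
    """Stand-in for the _testrevlog doctest helper: `data` holds the
    cumulative end offset of each revision, `snapshot` lists the revisions
    to report as snapshots."""

    def __init__(self, data, snapshot=()):
        self._data = data
        self._snapshot = set(snapshot)

    def start(self, rev):
        return 0 if rev == 0 else self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def issnapshot(self, rev):
        return rev in self._snapshot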
131 This is intended to be used on chunks that density slicing selected but that 132 This is intended to be used on chunks that density slicing selected but that
132 are still too large compared to the read guarantee of revlog. This might 133 are still too large compared to the read guarantee of revlog. This might
133 happen when the "minimal gap size" interrupted the slicing or when chains are 134 happen when the "minimal gap size" interrupted the slicing or when chains are
134 built in a way that creates large blocks next to each other. 135 built in a way that creates large blocks next to each other.
135 136
136 >>> revlog = _testrevlog([ 137 >>> data = [
137 ... 3, #0 (3) 138 ... 3, #0 (3)
138 ... 5, #1 (2) 139 ... 5, #1 (2)
139 ... 6, #2 (1) 140 ... 6, #2 (1)
140 ... 8, #3 (2) 141 ... 8, #3 (2)
141 ... 8, #4 (empty) 142 ... 8, #4 (empty)
142 ... 11, #5 (3) 143 ... 11, #5 (3)
143 ... 12, #6 (1) 144 ... 12, #6 (1)
144 ... 13, #7 (1) 145 ... 13, #7 (1)
145 ... 14, #8 (1) 146 ... 14, #8 (1)
146 ... ]) 147 ... ]
148
149 == All snapshots cases ==
150 >>> revlog = _testrevlog(data, snapshot=range(9))
147 151
148 Cases where chunk is already small enough 152 Cases where chunk is already small enough
149 >>> list(_slicechunktosize(revlog, [0], 3)) 153 >>> list(_slicechunktosize(revlog, [0], 3))
150 [[0]] 154 [[0]]
151 >>> list(_slicechunktosize(revlog, [6, 7], 3)) 155 >>> list(_slicechunktosize(revlog, [6, 7], 3))
176 [[0], [1]] 180 [[0], [1]]
177 >>> list(_slicechunktosize(revlog, [1, 3], 1)) 181 >>> list(_slicechunktosize(revlog, [1, 3], 1))
178 [[1], [3]] 182 [[1], [3]]
179 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2)) 183 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
180 [[3], [5]] 184 [[3], [5]]
185
186 == No Snapshot cases ==
187 >>> revlog = _testrevlog(data)
188
189 Cases where chunk is already small enough
190 >>> list(_slicechunktosize(revlog, [0], 3))
191 [[0]]
192 >>> list(_slicechunktosize(revlog, [6, 7], 3))
193 [[6, 7]]
194 >>> list(_slicechunktosize(revlog, [0], None))
195 [[0]]
196 >>> list(_slicechunktosize(revlog, [6, 7], None))
197 [[6, 7]]
198
199 cases where we need actual slicing
200 >>> list(_slicechunktosize(revlog, [0, 1], 3))
201 [[0], [1]]
202 >>> list(_slicechunktosize(revlog, [1, 3], 3))
203 [[1], [3]]
204 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
205 [[1], [2, 3]]
206 >>> list(_slicechunktosize(revlog, [3, 5], 3))
207 [[3], [5]]
208 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
209 [[3], [4, 5]]
210 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
211 [[5], [6, 7, 8]]
212 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
213 [[0], [1, 2], [3], [5], [6, 7, 8]]
214
215 Case with too large individual chunk (must return valid chunk)
216 >>> list(_slicechunktosize(revlog, [0, 1], 2))
217 [[0], [1]]
218 >>> list(_slicechunktosize(revlog, [1, 3], 1))
219 [[1], [3]]
220 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
221 [[3], [5]]
222
223 == mixed case ==
224 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
225 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
226 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
181 """ 227 """
182 assert targetsize is None or 0 <= targetsize 228 assert targetsize is None or 0 <= targetsize
183 if targetsize is None or segmentspan(revlog, revs) <= targetsize: 229 startdata = revlog.start(revs[0])
230 enddata = revlog.end(revs[-1])
231 fullspan = enddata - startdata
232 if targetsize is None or fullspan <= targetsize:
184 yield revs 233 yield revs
185 return 234 return
186 235
187 startrevidx = 0 236 startrevidx = 0
188 startdata = revlog.start(revs[0])
189 endrevidx = 0 237 endrevidx = 0
190 iterrevs = enumerate(revs) 238 iterrevs = enumerate(revs)
191 next(iterrevs) # skip first rev. 239 next(iterrevs) # skip first rev.
240 # first step: get snapshots out of the way
192 for idx, r in iterrevs: 241 for idx, r in iterrevs:
193 span = revlog.end(r) - startdata 242 span = revlog.end(r) - startdata
194 if span <= targetsize: 243 snapshot = revlog.issnapshot(r)
244 if span <= targetsize and snapshot:
195 endrevidx = idx 245 endrevidx = idx
196 else: 246 else:
197 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1) 247 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
198 if chunk: 248 if chunk:
199 yield chunk 249 yield chunk
200 startrevidx = idx 250 startrevidx = idx
201 startdata = revlog.start(r) 251 startdata = revlog.start(r)
202 endrevidx = idx 252 endrevidx = idx
203 yield _trimchunk(revlog, revs, startrevidx) 253 if not snapshot:
254 break
255
256 # for the others, we use binary slicing to quickly converge toward valid
257 # chunks (otherwise, we might end up looking for start/end of many
258 # revisions). This logic is not looking for the perfect slicing point, it
259 # focuses on quickly converging toward valid chunks.
260 nbitem = len(revs)
261 while (enddata - startdata) > targetsize:
262 endrevidx = nbitem
263 if nbitem - startrevidx <= 1:
264 break # protect against individual chunk larger than limit
265 localenddata = revlog.end(revs[endrevidx - 1])
266 span = localenddata - startdata
267 while (localenddata - startdata) > targetsize:
268 if endrevidx - startrevidx <= 1:
269 break # protect against individual chunk larger than limit
270 endrevidx -= (endrevidx - startrevidx) // 2
271 localenddata = revlog.end(revs[endrevidx - 1])
272 span = localenddata - startdata
273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
274 if chunk:
275 yield chunk
276 startrevidx = endrevidx
277 startdata = revlog.start(revs[startrevidx])
278
279 chunk = _trimchunk(revlog, revs, startrevidx)
280 if chunk:
281 yield chunk
204 282
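
With a stand-in like the FakeRevlog sketch above (plus _trimchunk from this module, which the function calls to drop trailing empty revisions), the new mixed-case doctest can be reproduced directly; the expected output below is taken from the docstring in this hunk:

revlog = FakeRevlog([3, 5, 6, 8, 8, 11, 12, 13, 14], snapshot=[0, 1, 2])
list(_slicechunktosize(revlog, list(range(9)), 5))
# -> [[0, 1], [2], [3, 4, 5], [6, 7, 8]]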
205 def _slicechunktodensity(revlog, revs, targetdensity=0.5, 283 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
206 mingapsize=0): 284 mingapsize=0):
207 """slice revs to reduce the amount of unrelated data to be read from disk. 285 """slice revs to reduce the amount of unrelated data to be read from disk.
208 286