comparison mercurial/revlogutils/debug.py @ 49775:bd3b6f363fb9

debug-revlog: move the code into the revlogutils module. We have a module dedicated to debug code, so let us use it.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 07 Nov 2022 14:24:52 -0500
parents 7c0a383849a8
children 511106bcb16c
comparison
equal deleted inserted replaced
49774:7c0a383849a8 49775:bd3b6f363fb9
4 # Copyright 2022 Octobus <contact@octobus.net> 4 # Copyright 2022 Octobus <contact@octobus.net>
5 # 5 #
6 # This software may be used and distributed according to the terms of the 6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version. 7 # GNU General Public License version 2 or any later version.
8 8
9 import collections
10 import string
11
9 from .. import ( 12 from .. import (
10 node as nodemod, 13 node as nodemod,
14 util,
11 ) 15 )
12 16
13 from . import ( 17 from . import (
14 constants, 18 constants,
15 ) 19 )
265 compression, 269 compression,
266 len(heads), 270 len(heads),
267 clen, 271 clen,
268 ) 272 )
269 ) 273 )
274
275
def debug_revlog(ui, revlog):
    """code for `hg debugrevlog`

    Walk every revision of ``revlog`` once, gather statistics about how
    each revision is stored (empty, full snapshot, intermediate snapshot
    or plain delta), about delta chains and about the first byte of each
    stored chunk, then print a report through ``ui``.

    ``ui`` must provide ``write`` and ``writenoi18n``; both receive bytes.

    ``revlog`` must provide ``_format_version``, ``_format_flags``,
    ``__len__``, ``parentrevs``, ``deltaparent``, ``rawsize``, ``length``,
    ``start``, ``issnapshot``, ``snapshotdepth`` and either
    ``_getsegmentforrevs`` or a ``_revlog`` attribute that has it.

    Nothing is returned.  An empty revlog (no revision at all) is
    supported and yields a report full of zeros instead of raising.
    """
    r = revlog
    format = r._format_version
    v = r._format_flags
    flags = []
    gdelta = False
    if v & constants.FLAG_INLINE_DATA:
        flags.append(b'inline')
    if v & constants.FLAG_GENERALDELTA:
        gdelta = True
        flags.append(b'generaldelta')
    if not flags:
        flags = [b'(none)']

    ### tracks merge vs single parent
    nummerges = 0

    ### tracks ways the "delta" are built
    # nodelta
    numempty = 0
    numemptytext = 0
    numemptydelta = 0
    # full file content
    numfull = 0
    # intermediate snapshot against a prior snapshot
    numsemi = 0
    # snapshot count per depth
    numsnapdepth = collections.defaultdict(lambda: 0)
    # delta against previous revision
    numprev = 0
    # delta against first or second parent (not prev)
    nump1 = 0
    nump2 = 0
    # delta against neither prev nor parents
    numother = 0
    # delta against prev that are also first or second parent
    # (details of `numprev`)
    nump1prev = 0
    nump2prev = 0

    # data about delta chain of each revs
    # (all three lists are indexed by revision number)
    chainlengths = []
    chainbases = []
    chainspans = []

    # data about each revision
    # each entry is a mutable [min, max, total] triplet; `total` is later
    # replaced in place by the average
    datasize = [None, 0, 0]
    fullsize = [None, 0, 0]
    semisize = [None, 0, 0]
    # snapshot size per depth
    snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
    deltasize = [None, 0, 0]
    # keyed by the first byte of the stored chunk (or b'empty')
    chunktypecounts = {}
    chunktypesizes = {}

    def addsize(size, l):
        """fold `size` into the [min, max, total] triplet `l`"""
        if l[0] is None or size < l[0]:
            l[0] = size
        if size > l[1]:
            l[1] = size
        l[2] += size

    numrevs = len(r)
    for rev in range(numrevs):
        p1, p2 = r.parentrevs(rev)
        delta = r.deltaparent(rev)
        if format > 0:
            addsize(r.rawsize(rev), datasize)
        if p2 != nodemod.nullrev:
            nummerges += 1
        size = r.length(rev)
        if delta == nodemod.nullrev:
            # no delta parent: this revision starts a new chain
            chainlengths.append(0)
            chainbases.append(r.start(rev))
            chainspans.append(size)
            if size == 0:
                numempty += 1
                numemptytext += 1
            else:
                numfull += 1
                numsnapdepth[0] += 1
                addsize(size, fullsize)
                addsize(size, snapsizedepth[0])
        else:
            # extend the chain of the delta parent; the span is measured
            # from the start of the chain base to the end of this revision
            chainlengths.append(chainlengths[delta] + 1)
            baseaddr = chainbases[delta]
            revaddr = r.start(rev)
            chainbases.append(baseaddr)
            chainspans.append((revaddr - baseaddr) + size)
            if size == 0:
                numempty += 1
                numemptydelta += 1
            elif r.issnapshot(rev):
                addsize(size, semisize)
                numsemi += 1
                depth = r.snapshotdepth(rev)
                numsnapdepth[depth] += 1
                addsize(size, snapsizedepth[depth])
            else:
                addsize(size, deltasize)
                if delta == rev - 1:
                    numprev += 1
                    if delta == p1:
                        nump1prev += 1
                    elif delta == p2:
                        nump2prev += 1
                elif delta == p1:
                    nump1 += 1
                elif delta == p2:
                    nump2 += 1
                elif delta != nodemod.nullrev:
                    numother += 1

        # Obtain data on the raw chunks in the revlog.
        # NOTE(review): the fallback presumably handles wrapper objects
        # that expose the real revlog as `_revlog` -- confirm with callers.
        if util.safehasattr(r, '_getsegmentforrevs'):
            segment = r._getsegmentforrevs(rev, rev)[1]
        else:
            segment = r._revlog._getsegmentforrevs(rev, rev)[1]
        if segment:
            chunktype = bytes(segment[0:1])
        else:
            chunktype = b'empty'

        if chunktype not in chunktypecounts:
            chunktypecounts[chunktype] = 0
            chunktypesizes[chunktype] = 0

        chunktypecounts[chunktype] += 1
        chunktypesizes[chunktype] += size

    # Adjust size min value for empty cases
    for size in (datasize, fullsize, semisize, deltasize):
        if size[0] is None:
            size[0] = 0

    numdeltas = numrevs - numfull - numempty - numsemi
    numoprev = numprev - nump1prev - nump2prev
    totalrawsize = datasize[2]
    # an empty revlog has nothing to average: keep the total (0) as is
    # instead of dividing by zero
    if numrevs:
        datasize[2] /= numrevs
    fulltotal = fullsize[2]
    if numfull == 0:
        fullsize[2] = 0
    else:
        fullsize[2] /= numfull
    semitotal = semisize[2]
    snaptotal = {}
    if numsemi > 0:
        semisize[2] /= numsemi
    # an entry only exists in `snapsizedepth` once a snapshot of that depth
    # was seen, so the matching `numsnapdepth` counter is never zero here
    for depth in snapsizedepth:
        snaptotal[depth] = snapsizedepth[depth][2]
        snapsizedepth[depth][2] /= numsnapdepth[depth]

    deltatotal = deltasize[2]
    if numdeltas > 0:
        deltasize[2] /= numdeltas
    totalsize = fulltotal + semitotal + deltatotal
    # the chain lists are empty for an empty revlog; report zeros then
    # rather than failing on a division by zero or on `max()` of nothing
    avgchainlen = sum(chainlengths) / numrevs if numrevs else 0
    maxchainlen = max(chainlengths, default=0)
    maxchainspan = max(chainspans, default=0)
    compratio = 1
    if totalsize:
        compratio = totalrawsize / totalsize

    # templates producing a format string whose integer field is as wide
    # as the largest value that will be printed with it
    basedfmtstr = b'%%%dd\n'
    basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'

    def dfmtstr(maxvalue):
        """format string for one integer, sized to fit `maxvalue`"""
        return basedfmtstr % len(str(maxvalue))

    def pcfmtstr(maxvalue, padding=0):
        """format string for "<integer> (<percentage>%)" lines"""
        return basepcfmtstr % (len(str(maxvalue)), b' ' * padding)

    def pcfmt(value, total):
        """(value, percentage of total); 100% when `total` is zero"""
        if total:
            return (value, 100 * float(value) / total)
        else:
            return value, 100.0

    # NOTE(review): the byte literals below are reproduced exactly as they
    # appear in the reviewed source view, which may have collapsed the
    # alignment padding inside them -- confirm against the real file.
    ui.writenoi18n(b'format : %d\n' % format)
    ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))

    ui.write(b'\n')
    fmt = pcfmtstr(totalsize)
    fmt2 = dfmtstr(totalsize)
    ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
    ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
    ui.writenoi18n(
        b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
    )
    ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
    ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
    ui.writenoi18n(
        b' text : '
        + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
    )
    ui.writenoi18n(
        b' delta : '
        + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
    )
    ui.writenoi18n(
        b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
    )
    for depth in sorted(numsnapdepth):
        ui.write(
            (b' lvl-%-3d : ' % depth)
            + fmt % pcfmt(numsnapdepth[depth], numrevs)
        )
    ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
    ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
    ui.writenoi18n(
        b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
    )
    for depth in sorted(numsnapdepth):
        ui.write(
            (b' lvl-%-3d : ' % depth)
            + fmt % pcfmt(snaptotal[depth], totalsize)
        )
    ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))

    letters = string.ascii_letters.encode('ascii')

    def fmtchunktype(chunktype):
        """label for a chunk type: hex value, plus the byte if a letter"""
        if chunktype == b'empty':
            return b' %s : ' % chunktype
        elif chunktype in letters:
            return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
        else:
            return b' 0x%s : ' % nodemod.hex(chunktype)

    ui.write(b'\n')
    ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
    for chunktype in sorted(chunktypecounts):
        ui.write(fmtchunktype(chunktype))
        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
    ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
    for chunktype in sorted(chunktypecounts):
        ui.write(fmtchunktype(chunktype))
        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))

    ui.write(b'\n')
    fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
    ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
    ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
    ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
    ui.writenoi18n(b'compression ratio : ' + fmt % compratio)

    # `datasize` is only filled in when the format version is above 0
    if format > 0:
        ui.write(b'\n')
        ui.writenoi18n(
            b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
            % tuple(datasize)
        )
    ui.writenoi18n(
        b'full revision size (min/max/avg) : %d / %d / %d\n'
        % tuple(fullsize)
    )
    ui.writenoi18n(
        b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
        % tuple(semisize)
    )
    for depth in sorted(snapsizedepth):
        # depth 0 holds the full snapshots, already reported above
        if depth == 0:
            continue
        ui.writenoi18n(
            b' level-%-3d (min/max/avg) : %d / %d / %d\n'
            % ((depth,) + tuple(snapsizedepth[depth]))
        )
    ui.writenoi18n(
        b'delta size (min/max/avg) : %d / %d / %d\n'
        % tuple(deltasize)
    )

    if numdeltas > 0:
        ui.write(b'\n')
        fmt = pcfmtstr(numdeltas)
        fmt2 = pcfmtstr(numdeltas, 4)
        ui.writenoi18n(
            b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
        )
        if numprev > 0:
            ui.writenoi18n(
                b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
            )
            ui.writenoi18n(
                b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
            )
            ui.writenoi18n(
                b' other : ' + fmt2 % pcfmt(numoprev, numprev)
            )
        if gdelta:
            ui.writenoi18n(
                b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
            )
            ui.writenoi18n(
                b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
            )
            ui.writenoi18n(
                b'deltas against other : ' + fmt % pcfmt(numother, numdeltas)
            )