Mercurial > public > mercurial-scm > hg
comparison hgext/git/index.py @ 52622:aa5844ade247
git: speed up possible head processing during indexing by ~100x
Benchmarking 50 iterations of indexing (see below) shows essentially no
difference for small repos (<1k commits). Medium repos (~12k commits) see
some benefit, but other overheads completely overwhelm it. For large repos
(~122k commits), the 80-100x speedup is clearly visible to the user.
All of the numbers are in seconds and were measured with time.time() calls
placed in _index_repo(). The times exclude the time taken by changedfiles
processing.
Small repo (guilt, 553 commits, 1 head):
| | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|
| before | 0.0008781 | 0.0009274 | 0.0009800 | 0.0012285 | 0.0014637 | 0.0024107 |
| after | 0.0003092 | 0.0003281 | 0.0003519 | 0.0003777 | 0.0003927 | 0.0006843 |
Medium repo (hamlib, 12k commits, 53 heads):
| | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|
| before | 0.04881 | 0.05135 | 0.07632 | 0.06672 | 0.08042 | 0.09415 |
| after | 0.004249 | 0.004420 | 0.004799 | 0.004809 | 0.005051 | 0.006416 |
Large repo (qemu, 122k commits, 50 heads):
| | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|
| before | 4.274 | 4.595 | 4.832 | 6.578 | 8.397 | 9.721 |
| after | 0.05180 | 0.05643 | 0.05865 | 0.06130 | 0.06712 | 0.06872 |
author | Josef 'Jeff' Sipek <jeffpc@josefsipek.net> |
---|---|
date | Wed, 02 Oct 2024 15:01:26 -0400 |
parents | f4733654f144 |
children | 4e2ea270ba6a |
comparison
equal
deleted
inserted
replaced
52621:ab4fb2d15bc9 | 52622:aa5844ade247 |
---|---|
16 from . import gitutil | 16 from . import gitutil |
17 | 17 |
18 | 18 |
19 pygit2 = gitutil.get_pygit2() | 19 pygit2 = gitutil.get_pygit2() |
20 | 20 |
21 _CURRENT_SCHEMA_VERSION = 1 | 21 _CURRENT_SCHEMA_VERSION = 2 |
22 _SCHEMA = ( | 22 _SCHEMA = ( |
23 """ | 23 """ |
24 CREATE TABLE refs ( | 24 CREATE TABLE refs ( |
25 -- node and name are unique together. There may be more than one name for | 25 -- node and name are unique together. There may be more than one name for |
26 -- a given node, and there may be no name at all for a given node (in the | 26 -- a given node, and there may be no name at all for a given node (in the |
32 -- The "possible heads" of the repository, which we use to figure out | 32 -- The "possible heads" of the repository, which we use to figure out |
33 -- if we need to re-walk the changelog. | 33 -- if we need to re-walk the changelog. |
34 CREATE TABLE possible_heads ( | 34 CREATE TABLE possible_heads ( |
35 node TEXT NOT NULL | 35 node TEXT NOT NULL |
36 ); | 36 ); |
37 | |
38 CREATE UNIQUE INDEX possible_heads_idx ON possible_heads(node); | |
37 | 39 |
38 -- The topological heads of the changelog, which hg depends on. | 40 -- The topological heads of the changelog, which hg depends on. |
39 CREATE TABLE heads ( | 41 CREATE TABLE heads ( |
40 node TEXT NOT NULL | 42 node TEXT NOT NULL |
41 ); | 43 ); |
329 'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)', | 331 'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)', |
330 (commit.id.hex, p, n, None, None, None, None), | 332 (commit.id.hex, p, n, None, None, None, None), |
331 ) | 333 ) |
332 db.execute('DELETE FROM heads') | 334 db.execute('DELETE FROM heads') |
333 db.execute('DELETE FROM possible_heads') | 335 db.execute('DELETE FROM possible_heads') |
334 for hid in possible_heads: | 336 db.executemany( |
335 h = hid.hex | 337 'INSERT INTO possible_heads (node) VALUES(?)', |
336 db.execute('INSERT INTO possible_heads (node) VALUES(?)', (h,)) | 338 [(hid.hex,) for hid in possible_heads], |
337 haschild = db.execute( | 339 ) |
338 'SELECT COUNT(*) FROM changelog WHERE p1 = ? OR p2 = ?', (h, h) | 340 db.execute( |
339 ).fetchone()[0] | 341 ''' |
340 if not haschild: | 342 INSERT INTO heads (node) |
341 db.execute('INSERT INTO heads (node) VALUES(?)', (h,)) | 343 SELECT node FROM possible_heads WHERE |
344 node NOT IN ( | |
345 SELECT DISTINCT possible_heads.node FROM changelog, possible_heads WHERE | |
346 changelog.p1 = possible_heads.node OR | |
347 changelog.p2 = possible_heads.node | |
348 ) | |
349 ''' | |
350 ) | |
342 | 351 |
343 db.commit() | 352 db.commit() |
344 if prog is not None: | 353 if prog is not None: |
345 prog.complete() | 354 prog.complete() |
346 | 355 |