changeset 52652:4dadaf300fe0

git: index changed files on-demand

Instead of indexing the changed files for every commit immediately, we can index...

1. heads' changed files immediately
2. other commits' changed files on-demand

This helps a lot on repositories with large histories since the initial mercurial invocation doesn't have to wait for the complete repo history to be indexed.
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Fri, 04 Oct 2024 10:51:44 -0400
parents 42f00965e50b
children 3865451a5fab
files hgext/git/gitlog.py hgext/git/index.py
diffstat 2 files changed, 26 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/git/gitlog.py	Fri Oct 04 10:51:26 2024 -0400
+++ b/hgext/git/gitlog.py	Fri Oct 04 10:51:44 2024 -0400
@@ -340,6 +340,7 @@
         n = self.synthetic(n)
         hn = gitutil.togitnode(n)
         # We've got a real commit!
+        index._index_repo_commit(self.gitrepo, self._db, hn, commit=True)
         files = [
             r[0]
             for r in self._db.execute(
--- a/hgext/git/index.py	Fri Oct 04 10:51:26 2024 -0400
+++ b/hgext/git/index.py	Fri Oct 04 10:51:44 2024 -0400
@@ -222,7 +222,7 @@
 
 def _index_repo_commit(gitrepo, db, node, commit=False):
     already_done = db.execute(
-        "SELECT changedfiles FROM changelog WHERE node=?", (node.id.hex,)
+        "SELECT changedfiles FROM changelog WHERE node=?", (node,)
     ).fetchone()[0]
     if already_done:
         return  # This commit has already been indexed
@@ -333,7 +333,7 @@
                 p2 = commit.parents[1].id.hex
             pos += 1
             db.execute(
-                'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, TRUE)',
+                'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, FALSE)',
                 (pos, commit.id.hex, p1, p2),
             )
         else:
@@ -353,18 +353,12 @@
                 p2 = parents.pop(0).id.hex
 
                 db.execute(
-                    'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, TRUE)',
+                    'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, FALSE)',
                     (pos, this, p1, p2, synth),
                 )
 
                 p1 = this
-
-        num_changedfiles = db.execute(
-            "SELECT COUNT(*) from changedfiles WHERE node = ?",
-            (commit.id.hex,),
-        ).fetchone()[0]
-        if not num_changedfiles:
-            _index_repo_commit(gitrepo, db, commit)
+    # Determine heads from the list of possible heads.
     db.execute('DELETE FROM heads')
     db.execute('DELETE FROM possible_heads')
     db.executemany(
@@ -382,6 +376,27 @@
             )
     '''
     )
+    # Mark all commits with already-loaded changedfiles info
+    db.execute(
+        '''
+    UPDATE changelog SET changedfiles=TRUE WHERE node IN (
+        SELECT DISTINCT node FROM changedfiles
+    )
+    '''
+    )
+
+    if prog is not None:
+        prog.complete()
+
+    # Index the changed files for head commits
+    prog = progress_factory(b'indexing head files')
+    heads = [
+        row[0].decode('ascii') for row in db.execute("SELECT * FROM heads")
+    ]
+    for pos, h in enumerate(heads):
+        if prog is not None:
+            prog.update(pos)
+        _index_repo_commit(gitrepo, db, h)
 
     db.commit()
     if prog is not None: