diff hgext/git/index.py @ 52628:3865451a5fab

git: cache the number of commits to speed up large repo operations

Instead of iterating over the whole changelog table every time we want to know how many commits there are, we can cache the number between Mercurial invocations. Unsurprisingly, this speeds up certain operations on repos with large histories.

The following measurements are all in seconds and they represent the runtime of `hg log -T ' ' -l1 > /dev/null`. In other words, this includes Python startup overhead, etc. On small and medium repos, there is no observable difference in runtime (because of the relatively large overhead of Python runtime startup, and the rest of Mercurial doing useful work), but on large repos the user-visible execution time drops by a factor of 10 or more.

small repo (~600 commits):
  Min.    1st Qu. Median  Mean    3rd Qu. Max.
  0.1052  0.1076  0.1096  0.1102  0.1110  0.1210  (before)
  0.1049  0.1087  0.1106  0.1120  0.1127  0.1302  (after)

medium repo (12k commits):
  Min.    1st Qu. Median  Mean    3rd Qu. Max.
  0.1063  0.1095  0.1116  0.1129  0.1153  0.1349  (before)
  0.1044  0.1092  0.1108  0.1115  0.1130  0.1326  (after)

large repo (1.4M commits):
  Min.    1st Qu. Median  Mean    3rd Qu. Max.
  1.973   2.105   2.256   2.243   2.406   2.443   (before)
  0.144   0.147   0.148   0.150   0.151   0.176   (after)
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Fri, 04 Oct 2024 10:25:24 -0400
parents 4dadaf300fe0
children 70b523d7a60d
line wrap: on
line diff
--- a/hgext/git/index.py	Fri Oct 04 10:51:44 2024 -0400
+++ b/hgext/git/index.py	Fri Oct 04 10:25:24 2024 -0400
@@ -18,7 +18,7 @@
 
 pygit2 = gitutil.get_pygit2()
 
-_CURRENT_SCHEMA_VERSION = 4
+_CURRENT_SCHEMA_VERSION = 5
 _SCHEMA = (
     """
 CREATE TABLE refs (
@@ -72,6 +72,12 @@
 CREATE INDEX changedfiles_nodes_idx
   ON changedfiles(node);
 
+-- Cached values to improve performance
+CREATE TABLE cache (
+  ncommits INTEGER
+);
+INSERT INTO cache (ncommits) VALUES (NULL);
+
 PRAGMA user_version=%d
 """
     % _CURRENT_SCHEMA_VERSION
@@ -398,6 +404,13 @@
             prog.update(pos)
         _index_repo_commit(gitrepo, db, h)
 
+    db.execute(
+        '''
+    UPDATE cache SET
+        ncommits = (SELECT COUNT(1) FROM changelog)
+    '''
+    )
+
     db.commit()
     if prog is not None:
         prog.complete()