comparison hgext/git/index.py @ 52626:42f00965e50b

git: track which commit's file changes have been indexed. Since git and mercurial commit hashes are a function of their contents, we can skip indexing the changed files of a commit if we have already indexed it, as it will never change. To accomplish this, we can add a boolean column to the changelog table to track whether or not we have indexed the files of each commit.
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Fri, 04 Oct 2024 10:51:26 -0400
parents 27a0bfe770eb
children 4dadaf300fe0
comparison
equal deleted inserted replaced
52625:27a0bfe770eb 52626:42f00965e50b
16 from . import gitutil 16 from . import gitutil
17 17
18 18
19 pygit2 = gitutil.get_pygit2() 19 pygit2 = gitutil.get_pygit2()
20 20
21 _CURRENT_SCHEMA_VERSION = 3 21 _CURRENT_SCHEMA_VERSION = 4
22 _SCHEMA = ( 22 _SCHEMA = (
23 """ 23 """
24 CREATE TABLE refs ( 24 CREATE TABLE refs (
25 -- node and name are unique together. There may be more than one name for 25 -- node and name are unique together. There may be more than one name for
26 -- a given node, and there may be no name at all for a given node (in the 26 -- a given node, and there may be no name at all for a given node (in the
46 CREATE TABLE changelog ( 46 CREATE TABLE changelog (
47 rev INTEGER NOT NULL PRIMARY KEY, 47 rev INTEGER NOT NULL PRIMARY KEY,
48 node TEXT NOT NULL, 48 node TEXT NOT NULL,
49 p1 TEXT, 49 p1 TEXT,
50 p2 TEXT, 50 p2 TEXT,
51 synthetic TEXT 51 synthetic TEXT,
52 changedfiles BOOLEAN
52 ); 53 );
53 54
54 CREATE UNIQUE INDEX changelog_node_idx ON changelog(node); 55 CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
55 CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node); 56 CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);
56 57
217 (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode), 218 (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode),
218 ) 219 )
219 db.commit() 220 db.commit()
220 221
221 222
222 def _index_repo_commit(gitrepo, db, commit): 223 def _index_repo_commit(gitrepo, db, node, commit=False):
224 already_done = db.execute(
225 "SELECT changedfiles FROM changelog WHERE node=?", (node.id.hex,)
226 ).fetchone()[0]
227 if already_done:
228 return # This commit has already been indexed
229
230 commit = gitrepo[node]
223 files = {} 231 files = {}
224 # I *think* we only need to check p1 for changed files 232 # I *think* we only need to check p1 for changed files
225 # (and therefore linkrevs), because any node that would 233 # (and therefore linkrevs), because any node that would
226 # actually have this commit as a linkrev would be 234 # actually have this commit as a linkrev would be
227 # completely new in this rev. 235 # completely new in this rev.
244 'INSERT INTO changedfiles (' 252 'INSERT INTO changedfiles ('
245 'node, filename, filenode, p1node, p1filenode, p2node, ' 253 'node, filename, filenode, p1node, p1filenode, p2node, '
246 'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)', 254 'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
247 (commit.id.hex, p, n, None, None, None, None), 255 (commit.id.hex, p, n, None, None, None, None),
248 ) 256 )
257 # Mark the commit as loaded
258 db.execute(
259 "UPDATE changelog SET changedfiles=TRUE WHERE node=?", (commit.id.hex,)
260 )
261 if commit:
262 db.commit()
249 263
250 264
251 def _index_repo( 265 def _index_repo(
252 gitrepo, 266 gitrepo,
253 db, 267 db,
317 p1 = commit.parents[0].id.hex 331 p1 = commit.parents[0].id.hex
318 if len(commit.parents) == 2: 332 if len(commit.parents) == 2:
319 p2 = commit.parents[1].id.hex 333 p2 = commit.parents[1].id.hex
320 pos += 1 334 pos += 1
321 db.execute( 335 db.execute(
322 'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, NULL)', 336 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, TRUE)',
323 (pos, commit.id.hex, p1, p2), 337 (pos, commit.id.hex, p1, p2),
324 ) 338 )
325 else: 339 else:
326 parents = list(commit.parents) 340 parents = list(commit.parents)
327 341
337 synth = commit.id.hex 351 synth = commit.id.hex
338 352
339 p2 = parents.pop(0).id.hex 353 p2 = parents.pop(0).id.hex
340 354
341 db.execute( 355 db.execute(
342 'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, ?)', 356 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, TRUE)',
343 (pos, this, p1, p2, synth), 357 (pos, this, p1, p2, synth),
344 ) 358 )
345 359
346 p1 = this 360 p1 = this
347 361