Mercurial > public > mercurial-scm > hg
comparison hgext/git/index.py @ 52626:42f00965e50b
git: track which commits' file changes have been indexed
Since git and Mercurial commit hashes are a function of the commit contents,
a commit can never change once it exists, so we can skip indexing the changed
files of any commit whose files we have already indexed.
To accomplish this, we add a boolean `changedfiles` column to the changelog
table to track whether or not we have indexed the files of each commit.
author | Josef 'Jeff' Sipek <jeffpc@josefsipek.net> |
---|---|
date | Fri, 04 Oct 2024 10:51:26 -0400 |
parents | 27a0bfe770eb |
children | 4dadaf300fe0 |
comparison
equal
deleted
inserted
replaced
52625:27a0bfe770eb | 52626:42f00965e50b |
---|---|
16 from . import gitutil | 16 from . import gitutil |
17 | 17 |
18 | 18 |
19 pygit2 = gitutil.get_pygit2() | 19 pygit2 = gitutil.get_pygit2() |
20 | 20 |
21 _CURRENT_SCHEMA_VERSION = 3 | 21 _CURRENT_SCHEMA_VERSION = 4 |
22 _SCHEMA = ( | 22 _SCHEMA = ( |
23 """ | 23 """ |
24 CREATE TABLE refs ( | 24 CREATE TABLE refs ( |
25 -- node and name are unique together. There may be more than one name for | 25 -- node and name are unique together. There may be more than one name for |
26 -- a given node, and there may be no name at all for a given node (in the | 26 -- a given node, and there may be no name at all for a given node (in the |
46 CREATE TABLE changelog ( | 46 CREATE TABLE changelog ( |
47 rev INTEGER NOT NULL PRIMARY KEY, | 47 rev INTEGER NOT NULL PRIMARY KEY, |
48 node TEXT NOT NULL, | 48 node TEXT NOT NULL, |
49 p1 TEXT, | 49 p1 TEXT, |
50 p2 TEXT, | 50 p2 TEXT, |
51 synthetic TEXT | 51 synthetic TEXT, |
52 changedfiles BOOLEAN | |
52 ); | 53 ); |
53 | 54 |
54 CREATE UNIQUE INDEX changelog_node_idx ON changelog(node); | 55 CREATE UNIQUE INDEX changelog_node_idx ON changelog(node); |
55 CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node); | 56 CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node); |
56 | 57 |
217 (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode), | 218 (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode), |
218 ) | 219 ) |
219 db.commit() | 220 db.commit() |
220 | 221 |
221 | 222 |
222 def _index_repo_commit(gitrepo, db, commit): | 223 def _index_repo_commit(gitrepo, db, node, commit=False): |
224 already_done = db.execute( | |
225 "SELECT changedfiles FROM changelog WHERE node=?", (node.id.hex,) | |
226 ).fetchone()[0] | |
227 if already_done: | |
228 return # This commit has already been indexed | |
229 | |
230 commit = gitrepo[node] | |
223 files = {} | 231 files = {} |
224 # I *think* we only need to check p1 for changed files | 232 # I *think* we only need to check p1 for changed files |
225 # (and therefore linkrevs), because any node that would | 233 # (and therefore linkrevs), because any node that would |
226 # actually have this commit as a linkrev would be | 234 # actually have this commit as a linkrev would be |
227 # completely new in this rev. | 235 # completely new in this rev. |
244 'INSERT INTO changedfiles (' | 252 'INSERT INTO changedfiles (' |
245 'node, filename, filenode, p1node, p1filenode, p2node, ' | 253 'node, filename, filenode, p1node, p1filenode, p2node, ' |
246 'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)', | 254 'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)', |
247 (commit.id.hex, p, n, None, None, None, None), | 255 (commit.id.hex, p, n, None, None, None, None), |
248 ) | 256 ) |
257 # Mark the commit as loaded | |
258 db.execute( | |
259 "UPDATE changelog SET changedfiles=TRUE WHERE node=?", (commit.id.hex,) | |
260 ) | |
261 if commit: | |
262 db.commit() | |
249 | 263 |
250 | 264 |
251 def _index_repo( | 265 def _index_repo( |
252 gitrepo, | 266 gitrepo, |
253 db, | 267 db, |
317 p1 = commit.parents[0].id.hex | 331 p1 = commit.parents[0].id.hex |
318 if len(commit.parents) == 2: | 332 if len(commit.parents) == 2: |
319 p2 = commit.parents[1].id.hex | 333 p2 = commit.parents[1].id.hex |
320 pos += 1 | 334 pos += 1 |
321 db.execute( | 335 db.execute( |
322 'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, NULL)', | 336 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, TRUE)', |
323 (pos, commit.id.hex, p1, p2), | 337 (pos, commit.id.hex, p1, p2), |
324 ) | 338 ) |
325 else: | 339 else: |
326 parents = list(commit.parents) | 340 parents = list(commit.parents) |
327 | 341 |
337 synth = commit.id.hex | 351 synth = commit.id.hex |
338 | 352 |
339 p2 = parents.pop(0).id.hex | 353 p2 = parents.pop(0).id.hex |
340 | 354 |
341 db.execute( | 355 db.execute( |
342 'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, ?)', | 356 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, TRUE)', |
343 (pos, this, p1, p2, synth), | 357 (pos, this, p1, p2, synth), |
344 ) | 358 ) |
345 | 359 |
346 p1 = this | 360 p1 = this |
347 | 361 |