Mercurial > public > mercurial-scm > hg
annotate hgext/git/index.py @ 52628:3865451a5fab
git: cache the number of commits to speed up large repo operations
Instead of iterating over the whole changelog table every time we want to
know how many commits there are, we can cache the number between mercurial
invocations.
Unsurprisingly, this speeds up certain operations on repos with large
histories.
The following measurements are all in seconds and they represent the runtime
of `hg log -T ' ' -l1 > /dev/null`. In other words, this includes python
startup overhead, etc. On small and medium repos, there is no observable
difference in runtime (because of the relatively large overhead of python
runtime startup, and the rest of mercurial doing useful work), but on large
repos the user-visible execution time drops by a factor of 10 or more.
small repo (~600 commits):
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1052 0.1076 0.1096 0.1102 0.1110 0.1210 (before)
0.1049 0.1087 0.1106 0.1120 0.1127 0.1302 (after)
medium repo (12k commits):
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1063 0.1095 0.1116 0.1129 0.1153 0.1349 (before)
0.1044 0.1092 0.1108 0.1115 0.1130 0.1326 (after)
large repo (1.4M commits):
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.973 2.105 2.256 2.243 2.406 2.443 (before)
0.144 0.147 0.148 0.150 0.151 0.176 (after)
author | Josef 'Jeff' Sipek <jeffpc@josefsipek.net> |
---|---|
date | Fri, 04 Oct 2024 10:25:24 -0400 |
parents | 4dadaf300fe0 |
children | 70b523d7a60d |
rev | line source |
---|---|
51859
f4733654f144
typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
1 from __future__ import annotations |
f4733654f144
typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents:
48875
diff
changeset
|
2 |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
3 import collections |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
4 import os |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
5 import sqlite3 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
6 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
7 from mercurial.i18n import _ |
47012
d55b71393907
node: replace nullid and friends with nodeconstants class
Joerg Sonnenberger <joerg@bec.de>
parents:
46113
diff
changeset
|
8 from mercurial.node import sha1nodeconstants |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
9 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
10 from mercurial import ( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
11 encoding, |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
12 error, |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
13 pycompat, |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
14 ) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
15 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
16 from . import gitutil |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
17 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
18 |
44484
ec54b3d2af0b
git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents:
44477
diff
changeset
|
19 pygit2 = gitutil.get_pygit2() |
ec54b3d2af0b
git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents:
44477
diff
changeset
|
20 |
52628
3865451a5fab
git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52627
diff
changeset
|
# Version number stamped into the sqlite file through ``PRAGMA user_version``
# (last statement of _SCHEMA).  _createdb() accepts only this value, or 0 for
# a database that has not been initialised yet, and aborts on anything else.
_CURRENT_SCHEMA_VERSION = 5
# Statements that build a fresh index database.  _createdb() splits this text
# on ';' and executes the pieces one at a time, so nothing below (including
# the SQL comments) may contain a semicolon other than as a statement
# terminator.
_SCHEMA = (
    """
CREATE TABLE refs (
  -- node and name are unique together. There may be more than one name for
  -- a given node, and there may be no name at all for a given node (in the
  -- case of an anonymous hg head).
  node TEXT NOT NULL,
  name TEXT
);

-- The "possible heads" of the repository, which we use to figure out
-- if we need to re-walk the changelog.
CREATE TABLE possible_heads (
  node TEXT NOT NULL
);

CREATE UNIQUE INDEX possible_heads_idx ON possible_heads(node);

-- The topological heads of the changelog, which hg depends on.
CREATE TABLE heads (
  node TEXT NOT NULL
);

-- A total ordering of the changelog
CREATE TABLE changelog (
  rev INTEGER NOT NULL PRIMARY KEY,
  node TEXT NOT NULL,
  p1 TEXT,
  p2 TEXT,
  synthetic TEXT,
  changedfiles BOOLEAN
);

CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);

-- Changed files for each commit, which lets us dynamically build
-- filelogs.
CREATE TABLE changedfiles (
  node TEXT NOT NULL,
  filename TEXT NOT NULL,
  -- 40 zeroes for deletions
  filenode TEXT NOT NULL,
-- to handle filelog parentage:
  p1node TEXT,
  p1filenode TEXT,
  p2node TEXT,
  p2filenode TEXT
);

CREATE INDEX changedfiles_nodes_idx
  ON changedfiles(node);

-- Cached values to improve performance
CREATE TABLE cache (
  ncommits INTEGER
);
INSERT INTO cache (ncommits) VALUES (NULL);

PRAGMA user_version=%d
"""
    % _CURRENT_SCHEMA_VERSION
)
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
85 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
86 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
def _createdb(path):
    """Open the sqlite index at ``path``, creating its schema when new.

    Args:
      path: location of the sqlite database file.  It is passed through
        encoding.strfromlocal() before being handed to sqlite3.connect().

    Returns:
      The open sqlite3 connection.  Its text_factory is set to bytes and
      ``PRAGMA journal_mode=WAL`` has been issued on it.

    Raises:
      error.Abort: the file reports a ``user_version`` that is neither 0
        (uninitialised database) nor _CURRENT_SCHEMA_VERSION.  The
        connection is closed before the exception propagates.
    """
    db = sqlite3.connect(encoding.strfromlocal(path))
    db.text_factory = bytes

    res = db.execute('PRAGMA user_version').fetchone()[0]

    if res == 0:
        # New database: user_version still reads 0, so lay down the schema.
        # The last statement in _SCHEMA stamps _CURRENT_SCHEMA_VERSION.
        for statement in _SCHEMA.split(';'):
            db.execute(statement.strip())

        db.commit()

    elif res != _CURRENT_SCHEMA_VERSION:
        # We are refusing to use this file, so release the connection
        # instead of leaving it open behind the caller's back.
        db.close()
        raise error.Abort(_(b'sqlite database has unrecognized version'))

    db.execute('PRAGMA journal_mode=WAL')

    return db
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
112 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
113 |
44484
ec54b3d2af0b
git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents:
44477
diff
changeset
|
# Sort flags handed to gitrepo.walk().  When pygit2 could not be loaded the
# flag constants are unavailable, so an empty tuple stands in as a
# placeholder and the module still imports.
_OUR_ORDER = (
    (
        pygit2.GIT_SORT_TOPOLOGICAL
        | pygit2.GIT_SORT_TIME
        | pygit2.GIT_SORT_REVERSE
    )
    if pygit2
    else ()
)

# GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2
_DIFF_FLAGS = 1 << 21
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
123 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
124 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
def _find_nearest_ancestor_introducing_node(
    db, gitrepo, file_path, walk_start, filenode
):
    """Find the nearest ancestor that introduces a file node.

    Args:
      db: a handle to our sqlite database.
      gitrepo: A pygit2.Repository instance.
      file_path: the path of a file in the repo (str).
      walk_start: a pygit2.Oid that is a commit where we should start walking
                  for our nearest ancestor.
      filenode: the hexlified node id of the file content to look for (str).

    Returns:
      The hexlified commit ID of the first commit yielded by
      gitrepo.walk(walk_start, _OUR_ORDER) that the changedfiles table
      records for (file_path, filenode).

    Raises:
      error.ProgrammingError: none of the walked commits is recorded in
        changedfiles for (file_path, filenode).
    """
    assert isinstance(file_path, str), 'file_path must be str, got %r' % type(
        file_path
    )
    assert isinstance(filenode, str), 'filenode must be str, got %r' % type(
        filenode
    )
    # Every commit the index knows to have touched file_path with this exact
    # filenode.  Rows come back as bytes (the connection's text_factory), so
    # decode them to compare against pygit2's str hex ids.
    parent_options = {
        row[0].decode('ascii')
        for row in db.execute(
            'SELECT node FROM changedfiles '
            'WHERE filename = ? AND filenode = ?',
            (file_path, filenode),
        )
    }
    inner_walker = gitrepo.walk(walk_start, _OUR_ORDER)
    for w in inner_walker:
        if w.id.hex in parent_options:
            return w.id.hex
    # Interpolate the message here: the exception does not apply %-formatting
    # to its arguments, so passing the tuple separately left the
    # placeholders unfilled.
    raise error.ProgrammingError(
        'Unable to find introducing commit for %s node %s from %s'
        % (file_path, filenode, walk_start)
    )
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
162 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
163 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
164 def fill_in_filelog(gitrepo, db, startcommit, path, startfilenode): |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
165 """Given a starting commit and path, fill in a filelog's parent pointers. |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
166 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
167 Args: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
168 gitrepo: a pygit2.Repository |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
169 db: a handle to our sqlite database |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
170 startcommit: a hexlified node id for the commit to start at |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
171 path: the path of the file whose parent pointers we should fill in. |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
172 filenode: the hexlified node id of the file at startcommit |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
173 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
174 TODO: make filenode optional |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
175 """ |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
176 assert isinstance( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
177 startcommit, str |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
178 ), 'startcommit must be str, got %r' % type(startcommit) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
179 assert isinstance( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
180 startfilenode, str |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
181 ), 'startfilenode must be str, got %r' % type(startfilenode) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
182 visit = collections.deque([(startcommit, startfilenode)]) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
183 while visit: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
184 cnode, filenode = visit.popleft() |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
185 commit = gitrepo[cnode] |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
186 parents = [] |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
187 for parent in commit.parents: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
188 t = parent.tree |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
189 for comp in path.split('/'): |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
190 try: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
191 t = gitrepo[t[comp].id] |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
192 except KeyError: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
193 break |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
194 else: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
195 introducer = _find_nearest_ancestor_introducing_node( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
196 db, gitrepo, path, parent.id, t.id.hex |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
197 ) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
198 parents.append((introducer, t.id.hex)) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
199 p1node = p1fnode = p2node = p2fnode = gitutil.nullgit |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
200 for par, parfnode in parents: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
201 found = int( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
202 db.execute( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
203 'SELECT COUNT(*) FROM changedfiles WHERE ' |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
204 'node = ? AND filename = ? AND filenode = ? AND ' |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
205 'p1node NOT NULL', |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
206 (par, path, parfnode), |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
207 ).fetchone()[0] |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
208 ) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
209 if found == 0: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
210 assert par is not None |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
211 visit.append((par, parfnode)) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
212 if parents: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
213 p1node, p1fnode = parents[0] |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
214 if len(parents) == 2: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
215 p2node, p2fnode = parents[1] |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
216 if len(parents) > 2: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
217 raise error.ProgrammingError( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
218 b"git support can't handle octopus merges" |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
219 ) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
220 db.execute( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
221 'UPDATE changedfiles SET ' |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
222 'p1node = ?, p1filenode = ?, p2node = ?, p2filenode = ? ' |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
223 'WHERE node = ? AND filename = ? AND filenode = ?', |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
224 (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode), |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
225 ) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
226 db.commit() |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
227 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
228 |
52626
42f00965e50b
git: track which commit's file changes have been indexed
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52625
diff
changeset
|
def _index_repo_commit(gitrepo, db, node, commit=False):
    """Record the files changed by one commit in the ``changedfiles`` table.

    The commit is diffed against its first parent (or against the empty
    tree when it has no parents).  One ``changedfiles`` row is inserted
    for every file on the new side of the diff whose blob id is not the
    null id, and the commit's ``changelog`` row is then flagged with
    ``changedfiles=TRUE`` so later calls return early.

    gitrepo: repository object; indexed by ``node`` to obtain the commit
             and asked for the diff against the first parent.
    db: database connection holding the ``changelog`` and
        ``changedfiles`` tables.
    node: node id of the commit, as stored in ``changelog.node``.  A row
          for it must already exist: the lookup result is subscripted
          without a ``None`` check.
    commit: when true, commit the database transaction before returning;
            when false (the default) the caller is responsible for
            committing.

    Returns None.
    """
    already_done = db.execute(
        "SELECT changedfiles FROM changelog WHERE node=?", (node,)
    ).fetchone()[0]
    if already_done:
        return  # This commit has already been indexed

    # Keep the git object under its own name.  Rebinding ``commit`` here
    # would clobber the caller's flag and make the final db.commit()
    # unconditional.
    gitcommit = gitrepo[node]
    commit_hex = gitcommit.id.hex
    parents = gitcommit.parents

    # I *think* we only need to check p1 for changed files
    # (and therefore linkrevs), because any node that would
    # actually have this commit as a linkrev would be
    # completely new in this rev.
    if parents:
        patchgen = gitrepo.diff(
            parents[0].id.hex, commit_hex, flags=_DIFF_FLAGS
        )
    else:
        patchgen = gitcommit.tree.diff_to_tree(swap=True, flags=_DIFF_FLAGS)

    new_files = (p.delta.new_file for p in patchgen)
    # Keyed by path, so a path can only be recorded once per commit.
    files = {
        nf.path: nf.id.hex
        for nf in new_files
        if nf.id.raw != sha1nodeconstants.nullid
    }

    # We intentionally set NULLs for any file parentage
    # information so it'll get demand-computed later. We
    # used to do it right here, and it was _very_ slow.
    db.executemany(
        'INSERT INTO changedfiles ('
        'node, filename, filenode, p1node, p1filenode, p2node, '
        'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
        [
            (commit_hex, path, filenode, None, None, None, None)
            for path, filenode in files.items()
        ],
    )

    # Mark the commit as loaded
    db.execute(
        "UPDATE changelog SET changedfiles=TRUE WHERE node=?", (commit_hex,)
    )
    if commit:
        db.commit()
52623
4e2ea270ba6a
git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52622
diff
changeset
|
269 |
4e2ea270ba6a
git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52622
diff
changeset
|
270 |
44951
83e41b73d115
git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents:
44946
diff
changeset
|
271 def _index_repo( |
83e41b73d115
git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents:
44946
diff
changeset
|
272 gitrepo, |
83e41b73d115
git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents:
44946
diff
changeset
|
273 db, |
83e41b73d115
git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents:
44946
diff
changeset
|
274 logfn=lambda x: None, |
83e41b73d115
git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents:
44946
diff
changeset
|
275 progress_factory=lambda *args, **kwargs: None, |
83e41b73d115
git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents:
44946
diff
changeset
|
276 ): |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
277 # Identify all references so we can tell the walker to visit all of them. |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
278 all_refs = gitrepo.listall_references() |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
279 possible_heads = set() |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
280 prog = progress_factory(b'refs') |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
281 for pos, ref in enumerate(all_refs): |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
282 if prog is not None: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
283 prog.update(pos) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
284 if not ( |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
285 ref.startswith('refs/heads/') # local branch |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
286 or ref.startswith('refs/tags/') # tag |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
287 or ref.startswith('refs/remotes/') # remote branch |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
288 or ref.startswith('refs/hg/') # from this extension |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
289 ): |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
290 continue |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
291 try: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
292 start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
293 except ValueError: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
294 # No commit to be found, so we don't care for hg's purposes. |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
295 continue |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
296 possible_heads.add(start.id) |
52625
27a0bfe770eb
git: minor wording tweak to a comment
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52624
diff
changeset
|
297 # Optimization: if the list of refs hasn't changed, don't |
27a0bfe770eb
git: minor wording tweak to a comment
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52624
diff
changeset
|
298 # reindex the changelog. This doesn't matter on small |
27a0bfe770eb
git: minor wording tweak to a comment
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52624
diff
changeset
|
299 # repositories, but on even moderately deep histories (e.g., cpython) |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
300 # this is a very important performance win. |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
301 # |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
302 # TODO: we should figure out how to incrementally index history |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
303 # (preferably by detecting rewinds!) so that we don't have to do a |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
304 # full changelog walk every time a new commit is created. |
44946
fb2936c5f6dc
git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents:
44484
diff
changeset
|
305 cache_heads = { |
fb2936c5f6dc
git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents:
44484
diff
changeset
|
306 pycompat.sysstr(x[0]) |
fb2936c5f6dc
git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents:
44484
diff
changeset
|
307 for x in db.execute('SELECT node FROM possible_heads') |
fb2936c5f6dc
git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents:
44484
diff
changeset
|
308 } |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
309 walker = None |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
310 cur_cache_heads = {h.hex for h in possible_heads} |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
311 if cur_cache_heads == cache_heads: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
312 return |
44951
83e41b73d115
git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents:
44946
diff
changeset
|
313 logfn(b'heads mismatch, rebuilding dagcache\n') |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
314 for start in possible_heads: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
315 if walker is None: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
316 walker = gitrepo.walk(start, _OUR_ORDER) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
317 else: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
318 walker.push(start) |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
319 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
320 # Empty out the existing changelog. Even for large-ish histories |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
321 # we can do the top-level "walk all the commits" dance very |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
322 # quickly as long as we don't need to figure out the changed files |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
323 # list. |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
324 db.execute('DELETE FROM changelog') |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
325 if prog is not None: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
326 prog.complete() |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
327 prog = progress_factory(b'commits') |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
328 # This walker is sure to visit all the revisions in history, but |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
329 # only once. |
52624
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
330 pos = -1 |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
331 for commit in walker: |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
332 if prog is not None: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
333 prog.update(pos) |
47051
de26b9a7ec29
git: consistently use str for parents when rebuilding the index database
Matt Harbison <matt_harbison@yahoo.com>
parents:
46113
diff
changeset
|
334 p1 = p2 = gitutil.nullgit |
52624
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
335 if len(commit.parents) <= 2: |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
336 if commit.parents: |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
337 p1 = commit.parents[0].id.hex |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
338 if len(commit.parents) == 2: |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
339 p2 = commit.parents[1].id.hex |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
340 pos += 1 |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
341 db.execute( |
52627
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
342 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, FALSE)', |
52624
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
343 (pos, commit.id.hex, p1, p2), |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
344 ) |
52624
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
345 else: |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
346 parents = list(commit.parents) |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
347 |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
348 p1 = parents.pop(0).id.hex |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
349 while parents: |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
350 pos += 1 |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
351 |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
352 if len(parents) == 1: |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
353 this = commit.id.hex |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
354 synth = None |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
355 else: |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
356 this = "%040x" % pos |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
357 synth = commit.id.hex |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
358 |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
359 p2 = parents.pop(0).id.hex |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
360 |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
361 db.execute( |
52627
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
362 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, FALSE)', |
52624
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
363 (pos, this, p1, p2, synth), |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
364 ) |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
365 |
cdbfe5e7592e
git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52623
diff
changeset
|
366 p1 = this |
52627
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
367 # Determine heads from the list of possible heads. |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
368 db.execute('DELETE FROM heads') |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
369 db.execute('DELETE FROM possible_heads') |
52622
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
370 db.executemany( |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
371 'INSERT INTO possible_heads (node) VALUES(?)', |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
372 [(hid.hex,) for hid in possible_heads], |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
373 ) |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
374 db.execute( |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
375 ''' |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
376 INSERT INTO heads (node) |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
377 SELECT node FROM possible_heads WHERE |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
378 node NOT IN ( |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
379 SELECT DISTINCT possible_heads.node FROM changelog, possible_heads WHERE |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
380 changelog.p1 = possible_heads.node OR |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
381 changelog.p2 = possible_heads.node |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
382 ) |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
383 ''' |
aa5844ade247
git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
51859
diff
changeset
|
384 ) |
52627
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
385 # Mark all commits with already-loaded changefiles info |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
386 db.execute( |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
387 ''' |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
388 UPDATE changelog SET changedfiles=TRUE WHERE node IN ( |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
389 SELECT DISTINCT node FROM changedfiles |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
390 ) |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
391 ''' |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
392 ) |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
393 |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
394 if prog is not None: |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
395 prog.complete() |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
396 |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
397 # Index the changed files for head commits |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
398 prog = progress_factory(b'indexing head files') |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
399 heads = [ |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
400 row[0].decode('ascii') for row in db.execute("SELECT * FROM heads") |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
401 ] |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
402 for pos, h in enumerate(heads): |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
403 if prog is not None: |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
404 prog.update(pos) |
4dadaf300fe0
git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52626
diff
changeset
|
405 _index_repo_commit(gitrepo, db, h) |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
406 |
52628
3865451a5fab
git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52627
diff
changeset
|
407 db.execute( |
3865451a5fab
git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52627
diff
changeset
|
408 ''' |
3865451a5fab
git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52627
diff
changeset
|
409 UPDATE cache SET |
3865451a5fab
git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52627
diff
changeset
|
410 ncommits = (SELECT COUNT(1) FROM changelog) |
3865451a5fab
git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52627
diff
changeset
|
411 ''' |
3865451a5fab
git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52627
diff
changeset
|
412 ) |
3865451a5fab
git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents:
52627
diff
changeset
|
413 |
44477
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
414 db.commit() |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
415 if prog is not None: |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
416 prog.complete() |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
417 |
ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
418 |
44951
83e41b73d115
git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents:
44946
diff
changeset
|
def get_index(
    gitrepo, logfn=lambda x: None, progress_factory=lambda *args, **kwargs: None
):
    """Open the sqlite commit index for ``gitrepo`` and (re)index the repo.

    The database lives at ``<gitrepo.path>/../.hg/cache/git-commits.sqlite``;
    the cache directory is created if it does not exist yet.

    ``logfn`` is a one-argument callable that is forwarded to
    ``_index_repo``; the default discards its argument.

    ``progress_factory`` is forwarded to ``_index_repo``, which calls it
    with a bytes topic and treats a ``None`` result as "no progress
    reporting". The default always returns ``None``.

    Returns the database object produced by ``_createdb`` after
    ``_index_repo`` has run against it.
    """
    cachepath = os.path.join(
        pycompat.fsencode(gitrepo.path), b'..', b'.hg', b'cache'
    )
    # Create the directory unconditionally instead of checking for it
    # first: a separate exists() check races with concurrent hg processes
    # creating the same directory between the check and the makedirs().
    os.makedirs(cachepath, exist_ok=True)
    dbpath = os.path.join(cachepath, b'git-commits.sqlite')
    db = _createdb(dbpath)
    # TODO check against gitrepo heads before doing a full index
    # TODO thread a ui.progress call into this layer
    _index_repo(gitrepo, db, logfn, progress_factory)
    return db