hgext/git/index.py
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Fri, 04 Oct 2024 10:25:24 -0400
changeset 52628 3865451a5fab
parent 52627 4dadaf300fe0
child 52630 70b523d7a60d
permissions -rw-r--r--
git: cache the number of commits to speed up large repo operations Instead of iterating over the whole changelog table every time we want to know how many commits there are, we can cache the number between mercurial invocations. Unsurprisingly, this speeds up certain operations on repos with large histories. The following measurements are all in seconds and they represent the runtime of `hg log -T ' ' -l1 > /dev/null`. In other words, this includes python startup overhead, etc. On small and medium repos, there is no observable difference in runtime (because of the relatively large overhead of python runtime startup, and the rest of mercurial doing useful work), but on large repos the user-visible execution time drops by a factor of 10x or more. small repo (~600 commits): Min. 1st Qu. Median Mean 3rd Qu. Max. 0.1052 0.1076 0.1096 0.1102 0.1110 0.1210 (before) 0.1049 0.1087 0.1106 0.1120 0.1127 0.1302 (after) medium repo (12k commits): Min. 1st Qu. Median Mean 3rd Qu. Max. 0.1063 0.1095 0.1116 0.1129 0.1153 0.1349 (before) 0.1044 0.1092 0.1108 0.1115 0.1130 0.1326 (after) large repo (1.4M commits): Min. 1st Qu. Median Mean 3rd Qu. Max. 1.973 2.105 2.256 2.243 2.406 2.443 (before) 0.144 0.147 0.148 0.150 0.151 0.176 (after)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
51859
f4733654f144 typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
     1
from __future__ import annotations
f4733654f144 typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
     2
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     3
import collections
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     4
import os
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     5
import sqlite3
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     6
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     7
from mercurial.i18n import _
47012
d55b71393907 node: replace nullid and friends with nodeconstants class
Joerg Sonnenberger <joerg@bec.de>
parents: 46113
diff changeset
     8
from mercurial.node import sha1nodeconstants
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     9
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    10
from mercurial import (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    11
    encoding,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    12
    error,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    13
    pycompat,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    14
)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    15
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    16
from . import gitutil
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    17
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    18
44484
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
    19
pygit2 = gitutil.get_pygit2()
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
    20
52628
3865451a5fab git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52627
diff changeset
    21
# Version stamped into the sqlite file via "PRAGMA user_version". _createdb()
# compares the stored value against this constant; any change to _SCHEMA
# below needs a matching bump here.
_CURRENT_SCHEMA_VERSION = 5

# DDL used to initialise a brand-new index database. _createdb() splits this
# text on ';' and executes each piece separately, so no SQL comment in here
# may contain a semicolon.
_SCHEMA = (
    """
CREATE TABLE refs (
  -- node and name are unique together. There may be more than one name for
  -- a given node, and there may be no name at all for a given node (in the
  -- case of an anonymous hg head).
  node TEXT NOT NULL,
  name TEXT
);

-- The "possible heads" of the repository, which we use to figure out
-- if we need to re-walk the changelog.
CREATE TABLE possible_heads (
  node TEXT NOT NULL
);

CREATE UNIQUE INDEX possible_heads_idx ON possible_heads(node);

-- The topological heads of the changelog, which hg depends on.
CREATE TABLE heads (
  node TEXT NOT NULL
);

-- A total ordering of the changelog
CREATE TABLE changelog (
  rev INTEGER NOT NULL PRIMARY KEY,
  node TEXT NOT NULL,
  p1 TEXT,
  p2 TEXT,
  synthetic TEXT,
  changedfiles BOOLEAN
);

CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);

-- Changed files for each commit, which lets us dynamically build
-- filelogs.
CREATE TABLE changedfiles (
  node TEXT NOT NULL,
  filename TEXT NOT NULL,
  -- 40 zeroes for deletions
  filenode TEXT NOT NULL,
-- to handle filelog parentage:
  p1node TEXT,
  p1filenode TEXT,
  p2node TEXT,
  p2filenode TEXT
);

CREATE INDEX changedfiles_nodes_idx
  ON changedfiles(node);

-- Cached values to improve performance
CREATE TABLE cache (
  ncommits INTEGER
);
INSERT INTO cache (ncommits) VALUES (NULL);

PRAGMA user_version=%d
"""
    % _CURRENT_SCHEMA_VERSION
)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    85
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    86
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    87
def _createdb(path):
    """Open the sqlite index at ``path``, creating the schema if it is new.

    Args:
      path: location of the sqlite file, as a local-encoded path that is
            converted with encoding.strfromlocal() before opening.

    Returns:
      An open sqlite3 connection whose text_factory is ``bytes`` and whose
      journal mode has been switched to WAL.

    Raises:
      error.Abort: if the file carries a user_version that is neither 0
        (fresh database) nor _CURRENT_SCHEMA_VERSION.
    """
    db = sqlite3.connect(encoding.strfromlocal(path))
    try:
        db.text_factory = bytes

        res = db.execute('PRAGMA user_version').fetchone()[0]

        # New database.
        if res == 0:
            # The schema is one string; run it statement by statement.
            for statement in _SCHEMA.split(';'):
                db.execute(statement.strip())

            db.commit()

        elif res == _CURRENT_SCHEMA_VERSION:
            pass

        else:
            raise error.Abort(_(b'sqlite database has unrecognized version'))

        db.execute('PRAGMA journal_mode=WAL')
    except BaseException:
        # Do not leak the open handle when setup fails part-way through.
        db.close()
        raise

    return db
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   112
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   113
44484
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
   114
# Sort flags handed to gitrepo.walk(). The value stays an empty tuple when
# pygit2 could not be loaded, so the pygit2 constants are only touched when
# the module is actually available.
_OUR_ORDER = ()
if pygit2:
    _OUR_ORDER = (
        pygit2.GIT_SORT_TOPOLOGICAL
        | pygit2.GIT_SORT_TIME
        | pygit2.GIT_SORT_REVERSE
    )

_DIFF_FLAGS = 1 << 21  # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   123
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   124
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   125
def _find_nearest_ancestor_introducing_node(
    db, gitrepo, file_path, walk_start, filenode
):
    """Find the nearest ancestor that introduces a file node.

    Args:
      db: a handle to our sqlite database.
      gitrepo: A pygit2.Repository instance.
      file_path: the path of a file in the repo
      walk_start: a pygit2.Oid that is a commit where we should start walking
                  for our nearest ancestor.
      filenode: the hexlified node id of the file (must be a str).

    Returns:
      A hexlified SHA that is the commit ID of the next-nearest parent.

    Raises:
      error.ProgrammingError: if none of the commits visited by the walk is
        recorded in changedfiles for this path and file node.
    """
    assert isinstance(file_path, str), 'file_path must be str, got %r' % type(
        file_path
    )
    assert isinstance(filenode, str), 'filenode must be str, got %r' % type(
        filenode
    )
    # Every commit the index knows to carry this exact (path, filenode) pair.
    # Rows come back as bytes, hence the decode.
    parent_options = {
        row[0].decode('ascii')
        for row in db.execute(
            'SELECT node FROM changedfiles '
            'WHERE filename = ? AND filenode = ?',
            (file_path, filenode),
        )
    }
    inner_walker = gitrepo.walk(walk_start, _OUR_ORDER)
    for w in inner_walker:
        commit_hex = w.id.hex
        if commit_hex in parent_options:
            return commit_hex
    # Interpolate here: ProgrammingError does not apply extra positional
    # arguments to the message, so passing the tuple separately would leave
    # the %s placeholders unfilled.
    raise error.ProgrammingError(
        'Unable to find introducing commit for %s node %s from %s'
        % (file_path, filenode, walk_start)
    )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   162
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   163
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   164
def fill_in_filelog(gitrepo, db, startcommit, path, startfilenode):
    """Given a starting commit and path, fill in a filelog's parent pointers.

    Starting at (startcommit, startfilenode), walk the file's history
    breadth-first. For each visited commit, look up ``path`` in every
    parent's tree, resolve the commit that introduced that version with
    _find_nearest_ancestor_introducing_node(), and store the result in
    the parent columns of the matching ``changedfiles`` row. Parents
    whose own row has no parent pointers yet are queued so they get
    filled in as well. The database is committed once, after the walk.

    Args:
      gitrepo: a pygit2.Repository
      db: a handle to our sqlite database
      startcommit: a hexlified node id for the commit to start at
      path: the path of the file whose parent pointers we should fill in.
      startfilenode: the hexlified node id of the file at startcommit

    Raises:
      error.ProgrammingError: if the file is present in more than two
        parents of a visited commit (octopus merge).

    TODO: make startfilenode optional
    """
    assert isinstance(
        startcommit, str
    ), 'startcommit must be str, got %r' % type(startcommit)
    assert isinstance(
        startfilenode, str
    ), 'startfilenode must be str, got %r' % type(startfilenode)
    # The path is the same for every commit we visit, so split it once.
    components = path.split('/')
    start = (startcommit, startfilenode)
    visit = collections.deque([start])
    # Every (commit, filenode) pair that has ever been queued. Without
    # this, a pair reachable through both sides of a merge is queued (and
    # its whole ancestry re-walked) once per route.
    queued = {start}
    while visit:
        cnode, filenode = visit.popleft()
        commit = gitrepo[cnode]
        parents = []
        for parent in commit.parents:
            t = parent.tree
            for comp in components:
                try:
                    t = gitrepo[t[comp].id]
                except (KeyError, TypeError):
                    # KeyError: the component is missing from this tree.
                    # TypeError: an earlier component resolved to an
                    # object that cannot be indexed by name (a file that
                    # was later replaced by a directory). Either way the
                    # file does not exist in this parent.
                    break
            else:
                introducer = _find_nearest_ancestor_introducing_node(
                    db, gitrepo, path, parent.id, t.id.hex
                )
                parents.append((introducer, t.id.hex))
        # Fail before doing any further database work for this commit.
        if len(parents) > 2:
            raise error.ProgrammingError(
                b"git support can't handle octopus merges"
            )
        for entry in parents:
            if entry in queued:
                continue
            par, parfnode = entry
            found = int(
                db.execute(
                    'SELECT COUNT(*) FROM changedfiles WHERE '
                    'node = ? AND filename = ? AND filenode = ? AND '
                    'p1node NOT NULL',
                    (par, path, parfnode),
                ).fetchone()[0]
            )
            if found == 0:
                # No filled-in row for this parent yet: visit it too.
                assert par is not None
                queued.add(entry)
                visit.append(entry)
        p1node = p1fnode = p2node = p2fnode = gitutil.nullgit
        if parents:
            p1node, p1fnode = parents[0]
        if len(parents) == 2:
            p2node, p2fnode = parents[1]
        db.execute(
            'UPDATE changedfiles SET '
            'p1node = ?, p1filenode = ?, p2node = ?, p2filenode = ? '
            'WHERE node = ? AND filename = ? AND filenode = ?',
            (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode),
        )
    db.commit()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   227
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   228
52626
42f00965e50b git: track which commit's file changes have been indexed
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52625
diff changeset
   229
def _index_repo_commit(gitrepo, db, node, commit=False):
    """Record the files changed by one commit in the changedfiles table.

    Does nothing when the commit's ``changelog`` row is already flagged
    as having its changed files indexed. Otherwise the commit is diffed
    against its first parent (or against the empty tree when it has no
    parents), one ``changedfiles`` row is inserted for every new-side
    file whose id is not the null id, and the ``changelog`` row is
    flagged as indexed.

    Args:
      gitrepo: a pygit2.Repository
      db: a handle to our sqlite database
      node: the id of the commit to index, as stored in changelog.node
      commit: when true, commit the database transaction after indexing;
        when false (the default) the caller is responsible for committing.
    """
    already_done = db.execute(
        "SELECT changedfiles FROM changelog WHERE node=?", (node,)
    ).fetchone()[0]
    if already_done:
        return  # This commit has already been indexed

    # Keep the git object under its own name. Rebinding ``commit`` here
    # would clobber the flag argument and make the final db.commit()
    # unconditional.
    gitcommit = gitrepo[node]
    nodehex = gitcommit.id.hex
    # I *think* we only need to check p1 for changed files
    # (and therefore linkrevs), because any node that would
    # actually have this commit as a linkrev would be
    # completely new in this rev.
    if gitcommit.parents:
        p1 = gitcommit.parents[0].id.hex
        patchgen = gitrepo.diff(p1, nodehex, flags=_DIFF_FLAGS)
    else:
        patchgen = gitcommit.tree.diff_to_tree(swap=True, flags=_DIFF_FLAGS)
    new_files = (p.delta.new_file for p in patchgen)
    # Map path -> file node, skipping entries whose new side has the null id.
    files = {
        nf.path: nf.id.hex
        for nf in new_files
        if nf.id.raw != sha1nodeconstants.nullid
    }
    for p, n in files.items():
        # We intentionally set NULLs for any file parentage
        # information so it'll get demand-computed later. We
        # used to do it right here, and it was _very_ slow.
        db.execute(
            'INSERT INTO changedfiles ('
            'node, filename, filenode, p1node, p1filenode, p2node, '
            'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
            (nodehex, p, n, None, None, None, None),
        )
    # Mark the commit as loaded
    db.execute(
        "UPDATE changelog SET changedfiles=TRUE WHERE node=?", (nodehex,)
    )
    if commit:
        db.commit()
52623
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   269
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   270
44951
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   271
def _index_repo(
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   272
    gitrepo,
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   273
    db,
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   274
    logfn=lambda x: None,
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   275
    progress_factory=lambda *args, **kwargs: None,
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   276
):
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   277
    # Identify all references so we can tell the walker to visit all of them.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   278
    all_refs = gitrepo.listall_references()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   279
    possible_heads = set()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   280
    prog = progress_factory(b'refs')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   281
    for pos, ref in enumerate(all_refs):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   282
        if prog is not None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   283
            prog.update(pos)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   284
        if not (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   285
            ref.startswith('refs/heads/')  # local branch
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   286
            or ref.startswith('refs/tags/')  # tag
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   287
            or ref.startswith('refs/remotes/')  # remote branch
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   288
            or ref.startswith('refs/hg/')  # from this extension
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   289
        ):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   290
            continue
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   291
        try:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   292
            start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   293
        except ValueError:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   294
            # No commit to be found, so we don't care for hg's purposes.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   295
            continue
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   296
        possible_heads.add(start.id)
52625
27a0bfe770eb git: minor wording tweak to a comment
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52624
diff changeset
   297
    # Optimization: if the list of refs hasn't changed, don't
27a0bfe770eb git: minor wording tweak to a comment
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52624
diff changeset
   298
    # reindex the changelog. This doesn't matter on small
27a0bfe770eb git: minor wording tweak to a comment
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52624
diff changeset
   299
    # repositories, but on even moderately deep histories (e.g., cpython)
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   300
    # this is a very important performance win.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   301
    #
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   302
    # TODO: we should figure out how to incrementally index history
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   303
    # (preferably by detecting rewinds!) so that we don't have to do a
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   304
    # full changelog walk every time a new commit is created.
44946
fb2936c5f6dc git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents: 44484
diff changeset
   305
    cache_heads = {
fb2936c5f6dc git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents: 44484
diff changeset
   306
        pycompat.sysstr(x[0])
fb2936c5f6dc git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents: 44484
diff changeset
   307
        for x in db.execute('SELECT node FROM possible_heads')
fb2936c5f6dc git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents: 44484
diff changeset
   308
    }
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   309
    walker = None
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   310
    cur_cache_heads = {h.hex for h in possible_heads}
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   311
    if cur_cache_heads == cache_heads:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   312
        return
44951
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   313
    logfn(b'heads mismatch, rebuilding dagcache\n')
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   314
    for start in possible_heads:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   315
        if walker is None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   316
            walker = gitrepo.walk(start, _OUR_ORDER)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   317
        else:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   318
            walker.push(start)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   319
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   320
    # Empty out the existing changelog. Even for large-ish histories
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   321
    # we can do the top-level "walk all the commits" dance very
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   322
    # quickly as long as we don't need to figure out the changed files
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   323
    # list.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   324
    db.execute('DELETE FROM changelog')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   325
    if prog is not None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   326
        prog.complete()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   327
    prog = progress_factory(b'commits')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   328
    # This walker is sure to visit all the revisions in history, but
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   329
    # only once.
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   330
    pos = -1
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   331
    for commit in walker:
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   332
        if prog is not None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   333
            prog.update(pos)
47051
de26b9a7ec29 git: consistently use str for parents when rebuilding the index database
Matt Harbison <matt_harbison@yahoo.com>
parents: 46113
diff changeset
   334
        p1 = p2 = gitutil.nullgit
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   335
        if len(commit.parents) <= 2:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   336
            if commit.parents:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   337
                p1 = commit.parents[0].id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   338
            if len(commit.parents) == 2:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   339
                p2 = commit.parents[1].id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   340
            pos += 1
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   341
            db.execute(
52627
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   342
                'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, FALSE)',
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   343
                (pos, commit.id.hex, p1, p2),
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   344
            )
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   345
        else:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   346
            parents = list(commit.parents)
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   347
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   348
            p1 = parents.pop(0).id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   349
            while parents:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   350
                pos += 1
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   351
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   352
                if len(parents) == 1:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   353
                    this = commit.id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   354
                    synth = None
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   355
                else:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   356
                    this = "%040x" % pos
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   357
                    synth = commit.id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   358
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   359
                p2 = parents.pop(0).id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   360
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   361
                db.execute(
52627
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   362
                    'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, FALSE)',
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   363
                    (pos, this, p1, p2, synth),
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   364
                )
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   365
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   366
                p1 = this
52627
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   367
    # Determine heads from the list of possible heads.
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   368
    db.execute('DELETE FROM heads')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   369
    db.execute('DELETE FROM possible_heads')
52622
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   370
    db.executemany(
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   371
        'INSERT INTO possible_heads (node) VALUES(?)',
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   372
        [(hid.hex,) for hid in possible_heads],
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   373
    )
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   374
    db.execute(
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   375
        '''
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   376
    INSERT INTO heads (node)
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   377
        SELECT node FROM possible_heads WHERE
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   378
            node NOT IN (
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   379
                SELECT DISTINCT possible_heads.node FROM changelog, possible_heads WHERE
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   380
                    changelog.p1 = possible_heads.node OR
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   381
                    changelog.p2 = possible_heads.node
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   382
            )
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   383
    '''
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   384
    )
52627
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   385
    # Mark all commits with already-loaded changedfiles info
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   386
    db.execute(
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   387
        '''
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   388
    UPDATE changelog SET changedfiles=TRUE WHERE node IN (
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   389
        SELECT DISTINCT node FROM changedfiles
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   390
    )
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   391
    '''
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   392
    )
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   393
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   394
    if prog is not None:
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   395
        prog.complete()
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   396
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   397
    # Index the changed files for head commits
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   398
    prog = progress_factory(b'indexing head files')
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   399
    heads = [
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   400
        row[0].decode('ascii') for row in db.execute("SELECT * FROM heads")
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   401
    ]
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   402
    for pos, h in enumerate(heads):
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   403
        if prog is not None:
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   404
            prog.update(pos)
4dadaf300fe0 git: index changed files on-demand
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52626
diff changeset
   405
        _index_repo_commit(gitrepo, db, h)
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   406
52628
3865451a5fab git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52627
diff changeset
   407
    db.execute(
3865451a5fab git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52627
diff changeset
   408
        '''
3865451a5fab git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52627
diff changeset
   409
    UPDATE cache SET
3865451a5fab git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52627
diff changeset
   410
        ncommits = (SELECT COUNT(1) FROM changelog)
3865451a5fab git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52627
diff changeset
   411
    '''
3865451a5fab git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52627
diff changeset
   412
    )
3865451a5fab git: cache the number of commits to speed up large repo operations
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52627
diff changeset
   413
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   414
    db.commit()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   415
    if prog is not None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   416
        prog.complete()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   417
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   418
44951
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   419
def get_index(
    gitrepo, logfn=lambda x: None, progress_factory=lambda *args, **kwargs: None
):
    """Open the sqlite commit index for ``gitrepo`` and (re)index the repo.

    The database file is ``git-commits.sqlite`` inside the directory
    ``<gitrepo.path>/../.hg/cache``; that directory is created when it
    does not exist yet.

    ``logfn`` and ``progress_factory`` are handed to ``_index_repo``
    unchanged. The defaults are no-ops: ``logfn`` ignores its argument
    and ``progress_factory`` returns ``None``.

    Returns the database object produced by ``_createdb``, after
    ``_index_repo`` has been run on it.
    """
    cachepath = os.path.join(
        pycompat.fsencode(gitrepo.path), b'..', b'.hg', b'cache'
    )
    # Create the directory unconditionally instead of testing
    # os.path.exists() first: with the check-then-create form, two
    # processes starting at the same time could both see the directory
    # missing and the slower one would crash with FileExistsError.
    os.makedirs(cachepath, exist_ok=True)
    dbpath = os.path.join(cachepath, b'git-commits.sqlite')
    db = _createdb(dbpath)
    # TODO check against gitrepo heads before doing a full index
    # TODO thread a ui.progress call into this layer
    _index_repo(gitrepo, db, logfn, progress_factory)
    return db