hgext/git/index.py
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Sun, 10 Mar 2024 14:30:32 -0400
changeset 52624 cdbfe5e7592e
parent 52623 4e2ea270ba6a
child 52625 27a0bfe770eb
permissions -rw-r--r--
git: handle octopus merges Octopus merges in git are merge commits with more than 2 parents. To make them fit into mercurial core's assumption about commits having 0-2 parents, the git indexing code creates "sythetic" commits to represent the octopus commit as a sequence of regular 2-parent commits. The synthetic commit hashes are just an incrementing commit number (which is the same as the generated rev number). The last commit in the sequence of commits uses the actual git commit hash. As a result, `hg checkout -r <commit>` produces the same working directory as `git checkout <commit>` for all git commit hashes. The synthetic commit hashes are stored in the changelog table as any other commit - with the two parents - but they also contain the commit hash of the octopus merge commit. For example, given the git DAG (manually pruned `git log --graph`): *-. commit 23480d86e2689703b33f693907c40fbe6e1620e4 Merge branches... |\ \ | | | | | * commit 2eda9984b06c75448598ec6c0a9028e49dacf616 C | | | | * | commit 5e634a12f12fedaf7b8ef0f0fcdbb07222871953 B | |/ | | * | commit 8883a1296c5ae323a1b18d1f6410398ce43ebd3a D |/ | * commit 95f241588fded9554cae91be0fefd576f61ebfc6 A Where M is the octopus merge commit with 3 parents, the corresponding mercurial DAG is: $ hg log -G -T '{node} {desc}' @ 23480d86e2689703b33f693907c40fbe6e1620e4 Merge branches 'abc' and 'def' |\ | o 0000000000000000000000000000000000000004 Merge branches 'abc' and 'def' | |\ | | o 8883a1296c5ae323a1b18d1f6410398ce43ebd3a D | | | o---+ 2eda9984b06c75448598ec6c0a9028e49dacf616 C / / o / 5e634a12f12fedaf7b8ef0f0fcdbb07222871953 B |/ o 95f241588fded9554cae91be0fefd576f61ebfc6 A
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
51859
f4733654f144 typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
     1
from __future__ import annotations
f4733654f144 typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
     2
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     3
import collections
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     4
import os
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     5
import sqlite3
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     6
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     7
from mercurial.i18n import _
47012
d55b71393907 node: replace nullid and friends with nodeconstants class
Joerg Sonnenberger <joerg@bec.de>
parents: 46113
diff changeset
     8
from mercurial.node import sha1nodeconstants
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
     9
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    10
from mercurial import (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    11
    encoding,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    12
    error,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    13
    pycompat,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    14
)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    15
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    16
from . import gitutil
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    17
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    18
44484
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
    19
pygit2 = gitutil.get_pygit2()
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
    20
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
    21
_CURRENT_SCHEMA_VERSION = 3
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    22
_SCHEMA = (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    23
    """
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    24
CREATE TABLE refs (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    25
  -- node and name are unique together. There may be more than one name for
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    26
  -- a given node, and there may be no name at all for a given node (in the
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    27
  -- case of an anonymous hg head).
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    28
  node TEXT NOT NULL,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    29
  name TEXT
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    30
);
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    31
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    32
-- The "possible heads" of the repository, which we use to figure out
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    33
-- if we need to re-walk the changelog.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    34
CREATE TABLE possible_heads (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    35
  node TEXT NOT NULL
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    36
);
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    37
52622
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
    38
CREATE UNIQUE INDEX possible_heads_idx ON possible_heads(node);
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
    39
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    40
-- The topological heads of the changelog, which hg depends on.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    41
CREATE TABLE heads (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    42
  node TEXT NOT NULL
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    43
);
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    44
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    45
-- A total ordering of the changelog
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    46
CREATE TABLE changelog (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    47
  rev INTEGER NOT NULL PRIMARY KEY,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    48
  node TEXT NOT NULL,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    49
  p1 TEXT,
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
    50
  p2 TEXT,
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
    51
  synthetic TEXT
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    52
);
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    53
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    54
CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    55
CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    56
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    57
-- Changed files for each commit, which lets us dynamically build
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    58
-- filelogs.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    59
CREATE TABLE changedfiles (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    60
  node TEXT NOT NULL,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    61
  filename TEXT NOT NULL,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    62
  -- 40 zeroes for deletions
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    63
  filenode TEXT NOT NULL,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    64
-- to handle filelog parentage:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    65
  p1node TEXT,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    66
  p1filenode TEXT,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    67
  p2node TEXT,
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    68
  p2filenode TEXT
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    69
);
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    70
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    71
CREATE INDEX changedfiles_nodes_idx
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    72
  ON changedfiles(node);
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    73
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    74
PRAGMA user_version=%d
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    75
"""
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    76
    % _CURRENT_SCHEMA_VERSION
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    77
)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    78
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    79
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    80
def _createdb(path):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    81
    # print('open db', path)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    82
    # import traceback
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    83
    # traceback.print_stack()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    84
    db = sqlite3.connect(encoding.strfromlocal(path))
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    85
    db.text_factory = bytes
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    86
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    87
    res = db.execute('PRAGMA user_version').fetchone()[0]
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    88
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    89
    # New database.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    90
    if res == 0:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    91
        for statement in _SCHEMA.split(';'):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    92
            db.execute(statement.strip())
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    93
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    94
        db.commit()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    95
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    96
    elif res == _CURRENT_SCHEMA_VERSION:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    97
        pass
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    98
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
    99
    else:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   100
        raise error.Abort(_(b'sqlite database has unrecognized version'))
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   101
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   102
    db.execute('PRAGMA journal_mode=WAL')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   103
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   104
    return db
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   105
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   106
44484
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
   107
_OUR_ORDER = ()
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
   108
if pygit2:
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
   109
    _OUR_ORDER = (
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
   110
        pygit2.GIT_SORT_TOPOLOGICAL
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
   111
        | pygit2.GIT_SORT_TIME
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
   112
        | pygit2.GIT_SORT_REVERSE
ec54b3d2af0b git: don't fail import when pygit2 is not install
Martin von Zweigbergk <martinvonz@google.com>
parents: 44477
diff changeset
   113
    )
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   114
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   115
_DIFF_FLAGS = 1 << 21  # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   116
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   117
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   118
def _find_nearest_ancestor_introducing_node(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   119
    db, gitrepo, file_path, walk_start, filenode
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   120
):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   121
    """Find the nearest ancestor that introduces a file node.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   122
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   123
    Args:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   124
      db: a handle to our sqlite database.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   125
      gitrepo: A pygit2.Repository instance.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   126
      file_path: the path of a file in the repo
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   127
      walk_start: a pygit2.Oid that is a commit where we should start walking
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   128
                  for our nearest ancestor.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   129
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   130
    Returns:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   131
      A hexlified SHA that is the commit ID of the next-nearest parent.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   132
    """
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   133
    assert isinstance(file_path, str), 'file_path must be str, got %r' % type(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   134
        file_path
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   135
    )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   136
    assert isinstance(filenode, str), 'filenode must be str, got %r' % type(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   137
        filenode
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   138
    )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   139
    parent_options = {
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   140
        row[0].decode('ascii')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   141
        for row in db.execute(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   142
            'SELECT node FROM changedfiles '
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   143
            'WHERE filename = ? AND filenode = ?',
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   144
            (file_path, filenode),
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   145
        )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   146
    }
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   147
    inner_walker = gitrepo.walk(walk_start, _OUR_ORDER)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   148
    for w in inner_walker:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   149
        if w.id.hex in parent_options:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   150
            return w.id.hex
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   151
    raise error.ProgrammingError(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   152
        'Unable to find introducing commit for %s node %s from %s',
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   153
        (file_path, filenode, walk_start),
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   154
    )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   155
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   156
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   157
def fill_in_filelog(gitrepo, db, startcommit, path, startfilenode):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   158
    """Given a starting commit and path, fill in a filelog's parent pointers.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   159
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   160
    Args:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   161
      gitrepo: a pygit2.Repository
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   162
      db: a handle to our sqlite database
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   163
      startcommit: a hexlified node id for the commit to start at
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   164
      path: the path of the file whose parent pointers we should fill in.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   165
      filenode: the hexlified node id of the file at startcommit
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   166
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   167
    TODO: make filenode optional
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   168
    """
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   169
    assert isinstance(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   170
        startcommit, str
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   171
    ), 'startcommit must be str, got %r' % type(startcommit)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   172
    assert isinstance(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   173
        startfilenode, str
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   174
    ), 'startfilenode must be str, got %r' % type(startfilenode)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   175
    visit = collections.deque([(startcommit, startfilenode)])
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   176
    while visit:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   177
        cnode, filenode = visit.popleft()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   178
        commit = gitrepo[cnode]
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   179
        parents = []
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   180
        for parent in commit.parents:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   181
            t = parent.tree
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   182
            for comp in path.split('/'):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   183
                try:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   184
                    t = gitrepo[t[comp].id]
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   185
                except KeyError:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   186
                    break
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   187
            else:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   188
                introducer = _find_nearest_ancestor_introducing_node(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   189
                    db, gitrepo, path, parent.id, t.id.hex
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   190
                )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   191
                parents.append((introducer, t.id.hex))
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   192
        p1node = p1fnode = p2node = p2fnode = gitutil.nullgit
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   193
        for par, parfnode in parents:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   194
            found = int(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   195
                db.execute(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   196
                    'SELECT COUNT(*) FROM changedfiles WHERE '
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   197
                    'node = ? AND filename = ? AND filenode = ? AND '
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   198
                    'p1node NOT NULL',
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   199
                    (par, path, parfnode),
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   200
                ).fetchone()[0]
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   201
            )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   202
            if found == 0:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   203
                assert par is not None
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   204
                visit.append((par, parfnode))
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   205
        if parents:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   206
            p1node, p1fnode = parents[0]
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   207
        if len(parents) == 2:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   208
            p2node, p2fnode = parents[1]
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   209
        if len(parents) > 2:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   210
            raise error.ProgrammingError(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   211
                b"git support can't handle octopus merges"
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   212
            )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   213
        db.execute(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   214
            'UPDATE changedfiles SET '
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   215
            'p1node = ?, p1filenode = ?, p2node = ?, p2filenode = ? '
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   216
            'WHERE node = ? AND filename = ? AND filenode = ?',
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   217
            (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode),
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   218
        )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   219
    db.commit()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   220
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   221
52623
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   222
def _index_repo_commit(gitrepo, db, commit):
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   223
    files = {}
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   224
    # I *think* we only need to check p1 for changed files
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   225
    # (and therefore linkrevs), because any node that would
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   226
    # actually have this commit as a linkrev would be
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   227
    # completely new in this rev.
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   228
    p1 = commit.parents[0].id.hex if commit.parents else None
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   229
    if p1 is not None:
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   230
        patchgen = gitrepo.diff(p1, commit.id.hex, flags=_DIFF_FLAGS)
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   231
    else:
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   232
        patchgen = commit.tree.diff_to_tree(swap=True, flags=_DIFF_FLAGS)
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   233
    new_files = (p.delta.new_file for p in patchgen)
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   234
    files = {
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   235
        nf.path: nf.id.hex
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   236
        for nf in new_files
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   237
        if nf.id.raw != sha1nodeconstants.nullid
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   238
    }
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   239
    for p, n in files.items():
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   240
        # We intentionally set NULLs for any file parentage
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   241
        # information so it'll get demand-computed later. We
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   242
        # used to do it right here, and it was _very_ slow.
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   243
        db.execute(
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   244
            'INSERT INTO changedfiles ('
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   245
            'node, filename, filenode, p1node, p1filenode, p2node, '
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   246
            'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   247
            (commit.id.hex, p, n, None, None, None, None),
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   248
        )
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   249
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   250
44951
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   251
def _index_repo(
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   252
    gitrepo,
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   253
    db,
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   254
    logfn=lambda x: None,
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   255
    progress_factory=lambda *args, **kwargs: None,
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   256
):
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   257
    # Identify all references so we can tell the walker to visit all of them.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   258
    all_refs = gitrepo.listall_references()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   259
    possible_heads = set()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   260
    prog = progress_factory(b'refs')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   261
    for pos, ref in enumerate(all_refs):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   262
        if prog is not None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   263
            prog.update(pos)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   264
        if not (
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   265
            ref.startswith('refs/heads/')  # local branch
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   266
            or ref.startswith('refs/tags/')  # tag
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   267
            or ref.startswith('refs/remotes/')  # remote branch
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   268
            or ref.startswith('refs/hg/')  # from this extension
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   269
        ):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   270
            continue
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   271
        try:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   272
            start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   273
        except ValueError:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   274
            # No commit to be found, so we don't care for hg's purposes.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   275
            continue
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   276
        possible_heads.add(start.id)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   277
    # Optimization: if the list of heads hasn't changed, don't
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   278
    # reindex, the changelog. This doesn't matter on small
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   279
    # repositories, but on even moderately deep histories (eg cpython)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   280
    # this is a very important performance win.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   281
    #
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   282
    # TODO: we should figure out how to incrementally index history
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   283
    # (preferably by detecting rewinds!) so that we don't have to do a
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   284
    # full changelog walk every time a new commit is created.
44946
fb2936c5f6dc git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents: 44484
diff changeset
   285
    cache_heads = {
fb2936c5f6dc git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents: 44484
diff changeset
   286
        pycompat.sysstr(x[0])
fb2936c5f6dc git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents: 44484
diff changeset
   287
        for x in db.execute('SELECT node FROM possible_heads')
fb2936c5f6dc git: decode node IDs back into Python strings (issue6349)
Hollis Blanchard <hollis_blanchard@mentor.com>
parents: 44484
diff changeset
   288
    }
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   289
    walker = None
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   290
    cur_cache_heads = {h.hex for h in possible_heads}
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   291
    if cur_cache_heads == cache_heads:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   292
        return
44951
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   293
    logfn(b'heads mismatch, rebuilding dagcache\n')
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   294
    for start in possible_heads:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   295
        if walker is None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   296
            walker = gitrepo.walk(start, _OUR_ORDER)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   297
        else:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   298
            walker.push(start)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   299
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   300
    # Empty out the existing changelog. Even for large-ish histories
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   301
    # we can do the top-level "walk all the commits" dance very
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   302
    # quickly as long as we don't need to figure out the changed files
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   303
    # list.
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   304
    db.execute('DELETE FROM changelog')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   305
    if prog is not None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   306
        prog.complete()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   307
    prog = progress_factory(b'commits')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   308
    # This walker is sure to visit all the revisions in history, but
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   309
    # only once.
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   310
    pos = -1
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   311
    for commit in walker:
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   312
        if prog is not None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   313
            prog.update(pos)
47051
de26b9a7ec29 git: consistently use str for parents when rebuilding the index database
Matt Harbison <matt_harbison@yahoo.com>
parents: 46113
diff changeset
   314
        p1 = p2 = gitutil.nullgit
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   315
        if len(commit.parents) <= 2:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   316
            if commit.parents:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   317
                p1 = commit.parents[0].id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   318
            if len(commit.parents) == 2:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   319
                p2 = commit.parents[1].id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   320
            pos += 1
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   321
            db.execute(
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   322
                'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, NULL)',
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   323
                (pos, commit.id.hex, p1, p2),
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   324
            )
52624
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   325
        else:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   326
            parents = list(commit.parents)
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   327
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   328
            p1 = parents.pop(0).id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   329
            while parents:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   330
                pos += 1
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   331
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   332
                if len(parents) == 1:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   333
                    this = commit.id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   334
                    synth = None
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   335
                else:
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   336
                    this = "%040x" % pos
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   337
                    synth = commit.id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   338
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   339
                p2 = parents.pop(0).id.hex
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   340
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   341
                db.execute(
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   342
                    'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, ?)',
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   343
                    (pos, this, p1, p2, synth),
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   344
                )
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   345
cdbfe5e7592e git: handle octopus merges
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52623
diff changeset
   346
                p1 = this
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   347
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   348
        num_changedfiles = db.execute(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   349
            "SELECT COUNT(*) from changedfiles WHERE node = ?",
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   350
            (commit.id.hex,),
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   351
        ).fetchone()[0]
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   352
        if not num_changedfiles:
52623
4e2ea270ba6a git: move file indexing into a helper
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 52622
diff changeset
   353
            _index_repo_commit(gitrepo, db, commit)
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   354
    db.execute('DELETE FROM heads')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   355
    db.execute('DELETE FROM possible_heads')
52622
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   356
    db.executemany(
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   357
        'INSERT INTO possible_heads (node) VALUES(?)',
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   358
        [(hid.hex,) for hid in possible_heads],
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   359
    )
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   360
    db.execute(
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   361
        '''
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   362
    INSERT INTO heads (node)
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   363
        SELECT node FROM possible_heads WHERE
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   364
            node NOT IN (
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   365
                SELECT DISTINCT possible_heads.node FROM changelog, possible_heads WHERE
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   366
                    changelog.p1 = possible_heads.node OR
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   367
                    changelog.p2 = possible_heads.node
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   368
            )
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   369
    '''
aa5844ade247 git: speed up possible head processing during indexing by ~100x
Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
parents: 51859
diff changeset
   370
    )
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   371
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   372
    db.commit()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   373
    if prog is not None:
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   374
        prog.complete()
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   375
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   376
44951
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   377
def get_index(
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   378
    gitrepo, logfn=lambda x: None, progress_factory=lambda *args, **kwargs: None
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   379
):
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   380
    cachepath = os.path.join(
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   381
        pycompat.fsencode(gitrepo.path), b'..', b'.hg', b'cache'
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   382
    )
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   383
    if not os.path.exists(cachepath):
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   384
        os.makedirs(cachepath)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   385
    dbpath = os.path.join(cachepath, b'git-commits.sqlite')
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   386
    db = _createdb(dbpath)
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   387
    # TODO check against gitrepo heads before doing a full index
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   388
    # TODO thread a ui.progress call into this layer
44951
83e41b73d115 git: add debug logging when there's a mismatch in the cached heads list
Augie Fackler <augie@google.com>
parents: 44946
diff changeset
   389
    _index_repo(gitrepo, db, logfn, progress_factory)
44477
ad718271a9eb git: skeleton of a new extension to _directly_ operate on git repos
Augie Fackler <augie@google.com>
parents:
diff changeset
   390
    return db