comparison hgext/git/manifest.py @ 44477:ad718271a9eb

git: skeleton of a new extension to _directly_ operate on git repos. This is based in part on work I did years ago in hgit, but it's mostly new code since I'm using pygit2 instead of dulwich and the hg storage interfaces have improved. Some cleanup of old hgit code by Pulkit, which I greatly appreciate. test-git-interop.t does not cover a whole lot of cases, but it passes. It includes status, diff, making a new commit, and `hg annotate` working on the git repository. This is _not_ (yet) production quality code: this is an experiment. Known technical debt lurking in this implementation: * Writing bookmarks just totally ignores transactions. * The way progress is threaded down into the gitstore is awful. * Ideally we'd find a way to incrementally reindex DAGs. I'm not sure how to do that efficiently, so we might need a "known only fast-forwards" mode on the DAG indexer for use on `hg commit` and friends. * We don't even _try_ to do anything reasonable for `hg pull` or `hg push`. * Mercurial needs an interface for the changelog type. Tests currently require git 2.24 as far as I'm aware: `git status` has some changed output that I didn't try to handle in a compatible way. This patch has produced some interesting cleanups, most recently on the manifest type. I expect continuing down this road will produce other meritorious cleanups throughout our code. Differential Revision: https://phab.mercurial-scm.org/D6734
author Augie Fackler <augie@google.com>
date Tue, 11 Feb 2020 00:44:59 -0500
parents
children 7518ea76eff4
comparison
equal deleted inserted replaced
44470:a08bbdf839ae 44477:ad718271a9eb
1 from __future__ import absolute_import
2
3 import pygit2
4
5 from mercurial import (
6 match as matchmod,
7 pathutil,
8 pycompat,
9 util,
10 )
11 from mercurial.interfaces import (
12 repository,
13 util as interfaceutil,
14 )
15 from . import gitutil
16
17
@interfaceutil.implementer(repository.imanifestdict)
class gittreemanifest(object):
    """Expose git trees (and optionally a builder's overlay) as a manifestdict.

    Very similar to mercurial.manifest.treemanifest.

    Paths are bytes using ``/`` as the separator. Lookups consult
    ``_pending_changes`` first; a pending value of ``None`` marks the
    path as deleted. Anything not pending is read from the git tree.
    """

    def __init__(self, git_repo, root_tree, pending_changes):
        """Initializer.

        Args:
          git_repo: The git_repo we're walking (required to look up child
              trees).
          root_tree: The root Git tree object for this manifest.
          pending_changes: A dict in which pending changes will be
              tracked. The enclosing memgittreemanifestctx will use this to
              construct any required Tree objects in Git during its
              `write()` method. If None, a fresh private dict is used.
        """
        self._git_repo = git_repo
        self._tree = root_tree
        if pending_changes is None:
            pending_changes = {}
        # dict of path: Optional[Tuple(node, flags)]
        self._pending_changes = pending_changes

    def _resolve_entry(self, path):
        """Given a path, load its node and flags, or raise KeyError if missing.

        This takes into account any pending writes in the builder.

        Returns:
          A ``(node, flags)`` tuple. For entries read from git, node is
          the raw object id and flags is one of b'', b'x' or b'l'.

        Raises:
          KeyError: the path is pending deletion, is absent from the
              tree, or one of its parent components is not a tree.
          ValueError: the entry has a file mode other than regular
              blob, executable blob or symlink.
        """
        if path in self._pending_changes:
            val = self._pending_changes[path]
            if val is None:
                # pending deletion
                raise KeyError(path)
            return val
        comps = pycompat.fsdecode(path).split('/')
        t = self._tree
        for comp in comps[:-1]:
            # Descend from the tree reached so far; looking every
            # component up in the root tree only works one level deep.
            te = t[comp]
            t = self._git_repo[te.id]
            if not isinstance(t, pygit2.Tree):
                # A parent component names a non-tree object (e.g. a
                # file), so nothing can live underneath it.
                raise KeyError(path)
        ent = t[comps[-1]]
        if ent.filemode == pygit2.GIT_FILEMODE_BLOB:
            flags = b''
        elif ent.filemode == pygit2.GIT_FILEMODE_BLOB_EXECUTABLE:
            flags = b'x'
        elif ent.filemode == pygit2.GIT_FILEMODE_LINK:
            flags = b'l'
        else:
            raise ValueError('unsupported mode %s' % oct(ent.filemode))
        return ent.id.raw, flags

    def __getitem__(self, path):
        """Return the node for path, raising KeyError if it is missing."""
        return self._resolve_entry(path)[0]

    def find(self, path):
        """Return the (node, flags) tuple for path, or raise KeyError."""
        return self._resolve_entry(path)

    def __len__(self):
        """Return the number of files, counted by a full walk."""
        return len(list(self.walk(matchmod.always())))

    def __nonzero__(self):
        """Return True if the manifest contains at least one file."""
        try:
            next(iter(self))
            return True
        except StopIteration:
            return False

    __bool__ = __nonzero__

    def __contains__(self, path):
        """Return True if path resolves to an entry."""
        try:
            self._resolve_entry(path)
            return True
        except KeyError:
            return False

    def iterkeys(self):
        """Return an iterator over all file paths in sorted order."""
        return self.walk(matchmod.always())

    def keys(self):
        """Return a sorted list of all file paths."""
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __setitem__(self, path, node):
        """Record node for path as a pending change, keeping current flags."""
        self._pending_changes[path] = node, self.flags(path)

    def __delitem__(self, path):
        """Record path as a pending deletion."""
        # TODO: should probably KeyError for already-deleted files?
        self._pending_changes[path] = None

    def filesnotin(self, other, match=None):
        """Return the set of files in this manifest that are not in other.

        When match is given, only matching files are considered and the
        matcher's bad-file callback is replaced with a no-op.
        """
        if match is not None:
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(other.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in other}

    @util.propertycache
    def _dirs(self):
        # Computed on first use and then cached by propertycache.
        return pathutil.dirs(self)

    def hasdir(self, dir):
        """Return True if dir is a directory of some file in the manifest."""
        return dir in self._dirs

    def diff(self, other, match=None, clean=False):
        """Not implemented yet for git-backed manifests."""
        # TODO
        # Raise instead of `assert False`: asserts are stripped under
        # `python -O`, which would make this silently return None.
        raise NotImplementedError('gittreemanifest.diff is not implemented')

    def setflag(self, path, flag):
        """Record flag for path as a pending change, keeping its node.

        Raises KeyError if path does not currently resolve.
        """
        node, unused_flag = self._resolve_entry(path)
        self._pending_changes[path] = node, flag

    def get(self, path, default=None):
        """Return the node for path, or default if it is missing."""
        try:
            return self._resolve_entry(path)[0]
        except KeyError:
            return default

    def flags(self, path):
        """Return the flags for path, or b'' if it is missing."""
        try:
            return self._resolve_entry(path)[1]
        except KeyError:
            return b''

    def copy(self):
        """Return an independent manifest with the same contents.

        The pending changes are copied so that edits to the copy do not
        show up in this manifest (and vice versa).
        """
        return gittreemanifest(
            self._git_repo, self._tree, dict(self._pending_changes)
        )

    def items(self):
        """Yield (path, node) pairs in sorted path order."""
        for f in self:
            # TODO: build a proper iterator version of this
            yield f, self[f]

    def iteritems(self):
        return self.items()

    def iterentries(self):
        """Yield (path, node, flags) tuples in sorted path order."""
        for f in self:
            # TODO: build a proper iterator version of this
            node, flags = self._resolve_entry(f)
            yield f, node, flags

    def text(self):
        """Not implemented for git-backed manifests."""
        # TODO can this method move out of the manifest iface?
        # Raise instead of `assert False` so this also fails under -O.
        raise NotImplementedError('gittreemanifest.text is not implemented')

    def _walkonetree(self, tree, match, subdir):
        """Yield the matching file paths under tree, recursing into subtrees.

        Args:
          tree: the git tree whose entries are walked.
          match: callable taking a bytes path; only paths for which it
              returns true are yielded.
          subdir: bytes prefix (b'' or ending in b'/') prepended to each
              entry name to form the full path.
        """
        for te in tree:
            # TODO: can we prune dir walks with the matcher?
            realname = subdir + pycompat.fsencode(te.name)
            # Detect subtrees by file mode: the filemode attribute and
            # the GIT_FILEMODE_* constants are what the rest of this
            # module already relies on.
            if te.filemode == pygit2.GIT_FILEMODE_TREE:
                for inner in self._walkonetree(
                    self._git_repo[te.id], match, realname + b'/'
                ):
                    yield inner
            elif match(realname):
                # Only non-tree entries are files; a directory itself
                # must never be reported as a manifest entry.
                yield realname

    def walk(self, match):
        """Return a sorted iterator over the file paths accepted by match.

        Pending additions are merged in and pending deletions removed.
        """
        # TODO: this is a very lazy way to merge in the pending
        # changes. There is absolutely room for optimization here by
        # being clever about walking over the sets...
        baseline = set(self._walkonetree(self._tree, match, b''))
        deleted = {p for p, v in self._pending_changes.items() if v is None}
        pend = {p for p in self._pending_changes if match(p)}
        return iter(sorted((baseline | pend) - deleted))
187
188
@interfaceutil.implementer(repository.imanifestrevisionstored)
class gittreemanifestctx(object):
    """Read-only manifest revision backed by a single git tree."""

    def __init__(self, repo, gittree):
        """Initializer.

        Args:
          repo: passed to gittreemanifest as its ``git_repo``; used to
              look up child trees.
          gittree: the root git tree object of this revision.
        """
        self._repo = repo
        self._tree = gittree

    def read(self):
        """Return a gittreemanifest for this tree with no pending changes."""
        return gittreemanifest(self._repo, self._tree, None)

    def copy(self):
        """Return a writable manifest context based on the same tree."""
        # NB: it's important that we return a memgittreemanifestctx
        # because the caller expects a mutable manifest.
        return memgittreemanifestctx(self._repo, self._tree)

    def find(self, path):
        """Return the (node, flags) tuple for path.

        Raises KeyError if path is not in the manifest.
        """
        # Delegate to the manifest's find(): plain indexing yields only
        # the node, and the result has to actually be returned.
        return self.read().find(path)
205
206
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memgittreemanifestctx(object):
    """Writable manifest revision: a base git tree plus pending changes.

    Manifests returned by read() share this context's pending-changes
    dict, so edits made through them are what write() turns into new
    git trees.
    """

    def __init__(self, repo, tree):
        """Initializer.

        Args:
          repo: used to look up objects by id and to create TreeBuilders.
          tree: the root git tree the pending changes apply on top of.
        """
        self._repo = repo
        self._tree = tree
        # dict of path: Optional[Tuple(node, flags)]
        self._pending_changes = {}

    def read(self):
        """Return a gittreemanifest sharing this context's pending changes."""
        return gittreemanifest(self._repo, self._tree, self._pending_changes)

    def copy(self):
        """Return a new writable context on the same base tree.

        Pending changes are not carried over to the copy.
        """
        # TODO: if we have a builder in play, what should happen here?
        # Maybe we can shuffle copy() into the immutable interface.
        return memgittreemanifestctx(self._repo, self._tree)

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        """Write the pending changes as git trees and return the new root id.

        Args:
          transaction, link, p1, p2, added, removed: unused here.
          match: must be None/falsy or an alwaysmatcher.

        Returns:
          The raw id of the newly written root tree.
        """
        # We're not (for now, anyway) going to audit filenames, so we
        # can ignore added and removed.

        # TODO what does this match argument get used for? hopefully
        # just narrow?
        assert not match or isinstance(match, matchmod.alwaysmatcher)

        touched_dirs = pathutil.dirs(self._pending_changes)
        # Keys in trees/builders are b'' for the root and b'/dir/sub'
        # (leading slash) for everything below it.
        trees = {
            b'': self._tree,
        }
        # path: treebuilder
        builders = {
            b'': self._repo.TreeBuilder(self._tree),
        }
        # get a TreeBuilder for every tree in the touched_dirs set
        for d in sorted(touched_dirs, key=lambda x: (len(x), x)):
            if d == b'':
                # loaded root tree above
                continue
            full = b''
            for part in d.split(b'/'):
                parent = trees[full]
                full += b'/' + part
                if full in builders:
                    # Already set up while handling a shorter
                    # directory; no need to build it again.
                    continue
                try:
                    # Look the child up by object id, the same way
                    # gittreemanifest._resolve_entry does.
                    new = self._repo[parent[pycompat.fsdecode(part)].id]
                except KeyError:
                    # new directory
                    new = None
                if new is not None:
                    # existing directory
                    trees[full] = new
                    builders[full] = self._repo.TreeBuilder(new)
                else:
                    # new directory, use an empty dict to easily
                    # generate KeyError as any nested new dirs get
                    # created.
                    trees[full] = {}
                    builders[full] = self._repo.TreeBuilder()

        # manifest flag -> git file mode; an unknown flag raises KeyError
        modes = {
            b'': pygit2.GIT_FILEMODE_BLOB,
            b'x': pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            b'l': pygit2.GIT_FILEMODE_LINK,
        }
        for f, info in self._pending_changes.items():
            if b'/' not in f:
                dirname = b''
                basename = f
            else:
                dirname, basename = f.rsplit(b'/', 1)
                dirname = b'/' + dirname
            if info is None:
                # pending deletion
                builders[dirname].remove(pycompat.fsdecode(basename))
            else:
                n, fl = info
                builders[dirname].insert(
                    pycompat.fsdecode(basename),
                    gitutil.togitnode(n),
                    modes[fl],
                )
        # This visits the buffered TreeBuilders in deepest-first
        # order, bubbling up the edits. (A child's key is always longer
        # than its parent's, so sorting by length is sufficient.)
        for b in sorted(builders, key=len, reverse=True):
            if b == b'':
                # The root has the shortest key, so it sorts last.
                break
            cb = builders[b]
            dn, bn = b.rsplit(b'/', 1)
            builders[dn].insert(
                pycompat.fsdecode(bn), cb.write(), pygit2.GIT_FILEMODE_TREE
            )
        return builders[b''].write().raw