Mercurial > public > mercurial-scm > hg
comparison hgext/git/manifest.py @ 44477:ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
This is based in part on work I did years ago in hgit, but it's mostly
new code since I'm using pygit2 instead of dulwich and the hg storage
interfaces have improved. Some cleanup of old hgit code by Pulkit,
which I greatly appreciate.
test-git-interop.t does not cover a whole lot of cases, but it
passes. It includes status, diff, making a new commit, and `hg annotate`
working on the git repository.
This is _not_ (yet) production quality code: this is an
experiment. Known technical debt lurking in this implementation:
* Writing bookmarks just totally ignores transactions.
* The way progress is threaded down into the gitstore is awful.
* Ideally we'd find a way to incrementally reindex DAGs. I'm not sure
how to do that efficiently, so we might need a "known only fast-forwards"
mode on the DAG indexer for use on `hg commit` and friends.
* We don't even _try_ to do anything reasonable for `hg pull` or `hg push`.
* Mercurial needs an interface for the changelog type.
Tests currently require git 2.24 as far as I'm aware: `git status` has
some changed output that I didn't try and handle in a compatible way.
This patch has produced some interesting cleanups, most recently on
the manifest type. I expect continuing down this road will produce
other meritorious cleanups throughout our code.
Differential Revision: https://phab.mercurial-scm.org/D6734
author | Augie Fackler <augie@google.com> |
---|---|
date | Tue, 11 Feb 2020 00:44:59 -0500 |
parents | |
children | 7518ea76eff4 |
comparison
equal
deleted
inserted
replaced
44470:a08bbdf839ae | 44477:ad718271a9eb |
---|---|
1 from __future__ import absolute_import | |
2 | |
3 import pygit2 | |
4 | |
5 from mercurial import ( | |
6 match as matchmod, | |
7 pathutil, | |
8 pycompat, | |
9 util, | |
10 ) | |
11 from mercurial.interfaces import ( | |
12 repository, | |
13 util as interfaceutil, | |
14 ) | |
15 from . import gitutil | |
16 | |
17 | |
@interfaceutil.implementer(repository.imanifestdict)
class gittreemanifest(object):
    """Expose git trees (and optionally a builder's overlay) as a manifestdict.

    Very similar to mercurial.manifest.treemanifest.

    Paths are bytes using b'/' as the separator. Reads consult the
    pending-changes overlay first and fall back to the git tree.
    """

    def __init__(self, git_repo, root_tree, pending_changes):
        """Initializer.

        Args:
          git_repo: The git_repo we're walking (required to look up child
            trees).
          root_tree: The root Git tree object for this manifest.
          pending_changes: A dict in which pending changes will be
            tracked. The enclosing memgittreemanifestctx will use this to
            construct any required Tree objects in Git during its
            `write()` method. May be None, in which case a fresh private
            dict is used.
        """
        self._git_repo = git_repo
        self._tree = root_tree
        if pending_changes is None:
            pending_changes = {}
        # dict of path: Optional[Tuple(node, flags)]
        # A value of None marks the path as deleted.
        self._pending_changes = pending_changes

    def _resolve_entry(self, path):
        """Given a path, load its node and flags, or raise KeyError if missing.

        This takes into account any pending writes in the builder.

        Returns:
          A (node, flags) tuple.

        Raises:
          KeyError: the path is marked deleted in the overlay, or is not
            present in the git tree.
          ValueError: the tree entry has a file mode other than regular
            blob, executable blob or symlink.
        """
        if path in self._pending_changes:
            val = self._pending_changes[path]
            if val is None:
                # Deleted in the overlay.
                raise KeyError(path)
            return val
        upath = pycompat.fsdecode(path)
        comps = upath.split('/')
        # Descend one directory at a time. Each component must be looked
        # up in the tree reached so far, not in the root tree, otherwise
        # anything nested deeper than one level resolves incorrectly.
        t = self._tree
        for comp in comps[:-1]:
            te = t[comp]
            t = self._git_repo[te.id]
        ent = t[comps[-1]]
        if ent.filemode == pygit2.GIT_FILEMODE_BLOB:
            flags = b''
        elif ent.filemode == pygit2.GIT_FILEMODE_BLOB_EXECUTABLE:
            flags = b'x'
        elif ent.filemode == pygit2.GIT_FILEMODE_LINK:
            flags = b'l'
        else:
            raise ValueError('unsupported mode %s' % oct(ent.filemode))
        return ent.id.raw, flags

    def __getitem__(self, path):
        """Return the node for path, raising KeyError if it is missing."""
        return self._resolve_entry(path)[0]

    def find(self, path):
        """Return the (node, flags) tuple for path, or raise KeyError."""
        return self._resolve_entry(path)

    def __len__(self):
        """Return the number of paths produced by a full walk."""
        return len(list(self.walk(matchmod.always())))

    def __nonzero__(self):
        """Return True if iterating the manifest yields at least one path."""
        try:
            next(iter(self))
            return True
        except StopIteration:
            return False

    __bool__ = __nonzero__

    def __contains__(self, path):
        """Return True if path resolves to an entry (overlay included)."""
        try:
            self._resolve_entry(path)
            return True
        except KeyError:
            return False

    def iterkeys(self):
        """Return an iterator over all paths, in sorted order."""
        return self.walk(matchmod.always())

    def keys(self):
        """Return a list of all paths, in sorted order."""
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __setitem__(self, path, node):
        """Record node for path in the overlay.

        The flags currently resolved for path are kept; b'' is used if
        the path does not resolve yet.
        """
        self._pending_changes[path] = node, self.flags(path)

    def __delitem__(self, path):
        """Mark path as deleted in the overlay."""
        # TODO: should probably KeyError for already-deleted files?
        self._pending_changes[path] = None

    def filesnotin(self, other, match=None):
        """Return the set of paths in this manifest that are not in other.

        If match is given, only paths accepted by it are considered and
        its bad-path callback is replaced with a no-op.
        """
        if match is not None:
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(other.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in other}

    @util.propertycache
    def _dirs(self):
        # Computed once on first access and cached on the instance.
        return pathutil.dirs(self)

    def hasdir(self, dir):
        """Return True if dir is a directory containing some manifest path."""
        return dir in self._dirs

    def diff(self, other, match=None, clean=False):
        # TODO
        assert False

    def setflag(self, path, flag):
        """Record flag for path in the overlay, keeping its current node.

        Raises KeyError if the path does not resolve.
        """
        node, unused_flag = self._resolve_entry(path)
        self._pending_changes[path] = node, flag

    def get(self, path, default=None):
        """Return the node for path, or default if it is missing."""
        try:
            return self._resolve_entry(path)[0]
        except KeyError:
            return default

    def flags(self, path):
        """Return the flags for path, or b'' if it is missing."""
        try:
            return self._resolve_entry(path)[1]
        except KeyError:
            return b''

    def copy(self):
        """Return an independent manifest over the same git tree.

        The overlay is copied, so changes made to the copy do not show up
        in this manifest and vice versa.
        """
        return gittreemanifest(
            self._git_repo, self._tree, dict(self._pending_changes)
        )

    def items(self):
        """Yield (path, node) pairs in sorted path order."""
        for f in self:
            # TODO: build a proper iterator version of this
            yield f, self[f]

    def iteritems(self):
        return self.items()

    def iterentries(self):
        """Yield (path, node, flags) triples in sorted path order."""
        for f in self:
            # TODO: build a proper iterator version of this
            node, flags = self._resolve_entry(f)
            yield f, node, flags

    def text(self):
        assert False  # TODO can this method move out of the manifest iface?

    def _walkonetree(self, tree, match, subdir):
        """Recursively yield the matching file paths under tree.

        subdir is the bytes prefix (empty, or ending in b'/') that is
        prepended to each entry name to build its full path.
        """
        for te in tree:
            # TODO: can we prune dir walks with the matcher?
            realname = subdir + pycompat.fsencode(te.name)
            if te.type == r'tree':
                for inner in self._walkonetree(
                    self._git_repo[te.id], match, realname + b'/'
                ):
                    yield inner
                # Only the files inside a directory are manifest entries;
                # the directory itself must not be yielded.
                continue
            if not match(realname):
                continue
            yield realname

    def walk(self, match):
        """Return an iterator over the sorted paths accepted by match.

        Paths from the git tree are merged with the overlay: pending
        paths accepted by match are added and paths marked deleted are
        removed.
        """
        # TODO: this is a very lazy way to merge in the pending
        # changes. There is absolutely room for optimization here by
        # being clever about walking over the sets...
        baseline = set(self._walkonetree(self._tree, match, b''))
        deleted = {p for p, v in self._pending_changes.items() if v is None}
        pend = {p for p in self._pending_changes if match(p)}
        return iter(sorted((baseline | pend) - deleted))
187 | |
188 | |
@interfaceutil.implementer(repository.imanifestrevisionstored)
class gittreemanifestctx(object):
    """Read-only manifest context backed by a single git tree."""

    def __init__(self, repo, gittree):
        """Store the git repository and the root tree of this revision."""
        self._repo = repo
        self._tree = gittree

    def read(self):
        """Return a gittreemanifest over this tree with an empty overlay."""
        return gittreemanifest(self._repo, self._tree, None)

    def copy(self):
        """Return a writable context over the same tree."""
        # NB: it's important that we return a memgittreemanifestctx
        # because the caller expects a mutable manifest.
        return memgittreemanifestctx(self._repo, self._tree)

    def find(self, path):
        """Return the (node, flags) tuple for path.

        Raises KeyError if the path is not present in the manifest.
        """
        # The lookup result used to be computed and then dropped, so
        # callers always received None.
        return self.read().find(path)
205 | |
206 | |
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memgittreemanifestctx(object):
    """Writable manifest context: a git tree plus a dict of pending edits."""

    def __init__(self, repo, tree):
        self._repo = repo
        self._tree = tree
        # dict of path: Optional[Tuple(node, flags)]
        self._pending_changes = {}

    def read(self):
        """Return a gittreemanifest that shares this context's pending edits."""
        return gittreemanifest(self._repo, self._tree, self._pending_changes)

    def copy(self):
        """Return a new writable context over the same tree."""
        # TODO: if we have a builder in play, what should happen here?
        # Maybe we can shuffle copy() into the immutable interface.
        return memgittreemanifestctx(self._repo, self._tree)

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        """Apply the pending edits via TreeBuilders and write the trees.

        Returns the ``.raw`` value of what the root builder's write()
        returns. Only ``match`` and the pending edits are consulted; the
        other arguments are unused here.
        """
        # We're not (for now, anyway) going to audit filenames, so we
        # can ignore added and removed.

        # TODO what does this match argument get used for? hopefully
        # just narrow?
        assert not match or isinstance(match, matchmod.alwaysmatcher)

        touched_dirs = pathutil.dirs(self._pending_changes)
        # Both dicts are keyed by b'' for the root and b'/a/b' for nested
        # directories.
        trees = {b'': self._tree}
        # path: treebuilder
        builders = {b'': self._repo.TreeBuilder(self._tree)}

        # get a TreeBuilder for every tree in the touched_dirs set
        for touched in sorted(touched_dirs, key=lambda x: (len(x), x)):
            if touched == b'':
                # loaded root tree above
                continue
            full = b''
            for part in touched.split(b'/'):
                parent = trees[full]
                try:
                    subtree = self._repo[parent[pycompat.fsdecode(part)]]
                except KeyError:
                    # new directory
                    subtree = None
                full += b'/' + part
                if subtree is None:
                    # new directory, use an empty dict to easily
                    # generate KeyError as any nested new dirs get
                    # created.
                    trees[full] = {}
                    builders[full] = self._repo.TreeBuilder()
                else:
                    # existing directory
                    trees[full] = subtree
                    builders[full] = self._repo.TreeBuilder(subtree)

        # Mercurial flag -> git file mode.
        modes = {
            b'': pygit2.GIT_FILEMODE_BLOB,
            b'x': pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
            b'l': pygit2.GIT_FILEMODE_LINK,
        }
        for fname, info in self._pending_changes.items():
            head, sep, basename = fname.rpartition(b'/')
            # Files at the root live in the b'' builder; everything else
            # is keyed by its slash-prefixed directory.
            dirname = b'/' + head if sep else b''
            if info is None:
                builders[dirname].remove(pycompat.fsdecode(basename))
                continue
            node, flag = info
            mode = modes[flag]
            builders[dirname].insert(
                pycompat.fsdecode(basename), gitutil.togitnode(node), mode
            )

        # This visits the buffered TreeBuilders in deepest-first
        # order, bubbling up the edits.
        for key in sorted(builders, key=len, reverse=True):
            if not key:
                # The root sorts last and is written below.
                break
            parentkey, name = key.rsplit(b'/', 1)
            child = builders[key]
            builders[parentkey].insert(
                pycompat.fsdecode(name), child.write(), pygit2.GIT_FILEMODE_TREE
            )
        return builders[b''].write().raw