Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 1083:30974cf73435
Add some docstrings to revlog.py
author | mpm@selenic.com |
---|---|
date | Sat, 27 Aug 2005 01:43:48 -0700 |
parents | 55bf5cfde69e |
children | 142b5d5ec9cc |
comparison
equal
deleted
inserted
replaced
1082:ce96e316278a | 1083:30974cf73435 |
---|---|
1 # revlog.py - storage back-end for mercurial | 1 """ |
2 # | 2 revlog.py - storage back-end for mercurial |
3 # This provides efficient delta storage with O(1) retrieve and append | 3 |
4 # and O(changes) merge between branches | 4 This provides efficient delta storage with O(1) retrieve and append |
5 # | 5 and O(changes) merge between branches |
6 # Copyright 2005 Matt Mackall <mpm@selenic.com> | 6 |
7 # | 7 Copyright 2005 Matt Mackall <mpm@selenic.com> |
8 # This software may be used and distributed according to the terms | 8 |
9 # of the GNU General Public License, incorporated herein by reference. | 9 This software may be used and distributed according to the terms |
10 of the GNU General Public License, incorporated herein by reference. | |
11 """ | |
10 | 12 |
11 import zlib, struct, sha, binascii, heapq | 13 import zlib, struct, sha, binascii, heapq |
12 from mercurial import mdiff | 14 from mercurial import mdiff |
13 | 15 |
14 def hex(node): return binascii.hexlify(node) | 16 def hex(node): return binascii.hexlify(node) |
15 def bin(node): return binascii.unhexlify(node) | 17 def bin(node): return binascii.unhexlify(node) |
16 def short(node): return hex(node[:6]) | 18 def short(node): return hex(node[:6]) |
17 | 19 |
18 def compress(text): | 20 def compress(text): |
21 """ generate a possibly-compressed representation of text """ | |
19 if not text: return text | 22 if not text: return text |
20 if len(text) < 44: | 23 if len(text) < 44: |
21 if text[0] == '\0': return text | 24 if text[0] == '\0': return text |
22 return 'u' + text | 25 return 'u' + text |
23 bin = zlib.compress(text) | 26 bin = zlib.compress(text) |
25 if text[0] == '\0': return text | 28 if text[0] == '\0': return text |
26 return 'u' + text | 29 return 'u' + text |
27 return bin | 30 return bin |
28 | 31 |
29 def decompress(bin): | 32 def decompress(bin): |
33 """ decompress the given input """ | |
30 if not bin: return bin | 34 if not bin: return bin |
31 t = bin[0] | 35 t = bin[0] |
32 if t == '\0': return bin | 36 if t == '\0': return bin |
33 if t == 'x': return zlib.decompress(bin) | 37 if t == 'x': return zlib.decompress(bin) |
34 if t == 'u': return bin[1:] | 38 if t == 'u': return bin[1:] |
35 raise RevlogError("unknown compression type %s" % t) | 39 raise RevlogError("unknown compression type %s" % t) |
36 | 40 |
37 def hash(text, p1, p2): | 41 def hash(text, p1, p2): |
42 """generate a hash from the given text and its parent hashes | |
43 | |
44 This hash combines both the current file contents and its history | |
45 in a manner that makes it easy to distinguish nodes with the same | |
46 content in the revision graph. | |
47 """ | |
38 l = [p1, p2] | 48 l = [p1, p2] |
39 l.sort() | 49 l.sort() |
40 s = sha.new(l[0]) | 50 s = sha.new(l[0]) |
41 s.update(l[1]) | 51 s.update(l[1]) |
42 s.update(text) | 52 s.update(text) |
44 | 54 |
45 nullid = "\0" * 20 | 55 nullid = "\0" * 20 |
46 indexformat = ">4l20s20s20s" | 56 indexformat = ">4l20s20s20s" |
47 | 57 |
48 class lazyparser: | 58 class lazyparser: |
59 """ | |
60 this class avoids the need to parse the entirety of large indices | |
61 | |
62 By default we parse and load 1000 entries at a time. | |
63 | |
64 If no position is specified, we load the whole index, and replace | |
65 the lazy objects in revlog with the underlying objects for | |
66 efficiency in cases where we look at most of the nodes. | |
67 """ | |
49 def __init__(self, data, revlog): | 68 def __init__(self, data, revlog): |
50 self.data = data | 69 self.data = data |
51 self.s = struct.calcsize(indexformat) | 70 self.s = struct.calcsize(indexformat) |
52 self.l = len(data)/self.s | 71 self.l = len(data)/self.s |
53 self.index = [None] * self.l | 72 self.index = [None] * self.l |
74 self.index[i] = e | 93 self.index[i] = e |
75 self.map[e[6]] = i | 94 self.map[e[6]] = i |
76 i += 1 | 95 i += 1 |
77 | 96 |
78 class lazyindex: | 97 class lazyindex: |
98 """a lazy version of the index array""" | |
79 def __init__(self, parser): | 99 def __init__(self, parser): |
80 self.p = parser | 100 self.p = parser |
81 def __len__(self): | 101 def __len__(self): |
82 return len(self.p.index) | 102 return len(self.p.index) |
83 def load(self, pos): | 103 def load(self, pos): |
87 return self.p.index[pos] or self.load(pos) | 107 return self.p.index[pos] or self.load(pos) |
88 def append(self, e): | 108 def append(self, e): |
89 self.p.index.append(e) | 109 self.p.index.append(e) |
90 | 110 |
91 class lazymap: | 111 class lazymap: |
112 """a lazy version of the node map""" | |
92 def __init__(self, parser): | 113 def __init__(self, parser): |
93 self.p = parser | 114 self.p = parser |
94 def load(self, key): | 115 def load(self, key): |
95 if self.p.all: return | 116 if self.p.all: return |
96 n = self.p.data.find(key) | 117 n = self.p.data.find(key) |
121 self.p.map[key] = val | 142 self.p.map[key] = val |
122 | 143 |
123 class RevlogError(Exception): pass | 144 class RevlogError(Exception): pass |
124 | 145 |
125 class revlog: | 146 class revlog: |
147 """ | |
148 the underlying revision storage object | |
149 | |
150 A revlog consists of two parts, an index and the revision data. | |
151 | |
152 The index is a file with a fixed record size containing | |
153 information on each revision, includings its nodeid (hash), the | |
154 nodeids of its parents, the position and offset of its data within | |
155 the data file, and the revision it's based on. Finally, each entry | |
156 contains a linkrev entry that can serve as a pointer to external | |
157 data. | |
158 | |
159 The revision data itself is a linear collection of data chunks. | |
160 Each chunk represents a revision and is usually represented as a | |
161 delta against the previous chunk. To bound lookup time, runs of | |
162 deltas are limited to about 2 times the length of the original | |
163 version data. This makes retrieval of a version proportional to | |
164 its size, or O(1) relative to the number of revisions. | |
165 | |
166 Both pieces of the revlog are written to in an append-only | |
167 fashion, which means we never need to rewrite a file to insert or | |
168 remove data, and can use some simple techniques to avoid the need | |
169 for locking while reading. | |
170 """ | |
126 def __init__(self, opener, indexfile, datafile): | 171 def __init__(self, opener, indexfile, datafile): |
172 """ | |
173 create a revlog object | |
174 | |
175 opener is a function that abstracts the file opening operation | |
176 and can be used to implement COW semantics or the like. | |
177 """ | |
127 self.indexfile = indexfile | 178 self.indexfile = indexfile |
128 self.datafile = datafile | 179 self.datafile = datafile |
129 self.opener = opener | 180 self.opener = opener |
130 self.cache = None | 181 self.cache = None |
131 | 182 |
191 reachable[p] = 1 | 242 reachable[p] = 1 |
192 visit.append(p) | 243 visit.append(p) |
193 return reachable | 244 return reachable |
194 | 245 |
195 def heads(self, stop=None): | 246 def heads(self, stop=None): |
247 """return the list of all nodes that have no children""" | |
196 p = {} | 248 p = {} |
197 h = [] | 249 h = [] |
198 stoprev = 0 | 250 stoprev = 0 |
199 if stop and stop in self.nodemap: | 251 if stop and stop in self.nodemap: |
200 stoprev = self.rev(stop) | 252 stoprev = self.rev(stop) |
201 | 253 |
202 for r in range(self.count() - 1, -1, -1): | 254 for r in range(self.count() - 1, -1, -1): |
203 n = self.node(r) | 255 n = self.node(r) |
204 if n not in p: | 256 if n not in p: |
205 h.append(n) | 257 h.append(n) |
206 if n == stop: | 258 if n == stop: |
210 for pn in self.parents(n): | 262 for pn in self.parents(n): |
211 p[pn] = 1 | 263 p[pn] = 1 |
212 return h | 264 return h |
213 | 265 |
214 def children(self, node): | 266 def children(self, node): |
267 """find the children of a given node""" | |
215 c = [] | 268 c = [] |
216 p = self.rev(node) | 269 p = self.rev(node) |
217 for r in range(p + 1, self.count()): | 270 for r in range(p + 1, self.count()): |
218 n = self.node(r) | 271 n = self.node(r) |
219 for pn in self.parents(n): | 272 for pn in self.parents(n): |
223 elif pn == nullid: | 276 elif pn == nullid: |
224 continue | 277 continue |
225 return c | 278 return c |
226 | 279 |
227 def lookup(self, id): | 280 def lookup(self, id): |
281 """locate a node based on revision number or subset of hex nodeid""" | |
228 try: | 282 try: |
229 rev = int(id) | 283 rev = int(id) |
230 if str(rev) != id: raise ValueError | 284 if str(rev) != id: raise ValueError |
231 if rev < 0: rev = self.count() + rev | 285 if rev < 0: rev = self.count() + rev |
232 if rev < 0 or rev >= self.count(): raise ValueError | 286 if rev < 0 or rev >= self.count(): raise ValueError |
241 return c[0] | 295 return c[0] |
242 | 296 |
243 return None | 297 return None |
244 | 298 |
245 def diff(self, a, b): | 299 def diff(self, a, b): |
300 """return a delta between two revisions""" | |
246 return mdiff.textdiff(a, b) | 301 return mdiff.textdiff(a, b) |
247 | 302 |
248 def patches(self, t, pl): | 303 def patches(self, t, pl): |
304 """apply a list of patches to a string""" | |
249 return mdiff.patches(t, pl) | 305 return mdiff.patches(t, pl) |
250 | 306 |
251 def delta(self, node): | 307 def delta(self, node): |
308 """return or calculate a delta between a node and its predecessor""" | |
252 r = self.rev(node) | 309 r = self.rev(node) |
253 b = self.base(r) | 310 b = self.base(r) |
254 if r == b: | 311 if r == b: |
255 return self.diff(self.revision(self.node(r - 1)), | 312 return self.diff(self.revision(self.node(r - 1)), |
256 self.revision(node)) | 313 self.revision(node)) |
259 f.seek(self.start(r)) | 316 f.seek(self.start(r)) |
260 data = f.read(self.length(r)) | 317 data = f.read(self.length(r)) |
261 return decompress(data) | 318 return decompress(data) |
262 | 319 |
263 def revision(self, node): | 320 def revision(self, node): |
321 """return an uncompressed revision of a given""" | |
264 if node == nullid: return "" | 322 if node == nullid: return "" |
265 if self.cache and self.cache[0] == node: return self.cache[2] | 323 if self.cache and self.cache[0] == node: return self.cache[2] |
266 | 324 |
325 # look up what we need to read | |
267 text = None | 326 text = None |
268 rev = self.rev(node) | 327 rev = self.rev(node) |
269 start, length, base, link, p1, p2, node = self.index[rev] | 328 start, length, base, link, p1, p2, node = self.index[rev] |
270 end = start + length | 329 end = start + length |
271 if base != rev: start = self.start(base) | 330 if base != rev: start = self.start(base) |
272 | 331 |
332 # do we have useful data cached? | |
273 if self.cache and self.cache[1] >= base and self.cache[1] < rev: | 333 if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
274 base = self.cache[1] | 334 base = self.cache[1] |
275 start = self.start(base + 1) | 335 start = self.start(base + 1) |
276 text = self.cache[2] | 336 text = self.cache[2] |
277 last = 0 | 337 last = 0 |
298 | 358 |
299 self.cache = (node, rev, text) | 359 self.cache = (node, rev, text) |
300 return text | 360 return text |
301 | 361 |
302 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): | 362 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): |
363 """add a revision to the log | |
364 | |
365 text - the revision data to add | |
366 transaction - the transaction object used for rollback | |
367 link - the linkrev data to add | |
368 p1, p2 - the parent nodeids of the revision | |
369 d - an optional precomputed delta | |
370 """ | |
303 if text is None: text = "" | 371 if text is None: text = "" |
304 if p1 is None: p1 = self.tip() | 372 if p1 is None: p1 = self.tip() |
305 if p2 is None: p2 = nullid | 373 if p2 is None: p2 = nullid |
306 | 374 |
307 node = hash(text, p1, p2) | 375 node = hash(text, p1, p2) |
347 | 415 |
348 self.cache = (node, n, text) | 416 self.cache = (node, n, text) |
349 return node | 417 return node |
350 | 418 |
351 def ancestor(self, a, b): | 419 def ancestor(self, a, b): |
420 """calculate the least common ancestor of nodes a and b""" | |
352 # calculate the distance of every node from root | 421 # calculate the distance of every node from root |
353 dist = {nullid: 0} | 422 dist = {nullid: 0} |
354 for i in xrange(self.count()): | 423 for i in xrange(self.count()): |
355 n = self.node(i) | 424 n = self.node(i) |
356 p1, p2 = self.parents(n) | 425 p1, p2 = self.parents(n) |
385 ly = y.next() | 454 ly = y.next() |
386 elif lx > ly: | 455 elif lx > ly: |
387 lx = x.next() | 456 lx = x.next() |
388 | 457 |
389 def group(self, linkmap): | 458 def group(self, linkmap): |
390 # given a list of changeset revs, return a set of deltas and | 459 """calculate a delta group |
391 # metadata corresponding to nodes. the first delta is | 460 |
392 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to | 461 Given a list of changeset revs, return a set of deltas and |
393 # have this parent as it has all history before these | 462 metadata corresponding to nodes. the first delta is |
394 # changesets. parent is parent[0] | 463 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to |
395 | 464 have this parent as it has all history before these |
465 changesets. parent is parent[0] | |
466 """ | |
396 revs = [] | 467 revs = [] |
397 needed = {} | 468 needed = {} |
398 | 469 |
399 # find file nodes/revs that match changeset revs | 470 # find file nodes/revs that match changeset revs |
400 for i in xrange(0, self.count()): | 471 for i in xrange(0, self.count()): |
496 yield d | 567 yield d |
497 | 568 |
498 yield struct.pack(">l", 0) | 569 yield struct.pack(">l", 0) |
499 | 570 |
500 def addgroup(self, revs, linkmapper, transaction, unique=0): | 571 def addgroup(self, revs, linkmapper, transaction, unique=0): |
501 # given a set of deltas, add them to the revision log. the | 572 """ |
502 # first delta is against its parent, which should be in our | 573 add a delta group |
503 # log, the rest are against the previous delta. | 574 |
504 | 575 given a set of deltas, add them to the revision log. the |
505 # track the base of the current delta log | 576 first delta is against its parent, which should be in our |
577 log, the rest are against the previous delta. | |
578 """ | |
579 | |
580 #track the base of the current delta log | |
506 r = self.count() | 581 r = self.count() |
507 t = r - 1 | 582 t = r - 1 |
508 node = nullid | 583 node = nullid |
509 | 584 |
510 base = prev = -1 | 585 base = prev = -1 |