mercurial-scm/hg-stable: mercurial/revlog.py comparison

comparison mercurial/revlog.py @ 1083:30974cf73435

Add some docstrings to revlog.py

author	mpm@selenic.com
date	Sat, 27 Aug 2005 01:43:48 -0700
parents	55bf5cfde69e
children	142b5d5ec9cc

comparison

equal deleted inserted replaced

-:ce96e316278a
+:30974cf73435
-# revlog.py - storage back-end for mercurial
+"""
-#
+revlog.py - storage back-end for mercurial
-# This provides efficient delta storage with O(1) retrieve and append
-# and O(changes) merge between branches
+This provides efficient delta storage with O(1) retrieve and append
-#
+and O(changes) merge between branches
-# Copyright 2005 Matt Mackall <mpm@selenic.com>
-#
+Copyright 2005 Matt Mackall <mpm@selenic.com>
-# This software may be used and distributed according to the terms
-# of the GNU General Public License, incorporated herein by reference.
+This software may be used and distributed according to the terms
+of the GNU General Public License, incorporated herein by reference.
+"""
 import zlib, struct, sha, binascii, heapq
 from mercurial import mdiff
 def hex(node): return binascii.hexlify(node)
 def bin(node): return binascii.unhexlify(node)
 def short(node): return hex(node[:6])
 def compress(text):
+""" generate a possibly-compressed representation of text """
 if not text: return text
 if len(text) < 44:
 if text[0] == '\0': return text
 return 'u' + text
 bin = zlib.compress(text)
 if text[0] == '\0': return text
 return 'u' + text
 return bin
 def decompress(bin):
+""" decompress the given input """
 if not bin: return bin
 t = bin[0]
 if t == '\0': return bin
 if t == 'x': return zlib.decompress(bin)
 if t == 'u': return bin[1:]
 raise RevlogError("unknown compression type %s" % t)
 def hash(text, p1, p2):
+"""generate a hash from the given text and its parent hashes
+This hash combines both the current file contents and its history
+in a manner that makes it easy to distinguish nodes with the same
+content in the revision graph.
+"""
 l = [p1, p2]
 l.sort()
 s = sha.new(l[0])
 s.update(l[1])
 s.update(text)
 nullid = "\0" * 20
 indexformat = ">4l20s20s20s"
 class lazyparser:
+"""
+this class avoids the need to parse the entirety of large indices
+By default we parse and load 1000 entries at a time.
+If no position is specified, we load the whole index, and replace
+the lazy objects in revlog with the underlying objects for
+efficiency in cases where we look at most of the nodes.
+"""
 def __init__(self, data, revlog):
 self.data = data
 self.s = struct.calcsize(indexformat)
 self.l = len(data)/self.s
 self.index = [None] * self.l
 self.index[i] = e
 self.map[e[6]] = i
 i += 1
 class lazyindex:
+"""a lazy version of the index array"""
 def __init__(self, parser):
 self.p = parser
 def __len__(self):
 return len(self.p.index)
 def load(self, pos):
 return self.p.index[pos] or self.load(pos)
 def append(self, e):
 self.p.index.append(e)
 class lazymap:
+"""a lazy version of the node map"""
 def __init__(self, parser):
 self.p = parser
 def load(self, key):
 if self.p.all: return
 n = self.p.data.find(key)
 self.p.map[key] = val
 class RevlogError(Exception): pass
 class revlog:
+"""
+the underlying revision storage object
+A revlog consists of two parts, an index and the revision data.
+The index is a file with a fixed record size containing
+information on each revision, including its nodeid (hash), the
+nodeids of its parents, the offset and length of its data within
+the data file, and the revision it's based on. Finally, each entry
+contains a linkrev entry that can serve as a pointer to external
+data.
+The revision data itself is a linear collection of data chunks.
+Each chunk represents a revision and is usually represented as a
+delta against the previous chunk. To bound lookup time, runs of
+deltas are limited to about 2 times the length of the original
+version data. This makes retrieval of a version proportional to
+its size, or O(1) relative to the number of revisions.
+Both pieces of the revlog are written to in an append-only
+fashion, which means we never need to rewrite a file to insert or
+remove data, and can use some simple techniques to avoid the need
+for locking while reading.
+"""
 def __init__(self, opener, indexfile, datafile):
+"""
+create a revlog object
+opener is a function that abstracts the file opening operation
+and can be used to implement COW semantics or the like.
+"""
 self.indexfile = indexfile
 self.datafile = datafile
 self.opener = opener
 self.cache = None
 reachable[p] = 1
 visit.append(p)
 return reachable
 def heads(self, stop=None):
+"""return the list of all nodes that have no children"""
 p = {}
 h = []
 stoprev = 0
 if stop and stop in self.nodemap:
 stoprev = self.rev(stop)
 for r in range(self.count() - 1, -1, -1):
 n = self.node(r)
 if n not in p:
 h.append(n)
 if n == stop:
 for pn in self.parents(n):
 p[pn] = 1
 return h
 def children(self, node):
+"""find the children of a given node"""
 c = []
 p = self.rev(node)
 for r in range(p + 1, self.count()):
 n = self.node(r)
 for pn in self.parents(n):
 elif pn == nullid:
 continue
 return c
 def lookup(self, id):
+"""locate a node based on revision number or subset of hex nodeid"""
 try:
 rev = int(id)
 if str(rev) != id: raise ValueError
 if rev < 0: rev = self.count() + rev
 if rev < 0 or rev >= self.count(): raise ValueError
 return c[0]
 return None
 def diff(self, a, b):
+"""return a delta between two revisions"""
 return mdiff.textdiff(a, b)
 def patches(self, t, pl):
+"""apply a list of patches to a string"""
 return mdiff.patches(t, pl)
 def delta(self, node):
+"""return or calculate a delta between a node and its predecessor"""
 r = self.rev(node)
 b = self.base(r)
 if r == b:
 return self.diff(self.revision(self.node(r - 1)),
 self.revision(node))
 f.seek(self.start(r))
 data = f.read(self.length(r))
 return decompress(data)
 def revision(self, node):
+"""return an uncompressed revision of a given node"""
 if node == nullid: return ""
 if self.cache and self.cache[0] == node: return self.cache[2]
+# look up what we need to read
 text = None
 rev = self.rev(node)
 start, length, base, link, p1, p2, node = self.index[rev]
 end = start + length
 if base != rev: start = self.start(base)
+# do we have useful data cached?
 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
 base = self.cache[1]
 start = self.start(base + 1)
 text = self.cache[2]
 last = 0
 self.cache = (node, rev, text)
 return text
 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
+"""add a revision to the log
+text - the revision data to add
+transaction - the transaction object used for rollback
+link - the linkrev data to add
+p1, p2 - the parent nodeids of the revision
+d - an optional precomputed delta
+"""
 if text is None: text = ""
 if p1 is None: p1 = self.tip()
 if p2 is None: p2 = nullid
 node = hash(text, p1, p2)
 self.cache = (node, n, text)
 return node
 def ancestor(self, a, b):
+"""calculate the least common ancestor of nodes a and b"""
 # calculate the distance of every node from root
 dist = {nullid: 0}
 for i in xrange(self.count()):
 n = self.node(i)
 p1, p2 = self.parents(n)
 ly = y.next()
 elif lx > ly:
 lx = x.next()
 def group(self, linkmap):
-# given a list of changeset revs, return a set of deltas and
+"""calculate a delta group
-# metadata corresponding to nodes. the first delta is
-# parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
+Given a list of changeset revs, return a set of deltas and
-# have this parent as it has all history before these
+metadata corresponding to nodes. the first delta is
-# changesets. parent is parent[0]
+parent(nodes[0]) -> nodes[0]. the receiver is guaranteed to
+have this parent as it has all history before these
+changesets. parent is parent[0]
+"""
 revs = []
 needed = {}
 # find file nodes/revs that match changeset revs
 for i in xrange(0, self.count()):
 yield d
 yield struct.pack(">l", 0)
 def addgroup(self, revs, linkmapper, transaction, unique=0):
-# given a set of deltas, add them to the revision log. the
+"""
-# first delta is against its parent, which should be in our
+add a delta group
-# log, the rest are against the previous delta.
+given a set of deltas, add them to the revision log. the
-# track the base of the current delta log
+first delta is against its parent, which should be in our
+log, the rest are against the previous delta.
+"""
+# track the base of the current delta log
 r = self.count()
 t = r - 1
 node = nullid
 base = prev = -1

Mercurial > public > mercurial-scm > hg-stable

comparison mercurial/revlog.py @ 1083:30974cf73435