view mercurial/hg.py @ 249:619e775aa7f9
import and startup cleanups
add commands:run()
add copyright notice to commands
eliminate/reorganize imports to speed up start time:
0.5b:
$ time bash -c 'for i in `seq 100`; do ~/bin/hg > /dev/null; done'
real 0m7.718s
user 0m6.719s
sys 0m0.794s
new:
$ time bash -c 'for i in `seq 100`; do hg > /dev/null; done'
real 0m2.171s
user 0m1.684s
sys 0m0.444s
just python:
$ time bash -c 'for i in `seq 100`; do python -c pass; done'
real 0m0.988s
user 0m0.771s
sys 0m0.207s
Ignoring the fixed cost of loading the Python interpreter, we're 5.6
times faster. With the Python load time, we're still 3.5 times faster.
manifest hash: acce5882a55c76eb165316f5741724c8ce4ef587
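The reorganization behind these numbers is visible in the source below: modules that only some code paths need (socket, time, re, tempfile, urllib, urllib2, zlib, byterange) are imported inside the functions that use them rather than at module load. A minimal sketch of that deferred-import pattern, for illustration only (the fetch function and its use of urllib2 are placeholders, not code from this changeset):

```python
# Deferred-import sketch (illustrative, not code from this changeset):
# the cost of importing a heavy module is paid only on the code path
# that actually needs it, instead of at every "hg" startup.

def fetch(url):
    # urllib2 is only required when talking to a remote repository,
    # so importing it here keeps purely local commands from loading it.
    import urllib2
    return urllib2.urlopen(url).read()
```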
| author   | mpm@selenic.com                 |
|----------|---------------------------------|
| date     | Sat, 04 Jun 2005 14:16:32 -0800 |
| parents  | 863b508c5b36                    |
| children | 3fd8fc14b12f                    |
line source
# hg.py - repository classes for mercurial
#
# Copyright 2005 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

import sys, struct, os
from mercurial import lock
from mercurial.transaction import *
from mercurial.revlog import *
from difflib import SequenceMatcher

class filelog(revlog):
    def __init__(self, opener, path):
        revlog.__init__(self, opener,
                        os.path.join("data", path + ".i"),
                        os.path.join("data", path + ".d"))

    def read(self, node):
        return self.revision(node)

    def add(self, text, transaction, link, p1=None, p2=None):
        return self.addrevision(text, transaction, link, p1, p2)

    def annotate(self, node):

        def decorate(text, rev):
            return [(rev, l) for l in text.splitlines(1)]

        def strip(annotation):
            return [e[1] for e in annotation]

        def pair(parent, child):
            new = []
            sm = SequenceMatcher(None, strip(parent), strip(child))
            for o, m, n, s, t in sm.get_opcodes():
                if o == 'equal':
                    new += parent[m:n]
                else:
                    new += child[s:t]
            return new

        # find all ancestors
        needed = {node:1}
        visit = [node]
        while visit:
            n = visit.pop(0)
            for p in self.parents(n):
                if p not in needed:
                    needed[p] = 1
                    visit.append(p)
                else:
                    # count how many times we'll use this
                    needed[p] += 1

        # sort by revision which is a topological order
        visit = needed.keys()
        visit = [ (self.rev(n), n) for n in visit ]
        visit.sort()
        visit = [ p[1] for p in visit ]
        hist = {}

        for n in visit:
            curr = decorate(self.read(n), self.linkrev(n))
            for p in self.parents(n):
                if p != nullid:
                    curr = pair(hist[p], curr)
                    # trim the history of unneeded revs
                    needed[p] -= 1
                    if not needed[p]:
                        del hist[p]
            hist[n] = curr

        return hist[n]

class manifest(revlog):
    def __init__(self, opener):
        self.mapcache = None
        self.listcache = None
        self.addlist = None
        revlog.__init__(self, opener, "00manifest.i", "00manifest.d")

    def read(self, node):
        if self.mapcache and self.mapcache[0] == node:
            return self.mapcache[1].copy()
        text = self.revision(node)
        map = {}
        self.listcache = (text, text.splitlines(1))
        for l in self.listcache[1]:
            (f, n) = l.split('\0')
            map[f] = bin(n[:40])
        self.mapcache = (node, map)
        return map

    def diff(self, a, b):
        # this is sneaky, as we're not actually using a and b
        if self.listcache and self.addlist and self.listcache[0] == a:
            d = mdiff.diff(self.listcache[1], self.addlist, 1)
            if mdiff.patch(a, d) != b:
                sys.stderr.write("*** sortdiff failed, falling back ***\n")
                return mdiff.textdiff(a, b)
            return d
        else:
            return mdiff.textdiff(a, b)

    def add(self, map, transaction, link, p1=None, p2=None):
        files = map.keys()
        files.sort()

        self.addlist = ["%s\000%s\n" % (f, hex(map[f])) for f in files]
        text = "".join(self.addlist)

        n = self.addrevision(text, transaction, link, p1, p2)
        self.mapcache = (n, map)
        self.listcache = (text, self.addlist)
        self.addlist = None

        return n

class changelog(revlog):
    def __init__(self, opener):
        revlog.__init__(self, opener, "00changelog.i", "00changelog.d")

    def extract(self, text):
        if not text:
            return (nullid, "", "0", [], "")
        last = text.index("\n\n")
        desc = text[last + 2:]
        l = text[:last].splitlines()
        manifest = bin(l[0])
        user = l[1]
        date = l[2]
        files = l[3:]
        return (manifest, user, date, files, desc)

    def read(self, node):
        return self.extract(self.revision(node))

    def add(self, manifest, list, desc, transaction,
            p1=None, p2=None, user=None, date=None):
        import socket, time
        user = (user or
                os.environ.get("HGUSER") or
                os.environ.get("EMAIL") or
                os.environ.get("LOGNAME", "unknown") + '@' + socket.getfqdn())
        date = date or "%d %d" % (time.time(), time.timezone)
        list.sort()
        l = [hex(manifest), user, date] + list + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, self.count(), p1, p2)

class dirstate:
    def __init__(self, opener, ui, root):
        self.opener = opener
        self.root = root
        self.dirty = 0
        self.ui = ui
        self.map = None
        self.pl = None

    def __del__(self):
        if self.dirty:
            self.write()

    def __getitem__(self, key):
        try:
            return self.map[key]
        except TypeError:
            self.read()
            return self[key]

    def __contains__(self, key):
        if not self.map: self.read()
        return key in self.map

    def parents(self):
        if not self.pl:
            self.read()
        return self.pl

    def setparents(self, p1, p2 = nullid):
        self.dirty = 1
        self.pl = p1, p2

    def state(self, key):
        try:
            return self[key][0]
        except KeyError:
            return "?"

    def read(self):
        if self.map is not None: return self.map

        self.map = {}
        self.pl = [nullid, nullid]
        try:
            st = self.opener("dirstate").read()
        except: return
        self.pl = [st[:20], st[20: 40]]

        pos = 40
        while pos < len(st):
            e = struct.unpack(">cllll", st[pos:pos+17])
            l = e[4]
            pos += 17
            f = st[pos:pos + l]
            self.map[f] = e[:4]
            pos += l

    def update(self, files, state):
        ''' current states:
        n  normal
        m  needs merging
        i  invalid
        r  marked for removal
        a  marked for addition'''

        if not files: return
        self.read()
        self.dirty = 1
        for f in files:
            if state == "r":
                self.map[f] = ('r', 0, 0, 0)
            else:
                try:
                    s = os.stat(os.path.join(self.root, f))
                    self.map[f] = (state, s.st_mode, s.st_size, s.st_mtime)
                except OSError:
                    if state != "i": raise
                    self.map[f] = ('r', 0, 0, 0)

    def forget(self, files):
        if not files: return
        self.read()
        self.dirty = 1
        for f in files:
            try:
                del self.map[f]
            except KeyError:
                self.ui.warn("not in dirstate: %s!\n" % f)
                pass

    def clear(self):
        self.map = {}
        self.dirty = 1

    def write(self):
        st = self.opener("dirstate", "w")
        st.write("".join(self.pl))
        for f, e in self.map.items():
            e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f))
            st.write(e + f)
        self.dirty = 0

    def copy(self):
        self.read()
        return self.map.copy()

# used to avoid circular references so destructors work
def opener(base):
    p = base
    def o(path, mode="r"):
        if p[:7] == "http://":
            f = os.path.join(p, urllib.quote(path))
            return httprangereader(f)

        f = os.path.join(p, path)

        if mode != "r":
            try:
                s = os.stat(f)
            except OSError:
                d = os.path.dirname(f)
                if not os.path.isdir(d):
                    os.makedirs(d)
            else:
                if s.st_nlink > 1:
                    file(f + ".tmp", "w").write(file(f).read())
                    os.rename(f+".tmp", f)

        return file(f, mode)

    return o

class localrepository:
    def __init__(self, ui, path=None, create=0):
        self.remote = 0
        if path and path[:7] == "http://":
            self.remote = 1
            self.path = path
        else:
            if not path:
                p = os.getcwd()
                while not os.path.isdir(os.path.join(p, ".hg")):
                    p = os.path.dirname(p)
                    if p == "/": raise "No repo found"
                path = p
            self.path = os.path.join(path, ".hg")

        self.root = path
        self.ui = ui

        if create:
            os.mkdir(self.path)
            os.mkdir(self.join("data"))

        self.opener = opener(self.path)
        self.manifest = manifest(self.opener)
        self.changelog = changelog(self.opener)
        self.ignorelist = None
        self.tags = None

        if not self.remote:
            self.dirstate = dirstate(self.opener, ui, self.root)

    def ignore(self, f):
        import re
        if self.ignorelist is None:
            self.ignorelist = []
            try:
                l = open(os.path.join(self.root, ".hgignore"))
                for pat in l:
                    if pat != "\n":
                        self.ignorelist.append(re.compile(pat[:-1]))
            except IOError: pass
        for pat in self.ignorelist:
            if pat.search(f): return True
        return False

    def lookup(self, key):
        if self.tags is None:
            self.tags = {}
            try:
                fl = self.file(".hgtags")
                for l in fl.revision(fl.tip()).splitlines():
                    if l:
                        n, k = l.split(" ")
                        self.tags[k] = bin(n)
            except KeyError: pass
        try:
            return self.tags[key]
        except KeyError:
            return self.changelog.lookup(key)

    def join(self, f):
        return os.path.join(self.path, f)

    def wjoin(self, f):
        return os.path.join(self.root, f)

    def file(self, f):
        if f[0] == '/': f = f[1:]
        return filelog(self.opener, f)

    def transaction(self):
        return transaction(self.opener, self.join("journal"),
                           self.join("undo"))

    def recover(self):
        lock = self.lock()
        if os.path.exists(self.join("recover")):
            self.ui.status("attempting to rollback interrupted transaction\n")
            return rollback(self.opener, self.join("recover"))
        else:
            self.ui.warn("no interrupted transaction available\n")

    def undo(self):
        lock = self.lock()
        if os.path.exists(self.join("undo")):
            f = self.changelog.read(self.changelog.tip())[3]
            self.ui.status("attempting to rollback last transaction\n")
            rollback(self.opener, self.join("undo"))
            self.manifest = manifest(self.opener)
            self.changelog = changelog(self.opener)

            self.ui.status("discarding dirstate\n")
            node = self.changelog.tip()
            f.sort()
            self.dirstate.setparents(node)
            self.dirstate.update(f, 'i')
        else:
            self.ui.warn("no undo information available\n")

    def lock(self, wait = 1):
        try:
            return lock.lock(self.join("lock"), 0)
        except lock.LockHeld, inst:
            if wait:
                self.ui.warn("waiting for lock held by %s\n" % inst.args[0])
                return lock.lock(self.join("lock"), wait)
            raise inst

    def rawcommit(self, files, text, user, date, p1=None, p2=None):
        p1 = p1 or self.dirstate.parents()[0] or nullid
        p2 = p2 or self.dirstate.parents()[1] or nullid
        pchange = self.changelog.read(p1)
        pmmap = self.manifest.read(pchange[0])
        tr = self.transaction()
        mmap = {}
        linkrev = self.changelog.count()
        for f in files:
            try:
                t = file(f).read()
            except IOError:
                self.ui.warn("Read file %s error, skipped\n" % f)
                continue
            r = self.file(f)
            # FIXME - need to find both parents properly
            prev = pmmap.get(f, nullid)
            mmap[f] = r.add(t, tr, linkrev, prev)

        mnode = self.manifest.add(mmap, tr, linkrev, pchange[0])
        n = self.changelog.add(mnode, files, text, tr, p1, p2, user ,date, )
        tr.close()
        self.dirstate.setparents(p1, p2)
        self.dirstate.clear()
        self.dirstate.update(mmap.keys(), "n")

    def commit(self, files = None, text = ""):
        commit = []
        remove = []
        if files:
            for f in files:
                s = self.dirstate.state(f)
                if s in 'nmai':
                    commit.append(f)
                elif s == 'r':
                    remove.append(f)
                else:
                    self.ui.warn("%s not tracked!\n" % f)
        else:
            (c, a, d, u) = self.diffdir(self.root)
            commit = c + a
            remove = d

        if not commit and not remove:
            self.ui.status("nothing changed\n")
            return

        p1, p2 = self.dirstate.parents()
        c1 = self.changelog.read(p1)
        c2 = self.changelog.read(p2)
        m1 = self.manifest.read(c1[0])
        m2 = self.manifest.read(c2[0])
        lock = self.lock()
        tr = self.transaction()

        # check in files
        new = {}
        linkrev = self.changelog.count()
        commit.sort()
        for f in commit:
            self.ui.note(f + "\n")
            try:
                t = file(self.wjoin(f)).read()
            except IOError:
                self.warn("trouble committing %s!\n" % f)
                raise

            r = self.file(f)
            fp1 = m1.get(f, nullid)
            fp2 = m2.get(f, nullid)
            new[f] = r.add(t, tr, linkrev, fp1, fp2)

        # update manifest
        m1.update(new)
        for f in remove: del m1[f]
        mn = self.manifest.add(m1, tr, linkrev, c1[0], c2[0])

        # add changeset
        new = new.keys()
        new.sort()

        edittext = text + "\n" + "HG: manifest hash %s\n" % hex(mn)
        edittext += "".join(["HG: changed %s\n" % f for f in new])
        edittext += "".join(["HG: removed %s\n" % f for f in remove])
        edittext = self.ui.edit(edittext)

        n = self.changelog.add(mn, new, edittext, tr, p1, p2)
        tr.close()

        self.dirstate.setparents(n)
        self.dirstate.update(new, "n")
        self.dirstate.forget(remove)

    def checkout(self, node):
        # checkout is really dumb at the moment
        # it ought to basically merge
        change = self.changelog.read(node)
        l = self.manifest.read(change[0]).items()
        l.sort()

        for f,n in l:
            if f[0] == "/": continue
            self.ui.note(f, "\n")
            t = self.file(f).revision(n)
            try:
                file(self.wjoin(f), "w").write(t)
            except IOError:
                os.makedirs(os.path.dirname(f))
                file(self.wjoin(f), "w").write(t)

        self.dirstate.setparents(node)
        self.dirstate.clear()
        self.dirstate.update([f for f,n in l], "n")

    def diffdir(self, path, changeset = None):
        changed = []
        added = []
        unknown = []
        mf = {}

        if changeset:
            change = self.changelog.read(changeset)
            mf = self.manifest.read(change[0])
            dc = dict.fromkeys(mf)
        else:
            changeset = self.dirstate.parents()[0]
            change = self.changelog.read(changeset)
            mf = self.manifest.read(change[0])
            dc = self.dirstate.copy()

        def fcmp(fn):
            t1 = file(self.wjoin(fn)).read()
            t2 = self.file(fn).revision(mf[fn])
            return cmp(t1, t2)

        for dir, subdirs, files in os.walk(self.root):
            d = dir[len(self.root)+1:]
            if ".hg" in subdirs: subdirs.remove(".hg")

            for f in files:
                fn = os.path.join(d, f)
                try: s = os.stat(os.path.join(self.root, fn))
                except: continue
                if fn in dc:
                    c = dc[fn]
                    del dc[fn]
                    if not c:
                        if fcmp(fn):
                            changed.append(fn)
                    elif c[0] == 'i':
                        if fn not in mf:
                            added.append(fn)
                        elif fcmp(fn):
                            changed.append(fn)
                    elif c[0] == 'm':
                        changed.append(fn)
                    elif c[0] == 'a':
                        added.append(fn)
                    elif c[0] == 'r':
                        unknown.append(fn)
                    elif c[2] != s.st_size:
                        changed.append(fn)
                    elif c[1] != s.st_mode or c[3] != s.st_mtime:
                        if fcmp(fn):
                            changed.append(fn)
                else:
                    if self.ignore(fn): continue
                    unknown.append(fn)

        deleted = dc.keys()
        deleted.sort()

        return (changed, added, deleted, unknown)

    def diffrevs(self, node1, node2):
        changed, added = [], []

        change = self.changelog.read(node1)
        mf1 = self.manifest.read(change[0])
        change = self.changelog.read(node2)
        mf2 = self.manifest.read(change[0])

        for fn in mf2:
            if mf1.has_key(fn):
                if mf1[fn] != mf2[fn]:
                    changed.append(fn)
                del mf1[fn]
            else:
                added.append(fn)

        deleted = mf1.keys()
        deleted.sort()

        return (changed, added, deleted)

    def add(self, list):
        for f in list:
            p = self.wjoin(f)
            if not os.path.isfile(p):
                self.ui.warn("%s does not exist!\n" % f)
            elif self.dirstate.state(f) == 'n':
                self.ui.warn("%s already tracked!\n" % f)
            else:
                self.dirstate.update([f], "a")

    def forget(self, list):
        for f in list:
            if self.dirstate.state(f) not in 'ai':
                self.ui.warn("%s not added!\n" % f)
            else:
                self.dirstate.forget([f])

    def remove(self, list):
        for f in list:
            p = self.wjoin(f)
            if os.path.isfile(p):
                self.ui.warn("%s still exists!\n" % f)
            elif f not in self.dirstate:
                self.ui.warn("%s not tracked!\n" % f)
            else:
                self.dirstate.update([f], "r")

    def heads(self):
        return self.changelog.heads()

    def branches(self, nodes):
        if not nodes: nodes = [self.changelog.tip()]
        b = []
        for n in nodes:
            t = n
            while n:
                p = self.changelog.parents(n)
                if p[1] != nullid or p[0] == nullid:
                    b.append((t, n, p[0], p[1]))
                    break
                n = p[0]
        return b

    def between(self, pairs):
        r = []

        for top, bottom in pairs:
            n, l, i = top, [], 0
            f = 1

            while n != bottom:
                p = self.changelog.parents(n)[0]
                if i == f:
                    l.append(n)
                    f = f * 2
                n = p
                i += 1

            r.append(l)

        return r

    def newer(self, nodes):
        m = {}
        nl = []
        pm = {}
        cl = self.changelog
        t = l = cl.count()

        # find the lowest numbered node
        for n in nodes:
            l = min(l, cl.rev(n))
            m[n] = 1

        for i in xrange(l, t):
            n = cl.node(i)
            if n in m: # explicitly listed
                pm[n] = 1
                nl.append(n)
                continue
            for p in cl.parents(n):
                if p in pm: # parent listed
                    pm[n] = 1
                    nl.append(n)
                    break

        return nl

    def getchangegroup(self, remote):
        m = self.changelog.nodemap
        search = []
        fetch = []
        seen = {}
        seenbranch = {}

        # if we have an empty repo, fetch everything
        if self.changelog.tip() == nullid:
            self.ui.status("requesting all changes\n")
            return remote.changegroup([nullid])

        # otherwise, assume we're closer to the tip than the root
        self.ui.status("searching for changes\n")
        heads = remote.heads()
        unknown = []
        for h in heads:
            if h not in m:
                unknown.append(h)

        if not unknown:
            self.ui.status("nothing to do!\n")
            return None

        unknown = remote.branches(unknown)
        while unknown:
            n = unknown.pop(0)
            seen[n[0]] = 1

            self.ui.debug("examining %s:%s\n" % (short(n[0]), short(n[1])))
            if n == nullid: break
            if n in seenbranch:
                self.ui.debug("branch already found\n")
                continue
            if n[1] and n[1] in m: # do we know the base?
                self.ui.debug("found incomplete branch %s:%s\n"
                              % (short(n[0]), short(n[1])))
                search.append(n) # schedule branch range for scanning
                seenbranch[n] = 1
            else:
                if n[2] in m and n[3] in m:
                    if n[1] not in fetch:
                        self.ui.debug("found new changeset %s\n" %
                                      short(n[1]))
                        fetch.append(n[1]) # earliest unknown
                    continue

                r = []
                for a in n[2:4]:
                    if a not in seen: r.append(a)

                if r:
                    self.ui.debug("requesting %s\n" %
                                  " ".join(map(short, r)))
                    for b in remote.branches(r):
                        self.ui.debug("received %s:%s\n" %
                                      (short(b[0]), short(b[1])))
                        if b[0] not in m and b[0] not in seen:
                            unknown.append(b)

        while search:
            n = search.pop(0)
            l = remote.between([(n[0], n[1])])[0]
            p = n[0]
            f = 1
            for i in l + [n[1]]:
                if i in m:
                    if f <= 2:
                        self.ui.debug("found new branch changeset %s\n" %
                                      short(p))
                        fetch.append(p)
                    else:
                        self.ui.debug("narrowed branch search to %s:%s\n"
                                      % (short(p), short(i)))
                        search.append((p, i))
                    break
                p, f = i, f * 2

        for f in fetch:
            if f in m:
                raise "already have", short(f[:4])

        self.ui.note("adding new changesets starting at " +
                     " ".join([short(f) for f in fetch]) + "\n")

        return remote.changegroup(fetch)

    def changegroup(self, basenodes):
        nodes = self.newer(basenodes)

        # construct the link map
        linkmap = {}
        for n in nodes:
            linkmap[self.changelog.rev(n)] = n

        # construct a list of all changed files
        changed = {}
        for n in nodes:
            c = self.changelog.read(n)
            for f in c[3]:
                changed[f] = 1
        changed = changed.keys()
        changed.sort()

        # the changegroup is changesets + manifests + all file revs
        revs = [ self.changelog.rev(n) for n in nodes ]

        for y in self.changelog.group(linkmap): yield y
        for y in self.manifest.group(linkmap): yield y

        for f in changed:
            yield struct.pack(">l", len(f) + 4) + f
            g = self.file(f).group(linkmap)
            for y in g:
                yield y

    def addchangegroup(self, generator):
        class genread:
            def __init__(self, generator):
                self.g = generator
                self.buf = ""
            def read(self, l):
                while l > len(self.buf):
                    try:
                        self.buf += self.g.next()
                    except StopIteration:
                        break
                d, self.buf = self.buf[:l], self.buf[l:]
                return d

        def getchunk():
            d = source.read(4)
            if not d: return ""
            l = struct.unpack(">l", d)[0]
            if l <= 4: return ""
            return source.read(l - 4)

        def getgroup():
            while 1:
                c = getchunk()
                if not c: break
                yield c

        def csmap(x):
            self.ui.debug("add changeset %s\n" % short(x))
            return self.changelog.count()

        def revmap(x):
            return self.changelog.rev(x)

        if not generator: return
        changesets = files = revisions = 0

        source = genread(generator)
        lock = self.lock()
        tr = self.transaction()

        # pull off the changeset group
        self.ui.status("adding changesets\n")
        co = self.changelog.tip()
        cn = self.changelog.addgroup(getgroup(), csmap, tr, 1) # unique
        changesets = self.changelog.rev(cn) - self.changelog.rev(co)

        # pull off the manifest group
        self.ui.status("adding manifests\n")
        mm = self.manifest.tip()
        mo = self.manifest.addgroup(getgroup(), revmap, tr)

        # process the files
        self.ui.status("adding file revisions\n")
        while 1:
            f = getchunk()
            if not f: break
            self.ui.debug("adding %s revisions\n" % f)
            fl = self.file(f)
            o = fl.tip()
            n = fl.addgroup(getgroup(), revmap, tr)
            revisions += fl.rev(n) - fl.rev(o)
            files += 1

        self.ui.status(("modified %d files, added %d changesets" +
                        " and %d new revisions\n")
                       % (files, changesets, revisions))

        tr.close()
        return

    def resolve(self, node):
        pl = self.dirstate.parents()
        if pl[1] != nullid:
            self.ui.warn("last merge not committed")
            return

        p1, p2 = pl[0], node
        m1n = self.changelog.read(p1)[0]
        m2n = self.changelog.read(p2)[0]
        man = self.manifest.ancestor(m1n, m2n)
        m1 = self.manifest.read(m1n)
        m2 = self.manifest.read(m2n)
        ma = self.manifest.read(man)

        (c, a, d, u) = self.diffdir(self.root)

        # resolve the manifest to determine which files
        # we care about merging
        self.ui.status("resolving manifests\n")
        self.ui.debug(" ancestor %s local %s remote %s\n" %
                      (short(man), short(m1n), short(m2n)))

        merge = {}
        get = {}
        remove = []

        # construct a working dir manifest
        mw = m1.copy()
        for f in a + c: mw[f] = nullid
        for f in d: del mw[f]

        for f, n in mw.iteritems():
            if f in m2:
                if n != m2[f]:
                    self.ui.debug(" %s versions differ, do resolve\n" % f)
                    merge[f] = (m1.get(f, nullid), m2[f])
                del m2[f]
            elif f in ma:
                if n != ma[f]:
                    r = self.ui.prompt(
                        (" local changed %s which remote deleted\n" % f) +
                        "(k)eep or (d)elete?", "[kd]", "k")
                    if r == "d":
                        remove.append(f)
                else:
                    self.ui.debug("other deleted %s\n" % f)
                    pass # other deleted it
            else:
                self.ui.debug("local created %s\n" %f)

        for f, n in m2.iteritems():
            if f in ma:
                if n != ma[f]:
                    r = self.ui.prompt(
                        ("remote changed %s which local deleted\n" % f) +
                        "(k)eep or (d)elete?", "[kd]", "k")
                    if r == "d": remove.append(f)
                else:
                    pass # probably safe
            else:
                self.ui.debug("remote created %s, do resolve\n" % f)
                get[f] = n

        del mw, m1, m2, ma

        self.dirstate.setparents(p1, p2)

        # get the files we don't need to change
        files = get.keys()
        files.sort()
        for f in files:
            if f[0] == "/": continue
            self.ui.note(f, "\n")
            t = self.file(f).revision(get[f])
            try:
                file(self.wjoin(f), "w").write(t)
            except IOError:
                os.makedirs(os.path.dirname(f))
                file(self.wjoin(f), "w").write(t)

        # we have to remember what files we needed to get/change
        # because any file that's different from either one of its
        # parents must be in the changeset
        self.dirstate.update(files, 'm')

        # merge the tricky bits
        files = merge.keys()
        files.sort()
        for f in files:
            m, o = merge[f]
            self.merge3(f, m, o)

        # same here
        self.dirstate.update(files, 'm')

        for f in remove:
            self.ui.note("removing %s\n" % f)
            #os.unlink(f)
        self.dirstate.update(remove, 'r')

    def merge3(self, fn, my, other):
        """perform a 3-way merge in the working directory"""

        import tempfile

        def temp(prefix, node):
            pre = "%s~%s." % (os.path.basename(fn), prefix)
            (fd, name) = tempfile.mkstemp("", pre)
            f = os.fdopen(fd, "w")
            f.write(fl.revision(node))
            f.close()
            return name

        fl = self.file(fn)
        base = fl.ancestor(my, other)
        a = self.wjoin(fn)
        b = temp("other", other)
        c = temp("base", base)

        self.ui.note("resolving %s\n" % fn)
        self.ui.debug("file %s: other %s ancestor %s\n" %
                      (fn, short(other), short(base)))

        cmd = os.environ.get("HGMERGE", "hgmerge")
        r = os.system("%s %s %s %s" % (cmd, a, b, c))
        if r:
            self.ui.warn("merging %s failed!\n" % f)

        os.unlink(b)
        os.unlink(c)

    def verify(self):
        filelinkrevs = {}
        filenodes = {}
        manifestchangeset = {}
        changesets = revisions = files = 0
        errors = 0

        self.ui.status("checking changesets\n")
        for i in range(self.changelog.count()):
            changesets += 1
            n = self.changelog.node(i)
            for p in self.changelog.parents(n):
                if p not in self.changelog.nodemap:
                    self.ui.warn("changeset %s has unknown parent %s\n" %
                                 (short(n), short(p)))
                    errors += 1
            try:
                changes = self.changelog.read(n)
            except Exception, inst:
                self.ui.warn("unpacking changeset %s: %s\n" % (short(n), inst))
                errors += 1

            manifestchangeset[changes[0]] = n
            for f in changes[3]:
                filelinkrevs.setdefault(f, []).append(i)

        self.ui.status("checking manifests\n")
        for i in range(self.manifest.count()):
            n = self.manifest.node(i)
            for p in self.manifest.parents(n):
                if p not in self.manifest.nodemap:
                    self.ui.warn("manifest %s has unknown parent %s\n" %
                                 (short(n), short(p)))
                    errors += 1
            ca = self.changelog.node(self.manifest.linkrev(n))
            cc = manifestchangeset[n]
            if ca != cc:
                self.ui.warn("manifest %s points to %s, not %s\n" %
                             (hex(n), hex(ca), hex(cc)))
                errors += 1

            try:
                delta = mdiff.patchtext(self.manifest.delta(n))
            except KeyboardInterrupt:
                print "aborted"
                sys.exit(0)
            except Exception, inst:
                self.ui.warn("unpacking manifest %s: %s\n"
                             % (short(n), inst))
                errors += 1

            ff = [ l.split('\0') for l in delta.splitlines() ]
            for f, fn in ff:
                filenodes.setdefault(f, {})[bin(fn)] = 1

        self.ui.status("crosschecking files in changesets and manifests\n")
        for f in filenodes:
            if f not in filelinkrevs:
                self.ui.warn("file %s in manifest but not in changesets\n" % f)
                errors += 1

        for f in filelinkrevs:
            if f not in filenodes:
                self.ui.warn("file %s in changeset but not in manifest\n" % f)
                errors += 1

        self.ui.status("checking files\n")
        ff = filenodes.keys()
        ff.sort()
        for f in ff:
            if f == "/dev/null": continue
            files += 1
            fl = self.file(f)
            nodes = { nullid: 1 }
            for i in range(fl.count()):
                revisions += 1
                n = fl.node(i)

                if n not in filenodes[f]:
                    self.ui.warn("%s: %d:%s not in manifests\n"
                                 % (f, i, short(n)))
                    print len(filenodes[f].keys()), fl.count(), f
                    errors += 1
                else:
                    del filenodes[f][n]

                flr = fl.linkrev(n)
                if flr not in filelinkrevs[f]:
                    self.ui.warn("%s:%s points to unexpected changeset %d\n"
                                 % (f, short(n), fl.linkrev(n)))
                    errors += 1
                else:
                    filelinkrevs[f].remove(flr)

                # verify contents
                try:
                    t = fl.read(n)
                except Exception, inst:
                    self.ui.warn("unpacking file %s %s: %s\n"
                                 % (f, short(n), inst))
                    errors += 1

                # verify parents
                (p1, p2) = fl.parents(n)
                if p1 not in nodes:
                    self.ui.warn("file %s:%s unknown parent 1 %s" %
                                 (f, short(n), short(p1)))
                    errors += 1
                if p2 not in nodes:
                    self.ui.warn("file %s:%s unknown parent 2 %s" %
                                 (f, short(n), short(p1)))
                    errors += 1
                nodes[n] = 1

            # cross-check
            for node in filenodes[f]:
                self.ui.warn("node %s in manifests not in %s\n"
                             % (hex(n), f))
                errors += 1

        self.ui.status("%d files, %d changesets, %d total revisions\n" %
                       (files, changesets, revisions))

        if errors:
            self.ui.warn("%d integrity errors encountered!\n" % errors)
            return 1

class remoterepository:
    def __init__(self, ui, path):
        self.url = path
        self.ui = ui

    def do_cmd(self, cmd, **args):
        self.ui.debug("sending %s command\n" % cmd)
        q = {"cmd": cmd}
        q.update(args)
        qs = urllib.urlencode(q)
        cu = "%s?%s" % (self.url, qs)
        return urllib.urlopen(cu)

    def heads(self):
        d = self.do_cmd("heads").read()
        try:
            return map(bin, d[:-1].split(" "))
        except:
            self.ui.warn("unexpected response:\n" + d[:400] + "\n...\n")
            raise

    def branches(self, nodes):
        n = " ".join(map(hex, nodes))
        d = self.do_cmd("branches", nodes=n).read()
        try:
            br = [ tuple(map(bin, b.split(" "))) for b in d.splitlines() ]
            return br
        except:
            self.ui.warn("unexpected response:\n" + d[:400] + "\n...\n")
            raise

    def between(self, pairs):
        n = "\n".join(["-".join(map(hex, p)) for p in pairs])
        d = self.do_cmd("between", pairs=n).read()
        try:
            p = [ l and map(bin, l.split(" ")) or [] for l in d.splitlines() ]
            return p
        except:
            self.ui.warn("unexpected response:\n" + d[:400] + "\n...\n")
            raise

    def changegroup(self, nodes):
        n = " ".join(map(hex, nodes))
        zd = zlib.decompressobj()
        f = self.do_cmd("changegroup", roots=n)
        bytes = 0
        while 1:
            d = f.read(4096)
            bytes += len(d)
            if not d:
                yield zd.flush()
                break
            yield zd.decompress(d)
        self.ui.note("%d bytes of data transfered\n" % bytes)

def repository(ui, path=None, create=0):
    if path and path[:7] == "http://":
        import urllib, urllib2
        return remoterepository(ui, path)
    if path and path[:5] == "hg://":
        import urllib, urllib2
        return remoterepository(ui, path.replace("hg://", "http://"))
    if path and path[:11] == "old-http://":
        import urllib, urllib2
        from mercurial import byterange
        return localrepository(ui, path.replace("old-http://", "http://"))
    else:
        return localrepository(ui, path, create)

class httprangereader:
    def __init__(self, url):
        self.url = url
        self.pos = 0
    def seek(self, pos):
        self.pos = pos
    def read(self, bytes=None):
        opener = urllib2.build_opener(byterange.HTTPRangeHandler())
        urllib2.install_opener(opener)
        req = urllib2.Request(self.url)
        end = ''
        if bytes: end = self.pos + bytes
        req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
        f = urllib2.urlopen(req)
        return f.read()