mercurial-scm/hg-stable: mercurial/revlog.py comparison

comparison mercurial/revlog.py @ 2073:1e6745f78989

Implement data inlined with the index file

This patch allows you to optionally inline data bytes with the revlog index file. It saves considerable space and checkout time by reducing the number of inodes, wasted partial blocks and system calls.

To use the inline data, add this to your .hgrc:

[revlog]
# inline data only works with revlogng
format=1
# inline is the only valid flag right now.
flags=inline

author	mason@suse.com
date	Tue, 04 Apr 2006 16:38:43 -0400
parents	74d3f5336b66
children	343aeefb553b

comparison

equal deleted inserted replaced

-:74d3f5336b66
+:1e6745f78989
 demandload(globals(), "sha struct zlib")
 # revlog version strings
 REVLOGV0 = 0
 REVLOGNG = 1
+# revlog flags
+REVLOGNGINLINEDATA = (1 << 16)
+def flagstr(flag):
+if flag == "inline":
+return REVLOGNGINLINEDATA
+raise RevlogError(_("unknown revlog flag %s" % flag))
 def hash(text, p1, p2):
 """generate a hash from the given text and its parent hashes
 This hash combines both the current file contents and its history
 and st.st_ctime == oldst.st_ctime):
 return
 self.indexstat = st
 if len(i) > 0:
 v = struct.unpack(versionformat, i[:4])[0]
-if v != 0:
+flags = v & ~0xFFFF
-flags = v & ~0xFFFF
+fmt = v & 0xFFFF
-fmt = v & 0xFFFF
+if fmt == 0:
-if fmt != REVLOGNG or (flags & ~(REVLOGNGINLINEDATA)):
+if flags:
-raise RevlogError(
+raise RevlogError(_("index %s invalid flags %x for format v0" %
-_("unknown version format %d or flags %x on %s") %
+(self.indexfile, flags)))
-(v, flags, self.indexfile))
+elif fmt == REVLOGNG:
+if flags & ~REVLOGNGINLINEDATA:
+raise RevlogError(_("index %s invalid flags %x for revlogng" %
+(self.indexfile, flags)))
+else:
+raise RevlogError(_("index %s invalid format %d" %
+(self.indexfile, fmt)))
 self.version = v
 if v == 0:
 self.indexformat = indexformatv0
 else:
 self.indexformat = indexformatng
 if i:
-if st and st.st_size > 10000:
+if not self.inlinedata() and st and st.st_size > 10000:
 # big index, let's parse it on demand
 parser = lazyparser(i, self, self.indexformat)
 self.index = lazyindex(parser)
 self.nodemap = lazymap(parser)
 else:
 self.parseindex(i)
+if self.inlinedata():
+# we've already got the entire data file read in, save it
+# in the chunk data
+self.chunkcache = (0, i)
 if self.version != 0:
 e = list(self.index[0])
 type = self.ngtype(e[0])
 e[0] = self.offset_type(0, type)
 self.index[0] = e
 def parseindex(self, data):
 s = struct.calcsize(self.indexformat)
 l = len(data)
 self.index = []
 self.nodemap =  {nullid: -1}
+inline = self.inlinedata()
 off = 0
 n = 0
 while off < l:
 e = struct.unpack(self.indexformat, data[off:off + s])
 self.index.append(e)
 self.nodemap[e[-1]] = n
 n += 1
 off += s
+if inline:
+off += e[1]
 def ngoffset(self, q):
 if q & 0xFFFF:
 raise RevlogError(_('%s: incompatible revision flag %x') %
 (self.indexfile, type))
 """loads both the map and the index from the lazy parser"""
 if isinstance(self.index, lazyindex):
 p = self.index.p
 p.load()
+def inlinedata(self): return self.version & REVLOGNGINLINEDATA
 def tip(self): return self.node(len(self.index) - 1)
 def count(self): return len(self.index)
 def node(self, rev):
 return (rev < 0) and nullid or self.index[rev][-1]
 def rev(self, node):
 """apply a list of patches to a string"""
 return mdiff.patches(t, pl)
 def chunk(self, rev, df=None, cachelen=4096):
 start, length = self.start(rev), self.length(rev)
+inline = self.inlinedata()
+if inline:
+start += (rev + 1) * struct.calcsize(self.indexformat)
 end = start + length
 def loadcache(df):
 cache_length = max(cachelen, length) # 4k
 if not df:
-df = self.opener(self.datafile)
+if inline:
+df = self.opener(self.indexfile)
+else:
+df = self.opener(self.datafile)
 df.seek(start)
 self.chunkcache = (start, df.read(cache_length))
 if not self.chunkcache:
 loadcache(df)
 # look up what we need to read
 text = None
 rev = self.rev(node)
 base = self.base(rev)
-df = self.opener(self.datafile)
+if self.inlinedata():
+# we probably have the whole chunk cached
+df = None
+else:
+df = self.opener(self.datafile)
 # do we have useful data cached?
 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
 base = self.cache[1]
 text = self.cache[2]
 raise RevlogError(_("integrity check failed on %s:%d")
 % (self.datafile, rev))
 self.cache = (node, rev, text)
 return text
+def checkinlinesize(self, fp, tr):
+if not self.inlinedata():
+return
+size = fp.tell()
+if size < 131072:
+return
+tr.add(self.datafile, 0)
+df = self.opener(self.datafile, 'w')
+calc = struct.calcsize(self.indexformat)
+for r in xrange(self.count()):
+start = self.start(r) + (r + 1) * calc
+length = self.length(r)
+fp.seek(start)
+d = fp.read(length)
+df.write(d)
+fp.close()
+df.close()
+fp = self.opener(self.indexfile, 'w', atomic=True)
+self.version &= ~(REVLOGNGINLINEDATA)
+if self.count():
+x = self.index[0]
+e = struct.pack(self.indexformat, *x)[4:]
+l = struct.pack(versionformat, self.version)
+fp.write(l)
+fp.write(e)
+for i in xrange(1, self.count()):
+x = self.index[i]
+e = struct.pack(self.indexformat, *x)
+fp.write(e)
+fp.close()
+self.chunkcache = None
 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
 """add a revision to the log
 text - the revision data to add
 self.index.append(e)
 self.nodemap[node] = n
 entry = struct.pack(self.indexformat, *e)
-transaction.add(self.datafile, offset)
+if not self.inlinedata():
-transaction.add(self.indexfile, n * len(entry))
+transaction.add(self.datafile, offset)
-f = self.opener(self.datafile, "a")
+transaction.add(self.indexfile, n * len(entry))
-if data[0]:
+f = self.opener(self.datafile, "a")
-f.write(data[0])
+if data[0]:
-f.write(data[1])
+f.write(data[0])
-f = self.opener(self.indexfile, "a")
+f.write(data[1])
+f = self.opener(self.indexfile, "a")
+else:
+f = self.opener(self.indexfile, "a+")
+transaction.add(self.indexfile, f.tell())
 if len(self.index) == 1 and self.version != 0:
 l = struct.pack(versionformat, self.version)
 f.write(l)
 entry = entry[4:]
 f.write(entry)
+if self.inlinedata():
+f.write(data[0])
+f.write(data[1])
+self.checkinlinesize(f, transaction)
 self.cache = (node, n, text)
 return node
 def ancestor(self, a, b):
 if r:
 end = self.end(t)
 ifh = self.opener(self.indexfile, "a+")
 transaction.add(self.indexfile, ifh.tell())
-transaction.add(self.datafile, end)
+if self.inlinedata():
-dfh = self.opener(self.datafile, "a")
+dfh = None
+else:
+transaction.add(self.datafile, end)
+dfh = self.opener(self.datafile, "a")
 # loop through our set of deltas
 chain = None
 for chunk in revs:
 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
 else:
 e = (self.offset_type(end, 0), len(cdelta), -1, base,
 link, self.rev(p1), self.rev(p2), node)
 self.index.append(e)
 self.nodemap[node] = r
-dfh.write(cdelta)
+if self.inlinedata():
 ifh.write(struct.pack(self.indexformat, *e))
+ifh.write(cdelta)
+self.checkinlinesize(ifh, transaction)
+if not self.inlinedata():
+dfh = self.opener(self.datafile, "a")
+ifh = self.opener(self.indexfile, "a")
+else:
+if not dfh:
+# addrevision switched from inline to conventional
+# reopen the index
+dfh = self.opener(self.datafile, "a")
+ifh = self.opener(self.indexfile, "a")
+dfh.write(cdelta)
+ifh.write(struct.pack(self.indexformat, *e))
 t, r, chain, prev = r, r + 1, node, node
 base = self.base(t)
 start = self.start(base)
 end = self.end(t)
 if rev >= self.count():
 return
 # first truncate the files on disk
 end = self.start(rev)
-df = self.opener(self.datafile, "a")
+if not self.inlinedata():
-df.truncate(end)
+df = self.opener(self.datafile, "a")
-end = rev * struct.calcsize(self.indexformat)
+df.truncate(end)
+end = rev * struct.calcsize(self.indexformat)
+else:
+end += rev * struct.calcsize(self.indexformat)
 indexf = self.opener(self.indexfile, "a")
 indexf.truncate(end)
 # then reset internal state in memory to forget those revisions
 f.seek(0, 2)
 actual = f.tell()
 s = struct.calcsize(self.indexformat)
 i = actual / s
 di = actual - (i * s)
+if self.inlinedata():
+databytes = 0
+for r in xrange(self.count()):
+databytes += self.length(r)
+dd = 0
+di = actual - self.count() * s - databytes
 except IOError, inst:
 if inst.errno != errno.ENOENT:
 raise
 di = 0

Mercurial > public > mercurial-scm > hg-stable

comparison mercurial/revlog.py @ 2073:1e6745f78989