Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 2073:1e6745f78989
Implement data inlined with the index file
This patch allows you to optionally inline data bytes with the
revlog index file. It saves considerable space and checkout
time by reducing the number of inodes, wasted partial blocks and
system calls.
To use the inline data add this to your .hgrc
[revlog]
# inline data only works with revlogng
format=1
# inline is the only valid flag right now.
flags=inline
author | mason@suse.com |
---|---|
date | Tue, 04 Apr 2006 16:38:43 -0400 |
parents | 74d3f5336b66 |
children | 343aeefb553b |
comparison
equal
deleted
inserted
replaced
2072:74d3f5336b66 | 2073:1e6745f78989 |
---|---|
17 demandload(globals(), "sha struct zlib") | 17 demandload(globals(), "sha struct zlib") |
18 | 18 |
19 # revlog version strings | 19 # revlog version strings |
20 REVLOGV0 = 0 | 20 REVLOGV0 = 0 |
21 REVLOGNG = 1 | 21 REVLOGNG = 1 |
22 | |
23 # revlog flags | |
24 REVLOGNGINLINEDATA = (1 << 16) | |
25 | |
26 def flagstr(flag): | |
27 if flag == "inline": | |
28 return REVLOGNGINLINEDATA | |
29 raise RevlogError(_("unknown revlog flag %s" % flag)) | |
22 | 30 |
23 def hash(text, p1, p2): | 31 def hash(text, p1, p2): |
24 """generate a hash from the given text and its parent hashes | 32 """generate a hash from the given text and its parent hashes |
25 | 33 |
26 This hash combines both the current file contents and its history | 34 This hash combines both the current file contents and its history |
232 and st.st_ctime == oldst.st_ctime): | 240 and st.st_ctime == oldst.st_ctime): |
233 return | 241 return |
234 self.indexstat = st | 242 self.indexstat = st |
235 if len(i) > 0: | 243 if len(i) > 0: |
236 v = struct.unpack(versionformat, i[:4])[0] | 244 v = struct.unpack(versionformat, i[:4])[0] |
237 if v != 0: | 245 flags = v & ~0xFFFF |
238 flags = v & ~0xFFFF | 246 fmt = v & 0xFFFF |
239 fmt = v & 0xFFFF | 247 if fmt == 0: |
240 if fmt != REVLOGNG or (flags & ~(REVLOGNGINLINEDATA)): | 248 if flags: |
241 raise RevlogError( | 249 raise RevlogError(_("index %s invalid flags %x for format v0" % |
242 _("unknown version format %d or flags %x on %s") % | 250 (self.indexfile, flags))) |
243 (v, flags, self.indexfile)) | 251 elif fmt == REVLOGNG: |
252 if flags & ~REVLOGNGINLINEDATA: | |
253 raise RevlogError(_("index %s invalid flags %x for revlogng" % | |
254 (self.indexfile, flags))) | |
255 else: | |
256 raise RevlogError(_("index %s invalid format %d" % | |
257 (self.indexfile, fmt))) | |
244 self.version = v | 258 self.version = v |
245 if v == 0: | 259 if v == 0: |
246 self.indexformat = indexformatv0 | 260 self.indexformat = indexformatv0 |
247 else: | 261 else: |
248 self.indexformat = indexformatng | 262 self.indexformat = indexformatng |
249 | 263 |
250 if i: | 264 if i: |
251 if st and st.st_size > 10000: | 265 if not self.inlinedata() and st and st.st_size > 10000: |
252 # big index, let's parse it on demand | 266 # big index, let's parse it on demand |
253 parser = lazyparser(i, self, self.indexformat) | 267 parser = lazyparser(i, self, self.indexformat) |
254 self.index = lazyindex(parser) | 268 self.index = lazyindex(parser) |
255 self.nodemap = lazymap(parser) | 269 self.nodemap = lazymap(parser) |
256 else: | 270 else: |
257 self.parseindex(i) | 271 self.parseindex(i) |
272 if self.inlinedata(): | |
273 # we've already got the entire data file read in, save it | |
274 # in the chunk data | |
275 self.chunkcache = (0, i) | |
258 if self.version != 0: | 276 if self.version != 0: |
259 e = list(self.index[0]) | 277 e = list(self.index[0]) |
260 type = self.ngtype(e[0]) | 278 type = self.ngtype(e[0]) |
261 e[0] = self.offset_type(0, type) | 279 e[0] = self.offset_type(0, type) |
262 self.index[0] = e | 280 self.index[0] = e |
268 def parseindex(self, data): | 286 def parseindex(self, data): |
269 s = struct.calcsize(self.indexformat) | 287 s = struct.calcsize(self.indexformat) |
270 l = len(data) | 288 l = len(data) |
271 self.index = [] | 289 self.index = [] |
272 self.nodemap = {nullid: -1} | 290 self.nodemap = {nullid: -1} |
291 inline = self.inlinedata() | |
273 off = 0 | 292 off = 0 |
274 n = 0 | 293 n = 0 |
275 while off < l: | 294 while off < l: |
276 e = struct.unpack(self.indexformat, data[off:off + s]) | 295 e = struct.unpack(self.indexformat, data[off:off + s]) |
277 self.index.append(e) | 296 self.index.append(e) |
278 self.nodemap[e[-1]] = n | 297 self.nodemap[e[-1]] = n |
279 n += 1 | 298 n += 1 |
280 off += s | 299 off += s |
300 if inline: | |
301 off += e[1] | |
281 | 302 |
282 def ngoffset(self, q): | 303 def ngoffset(self, q): |
283 if q & 0xFFFF: | 304 if q & 0xFFFF: |
284 raise RevlogError(_('%s: incompatible revision flag %x') % | 305 raise RevlogError(_('%s: incompatible revision flag %x') % |
285 (self.indexfile, type)) | 306 (self.indexfile, type)) |
295 """loads both the map and the index from the lazy parser""" | 316 """loads both the map and the index from the lazy parser""" |
296 if isinstance(self.index, lazyindex): | 317 if isinstance(self.index, lazyindex): |
297 p = self.index.p | 318 p = self.index.p |
298 p.load() | 319 p.load() |
299 | 320 |
321 def inlinedata(self): return self.version & REVLOGNGINLINEDATA | |
300 def tip(self): return self.node(len(self.index) - 1) | 322 def tip(self): return self.node(len(self.index) - 1) |
301 def count(self): return len(self.index) | 323 def count(self): return len(self.index) |
302 def node(self, rev): | 324 def node(self, rev): |
303 return (rev < 0) and nullid or self.index[rev][-1] | 325 return (rev < 0) and nullid or self.index[rev][-1] |
304 def rev(self, node): | 326 def rev(self, node): |
566 """apply a list of patches to a string""" | 588 """apply a list of patches to a string""" |
567 return mdiff.patches(t, pl) | 589 return mdiff.patches(t, pl) |
568 | 590 |
569 def chunk(self, rev, df=None, cachelen=4096): | 591 def chunk(self, rev, df=None, cachelen=4096): |
570 start, length = self.start(rev), self.length(rev) | 592 start, length = self.start(rev), self.length(rev) |
593 inline = self.inlinedata() | |
594 if inline: | |
595 start += (rev + 1) * struct.calcsize(self.indexformat) | |
571 end = start + length | 596 end = start + length |
572 def loadcache(df): | 597 def loadcache(df): |
573 cache_length = max(cachelen, length) # 4k | 598 cache_length = max(cachelen, length) # 4k |
574 if not df: | 599 if not df: |
575 df = self.opener(self.datafile) | 600 if inline: |
601 df = self.opener(self.indexfile) | |
602 else: | |
603 df = self.opener(self.datafile) | |
576 df.seek(start) | 604 df.seek(start) |
577 self.chunkcache = (start, df.read(cache_length)) | 605 self.chunkcache = (start, df.read(cache_length)) |
578 | 606 |
579 if not self.chunkcache: | 607 if not self.chunkcache: |
580 loadcache(df) | 608 loadcache(df) |
618 # look up what we need to read | 646 # look up what we need to read |
619 text = None | 647 text = None |
620 rev = self.rev(node) | 648 rev = self.rev(node) |
621 base = self.base(rev) | 649 base = self.base(rev) |
622 | 650 |
623 df = self.opener(self.datafile) | 651 if self.inlinedata(): |
652 # we probably have the whole chunk cached | |
653 df = None | |
654 else: | |
655 df = self.opener(self.datafile) | |
624 | 656 |
625 # do we have useful data cached? | 657 # do we have useful data cached? |
626 if self.cache and self.cache[1] >= base and self.cache[1] < rev: | 658 if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
627 base = self.cache[1] | 659 base = self.cache[1] |
628 text = self.cache[2] | 660 text = self.cache[2] |
640 raise RevlogError(_("integrity check failed on %s:%d") | 672 raise RevlogError(_("integrity check failed on %s:%d") |
641 % (self.datafile, rev)) | 673 % (self.datafile, rev)) |
642 | 674 |
643 self.cache = (node, rev, text) | 675 self.cache = (node, rev, text) |
644 return text | 676 return text |
677 | |
678 def checkinlinesize(self, fp, tr): | |
679 if not self.inlinedata(): | |
680 return | |
681 size = fp.tell() | |
682 if size < 131072: | |
683 return | |
684 tr.add(self.datafile, 0) | |
685 df = self.opener(self.datafile, 'w') | |
686 calc = struct.calcsize(self.indexformat) | |
687 for r in xrange(self.count()): | |
688 start = self.start(r) + (r + 1) * calc | |
689 length = self.length(r) | |
690 fp.seek(start) | |
691 d = fp.read(length) | |
692 df.write(d) | |
693 fp.close() | |
694 df.close() | |
695 fp = self.opener(self.indexfile, 'w', atomic=True) | |
696 self.version &= ~(REVLOGNGINLINEDATA) | |
697 if self.count(): | |
698 x = self.index[0] | |
699 e = struct.pack(self.indexformat, *x)[4:] | |
700 l = struct.pack(versionformat, self.version) | |
701 fp.write(l) | |
702 fp.write(e) | |
703 | |
704 for i in xrange(1, self.count()): | |
705 x = self.index[i] | |
706 e = struct.pack(self.indexformat, *x) | |
707 fp.write(e) | |
708 | |
709 fp.close() | |
710 self.chunkcache = None | |
645 | 711 |
646 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): | 712 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): |
647 """add a revision to the log | 713 """add a revision to the log |
648 | 714 |
649 text - the revision data to add | 715 text - the revision data to add |
696 | 762 |
697 self.index.append(e) | 763 self.index.append(e) |
698 self.nodemap[node] = n | 764 self.nodemap[node] = n |
699 entry = struct.pack(self.indexformat, *e) | 765 entry = struct.pack(self.indexformat, *e) |
700 | 766 |
701 transaction.add(self.datafile, offset) | 767 if not self.inlinedata(): |
702 transaction.add(self.indexfile, n * len(entry)) | 768 transaction.add(self.datafile, offset) |
703 f = self.opener(self.datafile, "a") | 769 transaction.add(self.indexfile, n * len(entry)) |
704 if data[0]: | 770 f = self.opener(self.datafile, "a") |
705 f.write(data[0]) | 771 if data[0]: |
706 f.write(data[1]) | 772 f.write(data[0]) |
707 f = self.opener(self.indexfile, "a") | 773 f.write(data[1]) |
774 f = self.opener(self.indexfile, "a") | |
775 else: | |
776 f = self.opener(self.indexfile, "a+") | |
777 transaction.add(self.indexfile, f.tell()) | |
708 | 778 |
709 if len(self.index) == 1 and self.version != 0: | 779 if len(self.index) == 1 and self.version != 0: |
710 l = struct.pack(versionformat, self.version) | 780 l = struct.pack(versionformat, self.version) |
711 f.write(l) | 781 f.write(l) |
712 entry = entry[4:] | 782 entry = entry[4:] |
713 | 783 |
714 f.write(entry) | 784 f.write(entry) |
785 | |
786 if self.inlinedata(): | |
787 f.write(data[0]) | |
788 f.write(data[1]) | |
789 self.checkinlinesize(f, transaction) | |
715 | 790 |
716 self.cache = (node, n, text) | 791 self.cache = (node, n, text) |
717 return node | 792 return node |
718 | 793 |
719 def ancestor(self, a, b): | 794 def ancestor(self, a, b): |
828 if r: | 903 if r: |
829 end = self.end(t) | 904 end = self.end(t) |
830 | 905 |
831 ifh = self.opener(self.indexfile, "a+") | 906 ifh = self.opener(self.indexfile, "a+") |
832 transaction.add(self.indexfile, ifh.tell()) | 907 transaction.add(self.indexfile, ifh.tell()) |
833 transaction.add(self.datafile, end) | 908 if self.inlinedata(): |
834 dfh = self.opener(self.datafile, "a") | 909 dfh = None |
910 else: | |
911 transaction.add(self.datafile, end) | |
912 dfh = self.opener(self.datafile, "a") | |
835 | 913 |
836 # loop through our set of deltas | 914 # loop through our set of deltas |
837 chain = None | 915 chain = None |
838 for chunk in revs: | 916 for chunk in revs: |
839 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80]) | 917 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80]) |
883 else: | 961 else: |
884 e = (self.offset_type(end, 0), len(cdelta), -1, base, | 962 e = (self.offset_type(end, 0), len(cdelta), -1, base, |
885 link, self.rev(p1), self.rev(p2), node) | 963 link, self.rev(p1), self.rev(p2), node) |
886 self.index.append(e) | 964 self.index.append(e) |
887 self.nodemap[node] = r | 965 self.nodemap[node] = r |
888 dfh.write(cdelta) | 966 if self.inlinedata(): |
889 ifh.write(struct.pack(self.indexformat, *e)) | 967 ifh.write(struct.pack(self.indexformat, *e)) |
968 ifh.write(cdelta) | |
969 self.checkinlinesize(ifh, transaction) | |
970 if not self.inlinedata(): | |
971 dfh = self.opener(self.datafile, "a") | |
972 ifh = self.opener(self.indexfile, "a") | |
973 else: | |
974 if not dfh: | |
975 # addrevision switched from inline to conventional | |
976 # reopen the index | |
977 dfh = self.opener(self.datafile, "a") | |
978 ifh = self.opener(self.indexfile, "a") | |
979 dfh.write(cdelta) | |
980 ifh.write(struct.pack(self.indexformat, *e)) | |
890 | 981 |
891 t, r, chain, prev = r, r + 1, node, node | 982 t, r, chain, prev = r, r + 1, node, node |
892 base = self.base(t) | 983 base = self.base(t) |
893 start = self.start(base) | 984 start = self.start(base) |
894 end = self.end(t) | 985 end = self.end(t) |
913 if rev >= self.count(): | 1004 if rev >= self.count(): |
914 return | 1005 return |
915 | 1006 |
916 # first truncate the files on disk | 1007 # first truncate the files on disk |
917 end = self.start(rev) | 1008 end = self.start(rev) |
918 df = self.opener(self.datafile, "a") | 1009 if not self.inlinedata(): |
919 df.truncate(end) | 1010 df = self.opener(self.datafile, "a") |
920 end = rev * struct.calcsize(self.indexformat) | 1011 df.truncate(end) |
1012 end = rev * struct.calcsize(self.indexformat) | |
1013 else: | |
1014 end += rev * struct.calcsize(self.indexformat) | |
921 | 1015 |
922 indexf = self.opener(self.indexfile, "a") | 1016 indexf = self.opener(self.indexfile, "a") |
923 indexf.truncate(end) | 1017 indexf.truncate(end) |
924 | 1018 |
925 # then reset internal state in memory to forget those revisions | 1019 # then reset internal state in memory to forget those revisions |
950 f.seek(0, 2) | 1044 f.seek(0, 2) |
951 actual = f.tell() | 1045 actual = f.tell() |
952 s = struct.calcsize(self.indexformat) | 1046 s = struct.calcsize(self.indexformat) |
953 i = actual / s | 1047 i = actual / s |
954 di = actual - (i * s) | 1048 di = actual - (i * s) |
1049 if self.inlinedata(): | |
1050 databytes = 0 | |
1051 for r in xrange(self.count()): | |
1052 databytes += self.length(r) | |
1053 dd = 0 | |
1054 di = actual - self.count() * s - databytes | |
955 except IOError, inst: | 1055 except IOError, inst: |
956 if inst.errno != errno.ENOENT: | 1056 if inst.errno != errno.ENOENT: |
957 raise | 1057 raise |
958 di = 0 | 1058 di = 0 |
959 | 1059 |