mercurial/revlog.py
changeset 47389 e6292eb33384
parent 47387 75e1104f23a2
child 47391 33d626910374
equal deleted inserted replaced
47388:bcf92bdc2bca 47389:e6292eb33384
     1 # revlog.py - storage back-end for mercurial
     1 # revlog.py - storage back-end for mercurial
       
     2 # coding: utf8
     2 #
     3 #
     3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
     4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
     4 #
     5 #
     5 # This software may be used and distributed according to the terms of the
     6 # This software may be used and distributed according to the terms of the
     6 # GNU General Public License version 2 or any later version.
     7 # GNU General Public License version 2 or any later version.
   258 
   259 
   259 PARTIAL_READ_MSG = _(
   260 PARTIAL_READ_MSG = _(
   260     b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
   261     b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
   261 )
   262 )
   262 
   263 
       
   264 FILE_TOO_SHORT_MSG = _(
       
   265     b'cannot read from revlog %s;'
       
   266     b'  expected %d bytes from offset %d, data size is %d'
       
   267 )
       
   268 
   263 
   269 
   264 class revlog(object):
   270 class revlog(object):
   265     """
   271     """
   266     the underlying revision storage object
   272     the underlying revision storage object
   267 
   273 
   399         self.radix = radix
   405         self.radix = radix
   400 
   406 
   401         self._docket_file = None
   407         self._docket_file = None
   402         self._indexfile = None
   408         self._indexfile = None
   403         self._datafile = None
   409         self._datafile = None
       
   410         self._sidedatafile = None
   404         self._nodemap_file = None
   411         self._nodemap_file = None
   405         self.postfix = postfix
   412         self.postfix = postfix
   406         self._trypending = trypending
   413         self._trypending = trypending
   407         self.opener = opener
   414         self.opener = opener
   408         if persistentnodemap:
   415         if persistentnodemap:
   443 
   450 
   444         # Make copy of flag processors so each revlog instance can support
   451         # Make copy of flag processors so each revlog instance can support
   445         # custom flags.
   452         # custom flags.
   446         self._flagprocessors = dict(flagutil.flagprocessors)
   453         self._flagprocessors = dict(flagutil.flagprocessors)
   447 
   454 
   448         # 2-tuple of file handles being used for active writing.
   455         # 3-tuple of file handles being used for active writing.
   449         self._writinghandles = None
   456         self._writinghandles = None
   450         # prevent nesting of addgroup
   457         # prevent nesting of addgroup
   451         self._adding_group = None
   458         self._adding_group = None
   452 
   459 
   453         self._loadindex()
   460         self._loadindex()
   632             # main docket, so disable it for now.
   639             # main docket, so disable it for now.
   633             self._nodemap_file = None
   640             self._nodemap_file = None
   634 
   641 
   635         if self._docket is not None:
   642         if self._docket is not None:
   636             self._datafile = self._docket.data_filepath()
   643             self._datafile = self._docket.data_filepath()
       
   644             self._sidedatafile = self._docket.sidedata_filepath()
   637         elif self.postfix is None:
   645         elif self.postfix is None:
   638             self._datafile = b'%s.d' % self.radix
   646             self._datafile = b'%s.d' % self.radix
   639         else:
   647         else:
   640             self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
   648             self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
   641 
   649 
   801             else:
   809             else:
   802                 func = self._datafp
   810                 func = self._datafp
   803             with func() as fp:
   811             with func() as fp:
   804                 yield fp
   812                 yield fp
   805 
   813 
       
   814     @contextlib.contextmanager
   806     def _sidedatareadfp(self):
   815     def _sidedatareadfp(self):
   807         """file object suitable to read sidedata"""
   816         """file object suitable to read sidedata"""
   808         return self._datareadfp()
   817         if self._writinghandles:
       
   818             yield self._writinghandles[2]
       
   819         else:
       
   820             with self.opener(self._sidedatafile) as fp:
       
   821                 yield fp
   809 
   822 
   810     def tiprev(self):
   823     def tiprev(self):
   811         return len(self.index) - 1
   824         return len(self.index) - 1
   812 
   825 
   813     def tip(self):
   826     def tip(self):
   906 
   919 
   907     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
   920     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
   908     # are flags.
   921     # are flags.
   909     def start(self, rev):
   922     def start(self, rev):
   910         return int(self.index[rev][0] >> 16)
   923         return int(self.index[rev][0] >> 16)
       
   924 
       
   925     def sidedata_cut_off(self, rev):
       
   926         sd_cut_off = self.index[rev][8]
       
   927         if sd_cut_off != 0:
       
   928             return sd_cut_off
       
   929         # This is some annoying dance, because entries without sidedata
       
   930         # currently use 0 as their offset. (instead of previous-offset +
       
   931         # previous-size)
       
   932         #
       
   933         # We should reconsider this sidedata → 0 sidedata_offset policy.
       
   934         # In the meantime, we need this.
       
   935         while 0 <= rev:
       
   936             e = self.index[rev]
       
   937             if e[9] != 0:
       
   938                 return e[8] + e[9]
       
   939             rev -= 1
       
   940         return 0
   911 
   941 
   912     def flags(self, rev):
   942     def flags(self, rev):
   913         return self.index[rev][0] & 0xFFFF
   943         return self.index[rev][0] & 0xFFFF
   914 
   944 
   915     def length(self, rev):
   945     def length(self, rev):
  2072         if sidedata_size == 0:
  2102         if sidedata_size == 0:
  2073             return {}
  2103             return {}
  2074 
  2104 
  2075         # XXX this needs caching, as we do for data
  2105         # XXX this needs caching, as we do for data
  2076         with self._sidedatareadfp() as sdf:
  2106         with self._sidedatareadfp() as sdf:
  2077             sdf.seek(sidedata_offset)
  2107             if self._docket.sidedata_end < sidedata_offset + sidedata_size:
       
  2108                 filename = self._sidedatafile
       
  2109                 end = self._docket.sidedata_end
       
  2110                 offset = sidedata_offset
       
  2111                 length = sidedata_size
       
  2112                 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
       
  2113                 raise error.RevlogError(m)
       
  2114 
       
  2115             sdf.seek(sidedata_offset, os.SEEK_SET)
  2078             comp_segment = sdf.read(sidedata_size)
  2116             comp_segment = sdf.read(sidedata_size)
  2079 
  2117 
  2080             if len(comp_segment) < sidedata_size:
  2118             if len(comp_segment) < sidedata_size:
  2081                 filename = self._datafile
  2119                 filename = self._sidedatafile
  2082                 length = sidedata_size
  2120                 length = sidedata_size
  2083                 offset = sidedata_offset
  2121                 offset = sidedata_offset
  2084                 got = len(comp_segment)
  2122                 got = len(comp_segment)
  2085                 m = PARTIAL_READ_MSG % (filename, length, offset, got)
  2123                 m = PARTIAL_READ_MSG % (filename, length, offset, got)
  2086                 raise error.RevlogError(m)
  2124                 raise error.RevlogError(m)
  2213             self._chunkclear()
  2251             self._chunkclear()
  2214 
  2252 
  2215             if existing_handles:
  2253             if existing_handles:
  2216                 # switched from inline to conventional; reopen the index
  2254                 # switched from inline to conventional; reopen the index
  2217                 ifh = self.__index_write_fp()
  2255                 ifh = self.__index_write_fp()
  2218                 self._writinghandles = (ifh, new_dfh)
  2256                 self._writinghandles = (ifh, new_dfh, None)
  2219                 new_dfh = None
  2257                 new_dfh = None
  2220         finally:
  2258         finally:
  2221             if new_dfh is not None:
  2259             if new_dfh is not None:
  2222                 new_dfh.close()
  2260                 new_dfh.close()
  2223 
  2261 
  2231             msg %= self.display_id
  2269             msg %= self.display_id
  2232             raise error.ProgrammingError(msg)
  2270             raise error.ProgrammingError(msg)
  2233         if self._writinghandles is not None:
  2271         if self._writinghandles is not None:
  2234             yield
  2272             yield
  2235         else:
  2273         else:
  2236             ifh = dfh = None
  2274             ifh = dfh = sdfh = None
  2237             try:
  2275             try:
  2238                 r = len(self)
  2276                 r = len(self)
  2239                 # opening the data file.
  2277                 # opening the data file.
  2240                 dsize = 0
  2278                 dsize = 0
  2241                 if r:
  2279                 if r:
  2251                     except IOError as inst:
  2289                     except IOError as inst:
  2252                         if inst.errno != errno.ENOENT:
  2290                         if inst.errno != errno.ENOENT:
  2253                             raise
  2291                             raise
  2254                         dfh = self._datafp(b"w+")
  2292                         dfh = self._datafp(b"w+")
  2255                     transaction.add(self._datafile, dsize)
  2293                     transaction.add(self._datafile, dsize)
       
  2294                 if self._sidedatafile is not None:
       
  2295                     try:
       
  2296                         sdfh = self.opener(self._sidedatafile, mode=b"r+")
       
  2297                         dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
       
  2298                     except IOError as inst:
       
  2299                         if inst.errno != errno.ENOENT:
       
  2300                             raise
       
  2301                         sdfh = self.opener(self._sidedatafile, mode=b"w+")
       
  2302                     transaction.add(
       
  2303                         self._sidedatafile, self._docket.sidedata_end
       
  2304                     )
  2256 
  2305 
  2257                 # opening the index file.
  2306                 # opening the index file.
  2258                 isize = r * self.index.entry_size
  2307                 isize = r * self.index.entry_size
  2259                 ifh = self.__index_write_fp()
  2308                 ifh = self.__index_write_fp()
  2260                 if self._inline:
  2309                 if self._inline:
  2261                     transaction.add(self._indexfile, dsize + isize)
  2310                     transaction.add(self._indexfile, dsize + isize)
  2262                 else:
  2311                 else:
  2263                     transaction.add(self._indexfile, isize)
  2312                     transaction.add(self._indexfile, isize)
  2264                 # exposing all file handles for writing.
  2313                 # exposing all file handles for writing.
  2265                 self._writinghandles = (ifh, dfh)
  2314                 self._writinghandles = (ifh, dfh, sdfh)
  2266                 yield
  2315                 yield
  2267                 if self._docket is not None:
  2316                 if self._docket is not None:
  2268                     self._write_docket(transaction)
  2317                     self._write_docket(transaction)
  2269             finally:
  2318             finally:
  2270                 self._writinghandles = None
  2319                 self._writinghandles = None
  2271                 if dfh is not None:
  2320                 if dfh is not None:
       
  2321                     dfh.close()
       
  2322                 if sdfh is not None:
  2272                     dfh.close()
  2323                     dfh.close()
  2273                 # closing the index file last to avoid exposing references to
  2324                 # closing the index file last to avoid exposing references to
  2274                 # potentially unflushed data content.
  2325                 # potentially unflushed data content.
  2275                 if ifh is not None:
  2326                 if ifh is not None:
  2276                     ifh.close()
  2327                     ifh.close()
  2511         prev = curr - 1
  2562         prev = curr - 1
  2512 
  2563 
  2513         offset = self._get_data_offset(prev)
  2564         offset = self._get_data_offset(prev)
  2514 
  2565 
  2515         if self._concurrencychecker:
  2566         if self._concurrencychecker:
  2516             ifh, dfh = self._writinghandles
  2567             ifh, dfh, sdfh = self._writinghandles
       
  2568             # XXX no checking for the sidedata file
  2517             if self._inline:
  2569             if self._inline:
  2518                 # offset is "as if" it were in the .d file, so we need to add on
  2570                 # offset is "as if" it were in the .d file, so we need to add on
  2519                 # the size of the entry metadata.
  2571                 # the size of the entry metadata.
  2520                 self._concurrencychecker(
  2572                 self._concurrencychecker(
  2521                     ifh, self._indexfile, offset + curr * self.index.entry_size
  2573                     ifh, self._indexfile, offset + curr * self.index.entry_size
  2568 
  2620 
  2569         sidedata_compression_mode = COMP_MODE_INLINE
  2621         sidedata_compression_mode = COMP_MODE_INLINE
  2570         if sidedata and self.hassidedata:
  2622         if sidedata and self.hassidedata:
  2571             sidedata_compression_mode = COMP_MODE_PLAIN
  2623             sidedata_compression_mode = COMP_MODE_PLAIN
  2572             serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
  2624             serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
  2573             sidedata_offset = offset + deltainfo.deltalen
  2625             sidedata_offset = self._docket.sidedata_end
  2574             h, comp_sidedata = self.compress(serialized_sidedata)
  2626             h, comp_sidedata = self.compress(serialized_sidedata)
  2575             if (
  2627             if (
  2576                 h != b'u'
  2628                 h != b'u'
  2577                 and comp_sidedata[0:1] != b'\0'
  2629                 and comp_sidedata[0:1] != b'\0'
  2578                 and len(comp_sidedata) < len(serialized_sidedata)
  2630                 and len(comp_sidedata) < len(serialized_sidedata)
  2620             entry,
  2672             entry,
  2621             deltainfo.data,
  2673             deltainfo.data,
  2622             link,
  2674             link,
  2623             offset,
  2675             offset,
  2624             serialized_sidedata,
  2676             serialized_sidedata,
       
  2677             sidedata_offset,
  2625         )
  2678         )
  2626 
  2679 
  2627         rawtext = btext[0]
  2680         rawtext = btext[0]
  2628 
  2681 
  2629         if alwayscache and rawtext is None:
  2682         if alwayscache and rawtext is None:
  2646         if self._docket is None:
  2699         if self._docket is None:
  2647             return self.end(prev)
  2700             return self.end(prev)
  2648         else:
  2701         else:
  2649             return self._docket.data_end
  2702             return self._docket.data_end
  2650 
  2703 
  2651     def _writeentry(self, transaction, entry, data, link, offset, sidedata):
  2704     def _writeentry(
       
  2705         self, transaction, entry, data, link, offset, sidedata, sidedata_offset
       
  2706     ):
  2652         # Files opened in a+ mode have inconsistent behavior on various
  2707         # Files opened in a+ mode have inconsistent behavior on various
  2653         # platforms. Windows requires that a file positioning call be made
  2708         # platforms. Windows requires that a file positioning call be made
  2654         # when the file handle transitions between reads and writes. See
  2709         # when the file handle transitions between reads and writes. See
  2655         # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
  2710         # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
  2656         # platforms, Python or the platform itself can be buggy. Some versions
  2711         # platforms, Python or the platform itself can be buggy. Some versions
  2662         # the file handle is reused for reads and may be seeked there, we need
  2717         # the file handle is reused for reads and may be seeked there, we need
  2663         # to be careful before changing this.
  2718         # to be careful before changing this.
  2664         if self._writinghandles is None:
  2719         if self._writinghandles is None:
  2665             msg = b'adding revision outside `revlog._writing` context'
  2720             msg = b'adding revision outside `revlog._writing` context'
  2666             raise error.ProgrammingError(msg)
  2721             raise error.ProgrammingError(msg)
  2667         ifh, dfh = self._writinghandles
  2722         ifh, dfh, sdfh = self._writinghandles
  2668         if self._docket is None:
  2723         if self._docket is None:
  2669             ifh.seek(0, os.SEEK_END)
  2724             ifh.seek(0, os.SEEK_END)
  2670         else:
  2725         else:
  2671             ifh.seek(self._docket.index_end, os.SEEK_SET)
  2726             ifh.seek(self._docket.index_end, os.SEEK_SET)
  2672         if dfh:
  2727         if dfh:
  2673             if self._docket is None:
  2728             if self._docket is None:
  2674                 dfh.seek(0, os.SEEK_END)
  2729                 dfh.seek(0, os.SEEK_END)
  2675             else:
  2730             else:
  2676                 dfh.seek(self._docket.data_end, os.SEEK_SET)
  2731                 dfh.seek(self._docket.data_end, os.SEEK_SET)
       
  2732         if sdfh:
       
  2733             sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
  2677 
  2734 
  2678         curr = len(self) - 1
  2735         curr = len(self) - 1
  2679         if not self._inline:
  2736         if not self._inline:
  2680             transaction.add(self._datafile, offset)
  2737             transaction.add(self._datafile, offset)
       
  2738             if self._sidedatafile:
       
  2739                 transaction.add(self._sidedatafile, sidedata_offset)
  2681             transaction.add(self._indexfile, curr * len(entry))
  2740             transaction.add(self._indexfile, curr * len(entry))
  2682             if data[0]:
  2741             if data[0]:
  2683                 dfh.write(data[0])
  2742                 dfh.write(data[0])
  2684             dfh.write(data[1])
  2743             dfh.write(data[1])
  2685             if sidedata:
  2744             if sidedata:
  2686                 dfh.write(sidedata)
  2745                 sdfh.write(sidedata)
  2687             ifh.write(entry)
  2746             ifh.write(entry)
  2688         else:
  2747         else:
  2689             offset += curr * self.index.entry_size
  2748             offset += curr * self.index.entry_size
  2690             transaction.add(self._indexfile, offset)
  2749             transaction.add(self._indexfile, offset)
  2691             ifh.write(entry)
  2750             ifh.write(entry)
  2692             ifh.write(data[0])
  2751             ifh.write(data[0])
  2693             ifh.write(data[1])
  2752             ifh.write(data[1])
  2694             if sidedata:
  2753             assert not sidedata
  2695                 ifh.write(sidedata)
       
  2696             self._enforceinlinesize(transaction)
  2754             self._enforceinlinesize(transaction)
  2697         if self._docket is not None:
  2755         if self._docket is not None:
  2698             self._docket.index_end = self._writinghandles[0].tell()
  2756             self._docket.index_end = self._writinghandles[0].tell()
  2699             self._docket.data_end = self._writinghandles[1].tell()
  2757             self._docket.data_end = self._writinghandles[1].tell()
       
  2758             self._docket.sidedata_end = self._writinghandles[2].tell()
  2700 
  2759 
  2701         nodemaputil.setup_persistent_nodemap(transaction, self)
  2760         nodemaputil.setup_persistent_nodemap(transaction, self)
  2702 
  2761 
  2703     def addgroup(
  2762     def addgroup(
  2704         self,
  2763         self,
  2864             transaction.add(self._datafile, data_end)
  2923             transaction.add(self._datafile, data_end)
  2865             end = rev * self.index.entry_size
  2924             end = rev * self.index.entry_size
  2866         else:
  2925         else:
  2867             end = data_end + (rev * self.index.entry_size)
  2926             end = data_end + (rev * self.index.entry_size)
  2868 
  2927 
       
  2928         if self._sidedatafile:
       
  2929             sidedata_end = self.sidedata_cut_off(rev)
       
  2930             transaction.add(self._sidedatafile, sidedata_end)
       
  2931 
  2869         transaction.add(self._indexfile, end)
  2932         transaction.add(self._indexfile, end)
  2870         if self._docket is not None:
  2933         if self._docket is not None:
  2871             # XXX we could leverage the docket while stripping. However, it is
  2934             # XXX we could leverage the docket while stripping. However, it is
  2872             # not powerful enough at the time of this comment
  2935             # not powerful enough at the time of this comment
  2873             self._docket.index_end = end
  2936             self._docket.index_end = end
  2874             self._docket.data_end = data_end
  2937             self._docket.data_end = data_end
       
  2938             self._docket.sidedata_end = sidedata_end
  2875             self._docket.write(transaction, stripping=True)
  2939             self._docket.write(transaction, stripping=True)
  2876 
  2940 
  2877         # then reset internal state in memory to forget those revisions
  2941         # then reset internal state in memory to forget those revisions
  2878         self._revisioncache = None
  2942         self._revisioncache = None
  2879         self._chaininfocache = util.lrucachedict(500)
  2943         self._chaininfocache = util.lrucachedict(500)
  3396             return
  3460             return
  3397 
  3461 
  3398         new_entries = []
  3462         new_entries = []
  3399         # append the new sidedata
  3463         # append the new sidedata
  3400         with self._writing(transaction):
  3464         with self._writing(transaction):
  3401             ifh, dfh = self._writinghandles
  3465             ifh, dfh, sdfh = self._writinghandles
  3402             if self._docket is not None:
  3466             dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
  3403                 dfh.seek(self._docket.data_end, os.SEEK_SET)
  3467 
  3404             else:
  3468             current_offset = sdfh.tell()
  3405                 dfh.seek(0, os.SEEK_END)
       
  3406 
       
  3407             current_offset = dfh.tell()
       
  3408             for rev in range(startrev, endrev + 1):
  3469             for rev in range(startrev, endrev + 1):
  3409                 entry = self.index[rev]
  3470                 entry = self.index[rev]
  3410                 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
  3471                 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
  3411                     store=self,
  3472                     store=self,
  3412                     sidedata_helpers=helpers,
  3473                     sidedata_helpers=helpers,
  3453                     new_offset_flags,
  3514                     new_offset_flags,
  3454                     sidedata_compression_mode,
  3515                     sidedata_compression_mode,
  3455                 )
  3516                 )
  3456 
  3517 
  3457                 # the sidedata computation might have moved the file cursors around
  3518                 # the sidedata computation might have moved the file cursors around
  3458                 dfh.seek(current_offset, os.SEEK_SET)
  3519                 sdfh.seek(current_offset, os.SEEK_SET)
  3459                 dfh.write(serialized_sidedata)
  3520                 sdfh.write(serialized_sidedata)
  3460                 new_entries.append(entry_update)
  3521                 new_entries.append(entry_update)
  3461                 current_offset += len(serialized_sidedata)
  3522                 current_offset += len(serialized_sidedata)
  3462                 if self._docket is not None:
  3523                 self._docket.sidedata_end = sdfh.tell()
  3463                     self._docket.data_end = dfh.tell()
       
  3464 
  3524 
  3465             # rewrite the new index entries
  3525             # rewrite the new index entries
  3466             ifh.seek(startrev * self.index.entry_size)
  3526             ifh.seek(startrev * self.index.entry_size)
  3467             for i, e in enumerate(new_entries):
  3527             for i, e in enumerate(new_entries):
  3468                 rev = startrev + i
  3528                 rev = startrev + i