comparison mercurial/revlog.py @ 51094:de6a8cc24de3

revlog: move the splitting-inline-revlog logic inside the inner object. This is another large IO block that we need to move within the inner object if we want it to be self-sufficient.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 23 Oct 2023 14:27:07 +0200
parents c2c24b6b97f5
children a82704902db8
comparison
equal deleted inserted replaced
51093:c2c24b6b97f5 51094:de6a8cc24de3
516 mode=b"w", 516 mode=b"w",
517 checkambig=self.data_config.check_ambig, 517 checkambig=self.data_config.check_ambig,
518 atomictemp=True, 518 atomictemp=True,
519 ) 519 )
520 520
521 def split_inline(self, tr, header, new_index_file_path=None):
522 """split the data of an inline revlog into an index and a data file"""
523 existing_handles = False
524 if self._writinghandles is not None:
525 existing_handles = True
526 fp = self._writinghandles[0]
527 fp.flush()
528 fp.close()
529 # We can't use the cached file handle after close(). So prevent
530 # its usage.
531 self._writinghandles = None
532 self._segmentfile.writing_handle = None
533 # No need to deal with sidedata writing handle as it is only
534 # relevant with revlog-v2 which is never inline, not reaching
535 # this code
536
537 new_dfh = self.opener(self.data_file, mode=b"w+")
538 new_dfh.truncate(0) # drop any potentially existing data
539 try:
540 with self.reading():
541 for r in range(len(self.index)):
542 new_dfh.write(self.get_segment_for_revs(r, r)[1])
543 new_dfh.flush()
544
545 if new_index_file_path is not None:
546 self.index_file = new_index_file_path
547 with self.__index_new_fp() as fp:
548 self.inline = False
549 for i in range(len(self.index)):
550 e = self.index.entry_binary(i)
551 if i == 0:
552 packed_header = self.index.pack_header(header)
553 e = packed_header + e
554 fp.write(e)
555
556 # If we don't use side-write, the temp file replace the real
557 # index when we exit the context manager
558
559 self._segmentfile = randomaccessfile.randomaccessfile(
560 self.opener,
561 self.data_file,
562 self.data_config.chunk_cache_size,
563 )
564
565 if existing_handles:
566 # switched from inline to conventional reopen the index
567 ifh = self.__index_write_fp()
568 self._writinghandles = (ifh, new_dfh, None)
569 self._segmentfile.writing_handle = new_dfh
570 new_dfh = None
571 # No need to deal with sidedata writing handle as it is only
572 # relevant with revlog-v2 which is never inline, not reaching
573 # this code
574 finally:
575 if new_dfh is not None:
576 new_dfh.close()
577 return self.index_file
578
521 def get_segment_for_revs(self, startrev, endrev): 579 def get_segment_for_revs(self, startrev, endrev):
522 """Obtain a segment of raw data corresponding to a range of revisions. 580 """Obtain a segment of raw data corresponding to a range of revisions.
523 581
524 Accepts the start and end revisions and an optional already-open 582 Accepts the start and end revisions and an optional already-open
525 file handle to be used for reading. If the file handle is read, its 583 file handle to be used for reading. If the file handle is read, its
2584 ) 2642 )
2585 if troffset: 2643 if troffset:
2586 tr.addbackup(self._indexfile, for_offset=True) 2644 tr.addbackup(self._indexfile, for_offset=True)
2587 tr.add(self._datafile, 0) 2645 tr.add(self._datafile, 0)
2588 2646
2589 existing_handles = False 2647 new_index_file_path = None
2590 if self._inner._writinghandles is not None:
2591 existing_handles = True
2592 fp = self._inner._writinghandles[0]
2593 fp.flush()
2594 fp.close()
2595 # We can't use the cached file handle after close(). So prevent
2596 # its usage.
2597 self._inner._writinghandles = None
2598 self._inner._segmentfile.writing_handle = None
2599 # No need to deal with sidedata writing handle as it is only
2600 # relevant with revlog-v2 which is never inline, not reaching
2601 # this code
2602 if side_write: 2648 if side_write:
2603 old_index_file_path = self._indexfile 2649 old_index_file_path = self._indexfile
2604 new_index_file_path = self._split_index_file 2650 new_index_file_path = self._split_index_file
2605 opener = self.opener 2651 opener = self.opener
2606 weak_self = weakref.ref(self) 2652 weak_self = weakref.ref(self)
2607 2653
2608 # the "split" index replace the real index when the transaction is finalized 2654 # the "split" index replace the real index when the transaction is
2655 # finalized
2609 def finalize_callback(tr): 2656 def finalize_callback(tr):
2610 opener.rename( 2657 opener.rename(
2611 new_index_file_path, 2658 new_index_file_path,
2612 old_index_file_path, 2659 old_index_file_path,
2613 checkambig=True, 2660 checkambig=True,
2619 2666
2620 def abort_callback(tr): 2667 def abort_callback(tr):
2621 maybe_self = weak_self() 2668 maybe_self = weak_self()
2622 if maybe_self is not None: 2669 if maybe_self is not None:
2623 maybe_self._indexfile = old_index_file_path 2670 maybe_self._indexfile = old_index_file_path
2671 maybe_self._inner.inline = True
2624 maybe_self._inner.index_file = old_index_file_path 2672 maybe_self._inner.index_file = old_index_file_path
2625 2673
2626 tr.registertmp(new_index_file_path) 2674 tr.registertmp(new_index_file_path)
2627 if self.target[1] is not None: 2675 if self.target[1] is not None:
2628 callback_id = b'000-revlog-split-%d-%s' % self.target 2676 callback_id = b'000-revlog-split-%d-%s' % self.target
2629 else: 2677 else:
2630 callback_id = b'000-revlog-split-%d' % self.target[0] 2678 callback_id = b'000-revlog-split-%d' % self.target[0]
2631 tr.addfinalize(callback_id, finalize_callback) 2679 tr.addfinalize(callback_id, finalize_callback)
2632 tr.addabort(callback_id, abort_callback) 2680 tr.addabort(callback_id, abort_callback)
2633 2681
2634 new_dfh = self._datafp(b'w+') 2682 self._format_flags &= ~FLAG_INLINE_DATA
2635 new_dfh.truncate(0) # drop any potentially existing data 2683 self._inner.split_inline(
2636 try: 2684 tr,
2637 with self.reading(): 2685 self._format_flags | self._format_version,
2638 for r in self: 2686 new_index_file_path=new_index_file_path,
2639 new_dfh.write(self._inner.get_segment_for_revs(r, r)[1]) 2687 )
2640 new_dfh.flush() 2688
2641 2689 self._inline = False
2642 if side_write: 2690 if new_index_file_path is not None:
2643 self._indexfile = new_index_file_path 2691 self._indexfile = new_index_file_path
2644 self._inner.index_file = self._indexfile 2692
2645 with self._inner._InnerRevlog__index_new_fp() as fp: 2693 nodemaputil.setup_persistent_nodemap(tr, self)
2646 self._format_flags &= ~FLAG_INLINE_DATA
2647 self._inline = False
2648 self._inner.inline = False
2649 for i in self:
2650 e = self.index.entry_binary(i)
2651 if i == 0:
2652 header = self._format_flags | self._format_version
2653 header = self.index.pack_header(header)
2654 e = header + e
2655 fp.write(e)
2656
2657 # If we don't use side-write, the temp file replace the real
2658 # index when we exit the context manager
2659
2660 nodemaputil.setup_persistent_nodemap(tr, self)
2661 self._inner._segmentfile = randomaccessfile.randomaccessfile(
2662 self.opener,
2663 self._datafile,
2664 self.data_config.chunk_cache_size,
2665 )
2666
2667 if existing_handles:
2668 # switched from inline to conventional reopen the index
2669 index_end = None
2670 ifh = self._inner._InnerRevlog__index_write_fp(
2671 index_end=index_end
2672 )
2673 self._inner._writinghandles = (ifh, new_dfh, None)
2674 self._inner._segmentfile.writing_handle = new_dfh
2675 new_dfh = None
2676 # No need to deal with sidedata writing handle as it is only
2677 # relevant with revlog-v2 which is never inline, not reaching
2678 # this code
2679 finally:
2680 if new_dfh is not None:
2681 new_dfh.close()
2682 2694
2683 def _nodeduplicatecallback(self, transaction, node): 2695 def _nodeduplicatecallback(self, transaction, node):
2684 """called when trying to add a node already stored.""" 2696 """called when trying to add a node already stored."""
2685 2697
2686 @contextlib.contextmanager 2698 @contextlib.contextmanager