Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 51094:de6a8cc24de3
revlog: move the splitting-inline-revlog logic inside the inner object
This is another large IO block that we need to move within the inner object if
we want it to be self-sufficient.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 23 Oct 2023 14:27:07 +0200 |
parents | c2c24b6b97f5 |
children | a82704902db8 |
comparison
equal
deleted
inserted
replaced
51093:c2c24b6b97f5 | 51094:de6a8cc24de3 |
---|---|
516 mode=b"w", | 516 mode=b"w", |
517 checkambig=self.data_config.check_ambig, | 517 checkambig=self.data_config.check_ambig, |
518 atomictemp=True, | 518 atomictemp=True, |
519 ) | 519 ) |
520 | 520 |
521 def split_inline(self, tr, header, new_index_file_path=None): | |
522 """split the data of an inline revlog into an index and a data file""" | |
523 existing_handles = False | |
524 if self._writinghandles is not None: | |
525 existing_handles = True | |
526 fp = self._writinghandles[0] | |
527 fp.flush() | |
528 fp.close() | |
529 # We can't use the cached file handle after close(). So prevent | |
530 # its usage. | |
531 self._writinghandles = None | |
532 self._segmentfile.writing_handle = None | |
533 # No need to deal with sidedata writing handle as it is only | |
534 # relevant with revlog-v2 which is never inline, not reaching | |
535 # this code | |
536 | |
537 new_dfh = self.opener(self.data_file, mode=b"w+") | |
538 new_dfh.truncate(0) # drop any potentially existing data | |
539 try: | |
540 with self.reading(): | |
541 for r in range(len(self.index)): | |
542 new_dfh.write(self.get_segment_for_revs(r, r)[1]) | |
543 new_dfh.flush() | |
544 | |
545 if new_index_file_path is not None: | |
546 self.index_file = new_index_file_path | |
547 with self.__index_new_fp() as fp: | |
548 self.inline = False | |
549 for i in range(len(self.index)): | |
550 e = self.index.entry_binary(i) | |
551 if i == 0: | |
552 packed_header = self.index.pack_header(header) | |
553 e = packed_header + e | |
554 fp.write(e) | |
555 | |
556 # If we don't use side-write, the temp file replace the real | |
557 # index when we exit the context manager | |
558 | |
559 self._segmentfile = randomaccessfile.randomaccessfile( | |
560 self.opener, | |
561 self.data_file, | |
562 self.data_config.chunk_cache_size, | |
563 ) | |
564 | |
565 if existing_handles: | |
566 # switched from inline to conventional reopen the index | |
567 ifh = self.__index_write_fp() | |
568 self._writinghandles = (ifh, new_dfh, None) | |
569 self._segmentfile.writing_handle = new_dfh | |
570 new_dfh = None | |
571 # No need to deal with sidedata writing handle as it is only | |
572 # relevant with revlog-v2 which is never inline, not reaching | |
573 # this code | |
574 finally: | |
575 if new_dfh is not None: | |
576 new_dfh.close() | |
577 return self.index_file | |
578 | |
521 def get_segment_for_revs(self, startrev, endrev): | 579 def get_segment_for_revs(self, startrev, endrev): |
522 """Obtain a segment of raw data corresponding to a range of revisions. | 580 """Obtain a segment of raw data corresponding to a range of revisions. |
523 | 581 |
524 Accepts the start and end revisions and an optional already-open | 582 Accepts the start and end revisions and an optional already-open |
525 file handle to be used for reading. If the file handle is read, its | 583 file handle to be used for reading. If the file handle is read, its |
2584 ) | 2642 ) |
2585 if troffset: | 2643 if troffset: |
2586 tr.addbackup(self._indexfile, for_offset=True) | 2644 tr.addbackup(self._indexfile, for_offset=True) |
2587 tr.add(self._datafile, 0) | 2645 tr.add(self._datafile, 0) |
2588 | 2646 |
2589 existing_handles = False | 2647 new_index_file_path = None |
2590 if self._inner._writinghandles is not None: | |
2591 existing_handles = True | |
2592 fp = self._inner._writinghandles[0] | |
2593 fp.flush() | |
2594 fp.close() | |
2595 # We can't use the cached file handle after close(). So prevent | |
2596 # its usage. | |
2597 self._inner._writinghandles = None | |
2598 self._inner._segmentfile.writing_handle = None | |
2599 # No need to deal with sidedata writing handle as it is only | |
2600 # relevant with revlog-v2 which is never inline, not reaching | |
2601 # this code | |
2602 if side_write: | 2648 if side_write: |
2603 old_index_file_path = self._indexfile | 2649 old_index_file_path = self._indexfile |
2604 new_index_file_path = self._split_index_file | 2650 new_index_file_path = self._split_index_file |
2605 opener = self.opener | 2651 opener = self.opener |
2606 weak_self = weakref.ref(self) | 2652 weak_self = weakref.ref(self) |
2607 | 2653 |
2608 # the "split" index replace the real index when the transaction is finalized | 2654 # the "split" index replace the real index when the transaction is |
2655 # finalized | |
2609 def finalize_callback(tr): | 2656 def finalize_callback(tr): |
2610 opener.rename( | 2657 opener.rename( |
2611 new_index_file_path, | 2658 new_index_file_path, |
2612 old_index_file_path, | 2659 old_index_file_path, |
2613 checkambig=True, | 2660 checkambig=True, |
2619 | 2666 |
2620 def abort_callback(tr): | 2667 def abort_callback(tr): |
2621 maybe_self = weak_self() | 2668 maybe_self = weak_self() |
2622 if maybe_self is not None: | 2669 if maybe_self is not None: |
2623 maybe_self._indexfile = old_index_file_path | 2670 maybe_self._indexfile = old_index_file_path |
2671 maybe_self._inner.inline = True | |
2624 maybe_self._inner.index_file = old_index_file_path | 2672 maybe_self._inner.index_file = old_index_file_path |
2625 | 2673 |
2626 tr.registertmp(new_index_file_path) | 2674 tr.registertmp(new_index_file_path) |
2627 if self.target[1] is not None: | 2675 if self.target[1] is not None: |
2628 callback_id = b'000-revlog-split-%d-%s' % self.target | 2676 callback_id = b'000-revlog-split-%d-%s' % self.target |
2629 else: | 2677 else: |
2630 callback_id = b'000-revlog-split-%d' % self.target[0] | 2678 callback_id = b'000-revlog-split-%d' % self.target[0] |
2631 tr.addfinalize(callback_id, finalize_callback) | 2679 tr.addfinalize(callback_id, finalize_callback) |
2632 tr.addabort(callback_id, abort_callback) | 2680 tr.addabort(callback_id, abort_callback) |
2633 | 2681 |
2634 new_dfh = self._datafp(b'w+') | 2682 self._format_flags &= ~FLAG_INLINE_DATA |
2635 new_dfh.truncate(0) # drop any potentially existing data | 2683 self._inner.split_inline( |
2636 try: | 2684 tr, |
2637 with self.reading(): | 2685 self._format_flags | self._format_version, |
2638 for r in self: | 2686 new_index_file_path=new_index_file_path, |
2639 new_dfh.write(self._inner.get_segment_for_revs(r, r)[1]) | 2687 ) |
2640 new_dfh.flush() | 2688 |
2641 | 2689 self._inline = False |
2642 if side_write: | 2690 if new_index_file_path is not None: |
2643 self._indexfile = new_index_file_path | 2691 self._indexfile = new_index_file_path |
2644 self._inner.index_file = self._indexfile | 2692 |
2645 with self._inner._InnerRevlog__index_new_fp() as fp: | 2693 nodemaputil.setup_persistent_nodemap(tr, self) |
2646 self._format_flags &= ~FLAG_INLINE_DATA | |
2647 self._inline = False | |
2648 self._inner.inline = False | |
2649 for i in self: | |
2650 e = self.index.entry_binary(i) | |
2651 if i == 0: | |
2652 header = self._format_flags | self._format_version | |
2653 header = self.index.pack_header(header) | |
2654 e = header + e | |
2655 fp.write(e) | |
2656 | |
2657 # If we don't use side-write, the temp file replace the real | |
2658 # index when we exit the context manager | |
2659 | |
2660 nodemaputil.setup_persistent_nodemap(tr, self) | |
2661 self._inner._segmentfile = randomaccessfile.randomaccessfile( | |
2662 self.opener, | |
2663 self._datafile, | |
2664 self.data_config.chunk_cache_size, | |
2665 ) | |
2666 | |
2667 if existing_handles: | |
2668 # switched from inline to conventional reopen the index | |
2669 index_end = None | |
2670 ifh = self._inner._InnerRevlog__index_write_fp( | |
2671 index_end=index_end | |
2672 ) | |
2673 self._inner._writinghandles = (ifh, new_dfh, None) | |
2674 self._inner._segmentfile.writing_handle = new_dfh | |
2675 new_dfh = None | |
2676 # No need to deal with sidedata writing handle as it is only | |
2677 # relevant with revlog-v2 which is never inline, not reaching | |
2678 # this code | |
2679 finally: | |
2680 if new_dfh is not None: | |
2681 new_dfh.close() | |
2682 | 2694 |
2683 def _nodeduplicatecallback(self, transaction, node): | 2695 def _nodeduplicatecallback(self, transaction, node): |
2684 """called when trying to add a node already stored.""" | 2696 """called when trying to add a node already stored.""" |
2685 | 2697 |
2686 @contextlib.contextmanager | 2698 @contextlib.contextmanager |