comparison rust/hg-core/src/revlog/file_io.rs @ 52766:549b58b1ce72

rust-inner-revlog: use threadlocals for file handles In contexts where we access the same revlog from multiple threads (like in `annotate`), multiple threads would lock the handle, then read from a different place that they would expect, since the `seek` behavior of the `InnerRevlog` was written with single threaded access in mind.
author Rapha?l Gom?s <rgomes@octobus.net>
date Fri, 24 Jan 2025 13:07:15 -0500
parents 162f4801ad39
children
comparison
equal deleted inserted replaced
52765:162f4801ad39 52766:549b58b1ce72
1 //! Helpers for revlog file reading and writing. 1 //! Helpers for revlog file reading and writing.
2 2
3 use std::{ 3 use std::{
4 cell::RefCell,
4 io::{Read, Seek, SeekFrom, Write}, 5 io::{Read, Seek, SeekFrom, Write},
5 path::{Path, PathBuf}, 6 path::{Path, PathBuf},
6 sync::{Arc, Mutex, RwLock}, 7 sync::{Arc, Mutex},
7 }; 8 };
8 9
9 use crate::{ 10 use crate::{
10 errors::{HgError, IoResultExt}, 11 errors::{HgError, IoResultExt},
11 vfs::{Vfs, VfsFile}, 12 vfs::{Vfs, VfsFile},
25 pub struct RandomAccessFile { 26 pub struct RandomAccessFile {
26 /// The current store VFS to pass it to [`FileHandle`] 27 /// The current store VFS to pass it to [`FileHandle`]
27 vfs: Box<dyn Vfs>, 28 vfs: Box<dyn Vfs>,
28 /// Filename of the open file, relative to the vfs root 29 /// Filename of the open file, relative to the vfs root
29 pub filename: PathBuf, 30 pub filename: PathBuf,
30 /// The current read-only handle on the file, if any 31 /// The current read-only handle on the file, if any.
31 pub reading_handle: RwLock<Option<FileHandle>>, 32 /// Specific to the current thread, since we don't want seeks to overlap
32 /// The current read-write handle on the file, if any 33 pub reading_handle: thread_local::ThreadLocal<RefCell<Option<FileHandle>>>,
33 pub writing_handle: RwLock<Option<FileHandle>>, 34 /// The current read-write handle on the file, if any.
35 /// Specific to the current thread, since we don't want seeks to overlap,
36 /// and we can re-use the write handle for reading in certain contexts.
37 /// Logically, two concurrent writes are impossible because they are only
38 /// accessible through `&mut self` methods, which take a lock.
39 pub writing_handle: thread_local::ThreadLocal<RefCell<Option<FileHandle>>>,
34 } 40 }
35 41
36 impl RandomAccessFile { 42 impl RandomAccessFile {
37 /// Wrap a file for random access 43 /// Wrap a file for random access
38 pub fn new(vfs: Box<dyn Vfs>, filename: PathBuf) -> Self { 44 pub fn new(vfs: Box<dyn Vfs>, filename: PathBuf) -> Self {
39 assert!(filename.is_relative()); 45 assert!(filename.is_relative());
40 Self { 46 Self {
41 vfs, 47 vfs,
42 filename, 48 filename,
43 reading_handle: RwLock::new(None), 49 reading_handle: thread_local::ThreadLocal::new(),
44 writing_handle: RwLock::new(None), 50 writing_handle: thread_local::ThreadLocal::new(),
45 } 51 }
46 } 52 }
47 53
48 /// Read a chunk of bytes from the file. 54 /// Read a chunk of bytes from the file.
49 pub fn read_chunk( 55 pub fn read_chunk(
59 } 65 }
60 66
61 /// `pub` only for hg-cpython 67 /// `pub` only for hg-cpython
62 #[doc(hidden)] 68 #[doc(hidden)]
63 pub fn get_read_handle(&self) -> Result<FileHandle, HgError> { 69 pub fn get_read_handle(&self) -> Result<FileHandle, HgError> {
64 if let Some(handle) = 70 if let Some(handle) = &*self.writing_handle.get_or_default().borrow() {
65 &*self.writing_handle.write().expect("lock is poisoned")
66 {
67 // Use a file handle being actively used for writes, if available. 71 // Use a file handle being actively used for writes, if available.
68 // There is some danger to doing this because reads will seek the 72 // There is some danger to doing this because reads will seek the
69 // file. 73 // file.
70 // However, [`Revlog::write_entry`] performs a `SeekFrom::End(0)` 74 // However, [`Revlog::write_entry`] performs a `SeekFrom::End(0)`
71 // before all writes, so we should be safe. 75 // before all writes, so we should be safe.
72 return Ok(handle.clone()); 76 return Ok(handle.clone());
73 } 77 }
74 if let Some(handle) = 78 if let Some(handle) = &*self.reading_handle.get_or_default().borrow() {
75 &*self.reading_handle.read().expect("lock is poisoned")
76 {
77 return Ok(handle.clone()); 79 return Ok(handle.clone());
78 } 80 }
79 // early returns done to work around borrowck being overzealous 81 // early returns done to work around borrowck being overzealous
80 // See https://github.com/rust-lang/rust/issues/103108 82 // See https://github.com/rust-lang/rust/issues/103108
81 let new_handle = FileHandle::new( 83 let new_handle = FileHandle::new(
82 dyn_clone::clone_box(&*self.vfs), 84 dyn_clone::clone_box(&*self.vfs),
83 &self.filename, 85 &self.filename,
84 false, 86 false,
85 false, 87 false,
86 )?; 88 )?;
87 *self.reading_handle.write().expect("lock is poisoned") = 89 *self.reading_handle.get_or_default().borrow_mut() =
88 Some(new_handle.clone()); 90 Some(new_handle.clone());
89 Ok(new_handle) 91 Ok(new_handle)
90 } 92 }
91 93
92 /// `pub` only for hg-cpython 94 /// `pub` only for hg-cpython
93 #[doc(hidden)] 95 #[doc(hidden)]
94 pub fn exit_reading_context(&self) { 96 pub fn exit_reading_context(&self) {
95 self.reading_handle 97 self.reading_handle.get().map(|h| h.take());
96 .write()
97 .expect("lock is poisoned")
98 .take();
99 } 98 }
100 99
101 // Returns whether this file currently open 100 // Returns whether this file currently open
102 pub fn is_open(&self) -> bool { 101 pub fn is_open(&self) -> bool {
103 self.reading_handle 102 self.reading_handle.get_or_default().borrow().is_some()
104 .read() 103 || self.writing_handle.get_or_default().borrow().is_some()
105 .expect("lock is poisoned")
106 .is_some()
107 || self
108 .writing_handle
109 .read()
110 .expect("lock is poisoned")
111 .is_some()
112 } 104 }
113 } 105 }
114 106
115 /// A buffer that holds new changelog index data that needs to be written 107 /// A buffer that holds new changelog index data that needs to be written
116 /// after the manifest and filelogs so that the repo is updated atomically to 108 /// after the manifest and filelogs so that the repo is updated atomically to