Mercurial > public > mercurial-scm > hg
comparison rust/hg-core/src/revlog/file_io.rs @ 52766:549b58b1ce72
rust-inner-revlog: use threadlocals for file handles
In contexts where we access the same revlog from multiple threads
(like in `annotate`), multiple threads would lock the handle, then read from
a different place that they would expect, since the `seek` behavior of the
`InnerRevlog` was written with single threaded access in mind.
author | Rapha?l Gom?s <rgomes@octobus.net> |
---|---|
date | Fri, 24 Jan 2025 13:07:15 -0500 |
parents | 162f4801ad39 |
children |
comparison
equal
deleted
inserted
replaced
52765:162f4801ad39 | 52766:549b58b1ce72 |
---|---|
1 //! Helpers for revlog file reading and writing. | 1 //! Helpers for revlog file reading and writing. |
2 | 2 |
3 use std::{ | 3 use std::{ |
4 cell::RefCell, | |
4 io::{Read, Seek, SeekFrom, Write}, | 5 io::{Read, Seek, SeekFrom, Write}, |
5 path::{Path, PathBuf}, | 6 path::{Path, PathBuf}, |
6 sync::{Arc, Mutex, RwLock}, | 7 sync::{Arc, Mutex}, |
7 }; | 8 }; |
8 | 9 |
9 use crate::{ | 10 use crate::{ |
10 errors::{HgError, IoResultExt}, | 11 errors::{HgError, IoResultExt}, |
11 vfs::{Vfs, VfsFile}, | 12 vfs::{Vfs, VfsFile}, |
25 pub struct RandomAccessFile { | 26 pub struct RandomAccessFile { |
26 /// The current store VFS to pass it to [`FileHandle`] | 27 /// The current store VFS to pass it to [`FileHandle`] |
27 vfs: Box<dyn Vfs>, | 28 vfs: Box<dyn Vfs>, |
28 /// Filename of the open file, relative to the vfs root | 29 /// Filename of the open file, relative to the vfs root |
29 pub filename: PathBuf, | 30 pub filename: PathBuf, |
30 /// The current read-only handle on the file, if any | 31 /// The current read-only handle on the file, if any. |
31 pub reading_handle: RwLock<Option<FileHandle>>, | 32 /// Specific to the current thread, since we don't want seeks to overlap |
32 /// The current read-write handle on the file, if any | 33 pub reading_handle: thread_local::ThreadLocal<RefCell<Option<FileHandle>>>, |
33 pub writing_handle: RwLock<Option<FileHandle>>, | 34 /// The current read-write handle on the file, if any. |
35 /// Specific to the current thread, since we don't want seeks to overlap, | |
36 /// and we can re-use the write handle for reading in certain contexts. | |
37 /// Logically, two concurrent writes are impossible because they are only | |
38 /// accessible through `&mut self` methods, which take a lock. | |
39 pub writing_handle: thread_local::ThreadLocal<RefCell<Option<FileHandle>>>, | |
34 } | 40 } |
35 | 41 |
36 impl RandomAccessFile { | 42 impl RandomAccessFile { |
37 /// Wrap a file for random access | 43 /// Wrap a file for random access |
38 pub fn new(vfs: Box<dyn Vfs>, filename: PathBuf) -> Self { | 44 pub fn new(vfs: Box<dyn Vfs>, filename: PathBuf) -> Self { |
39 assert!(filename.is_relative()); | 45 assert!(filename.is_relative()); |
40 Self { | 46 Self { |
41 vfs, | 47 vfs, |
42 filename, | 48 filename, |
43 reading_handle: RwLock::new(None), | 49 reading_handle: thread_local::ThreadLocal::new(), |
44 writing_handle: RwLock::new(None), | 50 writing_handle: thread_local::ThreadLocal::new(), |
45 } | 51 } |
46 } | 52 } |
47 | 53 |
48 /// Read a chunk of bytes from the file. | 54 /// Read a chunk of bytes from the file. |
49 pub fn read_chunk( | 55 pub fn read_chunk( |
59 } | 65 } |
60 | 66 |
61 /// `pub` only for hg-cpython | 67 /// `pub` only for hg-cpython |
62 #[doc(hidden)] | 68 #[doc(hidden)] |
63 pub fn get_read_handle(&self) -> Result<FileHandle, HgError> { | 69 pub fn get_read_handle(&self) -> Result<FileHandle, HgError> { |
64 if let Some(handle) = | 70 if let Some(handle) = &*self.writing_handle.get_or_default().borrow() { |
65 &*self.writing_handle.write().expect("lock is poisoned") | |
66 { | |
67 // Use a file handle being actively used for writes, if available. | 71 // Use a file handle being actively used for writes, if available. |
68 // There is some danger to doing this because reads will seek the | 72 // There is some danger to doing this because reads will seek the |
69 // file. | 73 // file. |
70 // However, [`Revlog::write_entry`] performs a `SeekFrom::End(0)` | 74 // However, [`Revlog::write_entry`] performs a `SeekFrom::End(0)` |
71 // before all writes, so we should be safe. | 75 // before all writes, so we should be safe. |
72 return Ok(handle.clone()); | 76 return Ok(handle.clone()); |
73 } | 77 } |
74 if let Some(handle) = | 78 if let Some(handle) = &*self.reading_handle.get_or_default().borrow() { |
75 &*self.reading_handle.read().expect("lock is poisoned") | |
76 { | |
77 return Ok(handle.clone()); | 79 return Ok(handle.clone()); |
78 } | 80 } |
79 // early returns done to work around borrowck being overzealous | 81 // early returns done to work around borrowck being overzealous |
80 // See https://github.com/rust-lang/rust/issues/103108 | 82 // See https://github.com/rust-lang/rust/issues/103108 |
81 let new_handle = FileHandle::new( | 83 let new_handle = FileHandle::new( |
82 dyn_clone::clone_box(&*self.vfs), | 84 dyn_clone::clone_box(&*self.vfs), |
83 &self.filename, | 85 &self.filename, |
84 false, | 86 false, |
85 false, | 87 false, |
86 )?; | 88 )?; |
87 *self.reading_handle.write().expect("lock is poisoned") = | 89 *self.reading_handle.get_or_default().borrow_mut() = |
88 Some(new_handle.clone()); | 90 Some(new_handle.clone()); |
89 Ok(new_handle) | 91 Ok(new_handle) |
90 } | 92 } |
91 | 93 |
92 /// `pub` only for hg-cpython | 94 /// `pub` only for hg-cpython |
93 #[doc(hidden)] | 95 #[doc(hidden)] |
94 pub fn exit_reading_context(&self) { | 96 pub fn exit_reading_context(&self) { |
95 self.reading_handle | 97 self.reading_handle.get().map(|h| h.take()); |
96 .write() | |
97 .expect("lock is poisoned") | |
98 .take(); | |
99 } | 98 } |
100 | 99 |
101 // Returns whether this file currently open | 100 // Returns whether this file currently open |
102 pub fn is_open(&self) -> bool { | 101 pub fn is_open(&self) -> bool { |
103 self.reading_handle | 102 self.reading_handle.get_or_default().borrow().is_some() |
104 .read() | 103 || self.writing_handle.get_or_default().borrow().is_some() |
105 .expect("lock is poisoned") | |
106 .is_some() | |
107 || self | |
108 .writing_handle | |
109 .read() | |
110 .expect("lock is poisoned") | |
111 .is_some() | |
112 } | 104 } |
113 } | 105 } |
114 | 106 |
115 /// A buffer that holds new changelog index data that needs to be written | 107 /// A buffer that holds new changelog index data that needs to be written |
116 /// after the manifest and filelogs so that the repo is updated atomically to | 108 /// after the manifest and filelogs so that the repo is updated atomically to |