Mercurial > public > mercurial-scm > hg
comparison rust/hg-core/src/dirstate_tree/status.rs @ 47338:f27f2afb15da
dirstate-tree: Skip readdir() in `hg status -mard`
When running the status algorithm in a mode where we don't list unknown
or ignored files, all we care about are files that are listed in the dirstate.
We can therefore skip making expensive calls to readdir() to list the contents
of filesystem directories, and instead only run stat() to get the filesystem
state of files listed in the dirstate. (This state may be an error for files
that don't exist anymore on the filesystem.)
On 16 CPU threads, this reduces the time spent in the `status()` function for
`hg status -mard` on an old snapshot of mozilla-central from ~70ms to ~50ms.
Differential Revision: https://phab.mercurial-scm.org/D10752
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 19 May 2021 16:18:16 +0200 |
parents | 8d0260d0dbc9 |
children | 5e12b6bfdd3e |
comparison
equal
deleted
inserted
replaced
47337:0654b3b3d2b5 | 47338:f27f2afb15da |
---|---|
4 use crate::dirstate_tree::dirstate_map::NodeRef; | 4 use crate::dirstate_tree::dirstate_map::NodeRef; |
5 use crate::dirstate_tree::on_disk::DirstateV2ParseError; | 5 use crate::dirstate_tree::on_disk::DirstateV2ParseError; |
6 use crate::matchers::get_ignore_function; | 6 use crate::matchers::get_ignore_function; |
7 use crate::matchers::Matcher; | 7 use crate::matchers::Matcher; |
8 use crate::utils::files::get_bytes_from_os_string; | 8 use crate::utils::files::get_bytes_from_os_string; |
9 use crate::utils::files::get_path_from_bytes; | |
9 use crate::utils::hg_path::HgPath; | 10 use crate::utils::hg_path::HgPath; |
10 use crate::BadMatch; | 11 use crate::BadMatch; |
11 use crate::DirstateStatus; | 12 use crate::DirstateStatus; |
12 use crate::EntryState; | 13 use crate::EntryState; |
13 use crate::HgPathBuf; | 14 use crate::HgPathBuf; |
81 &self, | 82 &self, |
82 hg_path: &HgPath, | 83 hg_path: &HgPath, |
83 fs_path: &Path, | 84 fs_path: &Path, |
84 is_at_repo_root: bool, | 85 is_at_repo_root: bool, |
85 ) -> Result<Vec<DirEntry>, ()> { | 86 ) -> Result<Vec<DirEntry>, ()> { |
86 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| { | 87 DirEntry::read_dir(fs_path, is_at_repo_root) |
87 let errno = error.raw_os_error().expect("expected real OS error"); | 88 .map_err(|error| self.io_error(error, hg_path)) |
88 self.outcome | 89 } |
89 .lock() | 90 |
90 .unwrap() | 91 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) { |
91 .bad | 92 let errno = error.raw_os_error().expect("expected real OS error"); |
92 .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) | 93 self.outcome |
93 }) | 94 .lock() |
95 .unwrap() | |
96 .bad | |
97 .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) | |
94 } | 98 } |
95 | 99 |
96 fn traverse_fs_directory_and_dirstate( | 100 fn traverse_fs_directory_and_dirstate( |
97 &self, | 101 &self, |
98 has_ignored_ancestor: bool, | 102 has_ignored_ancestor: bool, |
99 dirstate_nodes: ChildNodesRef<'tree, '_>, | 103 dirstate_nodes: ChildNodesRef<'tree, '_>, |
100 directory_hg_path: &'tree HgPath, | 104 directory_hg_path: &'tree HgPath, |
101 directory_fs_path: &Path, | 105 directory_fs_path: &Path, |
102 is_at_repo_root: bool, | 106 is_at_repo_root: bool, |
103 ) -> Result<(), DirstateV2ParseError> { | 107 ) -> Result<(), DirstateV2ParseError> { |
108 if !self.options.list_unknown && !self.options.list_ignored { | |
109 // We only care about files in the dirstate, so we can skip listing | |
110 // filesystem directories entirely. | |
111 return dirstate_nodes | |
112 .par_iter() | |
113 .map(|dirstate_node| { | |
114 let fs_path = directory_fs_path.join(get_path_from_bytes( | |
115 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), | |
116 )); | |
117 match std::fs::symlink_metadata(&fs_path) { | |
118 Ok(fs_metadata) => self.traverse_fs_and_dirstate( | |
119 &fs_path, | |
120 &fs_metadata, | |
121 dirstate_node, | |
122 has_ignored_ancestor, | |
123 ), | |
124 Err(e) if e.kind() == std::io::ErrorKind::NotFound => { | |
125 self.traverse_dirstate_only(dirstate_node) | |
126 } | |
127 Err(error) => { | |
128 let hg_path = | |
129 dirstate_node.full_path(self.dmap.on_disk)?; | |
130 Ok(self.io_error(error, hg_path)) | |
131 } | |
132 } | |
133 }) | |
134 .collect(); | |
135 } | |
136 | |
104 let mut fs_entries = if let Ok(entries) = self.read_dir( | 137 let mut fs_entries = if let Ok(entries) = self.read_dir( |
105 directory_hg_path, | 138 directory_hg_path, |
106 directory_fs_path, | 139 directory_fs_path, |
107 is_at_repo_root, | 140 is_at_repo_root, |
108 ) { | 141 ) { |
139 .map(|pair| { | 172 .map(|pair| { |
140 use itertools::EitherOrBoth::*; | 173 use itertools::EitherOrBoth::*; |
141 match pair { | 174 match pair { |
142 Both(dirstate_node, fs_entry) => self | 175 Both(dirstate_node, fs_entry) => self |
143 .traverse_fs_and_dirstate( | 176 .traverse_fs_and_dirstate( |
144 fs_entry, | 177 &fs_entry.full_path, |
178 &fs_entry.metadata, | |
145 dirstate_node, | 179 dirstate_node, |
146 has_ignored_ancestor, | 180 has_ignored_ancestor, |
147 ), | 181 ), |
148 Left(dirstate_node) => { | 182 Left(dirstate_node) => { |
149 self.traverse_dirstate_only(dirstate_node) | 183 self.traverse_dirstate_only(dirstate_node) |
158 .collect() | 192 .collect() |
159 } | 193 } |
160 | 194 |
161 fn traverse_fs_and_dirstate( | 195 fn traverse_fs_and_dirstate( |
162 &self, | 196 &self, |
163 fs_entry: &DirEntry, | 197 fs_path: &Path, |
198 fs_metadata: &std::fs::Metadata, | |
164 dirstate_node: NodeRef<'tree, '_>, | 199 dirstate_node: NodeRef<'tree, '_>, |
165 has_ignored_ancestor: bool, | 200 has_ignored_ancestor: bool, |
166 ) -> Result<(), DirstateV2ParseError> { | 201 ) -> Result<(), DirstateV2ParseError> { |
167 let hg_path = dirstate_node.full_path(self.dmap.on_disk)?; | 202 let hg_path = dirstate_node.full_path(self.dmap.on_disk)?; |
168 let file_type = fs_entry.metadata.file_type(); | 203 let file_type = fs_metadata.file_type(); |
169 let file_or_symlink = file_type.is_file() || file_type.is_symlink(); | 204 let file_or_symlink = file_type.is_file() || file_type.is_symlink(); |
170 if !file_or_symlink { | 205 if !file_or_symlink { |
171 // If we previously had a file here, it was removed (with | 206 // If we previously had a file here, it was removed (with |
172 // `hg rm` or similar) or deleted before it could be | 207 // `hg rm` or similar) or deleted before it could be |
173 // replaced by a directory or something else. | 208 // replaced by a directory or something else. |
184 let is_at_repo_root = false; | 219 let is_at_repo_root = false; |
185 self.traverse_fs_directory_and_dirstate( | 220 self.traverse_fs_directory_and_dirstate( |
186 is_ignored, | 221 is_ignored, |
187 dirstate_node.children(self.dmap.on_disk)?, | 222 dirstate_node.children(self.dmap.on_disk)?, |
188 hg_path, | 223 hg_path, |
189 &fs_entry.full_path, | 224 fs_path, |
190 is_at_repo_root, | 225 is_at_repo_root, |
191 )? | 226 )? |
192 } else { | 227 } else { |
193 if file_or_symlink && self.matcher.matches(hg_path) { | 228 if file_or_symlink && self.matcher.matches(hg_path) { |
194 let full_path = Cow::from(hg_path); | 229 let full_path = Cow::from(hg_path); |
207 .outcome | 242 .outcome |
208 .lock() | 243 .lock() |
209 .unwrap() | 244 .unwrap() |
210 .modified | 245 .modified |
211 .push(full_path), | 246 .push(full_path), |
212 EntryState::Normal => { | 247 EntryState::Normal => self |
213 self.handle_normal_file(&dirstate_node, fs_entry)? | 248 .handle_normal_file(&dirstate_node, fs_metadata)?, |
214 } | |
215 // This variant is not used in DirstateMap | 249 // This variant is not used in DirstateMap |
216 // nodes | 250 // nodes |
217 EntryState::Unknown => unreachable!(), | 251 EntryState::Unknown => unreachable!(), |
218 } | 252 } |
219 } else { | 253 } else { |
237 /// A file with `EntryState::Normal` in the dirstate was found in the | 271 /// A file with `EntryState::Normal` in the dirstate was found in the |
238 /// filesystem | 272 /// filesystem |
239 fn handle_normal_file( | 273 fn handle_normal_file( |
240 &self, | 274 &self, |
241 dirstate_node: &NodeRef<'tree, '_>, | 275 dirstate_node: &NodeRef<'tree, '_>, |
242 fs_entry: &DirEntry, | 276 fs_metadata: &std::fs::Metadata, |
243 ) -> Result<(), DirstateV2ParseError> { | 277 ) -> Result<(), DirstateV2ParseError> { |
244 // Keep the low 31 bits | 278 // Keep the low 31 bits |
245 fn truncate_u64(value: u64) -> i32 { | 279 fn truncate_u64(value: u64) -> i32 { |
246 (value & 0x7FFF_FFFF) as i32 | 280 (value & 0x7FFF_FFFF) as i32 |
247 } | 281 } |
251 | 285 |
252 let entry = dirstate_node | 286 let entry = dirstate_node |
253 .entry()? | 287 .entry()? |
254 .expect("handle_normal_file called with entry-less node"); | 288 .expect("handle_normal_file called with entry-less node"); |
255 let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?); | 289 let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?); |
256 let mode_changed = || { | 290 let mode_changed = |
257 self.options.check_exec && entry.mode_changed(&fs_entry.metadata) | 291 || self.options.check_exec && entry.mode_changed(fs_metadata); |
258 }; | 292 let size_changed = entry.size != truncate_u64(fs_metadata.len()); |
259 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len()); | |
260 if entry.size >= 0 | 293 if entry.size >= 0 |
261 && size_changed | 294 && size_changed |
262 && fs_entry.metadata.file_type().is_symlink() | 295 && fs_metadata.file_type().is_symlink() |
263 { | 296 { |
264 // issue6456: Size returned may be longer due to encryption | 297 // issue6456: Size returned may be longer due to encryption |
265 // on EXT-4 fscrypt. TODO maybe only do it on EXT4? | 298 // on EXT-4 fscrypt. TODO maybe only do it on EXT4? |
266 self.outcome.lock().unwrap().unsure.push(full_path) | 299 self.outcome.lock().unwrap().unsure.push(full_path) |
267 } else if dirstate_node.has_copy_source() | 300 } else if dirstate_node.has_copy_source() |
268 || entry.is_from_other_parent() | 301 || entry.is_from_other_parent() |
269 || (entry.size >= 0 && (size_changed || mode_changed())) | 302 || (entry.size >= 0 && (size_changed || mode_changed())) |
270 { | 303 { |
271 self.outcome.lock().unwrap().modified.push(full_path) | 304 self.outcome.lock().unwrap().modified.push(full_path) |
272 } else { | 305 } else { |
273 let mtime = mtime_seconds(&fs_entry.metadata); | 306 let mtime = mtime_seconds(fs_metadata); |
274 if truncate_i64(mtime) != entry.mtime | 307 if truncate_i64(mtime) != entry.mtime |
275 || mtime == self.options.last_normal_time | 308 || mtime == self.options.last_normal_time |
276 { | 309 { |
277 self.outcome.lock().unwrap().unsure.push(full_path) | 310 self.outcome.lock().unwrap().unsure.push(full_path) |
278 } else if self.options.list_clean { | 311 } else if self.options.list_clean { |