comparison rust/hg-core/src/dirstate_tree/status.rs @ 47338:f27f2afb15da

dirstate-tree: Skip readdir() in `hg status -mard` When running the status algorithm in a mode where we don't list unknown or ignored files, all we care about are files that are listed in the dirstate. We can therefore skip making expensive calls to readdir() to list the contents of filesystem directories, and instead only run stat() to get the filesystem state of files listed in the dirstate. (This state may be an error for files that don't exist anymore on the filesystem.) On 16 CPU threads, this reduces the time spent in the `status()` function for `hg status -mard` on an old snapshot of mozilla-central from ~70ms to ~50ms. Differential Revision: https://phab.mercurial-scm.org/D10752
author Simon Sapin <simon.sapin@octobus.net>
date Wed, 19 May 2021 16:18:16 +0200
parents 8d0260d0dbc9
children 5e12b6bfdd3e
comparison
equal deleted inserted replaced
47337:0654b3b3d2b5 47338:f27f2afb15da
4 use crate::dirstate_tree::dirstate_map::NodeRef; 4 use crate::dirstate_tree::dirstate_map::NodeRef;
5 use crate::dirstate_tree::on_disk::DirstateV2ParseError; 5 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
6 use crate::matchers::get_ignore_function; 6 use crate::matchers::get_ignore_function;
7 use crate::matchers::Matcher; 7 use crate::matchers::Matcher;
8 use crate::utils::files::get_bytes_from_os_string; 8 use crate::utils::files::get_bytes_from_os_string;
9 use crate::utils::files::get_path_from_bytes;
9 use crate::utils::hg_path::HgPath; 10 use crate::utils::hg_path::HgPath;
10 use crate::BadMatch; 11 use crate::BadMatch;
11 use crate::DirstateStatus; 12 use crate::DirstateStatus;
12 use crate::EntryState; 13 use crate::EntryState;
13 use crate::HgPathBuf; 14 use crate::HgPathBuf;
81 &self, 82 &self,
82 hg_path: &HgPath, 83 hg_path: &HgPath,
83 fs_path: &Path, 84 fs_path: &Path,
84 is_at_repo_root: bool, 85 is_at_repo_root: bool,
85 ) -> Result<Vec<DirEntry>, ()> { 86 ) -> Result<Vec<DirEntry>, ()> {
86 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| { 87 DirEntry::read_dir(fs_path, is_at_repo_root)
87 let errno = error.raw_os_error().expect("expected real OS error"); 88 .map_err(|error| self.io_error(error, hg_path))
88 self.outcome 89 }
89 .lock() 90
90 .unwrap() 91 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
91 .bad 92 let errno = error.raw_os_error().expect("expected real OS error");
92 .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) 93 self.outcome
93 }) 94 .lock()
95 .unwrap()
96 .bad
97 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
94 } 98 }
95 99
96 fn traverse_fs_directory_and_dirstate( 100 fn traverse_fs_directory_and_dirstate(
97 &self, 101 &self,
98 has_ignored_ancestor: bool, 102 has_ignored_ancestor: bool,
99 dirstate_nodes: ChildNodesRef<'tree, '_>, 103 dirstate_nodes: ChildNodesRef<'tree, '_>,
100 directory_hg_path: &'tree HgPath, 104 directory_hg_path: &'tree HgPath,
101 directory_fs_path: &Path, 105 directory_fs_path: &Path,
102 is_at_repo_root: bool, 106 is_at_repo_root: bool,
103 ) -> Result<(), DirstateV2ParseError> { 107 ) -> Result<(), DirstateV2ParseError> {
108 if !self.options.list_unknown && !self.options.list_ignored {
109 // We only care about files in the dirstate, so we can skip listing
110 // filesystem directories entirely.
111 return dirstate_nodes
112 .par_iter()
113 .map(|dirstate_node| {
114 let fs_path = directory_fs_path.join(get_path_from_bytes(
115 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
116 ));
117 match std::fs::symlink_metadata(&fs_path) {
118 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
119 &fs_path,
120 &fs_metadata,
121 dirstate_node,
122 has_ignored_ancestor,
123 ),
124 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
125 self.traverse_dirstate_only(dirstate_node)
126 }
127 Err(error) => {
128 let hg_path =
129 dirstate_node.full_path(self.dmap.on_disk)?;
130 Ok(self.io_error(error, hg_path))
131 }
132 }
133 })
134 .collect();
135 }
136
104 let mut fs_entries = if let Ok(entries) = self.read_dir( 137 let mut fs_entries = if let Ok(entries) = self.read_dir(
105 directory_hg_path, 138 directory_hg_path,
106 directory_fs_path, 139 directory_fs_path,
107 is_at_repo_root, 140 is_at_repo_root,
108 ) { 141 ) {
139 .map(|pair| { 172 .map(|pair| {
140 use itertools::EitherOrBoth::*; 173 use itertools::EitherOrBoth::*;
141 match pair { 174 match pair {
142 Both(dirstate_node, fs_entry) => self 175 Both(dirstate_node, fs_entry) => self
143 .traverse_fs_and_dirstate( 176 .traverse_fs_and_dirstate(
144 fs_entry, 177 &fs_entry.full_path,
178 &fs_entry.metadata,
145 dirstate_node, 179 dirstate_node,
146 has_ignored_ancestor, 180 has_ignored_ancestor,
147 ), 181 ),
148 Left(dirstate_node) => { 182 Left(dirstate_node) => {
149 self.traverse_dirstate_only(dirstate_node) 183 self.traverse_dirstate_only(dirstate_node)
158 .collect() 192 .collect()
159 } 193 }
160 194
161 fn traverse_fs_and_dirstate( 195 fn traverse_fs_and_dirstate(
162 &self, 196 &self,
163 fs_entry: &DirEntry, 197 fs_path: &Path,
198 fs_metadata: &std::fs::Metadata,
164 dirstate_node: NodeRef<'tree, '_>, 199 dirstate_node: NodeRef<'tree, '_>,
165 has_ignored_ancestor: bool, 200 has_ignored_ancestor: bool,
166 ) -> Result<(), DirstateV2ParseError> { 201 ) -> Result<(), DirstateV2ParseError> {
167 let hg_path = dirstate_node.full_path(self.dmap.on_disk)?; 202 let hg_path = dirstate_node.full_path(self.dmap.on_disk)?;
168 let file_type = fs_entry.metadata.file_type(); 203 let file_type = fs_metadata.file_type();
169 let file_or_symlink = file_type.is_file() || file_type.is_symlink(); 204 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
170 if !file_or_symlink { 205 if !file_or_symlink {
171 // If we previously had a file here, it was removed (with 206 // If we previously had a file here, it was removed (with
172 // `hg rm` or similar) or deleted before it could be 207 // `hg rm` or similar) or deleted before it could be
173 // replaced by a directory or something else. 208 // replaced by a directory or something else.
184 let is_at_repo_root = false; 219 let is_at_repo_root = false;
185 self.traverse_fs_directory_and_dirstate( 220 self.traverse_fs_directory_and_dirstate(
186 is_ignored, 221 is_ignored,
187 dirstate_node.children(self.dmap.on_disk)?, 222 dirstate_node.children(self.dmap.on_disk)?,
188 hg_path, 223 hg_path,
189 &fs_entry.full_path, 224 fs_path,
190 is_at_repo_root, 225 is_at_repo_root,
191 )? 226 )?
192 } else { 227 } else {
193 if file_or_symlink && self.matcher.matches(hg_path) { 228 if file_or_symlink && self.matcher.matches(hg_path) {
194 let full_path = Cow::from(hg_path); 229 let full_path = Cow::from(hg_path);
207 .outcome 242 .outcome
208 .lock() 243 .lock()
209 .unwrap() 244 .unwrap()
210 .modified 245 .modified
211 .push(full_path), 246 .push(full_path),
212 EntryState::Normal => { 247 EntryState::Normal => self
213 self.handle_normal_file(&dirstate_node, fs_entry)? 248 .handle_normal_file(&dirstate_node, fs_metadata)?,
214 }
215 // This variant is not used in DirstateMap 249 // This variant is not used in DirstateMap
216 // nodes 250 // nodes
217 EntryState::Unknown => unreachable!(), 251 EntryState::Unknown => unreachable!(),
218 } 252 }
219 } else { 253 } else {
237 /// A file with `EntryState::Normal` in the dirstate was found in the 271 /// A file with `EntryState::Normal` in the dirstate was found in the
238 /// filesystem 272 /// filesystem
239 fn handle_normal_file( 273 fn handle_normal_file(
240 &self, 274 &self,
241 dirstate_node: &NodeRef<'tree, '_>, 275 dirstate_node: &NodeRef<'tree, '_>,
242 fs_entry: &DirEntry, 276 fs_metadata: &std::fs::Metadata,
243 ) -> Result<(), DirstateV2ParseError> { 277 ) -> Result<(), DirstateV2ParseError> {
244 // Keep the low 31 bits 278 // Keep the low 31 bits
245 fn truncate_u64(value: u64) -> i32 { 279 fn truncate_u64(value: u64) -> i32 {
246 (value & 0x7FFF_FFFF) as i32 280 (value & 0x7FFF_FFFF) as i32
247 } 281 }
251 285
252 let entry = dirstate_node 286 let entry = dirstate_node
253 .entry()? 287 .entry()?
254 .expect("handle_normal_file called with entry-less node"); 288 .expect("handle_normal_file called with entry-less node");
255 let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?); 289 let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?);
256 let mode_changed = || { 290 let mode_changed =
257 self.options.check_exec && entry.mode_changed(&fs_entry.metadata) 291 || self.options.check_exec && entry.mode_changed(fs_metadata);
258 }; 292 let size_changed = entry.size != truncate_u64(fs_metadata.len());
259 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
260 if entry.size >= 0 293 if entry.size >= 0
261 && size_changed 294 && size_changed
262 && fs_entry.metadata.file_type().is_symlink() 295 && fs_metadata.file_type().is_symlink()
263 { 296 {
264 // issue6456: Size returned may be longer due to encryption 297 // issue6456: Size returned may be longer due to encryption
265 // on EXT-4 fscrypt. TODO maybe only do it on EXT4? 298 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
266 self.outcome.lock().unwrap().unsure.push(full_path) 299 self.outcome.lock().unwrap().unsure.push(full_path)
267 } else if dirstate_node.has_copy_source() 300 } else if dirstate_node.has_copy_source()
268 || entry.is_from_other_parent() 301 || entry.is_from_other_parent()
269 || (entry.size >= 0 && (size_changed || mode_changed())) 302 || (entry.size >= 0 && (size_changed || mode_changed()))
270 { 303 {
271 self.outcome.lock().unwrap().modified.push(full_path) 304 self.outcome.lock().unwrap().modified.push(full_path)
272 } else { 305 } else {
273 let mtime = mtime_seconds(&fs_entry.metadata); 306 let mtime = mtime_seconds(fs_metadata);
274 if truncate_i64(mtime) != entry.mtime 307 if truncate_i64(mtime) != entry.mtime
275 || mtime == self.options.last_normal_time 308 || mtime == self.options.last_normal_time
276 { 309 {
277 self.outcome.lock().unwrap().unsure.push(full_path) 310 self.outcome.lock().unwrap().unsure.push(full_path)
278 } else if self.options.list_clean { 311 } else if self.options.list_clean {