Mercurial > public > mercurial-scm > hg
annotate rust/hg-core/src/sparse.rs @ 49517:52464a20add0
rhg: parallelize computation of [unsure_is_modified]
[unsure_is_modified] is called for every file for which we can't
determine its status based on its size and mtime alone.
In particular, this happens if the mtime of the file changes
without its contents changing.
Parallelizing this improves performance significantly when
we have many of these files.
Here's an example run (on a repo with ~400k files after dropping FS caches)
```
before:
real 0m53.901s
user 0m27.806s
sys 0m31.325s
after:
real 0m32.017s
user 0m34.277s
sys 1m26.250s
```
Another example run (a different FS):
```
before:
real 3m28.479s
user 0m31.800s
sys 0m25.324s
after:
real 0m29.751s
user 0m41.814s
sys 1m15.387s
```
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Wed, 05 Oct 2022 15:45:05 -0400 |
parents | 7c93e38a0bbd |
children | e98fd81bb151 |
rev | line source |
---|---|
49485 | 1 use std::{collections::HashSet, path::Path}; |
2 | |
3 use format_bytes::{write_bytes, DisplayBytes}; | |
4 | |
5 use crate::{ | |
6 errors::HgError, | |
7 filepatterns::parse_pattern_file_contents, | |
8 matchers::{ | |
9 AlwaysMatcher, DifferenceMatcher, IncludeMatcher, Matcher, | |
10 UnionMatcher, | |
11 }, | |
12 operations::cat, | |
13 repo::Repo, | |
14 requirements::SPARSE_REQUIREMENT, | |
15 utils::{hg_path::HgPath, SliceExt}, | |
16 IgnorePattern, PatternError, PatternFileWarning, PatternSyntax, Revision, | |
17 NULL_REVISION, | |
18 }; | |
19 | |
/// Command which is triggering the config read
///
/// The same parser serves both kinds of config file; this value is carried
/// in warnings and errors so they can name the feature being read.
#[derive(Copy, Clone, Debug)]
pub enum SparseConfigContext {
    /// The config is being read on behalf of sparse.
    Sparse,
    /// The config is being read on behalf of narrow.
    Narrow,
}
26 | |
27 impl DisplayBytes for SparseConfigContext { | |
28 fn display_bytes( | |
29 &self, | |
30 output: &mut dyn std::io::Write, | |
31 ) -> std::io::Result<()> { | |
32 match self { | |
33 SparseConfigContext::Sparse => write_bytes!(output, b"sparse"), | |
34 SparseConfigContext::Narrow => write_bytes!(output, b"narrow"), | |
35 } | |
36 } | |
37 } | |
38 | |
39 /// Possible warnings when reading sparse configuration | |
40 #[derive(Debug, derive_more::From)] | |
41 pub enum SparseWarning { | |
42 /// Warns about improper paths that start with "/" | |
43 RootWarning { | |
44 context: SparseConfigContext, | |
45 line: Vec<u8>, | |
46 }, | |
47 /// Warns about a profile missing from the given changelog revision | |
48 ProfileNotFound { profile: Vec<u8>, rev: Revision }, | |
49 #[from] | |
50 Pattern(PatternFileWarning), | |
51 } | |
52 | |
53 /// Parsed sparse config | |
54 #[derive(Debug, Default)] | |
55 pub struct SparseConfig { | |
56 // Line-separated | |
49489
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
57 pub(crate) includes: Vec<u8>, |
49485 | 58 // Line-separated |
49489
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
59 pub(crate) excludes: Vec<u8>, |
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
60 pub(crate) profiles: HashSet<Vec<u8>>, |
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
61 pub(crate) warnings: Vec<SparseWarning>, |
49485 | 62 } |
63 | |
49489
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
64 /// All possible errors when reading sparse/narrow config |
49485 | 65 #[derive(Debug, derive_more::From)] |
66 pub enum SparseConfigError { | |
67 IncludesAfterExcludes { | |
68 context: SparseConfigContext, | |
69 }, | |
70 EntryOutsideSection { | |
71 context: SparseConfigContext, | |
72 line: Vec<u8>, | |
73 }, | |
49489
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
74 /// Narrow config does not support '%include' directives |
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
75 IncludesInNarrow, |
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
76 /// An invalid pattern prefix was given to the narrow spec. Includes the |
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
77 /// entire pattern for context. |
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
78 InvalidNarrowPrefix(Vec<u8>), |
49485 | 79 #[from] |
80 HgError(HgError), | |
81 #[from] | |
82 PatternError(PatternError), | |
83 } | |
84 | |
85 /// Parse sparse config file content. | |
49489
7c93e38a0bbd
rhg-status: add support for narrow clones
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49485
diff
changeset
|
86 pub(crate) fn parse_config( |
49485 | 87 raw: &[u8], |
88 context: SparseConfigContext, | |
89 ) -> Result<SparseConfig, SparseConfigError> { | |
90 let mut includes = vec![]; | |
91 let mut excludes = vec![]; | |
92 let mut profiles = HashSet::new(); | |
93 let mut warnings = vec![]; | |
94 | |
95 #[derive(PartialEq, Eq)] | |
96 enum Current { | |
97 Includes, | |
98 Excludes, | |
99 None, | |
49517
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49489
diff
changeset
|
100 } |
49485 | 101 |
102 let mut current = Current::None; | |
103 let mut in_section = false; | |
104 | |
105 for line in raw.split(|c| *c == b'\n') { | |
106 let line = line.trim(); | |
107 if line.is_empty() || line[0] == b'#' { | |
108 // empty or comment line, skip | |
109 continue; | |
110 } | |
111 if line.starts_with(b"%include ") { | |
112 let profile = line[b"%include ".len()..].trim(); | |
113 if !profile.is_empty() { | |
114 profiles.insert(profile.into()); | |
115 } | |
116 } else if line == b"[include]" { | |
117 if in_section && current == Current::Includes { | |
118 return Err(SparseConfigError::IncludesAfterExcludes { | |
119 context, | |
120 }); | |
121 } | |
122 in_section = true; | |
123 current = Current::Includes; | |
124 continue; | |
125 } else if line == b"[exclude]" { | |
126 in_section = true; | |
127 current = Current::Excludes; | |
128 } else { | |
129 if current == Current::None { | |
130 return Err(SparseConfigError::EntryOutsideSection { | |
131 context, | |
132 line: line.into(), | |
133 }); | |
134 } | |
135 if line.trim().starts_with(b"/") { | |
136 warnings.push(SparseWarning::RootWarning { | |
137 context, | |
138 line: line.into(), | |
139 }); | |
140 continue; | |
141 } | |
142 match current { | |
143 Current::Includes => { | |
144 includes.push(b'\n'); | |
145 includes.extend(line.iter()); | |
146 } | |
147 Current::Excludes => { | |
148 excludes.push(b'\n'); | |
149 excludes.extend(line.iter()); | |
150 } | |
151 Current::None => unreachable!(), | |
152 } | |
153 } | |
154 } | |
155 | |
156 Ok(SparseConfig { | |
157 includes, | |
158 excludes, | |
159 profiles, | |
160 warnings, | |
161 }) | |
162 } | |
163 | |
164 fn read_temporary_includes( | |
165 repo: &Repo, | |
166 ) -> Result<Vec<Vec<u8>>, SparseConfigError> { | |
167 let raw = repo.hg_vfs().try_read("tempsparse")?.unwrap_or(vec![]); | |
168 if raw.is_empty() { | |
169 return Ok(vec![]); | |
170 } | |
171 Ok(raw.split(|c| *c == b'\n').map(ToOwned::to_owned).collect()) | |
172 } | |
173 | |
174 /// Obtain sparse checkout patterns for the given revision | |
175 fn patterns_for_rev( | |
176 repo: &Repo, | |
177 rev: Revision, | |
178 ) -> Result<Option<SparseConfig>, SparseConfigError> { | |
179 if !repo.has_sparse() { | |
180 return Ok(None); | |
181 } | |
182 let raw = repo.hg_vfs().try_read("sparse")?.unwrap_or(vec![]); | |
183 | |
184 if raw.is_empty() { | |
185 return Ok(None); | |
186 } | |
187 | |
188 let mut config = parse_config(&raw, SparseConfigContext::Sparse)?; | |
189 | |
190 if !config.profiles.is_empty() { | |
191 let mut profiles: Vec<Vec<u8>> = config.profiles.into_iter().collect(); | |
192 let mut visited = HashSet::new(); | |
193 | |
194 while let Some(profile) = profiles.pop() { | |
195 if visited.contains(&profile) { | |
196 continue; | |
197 } | |
198 visited.insert(profile.to_owned()); | |
199 | |
200 let output = | |
201 cat(repo, &rev.to_string(), vec![HgPath::new(&profile)]) | |
202 .map_err(|_| { | |
203 HgError::corrupted(format!( | |
204 "dirstate points to non-existent parent node" | |
205 )) | |
206 })?; | |
207 if output.results.is_empty() { | |
208 config.warnings.push(SparseWarning::ProfileNotFound { | |
209 profile: profile.to_owned(), | |
210 rev, | |
211 }) | |
212 } | |
213 | |
214 let subconfig = parse_config( | |
215 &output.results[0].1, | |
216 SparseConfigContext::Sparse, | |
217 )?; | |
218 if !subconfig.includes.is_empty() { | |
219 config.includes.push(b'\n'); | |
220 config.includes.extend(&subconfig.includes); | |
221 } | |
222 if !subconfig.includes.is_empty() { | |
223 config.includes.push(b'\n'); | |
224 config.excludes.extend(&subconfig.excludes); | |
225 } | |
226 config.warnings.extend(subconfig.warnings.into_iter()); | |
227 profiles.extend(subconfig.profiles.into_iter()); | |
228 } | |
229 | |
230 config.profiles = visited; | |
231 } | |
232 | |
233 if !config.includes.is_empty() { | |
234 config.includes.extend(b"\n.hg*"); | |
235 } | |
236 | |
237 Ok(Some(config)) | |
238 } | |
239 | |
240 /// Obtain a matcher for sparse working directories. | |
241 pub fn matcher( | |
242 repo: &Repo, | |
243 ) -> Result<(Box<dyn Matcher + Sync>, Vec<SparseWarning>), SparseConfigError> { | |
244 let mut warnings = vec![]; | |
245 if !repo.requirements().contains(SPARSE_REQUIREMENT) { | |
246 return Ok((Box::new(AlwaysMatcher), warnings)); | |
247 } | |
248 | |
249 let parents = repo.dirstate_parents()?; | |
250 let mut revs = vec![]; | |
251 let p1_rev = | |
252 repo.changelog()? | |
253 .rev_from_node(parents.p1.into()) | |
254 .map_err(|_| { | |
255 HgError::corrupted(format!( | |
256 "dirstate points to non-existent parent node" | |
257 )) | |
258 })?; | |
259 if p1_rev != NULL_REVISION { | |
260 revs.push(p1_rev) | |
261 } | |
262 let p2_rev = | |
263 repo.changelog()? | |
264 .rev_from_node(parents.p2.into()) | |
265 .map_err(|_| { | |
266 HgError::corrupted(format!( | |
267 "dirstate points to non-existent parent node" | |
268 )) | |
269 })?; | |
270 if p2_rev != NULL_REVISION { | |
271 revs.push(p2_rev) | |
272 } | |
273 let mut matchers = vec![]; | |
274 | |
275 for rev in revs.iter() { | |
276 let config = patterns_for_rev(repo, *rev); | |
277 if let Ok(Some(config)) = config { | |
278 warnings.extend(config.warnings); | |
279 let mut m: Box<dyn Matcher + Sync> = Box::new(AlwaysMatcher); | |
280 if !config.includes.is_empty() { | |
281 let (patterns, subwarnings) = parse_pattern_file_contents( | |
282 &config.includes, | |
283 Path::new(""), | |
284 Some(b"relglob:".as_ref()), | |
285 false, | |
286 )?; | |
287 warnings.extend(subwarnings.into_iter().map(From::from)); | |
288 m = Box::new(IncludeMatcher::new(patterns)?); | |
289 } | |
290 if !config.excludes.is_empty() { | |
291 let (patterns, subwarnings) = parse_pattern_file_contents( | |
292 &config.excludes, | |
293 Path::new(""), | |
294 Some(b"relglob:".as_ref()), | |
295 false, | |
296 )?; | |
297 warnings.extend(subwarnings.into_iter().map(From::from)); | |
298 m = Box::new(DifferenceMatcher::new( | |
299 m, | |
300 Box::new(IncludeMatcher::new(patterns)?), | |
301 )); | |
302 } | |
303 matchers.push(m); | |
304 } | |
305 } | |
306 let result: Box<dyn Matcher + Sync> = match matchers.len() { | |
307 0 => Box::new(AlwaysMatcher), | |
308 1 => matchers.pop().expect("1 is equal to 0"), | |
309 _ => Box::new(UnionMatcher::new(matchers)), | |
310 }; | |
311 | |
312 let matcher = | |
313 force_include_matcher(result, &read_temporary_includes(repo)?)?; | |
314 Ok((matcher, warnings)) | |
315 } | |
316 | |
317 /// Returns a matcher that returns true for any of the forced includes before | |
318 /// testing against the actual matcher | |
319 fn force_include_matcher( | |
320 result: Box<dyn Matcher + Sync>, | |
321 temp_includes: &[Vec<u8>], | |
322 ) -> Result<Box<dyn Matcher + Sync>, PatternError> { | |
323 if temp_includes.is_empty() { | |
324 return Ok(result); | |
325 } | |
326 let forced_include_matcher = IncludeMatcher::new( | |
327 temp_includes | |
328 .into_iter() | |
329 .map(|include| { | |
330 IgnorePattern::new(PatternSyntax::Path, include, Path::new("")) | |
331 }) | |
332 .collect(), | |
333 )?; | |
334 Ok(Box::new(UnionMatcher::new(vec![ | |
335 Box::new(forced_include_matcher), | |
336 result, | |
337 ]))) | |
338 } |