Mercurial > public > mercurial-scm > hg
view rust/hg-core/src/utils/path.rs @ 46128:c94d013e2299
copies-rust: add smarter approach for merging small mapping with large mapping
The current approach (finding the smaller updated set) works great when the
mappings have similar sizes, but does a lot of unnecessary work when one side is
much smaller than the other one. So we do better in these cases. See inline
documentation for details.
It gives a sizeable boost to many of our slower cases:
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev
---------------------------------------------------------------------------------------------------------------------------------------------------------------
mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 18.123103 s, 5.693818 s, -12.429285 s, × 0.3142, 15 µs/rev
mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 17.907312 s, 5.677655 s, -12.229657 s, × 0.3171, 15 µs/rev
mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 17.684797 s, 5.563370 s, -12.121427 s, × 0.3146, 15 µs/rev
mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.881471 s, 2.864099 s, -0.017372 s, × 0.9940, 14 µs/rev
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 63.148971 s, 59.498652 s, -3.650319 s, × 0.9422, 155 µs/rev
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 63.148971 s, 59.498652 s, -3.650319 s, × 0.9422, 155 µs/rev
Ideally, the im-rs object would have a `merge` method, but it does not (yet).
Full timing comparison below (there is one pathological case that becomes even
worse, for unclear reasons).
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev
---------------------------------------------------------------------------------------------------------------------------------------------------------------
mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000043 s, 0.000042 s, -0.000001 s, × 0.9767, 42 µs/rev
mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000105 s, 0.000104 s, -0.000001 s, × 0.9905, 17 µs/rev
mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004895 s, 0.004913 s, +0.000018 s, × 1.0037, 4 µs/rev
pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000194 s, 0.000191 s, -0.000003 s, × 0.9845, 21 µs/rev
pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000050 s, +0.000000 s, × 1.0000, 50 µs/rev
pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000115 s, 0.000112 s, -0.000003 s, × 0.9739, 16 µs/rev
pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.000289 s, 0.000288 s, -0.000001 s, × 0.9965, 288 µs/rev
pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010513 s, 0.010411 s, -0.000102 s, × 0.9903, 1735 µs/rev
pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.051474 s, 0.052852 s, +0.001378 s, × 1.0268, 11 µs/rev
pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.088086 s, 0.092828 s, +0.004742 s, × 1.0538, 13 µs/rev
pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.062176 s, 0.063269 s, +0.001093 s, × 1.0176, 11 µs/rev
pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.720950 s, 0.711975 s, -0.008975 s, × 0.9876, 16 µs/rev
pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012897 s, 0.012771 s, -0.000126 s, × 0.9902, 6385 µs/rev
pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.121524 s, 0.124505 s, +0.002981 s, × 1.0245, 11 µs/rev
netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000082 s, 0.000082 s, +0.000000 s, × 1.0000, 41 µs/rev
netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000109 s, 0.000111 s, +0.000002 s, × 1.0183, 55 µs/rev
netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000175 s, 0.000171 s, -0.000004 s, × 0.9771, 57 µs/rev
netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000719 s, 0.000708 s, -0.000011 s, × 0.9847, 78 µs/rev
netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010426 s, 0.010608 s, +0.000182 s, × 1.0175, 7 µs/rev
netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015712 s, 0.015635 s, -0.000077 s, × 0.9951, 10 µs/rev
netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.077353 s, 0.072072 s, -0.005281 s, × 0.9317, 12 µs/rev
netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.673930 s, 0.682732 s, +0.008802 s, × 1.0131, 10 µs/rev
mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000090 s, +0.000001 s, × 1.0112, 45 µs/rev
mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000212 s, 0.000210 s, -0.000002 s, × 0.9906, 26 µs/rev
mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000183 s, 0.000182 s, -0.000001 s, × 0.9945, 20 µs/rev
mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000595 s, 0.000594 s, -0.000001 s, × 0.9983, 84 µs/rev
mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003117 s, 0.003102 s, -0.000015 s, × 0.9952, 1034 µs/rev
mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.060197 s, 0.060234 s, +0.000037 s, × 1.0006, 10039 µs/rev
mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006379 s, 0.006300 s, -0.000079 s, × 0.9876, 3 µs/rev
mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005008 s, 0.004817 s, -0.000191 s, × 0.9619, 117 µs/rev
mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.065123 s, 0.065451 s, +0.000328 s, × 1.0050, 8 µs/rev
mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026404 s, 0.026282 s, -0.000122 s, × 0.9954, 42 µs/rev
mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203456 s, 0.206873 s, +0.003417 s, × 1.0168, 6 µs/rev
mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.929809 s, 1.935918 s, +0.006109 s, × 1.0032, 12 µs/rev
mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.825064 s, 2.827320 s, +0.002256 s, × 1.0008, 13 µs/rev
mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000857 s, 0.000842 s, -0.000015 s, × 0.9825, 421 µs/rev
mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000870 s, 0.000870 s, +0.000000 s, × 1.0000, 435 µs/rev
mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000161 s, 0.000165 s, +0.000004 s, × 1.0248, 41 µs/rev
mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001147 s, 0.001145 s, -0.000002 s, × 0.9983, 572 µs/rev
mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.026640 s, 0.026500 s, -0.000140 s, × 0.9947, 26500 µs/rev
mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.059849 s, 0.059407 s, -0.000442 s, × 0.9926, 9901 µs/rev
mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006326 s, 0.006325 s, -0.000001 s, × 0.9998, 3 µs/rev
mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005188 s, 0.005171 s, -0.000017 s, × 0.9967, 126 µs/rev
mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.067633 s, 0.066837 s, -0.000796 s, × 0.9882, 10 µs/rev
mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.306969 s, 0.314252 s, +0.007283 s, × 1.0237, 7 µs/rev
mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.293370 s, 0.304160 s, +0.010790 s, × 1.0368, 7 µs/rev
mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.087159 s, 0.089223 s, +0.002064 s, × 1.0237, 10 µs/rev
mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.027251 s, 0.026711 s, -0.000540 s, × 0.9802, 43 µs/rev
mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 3.010011 s, 3.243010 s, +0.232999 s, × 1.0774, 33 µs/rev
mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.753434 s, 0.756500 s, +0.003066 s, × 1.0041, 14 µs/rev
mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 18.123103 s, 5.693818 s, -12.429285 s, × 0.3142, 15 µs/rev
mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.583206 s, 0.590904 s, +0.007698 s, × 1.0132, 17 µs/rev
mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 17.907312 s, 5.677655 s, -12.229657 s, × 0.3171, 15 µs/rev
mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 17.684797 s, 5.563370 s, -12.121427 s, × 0.3146, 15 µs/rev
mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.881471 s, 2.864099 s, -0.017372 s, × 0.9940, 14 µs/rev
mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 101.062002 s, 113.297287 s, +12.235285 s, × 1.1211, 494 µs/rev
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 63.148971 s, 59.498652 s, -3.650319 s, × 0.9422, 155 µs/rev
Differential Revision: https://phab.mercurial-scm.org/D9491
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Sat, 21 Nov 2020 09:40:52 +0100 |
parents | 4b953cb17612 |
children |
line wrap: on
line source
/* * Copyright (c) Facebook, Inc. and its affiliates. * * This software may be used and distributed according to the terms of the * GNU General Public License version 2. */ //! Path-related utilities. use std::env; #[cfg(not(unix))] use std::fs::rename; use std::fs::{self, remove_file as fs_remove_file}; use std::io::{self, ErrorKind}; use std::path::{Component, Path, PathBuf}; use anyhow::Result; #[cfg(not(unix))] use tempfile::Builder; /// Normalize a canonicalized Path for display. /// /// This removes the UNC prefix `\\?\` on Windows. pub fn normalize_for_display(path: &str) -> &str { if cfg!(windows) && path.starts_with(r"\\?\") { &path[4..] } else { path } } /// Similar to [`normalize_for_display`]. But work on bytes. pub fn normalize_for_display_bytes(path: &[u8]) -> &[u8] { if cfg!(windows) && path.starts_with(br"\\?\") { &path[4..] } else { path } } /// Return the absolute and normalized path without accessing the filesystem. /// /// Unlike [`fs::canonicalize`], do not follow symlinks. /// /// This function does not access the filesystem. Therefore it can behave /// differently from the kernel or other library functions in corner cases. /// For example: /// /// - On some systems with symlink support, `foo/bar/..` and `foo` can be /// different as seen by the kernel, if `foo/bar` is a symlink. This function /// always returns `foo` in this case. /// - On Windows, the official normalization rules are much more complicated. /// See https://github.com/rust-lang/rust/pull/47363#issuecomment-357069527. /// For example, this function cannot translate "drive relative" path like /// "X:foo" to an absolute path. /// /// Return an error if `std::env::current_dir()` fails or if this function /// fails to produce an absolute path. 
pub fn absolute(path: impl AsRef<Path>) -> io::Result<PathBuf> {
    let path = path.as_ref();
    // Relative inputs are anchored at the current working directory.
    let path = if path.is_absolute() {
        path.to_path_buf()
    } else {
        std::env::current_dir()?.join(path)
    };

    // Joining with the working directory does not always yield an absolute
    // path (e.g. a Windows "drive relative" path such as `X:foo`).
    if !path.is_absolute() {
        return Err(io::Error::new(
            io::ErrorKind::Other,
            format!("cannot get absolute path from {:?}", path),
        ));
    }

    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::Normal(_) | Component::RootDir | Component::Prefix(_) => {
                result.push(component);
            }
            Component::ParentDir => {
                // `pop` leaves a bare root untouched, so `..` segments that
                // would climb above the root are silently discarded.
                result.pop();
            }
            Component::CurDir => (),
        }
    }
    Ok(result)
}

/// Remove the file pointed by `path`.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be removed.
#[cfg(unix)]
pub fn remove_file<P: AsRef<Path>>(path: P) -> Result<()> {
    fs_remove_file(path)?;
    Ok(())
}

/// Remove the file pointed by `path`.
///
/// On Windows, removing a file can fail for various reasons, including if the
/// file is memory mapped. This can happen when the repository is accessed
/// concurrently while a background task is trying to remove a packfile. To
/// solve this, we can rename the file before trying to remove it.
/// If the remove operation fails, a future repack will clean it up.
///
/// # Errors
///
/// Returns an error if `path` has no parent directory, if the temporary
/// destination cannot be created next to it, or if the rename fails. A
/// failure of the final removal is deliberately ignored.
#[cfg(not(unix))]
pub fn remove_file<P: AsRef<Path>>(path: P) -> Result<()> {
    let path = path.as_ref();

    // Suffix of the renamed file: "<.ext>-tmp" when the original has a UTF-8
    // extension, ".to-delete" otherwise.
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map_or_else(
            || ".to-delete".to_owned(),
            |ext| ".".to_owned() + ext + "-tmp",
        );

    // The renamed file lives next to the original. A path without a parent
    // cannot be handled that way; report it instead of panicking.
    let parent = path.parent().ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("cannot remove {:?}: path has no parent directory", path),
        )
    })?;

    let dest_path = Builder::new()
        .prefix("")
        .suffix(&extension)
        .rand_bytes(8)
        .tempfile_in(parent)?
        .into_temp_path();

    rename(path, &dest_path)?;

    // Ignore errors when removing the file, it will be cleaned up at a later
    // time.
    let _ = fs_remove_file(dest_path);

    Ok(())
}

/// Create the directory and ignore failures when a directory of the same name
/// already exists.
pub fn create_dir(path: impl AsRef<Path>) -> io::Result<()> { match fs::create_dir(path.as_ref()) { Ok(()) => Ok(()), Err(e) => { if e.kind() == ErrorKind::AlreadyExists && path.as_ref().is_dir() { Ok(()) } else { Err(e) } } } } /// Expand the user's home directory and any environment variables references /// in the given path. /// /// This function is designed to emulate the behavior of Mercurial's /// `util.expandpath` function, which in turn uses Python's /// `os.path.expand{user,vars}` functions. This results in behavior that is /// notably different from the default expansion behavior of the `shellexpand` /// crate. In particular: /// /// - If a reference to an environment variable is missing or invalid, the /// reference is left unchanged in the resulting path rather than emitting an /// error. /// /// - Home directory expansion explicitly happens after environment variable /// expansion, meaning that if an environment variable is expanded into a /// string starting with a tilde (`~`), the tilde will be expanded into the /// user's home directory. pub fn expand_path(path: impl AsRef<str>) -> PathBuf { expand_path_impl(path.as_ref(), |k| env::var(k).ok(), dirs::home_dir) } /// Same as `expand_path` but explicitly takes closures for environment /// variable and home directory lookup for the sake of testability. fn expand_path_impl<E, H>(path: &str, getenv: E, homedir: H) -> PathBuf where E: FnMut(&str) -> Option<String>, H: FnOnce() -> Option<PathBuf>, { // The shellexpand crate does not expand Windows environment variables // like `%PROGRAMDATA%`. We'd like to expand them too. So let's do some // pre-processing. // // XXX: Doing this preprocessing has the unfortunate side-effect that // if an environment variable fails to expand on Windows, the resulting // string will contain a UNIX-style environment variable reference. 
// // e.g., "/foo/%MISSING%/bar" will expand to "/foo/${MISSING}/bar" // // The current approach is good enough for now, but likely needs to // be improved later for correctness. let path = { let mut new_path = String::new(); let mut is_starting = true; for ch in path.chars() { if ch == '%' { if is_starting { new_path.push_str("${"); } else { new_path.push('}'); } is_starting = !is_starting; } else if cfg!(windows) && ch == '/' { // Only on Windows, change "/" to "\" automatically. // This makes sure "%include /foo" works as expected. new_path.push('\\') } else { new_path.push(ch); } } new_path }; let path = shellexpand::env_with_context_no_errors(&path, getenv); shellexpand::tilde_with_context(&path, homedir) .as_ref() .into() } #[cfg(test)] mod tests { use super::*; use std::fs::File; use tempfile::TempDir; #[cfg(windows)] mod windows { use super::*; #[test] fn test_absolute_fullpath() { assert_eq!(absolute("C:/foo").unwrap(), Path::new("C:\\foo")); assert_eq!( absolute("x:\\a/b\\./.\\c").unwrap(), Path::new("x:\\a\\b\\c") ); assert_eq!( absolute("y:/a/b\\../..\\c\\../d\\./.").unwrap(), Path::new("y:\\d") ); assert_eq!( absolute("z:/a/b\\../..\\../..\\..").unwrap(), Path::new("z:\\") ); } } #[cfg(unix)] mod unix { use super::*; #[test] fn test_absolute_fullpath() { assert_eq!( absolute("/a/./b\\c/../d/.").unwrap(), Path::new("/a/d") ); assert_eq!(absolute("/a/../../../../b").unwrap(), Path::new("/b")); assert_eq!(absolute("/../../..").unwrap(), Path::new("/")); assert_eq!(absolute("/../../../").unwrap(), Path::new("/")); assert_eq!( absolute("//foo///bar//baz").unwrap(), Path::new("/foo/bar/baz") ); assert_eq!(absolute("//").unwrap(), Path::new("/")); } } #[test] fn test_create_dir_non_exist() -> Result<()> { let tempdir = TempDir::new()?; let mut path = tempdir.path().to_path_buf(); path.push("dir"); create_dir(&path)?; assert!(path.is_dir()); Ok(()) } #[test] fn test_create_dir_exist() -> Result<()> { let tempdir = TempDir::new()?; let mut path = 
tempdir.path().to_path_buf(); path.push("dir"); create_dir(&path)?; assert!(&path.is_dir()); create_dir(&path)?; assert!(&path.is_dir()); Ok(()) } #[test] fn test_create_dir_file_exist() -> Result<()> { let tempdir = TempDir::new()?; let mut path = tempdir.path().to_path_buf(); path.push("dir"); File::create(&path)?; let err = create_dir(&path).unwrap_err(); assert_eq!(err.kind(), ErrorKind::AlreadyExists); Ok(()) } #[test] fn test_path_expansion() { fn getenv(key: &str) -> Option<String> { match key { "foo" => Some("~/a".into()), "bar" => Some("b".into()), _ => None, } } fn homedir() -> Option<PathBuf> { Some(PathBuf::from("/home/user")) } let path = "$foo/${bar}/$baz"; let expected = PathBuf::from("/home/user/a/b/$baz"); assert_eq!(expand_path_impl(&path, getenv, homedir), expected); } }