Mercurial > public > mercurial-scm > hg
annotate rust/hg-core/src/operations/cat.rs @ 48224:6b5773f89183
rhg: faster hg cat when many files are requested
With this patch I'm seeing a ~39ms improvement (220ms -> 181ms) when
running [hg cat] on ~220 files in a ~260k-file repo.
The timing for [hg cat] on an individual file becomes slightly worse
(losing 5ms: 145ms -> 150ms).
A follow-up commit is intended to improve that.
Differential Revision: https://phab.mercurial-scm.org/D11615
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 04 Oct 2021 19:06:45 +0100 |
parents | 87e3f878e65f |
children | 0cc69017d47f |
rev | line source |
---|---|
45541
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
1 // list_tracked_files.rs |
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
2 // |
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
4 // |
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
5 // This software may be used and distributed according to the terms of the |
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
6 // GNU General Public License version 2 or any later version. |
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
7 |
46167
8a4914397d02
rust: introduce Repo and Vfs types for filesystem abstraction
Simon Sapin <simon.sapin@octobus.net>
parents:
46135
diff
changeset
|
8 use crate::repo::Repo; |
45541
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
9 use crate::revlog::revlog::RevlogError; |
46033
88e741bf2d93
rust: use NodePrefix::from_hex instead of hex::decode directly
Simon Sapin <simon-commits@exyr.org>
parents:
46032
diff
changeset
|
10 use crate::revlog::Node; |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
47960
diff
changeset
|
11 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
47960
diff
changeset
|
12 use crate::utils::hg_path::HgPathBuf; |
45541
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
13 |
48224
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
14 use itertools::EitherOrBoth::{Both, Left, Right}; |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
15 use itertools::Itertools; |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
16 |
46744
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
17 pub struct CatOutput { |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
18 /// Whether any file in the manifest matched the paths given as CLI |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
19 /// arguments |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
20 pub found_any: bool, |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
21 /// The contents of matching files, in manifest order |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
22 pub concatenated: Vec<u8>, |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
23 /// Which of the CLI arguments did not match any manifest file |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
24 pub missing: Vec<HgPathBuf>, |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
25 /// The node ID that the given revset was resolved to |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
26 pub node: Node, |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
27 } |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
28 |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
29 /// Output the given revision of files |
46135
dca9cb99971c
rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents:
46134
diff
changeset
|
30 /// |
dca9cb99971c
rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents:
46134
diff
changeset
|
31 /// * `root`: Repository root |
dca9cb99971c
rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents:
46134
diff
changeset
|
32 /// * `rev`: The revision to cat the files from. |
dca9cb99971c
rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents:
46134
diff
changeset
|
33 /// * `files`: The files to output. |
46744
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
34 pub fn cat<'a>( |
46167
8a4914397d02
rust: introduce Repo and Vfs types for filesystem abstraction
Simon Sapin <simon.sapin@octobus.net>
parents:
46135
diff
changeset
|
35 repo: &Repo, |
46433
4b381dbbf8b7
rhg: centralize parsing of `--rev` CLI arguments
Simon Sapin <simon.sapin@octobus.net>
parents:
46431
diff
changeset
|
36 revset: &str, |
48224
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
37 mut files: Vec<HgPathBuf>, |
46744
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
38 ) -> Result<CatOutput, RevlogError> { |
46433
4b381dbbf8b7
rhg: centralize parsing of `--rev` CLI arguments
Simon Sapin <simon.sapin@octobus.net>
parents:
46431
diff
changeset
|
39 let rev = crate::revset::resolve_single(revset, repo)?; |
47964
796206e74b10
rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents:
47961
diff
changeset
|
40 let manifest = repo.manifest_for_rev(rev)?; |
47960
cfb6e6699b25
rust: Add Repo::manifest(revision)
Simon Sapin <simon.sapin@octobus.net>
parents:
47959
diff
changeset
|
41 let node = *repo |
cfb6e6699b25
rust: Add Repo::manifest(revision)
Simon Sapin <simon.sapin@octobus.net>
parents:
47959
diff
changeset
|
42 .changelog()? |
46744
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
43 .node_from_rev(rev) |
47960
cfb6e6699b25
rust: Add Repo::manifest(revision)
Simon Sapin <simon.sapin@octobus.net>
parents:
47959
diff
changeset
|
44 .expect("should succeed when repo.manifest did"); |
46135
dca9cb99971c
rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents:
46134
diff
changeset
|
45 let mut bytes = vec![]; |
46744
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
46 let mut found_any = false; |
48224
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
47 files.sort_unstable(); |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
48 |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
49 let mut missing = vec![]; |
45541
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
50 |
48224
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
51 for entry in manifest |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
52 .files_with_nodes() |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
53 .merge_join_by(files.iter(), |(manifest_file, _), file| { |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
54 manifest_file.cmp(&file.as_ref()) |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
55 }) |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
56 { |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
57 match entry { |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
58 Left(_) => (), |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
59 Right(path) => missing.push(path), |
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
60 Both((manifest_file, node_bytes), _) => { |
46744
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
61 found_any = true; |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
47960
diff
changeset
|
62 let file_log = repo.filelog(manifest_file)?; |
46443
43d63979a75e
rust: use HgError in RevlogError and Vfs
Simon Sapin <simon.sapin@octobus.net>
parents:
46437
diff
changeset
|
63 let file_node = Node::from_hex_for_repo(node_bytes)?; |
47969
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47964
diff
changeset
|
64 let entry = file_log.data_for_node(file_node)?; |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
47960
diff
changeset
|
65 bytes.extend(entry.data()?) |
46135
dca9cb99971c
rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents:
46134
diff
changeset
|
66 } |
dca9cb99971c
rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents:
46134
diff
changeset
|
67 } |
45541
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
68 } |
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
69 |
48224
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
70 let missing: Vec<HgPathBuf> = missing |
46744
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
71 .iter() |
48224
6b5773f89183
rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
72 .map(|file| (*(file.as_ref())).to_owned()) |
46744
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
73 .collect(); |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
74 Ok(CatOutput { |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
75 found_any, |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
76 concatenated: bytes, |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
77 missing, |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
78 node, |
b1f2c2b336ec
rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents:
46443
diff
changeset
|
79 }) |
45541
522ec3dc44b9
hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
80 } |