annotate rust/hg-core/src/operations/cat.rs @ 48224:6b5773f89183

rhg: faster hg cat when many files are requested With this patch I'm seeing a ~39ms improvement (220ms -> 181ms) when running [hg cat] on ~220 files in a ~260k-file repo. The timing for [hg cat] on an individual file becomes slightly worse (losing 5ms: 145ms -> 150ms). A follow-up commit is intended to improve that. Differential Revision: https://phab.mercurial-scm.org/D11615
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 04 Oct 2021 19:06:45 +0100
parents 87e3f878e65f
children 0cc69017d47f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45541
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
1 // list_tracked_files.rs
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
2 //
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
4 //
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
5 // This software may be used and distributed according to the terms of the
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
6 // GNU General Public License version 2 or any later version.
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
7
46167
8a4914397d02 rust: introduce Repo and Vfs types for filesystem abstraction
Simon Sapin <simon.sapin@octobus.net>
parents: 46135
diff changeset
8 use crate::repo::Repo;
45541
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
9 use crate::revlog::revlog::RevlogError;
46033
88e741bf2d93 rust: use NodePrefix::from_hex instead of hex::decode directly
Simon Sapin <simon-commits@exyr.org>
parents: 46032
diff changeset
10 use crate::revlog::Node;
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents: 47960
diff changeset
11
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents: 47960
diff changeset
12 use crate::utils::hg_path::HgPathBuf;
45541
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
13
48224
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
14 use itertools::EitherOrBoth::{Both, Left, Right};
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
15 use itertools::Itertools;
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
16
46744
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
17 pub struct CatOutput {
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
18 /// Whether any file in the manifest matched the paths given as CLI
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
19 /// arguments
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
20 pub found_any: bool,
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
21 /// The contents of matching files, in manifest order
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
22 pub concatenated: Vec<u8>,
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
23 /// Which of the CLI arguments did not match any manifest file
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
24 pub missing: Vec<HgPathBuf>,
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
25 /// The node ID that the given revset was resolved to
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
26 pub node: Node,
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
27 }
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
28
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
29 /// Output the given revision of files
46135
dca9cb99971c rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
30 ///
dca9cb99971c rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
31 /// * `root`: Repository root
dca9cb99971c rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
32 /// * `rev`: The revision to cat the files from.
dca9cb99971c rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
33 /// * `files`: The files to output.
46744
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
34 pub fn cat<'a>(
46167
8a4914397d02 rust: introduce Repo and Vfs types for filesystem abstraction
Simon Sapin <simon.sapin@octobus.net>
parents: 46135
diff changeset
35 repo: &Repo,
46433
4b381dbbf8b7 rhg: centralize parsing of `--rev` CLI arguments
Simon Sapin <simon.sapin@octobus.net>
parents: 46431
diff changeset
36 revset: &str,
48224
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
37 mut files: Vec<HgPathBuf>,
46744
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
38 ) -> Result<CatOutput, RevlogError> {
46433
4b381dbbf8b7 rhg: centralize parsing of `--rev` CLI arguments
Simon Sapin <simon.sapin@octobus.net>
parents: 46431
diff changeset
39 let rev = crate::revset::resolve_single(revset, repo)?;
47964
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47961
diff changeset
40 let manifest = repo.manifest_for_rev(rev)?;
47960
cfb6e6699b25 rust: Add Repo::manifest(revision)
Simon Sapin <simon.sapin@octobus.net>
parents: 47959
diff changeset
41 let node = *repo
cfb6e6699b25 rust: Add Repo::manifest(revision)
Simon Sapin <simon.sapin@octobus.net>
parents: 47959
diff changeset
42 .changelog()?
46744
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
43 .node_from_rev(rev)
47960
cfb6e6699b25 rust: Add Repo::manifest(revision)
Simon Sapin <simon.sapin@octobus.net>
parents: 47959
diff changeset
44 .expect("should succeed when repo.manifest did");
46135
dca9cb99971c rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
45 let mut bytes = vec![];
46744
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
46 let mut found_any = false;
48224
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
47 files.sort_unstable();
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
48
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
49 let mut missing = vec![];
45541
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
50
48224
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
51 for entry in manifest
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
52 .files_with_nodes()
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
53 .merge_join_by(files.iter(), |(manifest_file, _), file| {
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
54 manifest_file.cmp(&file.as_ref())
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
55 })
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
56 {
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
57 match entry {
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
58 Left(_) => (),
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
59 Right(path) => missing.push(path),
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
60 Both((manifest_file, node_bytes), _) => {
46744
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
61 found_any = true;
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents: 47960
diff changeset
62 let file_log = repo.filelog(manifest_file)?;
46443
43d63979a75e rust: use HgError in RevlogError and Vfs
Simon Sapin <simon.sapin@octobus.net>
parents: 46437
diff changeset
63 let file_node = Node::from_hex_for_repo(node_bytes)?;
47969
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47964
diff changeset
64 let entry = file_log.data_for_node(file_node)?;
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents: 47960
diff changeset
65 bytes.extend(entry.data()?)
46135
dca9cb99971c rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
66 }
dca9cb99971c rust: replace most "operation" structs with functions
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
67 }
45541
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
68 }
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
69
48224
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
70 let missing: Vec<HgPathBuf> = missing
46744
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
71 .iter()
48224
6b5773f89183 rhg: faster hg cat when many files are requested
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
72 .map(|file| (*(file.as_ref())).to_owned())
46744
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
73 .collect();
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
74 Ok(CatOutput {
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
75 found_any,
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
76 concatenated: bytes,
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
77 missing,
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
78 node,
b1f2c2b336ec rhg: `cat` command: print error messages for missing files
Simon Sapin <simon.sapin@octobus.net>
parents: 46443
diff changeset
79 })
45541
522ec3dc44b9 hg-core: add a `CatRev` operation
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
80 }