annotate rust/hg-core/src/revlog/manifest.rs @ 48178:f12a19d03d2c

fix: reduce number of tool executions By grouping together (path, ctx) pairs according to the inputs they would provide to fixer tools, we can deduplicate executions of fixer tools to significantly reduce the amount of time spent running slow tools. This change does not handle clean files in the working copy, which could still be deduplicated against the files in the checked out commit. It's a little harder to do that because the filerev is not available in the workingfilectx (and it doesn't exist for added files). Anecdotally, this change makes some real uses cases at Google 10x faster. I think we were originally hesitant to do this because the benefits weren't obvious, and implementing it efficiently is kind of tricky. If we simply memoized the formatter execution function, we would be keeping tons of file content in memory. Also included is a regression test for a corner case that I broke with my first attempt at optimizing this code. Differential Revision: https://phab.mercurial-scm.org/D11280
author Danny Hooper <hooper@google.com>
date Thu, 02 Sep 2021 14:08:45 -0700
parents 87e3f878e65f
children 10c32e1b892a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
47963
001d747c2baf rust: Return HgError instead of RevlogError in revlog constructors
Simon Sapin <simon.sapin@octobus.net>
parents: 47957
diff changeset
1 use crate::errors::HgError;
46167
8a4914397d02 rust: introduce Repo and Vfs types for filesystem abstraction
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
2 use crate::repo::Repo;
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
3 use crate::revlog::revlog::{Revlog, RevlogError};
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
4 use crate::revlog::Revision;
47964
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
5 use crate::revlog::{Node, NodePrefix};
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
6 use crate::utils::hg_path::HgPath;
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
7
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
8 /// A specialized `Revlog` to work with `manifest` data format.
47957
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46431
diff changeset
9 pub struct Manifestlog {
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
10 /// The generic `revlog` format.
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
11 revlog: Revlog,
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
12 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
13
47957
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46431
diff changeset
14 impl Manifestlog {
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
15 /// Open the `manifest` of a repository given by its root.
47963
001d747c2baf rust: Return HgError instead of RevlogError in revlog constructors
Simon Sapin <simon.sapin@octobus.net>
parents: 47957
diff changeset
16 pub fn open(repo: &Repo) -> Result<Self, HgError> {
46167
8a4914397d02 rust: introduce Repo and Vfs types for filesystem abstraction
Simon Sapin <simon.sapin@octobus.net>
parents: 46134
diff changeset
17 let revlog = Revlog::open(repo, "00manifest.i", None)?;
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
18 Ok(Self { revlog })
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
19 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
20
47969
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
21 /// Return the `Manifest` for the given node ID.
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
22 ///
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
23 /// Note: this is a node ID in the manifestlog, typically found through
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
24 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
25 /// changeset.
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
26 ///
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
27 /// See also `Repo::manifest_for_node`
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
28 pub fn data_for_node(
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
29 &self,
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
30 node: NodePrefix,
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
31 ) -> Result<Manifest, RevlogError> {
47968
6f579618ea7b rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`
Simon Sapin <simon.sapin@octobus.net>
parents: 47964
diff changeset
32 let rev = self.revlog.rev_from_node(node)?;
47969
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
33 self.data_for_rev(rev)
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
34 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
35
47969
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
36 /// Return the `Manifest` of a given revision number.
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
37 ///
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
38 /// Note: this is a revision number in the manifestlog, *not* of any
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
39 /// changeset.
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
40 ///
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
41 /// See also `Repo::manifest_for_rev`
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
42 pub fn data_for_rev(
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
43 &self,
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
44 rev: Revision,
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
45 ) -> Result<Manifest, RevlogError> {
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
46 let bytes = self.revlog.get_rev_data(rev)?;
47957
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46431
diff changeset
47 Ok(Manifest { bytes })
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
48 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
49 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
50
47957
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46431
diff changeset
51 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
52 #[derive(Debug)]
47957
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46431
diff changeset
53 pub struct Manifest {
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
54 bytes: Vec<u8>,
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
55 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
56
47957
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46431
diff changeset
57 impl Manifest {
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
58 /// Return an iterator over the lines of the entry.
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
59 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
60 self.bytes
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
61 .split(|b| b == &b'\n')
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
62 .filter(|line| !line.is_empty())
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
63 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
64
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
65 /// Return an iterator over the files of the entry.
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
66 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
67 self.lines().filter(|line| !line.is_empty()).map(|line| {
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
68 let pos = line
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
69 .iter()
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
70 .position(|x| x == &b'\0')
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
71 .expect("manifest line should contain \\0");
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
72 HgPath::new(&line[..pos])
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
73 })
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
74 }
45540
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
75
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
76 /// Return an iterator over the files of the entry.
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
77 pub fn files_with_nodes(&self) -> impl Iterator<Item = (&HgPath, &[u8])> {
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
78 self.lines().filter(|line| !line.is_empty()).map(|line| {
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
79 let pos = line
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
80 .iter()
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
81 .position(|x| x == &b'\0')
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
82 .expect("manifest line should contain \\0");
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
83 let hash_start = pos + 1;
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
84 let hash_end = hash_start + 40;
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
85 (HgPath::new(&line[..pos]), &line[hash_start..hash_end])
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
86 })
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45533
diff changeset
87 }
47964
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
88
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
89 /// If the given path is in this manifest, return its filelog node ID
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
90 pub fn find_file(&self, path: &HgPath) -> Result<Option<Node>, HgError> {
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
91 // TODO: use binary search instead of linear scan. This may involve
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
92 // building (and caching) an index of the byte indicex of each manifest
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
93 // line.
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
94 for (manifest_path, node) in self.files_with_nodes() {
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
95 if manifest_path == path {
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
96 return Ok(Some(Node::from_hex_for_repo(node)?));
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
97 }
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
98 }
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
99 Ok(None)
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
100 }
45533
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
101 }