Mercurial > public > mercurial-scm > hg
annotate rust/hg-core/src/revlog/filelog.rs @ 48178:f12a19d03d2c
fix: reduce number of tool executions
By grouping together (path, ctx) pairs according to the inputs they would
provide to fixer tools, we can deduplicate executions of fixer tools to
significantly reduce the amount of time spent running slow tools.
This change does not handle clean files in the working copy, which could still
be deduplicated against the files in the checked out commit. It's a little
harder to do that because the filerev is not available in the workingfilectx
(and it doesn't exist for added files).
Anecdotally, this change makes some real uses cases at Google 10x faster. I
think we were originally hesitant to do this because the benefits weren't
obvious, and implementing it efficiently is kind of tricky. If we simply
memoized the formatter execution function, we would be keeping tons of file
content in memory.
Also included is a regression test for a corner case that I broke with my first
attempt at optimizing this code.
Differential Revision: https://phab.mercurial-scm.org/D11280
author | Danny Hooper <hooper@google.com> |
---|---|
date | Thu, 02 Sep 2021 14:08:45 -0700 |
parents | 87e3f878e65f |
children | 027ebad952ac |
rev | line source |
---|---|
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
1 use crate::errors::HgError; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
2 use crate::repo::Repo; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
3 use crate::revlog::path_encode::path_encode; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
4 use crate::revlog::revlog::{Revlog, RevlogError}; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
5 use crate::revlog::NodePrefix; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
6 use crate::revlog::Revision; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
7 use crate::utils::files::get_path_from_bytes; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
8 use crate::utils::hg_path::HgPath; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
9 use crate::utils::SliceExt; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
10 use std::borrow::Cow; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
11 use std::path::PathBuf; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
12 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
13 /// A specialized `Revlog` to work with file data logs. |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
14 pub struct Filelog { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
15 /// The generic `revlog` format. |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
16 revlog: Revlog, |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
17 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
18 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
19 impl Filelog { |
47963
001d747c2baf
rust: Return HgError instead of RevlogError in revlog constructors
Simon Sapin <simon.sapin@octobus.net>
parents:
47961
diff
changeset
|
20 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> { |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
21 let index_path = store_path(file_path, b".i"); |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
22 let data_path = store_path(file_path, b".d"); |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
23 let revlog = Revlog::open(repo, index_path, Some(&data_path))?; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
24 Ok(Self { revlog }) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
25 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
26 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
27 /// The given node ID is that of the file as found in a manifest, not of a |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
28 /// changeset. |
47969
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47968
diff
changeset
|
29 pub fn data_for_node( |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
30 &self, |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
31 file_node: impl Into<NodePrefix>, |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
32 ) -> Result<FilelogEntry, RevlogError> { |
47968
6f579618ea7b
rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`
Simon Sapin <simon.sapin@octobus.net>
parents:
47963
diff
changeset
|
33 let file_rev = self.revlog.rev_from_node(file_node.into())?; |
47969
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47968
diff
changeset
|
34 self.data_for_rev(file_rev) |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
35 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
36 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
37 /// The given revision is that of the file as found in a manifest, not of a |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
38 /// changeset. |
47969
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47968
diff
changeset
|
39 pub fn data_for_rev( |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
40 &self, |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
41 file_rev: Revision, |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
42 ) -> Result<FilelogEntry, RevlogError> { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
43 let data = self.revlog.get_rev_data(file_rev)?; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
44 Ok(FilelogEntry(data.into())) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
45 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
46 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
47 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
48 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
49 let encoded_bytes = |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
50 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
51 get_path_from_bytes(&encoded_bytes).into() |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
52 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
53 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
54 pub struct FilelogEntry<'filelog>(Cow<'filelog, [u8]>); |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
55 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
56 impl<'filelog> FilelogEntry<'filelog> { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
57 /// Split into metadata and data |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
58 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
59 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n']; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
60 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
61 if let Some(rest) = self.0.drop_prefix(DELIMITER) { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
62 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
63 Ok((Some(metadata), data)) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
64 } else { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
65 Err(HgError::corrupted( |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
66 "Missing metadata end delimiter in filelog entry", |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
67 )) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
68 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
69 } else { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
70 Ok((None, &self.0)) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
71 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
72 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
73 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
74 /// Returns the file contents at this revision, stripped of any metadata |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
75 pub fn data(&self) -> Result<&[u8], HgError> { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
76 let (_metadata, data) = self.split()?; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
77 Ok(data) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
78 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
79 } |