Mercurial > public > mercurial-scm > hg
annotate rust/hg-core/src/revlog/filelog.rs @ 49517:52464a20add0
rhg: parallellize computation of [unsure_is_modified]
[unsure_is_modified] is called for every file for which we can't
determine its status based on its size and mtime alone.
In particular, this happens if the mtime of the file changes
without its contents changing.
Parallellizing this improves performance significantly when
we have many of these files.
Here's an example run (on a repo with ~400k files after dropping FS caches)
```
before:
real 0m53.901s
user 0m27.806s
sys 0m31.325s
after:
real 0m32.017s
user 0m34.277s
sys 1m26.250s
```
Another example run (a different FS):
```
before:
real 3m28.479s
user 0m31.800s
sys 0m25.324s
after:
real 0m29.751s
user 0m41.814s
sys 1m15.387s
```
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Wed, 05 Oct 2022 15:45:05 -0400 |
parents | 3f86ee422095 |
children | e98fd81bb151 |
rev | line source |
---|---|
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
1 use crate::errors::HgError; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
2 use crate::repo::Repo; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
3 use crate::revlog::path_encode::path_encode; |
48542
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
4 use crate::revlog::revlog::RevlogEntry; |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
5 use crate::revlog::revlog::{Revlog, RevlogError}; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
6 use crate::revlog::NodePrefix; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
7 use crate::revlog::Revision; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
8 use crate::utils::files::get_path_from_bytes; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
9 use crate::utils::hg_path::HgPath; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
10 use crate::utils::SliceExt; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
11 use std::path::PathBuf; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
12 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
13 /// A specialized `Revlog` to work with file data logs. |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
14 pub struct Filelog { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
15 /// The generic `revlog` format. |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
16 revlog: Revlog, |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
17 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
18 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
19 impl Filelog { |
49517
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
20 pub fn open_vfs( |
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
21 store_vfs: &crate::vfs::Vfs<'_>, |
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
22 file_path: &HgPath, |
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
23 ) -> Result<Self, HgError> { |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
24 let index_path = store_path(file_path, b".i"); |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
25 let data_path = store_path(file_path, b".d"); |
49517
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
26 let revlog = |
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
27 Revlog::open(store_vfs, index_path, Some(&data_path), false)?; |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
28 Ok(Self { revlog }) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
29 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
30 |
49517
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
31 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> { |
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
32 Self::open_vfs(&repo.store_vfs(), file_path) |
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
33 } |
52464a20add0
rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49174
diff
changeset
|
34 |
48542
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
35 /// The given node ID is that of the file as found in a filelog, not of a |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
36 /// changeset. |
47969
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47968
diff
changeset
|
37 pub fn data_for_node( |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
38 &self, |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
39 file_node: impl Into<NodePrefix>, |
48540
20d0d896183e
rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents:
48249
diff
changeset
|
40 ) -> Result<FilelogRevisionData, RevlogError> { |
47968
6f579618ea7b
rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`
Simon Sapin <simon.sapin@octobus.net>
parents:
47963
diff
changeset
|
41 let file_rev = self.revlog.rev_from_node(file_node.into())?; |
47969
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47968
diff
changeset
|
42 self.data_for_rev(file_rev) |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
43 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
44 |
48542
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
45 /// The given revision is that of the file as found in a filelog, not of a |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
46 /// changeset. |
47969
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47968
diff
changeset
|
47 pub fn data_for_rev( |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
48 &self, |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
49 file_rev: Revision, |
48540
20d0d896183e
rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents:
48249
diff
changeset
|
50 ) -> Result<FilelogRevisionData, RevlogError> { |
48541
f2f57724d4eb
rhg: Add RevlogEntry::data that does delta resolution
Simon Sapin <simon.sapin@octobus.net>
parents:
48540
diff
changeset
|
51 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned(); |
48540
20d0d896183e
rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents:
48249
diff
changeset
|
52 Ok(FilelogRevisionData(data.into())) |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
53 } |
48542
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
54 |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
55 /// The given node ID is that of the file as found in a filelog, not of a |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
56 /// changeset. |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
57 pub fn entry_for_node( |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
58 &self, |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
59 file_node: impl Into<NodePrefix>, |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
60 ) -> Result<FilelogEntry, RevlogError> { |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
61 let file_rev = self.revlog.rev_from_node(file_node.into())?; |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
62 self.entry_for_rev(file_rev) |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
63 } |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
64 |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
65 /// The given revision is that of the file as found in a filelog, not of a |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
66 /// changeset. |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
67 pub fn entry_for_rev( |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
68 &self, |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
69 file_rev: Revision, |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
70 ) -> Result<FilelogEntry, RevlogError> { |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
71 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
72 } |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
73 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
74 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
75 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
76 let encoded_bytes = |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
77 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
78 get_path_from_bytes(&encoded_bytes).into() |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
79 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
80 |
48542
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
81 pub struct FilelogEntry<'a>(RevlogEntry<'a>); |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
82 |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
83 impl FilelogEntry<'_> { |
48546
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
84 /// `self.data()` can be expensive, with decompression and delta |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
85 /// resolution. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
86 /// |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
87 /// *Without* paying this cost, based on revlog index information |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
88 /// including `RevlogEntry::uncompressed_len`: |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
89 /// |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
90 /// * Returns `true` if the length that `self.data().file_data().len()` |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
91 /// would return is definitely **not equal** to `other_len`. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
92 /// * Returns `false` if available information is inconclusive. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
93 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool { |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
94 // Relevant code that implement this behavior in Python code: |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
95 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied, |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
96 // revlog.size, revlog.rawsize |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
97 |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
98 // Let’s call `file_data_len` what would be returned by |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
99 // `self.data().file_data().len()`. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
100 |
49174
3f86ee422095
censor: make rhg fall back to python when encountering a censored node
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
49089
diff
changeset
|
101 if self.0.is_censored() { |
48546
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
102 let file_data_len = 0; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
103 return other_len != file_data_len; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
104 } |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
105 |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
106 if self.0.has_length_affecting_flag_processor() { |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
107 // We can’t conclude anything about `file_data_len`. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
108 return false; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
109 } |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
110 |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
111 // Revlog revisions (usually) have metadata for the size of |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
112 // their data after decompression and delta resolution |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
113 // as would be returned by `Revlog::get_rev_data`. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
114 // |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
115 // For filelogs this is the file’s contents preceded by an optional |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
116 // metadata block. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
117 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() { |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
118 l as u64 |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
119 } else { |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
120 // The field was set to -1, the actual uncompressed len is unknown. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
121 // We need to decompress to say more. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
122 return false; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
123 }; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
124 // `uncompressed_len = file_data_len + optional_metadata_len`, |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
125 // so `file_data_len <= uncompressed_len`. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
126 if uncompressed_len < other_len { |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
127 // Transitively, `file_data_len < other_len`. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
128 // So `other_len != file_data_len` definitely. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
129 return true; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
130 } |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
131 |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
132 if uncompressed_len == other_len + 4 { |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
133 // It’s possible that `file_data_len == other_len` with an empty |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
134 // metadata block (2 start marker bytes + 2 end marker bytes). |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
135 // This happens when there wouldn’t otherwise be metadata, but |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
136 // the first 2 bytes of file data happen to match a start marker |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
137 // and would be ambiguous. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
138 return false; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
139 } |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
140 |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
141 if !self.0.has_p1() { |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
142 // There may or may not be copy metadata, so we can’t deduce more |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
143 // about `file_data_len` without computing file data. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
144 return false; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
145 } |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
146 |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
147 // Filelog ancestry is not meaningful in the way changelog ancestry is. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
148 // It only provides hints to delta generation. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
149 // p1 and p2 are set to null when making a copy or rename since |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
150 // contents are likely unrelatedto what might have previously existed |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
151 // at the destination path. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
152 // |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
153 // Conversely, since here p1 is non-null, there is no copy metadata. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
154 // Note that this reasoning may be invalidated in the presence of |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
155 // merges made by some previous versions of Mercurial that |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
156 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528> |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
157 // and `tests/test-issue6528.t`. |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
158 // |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
159 // Since copy metadata is currently the only kind of metadata |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
160 // kept in revlog data of filelogs, |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
161 // this `FilelogEntry` does not have such metadata: |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
162 let file_data_len = uncompressed_len; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
163 |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
164 return file_data_len != other_len; |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
165 } |
e91aa800ae5b
rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents:
48542
diff
changeset
|
166 |
48542
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
167 pub fn data(&self) -> Result<FilelogRevisionData, HgError> { |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
168 Ok(FilelogRevisionData(self.0.data()?.into_owned())) |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
169 } |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
170 } |
35c47015b9b7
rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents:
48541
diff
changeset
|
171 |
48540
20d0d896183e
rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents:
48249
diff
changeset
|
172 /// The data for one revision in a filelog, uncompressed and delta-resolved. |
20d0d896183e
rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents:
48249
diff
changeset
|
173 pub struct FilelogRevisionData(Vec<u8>); |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
174 |
48540
20d0d896183e
rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents:
48249
diff
changeset
|
175 impl FilelogRevisionData { |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
176 /// Split into metadata and data |
48249
e9faae0f445c
rhg: simplify split_metadata
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
48237
diff
changeset
|
177 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> { |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
178 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n']; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
179 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
180 if let Some(rest) = self.0.drop_prefix(DELIMITER) { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
181 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) { |
48249
e9faae0f445c
rhg: simplify split_metadata
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
48237
diff
changeset
|
182 Ok((Some(metadata), data)) |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
183 } else { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
184 Err(HgError::corrupted( |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
185 "Missing metadata end delimiter in filelog entry", |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
186 )) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
187 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
188 } else { |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
189 Ok((None, &self.0)) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
190 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
191 } |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
192 |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
193 /// Returns the file contents at this revision, stripped of any metadata |
48540
20d0d896183e
rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents:
48249
diff
changeset
|
194 pub fn file_data(&self) -> Result<&[u8], HgError> { |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
195 let (_metadata, data) = self.split()?; |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
196 Ok(data) |
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
197 } |
48237
027ebad952ac
rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
198 |
027ebad952ac
rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
199 /// Consume the entry, and convert it into data, discarding any metadata, |
027ebad952ac
rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
200 /// if present. |
48540
20d0d896183e
rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents:
48249
diff
changeset
|
201 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> { |
48249
e9faae0f445c
rhg: simplify split_metadata
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
48237
diff
changeset
|
202 if let (Some(_metadata), data) = self.split()? { |
48237
027ebad952ac
rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
203 Ok(data.to_owned()) |
027ebad952ac
rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
204 } else { |
027ebad952ac
rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
205 Ok(self.0) |
027ebad952ac
rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
206 } |
027ebad952ac
rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
47969
diff
changeset
|
207 } |
47961
4d2a5ca060e3
rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff
changeset
|
208 } |