annotate rust/hg-core/src/revlog/filelog.rs @ 49517:52464a20add0

rhg: parallellize computation of [unsure_is_modified] [unsure_is_modified] is called for every file for which we can't determine its status based on its size and mtime alone. In particular, this happens if the mtime of the file changes without its contents changing. Parallellizing this improves performance significantly when we have many of these files. Here's an example run (on a repo with ~400k files after dropping FS caches) ``` before: real 0m53.901s user 0m27.806s sys 0m31.325s after: real 0m32.017s user 0m34.277s sys 1m26.250s ``` Another example run (a different FS): ``` before: real 3m28.479s user 0m31.800s sys 0m25.324s after: real 0m29.751s user 0m41.814s sys 1m15.387s ```
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Wed, 05 Oct 2022 15:45:05 -0400
parents 3f86ee422095
children e98fd81bb151
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
1 use crate::errors::HgError;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
2 use crate::repo::Repo;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
3 use crate::revlog::path_encode::path_encode;
48542
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
4 use crate::revlog::revlog::RevlogEntry;
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
5 use crate::revlog::revlog::{Revlog, RevlogError};
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
6 use crate::revlog::NodePrefix;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
7 use crate::revlog::Revision;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
8 use crate::utils::files::get_path_from_bytes;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
9 use crate::utils::hg_path::HgPath;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
10 use crate::utils::SliceExt;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
11 use std::path::PathBuf;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
12
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
13 /// A specialized `Revlog` to work with file data logs.
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
14 pub struct Filelog {
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
15 /// The generic `revlog` format.
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
16 revlog: Revlog,
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
17 }
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
18
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
19 impl Filelog {
49517
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
20 pub fn open_vfs(
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
21 store_vfs: &crate::vfs::Vfs<'_>,
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
22 file_path: &HgPath,
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
23 ) -> Result<Self, HgError> {
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
24 let index_path = store_path(file_path, b".i");
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
25 let data_path = store_path(file_path, b".d");
49517
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
26 let revlog =
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
27 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
28 Ok(Self { revlog })
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
29 }
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
30
49517
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
31 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
32 Self::open_vfs(&repo.store_vfs(), file_path)
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
33 }
52464a20add0 rhg: parallellize computation of [unsure_is_modified]
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49174
diff changeset
34
48542
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
35 /// The given node ID is that of the file as found in a filelog, not of a
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
36 /// changeset.
47969
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
37 pub fn data_for_node(
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
38 &self,
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
39 file_node: impl Into<NodePrefix>,
48540
20d0d896183e rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents: 48249
diff changeset
40 ) -> Result<FilelogRevisionData, RevlogError> {
47968
6f579618ea7b rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`
Simon Sapin <simon.sapin@octobus.net>
parents: 47963
diff changeset
41 let file_rev = self.revlog.rev_from_node(file_node.into())?;
47969
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
42 self.data_for_rev(file_rev)
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
43 }
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
44
48542
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
45 /// The given revision is that of the file as found in a filelog, not of a
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
46 /// changeset.
47969
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47968
diff changeset
47 pub fn data_for_rev(
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
48 &self,
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
49 file_rev: Revision,
48540
20d0d896183e rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents: 48249
diff changeset
50 ) -> Result<FilelogRevisionData, RevlogError> {
48541
f2f57724d4eb rhg: Add RevlogEntry::data that does delta resolution
Simon Sapin <simon.sapin@octobus.net>
parents: 48540
diff changeset
51 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
48540
20d0d896183e rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents: 48249
diff changeset
52 Ok(FilelogRevisionData(data.into()))
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
53 }
48542
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
54
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
55 /// The given node ID is that of the file as found in a filelog, not of a
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
56 /// changeset.
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
57 pub fn entry_for_node(
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
58 &self,
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
59 file_node: impl Into<NodePrefix>,
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
60 ) -> Result<FilelogEntry, RevlogError> {
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
61 let file_rev = self.revlog.rev_from_node(file_node.into())?;
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
62 self.entry_for_rev(file_rev)
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
63 }
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
64
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
65 /// The given revision is that of the file as found in a filelog, not of a
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
66 /// changeset.
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
67 pub fn entry_for_rev(
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
68 &self,
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
69 file_rev: Revision,
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
70 ) -> Result<FilelogEntry, RevlogError> {
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
71 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
72 }
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
73 }
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
74
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
75 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
76 let encoded_bytes =
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
77 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
78 get_path_from_bytes(&encoded_bytes).into()
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
79 }
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
80
48542
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
81 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
82
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
83 impl FilelogEntry<'_> {
48546
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
84 /// `self.data()` can be expensive, with decompression and delta
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
85 /// resolution.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
86 ///
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
87 /// *Without* paying this cost, based on revlog index information
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
88 /// including `RevlogEntry::uncompressed_len`:
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
89 ///
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
90 /// * Returns `true` if the length that `self.data().file_data().len()`
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
91 /// would return is definitely **not equal** to `other_len`.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
92 /// * Returns `false` if available information is inconclusive.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
93 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
94 // Relevant code that implements this behavior in Python code:
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
95 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
96 // revlog.size, revlog.rawsize
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
97
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
98 // Let’s call `file_data_len` what would be returned by
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
99 // `self.data().file_data().len()`.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
100
49174
3f86ee422095 censor: make rhg fall back to python when encountering a censored node
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 49089
diff changeset
101 if self.0.is_censored() {
48546
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
102 let file_data_len = 0;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
103 return other_len != file_data_len;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
104 }
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
105
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
106 if self.0.has_length_affecting_flag_processor() {
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
107 // We can’t conclude anything about `file_data_len`.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
108 return false;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
109 }
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
110
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
111 // Revlog revisions (usually) have metadata for the size of
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
112 // their data after decompression and delta resolution
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
113 // as would be returned by `Revlog::get_rev_data`.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
114 //
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
115 // For filelogs this is the file’s contents preceded by an optional
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
116 // metadata block.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
117 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
118 l as u64
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
119 } else {
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
120 // The field was set to -1, the actual uncompressed len is unknown.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
121 // We need to decompress to say more.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
122 return false;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
123 };
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
124 // `uncompressed_len = file_data_len + optional_metadata_len`,
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
125 // so `file_data_len <= uncompressed_len`.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
126 if uncompressed_len < other_len {
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
127 // Transitively, `file_data_len < other_len`.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
128 // So `other_len != file_data_len` definitely.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
129 return true;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
130 }
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
131
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
132 if uncompressed_len == other_len + 4 {
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
133 // It’s possible that `file_data_len == other_len` with an empty
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
134 // metadata block (2 start marker bytes + 2 end marker bytes).
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
135 // This happens when there wouldn’t otherwise be metadata, but
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
136 // the first 2 bytes of file data happen to match a start marker
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
137 // and would be ambiguous.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
138 return false;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
139 }
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
140
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
141 if !self.0.has_p1() {
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
142 // There may or may not be copy metadata, so we can’t deduce more
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
143 // about `file_data_len` without computing file data.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
144 return false;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
145 }
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
146
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
147 // Filelog ancestry is not meaningful in the way changelog ancestry is.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
148 // It only provides hints to delta generation.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
149 // p1 and p2 are set to null when making a copy or rename since
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
150 // contents are likely unrelated to what might have previously existed
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
151 // at the destination path.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
152 //
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
153 // Conversely, since here p1 is non-null, there is no copy metadata.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
154 // Note that this reasoning may be invalidated in the presence of
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
155 // merges made by some previous versions of Mercurial that
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
156 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
157 // and `tests/test-issue6528.t`.
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
158 //
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
159 // Since copy metadata is currently the only kind of metadata
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
160 // kept in revlog data of filelogs,
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
161 // this `FilelogEntry` does not have such metadata:
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
162 let file_data_len = uncompressed_len;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
163
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
164 return file_data_len != other_len;
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
165 }
e91aa800ae5b rhg: desambiguate status without decompressing filelog if possible
Simon Sapin <simon.sapin@octobus.net>
parents: 48542
diff changeset
166
48542
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
167 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
168 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
169 }
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
170 }
35c47015b9b7 rhg: Expose FilelogEntry that wraps RevlogEntry
Simon Sapin <simon.sapin@octobus.net>
parents: 48541
diff changeset
171
48540
20d0d896183e rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents: 48249
diff changeset
172 /// The data for one revision in a filelog, uncompressed and delta-resolved.
20d0d896183e rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents: 48249
diff changeset
173 pub struct FilelogRevisionData(Vec<u8>);
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
174
48540
20d0d896183e rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents: 48249
diff changeset
175 impl FilelogRevisionData {
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
176 /// Split into metadata and data
48249
e9faae0f445c rhg: simplify split_metadata
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 48237
diff changeset
177 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
178 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
179
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
180 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
181 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
48249
e9faae0f445c rhg: simplify split_metadata
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 48237
diff changeset
182 Ok((Some(metadata), data))
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
183 } else {
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
184 Err(HgError::corrupted(
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
185 "Missing metadata end delimiter in filelog entry",
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
186 ))
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
187 }
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
188 } else {
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
189 Ok((None, &self.0))
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
190 }
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
191 }
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
192
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
193 /// Returns the file contents at this revision, stripped of any metadata
48540
20d0d896183e rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents: 48249
diff changeset
194 pub fn file_data(&self) -> Result<&[u8], HgError> {
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
195 let (_metadata, data) = self.split()?;
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
196 Ok(data)
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
197 }
48237
027ebad952ac rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
198
027ebad952ac rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
199 /// Consume the entry, and convert it into data, discarding any metadata,
027ebad952ac rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
200 /// if present.
48540
20d0d896183e rhg: Rename some revlog-related types and methods
Simon Sapin <simon.sapin@octobus.net>
parents: 48249
diff changeset
201 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
48249
e9faae0f445c rhg: simplify split_metadata
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 48237
diff changeset
202 if let (Some(_metadata), data) = self.split()? {
48237
027ebad952ac rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
203 Ok(data.to_owned())
027ebad952ac rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
204 } else {
027ebad952ac rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
205 Ok(self.0)
027ebad952ac rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
206 }
027ebad952ac rhg: internally, return a structured representation from hg cat
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents: 47969
diff changeset
207 }
47961
4d2a5ca060e3 rust: Add a Filelog struct that wraps Revlog
Simon Sapin <simon.sapin@octobus.net>
parents:
diff changeset
208 }