Mercurial > public > mercurial-scm > hg-stable
comparison rust/hg-core/src/revlog/filelog.rs @ 48575:e91aa800ae5b
rhg: disambiguate status without decompressing filelog if possible
When `stat()` and the dirstate are not enough for status to tell whether a file
is clean or modified, we need to compare it against the filelog.
This comparison can skip looking at contents if the lengths differ.
This changeset optimizes this further by deducing what we can about the length
of the filelog data without decompressing it or resolving deltas.
Differential Revision: https://phab.mercurial-scm.org/D11965
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Fri, 07 Jan 2022 14:40:21 +0100 |
parents | 35c47015b9b7 |
children | bfc117647c71 |
comparison
equal
deleted
inserted
replaced
48574:5026a0d37526 | 48575:e91aa800ae5b |
---|---|
71 } | 71 } |
72 | 72 |
73 pub struct FilelogEntry<'a>(RevlogEntry<'a>); | 73 pub struct FilelogEntry<'a>(RevlogEntry<'a>); |
74 | 74 |
75 impl FilelogEntry<'_> { | 75 impl FilelogEntry<'_> { |
76 /// `self.data()` can be expensive, with decompression and delta | |
77 /// resolution. | |
78 /// | |
79 /// *Without* paying this cost, based on revlog index information | |
80 /// including `RevlogEntry::uncompressed_len`: | |
81 /// | |
82 /// * Returns `true` if the length that `self.data().file_data().len()` | |
83 /// would return is definitely **not equal** to `other_len`. | |
84 /// * Returns `false` if available information is inconclusive. | |
85 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool { | |
86 // Relevant Python code that implements this behavior: | |
87 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied, | |
88 // revlog.size, revlog.rawsize | |
89 | |
90 // Let’s call `file_data_len` what would be returned by | |
91 // `self.data().file_data().len()`. | |
92 | |
93 if self.0.is_cencored() { | |
94 let file_data_len = 0; | |
95 return other_len != file_data_len; | |
96 } | |
97 | |
98 if self.0.has_length_affecting_flag_processor() { | |
99 // We can’t conclude anything about `file_data_len`. | |
100 return false; | |
101 } | |
102 | |
103 // Revlog revisions (usually) have metadata for the size of | |
104 // their data after decompression and delta resolution | |
105 // as would be returned by `Revlog::get_rev_data`. | |
106 // | |
107 // For filelogs this is the file’s contents preceded by an optional | |
108 // metadata block. | |
109 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() { | |
110 l as u64 | |
111 } else { | |
112 // The field was set to -1, the actual uncompressed len is unknown. | |
113 // We need to decompress to say more. | |
114 return false; | |
115 }; | |
116 // `uncompressed_len = file_data_len + optional_metadata_len`, | |
117 // so `file_data_len <= uncompressed_len`. | |
118 if uncompressed_len < other_len { | |
119 // Transitively, `file_data_len < other_len`. | |
120 // So `other_len != file_data_len` definitely. | |
121 return true; | |
122 } | |
123 | |
124 if uncompressed_len == other_len + 4 { | |
125 // It’s possible that `file_data_len == other_len` with an empty | |
126 // metadata block (2 start marker bytes + 2 end marker bytes). | |
127 // This happens when there wouldn’t otherwise be metadata, but | |
128 // the first 2 bytes of file data happen to match a start marker | |
129 // and would be ambiguous. | |
130 return false; | |
131 } | |
132 | |
133 if !self.0.has_p1() { | |
134 // There may or may not be copy metadata, so we can’t deduce more | |
135 // about `file_data_len` without computing file data. | |
136 return false; | |
137 } | |
138 | |
139 // Filelog ancestry is not meaningful in the way changelog ancestry is. | |
140 // It only provides hints to delta generation. | |
141 // p1 and p2 are set to null when making a copy or rename since | |
142 // contents are likely unrelated to what might have previously existed | |
143 // at the destination path. | |
144 // | |
145 // Conversely, since here p1 is non-null, there is no copy metadata. | |
146 // Note that this reasoning may be invalidated in the presence of | |
147 // merges made by some previous versions of Mercurial that | |
148 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528> | |
149 // and `tests/test-issue6528.t`. | |
150 // | |
151 // Since copy metadata is currently the only kind of metadata | |
152 // kept in revlog data of filelogs, | |
153 // this `FilelogEntry` does not have such metadata: | |
154 let file_data_len = uncompressed_len; | |
155 | |
156 return file_data_len != other_len; | |
157 } | |
158 | |
76 pub fn data(&self) -> Result<FilelogRevisionData, HgError> { | 159 pub fn data(&self) -> Result<FilelogRevisionData, HgError> { |
77 Ok(FilelogRevisionData(self.0.data()?.into_owned())) | 160 Ok(FilelogRevisionData(self.0.data()?.into_owned())) |
78 } | 161 } |
79 } | 162 } |
80 | 163 |