comparison rust/hg-core/src/revlog/filelog.rs @ 48575:e91aa800ae5b

rhg: disambiguate status without decompressing filelog if possible When status cannot tell from `stat()` and the dirstate whether a file is clean or modified, we need to compare it against the filelog. This comparison can skip looking at contents if the lengths differ. This changeset optimizes this further to deduce what we can about the length of the filelog without decompressing it or resolving deltas. Differential Revision: https://phab.mercurial-scm.org/D11965
author Simon Sapin <simon.sapin@octobus.net>
date Fri, 07 Jan 2022 14:40:21 +0100
parents 35c47015b9b7
children bfc117647c71
comparison
equal deleted inserted replaced
48574:5026a0d37526 48575:e91aa800ae5b
71 } 71 }
72 72
73 pub struct FilelogEntry<'a>(RevlogEntry<'a>); 73 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
74 74
75 impl FilelogEntry<'_> { 75 impl FilelogEntry<'_> {
76 /// `self.data()` can be expensive, with decompression and delta
77 /// resolution.
78 ///
79 /// *Without* paying this cost, based on revlog index information
80 /// including `RevlogEntry::uncompressed_len`:
81 ///
82 /// * Returns `true` if the length that `self.data().file_data().len()`
83 /// would return is definitely **not equal** to `other_len`.
84 /// * Returns `false` if available information is inconclusive (the lengths may or may not be equal).
85 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
86 // Relevant Python code that implements this behavior:
87 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
88 // revlog.size, revlog.rawsize
89
90 // Let’s call `file_data_len` what would be returned by
91 // `self.data().file_data().len()`.
92
93 if self.0.is_cencored() {
94 let file_data_len = 0; // a censored revision is treated here as having empty file data
95 return other_len != file_data_len;
96 }
97
98 if self.0.has_length_affecting_flag_processor() {
99 // We can’t conclude anything about `file_data_len`.
100 return false;
101 }
102
103 // Revlog revisions (usually) have metadata for the size of
104 // their data after decompression and delta resolution
105 // as would be returned by `Revlog::get_rev_data`.
106 //
107 // For filelogs this is the file’s contents preceded by an optional
108 // metadata block.
109 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
110 l as u64
111 } else {
112 // The field was set to -1, the actual uncompressed len is unknown.
113 // We need to decompress to say more.
114 return false;
115 };
116 // `uncompressed_len = file_data_len + optional_metadata_len`,
117 // so `file_data_len <= uncompressed_len`.
118 if uncompressed_len < other_len {
119 // Transitively, `file_data_len < other_len`.
120 // So `other_len != file_data_len` definitely.
121 return true;
122 }
123
124 if uncompressed_len == other_len + 4 {
125 // It’s possible that `file_data_len == other_len` with an empty
126 // metadata block (2 start marker bytes + 2 end marker bytes).
127 // This happens when there wouldn’t otherwise be metadata, but
128 // the first 2 bytes of file data happen to match a start marker
129 // and would be ambiguous.
130 return false;
131 }
132
133 if !self.0.has_p1() {
134 // There may or may not be copy metadata, so we can’t deduce more
135 // about `file_data_len` without computing file data.
136 return false;
137 }
138
139 // Filelog ancestry is not meaningful in the way changelog ancestry is.
140 // It only provides hints to delta generation.
141 // p1 and p2 are set to null when making a copy or rename since
142 // contents are likely unrelated to what might have previously existed
143 // at the destination path.
144 //
145 // Conversely, since here p1 is non-null, there is no copy metadata.
146 // Note that this reasoning may be invalidated in the presence of
147 // merges made by some previous versions of Mercurial that
148 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
149 // and `tests/test-issue6528.t`.
150 //
151 // Since copy metadata is currently the only kind of metadata
152 // kept in revlog data of filelogs,
153 // this `FilelogEntry` does not have such metadata:
154 let file_data_len = uncompressed_len;
155
156 return file_data_len != other_len;
157 }
158
76 pub fn data(&self) -> Result<FilelogRevisionData, HgError> { 159 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
77 Ok(FilelogRevisionData(self.0.data()?.into_owned())) 160 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
78 } 161 }
79 } 162 }
80 163