comparison rust/hg-core/src/revlog/revlog.rs @ 45531:b0d6309ff50c

hg-core: check data integrity in `Revlog`. Check that the hash of the data reconstructed from deltas matches the hash stored in the revision. Differential Revision: https://phab.mercurial-scm.org/D9005
author Antoine Cezar <antoine.cezar@octobus.net>
date Wed, 02 Sep 2020 15:23:25 +0200
parents 26c53ee51c68
children 4f11a67a12fb
comparison
equal deleted inserted replaced
45530:b1cea0dc9db0 45531:b0d6309ff50c
3 use std::io::Read; 3 use std::io::Read;
4 use std::ops::Deref; 4 use std::ops::Deref;
5 use std::path::Path; 5 use std::path::Path;
6 6
7 use byteorder::{BigEndian, ByteOrder}; 7 use byteorder::{BigEndian, ByteOrder};
8 use crypto::digest::Digest;
9 use crypto::sha1::Sha1;
8 use flate2::read::ZlibDecoder; 10 use flate2::read::ZlibDecoder;
9 use memmap::{Mmap, MmapOptions}; 11 use memmap::{Mmap, MmapOptions};
10 use micro_timer::timed; 12 use micro_timer::timed;
11 use zstd; 13 use zstd;
12 14
13 use super::index::Index; 15 use super::index::Index;
16 use super::node::{NODE_BYTES_LENGTH, NULL_NODE_ID};
14 use super::patch; 17 use super::patch;
15 use crate::revlog::Revision; 18 use crate::revlog::Revision;
16 19
17 pub enum RevlogError { 20 pub enum RevlogError {
18 IoError(std::io::Error), 21 IoError(std::io::Error),
91 entry = self 94 entry = self
92 .get_entry(base_rev) 95 .get_entry(base_rev)
93 .map_err(|_| RevlogError::Corrupted)?; 96 .map_err(|_| RevlogError::Corrupted)?;
94 } 97 }
95 98
96 if delta_chain.is_empty() { 99 // TODO do not look twice in the index
97 Ok(entry.data()?.into()) 100 let index = self.index();
101 let index_entry =
102 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?;
103
104 let data: Vec<u8> = if delta_chain.is_empty() {
105 entry.data()?.into()
98 } else { 106 } else {
99 Revlog::build_data_from_deltas(entry, &delta_chain) 107 Revlog::build_data_from_deltas(entry, &delta_chain)?
100 } 108 };
109
110 if self.check_hash(
111 index_entry.p1(),
112 index_entry.p2(),
113 index_entry.hash(),
114 &data,
115 ) {
116 Ok(data)
117 } else {
118 Err(RevlogError::Corrupted)
119 }
120 }
121
122 /// Check the hash of some given data against the recorded hash.
123 pub fn check_hash(
124 &self,
125 p1: Revision,
126 p2: Revision,
127 expected: &[u8],
128 data: &[u8],
129 ) -> bool {
130 let index = self.index();
131 let e1 = index.get_entry(p1);
132 let h1 = match e1 {
133 Some(ref entry) => entry.hash(),
134 None => &NULL_NODE_ID,
135 };
136 let e2 = index.get_entry(p2);
137 let h2 = match e2 {
138 Some(ref entry) => entry.hash(),
139 None => &NULL_NODE_ID,
140 };
141
142 hash(data, &h1, &h2).as_slice() == expected
101 } 143 }
102 144
103 /// Build the full data of a revision out of its snapshot 145 /// Build the full data of a revision out of its snapshot
104 /// and its deltas. 146 /// and its deltas.
105 #[timed] 147 #[timed]
232 /// Format version of the revlog. 274 /// Format version of the revlog.
233 pub fn get_version(index_bytes: &[u8]) -> u16 { 275 pub fn get_version(index_bytes: &[u8]) -> u16 {
234 BigEndian::read_u16(&index_bytes[2..=3]) 276 BigEndian::read_u16(&index_bytes[2..=3])
235 } 277 }
236 278
279 /// Calculate the hash of a revision given its data and its parents.
280 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
281 let mut hasher = Sha1::new();
282 let (a, b) = (p1_hash, p2_hash);
283 if a > b {
284 hasher.input(b);
285 hasher.input(a);
286 } else {
287 hasher.input(a);
288 hasher.input(b);
289 }
290 hasher.input(data);
291 let mut hash = vec![0; NODE_BYTES_LENGTH];
292 hasher.result(&mut hash);
293 hash
294 }
295
237 #[cfg(test)] 296 #[cfg(test)]
238 mod tests { 297 mod tests {
239 use super::*; 298 use super::*;
240 299
241 use super::super::index::IndexEntryBuilder; 300 use super::super::index::IndexEntryBuilder;