comparison rust/hg-core/src/repo.rs @ 50245:dbe09fb038fc stable

rhg: remember the inode of .hg/dirstate This allows us to detect changes of `.hg/dirstate`, which is either the full dirstate (in dirstate-v1) or the docket file (v2) without relying on data inside the file. It only works on UNIX systems. This fixes a race condition for dirstate-v1 (as demonstrated by the test changes) and adds a comfortable layer of sanity for dirstate-v2.
author Raphaël Gomès <rgomes@octobus.net>
date Wed, 01 Mar 2023 16:48:09 +0100
parents 07d030b38097
children a6b8b1ab9116
comparison
equal deleted inserted replaced
50244:07d030b38097 50245:dbe09fb038fc
257 .read("dirstate") 257 .read("dirstate")
258 .io_not_found_as_none()? 258 .io_not_found_as_none()?
259 .unwrap_or(Vec::new())) 259 .unwrap_or(Vec::new()))
260 } 260 }
261 261
262 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
263 use std::os::unix::fs::MetadataExt;
264 Ok(self
265 .hg_vfs()
266 .symlink_metadata("dirstate")
267 .io_not_found_as_none()?
268 .map(|meta| meta.ino()))
269 }
270
262 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> { 271 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
263 Ok(*self 272 Ok(*self
264 .dirstate_parents 273 .dirstate_parents
265 .get_or_init(|| self.read_dirstate_parents())?) 274 .get_or_init(|| self.read_dirstate_parents())?)
266 } 275 }
282 } 291 }
283 292
284 /// Returns the information read from the dirstate docket necessary to 293 /// Returns the information read from the dirstate docket necessary to
285 /// check if the data file has been updated/deleted by another process 294 /// check if the data file has been updated/deleted by another process
286 /// since we last read the dirstate. 295 /// since we last read the dirstate.
287 /// Namely, the data file uuid and the data size. 296 /// Namely, the inode, data file uuid and the data size.
288 fn get_dirstate_data_file_integrity( 297 fn get_dirstate_data_file_integrity(
289 &self, 298 &self,
290 ) -> Result<(Option<Vec<u8>>, usize), HgError> { 299 ) -> Result<(Option<u64>, Option<Vec<u8>>, usize), HgError> {
291 assert!( 300 assert!(
292 self.has_dirstate_v2(), 301 self.has_dirstate_v2(),
293 "accessing dirstate data file ID without dirstate-v2" 302 "accessing dirstate data file ID without dirstate-v2"
294 ); 303 );
304 // Get the identity before the contents since we could have a race
305 // between the two. Having an identity that is too old is fine, but
306 // one that is younger than the content change is bad.
307 let identity = self.dirstate_identity()?;
295 let dirstate = self.dirstate_file_contents()?; 308 let dirstate = self.dirstate_file_contents()?;
296 if dirstate.is_empty() { 309 if dirstate.is_empty() {
297 self.dirstate_parents.set(DirstateParents::NULL); 310 self.dirstate_parents.set(DirstateParents::NULL);
298 Ok((None, 0)) 311 Ok((identity, None, 0))
299 } else { 312 } else {
300 let docket = 313 let docket =
301 crate::dirstate_tree::on_disk::read_docket(&dirstate)?; 314 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
302 self.dirstate_parents.set(docket.parents()); 315 self.dirstate_parents.set(docket.parents());
303 Ok((Some(docket.uuid.to_owned()), docket.data_size())) 316 Ok((identity, Some(docket.uuid.to_owned()), docket.data_size()))
304 } 317 }
305 } 318 }
306 319
307 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> { 320 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
308 if self.has_dirstate_v2() { 321 if self.has_dirstate_v2() {
345 } else { 358 } else {
346 debug_wait_for_file_or_print( 359 debug_wait_for_file_or_print(
347 self.config(), 360 self.config(),
348 "dirstate.pre-read-file", 361 "dirstate.pre-read-file",
349 ); 362 );
363 let identity = self.dirstate_identity()?;
350 let dirstate_file_contents = self.dirstate_file_contents()?; 364 let dirstate_file_contents = self.dirstate_file_contents()?;
351 return if dirstate_file_contents.is_empty() { 365 return if dirstate_file_contents.is_empty() {
352 self.dirstate_parents.set(DirstateParents::NULL); 366 self.dirstate_parents.set(DirstateParents::NULL);
353 Ok(OwningDirstateMap::new_empty(Vec::new())) 367 Ok(OwningDirstateMap::new_empty(Vec::new()))
354 } else { 368 } else {
355 let (map, parents) = 369 let (map, parents) = OwningDirstateMap::new_v1(
356 OwningDirstateMap::new_v1(dirstate_file_contents)?; 370 dirstate_file_contents,
371 identity,
372 )?;
357 self.dirstate_parents.set(parents); 373 self.dirstate_parents.set(parents);
358 Ok(map) 374 Ok(map)
359 }; 375 };
360 } 376 }
361 } 377 }
363 fn read_docket_and_data_file( 379 fn read_docket_and_data_file(
364 &self, 380 &self,
365 ) -> Result<OwningDirstateMap, DirstateError> { 381 ) -> Result<OwningDirstateMap, DirstateError> {
366 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); 382 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
367 let dirstate_file_contents = self.dirstate_file_contents()?; 383 let dirstate_file_contents = self.dirstate_file_contents()?;
384 let identity = self.dirstate_identity()?;
368 if dirstate_file_contents.is_empty() { 385 if dirstate_file_contents.is_empty() {
369 self.dirstate_parents.set(DirstateParents::NULL); 386 self.dirstate_parents.set(DirstateParents::NULL);
370 return Ok(OwningDirstateMap::new_empty(Vec::new())); 387 return Ok(OwningDirstateMap::new_empty(Vec::new()));
371 } 388 }
372 let docket = crate::dirstate_tree::on_disk::read_docket( 389 let docket = crate::dirstate_tree::on_disk::read_docket(
408 } 425 }
409 } 426 }
410 } 427 }
411 Err(e) => return Err(e.into()), 428 Err(e) => return Err(e.into()),
412 }; 429 };
413 OwningDirstateMap::new_v2(contents, data_size, metadata, uuid) 430 OwningDirstateMap::new_v2(
431 contents, data_size, metadata, uuid, identity,
432 )
414 } else { 433 } else {
415 match self 434 match self
416 .hg_vfs() 435 .hg_vfs()
417 .mmap_open(docket.data_filename()) 436 .mmap_open(docket.data_filename())
418 .io_not_found_as_none() 437 .io_not_found_as_none()
419 { 438 {
420 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2( 439 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
421 data_mmap, data_size, metadata, uuid, 440 data_mmap, data_size, metadata, uuid, identity,
422 ), 441 ),
423 Ok(None) => { 442 Ok(None) => {
424 // Race where the data file was deleted right after we 443 // Race where the data file was deleted right after we
425 // read the docket, try again 444 // read the docket, try again
426 return Err(race_error.into()); 445 return Err(race_error.into());
532 let map = self.dirstate_map()?; 551 let map = self.dirstate_map()?;
533 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if 552 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
534 // it’s unset 553 // it’s unset
535 let parents = self.dirstate_parents()?; 554 let parents = self.dirstate_parents()?;
536 let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() { 555 let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
537 let (uuid, data_size) = self.get_dirstate_data_file_integrity()?; 556 let (identity, uuid, data_size) =
557 self.get_dirstate_data_file_integrity()?;
558 let identity_changed = identity != map.old_identity();
538 let uuid_changed = uuid.as_deref() != map.old_uuid(); 559 let uuid_changed = uuid.as_deref() != map.old_uuid();
539 let data_length_changed = data_size != map.old_data_size(); 560 let data_length_changed = data_size != map.old_data_size();
540 561
541 if uuid_changed || data_length_changed { 562 if identity_changed || uuid_changed || data_length_changed {
542 // If uuid or length changed since last disk read, don't write. 563 // If any of identity, uuid or length have changed since
564 // last disk read, don't write.
543 // This is fine because either we're in a command that doesn't 565 // This is fine because either we're in a command that doesn't
544 // write anything too important (like `hg status`), or we're in 566 // write anything too important (like `hg status`), or we're in
545 // `hg add` and we're supposed to have taken the lock before 567 // `hg add` and we're supposed to have taken the lock before
546 // reading anyway. 568 // reading anyway.
547 // 569 //
634 HgError::corrupted("overflow in dirstate docket serialization") 656 HgError::corrupted("overflow in dirstate docket serialization")
635 })?; 657 })?;
636 658
637 (packed_dirstate, old_uuid) 659 (packed_dirstate, old_uuid)
638 } else { 660 } else {
661 let identity = self.dirstate_identity()?;
662 if identity != map.old_identity() {
663 // If identity changed since last disk read, don't write.
664 // This is fine because either we're in a command that doesn't
665 // write anything too important (like `hg status`), or we're in
666 // `hg add` and we're supposed to have taken the lock before
667 // reading anyway.
668 //
669 // TODO complain loudly if we've changed anything important
670 // without taking the lock.
671 // (see `hg help config.format.use-dirstate-tracked-hint`)
672 log::debug!(
673 "dirstate has changed since last read, not updating."
674 );
675 return Ok(());
676 }
639 (map.pack_v1(parents)?, None) 677 (map.pack_v1(parents)?, None)
640 }; 678 };
641 679
642 let vfs = self.hg_vfs(); 680 let vfs = self.hg_vfs();
643 vfs.atomic_write("dirstate", &packed_dirstate)?; 681 vfs.atomic_write("dirstate", &packed_dirstate)?;