annotate rust/hg-core/src/revlog/manifest.rs @ 52316:f4aede0f01af

rust-manifest: use `memchr` crate for all byte-finding needs. While writing a very dumb manifest diffing algorithm for a proof-of-concept I saw that `Manifest::find_by_path` was much slower than I was expecting. It turns out that the Rust stdlib uses slow (all is relative) code when searching for byte positions for reasons ranging from portability, SIMD API stability, nobody doing the work, etc. `memchr` is much faster for these purposes, so let's use it. I was measuring ~670ms of profile time in `find_by_path`; after this patch it went down to ~230ms.
author Raphaël Gomès <rgomes@octobus.net>
date Tue, 12 Nov 2024 23:20:04 +0100
parents 039b7caeb4d9
children a3fa37bdb7ec
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
52061
0ea323b7e3b1 rust-manifest: encode flags as `Option<NonZeroU8>`
Rapha?l Gom?s <rgomes@octobus.net>
parents: 51906
diff changeset
1 use std::num::NonZeroU8;
0ea323b7e3b1 rust-manifest: encode flags as `Option<NonZeroU8>`
Rapha?l Gom?s <rgomes@octobus.net>
parents: 51906
diff changeset
2
47991
001d747c2baf rust: Return HgError instead of RevlogError in revlog constructors
Simon Sapin <simon.sapin@octobus.net>
parents: 47985
diff changeset
3 use crate::errors::HgError;
47992
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47991
diff changeset
4 use crate::revlog::{Node, NodePrefix};
50010
750409505286 rust-clippy: merge "revlog" module definition and struct implementation
Rapha?l Gom?s <rgomes@octobus.net>
parents: 49145
diff changeset
5 use crate::revlog::{Revlog, RevlogError};
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
6 use crate::utils::hg_path::HgPath;
48392
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
7 use crate::utils::SliceExt;
51906
db7dbe6f7bb2 rust: add Vfs trait
Rapha?l Gom?s <rgomes@octobus.net>
parents: 51212
diff changeset
8 use crate::vfs::VfsImpl;
52286
039b7caeb4d9 rust-revlog: introduce an `options` module
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52072
diff changeset
9 use crate::{Graph, GraphError, Revision, UncheckedRevision};
039b7caeb4d9 rust-revlog: introduce an `options` module
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52072
diff changeset
10
039b7caeb4d9 rust-revlog: introduce an `options` module
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52072
diff changeset
11 use super::options::RevlogOpenOptions;
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
12
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
13 /// A specialized `Revlog` to work with `manifest` data format.
47985
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46499
diff changeset
14 pub struct Manifestlog {
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
15 /// The generic `revlog` format.
51212
13f58ce70299 rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50989
diff changeset
16 pub(crate) revlog: Revlog,
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
17 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
18
50989
27e773aa607d rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50988
diff changeset
19 impl Graph for Manifestlog {
27e773aa607d rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50988
diff changeset
20 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
27e773aa607d rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50988
diff changeset
21 self.revlog.parents(rev)
27e773aa607d rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50988
diff changeset
22 }
27e773aa607d rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50988
diff changeset
23 }
27e773aa607d rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50988
diff changeset
24
47985
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46499
diff changeset
25 impl Manifestlog {
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
26 /// Open the `manifest` of a repository given by its root.
51212
13f58ce70299 rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50989
diff changeset
27 pub fn open(
51906
db7dbe6f7bb2 rust: add Vfs trait
Rapha?l Gom?s <rgomes@octobus.net>
parents: 51212
diff changeset
28 store_vfs: &VfsImpl,
51212
13f58ce70299 rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50989
diff changeset
29 options: RevlogOpenOptions,
13f58ce70299 rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50989
diff changeset
30 ) -> Result<Self, HgError> {
13f58ce70299 rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50989
diff changeset
31 let revlog = Revlog::open(store_vfs, "00manifest.i", None, options)?;
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
32 Ok(Self { revlog })
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
33 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
34
47997
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
35 /// Return the `Manifest` for the given node ID.
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
36 ///
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
37 /// Note: this is a node ID in the manifestlog, typically found through
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
38 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
39 /// changeset.
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
40 ///
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
41 /// See also `Repo::manifest_for_node`
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
42 pub fn data_for_node(
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
43 &self,
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
44 node: NodePrefix,
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
45 ) -> Result<Manifest, RevlogError> {
47996
6f579618ea7b rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`
Simon Sapin <simon.sapin@octobus.net>
parents: 47992
diff changeset
46 let rev = self.revlog.rev_from_node(node)?;
50988
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
47 self.data_for_checked_rev(rev)
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
48 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
49
47997
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
50 /// Return the `Manifest` of a given revision number.
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
51 ///
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
52 /// Note: this is a revision number in the manifestlog, *not* of any
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
53 /// changeset.
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
54 ///
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
55 /// See also `Repo::manifest_for_rev`
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
56 pub fn data_for_rev(
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
57 &self,
50988
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
58 rev: UncheckedRevision,
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
59 ) -> Result<Manifest, RevlogError> {
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
60 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
61 Ok(Manifest { bytes })
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
62 }
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
63
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
64 pub fn data_for_checked_rev(
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
65 &self,
47997
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
66 rev: Revision,
87e3f878e65f rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents: 47996
diff changeset
67 ) -> Result<Manifest, RevlogError> {
50988
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
68 let bytes =
1928b770e3e7 rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents: 50010
diff changeset
69 self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned();
47985
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46499
diff changeset
70 Ok(Manifest { bytes })
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
71 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
72 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
73
47985
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46499
diff changeset
74 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
75 #[derive(Debug)]
47985
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46499
diff changeset
76 pub struct Manifest {
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
77 /// Format for a manifest: flat sequence of variable-size entries,
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
78 /// sorted by path, each as:
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
79 ///
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
80 /// ```text
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
81 /// <path> \0 <hex_node_id> <flags> \n
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
82 /// ```
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
83 ///
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
84 /// The last entry is also terminated by a newline character.
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
85 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
86 bytes: Vec<u8>,
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
87 }
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
88
47985
d44740725b95 rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents: 46499
diff changeset
89 impl Manifest {
52072
78fc666a3e94 rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52061
diff changeset
90 /// Return a new empty manifest
78fc666a3e94 rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52061
diff changeset
91 pub fn empty() -> Self {
78fc666a3e94 rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52061
diff changeset
92 Self { bytes: vec![] }
78fc666a3e94 rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52061
diff changeset
93 }
78fc666a3e94 rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52061
diff changeset
94
48392
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
95 pub fn iter(
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
96 &self,
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
97 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
98 self.bytes
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
99 .split(|b| b == &b'\n')
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
100 .filter(|line| !line.is_empty())
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
101 .map(ManifestEntry::from_raw)
45546
f2de24c2b1f6 hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents: 45539
diff changeset
102 }
47992
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47991
diff changeset
103
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47991
diff changeset
104 /// If the given path is in this manifest, return its filelog node ID
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
105 pub fn find_by_path(
48392
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
106 &self,
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
107 path: &HgPath,
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
108 ) -> Result<Option<ManifestEntry>, HgError> {
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
109 use std::cmp::Ordering::*;
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
110 let path = path.as_bytes();
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
111 // Both boundaries of this `&[u8]` slice are always at the boundary of
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
112 // an entry
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
113 let mut bytes = &*self.bytes;
48392
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
114
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
115 // Binary search algorithm derived from `[T]::binary_search_by`
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
116 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
117 // except we don’t have a slice of entries. Instead we jump to the
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
118 // middle of the byte slice and look around for entry delimiters
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
119 // (newlines).
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
120 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
121 let (entry_path, rest) =
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
122 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
123 let cmp = entry_path.cmp(path);
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
124 if cmp == Less {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
125 let after_newline = entry_range.end + 1;
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
126 bytes = &bytes[after_newline..];
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
127 } else if cmp == Greater {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
128 bytes = &bytes[..entry_range.start];
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
129 } else {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
130 return Ok(Some(ManifestEntry::from_path_and_rest(
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
131 entry_path, rest,
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
132 )));
47992
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47991
diff changeset
133 }
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47991
diff changeset
134 }
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47991
diff changeset
135 Ok(None)
796206e74b10 rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents: 47991
diff changeset
136 }
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
137
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
138 /// If there is at least one, return the byte range of an entry *excluding*
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
139 /// the final newline.
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
140 fn find_entry_near_middle_of(
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
141 bytes: &[u8],
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
142 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
143 let len = bytes.len();
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
144 if len > 0 {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
145 let middle = bytes.len() / 2;
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
146 // Integer division rounds down, so `middle < len`.
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
147 let (before, after) = bytes.split_at(middle);
52316
f4aede0f01af rust-manifest: use `memchr` crate for all byte-finding needs
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52286
diff changeset
148 let entry_start = match memchr::memrchr(b'\n', before) {
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
149 Some(i) => i + 1,
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
150 None => 0, // We choose the first entry in `bytes`
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
151 };
52316
f4aede0f01af rust-manifest: use `memchr` crate for all byte-finding needs
Rapha?l Gom?s <rgomes@octobus.net>
parents: 52286
diff changeset
152 let entry_end = match memchr::memchr(b'\n', after) {
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
153 Some(i) => {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
154 // No `+ 1` here to exclude this newline from the range
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
155 middle + i
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
156 }
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
157 None => {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
158 // In a well-formed manifest:
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
159 //
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
160 // * Since `len > 0`, `bytes` contains at least one entry
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
161 // * Every entry ends with a newline
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
162 // * Since `middle < len`, `after` contains at least the
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
163 // newline at the end of the last entry of `bytes`.
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
164 //
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
165 // We didn’t find a newline, so this manifest is not
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
166 // well-formed.
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
167 return Err(HgError::corrupted(
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
168 "manifest entry without \\n delimiter",
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
169 ));
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
170 }
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
171 };
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
172 Ok(Some(entry_start..entry_end))
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
173 } else {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
174 // len == 0
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
175 Ok(None)
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
176 }
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
177 }
45539
89ac95bd4993 hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff changeset
178 }
48392
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
179
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
180 /// A single entry of a `Manifest`: a path, its hex node ID, and its flags.
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
181 #[derive(Debug)]
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
182 pub struct ManifestEntry<'manifest> {
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
183 pub path: &'manifest HgPath,
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
184 pub hex_node_id: &'manifest [u8],
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
185
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
186 /// `Some` values are b'x', b'l', or b't'
52061
0ea323b7e3b1 rust-manifest: encode flags as `Option<NonZeroU8>`
Rapha?l Gom?s <rgomes@octobus.net>
parents: 51906
diff changeset
187 pub flags: Option<NonZeroU8>,
48392
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
188 }
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
189
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
190 impl<'a> ManifestEntry<'a> {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
191 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
192 bytes.split_2(b'\0').ok_or_else(|| {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
193 HgError::corrupted("manifest entry without \\0 delimiter")
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
194 })
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
195 }
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
196
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
197 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
198 let (hex_node_id, flags) = match rest.split_last() {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
199 Some((&b'x', rest)) => (rest, Some(b'x')),
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
200 Some((&b'l', rest)) => (rest, Some(b'l')),
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
201 Some((&b't', rest)) => (rest, Some(b't')),
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
202 _ => (rest, None),
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
203 };
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
204 Self {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
205 path: HgPath::new(path),
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
206 hex_node_id,
52061
0ea323b7e3b1 rust-manifest: encode flags as `Option<NonZeroU8>`
Raphaël Gomès <rgomes@octobus.net>
parents: 51906
diff changeset
207 flags: flags.map(|f| f.try_into().expect("invalid flag")),
48532
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
208 }
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
209 }
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
210
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
211 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
212 let (path, rest) = Self::split_path(bytes)?;
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
213 Ok(Self::from_path_and_rest(path, rest))
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
214 }
e293ff808a05 rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents: 48392
diff changeset
215
48392
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
216 pub fn node_id(&self) -> Result<Node, HgError> {
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
217 Node::from_hex_for_repo(self.hex_node_id)
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
218 }
eb428010aad2 rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents: 48391
diff changeset
219 }