Mercurial > public > mercurial-scm > hg-stable
annotate rust/hg-core/src/revlog/manifest.rs @ 52316:f4aede0f01af
rust-manifest: use `memchr` crate for all byte-finding needs
While writing a very dumb manifest diffing algorithm for a proof-of-concept
I saw that `Manifest::find_by_path` was much slower than I was expecting.
It turns out that the Rust stdlib uses slow (all is relative) code when
searching for byte positions for reasons ranging from portability, SIMD
API stability, nobody doing the work, etc. `memch` is much faster for these
purposes, so let's use it.
I was measuring ~670ms of profile time in `find_by_path`, after this patch
it went down to ~230ms.
author | Rapha?l Gom?s <rgomes@octobus.net> |
---|---|
date | Tue, 12 Nov 2024 23:20:04 +0100 |
parents | 039b7caeb4d9 |
children | a3fa37bdb7ec |
rev | line source |
---|---|
52061
0ea323b7e3b1
rust-manifest: encode flags as `Option<NonZeroU8>`
Rapha?l Gom?s <rgomes@octobus.net>
parents:
51906
diff
changeset
|
1 use std::num::NonZeroU8; |
0ea323b7e3b1
rust-manifest: encode flags as `Option<NonZeroU8>`
Rapha?l Gom?s <rgomes@octobus.net>
parents:
51906
diff
changeset
|
2 |
47991
001d747c2baf
rust: Return HgError instead of RevlogError in revlog constructors
Simon Sapin <simon.sapin@octobus.net>
parents:
47985
diff
changeset
|
3 use crate::errors::HgError; |
47992
796206e74b10
rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents:
47991
diff
changeset
|
4 use crate::revlog::{Node, NodePrefix}; |
50010
750409505286
rust-clippy: merge "revlog" module definition and struct implementation
Rapha?l Gom?s <rgomes@octobus.net>
parents:
49145
diff
changeset
|
5 use crate::revlog::{Revlog, RevlogError}; |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
6 use crate::utils::hg_path::HgPath; |
48392
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
7 use crate::utils::SliceExt; |
51906 | 8 use crate::vfs::VfsImpl; |
52286
039b7caeb4d9
rust-revlog: introduce an `options` module
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52072
diff
changeset
|
9 use crate::{Graph, GraphError, Revision, UncheckedRevision}; |
039b7caeb4d9
rust-revlog: introduce an `options` module
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52072
diff
changeset
|
10 |
039b7caeb4d9
rust-revlog: introduce an `options` module
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52072
diff
changeset
|
11 use super::options::RevlogOpenOptions; |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
12 |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
13 /// A specialized `Revlog` to work with `manifest` data format. |
47985
d44740725b95
rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents:
46499
diff
changeset
|
14 pub struct Manifestlog { |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
15 /// The generic `revlog` format. |
51212
13f58ce70299
rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50989
diff
changeset
|
16 pub(crate) revlog: Revlog, |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
17 } |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
18 |
50989
27e773aa607d
rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50988
diff
changeset
|
19 impl Graph for Manifestlog { |
27e773aa607d
rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50988
diff
changeset
|
20 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { |
27e773aa607d
rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50988
diff
changeset
|
21 self.revlog.parents(rev) |
27e773aa607d
rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50988
diff
changeset
|
22 } |
27e773aa607d
rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50988
diff
changeset
|
23 } |
27e773aa607d
rust: implement the `Graph` trait for all revlogs
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50988
diff
changeset
|
24 |
47985
d44740725b95
rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents:
46499
diff
changeset
|
25 impl Manifestlog { |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
26 /// Open the `manifest` of a repository given by its root. |
51212
13f58ce70299
rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50989
diff
changeset
|
27 pub fn open( |
51906 | 28 store_vfs: &VfsImpl, |
51212
13f58ce70299
rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50989
diff
changeset
|
29 options: RevlogOpenOptions, |
13f58ce70299
rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50989
diff
changeset
|
30 ) -> Result<Self, HgError> { |
13f58ce70299
rust-revlog: teach the revlog opening code to read the repo options
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50989
diff
changeset
|
31 let revlog = Revlog::open(store_vfs, "00manifest.i", None, options)?; |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
32 Ok(Self { revlog }) |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
33 } |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
34 |
47997
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
35 /// Return the `Manifest` for the given node ID. |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
36 /// |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
37 /// Note: this is a node ID in the manifestlog, typically found through |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
38 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
39 /// changeset. |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
40 /// |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
41 /// See also `Repo::manifest_for_node` |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
42 pub fn data_for_node( |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
43 &self, |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
44 node: NodePrefix, |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
45 ) -> Result<Manifest, RevlogError> { |
47996
6f579618ea7b
rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`
Simon Sapin <simon.sapin@octobus.net>
parents:
47992
diff
changeset
|
46 let rev = self.revlog.rev_from_node(node)?; |
50988
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
47 self.data_for_checked_rev(rev) |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
48 } |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
49 |
47997
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
50 /// Return the `Manifest` of a given revision number. |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
51 /// |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
52 /// Note: this is a revision number in the manifestlog, *not* of any |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
53 /// changeset. |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
54 /// |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
55 /// See also `Repo::manifest_for_rev` |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
56 pub fn data_for_rev( |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
57 &self, |
50988
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
58 rev: UncheckedRevision, |
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
59 ) -> Result<Manifest, RevlogError> { |
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
60 let bytes = self.revlog.get_rev_data(rev)?.into_owned(); |
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
61 Ok(Manifest { bytes }) |
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
62 } |
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
63 |
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
64 pub fn data_for_checked_rev( |
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
65 &self, |
47997
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
66 rev: Revision, |
87e3f878e65f
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev
Simon Sapin <simon.sapin@octobus.net>
parents:
47996
diff
changeset
|
67 ) -> Result<Manifest, RevlogError> { |
50988
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
68 let bytes = |
1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
Rapha?l Gom?s <rgomes@octobus.net>
parents:
50010
diff
changeset
|
69 self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned(); |
47985
d44740725b95
rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents:
46499
diff
changeset
|
70 Ok(Manifest { bytes }) |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
71 } |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
72 } |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
73 |
47985
d44740725b95
rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents:
46499
diff
changeset
|
74 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
75 #[derive(Debug)] |
47985
d44740725b95
rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents:
46499
diff
changeset
|
76 pub struct Manifest { |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
77 /// Format for a manifest: flat sequence of variable-size entries, |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
78 /// sorted by path, each as: |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
79 /// |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
80 /// ```text |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
81 /// <path> \0 <hex_node_id> <flags> \n |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
82 /// ``` |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
83 /// |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
84 /// The last entry is also terminated by a newline character. |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
85 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`. |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
86 bytes: Vec<u8>, |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
87 } |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
88 |
47985
d44740725b95
rust: Rename Manifest to Manifestlog, ManifestEntry to Manifest
Simon Sapin <simon.sapin@octobus.net>
parents:
46499
diff
changeset
|
89 impl Manifest { |
52072
78fc666a3e94
rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52061
diff
changeset
|
90 /// Return a new empty manifest |
78fc666a3e94
rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52061
diff
changeset
|
91 pub fn empty() -> Self { |
78fc666a3e94
rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52061
diff
changeset
|
92 Self { bytes: vec![] } |
78fc666a3e94
rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52061
diff
changeset
|
93 } |
78fc666a3e94
rust-files: check for empty manifests caused by narrow
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52061
diff
changeset
|
94 |
48392
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
95 pub fn iter( |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
96 &self, |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
97 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> { |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
98 self.bytes |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
99 .split(|b| b == &b'\n') |
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
100 .filter(|line| !line.is_empty()) |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
101 .map(ManifestEntry::from_raw) |
45546
f2de24c2b1f6
hg-core: add `files_with_nodes` to `Manifest`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
45539
diff
changeset
|
102 } |
47992
796206e74b10
rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents:
47991
diff
changeset
|
103 |
796206e74b10
rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents:
47991
diff
changeset
|
104 /// If the given path is in this manifest, return its filelog node ID |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
105 pub fn find_by_path( |
48392
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
106 &self, |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
107 path: &HgPath, |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
108 ) -> Result<Option<ManifestEntry>, HgError> { |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
109 use std::cmp::Ordering::*; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
110 let path = path.as_bytes(); |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
111 // Both boundaries of this `&[u8]` slice are always at the boundary of |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
112 // an entry |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
113 let mut bytes = &*self.bytes; |
48392
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
114 |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
115 // Binary search algorithm derived from `[T]::binary_search_by` |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
116 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221> |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
117 // except we don’t have a slice of entries. Instead we jump to the |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
118 // middle of the byte slice and look around for entry delimiters |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
119 // (newlines). |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
120 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
121 let (entry_path, rest) = |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
122 ManifestEntry::split_path(&bytes[entry_range.clone()])?; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
123 let cmp = entry_path.cmp(path); |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
124 if cmp == Less { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
125 let after_newline = entry_range.end + 1; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
126 bytes = &bytes[after_newline..]; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
127 } else if cmp == Greater { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
128 bytes = &bytes[..entry_range.start]; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
129 } else { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
130 return Ok(Some(ManifestEntry::from_path_and_rest( |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
131 entry_path, rest, |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
132 ))); |
47992
796206e74b10
rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents:
47991
diff
changeset
|
133 } |
796206e74b10
rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents:
47991
diff
changeset
|
134 } |
796206e74b10
rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents:
47991
diff
changeset
|
135 Ok(None) |
796206e74b10
rhg: Reuse manifest when checking status of multiple ambiguous files
Simon Sapin <simon.sapin@octobus.net>
parents:
47991
diff
changeset
|
136 } |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
137 |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
138 /// If there is at least one, return the byte range of an entry *excluding* |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
139 /// the final newline. |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
140 fn find_entry_near_middle_of( |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
141 bytes: &[u8], |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
142 ) -> Result<Option<std::ops::Range<usize>>, HgError> { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
143 let len = bytes.len(); |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
144 if len > 0 { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
145 let middle = bytes.len() / 2; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
146 // Integer division rounds down, so `middle < len`. |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
147 let (before, after) = bytes.split_at(middle); |
52316
f4aede0f01af
rust-manifest: use `memchr` crate for all byte-finding needs
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52286
diff
changeset
|
148 let entry_start = match memchr::memrchr(b'\n', before) { |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
149 Some(i) => i + 1, |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
150 None => 0, // We choose the first entry in `bytes` |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
151 }; |
52316
f4aede0f01af
rust-manifest: use `memchr` crate for all byte-finding needs
Rapha?l Gom?s <rgomes@octobus.net>
parents:
52286
diff
changeset
|
152 let entry_end = match memchr::memchr(b'\n', after) { |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
153 Some(i) => { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
154 // No `+ 1` here to exclude this newline from the range |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
155 middle + i |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
156 } |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
157 None => { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
158 // In a well-formed manifest: |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
159 // |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
160 // * Since `len > 0`, `bytes` contains at least one entry |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
161 // * Every entry ends with a newline |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
162 // * Since `middle < len`, `after` contains at least the |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
163 // newline at the end of the last entry of `bytes`. |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
164 // |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
165 // We didn’t find a newline, so this manifest is not |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
166 // well-formed. |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
167 return Err(HgError::corrupted( |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
168 "manifest entry without \\n delimiter", |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
169 )); |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
170 } |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
171 }; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
172 Ok(Some(entry_start..entry_end)) |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
173 } else { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
174 // len == 0 |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
175 Ok(None) |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
176 } |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
177 } |
45539
89ac95bd4993
hg-core: add `Manifest` a specialized `Revlog`
Antoine Cezar <antoine.cezar@octobus.net>
parents:
diff
changeset
|
178 } |
48392
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
179 |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
180 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
181 #[derive(Debug)] |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
182 pub struct ManifestEntry<'manifest> { |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
183 pub path: &'manifest HgPath, |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
184 pub hex_node_id: &'manifest [u8], |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
185 |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
186 /// `Some` values are b'x', b'l', or 't' |
52061
0ea323b7e3b1
rust-manifest: encode flags as `Option<NonZeroU8>`
Rapha?l Gom?s <rgomes@octobus.net>
parents:
51906
diff
changeset
|
187 pub flags: Option<NonZeroU8>, |
48392
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
188 } |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
189 |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
190 impl<'a> ManifestEntry<'a> { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
191 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
192 bytes.split_2(b'\0').ok_or_else(|| { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
193 HgError::corrupted("manifest entry without \\0 delimiter") |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
194 }) |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
195 } |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
196 |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
197 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
198 let (hex_node_id, flags) = match rest.split_last() { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
199 Some((&b'x', rest)) => (rest, Some(b'x')), |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
200 Some((&b'l', rest)) => (rest, Some(b'l')), |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
201 Some((&b't', rest)) => (rest, Some(b't')), |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
202 _ => (rest, None), |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
203 }; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
204 Self { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
205 path: HgPath::new(path), |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
206 hex_node_id, |
52061
0ea323b7e3b1
rust-manifest: encode flags as `Option<NonZeroU8>`
Rapha?l Gom?s <rgomes@octobus.net>
parents:
51906
diff
changeset
|
207 flags: flags.map(|f| f.try_into().expect("invalid flag")), |
48532
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
208 } |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
209 } |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
210 |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
211 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> { |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
212 let (path, rest) = Self::split_path(bytes)?; |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
213 Ok(Self::from_path_and_rest(path, rest)) |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
214 } |
e293ff808a05
rhg: Use binary search in manifest lookup
Simon Sapin <simon.sapin@octobus.net>
parents:
48392
diff
changeset
|
215 |
48392
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
216 pub fn node_id(&self) -> Result<Node, HgError> { |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
217 Node::from_hex_for_repo(self.hex_node_id) |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
218 } |
eb428010aad2
rhg: Also parse flags in the manifest parser
Simon Sapin <simon.sapin@octobus.net>
parents:
48391
diff
changeset
|
219 } |