diff rust/hg-core/src/revlog/manifest.rs @ 52316:f4aede0f01af

rust-manifest: use `memchr` crate for all byte-finding needs. While writing a very dumb manifest diffing algorithm for a proof-of-concept, I saw that `Manifest::find_by_path` was much slower than I was expecting. It turns out that the Rust stdlib uses slow (all is relative) code when searching for byte positions, for reasons including portability, SIMD API stability, nobody having done the work, etc. `memchr` is much faster for these purposes, so let's use it. I was measuring ~670ms of profile time in `find_by_path`; after this patch it went down to ~230ms.
author Raphaël Gomès <rgomes@octobus.net>
date Tue, 12 Nov 2024 23:20:04 +0100
parents 039b7caeb4d9
children a3fa37bdb7ec
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/manifest.rs	Wed Oct 30 19:38:56 2024 -0700
+++ b/rust/hg-core/src/revlog/manifest.rs	Tue Nov 12 23:20:04 2024 +0100
@@ -145,12 +145,11 @@
             let middle = bytes.len() / 2;
             // Integer division rounds down, so `middle < len`.
             let (before, after) = bytes.split_at(middle);
-            let is_newline = |&byte: &u8| byte == b'\n';
-            let entry_start = match before.iter().rposition(is_newline) {
+            let entry_start = match memchr::memrchr(b'\n', before) {
                 Some(i) => i + 1,
                 None => 0, // We choose the first entry in `bytes`
             };
-            let entry_end = match after.iter().position(is_newline) {
+            let entry_end = match memchr::memchr(b'\n', after) {
                 Some(i) => {
                     // No `+ 1` here to exclude this newline from the range
                     middle + i