changeset 52396:b0b6c28b340b

hg-core: add filelog metadata header parsing This adds methods to FilelogRevisionData to extract the metadata header. The header is represented by a &[u8] wrapper capable of parsing the fields "censored", "copy", and "copyrev". This will be needed for supporting --copies in rhg status and diff.
author Mitchell Kember <mkember@janestreet.com>
date Tue, 03 Dec 2024 09:31:24 -0500
parents 136e74c2bf8f
children 42bd36bbed67
files rust/hg-core/src/revlog/filelog.rs
diffstat 1 files changed, 141 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/filelog.rs	Tue Dec 03 15:27:50 2024 -0500
+++ b/rust/hg-core/src/revlog/filelog.rs	Tue Dec 03 09:31:24 2024 -0500
@@ -11,6 +11,7 @@
 use crate::utils::SliceExt;
 use crate::Graph;
 use crate::GraphError;
+use crate::Node;
 use crate::UncheckedRevision;
 use std::path::PathBuf;
 
@@ -216,22 +217,30 @@
 
 impl FilelogRevisionData {
     /// Split into metadata and data
-    pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
+    pub fn split(
+        &self,
+    ) -> Result<(FilelogRevisionMetadata<'_>, &[u8]), HgError> {
         const DELIMITER: &[u8; 2] = b"\x01\n";
 
         if let Some(rest) = self.0.drop_prefix(DELIMITER) {
             if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
-                Ok((Some(metadata), data))
+                Ok((FilelogRevisionMetadata(Some(metadata)), data))
             } else {
                 Err(HgError::corrupted(
                     "Missing metadata end delimiter in filelog entry",
                 ))
             }
         } else {
-            Ok((None, &self.0))
+            Ok((FilelogRevisionMetadata(None), &self.0))
         }
     }
 
+    /// Returns the metadata header.
+    pub fn metadata(&self) -> Result<FilelogRevisionMetadata<'_>, HgError> {
+        let (metadata, _data) = self.split()?;
+        Ok(metadata)
+    }
+
     /// Returns the file contents at this revision, stripped of any metadata
     pub fn file_data(&self) -> Result<&[u8], HgError> {
         let (_metadata, data) = self.split()?;
@@ -241,10 +250,138 @@
     /// Consume the entry, and convert it into data, discarding any metadata,
     /// if present.
     pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
-        if let (Some(_metadata), data) = self.split()? {
+        if let (FilelogRevisionMetadata(Some(_)), data) = self.split()? {
             Ok(data.to_owned())
         } else {
             Ok(self.0)
         }
     }
 }
+
+/// The optional metadata header included in [`FilelogRevisionData`].
+pub struct FilelogRevisionMetadata<'a>(Option<&'a [u8]>);
+
+/// Fields parsed from [`FilelogRevisionMetadata`].
+#[derive(Debug, PartialEq, Default)]
+pub struct FilelogRevisionMetadataFields<'a> {
+    /// True if the file revision data is censored.
+    pub censored: bool,
+    /// Path of the copy source.
+    pub copy: Option<&'a HgPath>,
+    /// Filelog node ID of the copy source.
+    pub copyrev: Option<Node>,
+}
+
+impl<'a> FilelogRevisionMetadata<'a> {
+    /// Parses the metadata fields.
+    pub fn parse(self) -> Result<FilelogRevisionMetadataFields<'a>, HgError> {
+        let mut fields = FilelogRevisionMetadataFields::default();
+        if let Some(metadata) = self.0 {
+            let mut rest = metadata;
+            while !rest.is_empty() {
+                let Some(colon_idx) = memchr::memchr(b':', rest) else {
+                    return Err(HgError::corrupted(
+                        "File metadata header line missing colon",
+                    ));
+                };
+                if rest.get(colon_idx + 1) != Some(&b' ') {
+                    return Err(HgError::corrupted(
+                        "File metadata header line missing space",
+                    ));
+                }
+                let key = &rest[..colon_idx];
+                rest = &rest[colon_idx + 2..];
+                let Some(newline_idx) = memchr::memchr(b'\n', rest) else {
+                    return Err(HgError::corrupted(
+                        "File metadata header line missing newline",
+                    ));
+                };
+                let value = &rest[..newline_idx];
+                match key {
+                    b"censored" => {
+                        match value {
+                            b"" => fields.censored = true,
+                            _ => return Err(HgError::corrupted(
+                                "File metadata header 'censored' field has nonempty value",
+                            )),
+                        }
+                    }
+                    b"copy" => fields.copy = Some(HgPath::new(value)),
+                    b"copyrev" => {
+                        fields.copyrev = Some(Node::from_hex_for_repo(value)?)
+                    }
+                    _ => {
+                        return Err(HgError::corrupted(
+                            format!(
+                                "File metadata header has unrecognized key '{}'",
+                                String::from_utf8_lossy(key),
+                            ),
+                        ))
+                    }
+                }
+                rest = &rest[newline_idx + 1..];
+            }
+        }
+        Ok(fields)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use format_bytes::format_bytes;
+
+    #[test]
+    fn test_parse_no_metadata() {
+        let data = FilelogRevisionData(b"data".to_vec());
+        let fields = data.metadata().unwrap().parse().unwrap();
+        assert_eq!(fields, Default::default());
+    }
+
+    #[test]
+    fn test_parse_empty_metadata() {
+        let data = FilelogRevisionData(b"\x01\n\x01\ndata".to_vec());
+        let fields = data.metadata().unwrap().parse().unwrap();
+        assert_eq!(fields, Default::default());
+    }
+
+    #[test]
+    fn test_parse_one_field() {
+        let data =
+            FilelogRevisionData(b"\x01\ncopy: foo\n\x01\ndata".to_vec());
+        let fields = data.metadata().unwrap().parse().unwrap();
+        assert_eq!(
+            fields,
+            FilelogRevisionMetadataFields {
+                copy: Some(HgPath::new("foo")),
+                ..Default::default()
+            }
+        );
+    }
+
+    #[test]
+    fn test_parse_all_fields() {
+        let sha = b"215d5d1546f82a79481eb2df513a7bc341bdf17f";
+        let data = FilelogRevisionData(format_bytes!(
+            b"\x01\ncensored: \ncopy: foo\ncopyrev: {}\n\x01\ndata",
+            sha
+        ));
+        let fields = data.metadata().unwrap().parse().unwrap();
+        assert_eq!(
+            fields,
+            FilelogRevisionMetadataFields {
+                censored: true,
+                copy: Some(HgPath::new("foo")),
+                copyrev: Some(Node::from_hex(sha).unwrap()),
+            }
+        );
+    }
+
+    #[test]
+    fn test_parse_invalid_metadata() {
+        let data =
+            FilelogRevisionData(b"\x01\nbad: value\n\x01\ndata".to_vec());
+        let err = data.metadata().unwrap().parse().unwrap_err();
+        assert!(err.to_string().contains("unrecognized key 'bad'"));
+    }
+}