Mercurial > public > mercurial-scm > hg
comparison rust/hg-core/src/filepatterns.rs @ 50858:090658724abf
rust: de-hardcode glob_suffix
We're adding patternmatcher in a subsequent commit, and this needs
to be different for includematcher and patternmatcher.
author | Spencer Baugh <sbaugh@janestreet.com> |
---|---|
date | Mon, 14 Aug 2023 09:25:36 -0400 |
parents | 2b4bcdc948e7 |
children | c112cc9effdc |
comparison
equal
deleted
inserted
replaced
50857:f50e71fdfcb4 | 50858:090658724abf |
---|---|
33 } | 33 } |
34 | 34 |
35 /// These are matched in order | 35 /// These are matched in order |
36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = | 36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = |
37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; | 37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; |
38 | |
39 /// Appended to the regexp of globs | |
40 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)"; | |
41 | 38 |
42 #[derive(Debug, Clone, PartialEq, Eq)] | 39 #[derive(Debug, Clone, PartialEq, Eq)] |
43 pub enum PatternSyntax { | 40 pub enum PatternSyntax { |
44 /// A regular expression | 41 /// A regular expression |
45 Regexp, | 42 Regexp, |
179 } | 176 } |
180 | 177 |
181 /// Builds the regex that corresponds to the given pattern. | 178 /// Builds the regex that corresponds to the given pattern. |
182 /// If within a `syntax: regexp` context, returns the pattern, | 179 /// If within a `syntax: regexp` context, returns the pattern, |
183 /// otherwise, returns the corresponding regex. | 180 /// otherwise, returns the corresponding regex. |
184 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> { | 181 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> { |
185 let IgnorePattern { | 182 let IgnorePattern { |
186 syntax, pattern, .. | 183 syntax, pattern, .. |
187 } = entry; | 184 } = entry; |
188 if pattern.is_empty() { | 185 if pattern.is_empty() { |
189 return vec![]; | 186 return vec![]; |
243 res | 240 res |
244 } | 241 } |
245 PatternSyntax::RelGlob => { | 242 PatternSyntax::RelGlob => { |
246 let glob_re = glob_to_re(pattern); | 243 let glob_re = glob_to_re(pattern); |
247 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { | 244 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { |
248 [b".*", rest, GLOB_SUFFIX].concat() | 245 [b".*", rest, glob_suffix].concat() |
249 } else { | 246 } else { |
250 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() | 247 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat() |
251 } | 248 } |
252 } | 249 } |
253 PatternSyntax::Glob | PatternSyntax::RootGlob => { | 250 PatternSyntax::Glob | PatternSyntax::RootGlob => { |
254 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() | 251 [glob_to_re(pattern).as_slice(), glob_suffix].concat() |
255 } | 252 } |
256 PatternSyntax::Include | 253 PatternSyntax::Include |
257 | PatternSyntax::SubInclude | 254 | PatternSyntax::SubInclude |
258 | PatternSyntax::ExpandedSubInclude(_) | 255 | PatternSyntax::ExpandedSubInclude(_) |
259 | PatternSyntax::FilePath => unreachable!(), | 256 | PatternSyntax::FilePath => unreachable!(), |
307 | 304 |
308 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs | 305 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs |
309 /// that don't need to be transformed into a regex. | 306 /// that don't need to be transformed into a regex. |
310 pub fn build_single_regex( | 307 pub fn build_single_regex( |
311 entry: &IgnorePattern, | 308 entry: &IgnorePattern, |
309 glob_suffix: &[u8], | |
312 ) -> Result<Option<Vec<u8>>, PatternError> { | 310 ) -> Result<Option<Vec<u8>>, PatternError> { |
313 let IgnorePattern { | 311 let IgnorePattern { |
314 pattern, syntax, .. | 312 pattern, syntax, .. |
315 } = entry; | 313 } = entry; |
316 let pattern = match syntax { | 314 let pattern = match syntax { |
329 if is_simple_rootglob || syntax == &PatternSyntax::FilePath { | 327 if is_simple_rootglob || syntax == &PatternSyntax::FilePath { |
330 Ok(None) | 328 Ok(None) |
331 } else { | 329 } else { |
332 let mut entry = entry.clone(); | 330 let mut entry = entry.clone(); |
333 entry.pattern = pattern; | 331 entry.pattern = pattern; |
334 Ok(Some(_build_single_regex(&entry))) | 332 Ok(Some(_build_single_regex(&entry, glob_suffix))) |
335 } | 333 } |
336 } | 334 } |
337 | 335 |
338 lazy_static! { | 336 lazy_static! { |
339 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = { | 337 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = { |
714 } | 712 } |
715 | 713 |
716 #[test] | 714 #[test] |
717 fn test_build_single_regex() { | 715 fn test_build_single_regex() { |
718 assert_eq!( | 716 assert_eq!( |
719 build_single_regex(&IgnorePattern::new( | 717 build_single_regex( |
720 PatternSyntax::RelGlob, | 718 &IgnorePattern::new( |
721 b"rust/target/", | 719 PatternSyntax::RelGlob, |
722 Path::new("") | 720 b"rust/target/", |
723 )) | 721 Path::new("") |
722 ), | |
723 b"(?:/|$)" | |
724 ) | |
724 .unwrap(), | 725 .unwrap(), |
725 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), | 726 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), |
726 ); | 727 ); |
727 assert_eq!( | 728 assert_eq!( |
728 build_single_regex(&IgnorePattern::new( | 729 build_single_regex( |
729 PatternSyntax::Regexp, | 730 &IgnorePattern::new( |
730 br"rust/target/\d+", | 731 PatternSyntax::Regexp, |
731 Path::new("") | 732 br"rust/target/\d+", |
732 )) | 733 Path::new("") |
734 ), | |
735 b"(?:/|$)" | |
736 ) | |
733 .unwrap(), | 737 .unwrap(), |
734 Some(br"rust/target/\d+".to_vec()), | 738 Some(br"rust/target/\d+".to_vec()), |
735 ); | 739 ); |
736 } | 740 } |
737 | 741 |
738 #[test] | 742 #[test] |
739 fn test_build_single_regex_shortcut() { | 743 fn test_build_single_regex_shortcut() { |
740 assert_eq!( | 744 assert_eq!( |
741 build_single_regex(&IgnorePattern::new( | 745 build_single_regex( |
742 PatternSyntax::RootGlob, | 746 &IgnorePattern::new( |
743 b"", | 747 PatternSyntax::RootGlob, |
744 Path::new("") | 748 b"", |
745 )) | 749 Path::new("") |
750 ), | |
751 b"(?:/|$)" | |
752 ) | |
746 .unwrap(), | 753 .unwrap(), |
747 None, | 754 None, |
748 ); | 755 ); |
749 assert_eq!( | 756 assert_eq!( |
750 build_single_regex(&IgnorePattern::new( | 757 build_single_regex( |
751 PatternSyntax::RootGlob, | 758 &IgnorePattern::new( |
752 b"whatever", | 759 PatternSyntax::RootGlob, |
753 Path::new("") | 760 b"whatever", |
754 )) | 761 Path::new("") |
762 ), | |
763 b"(?:/|$)" | |
764 ) | |
755 .unwrap(), | 765 .unwrap(), |
756 None, | 766 None, |
757 ); | 767 ); |
758 assert_eq!( | 768 assert_eq!( |
759 build_single_regex(&IgnorePattern::new( | 769 build_single_regex( |
760 PatternSyntax::RootGlob, | 770 &IgnorePattern::new( |
761 b"*.o", | 771 PatternSyntax::RootGlob, |
762 Path::new("") | 772 b"*.o", |
763 )) | 773 Path::new("") |
774 ), | |
775 b"(?:/|$)" | |
776 ) | |
764 .unwrap(), | 777 .unwrap(), |
765 Some(br"[^/]*\.o(?:/|$)".to_vec()), | 778 Some(br"[^/]*\.o(?:/|$)".to_vec()), |
766 ); | 779 ); |
767 } | 780 } |
768 | 781 |
769 #[test] | 782 #[test] |
770 fn test_build_single_relregex() { | 783 fn test_build_single_relregex() { |
771 assert_eq!( | 784 assert_eq!( |
772 build_single_regex(&IgnorePattern::new( | 785 build_single_regex( |
773 PatternSyntax::RelRegexp, | 786 &IgnorePattern::new( |
774 b"^ba{2}r", | 787 PatternSyntax::RelRegexp, |
775 Path::new("") | 788 b"^ba{2}r", |
776 )) | 789 Path::new("") |
790 ), | |
791 b"(?:/|$)" | |
792 ) | |
777 .unwrap(), | 793 .unwrap(), |
778 Some(b"^ba{2}r".to_vec()), | 794 Some(b"^ba{2}r".to_vec()), |
779 ); | 795 ); |
780 assert_eq!( | 796 assert_eq!( |
781 build_single_regex(&IgnorePattern::new( | 797 build_single_regex( |
782 PatternSyntax::RelRegexp, | 798 &IgnorePattern::new( |
783 b"ba{2}r", | 799 PatternSyntax::RelRegexp, |
784 Path::new("") | 800 b"ba{2}r", |
785 )) | 801 Path::new("") |
802 ), | |
803 b"(?:/|$)" | |
804 ) | |
786 .unwrap(), | 805 .unwrap(), |
787 Some(b".*ba{2}r".to_vec()), | 806 Some(b".*ba{2}r".to_vec()), |
788 ); | 807 ); |
789 assert_eq!( | 808 assert_eq!( |
790 build_single_regex(&IgnorePattern::new( | 809 build_single_regex( |
791 PatternSyntax::RelRegexp, | 810 &IgnorePattern::new( |
792 b"(?ia)ba{2}r", | 811 PatternSyntax::RelRegexp, |
793 Path::new("") | 812 b"(?ia)ba{2}r", |
794 )) | 813 Path::new("") |
814 ), | |
815 b"(?:/|$)" | |
816 ) | |
795 .unwrap(), | 817 .unwrap(), |
796 Some(b"(?ia:.*ba{2}r)".to_vec()), | 818 Some(b"(?ia:.*ba{2}r)".to_vec()), |
797 ); | 819 ); |
798 assert_eq!( | 820 assert_eq!( |
799 build_single_regex(&IgnorePattern::new( | 821 build_single_regex( |
800 PatternSyntax::RelRegexp, | 822 &IgnorePattern::new( |
801 b"(?ia)^ba{2}r", | 823 PatternSyntax::RelRegexp, |
802 Path::new("") | 824 b"(?ia)^ba{2}r", |
803 )) | 825 Path::new("") |
826 ), | |
827 b"(?:/|$)" | |
828 ) | |
804 .unwrap(), | 829 .unwrap(), |
805 Some(b"(?ia:^ba{2}r)".to_vec()), | 830 Some(b"(?ia:^ba{2}r)".to_vec()), |
806 ); | 831 ); |
807 } | 832 } |
808 } | 833 } |