comparison rust/hg-core/src/filepatterns.rs @ 52352:2ff004fb491c

hgignore: add a GlobSuffix type, instead of passing byte arrays

I think this makes it easier to understand the purpose of this extra argument.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 02 Dec 2024 11:25:26 +0000
parents f33b87b46135
children e2e49069eeb6
comparison
equal deleted inserted replaced
52351:9042ffea4edd 52352:2ff004fb491c
212 212
213 lazy_static! { 213 lazy_static! {
214 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap(); 214 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
215 } 215 }
216 216
217 /// Extra path components to match at the end of the pattern
218 #[derive(Clone, Copy)]
219 pub enum GlobSuffix {
220 /// `Empty` means the pattern only matches files, not directories,
221 /// so the path needs to match exactly.
222 Empty,
223 /// `MoreComponents` means the pattern matches directories as well,
224 /// so any path that has the pattern as a prefix, should match.
225 MoreComponents,
226 }
227
228 impl GlobSuffix {
229 fn to_re(self) -> &'static [u8] {
230 match self {
231 Self::Empty => b"$",
232 Self::MoreComponents => b"(?:/|$)",
233 }
234 }
235 }
236
217 /// Builds the regex that corresponds to the given pattern. 237 /// Builds the regex that corresponds to the given pattern.
218 /// If within a `syntax: regexp` context, returns the pattern, 238 /// If within a `syntax: regexp` context, returns the pattern,
219 /// otherwise, returns the corresponding regex. 239 /// otherwise, returns the corresponding regex.
220 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> { 240 fn _build_single_regex(
241 entry: &IgnorePattern,
242 glob_suffix: GlobSuffix,
243 ) -> Vec<u8> {
221 let IgnorePattern { 244 let IgnorePattern {
222 syntax, pattern, .. 245 syntax, pattern, ..
223 } = entry; 246 } = entry;
224 if pattern.is_empty() { 247 if pattern.is_empty() {
225 return vec![]; 248 return vec![];
262 } 285 }
263 PatternSyntax::Path | PatternSyntax::RelPath => { 286 PatternSyntax::Path | PatternSyntax::RelPath => {
264 if pattern == b"." { 287 if pattern == b"." {
265 return vec![]; 288 return vec![];
266 } 289 }
267 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat() 290 [
291 escape_pattern(pattern).as_slice(),
292 GlobSuffix::MoreComponents.to_re(),
293 ]
294 .concat()
268 } 295 }
269 PatternSyntax::RootFilesIn => { 296 PatternSyntax::RootFilesIn => {
270 let mut res = if pattern == b"." { 297 let mut res = if pattern == b"." {
271 vec![] 298 vec![]
272 } else { 299 } else {
279 res 306 res
280 } 307 }
281 PatternSyntax::RelGlob => { 308 PatternSyntax::RelGlob => {
282 let glob_re = glob_to_re(pattern); 309 let glob_re = glob_to_re(pattern);
283 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { 310 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
284 [b".*", rest, glob_suffix].concat() 311 [b".*", rest, glob_suffix.to_re()].concat()
285 } else { 312 } else {
286 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat() 313 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix.to_re()].concat()
287 } 314 }
288 } 315 }
289 PatternSyntax::Glob | PatternSyntax::RootGlob => { 316 PatternSyntax::Glob | PatternSyntax::RootGlob => {
290 [glob_to_re(pattern).as_slice(), glob_suffix].concat() 317 [glob_to_re(pattern).as_slice(), glob_suffix.to_re()].concat()
291 } 318 }
292 PatternSyntax::Include 319 PatternSyntax::Include
293 | PatternSyntax::SubInclude 320 | PatternSyntax::SubInclude
294 | PatternSyntax::ExpandedSubInclude(_) 321 | PatternSyntax::ExpandedSubInclude(_)
295 | PatternSyntax::FilePath => unreachable!(), 322 | PatternSyntax::FilePath => unreachable!(),
343 370
344 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs 371 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
345 /// that don't need to be transformed into a regex. 372 /// that don't need to be transformed into a regex.
346 pub fn build_single_regex( 373 pub fn build_single_regex(
347 entry: &IgnorePattern, 374 entry: &IgnorePattern,
348 glob_suffix: &[u8], 375 glob_suffix: GlobSuffix,
349 ) -> Result<Option<Vec<u8>>, PatternError> { 376 ) -> Result<Option<Vec<u8>>, PatternError> {
350 let IgnorePattern { 377 let IgnorePattern {
351 pattern, syntax, .. 378 pattern, syntax, ..
352 } = entry; 379 } = entry;
353 let pattern = match syntax { 380 let pattern = match syntax {
798 &IgnorePattern::new( 825 &IgnorePattern::new(
799 PatternSyntax::RelGlob, 826 PatternSyntax::RelGlob,
800 b"rust/target/", 827 b"rust/target/",
801 Path::new("") 828 Path::new("")
802 ), 829 ),
803 b"(?:/|$)" 830 GlobSuffix::MoreComponents
804 ) 831 )
805 .unwrap(), 832 .unwrap(),
806 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), 833 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
807 ); 834 );
808 assert_eq!( 835 assert_eq!(
810 &IgnorePattern::new( 837 &IgnorePattern::new(
811 PatternSyntax::Regexp, 838 PatternSyntax::Regexp,
812 br"rust/target/\d+", 839 br"rust/target/\d+",
813 Path::new("") 840 Path::new("")
814 ), 841 ),
815 b"(?:/|$)" 842 GlobSuffix::MoreComponents
816 ) 843 )
817 .unwrap(), 844 .unwrap(),
818 Some(br"rust/target/\d+".to_vec()), 845 Some(br"rust/target/\d+".to_vec()),
819 ); 846 );
820 } 847 }
826 &IgnorePattern::new( 853 &IgnorePattern::new(
827 PatternSyntax::RootGlob, 854 PatternSyntax::RootGlob,
828 b"", 855 b"",
829 Path::new("") 856 Path::new("")
830 ), 857 ),
831 b"(?:/|$)" 858 GlobSuffix::MoreComponents
832 ) 859 )
833 .unwrap(), 860 .unwrap(),
834 None, 861 None,
835 ); 862 );
836 assert_eq!( 863 assert_eq!(
838 &IgnorePattern::new( 865 &IgnorePattern::new(
839 PatternSyntax::RootGlob, 866 PatternSyntax::RootGlob,
840 b"whatever", 867 b"whatever",
841 Path::new("") 868 Path::new("")
842 ), 869 ),
843 b"(?:/|$)" 870 GlobSuffix::MoreComponents
844 ) 871 )
845 .unwrap(), 872 .unwrap(),
846 None, 873 None,
847 ); 874 );
848 assert_eq!( 875 assert_eq!(
850 &IgnorePattern::new( 877 &IgnorePattern::new(
851 PatternSyntax::RootGlob, 878 PatternSyntax::RootGlob,
852 b"*.o", 879 b"*.o",
853 Path::new("") 880 Path::new("")
854 ), 881 ),
855 b"(?:/|$)" 882 GlobSuffix::MoreComponents
856 ) 883 )
857 .unwrap(), 884 .unwrap(),
858 Some(br"[^/]*\.o(?:/|$)".to_vec()), 885 Some(br"[^/]*\.o(?:/|$)".to_vec()),
859 ); 886 );
860 } 887 }
866 &IgnorePattern::new( 893 &IgnorePattern::new(
867 PatternSyntax::RelRegexp, 894 PatternSyntax::RelRegexp,
868 b"^ba{2}r", 895 b"^ba{2}r",
869 Path::new("") 896 Path::new("")
870 ), 897 ),
871 b"(?:/|$)" 898 GlobSuffix::MoreComponents
872 ) 899 )
873 .unwrap(), 900 .unwrap(),
874 Some(b"^ba{2}r".to_vec()), 901 Some(b"^ba{2}r".to_vec()),
875 ); 902 );
876 assert_eq!( 903 assert_eq!(
878 &IgnorePattern::new( 905 &IgnorePattern::new(
879 PatternSyntax::RelRegexp, 906 PatternSyntax::RelRegexp,
880 b"ba{2}r", 907 b"ba{2}r",
881 Path::new("") 908 Path::new("")
882 ), 909 ),
883 b"(?:/|$)" 910 GlobSuffix::MoreComponents
884 ) 911 )
885 .unwrap(), 912 .unwrap(),
886 Some(b".*ba{2}r".to_vec()), 913 Some(b".*ba{2}r".to_vec()),
887 ); 914 );
888 assert_eq!( 915 assert_eq!(
890 &IgnorePattern::new( 917 &IgnorePattern::new(
891 PatternSyntax::RelRegexp, 918 PatternSyntax::RelRegexp,
892 b"(?ia)ba{2}r", 919 b"(?ia)ba{2}r",
893 Path::new("") 920 Path::new("")
894 ), 921 ),
895 b"(?:/|$)" 922 GlobSuffix::MoreComponents
896 ) 923 )
897 .unwrap(), 924 .unwrap(),
898 Some(b"(?ia:.*ba{2}r)".to_vec()), 925 Some(b"(?ia:.*ba{2}r)".to_vec()),
899 ); 926 );
900 assert_eq!( 927 assert_eq!(
902 &IgnorePattern::new( 929 &IgnorePattern::new(
903 PatternSyntax::RelRegexp, 930 PatternSyntax::RelRegexp,
904 b"(?ia)^ba{2}r", 931 b"(?ia)^ba{2}r",
905 Path::new("") 932 Path::new("")
906 ), 933 ),
907 b"(?:/|$)" 934 GlobSuffix::MoreComponents
908 ) 935 )
909 .unwrap(), 936 .unwrap(),
910 Some(b"(?ia:^ba{2}r)".to_vec()), 937 Some(b"(?ia:^ba{2}r)".to_vec()),
911 ); 938 );
912 } 939 }