annotate rust/hg-core/src/pre_regex.rs @ 52556:1866119cbad7

rust-ignore: construct regex Hir object directly, avoiding large regex string. Rework how we convert patterns to regexes in Rust. Instead of going patterns -> string -> Regex, which is slow and causes some correctness issues, build a structured regex_syntax::hir::Hir value, which is faster and also prevents surprising regex escapes. This change makes the time of `build_regex_match` go from ~70-80ms to ~40ms in my testing (for a large hgignore). The bug I mentioned involves regex patterns that "escape" their intended scope. For example, a sequence of hgignore regexp patterns like this would previously lead to surprising behavior: foo(?: bar baz ) This matches foobar and foobaz, and doesn't match bar and baz. The new behavior is to report a pattern parse error. The Python hg also has this bug, so this bugfix is not really helping much, but it's probably better to fall back to real Python bugs than to simulate them.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Fri, 06 Dec 2024 20:27:59 +0000
parents
children b89c934e6269
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
52556
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
1 use core::str;
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
2
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
3 use lazy_static::lazy_static;
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
4
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
5 use crate::filepatterns::PatternError;
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
6
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
7 lazy_static! {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
8 static ref RE_ESCAPE: Vec<Vec<u8>> = {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
9 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
10 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
11 for byte in to_escape {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
12 v[*byte as usize].insert(0, b'\\');
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
13 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
14 v
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
15 };
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
16 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
17
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
18 pub fn escape_char_for_re(c: u8) -> &'static [u8] {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
19 &RE_ESCAPE[c as usize]
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
20 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
21
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
22 #[derive(Debug, Clone)]
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
23 pub enum PreRegex {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
24 Empty,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
25 Dot,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
26 DotStar,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
27 Eof,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
28 NonslashStar,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
29 Byte(u8),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
30 Bytes(Vec<u8>),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
31 SlashOrEof,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
32 Re((regex_syntax::hir::Hir, Vec<u8>)),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
33 Maybe(Box<Self>),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
34 Alternation(Vec<Self>),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
35 Sequence(Vec<Self>),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
36 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
37
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
38 mod to_hir {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
39 use itertools::Itertools;
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
40 use regex_syntax::hir::{
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
41 Class, ClassBytes, ClassBytesRange, Dot, Hir, Look, Repetition,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
42 };
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
43
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
44 use super::PreRegex;
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
45
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
46 fn hir_star(hir: Hir) -> Hir {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
47 Hir::repetition(Repetition {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
48 min: 0,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
49 max: None,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
50 greedy: false,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
51 sub: Box::new(hir),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
52 })
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
53 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
54
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
55 fn hir_eof() -> Hir {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
56 Hir::look(Look::End)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
57 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
58
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
59 fn hir_nonslash() -> Hir {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
60 let mut class =
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
61 Class::Bytes(ClassBytes::new([ClassBytesRange::new(b'/', b'/')]));
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
62 Class::negate(&mut class);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
63 Hir::class(class)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
64 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
65
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
66 fn hir_byte(b: u8) -> Hir {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
67 let class =
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
68 Class::Bytes(ClassBytes::new([ClassBytesRange::new(b, b)]));
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
69 Hir::class(class)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
70 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
71
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
72 fn hir_literal(text: &[u8]) -> Hir {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
73 let b: Box<[u8]> = Box::from(text);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
74 Hir::literal(b)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
75 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
76
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
77 pub(crate) fn to_hir(re: &PreRegex) -> regex_syntax::hir::Hir {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
78 match re {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
79 PreRegex::Empty => Hir::empty(),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
80 PreRegex::Dot => Hir::dot(Dot::AnyByte),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
81 PreRegex::DotStar => hir_star(Hir::dot(Dot::AnyByte)),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
82 PreRegex::Eof => hir_eof(),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
83 PreRegex::NonslashStar => hir_star(hir_nonslash()),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
84 PreRegex::Byte(b) => hir_byte(*b),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
85 PreRegex::Bytes(bs) => hir_literal(bs),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
86 PreRegex::SlashOrEof => {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
87 Hir::alternation(vec![hir_byte(b'/'), hir_eof()])
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
88 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
89 PreRegex::Re((hir, _)) => hir.clone(),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
90 PreRegex::Maybe(s) => {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
91 Hir::alternation(vec![Hir::empty(), s.to_hir()])
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
92 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
93 PreRegex::Alternation(alt) => {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
94 let alt = alt.iter().map(|r| r.to_hir()).collect_vec();
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
95 Hir::alternation(alt)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
96 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
97 PreRegex::Sequence(seq) => {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
98 let seq = seq.iter().map(|r| r.to_hir()).collect_vec();
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
99 Hir::concat(seq)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
100 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
101 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
102 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
103 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
104
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
105 impl PreRegex {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
106 pub fn to_hir(&self) -> regex_syntax::hir::Hir {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
107 to_hir::to_hir(self)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
108 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
109
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
110 fn to_bytes_rec(&self, out: &mut Vec<u8>) {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
111 match self {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
112 PreRegex::Empty => (),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
113 PreRegex::Dot => out.push(b'.'),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
114 PreRegex::DotStar => out.extend_from_slice(&b".*"[..]),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
115 PreRegex::Eof => out.push(b'$'),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
116 PreRegex::NonslashStar => out.extend_from_slice(&b"[^/]*"[..]),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
117 PreRegex::Byte(b) => out.extend_from_slice(escape_char_for_re(*b)),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
118 PreRegex::Bytes(bytes) => {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
119 for b in bytes {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
120 out.extend_from_slice(escape_char_for_re(*b))
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
121 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
122 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
123 PreRegex::SlashOrEof => out.extend_from_slice(&b"(?:/|$)"[..]),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
124 PreRegex::Re((_hir, src)) => out.extend_from_slice(src),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
125 PreRegex::Alternation(alt) => {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
126 if alt.is_empty() {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
127 // something that can never match
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
128 out.extend_from_slice(&b" ^"[..])
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
129 } else {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
130 out.extend_from_slice(&b"(?:"[..]);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
131 let mut first = true;
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
132 for r in alt {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
133 if first {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
134 first = false
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
135 } else {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
136 out.extend_from_slice(&b"|"[..]);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
137 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
138 r.to_bytes_rec(out)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
139 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
140 out.extend_from_slice(&b")"[..]);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
141 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
142 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
143 PreRegex::Sequence(seq) => {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
144 for r in seq {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
145 r.to_bytes_rec(out)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
146 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
147 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
148 PreRegex::Maybe(r) => {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
149 out.extend_from_slice(&b"(?:"[..]);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
150 r.to_bytes_rec(out);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
151 out.extend_from_slice(&b")?"[..]);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
152 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
153 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
154 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
155
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
156 pub fn parse(re: &[u8]) -> Result<Self, PatternError> {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
157 let re_str = str::from_utf8(re)
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
158 .map_err(|err| PatternError::UnsupportedSyntax(err.to_string()))?;
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
159 Ok(Self::Re((
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
160 regex_syntax::parse(re_str).map_err(|err| {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
161 PatternError::UnsupportedSyntax(err.to_string())
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
162 })?,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
163 re.to_vec(),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
164 )))
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
165 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
166
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
167 pub fn to_bytes(&self) -> Vec<u8> {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
168 let mut out = vec![];
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
169 self.to_bytes_rec(&mut out);
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
170 out
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
171 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
172
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
173 pub fn literal(prefix: &[u8]) -> PreRegex {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
174 Self::Bytes(prefix.to_vec())
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
175 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
176
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
177 pub fn preceding_dir_components() -> Self {
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
178 Self::Maybe(Box::new(Self::Sequence(vec![
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
179 Self::DotStar,
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
180 Self::Byte(b'/'),
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
181 ])))
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
182 }
1866119cbad7 rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff changeset
183 }