Mercurial > public > mercurial-scm > hg
annotate rust/hg-core/src/pre_regex.rs @ 52556:1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Rework how we convert patterns to regexes in rust.
Instead of going patterns -> string -> Regex, which is slow and causes
some correctness issues, build a structured regex_syntax::hir::Hir value,
which is faster and also prevents surprising regex escapes.
This change makes the time of `build_regex_match` go from ~70-80ms
to ~40ms in my testing (for a large hgignore).
The bug I mentioned involves regex patterns that "escape" their
intended scope. For example, a sequence of hgignore regexp patterns like
this would previously lead to surprising behavior:
foo(?:
bar
baz
)
This matches foobar and foobaz, and doesn't match bar and baz.
The new behavior is to report a pattern parse error.
The Python hg also has this bug, so this bugfix is
not really helping much, but it's probably better to
fall back to real Python bugs than to simulate them.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Fri, 06 Dec 2024 20:27:59 +0000 |
parents | |
children | b89c934e6269 |
rev | line source |
---|---|
52556
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
1 use core::str; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
2 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
3 use lazy_static::lazy_static; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
4 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
5 use crate::filepatterns::PatternError; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
6 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
7 lazy_static! { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
8 static ref RE_ESCAPE: Vec<Vec<u8>> = { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
9 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect(); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
10 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c"; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
11 for byte in to_escape { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
12 v[*byte as usize].insert(0, b'\\'); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
13 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
14 v |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
15 }; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
16 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
17 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
18 pub fn escape_char_for_re(c: u8) -> &'static [u8] { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
19 &RE_ESCAPE[c as usize] |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
20 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
21 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
22 #[derive(Debug, Clone)] |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
23 pub enum PreRegex { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
24 Empty, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
25 Dot, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
26 DotStar, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
27 Eof, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
28 NonslashStar, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
29 Byte(u8), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
30 Bytes(Vec<u8>), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
31 SlashOrEof, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
32 Re((regex_syntax::hir::Hir, Vec<u8>)), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
33 Maybe(Box<Self>), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
34 Alternation(Vec<Self>), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
35 Sequence(Vec<Self>), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
36 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
37 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
38 mod to_hir { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
39 use itertools::Itertools; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
40 use regex_syntax::hir::{ |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
41 Class, ClassBytes, ClassBytesRange, Dot, Hir, Look, Repetition, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
42 }; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
43 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
44 use super::PreRegex; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
45 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
46 fn hir_star(hir: Hir) -> Hir { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
47 Hir::repetition(Repetition { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
48 min: 0, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
49 max: None, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
50 greedy: false, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
51 sub: Box::new(hir), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
52 }) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
53 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
54 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
55 fn hir_eof() -> Hir { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
56 Hir::look(Look::End) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
57 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
58 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
59 fn hir_nonslash() -> Hir { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
60 let mut class = |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
61 Class::Bytes(ClassBytes::new([ClassBytesRange::new(b'/', b'/')])); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
62 Class::negate(&mut class); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
63 Hir::class(class) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
64 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
65 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
66 fn hir_byte(b: u8) -> Hir { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
67 let class = |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
68 Class::Bytes(ClassBytes::new([ClassBytesRange::new(b, b)])); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
69 Hir::class(class) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
70 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
71 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
72 fn hir_literal(text: &[u8]) -> Hir { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
73 let b: Box<[u8]> = Box::from(text); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
74 Hir::literal(b) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
75 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
76 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
77 pub(crate) fn to_hir(re: &PreRegex) -> regex_syntax::hir::Hir { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
78 match re { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
79 PreRegex::Empty => Hir::empty(), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
80 PreRegex::Dot => Hir::dot(Dot::AnyByte), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
81 PreRegex::DotStar => hir_star(Hir::dot(Dot::AnyByte)), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
82 PreRegex::Eof => hir_eof(), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
83 PreRegex::NonslashStar => hir_star(hir_nonslash()), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
84 PreRegex::Byte(b) => hir_byte(*b), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
85 PreRegex::Bytes(bs) => hir_literal(bs), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
86 PreRegex::SlashOrEof => { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
87 Hir::alternation(vec![hir_byte(b'/'), hir_eof()]) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
88 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
89 PreRegex::Re((hir, _)) => hir.clone(), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
90 PreRegex::Maybe(s) => { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
91 Hir::alternation(vec![Hir::empty(), s.to_hir()]) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
92 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
93 PreRegex::Alternation(alt) => { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
94 let alt = alt.iter().map(|r| r.to_hir()).collect_vec(); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
95 Hir::alternation(alt) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
96 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
97 PreRegex::Sequence(seq) => { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
98 let seq = seq.iter().map(|r| r.to_hir()).collect_vec(); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
99 Hir::concat(seq) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
100 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
101 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
102 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
103 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
104 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
105 impl PreRegex { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
106 pub fn to_hir(&self) -> regex_syntax::hir::Hir { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
107 to_hir::to_hir(self) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
108 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
109 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
110 fn to_bytes_rec(&self, out: &mut Vec<u8>) { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
111 match self { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
112 PreRegex::Empty => (), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
113 PreRegex::Dot => out.push(b'.'), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
114 PreRegex::DotStar => out.extend_from_slice(&b".*"[..]), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
115 PreRegex::Eof => out.push(b'$'), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
116 PreRegex::NonslashStar => out.extend_from_slice(&b"[^/]*"[..]), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
117 PreRegex::Byte(b) => out.extend_from_slice(escape_char_for_re(*b)), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
118 PreRegex::Bytes(bytes) => { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
119 for b in bytes { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
120 out.extend_from_slice(escape_char_for_re(*b)) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
121 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
122 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
123 PreRegex::SlashOrEof => out.extend_from_slice(&b"(?:/|$)"[..]), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
124 PreRegex::Re((_hir, src)) => out.extend_from_slice(src), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
125 PreRegex::Alternation(alt) => { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
126 if alt.is_empty() { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
127 // something that can never match |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
128 out.extend_from_slice(&b" ^"[..]) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
129 } else { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
130 out.extend_from_slice(&b"(?:"[..]); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
131 let mut first = true; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
132 for r in alt { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
133 if first { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
134 first = false |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
135 } else { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
136 out.extend_from_slice(&b"|"[..]); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
137 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
138 r.to_bytes_rec(out) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
139 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
140 out.extend_from_slice(&b")"[..]); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
141 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
142 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
143 PreRegex::Sequence(seq) => { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
144 for r in seq { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
145 r.to_bytes_rec(out) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
146 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
147 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
148 PreRegex::Maybe(r) => { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
149 out.extend_from_slice(&b"(?:"[..]); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
150 r.to_bytes_rec(out); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
151 out.extend_from_slice(&b")?"[..]); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
152 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
153 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
154 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
155 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
156 pub fn parse(re: &[u8]) -> Result<Self, PatternError> { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
157 let re_str = str::from_utf8(re) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
158 .map_err(|err| PatternError::UnsupportedSyntax(err.to_string()))?; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
159 Ok(Self::Re(( |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
160 regex_syntax::parse(re_str).map_err(|err| { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
161 PatternError::UnsupportedSyntax(err.to_string()) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
162 })?, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
163 re.to_vec(), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
164 ))) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
165 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
166 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
167 pub fn to_bytes(&self) -> Vec<u8> { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
168 let mut out = vec![]; |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
169 self.to_bytes_rec(&mut out); |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
170 out |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
171 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
172 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
173 pub fn literal(prefix: &[u8]) -> PreRegex { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
174 Self::Bytes(prefix.to_vec()) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
175 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
176 |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
177 pub fn preceding_dir_components() -> Self { |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
178 Self::Maybe(Box::new(Self::Sequence(vec![ |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
179 Self::DotStar, |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
180 Self::Byte(b'/'), |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
181 ]))) |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
182 } |
1866119cbad7
rust-ignore: construct regex Hir object directly, avoiding large regex string
Arseniy Alekseyev <aalekseyev@janestreet.com>
parents:
diff
changeset
|
183 } |