regex_syntax/hir/
translate.rs

1/*!
2Defines a translator that converts an `Ast` to an `Hir`.
3*/
4
5use core::cell::{Cell, RefCell};
6
7use alloc::{boxed::Box, string::ToString, vec, vec::Vec};
8
9use crate::{
10    ast::{self, Ast, Span, Visitor},
11    either::Either,
12    hir::{self, Error, ErrorKind, Hir, HirKind},
13    unicode::{self, ClassQuery},
14};
15
/// A `Result` alias whose error type is fixed to the HIR [`Error`].
type Result<T> = core::result::Result<T, Error>;
17
/// A builder for constructing an AST->HIR translator.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether UTF-8 mode is enabled; copied into the built `Translator`.
    /// Set to `true` by `TranslatorBuilder::new`.
    utf8: bool,
    /// The line terminator byte; copied into the built `Translator`. Set to
    /// `b'\n'` by `TranslatorBuilder::new`.
    line_terminator: u8,
    /// The flags that a built `Translator` starts out with.
    flags: Flags,
}
25
26impl Default for TranslatorBuilder {
27    fn default() -> TranslatorBuilder {
28        TranslatorBuilder::new()
29    }
30}
31
32impl TranslatorBuilder {
33    /// Create a new translator builder with a default c onfiguration.
34    pub fn new() -> TranslatorBuilder {
35        TranslatorBuilder {
36            utf8: true,
37            line_terminator: b'\n',
38            flags: Flags::default(),
39        }
40    }
41
42    /// Build a translator using the current configuration.
43    pub fn build(&self) -> Translator {
44        Translator {
45            stack: RefCell::new(vec![]),
46            flags: Cell::new(self.flags),
47            utf8: self.utf8,
48            line_terminator: self.line_terminator,
49        }
50    }
51
52    /// When disabled, translation will permit the construction of a regular
53    /// expression that may match invalid UTF-8.
54    ///
55    /// When enabled (the default), the translator is guaranteed to produce an
56    /// expression that, for non-empty matches, will only ever produce spans
57    /// that are entirely valid UTF-8 (otherwise, the translator will return an
58    /// error).
59    ///
60    /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
61    /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
62    /// syntax) will be allowed even though they can produce matches that split
63    /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
64    /// matches, and it is expected that the regex engine itself must handle
65    /// these cases if necessary (perhaps by suppressing any zero-width matches
66    /// that split a codepoint).
67    pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
68        self.utf8 = yes;
69        self
70    }
71
72    /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
73    ///
74    /// Namely, instead of `.` (by default) matching everything except for `\n`,
75    /// this will cause `.` to match everything except for the byte given.
76    ///
77    /// If `.` is used in a context where Unicode mode is enabled and this byte
78    /// isn't ASCII, then an error will be returned. When Unicode mode is
79    /// disabled, then any byte is permitted, but will return an error if UTF-8
80    /// mode is enabled and it is a non-ASCII byte.
81    ///
82    /// In short, any ASCII value for a line terminator is always okay. But a
83    /// non-ASCII byte might result in an error depending on whether Unicode
84    /// mode or UTF-8 mode are enabled.
85    ///
86    /// Note that if `R` mode is enabled then it always takes precedence and
87    /// the line terminator will be treated as `\r` and `\n` simultaneously.
88    ///
89    /// Note also that this *doesn't* impact the look-around assertions
90    /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
91    /// configuration in the regex engine itself.
92    pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder {
93        self.line_terminator = byte;
94        self
95    }
96
97    /// Enable or disable the case insensitive flag (`i`) by default.
98    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
99        self.flags.case_insensitive = if yes { Some(true) } else { None };
100        self
101    }
102
103    /// Enable or disable the multi-line matching flag (`m`) by default.
104    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
105        self.flags.multi_line = if yes { Some(true) } else { None };
106        self
107    }
108
109    /// Enable or disable the "dot matches any character" flag (`s`) by
110    /// default.
111    pub fn dot_matches_new_line(
112        &mut self,
113        yes: bool,
114    ) -> &mut TranslatorBuilder {
115        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
116        self
117    }
118
119    /// Enable or disable the CRLF mode flag (`R`) by default.
120    pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder {
121        self.flags.crlf = if yes { Some(true) } else { None };
122        self
123    }
124
125    /// Enable or disable the "swap greed" flag (`U`) by default.
126    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
127        self.flags.swap_greed = if yes { Some(true) } else { None };
128        self
129    }
130
131    /// Enable or disable the Unicode flag (`u`) by default.
132    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
133        self.flags.unicode = if yes { None } else { Some(false) };
134        self
135    }
136}
137
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`].
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    utf8: bool,
    /// The line terminator to use for `.`.
    line_terminator: u8,
}
157
158impl Translator {
159    /// Create a new translator using the default configuration.
160    pub fn new() -> Translator {
161        TranslatorBuilder::new().build()
162    }
163
164    /// Translate the given abstract syntax tree (AST) into a high level
165    /// intermediate representation (HIR).
166    ///
167    /// If there was a problem doing the translation, then an HIR-specific
168    /// error is returned.
169    ///
170    /// The original pattern string used to produce the `Ast` *must* also be
171    /// provided. The translator does not use the pattern string during any
172    /// correct translation, but is used for error reporting.
173    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
174        ast::visit(ast, TranslatorI::new(self, pattern))
175    }
176}
177
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A literal that is being constructed, character by character, from the
    /// AST. We need this because the AST gives each individual character its
    /// own node. So as we see characters, we peek at the top-most HirFrame.
    /// If it's a literal, then we add to it. Otherwise, we push a new literal.
    /// When it comes time to pop it, we convert it to an Hir via Hir::literal.
    ///
    /// The bytes are the UTF-8 encoding of each pushed char, or the raw value
    /// of each pushed byte.
    Literal(Vec<u8>),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `utf8` is enabled (the default), then a byte character class is
    /// only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed whenever a repetition is observed. After visiting every
    /// sub-expression in the repetition, the translator's stack is expected to
    /// have this sentinel at the top.
    ///
    /// This sentinel only exists to stop other things (like flattening
    /// literals) from reaching across repetition operators.
    Repetition,
    /// This is pushed on to the stack upon first seeing any kind of capture,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a concatenation is observed. After visiting
    /// every sub-expression in the concatenation, the translator's stack is
    /// popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever an alternation is observed. After visiting
    /// every sub-expression in the alternation, the translator's stack is
    /// popped until it sees an Alternation frame.
    Alternation,
    /// This is pushed immediately before each sub-expression in an
    /// alternation. This separates the branches of an alternation on the
    /// stack and prevents literal flattening from reaching across alternation
    /// branches.
    ///
    /// It is popped after each expression in a branch until an 'Alternation'
    /// frame is observed when doing a post visit on an alternation.
    AlternationBranch,
}
250
251impl HirFrame {
252    /// Assert that the current stack frame is an Hir expression and return it.
253    fn unwrap_expr(self) -> Hir {
254        match self {
255            HirFrame::Expr(expr) => expr,
256            HirFrame::Literal(lit) => Hir::literal(lit),
257            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
258        }
259    }
260
261    /// Assert that the current stack frame is a Unicode class expression and
262    /// return it.
263    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
264        match self {
265            HirFrame::ClassUnicode(cls) => cls,
266            _ => panic!(
267                "tried to unwrap Unicode class \
268                 from HirFrame, got: {:?}",
269                self
270            ),
271        }
272    }
273
274    /// Assert that the current stack frame is a byte class expression and
275    /// return it.
276    fn unwrap_class_bytes(self) -> hir::ClassBytes {
277        match self {
278            HirFrame::ClassBytes(cls) => cls,
279            _ => panic!(
280                "tried to unwrap byte class \
281                 from HirFrame, got: {:?}",
282                self
283            ),
284        }
285    }
286
287    /// Assert that the current stack frame is a repetition sentinel. If it
288    /// isn't, then panic.
289    fn unwrap_repetition(self) {
290        match self {
291            HirFrame::Repetition => {}
292            _ => {
293                panic!(
294                    "tried to unwrap repetition from HirFrame, got: {:?}",
295                    self
296                )
297            }
298        }
299    }
300
301    /// Assert that the current stack frame is a group indicator and return
302    /// its corresponding flags (the flags that were active at the time the
303    /// group was entered).
304    fn unwrap_group(self) -> Flags {
305        match self {
306            HirFrame::Group { old_flags } => old_flags,
307            _ => {
308                panic!("tried to unwrap group from HirFrame, got: {:?}", self)
309            }
310        }
311    }
312
313    /// Assert that the current stack frame is an alternation pipe sentinel. If
314    /// it isn't, then panic.
315    fn unwrap_alternation_pipe(self) {
316        match self {
317            HirFrame::AlternationBranch => {}
318            _ => {
319                panic!(
320                    "tried to unwrap alt pipe from HirFrame, got: {:?}",
321                    self
322                )
323            }
324        }
325    }
326}
327
328impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
329    type Output = Hir;
330    type Err = Error;
331
332    fn finish(self) -> Result<Hir> {
333        // ... otherwise, we should have exactly one HIR on the stack.
334        assert_eq!(self.trans().stack.borrow().len(), 1);
335        Ok(self.pop().unwrap().unwrap_expr())
336    }
337
338    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
339        match *ast {
340            Ast::ClassBracketed(_) => {
341                if self.flags().unicode() {
342                    let cls = hir::ClassUnicode::empty();
343                    self.push(HirFrame::ClassUnicode(cls));
344                } else {
345                    let cls = hir::ClassBytes::empty();
346                    self.push(HirFrame::ClassBytes(cls));
347                }
348            }
349            Ast::Repetition(_) => self.push(HirFrame::Repetition),
350            Ast::Group(ref x) => {
351                let old_flags = x
352                    .flags()
353                    .map(|ast| self.set_flags(ast))
354                    .unwrap_or_else(|| self.flags());
355                self.push(HirFrame::Group { old_flags });
356            }
357            Ast::Concat(_) => {
358                self.push(HirFrame::Concat);
359            }
360            Ast::Alternation(ref x) => {
361                self.push(HirFrame::Alternation);
362                if !x.asts.is_empty() {
363                    self.push(HirFrame::AlternationBranch);
364                }
365            }
366            _ => {}
367        }
368        Ok(())
369    }
370
371    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
372        match *ast {
373            Ast::Empty(_) => {
374                self.push(HirFrame::Expr(Hir::empty()));
375            }
376            Ast::Flags(ref x) => {
377                self.set_flags(&x.flags);
378                // Flags in the AST are generally considered directives and
379                // not actual sub-expressions. However, they can be used in
380                // the concrete syntax like `((?i))`, and we need some kind of
381                // indication of an expression there, and Empty is the correct
382                // choice.
383                //
384                // There can also be things like `(?i)+`, but we rule those out
385                // in the parser. In the future, we might allow them for
386                // consistency sake.
387                self.push(HirFrame::Expr(Hir::empty()));
388            }
389            Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
390                Either::Right(byte) => self.push_byte(byte),
391                Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
392                    None => self.push_char(ch),
393                    Some(expr) => self.push(HirFrame::Expr(expr)),
394                },
395            },
396            Ast::Dot(ref span) => {
397                self.push(HirFrame::Expr(self.hir_dot(**span)?));
398            }
399            Ast::Assertion(ref x) => {
400                self.push(HirFrame::Expr(self.hir_assertion(x)?));
401            }
402            Ast::ClassPerl(ref x) => {
403                if self.flags().unicode() {
404                    let cls = self.hir_perl_unicode_class(x)?;
405                    let hcls = hir::Class::Unicode(cls);
406                    self.push(HirFrame::Expr(Hir::class(hcls)));
407                } else {
408                    let cls = self.hir_perl_byte_class(x)?;
409                    let hcls = hir::Class::Bytes(cls);
410                    self.push(HirFrame::Expr(Hir::class(hcls)));
411                }
412            }
413            Ast::ClassUnicode(ref x) => {
414                let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
415                self.push(HirFrame::Expr(Hir::class(cls)));
416            }
417            Ast::ClassBracketed(ref ast) => {
418                if self.flags().unicode() {
419                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
420                    self.unicode_fold_and_negate(
421                        &ast.span,
422                        ast.negated,
423                        &mut cls,
424                    )?;
425                    let expr = Hir::class(hir::Class::Unicode(cls));
426                    self.push(HirFrame::Expr(expr));
427                } else {
428                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
429                    self.bytes_fold_and_negate(
430                        &ast.span,
431                        ast.negated,
432                        &mut cls,
433                    )?;
434                    let expr = Hir::class(hir::Class::Bytes(cls));
435                    self.push(HirFrame::Expr(expr));
436                }
437            }
438            Ast::Repetition(ref x) => {
439                let expr = self.pop().unwrap().unwrap_expr();
440                self.pop().unwrap().unwrap_repetition();
441                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
442            }
443            Ast::Group(ref x) => {
444                let expr = self.pop().unwrap().unwrap_expr();
445                let old_flags = self.pop().unwrap().unwrap_group();
446                self.trans().flags.set(old_flags);
447                self.push(HirFrame::Expr(self.hir_capture(x, expr)));
448            }
449            Ast::Concat(_) => {
450                let mut exprs = vec![];
451                while let Some(expr) = self.pop_concat_expr() {
452                    if !matches!(*expr.kind(), HirKind::Empty) {
453                        exprs.push(expr);
454                    }
455                }
456                exprs.reverse();
457                self.push(HirFrame::Expr(Hir::concat(exprs)));
458            }
459            Ast::Alternation(_) => {
460                let mut exprs = vec![];
461                while let Some(expr) = self.pop_alt_expr() {
462                    self.pop().unwrap().unwrap_alternation_pipe();
463                    exprs.push(expr);
464                }
465                exprs.reverse();
466                self.push(HirFrame::Expr(Hir::alternation(exprs)));
467            }
468        }
469        Ok(())
470    }
471
472    fn visit_alternation_in(&mut self) -> Result<()> {
473        self.push(HirFrame::AlternationBranch);
474        Ok(())
475    }
476
477    fn visit_class_set_item_pre(
478        &mut self,
479        ast: &ast::ClassSetItem,
480    ) -> Result<()> {
481        match *ast {
482            ast::ClassSetItem::Bracketed(_) => {
483                if self.flags().unicode() {
484                    let cls = hir::ClassUnicode::empty();
485                    self.push(HirFrame::ClassUnicode(cls));
486                } else {
487                    let cls = hir::ClassBytes::empty();
488                    self.push(HirFrame::ClassBytes(cls));
489                }
490            }
491            // We needn't handle the Union case here since the visitor will
492            // do it for us.
493            _ => {}
494        }
495        Ok(())
496    }
497
498    fn visit_class_set_item_post(
499        &mut self,
500        ast: &ast::ClassSetItem,
501    ) -> Result<()> {
502        match *ast {
503            ast::ClassSetItem::Empty(_) => {}
504            ast::ClassSetItem::Literal(ref x) => {
505                if self.flags().unicode() {
506                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
507                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
508                    self.push(HirFrame::ClassUnicode(cls));
509                } else {
510                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
511                    let byte = self.class_literal_byte(x)?;
512                    cls.push(hir::ClassBytesRange::new(byte, byte));
513                    self.push(HirFrame::ClassBytes(cls));
514                }
515            }
516            ast::ClassSetItem::Range(ref x) => {
517                if self.flags().unicode() {
518                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
519                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
520                    self.push(HirFrame::ClassUnicode(cls));
521                } else {
522                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
523                    let start = self.class_literal_byte(&x.start)?;
524                    let end = self.class_literal_byte(&x.end)?;
525                    cls.push(hir::ClassBytesRange::new(start, end));
526                    self.push(HirFrame::ClassBytes(cls));
527                }
528            }
529            ast::ClassSetItem::Ascii(ref x) => {
530                if self.flags().unicode() {
531                    let xcls = self.hir_ascii_unicode_class(x)?;
532                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
533                    cls.union(&xcls);
534                    self.push(HirFrame::ClassUnicode(cls));
535                } else {
536                    let xcls = self.hir_ascii_byte_class(x)?;
537                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
538                    cls.union(&xcls);
539                    self.push(HirFrame::ClassBytes(cls));
540                }
541            }
542            ast::ClassSetItem::Unicode(ref x) => {
543                let xcls = self.hir_unicode_class(x)?;
544                let mut cls = self.pop().unwrap().unwrap_class_unicode();
545                cls.union(&xcls);
546                self.push(HirFrame::ClassUnicode(cls));
547            }
548            ast::ClassSetItem::Perl(ref x) => {
549                if self.flags().unicode() {
550                    let xcls = self.hir_perl_unicode_class(x)?;
551                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
552                    cls.union(&xcls);
553                    self.push(HirFrame::ClassUnicode(cls));
554                } else {
555                    let xcls = self.hir_perl_byte_class(x)?;
556                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
557                    cls.union(&xcls);
558                    self.push(HirFrame::ClassBytes(cls));
559                }
560            }
561            ast::ClassSetItem::Bracketed(ref ast) => {
562                if self.flags().unicode() {
563                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
564                    self.unicode_fold_and_negate(
565                        &ast.span,
566                        ast.negated,
567                        &mut cls1,
568                    )?;
569
570                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
571                    cls2.union(&cls1);
572                    self.push(HirFrame::ClassUnicode(cls2));
573                } else {
574                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
575                    self.bytes_fold_and_negate(
576                        &ast.span,
577                        ast.negated,
578                        &mut cls1,
579                    )?;
580
581                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
582                    cls2.union(&cls1);
583                    self.push(HirFrame::ClassBytes(cls2));
584                }
585            }
586            // This is handled automatically by the visitor.
587            ast::ClassSetItem::Union(_) => {}
588        }
589        Ok(())
590    }
591
592    fn visit_class_set_binary_op_pre(
593        &mut self,
594        _op: &ast::ClassSetBinaryOp,
595    ) -> Result<()> {
596        if self.flags().unicode() {
597            let cls = hir::ClassUnicode::empty();
598            self.push(HirFrame::ClassUnicode(cls));
599        } else {
600            let cls = hir::ClassBytes::empty();
601            self.push(HirFrame::ClassBytes(cls));
602        }
603        Ok(())
604    }
605
606    fn visit_class_set_binary_op_in(
607        &mut self,
608        _op: &ast::ClassSetBinaryOp,
609    ) -> Result<()> {
610        if self.flags().unicode() {
611            let cls = hir::ClassUnicode::empty();
612            self.push(HirFrame::ClassUnicode(cls));
613        } else {
614            let cls = hir::ClassBytes::empty();
615            self.push(HirFrame::ClassBytes(cls));
616        }
617        Ok(())
618    }
619
620    fn visit_class_set_binary_op_post(
621        &mut self,
622        op: &ast::ClassSetBinaryOp,
623    ) -> Result<()> {
624        use crate::ast::ClassSetBinaryOpKind::*;
625
626        if self.flags().unicode() {
627            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
628            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
629            let mut cls = self.pop().unwrap().unwrap_class_unicode();
630            if self.flags().case_insensitive() {
631                rhs.try_case_fold_simple().map_err(|_| {
632                    self.error(
633                        op.rhs.span().clone(),
634                        ErrorKind::UnicodeCaseUnavailable,
635                    )
636                })?;
637                lhs.try_case_fold_simple().map_err(|_| {
638                    self.error(
639                        op.lhs.span().clone(),
640                        ErrorKind::UnicodeCaseUnavailable,
641                    )
642                })?;
643            }
644            match op.kind {
645                Intersection => lhs.intersect(&rhs),
646                Difference => lhs.difference(&rhs),
647                SymmetricDifference => lhs.symmetric_difference(&rhs),
648            }
649            cls.union(&lhs);
650            self.push(HirFrame::ClassUnicode(cls));
651        } else {
652            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
653            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
654            let mut cls = self.pop().unwrap().unwrap_class_bytes();
655            if self.flags().case_insensitive() {
656                rhs.case_fold_simple();
657                lhs.case_fold_simple();
658            }
659            match op.kind {
660                Intersection => lhs.intersect(&rhs),
661                Difference => lhs.difference(&rhs),
662                SymmetricDifference => lhs.symmetric_difference(&rhs),
663            }
664            cls.union(&lhs);
665            self.push(HirFrame::ClassBytes(cls));
666        }
667        Ok(())
668    }
669}
670
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose stack and active flags are read and updated
    /// while the Ast is visited.
    trans: &'t Translator,
    /// The original pattern string; copied into every error that is created.
    pattern: &'p str,
}
682
683impl<'t, 'p> TranslatorI<'t, 'p> {
684    /// Build a new internal translator.
685    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
686        TranslatorI { trans, pattern }
687    }
688
689    /// Return a reference to the underlying translator.
690    fn trans(&self) -> &Translator {
691        &self.trans
692    }
693
694    /// Push the given frame on to the call stack.
695    fn push(&self, frame: HirFrame) {
696        self.trans().stack.borrow_mut().push(frame);
697    }
698
699    /// Push the given literal char on to the call stack.
700    ///
701    /// If the top-most element of the stack is a literal, then the char
702    /// is appended to the end of that literal. Otherwise, a new literal
703    /// containing just the given char is pushed to the top of the stack.
704    fn push_char(&self, ch: char) {
705        let mut buf = [0; 4];
706        let bytes = ch.encode_utf8(&mut buf).as_bytes();
707        let mut stack = self.trans().stack.borrow_mut();
708        if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
709            literal.extend_from_slice(bytes);
710        } else {
711            stack.push(HirFrame::Literal(bytes.to_vec()));
712        }
713    }
714
715    /// Push the given literal byte on to the call stack.
716    ///
717    /// If the top-most element of the stack is a literal, then the byte
718    /// is appended to the end of that literal. Otherwise, a new literal
719    /// containing just the given byte is pushed to the top of the stack.
720    fn push_byte(&self, byte: u8) {
721        let mut stack = self.trans().stack.borrow_mut();
722        if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
723            literal.push(byte);
724        } else {
725            stack.push(HirFrame::Literal(vec![byte]));
726        }
727    }
728
729    /// Pop the top of the call stack. If the call stack is empty, return None.
730    fn pop(&self) -> Option<HirFrame> {
731        self.trans().stack.borrow_mut().pop()
732    }
733
734    /// Pop an HIR expression from the top of the stack for a concatenation.
735    ///
736    /// This returns None if the stack is empty or when a concat frame is seen.
737    /// Otherwise, it panics if it could not find an HIR expression.
738    fn pop_concat_expr(&self) -> Option<Hir> {
739        let frame = self.pop()?;
740        match frame {
741            HirFrame::Concat => None,
742            HirFrame::Expr(expr) => Some(expr),
743            HirFrame::Literal(lit) => Some(Hir::literal(lit)),
744            HirFrame::ClassUnicode(_) => {
745                unreachable!("expected expr or concat, got Unicode class")
746            }
747            HirFrame::ClassBytes(_) => {
748                unreachable!("expected expr or concat, got byte class")
749            }
750            HirFrame::Repetition => {
751                unreachable!("expected expr or concat, got repetition")
752            }
753            HirFrame::Group { .. } => {
754                unreachable!("expected expr or concat, got group")
755            }
756            HirFrame::Alternation => {
757                unreachable!("expected expr or concat, got alt marker")
758            }
759            HirFrame::AlternationBranch => {
760                unreachable!("expected expr or concat, got alt branch marker")
761            }
762        }
763    }
764
765    /// Pop an HIR expression from the top of the stack for an alternation.
766    ///
767    /// This returns None if the stack is empty or when an alternation frame is
768    /// seen. Otherwise, it panics if it could not find an HIR expression.
769    fn pop_alt_expr(&self) -> Option<Hir> {
770        let frame = self.pop()?;
771        match frame {
772            HirFrame::Alternation => None,
773            HirFrame::Expr(expr) => Some(expr),
774            HirFrame::Literal(lit) => Some(Hir::literal(lit)),
775            HirFrame::ClassUnicode(_) => {
776                unreachable!("expected expr or alt, got Unicode class")
777            }
778            HirFrame::ClassBytes(_) => {
779                unreachable!("expected expr or alt, got byte class")
780            }
781            HirFrame::Repetition => {
782                unreachable!("expected expr or alt, got repetition")
783            }
784            HirFrame::Group { .. } => {
785                unreachable!("expected expr or alt, got group")
786            }
787            HirFrame::Concat => {
788                unreachable!("expected expr or alt, got concat marker")
789            }
790            HirFrame::AlternationBranch => {
791                unreachable!("expected expr or alt, got alt branch marker")
792            }
793        }
794    }
795
796    /// Create a new error with the given span and error type.
797    fn error(&self, span: Span, kind: ErrorKind) -> Error {
798        Error { kind, pattern: self.pattern.to_string(), span }
799    }
800
801    /// Return a copy of the active flags.
802    fn flags(&self) -> Flags {
803        self.trans().flags.get()
804    }
805
806    /// Set the flags of this translator from the flags set in the given AST.
807    /// Then, return the old flags.
808    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
809        let old_flags = self.flags();
810        let mut new_flags = Flags::from_ast(ast_flags);
811        new_flags.merge(&old_flags);
812        self.trans().flags.set(new_flags);
813        old_flags
814    }
815
816    /// Convert an Ast literal to its scalar representation.
817    ///
818    /// When Unicode mode is enabled, then this always succeeds and returns a
819    /// `char` (Unicode scalar value).
820    ///
821    /// When Unicode mode is disabled, then a `char` will still be returned
822    /// whenever possible. A byte is returned only when invalid UTF-8 is
823    /// allowed and when the byte is not ASCII. Otherwise, a non-ASCII byte
824    /// will result in an error when invalid UTF-8 is not allowed.
825    fn ast_literal_to_scalar(
826        &self,
827        lit: &ast::Literal,
828    ) -> Result<Either<char, u8>> {
829        if self.flags().unicode() {
830            return Ok(Either::Left(lit.c));
831        }
832        let byte = match lit.byte() {
833            None => return Ok(Either::Left(lit.c)),
834            Some(byte) => byte,
835        };
836        if byte <= 0x7F {
837            return Ok(Either::Left(char::try_from(byte).unwrap()));
838        }
839        if self.trans().utf8 {
840            return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
841        }
842        Ok(Either::Right(byte))
843    }
844
845    fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> {
846        if !self.flags().case_insensitive() {
847            return Ok(None);
848        }
849        if self.flags().unicode() {
850            // If case folding won't do anything, then don't bother trying.
851            let map = unicode::SimpleCaseFolder::new()
852                .map(|f| f.overlaps(c, c))
853                .map_err(|_| {
854                    self.error(span, ErrorKind::UnicodeCaseUnavailable)
855                })?;
856            if !map {
857                return Ok(None);
858            }
859            let mut cls =
860                hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
861                    c, c,
862                )]);
863            cls.try_case_fold_simple().map_err(|_| {
864                self.error(span, ErrorKind::UnicodeCaseUnavailable)
865            })?;
866            Ok(Some(Hir::class(hir::Class::Unicode(cls))))
867        } else {
868            if !c.is_ascii() {
869                return Ok(None);
870            }
871            // If case folding won't do anything, then don't bother trying.
872            match c {
873                'A'..='Z' | 'a'..='z' => {}
874                _ => return Ok(None),
875            }
876            let mut cls =
877                hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
878                    // OK because 'c.len_utf8() == 1' which in turn implies
879                    // that 'c' is ASCII.
880                    u8::try_from(c).unwrap(),
881                    u8::try_from(c).unwrap(),
882                )]);
883            cls.case_fold_simple();
884            Ok(Some(Hir::class(hir::Class::Bytes(cls))))
885        }
886    }
887
888    fn hir_dot(&self, span: Span) -> Result<Hir> {
889        let (utf8, lineterm, flags) =
890            (self.trans().utf8, self.trans().line_terminator, self.flags());
891        if utf8 && (!flags.unicode() || !lineterm.is_ascii()) {
892            return Err(self.error(span, ErrorKind::InvalidUtf8));
893        }
894        let dot = if flags.dot_matches_new_line() {
895            if flags.unicode() {
896                hir::Dot::AnyChar
897            } else {
898                hir::Dot::AnyByte
899            }
900        } else {
901            if flags.unicode() {
902                if flags.crlf() {
903                    hir::Dot::AnyCharExceptCRLF
904                } else {
905                    if !lineterm.is_ascii() {
906                        return Err(
907                            self.error(span, ErrorKind::InvalidLineTerminator)
908                        );
909                    }
910                    hir::Dot::AnyCharExcept(char::from(lineterm))
911                }
912            } else {
913                if flags.crlf() {
914                    hir::Dot::AnyByteExceptCRLF
915                } else {
916                    hir::Dot::AnyByteExcept(lineterm)
917                }
918            }
919        };
920        Ok(Hir::dot(dot))
921    }
922
923    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
924        let unicode = self.flags().unicode();
925        let multi_line = self.flags().multi_line();
926        let crlf = self.flags().crlf();
927        Ok(match asst.kind {
928            ast::AssertionKind::StartLine => Hir::look(if multi_line {
929                if crlf {
930                    hir::Look::StartCRLF
931                } else {
932                    hir::Look::StartLF
933                }
934            } else {
935                hir::Look::Start
936            }),
937            ast::AssertionKind::EndLine => Hir::look(if multi_line {
938                if crlf {
939                    hir::Look::EndCRLF
940                } else {
941                    hir::Look::EndLF
942                }
943            } else {
944                hir::Look::End
945            }),
946            ast::AssertionKind::StartText => Hir::look(hir::Look::Start),
947            ast::AssertionKind::EndText => Hir::look(hir::Look::End),
948            ast::AssertionKind::WordBoundary => Hir::look(if unicode {
949                hir::Look::WordUnicode
950            } else {
951                hir::Look::WordAscii
952            }),
953            ast::AssertionKind::NotWordBoundary => Hir::look(if unicode {
954                hir::Look::WordUnicodeNegate
955            } else {
956                hir::Look::WordAsciiNegate
957            }),
958            ast::AssertionKind::WordBoundaryStart
959            | ast::AssertionKind::WordBoundaryStartAngle => {
960                Hir::look(if unicode {
961                    hir::Look::WordStartUnicode
962                } else {
963                    hir::Look::WordStartAscii
964                })
965            }
966            ast::AssertionKind::WordBoundaryEnd
967            | ast::AssertionKind::WordBoundaryEndAngle => {
968                Hir::look(if unicode {
969                    hir::Look::WordEndUnicode
970                } else {
971                    hir::Look::WordEndAscii
972                })
973            }
974            ast::AssertionKind::WordBoundaryStartHalf => {
975                Hir::look(if unicode {
976                    hir::Look::WordStartHalfUnicode
977                } else {
978                    hir::Look::WordStartHalfAscii
979                })
980            }
981            ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode {
982                hir::Look::WordEndHalfUnicode
983            } else {
984                hir::Look::WordEndHalfAscii
985            }),
986        })
987    }
988
989    fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir {
990        let (index, name) = match group.kind {
991            ast::GroupKind::CaptureIndex(index) => (index, None),
992            ast::GroupKind::CaptureName { ref name, .. } => {
993                (name.index, Some(name.name.clone().into_boxed_str()))
994            }
995            // The HIR doesn't need to use non-capturing groups, since the way
996            // in which the data type is defined handles this automatically.
997            ast::GroupKind::NonCapturing(_) => return expr,
998        };
999        Hir::capture(hir::Capture { index, name, sub: Box::new(expr) })
1000    }
1001
1002    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
1003        let (min, max) = match rep.op.kind {
1004            ast::RepetitionKind::ZeroOrOne => (0, Some(1)),
1005            ast::RepetitionKind::ZeroOrMore => (0, None),
1006            ast::RepetitionKind::OneOrMore => (1, None),
1007            ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
1008                (m, Some(m))
1009            }
1010            ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
1011                (m, None)
1012            }
1013            ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
1014                m,
1015                n,
1016            )) => (m, Some(n)),
1017        };
1018        let greedy =
1019            if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
1020        Hir::repetition(hir::Repetition {
1021            min,
1022            max,
1023            greedy,
1024            sub: Box::new(expr),
1025        })
1026    }
1027
1028    fn hir_unicode_class(
1029        &self,
1030        ast_class: &ast::ClassUnicode,
1031    ) -> Result<hir::ClassUnicode> {
1032        use crate::ast::ClassUnicodeKind::*;
1033
1034        if !self.flags().unicode() {
1035            return Err(
1036                self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
1037            );
1038        }
1039        let query = match ast_class.kind {
1040            OneLetter(name) => ClassQuery::OneLetter(name),
1041            Named(ref name) => ClassQuery::Binary(name),
1042            NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
1043                property_name: name,
1044                property_value: value,
1045            },
1046        };
1047        let mut result = self.convert_unicode_class_error(
1048            &ast_class.span,
1049            unicode::class(query),
1050        );
1051        if let Ok(ref mut class) = result {
1052            self.unicode_fold_and_negate(
1053                &ast_class.span,
1054                ast_class.negated,
1055                class,
1056            )?;
1057        }
1058        result
1059    }
1060
1061    fn hir_ascii_unicode_class(
1062        &self,
1063        ast: &ast::ClassAscii,
1064    ) -> Result<hir::ClassUnicode> {
1065        let mut cls = hir::ClassUnicode::new(
1066            ascii_class_as_chars(&ast.kind)
1067                .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)),
1068        );
1069        self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
1070        Ok(cls)
1071    }
1072
1073    fn hir_ascii_byte_class(
1074        &self,
1075        ast: &ast::ClassAscii,
1076    ) -> Result<hir::ClassBytes> {
1077        let mut cls = hir::ClassBytes::new(
1078            ascii_class(&ast.kind)
1079                .map(|(s, e)| hir::ClassBytesRange::new(s, e)),
1080        );
1081        self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
1082        Ok(cls)
1083    }
1084
1085    fn hir_perl_unicode_class(
1086        &self,
1087        ast_class: &ast::ClassPerl,
1088    ) -> Result<hir::ClassUnicode> {
1089        use crate::ast::ClassPerlKind::*;
1090
1091        assert!(self.flags().unicode());
1092        let result = match ast_class.kind {
1093            Digit => unicode::perl_digit(),
1094            Space => unicode::perl_space(),
1095            Word => unicode::perl_word(),
1096        };
1097        let mut class =
1098            self.convert_unicode_class_error(&ast_class.span, result)?;
1099        // We needn't apply case folding here because the Perl Unicode classes
1100        // are already closed under Unicode simple case folding.
1101        if ast_class.negated {
1102            class.negate();
1103        }
1104        Ok(class)
1105    }
1106
1107    fn hir_perl_byte_class(
1108        &self,
1109        ast_class: &ast::ClassPerl,
1110    ) -> Result<hir::ClassBytes> {
1111        use crate::ast::ClassPerlKind::*;
1112
1113        assert!(!self.flags().unicode());
1114        let mut class = match ast_class.kind {
1115            Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
1116            Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
1117            Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
1118        };
1119        // We needn't apply case folding here because the Perl ASCII classes
1120        // are already closed (under ASCII case folding).
1121        if ast_class.negated {
1122            class.negate();
1123        }
1124        // Negating a Perl byte class is likely to cause it to match invalid
1125        // UTF-8. That's only OK if the translator is configured to allow such
1126        // things.
1127        if self.trans().utf8 && !class.is_ascii() {
1128            return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8));
1129        }
1130        Ok(class)
1131    }
1132
1133    /// Converts the given Unicode specific error to an HIR translation error.
1134    ///
1135    /// The span given should approximate the position at which an error would
1136    /// occur.
1137    fn convert_unicode_class_error(
1138        &self,
1139        span: &Span,
1140        result: core::result::Result<hir::ClassUnicode, unicode::Error>,
1141    ) -> Result<hir::ClassUnicode> {
1142        result.map_err(|err| {
1143            let sp = span.clone();
1144            match err {
1145                unicode::Error::PropertyNotFound => {
1146                    self.error(sp, ErrorKind::UnicodePropertyNotFound)
1147                }
1148                unicode::Error::PropertyValueNotFound => {
1149                    self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
1150                }
1151                unicode::Error::PerlClassNotFound => {
1152                    self.error(sp, ErrorKind::UnicodePerlClassNotFound)
1153                }
1154            }
1155        })
1156    }
1157
1158    fn unicode_fold_and_negate(
1159        &self,
1160        span: &Span,
1161        negated: bool,
1162        class: &mut hir::ClassUnicode,
1163    ) -> Result<()> {
1164        // Note that we must apply case folding before negation!
1165        // Consider `(?i)[^x]`. If we applied negation first, then
1166        // the result would be the character class that matched any
1167        // Unicode scalar value.
1168        if self.flags().case_insensitive() {
1169            class.try_case_fold_simple().map_err(|_| {
1170                self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
1171            })?;
1172        }
1173        if negated {
1174            class.negate();
1175        }
1176        Ok(())
1177    }
1178
1179    fn bytes_fold_and_negate(
1180        &self,
1181        span: &Span,
1182        negated: bool,
1183        class: &mut hir::ClassBytes,
1184    ) -> Result<()> {
1185        // Note that we must apply case folding before negation!
1186        // Consider `(?i)[^x]`. If we applied negation first, then
1187        // the result would be the character class that matched any
1188        // Unicode scalar value.
1189        if self.flags().case_insensitive() {
1190            class.case_fold_simple();
1191        }
1192        if negated {
1193            class.negate();
1194        }
1195        if self.trans().utf8 && !class.is_ascii() {
1196            return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
1197        }
1198        Ok(())
1199    }
1200
1201    /// Return a scalar byte value suitable for use as a literal in a byte
1202    /// character class.
1203    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
1204        match self.ast_literal_to_scalar(ast)? {
1205            Either::Right(byte) => Ok(byte),
1206            Either::Left(ch) => {
1207                if ch.is_ascii() {
1208                    Ok(u8::try_from(ch).unwrap())
1209                } else {
1210                    // We can't feasibly support Unicode in
1211                    // byte oriented classes. Byte classes don't
1212                    // do Unicode case folding.
1213                    Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
1214                }
1215            }
1216        }
1217    }
1218}
1219
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent (`None`), present but
/// disabled (`Some(false)`) or present but enabled (`Some(true)`).
///
/// The accessor methods on this type resolve an absent flag to its default:
/// every flag defaults to disabled except `unicode`, which defaults to
/// enabled.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    // Set from `ast::Flag::CaseInsensitive`.
    case_insensitive: Option<bool>,
    // Set from `ast::Flag::MultiLine`.
    multi_line: Option<bool>,
    // Set from `ast::Flag::DotMatchesNewLine`.
    dot_matches_new_line: Option<bool>,
    // Set from `ast::Flag::SwapGreed`.
    swap_greed: Option<bool>,
    // Set from `ast::Flag::Unicode`.
    unicode: Option<bool>,
    // Set from `ast::Flag::CRLF`.
    crlf: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
1236
1237impl Flags {
1238    fn from_ast(ast: &ast::Flags) -> Flags {
1239        let mut flags = Flags::default();
1240        let mut enable = true;
1241        for item in &ast.items {
1242            match item.kind {
1243                ast::FlagsItemKind::Negation => {
1244                    enable = false;
1245                }
1246                ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1247                    flags.case_insensitive = Some(enable);
1248                }
1249                ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1250                    flags.multi_line = Some(enable);
1251                }
1252                ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1253                    flags.dot_matches_new_line = Some(enable);
1254                }
1255                ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1256                    flags.swap_greed = Some(enable);
1257                }
1258                ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1259                    flags.unicode = Some(enable);
1260                }
1261                ast::FlagsItemKind::Flag(ast::Flag::CRLF) => {
1262                    flags.crlf = Some(enable);
1263                }
1264                ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1265            }
1266        }
1267        flags
1268    }
1269
1270    fn merge(&mut self, previous: &Flags) {
1271        if self.case_insensitive.is_none() {
1272            self.case_insensitive = previous.case_insensitive;
1273        }
1274        if self.multi_line.is_none() {
1275            self.multi_line = previous.multi_line;
1276        }
1277        if self.dot_matches_new_line.is_none() {
1278            self.dot_matches_new_line = previous.dot_matches_new_line;
1279        }
1280        if self.swap_greed.is_none() {
1281            self.swap_greed = previous.swap_greed;
1282        }
1283        if self.unicode.is_none() {
1284            self.unicode = previous.unicode;
1285        }
1286        if self.crlf.is_none() {
1287            self.crlf = previous.crlf;
1288        }
1289    }
1290
1291    fn case_insensitive(&self) -> bool {
1292        self.case_insensitive.unwrap_or(false)
1293    }
1294
1295    fn multi_line(&self) -> bool {
1296        self.multi_line.unwrap_or(false)
1297    }
1298
1299    fn dot_matches_new_line(&self) -> bool {
1300        self.dot_matches_new_line.unwrap_or(false)
1301    }
1302
1303    fn swap_greed(&self) -> bool {
1304        self.swap_greed.unwrap_or(false)
1305    }
1306
1307    fn unicode(&self) -> bool {
1308        self.unicode.unwrap_or(true)
1309    }
1310
1311    fn crlf(&self) -> bool {
1312        self.crlf.unwrap_or(false)
1313    }
1314}
1315
1316fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1317    let ranges: Vec<_> = ascii_class(kind)
1318        .map(|(s, e)| hir::ClassBytesRange::new(s, e))
1319        .collect();
1320    hir::ClassBytes::new(ranges)
1321}
1322
/// Return an iterator over the inclusive `(start, end)` byte ranges that make
/// up the given ASCII class.
///
/// The ranges come from a static table; within each entry they are listed in
/// ascending order and do not overlap.
fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator<Item = (u8, u8)> {
    use crate::ast::ClassAsciiKind::*;

    let slice: &'static [(u8, u8)] = match *kind {
        Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')],
        Alpha => &[(b'A', b'Z'), (b'a', b'z')],
        Ascii => &[(b'\x00', b'\x7F')],
        Blank => &[(b'\t', b'\t'), (b' ', b' ')],
        Cntrl => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')],
        Digit => &[(b'0', b'9')],
        Graph => &[(b'!', b'~')],
        Lower => &[(b'a', b'z')],
        Print => &[(b' ', b'~')],
        Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')],
        Space => &[
            (b'\t', b'\t'),
            (b'\n', b'\n'),
            (b'\x0B', b'\x0B'),
            (b'\x0C', b'\x0C'),
            (b'\r', b'\r'),
            (b' ', b' '),
        ],
        Upper => &[(b'A', b'Z')],
        Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')],
        Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')],
    };
    // Yield the table entries by value.
    slice.iter().copied()
}
1351
1352fn ascii_class_as_chars(
1353    kind: &ast::ClassAsciiKind,
1354) -> impl Iterator<Item = (char, char)> {
1355    ascii_class(kind).map(|(s, e)| (char::from(s), char::from(e)))
1356}
1357
1358#[cfg(test)]
1359mod tests {
1360    use crate::{
1361        ast::{parse::ParserBuilder, Position},
1362        hir::{Look, Properties},
1363    };
1364
1365    use super::*;
1366
    // We create these errors to compare with real hir::Errors in the tests.
    // We define equality between TestError and hir::Error to disregard the
    // pattern string in hir::Error, which is annoying to provide in tests.
    //
    // Only the span and the kind of the error take part in the comparison.
    #[derive(Clone, Debug)]
    struct TestError {
        // Where in the pattern the error is expected to be reported.
        span: Span,
        // The kind of error that is expected.
        kind: hir::ErrorKind,
    }
1375
1376    impl PartialEq<hir::Error> for TestError {
1377        fn eq(&self, other: &hir::Error) -> bool {
1378            self.span == other.span && self.kind == other.kind
1379        }
1380    }
1381
1382    impl PartialEq<TestError> for hir::Error {
1383        fn eq(&self, other: &TestError) -> bool {
1384            self.span == other.span && self.kind == other.kind
1385        }
1386    }
1387
    /// Parse `pattern` into an `Ast` using a parser with octal syntax
    /// enabled.
    ///
    /// Panics if the pattern fails to parse.
    fn parse(pattern: &str) -> Ast {
        ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
    }
1391
1392    fn t(pattern: &str) -> Hir {
1393        TranslatorBuilder::new()
1394            .utf8(true)
1395            .build()
1396            .translate(pattern, &parse(pattern))
1397            .unwrap()
1398    }
1399
1400    fn t_err(pattern: &str) -> hir::Error {
1401        TranslatorBuilder::new()
1402            .utf8(true)
1403            .build()
1404            .translate(pattern, &parse(pattern))
1405            .unwrap_err()
1406    }
1407
1408    fn t_bytes(pattern: &str) -> Hir {
1409        TranslatorBuilder::new()
1410            .utf8(false)
1411            .build()
1412            .translate(pattern, &parse(pattern))
1413            .unwrap()
1414    }
1415
1416    fn props(pattern: &str) -> Properties {
1417        t(pattern).properties().clone()
1418    }
1419
1420    fn props_bytes(pattern: &str) -> Properties {
1421        t_bytes(pattern).properties().clone()
1422    }
1423
1424    fn hir_lit(s: &str) -> Hir {
1425        hir_blit(s.as_bytes())
1426    }
1427
    /// Build a literal HIR expression from the raw bytes in `s`.
    fn hir_blit(s: &[u8]) -> Hir {
        Hir::literal(s)
    }
1431
1432    fn hir_capture(index: u32, expr: Hir) -> Hir {
1433        Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) })
1434    }
1435
1436    fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir {
1437        Hir::capture(hir::Capture {
1438            index,
1439            name: Some(name.into()),
1440            sub: Box::new(expr),
1441        })
1442    }
1443
1444    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1445        Hir::repetition(hir::Repetition {
1446            min: 0,
1447            max: Some(1),
1448            greedy,
1449            sub: Box::new(expr),
1450        })
1451    }
1452
1453    fn hir_star(greedy: bool, expr: Hir) -> Hir {
1454        Hir::repetition(hir::Repetition {
1455            min: 0,
1456            max: None,
1457            greedy,
1458            sub: Box::new(expr),
1459        })
1460    }
1461
1462    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1463        Hir::repetition(hir::Repetition {
1464            min: 1,
1465            max: None,
1466            greedy,
1467            sub: Box::new(expr),
1468        })
1469    }
1470
1471    fn hir_range(greedy: bool, min: u32, max: Option<u32>, expr: Hir) -> Hir {
1472        Hir::repetition(hir::Repetition {
1473            min,
1474            max,
1475            greedy,
1476            sub: Box::new(expr),
1477        })
1478    }
1479
    /// Build an alternation HIR expression from the given branches.
    fn hir_alt(alts: Vec<Hir>) -> Hir {
        Hir::alternation(alts)
    }
1483
    /// Build a concatenation HIR expression from the given sub-expressions.
    fn hir_cat(exprs: Vec<Hir>) -> Hir {
        Hir::concat(exprs)
    }
1487
1488    #[allow(dead_code)]
1489    fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1490        Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1491    }
1492
1493    #[allow(dead_code)]
1494    fn hir_uclass_perl_word() -> Hir {
1495        Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1496    }
1497
1498    fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir {
1499        Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(
1500            ascii_class_as_chars(kind)
1501                .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)),
1502        )))
1503    }
1504
1505    fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir {
1506        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(
1507            ascii_class(kind).map(|(s, e)| hir::ClassBytesRange::new(s, e)),
1508        )))
1509    }
1510
    /// Build a Unicode class HIR expression from inclusive `char` ranges.
    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
        Hir::class(uclass(ranges))
    }
1514
    /// Build a byte class HIR expression from inclusive byte ranges.
    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
        Hir::class(bclass(ranges))
    }
1518
1519    fn hir_case_fold(expr: Hir) -> Hir {
1520        match expr.into_kind() {
1521            HirKind::Class(mut cls) => {
1522                cls.case_fold_simple();
1523                Hir::class(cls)
1524            }
1525            _ => panic!("cannot case fold non-class Hir expr"),
1526        }
1527    }
1528
1529    fn hir_negate(expr: Hir) -> Hir {
1530        match expr.into_kind() {
1531            HirKind::Class(mut cls) => {
1532                cls.negate();
1533                Hir::class(cls)
1534            }
1535            _ => panic!("cannot negate non-class Hir expr"),
1536        }
1537    }
1538
1539    fn uclass(ranges: &[(char, char)]) -> hir::Class {
1540        let ranges: Vec<hir::ClassUnicodeRange> = ranges
1541            .iter()
1542            .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1543            .collect();
1544        hir::Class::Unicode(hir::ClassUnicode::new(ranges))
1545    }
1546
1547    fn bclass(ranges: &[(u8, u8)]) -> hir::Class {
1548        let ranges: Vec<hir::ClassBytesRange> = ranges
1549            .iter()
1550            .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1551            .collect();
1552        hir::Class::Bytes(hir::ClassBytes::new(ranges))
1553    }
1554
    /// Apply simple case folding to `cls` and wrap it in a class expression.
    ///
    /// Only compiled when the `unicode-case` feature is enabled.
    #[cfg(feature = "unicode-case")]
    fn class_case_fold(mut cls: hir::Class) -> Hir {
        cls.case_fold_simple();
        Hir::class(cls)
    }
1560
    /// Negate `cls` and wrap it in a class expression.
    fn class_negate(mut cls: hir::Class) -> Hir {
        cls.negate();
        Hir::class(cls)
    }
1565
1566    #[allow(dead_code)]
1567    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1568        use crate::hir::Class::{Bytes, Unicode};
1569
1570        match (expr1.into_kind(), expr2.into_kind()) {
1571            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1572                c1.union(&c2);
1573                Hir::class(hir::Class::Unicode(c1))
1574            }
1575            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1576                c1.union(&c2);
1577                Hir::class(hir::Class::Bytes(c1))
1578            }
1579            _ => panic!("cannot union non-class Hir exprs"),
1580        }
1581    }
1582
1583    #[allow(dead_code)]
1584    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1585        use crate::hir::Class::{Bytes, Unicode};
1586
1587        match (expr1.into_kind(), expr2.into_kind()) {
1588            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1589                c1.difference(&c2);
1590                Hir::class(hir::Class::Unicode(c1))
1591            }
1592            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1593                c1.difference(&c2);
1594                Hir::class(hir::Class::Bytes(c1))
1595            }
1596            _ => panic!("cannot difference non-class Hir exprs"),
1597        }
1598    }
1599
    /// Build a look-around HIR expression from the given assertion.
    fn hir_look(look: hir::Look) -> Hir {
        Hir::look(look)
    }
1603
    // Checks how patterns containing empty sub-expressions translate: the
    // empty pattern, empty groups and alternations with empty branches.
    #[test]
    fn empty() {
        // Empty pattern, flags only, and empty groups.
        assert_eq!(t(""), Hir::empty());
        assert_eq!(t("(?i)"), Hir::empty());
        assert_eq!(t("()"), hir_capture(1, Hir::empty()));
        assert_eq!(t("(?:)"), Hir::empty());
        assert_eq!(t("(?P<wat>)"), hir_capture_name(1, "wat", Hir::empty()));
        // Alternations whose branches are empty.
        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
        assert_eq!(
            t("()|()"),
            hir_alt(vec![
                hir_capture(1, Hir::empty()),
                hir_capture(2, Hir::empty()),
            ])
        );
        // Empty branch at the start, at the end and in the middle.
        assert_eq!(
            t("(|b)"),
            hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
        );
        assert_eq!(
            t("(a|)"),
            hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
        );
        assert_eq!(
            t("(a||c)"),
            hir_capture(
                1,
                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
            )
        );
        assert_eq!(
            t("(||)"),
            hir_capture(
                1,
                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
            )
        );
    }
1642
    // Checks translation of plain literals in both UTF-8 and byte mode, with
    // Unicode mode enabled and disabled.
    #[test]
    fn literal() {
        // UTF-8 mode: literals translate to their UTF-8 bytes.
        assert_eq!(t("a"), hir_lit("a"));
        assert_eq!(t("(?-u)a"), hir_lit("a"));
        assert_eq!(t("☃"), hir_lit("☃"));
        assert_eq!(t("abcd"), hir_lit("abcd"));

        // Byte mode: hex escapes become single bytes, including non-ASCII
        // ones.
        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

        // UTF-8 mode with Unicode disabled: a non-ASCII character is still a
        // literal, but a non-ASCII byte escape is rejected.
        assert_eq!(t("(?-u)☃"), hir_lit("☃"));
        assert_eq!(
            t_err(r"(?-u)\xFF"),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(9, 1, 10)
                ),
            }
        );
    }
1667
1668    #[test]
1669    fn literal_case_insensitive() {
1670        #[cfg(feature = "unicode-case")]
1671        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
1672        #[cfg(feature = "unicode-case")]
1673        assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
1674        #[cfg(feature = "unicode-case")]
1675        assert_eq!(
1676            t("a(?i)a(?-i)a"),
1677            hir_cat(vec![
1678                hir_lit("a"),
1679                hir_uclass(&[('A', 'A'), ('a', 'a')]),
1680                hir_lit("a"),
1681            ])
1682        );
1683        #[cfg(feature = "unicode-case")]
1684        assert_eq!(
1685            t("(?i)ab@c"),
1686            hir_cat(vec![
1687                hir_uclass(&[('A', 'A'), ('a', 'a')]),
1688                hir_uclass(&[('B', 'B'), ('b', 'b')]),
1689                hir_lit("@"),
1690                hir_uclass(&[('C', 'C'), ('c', 'c')]),
1691            ])
1692        );
1693        #[cfg(feature = "unicode-case")]
1694        assert_eq!(
1695            t("(?i)β"),
1696            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
1697        );
1698
1699        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
1700        #[cfg(feature = "unicode-case")]
1701        assert_eq!(
1702            t("(?-u)a(?i)a(?-i)a"),
1703            hir_cat(vec![
1704                hir_lit("a"),
1705                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1706                hir_lit("a"),
1707            ])
1708        );
1709        assert_eq!(
1710            t("(?i-u)ab@c"),
1711            hir_cat(vec![
1712                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1713                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
1714                hir_lit("@"),
1715                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
1716            ])
1717        );
1718
1719        assert_eq!(
1720            t_bytes("(?i-u)a"),
1721            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1722        );
1723        assert_eq!(
1724            t_bytes("(?i-u)\x61"),
1725            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1726        );
1727        assert_eq!(
1728            t_bytes(r"(?i-u)\x61"),
1729            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1730        );
1731        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
1732
1733        assert_eq!(t("(?i-u)β"), hir_lit("β"),);
1734    }
1735
1736    #[test]
1737    fn dot() {
1738        assert_eq!(
1739            t("."),
1740            hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')])
1741        );
1742        assert_eq!(
1743            t("(?R)."),
1744            hir_uclass(&[
1745                ('\0', '\t'),
1746                ('\x0B', '\x0C'),
1747                ('\x0E', '\u{10FFFF}'),
1748            ])
1749        );
1750        assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
1751        assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
1752        assert_eq!(
1753            t_bytes("(?-u)."),
1754            hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])
1755        );
1756        assert_eq!(
1757            t_bytes("(?R-u)."),
1758            hir_bclass(&[
1759                (b'\0', b'\t'),
1760                (b'\x0B', b'\x0C'),
1761                (b'\x0E', b'\xFF'),
1762            ])
1763        );
1764        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
1765        assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
1766
1767        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
1768        assert_eq!(
1769            t_err("(?-u)."),
1770            TestError {
1771                kind: hir::ErrorKind::InvalidUtf8,
1772                span: Span::new(
1773                    Position::new(5, 1, 6),
1774                    Position::new(6, 1, 7)
1775                ),
1776            }
1777        );
1778        assert_eq!(
1779            t_err("(?R-u)."),
1780            TestError {
1781                kind: hir::ErrorKind::InvalidUtf8,
1782                span: Span::new(
1783                    Position::new(6, 1, 7),
1784                    Position::new(7, 1, 8)
1785                ),
1786            }
1787        );
1788        assert_eq!(
1789            t_err("(?s-u)."),
1790            TestError {
1791                kind: hir::ErrorKind::InvalidUtf8,
1792                span: Span::new(
1793                    Position::new(6, 1, 7),
1794                    Position::new(7, 1, 8)
1795                ),
1796            }
1797        );
1798        assert_eq!(
1799            t_err("(?Rs-u)."),
1800            TestError {
1801                kind: hir::ErrorKind::InvalidUtf8,
1802                span: Span::new(
1803                    Position::new(7, 1, 8),
1804                    Position::new(8, 1, 9)
1805                ),
1806            }
1807        );
1808    }
1809
1810    #[test]
1811    fn assertions() {
1812        assert_eq!(t("^"), hir_look(hir::Look::Start));
1813        assert_eq!(t("$"), hir_look(hir::Look::End));
1814        assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
1815        assert_eq!(t(r"\z"), hir_look(hir::Look::End));
1816        assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF));
1817        assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
1818        assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
1819        assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
1820
1821        assert_eq!(t(r"\b"), hir_look(hir::Look::WordUnicode));
1822        assert_eq!(t(r"\B"), hir_look(hir::Look::WordUnicodeNegate));
1823        assert_eq!(t(r"(?-u)\b"), hir_look(hir::Look::WordAscii));
1824        assert_eq!(t(r"(?-u)\B"), hir_look(hir::Look::WordAsciiNegate));
1825    }
1826
1827    #[test]
1828    fn group() {
1829        assert_eq!(t("(a)"), hir_capture(1, hir_lit("a")));
1830        assert_eq!(
1831            t("(a)(b)"),
1832            hir_cat(vec![
1833                hir_capture(1, hir_lit("a")),
1834                hir_capture(2, hir_lit("b")),
1835            ])
1836        );
1837        assert_eq!(
1838            t("(a)|(b)"),
1839            hir_alt(vec![
1840                hir_capture(1, hir_lit("a")),
1841                hir_capture(2, hir_lit("b")),
1842            ])
1843        );
1844        assert_eq!(t("(?P<foo>)"), hir_capture_name(1, "foo", Hir::empty()));
1845        assert_eq!(t("(?P<foo>a)"), hir_capture_name(1, "foo", hir_lit("a")));
1846        assert_eq!(
1847            t("(?P<foo>a)(?P<bar>b)"),
1848            hir_cat(vec![
1849                hir_capture_name(1, "foo", hir_lit("a")),
1850                hir_capture_name(2, "bar", hir_lit("b")),
1851            ])
1852        );
1853        assert_eq!(t("(?:)"), Hir::empty());
1854        assert_eq!(t("(?:a)"), hir_lit("a"));
1855        assert_eq!(
1856            t("(?:a)(b)"),
1857            hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),])
1858        );
1859        assert_eq!(
1860            t("(a)(?:b)(c)"),
1861            hir_cat(vec![
1862                hir_capture(1, hir_lit("a")),
1863                hir_lit("b"),
1864                hir_capture(2, hir_lit("c")),
1865            ])
1866        );
1867        assert_eq!(
1868            t("(a)(?P<foo>b)(c)"),
1869            hir_cat(vec![
1870                hir_capture(1, hir_lit("a")),
1871                hir_capture_name(2, "foo", hir_lit("b")),
1872                hir_capture(3, hir_lit("c")),
1873            ])
1874        );
1875        assert_eq!(t("()"), hir_capture(1, Hir::empty()));
1876        assert_eq!(t("((?i))"), hir_capture(1, Hir::empty()));
1877        assert_eq!(t("((?x))"), hir_capture(1, Hir::empty()));
1878        assert_eq!(
1879            t("(((?x)))"),
1880            hir_capture(1, hir_capture(2, Hir::empty()))
1881        );
1882    }
1883
1884    #[test]
1885    fn line_anchors() {
1886        assert_eq!(t("^"), hir_look(hir::Look::Start));
1887        assert_eq!(t("$"), hir_look(hir::Look::End));
1888        assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
1889        assert_eq!(t(r"\z"), hir_look(hir::Look::End));
1890
1891        assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
1892        assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
1893        assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF));
1894        assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
1895
1896        assert_eq!(t(r"(?R)\A"), hir_look(hir::Look::Start));
1897        assert_eq!(t(r"(?R)\z"), hir_look(hir::Look::End));
1898        assert_eq!(t("(?R)^"), hir_look(hir::Look::Start));
1899        assert_eq!(t("(?R)$"), hir_look(hir::Look::End));
1900
1901        assert_eq!(t(r"(?Rm)\A"), hir_look(hir::Look::Start));
1902        assert_eq!(t(r"(?Rm)\z"), hir_look(hir::Look::End));
1903        assert_eq!(t("(?Rm)^"), hir_look(hir::Look::StartCRLF));
1904        assert_eq!(t("(?Rm)$"), hir_look(hir::Look::EndCRLF));
1905    }
1906
1907    #[test]
1908    fn flags() {
1909        #[cfg(feature = "unicode-case")]
1910        assert_eq!(
1911            t("(?i:a)a"),
1912            hir_cat(
1913                vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),]
1914            )
1915        );
1916        assert_eq!(
1917            t("(?i-u:a)β"),
1918            hir_cat(vec![
1919                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1920                hir_lit("β"),
1921            ])
1922        );
1923        assert_eq!(
1924            t("(?:(?i-u)a)b"),
1925            hir_cat(vec![
1926                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1927                hir_lit("b"),
1928            ])
1929        );
1930        assert_eq!(
1931            t("((?i-u)a)b"),
1932            hir_cat(vec![
1933                hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1934                hir_lit("b"),
1935            ])
1936        );
1937        #[cfg(feature = "unicode-case")]
1938        assert_eq!(
1939            t("(?i)(?-i:a)a"),
1940            hir_cat(
1941                vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),]
1942            )
1943        );
1944        #[cfg(feature = "unicode-case")]
1945        assert_eq!(
1946            t("(?im)a^"),
1947            hir_cat(vec![
1948                hir_uclass(&[('A', 'A'), ('a', 'a')]),
1949                hir_look(hir::Look::StartLF),
1950            ])
1951        );
1952        #[cfg(feature = "unicode-case")]
1953        assert_eq!(
1954            t("(?im)a^(?i-m)a^"),
1955            hir_cat(vec![
1956                hir_uclass(&[('A', 'A'), ('a', 'a')]),
1957                hir_look(hir::Look::StartLF),
1958                hir_uclass(&[('A', 'A'), ('a', 'a')]),
1959                hir_look(hir::Look::Start),
1960            ])
1961        );
1962        assert_eq!(
1963            t("(?U)a*a*?(?-U)a*a*?"),
1964            hir_cat(vec![
1965                hir_star(false, hir_lit("a")),
1966                hir_star(true, hir_lit("a")),
1967                hir_star(true, hir_lit("a")),
1968                hir_star(false, hir_lit("a")),
1969            ])
1970        );
1971        #[cfg(feature = "unicode-case")]
1972        assert_eq!(
1973            t("(?:a(?i)a)a"),
1974            hir_cat(vec![
1975                hir_cat(vec![
1976                    hir_lit("a"),
1977                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
1978                ]),
1979                hir_lit("a"),
1980            ])
1981        );
1982        #[cfg(feature = "unicode-case")]
1983        assert_eq!(
1984            t("(?i)(?:a(?-i)a)a"),
1985            hir_cat(vec![
1986                hir_cat(vec![
1987                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
1988                    hir_lit("a"),
1989                ]),
1990                hir_uclass(&[('A', 'A'), ('a', 'a')]),
1991            ])
1992        );
1993    }
1994
1995    #[test]
1996    fn escape() {
1997        assert_eq!(
1998            t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1999            hir_lit(r"\.+*?()|[]{}^$#")
2000        );
2001    }
2002
2003    #[test]
2004    fn repetition() {
2005        assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
2006        assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
2007        assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
2008        assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
2009        assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
2010        assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
2011
2012        assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),));
2013        assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),));
2014        assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),));
2015        assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),));
2016        assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),));
2017        assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),));
2018
2019        assert_eq!(
2020            t("ab?"),
2021            hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
2022        );
2023        assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab"))));
2024        assert_eq!(
2025            t("a|b?"),
2026            hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
2027        );
2028    }
2029
2030    #[test]
2031    fn cat_alt() {
2032        let a = || hir_look(hir::Look::Start);
2033        let b = || hir_look(hir::Look::End);
2034        let c = || hir_look(hir::Look::WordUnicode);
2035        let d = || hir_look(hir::Look::WordUnicodeNegate);
2036
2037        assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()])));
2038        assert_eq!(t("^|$"), hir_alt(vec![a(), b()]));
2039        assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()]));
2040        assert_eq!(
2041            t(r"^$|$\b|\b\B"),
2042            hir_alt(vec![
2043                hir_cat(vec![a(), b()]),
2044                hir_cat(vec![b(), c()]),
2045                hir_cat(vec![c(), d()]),
2046            ])
2047        );
2048        assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()])));
2049        assert_eq!(
2050            t(r"(^|$|\b)"),
2051            hir_capture(1, hir_alt(vec![a(), b(), c()]))
2052        );
2053        assert_eq!(
2054            t(r"(^$|$\b|\b\B)"),
2055            hir_capture(
2056                1,
2057                hir_alt(vec![
2058                    hir_cat(vec![a(), b()]),
2059                    hir_cat(vec![b(), c()]),
2060                    hir_cat(vec![c(), d()]),
2061                ])
2062            )
2063        );
2064        assert_eq!(
2065            t(r"(^$|($\b|(\b\B)))"),
2066            hir_capture(
2067                1,
2068                hir_alt(vec![
2069                    hir_cat(vec![a(), b()]),
2070                    hir_capture(
2071                        2,
2072                        hir_alt(vec![
2073                            hir_cat(vec![b(), c()]),
2074                            hir_capture(3, hir_cat(vec![c(), d()])),
2075                        ])
2076                    ),
2077                ])
2078            )
2079        );
2080    }
2081
    // Tests the HIR transformation of things like '[a-z]|[A-Z]' into
    // '[A-Za-z]'. In other words, an alternation of just classes is always
    // equivalent to a single class corresponding to the union of the branches
    // in that alternation. (Unless some branches match invalid UTF-8 and
    // others match non-ASCII Unicode.)
2087    #[test]
2088    fn cat_class_flattened() {
2089        assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2090        // Combining all of the letter properties should give us the one giant
2091        // letter property.
2092        #[cfg(feature = "unicode-gencat")]
2093        assert_eq!(
2094            t(r"(?x)
2095                \p{Lowercase_Letter}
2096                |\p{Uppercase_Letter}
2097                |\p{Titlecase_Letter}
2098                |\p{Modifier_Letter}
2099                |\p{Other_Letter}
2100            "),
2101            hir_uclass_query(ClassQuery::Binary("letter"))
2102        );
2103        // Byte classes that can truly match invalid UTF-8 cannot be combined
2104        // with Unicode classes.
2105        assert_eq!(
2106            t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"),
2107            hir_alt(vec![
2108                hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]),
2109                hir_bclass(&[(b'\x90', b'\xFF')]),
2110                hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]),
2111            ])
2112        );
2113        // Byte classes on their own can be combined, even if some are ASCII
2114        // and others are invalid UTF-8.
2115        assert_eq!(
2116            t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"),
2117            hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]),
2118        );
2119    }
2120
2121    #[test]
2122    fn class_ascii() {
2123        assert_eq!(
2124            t("[[:alnum:]]"),
2125            hir_ascii_uclass(&ast::ClassAsciiKind::Alnum)
2126        );
2127        assert_eq!(
2128            t("[[:alpha:]]"),
2129            hir_ascii_uclass(&ast::ClassAsciiKind::Alpha)
2130        );
2131        assert_eq!(
2132            t("[[:ascii:]]"),
2133            hir_ascii_uclass(&ast::ClassAsciiKind::Ascii)
2134        );
2135        assert_eq!(
2136            t("[[:blank:]]"),
2137            hir_ascii_uclass(&ast::ClassAsciiKind::Blank)
2138        );
2139        assert_eq!(
2140            t("[[:cntrl:]]"),
2141            hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl)
2142        );
2143        assert_eq!(
2144            t("[[:digit:]]"),
2145            hir_ascii_uclass(&ast::ClassAsciiKind::Digit)
2146        );
2147        assert_eq!(
2148            t("[[:graph:]]"),
2149            hir_ascii_uclass(&ast::ClassAsciiKind::Graph)
2150        );
2151        assert_eq!(
2152            t("[[:lower:]]"),
2153            hir_ascii_uclass(&ast::ClassAsciiKind::Lower)
2154        );
2155        assert_eq!(
2156            t("[[:print:]]"),
2157            hir_ascii_uclass(&ast::ClassAsciiKind::Print)
2158        );
2159        assert_eq!(
2160            t("[[:punct:]]"),
2161            hir_ascii_uclass(&ast::ClassAsciiKind::Punct)
2162        );
2163        assert_eq!(
2164            t("[[:space:]]"),
2165            hir_ascii_uclass(&ast::ClassAsciiKind::Space)
2166        );
2167        assert_eq!(
2168            t("[[:upper:]]"),
2169            hir_ascii_uclass(&ast::ClassAsciiKind::Upper)
2170        );
2171        assert_eq!(
2172            t("[[:word:]]"),
2173            hir_ascii_uclass(&ast::ClassAsciiKind::Word)
2174        );
2175        assert_eq!(
2176            t("[[:xdigit:]]"),
2177            hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit)
2178        );
2179
2180        assert_eq!(
2181            t("[[:^lower:]]"),
2182            hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower))
2183        );
2184        #[cfg(feature = "unicode-case")]
2185        assert_eq!(
2186            t("(?i)[[:lower:]]"),
2187            hir_uclass(&[
2188                ('A', 'Z'),
2189                ('a', 'z'),
2190                ('\u{17F}', '\u{17F}'),
2191                ('\u{212A}', '\u{212A}'),
2192            ])
2193        );
2194
2195        assert_eq!(
2196            t("(?-u)[[:lower:]]"),
2197            hir_ascii_bclass(&ast::ClassAsciiKind::Lower)
2198        );
2199        assert_eq!(
2200            t("(?i-u)[[:lower:]]"),
2201            hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower))
2202        );
2203
2204        assert_eq!(
2205            t_err("(?-u)[[:^lower:]]"),
2206            TestError {
2207                kind: hir::ErrorKind::InvalidUtf8,
2208                span: Span::new(
2209                    Position::new(6, 1, 7),
2210                    Position::new(16, 1, 17)
2211                ),
2212            }
2213        );
2214        assert_eq!(
2215            t_err("(?i-u)[[:^lower:]]"),
2216            TestError {
2217                kind: hir::ErrorKind::InvalidUtf8,
2218                span: Span::new(
2219                    Position::new(7, 1, 8),
2220                    Position::new(17, 1, 18)
2221                ),
2222            }
2223        );
2224    }
2225
2226    #[test]
2227    fn class_ascii_multiple() {
2228        // See: https://github.com/rust-lang/regex/issues/680
2229        assert_eq!(
2230            t("[[:alnum:][:^ascii:]]"),
2231            hir_union(
2232                hir_ascii_uclass(&ast::ClassAsciiKind::Alnum),
2233                hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
2234            ),
2235        );
2236        assert_eq!(
2237            t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
2238            hir_union(
2239                hir_ascii_bclass(&ast::ClassAsciiKind::Alnum),
2240                hir_bclass(&[(0x80, 0xFF)]),
2241            ),
2242        );
2243    }
2244
2245    #[test]
2246    #[cfg(feature = "unicode-perl")]
2247    fn class_perl_unicode() {
2248        // Unicode
2249        assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
2250        assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
2251        assert_eq!(t(r"\w"), hir_uclass_perl_word());
2252        #[cfg(feature = "unicode-case")]
2253        assert_eq!(
2254            t(r"(?i)\d"),
2255            hir_uclass_query(ClassQuery::Binary("digit"))
2256        );
2257        #[cfg(feature = "unicode-case")]
2258        assert_eq!(
2259            t(r"(?i)\s"),
2260            hir_uclass_query(ClassQuery::Binary("space"))
2261        );
2262        #[cfg(feature = "unicode-case")]
2263        assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
2264
2265        // Unicode, negated
2266        assert_eq!(
2267            t(r"\D"),
2268            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2269        );
2270        assert_eq!(
2271            t(r"\S"),
2272            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2273        );
2274        assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
2275        #[cfg(feature = "unicode-case")]
2276        assert_eq!(
2277            t(r"(?i)\D"),
2278            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2279        );
2280        #[cfg(feature = "unicode-case")]
2281        assert_eq!(
2282            t(r"(?i)\S"),
2283            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2284        );
2285        #[cfg(feature = "unicode-case")]
2286        assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
2287    }
2288
2289    #[test]
2290    fn class_perl_ascii() {
2291        // ASCII only
2292        assert_eq!(
2293            t(r"(?-u)\d"),
2294            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
2295        );
2296        assert_eq!(
2297            t(r"(?-u)\s"),
2298            hir_ascii_bclass(&ast::ClassAsciiKind::Space)
2299        );
2300        assert_eq!(
2301            t(r"(?-u)\w"),
2302            hir_ascii_bclass(&ast::ClassAsciiKind::Word)
2303        );
2304        assert_eq!(
2305            t(r"(?i-u)\d"),
2306            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
2307        );
2308        assert_eq!(
2309            t(r"(?i-u)\s"),
2310            hir_ascii_bclass(&ast::ClassAsciiKind::Space)
2311        );
2312        assert_eq!(
2313            t(r"(?i-u)\w"),
2314            hir_ascii_bclass(&ast::ClassAsciiKind::Word)
2315        );
2316
2317        // ASCII only, negated
2318        assert_eq!(
2319            t_bytes(r"(?-u)\D"),
2320            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
2321        );
2322        assert_eq!(
2323            t_bytes(r"(?-u)\S"),
2324            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
2325        );
2326        assert_eq!(
2327            t_bytes(r"(?-u)\W"),
2328            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
2329        );
2330        assert_eq!(
2331            t_bytes(r"(?i-u)\D"),
2332            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
2333        );
2334        assert_eq!(
2335            t_bytes(r"(?i-u)\S"),
2336            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
2337        );
2338        assert_eq!(
2339            t_bytes(r"(?i-u)\W"),
2340            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
2341        );
2342
2343        // ASCII only, negated, with UTF-8 mode enabled.
2344        // In this case, negating any Perl class results in an error because
2345        // all such classes can match invalid UTF-8.
2346        assert_eq!(
2347            t_err(r"(?-u)\D"),
2348            TestError {
2349                kind: hir::ErrorKind::InvalidUtf8,
2350                span: Span::new(
2351                    Position::new(5, 1, 6),
2352                    Position::new(7, 1, 8),
2353                ),
2354            },
2355        );
2356        assert_eq!(
2357            t_err(r"(?-u)\S"),
2358            TestError {
2359                kind: hir::ErrorKind::InvalidUtf8,
2360                span: Span::new(
2361                    Position::new(5, 1, 6),
2362                    Position::new(7, 1, 8),
2363                ),
2364            },
2365        );
2366        assert_eq!(
2367            t_err(r"(?-u)\W"),
2368            TestError {
2369                kind: hir::ErrorKind::InvalidUtf8,
2370                span: Span::new(
2371                    Position::new(5, 1, 6),
2372                    Position::new(7, 1, 8),
2373                ),
2374            },
2375        );
2376        assert_eq!(
2377            t_err(r"(?i-u)\D"),
2378            TestError {
2379                kind: hir::ErrorKind::InvalidUtf8,
2380                span: Span::new(
2381                    Position::new(6, 1, 7),
2382                    Position::new(8, 1, 9),
2383                ),
2384            },
2385        );
2386        assert_eq!(
2387            t_err(r"(?i-u)\S"),
2388            TestError {
2389                kind: hir::ErrorKind::InvalidUtf8,
2390                span: Span::new(
2391                    Position::new(6, 1, 7),
2392                    Position::new(8, 1, 9),
2393                ),
2394            },
2395        );
2396        assert_eq!(
2397            t_err(r"(?i-u)\W"),
2398            TestError {
2399                kind: hir::ErrorKind::InvalidUtf8,
2400                span: Span::new(
2401                    Position::new(6, 1, 7),
2402                    Position::new(8, 1, 9),
2403                ),
2404            },
2405        );
2406    }
2407
2408    #[test]
2409    #[cfg(not(feature = "unicode-perl"))]
2410    fn class_perl_word_disabled() {
2411        assert_eq!(
2412            t_err(r"\w"),
2413            TestError {
2414                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2415                span: Span::new(
2416                    Position::new(0, 1, 1),
2417                    Position::new(2, 1, 3)
2418                ),
2419            }
2420        );
2421    }
2422
2423    #[test]
2424    #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
2425    fn class_perl_space_disabled() {
2426        assert_eq!(
2427            t_err(r"\s"),
2428            TestError {
2429                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2430                span: Span::new(
2431                    Position::new(0, 1, 1),
2432                    Position::new(2, 1, 3)
2433                ),
2434            }
2435        );
2436    }
2437
2438    #[test]
2439    #[cfg(all(
2440        not(feature = "unicode-perl"),
2441        not(feature = "unicode-gencat")
2442    ))]
2443    fn class_perl_digit_disabled() {
2444        assert_eq!(
2445            t_err(r"\d"),
2446            TestError {
2447                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2448                span: Span::new(
2449                    Position::new(0, 1, 1),
2450                    Position::new(2, 1, 3)
2451                ),
2452            }
2453        );
2454    }
2455
2456    #[test]
2457    #[cfg(feature = "unicode-gencat")]
2458    fn class_unicode_gencat() {
2459        assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2460        assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2461        assert_eq!(
2462            t(r"\p{Separator}"),
2463            hir_uclass_query(ClassQuery::Binary("Z"))
2464        );
2465        assert_eq!(
2466            t(r"\p{se      PaRa ToR}"),
2467            hir_uclass_query(ClassQuery::Binary("Z"))
2468        );
2469        assert_eq!(
2470            t(r"\p{gc:Separator}"),
2471            hir_uclass_query(ClassQuery::Binary("Z"))
2472        );
2473        assert_eq!(
2474            t(r"\p{gc=Separator}"),
2475            hir_uclass_query(ClassQuery::Binary("Z"))
2476        );
2477        assert_eq!(
2478            t(r"\p{Other}"),
2479            hir_uclass_query(ClassQuery::Binary("Other"))
2480        );
2481        assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2482
2483        assert_eq!(
2484            t(r"\PZ"),
2485            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2486        );
2487        assert_eq!(
2488            t(r"\P{separator}"),
2489            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2490        );
2491        assert_eq!(
2492            t(r"\P{gc!=separator}"),
2493            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2494        );
2495
2496        assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2497        assert_eq!(
2498            t(r"\p{assigned}"),
2499            hir_uclass_query(ClassQuery::Binary("Assigned"))
2500        );
2501        assert_eq!(
2502            t(r"\p{ascii}"),
2503            hir_uclass_query(ClassQuery::Binary("ASCII"))
2504        );
2505        assert_eq!(
2506            t(r"\p{gc:any}"),
2507            hir_uclass_query(ClassQuery::Binary("Any"))
2508        );
2509        assert_eq!(
2510            t(r"\p{gc:assigned}"),
2511            hir_uclass_query(ClassQuery::Binary("Assigned"))
2512        );
2513        assert_eq!(
2514            t(r"\p{gc:ascii}"),
2515            hir_uclass_query(ClassQuery::Binary("ASCII"))
2516        );
2517
2518        assert_eq!(
2519            t_err(r"(?-u)\pZ"),
2520            TestError {
2521                kind: hir::ErrorKind::UnicodeNotAllowed,
2522                span: Span::new(
2523                    Position::new(5, 1, 6),
2524                    Position::new(8, 1, 9)
2525                ),
2526            }
2527        );
2528        assert_eq!(
2529            t_err(r"(?-u)\p{Separator}"),
2530            TestError {
2531                kind: hir::ErrorKind::UnicodeNotAllowed,
2532                span: Span::new(
2533                    Position::new(5, 1, 6),
2534                    Position::new(18, 1, 19)
2535                ),
2536            }
2537        );
2538        assert_eq!(
2539            t_err(r"\pE"),
2540            TestError {
2541                kind: hir::ErrorKind::UnicodePropertyNotFound,
2542                span: Span::new(
2543                    Position::new(0, 1, 1),
2544                    Position::new(3, 1, 4)
2545                ),
2546            }
2547        );
2548        assert_eq!(
2549            t_err(r"\p{Foo}"),
2550            TestError {
2551                kind: hir::ErrorKind::UnicodePropertyNotFound,
2552                span: Span::new(
2553                    Position::new(0, 1, 1),
2554                    Position::new(7, 1, 8)
2555                ),
2556            }
2557        );
2558        assert_eq!(
2559            t_err(r"\p{gc:Foo}"),
2560            TestError {
2561                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2562                span: Span::new(
2563                    Position::new(0, 1, 1),
2564                    Position::new(10, 1, 11)
2565                ),
2566            }
2567        );
2568    }
2569
2570    #[test]
2571    #[cfg(not(feature = "unicode-gencat"))]
2572    fn class_unicode_gencat_disabled() {
2573        assert_eq!(
2574            t_err(r"\p{Separator}"),
2575            TestError {
2576                kind: hir::ErrorKind::UnicodePropertyNotFound,
2577                span: Span::new(
2578                    Position::new(0, 1, 1),
2579                    Position::new(13, 1, 14)
2580                ),
2581            }
2582        );
2583
2584        assert_eq!(
2585            t_err(r"\p{Any}"),
2586            TestError {
2587                kind: hir::ErrorKind::UnicodePropertyNotFound,
2588                span: Span::new(
2589                    Position::new(0, 1, 1),
2590                    Position::new(7, 1, 8)
2591                ),
2592            }
2593        );
2594    }
2595
2596    #[test]
2597    #[cfg(feature = "unicode-script")]
2598    fn class_unicode_script() {
2599        assert_eq!(
2600            t(r"\p{Greek}"),
2601            hir_uclass_query(ClassQuery::Binary("Greek"))
2602        );
2603        #[cfg(feature = "unicode-case")]
2604        assert_eq!(
2605            t(r"(?i)\p{Greek}"),
2606            hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2607        );
2608        #[cfg(feature = "unicode-case")]
2609        assert_eq!(
2610            t(r"(?i)\P{Greek}"),
2611            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2612                "Greek"
2613            ))))
2614        );
2615
2616        assert_eq!(
2617            t_err(r"\p{sc:Foo}"),
2618            TestError {
2619                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2620                span: Span::new(
2621                    Position::new(0, 1, 1),
2622                    Position::new(10, 1, 11)
2623                ),
2624            }
2625        );
2626        assert_eq!(
2627            t_err(r"\p{scx:Foo}"),
2628            TestError {
2629                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2630                span: Span::new(
2631                    Position::new(0, 1, 1),
2632                    Position::new(11, 1, 12)
2633                ),
2634            }
2635        );
2636    }
2637
2638    #[test]
2639    #[cfg(not(feature = "unicode-script"))]
2640    fn class_unicode_script_disabled() {
2641        assert_eq!(
2642            t_err(r"\p{Greek}"),
2643            TestError {
2644                kind: hir::ErrorKind::UnicodePropertyNotFound,
2645                span: Span::new(
2646                    Position::new(0, 1, 1),
2647                    Position::new(9, 1, 10)
2648                ),
2649            }
2650        );
2651
2652        assert_eq!(
2653            t_err(r"\p{scx:Greek}"),
2654            TestError {
2655                kind: hir::ErrorKind::UnicodePropertyNotFound,
2656                span: Span::new(
2657                    Position::new(0, 1, 1),
2658                    Position::new(13, 1, 14)
2659                ),
2660            }
2661        );
2662    }
2663
2664    #[test]
2665    #[cfg(feature = "unicode-age")]
2666    fn class_unicode_age() {
2667        assert_eq!(
2668            t_err(r"\p{age:Foo}"),
2669            TestError {
2670                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2671                span: Span::new(
2672                    Position::new(0, 1, 1),
2673                    Position::new(11, 1, 12)
2674                ),
2675            }
2676        );
2677    }
2678
2679    #[test]
2680    #[cfg(feature = "unicode-gencat")]
2681    fn class_unicode_any_empty() {
2682        assert_eq!(t(r"\P{any}"), hir_uclass(&[]),);
2683    }
2684
2685    #[test]
2686    #[cfg(not(feature = "unicode-age"))]
2687    fn class_unicode_age_disabled() {
2688        assert_eq!(
2689            t_err(r"\p{age:3.0}"),
2690            TestError {
2691                kind: hir::ErrorKind::UnicodePropertyNotFound,
2692                span: Span::new(
2693                    Position::new(0, 1, 1),
2694                    Position::new(11, 1, 12)
2695                ),
2696            }
2697        );
2698    }
2699
2700    #[test]
2701    fn class_bracketed() {
2702        assert_eq!(t("[a]"), hir_lit("a"));
2703        assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')]));
2704        assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')])));
2705        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2706        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2707        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2708        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2709        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2710        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
2711        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2712        assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2713        #[cfg(feature = "unicode-gencat")]
2714        assert_eq!(
2715            t(r"[\pZ]"),
2716            hir_uclass_query(ClassQuery::Binary("separator"))
2717        );
2718        #[cfg(feature = "unicode-gencat")]
2719        assert_eq!(
2720            t(r"[\p{separator}]"),
2721            hir_uclass_query(ClassQuery::Binary("separator"))
2722        );
2723        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2724        assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2725        #[cfg(feature = "unicode-gencat")]
2726        assert_eq!(
2727            t(r"[^\PZ]"),
2728            hir_uclass_query(ClassQuery::Binary("separator"))
2729        );
2730        #[cfg(feature = "unicode-gencat")]
2731        assert_eq!(
2732            t(r"[^\P{separator}]"),
2733            hir_uclass_query(ClassQuery::Binary("separator"))
2734        );
2735        #[cfg(all(
2736            feature = "unicode-case",
2737            any(feature = "unicode-perl", feature = "unicode-gencat")
2738        ))]
2739        assert_eq!(
2740            t(r"(?i)[^\D]"),
2741            hir_uclass_query(ClassQuery::Binary("digit"))
2742        );
2743        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2744        assert_eq!(
2745            t(r"(?i)[^\P{greek}]"),
2746            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2747        );
2748
2749        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2750        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2751        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2752
2753        #[cfg(feature = "unicode-case")]
2754        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2755        #[cfg(feature = "unicode-case")]
2756        assert_eq!(
2757            t("(?i)[k]"),
2758            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2759        );
2760        #[cfg(feature = "unicode-case")]
2761        assert_eq!(
2762            t("(?i)[β]"),
2763            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2764        );
2765        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2766
2767        assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')])));
2768        assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')])));
2769        assert_eq!(
2770            t_bytes("(?-u)[^a]"),
2771            class_negate(bclass(&[(b'a', b'a')]))
2772        );
2773        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2774        assert_eq!(
2775            t(r"[^\d]"),
2776            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2777        );
2778        #[cfg(feature = "unicode-gencat")]
2779        assert_eq!(
2780            t(r"[^\pZ]"),
2781            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2782        );
2783        #[cfg(feature = "unicode-gencat")]
2784        assert_eq!(
2785            t(r"[^\p{separator}]"),
2786            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2787        );
2788        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2789        assert_eq!(
2790            t(r"(?i)[^\p{greek}]"),
2791            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2792                "greek"
2793            ))))
2794        );
2795        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2796        assert_eq!(
2797            t(r"(?i)[\P{greek}]"),
2798            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2799                "greek"
2800            ))))
2801        );
2802
2803        // Test some weird cases.
2804        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2805
2806        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2807        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2808        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2809        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2810        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2811
2812        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2813        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2814        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2815        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2816        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2817
2818        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2819        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2820        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2821        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2822        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2823
2824        assert_eq!(
2825            t_err("(?-u)[^a]"),
2826            TestError {
2827                kind: hir::ErrorKind::InvalidUtf8,
2828                span: Span::new(
2829                    Position::new(5, 1, 6),
2830                    Position::new(9, 1, 10)
2831                ),
2832            }
2833        );
2834        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2835        assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),);
2836        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2837        assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),);
2838    }
2839
2840    #[test]
2841    fn class_bracketed_union() {
2842        assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2843        #[cfg(feature = "unicode-gencat")]
2844        assert_eq!(
2845            t(r"[a\pZb]"),
2846            hir_union(
2847                hir_uclass(&[('a', 'b')]),
2848                hir_uclass_query(ClassQuery::Binary("separator"))
2849            )
2850        );
2851        #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2852        assert_eq!(
2853            t(r"[\pZ\p{Greek}]"),
2854            hir_union(
2855                hir_uclass_query(ClassQuery::Binary("greek")),
2856                hir_uclass_query(ClassQuery::Binary("separator"))
2857            )
2858        );
2859        #[cfg(all(
2860            feature = "unicode-age",
2861            feature = "unicode-gencat",
2862            feature = "unicode-script"
2863        ))]
2864        assert_eq!(
2865            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2866            hir_union(
2867                hir_uclass_query(ClassQuery::ByValue {
2868                    property_name: "age",
2869                    property_value: "3.0",
2870                }),
2871                hir_union(
2872                    hir_uclass_query(ClassQuery::Binary("greek")),
2873                    hir_uclass_query(ClassQuery::Binary("separator"))
2874                )
2875            )
2876        );
2877        #[cfg(all(
2878            feature = "unicode-age",
2879            feature = "unicode-gencat",
2880            feature = "unicode-script"
2881        ))]
2882        assert_eq!(
2883            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2884            hir_union(
2885                hir_uclass_query(ClassQuery::ByValue {
2886                    property_name: "age",
2887                    property_value: "3.0",
2888                }),
2889                hir_union(
2890                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
2891                    hir_union(
2892                        hir_uclass_query(ClassQuery::Binary("greek")),
2893                        hir_uclass_query(ClassQuery::Binary("separator"))
2894                    )
2895                )
2896            )
2897        );
2898
2899        #[cfg(all(
2900            feature = "unicode-age",
2901            feature = "unicode-case",
2902            feature = "unicode-gencat",
2903            feature = "unicode-script"
2904        ))]
2905        assert_eq!(
2906            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2907            hir_case_fold(hir_union(
2908                hir_uclass_query(ClassQuery::ByValue {
2909                    property_name: "age",
2910                    property_value: "3.0",
2911                }),
2912                hir_union(
2913                    hir_uclass_query(ClassQuery::Binary("greek")),
2914                    hir_uclass_query(ClassQuery::Binary("separator"))
2915                )
2916            ))
2917        );
2918        #[cfg(all(
2919            feature = "unicode-age",
2920            feature = "unicode-gencat",
2921            feature = "unicode-script"
2922        ))]
2923        assert_eq!(
2924            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2925            hir_negate(hir_union(
2926                hir_uclass_query(ClassQuery::ByValue {
2927                    property_name: "age",
2928                    property_value: "3.0",
2929                }),
2930                hir_union(
2931                    hir_uclass_query(ClassQuery::Binary("greek")),
2932                    hir_uclass_query(ClassQuery::Binary("separator"))
2933                )
2934            ))
2935        );
2936        #[cfg(all(
2937            feature = "unicode-age",
2938            feature = "unicode-case",
2939            feature = "unicode-gencat",
2940            feature = "unicode-script"
2941        ))]
2942        assert_eq!(
2943            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2944            hir_negate(hir_case_fold(hir_union(
2945                hir_uclass_query(ClassQuery::ByValue {
2946                    property_name: "age",
2947                    property_value: "3.0",
2948                }),
2949                hir_union(
2950                    hir_uclass_query(ClassQuery::Binary("greek")),
2951                    hir_uclass_query(ClassQuery::Binary("separator"))
2952                )
2953            )))
2954        );
2955    }
2956
2957    #[test]
2958    fn class_bracketed_nested() {
2959        assert_eq!(t(r"[a[^c]]"), class_negate(uclass(&[('c', 'c')])));
2960        assert_eq!(t(r"[a-b[^c]]"), class_negate(uclass(&[('c', 'c')])));
2961        assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[])));
2962
2963        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2964        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2965
2966        #[cfg(feature = "unicode-case")]
2967        assert_eq!(
2968            t(r"(?i)[a[^c]]"),
2969            hir_negate(class_case_fold(uclass(&[('c', 'c')])))
2970        );
2971        #[cfg(feature = "unicode-case")]
2972        assert_eq!(
2973            t(r"(?i)[a-b[^c]]"),
2974            hir_negate(class_case_fold(uclass(&[('c', 'c')])))
2975        );
2976
2977        #[cfg(feature = "unicode-case")]
2978        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2979        #[cfg(feature = "unicode-case")]
2980        assert_eq!(
2981            t(r"(?i)[^a-b[^c]]"),
2982            hir_uclass(&[('C', 'C'), ('c', 'c')])
2983        );
2984
2985        assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),);
2986        #[cfg(feature = "unicode-case")]
2987        assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),);
2988    }
2989
2990    #[test]
2991    fn class_bracketed_intersect() {
2992        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2993        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2994        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2995        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2996        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2997        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2998        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2999        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
3000        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
3001
3002        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
3003        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
3004        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
3005        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
3006        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
3007        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
3008
3009        #[cfg(feature = "unicode-case")]
3010        assert_eq!(
3011            t("(?i)[abc&&b-c]"),
3012            hir_case_fold(hir_uclass(&[('b', 'c')]))
3013        );
3014        #[cfg(feature = "unicode-case")]
3015        assert_eq!(
3016            t("(?i)[abc&&[b-c]]"),
3017            hir_case_fold(hir_uclass(&[('b', 'c')]))
3018        );
3019        #[cfg(feature = "unicode-case")]
3020        assert_eq!(
3021            t("(?i)[[abc]&&[b-c]]"),
3022            hir_case_fold(hir_uclass(&[('b', 'c')]))
3023        );
3024        #[cfg(feature = "unicode-case")]
3025        assert_eq!(
3026            t("(?i)[a-z&&b-y&&c-x]"),
3027            hir_case_fold(hir_uclass(&[('c', 'x')]))
3028        );
3029        #[cfg(feature = "unicode-case")]
3030        assert_eq!(
3031            t("(?i)[c-da-b&&a-d]"),
3032            hir_case_fold(hir_uclass(&[('a', 'd')]))
3033        );
3034        #[cfg(feature = "unicode-case")]
3035        assert_eq!(
3036            t("(?i)[a-d&&c-da-b]"),
3037            hir_case_fold(hir_uclass(&[('a', 'd')]))
3038        );
3039
3040        assert_eq!(
3041            t("(?i-u)[abc&&b-c]"),
3042            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
3043        );
3044        assert_eq!(
3045            t("(?i-u)[abc&&[b-c]]"),
3046            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
3047        );
3048        assert_eq!(
3049            t("(?i-u)[[abc]&&[b-c]]"),
3050            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
3051        );
3052        assert_eq!(
3053            t("(?i-u)[a-z&&b-y&&c-x]"),
3054            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
3055        );
3056        assert_eq!(
3057            t("(?i-u)[c-da-b&&a-d]"),
3058            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
3059        );
3060        assert_eq!(
3061            t("(?i-u)[a-d&&c-da-b]"),
3062            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
3063        );
3064
3065        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
3066        // `^` is also allowed to be unescaped after `&&`.
3067        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
3068        // `]` needs to be escaped after `&&` since it's not at start of class.
3069        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
3070        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
3071        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
3072        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
3073        // Test precedence.
3074        assert_eq!(
3075            t(r"[a-w&&[^c-g]z]"),
3076            hir_uclass(&[('a', 'b'), ('h', 'w')])
3077        );
3078    }
3079
3080    #[test]
3081    fn class_bracketed_intersect_negate() {
3082        #[cfg(feature = "unicode-perl")]
3083        assert_eq!(
3084            t(r"[^\w&&\d]"),
3085            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
3086        );
3087        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
3088        #[cfg(feature = "unicode-perl")]
3089        assert_eq!(
3090            t(r"[^[\w&&\d]]"),
3091            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
3092        );
3093        #[cfg(feature = "unicode-perl")]
3094        assert_eq!(
3095            t(r"[^[^\w&&\d]]"),
3096            hir_uclass_query(ClassQuery::Binary("digit"))
3097        );
3098        #[cfg(feature = "unicode-perl")]
3099        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
3100
3101        #[cfg(feature = "unicode-perl")]
3102        assert_eq!(
3103            t_bytes(r"(?-u)[^\w&&\d]"),
3104            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
3105        );
3106        assert_eq!(
3107            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
3108            hir_negate(hir_bclass(&[(b'a', b'c')]))
3109        );
3110        assert_eq!(
3111            t_bytes(r"(?-u)[^[\w&&\d]]"),
3112            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
3113        );
3114        assert_eq!(
3115            t_bytes(r"(?-u)[^[^\w&&\d]]"),
3116            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
3117        );
3118        assert_eq!(
3119            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
3120            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
3121        );
3122    }
3123
3124    #[test]
3125    fn class_bracketed_difference() {
3126        #[cfg(feature = "unicode-gencat")]
3127        assert_eq!(
3128            t(r"[\pL--[:ascii:]]"),
3129            hir_difference(
3130                hir_uclass_query(ClassQuery::Binary("letter")),
3131                hir_uclass(&[('\0', '\x7F')])
3132            )
3133        );
3134
3135        assert_eq!(
3136            t(r"(?-u)[[:alpha:]--[:lower:]]"),
3137            hir_bclass(&[(b'A', b'Z')])
3138        );
3139    }
3140
3141    #[test]
3142    fn class_bracketed_symmetric_difference() {
3143        #[cfg(feature = "unicode-script")]
3144        assert_eq!(
3145            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
3146            // Class({
3147            //     '·'..='·',
3148            //     '\u{300}'..='\u{301}',
3149            //     '\u{304}'..='\u{304}',
3150            //     '\u{306}'..='\u{306}',
3151            //     '\u{308}'..='\u{308}',
3152            //     '\u{313}'..='\u{313}',
3153            //     '\u{342}'..='\u{342}',
3154            //     '\u{345}'..='\u{345}',
3155            //     'ʹ'..='ʹ',
3156            //     '\u{1dc0}'..='\u{1dc1}',
3157            //     '⁝'..='⁝',
3158            // })
3159            hir_uclass(&[
3160                ('·', '·'),
3161                ('\u{0300}', '\u{0301}'),
3162                ('\u{0304}', '\u{0304}'),
3163                ('\u{0306}', '\u{0306}'),
3164                ('\u{0308}', '\u{0308}'),
3165                ('\u{0313}', '\u{0313}'),
3166                ('\u{0342}', '\u{0342}'),
3167                ('\u{0345}', '\u{0345}'),
3168                ('ʹ', 'ʹ'),
3169                ('\u{1DC0}', '\u{1DC1}'),
3170                ('⁝', '⁝'),
3171            ])
3172        );
3173        assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
3174
3175        assert_eq!(
3176            t(r"(?-u)[a-g~~c-j]"),
3177            hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
3178        );
3179    }
3180
3181    #[test]
3182    fn ignore_whitespace() {
3183        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
3184        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
3185        assert_eq!(
3186            t(r"(?x)\x # comment
3187{ # comment
3188    53 # comment
3189} #comment"),
3190            hir_lit("S")
3191        );
3192
3193        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
3194        assert_eq!(
3195            t(r"(?x)\x # comment
3196        53 # comment"),
3197            hir_lit("S")
3198        );
3199        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
3200
3201        #[cfg(feature = "unicode-gencat")]
3202        assert_eq!(
3203            t(r"(?x)\p # comment
3204{ # comment
3205    Separator # comment
3206} # comment"),
3207            hir_uclass_query(ClassQuery::Binary("separator"))
3208        );
3209
3210        assert_eq!(
3211            t(r"(?x)a # comment
3212{ # comment
3213    5 # comment
3214    , # comment
3215    10 # comment
3216} # comment"),
3217            hir_range(true, 5, Some(10), hir_lit("a"))
3218        );
3219
3220        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
3221    }
3222
3223    #[test]
3224    fn analysis_is_utf8() {
3225        // Positive examples.
3226        assert!(props_bytes(r"a").is_utf8());
3227        assert!(props_bytes(r"ab").is_utf8());
3228        assert!(props_bytes(r"(?-u)a").is_utf8());
3229        assert!(props_bytes(r"(?-u)ab").is_utf8());
3230        assert!(props_bytes(r"\xFF").is_utf8());
3231        assert!(props_bytes(r"\xFF\xFF").is_utf8());
3232        assert!(props_bytes(r"[^a]").is_utf8());
3233        assert!(props_bytes(r"[^a][^a]").is_utf8());
3234        assert!(props_bytes(r"\b").is_utf8());
3235        assert!(props_bytes(r"\B").is_utf8());
3236        assert!(props_bytes(r"(?-u)\b").is_utf8());
3237        assert!(props_bytes(r"(?-u)\B").is_utf8());
3238
3239        // Negative examples.
3240        assert!(!props_bytes(r"(?-u)\xFF").is_utf8());
3241        assert!(!props_bytes(r"(?-u)\xFF\xFF").is_utf8());
3242        assert!(!props_bytes(r"(?-u)[^a]").is_utf8());
3243        assert!(!props_bytes(r"(?-u)[^a][^a]").is_utf8());
3244    }
3245
3246    #[test]
3247    fn analysis_captures_len() {
3248        assert_eq!(0, props(r"a").explicit_captures_len());
3249        assert_eq!(0, props(r"(?:a)").explicit_captures_len());
3250        assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len());
3251        assert_eq!(0, props(r"(?i-u)a").explicit_captures_len());
3252        assert_eq!(1, props(r"(a)").explicit_captures_len());
3253        assert_eq!(1, props(r"(?P<foo>a)").explicit_captures_len());
3254        assert_eq!(1, props(r"()").explicit_captures_len());
3255        assert_eq!(1, props(r"()a").explicit_captures_len());
3256        assert_eq!(1, props(r"(a)+").explicit_captures_len());
3257        assert_eq!(2, props(r"(a)(b)").explicit_captures_len());
3258        assert_eq!(2, props(r"(a)|(b)").explicit_captures_len());
3259        assert_eq!(2, props(r"((a))").explicit_captures_len());
3260        assert_eq!(1, props(r"([a&&b])").explicit_captures_len());
3261    }
3262
3263    #[test]
3264    fn analysis_static_captures_len() {
3265        let len = |pattern| props(pattern).static_explicit_captures_len();
3266        assert_eq!(Some(0), len(r""));
3267        assert_eq!(Some(0), len(r"foo|bar"));
3268        assert_eq!(None, len(r"(foo)|bar"));
3269        assert_eq!(None, len(r"foo|(bar)"));
3270        assert_eq!(Some(1), len(r"(foo|bar)"));
3271        assert_eq!(Some(1), len(r"(a|b|c|d|e|f)"));
3272        assert_eq!(Some(1), len(r"(a)|(b)|(c)|(d)|(e)|(f)"));
3273        assert_eq!(Some(2), len(r"(a)(b)|(c)(d)|(e)(f)"));
3274        assert_eq!(Some(6), len(r"(a)(b)(c)(d)(e)(f)"));
3275        assert_eq!(Some(3), len(r"(a)(b)(extra)|(a)(b)()"));
3276        assert_eq!(Some(3), len(r"(a)(b)((?:extra)?)"));
3277        assert_eq!(None, len(r"(a)(b)(extra)?"));
3278        assert_eq!(Some(1), len(r"(foo)|(bar)"));
3279        assert_eq!(Some(2), len(r"(foo)(bar)"));
3280        assert_eq!(Some(2), len(r"(foo)+(bar)"));
3281        assert_eq!(None, len(r"(foo)*(bar)"));
3282        assert_eq!(Some(0), len(r"(foo)?{0}"));
3283        assert_eq!(None, len(r"(foo)?{1}"));
3284        assert_eq!(Some(1), len(r"(foo){1}"));
3285        assert_eq!(Some(1), len(r"(foo){1,}"));
3286        assert_eq!(Some(1), len(r"(foo){1,}?"));
3287        assert_eq!(None, len(r"(foo){1,}??"));
3288        assert_eq!(None, len(r"(foo){0,}"));
3289        assert_eq!(Some(1), len(r"(foo)(?:bar)"));
3290        assert_eq!(Some(2), len(r"(foo(?:bar)+)(?:baz(boo))"));
3291        assert_eq!(Some(2), len(r"(?P<bar>foo)(?:bar)(bal|loon)"));
3292        assert_eq!(
3293            Some(2),
3294            len(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#)
3295        );
3296    }
3297
3298    #[test]
3299    fn analysis_is_all_assertions() {
3300        // Positive examples.
3301        let p = props(r"\b");
3302        assert!(!p.look_set().is_empty());
3303        assert_eq!(p.minimum_len(), Some(0));
3304
3305        let p = props(r"\B");
3306        assert!(!p.look_set().is_empty());
3307        assert_eq!(p.minimum_len(), Some(0));
3308
3309        let p = props(r"^");
3310        assert!(!p.look_set().is_empty());
3311        assert_eq!(p.minimum_len(), Some(0));
3312
3313        let p = props(r"$");
3314        assert!(!p.look_set().is_empty());
3315        assert_eq!(p.minimum_len(), Some(0));
3316
3317        let p = props(r"\A");
3318        assert!(!p.look_set().is_empty());
3319        assert_eq!(p.minimum_len(), Some(0));
3320
3321        let p = props(r"\z");
3322        assert!(!p.look_set().is_empty());
3323        assert_eq!(p.minimum_len(), Some(0));
3324
3325        let p = props(r"$^\z\A\b\B");
3326        assert!(!p.look_set().is_empty());
3327        assert_eq!(p.minimum_len(), Some(0));
3328
3329        let p = props(r"$|^|\z|\A|\b|\B");
3330        assert!(!p.look_set().is_empty());
3331        assert_eq!(p.minimum_len(), Some(0));
3332
3333        let p = props(r"^$|$^");
3334        assert!(!p.look_set().is_empty());
3335        assert_eq!(p.minimum_len(), Some(0));
3336
3337        let p = props(r"((\b)+())*^");
3338        assert!(!p.look_set().is_empty());
3339        assert_eq!(p.minimum_len(), Some(0));
3340
3341        // Negative examples.
3342        let p = props(r"^a");
3343        assert!(!p.look_set().is_empty());
3344        assert_eq!(p.minimum_len(), Some(1));
3345    }
3346
3347    #[test]
3348    fn analysis_look_set_prefix_any() {
3349        let p = props(r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))");
3350        assert!(p.look_set_prefix_any().contains(Look::WordAscii));
3351    }
3352
// Checks which patterns are reported as anchored: `is_start` looks for
// `Look::Start` in `look_set_prefix()`, and `is_end` looks for `Look::End`
// in `look_set_suffix()`.
#[test]
fn analysis_is_anchored() {
    let is_start = |p| props(p).look_set_prefix().contains(Look::Start);
    let is_end = |p| props(p).look_set_suffix().contains(Look::End);

    // Positive examples.
    assert!(is_start(r"^"));
    assert!(is_end(r"$"));

    assert!(is_start(r"^^"));
    assert!(is_end(r"$$"));

    assert!(is_start(r"^$"));
    assert!(is_end(r"^$"));

    assert!(is_start(r"^foo"));
    assert!(is_end(r"foo$"));

    assert!(is_start(r"^foo|^bar"));
    assert!(is_end(r"foo$|bar$"));

    assert!(is_start(r"^(foo|bar)"));
    assert!(is_end(r"(foo|bar)$"));

    assert!(is_start(r"^+"));
    assert!(is_end(r"$+"));
    assert!(is_start(r"^++"));
    assert!(is_end(r"$++"));
    assert!(is_start(r"(^)+"));
    assert!(is_end(r"($)+"));

    // Both anchors are zero-width, so `$^` counts as anchored on both sides.
    // The second check was previously a duplicate of the first, which left
    // the end-anchor case untested.
    assert!(is_start(r"$^"));
    assert!(is_end(r"$^"));
    assert!(is_start(r"$^|^$"));
    assert!(is_end(r"$^|^$"));

    assert!(is_start(r"\b^"));
    assert!(is_end(r"$\b"));
    assert!(is_start(r"^(?m:^)"));
    assert!(is_end(r"(?m:$)$"));
    assert!(is_start(r"(?m:^)^"));
    assert!(is_end(r"$(?m:$)"));

    // Negative examples.
    assert!(!is_start(r"(?m)^"));
    assert!(!is_end(r"(?m)$"));
    assert!(!is_start(r"(?m:^$)|$^"));
    assert!(!is_end(r"(?m:^$)|$^"));
    assert!(!is_start(r"$^|(?m:^$)"));
    assert!(!is_end(r"$^|(?m:^$)"));

    assert!(!is_start(r"a^"));
    assert!(!is_start(r"$a"));

    assert!(!is_end(r"a^"));
    assert!(!is_end(r"$a"));

    assert!(!is_start(r"^foo|bar"));
    assert!(!is_end(r"foo|bar$"));

    assert!(!is_start(r"^*"));
    assert!(!is_end(r"$*"));
    assert!(!is_start(r"^*+"));
    assert!(!is_end(r"$*+"));
    assert!(!is_start(r"^+*"));
    assert!(!is_end(r"$+*"));
    assert!(!is_start(r"(^)*"));
    assert!(!is_end(r"($)*"));
}
3422
// Checks the whole-pattern `look_set()` (rather than the prefix/suffix
// sets) for the `Look::Start` and `Look::End` assertions.
#[test]
fn analysis_is_any_anchored() {
    let has_start = |pat| props(pat).look_set().contains(Look::Start);
    let has_end = |pat| props(pat).look_set().contains(Look::End);

    // Positive examples.
    for pat in [r"^", r"\A"] {
        assert!(has_start(pat), "expected Look::Start for {:?}", pat);
    }
    for pat in [r"$", r"\z"] {
        assert!(has_end(pat), "expected Look::End for {:?}", pat);
    }

    // Negative examples.
    for pat in [r"(?m)^", r"$"] {
        assert!(!has_start(pat), "unexpected Look::Start for {:?}", pat);
    }
    for pat in [r"(?m)$", r"^"] {
        assert!(!has_end(pat), "unexpected Look::End for {:?}", pat);
    }
}
3440
// Checks `minimum_len()` as computed via `props_bytes`: the positive
// patterns must report `Some(0)`, the negative patterns anything else.
#[test]
fn analysis_can_empty() {
    let min_len = |pat| props_bytes(pat).minimum_len();

    // Positive examples.
    for pat in [
        r"",
        r"()",
        r"()*",
        r"()+",
        r"()?",
        r"a*",
        r"a?",
        r"a{0}",
        r"a{0,}",
        r"a{0,1}",
        r"a{0,10}",
        r"a*|b",
        r"b|a*",
        r"a|",
        r"|a",
        r"a||b",
        r"a*a?(abcd)*",
        r"^",
        r"$",
        r"(?m)^",
        r"(?m)$",
        r"\A",
        r"\z",
        r"\B",
        r"(?-u)\B",
        r"\b",
        r"(?-u)\b",
    ] {
        assert_eq!(Some(0), min_len(pat), "pattern: {:?}", pat);
    }
    // Needs Unicode general category tables, hence the feature gate.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(Some(0), min_len(r"\pL*"), "pattern: {:?}", r"\pL*");

    // Negative examples.
    for pat in [
        r"a+",
        r"a{1}",
        r"a{1,}",
        r"a{1,2}",
        r"a{1,10}",
        r"b|a",
        r"a*a+(abcd)*",
        r"[a--a]",
        r"[a&&b]",
    ] {
        assert_ne!(Some(0), min_len(pat), "pattern: {:?}", pat);
    }
    // Needs Unicode general category tables, hence the feature gate.
    #[cfg(feature = "unicode-gencat")]
    assert_ne!(Some(0), min_len(r"\P{any}"), "pattern: {:?}", r"\P{any}");
}
3491
// Checks `is_literal()` against patterns that must and must not be
// reported as a single literal.
#[test]
fn analysis_is_literal() {
    let is_literal = |pat| props(pat).is_literal();

    // Positive examples.
    for pat in [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"(?:a)",
        r"foo(?:a)",
        r"(?:a)foo",
        r"[a]",
    ] {
        assert!(is_literal(pat), "expected a literal: {:?}", pat);
    }

    // Negative examples.
    for pat in [
        r"",
        r"^",
        r"a|b",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[ab]",
    ] {
        assert!(!is_literal(pat), "expected a non-literal: {:?}", pat);
    }
}
3514
// Checks `is_alternation_literal()` against patterns that must and must not
// be reported as an alternation of literals.
#[test]
fn analysis_is_alternation_literal() {
    let is_alt_lit = |pat| props(pat).is_alternation_literal();

    // Positive examples.
    for pat in [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"foo|bar",
        r"foo|bar|baz",
        r"[a]",
        r"(?:ab)|cd",
        r"ab|(?:cd)",
    ] {
        assert!(is_alt_lit(pat), "expected alternation literal: {:?}", pat);
    }

    // Negative examples.
    for pat in [
        r"",
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[ab]",
        r"[ab]|b",
        r"a|[ab]",
        r"(a)|b",
        r"a|(b)",
        r"a|b",
        r"a|b|c",
        r"[a]|b",
        r"a|[b]",
        r"(?:a)|b",
        r"a|(?:b)",
        r"(?:z|xx)@|xx",
    ] {
        assert!(
            !is_alt_lit(pat),
            "expected non alternation literal: {:?}",
            pat
        );
    }
}
3548
// This tests that the smart Hir::repetition constructor does some basic
// simplifications.
#[test]
fn smart_repetition() {
    // Repeating zero times yields the empty expression.
    let zero_times = t(r"a{0}");
    assert_eq!(zero_times, Hir::empty());

    // Repeating exactly once yields the sub-expression itself.
    let exactly_once = t(r"a{1}");
    assert_eq!(exactly_once, hir_lit("a"));

    // A repeated look-around assertion yields the bare assertion.
    let repeated_look = t(r"\B{32111}");
    assert_eq!(repeated_look, hir_look(hir::Look::WordUnicodeNegate));
}
3557
// This tests that the smart Hir::concat constructor simplifies the given
// exprs in a way we expect.
#[test]
fn smart_concat() {
    // Concatenations with nothing in them translate to the empty expression.
    for pat in ["", "(?:)"] {
        assert_eq!(t(pat), Hir::empty(), "pattern: {:?}", pat);
    }

    // Adjacent literals, grouped or not, end up as a single literal.
    for (pat, merged) in [
        ("abc", "abc"),
        ("(?:foo)(?:bar)", "foobar"),
        ("quux(?:foo)(?:bar)baz", "quuxfoobarbaz"),
    ] {
        assert_eq!(t(pat), hir_lit(merged), "pattern: {:?}", pat);
    }

    // Nested groups are flattened; the literals on each side of the
    // look-around are still merged with their neighbours.
    let split_by_start = || {
        hir_cat(vec![
            hir_lit("foobar"),
            hir_look(hir::Look::Start),
            hir_lit("bazquux"),
        ])
    };
    assert_eq!(t("foo(?:bar^baz)quux"), split_by_start());
    assert_eq!(t("foo(?:ba(?:r^b)az)quux"), split_by_start());
}
3584
// This tests that the smart Hir::alternation constructor simplifies the
// given exprs in a way we expect.
#[test]
fn smart_alternation() {
    // Builds one literal Hir per word, in order.
    let lits = |words: &[&str]| -> Vec<Hir> {
        words.iter().map(|&w| hir_lit(w)).collect()
    };
    // The Unicode class `[A-Z]` shared by the prefix-lifting cases below.
    let upper = || hir_uclass(&[('A', 'Z')]);

    // Nested alternations are flattened into a single alternation.
    let flat = hir_alt(lits(&["foo", "bar"]));
    assert_eq!(t("(?:foo)|(?:bar)"), flat);

    let flat = hir_alt(lits(&["quux", "abc", "def", "xyz", "baz"]));
    assert_eq!(t("quux|(?:abc|def|xyz)|baz"), flat);

    let flat = hir_alt(lits(&["quux", "abc", "def", "mno", "xyz", "baz"]));
    assert_eq!(t("quux|(?:abc|(?:def|mno)|xyz)|baz"), flat);

    // An alternation of single characters becomes a character class.
    let class = hir_uclass(&[('a', 'f'), ('x', 'z')]);
    assert_eq!(t("a|b|c|d|e|f|x|y|z"), class);

    // Tests that we lift common prefixes out of an alternation.
    let lifted = hir_cat(vec![upper(), hir_alt(lits(&["foo", "quux"]))]);
    assert_eq!(t("[A-Z]foo|[A-Z]quux"), lifted);

    let lifted =
        hir_cat(vec![upper(), hir_alt(vec![upper(), hir_lit("quux")])]);
    assert_eq!(t("[A-Z][A-Z]|[A-Z]quux"), lifted);

    let lifted = hir_cat(vec![
        upper(),
        upper(),
        hir_alt(vec![Hir::empty(), hir_lit("quux")]),
    ]);
    assert_eq!(t("[A-Z][A-Z]|[A-Z][A-Z]quux"), lifted);

    let lifted = hir_cat(vec![upper(), hir_alt(lits(&["foo", "foobar"]))]);
    assert_eq!(t("[A-Z]foo|[A-Z]foobar"), lifted);
}
3649
// Translating a hand-built alternation whose only branch is an empty
// concatenation must yield the empty Hir.
#[test]
fn regression_alt_empty_concat() {
    use crate::ast::{self, Ast};

    let span = Span::splat(Position::new(0, 0, 0));
    let empty_concat = Ast::concat(ast::Concat { span, asts: vec![] });
    let ast = Ast::alternation(ast::Alternation {
        span,
        asts: vec![empty_concat],
    });

    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::empty()), translated);
}
3663
// Translating a hand-built concatenation that wraps an alternation with no
// branches at all must yield `Hir::fail()`.
#[test]
fn regression_empty_alt() {
    use crate::ast::{self, Ast};

    let span = Span::splat(Position::new(0, 0, 0));
    let empty_alt =
        Ast::alternation(ast::Alternation { span, asts: vec![] });
    let ast = Ast::concat(ast::Concat { span, asts: vec![empty_alt] });

    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::fail()), translated);
}
3680
// Translating a hand-built concatenation that wraps an alternation with a
// single `.` branch must yield just the dot.
#[test]
fn regression_singleton_alt() {
    use crate::ast::{self, Ast};
    use crate::hir::Dot;

    let span = Span::splat(Position::new(0, 0, 0));
    let single_branch = Ast::alternation(ast::Alternation {
        span,
        asts: vec![Ast::dot(span)],
    });
    let ast = Ast::concat(ast::Concat { span, asts: vec![single_branch] });

    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), translated);
}
3700
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168
#[test]
fn regression_fuzz_match() {
    let pat = "[(\u{6} \0-\u{afdf5}]  \0 ";

    // Parse with whitespace-insensitive mode on and octal escapes off.
    let mut parser = ParserBuilder::new()
        .octal(false)
        .ignore_whitespace(true)
        .build();
    let ast = parser.parse(pat).unwrap();

    let mut translator = TranslatorBuilder::new()
        .utf8(true)
        .case_insensitive(false)
        .multi_line(false)
        .dot_matches_new_line(false)
        .swap_greed(true)
        .unicode(true)
        .build();
    let hir = translator.translate(pat, &ast).unwrap();

    let expected = Hir::concat(vec![
        hir_uclass(&[('\0', '\u{afdf5}')]),
        hir_lit("\0"),
    ]);
    assert_eq!(hir, expected);
}
3729
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155
//
// Only checks that translating this fuzzer-found pattern does not panic; the
// resulting Hir is discarded.
#[cfg(feature = "unicode")]
#[test]
fn regression_fuzz_difference1() {
    let pattern = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*";
    drop(t(pattern));
}
3737
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153
//
// Only checks that translating this fuzzer-found pattern does not panic; the
// resulting Hir is discarded.
#[test]
fn regression_fuzz_char_decrement1() {
    let pattern = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0]<D\0\0\0\0\0\0\u{1}]\0\0\0\0]\0\0-*\0]\0\0 ";
    drop(t(pattern));
}
3744}