regex_syntax/ast/
visitor.rs

1use alloc::{vec, vec::Vec};
2
3use crate::ast::{self, Ast};
4
5/// A trait for visiting an abstract syntax tree (AST) in depth first order.
6///
7/// The principle aim of this trait is to enable callers to perform case
8/// analysis on an abstract syntax tree without necessarily using recursion.
9/// In particular, this permits callers to do case analysis with constant stack
10/// usage, which can be important since the size of an abstract syntax tree
11/// may be proportional to end user input.
12///
13/// Typical usage of this trait involves providing an implementation and then
14/// running it using the [`visit`] function.
15///
16/// Note that the abstract syntax tree for a regular expression is quite
17/// complex. Unless you specifically need it, you might be able to use the much
18/// simpler [high-level intermediate representation](crate::hir::Hir) and its
19/// [corresponding `Visitor` trait](crate::hir::Visitor) instead.
20pub trait Visitor {
21    /// The result of visiting an AST.
22    type Output;
23    /// An error that visiting an AST might return.
24    type Err;
25
26    /// All implementors of `Visitor` must provide a `finish` method, which
27    /// yields the result of visiting the AST or an error.
28    fn finish(self) -> Result<Self::Output, Self::Err>;
29
30    /// This method is called before beginning traversal of the AST.
31    fn start(&mut self) {}
32
33    /// This method is called on an `Ast` before descending into child `Ast`
34    /// nodes.
35    fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
36        Ok(())
37    }
38
39    /// This method is called on an `Ast` after descending all of its child
40    /// `Ast` nodes.
41    fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
42        Ok(())
43    }
44
45    /// This method is called between child nodes of an
46    /// [`Alternation`](ast::Alternation).
47    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
48        Ok(())
49    }
50
51    /// This method is called between child nodes of a concatenation.
52    fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
53        Ok(())
54    }
55
56    /// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
57    /// before descending into child nodes.
58    fn visit_class_set_item_pre(
59        &mut self,
60        _ast: &ast::ClassSetItem,
61    ) -> Result<(), Self::Err> {
62        Ok(())
63    }
64
65    /// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
66    /// after descending into child nodes.
67    fn visit_class_set_item_post(
68        &mut self,
69        _ast: &ast::ClassSetItem,
70    ) -> Result<(), Self::Err> {
71        Ok(())
72    }
73
74    /// This method is called on every
75    /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into
76    /// child nodes.
77    fn visit_class_set_binary_op_pre(
78        &mut self,
79        _ast: &ast::ClassSetBinaryOp,
80    ) -> Result<(), Self::Err> {
81        Ok(())
82    }
83
84    /// This method is called on every
85    /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child
86    /// nodes.
87    fn visit_class_set_binary_op_post(
88        &mut self,
89        _ast: &ast::ClassSetBinaryOp,
90    ) -> Result<(), Self::Err> {
91        Ok(())
92    }
93
94    /// This method is called between the left hand and right hand child nodes
95    /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp).
96    fn visit_class_set_binary_op_in(
97        &mut self,
98        _ast: &ast::ClassSetBinaryOp,
99    ) -> Result<(), Self::Err> {
100        Ok(())
101    }
102}
103
104/// Executes an implementation of `Visitor` in constant stack space.
105///
106/// This function will visit every node in the given `Ast` while calling the
107/// appropriate methods provided by the [`Visitor`] trait.
108///
109/// The primary use case for this method is when one wants to perform case
110/// analysis over an `Ast` without using a stack size proportional to the depth
111/// of the `Ast`. Namely, this method will instead use constant stack size, but
112/// will use heap space proportional to the size of the `Ast`. This may be
113/// desirable in cases where the size of `Ast` is proportional to end user
114/// input.
115///
116/// If the visitor returns an error at any point, then visiting is stopped and
117/// the error is returned.
118pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
119    HeapVisitor::new().visit(ast, visitor)
120}
121
122/// HeapVisitor visits every item in an `Ast` recursively using constant stack
123/// size and a heap size proportional to the size of the `Ast`.
124struct HeapVisitor<'a> {
125    /// A stack of `Ast` nodes. This is roughly analogous to the call stack
126    /// used in a typical recursive visitor.
127    stack: Vec<(&'a Ast, Frame<'a>)>,
128    /// Similar to the `Ast` stack above, but is used only for character
129    /// classes. In particular, character classes embed their own mini
130    /// recursive syntax.
131    stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
132}
133
134/// Represents a single stack frame while performing structural induction over
135/// an `Ast`.
136enum Frame<'a> {
137    /// A stack frame allocated just before descending into a repetition
138    /// operator's child node.
139    Repetition(&'a ast::Repetition),
140    /// A stack frame allocated just before descending into a group's child
141    /// node.
142    Group(&'a ast::Group),
143    /// The stack frame used while visiting every child node of a concatenation
144    /// of expressions.
145    Concat {
146        /// The child node we are currently visiting.
147        head: &'a Ast,
148        /// The remaining child nodes to visit (which may be empty).
149        tail: &'a [Ast],
150    },
151    /// The stack frame used while visiting every child node of an alternation
152    /// of expressions.
153    Alternation {
154        /// The child node we are currently visiting.
155        head: &'a Ast,
156        /// The remaining child nodes to visit (which may be empty).
157        tail: &'a [Ast],
158    },
159}
160
161/// Represents a single stack frame while performing structural induction over
162/// a character class.
163enum ClassFrame<'a> {
164    /// The stack frame used while visiting every child node of a union of
165    /// character class items.
166    Union {
167        /// The child node we are currently visiting.
168        head: &'a ast::ClassSetItem,
169        /// The remaining child nodes to visit (which may be empty).
170        tail: &'a [ast::ClassSetItem],
171    },
172    /// The stack frame used while a binary class operation.
173    Binary { op: &'a ast::ClassSetBinaryOp },
174    /// A stack frame allocated just before descending into a binary operator's
175    /// left hand child node.
176    BinaryLHS {
177        op: &'a ast::ClassSetBinaryOp,
178        lhs: &'a ast::ClassSet,
179        rhs: &'a ast::ClassSet,
180    },
181    /// A stack frame allocated just before descending into a binary operator's
182    /// right hand child node.
183    BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
184}
185
186/// A representation of the inductive step when performing structural induction
187/// over a character class.
188///
189/// Note that there is no analogous explicit type for the inductive step for
190/// `Ast` nodes because the inductive step is just an `Ast`. For character
191/// classes, the inductive step can produce one of two possible child nodes:
192/// an item or a binary operation. (An item cannot be a binary operation
193/// because that would imply binary operations can be unioned in the concrete
194/// syntax, which is not possible.)
195enum ClassInduct<'a> {
196    Item(&'a ast::ClassSetItem),
197    BinaryOp(&'a ast::ClassSetBinaryOp),
198}
199
200impl<'a> HeapVisitor<'a> {
201    fn new() -> HeapVisitor<'a> {
202        HeapVisitor { stack: vec![], stack_class: vec![] }
203    }
204
205    fn visit<V: Visitor>(
206        &mut self,
207        mut ast: &'a Ast,
208        mut visitor: V,
209    ) -> Result<V::Output, V::Err> {
210        self.stack.clear();
211        self.stack_class.clear();
212
213        visitor.start();
214        loop {
215            visitor.visit_pre(ast)?;
216            if let Some(x) = self.induct(ast, &mut visitor)? {
217                let child = x.child();
218                self.stack.push((ast, x));
219                ast = child;
220                continue;
221            }
222            // No induction means we have a base case, so we can post visit
223            // it now.
224            visitor.visit_post(ast)?;
225
226            // At this point, we now try to pop our call stack until it is
227            // either empty or we hit another inductive case.
228            loop {
229                let (post_ast, frame) = match self.stack.pop() {
230                    None => return visitor.finish(),
231                    Some((post_ast, frame)) => (post_ast, frame),
232                };
233                // If this is a concat/alternate, then we might have additional
234                // inductive steps to process.
235                if let Some(x) = self.pop(frame) {
236                    match x {
237                        Frame::Alternation { .. } => {
238                            visitor.visit_alternation_in()?;
239                        }
240                        Frame::Concat { .. } => {
241                            visitor.visit_concat_in()?;
242                        }
243                        _ => {}
244                    }
245                    ast = x.child();
246                    self.stack.push((post_ast, x));
247                    break;
248                }
249                // Otherwise, we've finished visiting all the child nodes for
250                // this AST, so we can post visit it now.
251                visitor.visit_post(post_ast)?;
252            }
253        }
254    }
255
256    /// Build a stack frame for the given AST if one is needed (which occurs if
257    /// and only if there are child nodes in the AST). Otherwise, return None.
258    ///
259    /// If this visits a class, then the underlying visitor implementation may
260    /// return an error which will be passed on here.
261    fn induct<V: Visitor>(
262        &mut self,
263        ast: &'a Ast,
264        visitor: &mut V,
265    ) -> Result<Option<Frame<'a>>, V::Err> {
266        Ok(match *ast {
267            Ast::ClassBracketed(ref x) => {
268                self.visit_class(x, visitor)?;
269                None
270            }
271            Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
272            Ast::Group(ref x) => Some(Frame::Group(x)),
273            Ast::Concat(ref x) if x.asts.is_empty() => None,
274            Ast::Concat(ref x) => {
275                Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
276            }
277            Ast::Alternation(ref x) if x.asts.is_empty() => None,
278            Ast::Alternation(ref x) => Some(Frame::Alternation {
279                head: &x.asts[0],
280                tail: &x.asts[1..],
281            }),
282            _ => None,
283        })
284    }
285
286    /// Pops the given frame. If the frame has an additional inductive step,
287    /// then return it, otherwise return `None`.
288    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
289        match induct {
290            Frame::Repetition(_) => None,
291            Frame::Group(_) => None,
292            Frame::Concat { tail, .. } => {
293                if tail.is_empty() {
294                    None
295                } else {
296                    Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
297                }
298            }
299            Frame::Alternation { tail, .. } => {
300                if tail.is_empty() {
301                    None
302                } else {
303                    Some(Frame::Alternation {
304                        head: &tail[0],
305                        tail: &tail[1..],
306                    })
307                }
308            }
309        }
310    }
311
312    fn visit_class<V: Visitor>(
313        &mut self,
314        ast: &'a ast::ClassBracketed,
315        visitor: &mut V,
316    ) -> Result<(), V::Err> {
317        let mut ast = ClassInduct::from_bracketed(ast);
318        loop {
319            self.visit_class_pre(&ast, visitor)?;
320            if let Some(x) = self.induct_class(&ast) {
321                let child = x.child();
322                self.stack_class.push((ast, x));
323                ast = child;
324                continue;
325            }
326            self.visit_class_post(&ast, visitor)?;
327
328            // At this point, we now try to pop our call stack until it is
329            // either empty or we hit another inductive case.
330            loop {
331                let (post_ast, frame) = match self.stack_class.pop() {
332                    None => return Ok(()),
333                    Some((post_ast, frame)) => (post_ast, frame),
334                };
335                // If this is a union or a binary op, then we might have
336                // additional inductive steps to process.
337                if let Some(x) = self.pop_class(frame) {
338                    if let ClassFrame::BinaryRHS { ref op, .. } = x {
339                        visitor.visit_class_set_binary_op_in(op)?;
340                    }
341                    ast = x.child();
342                    self.stack_class.push((post_ast, x));
343                    break;
344                }
345                // Otherwise, we've finished visiting all the child nodes for
346                // this class node, so we can post visit it now.
347                self.visit_class_post(&post_ast, visitor)?;
348            }
349        }
350    }
351
352    /// Call the appropriate `Visitor` methods given an inductive step.
353    fn visit_class_pre<V: Visitor>(
354        &self,
355        ast: &ClassInduct<'a>,
356        visitor: &mut V,
357    ) -> Result<(), V::Err> {
358        match *ast {
359            ClassInduct::Item(item) => {
360                visitor.visit_class_set_item_pre(item)?;
361            }
362            ClassInduct::BinaryOp(op) => {
363                visitor.visit_class_set_binary_op_pre(op)?;
364            }
365        }
366        Ok(())
367    }
368
369    /// Call the appropriate `Visitor` methods given an inductive step.
370    fn visit_class_post<V: Visitor>(
371        &self,
372        ast: &ClassInduct<'a>,
373        visitor: &mut V,
374    ) -> Result<(), V::Err> {
375        match *ast {
376            ClassInduct::Item(item) => {
377                visitor.visit_class_set_item_post(item)?;
378            }
379            ClassInduct::BinaryOp(op) => {
380                visitor.visit_class_set_binary_op_post(op)?;
381            }
382        }
383        Ok(())
384    }
385
386    /// Build a stack frame for the given class node if one is needed (which
387    /// occurs if and only if there are child nodes). Otherwise, return None.
388    fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
389        match *ast {
390            ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
391                match x.kind {
392                    ast::ClassSet::Item(ref item) => {
393                        Some(ClassFrame::Union { head: item, tail: &[] })
394                    }
395                    ast::ClassSet::BinaryOp(ref op) => {
396                        Some(ClassFrame::Binary { op })
397                    }
398                }
399            }
400            ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
401                if x.items.is_empty() {
402                    None
403                } else {
404                    Some(ClassFrame::Union {
405                        head: &x.items[0],
406                        tail: &x.items[1..],
407                    })
408                }
409            }
410            ClassInduct::BinaryOp(op) => {
411                Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs })
412            }
413            _ => None,
414        }
415    }
416
417    /// Pops the given frame. If the frame has an additional inductive step,
418    /// then return it, otherwise return `None`.
419    fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
420        match induct {
421            ClassFrame::Union { tail, .. } => {
422                if tail.is_empty() {
423                    None
424                } else {
425                    Some(ClassFrame::Union {
426                        head: &tail[0],
427                        tail: &tail[1..],
428                    })
429                }
430            }
431            ClassFrame::Binary { .. } => None,
432            ClassFrame::BinaryLHS { op, rhs, .. } => {
433                Some(ClassFrame::BinaryRHS { op, rhs })
434            }
435            ClassFrame::BinaryRHS { .. } => None,
436        }
437    }
438}
439
440impl<'a> Frame<'a> {
441    /// Perform the next inductive step on this frame and return the next
442    /// child AST node to visit.
443    fn child(&self) -> &'a Ast {
444        match *self {
445            Frame::Repetition(rep) => &rep.ast,
446            Frame::Group(group) => &group.ast,
447            Frame::Concat { head, .. } => head,
448            Frame::Alternation { head, .. } => head,
449        }
450    }
451}
452
453impl<'a> ClassFrame<'a> {
454    /// Perform the next inductive step on this frame and return the next
455    /// child class node to visit.
456    fn child(&self) -> ClassInduct<'a> {
457        match *self {
458            ClassFrame::Union { head, .. } => ClassInduct::Item(head),
459            ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
460            ClassFrame::BinaryLHS { ref lhs, .. } => {
461                ClassInduct::from_set(lhs)
462            }
463            ClassFrame::BinaryRHS { ref rhs, .. } => {
464                ClassInduct::from_set(rhs)
465            }
466        }
467    }
468}
469
470impl<'a> ClassInduct<'a> {
471    fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
472        ClassInduct::from_set(&ast.kind)
473    }
474
475    fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
476        match *ast {
477            ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
478            ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
479        }
480    }
481}
482
483impl<'a> core::fmt::Debug for ClassFrame<'a> {
484    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
485        let x = match *self {
486            ClassFrame::Union { .. } => "Union",
487            ClassFrame::Binary { .. } => "Binary",
488            ClassFrame::BinaryLHS { .. } => "BinaryLHS",
489            ClassFrame::BinaryRHS { .. } => "BinaryRHS",
490        };
491        write!(f, "{}", x)
492    }
493}
494
495impl<'a> core::fmt::Debug for ClassInduct<'a> {
496    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
497        let x = match *self {
498            ClassInduct::Item(it) => match *it {
499                ast::ClassSetItem::Empty(_) => "Item(Empty)",
500                ast::ClassSetItem::Literal(_) => "Item(Literal)",
501                ast::ClassSetItem::Range(_) => "Item(Range)",
502                ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
503                ast::ClassSetItem::Perl(_) => "Item(Perl)",
504                ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
505                ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
506                ast::ClassSetItem::Union(_) => "Item(Union)",
507            },
508            ClassInduct::BinaryOp(it) => match it.kind {
509                ast::ClassSetBinaryOpKind::Intersection => {
510                    "BinaryOp(Intersection)"
511                }
512                ast::ClassSetBinaryOpKind::Difference => {
513                    "BinaryOp(Difference)"
514                }
515                ast::ClassSetBinaryOpKind::SymmetricDifference => {
516                    "BinaryOp(SymmetricDifference)"
517                }
518            },
519        };
520        write!(f, "{}", x)
521    }
522}