regex_syntax/hir/
visitor.rs

1use alloc::{vec, vec::Vec};
2
3use crate::hir::{self, Hir, HirKind};
4
5/// A trait for visiting the high-level IR (HIR) in depth first order.
6///
7/// The principle aim of this trait is to enable callers to perform case
8/// analysis on a high-level intermediate representation of a regular
9/// expression without necessarily using recursion. In particular, this permits
10/// callers to do case analysis with constant stack usage, which can be
11/// important since the size of an HIR may be proportional to end user input.
12///
13/// Typical usage of this trait involves providing an implementation and then
14/// running it using the [`visit`] function.
15pub trait Visitor {
16    /// The result of visiting an HIR.
17    type Output;
18    /// An error that visiting an HIR might return.
19    type Err;
20
21    /// All implementors of `Visitor` must provide a `finish` method, which
22    /// yields the result of visiting the HIR or an error.
23    fn finish(self) -> Result<Self::Output, Self::Err>;
24
25    /// This method is called before beginning traversal of the HIR.
26    fn start(&mut self) {}
27
28    /// This method is called on an `Hir` before descending into child `Hir`
29    /// nodes.
30    fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
31        Ok(())
32    }
33
34    /// This method is called on an `Hir` after descending all of its child
35    /// `Hir` nodes.
36    fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
37        Ok(())
38    }
39
40    /// This method is called between child nodes of an alternation.
41    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
42        Ok(())
43    }
44
45    /// This method is called between child nodes of a concatenation.
46    fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
47        Ok(())
48    }
49}
50
51/// Executes an implementation of `Visitor` in constant stack space.
52///
53/// This function will visit every node in the given `Hir` while calling
54/// appropriate methods provided by the [`Visitor`] trait.
55///
56/// The primary use case for this method is when one wants to perform case
57/// analysis over an `Hir` without using a stack size proportional to the depth
58/// of the `Hir`. Namely, this method will instead use constant stack space,
59/// but will use heap space proportional to the size of the `Hir`. This may be
60/// desirable in cases where the size of `Hir` is proportional to end user
61/// input.
62///
63/// If the visitor returns an error at any point, then visiting is stopped and
64/// the error is returned.
65pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
66    HeapVisitor::new().visit(hir, visitor)
67}
68
69/// HeapVisitor visits every item in an `Hir` recursively using constant stack
70/// size and a heap size proportional to the size of the `Hir`.
71struct HeapVisitor<'a> {
72    /// A stack of `Hir` nodes. This is roughly analogous to the call stack
73    /// used in a typical recursive visitor.
74    stack: Vec<(&'a Hir, Frame<'a>)>,
75}
76
77/// Represents a single stack frame while performing structural induction over
78/// an `Hir`.
79enum Frame<'a> {
80    /// A stack frame allocated just before descending into a repetition
81    /// operator's child node.
82    Repetition(&'a hir::Repetition),
83    /// A stack frame allocated just before descending into a capture's child
84    /// node.
85    Capture(&'a hir::Capture),
86    /// The stack frame used while visiting every child node of a concatenation
87    /// of expressions.
88    Concat {
89        /// The child node we are currently visiting.
90        head: &'a Hir,
91        /// The remaining child nodes to visit (which may be empty).
92        tail: &'a [Hir],
93    },
94    /// The stack frame used while visiting every child node of an alternation
95    /// of expressions.
96    Alternation {
97        /// The child node we are currently visiting.
98        head: &'a Hir,
99        /// The remaining child nodes to visit (which may be empty).
100        tail: &'a [Hir],
101    },
102}
103
104impl<'a> HeapVisitor<'a> {
105    fn new() -> HeapVisitor<'a> {
106        HeapVisitor { stack: vec![] }
107    }
108
109    fn visit<V: Visitor>(
110        &mut self,
111        mut hir: &'a Hir,
112        mut visitor: V,
113    ) -> Result<V::Output, V::Err> {
114        self.stack.clear();
115
116        visitor.start();
117        loop {
118            visitor.visit_pre(hir)?;
119            if let Some(x) = self.induct(hir) {
120                let child = x.child();
121                self.stack.push((hir, x));
122                hir = child;
123                continue;
124            }
125            // No induction means we have a base case, so we can post visit
126            // it now.
127            visitor.visit_post(hir)?;
128
129            // At this point, we now try to pop our call stack until it is
130            // either empty or we hit another inductive case.
131            loop {
132                let (post_hir, frame) = match self.stack.pop() {
133                    None => return visitor.finish(),
134                    Some((post_hir, frame)) => (post_hir, frame),
135                };
136                // If this is a concat/alternate, then we might have additional
137                // inductive steps to process.
138                if let Some(x) = self.pop(frame) {
139                    match x {
140                        Frame::Alternation { .. } => {
141                            visitor.visit_alternation_in()?;
142                        }
143                        Frame::Concat { .. } => {
144                            visitor.visit_concat_in()?;
145                        }
146                        _ => {}
147                    }
148                    hir = x.child();
149                    self.stack.push((post_hir, x));
150                    break;
151                }
152                // Otherwise, we've finished visiting all the child nodes for
153                // this HIR, so we can post visit it now.
154                visitor.visit_post(post_hir)?;
155            }
156        }
157    }
158
159    /// Build a stack frame for the given HIR if one is needed (which occurs if
160    /// and only if there are child nodes in the HIR). Otherwise, return None.
161    fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
162        match *hir.kind() {
163            HirKind::Repetition(ref x) => Some(Frame::Repetition(x)),
164            HirKind::Capture(ref x) => Some(Frame::Capture(x)),
165            HirKind::Concat(ref x) if x.is_empty() => None,
166            HirKind::Concat(ref x) => {
167                Some(Frame::Concat { head: &x[0], tail: &x[1..] })
168            }
169            HirKind::Alternation(ref x) if x.is_empty() => None,
170            HirKind::Alternation(ref x) => {
171                Some(Frame::Alternation { head: &x[0], tail: &x[1..] })
172            }
173            _ => None,
174        }
175    }
176
177    /// Pops the given frame. If the frame has an additional inductive step,
178    /// then return it, otherwise return `None`.
179    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
180        match induct {
181            Frame::Repetition(_) => None,
182            Frame::Capture(_) => None,
183            Frame::Concat { tail, .. } => {
184                if tail.is_empty() {
185                    None
186                } else {
187                    Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
188                }
189            }
190            Frame::Alternation { tail, .. } => {
191                if tail.is_empty() {
192                    None
193                } else {
194                    Some(Frame::Alternation {
195                        head: &tail[0],
196                        tail: &tail[1..],
197                    })
198                }
199            }
200        }
201    }
202}
203
204impl<'a> Frame<'a> {
205    /// Perform the next inductive step on this frame and return the next
206    /// child HIR node to visit.
207    fn child(&self) -> &'a Hir {
208        match *self {
209            Frame::Repetition(rep) => &rep.sub,
210            Frame::Capture(capture) => &capture.sub,
211            Frame::Concat { head, .. } => head,
212            Frame::Alternation { head, .. } => head,
213        }
214    }
215}