regex_syntax/
error.rs

1use alloc::{
2    format,
3    string::{String, ToString},
4    vec,
5    vec::Vec,
6};
7
8use crate::{ast, hir};
9
10/// This error type encompasses any error that can be returned by this crate.
11///
12/// This error type is marked as `non_exhaustive`. This means that adding a
13/// new variant is not considered a breaking change.
14#[non_exhaustive]
15#[derive(Clone, Debug, Eq, PartialEq)]
16pub enum Error {
17    /// An error that occurred while translating concrete syntax into abstract
18    /// syntax (AST).
19    Parse(ast::Error),
20    /// An error that occurred while translating abstract syntax into a high
21    /// level intermediate representation (HIR).
22    Translate(hir::Error),
23}
24
25impl From<ast::Error> for Error {
26    fn from(err: ast::Error) -> Error {
27        Error::Parse(err)
28    }
29}
30
31impl From<hir::Error> for Error {
32    fn from(err: hir::Error) -> Error {
33        Error::Translate(err)
34    }
35}
36
// Only provided when the `std` feature is enabled. The impl is empty, so
// every method falls back to the trait's default.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
39
40impl core::fmt::Display for Error {
41    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
42        match *self {
43            Error::Parse(ref x) => x.fmt(f),
44            Error::Translate(ref x) => x.fmt(f),
45        }
46    }
47}
48
49/// A helper type for formatting nice error messages.
50///
51/// This type is responsible for reporting regex parse errors in a nice human
52/// readable format. Most of its complexity is from interspersing notational
53/// markers pointing out the position where an error occurred.
54#[derive(Debug)]
55pub struct Formatter<'e, E> {
56    /// The original regex pattern in which the error occurred.
57    pattern: &'e str,
58    /// The error kind. It must impl fmt::Display.
59    err: &'e E,
60    /// The primary span of the error.
61    span: &'e ast::Span,
62    /// An auxiliary and optional span, in case the error needs to point to
63    /// two locations (e.g., when reporting a duplicate capture group name).
64    aux_span: Option<&'e ast::Span>,
65}
66
67impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
68    fn from(err: &'e ast::Error) -> Self {
69        Formatter {
70            pattern: err.pattern(),
71            err: err.kind(),
72            span: err.span(),
73            aux_span: err.auxiliary_span(),
74        }
75    }
76}
77
78impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
79    fn from(err: &'e hir::Error) -> Self {
80        Formatter {
81            pattern: err.pattern(),
82            err: err.kind(),
83            span: err.span(),
84            aux_span: None,
85        }
86    }
87}
88
89impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
90    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
91        let spans = Spans::from_formatter(self);
92        if self.pattern.contains('\n') {
93            let divider = repeat_char('~', 79);
94
95            writeln!(f, "regex parse error:")?;
96            writeln!(f, "{}", divider)?;
97            let notated = spans.notate();
98            write!(f, "{}", notated)?;
99            writeln!(f, "{}", divider)?;
100            // If we have error spans that cover multiple lines, then we just
101            // note the line numbers.
102            if !spans.multi_line.is_empty() {
103                let mut notes = vec![];
104                for span in &spans.multi_line {
105                    notes.push(format!(
106                        "on line {} (column {}) through line {} (column {})",
107                        span.start.line,
108                        span.start.column,
109                        span.end.line,
110                        span.end.column - 1
111                    ));
112                }
113                writeln!(f, "{}", notes.join("\n"))?;
114            }
115            write!(f, "error: {}", self.err)?;
116        } else {
117            writeln!(f, "regex parse error:")?;
118            let notated = Spans::from_formatter(self).notate();
119            write!(f, "{}", notated)?;
120            write!(f, "error: {}", self.err)?;
121        }
122        Ok(())
123    }
124}
125
126/// This type represents an arbitrary number of error spans in a way that makes
127/// it convenient to notate the regex pattern. ("Notate" means "point out
128/// exactly where the error occurred in the regex pattern.")
129///
130/// Technically, we can only ever have two spans given our current error
131/// structure. However, after toiling with a specific algorithm for handling
132/// two spans, it became obvious that an algorithm to handle an arbitrary
133/// number of spans was actually much simpler.
134struct Spans<'p> {
135    /// The original regex pattern string.
136    pattern: &'p str,
137    /// The total width that should be used for line numbers. The width is
138    /// used for left padding the line numbers for alignment.
139    ///
140    /// A value of `0` means line numbers should not be displayed. That is,
141    /// the pattern is itself only one line.
142    line_number_width: usize,
143    /// All error spans that occur on a single line. This sequence always has
144    /// length equivalent to the number of lines in `pattern`, where the index
145    /// of the sequence represents a line number, starting at `0`. The spans
146    /// in each line are sorted in ascending order.
147    by_line: Vec<Vec<ast::Span>>,
148    /// All error spans that occur over one or more lines. That is, the start
149    /// and end position of the span have different line numbers. The spans are
150    /// sorted in ascending order.
151    multi_line: Vec<ast::Span>,
152}
153
154impl<'p> Spans<'p> {
155    /// Build a sequence of spans from a formatter.
156    fn from_formatter<'e, E: core::fmt::Display>(
157        fmter: &'p Formatter<'e, E>,
158    ) -> Spans<'p> {
159        let mut line_count = fmter.pattern.lines().count();
160        // If the pattern ends with a `\n` literal, then our line count is
161        // off by one, since a span can occur immediately after the last `\n`,
162        // which is consider to be an additional line.
163        if fmter.pattern.ends_with('\n') {
164            line_count += 1;
165        }
166        let line_number_width =
167            if line_count <= 1 { 0 } else { line_count.to_string().len() };
168        let mut spans = Spans {
169            pattern: &fmter.pattern,
170            line_number_width,
171            by_line: vec![vec![]; line_count],
172            multi_line: vec![],
173        };
174        spans.add(fmter.span.clone());
175        if let Some(span) = fmter.aux_span {
176            spans.add(span.clone());
177        }
178        spans
179    }
180
181    /// Add the given span to this sequence, putting it in the right place.
182    fn add(&mut self, span: ast::Span) {
183        // This is grossly inefficient since we sort after each add, but right
184        // now, we only ever add two spans at most.
185        if span.is_one_line() {
186            let i = span.start.line - 1; // because lines are 1-indexed
187            self.by_line[i].push(span);
188            self.by_line[i].sort();
189        } else {
190            self.multi_line.push(span);
191            self.multi_line.sort();
192        }
193    }
194
195    /// Notate the pattern string with carents (`^`) pointing at each span
196    /// location. This only applies to spans that occur within a single line.
197    fn notate(&self) -> String {
198        let mut notated = String::new();
199        for (i, line) in self.pattern.lines().enumerate() {
200            if self.line_number_width > 0 {
201                notated.push_str(&self.left_pad_line_number(i + 1));
202                notated.push_str(": ");
203            } else {
204                notated.push_str("    ");
205            }
206            notated.push_str(line);
207            notated.push('\n');
208            if let Some(notes) = self.notate_line(i) {
209                notated.push_str(&notes);
210                notated.push('\n');
211            }
212        }
213        notated
214    }
215
216    /// Return notes for the line indexed at `i` (zero-based). If there are no
217    /// spans for the given line, then `None` is returned. Otherwise, an
218    /// appropriately space padded string with correctly positioned `^` is
219    /// returned, accounting for line numbers.
220    fn notate_line(&self, i: usize) -> Option<String> {
221        let spans = &self.by_line[i];
222        if spans.is_empty() {
223            return None;
224        }
225        let mut notes = String::new();
226        for _ in 0..self.line_number_padding() {
227            notes.push(' ');
228        }
229        let mut pos = 0;
230        for span in spans {
231            for _ in pos..(span.start.column - 1) {
232                notes.push(' ');
233                pos += 1;
234            }
235            let note_len = span.end.column.saturating_sub(span.start.column);
236            for _ in 0..core::cmp::max(1, note_len) {
237                notes.push('^');
238                pos += 1;
239            }
240        }
241        Some(notes)
242    }
243
244    /// Left pad the given line number with spaces such that it is aligned with
245    /// other line numbers.
246    fn left_pad_line_number(&self, n: usize) -> String {
247        let n = n.to_string();
248        let pad = self.line_number_width.checked_sub(n.len()).unwrap();
249        let mut result = repeat_char(' ', pad);
250        result.push_str(&n);
251        result
252    }
253
254    /// Return the line number padding beginning at the start of each line of
255    /// the pattern.
256    ///
257    /// If the pattern is only one line, then this returns a fixed padding
258    /// for visual indentation.
259    fn line_number_padding(&self) -> usize {
260        if self.line_number_width == 0 {
261            4
262        } else {
263            2 + self.line_number_width
264        }
265    }
266}
267
/// Returns a string made of `count` copies of `c`; the result is empty when
/// `count` is `0`.
fn repeat_char(c: char, count: usize) -> String {
    let mut repeated = String::new();
    repeated.extend((0..count).map(|_| c));
    repeated
}
271
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::ast::parse::Parser;

    /// Parses `pattern`, which is expected to be rejected, and checks that
    /// the rendered error equals `expected_msg` once the latter has been
    /// trimmed of surrounding whitespace. Panics if the pattern parses.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        if let Err(err) = Parser::new().parse(pattern) {
            assert_eq!(err.to_string(), expected_msg.trim());
        } else {
            panic!("regex should not have parsed");
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        // Rendering the error is the actual test: the error formatter must
        // not panic while doing so.
        let rendered = Parser::new().parse("a{\n").unwrap_err().to_string();
        assert!(!rendered.is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        let pattern = r"\\u{[^}]*}";
        let expected = r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#;
        assert_panic_message(pattern, expected);
    }
}