regex_syntax/ast/
print.rs

1/*!
2This module provides a regular expression printer for `Ast`.
3*/
4
5use core::fmt;
6
7use crate::ast::{
8    self,
9    visitor::{self, Visitor},
10    Ast,
11};
12
/// A builder for constructing a printer.
///
/// Note that since a printer doesn't have any configuration knobs, this type
/// remains unexported.
#[derive(Clone, Debug)]
struct PrinterBuilder {
    // Zero-sized placeholder: the builder currently carries no options.
    _priv: (),
}
21
22impl Default for PrinterBuilder {
23    fn default() -> PrinterBuilder {
24        PrinterBuilder::new()
25    }
26}
27
28impl PrinterBuilder {
29    fn new() -> PrinterBuilder {
30        PrinterBuilder { _priv: () }
31    }
32
33    fn build(&self) -> Printer {
34        Printer { _priv: () }
35    }
36}
37
/// A printer for a regular expression abstract syntax tree.
///
/// A printer converts an abstract syntax tree (AST) to a regular expression
/// pattern string. This particular printer uses constant stack space and heap
/// space proportional to the size of the AST.
///
/// This printer will not necessarily preserve the original formatting of the
/// regular expression pattern string. For example, all whitespace and comments
/// are ignored.
///
/// Create a printer with `Printer::new` and emit a pattern with
/// `Printer::print`.
#[derive(Debug)]
pub struct Printer {
    // Zero-sized private field: keeps the struct from being constructed with
    // a literal outside this module, so callers must go through `new`.
    _priv: (),
}
51
52impl Printer {
53    /// Create a new printer.
54    pub fn new() -> Printer {
55        PrinterBuilder::new().build()
56    }
57
58    /// Print the given `Ast` to the given writer. The writer must implement
59    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
60    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
61    /// implementations) or a `&mut String`.
62    pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
63        visitor::visit(ast, Writer { wtr })
64    }
65}
66
/// The visitor used by `Printer::print`. It wraps the destination writer
/// and emits pattern text to it as the AST is traversed.
#[derive(Debug)]
struct Writer<W> {
    // Destination for the printed pattern.
    wtr: W,
}
71
72impl<W: fmt::Write> Visitor for Writer<W> {
73    type Output = ();
74    type Err = fmt::Error;
75
76    fn finish(self) -> fmt::Result {
77        Ok(())
78    }
79
80    fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
81        match *ast {
82            Ast::Group(ref x) => self.fmt_group_pre(x),
83            Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x),
84            _ => Ok(()),
85        }
86    }
87
88    fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
89        match *ast {
90            Ast::Empty(_) => Ok(()),
91            Ast::Flags(ref x) => self.fmt_set_flags(x),
92            Ast::Literal(ref x) => self.fmt_literal(x),
93            Ast::Dot(_) => self.wtr.write_str("."),
94            Ast::Assertion(ref x) => self.fmt_assertion(x),
95            Ast::ClassPerl(ref x) => self.fmt_class_perl(x),
96            Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x),
97            Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x),
98            Ast::Repetition(ref x) => self.fmt_repetition(x),
99            Ast::Group(ref x) => self.fmt_group_post(x),
100            Ast::Alternation(_) => Ok(()),
101            Ast::Concat(_) => Ok(()),
102        }
103    }
104
105    fn visit_alternation_in(&mut self) -> fmt::Result {
106        self.wtr.write_str("|")
107    }
108
109    fn visit_class_set_item_pre(
110        &mut self,
111        ast: &ast::ClassSetItem,
112    ) -> Result<(), Self::Err> {
113        match *ast {
114            ast::ClassSetItem::Bracketed(ref x) => {
115                self.fmt_class_bracketed_pre(x)
116            }
117            _ => Ok(()),
118        }
119    }
120
121    fn visit_class_set_item_post(
122        &mut self,
123        ast: &ast::ClassSetItem,
124    ) -> Result<(), Self::Err> {
125        use crate::ast::ClassSetItem::*;
126
127        match *ast {
128            Empty(_) => Ok(()),
129            Literal(ref x) => self.fmt_literal(x),
130            Range(ref x) => {
131                self.fmt_literal(&x.start)?;
132                self.wtr.write_str("-")?;
133                self.fmt_literal(&x.end)?;
134                Ok(())
135            }
136            Ascii(ref x) => self.fmt_class_ascii(x),
137            Unicode(ref x) => self.fmt_class_unicode(x),
138            Perl(ref x) => self.fmt_class_perl(x),
139            Bracketed(ref x) => self.fmt_class_bracketed_post(x),
140            Union(_) => Ok(()),
141        }
142    }
143
144    fn visit_class_set_binary_op_in(
145        &mut self,
146        ast: &ast::ClassSetBinaryOp,
147    ) -> Result<(), Self::Err> {
148        self.fmt_class_set_binary_op_kind(&ast.kind)
149    }
150}
151
152impl<W: fmt::Write> Writer<W> {
153    fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
154        use crate::ast::GroupKind::*;
155        match ast.kind {
156            CaptureIndex(_) => self.wtr.write_str("("),
157            CaptureName { ref name, starts_with_p } => {
158                let start = if starts_with_p { "(?P<" } else { "(?<" };
159                self.wtr.write_str(start)?;
160                self.wtr.write_str(&name.name)?;
161                self.wtr.write_str(">")?;
162                Ok(())
163            }
164            NonCapturing(ref flags) => {
165                self.wtr.write_str("(?")?;
166                self.fmt_flags(flags)?;
167                self.wtr.write_str(":")?;
168                Ok(())
169            }
170        }
171    }
172
173    fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
174        self.wtr.write_str(")")
175    }
176
177    fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
178        use crate::ast::RepetitionKind::*;
179        match ast.op.kind {
180            ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
181            ZeroOrOne => self.wtr.write_str("??"),
182            ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
183            ZeroOrMore => self.wtr.write_str("*?"),
184            OneOrMore if ast.greedy => self.wtr.write_str("+"),
185            OneOrMore => self.wtr.write_str("+?"),
186            Range(ref x) => {
187                self.fmt_repetition_range(x)?;
188                if !ast.greedy {
189                    self.wtr.write_str("?")?;
190                }
191                Ok(())
192            }
193        }
194    }
195
196    fn fmt_repetition_range(
197        &mut self,
198        ast: &ast::RepetitionRange,
199    ) -> fmt::Result {
200        use crate::ast::RepetitionRange::*;
201        match *ast {
202            Exactly(x) => write!(self.wtr, "{{{}}}", x),
203            AtLeast(x) => write!(self.wtr, "{{{},}}", x),
204            Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
205        }
206    }
207
208    fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
209        use crate::ast::LiteralKind::*;
210
211        match ast.kind {
212            Verbatim => self.wtr.write_char(ast.c),
213            Meta | Superfluous => write!(self.wtr, r"\{}", ast.c),
214            Octal => write!(self.wtr, r"\{:o}", u32::from(ast.c)),
215            HexFixed(ast::HexLiteralKind::X) => {
216                write!(self.wtr, r"\x{:02X}", u32::from(ast.c))
217            }
218            HexFixed(ast::HexLiteralKind::UnicodeShort) => {
219                write!(self.wtr, r"\u{:04X}", u32::from(ast.c))
220            }
221            HexFixed(ast::HexLiteralKind::UnicodeLong) => {
222                write!(self.wtr, r"\U{:08X}", u32::from(ast.c))
223            }
224            HexBrace(ast::HexLiteralKind::X) => {
225                write!(self.wtr, r"\x{{{:X}}}", u32::from(ast.c))
226            }
227            HexBrace(ast::HexLiteralKind::UnicodeShort) => {
228                write!(self.wtr, r"\u{{{:X}}}", u32::from(ast.c))
229            }
230            HexBrace(ast::HexLiteralKind::UnicodeLong) => {
231                write!(self.wtr, r"\U{{{:X}}}", u32::from(ast.c))
232            }
233            Special(ast::SpecialLiteralKind::Bell) => {
234                self.wtr.write_str(r"\a")
235            }
236            Special(ast::SpecialLiteralKind::FormFeed) => {
237                self.wtr.write_str(r"\f")
238            }
239            Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
240            Special(ast::SpecialLiteralKind::LineFeed) => {
241                self.wtr.write_str(r"\n")
242            }
243            Special(ast::SpecialLiteralKind::CarriageReturn) => {
244                self.wtr.write_str(r"\r")
245            }
246            Special(ast::SpecialLiteralKind::VerticalTab) => {
247                self.wtr.write_str(r"\v")
248            }
249            Special(ast::SpecialLiteralKind::Space) => {
250                self.wtr.write_str(r"\ ")
251            }
252        }
253    }
254
255    fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
256        use crate::ast::AssertionKind::*;
257        match ast.kind {
258            StartLine => self.wtr.write_str("^"),
259            EndLine => self.wtr.write_str("$"),
260            StartText => self.wtr.write_str(r"\A"),
261            EndText => self.wtr.write_str(r"\z"),
262            WordBoundary => self.wtr.write_str(r"\b"),
263            NotWordBoundary => self.wtr.write_str(r"\B"),
264            WordBoundaryStart => self.wtr.write_str(r"\b{start}"),
265            WordBoundaryEnd => self.wtr.write_str(r"\b{end}"),
266            WordBoundaryStartAngle => self.wtr.write_str(r"\<"),
267            WordBoundaryEndAngle => self.wtr.write_str(r"\>"),
268            WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"),
269            WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"),
270        }
271    }
272
273    fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
274        self.wtr.write_str("(?")?;
275        self.fmt_flags(&ast.flags)?;
276        self.wtr.write_str(")")?;
277        Ok(())
278    }
279
280    fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
281        use crate::ast::{Flag, FlagsItemKind};
282
283        for item in &ast.items {
284            match item.kind {
285                FlagsItemKind::Negation => self.wtr.write_str("-"),
286                FlagsItemKind::Flag(ref flag) => match *flag {
287                    Flag::CaseInsensitive => self.wtr.write_str("i"),
288                    Flag::MultiLine => self.wtr.write_str("m"),
289                    Flag::DotMatchesNewLine => self.wtr.write_str("s"),
290                    Flag::SwapGreed => self.wtr.write_str("U"),
291                    Flag::Unicode => self.wtr.write_str("u"),
292                    Flag::CRLF => self.wtr.write_str("R"),
293                    Flag::IgnoreWhitespace => self.wtr.write_str("x"),
294                },
295            }?;
296        }
297        Ok(())
298    }
299
300    fn fmt_class_bracketed_pre(
301        &mut self,
302        ast: &ast::ClassBracketed,
303    ) -> fmt::Result {
304        if ast.negated {
305            self.wtr.write_str("[^")
306        } else {
307            self.wtr.write_str("[")
308        }
309    }
310
311    fn fmt_class_bracketed_post(
312        &mut self,
313        _ast: &ast::ClassBracketed,
314    ) -> fmt::Result {
315        self.wtr.write_str("]")
316    }
317
318    fn fmt_class_set_binary_op_kind(
319        &mut self,
320        ast: &ast::ClassSetBinaryOpKind,
321    ) -> fmt::Result {
322        use crate::ast::ClassSetBinaryOpKind::*;
323        match *ast {
324            Intersection => self.wtr.write_str("&&"),
325            Difference => self.wtr.write_str("--"),
326            SymmetricDifference => self.wtr.write_str("~~"),
327        }
328    }
329
330    fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
331        use crate::ast::ClassPerlKind::*;
332        match ast.kind {
333            Digit if ast.negated => self.wtr.write_str(r"\D"),
334            Digit => self.wtr.write_str(r"\d"),
335            Space if ast.negated => self.wtr.write_str(r"\S"),
336            Space => self.wtr.write_str(r"\s"),
337            Word if ast.negated => self.wtr.write_str(r"\W"),
338            Word => self.wtr.write_str(r"\w"),
339        }
340    }
341
342    fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
343        use crate::ast::ClassAsciiKind::*;
344        match ast.kind {
345            Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
346            Alnum => self.wtr.write_str("[:alnum:]"),
347            Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
348            Alpha => self.wtr.write_str("[:alpha:]"),
349            Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
350            Ascii => self.wtr.write_str("[:ascii:]"),
351            Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
352            Blank => self.wtr.write_str("[:blank:]"),
353            Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
354            Cntrl => self.wtr.write_str("[:cntrl:]"),
355            Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
356            Digit => self.wtr.write_str("[:digit:]"),
357            Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
358            Graph => self.wtr.write_str("[:graph:]"),
359            Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
360            Lower => self.wtr.write_str("[:lower:]"),
361            Print if ast.negated => self.wtr.write_str("[:^print:]"),
362            Print => self.wtr.write_str("[:print:]"),
363            Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
364            Punct => self.wtr.write_str("[:punct:]"),
365            Space if ast.negated => self.wtr.write_str("[:^space:]"),
366            Space => self.wtr.write_str("[:space:]"),
367            Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
368            Upper => self.wtr.write_str("[:upper:]"),
369            Word if ast.negated => self.wtr.write_str("[:^word:]"),
370            Word => self.wtr.write_str("[:word:]"),
371            Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
372            Xdigit => self.wtr.write_str("[:xdigit:]"),
373        }
374    }
375
376    fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
377        use crate::ast::ClassUnicodeKind::*;
378        use crate::ast::ClassUnicodeOpKind::*;
379
380        if ast.negated {
381            self.wtr.write_str(r"\P")?;
382        } else {
383            self.wtr.write_str(r"\p")?;
384        }
385        match ast.kind {
386            OneLetter(c) => self.wtr.write_char(c),
387            Named(ref x) => write!(self.wtr, "{{{}}}", x),
388            NamedValue { op: Equal, ref name, ref value } => {
389                write!(self.wtr, "{{{}={}}}", name, value)
390            }
391            NamedValue { op: Colon, ref name, ref value } => {
392                write!(self.wtr, "{{{}:{}}}", name, value)
393            }
394            NamedValue { op: NotEqual, ref name, ref value } => {
395                write!(self.wtr, "{{{}!={}}}", name, value)
396            }
397        }
398    }
399}
400
#[cfg(test)]
mod tests {
    use alloc::string::String;

    use crate::ast::parse::ParserBuilder;

    use super::*;

    /// Parses `given` with a default parser, prints the resulting AST and
    /// asserts that the output is identical to the input.
    fn roundtrip(given: &str) {
        roundtrip_with(|b| b, given);
    }

    /// Like `roundtrip`, but lets the caller configure the parser first
    /// (e.g. to enable octal escapes).
    fn roundtrip_with<F>(mut f: F, given: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let ast = builder.build().parse(given).unwrap();

        let mut printer = Printer::new();
        let mut dst = String::new();
        printer.print(&ast, &mut dst).unwrap();
        assert_eq!(given, dst);
    }

    #[test]
    fn print_literal() {
        roundtrip("a");
        roundtrip(r"\[");
        roundtrip_with(|b| b.octal(true), r"\141");
        roundtrip(r"\x61");
        roundtrip(r"\x7F");
        roundtrip(r"\u0061");
        roundtrip(r"\U00000061");
        roundtrip(r"\x{61}");
        roundtrip(r"\x{7F}");
        roundtrip(r"\u{61}");
        roundtrip(r"\U{61}");

        roundtrip(r"\a");
        roundtrip(r"\f");
        roundtrip(r"\t");
        roundtrip(r"\n");
        roundtrip(r"\r");
        roundtrip(r"\v");
        roundtrip(r"(?x)\ ");
    }

    #[test]
    fn print_dot() {
        roundtrip(".");
    }

    #[test]
    fn print_concat() {
        roundtrip("ab");
        roundtrip("abcde");
        roundtrip("a(bcd)ef");
    }

    #[test]
    fn print_alternation() {
        roundtrip("a|b");
        roundtrip("a|b|c|d|e");
        roundtrip("|a|b|c|d|e");
        roundtrip("|a|b|c|d|e|");
        roundtrip("a(b|c|d)|e|f");
    }

    #[test]
    fn print_assertion() {
        roundtrip(r"^");
        roundtrip(r"$");
        roundtrip(r"\A");
        roundtrip(r"\z");
        roundtrip(r"\b");
        roundtrip(r"\B");
        // Special word boundary assertions.
        roundtrip(r"\b{start}");
        roundtrip(r"\b{end}");
        roundtrip(r"\<");
        roundtrip(r"\>");
        roundtrip(r"\b{start-half}");
        roundtrip(r"\b{end-half}");
    }

    #[test]
    fn print_repetition() {
        roundtrip("a?");
        roundtrip("a??");
        roundtrip("a*");
        roundtrip("a*?");
        roundtrip("a+");
        roundtrip("a+?");
        roundtrip("a{5}");
        roundtrip("a{5}?");
        roundtrip("a{5,}");
        roundtrip("a{5,}?");
        roundtrip("a{5,10}");
        roundtrip("a{5,10}?");
    }

    #[test]
    fn print_flags() {
        roundtrip("(?i)");
        roundtrip("(?-i)");
        roundtrip("(?s-i)");
        roundtrip("(?-si)");
        roundtrip("(?siUmux)");
        // CRLF mode.
        roundtrip("(?R)");
        roundtrip("(?-R)");
    }

    #[test]
    fn print_group() {
        roundtrip("(?i:a)");
        // A non-capturing group without any flags.
        roundtrip("(?:a)");
        roundtrip("(?P<foo>a)");
        roundtrip("(?<foo>a)");
        roundtrip("(a)");
    }

    #[test]
    fn print_class() {
        roundtrip(r"[abc]");
        roundtrip(r"[a-z]");
        roundtrip(r"[^a-z]");
        roundtrip(r"[a-z0-9]");
        // A literal `-` at the start and at the end of a class.
        roundtrip(r"[-a-z0-9]");
        roundtrip(r"[a-z0-9-]");
        roundtrip(r"[a-z0-9---]");
        roundtrip(r"[a-z&&m-n]");
        roundtrip(r"[[a-z&&m-n]]");
        roundtrip(r"[a-z--m-n]");
        roundtrip(r"[a-z~~m-n]");
        roundtrip(r"[a-z[0-9]]");
        roundtrip(r"[a-z[^0-9]]");

        roundtrip(r"\d");
        roundtrip(r"\D");
        roundtrip(r"\s");
        roundtrip(r"\S");
        roundtrip(r"\w");
        roundtrip(r"\W");

        roundtrip(r"[[:alnum:]]");
        roundtrip(r"[[:^alnum:]]");
        roundtrip(r"[[:alpha:]]");
        roundtrip(r"[[:^alpha:]]");
        roundtrip(r"[[:ascii:]]");
        roundtrip(r"[[:^ascii:]]");
        roundtrip(r"[[:blank:]]");
        roundtrip(r"[[:^blank:]]");
        roundtrip(r"[[:cntrl:]]");
        roundtrip(r"[[:^cntrl:]]");
        roundtrip(r"[[:digit:]]");
        roundtrip(r"[[:^digit:]]");
        roundtrip(r"[[:graph:]]");
        roundtrip(r"[[:^graph:]]");
        roundtrip(r"[[:lower:]]");
        roundtrip(r"[[:^lower:]]");
        roundtrip(r"[[:print:]]");
        roundtrip(r"[[:^print:]]");
        roundtrip(r"[[:punct:]]");
        roundtrip(r"[[:^punct:]]");
        roundtrip(r"[[:space:]]");
        roundtrip(r"[[:^space:]]");
        roundtrip(r"[[:upper:]]");
        roundtrip(r"[[:^upper:]]");
        roundtrip(r"[[:word:]]");
        roundtrip(r"[[:^word:]]");
        roundtrip(r"[[:xdigit:]]");
        roundtrip(r"[[:^xdigit:]]");

        roundtrip(r"\pL");
        roundtrip(r"\PL");
        roundtrip(r"\p{L}");
        roundtrip(r"\P{L}");
        roundtrip(r"\p{X=Y}");
        roundtrip(r"\P{X=Y}");
        roundtrip(r"\p{X:Y}");
        roundtrip(r"\P{X:Y}");
        roundtrip(r"\p{X!=Y}");
        roundtrip(r"\P{X!=Y}");
    }
}