1use core::{
6 borrow::Borrow,
7 cell::{Cell, RefCell},
8 mem,
9};
10
11use alloc::{
12 boxed::Box,
13 string::{String, ToString},
14 vec,
15 vec::Vec,
16};
17
18use crate::{
19 ast::{self, Ast, Position, Span},
20 either::Either,
21 is_escapeable_character, is_meta_character,
22};
23
/// Shorthand for a `Result` whose error type is always `ast::Error`.
type Result<T> = core::result::Result<T, ast::Error>;
25
/// A single indivisible piece of regex syntax produced by the parser.
///
/// Every variant carries (or is) a span. A primitive can be turned into an
/// `Ast` via `into_ast`; literals, Perl classes and Unicode classes can also
/// be turned into character class set items via `into_class_set_item`.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal character.
    Literal(ast::Literal),
    /// A zero-width assertion such as `^`, `$` or `\b`.
    Assertion(ast::Assertion),
    /// The `.` syntax; only its span is recorded.
    Dot(Span),
    /// A Perl character class such as `\d`, `\s` or `\w`.
    Perl(ast::ClassPerl),
    /// A Unicode character class introduced by `\p` or `\P`.
    Unicode(ast::ClassUnicode),
}
40
41impl Primitive {
42 fn span(&self) -> &Span {
44 match *self {
45 Primitive::Literal(ref x) => &x.span,
46 Primitive::Assertion(ref x) => &x.span,
47 Primitive::Dot(ref span) => span,
48 Primitive::Perl(ref x) => &x.span,
49 Primitive::Unicode(ref x) => &x.span,
50 }
51 }
52
53 fn into_ast(self) -> Ast {
55 match self {
56 Primitive::Literal(lit) => Ast::literal(lit),
57 Primitive::Assertion(assert) => Ast::assertion(assert),
58 Primitive::Dot(span) => Ast::dot(span),
59 Primitive::Perl(cls) => Ast::class_perl(cls),
60 Primitive::Unicode(cls) => Ast::class_unicode(cls),
61 }
62 }
63
64 fn into_class_set_item<P: Borrow<Parser>>(
69 self,
70 p: &ParserI<'_, P>,
71 ) -> Result<ast::ClassSetItem> {
72 use self::Primitive::*;
73 use crate::ast::ClassSetItem;
74
75 match self {
76 Literal(lit) => Ok(ClassSetItem::Literal(lit)),
77 Perl(cls) => Ok(ClassSetItem::Perl(cls)),
78 Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
79 x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
80 }
81 }
82
83 fn into_class_literal<P: Borrow<Parser>>(
90 self,
91 p: &ParserI<'_, P>,
92 ) -> Result<ast::Literal> {
93 use self::Primitive::*;
94
95 match self {
96 Literal(lit) => Ok(lit),
97 x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
98 }
99 }
100}
101
/// Returns true if and only if `c` is an ASCII hexadecimal digit
/// (`0-9`, `a-f` or `A-F`).
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
106
/// Returns true if `c` may appear in a capture group name.
///
/// When `first` is true, `c` is judged as the first character of the name:
/// only `_` and alphabetic characters qualify. In any later position `_`,
/// `.`, `[`, `]` and alphanumeric characters qualify.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' => true,
        // Punctuation that is only tolerated after the first character.
        '.' | '[' | ']' => !first,
        _ if first => c.is_alphabetic(),
        _ => c.is_alphanumeric(),
    }
}
118
/// A builder that collects configuration and produces a `Parser`.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether a built parser starts out skipping whitespace and `#` comments.
    ignore_whitespace: bool,
    /// Nesting limit copied into the built parser.
    // NOTE(review): the code enforcing this limit is not in this part of the
    // file; presumably it bounds AST nesting depth — confirm.
    nest_limit: u32,
    /// Whether octal escapes (`\0` through `\7`) are accepted.
    octal: bool,
    /// Whether a counted repetition with a missing minimum, e.g. `{,n}`, is
    /// accepted and read as a minimum of `0`.
    empty_min_range: bool,
}
129
130impl Default for ParserBuilder {
131 fn default() -> ParserBuilder {
132 ParserBuilder::new()
133 }
134}
135
136impl ParserBuilder {
137 pub fn new() -> ParserBuilder {
139 ParserBuilder {
140 ignore_whitespace: false,
141 nest_limit: 250,
142 octal: false,
143 empty_min_range: false,
144 }
145 }
146
147 pub fn build(&self) -> Parser {
149 Parser {
150 pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
151 capture_index: Cell::new(0),
152 nest_limit: self.nest_limit,
153 octal: self.octal,
154 empty_min_range: self.empty_min_range,
155 initial_ignore_whitespace: self.ignore_whitespace,
156 ignore_whitespace: Cell::new(self.ignore_whitespace),
157 comments: RefCell::new(vec![]),
158 stack_group: RefCell::new(vec![]),
159 stack_class: RefCell::new(vec![]),
160 capture_names: RefCell::new(vec![]),
161 scratch: RefCell::new(String::new()),
162 }
163 }
164
165 pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
191 self.nest_limit = limit;
192 self
193 }
194
195 pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
211 self.octal = yes;
212 self
213 }
214
215 pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
224 self.ignore_whitespace = yes;
225 self
226 }
227
228 pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder {
236 self.empty_min_range = yes;
237 self
238 }
239}
240
/// A regular expression parser that turns a pattern string into an `Ast`.
///
/// All mutable state lives behind `Cell`/`RefCell` so that the parsing
/// routines can operate through a shared reference.
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser in the pattern.
    pos: Cell<Position>,
    /// The index most recently assigned to a capture group; `0` means no
    /// capture group has been seen yet.
    capture_index: Cell<u32>,
    /// The nesting limit copied from the builder.
    // NOTE(review): enforcement is not visible in this part of the file.
    nest_limit: u32,
    /// Whether octal escapes are accepted.
    octal: bool,
    /// The value `ignore_whitespace` is restored to by `reset`.
    initial_ignore_whitespace: bool,
    /// Whether a counted repetition with an omitted minimum is accepted.
    empty_min_range: bool,
    /// Whether whitespace and `#` comments are currently being skipped.
    /// Flags and group boundaries change this while parsing.
    ignore_whitespace: Cell<bool>,
    /// The comments collected while skipping whitespace.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of group and alternation states still being parsed.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character class states still being parsed.
    stack_class: RefCell<Vec<ClassState>>,
    /// The capture names seen so far, kept sorted by name so duplicates can
    /// be found by binary search.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A reusable string buffer; each user clears it before writing.
    scratch: RefCell<String>,
}
284
/// The internal parser: a `Parser` (or anything that borrows as one) paired
/// with the pattern being parsed. The parsing routines are implemented on
/// this type.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state and configuration.
    parser: P,
    /// The full regular expression pattern.
    pattern: &'s str,
}
301
/// An entry on the parser's group stack.
#[derive(Clone, Debug)]
enum GroupState {
    /// A group that has been opened but not yet closed.
    Group {
        /// The concatenation that was being built when the group opened.
        concat: ast::Concat,
        /// The group itself; its inner AST is filled in when it is closed.
        group: ast::Group,
        /// The "ignore whitespace" setting in effect before the group
        /// opened; it is restored when the group is closed.
        ignore_whitespace: bool,
    },
    /// An alternation whose branches seen so far have been collected; the
    /// final branch is added when the group or the pattern ends.
    Alternation(ast::Alternation),
}
322
/// An entry on the parser's character class stack.
#[derive(Clone, Debug)]
enum ClassState {
    /// A bracketed class that has been opened but not yet closed.
    Open {
        /// The union of items that was being built when this class opened.
        union: ast::ClassSetUnion,
        /// The bracketed class itself; its kind and end position are filled
        /// in when it is closed.
        set: ast::ClassBracketed,
    },
    /// A binary set operation whose right-hand side is still being parsed.
    Op {
        /// The kind of the binary operation.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operation.
        lhs: ast::ClassSet,
    },
}
349
350impl Parser {
351 pub fn new() -> Parser {
358 ParserBuilder::new().build()
359 }
360
361 pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
363 ParserI::new(self, pattern).parse()
364 }
365
366 pub fn parse_with_comments(
369 &mut self,
370 pattern: &str,
371 ) -> Result<ast::WithComments> {
372 ParserI::new(self, pattern).parse_with_comments()
373 }
374
375 fn reset(&self) {
381 self.pos.set(Position { offset: 0, line: 1, column: 1 });
384 self.ignore_whitespace.set(self.initial_ignore_whitespace);
385 self.comments.borrow_mut().clear();
386 self.stack_group.borrow_mut().clear();
387 self.stack_class.borrow_mut().clear();
388 }
389}
390
391impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
392 fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
394 ParserI { parser, pattern }
395 }
396
397 fn parser(&self) -> &Parser {
399 self.parser.borrow()
400 }
401
402 fn pattern(&self) -> &str {
404 self.pattern
405 }
406
407 fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
409 ast::Error { kind, pattern: self.pattern().to_string(), span }
410 }
411
412 fn offset(&self) -> usize {
417 self.parser().pos.get().offset
418 }
419
420 fn line(&self) -> usize {
424 self.parser().pos.get().line
425 }
426
427 fn column(&self) -> usize {
431 self.parser().pos.get().column
432 }
433
434 fn next_capture_index(&self, span: Span) -> Result<u32> {
442 let current = self.parser().capture_index.get();
443 let i = current.checked_add(1).ok_or_else(|| {
444 self.error(span, ast::ErrorKind::CaptureLimitExceeded)
445 })?;
446 self.parser().capture_index.set(i);
447 Ok(i)
448 }
449
450 fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
453 let mut names = self.parser().capture_names.borrow_mut();
454 match names
455 .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
456 {
457 Err(i) => {
458 names.insert(i, cap.clone());
459 Ok(())
460 }
461 Ok(i) => Err(self.error(
462 cap.span,
463 ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
464 )),
465 }
466 }
467
468 fn ignore_whitespace(&self) -> bool {
470 self.parser().ignore_whitespace.get()
471 }
472
473 fn char(&self) -> char {
477 self.char_at(self.offset())
478 }
479
480 fn char_at(&self, i: usize) -> char {
484 self.pattern()[i..]
485 .chars()
486 .next()
487 .unwrap_or_else(|| panic!("expected char at offset {}", i))
488 }
489
490 fn bump(&self) -> bool {
494 if self.is_eof() {
495 return false;
496 }
497 let Position { mut offset, mut line, mut column } = self.pos();
498 if self.char() == '\n' {
499 line = line.checked_add(1).unwrap();
500 column = 1;
501 } else {
502 column = column.checked_add(1).unwrap();
503 }
504 offset += self.char().len_utf8();
505 self.parser().pos.set(Position { offset, line, column });
506 self.pattern()[self.offset()..].chars().next().is_some()
507 }
508
509 fn bump_if(&self, prefix: &str) -> bool {
514 if self.pattern()[self.offset()..].starts_with(prefix) {
515 for _ in 0..prefix.chars().count() {
516 self.bump();
517 }
518 true
519 } else {
520 false
521 }
522 }
523
524 fn is_lookaround_prefix(&self) -> bool {
532 self.bump_if("?=")
533 || self.bump_if("?!")
534 || self.bump_if("?<=")
535 || self.bump_if("?<!")
536 }
537
538 fn bump_and_bump_space(&self) -> bool {
542 if !self.bump() {
543 return false;
544 }
545 self.bump_space();
546 !self.is_eof()
547 }
548
549 fn bump_space(&self) {
559 if !self.ignore_whitespace() {
560 return;
561 }
562 while !self.is_eof() {
563 if self.char().is_whitespace() {
564 self.bump();
565 } else if self.char() == '#' {
566 let start = self.pos();
567 let mut comment_text = String::new();
568 self.bump();
569 while !self.is_eof() {
570 let c = self.char();
571 self.bump();
572 if c == '\n' {
573 break;
574 }
575 comment_text.push(c);
576 }
577 let comment = ast::Comment {
578 span: Span::new(start, self.pos()),
579 comment: comment_text,
580 };
581 self.parser().comments.borrow_mut().push(comment);
582 } else {
583 break;
584 }
585 }
586 }
587
588 fn peek(&self) -> Option<char> {
592 if self.is_eof() {
593 return None;
594 }
595 self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
596 }
597
598 fn peek_space(&self) -> Option<char> {
601 if !self.ignore_whitespace() {
602 return self.peek();
603 }
604 if self.is_eof() {
605 return None;
606 }
607 let mut start = self.offset() + self.char().len_utf8();
608 let mut in_comment = false;
609 for (i, c) in self.pattern()[start..].char_indices() {
610 if c.is_whitespace() {
611 continue;
612 } else if !in_comment && c == '#' {
613 in_comment = true;
614 } else if in_comment && c == '\n' {
615 in_comment = false;
616 } else {
617 start += i;
618 break;
619 }
620 }
621 self.pattern()[start..].chars().next()
622 }
623
624 fn is_eof(&self) -> bool {
626 self.offset() == self.pattern().len()
627 }
628
629 fn pos(&self) -> Position {
632 self.parser().pos.get()
633 }
634
635 fn span(&self) -> Span {
638 Span::splat(self.pos())
639 }
640
641 fn span_char(&self) -> Span {
643 let mut next = Position {
644 offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
645 line: self.line(),
646 column: self.column().checked_add(1).unwrap(),
647 };
648 if self.char() == '\n' {
649 next.line += 1;
650 next.column = 1;
651 }
652 Span::new(self.pos(), next)
653 }
654
655 #[inline(never)]
665 fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
666 assert_eq!(self.char(), '|');
667 concat.span.end = self.pos();
668 self.push_or_add_alternation(concat);
669 self.bump();
670 Ok(ast::Concat { span: self.span(), asts: vec![] })
671 }
672
673 fn push_or_add_alternation(&self, concat: ast::Concat) {
676 use self::GroupState::*;
677
678 let mut stack = self.parser().stack_group.borrow_mut();
679 if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
680 alts.asts.push(concat.into_ast());
681 return;
682 }
683 stack.push(Alternation(ast::Alternation {
684 span: Span::new(concat.span.start, self.pos()),
685 asts: vec![concat.into_ast()],
686 }));
687 }
688
689 #[inline(never)]
703 fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
704 assert_eq!(self.char(), '(');
705 match self.parse_group()? {
706 Either::Left(set) => {
707 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
708 if let Some(v) = ignore {
709 self.parser().ignore_whitespace.set(v);
710 }
711
712 concat.asts.push(Ast::flags(set));
713 Ok(concat)
714 }
715 Either::Right(group) => {
716 let old_ignore_whitespace = self.ignore_whitespace();
717 let new_ignore_whitespace = group
718 .flags()
719 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
720 .unwrap_or(old_ignore_whitespace);
721 self.parser().stack_group.borrow_mut().push(
722 GroupState::Group {
723 concat,
724 group,
725 ignore_whitespace: old_ignore_whitespace,
726 },
727 );
728 self.parser().ignore_whitespace.set(new_ignore_whitespace);
729 Ok(ast::Concat { span: self.span(), asts: vec![] })
730 }
731 }
732 }
733
734 #[inline(never)]
744 fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
745 use self::GroupState::*;
746
747 assert_eq!(self.char(), ')');
748 let mut stack = self.parser().stack_group.borrow_mut();
749 let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
750 .pop()
751 {
752 Some(Group { concat, group, ignore_whitespace }) => {
753 (concat, group, ignore_whitespace, None)
754 }
755 Some(Alternation(alt)) => match stack.pop() {
756 Some(Group { concat, group, ignore_whitespace }) => {
757 (concat, group, ignore_whitespace, Some(alt))
758 }
759 None | Some(Alternation(_)) => {
760 return Err(self.error(
761 self.span_char(),
762 ast::ErrorKind::GroupUnopened,
763 ));
764 }
765 },
766 None => {
767 return Err(self
768 .error(self.span_char(), ast::ErrorKind::GroupUnopened));
769 }
770 };
771 self.parser().ignore_whitespace.set(ignore_whitespace);
772 group_concat.span.end = self.pos();
773 self.bump();
774 group.span.end = self.pos();
775 match alt {
776 Some(mut alt) => {
777 alt.span.end = group_concat.span.end;
778 alt.asts.push(group_concat.into_ast());
779 group.ast = Box::new(alt.into_ast());
780 }
781 None => {
782 group.ast = Box::new(group_concat.into_ast());
783 }
784 }
785 prior_concat.asts.push(Ast::group(group));
786 Ok(prior_concat)
787 }
788
789 #[inline(never)]
796 fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
797 concat.span.end = self.pos();
798 let mut stack = self.parser().stack_group.borrow_mut();
799 let ast = match stack.pop() {
800 None => Ok(concat.into_ast()),
801 Some(GroupState::Alternation(mut alt)) => {
802 alt.span.end = self.pos();
803 alt.asts.push(concat.into_ast());
804 Ok(Ast::alternation(alt))
805 }
806 Some(GroupState::Group { group, .. }) => {
807 return Err(
808 self.error(group.span, ast::ErrorKind::GroupUnclosed)
809 );
810 }
811 };
812 match stack.pop() {
814 None => ast,
815 Some(GroupState::Alternation(_)) => {
816 unreachable!()
823 }
824 Some(GroupState::Group { group, .. }) => {
825 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
826 }
827 }
828 }
829
830 #[inline(never)]
839 fn push_class_open(
840 &self,
841 parent_union: ast::ClassSetUnion,
842 ) -> Result<ast::ClassSetUnion> {
843 assert_eq!(self.char(), '[');
844
845 let (nested_set, nested_union) = self.parse_set_class_open()?;
846 self.parser()
847 .stack_class
848 .borrow_mut()
849 .push(ClassState::Open { union: parent_union, set: nested_set });
850 Ok(nested_union)
851 }
852
853 #[inline(never)]
868 fn pop_class(
869 &self,
870 nested_union: ast::ClassSetUnion,
871 ) -> Result<Either<ast::ClassSetUnion, ast::ClassBracketed>> {
872 assert_eq!(self.char(), ']');
873
874 let item = ast::ClassSet::Item(nested_union.into_item());
875 let prevset = self.pop_class_op(item);
876 let mut stack = self.parser().stack_class.borrow_mut();
877 match stack.pop() {
878 None => {
879 panic!("unexpected empty character class stack")
888 }
889 Some(ClassState::Op { .. }) => {
890 panic!("unexpected ClassState::Op")
897 }
898 Some(ClassState::Open { mut union, mut set }) => {
899 self.bump();
900 set.span.end = self.pos();
901 set.kind = prevset;
902 if stack.is_empty() {
903 Ok(Either::Right(set))
904 } else {
905 union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
906 Ok(Either::Left(union))
907 }
908 }
909 }
910 }
911
912 #[inline(never)]
917 fn unclosed_class_error(&self) -> ast::Error {
918 for state in self.parser().stack_class.borrow().iter().rev() {
919 if let ClassState::Open { ref set, .. } = *state {
920 return self.error(set.span, ast::ErrorKind::ClassUnclosed);
921 }
922 }
923 panic!("no open character class found")
926 }
927
928 #[inline(never)]
934 fn push_class_op(
935 &self,
936 next_kind: ast::ClassSetBinaryOpKind,
937 next_union: ast::ClassSetUnion,
938 ) -> ast::ClassSetUnion {
939 let item = ast::ClassSet::Item(next_union.into_item());
940 let new_lhs = self.pop_class_op(item);
941 self.parser()
942 .stack_class
943 .borrow_mut()
944 .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
945 ast::ClassSetUnion { span: self.span(), items: vec![] }
946 }
947
948 #[inline(never)]
954 fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
955 let mut stack = self.parser().stack_class.borrow_mut();
956 let (kind, lhs) = match stack.pop() {
957 Some(ClassState::Op { kind, lhs }) => (kind, lhs),
958 Some(state @ ClassState::Open { .. }) => {
959 stack.push(state);
960 return rhs;
961 }
962 None => unreachable!(),
963 };
964 let span = Span::new(lhs.span().start, rhs.span().end);
965 ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
966 span,
967 kind,
968 lhs: Box::new(lhs),
969 rhs: Box::new(rhs),
970 })
971 }
972}
973
974impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
975 fn parse(&self) -> Result<Ast> {
977 self.parse_with_comments().map(|astc| astc.ast)
978 }
979
980 fn parse_with_comments(&self) -> Result<ast::WithComments> {
983 assert_eq!(self.offset(), 0, "parser can only be used once");
984 self.parser().reset();
985 let mut concat = ast::Concat { span: self.span(), asts: vec![] };
986 loop {
987 self.bump_space();
988 if self.is_eof() {
989 break;
990 }
991 match self.char() {
992 '(' => concat = self.push_group(concat)?,
993 ')' => concat = self.pop_group(concat)?,
994 '|' => concat = self.push_alternate(concat)?,
995 '[' => {
996 let class = self.parse_set_class()?;
997 concat.asts.push(Ast::class_bracketed(class));
998 }
999 '?' => {
1000 concat = self.parse_uncounted_repetition(
1001 concat,
1002 ast::RepetitionKind::ZeroOrOne,
1003 )?;
1004 }
1005 '*' => {
1006 concat = self.parse_uncounted_repetition(
1007 concat,
1008 ast::RepetitionKind::ZeroOrMore,
1009 )?;
1010 }
1011 '+' => {
1012 concat = self.parse_uncounted_repetition(
1013 concat,
1014 ast::RepetitionKind::OneOrMore,
1015 )?;
1016 }
1017 '{' => {
1018 concat = self.parse_counted_repetition(concat)?;
1019 }
1020 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1021 }
1022 }
1023 let ast = self.pop_group_end(concat)?;
1024 NestLimiter::new(self).check(&ast)?;
1025 Ok(ast::WithComments {
1026 ast,
1027 comments: mem::replace(
1028 &mut *self.parser().comments.borrow_mut(),
1029 vec![],
1030 ),
1031 })
1032 }
1033
1034 #[inline(never)]
1048 fn parse_uncounted_repetition(
1049 &self,
1050 mut concat: ast::Concat,
1051 kind: ast::RepetitionKind,
1052 ) -> Result<ast::Concat> {
1053 assert!(
1054 self.char() == '?' || self.char() == '*' || self.char() == '+'
1055 );
1056 let op_start = self.pos();
1057 let ast = match concat.asts.pop() {
1058 Some(ast) => ast,
1059 None => {
1060 return Err(
1061 self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1062 )
1063 }
1064 };
1065 match ast {
1066 Ast::Empty(_) | Ast::Flags(_) => {
1067 return Err(
1068 self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1069 )
1070 }
1071 _ => {}
1072 }
1073 let mut greedy = true;
1074 if self.bump() && self.char() == '?' {
1075 greedy = false;
1076 self.bump();
1077 }
1078 concat.asts.push(Ast::repetition(ast::Repetition {
1079 span: ast.span().with_end(self.pos()),
1080 op: ast::RepetitionOp {
1081 span: Span::new(op_start, self.pos()),
1082 kind,
1083 },
1084 greedy,
1085 ast: Box::new(ast),
1086 }));
1087 Ok(concat)
1088 }
1089
1090 #[inline(never)]
1103 fn parse_counted_repetition(
1104 &self,
1105 mut concat: ast::Concat,
1106 ) -> Result<ast::Concat> {
1107 assert!(self.char() == '{');
1108 let start = self.pos();
1109 let ast = match concat.asts.pop() {
1110 Some(ast) => ast,
1111 None => {
1112 return Err(
1113 self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1114 )
1115 }
1116 };
1117 match ast {
1118 Ast::Empty(_) | Ast::Flags(_) => {
1119 return Err(
1120 self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1121 )
1122 }
1123 _ => {}
1124 }
1125 if !self.bump_and_bump_space() {
1126 return Err(self.error(
1127 Span::new(start, self.pos()),
1128 ast::ErrorKind::RepetitionCountUnclosed,
1129 ));
1130 }
1131 let count_start = specialize_err(
1132 self.parse_decimal(),
1133 ast::ErrorKind::DecimalEmpty,
1134 ast::ErrorKind::RepetitionCountDecimalEmpty,
1135 );
1136 if self.is_eof() {
1137 return Err(self.error(
1138 Span::new(start, self.pos()),
1139 ast::ErrorKind::RepetitionCountUnclosed,
1140 ));
1141 }
1142 let range = if self.char() == ',' {
1143 if !self.bump_and_bump_space() {
1144 return Err(self.error(
1145 Span::new(start, self.pos()),
1146 ast::ErrorKind::RepetitionCountUnclosed,
1147 ));
1148 }
1149 if self.char() != '}' {
1150 let count_start = match count_start {
1151 Ok(c) => c,
1152 Err(err)
1153 if err.kind
1154 == ast::ErrorKind::RepetitionCountDecimalEmpty =>
1155 {
1156 if self.parser().empty_min_range {
1157 0
1158 } else {
1159 return Err(err);
1160 }
1161 }
1162 err => err?,
1163 };
1164 let count_end = specialize_err(
1165 self.parse_decimal(),
1166 ast::ErrorKind::DecimalEmpty,
1167 ast::ErrorKind::RepetitionCountDecimalEmpty,
1168 )?;
1169 ast::RepetitionRange::Bounded(count_start, count_end)
1170 } else {
1171 ast::RepetitionRange::AtLeast(count_start?)
1172 }
1173 } else {
1174 ast::RepetitionRange::Exactly(count_start?)
1175 };
1176
1177 if self.is_eof() || self.char() != '}' {
1178 return Err(self.error(
1179 Span::new(start, self.pos()),
1180 ast::ErrorKind::RepetitionCountUnclosed,
1181 ));
1182 }
1183
1184 let mut greedy = true;
1185 if self.bump_and_bump_space() && self.char() == '?' {
1186 greedy = false;
1187 self.bump();
1188 }
1189
1190 let op_span = Span::new(start, self.pos());
1191 if !range.is_valid() {
1192 return Err(
1193 self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1194 );
1195 }
1196 concat.asts.push(Ast::repetition(ast::Repetition {
1197 span: ast.span().with_end(self.pos()),
1198 op: ast::RepetitionOp {
1199 span: op_span,
1200 kind: ast::RepetitionKind::Range(range),
1201 },
1202 greedy,
1203 ast: Box::new(ast),
1204 }));
1205 Ok(concat)
1206 }
1207
1208 #[inline(never)]
1227 fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1228 assert_eq!(self.char(), '(');
1229 let open_span = self.span_char();
1230 self.bump();
1231 self.bump_space();
1232 if self.is_lookaround_prefix() {
1233 return Err(self.error(
1234 Span::new(open_span.start, self.span().end),
1235 ast::ErrorKind::UnsupportedLookAround,
1236 ));
1237 }
1238 let inner_span = self.span();
1239 let mut starts_with_p = true;
1240 if self.bump_if("?P<") || {
1241 starts_with_p = false;
1242 self.bump_if("?<")
1243 } {
1244 let capture_index = self.next_capture_index(open_span)?;
1245 let name = self.parse_capture_name(capture_index)?;
1246 Ok(Either::Right(ast::Group {
1247 span: open_span,
1248 kind: ast::GroupKind::CaptureName { starts_with_p, name },
1249 ast: Box::new(Ast::empty(self.span())),
1250 }))
1251 } else if self.bump_if("?") {
1252 if self.is_eof() {
1253 return Err(
1254 self.error(open_span, ast::ErrorKind::GroupUnclosed)
1255 );
1256 }
1257 let flags = self.parse_flags()?;
1258 let char_end = self.char();
1259 self.bump();
1260 if char_end == ')' {
1261 if flags.items.is_empty() {
1264 return Err(self.error(
1265 inner_span,
1266 ast::ErrorKind::RepetitionMissing,
1267 ));
1268 }
1269 Ok(Either::Left(ast::SetFlags {
1270 span: Span { end: self.pos(), ..open_span },
1271 flags,
1272 }))
1273 } else {
1274 assert_eq!(char_end, ':');
1275 Ok(Either::Right(ast::Group {
1276 span: open_span,
1277 kind: ast::GroupKind::NonCapturing(flags),
1278 ast: Box::new(Ast::empty(self.span())),
1279 }))
1280 }
1281 } else {
1282 let capture_index = self.next_capture_index(open_span)?;
1283 Ok(Either::Right(ast::Group {
1284 span: open_span,
1285 kind: ast::GroupKind::CaptureIndex(capture_index),
1286 ast: Box::new(Ast::empty(self.span())),
1287 }))
1288 }
1289 }
1290
1291 #[inline(never)]
1298 fn parse_capture_name(
1299 &self,
1300 capture_index: u32,
1301 ) -> Result<ast::CaptureName> {
1302 if self.is_eof() {
1303 return Err(self
1304 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1305 }
1306 let start = self.pos();
1307 loop {
1308 if self.char() == '>' {
1309 break;
1310 }
1311 if !is_capture_char(self.char(), self.pos() == start) {
1312 return Err(self.error(
1313 self.span_char(),
1314 ast::ErrorKind::GroupNameInvalid,
1315 ));
1316 }
1317 if !self.bump() {
1318 break;
1319 }
1320 }
1321 let end = self.pos();
1322 if self.is_eof() {
1323 return Err(self
1324 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1325 }
1326 assert_eq!(self.char(), '>');
1327 self.bump();
1328 let name = &self.pattern()[start.offset..end.offset];
1329 if name.is_empty() {
1330 return Err(self.error(
1331 Span::new(start, start),
1332 ast::ErrorKind::GroupNameEmpty,
1333 ));
1334 }
1335 let capname = ast::CaptureName {
1336 span: Span::new(start, end),
1337 name: name.to_string(),
1338 index: capture_index,
1339 };
1340 self.add_capture_name(&capname)?;
1341 Ok(capname)
1342 }
1343
1344 #[inline(never)]
1359 fn parse_flags(&self) -> Result<ast::Flags> {
1360 let mut flags = ast::Flags { span: self.span(), items: vec![] };
1361 let mut last_was_negation = None;
1362 while self.char() != ':' && self.char() != ')' {
1363 if self.char() == '-' {
1364 last_was_negation = Some(self.span_char());
1365 let item = ast::FlagsItem {
1366 span: self.span_char(),
1367 kind: ast::FlagsItemKind::Negation,
1368 };
1369 if let Some(i) = flags.add_item(item) {
1370 return Err(self.error(
1371 self.span_char(),
1372 ast::ErrorKind::FlagRepeatedNegation {
1373 original: flags.items[i].span,
1374 },
1375 ));
1376 }
1377 } else {
1378 last_was_negation = None;
1379 let item = ast::FlagsItem {
1380 span: self.span_char(),
1381 kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1382 };
1383 if let Some(i) = flags.add_item(item) {
1384 return Err(self.error(
1385 self.span_char(),
1386 ast::ErrorKind::FlagDuplicate {
1387 original: flags.items[i].span,
1388 },
1389 ));
1390 }
1391 }
1392 if !self.bump() {
1393 return Err(
1394 self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1395 );
1396 }
1397 }
1398 if let Some(span) = last_was_negation {
1399 return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1400 }
1401 flags.span.end = self.pos();
1402 Ok(flags)
1403 }
1404
1405 #[inline(never)]
1411 fn parse_flag(&self) -> Result<ast::Flag> {
1412 match self.char() {
1413 'i' => Ok(ast::Flag::CaseInsensitive),
1414 'm' => Ok(ast::Flag::MultiLine),
1415 's' => Ok(ast::Flag::DotMatchesNewLine),
1416 'U' => Ok(ast::Flag::SwapGreed),
1417 'u' => Ok(ast::Flag::Unicode),
1418 'R' => Ok(ast::Flag::CRLF),
1419 'x' => Ok(ast::Flag::IgnoreWhitespace),
1420 _ => {
1421 Err(self
1422 .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1423 }
1424 }
1425 }
1426
1427 fn parse_primitive(&self) -> Result<Primitive> {
1438 match self.char() {
1439 '\\' => self.parse_escape(),
1440 '.' => {
1441 let ast = Primitive::Dot(self.span_char());
1442 self.bump();
1443 Ok(ast)
1444 }
1445 '^' => {
1446 let ast = Primitive::Assertion(ast::Assertion {
1447 span: self.span_char(),
1448 kind: ast::AssertionKind::StartLine,
1449 });
1450 self.bump();
1451 Ok(ast)
1452 }
1453 '$' => {
1454 let ast = Primitive::Assertion(ast::Assertion {
1455 span: self.span_char(),
1456 kind: ast::AssertionKind::EndLine,
1457 });
1458 self.bump();
1459 Ok(ast)
1460 }
1461 c => {
1462 let ast = Primitive::Literal(ast::Literal {
1463 span: self.span_char(),
1464 kind: ast::LiteralKind::Verbatim,
1465 c,
1466 });
1467 self.bump();
1468 Ok(ast)
1469 }
1470 }
1471 }
1472
1473 #[inline(never)]
1479 fn parse_escape(&self) -> Result<Primitive> {
1480 assert_eq!(self.char(), '\\');
1481 let start = self.pos();
1482 if !self.bump() {
1483 return Err(self.error(
1484 Span::new(start, self.pos()),
1485 ast::ErrorKind::EscapeUnexpectedEof,
1486 ));
1487 }
1488 let c = self.char();
1489 match c {
1491 '0'..='7' => {
1492 if !self.parser().octal {
1493 return Err(self.error(
1494 Span::new(start, self.span_char().end),
1495 ast::ErrorKind::UnsupportedBackreference,
1496 ));
1497 }
1498 let mut lit = self.parse_octal();
1499 lit.span.start = start;
1500 return Ok(Primitive::Literal(lit));
1501 }
1502 '8'..='9' if !self.parser().octal => {
1503 return Err(self.error(
1504 Span::new(start, self.span_char().end),
1505 ast::ErrorKind::UnsupportedBackreference,
1506 ));
1507 }
1508 'x' | 'u' | 'U' => {
1509 let mut lit = self.parse_hex()?;
1510 lit.span.start = start;
1511 return Ok(Primitive::Literal(lit));
1512 }
1513 'p' | 'P' => {
1514 let mut cls = self.parse_unicode_class()?;
1515 cls.span.start = start;
1516 return Ok(Primitive::Unicode(cls));
1517 }
1518 'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1519 let mut cls = self.parse_perl_class();
1520 cls.span.start = start;
1521 return Ok(Primitive::Perl(cls));
1522 }
1523 _ => {}
1524 }
1525
1526 self.bump();
1528 let span = Span::new(start, self.pos());
1529 if is_meta_character(c) {
1530 return Ok(Primitive::Literal(ast::Literal {
1531 span,
1532 kind: ast::LiteralKind::Meta,
1533 c,
1534 }));
1535 }
1536 if is_escapeable_character(c) {
1537 return Ok(Primitive::Literal(ast::Literal {
1538 span,
1539 kind: ast::LiteralKind::Superfluous,
1540 c,
1541 }));
1542 }
1543 let special = |kind, c| {
1544 Ok(Primitive::Literal(ast::Literal {
1545 span,
1546 kind: ast::LiteralKind::Special(kind),
1547 c,
1548 }))
1549 };
1550 match c {
1551 'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1552 'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1553 't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1554 'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1555 'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1556 'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1557 'A' => Ok(Primitive::Assertion(ast::Assertion {
1558 span,
1559 kind: ast::AssertionKind::StartText,
1560 })),
1561 'z' => Ok(Primitive::Assertion(ast::Assertion {
1562 span,
1563 kind: ast::AssertionKind::EndText,
1564 })),
1565 'b' => {
1566 let mut wb = ast::Assertion {
1567 span,
1568 kind: ast::AssertionKind::WordBoundary,
1569 };
1570 if !self.is_eof() && self.char() == '{' {
1573 if let Some(kind) =
1574 self.maybe_parse_special_word_boundary(start)?
1575 {
1576 wb.kind = kind;
1577 wb.span.end = self.pos();
1578 }
1579 }
1580 Ok(Primitive::Assertion(wb))
1581 }
1582 'B' => Ok(Primitive::Assertion(ast::Assertion {
1583 span,
1584 kind: ast::AssertionKind::NotWordBoundary,
1585 })),
1586 '<' => Ok(Primitive::Assertion(ast::Assertion {
1587 span,
1588 kind: ast::AssertionKind::WordBoundaryStartAngle,
1589 })),
1590 '>' => Ok(Primitive::Assertion(ast::Assertion {
1591 span,
1592 kind: ast::AssertionKind::WordBoundaryEndAngle,
1593 })),
1594 _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1595 }
1596 }
1597
1598 fn maybe_parse_special_word_boundary(
1618 &self,
1619 wb_start: Position,
1620 ) -> Result<Option<ast::AssertionKind>> {
1621 assert_eq!(self.char(), '{');
1622
1623 let is_valid_char = |c| match c {
1624 'A'..='Z' | 'a'..='z' | '-' => true,
1625 _ => false,
1626 };
1627 let start = self.pos();
1628 if !self.bump_and_bump_space() {
1629 return Err(self.error(
1630 Span::new(wb_start, self.pos()),
1631 ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
1632 ));
1633 }
1634 let start_contents = self.pos();
1635 if !is_valid_char(self.char()) {
1640 self.parser().pos.set(start);
1641 return Ok(None);
1642 }
1643
1644 let mut scratch = self.parser().scratch.borrow_mut();
1646 scratch.clear();
1647 while !self.is_eof() && is_valid_char(self.char()) {
1648 scratch.push(self.char());
1649 self.bump_and_bump_space();
1650 }
1651 if self.is_eof() || self.char() != '}' {
1652 return Err(self.error(
1653 Span::new(start, self.pos()),
1654 ast::ErrorKind::SpecialWordBoundaryUnclosed,
1655 ));
1656 }
1657 let end = self.pos();
1658 self.bump();
1659 let kind = match scratch.as_str() {
1660 "start" => ast::AssertionKind::WordBoundaryStart,
1661 "end" => ast::AssertionKind::WordBoundaryEnd,
1662 "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
1663 "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
1664 _ => {
1665 return Err(self.error(
1666 Span::new(start_contents, end),
1667 ast::ErrorKind::SpecialWordBoundaryUnrecognized,
1668 ))
1669 }
1670 };
1671 Ok(Some(kind))
1672 }
1673
1674 #[inline(never)]
1682 fn parse_octal(&self) -> ast::Literal {
1683 assert!(self.parser().octal);
1684 assert!('0' <= self.char() && self.char() <= '7');
1685 let start = self.pos();
1686 while self.bump()
1688 && '0' <= self.char()
1689 && self.char() <= '7'
1690 && self.pos().offset - start.offset <= 2
1691 {}
1692 let end = self.pos();
1693 let octal = &self.pattern()[start.offset..end.offset];
1694 let codepoint =
1697 u32::from_str_radix(octal, 8).expect("valid octal number");
1698 let c = char::from_u32(codepoint).expect("Unicode scalar value");
1701 ast::Literal {
1702 span: Span::new(start, end),
1703 kind: ast::LiteralKind::Octal,
1704 c,
1705 }
1706 }
1707
1708 #[inline(never)]
1713 fn parse_hex(&self) -> Result<ast::Literal> {
1714 assert!(
1715 self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1716 );
1717
1718 let hex_kind = match self.char() {
1719 'x' => ast::HexLiteralKind::X,
1720 'u' => ast::HexLiteralKind::UnicodeShort,
1721 _ => ast::HexLiteralKind::UnicodeLong,
1722 };
1723 if !self.bump_and_bump_space() {
1724 return Err(
1725 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1726 );
1727 }
1728 if self.char() == '{' {
1729 self.parse_hex_brace(hex_kind)
1730 } else {
1731 self.parse_hex_digits(hex_kind)
1732 }
1733 }
1734
1735 #[inline(never)]
1743 fn parse_hex_digits(
1744 &self,
1745 kind: ast::HexLiteralKind,
1746 ) -> Result<ast::Literal> {
1747 let mut scratch = self.parser().scratch.borrow_mut();
1748 scratch.clear();
1749
1750 let start = self.pos();
1751 for i in 0..kind.digits() {
1752 if i > 0 && !self.bump_and_bump_space() {
1753 return Err(self
1754 .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1755 }
1756 if !is_hex(self.char()) {
1757 return Err(self.error(
1758 self.span_char(),
1759 ast::ErrorKind::EscapeHexInvalidDigit,
1760 ));
1761 }
1762 scratch.push(self.char());
1763 }
1764 self.bump_and_bump_space();
1767 let end = self.pos();
1768 let hex = scratch.as_str();
1769 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1770 None => Err(self.error(
1771 Span::new(start, end),
1772 ast::ErrorKind::EscapeHexInvalid,
1773 )),
1774 Some(c) => Ok(ast::Literal {
1775 span: Span::new(start, end),
1776 kind: ast::LiteralKind::HexFixed(kind),
1777 c,
1778 }),
1779 }
1780 }
1781
1782 #[inline(never)]
1786 fn parse_hex_brace(
1787 &self,
1788 kind: ast::HexLiteralKind,
1789 ) -> Result<ast::Literal> {
1790 let mut scratch = self.parser().scratch.borrow_mut();
1791 scratch.clear();
1792
1793 let brace_pos = self.pos();
1794 let start = self.span_char().end;
1795 while self.bump_and_bump_space() && self.char() != '}' {
1796 if !is_hex(self.char()) {
1797 return Err(self.error(
1798 self.span_char(),
1799 ast::ErrorKind::EscapeHexInvalidDigit,
1800 ));
1801 }
1802 scratch.push(self.char());
1803 }
1804 if self.is_eof() {
1805 return Err(self.error(
1806 Span::new(brace_pos, self.pos()),
1807 ast::ErrorKind::EscapeUnexpectedEof,
1808 ));
1809 }
1810 let end = self.pos();
1811 let hex = scratch.as_str();
1812 assert_eq!(self.char(), '}');
1813 self.bump_and_bump_space();
1814
1815 if hex.is_empty() {
1816 return Err(self.error(
1817 Span::new(brace_pos, self.pos()),
1818 ast::ErrorKind::EscapeHexEmpty,
1819 ));
1820 }
1821 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1822 None => Err(self.error(
1823 Span::new(start, end),
1824 ast::ErrorKind::EscapeHexInvalid,
1825 )),
1826 Some(c) => Ok(ast::Literal {
1827 span: Span::new(start, self.pos()),
1828 kind: ast::LiteralKind::HexBrace(kind),
1829 c,
1830 }),
1831 }
1832 }
1833
1834 fn parse_decimal(&self) -> Result<u32> {
1844 let mut scratch = self.parser().scratch.borrow_mut();
1845 scratch.clear();
1846
1847 while !self.is_eof() && self.char().is_whitespace() {
1848 self.bump();
1849 }
1850 let start = self.pos();
1851 while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1852 scratch.push(self.char());
1853 self.bump_and_bump_space();
1854 }
1855 let span = Span::new(start, self.pos());
1856 while !self.is_eof() && self.char().is_whitespace() {
1857 self.bump_and_bump_space();
1858 }
1859 let digits = scratch.as_str();
1860 if digits.is_empty() {
1861 return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1862 }
1863 match u32::from_str_radix(digits, 10).ok() {
1864 Some(n) => Ok(n),
1865 None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1866 }
1867 }
1868
1869 #[inline(never)]
1877 fn parse_set_class(&self) -> Result<ast::ClassBracketed> {
1878 assert_eq!(self.char(), '[');
1879
1880 let mut union =
1881 ast::ClassSetUnion { span: self.span(), items: vec![] };
1882 loop {
1883 self.bump_space();
1884 if self.is_eof() {
1885 return Err(self.unclosed_class_error());
1886 }
1887 match self.char() {
1888 '[' => {
1889 if !self.parser().stack_class.borrow().is_empty() {
1894 if let Some(cls) = self.maybe_parse_ascii_class() {
1895 union.push(ast::ClassSetItem::Ascii(cls));
1896 continue;
1897 }
1898 }
1899 union = self.push_class_open(union)?;
1900 }
1901 ']' => match self.pop_class(union)? {
1902 Either::Left(nested_union) => {
1903 union = nested_union;
1904 }
1905 Either::Right(class) => return Ok(class),
1906 },
1907 '&' if self.peek() == Some('&') => {
1908 assert!(self.bump_if("&&"));
1909 union = self.push_class_op(
1910 ast::ClassSetBinaryOpKind::Intersection,
1911 union,
1912 );
1913 }
1914 '-' if self.peek() == Some('-') => {
1915 assert!(self.bump_if("--"));
1916 union = self.push_class_op(
1917 ast::ClassSetBinaryOpKind::Difference,
1918 union,
1919 );
1920 }
1921 '~' if self.peek() == Some('~') => {
1922 assert!(self.bump_if("~~"));
1923 union = self.push_class_op(
1924 ast::ClassSetBinaryOpKind::SymmetricDifference,
1925 union,
1926 );
1927 }
1928 _ => {
1929 union.push(self.parse_set_class_range()?);
1930 }
1931 }
1932 }
1933 }
1934
1935 #[inline(never)]
1944 fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1945 let prim1 = self.parse_set_class_item()?;
1946 self.bump_space();
1947 if self.is_eof() {
1948 return Err(self.unclosed_class_error());
1949 }
1950 if self.char() != '-'
1956 || self.peek_space() == Some(']')
1957 || self.peek_space() == Some('-')
1958 {
1959 return prim1.into_class_set_item(self);
1960 }
1961 if !self.bump_and_bump_space() {
1964 return Err(self.unclosed_class_error());
1965 }
1966 let prim2 = self.parse_set_class_item()?;
1967 let range = ast::ClassSetRange {
1968 span: Span::new(prim1.span().start, prim2.span().end),
1969 start: prim1.into_class_literal(self)?,
1970 end: prim2.into_class_literal(self)?,
1971 };
1972 if !range.is_valid() {
1973 return Err(
1974 self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1975 );
1976 }
1977 Ok(ast::ClassSetItem::Range(range))
1978 }
1979
1980 #[inline(never)]
1991 fn parse_set_class_item(&self) -> Result<Primitive> {
1992 if self.char() == '\\' {
1993 self.parse_escape()
1994 } else {
1995 let x = Primitive::Literal(ast::Literal {
1996 span: self.span_char(),
1997 kind: ast::LiteralKind::Verbatim,
1998 c: self.char(),
1999 });
2000 self.bump();
2001 Ok(x)
2002 }
2003 }
2004
2005 #[inline(never)]
2022 fn parse_set_class_open(
2023 &self,
2024 ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
2025 assert_eq!(self.char(), '[');
2026 let start = self.pos();
2027 if !self.bump_and_bump_space() {
2028 return Err(self.error(
2029 Span::new(start, self.pos()),
2030 ast::ErrorKind::ClassUnclosed,
2031 ));
2032 }
2033
2034 let negated = if self.char() != '^' {
2035 false
2036 } else {
2037 if !self.bump_and_bump_space() {
2038 return Err(self.error(
2039 Span::new(start, self.pos()),
2040 ast::ErrorKind::ClassUnclosed,
2041 ));
2042 }
2043 true
2044 };
2045 let mut union =
2047 ast::ClassSetUnion { span: self.span(), items: vec![] };
2048 while self.char() == '-' {
2049 union.push(ast::ClassSetItem::Literal(ast::Literal {
2050 span: self.span_char(),
2051 kind: ast::LiteralKind::Verbatim,
2052 c: '-',
2053 }));
2054 if !self.bump_and_bump_space() {
2055 return Err(self.error(
2056 Span::new(start, start),
2057 ast::ErrorKind::ClassUnclosed,
2058 ));
2059 }
2060 }
2061 if union.items.is_empty() && self.char() == ']' {
2064 union.push(ast::ClassSetItem::Literal(ast::Literal {
2065 span: self.span_char(),
2066 kind: ast::LiteralKind::Verbatim,
2067 c: ']',
2068 }));
2069 if !self.bump_and_bump_space() {
2070 return Err(self.error(
2071 Span::new(start, self.pos()),
2072 ast::ErrorKind::ClassUnclosed,
2073 ));
2074 }
2075 }
2076 let set = ast::ClassBracketed {
2077 span: Span::new(start, self.pos()),
2078 negated,
2079 kind: ast::ClassSet::union(ast::ClassSetUnion {
2080 span: Span::new(union.span.start, union.span.start),
2081 items: vec![],
2082 }),
2083 };
2084 Ok((set, union))
2085 }
2086
2087 #[inline(never)]
2096 fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
2097 assert_eq!(self.char(), '[');
2117 let start = self.pos();
2119 let mut negated = false;
2120 if !self.bump() || self.char() != ':' {
2121 self.parser().pos.set(start);
2122 return None;
2123 }
2124 if !self.bump() {
2125 self.parser().pos.set(start);
2126 return None;
2127 }
2128 if self.char() == '^' {
2129 negated = true;
2130 if !self.bump() {
2131 self.parser().pos.set(start);
2132 return None;
2133 }
2134 }
2135 let name_start = self.offset();
2136 while self.char() != ':' && self.bump() {}
2137 if self.is_eof() {
2138 self.parser().pos.set(start);
2139 return None;
2140 }
2141 let name = &self.pattern()[name_start..self.offset()];
2142 if !self.bump_if(":]") {
2143 self.parser().pos.set(start);
2144 return None;
2145 }
2146 let kind = match ast::ClassAsciiKind::from_name(name) {
2147 Some(kind) => kind,
2148 None => {
2149 self.parser().pos.set(start);
2150 return None;
2151 }
2152 };
2153 Some(ast::ClassAscii {
2154 span: Span::new(start, self.pos()),
2155 kind,
2156 negated,
2157 })
2158 }
2159
2160 #[inline(never)]
2167 fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2168 assert!(self.char() == 'p' || self.char() == 'P');
2169
2170 let mut scratch = self.parser().scratch.borrow_mut();
2171 scratch.clear();
2172
2173 let negated = self.char() == 'P';
2174 if !self.bump_and_bump_space() {
2175 return Err(
2176 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2177 );
2178 }
2179 let (start, kind) = if self.char() == '{' {
2180 let start = self.span_char().end;
2181 while self.bump_and_bump_space() && self.char() != '}' {
2182 scratch.push(self.char());
2183 }
2184 if self.is_eof() {
2185 return Err(self
2186 .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2187 }
2188 assert_eq!(self.char(), '}');
2189 self.bump();
2190
2191 let name = scratch.as_str();
2192 if let Some(i) = name.find("!=") {
2193 (
2194 start,
2195 ast::ClassUnicodeKind::NamedValue {
2196 op: ast::ClassUnicodeOpKind::NotEqual,
2197 name: name[..i].to_string(),
2198 value: name[i + 2..].to_string(),
2199 },
2200 )
2201 } else if let Some(i) = name.find(':') {
2202 (
2203 start,
2204 ast::ClassUnicodeKind::NamedValue {
2205 op: ast::ClassUnicodeOpKind::Colon,
2206 name: name[..i].to_string(),
2207 value: name[i + 1..].to_string(),
2208 },
2209 )
2210 } else if let Some(i) = name.find('=') {
2211 (
2212 start,
2213 ast::ClassUnicodeKind::NamedValue {
2214 op: ast::ClassUnicodeOpKind::Equal,
2215 name: name[..i].to_string(),
2216 value: name[i + 1..].to_string(),
2217 },
2218 )
2219 } else {
2220 (start, ast::ClassUnicodeKind::Named(name.to_string()))
2221 }
2222 } else {
2223 let start = self.pos();
2224 let c = self.char();
2225 if c == '\\' {
2226 return Err(self.error(
2227 self.span_char(),
2228 ast::ErrorKind::UnicodeClassInvalid,
2229 ));
2230 }
2231 self.bump_and_bump_space();
2232 let kind = ast::ClassUnicodeKind::OneLetter(c);
2233 (start, kind)
2234 };
2235 Ok(ast::ClassUnicode {
2236 span: Span::new(start, self.pos()),
2237 negated,
2238 kind,
2239 })
2240 }
2241
2242 #[inline(never)]
2246 fn parse_perl_class(&self) -> ast::ClassPerl {
2247 let c = self.char();
2248 let span = self.span_char();
2249 self.bump();
2250 let (negated, kind) = match c {
2251 'd' => (false, ast::ClassPerlKind::Digit),
2252 'D' => (true, ast::ClassPerlKind::Digit),
2253 's' => (false, ast::ClassPerlKind::Space),
2254 'S' => (true, ast::ClassPerlKind::Space),
2255 'w' => (false, ast::ClassPerlKind::Word),
2256 'W' => (true, ast::ClassPerlKind::Word),
2257 c => panic!("expected valid Perl class but got '{}'", c),
2258 };
2259 ast::ClassPerl { span, kind, negated }
2260 }
2261}
2262
/// A visitor that walks an `Ast` and reports an error as soon as its nesting
/// depth exceeds the parser's configured nest limit.
#[derive(Debug)]
struct NestLimiter<'p, 's, P> {
    /// The parser, used to read the configured nest limit and build errors.
    p: &'p ParserI<'s, P>,
    /// The nesting depth at the current point of the traversal.
    depth: u32,
}
2272
2273impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2274 fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2275 NestLimiter { p, depth: 0 }
2276 }
2277
2278 #[inline(never)]
2279 fn check(self, ast: &Ast) -> Result<()> {
2280 ast::visit(ast, self)
2281 }
2282
2283 fn increment_depth(&mut self, span: &Span) -> Result<()> {
2284 let new = self.depth.checked_add(1).ok_or_else(|| {
2285 self.p.error(
2286 span.clone(),
2287 ast::ErrorKind::NestLimitExceeded(u32::MAX),
2288 )
2289 })?;
2290 let limit = self.p.parser().nest_limit;
2291 if new > limit {
2292 return Err(self.p.error(
2293 span.clone(),
2294 ast::ErrorKind::NestLimitExceeded(limit),
2295 ));
2296 }
2297 self.depth = new;
2298 Ok(())
2299 }
2300
2301 fn decrement_depth(&mut self) {
2302 self.depth = self.depth.checked_sub(1).unwrap();
2305 }
2306}
2307
2308impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2309 type Output = ();
2310 type Err = ast::Error;
2311
2312 fn finish(self) -> Result<()> {
2313 Ok(())
2314 }
2315
2316 fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2317 let span = match *ast {
2318 Ast::Empty(_)
2319 | Ast::Flags(_)
2320 | Ast::Literal(_)
2321 | Ast::Dot(_)
2322 | Ast::Assertion(_)
2323 | Ast::ClassUnicode(_)
2324 | Ast::ClassPerl(_) => {
2325 return Ok(());
2327 }
2328 Ast::ClassBracketed(ref x) => &x.span,
2329 Ast::Repetition(ref x) => &x.span,
2330 Ast::Group(ref x) => &x.span,
2331 Ast::Alternation(ref x) => &x.span,
2332 Ast::Concat(ref x) => &x.span,
2333 };
2334 self.increment_depth(span)
2335 }
2336
2337 fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2338 match *ast {
2339 Ast::Empty(_)
2340 | Ast::Flags(_)
2341 | Ast::Literal(_)
2342 | Ast::Dot(_)
2343 | Ast::Assertion(_)
2344 | Ast::ClassUnicode(_)
2345 | Ast::ClassPerl(_) => {
2346 Ok(())
2348 }
2349 Ast::ClassBracketed(_)
2350 | Ast::Repetition(_)
2351 | Ast::Group(_)
2352 | Ast::Alternation(_)
2353 | Ast::Concat(_) => {
2354 self.decrement_depth();
2355 Ok(())
2356 }
2357 }
2358 }
2359
2360 fn visit_class_set_item_pre(
2361 &mut self,
2362 ast: &ast::ClassSetItem,
2363 ) -> Result<()> {
2364 let span = match *ast {
2365 ast::ClassSetItem::Empty(_)
2366 | ast::ClassSetItem::Literal(_)
2367 | ast::ClassSetItem::Range(_)
2368 | ast::ClassSetItem::Ascii(_)
2369 | ast::ClassSetItem::Unicode(_)
2370 | ast::ClassSetItem::Perl(_) => {
2371 return Ok(());
2373 }
2374 ast::ClassSetItem::Bracketed(ref x) => &x.span,
2375 ast::ClassSetItem::Union(ref x) => &x.span,
2376 };
2377 self.increment_depth(span)
2378 }
2379
2380 fn visit_class_set_item_post(
2381 &mut self,
2382 ast: &ast::ClassSetItem,
2383 ) -> Result<()> {
2384 match *ast {
2385 ast::ClassSetItem::Empty(_)
2386 | ast::ClassSetItem::Literal(_)
2387 | ast::ClassSetItem::Range(_)
2388 | ast::ClassSetItem::Ascii(_)
2389 | ast::ClassSetItem::Unicode(_)
2390 | ast::ClassSetItem::Perl(_) => {
2391 Ok(())
2393 }
2394 ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2395 self.decrement_depth();
2396 Ok(())
2397 }
2398 }
2399 }
2400
2401 fn visit_class_set_binary_op_pre(
2402 &mut self,
2403 ast: &ast::ClassSetBinaryOp,
2404 ) -> Result<()> {
2405 self.increment_depth(&ast.span)
2406 }
2407
2408 fn visit_class_set_binary_op_post(
2409 &mut self,
2410 _ast: &ast::ClassSetBinaryOp,
2411 ) -> Result<()> {
2412 self.decrement_depth();
2413 Ok(())
2414 }
2415}
2416
2417fn specialize_err<T>(
2421 result: Result<T>,
2422 from: ast::ErrorKind,
2423 to: ast::ErrorKind,
2424) -> Result<T> {
2425 if let Err(e) = result {
2426 if e.kind == from {
2427 Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2428 } else {
2429 Err(e)
2430 }
2431 } else {
2432 result
2433 }
2434}
2435
2436#[cfg(test)]
2437mod tests {
2438 use core::ops::Range;
2439
2440 use alloc::format;
2441
2442 use super::*;
2443
// A module-local replacement for the standard `assert_eq!`. It accepts
// exactly two expressions and, when they compare unequal, panics with a
// message that shows the `Debug` output of each side on its own line.
macro_rules! assert_eq {
    ($left:expr, $right:expr) => {{
        // Borrow both operands so that each is evaluated exactly once.
        match (&$left, &$right) {
            (left_val, right_val) => {
                if !(*left_val == *right_val) {
                    panic!(
                        "assertion failed: `(left == right)`\n\n\
                         left: `{:?}`\nright: `{:?}`\n\n",
                        left_val, right_val
                    )
                }
            }
        }
    }};
}
2461
/// The expected shape of a parse error in tests: just a span and a kind.
/// It is compared against `ast::Error` through the `PartialEq` impls in this
/// module, which look at these two fields only.
#[derive(Clone, Debug)]
struct TestError {
    /// The span the error is expected to point at.
    span: Span,
    /// The expected kind of error.
    kind: ast::ErrorKind,
}
2470
2471 impl PartialEq<ast::Error> for TestError {
2472 fn eq(&self, other: &ast::Error) -> bool {
2473 self.span == other.span && self.kind == other.kind
2474 }
2475 }
2476
2477 impl PartialEq<TestError> for ast::Error {
2478 fn eq(&self, other: &TestError) -> bool {
2479 self.span == other.span && self.kind == other.kind
2480 }
2481 }
2482
/// Shorthand for turning a string slice into an owned `String`.
fn s(str: &str) -> String {
    String::from(str)
}
2486
2487 fn parser(pattern: &str) -> ParserI<'_, Parser> {
2488 ParserI::new(Parser::new(), pattern)
2489 }
2490
2491 fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2492 let parser = ParserBuilder::new().octal(true).build();
2493 ParserI::new(parser, pattern)
2494 }
2495
2496 fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> {
2497 let parser = ParserBuilder::new().empty_min_range(true).build();
2498 ParserI::new(parser, pattern)
2499 }
2500
2501 fn parser_nest_limit(
2502 pattern: &str,
2503 nest_limit: u32,
2504 ) -> ParserI<'_, Parser> {
2505 let p = ParserBuilder::new().nest_limit(nest_limit).build();
2506 ParserI::new(p, pattern)
2507 }
2508
2509 fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2510 let p = ParserBuilder::new().ignore_whitespace(true).build();
2511 ParserI::new(p, pattern)
2512 }
2513
2514 fn nspan(start: Position, end: Position) -> Span {
2516 Span::new(start, end)
2517 }
2518
2519 fn npos(offset: usize, line: usize, column: usize) -> Position {
2521 Position::new(offset, line, column)
2522 }
2523
2524 fn span(range: Range<usize>) -> Span {
2528 let start = Position::new(range.start, 1, range.start + 1);
2529 let end = Position::new(range.end, 1, range.end + 1);
2530 Span::new(start, end)
2531 }
2532
2533 fn span_range(subject: &str, range: Range<usize>) -> Span {
2535 let start = Position {
2536 offset: range.start,
2537 line: 1 + subject[..range.start].matches('\n').count(),
2538 column: 1 + subject[..range.start]
2539 .chars()
2540 .rev()
2541 .position(|c| c == '\n')
2542 .unwrap_or(subject[..range.start].chars().count()),
2543 };
2544 let end = Position {
2545 offset: range.end,
2546 line: 1 + subject[..range.end].matches('\n').count(),
2547 column: 1 + subject[..range.end]
2548 .chars()
2549 .rev()
2550 .position(|c| c == '\n')
2551 .unwrap_or(subject[..range.end].chars().count()),
2552 };
2553 Span::new(start, end)
2554 }
2555
2556 fn lit(c: char, start: usize) -> Ast {
2558 lit_with(c, span(start..start + c.len_utf8()))
2559 }
2560
2561 fn meta_lit(c: char, span: Span) -> Ast {
2563 Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
2564 }
2565
2566 fn lit_with(c: char, span: Span) -> Ast {
2568 Ast::literal(ast::Literal {
2569 span,
2570 kind: ast::LiteralKind::Verbatim,
2571 c,
2572 })
2573 }
2574
2575 fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2577 concat_with(span(range), asts)
2578 }
2579
2580 fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2582 Ast::concat(ast::Concat { span, asts })
2583 }
2584
2585 fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2587 Ast::alternation(ast::Alternation { span: span(range), asts })
2588 }
2589
2590 fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2592 Ast::group(ast::Group {
2593 span: span(range),
2594 kind: ast::GroupKind::CaptureIndex(index),
2595 ast: Box::new(ast),
2596 })
2597 }
2598
2599 fn flag_set(
2607 pat: &str,
2608 range: Range<usize>,
2609 flag: ast::Flag,
2610 negated: bool,
2611 ) -> Ast {
2612 let mut items = vec![ast::FlagsItem {
2613 span: span_range(pat, (range.end - 2)..(range.end - 1)),
2614 kind: ast::FlagsItemKind::Flag(flag),
2615 }];
2616 if negated {
2617 items.insert(
2618 0,
2619 ast::FlagsItem {
2620 span: span_range(pat, (range.start + 2)..(range.end - 2)),
2621 kind: ast::FlagsItemKind::Negation,
2622 },
2623 );
2624 }
2625 Ast::flags(ast::SetFlags {
2626 span: span_range(pat, range.clone()),
2627 flags: ast::Flags {
2628 span: span_range(pat, (range.start + 2)..(range.end - 1)),
2629 items,
2630 },
2631 })
2632 }
2633
/// Checks how the nest limit applies to each kind of nesting construct:
/// which patterns are accepted at a given limit, and which span and limit
/// are reported when the limit is exceeded.
#[test]
fn parse_nest_limit() {
    // Patterns without any nesting parse even with a limit of 0.
    assert_eq!(
        parser_nest_limit("", 0).parse(),
        Ok(Ast::empty(span(0..0)))
    );
    assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));

    // Repetition operators: each one needs a level of nesting.
    assert_eq!(
        parser_nest_limit("a+", 0).parse().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::NestLimitExceeded(0),
        }
    );
    assert_eq!(
        parser_nest_limit("a+", 1).parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..2),
            op: ast::RepetitionOp {
                span: span(1..2),
                kind: ast::RepetitionKind::OneOrMore,
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_eq!(
        parser_nest_limit("(a)+", 1).parse().unwrap_err(),
        TestError {
            span: span(0..3),
            kind: ast::ErrorKind::NestLimitExceeded(1),
        }
    );
    assert_eq!(
        parser_nest_limit("a+*", 1).parse().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::NestLimitExceeded(1),
        }
    );
    assert_eq!(
        parser_nest_limit("a+*", 2).parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..3),
            op: ast::RepetitionOp {
                span: span(2..3),
                kind: ast::RepetitionKind::ZeroOrMore,
            },
            greedy: true,
            ast: Box::new(Ast::repetition(ast::Repetition {
                span: span(0..2),
                op: ast::RepetitionOp {
                    span: span(1..2),
                    kind: ast::RepetitionKind::OneOrMore,
                },
                greedy: true,
                ast: Box::new(lit('a', 0)),
            })),
        }))
    );

    // Concatenations: one level of nesting, however many items they hold.
    assert_eq!(
        parser_nest_limit("ab", 0).parse().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::NestLimitExceeded(0),
        }
    );
    assert_eq!(
        parser_nest_limit("ab", 1).parse(),
        Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
    );
    assert_eq!(
        parser_nest_limit("abc", 1).parse(),
        Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
    );

    // Alternations: one level of nesting, however many branches they hold.
    assert_eq!(
        parser_nest_limit("a|b", 0).parse().unwrap_err(),
        TestError {
            span: span(0..3),
            kind: ast::ErrorKind::NestLimitExceeded(0),
        }
    );
    assert_eq!(
        parser_nest_limit("a|b", 1).parse(),
        Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser_nest_limit("a|b|c", 1).parse(),
        Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
    );

    // Character classes: the bracketed class, unions of several items,
    // nested classes and binary set operations each add nesting.
    assert_eq!(
        parser_nest_limit("[a]", 0).parse().unwrap_err(),
        TestError {
            span: span(0..3),
            kind: ast::ErrorKind::NestLimitExceeded(0),
        }
    );
    assert_eq!(
        parser_nest_limit("[a]", 1).parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
                ast::Literal {
                    span: span(1..2),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'a',
                }
            )),
        }))
    );
    assert_eq!(
        parser_nest_limit("[ab]", 1).parse().unwrap_err(),
        TestError {
            span: span(1..3),
            kind: ast::ErrorKind::NestLimitExceeded(1),
        }
    );
    assert_eq!(
        parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
        TestError {
            span: span(3..7),
            kind: ast::ErrorKind::NestLimitExceeded(2),
        }
    );
    assert_eq!(
        parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
        TestError {
            span: span(4..6),
            kind: ast::ErrorKind::NestLimitExceeded(3),
        }
    );
    assert_eq!(
        parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
        TestError {
            span: span(1..5),
            kind: ast::ErrorKind::NestLimitExceeded(1),
        }
    );
    assert_eq!(
        parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
        TestError {
            span: span(4..6),
            kind: ast::ErrorKind::NestLimitExceeded(2),
        }
    );
}
2791
/// Checks that comments in whitespace-insensitive mode are left out of the
/// AST and are reported separately, each with its own span and text.
#[test]
fn parse_comments() {
    // NOTE: the byte offsets asserted below depend on the exact layout of
    // this literal. In particular, the third comment must be preceded by
    // exactly two spaces so that it starts at offset 53.
    let pat = "(?x)
# This is comment 1.
foo # This is comment 2.
  # This is comment 3.
bar
# This is comment 4.";
    let astc = parser(pat).parse_with_comments().unwrap();
    assert_eq!(
        astc.ast,
        concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('f', span_range(pat, 26..27)),
                lit_with('o', span_range(pat, 27..28)),
                lit_with('o', span_range(pat, 28..29)),
                lit_with('b', span_range(pat, 74..75)),
                lit_with('a', span_range(pat, 75..76)),
                lit_with('r', span_range(pat, 76..77)),
            ]
        )
    );
    // Every comment span except the last includes the terminating newline;
    // the last comment runs to the end of the pattern.
    assert_eq!(
        astc.comments,
        vec![
            ast::Comment {
                span: span_range(pat, 5..26),
                comment: s(" This is comment 1."),
            },
            ast::Comment {
                span: span_range(pat, 30..51),
                comment: s(" This is comment 2."),
            },
            ast::Comment {
                span: span_range(pat, 53..74),
                comment: s(" This is comment 3."),
            },
            ast::Comment {
                span: span_range(pat, 78..98),
                comment: s(" This is comment 4."),
            },
        ]
    );
}
2838
/// Checks a lone `]` and a pattern made of every escaped meta character.
#[test]
fn parse_holistic() {
    assert_eq!(parser("]").parse(), Ok(lit(']', 0)));

    // Each escape is two bytes long, so the i-th literal spans
    // `2 * i..2 * i + 2`.
    let escaped = [
        '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^',
        '$', '#', '&', '-', '~',
    ];
    let expected: Vec<Ast> = escaped
        .iter()
        .enumerate()
        .map(|(i, &c)| meta_lit(c, span(2 * i..2 * i + 2)))
        .collect();
    assert_eq!(
        parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
        Ok(concat(0..36, expected))
    );
}
2869
/// Checks whitespace-insensitive mode: where whitespace is skipped, how the
/// mode is toggled by flags, and that spans still refer to the original
/// byte offsets of the pattern.
///
/// NOTE: the spans asserted below depend on the exact number of spaces in
/// each pattern literal.
#[test]
fn parse_ignore_whitespace() {
    // Whitespace between literals is skipped once `x` is set.
    let pat = "(?x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(7, 1, 8)),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
            ]
        ))
    );

    // Clearing the flag makes whitespace significant again.
    let pat = "(?x)a b(?-x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(15, 1, 16)),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
                lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
                lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
            ]
        ))
    );

    // A flag set on a group only applies inside that group.
    let pat = "a (?x:a )a ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..11),
            vec![
                lit_with('a', span_range(pat, 0..1)),
                lit_with(' ', span_range(pat, 1..2)),
                Ast::group(ast::Group {
                    span: span_range(pat, 2..9),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 4..5),
                        items: vec![ast::FlagsItem {
                            span: span_range(pat, 4..5),
                            kind: ast::FlagsItemKind::Flag(
                                ast::Flag::IgnoreWhitespace
                            ),
                        },],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 6..7))),
                }),
                lit_with('a', span_range(pat, 9..10)),
                lit_with(' ', span_range(pat, 10..11)),
            ]
        ))
    );

    // Whitespace is allowed between `(` and the group's name syntax.
    let pat = "(?x)( ?P<foo> a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureName {
                        starts_with_p: true,
                        name: ast::CaptureName {
                            span: span_range(pat, 9..12),
                            name: s("foo"),
                            index: 1,
                        }
                    },
                    ast: Box::new(lit_with('a', span_range(pat, 14..15))),
                }),
            ]
        ))
    );
    // Two spaces after `(`, so that `a` sits at byte offset 7.
    let pat = "(?x)(  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureIndex(1),
                    ast: Box::new(lit_with('a', span_range(pat, 7..8))),
                }),
            ]
        ))
    );
    // Two spaces after `(` and two after `?:`, so that the empty flag set
    // sits at offset 8 and `a` at offset 11.
    let pat = "(?x)(  ?:  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 8..8),
                        items: vec![],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 11..12))),
                }),
            ]
        ))
    );
    // Whitespace is skipped inside a braced hex escape as well.
    let pat = r"(?x)\x { 53 }";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::literal(ast::Literal {
                    span: span(4..13),
                    kind: ast::LiteralKind::HexBrace(
                        ast::HexLiteralKind::X
                    ),
                    c: 'S',
                }),
            ]
        ))
    );

    // An escaped space is a literal space even in this mode.
    let pat = r"(?x)\ ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::literal(ast::Literal {
                    span: span_range(pat, 4..6),
                    kind: ast::LiteralKind::Superfluous,
                    c: ' ',
                }),
            ]
        ))
    );
}
3023
/// Checks that newlines parse as ordinary literals and that line and column
/// numbers in spans advance correctly across them.
#[test]
fn parse_newlines() {
    let pat = ".\n.";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..3),
            vec![
                Ast::dot(span_range(pat, 0..1)),
                lit_with('\n', span_range(pat, 1..2)),
                Ast::dot(span_range(pat, 2..3)),
            ]
        ))
    );

    // Positions are spelled out by hand here: a newline literal ends at
    // column 1 of the following line.
    let pat = "foobar\nbaz\nquux\n";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
                lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
                lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
                lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
                lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
                lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
                lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
                lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
                lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
                lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
                lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
                lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
                lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
                lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
            ]
        ))
    );
}
3065
#[test]
fn parse_uncounted_repetition() {
    // Expected AST for a single repetition: `whole` covers operand plus
    // operator, `op` covers the operator alone (including the trailing `?`
    // of a lazy operator), and `sub` is the repeated sub-expression.
    fn rep(
        whole: core::ops::Range<usize>,
        op: core::ops::Range<usize>,
        kind: ast::RepetitionKind,
        greedy: bool,
        sub: Ast,
    ) -> Ast {
        Ast::repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp { span: span(op), kind },
            greedy,
            ast: Box::new(sub),
        })
    }

    // Asserts that `pat` fails with `RepetitionMissing`, reported as an
    // empty span at byte offset `at`. `#[track_caller]` keeps the panic
    // location on the calling line so a failure still names its case.
    #[track_caller]
    fn assert_missing(pat: &str, at: usize) {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(at..at),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
    }

    // Every operator applied to a lone `a`, in both its greedy and its lazy
    // form. The operator always starts at offset 1 and runs to the end of
    // the pattern, so both spans follow from the pattern length.
    let lone = vec![
        (r"a*", ast::RepetitionKind::ZeroOrMore, true),
        (r"a*?", ast::RepetitionKind::ZeroOrMore, false),
        (r"a+", ast::RepetitionKind::OneOrMore, true),
        (r"a+?", ast::RepetitionKind::OneOrMore, false),
        (r"a?", ast::RepetitionKind::ZeroOrOne, true),
        (r"a??", ast::RepetitionKind::ZeroOrOne, false),
    ];
    for (pat, kind, greedy) in lone {
        let end = pat.len();
        assert_eq!(
            parser(pat).parse(),
            Ok(rep(0..end, 1..end, kind, greedy, lit('a', 0)))
        );
    }

    // The operator binds only to the expression immediately before it.
    assert_eq!(
        parser(r"a?b").parse(),
        Ok(concat(
            0..3,
            vec![
                rep(
                    0..2,
                    1..2,
                    ast::RepetitionKind::ZeroOrOne,
                    true,
                    lit('a', 0)
                ),
                lit('b', 2),
            ]
        ))
    );
    assert_eq!(
        parser(r"a??b").parse(),
        Ok(concat(
            0..4,
            vec![
                rep(
                    0..3,
                    1..3,
                    ast::RepetitionKind::ZeroOrOne,
                    false,
                    lit('a', 0)
                ),
                lit('b', 3),
            ]
        ))
    );
    assert_eq!(
        parser(r"ab?").parse(),
        Ok(concat(
            0..3,
            vec![
                lit('a', 0),
                rep(
                    1..3,
                    2..3,
                    ast::RepetitionKind::ZeroOrOne,
                    true,
                    lit('b', 1)
                ),
            ]
        ))
    );
    // A group is repeated as a whole.
    assert_eq!(
        parser(r"(ab)?").parse(),
        Ok(rep(
            0..5,
            4..5,
            ast::RepetitionKind::ZeroOrOne,
            true,
            group(0..4, 1, concat(1..3, vec![lit('a', 1), lit('b', 2)]))
        ))
    );
    // A repetition may follow an empty alternation branch.
    assert_eq!(
        parser(r"|a?").parse(),
        Ok(alt(
            0..3,
            vec![
                Ast::empty(span(0..0)),
                rep(
                    1..3,
                    2..3,
                    ast::RepetitionKind::ZeroOrOne,
                    true,
                    lit('a', 1)
                ),
            ]
        ))
    );

    // An operator with nothing to repeat is an error located at the
    // operator itself.
    assert_missing(r"*", 0);
    assert_missing(r"(?i)*", 4);
    assert_missing(r"(*)", 1);
    assert_missing(r"(?:?)", 3);
    assert_missing(r"+", 0);
    assert_missing(r"?", 0);
    assert_missing(r"(?)", 1);
    assert_missing(r"|*", 1);
    assert_missing(r"|+", 1);
    assert_missing(r"|?", 1);
}
3289
#[test]
fn parse_counted_repetition() {
    // Expected AST for a `{...}` repetition: `whole` covers operand plus
    // operator, `op` covers the operator alone, `range` is the parsed
    // count and `sub` is the repeated sub-expression.
    fn counted(
        whole: core::ops::Range<usize>,
        op: core::ops::Range<usize>,
        range: ast::RepetitionRange,
        greedy: bool,
        sub: Ast,
    ) -> Ast {
        Ast::repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp {
                span: span(op),
                kind: ast::RepetitionKind::Range(range),
            },
            greedy,
            ast: Box::new(sub),
        })
    }

    // Asserts that parsing `pat` fails with `kind` reported over `at`.
    #[track_caller]
    fn rejects(
        pat: &str,
        at: core::ops::Range<usize>,
        kind: ast::ErrorKind,
    ) {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(at), kind }
        );
    }

    // Short names for the three count forms.
    let exactly = ast::RepetitionRange::Exactly;
    let at_least = ast::RepetitionRange::AtLeast;
    let bounded = ast::RepetitionRange::Bounded;

    // Plain counts applied to a single literal.
    assert_eq!(
        parser(r"a{5}").parse(),
        Ok(counted(0..4, 1..4, exactly(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{5,}").parse(),
        Ok(counted(0..5, 1..5, at_least(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{5,9}").parse(),
        Ok(counted(0..6, 1..6, bounded(5, 9), true, lit('a', 0)))
    );
    // A trailing `?` makes the repetition lazy and is part of the operator.
    assert_eq!(
        parser(r"a{5}?").parse(),
        Ok(counted(0..5, 1..5, exactly(5), false, lit('a', 0)))
    );

    // The count binds only to the expression immediately before it.
    assert_eq!(
        parser(r"ab{5}").parse(),
        Ok(concat(
            0..5,
            vec![
                lit('a', 0),
                counted(1..5, 2..5, exactly(5), true, lit('b', 1)),
            ]
        ))
    );
    assert_eq!(
        parser(r"ab{5}c").parse(),
        Ok(concat(
            0..6,
            vec![
                lit('a', 0),
                counted(1..5, 2..5, exactly(5), true, lit('b', 1)),
                lit('c', 5),
            ]
        ))
    );

    // Spaces inside the braces are accepted.
    assert_eq!(
        parser(r"a{ 5 }").parse(),
        Ok(counted(0..6, 1..6, exactly(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{ 5 , 9 }").parse(),
        Ok(counted(0..10, 1..10, bounded(5, 9), true, lit('a', 0)))
    );
    // With the empty-minimum parser, a missing lower bound is read as 0.
    assert_eq!(
        parser_empty_min_range(r"a{,9}").parse(),
        Ok(counted(0..5, 1..5, bounded(0, 9), true, lit('a', 0)))
    );
    // With the ignore-whitespace parser, the space before the lazy `?` is
    // skipped and the operator span runs through the `?`.
    assert_eq!(
        parser_ignore_whitespace(r"a{5,9} ?").parse(),
        Ok(counted(0..8, 1..8, bounded(5, 9), false, lit('a', 0)))
    );
    // A count after `\b` repeats the word-boundary assertion.
    assert_eq!(
        parser(r"\b{5,9}").parse(),
        Ok(counted(
            0..7,
            2..7,
            bounded(5, 9),
            true,
            Ast::assertion(ast::Assertion {
                span: span(0..2),
                kind: ast::AssertionKind::WordBoundary,
            })
        ))
    );

    // A flag-setting group is not something a count can apply to.
    rejects(r"(?i){0}", 4..4, ast::ErrorKind::RepetitionMissing);
    rejects(r"(?m){1,1}", 4..4, ast::ErrorKind::RepetitionMissing);
    // Malformed, unterminated or out-of-range counts.
    rejects(r"a{]}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty);
    rejects(r"a{1,]}", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty);
    rejects(r"a{", 1..2, ast::ErrorKind::RepetitionCountUnclosed);
    rejects(r"a{}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty);
    rejects(r"a{a", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty);
    rejects(r"a{9999999999}", 2..12, ast::ErrorKind::DecimalInvalid);
    rejects(r"a{9", 1..3, ast::ErrorKind::RepetitionCountUnclosed);
    rejects(r"a{9,a", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty);
    rejects(r"a{9,9999999999}", 4..14, ast::ErrorKind::DecimalInvalid);
    rejects(r"a{9,", 1..4, ast::ErrorKind::RepetitionCountUnclosed);
    rejects(r"a{9,11", 1..6, ast::ErrorKind::RepetitionCountUnclosed);
    rejects(r"a{2,1}", 1..6, ast::ErrorKind::RepetitionCountInvalid);
    // A count with no preceding expression.
    rejects(r"{5}", 0..0, ast::ErrorKind::RepetitionMissing);
    rejects(r"|{5}", 1..1, ast::ErrorKind::RepetitionMissing);
}
3577
#[test]
fn parse_alternate() {
    // Expected empty branch located at byte offset `at`.
    let empty = |at: usize| Ast::empty(span(at..at));
    // Expected concatenation of two one-byte literals starting at `start`.
    let pair = |start: usize, first: char, second: char| {
        concat(
            start..start + 2,
            vec![lit(first, start), lit(second, start + 1)],
        )
    };

    // Two and three single-literal branches, bare and inside a group.
    let two = alt(0..3, vec![lit('a', 0), lit('b', 2)]);
    assert_eq!(parser(r"a|b").parse(), Ok(two));

    let grouped_two = group(0..5, 1, alt(1..4, vec![lit('a', 1), lit('b', 3)]));
    assert_eq!(parser(r"(a|b)").parse(), Ok(grouped_two));

    let three = alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]);
    assert_eq!(parser(r"a|b|c").parse(), Ok(three));

    // Each branch is itself a concatenation.
    let three_pairs =
        alt(0..8, vec![pair(0, 'a', 'x'), pair(3, 'b', 'y'), pair(6, 'c', 'z')]);
    assert_eq!(parser(r"ax|by|cz").parse(), Ok(three_pairs));

    let grouped_pairs = group(
        0..10,
        1,
        alt(1..9, vec![pair(1, 'a', 'x'), pair(4, 'b', 'y'), pair(7, 'c', 'z')]),
    );
    assert_eq!(parser(r"(ax|by|cz)").parse(), Ok(grouped_pairs));

    // Nested groups: the expected tree is built from the innermost group
    // outwards.
    let innermost = group(8..12, 3, pair(9, 'c', 'z'));
    let middle = group(4..13, 2, alt(5..12, vec![pair(5, 'b', 'y'), innermost]));
    let outermost = group(0..14, 1, alt(1..13, vec![pair(1, 'a', 'x'), middle]));
    assert_eq!(parser(r"(ax|(by|(cz)))").parse(), Ok(outermost));

    // Empty branches are kept as explicit empty nodes.
    assert_eq!(
        parser(r"|").parse(),
        Ok(alt(0..1, vec![empty(0), empty(1)]))
    );
    assert_eq!(
        parser(r"||").parse(),
        Ok(alt(0..2, vec![empty(0), empty(1), empty(2)]))
    );
    assert_eq!(
        parser(r"a|").parse(),
        Ok(alt(0..2, vec![lit('a', 0), empty(2)]))
    );
    assert_eq!(
        parser(r"|a").parse(),
        Ok(alt(0..2, vec![empty(0), lit('a', 1)]))
    );

    // The same empty-branch cases inside a capturing group.
    assert_eq!(
        parser(r"(|)").parse(),
        Ok(group(0..3, 1, alt(1..2, vec![empty(1), empty(2)])))
    );
    assert_eq!(
        parser(r"(a|)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![lit('a', 1), empty(3)])))
    );
    assert_eq!(
        parser(r"(|a)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![empty(1), lit('a', 2)])))
    );

    // Unbalanced parentheses around an alternation: the error span covers
    // the offending parenthesis.
    let unopened = TestError {
        span: span(3..4),
        kind: ast::ErrorKind::GroupUnopened,
    };
    assert_eq!(parser(r"a|b)").parse().unwrap_err(), unopened);

    let unclosed = TestError {
        span: span(0..1),
        kind: ast::ErrorKind::GroupUnclosed,
    };
    assert_eq!(parser(r"(a|b").parse().unwrap_err(), unclosed);
}
3737
#[test]
fn parse_unsupported_lookaround() {
    // Each look-around opener paired with the end of the expected error
    // span. The span starts at 0 and covers the opener only: three bytes
    // for look-ahead, four for look-behind.
    let openers = [
        (r"(?=a)", 3),
        (r"(?!a)", 3),
        (r"(?<=a)", 4),
        (r"(?<!a)", 4),
    ];
    for &(pat, end) in &openers {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(0..end),
                kind: ast::ErrorKind::UnsupportedLookAround,
            }
        );
    }
}
3769
#[test]
fn parse_group() {
    // A flag item occupying the single byte at offset `at`.
    let item = |at: usize, kind: ast::FlagsItemKind| ast::FlagsItem {
        span: span(at..at + 1),
        kind,
    };
    let case_insensitive =
        || ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive);
    let swap_greed = || ast::FlagsItemKind::Flag(ast::Flag::SwapGreed);
    let negation = || ast::FlagsItemKind::Negation;
    // A flag list spanning `range`.
    let flag_list = |range: core::ops::Range<usize>, items| ast::Flags {
        span: span(range),
        items,
    };
    // Items for the run `i-U` whose first flag sits at offset 2.
    let i_neg_u = || {
        vec![
            item(2, case_insensitive()),
            item(3, negation()),
            item(4, swap_greed()),
        ]
    };
    // A non-capturing group spanning `range` that carries `flags`.
    let non_capturing =
        |range: core::ops::Range<usize>, flags: ast::Flags, inner: Ast| {
            Ast::group(ast::Group {
                span: span(range),
                kind: ast::GroupKind::NonCapturing(flags),
                ast: Box::new(inner),
            })
        };
    // Checks that `pat` fails with `kind` reported over `at`.
    let rejects =
        |pat: &str, at: core::ops::Range<usize>, kind: ast::ErrorKind| {
            assert_eq!(
                parser(pat).parse().unwrap_err(),
                TestError { span: span(at), kind }
            );
        };

    // Flag-setting groups: the flag list excludes the `(?` prefix and the
    // closing parenthesis.
    assert_eq!(
        parser("(?i)").parse(),
        Ok(Ast::flags(ast::SetFlags {
            span: span(0..4),
            flags: flag_list(2..3, vec![item(2, case_insensitive())]),
        }))
    );
    assert_eq!(
        parser("(?iU)").parse(),
        Ok(Ast::flags(ast::SetFlags {
            span: span(0..5),
            flags: flag_list(
                2..4,
                vec![item(2, case_insensitive()), item(3, swap_greed())]
            ),
        }))
    );
    assert_eq!(
        parser("(?i-U)").parse(),
        Ok(Ast::flags(ast::SetFlags {
            span: span(0..6),
            flags: flag_list(2..5, i_neg_u()),
        }))
    );

    // Capturing groups are numbered from 1 in order of their opening
    // parenthesis.
    assert_eq!(
        parser("()").parse(),
        Ok(group(0..2, 1, Ast::empty(span(1..1))))
    );
    assert_eq!(parser("(a)").parse(), Ok(group(0..3, 1, lit('a', 1))));
    assert_eq!(
        parser("(())").parse(),
        Ok(group(0..4, 1, group(1..3, 2, Ast::empty(span(2..2)))))
    );

    // Non-capturing groups; `(?:` carries an empty flag list located just
    // after the `?`.
    assert_eq!(
        parser("(?:a)").parse(),
        Ok(non_capturing(0..5, flag_list(2..2, vec![]), lit('a', 3)))
    );
    assert_eq!(
        parser("(?i:a)").parse(),
        Ok(non_capturing(
            0..6,
            flag_list(2..3, vec![item(2, case_insensitive())]),
            lit('a', 4)
        ))
    );
    assert_eq!(
        parser("(?i-U:a)").parse(),
        Ok(non_capturing(0..8, flag_list(2..5, i_neg_u()), lit('a', 6)))
    );

    // Unterminated or unopened groups, and truncated group prefixes.
    rejects("(", 0..1, ast::ErrorKind::GroupUnclosed);
    rejects("(?", 0..1, ast::ErrorKind::GroupUnclosed);
    rejects("(?P", 2..3, ast::ErrorKind::FlagUnrecognized);
    rejects("(?P<", 4..4, ast::ErrorKind::GroupNameUnexpectedEof);
    rejects("(a", 0..1, ast::ErrorKind::GroupUnclosed);
    rejects("(()", 0..1, ast::ErrorKind::GroupUnclosed);
    rejects(")", 0..1, ast::ErrorKind::GroupUnopened);
    rejects("a)", 1..2, ast::ErrorKind::GroupUnopened);
}
3981
#[test]
fn parse_capture_name() {
    // Expected AST for the first (index 1) named group: `whole` is the span
    // of the entire group, `name_at` the span of the name between the
    // angle brackets, and `inner` the group's contents.
    let named = |whole: Span,
                 starts_with_p: bool,
                 name_at: Span,
                 name: &str,
                 inner: Ast| {
        Ast::group(ast::Group {
            span: whole,
            kind: ast::GroupKind::CaptureName {
                starts_with_p,
                name: ast::CaptureName {
                    span: name_at,
                    name: s(name),
                    index: 1,
                },
            },
            ast: Box::new(inner),
        })
    };
    // Checks that `pat` fails with `kind` reported over `at`.
    let rejects = |pat: &str, at: Span, kind: ast::ErrorKind| {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: at, kind }
        );
    };

    // ASCII names wrapping a lone `z`:
    // (pattern, uses the `?P<` spelling, byte range of the name, name).
    // The group spans the whole pattern and the `z` sits one byte past the
    // end of the name, just after the closing `>`.
    let ascii = [
        ("(?<a>z)", false, (3, 4), "a"),
        ("(?P<a>z)", true, (4, 5), "a"),
        ("(?P<abc>z)", true, (4, 7), "abc"),
        ("(?P<a_1>z)", true, (4, 7), "a_1"),
        ("(?P<a.1>z)", true, (4, 7), "a.1"),
        ("(?P<a[1]>z)", true, (4, 8), "a[1]"),
    ];
    for &(pat, with_p, (from, to), name) in &ascii {
        assert_eq!(
            parser(pat).parse(),
            Ok(named(
                span(0..pat.len()),
                with_p,
                span(from..to),
                name,
                lit('z', to + 1)
            ))
        );
    }

    // Multi-byte names: the first and third position numbers no longer move
    // in step, so every position is spelled out.
    assert_eq!(
        parser("(?P<a¾>)").parse(),
        Ok(named(
            nspan(npos(0, 1, 1), npos(9, 1, 9)),
            true,
            nspan(npos(4, 1, 5), npos(7, 1, 7)),
            "a¾",
            Ast::empty(nspan(npos(8, 1, 8), npos(8, 1, 8)))
        ))
    );
    assert_eq!(
        parser("(?P<名字>)").parse(),
        Ok(named(
            nspan(npos(0, 1, 1), npos(12, 1, 9)),
            true,
            nspan(npos(4, 1, 5), npos(10, 1, 7)),
            "名字",
            Ast::empty(nspan(npos(11, 1, 8), npos(11, 1, 8)))
        ))
    );

    // Truncated or empty names.
    rejects("(?P<", span(4..4), ast::ErrorKind::GroupNameUnexpectedEof);
    rejects("(?P<>z)", span(4..4), ast::ErrorKind::GroupNameEmpty);
    rejects("(?P<a", span(5..5), ast::ErrorKind::GroupNameUnexpectedEof);
    rejects("(?P<ab", span(6..6), ast::ErrorKind::GroupNameUnexpectedEof);

    // Invalid characters: the error span covers the offending character.
    rejects("(?P<0a", span(4..5), ast::ErrorKind::GroupNameInvalid);
    rejects("(?P<~", span(4..5), ast::ErrorKind::GroupNameInvalid);
    rejects("(?P<abc~", span(7..8), ast::ErrorKind::GroupNameInvalid);

    // A repeated name is reported at the second name and points back at
    // the first.
    rejects(
        "(?P<a>y)(?P<a>z)",
        span(12..13),
        ast::ErrorKind::GroupNameDuplicate { original: span(4..5) },
    );

    // Names may not begin with a digit.
    rejects("(?P<5>)", span(4..5), ast::ErrorKind::GroupNameInvalid);
    rejects("(?P<5a>)", span(4..5), ast::ErrorKind::GroupNameInvalid);

    // Invalid multi-byte characters, as the first or a later character.
    rejects(
        "(?P<¾>)",
        nspan(npos(4, 1, 5), npos(6, 1, 6)),
        ast::ErrorKind::GroupNameInvalid,
    );
    rejects(
        "(?P<¾a>)",
        nspan(npos(4, 1, 5), npos(6, 1, 6)),
        ast::ErrorKind::GroupNameInvalid,
    );
    rejects(
        "(?P<☃>)",
        nspan(npos(4, 1, 5), npos(7, 1, 6)),
        ast::ErrorKind::GroupNameInvalid,
    );
    rejects(
        "(?P<a☃>)",
        nspan(npos(5, 1, 6), npos(8, 1, 7)),
        ast::ErrorKind::GroupNameInvalid,
    );
}
4240
#[test]
fn parse_flags() {
    // Expected flag list for a run of one-byte items that begins at offset
    // 0: the item at index `i` covers `i..i + 1` and the list as a whole
    // covers all of its items. The terminating `:` or `)` is not included.
    let flags_of = |kinds: &[ast::FlagsItemKind]| ast::Flags {
        span: span(0..kinds.len()),
        items: kinds
            .iter()
            .cloned()
            .enumerate()
            .map(|(at, kind)| ast::FlagsItem { span: span(at..at + 1), kind })
            .collect(),
    };
    let on = ast::FlagsItemKind::Flag;
    let negation = || ast::FlagsItemKind::Negation;
    let case_insensitive = || on(ast::Flag::CaseInsensitive);
    let dot_nl = || on(ast::Flag::DotMatchesNewLine);
    let swap_greed = || on(ast::Flag::SwapGreed);
    let crlf = || on(ast::Flag::CRLF);
    // Checks that `parse_flags` on `pat` fails with `kind` over `at`.
    let rejects =
        |pat: &str, at: core::ops::Range<usize>, kind: ast::ErrorKind| {
            assert_eq!(
                parser(pat).parse_flags().unwrap_err(),
                TestError { span: span(at), kind }
            );
        };

    // A single flag, terminated by either `:` or `)`.
    assert_eq!(
        parser("i:").parse_flags(),
        Ok(flags_of(&[case_insensitive()]))
    );
    assert_eq!(
        parser("i)").parse_flags(),
        Ok(flags_of(&[case_insensitive()]))
    );

    // Several flags in a row.
    assert_eq!(
        parser("isU:").parse_flags(),
        Ok(flags_of(&[case_insensitive(), dot_nl(), swap_greed()]))
    );

    // A negation is recorded as an item of its own, wherever it appears.
    assert_eq!(
        parser("-isU:").parse_flags(),
        Ok(flags_of(&[
            negation(),
            case_insensitive(),
            dot_nl(),
            swap_greed(),
        ]))
    );
    assert_eq!(
        parser("i-sU:").parse_flags(),
        Ok(flags_of(&[
            case_insensitive(),
            negation(),
            dot_nl(),
            swap_greed(),
        ]))
    );
    assert_eq!(
        parser("i-sR:").parse_flags(),
        Ok(flags_of(&[case_insensitive(), negation(), dot_nl(), crlf()]))
    );

    // Missing terminator and unknown flag.
    rejects("isU", 3..3, ast::ErrorKind::FlagUnexpectedEof);
    rejects("isUa:", 3..4, ast::ErrorKind::FlagUnrecognized);
    // Repeats are reported at the second occurrence and point back at the
    // first.
    rejects(
        "isUi:",
        3..4,
        ast::ErrorKind::FlagDuplicate { original: span(0..1) },
    );
    rejects(
        "i-sU-i:",
        4..5,
        ast::ErrorKind::FlagRepeatedNegation { original: span(1..2) },
    );
    // A negation must be followed by at least one flag.
    rejects("-)", 0..1, ast::ErrorKind::FlagDanglingNegation);
    rejects("i-)", 1..2, ast::ErrorKind::FlagDanglingNegation);
    rejects("iU-)", 2..3, ast::ErrorKind::FlagDanglingNegation);
}
4426
#[test]
fn parse_flag() {
    // Every recognized single-character flag and the value it parses to.
    let recognized = [
        ("i", ast::Flag::CaseInsensitive),
        ("m", ast::Flag::MultiLine),
        ("s", ast::Flag::DotMatchesNewLine),
        ("U", ast::Flag::SwapGreed),
        ("u", ast::Flag::Unicode),
        ("R", ast::Flag::CRLF),
        ("x", ast::Flag::IgnoreWhitespace),
    ];
    for &(pat, ref flag) in &recognized {
        assert_eq!(parser(pat).parse_flag(), Ok(flag.clone()));
    }

    // Anything else is rejected, with the error span covering the whole
    // character (three bytes for the snowman).
    let unrecognized = [("a", span(0..1)), ("☃", span_range("☃", 0..3))];
    for &(pat, at) in &unrecognized {
        assert_eq!(
            parser(pat).parse_flag().unwrap_err(),
            TestError { span: at, kind: ast::ErrorKind::FlagUnrecognized }
        );
    }
}
4452
#[test]
fn parse_primitive_non_escape() {
    // Expected one-byte assertion primitive at the start of the pattern.
    let assertion = |kind: ast::AssertionKind| {
        Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
    };
    // Expected verbatim literal primitive for `c` covering `at`.
    let verbatim = |c: char, at: Span| {
        Primitive::Literal(ast::Literal {
            span: at,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    };

    // The dot and the two line anchors.
    assert_eq!(
        parser(r".").parse_primitive(),
        Ok(Primitive::Dot(span(0..1)))
    );
    assert_eq!(
        parser(r"^").parse_primitive(),
        Ok(assertion(ast::AssertionKind::StartLine))
    );
    assert_eq!(
        parser(r"$").parse_primitive(),
        Ok(assertion(ast::AssertionKind::EndLine))
    );

    // Everything else is a verbatim literal. That includes `|` here,
    // because `parse_primitive` is called directly on it.
    assert_eq!(
        parser(r"a").parse_primitive(),
        Ok(verbatim('a', span(0..1)))
    );
    assert_eq!(
        parser(r"|").parse_primitive(),
        Ok(verbatim('|', span(0..1)))
    );
    // A multi-byte character yields one literal spanning all of its bytes.
    assert_eq!(
        parser(r"☃").parse_primitive(),
        Ok(verbatim('☃', span_range("☃", 0..3)))
    );
}
4499
#[test]
fn parse_escape() {
    // An escaped meta character is a `Meta` literal spanning the backslash
    // and the character.
    let escaped_pipe = Primitive::Literal(ast::Literal {
        span: span(0..2),
        kind: ast::LiteralKind::Meta,
        c: '|',
    });
    assert_eq!(parser(r"\|").parse_primitive(), Ok(escaped_pipe));

    // Escapes that denote a single control character.
    let control_escapes = [
        (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
        (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
        (r"\t", '\t', ast::SpecialLiteralKind::Tab),
        (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
        (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
        (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
    ];
    for (pat, c, special) in control_escapes {
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..2),
            kind: ast::LiteralKind::Special(special),
            c,
        });
        assert_eq!(parser(pat).parse_primitive(), Ok(expected));
    }

    // Escapes that produce assertions. The second tuple field is the byte
    // offset at which the expected span ends (the span always starts at 0).
    let assertions = [
        (r"\A", 2, ast::AssertionKind::StartText),
        (r"\z", 2, ast::AssertionKind::EndText),
        (r"\b", 2, ast::AssertionKind::WordBoundary),
        (r"\b{start}", 9, ast::AssertionKind::WordBoundaryStart),
        (r"\b{end}", 7, ast::AssertionKind::WordBoundaryEnd),
        (r"\b{start-half}", 14, ast::AssertionKind::WordBoundaryStartHalf),
        (r"\b{end-half}", 12, ast::AssertionKind::WordBoundaryEndHalf),
        (r"\<", 2, ast::AssertionKind::WordBoundaryStartAngle),
        (r"\>", 2, ast::AssertionKind::WordBoundaryEndAngle),
        (r"\B", 2, ast::AssertionKind::NotWordBoundary),
    ];
    for (pat, end, kind) in assertions {
        let expected =
            Primitive::Assertion(ast::Assertion { span: span(0..end), kind });
        assert_eq!(parser(pat).parse_primitive(), Ok(expected));
    }

    // Escaping these characters is accepted and flagged as `Superfluous`.
    for c in ['!', '@', '%', '"', '\'', '/', ' '] {
        let pat = format!(r"\{}", c);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..2),
            kind: ast::LiteralKind::Superfluous,
            c,
        });
        assert_eq!(parser(&pat).parse_primitive(), Ok(expected));
    }

    // `\e` and `\y` are rejected as unrecognized rather than being treated
    // as superfluous, and a `\b{` that hits end-of-input is an error.
    let rejected = [
        (r"\e", 0..2, ast::ErrorKind::EscapeUnrecognized),
        (r"\y", 0..2, ast::ErrorKind::EscapeUnrecognized),
        (
            r"\b{",
            0..3,
            ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
        ),
    ];
    for (pat, at, kind) in rejected {
        let err = parser(pat).parse_escape().unwrap_err();
        assert_eq!(err, TestError { span: span(at), kind });
    }

    // With the whitespace-ignoring parser, the trailing space still leads to
    // the same end-of-input error, now covering the space as well.
    let err = parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err();
    assert_eq!(
        err,
        TestError {
            span: span(0..4),
            kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
        }
    );

    // With the plain parser, a full parse of the same input reports an
    // unclosed repetition count starting at the `{`.
    let err = parser(r"\b{ ").parse().unwrap_err();
    assert_eq!(
        err,
        TestError {
            span: span(2..4),
            kind: ast::ErrorKind::RepetitionCountUnclosed,
        }
    );

    // Malformed or unknown `\b{...}` names, and a lone trailing backslash.
    let malformed = [
        (r"\b{foo", 2..6, ast::ErrorKind::SpecialWordBoundaryUnclosed),
        (r"\b{foo!}", 2..6, ast::ErrorKind::SpecialWordBoundaryUnclosed),
        (r"\b{foo}", 3..6, ast::ErrorKind::SpecialWordBoundaryUnrecognized),
        (r"\", 0..1, ast::ErrorKind::EscapeUnexpectedEof),
    ];
    for (pat, at, kind) in malformed {
        let err = parser(pat).parse_escape().unwrap_err();
        assert_eq!(err, TestError { span: span(at), kind });
    }
}
4694
#[test]
fn parse_unsupported_backreference() {
    // With the parser built by `parser`, a backslash followed by a decimal
    // digit is reported as an unsupported backreference. The lowest and
    // highest digits are checked.
    for pat in [r"\0", r"\9"] {
        let err = parser(pat).parse_escape().unwrap_err();
        assert_eq!(
            err,
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::UnsupportedBackreference,
            }
        );
    }
}
4712
#[test]
fn parse_octal() {
    // Every value that fits in at most three octal digits must parse to the
    // character with that code point. The range is inclusive so that the
    // upper boundary `\777` (0o777 == 511) is exercised on its own as well.
    for i in 0..=511 {
        let pat = format!(r"\{:o}", i);
        assert_eq!(
            parser_octal(&pat).parse_escape(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::Octal,
                c: char::from_u32(i).unwrap(),
            }))
        );
    }

    // A non-octal digit terminates the escape: only `\77` (U+003F, '?') is
    // consumed.
    assert_eq!(
        parser_octal(r"\778").parse_escape(),
        Ok(Primitive::Literal(ast::Literal {
            span: span(0..3),
            kind: ast::LiteralKind::Octal,
            c: '?',
        }))
    );
    // At most three octal digits are consumed, so the fourth `7` is left
    // over.
    assert_eq!(
        parser_octal(r"\7777").parse_escape(),
        Ok(Primitive::Literal(ast::Literal {
            span: span(0..4),
            kind: ast::LiteralKind::Octal,
            c: '\u{01FF}',
        }))
    );

    // In a full parse the leftover character becomes a verbatim literal
    // concatenated after the octal literal.
    assert_eq!(
        parser_octal(r"\778").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::literal(ast::Literal {
                    span: span(0..3),
                    kind: ast::LiteralKind::Octal,
                    c: '?',
                }),
                Ast::literal(ast::Literal {
                    span: span(3..4),
                    kind: ast::LiteralKind::Verbatim,
                    c: '8',
                }),
            ],
        }))
    );
    assert_eq!(
        parser_octal(r"\7777").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..5),
            asts: vec![
                Ast::literal(ast::Literal {
                    span: span(0..4),
                    kind: ast::LiteralKind::Octal,
                    c: '\u{01FF}',
                }),
                Ast::literal(ast::Literal {
                    span: span(4..5),
                    kind: ast::LiteralKind::Verbatim,
                    c: '7',
                }),
            ],
        }))
    );

    // `8` is not an octal digit, so `\8` is not a valid escape even with the
    // octal-enabled parser.
    assert_eq!(
        parser_octal(r"\8").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
4787
#[test]
fn parse_hex_two() {
    // Every two-digit `\xNN` escape decodes to the code point it spells.
    for codepoint in 0..256 {
        let pat = format!(r"\x{:02x}", codepoint);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pat.len()),
            kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
            c: char::from_u32(codepoint).unwrap(),
        });
        assert_eq!(parser(&pat).parse_escape(), Ok(expected));
    }

    // Truncated input and non-hex digits, with the span each error points at.
    let failures = [
        (r"\xF", 3..3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\xG", 2..3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\xFG", 3..4, ast::ErrorKind::EscapeHexInvalidDigit),
    ];
    for (pat, at, kind) in failures {
        let err = parser(pat).parse_escape().unwrap_err();
        assert_eq!(err, TestError { span: span(at), kind });
    }
}
4824
#[test]
fn parse_hex_four() {
    // Every four-digit `\uNNNN` escape that names a valid `char` decodes to
    // it. Values for which `char::from_u32` returns `None` are skipped.
    let scalars =
        (0..65536).filter_map(|i| char::from_u32(i).map(|c| (i, c)));
    for (codepoint, c) in scalars {
        let pat = format!(r"\u{:04x}", codepoint);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pat.len()),
            kind: ast::LiteralKind::HexFixed(
                ast::HexLiteralKind::UnicodeShort,
            ),
            c,
        });
        assert_eq!(parser(&pat).parse_escape(), Ok(expected));
    }

    // Truncated input, a non-hex digit at each position, and a value
    // (`D800`) that is reported as an invalid hex escape.
    let failures = [
        (r"\uF", 3..3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\uG", 2..3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFG", 3..4, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFFG", 4..5, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFFFG", 5..6, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uD800", 2..6, ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pat, at, kind) in failures {
        let err = parser(pat).parse_escape().unwrap_err();
        assert_eq!(err, TestError { span: span(at), kind });
    }
}
4888
#[test]
fn parse_hex_eight() {
    // Eight-digit `\UNNNNNNNN` escapes, checked for every value below 65536
    // that names a valid `char`. Values for which `char::from_u32` returns
    // `None` are skipped.
    let scalars =
        (0..65536).filter_map(|i| char::from_u32(i).map(|c| (i, c)));
    for (codepoint, c) in scalars {
        let pat = format!(r"\U{:08x}", codepoint);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pat.len()),
            kind: ast::LiteralKind::HexFixed(
                ast::HexLiteralKind::UnicodeLong,
            ),
            c,
        });
        assert_eq!(parser(&pat).parse_escape(), Ok(expected));
    }

    // Truncated input, then a non-hex digit at each of the eight positions.
    let failures = [
        (r"\UF", 3..3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\UG", 2..3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFG", 3..4, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFG", 4..5, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFG", 5..6, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFFG", 6..7, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFFFG", 7..8, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFFFFG", 8..9, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFFFFFG", 9..10, ast::ErrorKind::EscapeHexInvalidDigit),
    ];
    for (pat, at, kind) in failures {
        let err = parser(pat).parse_escape().unwrap_err();
        assert_eq!(err, TestError { span: span(at), kind });
    }
}
4973
#[test]
fn parse_hex_brace() {
    // Brace-delimited hex escapes. The literal kind records which prefix
    // letter was used, and hex digits are accepted in either case.
    let accepted = [
        (r"\u{26c4}", 8, ast::HexLiteralKind::UnicodeShort, '⛄'),
        (r"\U{26c4}", 8, ast::HexLiteralKind::UnicodeLong, '⛄'),
        (r"\x{26c4}", 8, ast::HexLiteralKind::X, '⛄'),
        (r"\x{26C4}", 8, ast::HexLiteralKind::X, '⛄'),
        (r"\x{10fFfF}", 10, ast::HexLiteralKind::X, '\u{10FFFF}'),
    ];
    for (pat, end, prefix, c) in accepted {
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..end),
            kind: ast::LiteralKind::HexBrace(prefix),
            c,
        });
        assert_eq!(parser(pat).parse_escape(), Ok(expected));
    }

    // Input that ends early, empty braces, a non-hex digit, and digit
    // sequences that are reported as invalid hex escapes.
    let failures = [
        (r"\x", 2..2, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{", 2..3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{FF", 2..5, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{}", 2..4, ast::ErrorKind::EscapeHexEmpty),
        (r"\x{FGF}", 4..5, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\x{FFFFFF}", 3..9, ast::ErrorKind::EscapeHexInvalid),
        (r"\x{D800}", 3..7, ast::ErrorKind::EscapeHexInvalid),
        (r"\x{FFFFFFFFF}", 3..12, ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pat, at, kind) in failures {
        let err = parser(pat).parse_escape().unwrap_err();
        assert_eq!(err, TestError { span: span(at), kind });
    }
}
5078
#[test]
fn parse_decimal() {
    // Plain digit runs parse to their value; a leading zero is accepted.
    for (input, value) in [("123", 123), ("0", 0), ("01", 1)] {
        assert_eq!(parser(input).parse_decimal(), Ok(value));
    }

    // Inputs with no leading digit yield an empty-decimal error at offset 0,
    // and a digit run that is reported as invalid is flagged over its whole
    // length.
    let failures = [
        ("-1", 0..0, ast::ErrorKind::DecimalEmpty),
        ("", 0..0, ast::ErrorKind::DecimalEmpty),
        ("9999999999", 0..10, ast::ErrorKind::DecimalInvalid),
    ];
    for (input, at, kind) in failures {
        let err = parser(input).parse_decimal().unwrap_err();
        assert_eq!(err, TestError { span: span(at), kind });
    }
}
5101
// Exercises full parsing of bracketed character classes: nested classes, set
// operations, literals, escapes, ranges, and the error cases for malformed
// classes. The local helper functions below only build expected AST values.
#[test]
fn parse_set_class() {
    // Wraps `items` in a union set with the given span.
    fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
        ast::ClassSet::union(ast::ClassSetUnion { span, items })
    }

    // Builds the binary set operation `lhs && rhs`.
    fn intersection(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
            span,
            kind: ast::ClassSetBinaryOpKind::Intersection,
            lhs: Box::new(lhs),
            rhs: Box::new(rhs),
        })
    }

    // Builds the binary set operation `lhs -- rhs`.
    fn difference(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
            span,
            kind: ast::ClassSetBinaryOpKind::Difference,
            lhs: Box::new(lhs),
            rhs: Box::new(rhs),
        })
    }

    // Builds the binary set operation `lhs ~~ rhs`.
    fn symdifference(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
            span,
            kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
            lhs: Box::new(lhs),
            rhs: Box::new(rhs),
        })
    }

    // Wraps a single item as a class set.
    fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
        ast::ClassSet::Item(item)
    }

    // Wraps an ASCII class (e.g. `[:alnum:]`) as a set item.
    fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
        ast::ClassSetItem::Ascii(cls)
    }

    // Wraps a Unicode class (e.g. `\pL`) as a set item.
    fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
        ast::ClassSetItem::Unicode(cls)
    }

    // Wraps a Perl class (e.g. `\w`) as a set item.
    fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
        ast::ClassSetItem::Perl(cls)
    }

    // Wraps a nested bracketed class as a set item.
    fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
        ast::ClassSetItem::Bracketed(Box::new(cls))
    }

    // Builds a verbatim literal item.
    fn lit(span: Span, c: char) -> ast::ClassSetItem {
        ast::ClassSetItem::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    }

    // Builds an empty item with the given span.
    fn empty(span: Span) -> ast::ClassSetItem {
        ast::ClassSetItem::Empty(span)
    }

    // Builds a range item whose endpoints are verbatim literals. The start
    // literal covers the first `start.len_utf8()` bytes of `span` and the end
    // literal covers the last `end.len_utf8()` bytes. Columns are shifted by
    // exactly one and the line is left unchanged, so each endpoint is treated
    // as a single character on the same line.
    fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
        let pos1 = Position {
            offset: span.start.offset + start.len_utf8(),
            column: span.start.column + 1,
            ..span.start
        };
        let pos2 = Position {
            offset: span.end.offset - end.len_utf8(),
            column: span.end.column - 1,
            ..span.end
        };
        ast::ClassSetItem::Range(ast::ClassSetRange {
            span,
            start: ast::Literal {
                span: Span { end: pos1, ..span },
                kind: ast::LiteralKind::Verbatim,
                c: start,
            },
            end: ast::Literal {
                span: Span { start: pos2, ..span },
                kind: ast::LiteralKind::Verbatim,
                c: end,
            },
        })
    }

    // Builds an `[:alnum:]` ASCII class value.
    fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
        ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated }
    }

    // Builds a `[:lower:]` ASCII class value.
    fn lower(span: Span, negated: bool) -> ast::ClassAscii {
        ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated }
    }

    // ASCII classes, alone, nested, and combined with each of the three
    // binary set operators.
    assert_eq!(
        parser("[[:alnum:]]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..11),
            negated: false,
            kind: itemset(item_ascii(alnum(span(1..10), false))),
        }))
    );
    assert_eq!(
        parser("[[[:alnum:]]]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..13),
            negated: false,
            kind: itemset(item_bracket(ast::ClassBracketed {
                span: span(1..12),
                negated: false,
                kind: itemset(item_ascii(alnum(span(2..11), false))),
            })),
        }))
    );
    assert_eq!(
        parser("[[:alnum:]&&[:lower:]]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..22),
            negated: false,
            kind: intersection(
                span(1..21),
                itemset(item_ascii(alnum(span(1..10), false))),
                itemset(item_ascii(lower(span(12..21), false))),
            ),
        }))
    );
    assert_eq!(
        parser("[[:alnum:]--[:lower:]]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..22),
            negated: false,
            kind: difference(
                span(1..21),
                itemset(item_ascii(alnum(span(1..10), false))),
                itemset(item_ascii(lower(span(12..21), false))),
            ),
        }))
    );
    assert_eq!(
        parser("[[:alnum:]~~[:lower:]]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..22),
            negated: false,
            kind: symdifference(
                span(1..21),
                itemset(item_ascii(alnum(span(1..10), false))),
                itemset(item_ascii(lower(span(12..21), false))),
            ),
        }))
    );

    // Literals, escaped meta characters, and class escapes as set items. A
    // single item is an `Item` set; two or more items form a union.
    assert_eq!(
        parser("[a]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: itemset(lit(span(1..2), 'a')),
        }))
    );
    assert_eq!(
        parser(r"[a\]]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..5),
            negated: false,
            kind: union(
                span(1..4),
                vec![
                    lit(span(1..2), 'a'),
                    ast::ClassSetItem::Literal(ast::Literal {
                        span: span(2..4),
                        kind: ast::LiteralKind::Meta,
                        c: ']',
                    }),
                ]
            ),
        }))
    );
    // An escaped `-` is a literal, not a range operator.
    assert_eq!(
        parser(r"[a\-z]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..6),
            negated: false,
            kind: union(
                span(1..5),
                vec![
                    lit(span(1..2), 'a'),
                    ast::ClassSetItem::Literal(ast::Literal {
                        span: span(2..4),
                        kind: ast::LiteralKind::Meta,
                        c: '-',
                    }),
                    lit(span(4..5), 'z'),
                ]
            ),
        }))
    );
    assert_eq!(
        parser("[ab]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..4),
            negated: false,
            kind: union(
                span(1..3),
                vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
            ),
        }))
    );
    // A `-` at the end or the start of the class is a literal.
    assert_eq!(
        parser("[a-]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..4),
            negated: false,
            kind: union(
                span(1..3),
                vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
            ),
        }))
    );
    assert_eq!(
        parser("[-a]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..4),
            negated: false,
            kind: union(
                span(1..3),
                vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
            ),
        }))
    );
    assert_eq!(
        parser(r"[\pL]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..5),
            negated: false,
            kind: itemset(item_unicode(ast::ClassUnicode {
                span: span(1..4),
                negated: false,
                kind: ast::ClassUnicodeKind::OneLetter('L'),
            })),
        }))
    );
    assert_eq!(
        parser(r"[\w]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..4),
            negated: false,
            kind: itemset(item_perl(ast::ClassPerl {
                span: span(1..3),
                kind: ast::ClassPerlKind::Word,
                negated: false,
            })),
        }))
    );
    assert_eq!(
        parser(r"[a\wz]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..6),
            negated: false,
            kind: union(
                span(1..5),
                vec![
                    lit(span(1..2), 'a'),
                    item_perl(ast::ClassPerl {
                        span: span(2..4),
                        kind: ast::ClassPerlKind::Word,
                        negated: false,
                    }),
                    lit(span(4..5), 'z'),
                ]
            ),
        }))
    );

    // Ranges, alone and as operands of set operations. Repeated operators
    // nest to the left: `a--b--c` is `(a--b)--c`.
    assert_eq!(
        parser("[a-z]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..5),
            negated: false,
            kind: itemset(range(span(1..4), 'a', 'z')),
        }))
    );
    assert_eq!(
        parser("[a-cx-z]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..8),
            negated: false,
            kind: union(
                span(1..7),
                vec![
                    range(span(1..4), 'a', 'c'),
                    range(span(4..7), 'x', 'z'),
                ]
            ),
        }))
    );
    assert_eq!(
        parser(r"[\w&&a-cx-z]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..12),
            negated: false,
            kind: intersection(
                span(1..11),
                itemset(item_perl(ast::ClassPerl {
                    span: span(1..3),
                    kind: ast::ClassPerlKind::Word,
                    negated: false,
                })),
                union(
                    span(5..11),
                    vec![
                        range(span(5..8), 'a', 'c'),
                        range(span(8..11), 'x', 'z'),
                    ]
                ),
            ),
        }))
    );
    assert_eq!(
        parser(r"[a-cx-z&&\w]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..12),
            negated: false,
            kind: intersection(
                span(1..11),
                union(
                    span(1..7),
                    vec![
                        range(span(1..4), 'a', 'c'),
                        range(span(4..7), 'x', 'z'),
                    ]
                ),
                itemset(item_perl(ast::ClassPerl {
                    span: span(9..11),
                    kind: ast::ClassPerlKind::Word,
                    negated: false,
                })),
            ),
        }))
    );
    assert_eq!(
        parser(r"[a--b--c]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..9),
            negated: false,
            kind: difference(
                span(1..8),
                difference(
                    span(1..5),
                    itemset(lit(span(1..2), 'a')),
                    itemset(lit(span(4..5), 'b')),
                ),
                itemset(lit(span(7..8), 'c')),
            ),
        }))
    );
    assert_eq!(
        parser(r"[a~~b~~c]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..9),
            negated: false,
            kind: symdifference(
                span(1..8),
                symdifference(
                    span(1..5),
                    itemset(lit(span(1..2), 'a')),
                    itemset(lit(span(4..5), 'b')),
                ),
                itemset(lit(span(7..8), 'c')),
            ),
        }))
    );
    // `^` and `&` as operands: escaped they are `Meta` literals, and a lone
    // one after the operator is a verbatim literal.
    assert_eq!(
        parser(r"[\^&&^]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..7),
            negated: false,
            kind: intersection(
                span(1..6),
                itemset(ast::ClassSetItem::Literal(ast::Literal {
                    span: span(1..3),
                    kind: ast::LiteralKind::Meta,
                    c: '^',
                })),
                itemset(lit(span(5..6), '^')),
            ),
        }))
    );
    assert_eq!(
        parser(r"[\&&&&]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..7),
            negated: false,
            kind: intersection(
                span(1..6),
                itemset(ast::ClassSetItem::Literal(ast::Literal {
                    span: span(1..3),
                    kind: ast::LiteralKind::Meta,
                    c: '&',
                })),
                itemset(lit(span(5..6), '&')),
            ),
        }))
    );
    // Operators with nothing between them produce empty items as operands.
    assert_eq!(
        parser(r"[&&&&]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..6),
            negated: false,
            kind: intersection(
                span(1..5),
                intersection(
                    span(1..3),
                    itemset(empty(span(1..1))),
                    itemset(empty(span(3..3))),
                ),
                itemset(empty(span(5..5))),
            ),
        }))
    );

    // A range with multi-byte endpoints: each snowman takes three bytes, so
    // spans are built with `span_range` against the pattern itself.
    let pat = "[☃-⛄]";
    assert_eq!(
        parser(pat).parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span_range(pat, 0..9),
            negated: false,
            kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
                span: span_range(pat, 1..8),
                start: ast::Literal {
                    span: span_range(pat, 1..4),
                    kind: ast::LiteralKind::Verbatim,
                    c: '☃',
                },
                end: ast::Literal {
                    span: span_range(pat, 5..8),
                    kind: ast::LiteralKind::Verbatim,
                    c: '⛄',
                },
            })),
        }))
    );

    // A `]` immediately after the opening bracket is a literal.
    assert_eq!(
        parser(r"[]]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: itemset(lit(span(1..2), ']')),
        }))
    );
    assert_eq!(
        parser(r"[]\[]").parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..5),
            negated: false,
            kind: union(
                span(1..4),
                vec![
                    lit(span(1..2), ']'),
                    ast::ClassSetItem::Literal(ast::Literal {
                        span: span(2..4),
                        kind: ast::LiteralKind::Meta,
                        c: '[',
                    }),
                ]
            ),
        }))
    );
    // Here the first `]` closes the class, so the second one is a verbatim
    // literal concatenated after it.
    assert_eq!(
        parser(r"[\[]]").parse(),
        Ok(concat(
            0..5,
            vec![
                Ast::class_bracketed(ast::ClassBracketed {
                    span: span(0..4),
                    negated: false,
                    kind: itemset(ast::ClassSetItem::Literal(
                        ast::Literal {
                            span: span(1..3),
                            kind: ast::LiteralKind::Meta,
                            c: '[',
                        }
                    )),
                }),
                Ast::literal(ast::Literal {
                    span: span(4..5),
                    kind: ast::LiteralKind::Verbatim,
                    c: ']',
                }),
            ]
        ))
    );

    // Error cases: unclosed classes. The expected span is the opening
    // bracket of whichever class the parser reports as unclosed.
    assert_eq!(
        parser("[").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parser("[[").parse().unwrap_err(),
        TestError {
            span: span(1..2),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parser("[[-]").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parser("[[[:alnum:]").parse().unwrap_err(),
        TestError {
            span: span(1..2),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    // Error cases: `\b` is not valid inside a class, a class escape cannot
    // be a range endpoint, and a range must not be reversed.
    assert_eq!(
        parser(r"[\b]").parse().unwrap_err(),
        TestError {
            span: span(1..3),
            kind: ast::ErrorKind::ClassEscapeInvalid,
        }
    );
    assert_eq!(
        parser(r"[\w-a]").parse().unwrap_err(),
        TestError {
            span: span(1..3),
            kind: ast::ErrorKind::ClassRangeLiteral,
        }
    );
    assert_eq!(
        parser(r"[a-\w]").parse().unwrap_err(),
        TestError {
            span: span(3..5),
            kind: ast::ErrorKind::ClassRangeLiteral,
        }
    );
    assert_eq!(
        parser(r"[z-a]").parse().unwrap_err(),
        TestError {
            span: span(1..4),
            kind: ast::ErrorKind::ClassRangeInvalid,
        }
    );

    // With the whitespace-ignoring parser, trailing whitespace before
    // end-of-input still yields an unclosed-class error at the opening
    // bracket.
    assert_eq!(
        parser_ignore_whitespace("[a ").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parser_ignore_whitespace("[a- ").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
}
5684
5685 #[test]
5686 fn parse_set_class_open() {
5687 assert_eq!(parser("[a]").parse_set_class_open(), {
5688 let set = ast::ClassBracketed {
5689 span: span(0..1),
5690 negated: false,
5691 kind: ast::ClassSet::union(ast::ClassSetUnion {
5692 span: span(1..1),
5693 items: vec![],
5694 }),
5695 };
5696 let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5697 Ok((set, union))
5698 });
5699 assert_eq!(
5700 parser_ignore_whitespace("[ a]").parse_set_class_open(),
5701 {
5702 let set = ast::ClassBracketed {
5703 span: span(0..4),
5704 negated: false,
5705 kind: ast::ClassSet::union(ast::ClassSetUnion {
5706 span: span(4..4),
5707 items: vec![],
5708 }),
5709 };
5710 let union =
5711 ast::ClassSetUnion { span: span(4..4), items: vec![] };
5712 Ok((set, union))
5713 }
5714 );
5715 assert_eq!(parser("[^a]").parse_set_class_open(), {
5716 let set = ast::ClassBracketed {
5717 span: span(0..2),
5718 negated: true,
5719 kind: ast::ClassSet::union(ast::ClassSetUnion {
5720 span: span(2..2),
5721 items: vec![],
5722 }),
5723 };
5724 let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5725 Ok((set, union))
5726 });
5727 assert_eq!(
5728 parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5729 {
5730 let set = ast::ClassBracketed {
5731 span: span(0..4),
5732 negated: true,
5733 kind: ast::ClassSet::union(ast::ClassSetUnion {
5734 span: span(4..4),
5735 items: vec![],
5736 }),
5737 };
5738 let union =
5739 ast::ClassSetUnion { span: span(4..4), items: vec![] };
5740 Ok((set, union))
5741 }
5742 );
5743 assert_eq!(parser("[-a]").parse_set_class_open(), {
5744 let set = ast::ClassBracketed {
5745 span: span(0..2),
5746 negated: false,
5747 kind: ast::ClassSet::union(ast::ClassSetUnion {
5748 span: span(1..1),
5749 items: vec![],
5750 }),
5751 };
5752 let union = ast::ClassSetUnion {
5753 span: span(1..2),
5754 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5755 span: span(1..2),
5756 kind: ast::LiteralKind::Verbatim,
5757 c: '-',
5758 })],
5759 };
5760 Ok((set, union))
5761 });
5762 assert_eq!(
5763 parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5764 {
5765 let set = ast::ClassBracketed {
5766 span: span(0..4),
5767 negated: false,
5768 kind: ast::ClassSet::union(ast::ClassSetUnion {
5769 span: span(2..2),
5770 items: vec![],
5771 }),
5772 };
5773 let union = ast::ClassSetUnion {
5774 span: span(2..3),
5775 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5776 span: span(2..3),
5777 kind: ast::LiteralKind::Verbatim,
5778 c: '-',
5779 })],
5780 };
5781 Ok((set, union))
5782 }
5783 );
5784 assert_eq!(parser("[^-a]").parse_set_class_open(), {
5785 let set = ast::ClassBracketed {
5786 span: span(0..3),
5787 negated: true,
5788 kind: ast::ClassSet::union(ast::ClassSetUnion {
5789 span: span(2..2),
5790 items: vec![],
5791 }),
5792 };
5793 let union = ast::ClassSetUnion {
5794 span: span(2..3),
5795 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5796 span: span(2..3),
5797 kind: ast::LiteralKind::Verbatim,
5798 c: '-',
5799 })],
5800 };
5801 Ok((set, union))
5802 });
5803 assert_eq!(parser("[--a]").parse_set_class_open(), {
5804 let set = ast::ClassBracketed {
5805 span: span(0..3),
5806 negated: false,
5807 kind: ast::ClassSet::union(ast::ClassSetUnion {
5808 span: span(1..1),
5809 items: vec![],
5810 }),
5811 };
5812 let union = ast::ClassSetUnion {
5813 span: span(1..3),
5814 items: vec![
5815 ast::ClassSetItem::Literal(ast::Literal {
5816 span: span(1..2),
5817 kind: ast::LiteralKind::Verbatim,
5818 c: '-',
5819 }),
5820 ast::ClassSetItem::Literal(ast::Literal {
5821 span: span(2..3),
5822 kind: ast::LiteralKind::Verbatim,
5823 c: '-',
5824 }),
5825 ],
5826 };
5827 Ok((set, union))
5828 });
5829 assert_eq!(parser("[]a]").parse_set_class_open(), {
5830 let set = ast::ClassBracketed {
5831 span: span(0..2),
5832 negated: false,
5833 kind: ast::ClassSet::union(ast::ClassSetUnion {
5834 span: span(1..1),
5835 items: vec![],
5836 }),
5837 };
5838 let union = ast::ClassSetUnion {
5839 span: span(1..2),
5840 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5841 span: span(1..2),
5842 kind: ast::LiteralKind::Verbatim,
5843 c: ']',
5844 })],
5845 };
5846 Ok((set, union))
5847 });
5848 assert_eq!(
5849 parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5850 {
5851 let set = ast::ClassBracketed {
5852 span: span(0..4),
5853 negated: false,
5854 kind: ast::ClassSet::union(ast::ClassSetUnion {
5855 span: span(2..2),
5856 items: vec![],
5857 }),
5858 };
5859 let union = ast::ClassSetUnion {
5860 span: span(2..3),
5861 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5862 span: span(2..3),
5863 kind: ast::LiteralKind::Verbatim,
5864 c: ']',
5865 })],
5866 };
5867 Ok((set, union))
5868 }
5869 );
5870 assert_eq!(parser("[^]a]").parse_set_class_open(), {
5871 let set = ast::ClassBracketed {
5872 span: span(0..3),
5873 negated: true,
5874 kind: ast::ClassSet::union(ast::ClassSetUnion {
5875 span: span(2..2),
5876 items: vec![],
5877 }),
5878 };
5879 let union = ast::ClassSetUnion {
5880 span: span(2..3),
5881 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5882 span: span(2..3),
5883 kind: ast::LiteralKind::Verbatim,
5884 c: ']',
5885 })],
5886 };
5887 Ok((set, union))
5888 });
5889 assert_eq!(parser("[-]a]").parse_set_class_open(), {
5890 let set = ast::ClassBracketed {
5891 span: span(0..2),
5892 negated: false,
5893 kind: ast::ClassSet::union(ast::ClassSetUnion {
5894 span: span(1..1),
5895 items: vec![],
5896 }),
5897 };
5898 let union = ast::ClassSetUnion {
5899 span: span(1..2),
5900 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5901 span: span(1..2),
5902 kind: ast::LiteralKind::Verbatim,
5903 c: '-',
5904 })],
5905 };
5906 Ok((set, union))
5907 });
5908
5909 assert_eq!(
5910 parser("[").parse_set_class_open().unwrap_err(),
5911 TestError {
5912 span: span(0..1),
5913 kind: ast::ErrorKind::ClassUnclosed,
5914 }
5915 );
5916 assert_eq!(
5917 parser_ignore_whitespace("[ ")
5918 .parse_set_class_open()
5919 .unwrap_err(),
5920 TestError {
5921 span: span(0..5),
5922 kind: ast::ErrorKind::ClassUnclosed,
5923 }
5924 );
5925 assert_eq!(
5926 parser("[^").parse_set_class_open().unwrap_err(),
5927 TestError {
5928 span: span(0..2),
5929 kind: ast::ErrorKind::ClassUnclosed,
5930 }
5931 );
5932 assert_eq!(
5933 parser("[]").parse_set_class_open().unwrap_err(),
5934 TestError {
5935 span: span(0..2),
5936 kind: ast::ErrorKind::ClassUnclosed,
5937 }
5938 );
5939 assert_eq!(
5940 parser("[-").parse_set_class_open().unwrap_err(),
5941 TestError {
5942 span: span(0..0),
5943 kind: ast::ErrorKind::ClassUnclosed,
5944 }
5945 );
5946 assert_eq!(
5947 parser("[--").parse_set_class_open().unwrap_err(),
5948 TestError {
5949 span: span(0..0),
5950 kind: ast::ErrorKind::ClassUnclosed,
5951 }
5952 );
5953
5954 assert_eq!(
5956 parser("(?x)[-#]").parse_with_comments().unwrap_err(),
5957 TestError {
5958 span: span(4..4),
5959 kind: ast::ErrorKind::ClassUnclosed,
5960 }
5961 );
5962 }
5963
#[test]
fn maybe_parse_ascii_class() {
    // Asserts that an `alnum` ASCII class spanning `0..end`, with the given
    // negation, is recognized at the start of `pattern`.
    #[track_caller]
    fn assert_alnum(pattern: &str, end: usize, negated: bool) {
        let expected = ast::ClassAscii {
            span: span(0..end),
            kind: ast::ClassAsciiKind::Alnum,
            negated,
        };
        assert_eq!(
            parser(pattern).maybe_parse_ascii_class(),
            Some(expected)
        );
    }

    // Asserts that no ASCII class is recognized in `pattern` and that the
    // parser offset is still 0 after the attempt.
    #[track_caller]
    fn assert_rejected(pattern: &str) {
        let p = parser(pattern);
        assert_eq!(p.maybe_parse_ascii_class(), None);
        assert_eq!(p.offset(), 0);
    }

    // Well-formed classes. Input trailing the class (`A`) is not part of
    // the reported span.
    assert_alnum(r"[:alnum:]", 9, false);
    assert_alnum(r"[:alnum:]A", 9, false);
    assert_alnum(r"[:^alnum:]", 10, true);

    // Truncated openers.
    assert_rejected(r"[:");
    assert_rejected(r"[:^");
    // `^` placed before the `:` instead of after it.
    assert_rejected(r"[^:alnum:]");
    // Misspelled class name.
    assert_rejected(r"[:alnnum:]");
    // Missing the closing `:` or the closing `]`.
    assert_rejected(r"[:alnum]");
    assert_rejected(r"[:alnum:");
}
6015
#[test]
fn parse_unicode_class() {
    // Builds a `name<op>value` Unicode class kind.
    fn named_value(
        op: ast::ClassUnicodeOpKind,
        name: &'static str,
        value: &'static str,
    ) -> ast::ClassUnicodeKind {
        ast::ClassUnicodeKind::NamedValue {
            op,
            name: s(name),
            value: s(value),
        }
    }

    // Asserts that `parse_escape` on `pattern` yields a Unicode class
    // spanning `0..end` with the given negation and kind.
    #[track_caller]
    fn assert_class(
        pattern: &str,
        end: usize,
        negated: bool,
        kind: ast::ClassUnicodeKind,
    ) {
        let expected =
            ast::ClassUnicode { span: span(0..end), negated, kind };
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Unicode(expected))
        );
    }

    // Asserts that `parse_escape` on `pattern` fails with
    // `EscapeUnexpectedEof` reported at the empty span `at..at`.
    #[track_caller]
    fn assert_eof(pattern: &str, at: usize) {
        let expected = TestError {
            span: span(at..at),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        };
        assert_eq!(parser(pattern).parse_escape().unwrap_err(), expected);
    }

    // Asserts that a full parse of `pattern` yields a concatenation of a
    // non-negated Unicode class spanning `0..class_end` followed by the
    // one-character verbatim literal `z`.
    #[track_caller]
    fn assert_class_then_z(
        pattern: &str,
        class_end: usize,
        kind: ast::ClassUnicodeKind,
    ) {
        let expected = ast::Concat {
            span: span(0..class_end + 1),
            asts: vec![
                Ast::class_unicode(ast::ClassUnicode {
                    span: span(0..class_end),
                    negated: false,
                    kind,
                }),
                Ast::literal(ast::Literal {
                    span: span(class_end..class_end + 1),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        };
        assert_eq!(parser(pattern).parse(), Ok(Ast::concat(expected)));
    }

    // Asserts that a full parse of `pattern` fails with
    // `UnicodeClassInvalid` reported at span `2..3`.
    #[track_caller]
    fn assert_invalid(pattern: &str) {
        let expected = TestError {
            span: span(2..3),
            kind: ast::ErrorKind::UnicodeClassInvalid,
        };
        assert_eq!(parser(pattern).parse().unwrap_err(), expected);
    }

    // One-letter and bracketed names, in both `\p` and negated `\P` form.
    assert_class(r"\pN", 3, false, ast::ClassUnicodeKind::OneLetter('N'));
    assert_class(r"\PN", 3, true, ast::ClassUnicodeKind::OneLetter('N'));
    assert_class(r"\p{N}", 5, false, ast::ClassUnicodeKind::Named(s("N")));
    assert_class(r"\P{N}", 5, true, ast::ClassUnicodeKind::Named(s("N")));
    assert_class(
        r"\p{Greek}",
        9,
        false,
        ast::ClassUnicodeKind::Named(s("Greek")),
    );

    // `name:value`, `name=value` and `name!=value` forms.
    assert_class(
        r"\p{scx:Katakana}",
        16,
        false,
        named_value(ast::ClassUnicodeOpKind::Colon, "scx", "Katakana"),
    );
    assert_class(
        r"\p{scx=Katakana}",
        16,
        false,
        named_value(ast::ClassUnicodeOpKind::Equal, "scx", "Katakana"),
    );
    assert_class(
        r"\p{scx!=Katakana}",
        17,
        false,
        named_value(ast::ClassUnicodeOpKind::NotEqual, "scx", "Katakana"),
    );

    // A bare operator is accepted at this stage, with an empty name and an
    // empty value.
    assert_class(
        r"\p{:}",
        5,
        false,
        named_value(ast::ClassUnicodeOpKind::Colon, "", ""),
    );
    assert_class(
        r"\p{=}",
        5,
        false,
        named_value(ast::ClassUnicodeOpKind::Equal, "", ""),
    );
    assert_class(
        r"\p{!=}",
        6,
        false,
        named_value(ast::ClassUnicodeOpKind::NotEqual, "", ""),
    );

    // Input ending in the middle of the escape is reported at the end of
    // the pattern.
    assert_eof(r"\p", 2);
    assert_eof(r"\p{", 3);
    assert_eof(r"\p{N", 4);
    assert_eof(r"\p{Greek", 8);

    // A literal right after the class is parsed as a separate AST node.
    assert_class_then_z(r"\pNz", 3, ast::ClassUnicodeKind::OneLetter('N'));
    assert_class_then_z(
        r"\p{Greek}z",
        9,
        ast::ClassUnicodeKind::Named(s("Greek")),
    );

    // A backslash directly after `\p` / `\P` is rejected.
    assert_invalid(r"\p\{");
    assert_invalid(r"\P\{");
}
6213
#[test]
fn parse_perl_class() {
    // Builds the two-character Perl class (span `0..2`) of the given kind
    // and negation.
    fn perl(kind: ast::ClassPerlKind, negated: bool) -> ast::ClassPerl {
        ast::ClassPerl { span: span(0..2), kind, negated }
    }

    // Asserts that `parse_escape` on `pattern` yields the Perl class of the
    // given kind and negation.
    #[track_caller]
    fn assert_escape(
        pattern: &str,
        kind: ast::ClassPerlKind,
        negated: bool,
    ) {
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Perl(perl(kind, negated)))
        );
    }

    // Lowercase escapes are the plain class; uppercase ones are negated.
    assert_escape(r"\d", ast::ClassPerlKind::Digit, false);
    assert_escape(r"\D", ast::ClassPerlKind::Digit, true);
    assert_escape(r"\s", ast::ClassPerlKind::Space, false);
    assert_escape(r"\S", ast::ClassPerlKind::Space, true);
    assert_escape(r"\w", ast::ClassPerlKind::Word, false);
    assert_escape(r"\W", ast::ClassPerlKind::Word, true);

    // A full parse of a lone class yields the class AST node itself.
    let lone_digit = perl(ast::ClassPerlKind::Digit, false);
    assert_eq!(parser(r"\d").parse(), Ok(Ast::class_perl(lone_digit)));

    // A literal right after the class produces a two-element concatenation.
    let digit_then_z = ast::Concat {
        span: span(0..3),
        asts: vec![
            Ast::class_perl(perl(ast::ClassPerlKind::Digit, false)),
            Ast::literal(ast::Literal {
                span: span(2..3),
                kind: ast::LiteralKind::Verbatim,
                c: 'z',
            }),
        ],
    };
    assert_eq!(parser(r"\dz").parse(), Ok(Ast::concat(digit_then_z)));
}
6292
// Regression test (the name points at issue 454, presumably in the project's
// tracker): a long pattern made of groups, alternations and bracketed
// classes must parse successfully under a nest limit of 50.
#[test]
fn regression_454_nest_too_big() {
    // NOTE(review): the literal is reproduced line for line; confirm any
    // leading whitespace inside it against version control.
    let source = r#"
2(?:
[45]\d{3}|
7(?:
1[0-267]|
2[0-289]|
3[0-29]|
4[01]|
5[1-3]|
6[013]|
7[0178]|
91
)|
8(?:
0[125]|
[139][1-6]|
2[0157-9]|
41|
6[1-35]|
7[1-5]|
8[1-8]|
90
)|
9(?:
0[0-2]|
1[0-4]|
2[568]|
3[3-6]|
5[5-7]|
6[0167]|
7[15]|
8[0146-9]
)
)\d{4}
"#;
    let parsed = parser_nest_limit(source, 50).parse();
    assert!(parsed.is_ok());
}
6335
// Regression test (the name points at issue 455, presumably in the project's
// tracker): in `(?x)` mode, a bracketed class whose last item is `-` must
// parse even when whitespace, newlines or a comment sit between the `-` and
// the closing `]`. The same inputs without a closing `]` must fail.
#[test]
fn regression_455_trailing_dash_ignore_whitespace() {
    // Reports whether a full parse of `pattern` succeeds.
    fn parses(pattern: &str) -> bool {
        parser(pattern).parse().is_ok()
    }

    // Closed classes ending in `-`.
    assert!(parses("(?x)[ / - ]"));
    assert!(parses("(?x)[ a - ]"));
    assert!(parses(
        "(?x)[
a
- ]
"
    ));
    assert!(parses(
        "(?x)[
a # wat
- ]
"
    ));

    // Unclosed classes ending in `-`.
    assert!(!parses("(?x)[ / -"));
    assert!(!parses("(?x)[ / - "));
    assert!(!parses(
        "(?x)[
/ -
"
    ));
    assert!(!parses(
        "(?x)[
/ - # wat
"
    ));
}
6377}