1use std::{cell::RefCell, char, iter::FusedIterator, mem::transmute, rc::Rc};
4
5use either::Either::{Left, Right};
6use smallvec::{smallvec, SmallVec};
7use swc_atoms::{Atom, AtomStoreCell};
8use swc_common::{
9 comments::Comments,
10 input::{Input, StringInput},
11 BytePos, Span,
12};
13use swc_ecma_ast::{op, AssignOp, EsVersion, Ident};
14
15pub use self::state::{TokenContext, TokenContexts};
16use self::{
17 comments_buffer::CommentsBuffer,
18 state::State,
19 table::{ByteHandler, BYTE_HANDLERS},
20 util::*,
21};
22use crate::{
23 error::{Error, SyntaxError},
24 token::{BinOpToken, IdentLike, Token, Word},
25 Context, Syntax,
26};
27
28mod comments_buffer;
29mod jsx;
30mod number;
31mod state;
32mod table;
33#[cfg(test)]
34mod tests;
35pub mod util;
36mod whitespace;
37
/// Result type used throughout the lexer: `Ok(T)` on success, or the
/// lexer's [`Error`] on failure.
pub(crate) type LexResult<T> = Result<T, Error>;
39
/// A code point as seen by the lexer, stored as a raw `u32`.
///
/// The value is not validated on construction from a `u32`, so it is not
/// guaranteed to be a valid Unicode scalar value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Char(u32);

impl From<char> for Char {
    /// Wraps a valid `char` by storing its scalar value.
    fn from(c: char) -> Self {
        Self(u32::from(c))
    }
}

impl From<u32> for Char {
    /// Wraps an arbitrary `u32` without checking that it is a valid scalar.
    fn from(c: u32) -> Self {
        Self(c)
    }
}
54
/// Iterator over the `char`s produced from a single [`Char`].
///
/// Backed by a `SmallVec` with inline room for 7 `char`s.
pub(crate) struct CharIter(SmallVec<[char; 7]>);
56
57impl IntoIterator for Char {
59 type IntoIter = CharIter;
60 type Item = char;
61
62 #[allow(unsafe_code)]
63 fn into_iter(self) -> Self::IntoIter {
64 CharIter(match char::from_u32(self.0) {
70 Some(c) => smallvec![c],
71 None => {
72 let mut buf = smallvec![];
73
74 let high = self.0 & 0xffff0000 >> 16;
75
76 let low = self.0 & 0x0000ffff;
77
78 if !(0xdc00..=0xdfff).contains(&low) {
81 buf.push('\\');
82 buf.push('u');
83 buf.extend(format!("{high:x}").chars());
84 buf.push('\\');
85 buf.push('u');
86 buf.extend(format!("{low:x}").chars());
87 } else {
88 let astral_code_point = (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
90
91 buf.push('\\');
92 buf.push('u');
93 buf.extend(format!("{astral_code_point:x}").chars());
94 }
95
96 buf
97 }
98 })
99 }
100}
101
102impl Iterator for CharIter {
103 type Item = char;
104
105 fn next(&mut self) -> Option<Self::Item> {
106 if self.0.is_empty() {
107 None
108 } else {
109 Some(self.0.remove(0))
110 }
111 }
112}
113
// Once the buffer is empty `next` keeps returning `None`, so the iterator is
// fused.
impl FusedIterator for CharIter {}
115
/// Tokenizer state for one ECMAScript source input.
///
/// Cloning is shallow for the `Rc` fields: clones share the same error
/// lists, scratch buffer and atom store.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// Comment store passed to [`Lexer::new`], if any.
    comments: Option<&'a dyn Comments>,
    /// Allocated by [`Lexer::new`] only when `comments` is `Some`.
    comments_buffer: Option<CommentsBuffer>,

    /// Current lexing context; starts out as `Context::default()`.
    pub(crate) ctx: Context,
    /// The source text being tokenized.
    input: StringInput<'a>,
    /// `input.last_pos()` captured at construction time.
    start_pos: BytePos,

    /// Mutable lexer state (line-break tracking, expression position, ...).
    state: State,
    /// Syntax configuration passed to [`Lexer::new`].
    pub(crate) syntax: Syntax,
    /// Target ECMAScript version passed to [`Lexer::new`].
    pub(crate) target: EsVersion,

    /// Shared list of errors. NOTE(review): presumably the recoverable
    /// errors collected via `emit_error*` — confirm in `state`/`util`.
    errors: Rc<RefCell<Vec<Error>>>,
    /// Shared list of errors. NOTE(review): presumably errors that only
    /// apply in module mode (see `emit_module_mode_error`) — confirm.
    module_errors: Rc<RefCell<Vec<Error>>>,

    /// Scratch string reused by `with_buf`; created with capacity 256.
    buf: Rc<RefCell<String>>,

    /// Atom store used to create the `Atom`s stored in tokens.
    atoms: Rc<AtomStoreCell>,
}
137
// Marker impl only. NOTE(review): the `Iterator` impl is not in this part of
// the file — confirm it keeps returning `None` after the first `None`.
impl FusedIterator for Lexer<'_> {}
139
140impl<'a> Lexer<'a> {
141 pub fn new(
142 syntax: Syntax,
143 target: EsVersion,
144 input: StringInput<'a>,
145 comments: Option<&'a dyn Comments>,
146 ) -> Self {
147 let start_pos = input.last_pos();
148
149 Lexer {
150 comments,
151 comments_buffer: comments.is_some().then(CommentsBuffer::new),
152 ctx: Default::default(),
153 input,
154 start_pos,
155 state: State::new(syntax, start_pos),
156 syntax,
157 target,
158 errors: Default::default(),
159 module_errors: Default::default(),
160 buf: Rc::new(RefCell::new(String::with_capacity(256))),
161 atoms: Default::default(),
162 }
163 }
164
165 fn with_buf<F, Ret>(&mut self, op: F) -> LexResult<Ret>
167 where
168 F: for<'any> FnOnce(&mut Lexer<'any>, &mut String) -> LexResult<Ret>,
169 {
170 let b = self.buf.clone();
171 let mut buf = b.borrow_mut();
172 buf.clear();
173
174 op(self, &mut buf)
175 }
176
177 fn read_token(&mut self) -> LexResult<Option<Token>> {
179 let byte = match self.input.as_str().as_bytes().first() {
180 Some(&v) => v,
181 None => return Ok(None),
182 };
183
184 let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
185
186 match handler {
187 Some(handler) => handler(self),
188 None => {
189 let start = self.cur_pos();
190 self.input.bump_bytes(1);
191 self.error_span(
192 pos_span(start),
193 SyntaxError::UnexpectedChar { c: byte as _ },
194 )
195 }
196 }
197 }
198
199 fn read_token_number_sign(&mut self) -> LexResult<Option<Token>> {
201 debug_assert!(self.cur().is_some());
202
203 unsafe {
204 self.input.bump(); }
207
208 debug_assert!(
211 !self.input.is_at_start() || self.cur() != Some('!'),
212 "#! should have already been handled by read_shebang()"
213 );
214 Ok(Some(Token::Hash))
215 }
216
217 #[inline(never)]
221 fn read_token_dot(&mut self) -> LexResult<Token> {
222 let next = match self.input.peek() {
224 Some(next) => next,
225 None => {
226 unsafe {
227 self.input.bump();
229 }
230 return Ok(tok!('.'));
231 }
232 };
233 if next.is_ascii_digit() {
234 return self.read_number(true).map(|v| match v {
235 Left((value, raw)) => Token::Num { value, raw },
236 Right((value, raw)) => Token::BigInt { value, raw },
237 });
238 }
239
240 unsafe {
241 self.input.bump();
244 }
245
246 if next == '.' && self.input.peek() == Some('.') {
247 unsafe {
248 self.input.bump(); self.input.bump(); }
253
254 return Ok(tok!("..."));
255 }
256
257 Ok(tok!('.'))
258 }
259
260 #[inline(never)]
264 fn read_token_question_mark(&mut self) -> LexResult<Token> {
265 match self.input.peek() {
266 Some('?') => {
267 unsafe {
268 self.input.bump();
270 self.input.bump();
271 }
272 if self.input.cur() == Some('=') {
273 unsafe {
274 self.input.bump();
276 }
277
278 return Ok(tok!("??="));
279 }
280 Ok(tok!("??"))
281 }
282 _ => {
283 unsafe {
284 self.input.bump();
286 }
287 Ok(tok!('?'))
288 }
289 }
290 }
291
292 #[inline(never)]
296 fn read_token_colon(&mut self) -> LexResult<Token> {
297 unsafe {
298 self.input.bump();
300 }
301 Ok(tok!(':'))
302 }
303
304 #[inline(never)]
308 fn read_token_zero(&mut self) -> LexResult<Token> {
309 let next = self.input.peek();
310
311 let bigint = match next {
312 Some('x') | Some('X') => self.read_radix_number::<16>(),
313 Some('o') | Some('O') => self.read_radix_number::<8>(),
314 Some('b') | Some('B') => self.read_radix_number::<2>(),
315 _ => {
316 return self.read_number(false).map(|v| match v {
317 Left((value, raw)) => Token::Num { value, raw },
318 Right((value, raw)) => Token::BigInt { value, raw },
319 });
320 }
321 };
322
323 bigint.map(|v| match v {
324 Left((value, raw)) => Token::Num { value, raw },
325 Right((value, raw)) => Token::BigInt { value, raw },
326 })
327 }
328
329 #[inline(never)]
333 fn read_token_logical(&mut self, c: u8) -> LexResult<Token> {
334 let had_line_break_before_last = self.had_line_break_before_last();
335 let start = self.cur_pos();
336
337 unsafe {
338 self.input.bump();
340 }
341 let token = if c == b'&' {
342 BinOpToken::BitAnd
343 } else {
344 BinOpToken::BitOr
345 };
346
347 if self.input.eat_byte(b'=') {
349 return Ok(Token::AssignOp(match token {
350 BinOpToken::BitAnd => AssignOp::BitAndAssign,
351 BinOpToken::BitOr => AssignOp::BitOrAssign,
352 _ => unreachable!(),
353 }));
354 }
355
356 if self.input.cur() == Some(c as char) {
358 unsafe {
359 self.input.bump();
361 }
362
363 if self.input.cur() == Some('=') {
364 unsafe {
365 self.input.bump();
367 }
368 return Ok(Token::AssignOp(match token {
369 BinOpToken::BitAnd => op!("&&="),
370 BinOpToken::BitOr => op!("||="),
371 _ => unreachable!(),
372 }));
373 }
374
375 if had_line_break_before_last && token == BinOpToken::BitOr && self.is_str("||||| ") {
378 let span = fixed_len_span(start, 7);
379 self.emit_error_span(span, SyntaxError::TS1185);
380 self.skip_line_comment(5);
381 self.skip_space::<true>();
382 return self.error_span(span, SyntaxError::TS1185);
383 }
384
385 return Ok(Token::BinOp(match token {
386 BinOpToken::BitAnd => BinOpToken::LogicalAnd,
387 BinOpToken::BitOr => BinOpToken::LogicalOr,
388 _ => unreachable!(),
389 }));
390 }
391
392 Ok(Token::BinOp(token))
393 }
394
395 #[inline(never)]
399 fn read_token_mul_mod(&mut self, c: u8) -> LexResult<Token> {
400 let is_mul = c == b'*';
401 unsafe {
402 self.input.bump();
404 }
405 let mut token = if is_mul {
406 Token::BinOp(BinOpToken::Mul)
407 } else {
408 Token::BinOp(BinOpToken::Mod)
409 };
410
411 if is_mul && self.input.eat_byte(b'*') {
413 token = Token::BinOp(BinOpToken::Exp)
414 }
415
416 if self.input.eat_byte(b'=') {
417 token = match token {
418 Token::BinOp(BinOpToken::Mul) => Token::AssignOp(AssignOp::MulAssign),
419 Token::BinOp(BinOpToken::Mod) => Token::AssignOp(AssignOp::ModAssign),
420 Token::BinOp(BinOpToken::Exp) => Token::AssignOp(AssignOp::ExpAssign),
421 _ => unreachable!(),
422 }
423 }
424
425 Ok(token)
426 }
427
428 fn read_escaped_char(&mut self, in_template: bool) -> LexResult<Option<Vec<Char>>> {
432 debug_assert_eq!(self.cur(), Some('\\'));
433
434 let start = self.cur_pos();
435
436 self.bump(); let c = match self.cur() {
439 Some(c) => c,
440 None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?,
441 };
442
443 macro_rules! push_c_and_ret {
444 ($c:expr) => {{
445 $c
446 }};
447 }
448
449 let c = match c {
450 '\\' => push_c_and_ret!('\\'),
451 'n' => push_c_and_ret!('\n'),
452 'r' => push_c_and_ret!('\r'),
453 't' => push_c_and_ret!('\t'),
454 'b' => push_c_and_ret!('\u{0008}'),
455 'v' => push_c_and_ret!('\u{000b}'),
456 'f' => push_c_and_ret!('\u{000c}'),
457 '\r' => {
458 self.bump(); self.eat(b'\n');
461
462 return Ok(None);
463 }
464 '\n' | '\u{2028}' | '\u{2029}' => {
465 self.bump();
466
467 return Ok(None);
468 }
469
470 'x' => {
472 self.bump(); match self.read_int_u32::<16>(2)? {
475 Some(val) => return Ok(Some(vec![Char::from(val)])),
476 None => self.error(
477 start,
478 SyntaxError::BadCharacterEscapeSequence {
479 expected: "2 hex characters",
480 },
481 )?,
482 }
483 }
484
485 'u' => match self.read_unicode_escape() {
487 Ok(chars) => return Ok(Some(chars)),
488 Err(err) => self.error(start, err.into_kind())?,
489 },
490
491 '0'..='7' => {
493 self.bump();
494
495 let first_c = if c == '0' {
496 match self.cur() {
497 Some(next) if next.is_digit(8) => c,
498 _ => return Ok(Some(vec!['\u{0000}'.into()])),
500 }
501 } else {
502 c
503 };
504
505 if in_template {
507 self.error(start, SyntaxError::LegacyOctal)?
508 }
509
510 self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
511
512 let mut value: u8 = first_c.to_digit(8).unwrap() as u8;
513
514 macro_rules! one {
515 ($check:expr) => {{
516 let cur = self.cur();
517
518 match cur.and_then(|c| c.to_digit(8)) {
519 Some(v) => {
520 value = if $check {
521 let new_val = value
522 .checked_mul(8)
523 .and_then(|value| value.checked_add(v as u8));
524 match new_val {
525 Some(val) => val,
526 None => return Ok(Some(vec![Char::from(value as char)])),
527 }
528 } else {
529 value * 8 + v as u8
530 };
531
532 self.bump();
533 }
534 _ => return Ok(Some(vec![Char::from(value as u32)])),
535 }
536 }};
537 }
538
539 one!(false);
540 one!(true);
541
542 return Ok(Some(vec![Char::from(value as char)]));
543 }
544 _ => c,
545 };
546
547 unsafe {
548 self.input.bump();
550 }
551
552 Ok(Some(vec![c.into()]))
553 }
554
555 fn read_token_plus_minus(&mut self, c: u8) -> LexResult<Option<Token>> {
556 let start = self.cur_pos();
557
558 unsafe {
559 self.input.bump();
561 }
562
563 Ok(Some(if self.input.cur() == Some(c as char) {
565 unsafe {
566 self.input.bump();
568 }
569
570 if self.state.had_line_break && c == b'-' && self.eat(b'>') {
572 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
573 self.skip_line_comment(0);
574 self.skip_space::<true>();
575 return self.read_token();
576 }
577
578 if c == b'+' {
579 Token::PlusPlus
580 } else {
581 Token::MinusMinus
582 }
583 } else if self.input.eat_byte(b'=') {
584 Token::AssignOp(if c == b'+' {
585 AssignOp::AddAssign
586 } else {
587 AssignOp::SubAssign
588 })
589 } else {
590 Token::BinOp(if c == b'+' {
591 BinOpToken::Add
592 } else {
593 BinOpToken::Sub
594 })
595 }))
596 }
597
598 fn read_token_bang_or_eq(&mut self, c: u8) -> LexResult<Option<Token>> {
599 let start = self.cur_pos();
600 let had_line_break_before_last = self.had_line_break_before_last();
601
602 unsafe {
603 self.input.bump();
605 }
606
607 Ok(Some(if self.input.eat_byte(b'=') {
608 if self.input.eat_byte(b'=') {
611 if c == b'!' {
612 Token::BinOp(BinOpToken::NotEqEq)
613 } else {
614 if had_line_break_before_last && self.is_str("====") {
617 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
618 self.skip_line_comment(4);
619 self.skip_space::<true>();
620 return self.read_token();
621 }
622
623 Token::BinOp(BinOpToken::EqEqEq)
624 }
625 } else if c == b'!' {
626 Token::BinOp(BinOpToken::NotEq)
627 } else {
628 Token::BinOp(BinOpToken::EqEq)
629 }
630 } else if c == b'=' && self.input.eat_byte(b'>') {
631 Token::Arrow
634 } else if c == b'!' {
635 Token::Bang
636 } else {
637 Token::AssignOp(AssignOp::Assign)
638 }))
639 }
640}
641
642impl Lexer<'_> {
643 #[inline(never)]
644 fn read_slash(&mut self) -> LexResult<Option<Token>> {
645 debug_assert_eq!(self.cur(), Some('/'));
646
647 self.bump();
649
650 Ok(Some(if self.eat(b'=') {
651 tok!("/=")
652 } else {
653 tok!('/')
654 }))
655 }
656
657 #[inline(never)]
658 fn read_token_lt_gt(&mut self) -> LexResult<Option<Token>> {
659 debug_assert!(self.cur() == Some('<') || self.cur() == Some('>'));
660
661 let had_line_break_before_last = self.had_line_break_before_last();
662 let start = self.cur_pos();
663 let c = self.cur().unwrap();
664 self.bump();
665
666 if self.syntax.typescript() && self.ctx.in_type && !self.ctx.should_not_lex_lt_or_gt_as_type
667 {
668 if c == '<' {
669 return Ok(Some(tok!('<')));
670 } else if c == '>' {
671 return Ok(Some(tok!('>')));
672 }
673 }
674
675 if c == '<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-') {
677 self.skip_line_comment(3);
678 self.skip_space::<true>();
679 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
680
681 return self.read_token();
682 }
683
684 let mut op = if c == '<' {
685 BinOpToken::Lt
686 } else {
687 BinOpToken::Gt
688 };
689
690 if self.cur() == Some(c) {
692 self.bump();
693 op = if c == '<' {
694 BinOpToken::LShift
695 } else {
696 BinOpToken::RShift
697 };
698
699 if c == '>' && self.cur() == Some(c) {
701 self.bump();
702 op = BinOpToken::ZeroFillRShift;
703 }
704 }
705
706 let token = if self.eat(b'=') {
707 match op {
708 BinOpToken::Lt => Token::BinOp(BinOpToken::LtEq),
709 BinOpToken::Gt => Token::BinOp(BinOpToken::GtEq),
710 BinOpToken::LShift => Token::AssignOp(AssignOp::LShiftAssign),
711 BinOpToken::RShift => Token::AssignOp(AssignOp::RShiftAssign),
712 BinOpToken::ZeroFillRShift => Token::AssignOp(AssignOp::ZeroFillRShiftAssign),
713 _ => unreachable!(),
714 }
715 } else {
716 Token::BinOp(op)
717 };
718
719 if had_line_break_before_last
726 && match op {
727 BinOpToken::LShift if self.is_str("<<<<< ") => true,
728 BinOpToken::ZeroFillRShift if self.is_str(">>>> ") => true,
729 _ => false,
730 }
731 {
732 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
733 self.skip_line_comment(5);
734 self.skip_space::<true>();
735 return self.read_token();
736 }
737
738 Ok(Some(token))
739 }
740
741 fn read_ident_unknown(&mut self) -> LexResult<Token> {
744 debug_assert!(self.cur().is_some());
745
746 let (word, _) = self
747 .read_word_as_str_with(|l, s, _, _| Word::Ident(IdentLike::Other(l.atoms.atom(s))))?;
748
749 Ok(Word(word))
750 }
751
752 fn read_word_with(
755 &mut self,
756 convert: &dyn Fn(&str) -> Option<Word>,
757 ) -> LexResult<Option<Token>> {
758 debug_assert!(self.cur().is_some());
759
760 let start = self.cur_pos();
761 let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| {
762 if can_be_known {
763 if let Some(word) = convert(s) {
764 return word;
765 }
766 }
767
768 Word::Ident(IdentLike::Other(l.atoms.atom(s)))
769 })?;
770
771 if has_escape && self.ctx.is_reserved(&word) {
776 self.error(
777 start,
778 SyntaxError::EscapeInReservedWord { word: word.into() },
779 )?
780 } else {
781 Ok(Some(Token::Word(word)))
782 }
783 }
784
785 fn read_word_as_str_with<F, Ret>(&mut self, convert: F) -> LexResult<(Ret, bool)>
789 where
790 F: for<'any> FnOnce(&'any mut Lexer<'_>, &str, bool, bool) -> Ret,
791 {
792 debug_assert!(self.cur().is_some());
793 let mut first = true;
794 let mut can_be_keyword = true;
795 let mut slice_start = self.cur_pos();
796 let mut has_escape = false;
797
798 self.with_buf(|l, buf| {
799 loop {
800 if let Some(c) = l.input.cur_as_ascii() {
801 if can_be_keyword && (c.is_ascii_uppercase() || c.is_ascii_digit()) {
803 can_be_keyword = false;
804 }
805
806 if Ident::is_valid_continue(c as _) {
807 l.bump();
808 continue;
809 } else if first && Ident::is_valid_start(c as _) {
810 l.bump();
811 first = false;
812 continue;
813 }
814
815 if c == b'\\' {
817 first = false;
818 has_escape = true;
819 let start = l.cur_pos();
820 l.bump();
821
822 if !l.is(b'u') {
823 l.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)?
824 }
825
826 {
827 let end = l.input.cur_pos();
828 let s = unsafe {
829 l.input.slice(slice_start, start)
832 };
833 buf.push_str(s);
834 unsafe {
835 l.input.reset_to(end);
837 }
838 }
839
840 let chars = l.read_unicode_escape()?;
841
842 if let Some(c) = chars.first() {
843 let valid = if first {
844 c.is_ident_start()
845 } else {
846 c.is_ident_part()
847 };
848
849 if !valid {
850 l.emit_error(start, SyntaxError::InvalidIdentChar);
851 }
852 }
853
854 for c in chars {
855 buf.extend(c);
856 }
857
858 slice_start = l.cur_pos();
859 continue;
860 }
861
862 break;
865 }
866
867 if let Some(c) = l.input.cur() {
868 if Ident::is_valid_continue(c) {
869 l.bump();
870 continue;
871 } else if first && Ident::is_valid_start(c) {
872 l.bump();
873 first = false;
874 continue;
875 }
876 }
877
878 break;
879 }
880
881 let end = l.cur_pos();
882
883 let value = if !has_escape {
884 let s = unsafe {
887 l.input.slice(slice_start, end)
890 };
891 let s = unsafe {
892 transmute::<&str, &'static str>(s)
894 };
895
896 convert(l, s, has_escape, can_be_keyword)
897 } else {
898 let s = unsafe {
899 l.input.slice(slice_start, end)
902 };
903 buf.push_str(s);
904
905 convert(l, buf, has_escape, can_be_keyword)
906 };
907
908 Ok((value, has_escape))
909 })
910 }
911
912 fn read_unicode_escape(&mut self) -> LexResult<Vec<Char>> {
913 debug_assert_eq!(self.cur(), Some('u'));
914
915 let mut chars = Vec::new();
916 let mut is_curly = false;
917
918 self.bump(); if self.eat(b'{') {
921 is_curly = true;
922 }
923
924 let state = self.input.cur_pos();
925 let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
926 Ok(Some(val)) => {
927 if 0x0010_ffff >= val {
928 char::from_u32(val)
929 } else {
930 let start = self.cur_pos();
931
932 self.error(
933 start,
934 SyntaxError::BadCharacterEscapeSequence {
935 expected: if is_curly {
936 "1-6 hex characters in the range 0 to 10FFFF."
937 } else {
938 "4 hex characters"
939 },
940 },
941 )?
942 }
943 }
944 _ => {
945 let start = self.cur_pos();
946
947 self.error(
948 start,
949 SyntaxError::BadCharacterEscapeSequence {
950 expected: if is_curly {
951 "1-6 hex characters"
952 } else {
953 "4 hex characters"
954 },
955 },
956 )?
957 }
958 };
959
960 match c {
961 Some(c) => {
962 chars.push(c.into());
963 }
964 _ => {
965 unsafe {
966 self.input.reset_to(state);
968 }
969
970 chars.push(Char::from('\\'));
971 chars.push(Char::from('u'));
972
973 if is_curly {
974 chars.push(Char::from('{'));
975
976 for _ in 0..6 {
977 if let Some(c) = self.input.cur() {
978 if c == '}' {
979 break;
980 }
981
982 self.bump();
983
984 chars.push(Char::from(c));
985 } else {
986 break;
987 }
988 }
989
990 chars.push(Char::from('}'));
991 } else {
992 for _ in 0..4 {
993 if let Some(c) = self.input.cur() {
994 self.bump();
995
996 chars.push(Char::from(c));
997 }
998 }
999 }
1000 }
1001 }
1002
1003 if is_curly && !self.eat(b'}') {
1004 self.error(state, SyntaxError::InvalidUnicodeEscape)?
1005 }
1006
1007 Ok(chars)
1008 }
1009
1010 fn read_str_lit(&mut self) -> LexResult<Token> {
1012 debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"'));
1013 let start = self.cur_pos();
1014 let quote = self.cur().unwrap() as u8;
1015
1016 self.bump(); let mut has_escape = false;
1019 let mut slice_start = self.input.cur_pos();
1020
1021 self.with_buf(|l, buf| {
1022 loop {
1023 if let Some(c) = l.input.cur_as_ascii() {
1024 if c == quote {
1025 let value_end = l.cur_pos();
1026
1027 let value = if !has_escape {
1028 let s = unsafe {
1029 l.input.slice(slice_start, value_end)
1032 };
1033
1034 l.atoms.atom(s)
1035 } else {
1036 let s = unsafe {
1037 l.input.slice(slice_start, value_end)
1040 };
1041 buf.push_str(s);
1042
1043 l.atoms.atom(&**buf)
1044 };
1045
1046 unsafe {
1047 l.input.bump();
1049 }
1050
1051 let end = l.cur_pos();
1052
1053 let raw = unsafe {
1054 l.input.slice(start, end)
1057 };
1058 let raw = l.atoms.atom(raw);
1059
1060 return Ok(Token::Str { value, raw });
1061 }
1062
1063 if c == b'\\' {
1064 has_escape = true;
1065
1066 {
1067 let end = l.cur_pos();
1068 let s = unsafe {
1069 l.input.slice(slice_start, end)
1072 };
1073 buf.push_str(s);
1074 }
1075
1076 if let Some(chars) = l.read_escaped_char(false)? {
1077 for c in chars {
1078 buf.extend(c);
1079 }
1080 }
1081
1082 slice_start = l.cur_pos();
1083 continue;
1084 }
1085
1086 if (c as char).is_line_break() {
1087 break;
1088 }
1089
1090 unsafe {
1091 l.input.bump();
1093 }
1094 continue;
1095 }
1096
1097 match l.input.cur() {
1098 Some(c) => {
1099 if c.is_line_break() {
1100 break;
1101 }
1102 unsafe {
1103 l.input.bump();
1105 }
1106 }
1107 None => break,
1108 }
1109 }
1110
1111 {
1112 let end = l.cur_pos();
1113 let s = unsafe {
1114 l.input.slice(slice_start, end)
1117 };
1118 buf.push_str(s);
1119 }
1120
1121 l.emit_error(start, SyntaxError::UnterminatedStrLit);
1122
1123 let end = l.cur_pos();
1124
1125 let raw = unsafe {
1126 l.input.slice(start, end)
1129 };
1130 Ok(Token::Str {
1131 value: l.atoms.atom(&*buf),
1132 raw: l.atoms.atom(raw),
1133 })
1134 })
1135 }
1136
1137 fn read_regexp(&mut self, start: BytePos) -> LexResult<Token> {
1139 unsafe {
1140 self.input.reset_to(start);
1142 }
1143
1144 debug_assert_eq!(self.cur(), Some('/'));
1145
1146 let start = self.cur_pos();
1147
1148 self.bump();
1149
1150 let (mut escaped, mut in_class) = (false, false);
1151
1152 let content = self.with_buf(|l, buf| {
1153 while let Some(c) = l.cur() {
1154 if c.is_line_terminator() {
1157 let span = l.span(start);
1158
1159 return Err(Error::new(span, SyntaxError::UnterminatedRegExp));
1160 }
1161
1162 if escaped {
1163 escaped = false;
1164 } else {
1165 match c {
1166 '[' => in_class = true,
1167 ']' if in_class => in_class = false,
1168 '/' if !in_class => break,
1170 _ => {}
1171 }
1172
1173 escaped = c == '\\';
1174 }
1175
1176 l.bump();
1177 buf.push(c);
1178 }
1179
1180 Ok(l.atoms.atom(&**buf))
1181 })?;
1182
1183 if !self.is(b'/') {
1185 let span = self.span(start);
1186
1187 return Err(Error::new(span, SyntaxError::UnterminatedRegExp));
1188 }
1189
1190 self.bump(); let flags = {
1199 match self.cur() {
1200 Some(c) if c.is_ident_start() => self
1201 .read_word_as_str_with(|l, s, _, _| l.atoms.atom(s))
1202 .map(Some),
1203 _ => Ok(None),
1204 }
1205 }?
1206 .map(|(value, _)| value)
1207 .unwrap_or_default();
1208
1209 Ok(Token::Regex(content, flags))
1210 }
1211
1212 #[cold]
1213 fn read_shebang(&mut self) -> LexResult<Option<Atom>> {
1214 if self.input.cur() != Some('#') || self.input.peek() != Some('!') {
1215 return Ok(None);
1216 }
1217 unsafe {
1218 self.input.bump();
1220 self.input.bump();
1222 }
1223 let s = self.input.uncons_while(|c| !c.is_line_terminator());
1224 Ok(Some(self.atoms.atom(s)))
1225 }
1226
1227 fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult<Token> {
1228 let start = self.cur_pos();
1229
1230 let mut cooked = Ok(String::new());
1231 let mut cooked_slice_start = start;
1232 let raw_slice_start = start;
1233
1234 macro_rules! consume_cooked {
1235 () => {{
1236 if let Ok(cooked) = &mut cooked {
1237 let last_pos = self.cur_pos();
1238 cooked.push_str(unsafe {
1239 self.input.slice(cooked_slice_start, last_pos)
1242 });
1243 }
1244 }};
1245 }
1246
1247 while let Some(c) = self.cur() {
1248 if c == '`' || (c == '$' && self.peek() == Some('{')) {
1249 if start == self.cur_pos() && self.state.last_was_tpl_element() {
1250 if c == '$' {
1251 self.bump();
1252 self.bump();
1253 return Ok(tok!("${"));
1254 } else {
1255 self.bump();
1256 return Ok(tok!('`'));
1257 }
1258 }
1259
1260 let cooked = if cooked_slice_start == raw_slice_start {
1262 let last_pos = self.cur_pos();
1263 let s = unsafe {
1264 self.input.slice(cooked_slice_start, last_pos)
1267 };
1268
1269 Ok(self.atoms.atom(s))
1270 } else {
1271 consume_cooked!();
1272
1273 cooked.map(|s| self.atoms.atom(s))
1274 };
1275
1276 let end = self.input.cur_pos();
1278 let raw = unsafe {
1279 self.input.slice(raw_slice_start, end)
1282 };
1283 return Ok(Token::Template {
1284 cooked,
1285 raw: self.atoms.atom(raw),
1286 });
1287 }
1288
1289 if c == '\\' {
1290 consume_cooked!();
1291
1292 match self.read_escaped_char(true) {
1293 Ok(Some(chars)) => {
1294 if let Ok(ref mut cooked) = cooked {
1295 for c in chars {
1296 cooked.extend(c);
1297 }
1298 }
1299 }
1300 Ok(None) => {}
1301 Err(error) => {
1302 cooked = Err(error);
1303 }
1304 }
1305
1306 cooked_slice_start = self.cur_pos();
1307 } else if c.is_line_terminator() {
1308 self.state.had_line_break = true;
1309
1310 consume_cooked!();
1311
1312 let c = if c == '\r' && self.peek() == Some('\n') {
1313 self.bump(); '\n'
1315 } else {
1316 match c {
1317 '\n' => '\n',
1318 '\r' => '\n',
1319 '\u{2028}' => '\u{2028}',
1320 '\u{2029}' => '\u{2029}',
1321 _ => unreachable!(),
1322 }
1323 };
1324
1325 self.bump();
1326
1327 if let Ok(ref mut cooked) = cooked {
1328 cooked.push(c);
1329 }
1330 cooked_slice_start = self.cur_pos();
1331 } else {
1332 self.bump();
1333 }
1334 }
1335
1336 self.error(start_of_tpl, SyntaxError::UnterminatedTpl)?
1337 }
1338
/// Returns the `had_line_break` flag of the lexer state.
#[inline]
#[allow(clippy::misnamed_getters)]
pub fn had_line_break_before_last(&self) -> bool {
    self.state.had_line_break
}
1344
/// Sets the `is_expr_allowed` flag of the lexer state to `allow`.
#[inline]
pub fn set_expr_allowed(&mut self, allow: bool) {
    self.state.is_expr_allowed = allow;
}
1349
/// Stores `start` in the state's `next_regexp` field.
///
/// NOTE(review): presumably the position at which the next token should be
/// read as a regular expression — confirm where `next_regexp` is consumed.
#[inline]
pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
    self.state.next_regexp = start;
}
1354}
1355
/// Builds a span whose start and end are both `p`.
fn pos_span(p: BytePos) -> Span {
    Span::new(p, p)
}
1359
1360fn fixed_len_span(p: BytePos, len: u32) -> Span {
1361 Span::new(p, p + BytePos(len))
1362}