swc_ecma_parser/lexer/mod.rs

//! ECMAScript lexer.

use std::{cell::RefCell, char, iter::FusedIterator, mem::transmute, rc::Rc};

use either::Either::{Left, Right};
use smallvec::{smallvec, SmallVec};
use swc_atoms::{Atom, AtomStoreCell};
use swc_common::{
    comments::Comments,
    input::{Input, StringInput},
    BytePos, Span,
};
use swc_ecma_ast::{op, AssignOp, EsVersion, Ident};

pub use self::state::{TokenContext, TokenContexts};
use self::{
    comments_buffer::CommentsBuffer,
    state::State,
    table::{ByteHandler, BYTE_HANDLERS},
    util::*,
};
use crate::{
    error::{Error, SyntaxError},
    token::{BinOpToken, IdentLike, Token, Word},
    Context, Syntax,
};

mod comments_buffer;
mod jsx;
mod number;
mod state;
mod table;
#[cfg(test)]
mod tests;
pub mod util;
mod whitespace;

pub(crate) type LexResult<T> = Result<T, Error>;

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Char(u32);

impl From<char> for Char {
    fn from(c: char) -> Self {
        Char(c as u32)
    }
}

impl From<u32> for Char {
    fn from(c: u32) -> Self {
        Char(c)
    }
}

pub(crate) struct CharIter(SmallVec<[char; 7]>);

/// Ported from https://github.com/web-infra-dev/oxc/blob/99a4816ce7b6132b2667257984f9d92ae3768f03/crates/oxc_parser/src/lexer/mod.rs#L1349-L1374
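///
/// A valid Unicode scalar value is yielded as a single `char`. Any other value
/// is treated as two packed UTF-16 code units and re-emitted as `\u`-prefixed
/// hex escape text, mirroring the oxc logic linked above.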
impl IntoIterator for Char {
    type IntoIter = CharIter;
    type Item = char;

    #[allow(unsafe_code)]
    fn into_iter(self) -> Self::IntoIter {
        //        // TODO: Check if this is correct
        //        fn to_char(v: u8) -> char {
        //            char::from_digit(v as _, 16).unwrap_or('0')
        //        }

        CharIter(match char::from_u32(self.0) {
            Some(c) => smallvec![c],
            None => {
                let mut buf = smallvec![];

                // `>>` binds more tightly than `&`; parenthesize so `high` really is the
                // upper 16 bits.
                let high = (self.0 & 0xffff0000) >> 16;

                let low = self.0 & 0x0000ffff;

                // The second code unit of a surrogate pair is always in the range from 0xDC00
                // to 0xDFFF, and is called a low surrogate or a trail surrogate.
                if !(0xdc00..=0xdfff).contains(&low) {
                    buf.push('\\');
                    buf.push('u');
                    buf.extend(format!("{high:x}").chars());
                    buf.push('\\');
                    buf.push('u');
                    buf.extend(format!("{low:x}").chars());
                } else {
                    // `https://tc39.es/ecma262/#sec-utf16decodesurrogatepair`
                    let astral_code_point = (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;

                    buf.push('\\');
                    buf.push('u');
                    buf.extend(format!("{astral_code_point:x}").chars());
                }

                buf
            }
        })
    }
}

impl Iterator for CharIter {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        if self.0.is_empty() {
            None
        } else {
            Some(self.0.remove(0))
        }
    }
}

impl FusedIterator for CharIter {}

#[derive(Clone)]
pub struct Lexer<'a> {
    comments: Option<&'a dyn Comments>,
    /// [Some] if comment parsing is enabled. Otherwise [None].
    comments_buffer: Option<CommentsBuffer>,

    pub(crate) ctx: Context,
    input: StringInput<'a>,
    start_pos: BytePos,

    state: State,
    pub(crate) syntax: Syntax,
    pub(crate) target: EsVersion,

    errors: Rc<RefCell<Vec<Error>>>,
    module_errors: Rc<RefCell<Vec<Error>>>,

    buf: Rc<RefCell<String>>,

    atoms: Rc<AtomStoreCell>,
}

impl FusedIterator for Lexer<'_> {}

impl<'a> Lexer<'a> {
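    /// Creates a new lexer for the given source text.
    ///
    /// A minimal usage sketch (construction and iteration only; the import
    /// paths and byte positions below are illustrative, and most users go
    /// through the parser rather than the lexer directly):
    ///
    /// ```ignore
    /// use swc_common::{input::StringInput, BytePos};
    /// use swc_ecma_ast::EsVersion;
    /// use swc_ecma_parser::{lexer::Lexer, Syntax};
    ///
    /// let src = "const answer = 42;";
    /// let lexer = Lexer::new(
    ///     Syntax::Es(Default::default()),
    ///     EsVersion::latest(),
    ///     StringInput::new(src, BytePos(1), BytePos(1 + src.len() as u32)),
    ///     None,
    /// );
    /// for token_and_span in lexer {
    ///     // Each item carries the token plus its span.
    ///     let _ = token_and_span;
    /// }
    /// ```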
    pub fn new(
        syntax: Syntax,
        target: EsVersion,
        input: StringInput<'a>,
        comments: Option<&'a dyn Comments>,
    ) -> Self {
        let start_pos = input.last_pos();

        Lexer {
            comments,
            comments_buffer: comments.is_some().then(CommentsBuffer::new),
            ctx: Default::default(),
            input,
            start_pos,
            state: State::new(syntax, start_pos),
            syntax,
            target,
            errors: Default::default(),
            module_errors: Default::default(),
            buf: Rc::new(RefCell::new(String::with_capacity(256))),
            atoms: Default::default(),
        }
    }

    /// Utility method to reuse buffer.
    fn with_buf<F, Ret>(&mut self, op: F) -> LexResult<Ret>
    where
        F: for<'any> FnOnce(&mut Lexer<'any>, &mut String) -> LexResult<Ret>,
    {
        let b = self.buf.clone();
        let mut buf = b.borrow_mut();
        buf.clear();

        op(self, &mut buf)
    }

    /// babel: `getTokenFromCode`
    fn read_token(&mut self) -> LexResult<Option<Token>> {
        let byte = match self.input.as_str().as_bytes().first() {
            Some(&v) => v,
            None => return Ok(None),
        };

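        // Note: `BYTE_HANDLERS` is a 256-entry dispatch table indexed by the byte
        // value, so the unsafe offset below stays in bounds for any `u8`.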
        let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };

        match handler {
            Some(handler) => handler(self),
            None => {
                let start = self.cur_pos();
                self.input.bump_bytes(1);
                self.error_span(
                    pos_span(start),
                    SyntaxError::UnexpectedChar { c: byte as _ },
                )
            }
        }
    }

    /// `#`
    fn read_token_number_sign(&mut self) -> LexResult<Option<Token>> {
        debug_assert!(self.cur().is_some());

        unsafe {
            // Safety: cur() is Some('#')
            self.input.bump(); // '#'
        }

        // `#` can also be a part of shebangs, however they should have been
        // handled by `read_shebang()`
        debug_assert!(
            !self.input.is_at_start() || self.cur() != Some('!'),
            "#! should have already been handled by read_shebang()"
        );
        Ok(Some(Token::Hash))
    }

    /// Read a token given `.`.
    ///
    /// This is extracted as a method to reduce size of `read_token`.
    #[inline(never)]
    fn read_token_dot(&mut self) -> LexResult<Token> {
        // Check for eof
        let next = match self.input.peek() {
            Some(next) => next,
            None => {
                unsafe {
                    // Safety: cur() is Some('.')
                    self.input.bump();
                }
                return Ok(tok!('.'));
            }
        };
        if next.is_ascii_digit() {
            return self.read_number(true).map(|v| match v {
                Left((value, raw)) => Token::Num { value, raw },
                Right((value, raw)) => Token::BigInt { value, raw },
            });
        }

        unsafe {
            // Safety: cur() is Some
            // 1st `.`
            self.input.bump();
        }

        if next == '.' && self.input.peek() == Some('.') {
            unsafe {
                // Safety: peek() was Some

                self.input.bump(); // 2nd `.`
                self.input.bump(); // 3rd `.`
            }

            return Ok(tok!("..."));
        }

        Ok(tok!('.'))
    }

    /// Read a token given `?`.
    ///
    /// This is extracted as a method to reduce size of `read_token`.
    #[inline(never)]
    fn read_token_question_mark(&mut self) -> LexResult<Token> {
        match self.input.peek() {
            Some('?') => {
                unsafe {
                    // Safety: peek() was some
                    self.input.bump();
                    self.input.bump();
                }
                if self.input.cur() == Some('=') {
                    unsafe {
                        // Safety: cur() was some
                        self.input.bump();
                    }

                    return Ok(tok!("??="));
                }
                Ok(tok!("??"))
            }
            _ => {
                unsafe {
                    // Safety: peek() is callable only if cur() is Some
                    self.input.bump();
                }
                Ok(tok!('?'))
            }
        }
    }

    /// Read a token given `:`.
    ///
    /// This is extracted as a method to reduce size of `read_token`.
    #[inline(never)]
    fn read_token_colon(&mut self) -> LexResult<Token> {
        unsafe {
            // Safety: cur() is Some(':')
            self.input.bump();
        }
        Ok(tok!(':'))
    }

    /// Read a token given `0`.
    ///
    /// This is extracted as a method to reduce size of `read_token`.
    #[inline(never)]
    fn read_token_zero(&mut self) -> LexResult<Token> {
        let next = self.input.peek();

        let bigint = match next {
            Some('x') | Some('X') => self.read_radix_number::<16>(),
            Some('o') | Some('O') => self.read_radix_number::<8>(),
            Some('b') | Some('B') => self.read_radix_number::<2>(),
            _ => {
                return self.read_number(false).map(|v| match v {
                    Left((value, raw)) => Token::Num { value, raw },
                    Right((value, raw)) => Token::BigInt { value, raw },
                });
            }
        };

        bigint.map(|v| match v {
            Left((value, raw)) => Token::Num { value, raw },
            Right((value, raw)) => Token::BigInt { value, raw },
        })
    }

    /// Read a token given `|` or `&`.
    ///
    /// This is extracted as a method to reduce size of `read_token`.
    #[inline(never)]
    fn read_token_logical(&mut self, c: u8) -> LexResult<Token> {
        let had_line_break_before_last = self.had_line_break_before_last();
        let start = self.cur_pos();

        unsafe {
            // Safety: cur() is Some(c as char)
            self.input.bump();
        }
        let token = if c == b'&' {
            BinOpToken::BitAnd
        } else {
            BinOpToken::BitOr
        };

        // '|=', '&='
        if self.input.eat_byte(b'=') {
            return Ok(Token::AssignOp(match token {
                BinOpToken::BitAnd => AssignOp::BitAndAssign,
                BinOpToken::BitOr => AssignOp::BitOrAssign,
                _ => unreachable!(),
            }));
        }

        // '||', '&&'
        if self.input.cur() == Some(c as char) {
            unsafe {
                // Safety: cur() is Some(c)
                self.input.bump();
            }

            if self.input.cur() == Some('=') {
                unsafe {
                    // Safety: cur() is Some('=')
                    self.input.bump();
                }
                return Ok(Token::AssignOp(match token {
                    BinOpToken::BitAnd => op!("&&="),
                    BinOpToken::BitOr => op!("||="),
                    _ => unreachable!(),
                }));
            }

            // |||||||
            //   ^
            if had_line_break_before_last && token == BinOpToken::BitOr && self.is_str("||||| ") {
                let span = fixed_len_span(start, 7);
                self.emit_error_span(span, SyntaxError::TS1185);
                self.skip_line_comment(5);
                self.skip_space::<true>();
                return self.error_span(span, SyntaxError::TS1185);
            }

            return Ok(Token::BinOp(match token {
                BinOpToken::BitAnd => BinOpToken::LogicalAnd,
                BinOpToken::BitOr => BinOpToken::LogicalOr,
                _ => unreachable!(),
            }));
        }

        Ok(Token::BinOp(token))
    }

    /// Read a token given `*` or `%`.
    ///
    /// This is extracted as a method to reduce size of `read_token`.
    #[inline(never)]
    fn read_token_mul_mod(&mut self, c: u8) -> LexResult<Token> {
        let is_mul = c == b'*';
        unsafe {
            // Safety: cur() is Some(c)
            self.input.bump();
        }
        let mut token = if is_mul {
            Token::BinOp(BinOpToken::Mul)
        } else {
            Token::BinOp(BinOpToken::Mod)
        };

        // check for **
        if is_mul && self.input.eat_byte(b'*') {
            token = Token::BinOp(BinOpToken::Exp)
        }

        if self.input.eat_byte(b'=') {
            token = match token {
                Token::BinOp(BinOpToken::Mul) => Token::AssignOp(AssignOp::MulAssign),
                Token::BinOp(BinOpToken::Mod) => Token::AssignOp(AssignOp::ModAssign),
                Token::BinOp(BinOpToken::Exp) => Token::AssignOp(AssignOp::ExpAssign),
                _ => unreachable!(),
            }
        }

        Ok(token)
    }

    /// Read an escaped character for string literal.
    ///
    /// In template literal, we should preserve raw string.
    fn read_escaped_char(&mut self, in_template: bool) -> LexResult<Option<Vec<Char>>> {
        debug_assert_eq!(self.cur(), Some('\\'));

        let start = self.cur_pos();

        self.bump(); // '\'

        let c = match self.cur() {
            Some(c) => c,
            None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?,
        };

        macro_rules! push_c_and_ret {
            ($c:expr) => {{
                $c
            }};
        }

        let c = match c {
            '\\' => push_c_and_ret!('\\'),
            'n' => push_c_and_ret!('\n'),
            'r' => push_c_and_ret!('\r'),
            't' => push_c_and_ret!('\t'),
            'b' => push_c_and_ret!('\u{0008}'),
            'v' => push_c_and_ret!('\u{000b}'),
            'f' => push_c_and_ret!('\u{000c}'),
            '\r' => {
                self.bump(); // remove '\r'

                self.eat(b'\n');

                return Ok(None);
            }
            '\n' | '\u{2028}' | '\u{2029}' => {
                self.bump();

                return Ok(None);
            }

            // read hexadecimal escape sequences
            'x' => {
                self.bump(); // 'x'

                match self.read_int_u32::<16>(2)? {
                    Some(val) => return Ok(Some(vec![Char::from(val)])),
                    None => self.error(
                        start,
                        SyntaxError::BadCharacterEscapeSequence {
                            expected: "2 hex characters",
                        },
                    )?,
                }
            }

            // read unicode escape sequences
            'u' => match self.read_unicode_escape() {
                Ok(chars) => return Ok(Some(chars)),
                Err(err) => self.error(start, err.into_kind())?,
            },

            // octal escape sequences
            '0'..='7' => {
                self.bump();

                let first_c = if c == '0' {
                    match self.cur() {
                        Some(next) if next.is_digit(8) => c,
                        // \0 is not an octal literal nor decimal literal.
                        _ => return Ok(Some(vec!['\u{0000}'.into()])),
                    }
                } else {
                    c
                };

                // TODO: Show template instead of strict mode
                if in_template {
                    self.error(start, SyntaxError::LegacyOctal)?
                }

                self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);

                let mut value: u8 = first_c.to_digit(8).unwrap() as u8;

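                // Read up to two more octal digits. The checked second step stops before
                // the value would exceed 0xFF and leaves the extra digit in the input
                // (e.g. `\400` lexes as `\40` followed by `0`).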
                macro_rules! one {
                    ($check:expr) => {{
                        let cur = self.cur();

                        match cur.and_then(|c| c.to_digit(8)) {
                            Some(v) => {
                                value = if $check {
                                    let new_val = value
                                        .checked_mul(8)
                                        .and_then(|value| value.checked_add(v as u8));
                                    match new_val {
                                        Some(val) => val,
                                        None => return Ok(Some(vec![Char::from(value as char)])),
                                    }
                                } else {
                                    value * 8 + v as u8
                                };

                                self.bump();
                            }
                            _ => return Ok(Some(vec![Char::from(value as u32)])),
                        }
                    }};
                }

                one!(false);
                one!(true);

                return Ok(Some(vec![Char::from(value as char)]));
            }
            _ => c,
        };

        unsafe {
            // Safety: cur() is Some(c) if this method is called.
            self.input.bump();
        }

        Ok(Some(vec![c.into()]))
    }

    fn read_token_plus_minus(&mut self, c: u8) -> LexResult<Option<Token>> {
        let start = self.cur_pos();

        unsafe {
            // Safety: cur() is Some(c), if this method is called.
            self.input.bump();
        }

        // '++', '--'
        Ok(Some(if self.input.cur() == Some(c as char) {
            unsafe {
                // Safety: cur() is Some(c)
                self.input.bump();
            }

            // Handle -->
            if self.state.had_line_break && c == b'-' && self.eat(b'>') {
                self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
                self.skip_line_comment(0);
                self.skip_space::<true>();
                return self.read_token();
            }

            if c == b'+' {
                Token::PlusPlus
            } else {
                Token::MinusMinus
            }
        } else if self.input.eat_byte(b'=') {
            Token::AssignOp(if c == b'+' {
                AssignOp::AddAssign
            } else {
                AssignOp::SubAssign
            })
        } else {
            Token::BinOp(if c == b'+' {
                BinOpToken::Add
            } else {
                BinOpToken::Sub
            })
        }))
    }

    fn read_token_bang_or_eq(&mut self, c: u8) -> LexResult<Option<Token>> {
        let start = self.cur_pos();
        let had_line_break_before_last = self.had_line_break_before_last();

        unsafe {
            // Safety: cur() is Some(c) if this method is called.
            self.input.bump();
        }

        Ok(Some(if self.input.eat_byte(b'=') {
            // "=="

            if self.input.eat_byte(b'=') {
                if c == b'!' {
                    Token::BinOp(BinOpToken::NotEqEq)
                } else {
                    // =======
                    //    ^
                    if had_line_break_before_last && self.is_str("====") {
                        self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
                        self.skip_line_comment(4);
                        self.skip_space::<true>();
                        return self.read_token();
                    }

                    Token::BinOp(BinOpToken::EqEqEq)
                }
            } else if c == b'!' {
                Token::BinOp(BinOpToken::NotEq)
            } else {
                Token::BinOp(BinOpToken::EqEq)
            }
        } else if c == b'=' && self.input.eat_byte(b'>') {
            // "=>"

            Token::Arrow
        } else if c == b'!' {
            Token::Bang
        } else {
            Token::AssignOp(AssignOp::Assign)
        }))
    }
}

impl Lexer<'_> {
    #[inline(never)]
    fn read_slash(&mut self) -> LexResult<Option<Token>> {
        debug_assert_eq!(self.cur(), Some('/'));

        // Divide operator
        self.bump();

        Ok(Some(if self.eat(b'=') {
            tok!("/=")
        } else {
            tok!('/')
        }))
    }

    #[inline(never)]
    fn read_token_lt_gt(&mut self) -> LexResult<Option<Token>> {
        debug_assert!(self.cur() == Some('<') || self.cur() == Some('>'));

        let had_line_break_before_last = self.had_line_break_before_last();
        let start = self.cur_pos();
        let c = self.cur().unwrap();
        self.bump();

        if self.syntax.typescript() && self.ctx.in_type && !self.ctx.should_not_lex_lt_or_gt_as_type
        {
            if c == '<' {
                return Ok(Some(tok!('<')));
            } else if c == '>' {
                return Ok(Some(tok!('>')));
            }
        }

        // XML style comment. `<!--`
        if c == '<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-') {
            self.skip_line_comment(3);
            self.skip_space::<true>();
            self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);

            return self.read_token();
        }

        let mut op = if c == '<' {
            BinOpToken::Lt
        } else {
            BinOpToken::Gt
        };

        // '<<', '>>'
        if self.cur() == Some(c) {
            self.bump();
            op = if c == '<' {
                BinOpToken::LShift
            } else {
                BinOpToken::RShift
            };

            //'>>>'
            if c == '>' && self.cur() == Some(c) {
                self.bump();
                op = BinOpToken::ZeroFillRShift;
            }
        }

        let token = if self.eat(b'=') {
            match op {
                BinOpToken::Lt => Token::BinOp(BinOpToken::LtEq),
                BinOpToken::Gt => Token::BinOp(BinOpToken::GtEq),
                BinOpToken::LShift => Token::AssignOp(AssignOp::LShiftAssign),
                BinOpToken::RShift => Token::AssignOp(AssignOp::RShiftAssign),
                BinOpToken::ZeroFillRShift => Token::AssignOp(AssignOp::ZeroFillRShiftAssign),
                _ => unreachable!(),
            }
        } else {
            Token::BinOp(op)
        };

        // All conflict markers consist of the same character repeated seven times.
        // If it is a <<<<<<< or >>>>>>> marker then it is also followed by a space.
        // <<<<<<<
        //   ^
        // >>>>>>>
        //    ^
        if had_line_break_before_last
            && match op {
                BinOpToken::LShift if self.is_str("<<<<< ") => true,
                BinOpToken::ZeroFillRShift if self.is_str(">>>> ") => true,
                _ => false,
            }
        {
            self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
            self.skip_line_comment(5);
            self.skip_space::<true>();
            return self.read_token();
        }

        Ok(Some(token))
    }

    /// This can be used if there's no keyword starting with the first
    /// character.
    fn read_ident_unknown(&mut self) -> LexResult<Token> {
        debug_assert!(self.cur().is_some());

        let (word, _) = self
            .read_word_as_str_with(|l, s, _, _| Word::Ident(IdentLike::Other(l.atoms.atom(s))))?;

        Ok(Word(word))
    }

    /// This can be used if there's no keyword starting with the first
    /// character.
    fn read_word_with(
        &mut self,
        convert: &dyn Fn(&str) -> Option<Word>,
    ) -> LexResult<Option<Token>> {
        debug_assert!(self.cur().is_some());

        let start = self.cur_pos();
        let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| {
            if can_be_known {
                if let Some(word) = convert(s) {
                    return word;
                }
            }

            Word::Ident(IdentLike::Other(l.atoms.atom(s)))
        })?;

        // Note: ctx is stored in the lexer because of this error.
        // 'await' and 'yield' may have the semantics of a reserved word, which means
        // either the lexer must know the context or the parser must handle this
        // error. We take the former approach.
        if has_escape && self.ctx.is_reserved(&word) {
            self.error(
                start,
                SyntaxError::EscapeInReservedWord { word: word.into() },
            )?
        } else {
            Ok(Some(Token::Word(word)))
        }
    }

    /// This method is optimized for texts without escape sequences.
    ///
    /// `convert(text, has_escape, can_be_keyword)`
    fn read_word_as_str_with<F, Ret>(&mut self, convert: F) -> LexResult<(Ret, bool)>
    where
        F: for<'any> FnOnce(&'any mut Lexer<'_>, &str, bool, bool) -> Ret,
    {
        debug_assert!(self.cur().is_some());
        let mut first = true;
        let mut can_be_keyword = true;
        let mut slice_start = self.cur_pos();
        let mut has_escape = false;

        self.with_buf(|l, buf| {
            loop {
                if let Some(c) = l.input.cur_as_ascii() {
                    // Performance optimization: keywords are all lower-case ASCII
                    // letters, so an upper-case letter or a digit rules them out.
                    if can_be_keyword && (c.is_ascii_uppercase() || c.is_ascii_digit()) {
                        can_be_keyword = false;
                    }

                    if Ident::is_valid_continue(c as _) {
                        l.bump();
                        continue;
                    } else if first && Ident::is_valid_start(c as _) {
                        l.bump();
                        first = false;
                        continue;
                    }

                    // unicode escape
                    if c == b'\\' {
                        first = false;
                        has_escape = true;
                        let start = l.cur_pos();
                        l.bump();

                        if !l.is(b'u') {
                            l.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)?
                        }

                        {
                            let end = l.input.cur_pos();
                            let s = unsafe {
                                // Safety: start and end are valid position because we got them from
                                // `self.input`
                                l.input.slice(slice_start, start)
                            };
                            buf.push_str(s);
                            unsafe {
                                // Safety: We got end from `self.input`
                                l.input.reset_to(end);
                            }
                        }

                        let chars = l.read_unicode_escape()?;

                        if let Some(c) = chars.first() {
                            let valid = if first {
                                c.is_ident_start()
                            } else {
                                c.is_ident_part()
                            };

                            if !valid {
                                l.emit_error(start, SyntaxError::InvalidIdentChar);
                            }
                        }

                        for c in chars {
                            buf.extend(c);
                        }

                        slice_start = l.cur_pos();
                        continue;
                    }

                    // ASCII but not a valid identifier

                    break;
                }

                if let Some(c) = l.input.cur() {
                    if Ident::is_valid_continue(c) {
                        l.bump();
                        continue;
                    } else if first && Ident::is_valid_start(c) {
                        l.bump();
                        first = false;
                        continue;
                    }
                }

                break;
            }

            let end = l.cur_pos();

            let value = if !has_escape {
                // Fast path: raw slice is enough if there's no escape.

                let s = unsafe {
                    // Safety: slice_start and end are valid position because we got them from
                    // `self.input`
                    l.input.slice(slice_start, end)
                };
                let s = unsafe {
                    // Safety: We don't use 'static. We just bypass the lifetime check.
                    transmute::<&str, &'static str>(s)
                };

                convert(l, s, has_escape, can_be_keyword)
            } else {
                let s = unsafe {
                    // Safety: slice_start and end are valid position because we got them from
                    // `self.input`
                    l.input.slice(slice_start, end)
                };
                buf.push_str(s);

                convert(l, buf, has_escape, can_be_keyword)
            };

            Ok((value, has_escape))
        })
    }

    fn read_unicode_escape(&mut self) -> LexResult<Vec<Char>> {
        debug_assert_eq!(self.cur(), Some('u'));

        let mut chars = Vec::new();
        let mut is_curly = false;

        self.bump(); // 'u'

        if self.eat(b'{') {
            is_curly = true;
        }

        let state = self.input.cur_pos();
        let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
            Ok(Some(val)) => {
                if 0x0010_ffff >= val {
                    char::from_u32(val)
                } else {
                    let start = self.cur_pos();

                    self.error(
                        start,
                        SyntaxError::BadCharacterEscapeSequence {
                            expected: if is_curly {
                                "1-6 hex characters in the range 0 to 10FFFF."
                            } else {
                                "4 hex characters"
                            },
                        },
                    )?
                }
            }
            _ => {
                let start = self.cur_pos();

                self.error(
                    start,
                    SyntaxError::BadCharacterEscapeSequence {
                        expected: if is_curly {
                            "1-6 hex characters"
                        } else {
                            "4 hex characters"
                        },
                    },
                )?
            }
        };

        match c {
            Some(c) => {
                chars.push(c.into());
            }
            _ => {
                unsafe {
                    // Safety: state is valid position because we got it from cur_pos()
                    self.input.reset_to(state);
                }

                chars.push(Char::from('\\'));
                chars.push(Char::from('u'));

                if is_curly {
                    chars.push(Char::from('{'));

                    for _ in 0..6 {
                        if let Some(c) = self.input.cur() {
                            if c == '}' {
                                break;
                            }

                            self.bump();

                            chars.push(Char::from(c));
                        } else {
                            break;
                        }
                    }

                    chars.push(Char::from('}'));
                } else {
                    for _ in 0..4 {
                        if let Some(c) = self.input.cur() {
                            self.bump();

                            chars.push(Char::from(c));
                        }
                    }
                }
            }
        }

        if is_curly && !self.eat(b'}') {
            self.error(state, SyntaxError::InvalidUnicodeEscape)?
        }

        Ok(chars)
    }

    /// See https://tc39.github.io/ecma262/#sec-literals-string-literals
    fn read_str_lit(&mut self) -> LexResult<Token> {
        debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"'));
        let start = self.cur_pos();
        let quote = self.cur().unwrap() as u8;

        self.bump(); // opening quote

        let mut has_escape = false;
        let mut slice_start = self.input.cur_pos();

        self.with_buf(|l, buf| {
            loop {
                if let Some(c) = l.input.cur_as_ascii() {
                    if c == quote {
                        let value_end = l.cur_pos();

                        let value = if !has_escape {
                            let s = unsafe {
                                // Safety: slice_start and value_end are valid position because we
                                // got them from `self.input`
                                l.input.slice(slice_start, value_end)
                            };

                            l.atoms.atom(s)
                        } else {
                            let s = unsafe {
                                // Safety: slice_start and value_end are valid position because we
                                // got them from `self.input`
                                l.input.slice(slice_start, value_end)
                            };
                            buf.push_str(s);

                            l.atoms.atom(&**buf)
                        };

                        unsafe {
                            // Safety: cur is quote
                            l.input.bump();
                        }

                        let end = l.cur_pos();

                        let raw = unsafe {
                            // Safety: start and end are valid position because we got them from
                            // `self.input`
                            l.input.slice(start, end)
                        };
                        let raw = l.atoms.atom(raw);

                        return Ok(Token::Str { value, raw });
                    }

                    if c == b'\\' {
                        has_escape = true;

                        {
                            let end = l.cur_pos();
                            let s = unsafe {
                                // Safety: start and end are valid position because we got them from
                                // `self.input`
                                l.input.slice(slice_start, end)
                            };
                            buf.push_str(s);
                        }

                        if let Some(chars) = l.read_escaped_char(false)? {
                            for c in chars {
                                buf.extend(c);
                            }
                        }

                        slice_start = l.cur_pos();
                        continue;
                    }

                    if (c as char).is_line_break() {
                        break;
                    }

                    unsafe {
                        // Safety: cur is an ASCII character
                        l.input.bump();
                    }
                    continue;
                }

                match l.input.cur() {
                    Some(c) => {
                        if c.is_line_break() {
                            break;
                        }
                        unsafe {
                            // Safety: cur is Some(c)
                            l.input.bump();
                        }
                    }
                    None => break,
                }
            }

            {
                let end = l.cur_pos();
                let s = unsafe {
                    // Safety: start and end are valid position because we got them from
                    // `self.input`
                    l.input.slice(slice_start, end)
                };
                buf.push_str(s);
            }

            l.emit_error(start, SyntaxError::UnterminatedStrLit);

            let end = l.cur_pos();

            let raw = unsafe {
                // Safety: start and end are valid position because we got them from
                // `self.input`
                l.input.slice(start, end)
            };
            Ok(Token::Str {
                value: l.atoms.atom(&*buf),
                raw: l.atoms.atom(raw),
            })
        })
    }

    /// Expects current char to be '/'
    fn read_regexp(&mut self, start: BytePos) -> LexResult<Token> {
        unsafe {
            // Safety: start is valid position, and cur() is Some('/')
            self.input.reset_to(start);
        }

        debug_assert_eq!(self.cur(), Some('/'));

        let start = self.cur_pos();

        self.bump();

        let (mut escaped, mut in_class) = (false, false);

        let content = self.with_buf(|l, buf| {
            while let Some(c) = l.cur() {
                // This is ported from babel.
                // Seems like regexp literal cannot contain linebreak.
                if c.is_line_terminator() {
                    let span = l.span(start);

                    return Err(Error::new(span, SyntaxError::UnterminatedRegExp));
                }

                if escaped {
                    escaped = false;
                } else {
                    match c {
                        '[' => in_class = true,
                        ']' if in_class => in_class = false,
                        // Terminates content part of regex literal
                        '/' if !in_class => break,
                        _ => {}
                    }

                    escaped = c == '\\';
                }

                l.bump();
                buf.push(c);
            }

            Ok(l.atoms.atom(&**buf))
        })?;

        // input is terminated without following `/`
        if !self.is(b'/') {
            let span = self.span(start);

            return Err(Error::new(span, SyntaxError::UnterminatedRegExp));
        }

        self.bump(); // '/'

        // Spec says "It is a Syntax Error if IdentifierPart contains a Unicode escape
        // sequence." TODO: check for escape

        // Need to use `read_word` because '\uXXXX' sequences are allowed
        // here (don't ask).
        // let flags_start = self.cur_pos();
        let flags = {
            match self.cur() {
                Some(c) if c.is_ident_start() => self
                    .read_word_as_str_with(|l, s, _, _| l.atoms.atom(s))
                    .map(Some),
                _ => Ok(None),
            }
        }?
        .map(|(value, _)| value)
        .unwrap_or_default();

        Ok(Token::Regex(content, flags))
    }

    #[cold]
    fn read_shebang(&mut self) -> LexResult<Option<Atom>> {
        if self.input.cur() != Some('#') || self.input.peek() != Some('!') {
            return Ok(None);
        }
        unsafe {
            // Safety: cur() is Some('#')
            self.input.bump();
            // Safety: cur() is Some('!')
            self.input.bump();
        }
        let s = self.input.uncons_while(|c| !c.is_line_terminator());
        Ok(Some(self.atoms.atom(s)))
    }

    fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult<Token> {
        let start = self.cur_pos();

        let mut cooked = Ok(String::new());
        let mut cooked_slice_start = start;
        let raw_slice_start = start;

        macro_rules! consume_cooked {
            () => {{
                if let Ok(cooked) = &mut cooked {
                    let last_pos = self.cur_pos();
                    cooked.push_str(unsafe {
                        // Safety: Both of start and last_pos are valid position because we got them
                        // from `self.input`
                        self.input.slice(cooked_slice_start, last_pos)
                    });
                }
            }};
        }

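        // `cooked` holds the escape-interpreted value and becomes `Err` when an escape
        // is invalid (the parser decides whether that is fatal, e.g. for tagged
        // templates); `raw` is always the verbatim source text.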
        while let Some(c) = self.cur() {
            if c == '`' || (c == '$' && self.peek() == Some('{')) {
                if start == self.cur_pos() && self.state.last_was_tpl_element() {
                    if c == '$' {
                        self.bump();
                        self.bump();
                        return Ok(tok!("${"));
                    } else {
                        self.bump();
                        return Ok(tok!('`'));
                    }
                }

                // If we don't have any escape
                let cooked = if cooked_slice_start == raw_slice_start {
                    let last_pos = self.cur_pos();
                    let s = unsafe {
                        // Safety: Both of start and last_pos are valid position because we got them
                        // from `self.input`
                        self.input.slice(cooked_slice_start, last_pos)
                    };

                    Ok(self.atoms.atom(s))
                } else {
                    consume_cooked!();

                    cooked.map(|s| self.atoms.atom(s))
                };

                // TODO: Handle error
                let end = self.input.cur_pos();
                let raw = unsafe {
                    // Safety: Both of start and last_pos are valid position because we got them
                    // from `self.input`
                    self.input.slice(raw_slice_start, end)
                };
                return Ok(Token::Template {
                    cooked,
                    raw: self.atoms.atom(raw),
                });
            }

            if c == '\\' {
                consume_cooked!();

                match self.read_escaped_char(true) {
                    Ok(Some(chars)) => {
                        if let Ok(ref mut cooked) = cooked {
                            for c in chars {
                                cooked.extend(c);
                            }
                        }
                    }
                    Ok(None) => {}
                    Err(error) => {
                        cooked = Err(error);
                    }
                }

                cooked_slice_start = self.cur_pos();
            } else if c.is_line_terminator() {
                self.state.had_line_break = true;

                consume_cooked!();

                let c = if c == '\r' && self.peek() == Some('\n') {
                    self.bump(); // '\r'
                    '\n'
                } else {
                    match c {
                        '\n' => '\n',
                        '\r' => '\n',
                        '\u{2028}' => '\u{2028}',
                        '\u{2029}' => '\u{2029}',
                        _ => unreachable!(),
                    }
                };

                self.bump();

                if let Ok(ref mut cooked) = cooked {
                    cooked.push(c);
                }
                cooked_slice_start = self.cur_pos();
            } else {
                self.bump();
            }
        }

        self.error(start_of_tpl, SyntaxError::UnterminatedTpl)?
    }

    #[inline]
    #[allow(clippy::misnamed_getters)]
    pub fn had_line_break_before_last(&self) -> bool {
        self.state.had_line_break
    }

    #[inline]
    pub fn set_expr_allowed(&mut self, allow: bool) {
        self.state.is_expr_allowed = allow;
    }

    #[inline]
    pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.state.next_regexp = start;
    }
}

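/// Creates an empty span at `p`.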
fn pos_span(p: BytePos) -> Span {
    Span::new(p, p)
}

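/// Creates a span covering `len` bytes starting at `p`.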
fn fixed_len_span(p: BytePos, len: u32) -> Span {
    Span::new(p, p + BytePos(len))
}