1use std::mem::take;
2
3use swc_common::BytePos;
4use swc_ecma_ast::EsVersion;
5use swc_ecma_lexer::{
6 common::{
7 lexer::{
8 char::CharExt,
9 comments_buffer::{BufferedCommentKind, CommentsBufferTrait},
10 state::State as StateTrait,
11 LexResult,
12 },
13 syntax::SyntaxFlags,
14 },
15 error::SyntaxError,
16 TokenContexts,
17};
18
19use super::{Context, Input, Lexer, LexerTrait};
20use crate::{
21 error::Error,
22 input::Tokens,
23 lexer::{
24 comments_buffer::CommentsBufferCheckpoint,
25 token::{Token, TokenAndSpan, TokenValue},
26 },
27};
28
29#[derive(Clone)]
33pub struct State {
34 pub had_line_break: bool,
36 pub had_line_break_before_last: bool,
38 is_first: bool,
40 pub next_regexp: Option<BytePos>,
41 pub start: BytePos,
42 pub prev_hi: BytePos,
43
44 pub(super) token_value: Option<TokenValue>,
45 token_type: Option<Token>,
46}
47
48pub struct LexerCheckpoint {
49 comments_buffer: CommentsBufferCheckpoint,
50 state: State,
51 ctx: Context,
52 input_last_pos: BytePos,
53}
54
55impl<'a> swc_ecma_lexer::common::input::Tokens<TokenAndSpan> for Lexer<'a> {
56 type Checkpoint = LexerCheckpoint;
57
58 fn checkpoint_save(&self) -> Self::Checkpoint {
59 Self::Checkpoint {
60 state: self.state.clone(),
61 ctx: self.ctx,
62 input_last_pos: self.input.last_pos(),
63 comments_buffer: self
64 .comments_buffer
65 .as_ref()
66 .map(|cb| cb.checkpoint_save())
67 .unwrap_or_default(),
68 }
69 }
70
71 fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint) {
72 self.state = checkpoint.state;
73 self.ctx = checkpoint.ctx;
74 unsafe { self.input.reset_to(checkpoint.input_last_pos) };
75 if let Some(comments_buffer) = self.comments_buffer.as_mut() {
76 comments_buffer.checkpoint_load(checkpoint.comments_buffer);
77 }
78 }
79
80 #[inline]
81 fn set_ctx(&mut self, ctx: Context) {
82 if ctx.contains(Context::Module) && !self.module_errors.is_empty() {
83 self.errors.append(&mut self.module_errors);
84 }
85 self.ctx = ctx
86 }
87
88 #[inline]
89 fn ctx(&self) -> Context {
90 self.ctx
91 }
92
93 #[inline]
94 fn ctx_mut(&mut self) -> &mut Context {
95 &mut self.ctx
96 }
97
98 #[inline]
99 fn syntax(&self) -> SyntaxFlags {
100 self.syntax
101 }
102
103 #[inline]
104 fn target(&self) -> EsVersion {
105 self.target
106 }
107
108 #[inline]
109 fn start_pos(&self) -> BytePos {
110 self.start_pos
111 }
112
113 #[inline]
114 fn set_expr_allowed(&mut self, _: bool) {}
115
116 #[inline]
117 fn set_next_regexp(&mut self, start: Option<BytePos>) {
118 self.state.next_regexp = start;
119 }
120
121 #[inline]
122 fn token_context(&self) -> &TokenContexts {
123 unreachable!();
124 }
125
126 #[inline]
127 fn token_context_mut(&mut self) -> &mut TokenContexts {
128 unreachable!();
129 }
130
131 #[inline]
132 fn set_token_context(&mut self, _: TokenContexts) {
133 unreachable!();
134 }
135
136 fn add_error(&mut self, error: Error) {
137 self.errors.push(error);
138 }
139
140 fn add_module_mode_error(&mut self, error: Error) {
141 if self.ctx.contains(Context::Module) {
142 self.add_error(error);
143 return;
144 }
145 self.module_errors.push(error);
146 }
147
148 #[inline]
149 fn take_errors(&mut self) -> Vec<Error> {
150 take(&mut self.errors)
151 }
152
153 #[inline]
154 fn take_script_module_errors(&mut self) -> Vec<Error> {
155 take(&mut self.module_errors)
156 }
157
158 #[inline]
159 fn end_pos(&self) -> BytePos {
160 self.input.end_pos()
161 }
162
163 #[inline]
164 fn update_token_flags(&mut self, f: impl FnOnce(&mut swc_ecma_lexer::lexer::TokenFlags)) {
165 f(&mut self.token_flags)
166 }
167
168 #[inline]
169 fn token_flags(&self) -> swc_ecma_lexer::lexer::TokenFlags {
170 self.token_flags
171 }
172}
173
174impl crate::input::Tokens for Lexer<'_> {
175 fn clone_token_value(&self) -> Option<TokenValue> {
176 self.state.token_value.clone()
177 }
178
179 fn get_token_value(&self) -> Option<&TokenValue> {
180 self.state.token_value.as_ref()
181 }
182
183 fn set_token_value(&mut self, token_value: Option<TokenValue>) {
184 self.state.token_value = token_value;
185 }
186
187 fn take_token_value(&mut self) -> Option<TokenValue> {
188 self.state.token_value.take()
189 }
190
191 fn rescan_jsx_token(&mut self, allow_multiline_jsx_text: bool, reset: BytePos) -> TokenAndSpan {
192 unsafe {
193 self.input.reset_to(reset);
194 }
195 Tokens::scan_jsx_token(self, allow_multiline_jsx_text)
196 }
197
198 fn rescan_jsx_open_el_terminal_token(&mut self, reset: BytePos) -> TokenAndSpan {
199 unsafe {
200 self.input.reset_to(reset);
201 }
202 Tokens::scan_jsx_open_el_terminal_token(self)
203 }
204
205 fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> TokenAndSpan {
206 let start = self.cur_pos();
207 let res = match self.scan_jsx_token(allow_multiline_jsx_text) {
208 Ok(res) => Ok(res),
209 Err(error) => {
210 self.state.set_token_value(TokenValue::Error(error));
211 Err(Token::Error)
212 }
213 };
214 let token = match res {
215 Ok(t) => t,
216 Err(e) => e,
217 };
218 let span = self.span(start);
219 if token != Token::Eof {
220 if let Some(comments) = self.comments_buffer.as_mut() {
221 comments.pending_to_comment(BufferedCommentKind::Leading, start);
222 }
223
224 self.state.set_token_type(token);
225 self.state.prev_hi = self.last_pos();
226 self.state.had_line_break_before_last = self.had_line_break_before_last();
227 }
228 TokenAndSpan {
230 token,
231 had_line_break: self.had_line_break_before_last(),
232 span,
233 }
234 }
235
236 fn scan_jsx_open_el_terminal_token(&mut self) -> TokenAndSpan {
237 self.skip_space::<true>();
238 let start = self.input.cur_pos();
239 let res = match self.scan_jsx_attrs_terminal_token() {
240 Ok(res) => Ok(res),
241 Err(error) => {
242 self.state.set_token_value(TokenValue::Error(error));
243 Err(Token::Error)
244 }
245 };
246 let token = match res {
247 Ok(t) => t,
248 Err(e) => e,
249 };
250 let span = self.span(start);
251 if token != Token::Eof {
252 if let Some(comments) = self.comments_buffer.as_mut() {
253 comments.pending_to_comment(BufferedCommentKind::Leading, start);
254 }
255
256 self.state.set_token_type(token);
257 self.state.prev_hi = self.last_pos();
258 self.state.had_line_break_before_last = self.had_line_break_before_last();
259 }
260 TokenAndSpan {
262 token,
263 had_line_break: self.had_line_break_before_last(),
264 span,
265 }
266 }
267
268 fn scan_jsx_identifier(&mut self, start: BytePos) -> TokenAndSpan {
269 let token = self.state.token_type.unwrap();
270 debug_assert!(token.is_word());
271 let mut v = String::with_capacity(16);
272 while let Some(ch) = self.input().cur() {
273 if ch == '-' {
274 v.push(ch);
275 self.bump();
276 } else {
277 let old_pos = self.cur_pos();
278 v.push_str(&self.scan_identifier_parts());
279 if self.cur_pos() == old_pos {
280 break;
281 }
282 }
283 }
284 let v = if !v.is_empty() {
285 let v = if token.is_known_ident() {
286 format!("{}{}", token.to_string(None), v)
287 } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
288 format!("{value}{v}")
289 } else {
290 format!("{}{}", token.to_string(None), v)
291 };
292 self.atom(v)
293 } else if token.is_known_ident() || token.is_keyword() {
294 self.atom(token.to_string(None))
295 } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
296 value
297 } else {
298 unreachable!(
299 "`token_value` should be a word, but got: {:?}",
300 self.state.token_value
301 )
302 };
303 self.state.set_token_value(TokenValue::Word(v));
304 TokenAndSpan {
305 token: Token::JSXName,
306 had_line_break: self.had_line_break_before_last(),
307 span: self.span(start),
308 }
309 }
310
311 fn scan_jsx_attribute_value(&mut self) -> TokenAndSpan {
312 let Some(cur) = self.cur() else {
313 let start = self.cur_pos();
314 return TokenAndSpan {
315 token: Token::Eof,
316 had_line_break: self.had_line_break_before_last(),
317 span: self.span(start),
318 };
319 };
320 let start = self.cur_pos();
321
322 match cur {
323 '\'' | '"' => {
324 let token = self.read_jsx_str(cur);
325 let token = match token {
326 Ok(token) => token,
327 Err(e) => {
328 self.state.set_token_value(TokenValue::Error(e));
329 return TokenAndSpan {
330 token: Token::Error,
331 had_line_break: self.had_line_break_before_last(),
332 span: self.span(start),
333 };
334 }
335 };
336 debug_assert!(self
337 .get_token_value()
338 .is_some_and(|t| matches!(t, TokenValue::Str { .. })));
339 debug_assert!(token == Token::Str);
340 TokenAndSpan {
341 token,
342 had_line_break: self.had_line_break_before_last(),
343 span: self.span(start),
344 }
345 }
346 _ => self.next().unwrap_or_else(|| TokenAndSpan {
347 token: Token::Eof,
348 had_line_break: self.had_line_break_before_last(),
349 span: self.span(start),
350 }),
351 }
352 }
353
354 fn rescan_template_token(
355 &mut self,
356 start: BytePos,
357 start_with_back_tick: bool,
358 ) -> TokenAndSpan {
359 unsafe { self.input.reset_to(start) };
360 let res = self.scan_template_token(start, start_with_back_tick);
361 let token = match res.map_err(|e| {
362 self.state.set_token_value(TokenValue::Error(e));
363 Token::Error
364 }) {
365 Ok(t) => t,
366 Err(e) => e,
367 };
368 let span = if start_with_back_tick {
369 self.span(start)
370 } else {
371 self.span(start + BytePos(1))
373 };
374
375 if token != Token::Eof {
376 if let Some(comments) = self.comments_buffer.as_mut() {
377 comments.pending_to_comment(BufferedCommentKind::Leading, start);
378 }
379
380 self.state.set_token_type(token);
381 self.state.prev_hi = self.last_pos();
382 self.state.had_line_break_before_last = self.had_line_break_before_last();
383 }
384 TokenAndSpan {
386 token,
387 had_line_break: self.had_line_break_before_last(),
388 span,
389 }
390 }
391}
392
393impl Lexer<'_> {
394 fn next_token(&mut self, start: &mut BytePos) -> Result<Token, Error> {
395 if let Some(start) = self.state.next_regexp {
396 return self.read_regexp(start);
397 }
398
399 if self.state.is_first {
400 if let Some(shebang) = self.read_shebang()? {
401 self.state.set_token_value(TokenValue::Word(shebang));
402 return Ok(Token::Shebang);
403 }
404 }
405
406 self.state.had_line_break = self.state.is_first;
407 self.state.is_first = false;
408
409 self.skip_space::<true>();
410 *start = self.input.cur_pos();
411
412 if self.input.last_pos() == self.input.end_pos() {
413 self.consume_pending_comments();
415 return Ok(Token::Eof);
416 }
417
418 self.state.start = *start;
425
426 self.read_token()
427 }
428
429 fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> Result<Token, Error> {
430 debug_assert!(self.syntax.jsx());
431
432 if self.input_mut().as_str().is_empty() {
433 return Ok(Token::Eof);
434 };
435
436 if self.input.eat_byte(b'<') {
437 return Ok(if self.input.eat_byte(b'/') {
438 Token::LessSlash
439 } else {
440 Token::Lt
441 });
442 } else if self.input.eat_byte(b'{') {
443 return Ok(Token::LBrace);
444 }
445
446 let start = self.input.cur_pos();
447 let mut first_non_whitespace = 0;
448 let mut chunk_start = start;
449 let mut value = String::new();
450
451 while let Some(ch) = self.input_mut().cur() {
452 if ch == '{' {
453 break;
454 } else if ch == '<' {
455 break;
457 }
458
459 if ch == '>' {
460 self.emit_error(
461 self.input().cur_pos(),
462 SyntaxError::UnexpectedTokenWithSuggestions {
463 candidate_list: vec!["`{'>'}`", "`>`"],
464 },
465 );
466 } else if ch == '}' {
467 self.emit_error(
468 self.input().cur_pos(),
469 SyntaxError::UnexpectedTokenWithSuggestions {
470 candidate_list: vec!["`{'}'}`", "`}`"],
471 },
472 );
473 }
474
475 if first_non_whitespace == 0 && ch.is_line_terminator() {
476 first_non_whitespace = -1;
477 } else if !allow_multiline_jsx_text
478 && ch.is_line_terminator()
479 && first_non_whitespace > 0
480 {
481 break;
482 } else if ch.is_whitespace() {
483 first_non_whitespace = self.cur_pos().0 as i32;
484 }
485
486 if ch == '&' {
487 let cur_pos = self.input().cur_pos();
488
489 let s = unsafe {
490 self.input_slice(chunk_start, cur_pos)
492 };
493 value.push_str(s);
494
495 if let Ok(jsx_entity) = self.read_jsx_entity() {
496 value.push(jsx_entity.0);
497
498 chunk_start = self.input.cur_pos();
499 }
500 } else {
501 self.bump();
502 }
503 }
504
505 let end = self.input().cur_pos();
506 let raw = unsafe {
507 self.input_slice(start, end)
509 };
510 let value = if value.is_empty() {
511 self.atom(raw)
512 } else {
513 let s = unsafe {
514 self.input_slice(chunk_start, end)
516 };
517 value.push_str(s);
518 self.atom(value)
519 };
520
521 let raw: swc_atoms::Atom = self.atom(raw);
522
523 self.state.set_token_value(TokenValue::Str { raw, value });
524
525 self.state.start = start;
526
527 Ok(Token::JSXText)
528 }
529
530 fn scan_jsx_attrs_terminal_token(&mut self) -> LexResult<Token> {
531 if self.input_mut().as_str().is_empty() {
532 Ok(Token::Eof)
533 } else if self.input.eat_byte(b'>') {
534 Ok(Token::Gt)
535 } else if self.input.eat_byte(b'/') {
536 Ok(Token::Slash)
537 } else {
538 self.read_token()
539 }
540 }
541
542 fn scan_identifier_parts(&mut self) -> String {
543 let mut v = String::with_capacity(16);
544 while let Some(ch) = self.input().cur() {
545 if ch.is_ident_part() {
546 v.push(ch);
547 self.input_mut().bump_bytes(ch.len_utf8());
548 } else if ch == '\\' {
549 self.bump(); if !self.is(b'u') {
551 self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
552 continue;
553 }
554 self.bump(); let Ok(chars) = self.read_unicode_escape() else {
556 self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
557 break;
558 };
559 for c in chars {
560 v.extend(c);
561 }
562 self.token_flags |= swc_ecma_lexer::lexer::TokenFlags::UNICODE;
563 } else {
564 break;
565 }
566 }
567 v
568 }
569}
570
571impl Iterator for Lexer<'_> {
572 type Item = TokenAndSpan;
573
574 fn next(&mut self) -> Option<Self::Item> {
575 let mut start = self.cur_pos();
576
577 let token = match self.next_token(&mut start) {
578 Ok(res) => res,
579 Err(error) => {
580 self.state.set_token_value(TokenValue::Error(error));
581 Token::Error
582 }
583 };
584
585 let span = self.span(start);
586 if token != Token::Eof {
587 if let Some(comments) = self.comments_buffer.as_mut() {
588 comments.pending_to_comment(BufferedCommentKind::Leading, start);
589 }
590
591 self.state.set_token_type(token);
592 self.state.prev_hi = self.last_pos();
593 self.state.had_line_break_before_last = self.had_line_break_before_last();
594 Some(TokenAndSpan {
596 token,
597 had_line_break: self.had_line_break_before_last(),
598 span,
599 })
600 } else {
601 None
602 }
603 }
604}
605
606impl State {
607 pub fn new(start_pos: BytePos) -> Self {
608 State {
609 had_line_break: false,
610 had_line_break_before_last: false,
611 is_first: true,
612 next_regexp: None,
613 start: BytePos(0),
614 prev_hi: start_pos,
615 token_value: None,
616 token_type: None,
617 }
618 }
619
620 pub(crate) fn set_token_value(&mut self, token_value: TokenValue) {
621 self.token_value = Some(token_value);
622 }
623}
624
625impl swc_ecma_lexer::common::lexer::state::State for State {
626 type TokenKind = Token;
627 type TokenType = Token;
628
629 #[inline(always)]
630 fn is_expr_allowed(&self) -> bool {
631 unreachable!("is_expr_allowed should not be called in Parser/State")
632 }
633
634 #[inline(always)]
635 fn set_is_expr_allowed(&mut self, _: bool) {
636 }
638
639 #[inline(always)]
640 fn set_next_regexp(&mut self, start: Option<BytePos>) {
641 self.next_regexp = start;
642 }
643
644 #[inline(always)]
645 fn had_line_break(&self) -> bool {
646 self.had_line_break
647 }
648
649 #[inline(always)]
650 fn mark_had_line_break(&mut self) {
651 self.had_line_break = true;
652 }
653
654 #[inline(always)]
655 fn had_line_break_before_last(&self) -> bool {
656 self.had_line_break_before_last
657 }
658
659 #[inline(always)]
660 fn token_contexts(&self) -> &swc_ecma_lexer::TokenContexts {
661 unreachable!();
662 }
663
664 #[inline(always)]
665 fn mut_token_contexts(&mut self) -> &mut swc_ecma_lexer::TokenContexts {
666 unreachable!();
667 }
668
669 #[inline(always)]
670 fn set_token_type(&mut self, token_type: Self::TokenType) {
671 self.token_type = Some(token_type);
672 }
673
674 #[inline(always)]
675 fn token_type(&self) -> Option<Self::TokenType> {
676 self.token_type
677 }
678
679 #[inline(always)]
680 fn syntax(&self) -> SyntaxFlags {
681 unreachable!("syntax is not stored in State, but in Lexer")
682 }
683
684 #[inline(always)]
685 fn prev_hi(&self) -> BytePos {
686 self.prev_hi
687 }
688
689 #[inline(always)]
690 fn start(&self) -> BytePos {
691 self.start
692 }
693}