swc_common/
input.rs

1use std::str;
2
3use debug_unreachable::debug_unreachable;
4
5use crate::syntax_pos::{BytePos, SourceFile};
6
7pub type SourceFileInput<'a> = StringInput<'a>;
8
9/// Implementation of [Input].
10#[derive(Clone)]
11pub struct StringInput<'a> {
12    last_pos: BytePos,
13    /// Current cursor
14    iter: str::Chars<'a>,
15    orig: &'a str,
16    /// Original start position.
17    orig_start: BytePos,
18    orig_end: BytePos,
19}
20
21impl<'a> StringInput<'a> {
22    /// `start` and `end` can be arbitrary value, but start should be less than
23    /// or equal to end.
24    ///
25    ///
26    /// `swc` get this value from [SourceMap] because code generator depends on
27    /// some methods of [SourceMap].
28    /// If you are not going to use methods from
29    /// [SourceMap], you may use any value.
30    pub fn new(src: &'a str, start: BytePos, end: BytePos) -> Self {
31        assert!(start <= end);
32
33        StringInput {
34            last_pos: start,
35            orig: src,
36            iter: src.chars(),
37            orig_start: start,
38            orig_end: end,
39        }
40    }
41
42    #[inline(always)]
43    pub fn as_str(&self) -> &str {
44        self.iter.as_str()
45    }
46
47    #[inline]
48    pub fn bump_bytes(&mut self, n: usize) {
49        unsafe {
50            // Safety: We only proceed, not go back.
51            self.reset_to(self.last_pos + BytePos(n as u32));
52        }
53    }
54
55    pub fn start_pos(&self) -> BytePos {
56        self.orig_start
57    }
58
59    #[inline(always)]
60    pub fn end_pos(&self) -> BytePos {
61        self.orig_end
62    }
63}
64
65/// Creates an [Input] from [SourceFile]. This is an alias for
66///
67/// ```ignore
68///    StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
69/// ```
70impl<'a> From<&'a SourceFile> for StringInput<'a> {
71    fn from(fm: &'a SourceFile) -> Self {
72        StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
73    }
74}
75
76impl<'a> Input<'a> for StringInput<'a> {
77    #[inline]
78    fn cur(&self) -> Option<char> {
79        self.iter.clone().next()
80    }
81
82    #[inline]
83    fn peek(&self) -> Option<char> {
84        let mut iter = self.iter.clone();
85        // https://github.com/rust-lang/rust/blob/1.86.0/compiler/rustc_lexer/src/cursor.rs#L56 say `next` is faster.
86        iter.next();
87        iter.next()
88    }
89
90    #[inline]
91    fn peek_ahead(&self) -> Option<char> {
92        let mut iter = self.iter.clone();
93        // https://github.com/rust-lang/rust/blob/1.86.0/compiler/rustc_lexer/src/cursor.rs#L56 say `next` is faster
94        iter.next();
95        iter.next();
96        iter.next()
97    }
98
99    #[inline]
100    unsafe fn bump(&mut self) {
101        if let Some(c) = self.iter.next() {
102            self.last_pos = self.last_pos + BytePos((c.len_utf8()) as u32);
103        } else {
104            unsafe {
105                debug_unreachable!("bump should not be called when cur() == None");
106            }
107        }
108    }
109
110    #[inline]
111    fn cur_as_ascii(&self) -> Option<u8> {
112        let first_byte = *self.as_str().as_bytes().first()?;
113        if first_byte <= 0x7f {
114            Some(first_byte)
115        } else {
116            None
117        }
118    }
119
120    #[inline]
121    fn is_at_start(&self) -> bool {
122        self.orig_start == self.last_pos
123    }
124
125    /// TODO(kdy1): Remove this?
126    #[inline]
127    fn cur_pos(&self) -> BytePos {
128        self.last_pos
129    }
130
131    #[inline]
132    fn last_pos(&self) -> BytePos {
133        self.last_pos
134    }
135
136    #[inline]
137    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
138        debug_assert!(start <= end, "Cannot slice {start:?}..{end:?}");
139        let s = self.orig;
140
141        let start_idx = (start - self.orig_start).0 as usize;
142        let end_idx = (end - self.orig_start).0 as usize;
143
144        debug_assert!(end_idx <= s.len());
145
146        let ret = unsafe { s.get_unchecked(start_idx..end_idx) };
147
148        self.iter = unsafe { s.get_unchecked(end_idx..) }.chars();
149        self.last_pos = end;
150
151        ret
152    }
153
154    #[inline]
155    fn uncons_while<F>(&mut self, mut pred: F) -> &'a str
156    where
157        F: FnMut(char) -> bool,
158    {
159        let last = {
160            let mut last = 0;
161            for c in self.iter.clone() {
162                if pred(c) {
163                    last += c.len_utf8();
164                } else {
165                    break;
166                }
167            }
168            last
169        };
170
171        let s = self.iter.as_str();
172        debug_assert!(last <= s.len());
173        let ret = unsafe { s.get_unchecked(..last) };
174
175        self.last_pos = self.last_pos + BytePos(last as _);
176        self.iter = unsafe { s.get_unchecked(last..) }.chars();
177
178        ret
179    }
180
181    #[inline]
182    unsafe fn reset_to(&mut self, to: BytePos) {
183        if self.last_pos == to {
184            // No need to reset.
185            return;
186        }
187
188        let orig = self.orig;
189        let idx = (to - self.orig_start).0 as usize;
190
191        debug_assert!(idx <= orig.len());
192        let s = unsafe { orig.get_unchecked(idx..) };
193        self.iter = s.chars();
194        self.last_pos = to;
195    }
196
197    #[inline]
198    fn is_byte(&self, c: u8) -> bool {
199        self.iter
200            .as_str()
201            .as_bytes()
202            .first()
203            .map(|b| *b == c)
204            .unwrap_or(false)
205    }
206
207    #[inline]
208    fn is_str(&self, s: &str) -> bool {
209        self.as_str().starts_with(s)
210    }
211
212    #[inline]
213    fn eat_byte(&mut self, c: u8) -> bool {
214        if self.is_byte(c) {
215            self.iter.next();
216            self.last_pos = self.last_pos + BytePos(1_u32);
217            true
218        } else {
219            false
220        }
221    }
222}
223
224pub trait Input<'a>: Clone {
225    fn cur(&self) -> Option<char>;
226    fn peek(&self) -> Option<char>;
227    fn peek_ahead(&self) -> Option<char>;
228
229    /// # Safety
230    ///
231    /// This should be called only when `cur()` returns `Some`. i.e.
232    /// when the Input is not empty.
233    unsafe fn bump(&mut self);
234
235    /// Returns [None] if it's end of input **or** current character is not an
236    /// ascii character.
237    #[inline]
238    fn cur_as_ascii(&self) -> Option<u8> {
239        self.cur().and_then(|i| {
240            if i.is_ascii() {
241                return Some(i as u8);
242            }
243            None
244        })
245    }
246
247    fn is_at_start(&self) -> bool;
248
249    fn cur_pos(&self) -> BytePos;
250
251    fn last_pos(&self) -> BytePos;
252
253    /// # Safety
254    ///
255    /// - start should be less than or equal to end.
256    /// - start and end should be in the valid range of input.
257    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str;
258
259    /// Takes items from stream, testing each one with predicate. returns the
260    /// range of items which passed predicate.
261    fn uncons_while<F>(&mut self, f: F) -> &'a str
262    where
263        F: FnMut(char) -> bool;
264
265    /// # Safety
266    ///
267    /// - `to` be in the valid range of input.
268    unsafe fn reset_to(&mut self, to: BytePos);
269
270    /// Implementors can override the method to make it faster.
271    ///
272    /// `c` must be ASCII.
273    #[inline]
274    #[allow(clippy::wrong_self_convention)]
275    fn is_byte(&self, c: u8) -> bool {
276        match self.cur() {
277            Some(ch) => ch == c as char,
278            _ => false,
279        }
280    }
281
282    /// Implementors can override the method to make it faster.
283    ///
284    /// `s` must be ASCII only.
285    fn is_str(&self, s: &str) -> bool;
286
287    /// Implementors can override the method to make it faster.
288    ///
289    /// `c` must be ASCII.
290    #[inline]
291    fn eat_byte(&mut self, c: u8) -> bool {
292        if self.is_byte(c) {
293            unsafe {
294                // Safety: We are sure that the input is not empty
295                self.bump();
296            }
297            true
298        } else {
299            false
300        }
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{sync::Lrc, FileName, FilePathMapping, SourceMap};

    /// Registers `src` as a source file in a fresh [SourceMap] and hands a
    /// [StringInput] over that file to `f`.
    fn with_test_sess<F>(src: &'static str, f: F)
    where
        F: FnOnce(StringInput<'_>),
    {
        let source_map = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let file = source_map.new_source_file(FileName::Real("testing".into()).into(), src);

        f(StringInput::from(&*file))
    }

    #[test]
    fn src_input_slice_1() {
        with_test_sess("foo/d", |mut input| {
            let first = unsafe { input.slice(BytePos(1), BytePos(2)) };
            assert_eq!(first, "f");
            assert_eq!(input.last_pos, BytePos(2));
            assert_eq!(input.cur(), Some('o'));

            let middle = unsafe { input.slice(BytePos(2), BytePos(4)) };
            assert_eq!(middle, "oo");
            // Slicing may start before the current cursor.
            let word = unsafe { input.slice(BytePos(1), BytePos(4)) };
            assert_eq!(word, "foo");
            assert_eq!(input.last_pos, BytePos(4));
            assert_eq!(input.cur(), Some('/'));
        });
    }

    #[test]
    fn src_input_reset_to_1() {
        with_test_sess("load", |mut input| {
            let head = unsafe { input.slice(BytePos(1), BytePos(3)) };
            assert_eq!(head, "lo");
            assert_eq!(input.last_pos, BytePos(3));
            assert_eq!(input.cur(), Some('a'));

            // Rewind to the beginning of the file.
            unsafe { input.reset_to(BytePos(1)) };

            assert_eq!(input.cur(), Some('l'));
            assert_eq!(input.last_pos, BytePos(1));
        });
    }

    #[test]
    fn src_input_smoke_01() {
        with_test_sess("foo/d", |mut input| {
            assert_eq!(input.cur_pos(), BytePos(1));
            assert_eq!(input.last_pos, BytePos(1));
            assert_eq!(input.uncons_while(|c| c.is_alphabetic()), "foo");

            // assert_eq!(input.cur_pos(), BytePos(4));
            assert_eq!(input.last_pos, BytePos(4));
            assert_eq!(input.cur(), Some('/'));

            unsafe {
                input.bump();
            }
            assert_eq!(input.last_pos, BytePos(5));
            assert_eq!(input.cur(), Some('d'));

            unsafe {
                input.bump();
            }
            assert_eq!(input.last_pos, BytePos(6));
            assert_eq!(input.cur(), None);
        });
    }

    // Disabled: this test calls `find`, which the `Input` trait in this file
    // does not declare.
    //
    // #[test]
    // fn src_input_find_01() {
    //     with_test_sess("foo/d", |mut i| {
    //         assert_eq!(i.cur_pos(), BytePos(1));
    //         assert_eq!(i.last_pos, BytePos(1));

    //         assert_eq!(i.find(|c| c == '/'), Some(BytePos(5)));
    //         assert_eq!(i.last_pos, BytePos(5));
    //         assert_eq!(i.cur(), Some('d'));
    //     });
    // }

    // Disabled: this test reads a `start_pos` field, which `StringInput` does
    // not have, and calls `bump` outside an `unsafe` block.
    //
    //    #[test]
    //    fn src_input_smoke_02() {
    //        let _ = crate::with_test_sess("℘℘/℘℘", | mut i| {
    //            assert_eq!(i.iter.as_str(), "℘℘/℘℘");
    //            assert_eq!(i.cur_pos(), BytePos(0));
    //            assert_eq!(i.last_pos, BytePos(0));
    //            assert_eq!(i.start_pos, BytePos(0));
    //            assert_eq!(i.uncons_while(|c| c.is_ident_part()), "℘℘");
    //
    //            assert_eq!(i.iter.as_str(), "/℘℘");
    //            assert_eq!(i.last_pos, BytePos(6));
    //            assert_eq!(i.start_pos, BytePos(6));
    //            assert_eq!(i.cur(), Some('/'));
    //            i.bump();
    //            assert_eq!(i.last_pos, BytePos(7));
    //            assert_eq!(i.start_pos, BytePos(6));
    //
    //            assert_eq!(i.iter.as_str(), "℘℘");
    //            assert_eq!(i.uncons_while(|c| c.is_ident_part()), "℘℘");
    //            assert_eq!(i.last_pos, BytePos(13));
    //            assert_eq!(i.start_pos, BytePos(13));
    //
    //            Ok(())
    //        });
    //    }
}