// swc_common/input.rs

1use std::str;
2
3use debug_unreachable::debug_unreachable;
4
5use crate::syntax_pos::{BytePos, SourceFile};
6
7pub type SourceFileInput<'a> = StringInput<'a>;
8
9/// Implementation of [Input].
10#[derive(Clone)]
11pub struct StringInput<'a> {
12    last_pos: BytePos,
13    /// Current cursor
14    iter: str::Chars<'a>,
15    orig: &'a str,
16    /// Original start position.
17    orig_start: BytePos,
18    orig_end: BytePos,
19}
20
21impl<'a> StringInput<'a> {
22    /// `start` and `end` can be arbitrary value, but start should be less than
23    /// or equal to end.
24    ///
25    ///
26    /// `swc` get this value from [SourceMap] because code generator depends on
27    /// some methods of [SourceMap].
28    /// If you are not going to use methods from
29    /// [SourceMap], you may use any value.
30    pub fn new(src: &'a str, start: BytePos, end: BytePos) -> Self {
31        assert!(start <= end);
32
33        StringInput {
34            last_pos: start,
35            orig: src,
36            iter: src.chars(),
37            orig_start: start,
38            orig_end: end,
39        }
40    }
41
42    #[inline(always)]
43    pub fn as_str(&self) -> &str {
44        self.iter.as_str()
45    }
46
47    #[inline]
48    pub fn bump_bytes(&mut self, n: usize) {
49        unsafe {
50            // Safety: We only proceed, not go back.
51            self.reset_to(self.last_pos + BytePos(n as u32));
52        }
53    }
54
55    pub fn start_pos(&self) -> BytePos {
56        self.orig_start
57    }
58
59    pub fn end_pos(&self) -> BytePos {
60        self.orig_end
61    }
62}
63
64/// Creates an [Input] from [SourceFile]. This is an alias for
65///
66/// ```ignore
67///    StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
68/// ```
69impl<'a> From<&'a SourceFile> for StringInput<'a> {
70    fn from(fm: &'a SourceFile) -> Self {
71        StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
72    }
73}
74
75impl Input for StringInput<'_> {
76    #[inline]
77    fn cur(&mut self) -> Option<char> {
78        self.iter.clone().next()
79    }
80
81    #[inline]
82    fn peek(&mut self) -> Option<char> {
83        self.iter.clone().nth(1)
84    }
85
86    #[inline]
87    fn peek_ahead(&mut self) -> Option<char> {
88        self.iter.clone().nth(2)
89    }
90
91    #[inline]
92    unsafe fn bump(&mut self) {
93        if let Some(c) = self.iter.next() {
94            self.last_pos = self.last_pos + BytePos((c.len_utf8()) as u32);
95        } else {
96            unsafe {
97                debug_unreachable!("bump should not be called when cur() == None");
98            }
99        }
100    }
101
102    #[inline]
103    fn cur_as_ascii(&mut self) -> Option<u8> {
104        let first_byte = *self.as_str().as_bytes().first()?;
105        if first_byte <= 0x7f {
106            Some(first_byte)
107        } else {
108            None
109        }
110    }
111
112    #[inline]
113    fn is_at_start(&self) -> bool {
114        self.orig_start == self.last_pos
115    }
116
117    /// TODO(kdy1): Remove this?
118    #[inline]
119    fn cur_pos(&mut self) -> BytePos {
120        self.last_pos
121    }
122
123    #[inline]
124    fn last_pos(&self) -> BytePos {
125        self.last_pos
126    }
127
128    #[inline]
129    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &str {
130        debug_assert!(start <= end, "Cannot slice {:?}..{:?}", start, end);
131        let s = self.orig;
132
133        let start_idx = (start - self.orig_start).0 as usize;
134        let end_idx = (end - self.orig_start).0 as usize;
135
136        debug_assert!(end_idx <= s.len());
137
138        let ret = unsafe { s.get_unchecked(start_idx..end_idx) };
139
140        self.iter = unsafe { s.get_unchecked(end_idx..) }.chars();
141        self.last_pos = end;
142
143        ret
144    }
145
146    #[inline]
147    fn uncons_while<F>(&mut self, mut pred: F) -> &str
148    where
149        F: FnMut(char) -> bool,
150    {
151        let s = self.iter.as_str();
152        let mut last = 0;
153
154        for (i, c) in s.char_indices() {
155            if pred(c) {
156                last = i + c.len_utf8();
157            } else {
158                break;
159            }
160        }
161        debug_assert!(last <= s.len());
162        let ret = unsafe { s.get_unchecked(..last) };
163
164        self.last_pos = self.last_pos + BytePos(last as _);
165        self.iter = unsafe { s.get_unchecked(last..) }.chars();
166
167        ret
168    }
169
170    fn find<F>(&mut self, mut pred: F) -> Option<BytePos>
171    where
172        F: FnMut(char) -> bool,
173    {
174        let s = self.iter.as_str();
175        let mut last = 0;
176
177        for (i, c) in s.char_indices() {
178            if pred(c) {
179                last = i + c.len_utf8();
180                break;
181            }
182        }
183        if last == 0 {
184            return None;
185        }
186
187        debug_assert!(last <= s.len());
188
189        self.last_pos = self.last_pos + BytePos(last as _);
190        self.iter = unsafe { s.get_unchecked(last..) }.chars();
191
192        Some(self.last_pos)
193    }
194
195    #[inline]
196    unsafe fn reset_to(&mut self, to: BytePos) {
197        let orig = self.orig;
198        let idx = (to - self.orig_start).0 as usize;
199
200        debug_assert!(idx <= orig.len());
201        let s = unsafe { orig.get_unchecked(idx..) };
202        self.iter = s.chars();
203        self.last_pos = to;
204    }
205
206    #[inline]
207    fn is_byte(&mut self, c: u8) -> bool {
208        self.iter
209            .as_str()
210            .as_bytes()
211            .first()
212            .map(|b| *b == c)
213            .unwrap_or(false)
214    }
215
216    #[inline]
217    fn is_str(&self, s: &str) -> bool {
218        self.as_str().starts_with(s)
219    }
220
221    #[inline]
222    fn eat_byte(&mut self, c: u8) -> bool {
223        if self.is_byte(c) {
224            self.iter.next();
225            self.last_pos = self.last_pos + BytePos(1_u32);
226            true
227        } else {
228            false
229        }
230    }
231}
232
233pub trait Input: Clone {
234    fn cur(&mut self) -> Option<char>;
235    fn peek(&mut self) -> Option<char>;
236    fn peek_ahead(&mut self) -> Option<char>;
237
238    /// # Safety
239    ///
240    /// This should be called only when `cur()` returns `Some`. i.e.
241    /// when the Input is not empty.
242    unsafe fn bump(&mut self);
243
244    /// Returns [None] if it's end of input **or** current character is not an
245    /// ascii character.
246    #[inline]
247    fn cur_as_ascii(&mut self) -> Option<u8> {
248        self.cur().and_then(|i| {
249            if i.is_ascii() {
250                return Some(i as u8);
251            }
252            None
253        })
254    }
255
256    fn is_at_start(&self) -> bool;
257
258    fn cur_pos(&mut self) -> BytePos;
259
260    fn last_pos(&self) -> BytePos;
261
262    /// # Safety
263    ///
264    /// - start should be less than or equal to end.
265    /// - start and end should be in the valid range of input.
266    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &str;
267
268    /// Takes items from stream, testing each one with predicate. returns the
269    /// range of items which passed predicate.
270    fn uncons_while<F>(&mut self, f: F) -> &str
271    where
272        F: FnMut(char) -> bool;
273
274    /// This method modifies [last_pos()] and [cur_pos()].
275    fn find<F>(&mut self, f: F) -> Option<BytePos>
276    where
277        F: FnMut(char) -> bool;
278
279    /// # Safety
280    ///
281    /// - `to` be in the valid range of input.
282    unsafe fn reset_to(&mut self, to: BytePos);
283
284    /// Implementors can override the method to make it faster.
285    ///
286    /// `c` must be ASCII.
287    #[inline]
288    #[allow(clippy::wrong_self_convention)]
289    fn is_byte(&mut self, c: u8) -> bool {
290        match self.cur() {
291            Some(ch) => ch == c as char,
292            _ => false,
293        }
294    }
295
296    /// Implementors can override the method to make it faster.
297    ///
298    /// `s` must be ASCII only.
299    fn is_str(&self, s: &str) -> bool;
300
301    /// Implementors can override the method to make it faster.
302    ///
303    /// `c` must be ASCII.
304    #[inline]
305    fn eat_byte(&mut self, c: u8) -> bool {
306        if self.is_byte(c) {
307            unsafe {
308                // Safety: We are sure that the input is not empty
309                self.bump();
310            }
311            true
312        } else {
313            false
314        }
315    }
316}
317
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::{FileName, FilePathMapping, SourceMap};

    /// Registers `src` as a file in a fresh [SourceMap] and runs `f` with a
    /// [StringInput] built from that file.
    ///
    /// The tests below rely on the first file of a fresh map starting at
    /// `BytePos(1)`.
    fn with_test_sess<F>(src: &str, f: F)
    where
        F: FnOnce(StringInput<'_>),
    {
        let cm = Arc::new(SourceMap::new(FilePathMapping::empty()));
        let fm = cm.new_source_file(FileName::Real("testing".into()).into(), src.into());

        f((&*fm).into())
    }

    /// `slice` returns the requested range and leaves the cursor at its end.
    #[test]
    fn src_input_slice_1() {
        with_test_sess("foo/d", |mut i| {
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(2)) }, "f");
            assert_eq!(i.last_pos, BytePos(2));
            assert_eq!(i.cur(), Some('o'));

            assert_eq!(unsafe { i.slice(BytePos(2), BytePos(4)) }, "oo");
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(4)) }, "foo");
            assert_eq!(i.last_pos, BytePos(4));
            assert_eq!(i.cur(), Some('/'));
        });
    }

    /// `reset_to` can move the cursor back to an earlier position.
    #[test]
    fn src_input_reset_to_1() {
        with_test_sess("load", |mut i| {
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(3)) }, "lo");
            assert_eq!(i.last_pos, BytePos(3));
            assert_eq!(i.cur(), Some('a'));
            unsafe { i.reset_to(BytePos(1)) };

            assert_eq!(i.cur(), Some('l'));
            assert_eq!(i.last_pos, BytePos(1));
        });
    }

    /// `uncons_while` and `bump` advance the position over ASCII input.
    #[test]
    fn src_input_smoke_01() {
        with_test_sess("foo/d", |mut i| {
            assert_eq!(i.cur_pos(), BytePos(1));
            assert_eq!(i.last_pos, BytePos(1));
            assert_eq!(i.uncons_while(|c| c.is_alphabetic()), "foo");

            assert_eq!(i.cur_pos(), BytePos(4));
            assert_eq!(i.last_pos, BytePos(4));
            assert_eq!(i.cur(), Some('/'));

            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(5));
            assert_eq!(i.cur(), Some('d'));

            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(6));
            assert_eq!(i.cur(), None);
        });
    }

    /// `find` stops just past the first matching char.
    #[test]
    fn src_input_find_01() {
        with_test_sess("foo/d", |mut i| {
            assert_eq!(i.cur_pos(), BytePos(1));
            assert_eq!(i.last_pos, BytePos(1));

            assert_eq!(i.find(|c| c == '/'), Some(BytePos(5)));
            assert_eq!(i.last_pos, BytePos(5));
            assert_eq!(i.cur(), Some('d'));
        });
    }

    /// Positions advance by UTF-8 byte length, not by char count.
    #[test]
    fn src_input_smoke_02() {
        // '℘' (U+2118) takes three bytes in UTF-8.
        with_test_sess("℘℘/℘℘", |mut i| {
            assert_eq!(i.as_str(), "℘℘/℘℘");
            assert_eq!(i.cur_pos(), BytePos(1));
            assert_eq!(i.last_pos, BytePos(1));
            assert_eq!(i.uncons_while(|c| c != '/'), "℘℘");

            assert_eq!(i.as_str(), "/℘℘");
            assert_eq!(i.last_pos, BytePos(7));
            assert_eq!(i.cur(), Some('/'));
            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(8));

            assert_eq!(i.as_str(), "℘℘");
            assert_eq!(i.uncons_while(|c| c != '/'), "℘℘");
            assert_eq!(i.last_pos, BytePos(14));
            assert_eq!(i.cur(), None);
        });
    }
}