use std::str;
use debug_unreachable::debug_unreachable;
use crate::syntax_pos::{BytePos, SourceFile};
/// Alias for [`StringInput`]; both names refer to exactly the same type.
pub type SourceFileInput<'a> = StringInput<'a>;
/// A cursor over a borrowed source string that tracks its location as a
/// [`BytePos`].
///
/// Positions handed to and returned from this type are offset by
/// `orig_start`: the first byte of `orig` lives at position `orig_start`.
#[derive(Clone)]
pub struct StringInput<'a> {
/// Position that corresponds to index 0 of `iter`. `bump` adds the index
/// yielded by `iter` to this value to compute the new `last_pos`.
start_pos_of_iter: BytePos,
/// Current position of the cursor, i.e. the position of the next
/// unconsumed byte. Returned by both `cur_pos()` and `last_pos()`.
last_pos: BytePos,
/// Iterator over the not-yet-consumed remainder of the source.
iter: str::CharIndices<'a>,
/// The complete source text this input was created from.
orig: &'a str,
/// Position of the first byte of `orig`; subtracted from a `BytePos` to
/// obtain a byte index into `orig`.
orig_start: BytePos,
}
impl<'a> StringInput<'a> {
    /// Creates an input over `src`, treating `start` as the position of the
    /// first byte of `src`.
    ///
    /// `end` is only compared against `start`; it is not stored and is not
    /// checked against the length of `src`.
    ///
    /// # Panics
    ///
    /// Panics if `start > end`.
    pub fn new(src: &'a str, start: BytePos, end: BytePos) -> Self {
        assert!(start <= end);

        // The cursor begins at the very first byte, so every position field
        // starts out equal to `start`.
        Self {
            orig: src,
            orig_start: start,
            iter: src.char_indices(),
            start_pos_of_iter: start,
            last_pos: start,
        }
    }

    /// Returns the part of the source that has not been consumed yet.
    #[inline(always)]
    pub fn as_str(&self) -> &str {
        self.iter.as_str()
    }

    /// Moves the cursor forward by `n` bytes by resetting it to
    /// `last_pos + n`.
    ///
    /// `n` is converted with `as u32`, so values above `u32::MAX` are
    /// truncated. The target must lie inside the source and on a UTF-8
    /// character boundary, because `reset_to` only bounds-checks in debug
    /// builds and slices unchecked.
    #[inline]
    pub fn bump_bytes(&mut self, n: usize) {
        let target = self.last_pos + BytePos(n as u32);
        self.reset_to(target);
    }
}
impl<'a> From<&'a SourceFile> for StringInput<'a> {
fn from(fm: &'a SourceFile) -> Self {
StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
}
}
impl<'a> Input for StringInput<'a> {
    /// Returns the character at the cursor without consuming it, or `None`
    /// when the input is exhausted.
    #[inline]
    fn cur(&mut self) -> Option<char> {
        self.iter.clone().next().map(|i| i.1)
    }

    /// Returns the character immediately after the current one without
    /// consuming anything.
    #[inline]
    fn peek(&mut self) -> Option<char> {
        self.iter.clone().nth(1).map(|i| i.1)
    }

    /// Returns the character two places after the current one without
    /// consuming anything.
    #[inline]
    fn peek_ahead(&mut self) -> Option<char> {
        self.iter.clone().nth(2).map(|i| i.1)
    }

    /// Consumes the current character and moves `last_pos` just past it.
    ///
    /// Must not be called when `cur()` is `None`; that case is routed to
    /// `debug_unreachable!` inside an `unsafe` block.
    #[inline]
    fn bump(&mut self) {
        if let Some((i, c)) = self.iter.next() {
            // `i` is relative to the point where `iter` was (re)created, which
            // is exactly what `start_pos_of_iter` records.
            self.last_pos = self.start_pos_of_iter + BytePos((i + c.len_utf8()) as u32);
        } else {
            unsafe {
                debug_unreachable!("bump should not be called when cur() == None");
            }
        }
    }

    /// Returns the first unconsumed byte if it is ASCII (`<= 0x7f`),
    /// otherwise `None`. Also returns `None` at the end of the input.
    #[inline]
    fn cur_as_ascii(&mut self) -> Option<u8> {
        let first_byte = *self.as_str().as_bytes().first()?;
        if first_byte <= 0x7f {
            Some(first_byte)
        } else {
            None
        }
    }

    /// Returns `true` while the cursor is still at the position the input was
    /// created with.
    #[inline]
    fn is_at_start(&self) -> bool {
        self.orig_start == self.last_pos
    }

    /// Returns the current position of the cursor.
    #[inline]
    fn cur_pos(&mut self) -> BytePos {
        self.last_pos
    }

    /// Returns the current position of the cursor (same value as
    /// `cur_pos()`, but callable through a shared reference).
    #[inline]
    fn last_pos(&self) -> BytePos {
        self.last_pos
    }

    /// Returns the source text between `start` and `end` and repositions the
    /// cursor at `end`.
    ///
    /// Both positions are translated to byte indices by subtracting
    /// `orig_start`. Only debug builds verify `start <= end` and that `end`
    /// is inside the source; callers must pass positions that are in range
    /// and on UTF-8 character boundaries.
    #[inline]
    fn slice(&mut self, start: BytePos, end: BytePos) -> &str {
        debug_assert!(start <= end, "Cannot slice {:?}..{:?}", start, end);
        let s = self.orig;

        let start_idx = (start - self.orig_start).0 as usize;
        let end_idx = (end - self.orig_start).0 as usize;

        debug_assert!(end_idx <= s.len());

        // SAFETY: not verified in release builds. Soundness relies on the
        // caller passing positions inside `orig` that fall on character
        // boundaries (see the debug assertions above).
        let ret = unsafe { s.get_unchecked(start_idx..end_idx) };

        // SAFETY: same caller-provided invariant as above for `end_idx`.
        self.iter = unsafe { s.get_unchecked(end_idx..) }.char_indices();
        self.last_pos = end;
        self.start_pos_of_iter = end;

        ret
    }

    /// Consumes the longest prefix of the remaining input whose characters
    /// all satisfy `pred` and returns that prefix (possibly empty).
    fn uncons_while<F>(&mut self, mut pred: F) -> &str
    where
        F: FnMut(char) -> bool,
    {
        let s = self.iter.as_str();
        // Byte length of the accepted prefix.
        let mut last = 0;

        for (i, c) in s.char_indices() {
            if pred(c) {
                last = i + c.len_utf8();
            } else {
                break;
            }
        }
        debug_assert!(last <= s.len());
        // SAFETY: `last` is 0 or the end offset of a character yielded by
        // `s.char_indices()`, so it is in bounds and on a char boundary.
        let ret = unsafe { s.get_unchecked(..last) };

        self.last_pos = self.last_pos + BytePos(last as _);
        self.start_pos_of_iter = self.last_pos;
        // SAFETY: see above; `last` is a valid boundary of `s`.
        self.iter = unsafe { s.get_unchecked(last..) }.char_indices();

        ret
    }

    /// Consumes input up to and including the first character for which
    /// `pred` returns `true`, and returns the position just after it.
    ///
    /// Returns `None` and leaves the cursor untouched when no remaining
    /// character matches.
    fn find<F>(&mut self, mut pred: F) -> Option<BytePos>
    where
        F: FnMut(char) -> bool,
    {
        let s = self.iter.as_str();
        // End offset of the matching character; stays 0 when nothing matches
        // because a matching character always ends at offset >= 1.
        let mut last = 0;

        for (i, c) in s.char_indices() {
            if pred(c) {
                last = i + c.len_utf8();
                break;
            }
        }
        if last == 0 {
            return None;
        }

        debug_assert!(last <= s.len());

        self.last_pos = self.last_pos + BytePos(last as _);
        self.start_pos_of_iter = self.last_pos;
        // SAFETY: `last` is the end offset of a character yielded by
        // `s.char_indices()`, so it is in bounds and on a char boundary.
        self.iter = unsafe { s.get_unchecked(last..) }.char_indices();

        Some(self.last_pos)
    }

    /// Moves the cursor to the absolute position `to`.
    ///
    /// Only debug builds verify that `to` is inside the source; callers must
    /// pass a position that is in range and on a UTF-8 character boundary.
    #[inline]
    fn reset_to(&mut self, to: BytePos) {
        let orig = self.orig;
        let idx = (to - self.orig_start).0 as usize;

        debug_assert!(idx <= orig.len());
        // SAFETY: not verified in release builds. Soundness relies on the
        // caller passing a position inside `orig` on a character boundary.
        let s = unsafe { orig.get_unchecked(idx..) };
        self.iter = s.char_indices();
        self.start_pos_of_iter = to;
        self.last_pos = to;
    }

    /// Returns `true` if the first unconsumed byte equals `c`; `false` at the
    /// end of the input.
    #[inline]
    fn is_byte(&mut self, c: u8) -> bool {
        if self.iter.as_str().is_empty() {
            false
        } else {
            // SAFETY: the remaining string was just checked to be non-empty,
            // so index 0 is in bounds.
            unsafe { *self.iter.as_str().as_bytes().get_unchecked(0) == c }
        }
    }

    /// Returns `true` if the remaining input starts with `s`.
    #[inline]
    fn is_str(&self, s: &str) -> bool {
        self.as_str().starts_with(s)
    }

    /// If the first unconsumed byte equals `c`, consumes the character at the
    /// cursor and returns `true`; otherwise leaves the cursor untouched and
    /// returns `false`.
    #[inline]
    fn eat_byte(&mut self, c: u8) -> bool {
        if self.is_byte(c) {
            if let Some((i, ch)) = self.iter.next() {
                // `iter.next()` consumes a whole character, so advance by its
                // encoded width rather than a fixed 1. For ASCII this is still
                // 1; for a non-ASCII `c` that matched a lead byte it keeps
                // `last_pos` in sync with `iter` and on a char boundary.
                self.last_pos = self.start_pos_of_iter + BytePos((i + ch.len_utf8()) as u32);
            } else {
                unsafe {
                    debug_unreachable!(
                        "We can't enter here as we already checked the state using `is_byte`"
                    )
                }
            }
            true
        } else {
            false
        }
    }
}
/// A cloneable cursor over source text, addressed by [`BytePos`].
pub trait Input: Clone {
    /// Returns the character at the cursor without consuming it.
    fn cur(&mut self) -> Option<char>;

    /// Returns the character after the current one without consuming it.
    fn peek(&mut self) -> Option<char>;

    /// Returns the character two places after the current one without
    /// consuming it.
    fn peek_ahead(&mut self) -> Option<char>;

    /// Consumes the current character.
    fn bump(&mut self);

    /// Returns the current character as a byte when it is ASCII, otherwise
    /// `None` (including when there is no current character).
    #[inline]
    fn cur_as_ascii(&mut self) -> Option<u8> {
        match self.cur() {
            Some(ch) if ch.is_ascii() => Some(ch as u8),
            _ => None,
        }
    }

    /// Returns `true` while the cursor has not moved from its initial
    /// position.
    fn is_at_start(&self) -> bool;

    /// Returns the current position of the cursor.
    fn cur_pos(&mut self) -> BytePos;

    /// Returns the current position of the cursor through a shared
    /// reference.
    fn last_pos(&self) -> BytePos;

    /// Returns the text between `start` and `end`.
    fn slice(&mut self, start: BytePos, end: BytePos) -> &str;

    /// Consumes and returns the leading run of characters accepted by `f`.
    fn uncons_while<F>(&mut self, f: F) -> &str
    where
        F: FnMut(char) -> bool;

    /// Searches forward for a character accepted by `f` and reports a
    /// position for it, or `None` when there is no such character.
    fn find<F>(&mut self, f: F) -> Option<BytePos>
    where
        F: FnMut(char) -> bool;

    /// Moves the cursor to the position `to`.
    fn reset_to(&mut self, to: BytePos);

    /// Returns `true` when the current character equals `c as char`.
    #[inline]
    #[allow(clippy::wrong_self_convention)]
    fn is_byte(&mut self, c: u8) -> bool {
        self.cur() == Some(c as char)
    }

    /// Returns `true` when the remaining input starts with `s`.
    fn is_str(&self, s: &str) -> bool;

    /// Consumes the current character via `bump` when `is_byte(c)` holds and
    /// reports whether it did so.
    #[inline]
    fn eat_byte(&mut self, c: u8) -> bool {
        let matched = self.is_byte(c);
        if matched {
            self.bump();
        }
        matched
    }
}
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::{FileName, FilePathMapping, SourceMap};

    /// Registers `src` as a source file in a fresh `SourceMap` and passes a
    /// `StringInput` over that file to `f`.
    ///
    /// The assertions below expect the resulting input to start at
    /// `BytePos(1)`.
    fn with_test_sess<F>(src: &str, f: F)
    where
        F: FnOnce(StringInput<'_>),
    {
        let source_map = Arc::new(SourceMap::new(FilePathMapping::empty()));
        let file = source_map.new_source_file(FileName::Real("testing".into()), src.into());
        let input = StringInput::from(&*file);

        f(input)
    }

    /// `slice` returns the requested text and leaves the cursor at `end`.
    #[test]
    fn src_input_slice_1() {
        with_test_sess("foo/d", |mut input| {
            assert_eq!(input.slice(BytePos(1), BytePos(2)), "f");
            assert_eq!(input.last_pos, BytePos(2));
            assert_eq!(input.start_pos_of_iter, BytePos(2));
            assert_eq!(input.cur(), Some('o'));

            assert_eq!(input.slice(BytePos(2), BytePos(4)), "oo");
            // Slicing a range that starts before the cursor is allowed.
            assert_eq!(input.slice(BytePos(1), BytePos(4)), "foo");
            assert_eq!(input.last_pos, BytePos(4));
            assert_eq!(input.start_pos_of_iter, BytePos(4));
            assert_eq!(input.cur(), Some('/'));
        });
    }

    /// `reset_to` can move the cursor backwards.
    #[test]
    fn src_input_reset_to_1() {
        with_test_sess("load", |mut input| {
            assert_eq!(input.slice(BytePos(1), BytePos(3)), "lo");
            assert_eq!(input.last_pos, BytePos(3));
            assert_eq!(input.start_pos_of_iter, BytePos(3));
            assert_eq!(input.cur(), Some('a'));

            input.reset_to(BytePos(1));

            assert_eq!(input.cur(), Some('l'));
            assert_eq!(input.last_pos, BytePos(1));
            assert_eq!(input.start_pos_of_iter, BytePos(1));
        });
    }

    /// `uncons_while` and `bump` advance the cursor until the input ends.
    #[test]
    fn src_input_smoke_01() {
        with_test_sess("foo/d", |mut input| {
            assert_eq!(input.cur_pos(), BytePos(1));
            assert_eq!(input.last_pos, BytePos(1));
            assert_eq!(input.start_pos_of_iter, BytePos(1));

            assert_eq!(input.uncons_while(|c| c.is_alphabetic()), "foo");
            assert_eq!(input.last_pos, BytePos(4));
            assert_eq!(input.start_pos_of_iter, BytePos(4));
            assert_eq!(input.cur(), Some('/'));

            input.bump();
            assert_eq!(input.last_pos, BytePos(5));
            assert_eq!(input.cur(), Some('d'));

            input.bump();
            assert_eq!(input.last_pos, BytePos(6));
            assert_eq!(input.cur(), None);
        });
    }

    /// `find` stops just after the first matching character.
    #[test]
    fn src_input_find_01() {
        with_test_sess("foo/d", |mut input| {
            assert_eq!(input.cur_pos(), BytePos(1));
            assert_eq!(input.last_pos, BytePos(1));
            assert_eq!(input.start_pos_of_iter, BytePos(1));

            assert_eq!(input.find(|c| c == '/'), Some(BytePos(5)));
            assert_eq!(input.start_pos_of_iter, BytePos(5));
            assert_eq!(input.last_pos, BytePos(5));
            assert_eq!(input.cur(), Some('d'));
        });
    }
}