sourcemap/js_identifiers.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
use unicode_id::UnicodeID;
/// Returns true if `c` is a valid character for an identifier start.
fn is_valid_start(c: char) -> bool {
c == '$' || c == '_' || c.is_ascii_alphabetic() || {
if c.is_ascii() {
false
} else {
UnicodeID::is_id_start(c)
}
}
}
/// Returns true if `c` is a valid character for an identifier part after start.
fn is_valid_continue(c: char) -> bool {
// As specified by the ECMA-262 spec, U+200C (ZERO WIDTH NON-JOINER) and U+200D
// (ZERO WIDTH JOINER) are format-control characters that are used to make necessary
// distinctions when forming words or phrases in certain languages. They are however
// not considered by UnicodeID to be universally valid identifier characters.
c == '$' || c == '_' || c == '\u{200c}' || c == '\u{200d}' || c.is_ascii_alphanumeric() || {
if c.is_ascii() {
false
} else {
UnicodeID::is_id_continue(c)
}
}
}
fn strip_identifier(s: &str) -> Option<&str> {
let mut iter = s.char_indices();
// Is the first character a valid starting character
match iter.next() {
Some((_, c)) => {
if !is_valid_start(c) {
return None;
}
}
None => {
return None;
}
};
// Slice up to the last valid continuation character
let mut end_idx = 0;
for (i, c) in iter {
if is_valid_continue(c) {
end_idx = i;
} else {
break;
}
}
Some(&s[..=end_idx])
}
pub fn is_valid_javascript_identifier(s: &str) -> bool {
// check stripping does not reduce the length of the token
strip_identifier(s).map_or(0, |t| t.len()) == s.len()
}
/// Finds the first valid identifier in the JS Source string given, provided
/// the string begins with the identifier or whitespace.
pub fn get_javascript_token(source_line: &str) -> Option<&str> {
match source_line.split_whitespace().next() {
Some(s) => strip_identifier(s),
None => None,
}
}
#[test]
fn test_is_valid_javascript_identifier() {
// assert_eq!(is_valid_javascript_identifier("foo 123"));
assert!(is_valid_javascript_identifier("foo_$123"));
assert!(!is_valid_javascript_identifier(" foo"));
assert!(!is_valid_javascript_identifier("foo "));
assert!(!is_valid_javascript_identifier("[123]"));
assert!(!is_valid_javascript_identifier("foo.bar"));
// Should these pass?
// assert!(is_valid_javascript_identifier("foo [bar]"));
// assert!(is_valid_javascript_identifier("foo[bar]"));
assert_eq!(get_javascript_token("foo "), Some("foo"));
assert_eq!(get_javascript_token("f _hi"), Some("f"));
assert_eq!(get_javascript_token("foo.bar"), Some("foo"));
assert_eq!(get_javascript_token("[foo,bar]"), None);
}