swc_sourcemap/
decoder.rs

1use std::io;
2use std::io::{BufReader, Read};
3
4use bitvec::field::BitField;
5use bitvec::order::Lsb0;
6use bitvec::vec::BitVec;
7use serde_json::Value;
8
9use crate::errors::{Error, Result};
10use crate::hermes::decode_hermes;
11use crate::jsontypes::RawSourceMap;
12use crate::types::{DecodedMap, RawToken, SourceMap, SourceMapIndex, SourceMapSection};
13use crate::vlq::parse_vlq_segment_into;
14
/// Prefix a data URL must start with to be accepted by `decode_data_url`;
/// everything after it is treated as the base64-encoded source map JSON.
const DATA_PREAMBLE: &str = "data:application/json;base64,";
16
/// Progress of `StripHeaderReader` through an optional junk header line.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum HeaderState {
    /// Nothing has been inspected yet; the first byte decides whether a
    /// junk header is present.
    Undecided,
    /// The first byte was a junk character; bytes are discarded until the
    /// end of the line.
    Junk,
    /// A `\r` was seen inside the junk header; the next byte must be `\n`.
    AwaitingNewline,
    /// The header (if any) has been consumed; all further bytes are payload.
    PastHeader,
}
24
25pub struct StripHeaderReader<R: Read> {
26    r: R,
27    header_state: HeaderState,
28}
29
30impl<R: Read> StripHeaderReader<R> {
31    pub fn new(reader: R) -> StripHeaderReader<R> {
32        StripHeaderReader {
33            r: reader,
34            header_state: HeaderState::Undecided,
35        }
36    }
37}
38
/// Returns `true` for the bytes that mark (and may appear in) a junk
/// header: `)`, `]`, `}` and `'`.
fn is_junk_json(byte: u8) -> bool {
    matches!(byte, b')' | b']' | b'}' | b'\'')
}
42
43impl<R: Read> Read for StripHeaderReader<R> {
44    #[inline(always)]
45    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
46        if self.header_state == HeaderState::PastHeader {
47            return self.r.read(buf);
48        }
49        self.strip_head_read(buf)
50    }
51}
52
53impl<R: Read> StripHeaderReader<R> {
54    fn strip_head_read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
55        let mut backing = vec![0; buf.len()];
56        let local_buf: &mut [u8] = &mut backing;
57
58        loop {
59            let read = self.r.read(local_buf)?;
60            if read == 0 {
61                return Ok(0);
62            }
63            for (offset, &byte) in local_buf[0..read].iter().enumerate() {
64                self.header_state = match self.header_state {
65                    HeaderState::Undecided => {
66                        if is_junk_json(byte) {
67                            HeaderState::Junk
68                        } else {
69                            buf[..read].copy_from_slice(&local_buf[..read]);
70                            self.header_state = HeaderState::PastHeader;
71                            return Ok(read);
72                        }
73                    }
74                    HeaderState::Junk => {
75                        if byte == b'\r' {
76                            HeaderState::AwaitingNewline
77                        } else if byte == b'\n' {
78                            HeaderState::PastHeader
79                        } else {
80                            HeaderState::Junk
81                        }
82                    }
83                    HeaderState::AwaitingNewline => {
84                        if byte == b'\n' {
85                            HeaderState::PastHeader
86                        } else {
87                            Err(io::Error::new(
88                                io::ErrorKind::InvalidData,
89                                "expected newline",
90                            ))?
91                        }
92                    }
93                    HeaderState::PastHeader => {
94                        let rem = read - offset;
95                        buf[..rem].copy_from_slice(&local_buf[offset..read]);
96                        return Ok(rem);
97                    }
98                };
99            }
100        }
101    }
102}
103
104pub fn strip_junk_header(slice: &[u8]) -> io::Result<&[u8]> {
105    if slice.is_empty() || !is_junk_json(slice[0]) {
106        return Ok(slice);
107    }
108    let mut need_newline = false;
109    for (idx, &byte) in slice.iter().enumerate() {
110        if need_newline && byte != b'\n' {
111            Err(io::Error::new(
112                io::ErrorKind::InvalidData,
113                "expected newline",
114            ))?
115        } else if is_junk_json(byte) {
116            continue;
117        } else if byte == b'\r' {
118            need_newline = true;
119        } else if byte == b'\n' {
120            return Ok(&slice[idx..]);
121        }
122    }
123    Ok(&slice[slice.len()..])
124}
125
126/// Decodes range mappping bitfield string into index
127pub(crate) fn decode_rmi(rmi_str: &str, val: &mut BitVec<u8, Lsb0>) -> Result<()> {
128    val.clear();
129    val.resize(rmi_str.len() * 6, false);
130
131    for (idx, &byte) in rmi_str.as_bytes().iter().enumerate() {
132        let byte = match byte {
133            b'A'..=b'Z' => byte - b'A',
134            b'a'..=b'z' => byte - b'a' + 26,
135            b'0'..=b'9' => byte - b'0' + 52,
136            b'+' => 62,
137            b'/' => 63,
138            _ => {
139                return Err(Error::InvalidBase64(byte as char));
140            }
141        };
142
143        val[6 * idx..6 * (idx + 1)].store_le::<u8>(byte);
144    }
145
146    Ok(())
147}
148
149pub fn decode_regular(rsm: RawSourceMap) -> Result<SourceMap> {
150    let mut dst_col;
151
152    // Source IDs, lines, columns, and names are "running" values.
153    // Each token (except the first) contains the delta from the previous value.
154    let mut running_src_id = 0;
155    let mut running_src_line = 0;
156    let mut running_src_col = 0;
157    let mut running_name_id = 0;
158
159    let names = rsm.names.unwrap_or_default();
160    let sources = rsm.sources.unwrap_or_default();
161    let range_mappings = rsm.range_mappings.unwrap_or_default();
162    let mappings = rsm.mappings.unwrap_or_default();
163    let allocation_size = mappings.matches(&[',', ';'][..]).count() + 10;
164    let mut tokens = Vec::with_capacity(allocation_size);
165
166    let mut nums = Vec::with_capacity(6);
167    let mut rmi = BitVec::new();
168
169    for (dst_line, (line, rmi_str)) in mappings
170        .split(';')
171        .zip(range_mappings.split(';').chain(std::iter::repeat("")))
172        .enumerate()
173    {
174        if line.is_empty() {
175            continue;
176        }
177
178        dst_col = 0;
179
180        decode_rmi(rmi_str, &mut rmi)?;
181
182        for (line_index, segment) in line.split(',').enumerate() {
183            if segment.is_empty() {
184                continue;
185            }
186
187            nums.clear();
188            parse_vlq_segment_into(segment, &mut nums)?;
189            match nums.len() {
190                1 | 4 | 5 => {}
191                _ => return Err(Error::BadSegmentSize(nums.len() as u32)),
192            }
193
194            dst_col = (i64::from(dst_col) + nums[0]) as u32;
195
196            // The source file , source line, source column, and name
197            // may not be present in the current token. We use `u32::MAX`
198            // as the placeholder for missing values.
199            let mut current_src_id = !0;
200            let mut current_src_line = !0;
201            let mut current_src_col = !0;
202            let mut current_name_id = !0;
203
204            if nums.len() > 1 {
205                running_src_id = (i64::from(running_src_id) + nums[1]) as u32;
206
207                if running_src_id >= sources.len() as u32 {
208                    return Err(Error::BadSourceReference(running_src_id));
209                }
210
211                running_src_line = (i64::from(running_src_line) + nums[2]) as u32;
212                running_src_col = (i64::from(running_src_col) + nums[3]) as u32;
213
214                current_src_id = running_src_id;
215                current_src_line = running_src_line;
216                current_src_col = running_src_col;
217
218                if nums.len() > 4 {
219                    running_name_id = (i64::from(running_name_id) + nums[4]) as u32;
220                    if running_name_id >= names.len() as u32 {
221                        return Err(Error::BadNameReference(running_name_id));
222                    }
223                    current_name_id = running_name_id;
224                }
225            }
226
227            let is_range = rmi.get(line_index).map(|v| *v).unwrap_or_default();
228
229            tokens.push(RawToken {
230                dst_line: dst_line as u32,
231                dst_col,
232                src_line: current_src_line,
233                src_col: current_src_col,
234                src_id: current_src_id,
235                name_id: current_name_id,
236                is_range,
237            });
238        }
239    }
240
241    let sources = sources
242        .into_iter()
243        .map(Option::unwrap_or_default)
244        .map(Into::into)
245        .collect();
246
247    // apparently we can encounter some non string types in real world
248    // sourcemaps :(
249    let names = names
250        .into_iter()
251        .map(|val| match val {
252            Value::String(s) => s.into(),
253            Value::Number(num) => num.to_string().into(),
254            _ => "".into(),
255        })
256        .collect::<Vec<_>>();
257
258    // file sometimes is not a string for unexplicable reasons
259    let file = rsm.file.map(|val| match val {
260        Value::String(s) => s.into(),
261        _ => "<invalid>".into(),
262    });
263
264    let source_content = rsm
265        .sources_content
266        .map(|x| x.into_iter().map(|v| v.map(Into::into)).collect::<Vec<_>>());
267
268    let mut sm = SourceMap::new(file, tokens, names, sources, source_content);
269    sm.set_source_root(rsm.source_root);
270    // Use _debug_id_new (from "debugId" key) only if debug_id
271    // from ( "debug_id" key) is unset
272    sm.set_debug_id(rsm.debug_id.or(rsm._debug_id_new));
273    if let Some(ignore_list) = rsm.ignore_list {
274        for idx in ignore_list {
275            sm.add_to_ignore_list(idx);
276        }
277    }
278
279    Ok(sm)
280}
281
282fn decode_index(rsm: RawSourceMap) -> Result<SourceMapIndex> {
283    let mut sections = vec![];
284
285    for mut raw_section in rsm.sections.unwrap_or_default() {
286        sections.push(SourceMapSection::new(
287            (raw_section.offset.line, raw_section.offset.column),
288            raw_section.url,
289            match raw_section.map.take() {
290                Some(map) => Some(decode_common(*map)?),
291                None => None,
292            },
293        ));
294    }
295
296    sections.sort_by_key(SourceMapSection::get_offset);
297
298    // file sometimes is not a string for unexplicable reasons
299    let file = rsm.file.map(|val| match val {
300        Value::String(s) => s.into(),
301        _ => "<invalid>".into(),
302    });
303
304    Ok(SourceMapIndex::new_ram_bundle_compatible(
305        file,
306        sections,
307        rsm.x_facebook_offsets,
308        rsm.x_metro_module_paths,
309    )
310    .with_debug_id(rsm._debug_id_new.or(rsm.debug_id)))
311}
312
313fn decode_common(rsm: RawSourceMap) -> Result<DecodedMap> {
314    Ok(if rsm.sections.is_some() {
315        DecodedMap::Index(decode_index(rsm)?)
316    } else if rsm.x_facebook_sources.is_some() {
317        DecodedMap::Hermes(decode_hermes(rsm)?)
318    } else {
319        DecodedMap::Regular(decode_regular(rsm)?)
320    })
321}
322
323/// Decodes a sourcemap or sourcemap index from a reader
324///
325/// This supports both sourcemaps and sourcemap indexes unless the
326/// specialized methods on the individual types.
327pub fn decode<R: Read>(rdr: R) -> Result<DecodedMap> {
328    let mut rdr = StripHeaderReader::new(rdr);
329    let mut rdr = BufReader::new(&mut rdr);
330    let rsm: RawSourceMap = serde_json::from_reader(&mut rdr)?;
331    decode_common(rsm)
332}
333
334/// Decodes a sourcemap or sourcemap index from a byte slice
335///
336/// This supports both sourcemaps and sourcemap indexes unless the
337/// specialized methods on the individual types.
338pub fn decode_slice(slice: &[u8]) -> Result<DecodedMap> {
339    let content = strip_junk_header(slice)?;
340    let rsm: RawSourceMap = serde_json::from_slice(content)?;
341    decode_common(rsm)
342}
343
344/// Loads a sourcemap from a data URL
345pub fn decode_data_url(url: &str) -> Result<DecodedMap> {
346    if !url.starts_with(DATA_PREAMBLE) {
347        return Err(Error::InvalidDataUrl);
348    }
349    let data_b64 = &url[DATA_PREAMBLE.len()..];
350    let data = data_encoding::BASE64
351        .decode(data_b64.as_bytes())
352        .map_err(|_| Error::InvalidDataUrl)?;
353    decode_slice(&data[..])
354}
355
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{self, BufRead};

    /// Builds a raw map that `decode_common` treats as an empty index:
    /// `sections` is present but empty and neither debug id key is set.
    fn empty_index_raw_map() -> RawSourceMap {
        RawSourceMap {
            version: Some(3),
            file: Some("test.js".into()),
            sources: None,
            source_root: None,
            sources_content: None,
            sections: Some(vec![]),
            names: None,
            range_mappings: None,
            mappings: None,
            ignore_list: None,
            x_facebook_offsets: None,
            x_metro_module_paths: None,
            x_facebook_sources: None,
            debug_id: None,
            _debug_id_new: None,
        }
    }

    #[test]
    fn test_strip_header() {
        let input: &[_] = b")]}garbage\r\n[1, 2, 3]";
        let mut reader = io::BufReader::new(StripHeaderReader::new(input));
        let mut text = String::new();
        // A read failure must fail the test instead of being swallowed.
        reader
            .read_line(&mut text)
            .expect("reading past the junk header should succeed");
        assert_eq!(text, "[1, 2, 3]");
    }

    #[test]
    fn test_bad_newline() {
        let input: &[_] = b")]}'\r[1, 2, 3]";
        let mut reader = io::BufReader::new(StripHeaderReader::new(input));
        let mut text = String::new();
        match reader.read_line(&mut text) {
            Err(err) => {
                assert_eq!(err.kind(), io::ErrorKind::InvalidData);
            }
            Ok(_) => {
                panic!("Expected failure");
            }
        }
    }

    #[test]
    fn test_strip_junk_header_slice() {
        // Input without a junk header is returned unchanged.
        let plain: &[u8] = b"[1, 2, 3]";
        assert_eq!(strip_junk_header(plain).expect("no header"), plain);

        // The result starts at the newline that terminates the header.
        let stripped = strip_junk_header(b")]}'\n[1, 2, 3]").expect("valid header");
        assert_eq!(stripped, &b"\n[1, 2, 3]"[..]);

        // A carriage return must be followed by a newline.
        let err = strip_junk_header(b")]}'\r[1, 2, 3]").unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }

    #[test]
    fn test_decode_rmi() {
        fn decode(rmi_str: &str) -> Vec<usize> {
            let mut out = bitvec::bitvec![u8, Lsb0; 0; 0];
            decode_rmi(rmi_str, &mut out).expect("failed to decode");

            // Collect the positions of all set bits.
            out.iter()
                .enumerate()
                .filter(|(_, bit)| **bit)
                .map(|(idx, _)| idx)
                .collect()
        }

        // This is 0-based index of the bits
        assert_eq!(decode("AAB"), vec![12]);
        assert_eq!(decode("g"), vec![5]);
        assert_eq!(decode("Bg"), vec![0, 11]);
    }

    #[test]
    fn test_decode_sourcemap_index_no_debug_id() {
        let decoded = decode_common(empty_index_raw_map()).expect("should decoded");
        assert_eq!(
            decoded,
            DecodedMap::Index(SourceMapIndex::new(Some("test.js".into()), vec![]))
        );
    }

    #[test]
    fn test_decode_sourcemap_index_debug_id() {
        const DEBUG_ID: &str = "0123456789abcdef0123456789abcdef";

        let raw = RawSourceMap {
            _debug_id_new: Some(DEBUG_ID.parse().expect("valid debug id")),
            ..empty_index_raw_map()
        };

        let decoded = decode_common(raw).expect("should decode");
        assert_eq!(
            decoded,
            DecodedMap::Index(
                SourceMapIndex::new(Some("test.js".into()), vec![])
                    .with_debug_id(Some(DEBUG_ID.parse().expect("valid debug id")))
            )
        );
    }

    #[test]
    fn test_decode_sourcemap_index_debug_id_from_legacy_key() {
        const DEBUG_ID: &str = "0123456789abcdef0123456789abcdef";

        let raw = RawSourceMap {
            debug_id: Some(DEBUG_ID.parse().expect("valid debug id")),
            ..empty_index_raw_map()
        };

        let decoded = decode_common(raw).expect("should decode");
        assert_eq!(
            decoded,
            DecodedMap::Index(
                SourceMapIndex::new(Some("test.js".into()), vec![])
                    .with_debug_id(Some(DEBUG_ID.parse().expect("valid debug id")))
            )
        );
    }
}