sourcemap/
decoder.rs

1use std::io;
2use std::io::{BufReader, Read};
3
4use bitvec::field::BitField;
5use bitvec::order::Lsb0;
6use bitvec::vec::BitVec;
7use serde_json::Value;
8
9use crate::errors::{Error, Result};
10use crate::hermes::decode_hermes;
11use crate::jsontypes::RawSourceMap;
12use crate::types::{DecodedMap, RawToken, SourceMap, SourceMapIndex, SourceMapSection};
13use crate::vlq::parse_vlq_segment_into;
14
/// Prefix a data URL must start with to be accepted by `decode_data_url`.
const DATA_PREAMBLE: &'static str = "data:application/json;base64,";
16
/// Progress of the junk-header detection performed by `StripHeaderReader`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HeaderState {
    /// No byte has been inspected yet.
    Undecided,
    /// The first byte was a junk byte; input is discarded until a line
    /// terminator shows up.
    Junk,
    /// A `\r` was seen while discarding junk; the next byte must be `\n`.
    AwaitingNewline,
    /// Header handling is finished; all further bytes are passed through.
    PastHeader,
}
24
25pub struct StripHeaderReader<R: Read> {
26    r: R,
27    header_state: HeaderState,
28}
29
30impl<R: Read> StripHeaderReader<R> {
31    pub fn new(reader: R) -> StripHeaderReader<R> {
32        StripHeaderReader {
33            r: reader,
34            header_state: HeaderState::Undecided,
35        }
36    }
37}
38
/// Returns `true` for the bytes treated as junk-header characters:
/// `)`, `]`, `}` and `'`.
fn is_junk_json(byte: u8) -> bool {
    matches!(byte, b')' | b']' | b'}' | b'\'')
}
42
43impl<R: Read> Read for StripHeaderReader<R> {
44    #[inline(always)]
45    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
46        if self.header_state == HeaderState::PastHeader {
47            return self.r.read(buf);
48        }
49        self.strip_head_read(buf)
50    }
51}
52
53impl<R: Read> StripHeaderReader<R> {
54    fn strip_head_read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
55        let mut backing = vec![0; buf.len()];
56        let local_buf: &mut [u8] = &mut backing;
57
58        loop {
59            let read = self.r.read(local_buf)?;
60            if read == 0 {
61                return Ok(0);
62            }
63            for (offset, &byte) in local_buf[0..read].iter().enumerate() {
64                self.header_state = match self.header_state {
65                    HeaderState::Undecided => {
66                        if is_junk_json(byte) {
67                            HeaderState::Junk
68                        } else {
69                            buf[..read].copy_from_slice(&local_buf[..read]);
70                            self.header_state = HeaderState::PastHeader;
71                            return Ok(read);
72                        }
73                    }
74                    HeaderState::Junk => {
75                        if byte == b'\r' {
76                            HeaderState::AwaitingNewline
77                        } else if byte == b'\n' {
78                            HeaderState::PastHeader
79                        } else {
80                            HeaderState::Junk
81                        }
82                    }
83                    HeaderState::AwaitingNewline => {
84                        if byte == b'\n' {
85                            HeaderState::PastHeader
86                        } else {
87                            fail!(io::Error::new(
88                                io::ErrorKind::InvalidData,
89                                "expected newline"
90                            ));
91                        }
92                    }
93                    HeaderState::PastHeader => {
94                        let rem = read - offset;
95                        buf[..rem].copy_from_slice(&local_buf[offset..read]);
96                        return Ok(rem);
97                    }
98                };
99            }
100        }
101    }
102}
103
104pub fn strip_junk_header(slice: &[u8]) -> io::Result<&[u8]> {
105    if slice.is_empty() || !is_junk_json(slice[0]) {
106        return Ok(slice);
107    }
108    let mut need_newline = false;
109    for (idx, &byte) in slice.iter().enumerate() {
110        if need_newline && byte != b'\n' {
111            fail!(io::Error::new(
112                io::ErrorKind::InvalidData,
113                "expected newline"
114            ));
115        } else if is_junk_json(byte) {
116            continue;
117        } else if byte == b'\r' {
118            need_newline = true;
119        } else if byte == b'\n' {
120            return Ok(&slice[idx..]);
121        }
122    }
123    Ok(&slice[slice.len()..])
124}
125
126/// Decodes range mappping bitfield string into index
127fn decode_rmi(rmi_str: &str, val: &mut BitVec<u8, Lsb0>) -> Result<()> {
128    val.clear();
129    val.resize(rmi_str.len() * 6, false);
130
131    for (idx, &byte) in rmi_str.as_bytes().iter().enumerate() {
132        let byte = match byte {
133            b'A'..=b'Z' => byte - b'A',
134            b'a'..=b'z' => byte - b'a' + 26,
135            b'0'..=b'9' => byte - b'0' + 52,
136            b'+' => 62,
137            b'/' => 63,
138            _ => {
139                fail!(Error::InvalidBase64(byte as char));
140            }
141        };
142
143        val[6 * idx..6 * (idx + 1)].store_le::<u8>(byte);
144    }
145
146    Ok(())
147}
148
149pub fn decode_regular(rsm: RawSourceMap) -> Result<SourceMap> {
150    let mut dst_col;
151    let mut src_id = 0;
152    let mut src_line = 0;
153    let mut src_col = 0;
154    let mut name_id = 0;
155
156    let names = rsm.names.unwrap_or_default();
157    let sources = rsm.sources.unwrap_or_default();
158    let range_mappings = rsm.range_mappings.unwrap_or_default();
159    let mappings = rsm.mappings.unwrap_or_default();
160    let allocation_size = mappings.matches(&[',', ';'][..]).count() + 10;
161    let mut tokens = Vec::with_capacity(allocation_size);
162
163    let mut nums = Vec::with_capacity(6);
164    let mut rmi = BitVec::new();
165
166    for (dst_line, (line, rmi_str)) in mappings
167        .split(';')
168        .zip(range_mappings.split(';').chain(std::iter::repeat("")))
169        .enumerate()
170    {
171        if line.is_empty() {
172            continue;
173        }
174
175        dst_col = 0;
176
177        decode_rmi(rmi_str, &mut rmi)?;
178
179        for (line_index, segment) in line.split(',').enumerate() {
180            if segment.is_empty() {
181                continue;
182            }
183
184            nums.clear();
185            parse_vlq_segment_into(segment, &mut nums)?;
186            dst_col = (i64::from(dst_col) + nums[0]) as u32;
187
188            let mut src = !0;
189            let mut name = !0;
190
191            if nums.len() > 1 {
192                if nums.len() != 4 && nums.len() != 5 {
193                    fail!(Error::BadSegmentSize(nums.len() as u32));
194                }
195                src_id = (i64::from(src_id) + nums[1]) as u32;
196                if src_id >= sources.len() as u32 {
197                    fail!(Error::BadSourceReference(src_id));
198                }
199
200                src = src_id;
201                src_line = (i64::from(src_line) + nums[2]) as u32;
202                src_col = (i64::from(src_col) + nums[3]) as u32;
203
204                if nums.len() > 4 {
205                    name_id = (i64::from(name_id) + nums[4]) as u32;
206                    if name_id >= names.len() as u32 {
207                        fail!(Error::BadNameReference(name_id));
208                    }
209                    name = name_id;
210                }
211            }
212
213            let is_range = rmi.get(line_index).map(|v| *v).unwrap_or_default();
214
215            tokens.push(RawToken {
216                dst_line: dst_line as u32,
217                dst_col,
218                src_line,
219                src_col,
220                src_id: src,
221                name_id: name,
222                is_range,
223            });
224        }
225    }
226
227    let sources = sources
228        .into_iter()
229        .map(Option::unwrap_or_default)
230        .map(Into::into)
231        .collect();
232
233    // apparently we can encounter some non string types in real world
234    // sourcemaps :(
235    let names = names
236        .into_iter()
237        .map(|val| match val {
238            Value::String(s) => s.into(),
239            Value::Number(num) => num.to_string().into(),
240            _ => "".into(),
241        })
242        .collect::<Vec<_>>();
243
244    // file sometimes is not a string for unexplicable reasons
245    let file = rsm.file.map(|val| match val {
246        Value::String(s) => s.into(),
247        _ => "<invalid>".into(),
248    });
249
250    let source_content = rsm
251        .sources_content
252        .map(|x| x.into_iter().map(|v| v.map(Into::into)).collect::<Vec<_>>());
253
254    let mut sm = SourceMap::new(file, tokens, names, sources, source_content);
255    sm.set_source_root(rsm.source_root);
256    // Use _debug_id_new (from "debugId" key) only if debug_id
257    // from ( "debug_id" key) is unset
258    sm.set_debug_id(rsm.debug_id.or(rsm._debug_id_new));
259    if let Some(ignore_list) = rsm.ignore_list {
260        for idx in ignore_list {
261            sm.add_to_ignore_list(idx);
262        }
263    }
264
265    Ok(sm)
266}
267
268fn decode_index(rsm: RawSourceMap) -> Result<SourceMapIndex> {
269    let mut sections = vec![];
270
271    for mut raw_section in rsm.sections.unwrap_or_default() {
272        sections.push(SourceMapSection::new(
273            (raw_section.offset.line, raw_section.offset.column),
274            raw_section.url,
275            match raw_section.map.take() {
276                Some(map) => Some(decode_common(*map)?),
277                None => None,
278            },
279        ));
280    }
281
282    sections.sort_by_key(SourceMapSection::get_offset);
283
284    // file sometimes is not a string for unexplicable reasons
285    let file = rsm.file.map(|val| match val {
286        Value::String(s) => s,
287        _ => "<invalid>".into(),
288    });
289
290    Ok(SourceMapIndex::new_ram_bundle_compatible(
291        file,
292        sections,
293        rsm.x_facebook_offsets,
294        rsm.x_metro_module_paths,
295    )
296    .with_debug_id(rsm._debug_id_new.or(rsm.debug_id)))
297}
298
299fn decode_common(rsm: RawSourceMap) -> Result<DecodedMap> {
300    Ok(if rsm.sections.is_some() {
301        DecodedMap::Index(decode_index(rsm)?)
302    } else if rsm.x_facebook_sources.is_some() {
303        DecodedMap::Hermes(decode_hermes(rsm)?)
304    } else {
305        DecodedMap::Regular(decode_regular(rsm)?)
306    })
307}
308
309/// Decodes a sourcemap or sourcemap index from a reader
310///
311/// This supports both sourcemaps and sourcemap indexes unless the
312/// specialized methods on the individual types.
313pub fn decode<R: Read>(rdr: R) -> Result<DecodedMap> {
314    let mut rdr = StripHeaderReader::new(rdr);
315    let mut rdr = BufReader::new(&mut rdr);
316    let rsm: RawSourceMap = serde_json::from_reader(&mut rdr)?;
317    decode_common(rsm)
318}
319
320/// Decodes a sourcemap or sourcemap index from a byte slice
321///
322/// This supports both sourcemaps and sourcemap indexes unless the
323/// specialized methods on the individual types.
324pub fn decode_slice(slice: &[u8]) -> Result<DecodedMap> {
325    let content = strip_junk_header(slice)?;
326    let rsm: RawSourceMap = serde_json::from_slice(content)?;
327    decode_common(rsm)
328}
329
330/// Loads a sourcemap from a data URL
331pub fn decode_data_url(url: &str) -> Result<DecodedMap> {
332    if !url.starts_with(DATA_PREAMBLE) {
333        fail!(Error::InvalidDataUrl);
334    }
335    let data_b64 = &url[DATA_PREAMBLE.len()..];
336    let data = data_encoding::BASE64
337        .decode(data_b64.as_bytes())
338        .map_err(|_| Error::InvalidDataUrl)?;
339    decode_slice(&data[..])
340}
341
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{self, BufRead};

    /// Debug id shared by the index tests.
    const DEBUG_ID: &str = "0123456789abcdef0123456789abcdef";

    /// Raw form of an empty sourcemap index for `test.js` without any debug id.
    fn empty_index_raw_map() -> RawSourceMap {
        RawSourceMap {
            version: Some(3),
            file: Some("test.js".into()),
            sources: None,
            source_root: None,
            sources_content: None,
            sections: Some(vec![]),
            names: None,
            range_mappings: None,
            mappings: None,
            ignore_list: None,
            x_facebook_offsets: None,
            x_metro_module_paths: None,
            x_facebook_sources: None,
            debug_id: None,
            _debug_id_new: None,
        }
    }

    #[test]
    fn test_strip_header() {
        let input: &[u8] = b")]}garbage\r\n[1, 2, 3]";
        let mut reader = io::BufReader::new(StripHeaderReader::new(input));

        let mut text = String::new();
        reader.read_line(&mut text).ok();

        assert_eq!(text, "[1, 2, 3]");
    }

    #[test]
    fn test_bad_newline() {
        // A lone `\r` inside the header must be rejected.
        let input: &[u8] = b")]}'\r[1, 2, 3]";
        let mut reader = io::BufReader::new(StripHeaderReader::new(input));

        let mut text = String::new();
        if let Err(err) = reader.read_line(&mut text) {
            assert_eq!(err.kind(), io::ErrorKind::InvalidData);
        } else {
            panic!("Expected failure");
        }
    }

    #[test]
    fn test_decode_rmi() {
        /// Returns the 0-based positions of all set bits.
        fn decode(rmi_str: &str) -> Vec<usize> {
            let mut out = BitVec::<u8, Lsb0>::new();
            decode_rmi(rmi_str, &mut out).expect("failed to decode");

            out.iter()
                .enumerate()
                .filter(|(_, bit)| **bit)
                .map(|(idx, _)| idx)
                .collect()
        }

        assert_eq!(decode("AAB"), vec![12]);
        assert_eq!(decode("g"), vec![5]);
        assert_eq!(decode("Bg"), vec![0, 11]);
    }

    #[test]
    fn test_decode_sourcemap_index_no_debug_id() {
        let expected = DecodedMap::Index(SourceMapIndex::new(Some("test.js".into()), vec![]));

        let decoded = decode_common(empty_index_raw_map()).expect("should decoded");

        assert_eq!(decoded, expected);
    }

    #[test]
    fn test_decode_sourcemap_index_debug_id() {
        let mut raw = empty_index_raw_map();
        raw._debug_id_new = Some(DEBUG_ID.parse().expect("valid debug id"));

        let expected = DecodedMap::Index(
            SourceMapIndex::new(Some("test.js".into()), vec![])
                .with_debug_id(Some(DEBUG_ID.parse().expect("valid debug id"))),
        );

        let decoded = decode_common(raw).expect("should decode");

        assert_eq!(decoded, expected);
    }

    #[test]
    fn test_decode_sourcemap_index_debug_id_from_legacy_key() {
        let mut raw = empty_index_raw_map();
        raw.debug_id = Some(DEBUG_ID.parse().expect("valid debug id"));

        let expected = DecodedMap::Index(
            SourceMapIndex::new(Some("test.js".into()), vec![])
                .with_debug_id(Some(DEBUG_ID.parse().expect("valid debug id"))),
        );

        let decoded = decode_common(raw).expect("should decode");

        assert_eq!(decoded, expected);
    }
}