swc_sourcemap/lazy/
mod.rs

1//! This is _lazy_ because we skip deserializing all the fields that we don't need. (unlike the original crate)
2
3use crate::{
4    decoder::{decode_rmi, strip_junk_header},
5    encoder::{encode_rmi, encode_vlq_diff},
6    types::adjust_mappings,
7    vlq::parse_vlq_segment_into,
8    Error, RawToken, Result,
9};
10use std::{
11    borrow::Cow,
12    collections::{BTreeSet, HashMap},
13};
14
15use bitvec::{order::Lsb0, vec::BitVec, view::BitView};
16use bytes_str::BytesStr;
17use serde::{Deserialize, Deserializer, Serialize};
18use serde_json::value::RawValue;
19
20#[derive(Serialize, Deserialize, Debug)]
21#[serde(rename_all = "camelCase")]
22pub struct RawSourceMap<'a> {
23    #[serde(default, skip_serializing_if = "Option::is_none")]
24    pub(crate) version: Option<u32>,
25    #[serde(default, borrow, skip_serializing_if = "Option::is_none")]
26    pub(crate) file: Option<MaybeRawValue<'a, Str>>,
27    #[serde(borrow)]
28    pub(crate) sources: MaybeRawValue<'a, Vec<StrValue<'a>>>,
29    #[serde(default, borrow, skip_serializing_if = "Option::is_none")]
30    pub(crate) source_root: Option<StrValue<'a>>,
31    #[serde(default, borrow, skip_serializing_if = "MaybeRawValue::is_empty")]
32    pub(crate) sources_content: MaybeRawValue<'a, Vec<Option<StrValue<'a>>>>,
33    #[serde(default, skip_serializing_if = "Option::is_none")]
34    pub(crate) sections: Option<Vec<RawSection<'a>>>,
35    #[serde(default, borrow)]
36    pub(crate) names: MaybeRawValue<'a, Vec<StrValue<'a>>>,
37    #[serde(default, skip_serializing_if = "Option::is_none")]
38    pub(crate) range_mappings: Option<String>,
39    #[serde(default, skip_serializing_if = "Option::is_none")]
40    pub(crate) mappings: Option<String>,
41    #[serde(default, borrow, skip_serializing_if = "Option::is_none")]
42    pub(crate) ignore_list: Option<MaybeRawValue<'a, BTreeSet<u32>>>,
43}
44
45#[derive(Serialize, Deserialize, PartialEq, Debug, Clone, Copy)]
46pub struct RawSectionOffset {
47    pub line: u32,
48    pub column: u32,
49}
50
51#[derive(Serialize, Deserialize, Debug)]
52pub struct RawSection<'a> {
53    pub offset: RawSectionOffset,
54    #[serde(borrow)]
55    pub url: Option<&'a RawValue>,
56    #[serde(borrow)]
57    pub map: Option<&'a RawValue>,
58}
59
60#[derive(Debug)]
61pub enum DecodedMap<'a> {
62    Regular(SourceMap<'a>),
63    Index(SourceMapIndex<'a>),
64}
65
66impl<'a> DecodedMap<'a> {
67    pub fn into_source_map(self) -> Result<SourceMap<'a>> {
68        match self {
69            DecodedMap::Regular(source_map) => Ok(source_map),
70            DecodedMap::Index(source_map_index) => source_map_index.flatten(),
71        }
72    }
73}
74
75#[derive(Debug, Clone, Copy, Serialize)]
76#[serde(untagged)]
77pub(crate) enum MaybeRawValue<'a, T> {
78    RawValue(#[serde(borrow)] &'a RawValue),
79    Data(T),
80}
81
82impl<T> MaybeRawValue<'_, Vec<T>> {
83    pub fn is_empty(&self) -> bool {
84        match self {
85            MaybeRawValue::Data(vec) => vec.is_empty(),
86            MaybeRawValue::RawValue(_) => false,
87        }
88    }
89}
90
91impl<'a, 'de, T> Deserialize<'de> for MaybeRawValue<'a, T>
92where
93    'de: 'a,
94    T: Deserialize<'de>,
95{
96    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
97    where
98        D: Deserializer<'de>,
99    {
100        let raw: &'de RawValue = Deserialize::deserialize(deserializer)?;
101        Ok(MaybeRawValue::RawValue(raw))
102    }
103}
104
105impl<'a, T> MaybeRawValue<'a, T>
106where
107    T: Deserialize<'a>,
108{
109    pub fn into_data(self) -> T {
110        match self {
111            MaybeRawValue::RawValue(s) => {
112                serde_json::from_str(s.get()).expect("Failed to convert RawValue to Data")
113            }
114            MaybeRawValue::Data(data) => data,
115        }
116    }
117
118    fn assert_raw_value(self) -> &'a RawValue {
119        match self {
120            MaybeRawValue::RawValue(s) => s,
121            MaybeRawValue::Data(_) => unreachable!("Expected RawValue, got Data"),
122        }
123    }
124}
125
126impl<T> Default for MaybeRawValue<'_, T>
127where
128    T: Default,
129{
130    fn default() -> Self {
131        MaybeRawValue::Data(T::default())
132    }
133}
134
135impl<'a, T> MaybeRawValue<'a, T>
136where
137    T: Deserialize<'a>,
138    T: Default,
139{
140    pub fn as_data(&mut self) -> &mut T {
141        match self {
142            MaybeRawValue::RawValue(s) => {
143                *self = MaybeRawValue::Data(
144                    serde_json::from_str(s.get()).expect("Failed to convert RawValue to Data"),
145                );
146                if let MaybeRawValue::Data(data) = self {
147                    data
148                } else {
149                    unreachable!()
150                }
151            }
152            MaybeRawValue::Data(data) => data,
153        }
154    }
155}
156
157type Str = BytesStr;
158
159type StrValue<'a> = MaybeRawValue<'a, Str>;
160
161#[derive(Debug)]
162pub struct SourceMap<'a> {
163    pub(crate) file: Option<StrValue<'a>>,
164    pub(crate) tokens: Vec<RawToken>,
165    pub(crate) names: MaybeRawValue<'a, Vec<StrValue<'a>>>,
166    pub(crate) source_root: Option<StrValue<'a>>,
167    pub(crate) sources: MaybeRawValue<'a, Vec<StrValue<'a>>>,
168    pub(crate) sources_content: MaybeRawValue<'a, Vec<Option<StrValue<'a>>>>,
169    pub(crate) ignore_list: Option<MaybeRawValue<'a, BTreeSet<u32>>>,
170}
171
172#[derive(Debug)]
173pub(crate) struct SourceMapBuilder<'a> {
174    file: Option<StrValue<'a>>,
175    name_map: HashMap<&'a str, u32>,
176    names: Vec<StrValue<'a>>,
177    tokens: Vec<RawToken>,
178    source_map: HashMap<&'a str, u32>,
179    sources: Vec<StrValue<'a>>,
180    source_contents: Vec<Option<StrValue<'a>>>,
181    source_root: Option<StrValue<'a>>,
182    ignore_list: Option<BTreeSet<u32>>,
183}
184
185impl<'a> SourceMapBuilder<'a> {
186    pub fn new(file: Option<StrValue<'a>>) -> Self {
187        SourceMapBuilder {
188            file,
189            name_map: HashMap::new(),
190            names: Vec::new(),
191            tokens: Vec::new(),
192            source_map: HashMap::new(),
193            sources: Vec::new(),
194            source_contents: Vec::new(),
195            source_root: None,
196            ignore_list: None,
197        }
198    }
199
200    pub fn add_source(&mut self, src_raw: &'a RawValue) -> u32 {
201        let src_str = src_raw.get(); // RawValue provides get() -> &str
202        let count = self.sources.len() as u32;
203        let id = *self.source_map.entry(src_str).or_insert(count);
204        if id == count {
205            // New source
206            self.sources.push(MaybeRawValue::RawValue(src_raw));
207            // Ensure source_contents has a corresponding entry, defaulting to None.
208            // This logic ensures source_contents is always same length as sources if new one added.
209            self.source_contents.resize(self.sources.len(), None);
210        }
211        id
212    }
213
214    pub fn add_name(&mut self, name_raw: &'a RawValue) -> u32 {
215        let name_str = name_raw.get();
216        let count = self.names.len() as u32;
217        let id = *self.name_map.entry(name_str).or_insert(count);
218        if id == count {
219            // New name
220            self.names.push(MaybeRawValue::RawValue(name_raw));
221        }
222        id
223    }
224
225    pub fn set_source_contents(&mut self, src_id: u32, contents: Option<&'a RawValue>) {
226        // Ensure source_contents is large enough. src_id is 0-indexed.
227        if (src_id as usize) >= self.source_contents.len() {
228            self.source_contents.resize(src_id as usize + 1, None);
229        }
230        self.source_contents[src_id as usize] = contents.map(MaybeRawValue::RawValue);
231    }
232
233    pub fn add_to_ignore_list(&mut self, src_id: u32) {
234        self.ignore_list
235            .get_or_insert_with(BTreeSet::new)
236            .insert(src_id);
237    }
238
239    pub fn into_sourcemap(self) -> SourceMap<'a> {
240        SourceMap {
241            file: self.file,
242            tokens: self.tokens,
243            names: MaybeRawValue::Data(self.names),
244            source_root: self.source_root,
245            sources: MaybeRawValue::Data(self.sources),
246            sources_content: MaybeRawValue::Data(self.source_contents),
247            ignore_list: self.ignore_list.map(MaybeRawValue::Data),
248        }
249    }
250
251    /// Adds a new mapping to the builder.
252    #[allow(clippy::too_many_arguments)]
253    pub fn add_raw(
254        &mut self,
255        dst_line: u32,
256        dst_col: u32,
257        src_line: u32,
258        src_col: u32,
259        source: Option<u32>,
260        name: Option<u32>,
261        is_range: bool,
262    ) -> RawToken {
263        let src_id = source.unwrap_or(!0);
264        let name_id = name.unwrap_or(!0);
265        let raw = RawToken {
266            dst_line,
267            dst_col,
268            src_line,
269            src_col,
270            src_id,
271            name_id,
272            is_range,
273        };
274        self.tokens.push(raw);
275        raw
276    }
277}
278
279#[derive(Debug)]
280pub(crate) struct SourceMapSection<'a> {
281    offset: (u32, u32),
282    url: Option<MaybeRawValue<'a, String>>,
283    map: Option<Box<MaybeRawValue<'a, RawSourceMap<'a>>>>,
284}
285
286impl<'a> SourceMapSection<'a> {
287    /// Create a new sourcemap index section
288    ///
289    /// - `offset`: offset as line and column
290    /// - `url`: optional URL of where the sourcemap is located
291    /// - `map`: an optional already resolved internal sourcemap
292    pub fn new(
293        offset: (u32, u32),
294        url: Option<MaybeRawValue<'a, String>>,
295        map: Option<MaybeRawValue<'a, RawSourceMap<'a>>>,
296    ) -> SourceMapSection<'a> {
297        SourceMapSection {
298            offset,
299            url,
300            map: map.map(Box::new),
301        }
302    }
303
304    /// Returns the offset as tuple
305    pub fn get_offset(&self) -> (u32, u32) {
306        self.offset
307    }
308}
309
310#[derive(Debug)]
311pub struct SourceMapIndex<'a> {
312    file: Option<MaybeRawValue<'a, Str>>,
313    sections: Vec<SourceMapSection<'a>>,
314}
315
316pub fn decode(slice: &[u8]) -> Result<DecodedMap<'_>> {
317    let content = strip_junk_header(slice)?;
318    let rsm: RawSourceMap = serde_json::from_slice(content)?;
319
320    decode_common(rsm)
321}
322
323fn decode_common(rsm: RawSourceMap) -> Result<DecodedMap> {
324    if rsm.sections.is_some() {
325        decode_index(rsm).map(DecodedMap::Index)
326    } else {
327        decode_regular(rsm).map(DecodedMap::Regular)
328    }
329}
330
331fn decode_index(rsm: RawSourceMap) -> Result<SourceMapIndex> {
332    let mut sections = vec![];
333
334    for raw_section in rsm.sections.unwrap_or_default() {
335        sections.push(SourceMapSection::new(
336            (raw_section.offset.line, raw_section.offset.column),
337            raw_section.url.map(MaybeRawValue::RawValue),
338            raw_section.map.map(MaybeRawValue::RawValue),
339        ));
340    }
341
342    sections.sort_by_key(SourceMapSection::get_offset);
343
344    // file sometimes is not a string for unexplicable reasons
345    let file = rsm.file;
346
347    Ok(SourceMapIndex { file, sections })
348}
349
350pub fn decode_regular(rsm: RawSourceMap) -> Result<SourceMap> {
351    let mut dst_col;
352
353    // Source IDs, lines, columns, and names are "running" values.
354    // Each token (except the first) contains the delta from the previous value.
355    let mut running_src_id = 0;
356    let mut running_src_line = 0;
357    let mut running_src_col = 0;
358    let mut running_name_id = 0;
359
360    let range_mappings = rsm.range_mappings.unwrap_or_default();
361    let mappings = rsm.mappings.unwrap_or_default();
362    let allocation_size = mappings.matches(&[',', ';'][..]).count() + 10;
363    let mut tokens = Vec::with_capacity(allocation_size);
364
365    let mut nums = Vec::with_capacity(6);
366    let mut rmi = BitVec::new();
367
368    for (dst_line, (line, rmi_str)) in mappings
369        .split(';')
370        .zip(range_mappings.split(';').chain(std::iter::repeat("")))
371        .enumerate()
372    {
373        if line.is_empty() {
374            continue;
375        }
376
377        dst_col = 0;
378
379        decode_rmi(rmi_str, &mut rmi)?;
380
381        for (line_index, segment) in line.split(',').enumerate() {
382            if segment.is_empty() {
383                continue;
384            }
385
386            nums.clear();
387            parse_vlq_segment_into(segment, &mut nums)?;
388            match nums.len() {
389                1 | 4 | 5 => {}
390                _ => return Err(Error::BadSegmentSize(nums.len() as u32)),
391            }
392
393            dst_col = (i64::from(dst_col) + nums[0]) as u32;
394
395            // The source file , source line, source column, and name
396            // may not be present in the current token. We use `u32::MAX`
397            // as the placeholder for missing values.
398            let mut current_src_id = !0;
399            let mut current_src_line = !0;
400            let mut current_src_col = !0;
401            let mut current_name_id = !0;
402
403            if nums.len() > 1 {
404                running_src_id = (i64::from(running_src_id) + nums[1]) as u32;
405
406                running_src_line = (i64::from(running_src_line) + nums[2]) as u32;
407                running_src_col = (i64::from(running_src_col) + nums[3]) as u32;
408
409                current_src_id = running_src_id;
410                current_src_line = running_src_line;
411                current_src_col = running_src_col;
412
413                if nums.len() > 4 {
414                    running_name_id = (i64::from(running_name_id) + nums[4]) as u32;
415                    current_name_id = running_name_id;
416                }
417            }
418
419            let is_range = rmi.get(line_index).map(|v| *v).unwrap_or_default();
420
421            tokens.push(RawToken {
422                dst_line: dst_line as u32,
423                dst_col,
424                src_line: current_src_line,
425                src_col: current_src_col,
426                src_id: current_src_id,
427                name_id: current_name_id,
428                is_range,
429            });
430        }
431    }
432
433    let sm = SourceMap {
434        file: rsm.file,
435        tokens,
436        names: rsm.names,
437        source_root: rsm.source_root,
438        sources: rsm.sources,
439        sources_content: rsm.sources_content,
440        ignore_list: rsm.ignore_list,
441    };
442
443    Ok(sm)
444}
445
446impl<'a> SourceMap<'a> {
447    /// Refer to [crate::SourceMap::adjust_mappings] for more details.
448    pub fn adjust_mappings(&mut self, adjustment: crate::SourceMap) {
449        self.tokens = adjust_mappings(
450            std::mem::take(&mut self.tokens),
451            Cow::Owned(adjustment.tokens),
452        );
453    }
454
455    pub fn into_raw_sourcemap(self) -> RawSourceMap<'a> {
456        RawSourceMap {
457            version: Some(3),
458            range_mappings: serialize_range_mappings(&self),
459            mappings: Some(serialize_mappings(&self)),
460            file: self.file,
461            sources: self.sources,
462            source_root: self.source_root,
463            sources_content: self.sources_content,
464            sections: None,
465            names: self.names,
466            ignore_list: self.ignore_list,
467        }
468    }
469
470    pub fn file(&mut self) -> Option<&BytesStr> {
471        self.file.as_mut().map(|f| &*f.as_data())
472    }
473
474    pub fn sources(&mut self) -> impl Iterator<Item = &'_ BytesStr> + use<'_, 'a> {
475        self.sources.as_data().iter_mut().map(|d| &*d.as_data())
476    }
477
478    pub fn get_source(&mut self, src_id: u32) -> Option<&BytesStr> {
479        self.sources
480            .as_data()
481            .get_mut(src_id as usize)
482            .map(|d| &*d.as_data())
483    }
484
485    pub fn get_name(&mut self, src_id: u32) -> Option<&BytesStr> {
486        self.names
487            .as_data()
488            .get_mut(src_id as usize)
489            .map(|d| &*d.as_data())
490    }
491
492    pub fn get_source_contents(&mut self, src_id: u32) -> Option<&BytesStr> {
493        self.sources_content
494            .as_data()
495            .get_mut(src_id as usize)
496            .and_then(|d| d.as_mut().map(|d| &*d.as_data()))
497    }
498}
499
500impl<'a> SourceMapIndex<'a> {
501    pub fn flatten(self) -> Result<SourceMap<'a>> {
502        let mut builder = SourceMapBuilder::new(self.file);
503
504        for section in self.sections {
505            let (off_line, off_col) = section.get_offset();
506
507            let map = match section.map {
508                Some(map) => match decode_common(map.into_data())? {
509                    DecodedMap::Regular(sm) => sm,
510                    DecodedMap::Index(idx) => idx.flatten()?,
511                },
512                None => {
513                    return Err(Error::CannotFlatten(format!(
514                        "Section has an unresolved sourcemap: {}",
515                        section
516                            .url
517                            .map(|v| v.into_data())
518                            .as_deref()
519                            .unwrap_or("<unknown url>")
520                    )));
521                }
522            };
523
524            let sources = map.sources.into_data();
525            let source_contents = map.sources_content.into_data();
526            let ignore_list = map.ignore_list.unwrap_or_default().into_data();
527
528            let mut src_id_map = Vec::<u32>::with_capacity(sources.len());
529
530            for (original_id, (source, contents)) in
531                sources.into_iter().zip(source_contents).enumerate()
532            {
533                debug_assert_eq!(original_id, src_id_map.len());
534                let src_id = builder.add_source(source.assert_raw_value());
535
536                src_id_map.push(src_id);
537
538                if let Some(contents) = contents {
539                    builder.set_source_contents(src_id, Some(contents.assert_raw_value()));
540                }
541            }
542
543            let names = map.names.into_data();
544            let mut name_id_map = Vec::<u32>::with_capacity(names.len());
545
546            for (original_id, name) in names.into_iter().enumerate() {
547                debug_assert_eq!(original_id, name_id_map.len());
548                let name_id = builder.add_name(name.assert_raw_value());
549                name_id_map.push(name_id);
550            }
551
552            for token in map.tokens {
553                let dst_col = if token.dst_line == 0 {
554                    token.dst_col + off_col
555                } else {
556                    token.dst_col
557                };
558
559                // Use u32 -> u32 map instead of using the hash map in SourceMapBuilder for better
560                // performance
561                let original_src_id = token.src_id;
562                let src_id = if original_src_id == !0 {
563                    None
564                } else {
565                    src_id_map.get(original_src_id as usize).copied()
566                };
567
568                let original_name_id = token.name_id;
569                let name_id = if original_name_id == !0 {
570                    None
571                } else {
572                    name_id_map.get(original_name_id as usize).copied()
573                };
574
575                let raw = builder.add_raw(
576                    token.dst_line + off_line,
577                    dst_col,
578                    token.src_line,
579                    token.src_col,
580                    src_id,
581                    name_id,
582                    token.is_range,
583                );
584
585                if ignore_list.contains(&token.src_id) {
586                    builder.add_to_ignore_list(raw.src_id);
587                }
588            }
589        }
590
591        Ok(builder.into_sourcemap())
592    }
593}
594
595fn serialize_range_mappings(sm: &SourceMap) -> Option<String> {
596    let mut buf = Vec::new();
597    let mut prev_line = 0;
598    let mut had_rmi = false;
599    let mut empty = true;
600
601    let mut idx_of_first_in_line = 0;
602
603    let mut rmi_data = Vec::<u8>::new();
604
605    for (idx, token) in sm.tokens.iter().enumerate() {
606        if token.is_range {
607            had_rmi = true;
608            empty = false;
609
610            let num = idx - idx_of_first_in_line;
611
612            rmi_data.resize(rmi_data.len() + 2, 0);
613
614            let rmi_bits = rmi_data.view_bits_mut::<Lsb0>();
615            rmi_bits.set(num, true);
616        }
617
618        while token.dst_line != prev_line {
619            if had_rmi {
620                encode_rmi(&mut buf, &rmi_data);
621                rmi_data.clear();
622            }
623
624            buf.push(b';');
625            prev_line += 1;
626            had_rmi = false;
627            idx_of_first_in_line = idx;
628        }
629    }
630    if empty {
631        return None;
632    }
633
634    if had_rmi {
635        encode_rmi(&mut buf, &rmi_data);
636    }
637
638    Some(String::from_utf8(buf).expect("invalid utf8"))
639}
640
641fn serialize_mappings(sm: &SourceMap) -> String {
642    let mut rv = String::new();
643    // dst == minified == generated
644    let mut prev_dst_line = 0;
645    let mut prev_dst_col = 0;
646    let mut prev_src_line = 0;
647    let mut prev_src_col = 0;
648    let mut prev_name_id = 0;
649    let mut prev_src_id = 0;
650
651    for (idx, token) in sm.tokens.iter().enumerate() {
652        if token.dst_line != prev_dst_line {
653            prev_dst_col = 0;
654            while token.dst_line != prev_dst_line {
655                rv.push(';');
656                prev_dst_line += 1;
657            }
658        } else if idx > 0 {
659            if Some(&token) == sm.tokens.get(idx - 1).as_ref() {
660                continue;
661            }
662            rv.push(',');
663        }
664
665        encode_vlq_diff(&mut rv, token.dst_col, prev_dst_col);
666        prev_dst_col = token.dst_col;
667
668        if token.src_id != !0 {
669            encode_vlq_diff(&mut rv, token.src_id, prev_src_id);
670            prev_src_id = token.src_id;
671            encode_vlq_diff(&mut rv, token.src_line, prev_src_line);
672            prev_src_line = token.src_line;
673            encode_vlq_diff(&mut rv, token.src_col, prev_src_col);
674            prev_src_col = token.src_col;
675            if token.name_id != !0 {
676                encode_vlq_diff(&mut rv, token.name_id, prev_name_id);
677                prev_name_id = token.name_id;
678            }
679        }
680    }
681
682    rv
683}