hstr/
dynamic.rs

1use std::{
2    borrow::Cow,
3    cell::RefCell,
4    ffi::c_void,
5    hash::{BuildHasherDefault, Hash, Hasher},
6    mem::ManuallyDrop,
7    num::NonZeroU8,
8    ops::Deref,
9    ptr::NonNull,
10};
11
12use rustc_hash::FxHasher;
13use triomphe::ThinArc;
14
15use crate::{
16    tagged_value::{TaggedValue, MAX_INLINE_LEN},
17    wtf8::Wtf8,
18    Atom, Wtf8Atom, INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
19};
20
21#[derive(PartialEq, Eq)]
22pub(crate) struct Metadata {
23    pub hash: u64,
24}
25
26#[derive(Clone, PartialEq, Eq)]
27pub(crate) struct Item(pub ThinArc<Metadata, u8>);
28
29impl Deref for Item {
30    type Target = <ThinArc<Metadata, u8> as Deref>::Target;
31
32    fn deref(&self) -> &Self::Target {
33        &self.0
34    }
35}
36
37impl Hash for Item {
38    fn hash<H: Hasher>(&self, state: &mut H) {
39        state.write_u64(self.0.header.header.hash);
40    }
41}
42
43pub(crate) unsafe fn deref_from(ptr: TaggedValue) -> ManuallyDrop<Item> {
44    let item = restore_arc(ptr);
45
46    ManuallyDrop::new(item)
47}
48
49pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item {
50    let ptr = v.get_ptr();
51    Item(ThinArc::from_raw(ptr))
52}
53
54/// A store that stores [Atom]s. Can be merged with other [AtomStore]s for
55/// better performance.
56///
57///
58/// # Merging [AtomStore]
59pub struct AtomStore {
60    pub(crate) data: hashbrown::HashMap<Item, (), BuildEntryHasher>,
61}
62
63impl Default for AtomStore {
64    fn default() -> Self {
65        Self {
66            data: hashbrown::HashMap::with_capacity_and_hasher(64, BuildEntryHasher::default()),
67        }
68    }
69}
70
71impl AtomStore {
72    #[inline(always)]
73    pub fn atom<'a>(&mut self, text: impl Into<Cow<'a, str>>) -> Atom {
74        atom_in(self, &text.into())
75    }
76
77    #[inline(always)]
78    pub fn wtf8_atom<'a>(&mut self, text: impl Into<Cow<'a, Wtf8>>) -> Wtf8Atom {
79        wtf8_atom_in(self, text.into().as_bytes())
80    }
81
82    fn gc(&mut self) {
83        self.data.retain(|item, _| {
84            let count = ThinArc::strong_count(&item.0);
85            debug_assert!(count > 0);
86            count > 1
87        });
88    }
89}
90
91thread_local! {
92    static GLOBAL_DATA: RefCell<AtomStore> = Default::default();
93}
94
95/// Cleans up atoms in the global store that are no longer referenced.
96pub fn global_atom_store_gc() {
97    GLOBAL_DATA.with(|global| {
98        let mut store = global.borrow_mut();
99        store.gc();
100    });
101}
102
103pub(crate) fn global_wtf8_atom(text: &[u8]) -> Wtf8Atom {
104    GLOBAL_DATA.with(|global| {
105        let mut store = global.borrow_mut();
106
107        wtf8_atom_in(&mut *store, text)
108    })
109}
110
111pub(crate) fn global_atom(text: &str) -> Atom {
112    GLOBAL_DATA.with(|global| {
113        let mut store = global.borrow_mut();
114
115        atom_in(&mut *store, text)
116    })
117}
118
119fn wtf8_atom_in<S>(storage: S, text: &[u8]) -> Wtf8Atom
120where
121    S: Storage,
122{
123    let len = text.len();
124
125    if len <= MAX_INLINE_LEN {
126        // INLINE_TAG ensures this is never zero
127        let tag = INLINE_TAG_INIT | ((len as u8) << LEN_OFFSET);
128        let mut unsafe_data = TaggedValue::new_tag(tag);
129        unsafe {
130            unsafe_data.data_mut()[..len].copy_from_slice(text);
131        }
132        return Wtf8Atom { unsafe_data };
133    }
134
135    let hash = calc_hash(text);
136    let entry = storage.insert_entry(text, hash);
137    let entry = ThinArc::into_raw(entry.0) as *mut c_void;
138
139    let ptr: NonNull<c_void> = unsafe {
140        // Safety: Arc::into_raw returns a non-null pointer
141        NonNull::new_unchecked(entry)
142    };
143    debug_assert!(0 == ptr.as_ptr() as u8 & TAG_MASK);
144    Wtf8Atom {
145        unsafe_data: TaggedValue::new_ptr(ptr),
146    }
147}
148
149/// This can create any kind of [Atom], although this lives in the `dynamic`
150/// module.
151fn atom_in<S>(storage: S, text: &str) -> Atom
152where
153    S: Storage,
154{
155    // SAFETY: `text` is valid UTF-8
156    unsafe { Atom::from_wtf8_unchecked(wtf8_atom_in(storage, text.as_bytes())) }
157}
158
159/// Attempts to construct an [Atom] but only if it can be constructed inline.
160/// This is primarily useful in constant contexts.
161pub(crate) const fn inline_atom(text: &str) -> Option<Atom> {
162    let len = text.len();
163    if len <= MAX_INLINE_LEN {
164        // INLINE_TAG ensures this is never zero
165        let tag = INLINE_TAG | ((len as u8) << LEN_OFFSET);
166        let mut unsafe_data = TaggedValue::new_tag(NonZeroU8::new(tag).unwrap());
167        // This odd pattern is needed because we cannot create slices from ranges or
168        // ranges at all in constant context.  So we write our own copying loop.
169        unsafe {
170            let data = unsafe_data.data_mut();
171            let bytes = text.as_bytes();
172            let mut i = 0;
173            while i < len {
174                data[i] = bytes[i];
175                i += 1;
176            }
177        }
178        return Some(Atom { unsafe_data });
179    }
180    None
181}
182
183trait Storage {
184    fn insert_entry(self, text: &[u8], hash: u64) -> Item;
185}
186
187impl Storage for &'_ mut AtomStore {
188    fn insert_entry(self, text: &[u8], hash: u64) -> Item {
189        // If the text is too long, interning is not worth it.
190        if text.len() > 512 {
191            return Item(ThinArc::from_header_and_slice(Metadata { hash }, text));
192        }
193
194        let (entry, _) = self
195            .data
196            .raw_entry_mut()
197            .from_hash(hash, |key| {
198                key.header.header.hash == hash && key.slice.eq(text)
199            })
200            .or_insert_with(move || {
201                (
202                    Item(ThinArc::from_header_and_slice(Metadata { hash }, text)),
203                    (),
204                )
205            });
206        entry.clone()
207    }
208}
209
210#[inline(always)]
211fn calc_hash(text: &[u8]) -> u64 {
212    let mut hasher = FxHasher::default();
213    text.hash(&mut hasher);
214    hasher.finish()
215}
216
217type BuildEntryHasher = BuildHasherDefault<EntryHasher>;
218
219/// A "no-op" hasher for [Entry] that returns [Entry::hash]. The design is
220/// inspired by the `nohash-hasher` crate.
221///
222/// Assumption: [Arc]'s implementation of [Hash] is a simple pass-through.
223#[derive(Default)]
224pub(crate) struct EntryHasher {
225    hash: u64,
226    #[cfg(debug_assertions)]
227    write_called: bool,
228}
229
230impl Hasher for EntryHasher {
231    fn finish(&self) -> u64 {
232        #[cfg(debug_assertions)]
233        debug_assert!(
234            self.write_called,
235            "EntryHasher expects write_u64 to have been called",
236        );
237        self.hash
238    }
239
240    fn write(&mut self, _bytes: &[u8]) {
241        panic!("EntryHasher expects to be called with write_u64");
242    }
243
244    fn write_u64(&mut self, val: u64) {
245        #[cfg(debug_assertions)]
246        {
247            debug_assert!(
248                !self.write_called,
249                "EntryHasher expects write_u64 to be called only once",
250            );
251            self.write_called = true;
252        }
253
254        self.hash = val;
255    }
256}
257
258#[cfg(test)]
259mod tests {
260    use crate::{dynamic::GLOBAL_DATA, global_atom_store_gc, Atom};
261
262    fn expect_size(expected: usize) {
263        // This is a helper function to count the number of bytes in the global store.
264        GLOBAL_DATA.with(|global| {
265            let store = global.borrow();
266            assert_eq!(store.data.len(), expected);
267        })
268    }
269
270    #[test]
271    fn global_ref_count_dynamic_0() {
272        expect_size(0);
273
274        // The strings should be long enough so that they are not inline even under
275        // feature `atom_size_128`
276        let atom1 = Atom::new("Hello, beautiful world!");
277
278        expect_size(1);
279
280        let atom2 = Atom::new("Hello, beautiful world!");
281
282        expect_size(1);
283
284        // 2 for the two atoms, 1 for the global store
285        assert_eq!(atom1.ref_count(), 3);
286        assert_eq!(atom2.ref_count(), 3);
287
288        drop(atom1);
289
290        expect_size(1);
291
292        // 1 for the atom2, 1 for the global store
293        assert_eq!(atom2.ref_count(), 2);
294
295        drop(atom2);
296
297        expect_size(1);
298        global_atom_store_gc();
299        expect_size(0);
300    }
301
302    #[test]
303    fn global_ref_count_dynamic_1() {
304        expect_size(0);
305
306        {
307            expect_size(0);
308            let atom = Atom::new("Hello, beautiful world!");
309            assert_eq!(atom.ref_count(), 2);
310            expect_size(1);
311        }
312
313        expect_size(1);
314        global_atom_store_gc();
315        expect_size(0);
316
317        {
318            let atom = Atom::new("Hello, beautiful world!");
319            assert_eq!(atom.ref_count(), 2);
320            expect_size(1);
321        }
322        let atom = Atom::new("Hello, beautiful world!");
323        assert_eq!(atom.ref_count(), 2);
324
325        expect_size(1);
326        global_atom_store_gc();
327        expect_size(1);
328
329        drop(atom);
330        expect_size(1);
331        global_atom_store_gc();
332        expect_size(0);
333    }
334}