hstr/dynamic.rs

use std::{
    borrow::Cow,
    cell::RefCell,
    ffi::c_void,
    hash::{BuildHasherDefault, Hash, Hasher},
    mem::ManuallyDrop,
    num::NonZeroU8,
    ops::Deref,
    ptr::NonNull,
};

use rustc_hash::FxHasher;
use triomphe::ThinArc;

use crate::{
    tagged_value::{TaggedValue, MAX_INLINE_LEN},
    Atom, INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
};

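/// Header stored alongside the interned byte slice in each entry's `ThinArc`;
/// currently just the precomputed hash of the string (see `calc_hash`).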
#[derive(PartialEq, Eq)]
pub(crate) struct Metadata {
    pub hash: u64,
}

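/// A single interned string: a `ThinArc` whose header is the [Metadata] and
/// whose slice is the string's UTF-8 bytes.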
#[derive(Clone, PartialEq, Eq)]
pub(crate) struct Item(pub ThinArc<Metadata, u8>);

impl Deref for Item {
    type Target = <ThinArc<Metadata, u8> as Deref>::Target;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl Hash for Item {
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_u64(self.0.header.header.hash);
    }
}

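/// Views the [Item] behind a tagged pointer without taking ownership; the
/// returned [ManuallyDrop] keeps the strong count untouched when it is
/// dropped.
///
/// # Safety
///
/// `ptr` must hold a pointer previously produced by `ThinArc::into_raw` (see
/// `atom_in` below).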
pub(crate) unsafe fn deref_from(ptr: TaggedValue) -> ManuallyDrop<Item> {
    let item = restore_arc(ptr);

    ManuallyDrop::new(item)
}

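/// Rebuilds the owning [Item] from a tagged pointer; dropping the result
/// decrements the strong count of the interned entry.
///
/// # Safety
///
/// `v` must hold a pointer previously produced by `ThinArc::into_raw`, and
/// ownership of one strong reference is transferred to the returned [Item].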
pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item {
    let ptr = v.get_ptr();
    Item(ThinArc::from_raw(ptr))
}

/// A store that caches [Atom]s so that equal strings share a single
/// allocation. Can be merged with other [AtomStore]s for better performance.
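///
/// # Example
///
/// A minimal usage sketch (this assumes the type is re-exported from the
/// crate root as `hstr::AtomStore`):
///
/// ```
/// use hstr::AtomStore;
///
/// let mut store = AtomStore::default();
/// let a = store.atom("a long enough string that it is not stored inline");
/// let b = store.atom("a long enough string that it is not stored inline");
/// assert_eq!(a, b); // the second call reuses the entry interned by the first
/// ```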
pub struct AtomStore {
    pub(crate) data: hashbrown::HashMap<Item, (), BuildEntryHasher>,
}

impl Default for AtomStore {
    fn default() -> Self {
        Self {
            data: hashbrown::HashMap::with_capacity_and_hasher(64, BuildEntryHasher::default()),
        }
    }
}

impl AtomStore {
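    /// Returns an [Atom] for `text`, reusing the entry already interned in
    /// this store when the string is too long to be stored inline.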
    #[inline(always)]
    pub fn atom<'a>(&mut self, text: impl Into<Cow<'a, str>>) -> Atom {
        atom_in(self, &text.into())
    }

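    /// Drops entries whose only remaining strong reference is the store
    /// itself, i.e. atoms that are no longer used anywhere else.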
    fn gc(&mut self) {
        self.data.retain(|item, _| {
            let count = ThinArc::strong_count(&item.0);
            debug_assert!(count > 0);
            count > 1
        });
    }
}

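// Per-thread store backing `global_atom`; cleaned up by `global_atom_store_gc`.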
thread_local! {
    static GLOBAL_DATA: RefCell<AtomStore> = Default::default();
}

/// Cleans up atoms in the global store that are no longer referenced.
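///
/// A minimal sketch of when you might call this (assuming `Atom` and this
/// function are re-exported at the crate root, as the tests below rely on):
///
/// ```
/// let a = hstr::Atom::new("a long enough string that it is not stored inline");
/// drop(a);
/// // The entry stays cached in the thread-local store until gc runs.
/// hstr::global_atom_store_gc();
/// ```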
pub fn global_atom_store_gc() {
    GLOBAL_DATA.with(|global| {
        let mut store = global.borrow_mut();
        store.gc();
    });
}

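/// Interns `text` in the thread-local global store.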
pub(crate) fn global_atom(text: &str) -> Atom {
    GLOBAL_DATA.with(|global| {
        let mut store = global.borrow_mut();

        atom_in(&mut *store, text)
    })
}

/// This can create any kind of [Atom], inline or heap-allocated, even though
/// it lives in the `dynamic` module.
fn atom_in<S>(storage: S, text: &str) -> Atom
where
    S: Storage,
{
    let len = text.len();

    if len <= MAX_INLINE_LEN {
        // INLINE_TAG ensures this is never zero
        let tag = INLINE_TAG_INIT | ((len as u8) << LEN_OFFSET);
        let mut unsafe_data = TaggedValue::new_tag(tag);
        unsafe {
            unsafe_data.data_mut()[..len].copy_from_slice(text.as_bytes());
        }
        return Atom { unsafe_data };
    }

    let hash = calc_hash(text);
    let entry = storage.insert_entry(text, hash);
    let entry = ThinArc::into_raw(entry.0) as *mut c_void;

    let ptr: NonNull<c_void> = unsafe {
        // Safety: ThinArc::into_raw returns a non-null pointer
        NonNull::new_unchecked(entry)
    };
    debug_assert!(0 == ptr.as_ptr() as u8 & TAG_MASK);
    Atom {
        unsafe_data: TaggedValue::new_ptr(ptr),
    }
}

/// Attempts to construct an Atom but only if it can be constructed inline.
/// This is primarily useful in constant contexts.
pub(crate) const fn inline_atom(text: &str) -> Option<Atom> {
    let len = text.len();
    if len <= MAX_INLINE_LEN {
        // INLINE_TAG ensures this is never zero
        let tag = INLINE_TAG | ((len as u8) << LEN_OFFSET);
        let mut unsafe_data = TaggedValue::new_tag(NonZeroU8::new(tag).unwrap());
        // This odd pattern is needed because we cannot index slices with a
        // range (and thus cannot use `copy_from_slice`) in const context, so
        // we write our own copying loop.
        unsafe {
            let data = unsafe_data.data_mut();
            let bytes = text.as_bytes();
            let mut i = 0;
            while i < len {
                data[i] = bytes[i];
                i += 1;
            }
        }
        return Some(Atom { unsafe_data });
    }
    None
}

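/// A place where the backing allocation of a non-inline [Atom] can be
/// interned. Implemented for `&mut AtomStore`.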
trait Storage {
    fn insert_entry(self, text: &str, hash: u64) -> Item;
}

impl Storage for &'_ mut AtomStore {
    fn insert_entry(self, text: &str, hash: u64) -> Item {
        // If the text is too long, interning is not worth it.
        if text.len() > 512 {
            return Item(ThinArc::from_header_and_slice(
                Metadata { hash },
                text.as_bytes(),
            ));
        }

        let (entry, _) = self
            .data
            .raw_entry_mut()
            .from_hash(hash, |key| {
                key.header.header.hash == hash && key.slice.eq(text.as_bytes())
            })
            .or_insert_with(move || {
                (
                    Item(ThinArc::from_header_and_slice(
                        Metadata { hash },
                        text.as_bytes(),
                    )),
                    (),
                )
            });
        entry.clone()
    }
}

#[inline(always)]
fn calc_hash(text: &str) -> u64 {
    let mut hasher = FxHasher::default();
    text.hash(&mut hasher);
    hasher.finish()
}

type BuildEntryHasher = BuildHasherDefault<EntryHasher>;

203/// A "no-op" hasher for [Entry] that returns [Entry::hash]. The design is
204/// inspired by the `nohash-hasher` crate.
205///
206/// Assumption: [Arc]'s implementation of [Hash] is a simple pass-through.
#[derive(Default)]
pub(crate) struct EntryHasher {
    hash: u64,
    #[cfg(debug_assertions)]
    write_called: bool,
}

impl Hasher for EntryHasher {
    fn finish(&self) -> u64 {
        #[cfg(debug_assertions)]
        debug_assert!(
            self.write_called,
            "EntryHasher expects write_u64 to have been called",
        );
        self.hash
    }

    fn write(&mut self, _bytes: &[u8]) {
        panic!("EntryHasher expects to be called with write_u64");
    }

    fn write_u64(&mut self, val: u64) {
        #[cfg(debug_assertions)]
        {
            debug_assert!(
                !self.write_called,
                "EntryHasher expects write_u64 to be called only once",
            );
            self.write_called = true;
        }

        self.hash = val;
    }
}

#[cfg(test)]
mod tests {
    use crate::{dynamic::GLOBAL_DATA, global_atom_store_gc, Atom};

    fn expect_size(expected: usize) {
        // Helper that asserts the number of entries in the global store.
        GLOBAL_DATA.with(|global| {
            let store = global.borrow();
            assert_eq!(store.data.len(), expected);
        })
    }

    #[test]
    fn global_ref_count_dynamic_0() {
        expect_size(0);

        // The strings should be long enough so that they are not inline even under
        // feature `atom_size_128`
        let atom1 = Atom::new("Hello, beautiful world!");

        expect_size(1);

        let atom2 = Atom::new("Hello, beautiful world!");

        expect_size(1);

        // 2 for the two atoms, 1 for the global store
        assert_eq!(atom1.ref_count(), 3);
        assert_eq!(atom2.ref_count(), 3);

        drop(atom1);

        expect_size(1);

        // 1 for the atom2, 1 for the global store
        assert_eq!(atom2.ref_count(), 2);

        drop(atom2);

        expect_size(1);
        global_atom_store_gc();
        expect_size(0);
    }

    #[test]
    fn global_ref_count_dynamic_1() {
        expect_size(0);

        {
            expect_size(0);
            let atom = Atom::new("Hello, beautiful world!");
            assert_eq!(atom.ref_count(), 2);
            expect_size(1);
        }

        expect_size(1);
        global_atom_store_gc();
        expect_size(0);

        {
            let atom = Atom::new("Hello, beautiful world!");
            assert_eq!(atom.ref_count(), 2);
            expect_size(1);
        }
        let atom = Atom::new("Hello, beautiful world!");
        assert_eq!(atom.ref_count(), 2);

        expect_size(1);
        global_atom_store_gc();
        expect_size(1);

        drop(atom);
        expect_size(1);
        global_atom_store_gc();
        expect_size(0);
    }
}