hstr/
dynamic.rs

1use std::{
2    borrow::Cow,
3    cell::RefCell,
4    ffi::c_void,
5    hash::{BuildHasherDefault, Hash, Hasher},
6    mem::{forget, ManuallyDrop},
7    ops::Deref,
8    ptr::{self, NonNull},
9};
10
11use rustc_hash::FxHasher;
12use triomphe::{HeaderWithLength, ThinArc};
13
14use crate::{
15    tagged_value::{TaggedValue, MAX_INLINE_LEN},
16    Atom, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
17};
18
19#[derive(PartialEq, Eq)]
20pub(crate) struct Metadata {
21    pub hash: u64,
22    pub is_global: bool,
23}
24
25#[derive(Clone, PartialEq, Eq)]
26pub(crate) struct Item(pub ThinArc<HeaderWithLength<Metadata>, u8>);
27
28impl Item {
29    /// https://users.rust-lang.org/t/what-is-the-ideal-way-to-destruct-a-guard-type/25974/8
30    fn into_inner(self) -> ThinArc<HeaderWithLength<Metadata>, u8> {
31        unsafe {
32            let inner = ptr::read(&self.0);
33            forget(self);
34            inner
35        }
36    }
37}
38
39impl Deref for Item {
40    type Target = <ThinArc<HeaderWithLength<Metadata>, u8> as Deref>::Target;
41
42    fn deref(&self) -> &Self::Target {
43        &self.0
44    }
45}
46
47impl Hash for Item {
48    fn hash<H: Hasher>(&self, state: &mut H) {
49        state.write_u64(self.0.header.header.header.hash);
50    }
51}
52
53pub(crate) unsafe fn deref_from(ptr: TaggedValue) -> ManuallyDrop<Item> {
54    let item = restore_arc(ptr);
55
56    ManuallyDrop::new(item)
57}
58
59pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item {
60    let ptr = v.get_ptr();
61    Item(ThinArc::from_raw(ptr))
62}
63
64/// A store that stores [Atom]s. Can be merged with other [AtomStore]s for
65/// better performance.
66///
67///
68/// # Merging [AtomStore]
69pub struct AtomStore {
70    pub(crate) data: hashbrown::HashMap<Item, (), BuildEntryHasher>,
71}
72
73impl Default for AtomStore {
74    fn default() -> Self {
75        Self {
76            data: hashbrown::HashMap::with_capacity_and_hasher(64, Default::default()),
77        }
78    }
79}
80
81impl AtomStore {
82    #[inline(always)]
83    pub fn atom<'a>(&mut self, text: impl Into<Cow<'a, str>>) -> Atom {
84        atom_in(self, &text.into(), false)
85    }
86}
87
88impl Drop for Item {
89    fn drop(&mut self) {
90        // If we are going to drop the last reference, we need to remove the
91        // entry from the global store if it is a global atom
92        if self.0.header.header.header.is_global && ThinArc::strong_count(&self.0) == 2 {
93            let v = GLOBAL_DATA.try_with(|global| {
94                let mut store = global.borrow_mut();
95                store.data.remove_entry(self)
96            });
97
98            if let Ok(Some((v, _))) = v {
99                v.into_inner();
100            }
101        }
102    }
103}
104
105thread_local! {
106    static GLOBAL_DATA: RefCell<AtomStore> = Default::default();
107}
108
109pub(crate) fn global_atom(text: &str) -> Atom {
110    GLOBAL_DATA.with(|global| {
111        let mut store = global.borrow_mut();
112
113        atom_in(&mut *store, text, true)
114    })
115}
116
117/// This can create any kind of [Atom], although this lives in the `dynamic`
118/// module.
119pub(crate) fn atom_in<S>(storage: S, text: &str, is_global: bool) -> Atom
120where
121    S: Storage,
122{
123    let len = text.len();
124
125    if len < MAX_INLINE_LEN {
126        // INLINE_TAG ensures this is never zero
127        let tag = INLINE_TAG_INIT | ((len as u8) << LEN_OFFSET);
128        let mut unsafe_data = TaggedValue::new_tag(tag);
129        unsafe {
130            unsafe_data.data_mut()[..len].copy_from_slice(text.as_bytes());
131        }
132        return Atom { unsafe_data };
133    }
134
135    let hash = calc_hash(text);
136    let entry = storage.insert_entry(text, hash, is_global);
137    let entry = ThinArc::into_raw(entry.into_inner()) as *mut c_void;
138
139    let ptr: NonNull<c_void> = unsafe {
140        // Safety: Arc::into_raw returns a non-null pointer
141        NonNull::new_unchecked(entry)
142    };
143    debug_assert!(0 == ptr.as_ptr() as u8 & TAG_MASK);
144    Atom {
145        unsafe_data: TaggedValue::new_ptr(ptr),
146    }
147}
148
149pub(crate) trait Storage {
150    fn insert_entry(self, text: &str, hash: u64, is_global: bool) -> Item;
151}
152
153impl Storage for &'_ mut AtomStore {
154    #[inline(never)]
155    fn insert_entry(self, text: &str, hash: u64, is_global: bool) -> Item {
156        // If the text is too long, interning is not worth it.
157        if text.len() > 512 {
158            return Item(ThinArc::from_header_and_slice(
159                HeaderWithLength::new(Metadata { hash, is_global }, text.len()),
160                text.as_bytes(),
161            ));
162        }
163
164        let (entry, _) = self
165            .data
166            .raw_entry_mut()
167            .from_hash(hash, |key| {
168                key.header.header.header.hash == hash && key.slice == *text.as_bytes()
169            })
170            .or_insert_with(move || {
171                (
172                    Item(ThinArc::from_header_and_slice(
173                        HeaderWithLength::new(Metadata { hash, is_global }, text.len()),
174                        text.as_bytes(),
175                    )),
176                    (),
177                )
178            });
179        entry.clone()
180    }
181}
182
183#[inline(never)]
184fn calc_hash(text: &str) -> u64 {
185    let mut hasher = FxHasher::default();
186    text.hash(&mut hasher);
187    hasher.finish()
188}
189
190type BuildEntryHasher = BuildHasherDefault<EntryHasher>;
191
192/// A "no-op" hasher for [Entry] that returns [Entry::hash]. The design is
193/// inspired by the `nohash-hasher` crate.
194///
195/// Assumption: [Arc]'s implementation of [Hash] is a simple pass-through.
196#[derive(Default)]
197pub(crate) struct EntryHasher {
198    hash: u64,
199    #[cfg(debug_assertions)]
200    write_called: bool,
201}
202
203impl Hasher for EntryHasher {
204    fn finish(&self) -> u64 {
205        #[cfg(debug_assertions)]
206        debug_assert!(
207            self.write_called,
208            "EntryHasher expects write_u64 to have been called",
209        );
210        self.hash
211    }
212
213    fn write(&mut self, _bytes: &[u8]) {
214        panic!("EntryHasher expects to be called with write_u64");
215    }
216
217    fn write_u64(&mut self, val: u64) {
218        #[cfg(debug_assertions)]
219        {
220            debug_assert!(
221                !self.write_called,
222                "EntryHasher expects write_u64 to be called only once",
223            );
224            self.write_called = true;
225        }
226
227        self.hash = val;
228    }
229}
230
231#[cfg(test)]
232mod tests {
233    use crate::{dynamic::GLOBAL_DATA, Atom};
234
235    #[test]
236    fn global_ref_count_dynamic() {
237        let atom1 = Atom::new("Hello, world!");
238
239        let atom2 = Atom::new("Hello, world!");
240
241        // 2 for the two atoms, 1 for the global store
242        assert_eq!(atom1.ref_count(), 3);
243        assert_eq!(atom2.ref_count(), 3);
244
245        drop(atom1);
246
247        // 1 for the atom2, 1 for the global store
248        assert_eq!(atom2.ref_count(), 2);
249
250        drop(atom2);
251
252        GLOBAL_DATA.with(|global| {
253            let store = global.borrow();
254            assert_eq!(store.data.len(), 0);
255        });
256    }
257}