1use crate::dynamic_set::{dynamic_set, Entry};
11use crate::static_sets::StaticAtomSet;
12use debug_unreachable::debug_unreachable;
13
14use std::borrow::Cow;
15use std::cmp::Ordering::{self, Equal};
16use std::fmt;
17use std::hash::{Hash, Hasher};
18use std::marker::PhantomData;
19use std::mem;
20use std::num::NonZeroU64;
21use std::ops;
22use std::slice;
23use std::str;
24use std::sync::atomic::Ordering::SeqCst;
25
// `Atom::unsafe_data` is a tagged 64-bit word; the two low bits select the
// representation.
const DYNAMIC_TAG: u8 = 0b_00; // word is a pointer to a dynamic-set `Entry`
const INLINE_TAG: u8 = 0b_01; // string bytes stored directly in the word
const STATIC_TAG: u8 = 0b_10; // word holds an index into the static set
const TAG_MASK: u64 = 0b_11;
// For inline atoms, the byte length is stored in bits 4..8 of the word.
const LEN_OFFSET: u64 = 4;
const LEN_MASK: u64 = 0xF0;

// An inline atom packs at most 7 string bytes beside the tag/len byte.
const MAX_INLINE_LEN: usize = 7;
// For static atoms, the set index lives in the upper 32 bits.
const STATIC_SHIFT_BITS: usize = 32;
35
/// An interned string handle.
///
/// Equality (via the derived `PartialEq`/`Eq`) compares the tagged word
/// bitwise, which is exact because equal strings always intern to the same
/// representation and value.
#[derive(PartialEq, Eq)]
pub struct Atom<Static> {
    // Tagged word: a dynamic `Entry` pointer, inline bytes + length, or a
    // static-set index, discriminated by the low two bits (see the tag
    // constants above). `NonZeroU64` gives `Option<Atom>` a free niche.
    unsafe_data: NonZeroU64,
    // Ties the atom to its static set type without storing it.
    phantom: PhantomData<Static>,
}
84
#[cfg(feature = "malloc_size_of")]
impl<Static: StaticAtomSet> malloc_size_of::MallocSizeOf for Atom<Static> {
    fn size_of(&self, _ops: &mut malloc_size_of::MallocSizeOfOps) -> usize {
        // An atom is a single tagged word with no owned heap allocation of
        // its own, so it reports zero. NOTE(review): heap storage of dynamic
        // entries is presumably accounted for by the dynamic set — confirm.
        0
    }
}
93
impl<Static> Atom<Static> {
    /// Builds a static atom from its index `n` in the static set.
    ///
    /// `const` so it is usable in constant contexts.
    #[inline(always)]
    #[doc(hidden)]
    pub const fn pack_static(n: u32) -> Self {
        Self {
            unsafe_data: unsafe {
                // SAFETY: STATIC_TAG (0b10) is nonzero, so the packed word
                // is nonzero regardless of `n`.
                NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS))
            },
            phantom: PhantomData,
        }
    }

    /// Builds an inline atom from up to `MAX_INLINE_LEN` string bytes packed
    /// into `n`, plus their byte length `len`.
    #[inline(always)]
    #[doc(hidden)]
    pub const fn pack_inline(mut n: u64, len: u8) -> Self {
        if cfg!(target_endian = "big") {
            // Byte-swap and shift so the string bytes occupy the same
            // in-memory positions that `inline_atom_slice_mut` writes on
            // little-endian targets. NOTE(review): assumed equivalence —
            // verify on an actual big-endian target.
            n = n.to_le() << 8;
        }

        let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n;
        Self {
            // SAFETY: INLINE_TAG (0b01) is nonzero, so `data` is nonzero.
            unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
            phantom: PhantomData,
        }
    }

    /// Returns the two-bit representation tag
    /// (`DYNAMIC_TAG`, `INLINE_TAG`, or `STATIC_TAG`).
    fn tag(&self) -> u8 {
        (self.unsafe_data.get() & TAG_MASK) as u8
    }
}
134
impl<Static: StaticAtomSet> Atom<Static> {
    /// Returns the raw tagged word.
    #[doc(hidden)]
    pub fn unsafe_data(&self) -> u64 {
        self.unsafe_data.get()
    }

    /// True if this atom is an index into the static set.
    #[doc(hidden)]
    pub fn is_static(&self) -> bool {
        self.tag() == STATIC_TAG
    }

    /// True if this atom points at a dynamic-set entry.
    #[doc(hidden)]
    pub fn is_dynamic(&self) -> bool {
        self.tag() == DYNAMIC_TAG
    }

    /// True if the string bytes are stored inline in the word.
    #[doc(hidden)]
    pub fn is_inline(&self) -> bool {
        self.tag() == INLINE_TAG
    }

    // Index into the static set; meaningful only when `is_static()`.
    fn static_index(&self) -> u64 {
        self.unsafe_data.get() >> STATIC_SHIFT_BITS
    }

    /// Returns a precomputed 32-bit hash for this atom.
    pub fn get_hash(&self) -> u32 {
        match self.tag() {
            DYNAMIC_TAG => {
                // The word is a pointer to the dynamic-set `Entry`, which
                // caches the string's hash.
                let entry = self.unsafe_data.get() as *const Entry;
                unsafe { (*entry).hash }
            }
            STATIC_TAG => Static::get().hashes[self.static_index() as usize],
            INLINE_TAG => {
                // Cheap mix of the two halves of the word; deterministic per
                // string since the word fully encodes the inline string.
                let data = self.unsafe_data.get();
                ((data >> 32) ^ data) as u32
            }
            _ => unsafe { debug_unreachable!() },
        }
    }

    /// Looks `string_to_add` up in the static set; `None` if absent.
    pub fn try_static(string_to_add: &str) -> Option<Self> {
        Self::try_static_internal(string_to_add).ok()
    }

    // As `try_static`, but a miss returns the computed phf hashes so the
    // caller (see `From<Cow<str>>`) can reuse them when interning dynamically.
    fn try_static_internal(string_to_add: &str) -> Result<Self, phf_shared::Hashes> {
        let static_set = Static::get();
        let hash = phf_shared::hash(&*string_to_add, &static_set.key);
        let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len());

        // phf always yields a slot; compare to detect a string that is simply
        // not in the static set.
        if static_set.atoms[index as usize] == string_to_add {
            Ok(Self::pack_static(index))
        } else {
            Err(hash)
        }
    }
}
197
198impl<Static: StaticAtomSet> Default for Atom<Static> {
199 #[inline]
200 fn default() -> Self {
201 Atom::pack_static(Static::empty_string_index())
202 }
203}
204
205impl<Static: StaticAtomSet> Hash for Atom<Static> {
206 #[inline]
207 fn hash<H>(&self, state: &mut H)
208 where
209 H: Hasher,
210 {
211 state.write_u32(self.get_hash())
212 }
213}
214
impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
    /// Interns a string, choosing the cheapest representation in order:
    /// the shared empty-string static atom, inline storage for strings of at
    /// most `MAX_INLINE_LEN` bytes, a static-set hit, and finally an entry in
    /// the global dynamic set.
    fn from(string_to_add: Cow<'a, str>) -> Self {
        let len = string_to_add.len();
        if len == 0 {
            Self::pack_static(Static::empty_string_index())
        } else if len <= MAX_INLINE_LEN {
            // Pack the bytes directly into the tagged word.
            let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
            {
                let dest = inline_atom_slice_mut(&mut data);
                dest[..len].copy_from_slice(string_to_add.as_bytes());
            }
            Atom {
                // SAFETY: INLINE_TAG is nonzero, so `data` is nonzero.
                unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
                phantom: PhantomData,
            }
        } else {
            Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
                // Not in the static set: intern in the dynamic set, reusing
                // the phf hash already computed by `try_static_internal`.
                let ptr: std::ptr::NonNull<Entry> = dynamic_set().insert(string_to_add, hash.g);
                let data = ptr.as_ptr() as u64;
                // Entries must be aligned such that the low two bits are
                // clear, i.e. the word reads back as DYNAMIC_TAG.
                debug_assert!(0 == data & TAG_MASK);
                Atom {
                    // SAFETY: `ptr` is a NonNull, so `data` is nonzero.
                    unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
                    phantom: PhantomData,
                }
            })
        }
    }
}
245
246impl<Static: StaticAtomSet> Clone for Atom<Static> {
247 #[inline(always)]
248 fn clone(&self) -> Self {
249 if self.tag() == DYNAMIC_TAG {
250 let entry = self.unsafe_data.get() as *const Entry;
251 unsafe { &*entry }.ref_count.fetch_add(1, SeqCst);
252 }
253 Atom { ..*self }
254 }
255}
256
impl<Static> Drop for Atom<Static> {
    #[inline]
    fn drop(&mut self) {
        // Only dynamic atoms own a reference to shared state.
        if self.tag() == DYNAMIC_TAG {
            let entry = self.unsafe_data.get() as *const Entry;
            // A previous count of 1 means this was the last handle, so the
            // interned string must be removed from the dynamic set.
            if unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst) == 1 {
                drop_slow(self)
            }
        }

        // Out-of-line helper keeping the rare removal path separate from the
        // common decrement-only drop.
        fn drop_slow<Static>(this: &mut Atom<Static>) {
            dynamic_set().remove(this.unsafe_data.get() as *mut Entry);
        }
    }
}
273
impl<Static: StaticAtomSet> ops::Deref for Atom<Static> {
    type Target = str;

    /// Resolves the atom back to its string contents, dispatching on the
    /// representation tag.
    #[inline]
    fn deref(&self) -> &str {
        unsafe {
            match self.tag() {
                DYNAMIC_TAG => {
                    // The word is a pointer to the dynamic-set entry that
                    // owns the string.
                    let entry = self.unsafe_data.get() as *const Entry;
                    &(*entry).string
                }
                INLINE_TAG => {
                    // Byte length lives in bits 4..8 of the word.
                    let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET;
                    debug_assert!(len as usize <= MAX_INLINE_LEN);
                    let src = inline_atom_slice(&self.unsafe_data);
                    // SAFETY: the first `len` payload bytes were copied from
                    // a valid `&str`, so they are valid UTF-8.
                    str::from_utf8_unchecked(src.get_unchecked(..(len as usize)))
                }
                STATIC_TAG => Static::get().atoms[self.static_index() as usize],
                _ => debug_unreachable!(),
            }
        }
    }
}
297
298impl<Static: StaticAtomSet> fmt::Debug for Atom<Static> {
299 #[inline]
300 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
301 let ty_str = unsafe {
302 match self.tag() {
303 DYNAMIC_TAG => "dynamic",
304 INLINE_TAG => "inline",
305 STATIC_TAG => "static",
306 _ => debug_unreachable!(),
307 }
308 };
309
310 write!(f, "Atom('{}' type={})", &*self, ty_str)
311 }
312}
313
314impl<Static: StaticAtomSet> PartialOrd for Atom<Static> {
315 #[inline]
316 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
317 if self.unsafe_data == other.unsafe_data {
318 return Some(Equal);
319 }
320 self.as_ref().partial_cmp(other.as_ref())
321 }
322}
323
324impl<Static: StaticAtomSet> Ord for Atom<Static> {
325 #[inline]
326 fn cmp(&self, other: &Self) -> Ordering {
327 if self.unsafe_data == other.unsafe_data {
328 return Equal;
329 }
330 self.as_ref().cmp(other.as_ref())
331 }
332}
333
334impl<Static: StaticAtomSet> Atom<Static> {
338 fn from_mutated_str<F: FnOnce(&mut str)>(s: &str, f: F) -> Self {
339 let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit();
340 let buffer = unsafe { &mut *buffer.as_mut_ptr() };
341
342 if let Some(buffer_prefix) = buffer.get_mut(..s.len()) {
343 buffer_prefix.copy_from_slice(s.as_bytes());
344 let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) };
345 f(as_str);
346 Atom::from(&*as_str)
347 } else {
348 let mut string = s.to_owned();
349 f(&mut string);
350 Atom::from(string)
351 }
352 }
353
354 pub fn to_ascii_uppercase(&self) -> Self {
358 for (i, b) in self.bytes().enumerate() {
359 if let b'a'..=b'z' = b {
360 return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase());
361 }
362 }
363 self.clone()
364 }
365
366 pub fn to_ascii_lowercase(&self) -> Self {
370 for (i, b) in self.bytes().enumerate() {
371 if let b'A'..=b'Z' = b {
372 return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase());
373 }
374 }
375 self.clone()
376 }
377
378 pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
382 (self == other) || self.eq_str_ignore_ascii_case(&**other)
383 }
384
385 pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
389 (&**self).eq_ignore_ascii_case(other)
390 }
391}
392
/// Views the 7 payload bytes of an inline atom's tagged word.
///
/// The byte carrying the tag and length is skipped: on little-endian targets
/// it is the lowest-addressed byte, so the payload starts one byte in; on
/// big-endian targets the payload starts at the word's first byte.
/// All 7 bytes are exposed whether or not they hold string data; callers
/// slice down to the stored length.
#[inline(always)]
fn inline_atom_slice(x: &NonZeroU64) -> &[u8] {
    let base = x as *const NonZeroU64 as *const u8;
    let payload_offset = if cfg!(target_endian = "little") { 1 } else { 0 };
    unsafe { slice::from_raw_parts(base.add(payload_offset), 7) }
}
404
/// Mutable counterpart of `inline_atom_slice`: exposes the 7 payload bytes
/// of a word being assembled into an inline atom.
///
/// The tag/len byte (lowest-addressed on little-endian) is excluded so
/// writers cannot clobber the discriminant.
#[inline(always)]
fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] {
    let base = x as *mut u64 as *mut u8;
    let payload_offset = if cfg!(target_endian = "little") { 1 } else { 0 };
    unsafe { slice::from_raw_parts_mut(base.add(payload_offset), 7) }
}