compact_str/
lib.rs

1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[doc(hidden)]
5pub use core;
6use core::borrow::{
7    Borrow,
8    BorrowMut,
9};
10use core::cmp::Ordering;
11use core::hash::{
12    Hash,
13    Hasher,
14};
15use core::iter::FromIterator;
16use core::ops::{
17    Add,
18    AddAssign,
19    Bound,
20    Deref,
21    DerefMut,
22    RangeBounds,
23};
24use core::str::{
25    FromStr,
26    Utf8Error,
27};
28use core::{
29    fmt,
30    slice,
31};
32use std::borrow::Cow;
33use std::ffi::OsStr;
34use std::iter::FusedIterator;
35
36mod features;
37mod macros;
38
39mod repr;
40use repr::Repr;
41
42mod traits;
43pub use traits::{
44    CompactStringExt,
45    ToCompactString,
46};
47
48#[cfg(test)]
49mod tests;
50
51/// A [`CompactString`] is a compact string type that can be used almost anywhere a
52/// [`String`] or [`str`] can be used.
53///
54/// ## Using `CompactString`
55/// ```
56/// use compact_str::CompactString;
57/// # use std::collections::HashMap;
58///
59/// // CompactString auto derefs into a str so you can use all methods from `str`
60/// // that take a `&self`
61/// if CompactString::new("hello world!").is_ascii() {
62///     println!("we're all ASCII")
63/// }
64///
65/// // You can use a CompactString in collections like you would a String or &str
66/// let mut map: HashMap<CompactString, CompactString> = HashMap::new();
67///
68/// // directly construct a new `CompactString`
69/// map.insert(CompactString::new("nyc"), CompactString::new("empire state building"));
70/// // create a `CompactString` from a `&str`
71/// map.insert("sf".into(), "transamerica pyramid".into());
72/// // create a `CompactString` from a `String`
73/// map.insert(String::from("sea").into(), String::from("space needle").into());
74///
75/// fn wrapped_print<T: AsRef<str>>(text: T) {
76///     println!("{}", text.as_ref());
77/// }
78///
79/// // CompactString impls AsRef<str> and Borrow<str>, so it can be used anywhere
80/// // that expects a generic string
81/// if let Some(building) = map.get("nyc") {
82///     wrapped_print(building);
83/// }
84///
85/// // CompactString can also be directly compared to a String or &str
86/// assert_eq!(CompactString::new("chicago"), "chicago");
87/// assert_eq!(CompactString::new("houston"), String::from("houston"));
88/// ```
89///
90/// # Converting from a `String`
91/// It's important that a `CompactString` interops well with `String`, so you can easily use both in
92/// your code base.
93///
94/// `CompactString` implements `From<String>` and operates in the following manner:
95/// - Eagerly inlines the string, possibly dropping excess capacity
96/// - Otherwise re-uses the same underlying buffer from `String`
97///
98/// ```
99/// use compact_str::CompactString;
100///
101/// // eagerly inlining
102/// let short = String::from("hello world");
103/// let short_c = CompactString::from(short);
104/// assert!(!short_c.is_heap_allocated());
105///
106/// // dropping excess capacity
107/// let mut excess = String::with_capacity(256);
108/// excess.push_str("abc");
109///
110/// let excess_c = CompactString::from(excess);
111/// assert!(!excess_c.is_heap_allocated());
112/// assert!(excess_c.capacity() < 256);
113///
114/// // re-using the same buffer
115/// let long = String::from("this is a longer string that will be heap allocated");
116///
117/// let long_ptr = long.as_ptr();
118/// let long_len = long.len();
119/// let long_cap = long.capacity();
120///
121/// let mut long_c = CompactString::from(long);
122/// assert!(long_c.is_heap_allocated());
123///
124/// let cpt_ptr = long_c.as_ptr();
125/// let cpt_len = long_c.len();
126/// let cpt_cap = long_c.capacity();
127///
128/// // the original String and the CompactString point to the same place in memory, buffer re-use!
129/// assert_eq!(cpt_ptr, long_ptr);
130/// assert_eq!(cpt_len, long_len);
131/// assert_eq!(cpt_cap, long_cap);
132/// ```
133///
134/// ### Prevent Eagerly Inlining
135/// A consequence of eagerly inlining is you then need to de-allocate the existing buffer, which
136/// might not always be desirable if you're converting a very large amount of `String`s. If your
137/// code is very sensitive to allocations, consider the [`CompactString::from_string_buffer`] API.
138#[derive(Clone)]
139#[repr(transparent)]
140pub struct CompactString(Repr);
141
142impl CompactString {
143    /// Creates a new [`CompactString`] from any type that implements `AsRef<str>`.
144    /// If the string is short enough, then it will be inlined on the stack!
145    ///
146    /// # Examples
147    ///
148    /// ### Inlined
149    /// ```
150    /// # use compact_str::CompactString;
151    /// // We can inline strings up to 12 characters long on 32-bit architectures...
152    /// #[cfg(target_pointer_width = "32")]
153    /// let s = "i'm 12 chars";
154    /// // ...and up to 24 characters on 64-bit architectures!
155    /// #[cfg(target_pointer_width = "64")]
156    /// let s = "i am 24 characters long!";
157    ///
158    /// let compact = CompactString::new(&s);
159    ///
160    /// assert_eq!(compact, s);
161    /// // we are not allocated on the heap!
162    /// assert!(!compact.is_heap_allocated());
163    /// ```
164    ///
165    /// ### Heap
166    /// ```
167    /// # use compact_str::CompactString;
168    /// // For longer strings though, we get allocated on the heap
169    /// let long = "I am a longer string that will be allocated on the heap";
170    /// let compact = CompactString::new(long);
171    ///
172    /// assert_eq!(compact, long);
173    /// // we are allocated on the heap!
174    /// assert!(compact.is_heap_allocated());
175    /// ```
176    ///
177    /// ### Creation
178    /// ```
179    /// use compact_str::CompactString;
180    ///
181    /// // Using a `&'static str`
182    /// let s = "hello world!";
183    /// let hello = CompactString::new(&s);
184    ///
185    /// // Using a `String`
186    /// let u = String::from("🦄🌈");
187    /// let unicorn = CompactString::new(u);
188    ///
189    /// // Using a `Box<str>`
190    /// let b: Box<str> = String::from("📦📦📦").into_boxed_str();
191    /// let boxed = CompactString::new(&b);
192    /// ```
193    #[inline]
194    pub fn new<T: AsRef<str>>(text: T) -> Self {
195        CompactString(Repr::new(text.as_ref()))
196    }
197
198    /// Creates a new inline [`CompactString`] at compile time.
199    ///
200    /// # Examples
201    /// ```
202    /// use compact_str::CompactString;
203    ///
204    /// const DEFAULT_NAME: CompactString = CompactString::new_inline("untitled");
205    /// ```
206    ///
    /// Note: Trying to create a string that is too long to be inlined will fail to build.
208    /// ```compile_fail
209    /// # use compact_str::CompactString;
210    /// const LONG: CompactString = CompactString::new_inline("this is a long string that can't be stored on the stack");
211    /// ```
212    #[inline]
213    pub const fn new_inline(text: &str) -> Self {
214        CompactString(Repr::new_inline(text))
215    }
216
217    /// Creates a new empty [`CompactString`] with the capacity to fit at least `capacity` bytes.
218    ///
219    /// A `CompactString` will inline strings on the stack, if they're small enough. Specifically,
220    /// if the string has a length less than or equal to `std::mem::size_of::<String>` bytes
221    /// then it will be inlined. This also means that `CompactString`s have a minimum capacity
222    /// of `std::mem::size_of::<String>`.
223    ///
224    /// # Examples
225    ///
226    /// ### "zero" Capacity
227    /// ```
228    /// # use compact_str::CompactString;
229    /// // Creating a CompactString with a capacity of 0 will create
230    /// // one with capacity of std::mem::size_of::<String>();
231    /// let empty = CompactString::with_capacity(0);
232    /// let min_size = std::mem::size_of::<String>();
233    ///
234    /// assert_eq!(empty.capacity(), min_size);
235    /// assert_ne!(0, min_size);
236    /// assert!(!empty.is_heap_allocated());
237    /// ```
238    ///
239    /// ### Max Inline Size
240    /// ```
241    /// # use compact_str::CompactString;
242    /// // Creating a CompactString with a capacity of std::mem::size_of::<String>()
243    /// // will not heap allocate.
244    /// let str_size = std::mem::size_of::<String>();
245    /// let empty = CompactString::with_capacity(str_size);
246    ///
247    /// assert_eq!(empty.capacity(), str_size);
248    /// assert!(!empty.is_heap_allocated());
249    /// ```
250    ///
251    /// ### Heap Allocating
252    /// ```
253    /// # use compact_str::CompactString;
    /// // If you create a `CompactString` with a capacity greater than
    /// // `std::mem::size_of::<String>`, it will be heap allocated. Heap
    /// // allocated strings have a minimum capacity.
257    ///
258    /// const MIN_HEAP_CAPACITY: usize = std::mem::size_of::<usize>() * 4;
259    ///
260    /// let heap_size = std::mem::size_of::<String>() + 1;
261    /// let empty = CompactString::with_capacity(heap_size);
262    ///
263    /// assert_eq!(empty.capacity(), MIN_HEAP_CAPACITY);
264    /// assert!(empty.is_heap_allocated());
265    /// ```
266    #[inline]
267    pub fn with_capacity(capacity: usize) -> Self {
268        CompactString(Repr::with_capacity(capacity))
269    }
270
271    /// Convert a slice of bytes into a [`CompactString`].
272    ///
273    /// A [`CompactString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
274    /// This method converts from an arbitrary contiguous collection of bytes into a
275    /// [`CompactString`], failing if the provided bytes are not `UTF-8`.
276    ///
277    /// Note: If you want to create a [`CompactString`] from a non-contiguous collection of bytes,
278    /// enable the `bytes` feature of this crate, and see `CompactString::from_utf8_buf`
279    ///
280    /// # Examples
281    /// ### Valid UTF-8
282    /// ```
283    /// # use compact_str::CompactString;
284    /// let bytes = vec![240, 159, 166, 128, 240, 159, 146, 175];
285    /// let compact = CompactString::from_utf8(bytes).expect("valid UTF-8");
286    ///
287    /// assert_eq!(compact, "🦀💯");
288    /// ```
289    ///
290    /// ### Invalid UTF-8
291    /// ```
292    /// # use compact_str::CompactString;
293    /// let bytes = vec![255, 255, 255];
294    /// let result = CompactString::from_utf8(bytes);
295    ///
296    /// assert!(result.is_err());
297    /// ```
298    #[inline]
299    pub fn from_utf8<B: AsRef<[u8]>>(buf: B) -> Result<Self, Utf8Error> {
300        Repr::from_utf8(buf).map(CompactString)
301    }
302
303    /// Converts a vector of bytes to a [`CompactString`] without checking that the string contains
304    /// valid UTF-8.
305    ///
306    /// See the safe version, [`CompactString::from_utf8`], for more details.
307    ///
308    /// # Safety
309    ///
310    /// This function is unsafe because it does not check that the bytes passed to it are valid
311    /// UTF-8. If this constraint is violated, it may cause memory unsafety issues with future users
    /// of the [`CompactString`], as both this crate and the standard library's [`str`] APIs assume
    /// that string contents are valid UTF-8.
314    ///
315    /// # Examples
316    ///
317    /// Basic usage:
318    ///
319    /// ```
320    /// # use compact_str::CompactString;
321    /// // some bytes, in a vector
322    /// let sparkle_heart = vec![240, 159, 146, 150];
323    ///
324    /// let sparkle_heart = unsafe {
325    ///     CompactString::from_utf8_unchecked(sparkle_heart)
326    /// };
327    ///
328    /// assert_eq!("💖", sparkle_heart);
329    /// ```
330    #[inline]
331    #[must_use]
332    pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(buf: B) -> Self {
333        CompactString(Repr::from_utf8_unchecked(buf))
334    }
335
    /// Decode a slice of [`UTF-16`](https://en.wikipedia.org/wiki/UTF-16) code units (`u16`s) into
    /// a [`CompactString`], returning an [`Err`] if the slice contains any invalid data.
338    ///
339    /// # Examples
340    /// ### Valid UTF-16
341    /// ```
342    /// # use compact_str::CompactString;
343    /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063];
344    /// let compact = CompactString::from_utf16(buf).unwrap();
345    ///
346    /// assert_eq!(compact, "𝄞music");
347    /// ```
348    ///
349    /// ### Invalid UTF-16
350    /// ```
351    /// # use compact_str::CompactString;
352    /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
353    /// let res = CompactString::from_utf16(buf);
354    ///
355    /// assert!(res.is_err());
356    /// ```
357    #[inline]
358    pub fn from_utf16<B: AsRef<[u16]>>(buf: B) -> Result<Self, Utf16Error> {
359        // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
360        // even though the size of our iterator, `buf`, is known ahead of time.
361        //
362        // rustlang issue #48994 is tracking the fix
363
364        let buf = buf.as_ref();
365        let mut ret = CompactString::with_capacity(buf.len());
366        for c in core::char::decode_utf16(buf.iter().copied()) {
367            if let Ok(c) = c {
368                ret.push(c);
369            } else {
370                return Err(Utf16Error(()));
371            }
372        }
373        Ok(ret)
374    }
375
376    /// Decode a UTF-16–encoded slice `v` into a `CompactString`, replacing invalid data with
377    /// the replacement character (`U+FFFD`), �.
378    ///
379    /// # Examples
380    ///
381    /// Basic usage:
382    ///
383    /// ```
384    /// # use compact_str::CompactString;
385    /// // 𝄞mus<invalid>ic<invalid>
386    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
387    ///           0x0073, 0xDD1E, 0x0069, 0x0063,
388    ///           0xD834];
389    ///
390    /// assert_eq!(CompactString::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
391    ///            CompactString::from_utf16_lossy(v));
392    /// ```
393    #[inline]
394    pub fn from_utf16_lossy<B: AsRef<[u16]>>(buf: B) -> Self {
395        let buf = buf.as_ref();
396        let mut ret = CompactString::with_capacity(buf.len());
397        for c in std::char::decode_utf16(buf.iter().copied()) {
398            match c {
399                Ok(c) => ret.push(c),
400                Err(_) => ret.push_str("�"),
401            }
402        }
403        ret
404    }
405
406    /// Returns the length of the [`CompactString`] in `bytes`, not [`char`]s or graphemes.
407    ///
408    /// When using `UTF-8` encoding (which all strings in Rust do) a single character will be 1 to 4
409    /// bytes long, therefore the return value of this method might not be what a human considers
410    /// the length of the string.
411    ///
412    /// # Examples
413    /// ```
414    /// # use compact_str::CompactString;
415    /// let ascii = CompactString::new("hello world");
416    /// assert_eq!(ascii.len(), 11);
417    ///
418    /// let emoji = CompactString::new("👱");
419    /// assert_eq!(emoji.len(), 4);
420    /// ```
421    #[inline]
422    pub fn len(&self) -> usize {
423        self.0.len()
424    }
425
426    /// Returns `true` if the [`CompactString`] has a length of 0, `false` otherwise
427    ///
428    /// # Examples
429    /// ```
430    /// # use compact_str::CompactString;
431    /// let mut msg = CompactString::new("");
432    /// assert!(msg.is_empty());
433    ///
434    /// // add some characters
435    /// msg.push_str("hello reader!");
436    /// assert!(!msg.is_empty());
437    /// ```
438    #[inline]
439    pub fn is_empty(&self) -> bool {
440        self.len() == 0
441    }
442
443    /// Returns the capacity of the [`CompactString`], in bytes.
444    ///
445    /// # Note
446    /// * A `CompactString` will always have a capacity of at least `std::mem::size_of::<String>()`
447    ///
448    /// # Examples
449    /// ### Minimum Size
450    /// ```
451    /// # use compact_str::CompactString;
452    /// let min_size = std::mem::size_of::<String>();
453    /// let compact = CompactString::new("");
454    ///
455    /// assert!(compact.capacity() >= min_size);
456    /// ```
457    ///
458    /// ### Heap Allocated
459    /// ```
460    /// # use compact_str::CompactString;
461    /// let compact = CompactString::with_capacity(128);
462    /// assert_eq!(compact.capacity(), 128);
463    /// ```
464    #[inline]
465    pub fn capacity(&self) -> usize {
466        self.0.capacity()
467    }
468
469    /// Ensures that this [`CompactString`]'s capacity is at least `additional` bytes longer than
470    /// its length. The capacity may be increased by more than `additional` bytes if it chooses,
471    /// to prevent frequent reallocations.
472    ///
473    /// # Note
474    /// * A `CompactString` will always have at least a capacity of `std::mem::size_of::<String>()`
475    /// * Reserving additional bytes may cause the `CompactString` to become heap allocated
476    ///
477    /// # Panics
478    /// Panics if the new capacity overflows `usize`
479    ///
480    /// # Examples
481    /// ```
482    /// # use compact_str::CompactString;
483    ///
484    /// const WORD: usize = std::mem::size_of::<usize>();
485    /// let mut compact = CompactString::default();
486    /// assert!(compact.capacity() >= (WORD * 3) - 1);
487    ///
488    /// compact.reserve(200);
489    /// assert!(compact.is_heap_allocated());
490    /// assert!(compact.capacity() >= 200);
491    /// ```
492    #[inline]
493    pub fn reserve(&mut self, additional: usize) {
494        self.0.reserve(additional)
495    }
496
497    /// Returns a string slice containing the entire [`CompactString`].
498    ///
499    /// # Examples
500    /// ```
501    /// # use compact_str::CompactString;
502    /// let s = CompactString::new("hello");
503    ///
504    /// assert_eq!(s.as_str(), "hello");
505    /// ```
506    #[inline]
507    pub fn as_str(&self) -> &str {
508        self.0.as_str()
509    }
510
511    /// Returns a mutable string slice containing the entire [`CompactString`].
512    ///
513    /// # Examples
514    /// ```
515    /// # use compact_str::CompactString;
516    /// let mut s = CompactString::new("hello");
517    /// s.as_mut_str().make_ascii_uppercase();
518    ///
519    /// assert_eq!(s.as_str(), "HELLO");
520    /// ```
521    #[inline]
522    pub fn as_mut_str(&mut self) -> &mut str {
523        let len = self.len();
524        unsafe { std::str::from_utf8_unchecked_mut(&mut self.0.as_mut_buf()[..len]) }
525    }
526
527    /// Returns a byte slice of the [`CompactString`]'s contents.
528    ///
529    /// # Examples
530    /// ```
531    /// # use compact_str::CompactString;
532    /// let s = CompactString::new("hello");
533    ///
534    /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
535    /// ```
536    #[inline]
537    pub fn as_bytes(&self) -> &[u8] {
538        &self.0.as_slice()[..self.len()]
539    }
540
541    // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning?
542    //
543    /// Provides a mutable reference to the underlying buffer of bytes.
544    ///
545    /// # Safety
    /// * All Rust strings, including `CompactString`, must be valid UTF-8. The caller must
    ///   guarantee that any modifications made to the underlying buffer are valid UTF-8.
549    ///
550    /// # Examples
551    /// ```
552    /// # use compact_str::CompactString;
553    /// let mut s = CompactString::new("hello");
554    ///
555    /// let slice = unsafe { s.as_mut_bytes() };
556    /// // copy bytes into our string
557    /// slice[5..11].copy_from_slice(" world".as_bytes());
558    /// // set the len of the string
559    /// unsafe { s.set_len(11) };
560    ///
561    /// assert_eq!(s, "hello world");
562    /// ```
563    #[inline]
564    pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] {
565        self.0.as_mut_buf()
566    }
567
568    /// Appends the given [`char`] to the end of this [`CompactString`].
569    ///
570    /// # Examples
571    /// ```
572    /// # use compact_str::CompactString;
573    /// let mut s = CompactString::new("foo");
574    ///
575    /// s.push('b');
576    /// s.push('a');
577    /// s.push('r');
578    ///
579    /// assert_eq!("foobar", s);
580    /// ```
581    pub fn push(&mut self, ch: char) {
582        self.push_str(ch.encode_utf8(&mut [0; 4]));
583    }
584
585    /// Removes the last character from the [`CompactString`] and returns it.
586    /// Returns `None` if this [`CompactString`] is empty.
587    ///
588    /// # Examples
589    /// ```
590    /// # use compact_str::CompactString;
591    /// let mut s = CompactString::new("abc");
592    ///
593    /// assert_eq!(s.pop(), Some('c'));
594    /// assert_eq!(s.pop(), Some('b'));
595    /// assert_eq!(s.pop(), Some('a'));
596    ///
597    /// assert_eq!(s.pop(), None);
598    /// ```
599    #[inline]
600    pub fn pop(&mut self) -> Option<char> {
601        self.0.pop()
602    }
603
604    /// Appends a given string slice onto the end of this [`CompactString`]
605    ///
606    /// # Examples
607    /// ```
608    /// # use compact_str::CompactString;
609    /// let mut s = CompactString::new("abc");
610    ///
611    /// s.push_str("123");
612    ///
613    /// assert_eq!("abc123", s);
614    /// ```
615    #[inline]
616    pub fn push_str(&mut self, s: &str) {
617        self.0.push_str(s)
618    }
619
620    /// Removes a [`char`] from this [`CompactString`] at a byte position and returns it.
621    ///
622    /// This is an *O*(*n*) operation, as it requires copying every element in the
623    /// buffer.
624    ///
625    /// # Panics
626    ///
627    /// Panics if `idx` is larger than or equal to the [`CompactString`]'s length,
628    /// or if it does not lie on a [`char`] boundary.
629    ///
630    /// # Examples
631    ///
632    /// ### Basic usage:
633    ///
634    /// ```
635    /// # use compact_str::CompactString;
636    /// let mut c = CompactString::from("hello world");
637    ///
638    /// assert_eq!(c.remove(0), 'h');
639    /// assert_eq!(c, "ello world");
640    ///
641    /// assert_eq!(c.remove(5), 'w');
642    /// assert_eq!(c, "ello orld");
643    /// ```
644    ///
645    /// ### Past total length:
646    ///
647    /// ```should_panic
648    /// # use compact_str::CompactString;
649    /// let mut c = CompactString::from("hello there!");
650    /// c.remove(100);
651    /// ```
652    ///
653    /// ### Not on char boundary:
654    ///
655    /// ```should_panic
656    /// # use compact_str::CompactString;
657    /// let mut c = CompactString::from("🦄");
658    /// c.remove(1);
659    /// ```
660    #[inline]
661    pub fn remove(&mut self, idx: usize) -> char {
662        let len = self.len();
663        let substr = &mut self.as_mut_str()[idx..];
664
665        // get the char we want to remove
666        let ch = substr
667            .chars()
668            .next()
669            .expect("cannot remove a char from the end of a string");
670        let ch_len = ch.len_utf8();
671
672        // shift everything back one character
673        let num_bytes = substr.len() - ch_len;
674        let ptr = substr.as_mut_ptr();
675
676        // SAFETY: Both src and dest are valid for reads of `num_bytes` amount of bytes,
677        // and are properly aligned
678        unsafe {
679            core::ptr::copy(ptr.add(ch_len) as *const u8, ptr, num_bytes);
680            self.set_len(len - ch_len);
681        }
682
683        ch
684    }
685
686    /// Forces the length of the [`CompactString`] to `new_len`.
687    ///
688    /// This is a low-level operation that maintains none of the normal invariants for
689    /// `CompactString`. If you want to modify the `CompactString` you should use methods like
690    /// `push`, `push_str` or `pop`.
691    ///
692    /// # Safety
693    /// * `new_len` must be less than or equal to `capacity()`
694    /// * The elements at `old_len..new_len` must be initialized
695    #[inline]
696    pub unsafe fn set_len(&mut self, new_len: usize) {
697        self.0.set_len(new_len)
698    }
699
700    /// Returns whether or not the [`CompactString`] is heap allocated.
701    ///
702    /// # Examples
703    /// ### Inlined
704    /// ```
705    /// # use compact_str::CompactString;
706    /// let hello = CompactString::new("hello world");
707    ///
708    /// assert!(!hello.is_heap_allocated());
709    /// ```
710    ///
711    /// ### Heap Allocated
712    /// ```
713    /// # use compact_str::CompactString;
714    /// let msg = CompactString::new("this message will self destruct in 5, 4, 3, 2, 1 💥");
715    ///
716    /// assert!(msg.is_heap_allocated());
717    /// ```
718    #[inline]
719    pub fn is_heap_allocated(&self) -> bool {
720        self.0.is_heap_allocated()
721    }
722
723    /// Ensure that the given range is inside the set data, and that no codepoints are split.
724    ///
725    /// Returns the range `start..end` as a tuple.
726    #[inline]
727    fn ensure_range(&self, range: impl RangeBounds<usize>) -> (usize, usize) {
728        #[cold]
729        #[inline(never)]
730        fn illegal_range() -> ! {
731            panic!("illegal range");
732        }
733
734        let start = match range.start_bound() {
735            Bound::Included(&n) => n,
736            Bound::Excluded(&n) => match n.checked_add(1) {
737                Some(n) => n,
738                None => illegal_range(),
739            },
740            Bound::Unbounded => 0,
741        };
742        let end = match range.end_bound() {
743            Bound::Included(&n) => match n.checked_add(1) {
744                Some(n) => n,
745                None => illegal_range(),
746            },
747            Bound::Excluded(&n) => n,
748            Bound::Unbounded => self.len(),
749        };
750        if end < start {
751            illegal_range();
752        }
753
754        let s = self.as_str();
755        if !s.is_char_boundary(start) || !s.is_char_boundary(end) {
756            illegal_range();
757        }
758
759        (start, end)
760    }
761
762    /// Removes the specified range in the [`CompactString`],
763    /// and replaces it with the given string.
764    /// The given string doesn't need to be the same length as the range.
765    ///
766    /// # Panics
767    ///
768    /// Panics if the starting point or end point do not lie on a [`char`]
769    /// boundary, or if they're out of bounds.
770    ///
771    /// # Examples
772    ///
773    /// Basic usage:
774    ///
775    /// ```
776    /// # use compact_str::CompactString;
777    /// let mut s = CompactString::new("Hello, world!");
778    ///
779    /// s.replace_range(7..12, "WORLD");
780    /// assert_eq!(s, "Hello, WORLD!");
781    ///
782    /// s.replace_range(7..=11, "you");
783    /// assert_eq!(s, "Hello, you!");
784    ///
785    /// s.replace_range(5.., "! Is it me you're looking for?");
786    /// assert_eq!(s, "Hello! Is it me you're looking for?");
787    /// ```
788    #[inline]
789    pub fn replace_range(&mut self, range: impl RangeBounds<usize>, replace_with: &str) {
790        let (start, end) = self.ensure_range(range);
791        let dest_len = end - start;
792        match dest_len.cmp(&replace_with.len()) {
793            Ordering::Equal => unsafe { self.replace_range_same_size(start, end, replace_with) },
794            Ordering::Greater => unsafe { self.replace_range_shrink(start, end, replace_with) },
795            Ordering::Less => unsafe { self.replace_range_grow(start, end, replace_with) },
796        }
797    }
798
799    /// Replace into the same size.
800    unsafe fn replace_range_same_size(&mut self, start: usize, end: usize, replace_with: &str) {
801        core::ptr::copy_nonoverlapping(
802            replace_with.as_ptr(),
803            self.as_mut_ptr().add(start),
804            end - start,
805        );
806    }
807
808    /// Replace, so self.len() gets smaller.
809    unsafe fn replace_range_shrink(&mut self, start: usize, end: usize, replace_with: &str) {
810        let total_len = self.len();
811        let dest_len = end - start;
812        let new_len = total_len - (dest_len - replace_with.len());
813        let amount = total_len - end;
814        let data = self.as_mut_ptr();
815        // first insert the replacement string, overwriting the current content
816        core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
817        // then move the tail of the CompactString forward to its new place, filling the gap
818        core::ptr::copy(
819            data.add(total_len - amount),
820            data.add(new_len - amount),
821            amount,
822        );
823        // and lastly we set the new length
824        self.set_len(new_len);
825    }
826
827    /// Replace, so self.len() gets bigger.
828    unsafe fn replace_range_grow(&mut self, start: usize, end: usize, replace_with: &str) {
829        let dest_len = end - start;
830        self.reserve(replace_with.len() - dest_len);
831        let total_len = self.len();
832        let new_len = total_len + (replace_with.len() - dest_len);
833        let amount = total_len - end;
834        // first grow the string, so MIRI knows that the full range is usable
835        self.set_len(new_len);
836        let data = self.as_mut_ptr();
837        // then move the tail of the CompactString back to its new place
838        core::ptr::copy(
839            data.add(total_len - amount),
840            data.add(new_len - amount),
841            amount,
842        );
843        // and lastly insert the replacement string
844        core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
845    }
846
847    /// Truncate the [`CompactString`] to a shorter length.
848    ///
849    /// If the length of the [`CompactString`] is less or equal to `new_len`, the call is a no-op.
850    ///
851    /// Calling this function does not change the capacity of the [`CompactString`].
852    ///
853    /// # Panics
854    ///
855    /// Panics if the new end of the string does not lie on a [`char`] boundary.
856    ///
857    /// # Examples
858    ///
859    /// Basic usage:
860    ///
861    /// ```
862    /// # use compact_str::CompactString;
863    /// let mut s = CompactString::new("Hello, world!");
864    /// s.truncate(5);
865    /// assert_eq!(s, "Hello");
866    /// ```
867    pub fn truncate(&mut self, new_len: usize) {
868        let s = self.as_str();
869        if new_len >= s.len() {
870            return;
871        }
872
873        assert!(
874            s.is_char_boundary(new_len),
875            "new_len must lie on char boundary",
876        );
877        unsafe { self.set_len(new_len) };
878    }
879
880    /// Converts a [`CompactString`] to a raw pointer.
881    #[inline]
882    pub fn as_ptr(&self) -> *const u8 {
883        self.0.as_slice().as_ptr()
884    }
885
886    /// Converts a mutable [`CompactString`] to a raw pointer.
887    #[inline]
888    pub fn as_mut_ptr(&mut self) -> *mut u8 {
889        unsafe { self.0.as_mut_buf().as_mut_ptr() }
890    }
891
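    /// Inserts a string slice into this [`CompactString`] at the byte position `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` does not lie on a [`char`] boundary.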
893    ///
894    /// # Examples
895    ///
896    /// Basic usage:
897    ///
898    /// ```
899    /// # use compact_str::CompactString;
900    /// let mut s = CompactString::new("Hello!");
901    /// s.insert_str(5, ", world");
902    /// assert_eq!(s, "Hello, world!");
903    /// ```
904    pub fn insert_str(&mut self, idx: usize, string: &str) {
905        assert!(self.is_char_boundary(idx), "idx must lie on char boundary");
906
907        let new_len = self.len() + string.len();
908        self.reserve(string.len());
909
910        // SAFETY: We just checked that we may split self at idx.
911        //         We set the length only after reserving the memory.
912        //         We fill the gap with valid UTF-8 data.
913        unsafe {
914            // first move the tail to the new back
915            let data = self.as_mut_ptr();
916            std::ptr::copy(
917                data.add(idx),
918                data.add(idx + string.len()),
919                new_len - idx - string.len(),
920            );
921
922            // then insert the new bytes
923            std::ptr::copy_nonoverlapping(string.as_ptr(), data.add(idx), string.len());
924
925            // and lastly resize the string
926            self.set_len(new_len);
927        }
928    }
929
930    /// Insert a character at an index.
931    ///
932    /// # Examples
933    ///
934    /// Basic usage:
935    ///
936    /// ```
937    /// # use compact_str::CompactString;
938    /// let mut s = CompactString::new("Hello world!");
939    /// s.insert(5, ',');
940    /// assert_eq!(s, "Hello, world!");
941    /// ```
942    pub fn insert(&mut self, idx: usize, ch: char) {
943        self.insert_str(idx, ch.encode_utf8(&mut [0; 4]));
944    }
945
    /// Reduces the length of the [`CompactString`] to zero.
    ///
    /// Calling this function does not change the capacity of the [`CompactString`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::new("Rust is the most loved language on Stackoverflow!");
    /// assert_eq!(s.capacity(), 49);
    ///
    /// s.clear();
    ///
    /// assert_eq!(s, "");
    /// assert_eq!(s.capacity(), 49);
    /// ```
    pub fn clear(&mut self) {
        // SAFETY: a length of zero is always in bounds, and the empty string is trivially
        //         valid UTF-8.
        unsafe { self.set_len(0) };
    }
963
964    /// Split the [`CompactString`] into at the given byte index.
965    ///
966    /// Calling this function does not change the capacity of the [`CompactString`].
967    ///
968    /// # Panics
969    ///
970    /// Panics if `at` does not lie on a [`char`] boundary.
971    ///
972    /// Basic usage:
973    ///
974    /// ```
975    /// # use compact_str::CompactString;
976    /// let mut s = CompactString::new("Hello, world!");
977    /// assert_eq!(s.split_off(5), ", world!");
978    /// assert_eq!(s, "Hello");
979    /// ```
980    pub fn split_off(&mut self, at: usize) -> Self {
981        let result = self[at..].into();
982        // SAFETY: the previous line `self[at...]` would have panicked if `at` was invalid
983        unsafe { self.set_len(at) };
984        result
985    }
986
987    /// Remove a range from the [`CompactString`], and return it as an iterator.
988    ///
989    /// Calling this function does not change the capacity of the [`CompactString`].
990    ///
991    /// # Panics
992    ///
993    /// Panics if the start or end of the range does not lie on a [`char`] boundary.
994    ///
995    /// # Examples
996    ///
997    /// Basic usage:
998    ///
999    /// ```
1000    /// # use compact_str::CompactString;
1001    /// let mut s = CompactString::new("Hello, world!");
1002    ///
1003    /// let mut d = s.drain(5..12);
1004    /// assert_eq!(d.next(), Some(','));   // iterate over the extracted data
1005    /// assert_eq!(d.as_str(), " world"); // or get the whole data as &str
1006    ///
1007    /// // The iterator keeps a reference to `s`, so you have to drop() the iterator,
1008    /// // before you can access `s` again.
1009    /// drop(d);
1010    /// assert_eq!(s, "Hello!");
1011    /// ```
    pub fn drain(&mut self, range: impl RangeBounds<usize>) -> Drain<'_> {
        // Resolve the generic bounds into concrete byte offsets.
        // NOTE(review): `ensure_range` is defined outside this view; presumably it performs
        // the bounds / char-boundary checks documented under "Panics" — confirm.
        let (start, end) = self.ensure_range(range);
        Drain {
            // Raw pointer back to the string that is being drained.
            compact_string: self as *mut Self,
            start,
            end,
            // The iterator walks the characters that are about to be removed.
            chars: self[start..end].chars(),
        }
    }
1021
1022    /// Shrinks the capacity of this [`CompactString`] with a lower bound.
1023    ///
    /// The resulting capacity is never less than the size of 3×[`usize`],
    /// i.e. the capacity that can be inlined.
1026    ///
1027    /// # Examples
1028    ///
1029    /// Basic usage:
1030    ///
1031    /// ```
1032    /// # use compact_str::CompactString;
1033    /// let mut s = CompactString::with_capacity(100);
1034    /// assert_eq!(s.capacity(), 100);
1035    ///
1036    /// // if the capacity was already bigger than the argument, the call is a no-op
1037    /// s.shrink_to(100);
1038    /// assert_eq!(s.capacity(), 100);
1039    ///
1040    /// s.shrink_to(50);
1041    /// assert_eq!(s.capacity(), 50);
1042    ///
1043    /// // if the string can be inlined, it is
1044    /// s.shrink_to(10);
1045    /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
1046    /// ```
    #[inline]
    pub fn shrink_to(&mut self, min_capacity: usize) {
        // The inner representation owns the buffer, so the whole shrinking logic is
        // delegated to it.
        self.0.shrink_to(min_capacity);
    }
1051
1052    /// Shrinks the capacity of this [`CompactString`] to match its length.
1053    ///
    /// The resulting capacity is never less than the size of 3×[`usize`],
    /// i.e. the capacity that can be inlined.
    ///
    /// This method is effectively the same as calling [`string.shrink_to(0)`](Self::shrink_to).
1058    ///
1059    /// # Examples
1060    ///
1061    /// Basic usage:
1062    ///
1063    /// ```
1064    /// # use compact_str::CompactString;
1065    /// let mut s = CompactString::from("This is a string with more than 24 characters.");
1066    ///
1067    /// s.reserve(100);
1068    /// assert!(s.capacity() >= 100);
1069    ///
1070    ///  s.shrink_to_fit();
1071    /// assert_eq!(s.len(), s.capacity());
1072    /// ```
1073    ///
1074    /// ```
1075    /// # use compact_str::CompactString;
1076    /// let mut s = CompactString::from("short string");
1077    ///
1078    /// s.reserve(100);
1079    /// assert!(s.capacity() >= 100);
1080    ///
1081    /// s.shrink_to_fit();
1082    /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
1083    /// ```
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        // Passing a minimum capacity of zero asks the inner representation to shrink as far
        // as it can.
        self.0.shrink_to(0);
    }
1088
1089    /// Retains only the characters specified by the predicate.
1090    ///
1091    /// The method iterates over the characters in the string and calls the `predicate`.
1092    ///
1093    /// If the `predicate` returns `false`, then the character gets removed.
1094    /// If the `predicate` returns `true`, then the character is kept.
1095    ///
1096    /// # Examples
1097    ///
1098    /// ```
1099    /// # use compact_str::CompactString;
1100    /// let mut s = CompactString::from("äb𝄞d€");
1101    ///
1102    /// let keep = [false, true, true, false, true];
1103    /// let mut iter = keep.iter();
1104    /// s.retain(|_| *iter.next().unwrap());
1105    ///
1106    /// assert_eq!(s, "b𝄞€");
1107    /// ```
1108    pub fn retain(&mut self, mut predicate: impl FnMut(char) -> bool) {
1109        // We iterate over the string, and copy character by character.
1110
1111        let s = self.as_mut_str();
1112        let mut dest_idx = 0;
1113        let mut src_idx = 0;
1114        while let Some(ch) = s[src_idx..].chars().next() {
1115            let ch_len = ch.len_utf8();
1116            if predicate(ch) {
1117                // SAFETY: We know that both indices are valid, and that we don't split a char.
1118                unsafe {
1119                    let p = s.as_mut_ptr();
1120                    core::ptr::copy(p.add(src_idx), p.add(dest_idx), ch_len);
1121                }
1122                dest_idx += ch_len;
1123            }
1124            src_idx += ch_len;
1125        }
1126
1127        // SAFETY: We know that the index is a valid position to break the string.
1128        unsafe { self.set_len(dest_idx) };
1129    }
1130
    /// Decode a byte slice as a UTF-8 string, replacing any illegal sequences with U+FFFD.
1132    ///
1133    /// # Examples
1134    ///
1135    /// ```
1136    /// # use compact_str::CompactString;
1137    /// let chess_knight = b"\xf0\x9f\xa8\x84";
1138    ///
1139    /// assert_eq!(
1140    ///     "🨄",
1141    ///     CompactString::from_utf8_lossy(chess_knight),
1142    /// );
1143    ///
1144    /// // For valid UTF-8 slices, this is the same as:
1145    /// assert_eq!(
1146    ///     "🨄",
1147    ///     CompactString::new(std::str::from_utf8(chess_knight).unwrap()),
1148    /// );
1149    /// ```
1150    ///
1151    /// Incorrect bytes:
1152    ///
1153    /// ```
1154    /// # use compact_str::CompactString;
1155    /// let broken = b"\xf0\x9f\xc8\x84";
1156    ///
1157    /// assert_eq!(
1158    ///     "�Ȅ",
1159    ///     CompactString::from_utf8_lossy(broken),
1160    /// );
1161    ///
    /// // For invalid UTF-8 slices, this is an optimized implementation of:
1163    /// assert_eq!(
1164    ///     "�Ȅ",
1165    ///     CompactString::from(String::from_utf8_lossy(broken)),
1166    /// );
1167    /// ```
    pub fn from_utf8_lossy(v: &[u8]) -> Self {
        // Decodes the next character from `iter`.
        //
        // Returns `None` once the input is exhausted. Otherwise returns either the bytes of
        // one well-formed UTF-8 sequence (copied into `buf`), or the encoded replacement
        // character if the input at this position is malformed. In the malformed case only
        // the bytes that were accepted so far are consumed; the offending byte is looked at
        // again by the next call.
        fn next_char<'a>(
            iter: &mut <&[u8] as IntoIterator>::IntoIter,
            buf: &'a mut [u8; 4],
        ) -> Option<&'a [u8]> {
            const REPLACEMENT: &[u8] = "\u{FFFD}".as_bytes();

            // Peeks at the next byte: if it matches `$range` it is stored in `buf[$idx]` and
            // consumed, otherwise the surrounding function returns the replacement character
            // without consuming that byte.
            macro_rules! ensure_range {
                ($idx:literal, $range:pat) => {{
                    let mut i = iter.clone();
                    match i.next() {
                        Some(&c) if matches!(c, $range) => {
                            buf[$idx] = c;
                            *iter = i;
                        }
                        _ => return Some(REPLACEMENT),
                    }
                }};
            }

            // Same as `ensure_range!`, for a generic UTF-8 continuation byte.
            macro_rules! ensure_cont {
                ($idx:literal) => {{
                    ensure_range!($idx, 0x80..=0xBF);
                }};
            }

            let c = *iter.next()?;
            buf[0] = c;

            match c {
                0x00..=0x7F => {
                    // simple ASCII: push as is
                    Some(&buf[..1])
                }
                0xC2..=0xDF => {
                    // two bytes
                    ensure_cont!(1);
                    Some(&buf[..2])
                }
                0xE0..=0xEF => {
                    // three bytes
                    match c {
                        // 0x80..=0x9F would encode an overlong two byte codepoint
                        0xE0 => ensure_range!(1, 0xA0..=0xBF),
                        // 0xA0..=0xBF encodes surrogate half
                        0xED => ensure_range!(1, 0x80..=0x9F),
                        // all UTF-8 continuation bytes are valid
                        _ => ensure_cont!(1),
                    }
                    ensure_cont!(2);
                    Some(&buf[..3])
                }
                0xF0..=0xF4 => {
                    // four bytes
                    match c {
                        // 0x80..=0x8F encodes overlong three byte codepoint
                        0xF0 => ensure_range!(1, 0x90..=0xBF),
                        // 0x90..=0xBF encodes codepoint > U+10FFFF
                        0xF4 => ensure_range!(1, 0x80..=0x8F),
                        // all UTF-8 continuation bytes are valid
                        _ => ensure_cont!(1),
                    }
                    ensure_cont!(2);
                    ensure_cont!(3);
                    Some(&buf[..4])
                }
                | 0x80..=0xBF // unicode continuation, invalid
                | 0xC0..=0xC1 // would start an overlong encoding of an ASCII character
                | 0xF5..=0xF7 // four bytes that encode > U+10FFFF
                | 0xF8..=0xFB // five bytes, invalid
                | 0xFC..=0xFD // six bytes, invalid
                | 0xFE..=0xFF => Some(REPLACEMENT), // always invalid
            }
        }

        // Scratch space for one encoded character; reused for every call to `next_char()`.
        let mut buf = [0; 4];
        // The input length is only a starting capacity: a replacement character is three
        // bytes long and may stand in for a single invalid byte.
        let mut result = Self::with_capacity(v.len());
        let mut iter = v.iter();
        while let Some(s) = next_char(&mut iter, &mut buf) {
            // SAFETY: next_char() only returns valid strings
            let s = unsafe { std::str::from_utf8_unchecked(s) };
            result.push_str(s);
        }
        result
    }
1253
1254    fn from_utf16x(
1255        v: &[u8],
1256        from_int: impl Fn(u16) -> u16,
1257        from_bytes: impl Fn([u8; 2]) -> u16,
1258    ) -> Result<Self, Utf16Error> {
1259        if v.len() % 2 != 0 {
1260            // Input had an odd number of bytes.
1261            return Err(Utf16Error(()));
1262        }
1263
1264        // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
1265        // even though the size of our iterator, `v`, is known ahead of time.
1266        //
1267        // rustlang issue #48994 is tracking the fix
1268        let mut result = CompactString::with_capacity(v.len() / 2);
1269
1270        // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1271        //         `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1272        match unsafe { v.align_to::<u16>() } {
1273            (&[], v, &[]) => {
1274                // Input is correcty aligned.
1275                for c in std::char::decode_utf16(v.iter().copied().map(from_int)) {
1276                    result.push(c.map_err(|_| Utf16Error(()))?);
1277                }
1278            }
1279            _ => {
1280                // Input's alignment is off.
1281                // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1282                let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1283                for c in std::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1284                    result.push(c.map_err(|_| Utf16Error(()))?);
1285                }
1286            }
1287        }
1288
1289        Ok(result)
1290    }
1291
1292    fn from_utf16x_lossy(
1293        v: &[u8],
1294        from_int: impl Fn(u16) -> u16,
1295        from_bytes: impl Fn([u8; 2]) -> u16,
1296    ) -> Self {
1297        // Notice: We write the string "�" instead of the character '�', so the character does not
1298        //         have to be formatted before it can be appended.
1299
1300        let (trailing_extra_byte, v) = match v.len() % 2 != 0 {
1301            true => (true, &v[..v.len() - 1]),
1302            false => (false, v),
1303        };
1304        let mut result = CompactString::with_capacity(v.len() / 2);
1305
1306        // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1307        //         `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1308        match unsafe { v.align_to::<u16>() } {
1309            (&[], v, &[]) => {
1310                // Input is correcty aligned.
1311                for c in std::char::decode_utf16(v.iter().copied().map(from_int)) {
1312                    match c {
1313                        Ok(c) => result.push(c),
1314                        Err(_) => result.push_str("�"),
1315                    }
1316                }
1317            }
1318            _ => {
1319                // Input's alignment is off.
1320                // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1321                let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1322                for c in std::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1323                    match c {
1324                        Ok(c) => result.push(c),
1325                        Err(_) => result.push_str("�"),
1326                    }
1327                }
1328            }
1329        }
1330
1331        if trailing_extra_byte {
1332            result.push_str("�");
1333        }
1334        result
1335    }
1336
1337    /// Decode a slice of bytes as UTF-16 encoded string, in little endian.
1338    ///
1339    /// # Errors
1340    ///
1341    /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
1342    /// a [`Utf16Error`] is returned.
1343    ///
1344    /// # Examples
1345    ///
1346    /// ```
1347    /// # use compact_str::CompactString;
1348    /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xdc\x0d\x20\x42\x26\x0f\xfe";
1349    /// let dancing_men = CompactString::from_utf16le(DANCING_MEN).unwrap();
1350    /// assert_eq!(dancing_men, "👯‍♂️");
1351    /// ```
    #[inline]
    pub fn from_utf16le(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
        // `u16::from_le` serves the aligned fast path, `u16::from_le_bytes` the byte-wise
        // fallback; the slice does not need any particular alignment.
        CompactString::from_utf16x(v.as_ref(), u16::from_le, u16::from_le_bytes)
    }
1356
1357    /// Decode a slice of bytes as UTF-16 encoded string, in big endian.
1358    ///
1359    /// # Errors
1360    ///
1361    /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
1362    /// a [`Utf16Error`] is returned.
1363    ///
1364    /// # Examples
1365    ///
1366    /// ```
1367    /// # use compact_str::CompactString;
1368    /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xfe\x0f";
1369    /// let dancing_women = CompactString::from_utf16be(DANCING_WOMEN).unwrap();
1370    /// assert_eq!(dancing_women, "👯‍♀️");
1371    /// ```
    #[inline]
    pub fn from_utf16be(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
        // `u16::from_be` serves the aligned fast path, `u16::from_be_bytes` the byte-wise
        // fallback; the slice does not need any particular alignment.
        CompactString::from_utf16x(v.as_ref(), u16::from_be, u16::from_be_bytes)
    }
1376
1377    /// Lossy decode a slice of bytes as UTF-16 encoded string, in little endian.
1378    ///
1379    /// In this context "lossy" means that any broken characters in the input are replaced by the
1380    /// \<REPLACEMENT CHARACTER\> `'�'`. Please notice that, unlike UTF-8, UTF-16 is not self
1381    /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1382    ///
1383    /// # Examples
1384    ///
1385    /// ```
1386    /// # use compact_str::CompactString;
1387    /// // A "random" bit was flipped in the 4th byte:
1388    /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xfc\x0d\x20\x42\x26\x0f\xfe";
1389    /// let dancing_men = CompactString::from_utf16le_lossy(DANCING_MEN);
1390    /// assert_eq!(dancing_men, "�\u{fc6f}\u{200d}♂️");
1391    /// ```
    #[inline]
    pub fn from_utf16le_lossy(v: impl AsRef<[u8]>) -> Self {
        // `u16::from_le` serves the aligned fast path, `u16::from_le_bytes` the byte-wise
        // fallback. A dangling last byte is turned into one replacement character by the
        // shared decoder.
        CompactString::from_utf16x_lossy(v.as_ref(), u16::from_le, u16::from_le_bytes)
    }
1396
1397    /// Lossy decode a slice of bytes as UTF-16 encoded string, in big endian.
1398    ///
1399    /// In this context "lossy" means that any broken characters in the input are replaced by the
1400    /// \<REPLACEMENT CHARACTER\> `'�'`. Please notice that, unlike UTF-8, UTF-16 is not self
1401    /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1402    ///
1403    /// # Examples
1404    ///
1405    /// ```
1406    /// # use compact_str::CompactString;
1407    /// // A "random" bit was flipped in the 9th byte:
1408    /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xde\x0f";
1409    /// let dancing_women = CompactString::from_utf16be_lossy(DANCING_WOMEN);
1410    /// assert_eq!(dancing_women, "👯\u{200d}♀�");
1411    /// ```
    #[inline]
    pub fn from_utf16be_lossy(v: impl AsRef<[u8]>) -> Self {
        // `u16::from_be` serves the aligned fast path, `u16::from_be_bytes` the byte-wise
        // fallback. A dangling last byte is turned into one replacement character by the
        // shared decoder.
        CompactString::from_utf16x_lossy(v.as_ref(), u16::from_be, u16::from_be_bytes)
    }
1416
    /// Convert the [`CompactString`] into a [`String`].
    ///
    /// The [`CompactString`] is consumed; the conversion itself is performed by the inner
    /// representation.
    ///
    /// # Examples
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// let s = CompactString::new("Hello world");
    /// let s = s.into_string();
    /// assert_eq!(s, "Hello world");
    /// ```
    pub fn into_string(self) -> String {
        self.0.into_string()
    }
1430
1431    /// Convert a [`String`] into a [`CompactString`] _without inlining_.
1432    ///
1433    /// Note: You probably don't need to use this method, instead you should use `From<String>`
1434    /// which is implemented for [`CompactString`].
1435    ///
    /// This method exists in case your code is very sensitive to memory allocations. Normally when
1437    /// converting a [`String`] to a [`CompactString`] we'll inline short strings onto the stack.
1438    /// But this results in [`Drop`]-ing the original [`String`], which causes memory it owned on
1439    /// the heap to be deallocated. Instead when using this method, we always reuse the buffer that
1440    /// was previously owned by the [`String`], so no trips to the allocator are needed.
1441    ///
1442    /// # Examples
1443    ///
1444    /// ### Short Strings
1445    /// ```
1446    /// use compact_str::CompactString;
1447    ///
1448    /// let short = "hello world".to_string();
1449    /// let c_heap = CompactString::from_string_buffer(short);
1450    ///
1451    /// // using CompactString::from_string_buffer, we'll re-use the String's underlying buffer
1452    /// assert!(c_heap.is_heap_allocated());
1453    ///
1454    /// // note: when Clone-ing a short heap allocated string, we'll eagerly inline at that point
1455    /// let c_inline = c_heap.clone();
1456    /// assert!(!c_inline.is_heap_allocated());
1457    ///
1458    /// assert_eq!(c_heap, c_inline);
1459    /// ```
1460    ///
1461    /// ### Longer Strings
1462    /// ```
1463    /// use compact_str::CompactString;
1464    ///
1465    /// let x = "longer string that will be on the heap".to_string();
1466    /// let c1 = CompactString::from(x);
1467    ///
1468    /// let y = "longer string that will be on the heap".to_string();
1469    /// let c2 = CompactString::from_string_buffer(y);
1470    ///
1471    /// // for longer strings, we re-use the underlying String's buffer in both cases
1472    /// assert!(c1.is_heap_allocated());
1473    /// assert!(c2.is_heap_allocated());
1474    /// ```
1475    ///
1476    /// ### Buffer Re-use
1477    /// ```
1478    /// use compact_str::CompactString;
1479    ///
1480    /// let og = "hello world".to_string();
1481    /// let og_addr = og.as_ptr();
1482    ///
1483    /// let mut c = CompactString::from_string_buffer(og);
1484    /// let ex_addr = c.as_ptr();
1485    ///
1486    /// // When converting to/from String and CompactString with from_string_buffer we always re-use
1487    /// // the same underlying allocated memory/buffer
1488    /// assert_eq!(og_addr, ex_addr);
1489    ///
1490    /// let long = "this is a long string that will be on the heap".to_string();
1491    /// let long_addr = long.as_ptr();
1492    ///
1493    /// let mut long_c = CompactString::from(long);
1494    /// let long_ex_addr = long_c.as_ptr();
1495    ///
1496    /// // When converting to/from String and CompactString with From<String>, we'll also re-use the
1497    /// // underlying buffer, if the string is long, otherwise when converting to CompactString we
1498    /// // eagerly inline
1499    /// assert_eq!(long_addr, long_ex_addr);
1500    /// ```
1501    #[inline]
1502    pub fn from_string_buffer(s: String) -> Self {
1503        let repr = Repr::from_string(s, false);
1504        CompactString(repr)
1505    }
1506}
1507
impl Default for CompactString {
    /// Creates an empty [`CompactString`].
    #[inline]
    fn default() -> Self {
        CompactString::new("")
    }
}
1514
/// Lets a [`CompactString`] be used wherever a `&str` is expected, which makes all `&self`
/// methods of [`str`] available.
impl Deref for CompactString {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        self.as_str()
    }
}
1523
/// Gives access to the `&mut self` methods of [`str`].
impl DerefMut for CompactString {
    #[inline]
    fn deref_mut(&mut self) -> &mut str {
        self.as_mut_str()
    }
}
1530
impl AsRef<str> for CompactString {
    /// Borrows the contents as a string slice.
    #[inline]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
1537
impl AsRef<OsStr> for CompactString {
    /// Borrows the contents as an [`OsStr`], e.g. for APIs that take paths or OS strings.
    #[inline]
    fn as_ref(&self) -> &OsStr {
        OsStr::new(self.as_str())
    }
}
1544
impl AsRef<[u8]> for CompactString {
    /// Borrows the contents as a byte slice.
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.as_bytes()
    }
}
1551
/// Allows looking up `CompactString` keys in maps and sets by `&str`.
///
/// This relies on the `Hash`, `Eq` and `Ord` impls of [`CompactString`] behaving exactly
/// like those of the borrowed [`str`].
impl Borrow<str> for CompactString {
    #[inline]
    fn borrow(&self) -> &str {
        self.as_str()
    }
}
1558
impl BorrowMut<str> for CompactString {
    /// Mutably borrows the contents as a string slice.
    #[inline]
    fn borrow_mut(&mut self) -> &mut str {
        self.as_mut_str()
    }
}
1565
// Equality is plain string equality (see the `PartialEq` impl below), which is a full
// equivalence relation.
impl Eq for CompactString {}
1567
/// Compares a [`CompactString`] with anything that can be viewed as a `&str`.
///
/// Since [`CompactString`] itself implements `AsRef<str>`, this impl also covers comparing
/// two [`CompactString`]s.
impl<T: AsRef<str>> PartialEq<T> for CompactString {
    fn eq(&self, other: &T) -> bool {
        self.as_str() == other.as_ref()
    }
}
1573
/// Mirror impl, so that `String == CompactString` works with the operands in this order.
impl PartialEq<CompactString> for String {
    fn eq(&self, other: &CompactString) -> bool {
        self.as_str() == other.as_str()
    }
}
1579
/// Mirror impl, so that `&str == CompactString` works with the operands in this order.
impl PartialEq<CompactString> for &str {
    fn eq(&self, other: &CompactString) -> bool {
        *self == other.as_str()
    }
}
1585
/// Mirror impl, so that `Cow<str> == CompactString` works with the operands in this order.
impl<'a> PartialEq<CompactString> for Cow<'a, str> {
    fn eq(&self, other: &CompactString) -> bool {
        *self == other.as_str()
    }
}
1591
impl Ord for CompactString {
    /// Orders two [`CompactString`]s exactly like their string slices.
    fn cmp(&self, other: &Self) -> Ordering {
        self.as_str().cmp(other.as_str())
    }
}
1597
impl PartialOrd for CompactString {
    /// Always returns `Some`: defers to the total order defined by [`Ord`].
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
1603
impl Hash for CompactString {
    /// Hashes the string slice, so a [`CompactString`] hashes identically to the equal `str`
    /// (as required by the `Borrow<str>` impl).
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_str().hash(state)
    }
}
1609
1610impl<'a> From<&'a str> for CompactString {
1611    fn from(s: &'a str) -> Self {
1612        let repr = Repr::new(s);
1613        CompactString(repr)
1614    }
1615}
1616
1617impl From<String> for CompactString {
1618    fn from(s: String) -> Self {
1619        let repr = Repr::from_string(s, true);
1620        CompactString(repr)
1621    }
1622}
1623
impl<'a> From<&'a String> for CompactString {
    /// Creates a [`CompactString`] from the contents of a borrowed [`String`].
    ///
    /// The [`String`] is only borrowed, so its buffer cannot be taken over.
    fn from(s: &'a String) -> Self {
        CompactString::new(s)
    }
}
1629
1630impl<'a> From<Cow<'a, str>> for CompactString {
1631    fn from(cow: Cow<'a, str>) -> Self {
1632        match cow {
1633            Cow::Borrowed(s) => s.into(),
1634            // we separate these two so we can re-use the underlying buffer in the owned case
1635            Cow::Owned(s) => s.into(),
1636        }
1637    }
1638}
1639
1640impl From<Box<str>> for CompactString {
1641    fn from(b: Box<str>) -> Self {
1642        let s = b.into_string();
1643        let repr = Repr::from_string(s, true);
1644        CompactString(repr)
1645    }
1646}
1647
impl From<CompactString> for String {
    /// Converts a [`CompactString`] into a [`String`], see [`CompactString::into_string`].
    #[inline]
    fn from(s: CompactString) -> Self {
        s.into_string()
    }
}
1654
impl From<CompactString> for Cow<'_, str> {
    /// Converts a [`CompactString`] into an owned [`Cow`], by turning it into a [`String`].
    #[inline]
    fn from(s: CompactString) -> Self {
        Self::Owned(s.into_string())
    }
}
1661
impl<'a> From<&'a CompactString> for Cow<'a, str> {
    /// Creates a borrowed [`Cow`] that points into the [`CompactString`]; nothing is copied.
    #[inline]
    fn from(s: &'a CompactString) -> Self {
        Self::Borrowed(s)
    }
}
1668
/// Enables `"...".parse::<CompactString>()`.
impl FromStr for CompactString {
    // Every `&str` is a valid `CompactString`, so parsing cannot fail.
    type Err = core::convert::Infallible;
    fn from_str(s: &str) -> Result<CompactString, Self::Err> {
        Ok(CompactString::from(s))
    }
}
1675
impl fmt::Debug for CompactString {
    /// Formats exactly like the `Debug` impl of [`str`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self.as_str(), f)
    }
}
1681
impl fmt::Display for CompactString {
    /// Formats exactly like the `Display` impl of [`str`], honoring the formatter's flags.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.as_str(), f)
    }
}
1687
1688impl FromIterator<char> for CompactString {
1689    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
1690        let repr = iter.into_iter().collect();
1691        CompactString(repr)
1692    }
1693}
1694
1695impl<'a> FromIterator<&'a char> for CompactString {
1696    fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
1697        let repr = iter.into_iter().collect();
1698        CompactString(repr)
1699    }
1700}
1701
1702impl<'a> FromIterator<&'a str> for CompactString {
1703    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
1704        let repr = iter.into_iter().collect();
1705        CompactString(repr)
1706    }
1707}
1708
1709impl FromIterator<Box<str>> for CompactString {
1710    fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
1711        let repr = iter.into_iter().collect();
1712        CompactString(repr)
1713    }
1714}
1715
1716impl<'a> FromIterator<Cow<'a, str>> for CompactString {
1717    fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
1718        let repr = iter.into_iter().collect();
1719        CompactString(repr)
1720    }
1721}
1722
1723impl FromIterator<String> for CompactString {
1724    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
1725        let repr = iter.into_iter().collect();
1726        CompactString(repr)
1727    }
1728}
1729
1730impl FromIterator<CompactString> for CompactString {
1731    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
1732        let repr = iter.into_iter().collect();
1733        CompactString(repr)
1734    }
1735}
1736
1737impl FromIterator<CompactString> for String {
1738    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
1739        let mut iterator = iter.into_iter();
1740        match iterator.next() {
1741            None => String::new(),
1742            Some(buf) => {
1743                let mut buf = buf.into_string();
1744                buf.extend(iterator);
1745                buf
1746            }
1747        }
1748    }
1749}
1750
impl FromIterator<CompactString> for Cow<'_, str> {
    /// Builds an owned [`Cow`] by collecting the items into a [`String`] first.
    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
        String::from_iter(iter).into()
    }
}
1756
impl Extend<char> for CompactString {
    /// Appends all characters of the iterator; delegated to the inner representation.
    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
        self.0.extend(iter)
    }
}
1762
impl<'a> Extend<&'a char> for CompactString {
    /// Appends all borrowed characters of the iterator; delegated to the inner
    /// representation.
    fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
        self.0.extend(iter)
    }
}
1768
impl<'a> Extend<&'a str> for CompactString {
    /// Appends all string slices of the iterator; delegated to the inner representation.
    fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
        self.0.extend(iter)
    }
}
1774
impl Extend<Box<str>> for CompactString {
    /// Appends all boxed string slices of the iterator; delegated to the inner
    /// representation.
    fn extend<T: IntoIterator<Item = Box<str>>>(&mut self, iter: T) {
        self.0.extend(iter)
    }
}
1780
1781impl<'a> Extend<Cow<'a, str>> for CompactString {
1782    fn extend<T: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: T) {
1783        iter.into_iter().for_each(move |s| self.push_str(&s));
1784    }
1785}
1786
impl Extend<String> for CompactString {
    /// Appends all [`String`]s of the iterator; delegated to the inner representation.
    fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
        self.0.extend(iter)
    }
}
1792
1793impl Extend<CompactString> for String {
1794    fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
1795        for s in iter {
1796            self.push_str(&s);
1797        }
1798    }
1799}
1800
1801impl Extend<CompactString> for CompactString {
1802    fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
1803        for s in iter {
1804            self.push_str(&s);
1805        }
1806    }
1807}
1808
impl<'a> Extend<CompactString> for Cow<'a, str> {
    /// Appends all [`CompactString`]s of the iterator.
    ///
    /// `to_mut()` turns a borrowed [`Cow`] into an owned [`String`] first, even if the
    /// iterator turns out to be empty.
    fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
        self.to_mut().extend(iter);
    }
}
1814
1815impl fmt::Write for CompactString {
1816    fn write_str(&mut self, s: &str) -> fmt::Result {
1817        self.push_str(s);
1818        Ok(())
1819    }
1820
1821    fn write_fmt(mut self: &mut Self, args: fmt::Arguments<'_>) -> fmt::Result {
1822        match args.as_str() {
1823            Some(s) => {
1824                self.push_str(s);
1825                Ok(())
1826            }
1827            None => fmt::write(&mut self, args),
1828        }
1829    }
1830}
1831
1832impl Add<&str> for CompactString {
1833    type Output = Self;
1834    fn add(mut self, rhs: &str) -> Self::Output {
1835        self.push_str(rhs);
1836        self
1837    }
1838}
1839
1840impl AddAssign<&str> for CompactString {
1841    fn add_assign(&mut self, rhs: &str) {
1842        self.push_str(rhs);
1843    }
1844}
1845
/// A possible error value when converting a [`CompactString`] from a slice of UTF-16
/// code units.
///
/// This type is the error type for the [`from_utf16`] method on [`CompactString`].
///
/// [`from_utf16`]: CompactString::from_utf16
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// // 𝄞mu<invalid>ic
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
///           0xD800, 0x0069, 0x0063];
///
/// assert!(CompactString::from_utf16(v).is_err());
/// ```
// The single field is a private unit value: the error carries no payload.
#[derive(Copy, Clone, Debug)]
pub struct Utf16Error(());
1865
1866impl fmt::Display for Utf16Error {
1867    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1868        fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
1869    }
1870}
1871
/// An iterator over the extracted data by [`CompactString::drain()`].
///
/// Iterating yields the `char`s of the extracted range; when the `Drain` is dropped, the
/// range `start..end` is removed from the string it points to.
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Drain<'a> {
    // Raw pointer to the string being drained; dereferenced when the `Drain` is dropped.
    compact_string: *mut CompactString,
    // Start of the drained range, passed to `replace_range_shrink` on drop.
    start: usize,
    // End of the drained range, passed to `replace_range_shrink` on drop.
    end: usize,
    // Iterator over the not-yet-consumed characters of the extracted substring.
    chars: std::str::Chars<'a>,
}
1880
// `Drain` contains a raw `*mut CompactString`, so `Send` and `Sync` are not derived
// automatically and have to be asserted by hand.
//
// SAFETY: Drain keeps the lifetime of the CompactString it belongs to.
unsafe impl Send for Drain<'_> {}
// SAFETY: same reasoning as for `Send` above.
unsafe impl Sync for Drain<'_> {}
1884
1885impl fmt::Debug for Drain<'_> {
1886    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1887        f.debug_tuple("Drain").field(&self.as_str()).finish()
1888    }
1889}
1890
1891impl fmt::Display for Drain<'_> {
1892    #[inline]
1893    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1894        f.write_str(self.as_str())
1895    }
1896}
1897
impl Drop for Drain<'_> {
    /// Replaces the range `start..end` of the pointed-to string with the empty string,
    /// regardless of how many characters were actually consumed from the iterator.
    #[inline]
    fn drop(&mut self) {
        // SAFETY: Drain keeps a mutable reference to compact_string, so no one else can access
        //         the CompactString, but this function right now. CompactString::drain() ensured
        //         that the new extracted range does not split a UTF-8 character.
        unsafe { (*self.compact_string).replace_range_shrink(self.start, self.end, "") };
    }
}
1907
1908impl Drain<'_> {
1909    /// The remaining, unconsumed characters of the extracted substring.
1910    #[inline]
1911    pub fn as_str(&self) -> &str {
1912        self.chars.as_str()
1913    }
1914}
1915
1916impl Deref for Drain<'_> {
1917    type Target = str;
1918
1919    #[inline]
1920    fn deref(&self) -> &Self::Target {
1921        self.as_str()
1922    }
1923}
1924
1925impl Iterator for Drain<'_> {
1926    type Item = char;
1927
1928    #[inline]
1929    fn next(&mut self) -> Option<char> {
1930        self.chars.next()
1931    }
1932
1933    #[inline]
1934    fn count(self) -> usize {
1935        // <Chars as Iterator>::count() is specialized, and cloning is trivial.
1936        self.chars.clone().count()
1937    }
1938
1939    fn size_hint(&self) -> (usize, Option<usize>) {
1940        self.chars.size_hint()
1941    }
1942
1943    #[inline]
1944    fn last(mut self) -> Option<char> {
1945        self.chars.next_back()
1946    }
1947}
1948
1949impl DoubleEndedIterator for Drain<'_> {
1950    #[inline]
1951    fn next_back(&mut self) -> Option<char> {
1952        self.chars.next_back()
1953    }
1954}
1955
// `next` and `next_back` forward straight to the inner `Chars`, which is itself fused.
impl FusedIterator for Drain<'_> {}
1957
// Compile-time check that `CompactString` has the same size as `String`.
static_assertions::assert_eq_size!(CompactString, String);