compact_str/lib.rs
1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[doc(hidden)]
5pub use core;
6use core::borrow::{
7 Borrow,
8 BorrowMut,
9};
10use core::cmp::Ordering;
11use core::hash::{
12 Hash,
13 Hasher,
14};
15use core::iter::FromIterator;
16use core::ops::{
17 Add,
18 AddAssign,
19 Bound,
20 Deref,
21 DerefMut,
22 RangeBounds,
23};
24use core::str::{
25 FromStr,
26 Utf8Error,
27};
28use core::{
29 fmt,
30 slice,
31};
32use std::borrow::Cow;
33use std::ffi::OsStr;
34use std::iter::FusedIterator;
35
36mod features;
37mod macros;
38
39mod repr;
40use repr::Repr;
41
42mod traits;
43pub use traits::{
44 CompactStringExt,
45 ToCompactString,
46};
47
48#[cfg(test)]
49mod tests;
50
51/// A [`CompactString`] is a compact string type that can be used almost anywhere a
52/// [`String`] or [`str`] can be used.
53///
54/// ## Using `CompactString`
55/// ```
56/// use compact_str::CompactString;
57/// # use std::collections::HashMap;
58///
59/// // CompactString auto derefs into a str so you can use all methods from `str`
60/// // that take a `&self`
61/// if CompactString::new("hello world!").is_ascii() {
62/// println!("we're all ASCII")
63/// }
64///
65/// // You can use a CompactString in collections like you would a String or &str
66/// let mut map: HashMap<CompactString, CompactString> = HashMap::new();
67///
68/// // directly construct a new `CompactString`
69/// map.insert(CompactString::new("nyc"), CompactString::new("empire state building"));
70/// // create a `CompactString` from a `&str`
71/// map.insert("sf".into(), "transamerica pyramid".into());
72/// // create a `CompactString` from a `String`
73/// map.insert(String::from("sea").into(), String::from("space needle").into());
74///
75/// fn wrapped_print<T: AsRef<str>>(text: T) {
76/// println!("{}", text.as_ref());
77/// }
78///
79/// // CompactString impls AsRef<str> and Borrow<str>, so it can be used anywhere
80/// // that expects a generic string
81/// if let Some(building) = map.get("nyc") {
82/// wrapped_print(building);
83/// }
84///
85/// // CompactString can also be directly compared to a String or &str
86/// assert_eq!(CompactString::new("chicago"), "chicago");
87/// assert_eq!(CompactString::new("houston"), String::from("houston"));
88/// ```
89///
90/// # Converting from a `String`
91/// It's important that a `CompactString` interops well with `String`, so you can easily use both in
92/// your code base.
93///
94/// `CompactString` implements `From<String>` and operates in the following manner:
95/// - Eagerly inlines the string, possibly dropping excess capacity
96/// - Otherwise re-uses the same underlying buffer from `String`
97///
98/// ```
99/// use compact_str::CompactString;
100///
101/// // eagerly inlining
102/// let short = String::from("hello world");
103/// let short_c = CompactString::from(short);
104/// assert!(!short_c.is_heap_allocated());
105///
106/// // dropping excess capacity
107/// let mut excess = String::with_capacity(256);
108/// excess.push_str("abc");
109///
110/// let excess_c = CompactString::from(excess);
111/// assert!(!excess_c.is_heap_allocated());
112/// assert!(excess_c.capacity() < 256);
113///
114/// // re-using the same buffer
115/// let long = String::from("this is a longer string that will be heap allocated");
116///
117/// let long_ptr = long.as_ptr();
118/// let long_len = long.len();
119/// let long_cap = long.capacity();
120///
121/// let mut long_c = CompactString::from(long);
122/// assert!(long_c.is_heap_allocated());
123///
124/// let cpt_ptr = long_c.as_ptr();
125/// let cpt_len = long_c.len();
126/// let cpt_cap = long_c.capacity();
127///
128/// // the original String and the CompactString point to the same place in memory, buffer re-use!
129/// assert_eq!(cpt_ptr, long_ptr);
130/// assert_eq!(cpt_len, long_len);
131/// assert_eq!(cpt_cap, long_cap);
132/// ```
133///
134/// ### Prevent Eagerly Inlining
135/// A consequence of eagerly inlining is you then need to de-allocate the existing buffer, which
136/// might not always be desirable if you're converting a very large amount of `String`s. If your
137/// code is very sensitive to allocations, consider the [`CompactString::from_string_buffer`] API.
// `Clone` is derived, so cloning a `CompactString` goes through `Repr`'s own `Clone` impl.
#[derive(Clone)]
// Single-field transparent wrapper: `CompactString` has the same layout as the inner `Repr`.
#[repr(transparent)]
pub struct CompactString(Repr);
141
142impl CompactString {
143 /// Creates a new [`CompactString`] from any type that implements `AsRef<str>`.
144 /// If the string is short enough, then it will be inlined on the stack!
145 ///
146 /// # Examples
147 ///
148 /// ### Inlined
149 /// ```
150 /// # use compact_str::CompactString;
151 /// // We can inline strings up to 12 characters long on 32-bit architectures...
152 /// #[cfg(target_pointer_width = "32")]
153 /// let s = "i'm 12 chars";
154 /// // ...and up to 24 characters on 64-bit architectures!
155 /// #[cfg(target_pointer_width = "64")]
156 /// let s = "i am 24 characters long!";
157 ///
158 /// let compact = CompactString::new(&s);
159 ///
160 /// assert_eq!(compact, s);
161 /// // we are not allocated on the heap!
162 /// assert!(!compact.is_heap_allocated());
163 /// ```
164 ///
165 /// ### Heap
166 /// ```
167 /// # use compact_str::CompactString;
168 /// // For longer strings though, we get allocated on the heap
169 /// let long = "I am a longer string that will be allocated on the heap";
170 /// let compact = CompactString::new(long);
171 ///
172 /// assert_eq!(compact, long);
173 /// // we are allocated on the heap!
174 /// assert!(compact.is_heap_allocated());
175 /// ```
176 ///
177 /// ### Creation
178 /// ```
179 /// use compact_str::CompactString;
180 ///
181 /// // Using a `&'static str`
182 /// let s = "hello world!";
183 /// let hello = CompactString::new(&s);
184 ///
185 /// // Using a `String`
186 /// let u = String::from("🦄🌈");
187 /// let unicorn = CompactString::new(u);
188 ///
189 /// // Using a `Box<str>`
190 /// let b: Box<str> = String::from("📦📦📦").into_boxed_str();
191 /// let boxed = CompactString::new(&b);
192 /// ```
193 #[inline]
194 pub fn new<T: AsRef<str>>(text: T) -> Self {
195 CompactString(Repr::new(text.as_ref()))
196 }
197
198 /// Creates a new inline [`CompactString`] at compile time.
199 ///
200 /// # Examples
201 /// ```
202 /// use compact_str::CompactString;
203 ///
204 /// const DEFAULT_NAME: CompactString = CompactString::new_inline("untitled");
205 /// ```
206 ///
207 /// Note: Trying to create a long string that can't be inlined, will fail to build.
208 /// ```compile_fail
209 /// # use compact_str::CompactString;
210 /// const LONG: CompactString = CompactString::new_inline("this is a long string that can't be stored on the stack");
211 /// ```
212 #[inline]
213 pub const fn new_inline(text: &str) -> Self {
214 CompactString(Repr::new_inline(text))
215 }
216
217 /// Creates a new empty [`CompactString`] with the capacity to fit at least `capacity` bytes.
218 ///
219 /// A `CompactString` will inline strings on the stack, if they're small enough. Specifically,
220 /// if the string has a length less than or equal to `std::mem::size_of::<String>` bytes
221 /// then it will be inlined. This also means that `CompactString`s have a minimum capacity
222 /// of `std::mem::size_of::<String>`.
223 ///
224 /// # Examples
225 ///
226 /// ### "zero" Capacity
227 /// ```
228 /// # use compact_str::CompactString;
229 /// // Creating a CompactString with a capacity of 0 will create
230 /// // one with capacity of std::mem::size_of::<String>();
231 /// let empty = CompactString::with_capacity(0);
232 /// let min_size = std::mem::size_of::<String>();
233 ///
234 /// assert_eq!(empty.capacity(), min_size);
235 /// assert_ne!(0, min_size);
236 /// assert!(!empty.is_heap_allocated());
237 /// ```
238 ///
239 /// ### Max Inline Size
240 /// ```
241 /// # use compact_str::CompactString;
242 /// // Creating a CompactString with a capacity of std::mem::size_of::<String>()
243 /// // will not heap allocate.
244 /// let str_size = std::mem::size_of::<String>();
245 /// let empty = CompactString::with_capacity(str_size);
246 ///
247 /// assert_eq!(empty.capacity(), str_size);
248 /// assert!(!empty.is_heap_allocated());
249 /// ```
250 ///
251 /// ### Heap Allocating
252 /// ```
253 /// # use compact_str::CompactString;
    /// // If you create a `CompactString` with a capacity greater than
    /// // `std::mem::size_of::<String>`, it will be heap allocated. Heap
    /// // allocated strings also have a minimum capacity.
257 ///
258 /// const MIN_HEAP_CAPACITY: usize = std::mem::size_of::<usize>() * 4;
259 ///
260 /// let heap_size = std::mem::size_of::<String>() + 1;
261 /// let empty = CompactString::with_capacity(heap_size);
262 ///
263 /// assert_eq!(empty.capacity(), MIN_HEAP_CAPACITY);
264 /// assert!(empty.is_heap_allocated());
265 /// ```
266 #[inline]
267 pub fn with_capacity(capacity: usize) -> Self {
268 CompactString(Repr::with_capacity(capacity))
269 }
270
271 /// Convert a slice of bytes into a [`CompactString`].
272 ///
273 /// A [`CompactString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
274 /// This method converts from an arbitrary contiguous collection of bytes into a
275 /// [`CompactString`], failing if the provided bytes are not `UTF-8`.
276 ///
277 /// Note: If you want to create a [`CompactString`] from a non-contiguous collection of bytes,
278 /// enable the `bytes` feature of this crate, and see `CompactString::from_utf8_buf`
279 ///
280 /// # Examples
281 /// ### Valid UTF-8
282 /// ```
283 /// # use compact_str::CompactString;
284 /// let bytes = vec![240, 159, 166, 128, 240, 159, 146, 175];
285 /// let compact = CompactString::from_utf8(bytes).expect("valid UTF-8");
286 ///
287 /// assert_eq!(compact, "🦀💯");
288 /// ```
289 ///
290 /// ### Invalid UTF-8
291 /// ```
292 /// # use compact_str::CompactString;
293 /// let bytes = vec![255, 255, 255];
294 /// let result = CompactString::from_utf8(bytes);
295 ///
296 /// assert!(result.is_err());
297 /// ```
298 #[inline]
299 pub fn from_utf8<B: AsRef<[u8]>>(buf: B) -> Result<Self, Utf8Error> {
300 Repr::from_utf8(buf).map(CompactString)
301 }
302
303 /// Converts a vector of bytes to a [`CompactString`] without checking that the string contains
304 /// valid UTF-8.
305 ///
306 /// See the safe version, [`CompactString::from_utf8`], for more details.
307 ///
308 /// # Safety
309 ///
310 /// This function is unsafe because it does not check that the bytes passed to it are valid
311 /// UTF-8. If this constraint is violated, it may cause memory unsafety issues with future users
312 /// of the [`CompactString`], as the rest of the standard library assumes that
313 /// [`CompactString`]s are valid UTF-8.
314 ///
315 /// # Examples
316 ///
317 /// Basic usage:
318 ///
319 /// ```
320 /// # use compact_str::CompactString;
321 /// // some bytes, in a vector
322 /// let sparkle_heart = vec![240, 159, 146, 150];
323 ///
324 /// let sparkle_heart = unsafe {
325 /// CompactString::from_utf8_unchecked(sparkle_heart)
326 /// };
327 ///
328 /// assert_eq!("💖", sparkle_heart);
329 /// ```
330 #[inline]
331 #[must_use]
332 pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(buf: B) -> Self {
333 CompactString(Repr::from_utf8_unchecked(buf))
334 }
335
336 /// Decode a [`UTF-16`](https://en.wikipedia.org/wiki/UTF-16) slice of bytes into a
337 /// [`CompactString`], returning an [`Err`] if the slice contains any invalid data.
338 ///
339 /// # Examples
340 /// ### Valid UTF-16
341 /// ```
342 /// # use compact_str::CompactString;
343 /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063];
344 /// let compact = CompactString::from_utf16(buf).unwrap();
345 ///
346 /// assert_eq!(compact, "𝄞music");
347 /// ```
348 ///
349 /// ### Invalid UTF-16
350 /// ```
351 /// # use compact_str::CompactString;
352 /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
353 /// let res = CompactString::from_utf16(buf);
354 ///
355 /// assert!(res.is_err());
356 /// ```
357 #[inline]
358 pub fn from_utf16<B: AsRef<[u16]>>(buf: B) -> Result<Self, Utf16Error> {
359 // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
360 // even though the size of our iterator, `buf`, is known ahead of time.
361 //
362 // rustlang issue #48994 is tracking the fix
363
364 let buf = buf.as_ref();
365 let mut ret = CompactString::with_capacity(buf.len());
366 for c in core::char::decode_utf16(buf.iter().copied()) {
367 if let Ok(c) = c {
368 ret.push(c);
369 } else {
370 return Err(Utf16Error(()));
371 }
372 }
373 Ok(ret)
374 }
375
376 /// Decode a UTF-16–encoded slice `v` into a `CompactString`, replacing invalid data with
377 /// the replacement character (`U+FFFD`), �.
378 ///
379 /// # Examples
380 ///
381 /// Basic usage:
382 ///
383 /// ```
384 /// # use compact_str::CompactString;
385 /// // 𝄞mus<invalid>ic<invalid>
386 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
387 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
388 /// 0xD834];
389 ///
390 /// assert_eq!(CompactString::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
391 /// CompactString::from_utf16_lossy(v));
392 /// ```
393 #[inline]
394 pub fn from_utf16_lossy<B: AsRef<[u16]>>(buf: B) -> Self {
395 let buf = buf.as_ref();
396 let mut ret = CompactString::with_capacity(buf.len());
397 for c in std::char::decode_utf16(buf.iter().copied()) {
398 match c {
399 Ok(c) => ret.push(c),
400 Err(_) => ret.push_str("�"),
401 }
402 }
403 ret
404 }
405
406 /// Returns the length of the [`CompactString`] in `bytes`, not [`char`]s or graphemes.
407 ///
408 /// When using `UTF-8` encoding (which all strings in Rust do) a single character will be 1 to 4
409 /// bytes long, therefore the return value of this method might not be what a human considers
410 /// the length of the string.
411 ///
412 /// # Examples
413 /// ```
414 /// # use compact_str::CompactString;
415 /// let ascii = CompactString::new("hello world");
416 /// assert_eq!(ascii.len(), 11);
417 ///
418 /// let emoji = CompactString::new("👱");
419 /// assert_eq!(emoji.len(), 4);
420 /// ```
421 #[inline]
422 pub fn len(&self) -> usize {
423 self.0.len()
424 }
425
426 /// Returns `true` if the [`CompactString`] has a length of 0, `false` otherwise
427 ///
428 /// # Examples
429 /// ```
430 /// # use compact_str::CompactString;
431 /// let mut msg = CompactString::new("");
432 /// assert!(msg.is_empty());
433 ///
434 /// // add some characters
435 /// msg.push_str("hello reader!");
436 /// assert!(!msg.is_empty());
437 /// ```
438 #[inline]
439 pub fn is_empty(&self) -> bool {
440 self.len() == 0
441 }
442
443 /// Returns the capacity of the [`CompactString`], in bytes.
444 ///
445 /// # Note
446 /// * A `CompactString` will always have a capacity of at least `std::mem::size_of::<String>()`
447 ///
448 /// # Examples
449 /// ### Minimum Size
450 /// ```
451 /// # use compact_str::CompactString;
452 /// let min_size = std::mem::size_of::<String>();
453 /// let compact = CompactString::new("");
454 ///
455 /// assert!(compact.capacity() >= min_size);
456 /// ```
457 ///
458 /// ### Heap Allocated
459 /// ```
460 /// # use compact_str::CompactString;
461 /// let compact = CompactString::with_capacity(128);
462 /// assert_eq!(compact.capacity(), 128);
463 /// ```
464 #[inline]
465 pub fn capacity(&self) -> usize {
466 self.0.capacity()
467 }
468
469 /// Ensures that this [`CompactString`]'s capacity is at least `additional` bytes longer than
470 /// its length. The capacity may be increased by more than `additional` bytes if it chooses,
471 /// to prevent frequent reallocations.
472 ///
473 /// # Note
474 /// * A `CompactString` will always have at least a capacity of `std::mem::size_of::<String>()`
475 /// * Reserving additional bytes may cause the `CompactString` to become heap allocated
476 ///
477 /// # Panics
478 /// Panics if the new capacity overflows `usize`
479 ///
480 /// # Examples
481 /// ```
482 /// # use compact_str::CompactString;
483 ///
484 /// const WORD: usize = std::mem::size_of::<usize>();
485 /// let mut compact = CompactString::default();
486 /// assert!(compact.capacity() >= (WORD * 3) - 1);
487 ///
488 /// compact.reserve(200);
489 /// assert!(compact.is_heap_allocated());
490 /// assert!(compact.capacity() >= 200);
491 /// ```
492 #[inline]
493 pub fn reserve(&mut self, additional: usize) {
494 self.0.reserve(additional)
495 }
496
497 /// Returns a string slice containing the entire [`CompactString`].
498 ///
499 /// # Examples
500 /// ```
501 /// # use compact_str::CompactString;
502 /// let s = CompactString::new("hello");
503 ///
504 /// assert_eq!(s.as_str(), "hello");
505 /// ```
506 #[inline]
507 pub fn as_str(&self) -> &str {
508 self.0.as_str()
509 }
510
511 /// Returns a mutable string slice containing the entire [`CompactString`].
512 ///
513 /// # Examples
514 /// ```
515 /// # use compact_str::CompactString;
516 /// let mut s = CompactString::new("hello");
517 /// s.as_mut_str().make_ascii_uppercase();
518 ///
519 /// assert_eq!(s.as_str(), "HELLO");
520 /// ```
521 #[inline]
522 pub fn as_mut_str(&mut self) -> &mut str {
523 let len = self.len();
524 unsafe { std::str::from_utf8_unchecked_mut(&mut self.0.as_mut_buf()[..len]) }
525 }
526
527 /// Returns a byte slice of the [`CompactString`]'s contents.
528 ///
529 /// # Examples
530 /// ```
531 /// # use compact_str::CompactString;
532 /// let s = CompactString::new("hello");
533 ///
534 /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
535 /// ```
536 #[inline]
537 pub fn as_bytes(&self) -> &[u8] {
538 &self.0.as_slice()[..self.len()]
539 }
540
541 // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning?
542 //
543 /// Provides a mutable reference to the underlying buffer of bytes.
544 ///
545 /// # Safety
    /// * All Rust strings, including `CompactString`, must be valid UTF-8. The caller must
    ///   guarantee that any modifications made to the underlying buffer are valid UTF-8.
549 ///
550 /// # Examples
551 /// ```
552 /// # use compact_str::CompactString;
553 /// let mut s = CompactString::new("hello");
554 ///
555 /// let slice = unsafe { s.as_mut_bytes() };
556 /// // copy bytes into our string
557 /// slice[5..11].copy_from_slice(" world".as_bytes());
558 /// // set the len of the string
559 /// unsafe { s.set_len(11) };
560 ///
561 /// assert_eq!(s, "hello world");
562 /// ```
563 #[inline]
564 pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] {
565 self.0.as_mut_buf()
566 }
567
568 /// Appends the given [`char`] to the end of this [`CompactString`].
569 ///
570 /// # Examples
571 /// ```
572 /// # use compact_str::CompactString;
573 /// let mut s = CompactString::new("foo");
574 ///
575 /// s.push('b');
576 /// s.push('a');
577 /// s.push('r');
578 ///
579 /// assert_eq!("foobar", s);
580 /// ```
581 pub fn push(&mut self, ch: char) {
582 self.push_str(ch.encode_utf8(&mut [0; 4]));
583 }
584
585 /// Removes the last character from the [`CompactString`] and returns it.
586 /// Returns `None` if this [`CompactString`] is empty.
587 ///
588 /// # Examples
589 /// ```
590 /// # use compact_str::CompactString;
591 /// let mut s = CompactString::new("abc");
592 ///
593 /// assert_eq!(s.pop(), Some('c'));
594 /// assert_eq!(s.pop(), Some('b'));
595 /// assert_eq!(s.pop(), Some('a'));
596 ///
597 /// assert_eq!(s.pop(), None);
598 /// ```
599 #[inline]
600 pub fn pop(&mut self) -> Option<char> {
601 self.0.pop()
602 }
603
604 /// Appends a given string slice onto the end of this [`CompactString`]
605 ///
606 /// # Examples
607 /// ```
608 /// # use compact_str::CompactString;
609 /// let mut s = CompactString::new("abc");
610 ///
611 /// s.push_str("123");
612 ///
613 /// assert_eq!("abc123", s);
614 /// ```
615 #[inline]
616 pub fn push_str(&mut self, s: &str) {
617 self.0.push_str(s)
618 }
619
620 /// Removes a [`char`] from this [`CompactString`] at a byte position and returns it.
621 ///
622 /// This is an *O*(*n*) operation, as it requires copying every element in the
623 /// buffer.
624 ///
625 /// # Panics
626 ///
627 /// Panics if `idx` is larger than or equal to the [`CompactString`]'s length,
628 /// or if it does not lie on a [`char`] boundary.
629 ///
630 /// # Examples
631 ///
632 /// ### Basic usage:
633 ///
634 /// ```
635 /// # use compact_str::CompactString;
636 /// let mut c = CompactString::from("hello world");
637 ///
638 /// assert_eq!(c.remove(0), 'h');
639 /// assert_eq!(c, "ello world");
640 ///
641 /// assert_eq!(c.remove(5), 'w');
642 /// assert_eq!(c, "ello orld");
643 /// ```
644 ///
645 /// ### Past total length:
646 ///
647 /// ```should_panic
648 /// # use compact_str::CompactString;
649 /// let mut c = CompactString::from("hello there!");
650 /// c.remove(100);
651 /// ```
652 ///
653 /// ### Not on char boundary:
654 ///
655 /// ```should_panic
656 /// # use compact_str::CompactString;
657 /// let mut c = CompactString::from("🦄");
658 /// c.remove(1);
659 /// ```
660 #[inline]
661 pub fn remove(&mut self, idx: usize) -> char {
662 let len = self.len();
663 let substr = &mut self.as_mut_str()[idx..];
664
665 // get the char we want to remove
666 let ch = substr
667 .chars()
668 .next()
669 .expect("cannot remove a char from the end of a string");
670 let ch_len = ch.len_utf8();
671
672 // shift everything back one character
673 let num_bytes = substr.len() - ch_len;
674 let ptr = substr.as_mut_ptr();
675
676 // SAFETY: Both src and dest are valid for reads of `num_bytes` amount of bytes,
677 // and are properly aligned
678 unsafe {
679 core::ptr::copy(ptr.add(ch_len) as *const u8, ptr, num_bytes);
680 self.set_len(len - ch_len);
681 }
682
683 ch
684 }
685
686 /// Forces the length of the [`CompactString`] to `new_len`.
687 ///
688 /// This is a low-level operation that maintains none of the normal invariants for
689 /// `CompactString`. If you want to modify the `CompactString` you should use methods like
690 /// `push`, `push_str` or `pop`.
691 ///
692 /// # Safety
693 /// * `new_len` must be less than or equal to `capacity()`
694 /// * The elements at `old_len..new_len` must be initialized
695 #[inline]
696 pub unsafe fn set_len(&mut self, new_len: usize) {
697 self.0.set_len(new_len)
698 }
699
700 /// Returns whether or not the [`CompactString`] is heap allocated.
701 ///
702 /// # Examples
703 /// ### Inlined
704 /// ```
705 /// # use compact_str::CompactString;
706 /// let hello = CompactString::new("hello world");
707 ///
708 /// assert!(!hello.is_heap_allocated());
709 /// ```
710 ///
711 /// ### Heap Allocated
712 /// ```
713 /// # use compact_str::CompactString;
714 /// let msg = CompactString::new("this message will self destruct in 5, 4, 3, 2, 1 💥");
715 ///
716 /// assert!(msg.is_heap_allocated());
717 /// ```
718 #[inline]
719 pub fn is_heap_allocated(&self) -> bool {
720 self.0.is_heap_allocated()
721 }
722
723 /// Ensure that the given range is inside the set data, and that no codepoints are split.
724 ///
725 /// Returns the range `start..end` as a tuple.
726 #[inline]
727 fn ensure_range(&self, range: impl RangeBounds<usize>) -> (usize, usize) {
728 #[cold]
729 #[inline(never)]
730 fn illegal_range() -> ! {
731 panic!("illegal range");
732 }
733
734 let start = match range.start_bound() {
735 Bound::Included(&n) => n,
736 Bound::Excluded(&n) => match n.checked_add(1) {
737 Some(n) => n,
738 None => illegal_range(),
739 },
740 Bound::Unbounded => 0,
741 };
742 let end = match range.end_bound() {
743 Bound::Included(&n) => match n.checked_add(1) {
744 Some(n) => n,
745 None => illegal_range(),
746 },
747 Bound::Excluded(&n) => n,
748 Bound::Unbounded => self.len(),
749 };
750 if end < start {
751 illegal_range();
752 }
753
754 let s = self.as_str();
755 if !s.is_char_boundary(start) || !s.is_char_boundary(end) {
756 illegal_range();
757 }
758
759 (start, end)
760 }
761
762 /// Removes the specified range in the [`CompactString`],
763 /// and replaces it with the given string.
764 /// The given string doesn't need to be the same length as the range.
765 ///
766 /// # Panics
767 ///
768 /// Panics if the starting point or end point do not lie on a [`char`]
769 /// boundary, or if they're out of bounds.
770 ///
771 /// # Examples
772 ///
773 /// Basic usage:
774 ///
775 /// ```
776 /// # use compact_str::CompactString;
777 /// let mut s = CompactString::new("Hello, world!");
778 ///
779 /// s.replace_range(7..12, "WORLD");
780 /// assert_eq!(s, "Hello, WORLD!");
781 ///
782 /// s.replace_range(7..=11, "you");
783 /// assert_eq!(s, "Hello, you!");
784 ///
785 /// s.replace_range(5.., "! Is it me you're looking for?");
786 /// assert_eq!(s, "Hello! Is it me you're looking for?");
787 /// ```
788 #[inline]
789 pub fn replace_range(&mut self, range: impl RangeBounds<usize>, replace_with: &str) {
790 let (start, end) = self.ensure_range(range);
791 let dest_len = end - start;
792 match dest_len.cmp(&replace_with.len()) {
793 Ordering::Equal => unsafe { self.replace_range_same_size(start, end, replace_with) },
794 Ordering::Greater => unsafe { self.replace_range_shrink(start, end, replace_with) },
795 Ordering::Less => unsafe { self.replace_range_grow(start, end, replace_with) },
796 }
797 }
798
799 /// Replace into the same size.
800 unsafe fn replace_range_same_size(&mut self, start: usize, end: usize, replace_with: &str) {
801 core::ptr::copy_nonoverlapping(
802 replace_with.as_ptr(),
803 self.as_mut_ptr().add(start),
804 end - start,
805 );
806 }
807
808 /// Replace, so self.len() gets smaller.
809 unsafe fn replace_range_shrink(&mut self, start: usize, end: usize, replace_with: &str) {
810 let total_len = self.len();
811 let dest_len = end - start;
812 let new_len = total_len - (dest_len - replace_with.len());
813 let amount = total_len - end;
814 let data = self.as_mut_ptr();
815 // first insert the replacement string, overwriting the current content
816 core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
817 // then move the tail of the CompactString forward to its new place, filling the gap
818 core::ptr::copy(
819 data.add(total_len - amount),
820 data.add(new_len - amount),
821 amount,
822 );
823 // and lastly we set the new length
824 self.set_len(new_len);
825 }
826
827 /// Replace, so self.len() gets bigger.
828 unsafe fn replace_range_grow(&mut self, start: usize, end: usize, replace_with: &str) {
829 let dest_len = end - start;
830 self.reserve(replace_with.len() - dest_len);
831 let total_len = self.len();
832 let new_len = total_len + (replace_with.len() - dest_len);
833 let amount = total_len - end;
834 // first grow the string, so MIRI knows that the full range is usable
835 self.set_len(new_len);
836 let data = self.as_mut_ptr();
837 // then move the tail of the CompactString back to its new place
838 core::ptr::copy(
839 data.add(total_len - amount),
840 data.add(new_len - amount),
841 amount,
842 );
843 // and lastly insert the replacement string
844 core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
845 }
846
847 /// Truncate the [`CompactString`] to a shorter length.
848 ///
849 /// If the length of the [`CompactString`] is less or equal to `new_len`, the call is a no-op.
850 ///
851 /// Calling this function does not change the capacity of the [`CompactString`].
852 ///
853 /// # Panics
854 ///
855 /// Panics if the new end of the string does not lie on a [`char`] boundary.
856 ///
857 /// # Examples
858 ///
859 /// Basic usage:
860 ///
861 /// ```
862 /// # use compact_str::CompactString;
863 /// let mut s = CompactString::new("Hello, world!");
864 /// s.truncate(5);
865 /// assert_eq!(s, "Hello");
866 /// ```
867 pub fn truncate(&mut self, new_len: usize) {
868 let s = self.as_str();
869 if new_len >= s.len() {
870 return;
871 }
872
873 assert!(
874 s.is_char_boundary(new_len),
875 "new_len must lie on char boundary",
876 );
877 unsafe { self.set_len(new_len) };
878 }
879
880 /// Converts a [`CompactString`] to a raw pointer.
881 #[inline]
882 pub fn as_ptr(&self) -> *const u8 {
883 self.0.as_slice().as_ptr()
884 }
885
886 /// Converts a mutable [`CompactString`] to a raw pointer.
887 #[inline]
888 pub fn as_mut_ptr(&mut self) -> *mut u8 {
889 unsafe { self.0.as_mut_buf().as_mut_ptr() }
890 }
891
892 /// Insert string character at an index.
893 ///
894 /// # Examples
895 ///
896 /// Basic usage:
897 ///
898 /// ```
899 /// # use compact_str::CompactString;
900 /// let mut s = CompactString::new("Hello!");
901 /// s.insert_str(5, ", world");
902 /// assert_eq!(s, "Hello, world!");
903 /// ```
904 pub fn insert_str(&mut self, idx: usize, string: &str) {
905 assert!(self.is_char_boundary(idx), "idx must lie on char boundary");
906
907 let new_len = self.len() + string.len();
908 self.reserve(string.len());
909
910 // SAFETY: We just checked that we may split self at idx.
911 // We set the length only after reserving the memory.
912 // We fill the gap with valid UTF-8 data.
913 unsafe {
914 // first move the tail to the new back
915 let data = self.as_mut_ptr();
916 std::ptr::copy(
917 data.add(idx),
918 data.add(idx + string.len()),
919 new_len - idx - string.len(),
920 );
921
922 // then insert the new bytes
923 std::ptr::copy_nonoverlapping(string.as_ptr(), data.add(idx), string.len());
924
925 // and lastly resize the string
926 self.set_len(new_len);
927 }
928 }
929
930 /// Insert a character at an index.
931 ///
932 /// # Examples
933 ///
934 /// Basic usage:
935 ///
936 /// ```
937 /// # use compact_str::CompactString;
938 /// let mut s = CompactString::new("Hello world!");
939 /// s.insert(5, ',');
940 /// assert_eq!(s, "Hello, world!");
941 /// ```
942 pub fn insert(&mut self, idx: usize, ch: char) {
943 self.insert_str(idx, ch.encode_utf8(&mut [0; 4]));
944 }
945
946 /// Reduces the length of the [`CompactString`] to zero.
947 ///
948 /// Calling this function does not change the capacity of the [`CompactString`].
949 ///
950 /// ```
951 /// # use compact_str::CompactString;
952 /// let mut s = CompactString::new("Rust is the most loved language on Stackoverflow!");
953 /// assert_eq!(s.capacity(), 49);
954 ///
955 /// s.clear();
956 ///
957 /// assert_eq!(s, "");
958 /// assert_eq!(s.capacity(), 49);
959 /// ```
    pub fn clear(&mut self) {
        // SAFETY: a length of zero never exceeds the capacity, and with `new_len == 0`
        // there are no bytes in `old_len..new_len` that would need to be initialized.
        unsafe { self.set_len(0) };
    }
963
964 /// Split the [`CompactString`] into at the given byte index.
965 ///
966 /// Calling this function does not change the capacity of the [`CompactString`].
967 ///
968 /// # Panics
969 ///
970 /// Panics if `at` does not lie on a [`char`] boundary.
971 ///
972 /// Basic usage:
973 ///
974 /// ```
975 /// # use compact_str::CompactString;
976 /// let mut s = CompactString::new("Hello, world!");
977 /// assert_eq!(s.split_off(5), ", world!");
978 /// assert_eq!(s, "Hello");
979 /// ```
980 pub fn split_off(&mut self, at: usize) -> Self {
981 let result = self[at..].into();
982 // SAFETY: the previous line `self[at...]` would have panicked if `at` was invalid
983 unsafe { self.set_len(at) };
984 result
985 }
986
987 /// Remove a range from the [`CompactString`], and return it as an iterator.
988 ///
989 /// Calling this function does not change the capacity of the [`CompactString`].
990 ///
991 /// # Panics
992 ///
993 /// Panics if the start or end of the range does not lie on a [`char`] boundary.
994 ///
995 /// # Examples
996 ///
997 /// Basic usage:
998 ///
999 /// ```
1000 /// # use compact_str::CompactString;
1001 /// let mut s = CompactString::new("Hello, world!");
1002 ///
1003 /// let mut d = s.drain(5..12);
1004 /// assert_eq!(d.next(), Some(',')); // iterate over the extracted data
1005 /// assert_eq!(d.as_str(), " world"); // or get the whole data as &str
1006 ///
1007 /// // The iterator keeps a reference to `s`, so you have to drop() the iterator,
1008 /// // before you can access `s` again.
1009 /// drop(d);
1010 /// assert_eq!(s, "Hello!");
1011 /// ```
1012 pub fn drain(&mut self, range: impl RangeBounds<usize>) -> Drain<'_> {
1013 let (start, end) = self.ensure_range(range);
1014 Drain {
1015 compact_string: self as *mut Self,
1016 start,
1017 end,
1018 chars: self[start..end].chars(),
1019 }
1020 }
1021
1022 /// Shrinks the capacity of this [`CompactString`] with a lower bound.
1023 ///
/// The resulting capacity is never less than the size of 3×[`usize`],
/// i.e. the capacity that can be inlined.
1026 ///
1027 /// # Examples
1028 ///
1029 /// Basic usage:
1030 ///
1031 /// ```
1032 /// # use compact_str::CompactString;
1033 /// let mut s = CompactString::with_capacity(100);
1034 /// assert_eq!(s.capacity(), 100);
1035 ///
/// // if the capacity is not larger than the argument, the call is a no-op
1037 /// s.shrink_to(100);
1038 /// assert_eq!(s.capacity(), 100);
1039 ///
1040 /// s.shrink_to(50);
1041 /// assert_eq!(s.capacity(), 50);
1042 ///
1043 /// // if the string can be inlined, it is
1044 /// s.shrink_to(10);
1045 /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
1046 /// ```
1047 #[inline]
1048 pub fn shrink_to(&mut self, min_capacity: usize) {
1049 self.0.shrink_to(min_capacity);
1050 }
1051
1052 /// Shrinks the capacity of this [`CompactString`] to match its length.
1053 ///
/// The resulting capacity is never less than the size of 3×[`usize`],
/// i.e. the capacity that can be inlined.
1056 ///
/// This method is effectively the same as calling [`string.shrink_to(0)`][CompactString::shrink_to].
1058 ///
1059 /// # Examples
1060 ///
1061 /// Basic usage:
1062 ///
1063 /// ```
1064 /// # use compact_str::CompactString;
1065 /// let mut s = CompactString::from("This is a string with more than 24 characters.");
1066 ///
1067 /// s.reserve(100);
1068 /// assert!(s.capacity() >= 100);
1069 ///
1070 /// s.shrink_to_fit();
1071 /// assert_eq!(s.len(), s.capacity());
1072 /// ```
1073 ///
1074 /// ```
1075 /// # use compact_str::CompactString;
1076 /// let mut s = CompactString::from("short string");
1077 ///
1078 /// s.reserve(100);
1079 /// assert!(s.capacity() >= 100);
1080 ///
1081 /// s.shrink_to_fit();
1082 /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
1083 /// ```
1084 #[inline]
1085 pub fn shrink_to_fit(&mut self) {
1086 self.0.shrink_to(0);
1087 }
1088
1089 /// Retains only the characters specified by the predicate.
1090 ///
1091 /// The method iterates over the characters in the string and calls the `predicate`.
1092 ///
1093 /// If the `predicate` returns `false`, then the character gets removed.
1094 /// If the `predicate` returns `true`, then the character is kept.
1095 ///
1096 /// # Examples
1097 ///
1098 /// ```
1099 /// # use compact_str::CompactString;
1100 /// let mut s = CompactString::from("äb𝄞d€");
1101 ///
1102 /// let keep = [false, true, true, false, true];
1103 /// let mut iter = keep.iter();
1104 /// s.retain(|_| *iter.next().unwrap());
1105 ///
1106 /// assert_eq!(s, "b𝄞€");
1107 /// ```
1108 pub fn retain(&mut self, mut predicate: impl FnMut(char) -> bool) {
1109 // We iterate over the string, and copy character by character.
1110
1111 let s = self.as_mut_str();
1112 let mut dest_idx = 0;
1113 let mut src_idx = 0;
1114 while let Some(ch) = s[src_idx..].chars().next() {
1115 let ch_len = ch.len_utf8();
1116 if predicate(ch) {
1117 // SAFETY: We know that both indices are valid, and that we don't split a char.
1118 unsafe {
1119 let p = s.as_mut_ptr();
1120 core::ptr::copy(p.add(src_idx), p.add(dest_idx), ch_len);
1121 }
1122 dest_idx += ch_len;
1123 }
1124 src_idx += ch_len;
1125 }
1126
1127 // SAFETY: We know that the index is a valid position to break the string.
1128 unsafe { self.set_len(dest_idx) };
1129 }
1130
/// Decode a byte slice as a UTF-8 string, replacing any invalid sequences with `'�'`
1132 ///
1133 /// # Examples
1134 ///
1135 /// ```
1136 /// # use compact_str::CompactString;
1137 /// let chess_knight = b"\xf0\x9f\xa8\x84";
1138 ///
1139 /// assert_eq!(
1140 /// "🨄",
1141 /// CompactString::from_utf8_lossy(chess_knight),
1142 /// );
1143 ///
1144 /// // For valid UTF-8 slices, this is the same as:
1145 /// assert_eq!(
1146 /// "🨄",
1147 /// CompactString::new(std::str::from_utf8(chess_knight).unwrap()),
1148 /// );
1149 /// ```
1150 ///
1151 /// Incorrect bytes:
1152 ///
1153 /// ```
1154 /// # use compact_str::CompactString;
1155 /// let broken = b"\xf0\x9f\xc8\x84";
1156 ///
1157 /// assert_eq!(
1158 /// "�Ȅ",
1159 /// CompactString::from_utf8_lossy(broken),
1160 /// );
1161 ///
/// // For invalid UTF-8 slices, this is an optimized implementation of:
1163 /// assert_eq!(
1164 /// "�Ȅ",
1165 /// CompactString::from(String::from_utf8_lossy(broken)),
1166 /// );
1167 /// ```
1168 pub fn from_utf8_lossy(v: &[u8]) -> Self {
1169 fn next_char<'a>(
1170 iter: &mut <&[u8] as IntoIterator>::IntoIter,
1171 buf: &'a mut [u8; 4],
1172 ) -> Option<&'a [u8]> {
1173 const REPLACEMENT: &[u8] = "\u{FFFD}".as_bytes();
1174
1175 macro_rules! ensure_range {
1176 ($idx:literal, $range:pat) => {{
1177 let mut i = iter.clone();
1178 match i.next() {
1179 Some(&c) if matches!(c, $range) => {
1180 buf[$idx] = c;
1181 *iter = i;
1182 }
1183 _ => return Some(REPLACEMENT),
1184 }
1185 }};
1186 }
1187
1188 macro_rules! ensure_cont {
1189 ($idx:literal) => {{
1190 ensure_range!($idx, 0x80..=0xBF);
1191 }};
1192 }
1193
1194 let c = *iter.next()?;
1195 buf[0] = c;
1196
1197 match c {
1198 0x00..=0x7F => {
1199 // simple ASCII: push as is
1200 Some(&buf[..1])
1201 }
1202 0xC2..=0xDF => {
1203 // two bytes
1204 ensure_cont!(1);
1205 Some(&buf[..2])
1206 }
1207 0xE0..=0xEF => {
1208 // three bytes
1209 match c {
1210 // 0x80..=0x9F encodes surrogate half
1211 0xE0 => ensure_range!(1, 0xA0..=0xBF),
1212 // 0xA0..=0xBF encodes surrogate half
1213 0xED => ensure_range!(1, 0x80..=0x9F),
1214 // all UTF-8 continuation bytes are valid
1215 _ => ensure_cont!(1),
1216 }
1217 ensure_cont!(2);
1218 Some(&buf[..3])
1219 }
1220 0xF0..=0xF4 => {
1221 // four bytes
1222 match c {
1223 // 0x80..=0x8F encodes overlong three byte codepoint
1224 0xF0 => ensure_range!(1, 0x90..=0xBF),
1225 // 0x90..=0xBF encodes codepoint > U+10FFFF
1226 0xF4 => ensure_range!(1, 0x80..=0x8F),
1227 // all UTF-8 continuation bytes are valid
1228 _ => ensure_cont!(1),
1229 }
1230 ensure_cont!(2);
1231 ensure_cont!(3);
1232 Some(&buf[..4])
1233 }
1234 | 0x80..=0xBF // unicode continuation, invalid
1235 | 0xC0..=0xC1 // overlong one byte character
1236 | 0xF5..=0xF7 // four bytes that encode > U+10FFFF
1237 | 0xF8..=0xFB // five bytes, invalid
1238 | 0xFC..=0xFD // six bytes, invalid
1239 | 0xFE..=0xFF => Some(REPLACEMENT), // always invalid
1240 }
1241 }
1242
1243 let mut buf = [0; 4];
1244 let mut result = Self::with_capacity(v.len());
1245 let mut iter = v.iter();
1246 while let Some(s) = next_char(&mut iter, &mut buf) {
1247 // SAFETY: next_char() only returns valid strings
1248 let s = unsafe { std::str::from_utf8_unchecked(s) };
1249 result.push_str(s);
1250 }
1251 result
1252 }
1253
1254 fn from_utf16x(
1255 v: &[u8],
1256 from_int: impl Fn(u16) -> u16,
1257 from_bytes: impl Fn([u8; 2]) -> u16,
1258 ) -> Result<Self, Utf16Error> {
1259 if v.len() % 2 != 0 {
1260 // Input had an odd number of bytes.
1261 return Err(Utf16Error(()));
1262 }
1263
1264 // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
1265 // even though the size of our iterator, `v`, is known ahead of time.
1266 //
1267 // rustlang issue #48994 is tracking the fix
1268 let mut result = CompactString::with_capacity(v.len() / 2);
1269
1270 // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1271 // `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1272 match unsafe { v.align_to::<u16>() } {
1273 (&[], v, &[]) => {
1274 // Input is correcty aligned.
1275 for c in std::char::decode_utf16(v.iter().copied().map(from_int)) {
1276 result.push(c.map_err(|_| Utf16Error(()))?);
1277 }
1278 }
1279 _ => {
1280 // Input's alignment is off.
1281 // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1282 let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1283 for c in std::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1284 result.push(c.map_err(|_| Utf16Error(()))?);
1285 }
1286 }
1287 }
1288
1289 Ok(result)
1290 }
1291
1292 fn from_utf16x_lossy(
1293 v: &[u8],
1294 from_int: impl Fn(u16) -> u16,
1295 from_bytes: impl Fn([u8; 2]) -> u16,
1296 ) -> Self {
1297 // Notice: We write the string "�" instead of the character '�', so the character does not
1298 // have to be formatted before it can be appended.
1299
1300 let (trailing_extra_byte, v) = match v.len() % 2 != 0 {
1301 true => (true, &v[..v.len() - 1]),
1302 false => (false, v),
1303 };
1304 let mut result = CompactString::with_capacity(v.len() / 2);
1305
1306 // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1307 // `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1308 match unsafe { v.align_to::<u16>() } {
1309 (&[], v, &[]) => {
1310 // Input is correcty aligned.
1311 for c in std::char::decode_utf16(v.iter().copied().map(from_int)) {
1312 match c {
1313 Ok(c) => result.push(c),
1314 Err(_) => result.push_str("�"),
1315 }
1316 }
1317 }
1318 _ => {
1319 // Input's alignment is off.
1320 // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1321 let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1322 for c in std::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1323 match c {
1324 Ok(c) => result.push(c),
1325 Err(_) => result.push_str("�"),
1326 }
1327 }
1328 }
1329 }
1330
1331 if trailing_extra_byte {
1332 result.push_str("�");
1333 }
1334 result
1335 }
1336
1337 /// Decode a slice of bytes as UTF-16 encoded string, in little endian.
1338 ///
1339 /// # Errors
1340 ///
1341 /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
1342 /// a [`Utf16Error`] is returned.
1343 ///
1344 /// # Examples
1345 ///
1346 /// ```
1347 /// # use compact_str::CompactString;
1348 /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xdc\x0d\x20\x42\x26\x0f\xfe";
1349 /// let dancing_men = CompactString::from_utf16le(DANCING_MEN).unwrap();
1350 /// assert_eq!(dancing_men, "👯♂️");
1351 /// ```
1352 #[inline]
1353 pub fn from_utf16le(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
1354 CompactString::from_utf16x(v.as_ref(), u16::from_le, u16::from_le_bytes)
1355 }
1356
1357 /// Decode a slice of bytes as UTF-16 encoded string, in big endian.
1358 ///
1359 /// # Errors
1360 ///
1361 /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
1362 /// a [`Utf16Error`] is returned.
1363 ///
1364 /// # Examples
1365 ///
1366 /// ```
1367 /// # use compact_str::CompactString;
1368 /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xfe\x0f";
1369 /// let dancing_women = CompactString::from_utf16be(DANCING_WOMEN).unwrap();
1370 /// assert_eq!(dancing_women, "👯♀️");
1371 /// ```
1372 #[inline]
1373 pub fn from_utf16be(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
1374 CompactString::from_utf16x(v.as_ref(), u16::from_be, u16::from_be_bytes)
1375 }
1376
1377 /// Lossy decode a slice of bytes as UTF-16 encoded string, in little endian.
1378 ///
1379 /// In this context "lossy" means that any broken characters in the input are replaced by the
1380 /// \<REPLACEMENT CHARACTER\> `'�'`. Please notice that, unlike UTF-8, UTF-16 is not self
1381 /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1382 ///
1383 /// # Examples
1384 ///
1385 /// ```
1386 /// # use compact_str::CompactString;
1387 /// // A "random" bit was flipped in the 4th byte:
1388 /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xfc\x0d\x20\x42\x26\x0f\xfe";
1389 /// let dancing_men = CompactString::from_utf16le_lossy(DANCING_MEN);
1390 /// assert_eq!(dancing_men, "�\u{fc6f}\u{200d}♂️");
1391 /// ```
1392 #[inline]
1393 pub fn from_utf16le_lossy(v: impl AsRef<[u8]>) -> Self {
1394 CompactString::from_utf16x_lossy(v.as_ref(), u16::from_le, u16::from_le_bytes)
1395 }
1396
1397 /// Lossy decode a slice of bytes as UTF-16 encoded string, in big endian.
1398 ///
1399 /// In this context "lossy" means that any broken characters in the input are replaced by the
1400 /// \<REPLACEMENT CHARACTER\> `'�'`. Please notice that, unlike UTF-8, UTF-16 is not self
1401 /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1402 ///
1403 /// # Examples
1404 ///
1405 /// ```
1406 /// # use compact_str::CompactString;
1407 /// // A "random" bit was flipped in the 9th byte:
1408 /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xde\x0f";
1409 /// let dancing_women = CompactString::from_utf16be_lossy(DANCING_WOMEN);
1410 /// assert_eq!(dancing_women, "👯\u{200d}♀�");
1411 /// ```
1412 #[inline]
1413 pub fn from_utf16be_lossy(v: impl AsRef<[u8]>) -> Self {
1414 CompactString::from_utf16x_lossy(v.as_ref(), u16::from_be, u16::from_be_bytes)
1415 }
1416
1417 /// Convert the [`CompactString`] into a [`String`].
1418 ///
1419 /// # Examples
1420 ///
1421 /// ```
1422 /// # use compact_str::CompactString;
1423 /// let s = CompactString::new("Hello world");
1424 /// let s = s.into_string();
1425 /// assert_eq!(s, "Hello world");
1426 /// ```
1427 pub fn into_string(self) -> String {
1428 self.0.into_string()
1429 }
1430
1431 /// Convert a [`String`] into a [`CompactString`] _without inlining_.
1432 ///
1433 /// Note: You probably don't need to use this method, instead you should use `From<String>`
1434 /// which is implemented for [`CompactString`].
1435 ///
/// This method exists in case your code is very sensitive to memory allocations. Normally when
1437 /// converting a [`String`] to a [`CompactString`] we'll inline short strings onto the stack.
1438 /// But this results in [`Drop`]-ing the original [`String`], which causes memory it owned on
1439 /// the heap to be deallocated. Instead when using this method, we always reuse the buffer that
1440 /// was previously owned by the [`String`], so no trips to the allocator are needed.
1441 ///
1442 /// # Examples
1443 ///
1444 /// ### Short Strings
1445 /// ```
1446 /// use compact_str::CompactString;
1447 ///
1448 /// let short = "hello world".to_string();
1449 /// let c_heap = CompactString::from_string_buffer(short);
1450 ///
1451 /// // using CompactString::from_string_buffer, we'll re-use the String's underlying buffer
1452 /// assert!(c_heap.is_heap_allocated());
1453 ///
1454 /// // note: when Clone-ing a short heap allocated string, we'll eagerly inline at that point
1455 /// let c_inline = c_heap.clone();
1456 /// assert!(!c_inline.is_heap_allocated());
1457 ///
1458 /// assert_eq!(c_heap, c_inline);
1459 /// ```
1460 ///
1461 /// ### Longer Strings
1462 /// ```
1463 /// use compact_str::CompactString;
1464 ///
1465 /// let x = "longer string that will be on the heap".to_string();
1466 /// let c1 = CompactString::from(x);
1467 ///
1468 /// let y = "longer string that will be on the heap".to_string();
1469 /// let c2 = CompactString::from_string_buffer(y);
1470 ///
1471 /// // for longer strings, we re-use the underlying String's buffer in both cases
1472 /// assert!(c1.is_heap_allocated());
1473 /// assert!(c2.is_heap_allocated());
1474 /// ```
1475 ///
1476 /// ### Buffer Re-use
1477 /// ```
1478 /// use compact_str::CompactString;
1479 ///
1480 /// let og = "hello world".to_string();
1481 /// let og_addr = og.as_ptr();
1482 ///
1483 /// let mut c = CompactString::from_string_buffer(og);
1484 /// let ex_addr = c.as_ptr();
1485 ///
1486 /// // When converting to/from String and CompactString with from_string_buffer we always re-use
1487 /// // the same underlying allocated memory/buffer
1488 /// assert_eq!(og_addr, ex_addr);
1489 ///
1490 /// let long = "this is a long string that will be on the heap".to_string();
1491 /// let long_addr = long.as_ptr();
1492 ///
1493 /// let mut long_c = CompactString::from(long);
1494 /// let long_ex_addr = long_c.as_ptr();
1495 ///
1496 /// // When converting to/from String and CompactString with From<String>, we'll also re-use the
1497 /// // underlying buffer, if the string is long, otherwise when converting to CompactString we
1498 /// // eagerly inline
1499 /// assert_eq!(long_addr, long_ex_addr);
1500 /// ```
1501 #[inline]
1502 pub fn from_string_buffer(s: String) -> Self {
1503 let repr = Repr::from_string(s, false);
1504 CompactString(repr)
1505 }
1506}
1507
1508impl Default for CompactString {
1509 #[inline]
1510 fn default() -> Self {
1511 CompactString::new("")
1512 }
1513}
1514
1515impl Deref for CompactString {
1516 type Target = str;
1517
1518 #[inline]
1519 fn deref(&self) -> &str {
1520 self.as_str()
1521 }
1522}
1523
1524impl DerefMut for CompactString {
1525 #[inline]
1526 fn deref_mut(&mut self) -> &mut str {
1527 self.as_mut_str()
1528 }
1529}
1530
1531impl AsRef<str> for CompactString {
1532 #[inline]
1533 fn as_ref(&self) -> &str {
1534 self.as_str()
1535 }
1536}
1537
1538impl AsRef<OsStr> for CompactString {
1539 #[inline]
1540 fn as_ref(&self) -> &OsStr {
1541 OsStr::new(self.as_str())
1542 }
1543}
1544
1545impl AsRef<[u8]> for CompactString {
1546 #[inline]
1547 fn as_ref(&self) -> &[u8] {
1548 self.as_bytes()
1549 }
1550}
1551
1552impl Borrow<str> for CompactString {
1553 #[inline]
1554 fn borrow(&self) -> &str {
1555 self.as_str()
1556 }
1557}
1558
1559impl BorrowMut<str> for CompactString {
1560 #[inline]
1561 fn borrow_mut(&mut self) -> &mut str {
1562 self.as_mut_str()
1563 }
1564}
1565
// Marker impl: equality is the `str` comparison done by the `PartialEq` impls in this file.
impl Eq for CompactString {}
1567
1568impl<T: AsRef<str>> PartialEq<T> for CompactString {
1569 fn eq(&self, other: &T) -> bool {
1570 self.as_str() == other.as_ref()
1571 }
1572}
1573
1574impl PartialEq<CompactString> for String {
1575 fn eq(&self, other: &CompactString) -> bool {
1576 self.as_str() == other.as_str()
1577 }
1578}
1579
1580impl PartialEq<CompactString> for &str {
1581 fn eq(&self, other: &CompactString) -> bool {
1582 *self == other.as_str()
1583 }
1584}
1585
1586impl<'a> PartialEq<CompactString> for Cow<'a, str> {
1587 fn eq(&self, other: &CompactString) -> bool {
1588 *self == other.as_str()
1589 }
1590}
1591
1592impl Ord for CompactString {
1593 fn cmp(&self, other: &Self) -> Ordering {
1594 self.as_str().cmp(other.as_str())
1595 }
1596}
1597
1598impl PartialOrd for CompactString {
1599 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1600 Some(self.cmp(other))
1601 }
1602}
1603
1604impl Hash for CompactString {
1605 fn hash<H: Hasher>(&self, state: &mut H) {
1606 self.as_str().hash(state)
1607 }
1608}
1609
1610impl<'a> From<&'a str> for CompactString {
1611 fn from(s: &'a str) -> Self {
1612 let repr = Repr::new(s);
1613 CompactString(repr)
1614 }
1615}
1616
1617impl From<String> for CompactString {
1618 fn from(s: String) -> Self {
1619 let repr = Repr::from_string(s, true);
1620 CompactString(repr)
1621 }
1622}
1623
1624impl<'a> From<&'a String> for CompactString {
1625 fn from(s: &'a String) -> Self {
1626 CompactString::new(s)
1627 }
1628}
1629
1630impl<'a> From<Cow<'a, str>> for CompactString {
1631 fn from(cow: Cow<'a, str>) -> Self {
1632 match cow {
1633 Cow::Borrowed(s) => s.into(),
1634 // we separate these two so we can re-use the underlying buffer in the owned case
1635 Cow::Owned(s) => s.into(),
1636 }
1637 }
1638}
1639
1640impl From<Box<str>> for CompactString {
1641 fn from(b: Box<str>) -> Self {
1642 let s = b.into_string();
1643 let repr = Repr::from_string(s, true);
1644 CompactString(repr)
1645 }
1646}
1647
1648impl From<CompactString> for String {
1649 #[inline]
1650 fn from(s: CompactString) -> Self {
1651 s.into_string()
1652 }
1653}
1654
1655impl From<CompactString> for Cow<'_, str> {
1656 #[inline]
1657 fn from(s: CompactString) -> Self {
1658 Self::Owned(s.into_string())
1659 }
1660}
1661
1662impl<'a> From<&'a CompactString> for Cow<'a, str> {
1663 #[inline]
1664 fn from(s: &'a CompactString) -> Self {
1665 Self::Borrowed(s)
1666 }
1667}
1668
1669impl FromStr for CompactString {
1670 type Err = core::convert::Infallible;
1671 fn from_str(s: &str) -> Result<CompactString, Self::Err> {
1672 Ok(CompactString::from(s))
1673 }
1674}
1675
1676impl fmt::Debug for CompactString {
1677 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1678 fmt::Debug::fmt(self.as_str(), f)
1679 }
1680}
1681
1682impl fmt::Display for CompactString {
1683 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1684 fmt::Display::fmt(self.as_str(), f)
1685 }
1686}
1687
1688impl FromIterator<char> for CompactString {
1689 fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
1690 let repr = iter.into_iter().collect();
1691 CompactString(repr)
1692 }
1693}
1694
1695impl<'a> FromIterator<&'a char> for CompactString {
1696 fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
1697 let repr = iter.into_iter().collect();
1698 CompactString(repr)
1699 }
1700}
1701
1702impl<'a> FromIterator<&'a str> for CompactString {
1703 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
1704 let repr = iter.into_iter().collect();
1705 CompactString(repr)
1706 }
1707}
1708
1709impl FromIterator<Box<str>> for CompactString {
1710 fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
1711 let repr = iter.into_iter().collect();
1712 CompactString(repr)
1713 }
1714}
1715
1716impl<'a> FromIterator<Cow<'a, str>> for CompactString {
1717 fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
1718 let repr = iter.into_iter().collect();
1719 CompactString(repr)
1720 }
1721}
1722
1723impl FromIterator<String> for CompactString {
1724 fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
1725 let repr = iter.into_iter().collect();
1726 CompactString(repr)
1727 }
1728}
1729
1730impl FromIterator<CompactString> for CompactString {
1731 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
1732 let repr = iter.into_iter().collect();
1733 CompactString(repr)
1734 }
1735}
1736
1737impl FromIterator<CompactString> for String {
1738 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
1739 let mut iterator = iter.into_iter();
1740 match iterator.next() {
1741 None => String::new(),
1742 Some(buf) => {
1743 let mut buf = buf.into_string();
1744 buf.extend(iterator);
1745 buf
1746 }
1747 }
1748 }
1749}
1750
1751impl FromIterator<CompactString> for Cow<'_, str> {
1752 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
1753 String::from_iter(iter).into()
1754 }
1755}
1756
1757impl Extend<char> for CompactString {
1758 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
1759 self.0.extend(iter)
1760 }
1761}
1762
1763impl<'a> Extend<&'a char> for CompactString {
1764 fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
1765 self.0.extend(iter)
1766 }
1767}
1768
1769impl<'a> Extend<&'a str> for CompactString {
1770 fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
1771 self.0.extend(iter)
1772 }
1773}
1774
1775impl Extend<Box<str>> for CompactString {
1776 fn extend<T: IntoIterator<Item = Box<str>>>(&mut self, iter: T) {
1777 self.0.extend(iter)
1778 }
1779}
1780
1781impl<'a> Extend<Cow<'a, str>> for CompactString {
1782 fn extend<T: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: T) {
1783 iter.into_iter().for_each(move |s| self.push_str(&s));
1784 }
1785}
1786
1787impl Extend<String> for CompactString {
1788 fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
1789 self.0.extend(iter)
1790 }
1791}
1792
1793impl Extend<CompactString> for String {
1794 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
1795 for s in iter {
1796 self.push_str(&s);
1797 }
1798 }
1799}
1800
1801impl Extend<CompactString> for CompactString {
1802 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
1803 for s in iter {
1804 self.push_str(&s);
1805 }
1806 }
1807}
1808
1809impl<'a> Extend<CompactString> for Cow<'a, str> {
1810 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
1811 self.to_mut().extend(iter);
1812 }
1813}
1814
1815impl fmt::Write for CompactString {
1816 fn write_str(&mut self, s: &str) -> fmt::Result {
1817 self.push_str(s);
1818 Ok(())
1819 }
1820
1821 fn write_fmt(mut self: &mut Self, args: fmt::Arguments<'_>) -> fmt::Result {
1822 match args.as_str() {
1823 Some(s) => {
1824 self.push_str(s);
1825 Ok(())
1826 }
1827 None => fmt::write(&mut self, args),
1828 }
1829 }
1830}
1831
1832impl Add<&str> for CompactString {
1833 type Output = Self;
1834 fn add(mut self, rhs: &str) -> Self::Output {
1835 self.push_str(rhs);
1836 self
1837 }
1838}
1839
1840impl AddAssign<&str> for CompactString {
1841 fn add_assign(&mut self, rhs: &str) {
1842 self.push_str(rhs);
1843 }
1844}
1845
/// A possible error value when converting UTF-16 data into a [`CompactString`].
///
/// This type is the error type for the [`from_utf16`], [`from_utf16le`] and
/// [`from_utf16be`] methods on [`CompactString`]. The byte-slice based methods also return
/// it when the input has an odd number of bytes.
///
/// [`from_utf16`]: CompactString::from_utf16
/// [`from_utf16le`]: CompactString::from_utf16le
/// [`from_utf16be`]: CompactString::from_utf16be
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// // 𝄞mu<invalid>ic
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
///           0xD800, 0x0069, 0x0063];
///
/// assert!(CompactString::from_utf16(v).is_err());
/// ```
#[derive(Copy, Clone, Debug)]
pub struct Utf16Error(());

impl fmt::Display for Utf16Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
    }
}

// Lets the error be boxed as `dyn Error` and propagated with `?` like any other error type.
// It wraps no other error, so the default `source()` (which returns `None`) is correct.
impl std::error::Error for Utf16Error {}
1871
/// An iterator over the data extracted by [`CompactString::drain()`].
///
/// The drained range is removed from the string when the iterator is dropped.
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Drain<'a> {
    // The string being drained; dereferenced in `Drop` to remove the range.
    compact_string: *mut CompactString,
    // Byte offset where the drained range starts.
    start: usize,
    // Byte offset where the drained range ends (exclusive).
    end: usize,
    // Iterator over the characters of the drained range that were not yielded yet.
    chars: std::str::Chars<'a>,
}
1880
// The raw pointer field opts `Drain` out of the automatic `Send`/`Sync` impls, so they are
// restored by hand.
//
// SAFETY: Drain keeps the lifetime of the CompactString it belongs to, i.e. it acts like the
// `&mut CompactString` it was created from.
// NOTE(review): this presumes `CompactString` itself is `Send + Sync`; confirm.
unsafe impl Send for Drain<'_> {}
unsafe impl Sync for Drain<'_> {}
1884
1885impl fmt::Debug for Drain<'_> {
1886 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1887 f.debug_tuple("Drain").field(&self.as_str()).finish()
1888 }
1889}
1890
1891impl fmt::Display for Drain<'_> {
1892 #[inline]
1893 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1894 f.write_str(self.as_str())
1895 }
1896}
1897
1898impl Drop for Drain<'_> {
1899 #[inline]
1900 fn drop(&mut self) {
1901 // SAFETY: Drain keeps a mutable reference to compact_string, so one one else can access
1902 // the CompactString, but this function right now. CompactString::drain() ensured
1903 // that the new extracted range does not split a UTF-8 character.
1904 unsafe { (*self.compact_string).replace_range_shrink(self.start, self.end, "") };
1905 }
1906}
1907
1908impl Drain<'_> {
1909 /// The remaining, unconsumed characters of the extracted substring.
1910 #[inline]
1911 pub fn as_str(&self) -> &str {
1912 self.chars.as_str()
1913 }
1914}
1915
1916impl Deref for Drain<'_> {
1917 type Target = str;
1918
1919 #[inline]
1920 fn deref(&self) -> &Self::Target {
1921 self.as_str()
1922 }
1923}
1924
1925impl Iterator for Drain<'_> {
1926 type Item = char;
1927
1928 #[inline]
1929 fn next(&mut self) -> Option<char> {
1930 self.chars.next()
1931 }
1932
1933 #[inline]
1934 fn count(self) -> usize {
1935 // <Chars as Iterator>::count() is specialized, and cloning is trivial.
1936 self.chars.clone().count()
1937 }
1938
1939 fn size_hint(&self) -> (usize, Option<usize>) {
1940 self.chars.size_hint()
1941 }
1942
1943 #[inline]
1944 fn last(mut self) -> Option<char> {
1945 self.chars.next_back()
1946 }
1947}
1948
1949impl DoubleEndedIterator for Drain<'_> {
1950 #[inline]
1951 fn next_back(&mut self) -> Option<char> {
1952 self.chars.next_back()
1953 }
1954}
1955
// Marker impl: `Drain` forwards `next()` to `Chars`, which is itself a fused iterator.
impl FusedIterator for Drain<'_> {}
1957
// Compile-time check that a `CompactString` is exactly as large as a `String`.
static_assertions::assert_eq_size!(CompactString, String);