writeable/
to_string_or_borrow.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::Writeable;
6use alloc::borrow::Cow;
7use alloc::string::String;
8use core::fmt;
9
/// A borrowed byte slice paired with a cursor recording how much of its
/// front is already known to be well-formed UTF-8.
struct PartiallyValidatedUtf8<'a> {
    // Safety Invariants:
    // 1. `offset <= slice.len()`.
    // 2. `slice[..offset]` is valid UTF-8.
    slice: &'a [u8],
    offset: usize,
}

impl<'a> PartiallyValidatedUtf8<'a> {
    /// Wrap `slice` with nothing validated yet.
    fn new(slice: &'a [u8]) -> Self {
        // Safety: both field invariants hold vacuously for an offset of zero:
        //   1. Zero never exceeds a slice length.
        //   2. The empty prefix is valid UTF-8.
        Self { slice, offset: 0 }
    }

    /// Try to extend the validated prefix by `valid_str`.
    ///
    /// Returns `true` and advances the cursor when the bytes directly after
    /// the cursor are exactly the bytes of `valid_str`. Returns `false` and
    /// leaves the cursor untouched otherwise, including when fewer than
    /// `valid_str.len()` bytes remain.
    fn try_push(&mut self, valid_str: &str) -> bool {
        let end = self.offset + valid_str.len();
        match self.slice.get(self.offset..end) {
            Some(candidate) if candidate == valid_str.as_bytes() => {
                // Safety: field invariants are preserved by this assignment:
                //   1. `get()` produced `Some` for a range ending at `end`, so
                //      `end` is within the bounds of `self.slice`.
                //   2. Everything before `self.offset` was already validated, and
                //      the bytes from `self.offset` to `end` are identical to a
                //      `&str`, so the slice is valid UTF-8 up to `end`.
                self.offset = end;
                true
            }
            _ => false,
        }
    }

    /// Borrow the already-validated prefix as a string slice that lives as
    /// long as the underlying bytes.
    fn validated_as_str(&self) -> &'a str {
        let bytes: &'a [u8] = self.slice;
        let end = self.offset;
        debug_assert!(end <= bytes.len());
        // Safety: field invariant 1 guarantees `end` is an in-bounds range end.
        let prefix = unsafe { bytes.get_unchecked(..end) };
        debug_assert!(core::str::from_utf8(prefix).is_ok());
        // Safety: field invariant 2 guarantees `prefix` is valid UTF-8.
        unsafe { core::str::from_utf8_unchecked(prefix) }
    }
}
55
56enum SliceOrString<'a> {
57    Slice(PartiallyValidatedUtf8<'a>),
58    String(String),
59}
60
61/// This is an infallible impl. Functions always return Ok, not Err.
62impl fmt::Write for SliceOrString<'_> {
63    #[inline]
64    fn write_str(&mut self, other: &str) -> fmt::Result {
65        match self {
66            SliceOrString::Slice(slice) => {
67                if !slice.try_push(other) {
68                    // We failed to match. Convert to owned.
69                    let valid_str = slice.validated_as_str();
70                    let mut owned = String::with_capacity(valid_str.len() + other.len());
71                    owned.push_str(valid_str);
72                    owned.push_str(other);
73                    *self = SliceOrString::String(owned);
74                }
75                Ok(())
76            }
77            SliceOrString::String(owned) => owned.write_str(other),
78        }
79    }
80}
81
82impl<'a> SliceOrString<'a> {
83    #[inline]
84    fn new(slice: &'a [u8]) -> Self {
85        Self::Slice(PartiallyValidatedUtf8::new(slice))
86    }
87
88    #[inline]
89    fn finish(self) -> Cow<'a, str> {
90        match self {
91            SliceOrString::Slice(slice) => Cow::Borrowed(slice.validated_as_str()),
92            SliceOrString::String(owned) => Cow::Owned(owned),
93        }
94    }
95}
96
97/// Writes the contents of a `Writeable` to a string, returning a reference
98/// to a slice if it matches the provided reference bytes, and allocating a
99/// String otherwise.
100///
101/// This function is useful if you have borrowed bytes which you expect
102/// to be equal to a writeable a high percentage of the time.
103///
104/// You can also use this function to make a more efficient implementation of
105/// [`Writeable::write_to_string`].
106///
107/// # Examples
108///
109/// Basic usage and behavior:
110///
111/// ```
112/// use std::fmt;
113/// use std::borrow::Cow;
114/// use writeable::Writeable;
115///
116/// struct WelcomeMessage<'s> {
117///     pub name: &'s str,
118/// }
119///
120/// impl<'s> Writeable for WelcomeMessage<'s> {
121///     // see impl in Writeable docs
122/// #    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
123/// #        sink.write_str("Hello, ")?;
124/// #        sink.write_str(self.name)?;
125/// #        sink.write_char('!')?;
126/// #        Ok(())
127/// #    }
128/// }
129///
130/// let message = WelcomeMessage { name: "Alice" };
131///
132/// assert!(matches!(
133///     writeable::to_string_or_borrow(&message, b""),
134///     Cow::Owned(s) if s == "Hello, Alice!"
135/// ));
136/// assert!(matches!(
137///     writeable::to_string_or_borrow(&message, b"Hello"),
138///     Cow::Owned(s) if s == "Hello, Alice!"
139/// ));
140/// assert!(matches!(
141///     writeable::to_string_or_borrow(&message, b"Hello, Bob!"),
142///     Cow::Owned(s) if s == "Hello, Alice!"
143/// ));
144/// assert!(matches!(
145///     writeable::to_string_or_borrow(&message, b"Hello, Alice!"),
146///     Cow::Borrowed("Hello, Alice!")
147/// ));
148///
149/// // Borrowing can use a prefix:
150/// assert!(matches!(
151///     writeable::to_string_or_borrow(&message, b"Hello, Alice!..\xFF\x00\xFF"),
152///     Cow::Borrowed("Hello, Alice!")
153/// ));
154/// ```
155///
156/// Example use case: a function that transforms a string to lowercase.
157/// We are also able to write a more efficient implementation of
158/// [`Writeable::write_to_string`] in this situation.
159///
160/// ```
161/// use std::fmt;
162/// use std::borrow::Cow;
163/// use writeable::Writeable;
164///
165/// struct MakeAsciiLower<'a>(&'a str);
166///
167/// impl<'a> Writeable for MakeAsciiLower<'a> {
168///     fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
169///         for c in self.0.chars() {
170///             sink.write_char(c.to_ascii_lowercase())?;
171///         }
172///         Ok(())
173///     }
174///     #[inline]
175///     fn write_to_string(&self) -> Cow<str> {
176///         writeable::to_string_or_borrow(self, self.0.as_bytes())
177///     }
178/// }
179///
180/// fn make_lowercase(input: &str) -> Cow<str> {
181///     let writeable = MakeAsciiLower(input);
182///     writeable::to_string_or_borrow(&writeable, input.as_bytes())
183/// }
184///
185/// assert!(matches!(
186///     make_lowercase("this is lowercase"),
187///     Cow::Borrowed("this is lowercase")
188/// ));
189/// assert!(matches!(
190///     make_lowercase("this is UPPERCASE"),
191///     Cow::Owned(s) if s == "this is uppercase"
192/// ));
193///
194/// assert!(matches!(
195///     MakeAsciiLower("this is lowercase").write_to_string(),
196///     Cow::Borrowed("this is lowercase")
197/// ));
198/// assert!(matches!(
199///     MakeAsciiLower("this is UPPERCASE").write_to_string(),
200///     Cow::Owned(s) if s == "this is uppercase"
201/// ));
202/// ```
203pub fn to_string_or_borrow<'a>(
204    writeable: &impl Writeable,
205    reference_bytes: &'a [u8],
206) -> Cow<'a, str> {
207    let mut sink = SliceOrString::new(reference_bytes);
208    let _ = writeable.write_to(&mut sink);
209    sink.finish()
210}