zerovec/ule/
vartuple.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Types to help compose fixed-size [`ULE`] and variable-size [`VarULE`] primitives.
6//!
7//! This module exports [`VarTuple`] and [`VarTupleULE`], which allow a single sized type and
8//! a single unsized type to be stored together as a [`VarULE`].
9//!
10//! # Examples
11//!
12//! ```
13//! use zerovec::ule::vartuple::{VarTuple, VarTupleULE};
14//! use zerovec::VarZeroVec;
15//!
16//! struct Employee<'a> {
17//!     id: u32,
18//!     name: &'a str,
19//! };
20//!
21//! let employees = [
22//!     Employee {
23//!         id: 12345,
24//!         name: "Jane Doe",
25//!     },
26//!     Employee {
27//!         id: 67890,
28//!         name: "John Doe",
29//!     },
30//! ];
31//!
32//! let employees_as_var_tuples = employees
33//!     .into_iter()
34//!     .map(|x| VarTuple {
35//!         sized: x.id,
36//!         variable: x.name,
37//!     })
38//!     .collect::<Vec<_>>();
39//!
40//! let employees_vzv: VarZeroVec<VarTupleULE<u32, str>> =
41//!     employees_as_var_tuples.as_slice().into();
42//!
43//! assert_eq!(employees_vzv.len(), 2);
44//!
45//! assert_eq!(employees_vzv.get(0).unwrap().sized.as_unsigned_int(), 12345);
46//! assert_eq!(&employees_vzv.get(0).unwrap().variable, "Jane Doe");
47//!
48//! assert_eq!(employees_vzv.get(1).unwrap().sized.as_unsigned_int(), 67890);
49//! assert_eq!(&employees_vzv.get(1).unwrap().variable, "John Doe");
50//! ```
51
52use core::mem::{size_of, transmute_copy};
53use zerofrom::ZeroFrom;
54
55use super::{AsULE, EncodeAsVarULE, UleError, VarULE, ULE};
56
/// A sized type that can be converted to a [`VarTupleULE`].
///
/// This is the owned/borrowed "builder" side of the pair: it implements
/// [`EncodeAsVarULE`] for [`VarTupleULE<A, V>`] whenever `A: AsULE` and
/// `B: EncodeAsVarULE<V>`.
///
/// See the module for examples.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
#[allow(clippy::exhaustive_structs)] // well-defined type
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VarTuple<A, B> {
    /// The fixed-size component; written first, in its unaligned form, when encoding.
    pub sized: A,
    /// The variable-size component; written directly after `sized` when encoding.
    pub variable: B,
}
67
/// A dynamically-sized type combining a sized and an unsized type.
///
/// The struct is `repr(C)`, so `sized` is laid out first and `variable`
/// follows it; the `VarULE` impl in this file relies on that field order.
///
/// See the module for examples.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // well-defined type
#[repr(C)]
pub struct VarTupleULE<A: AsULE, V: VarULE + ?Sized> {
    /// The fixed-size component, stored as the unaligned (`ULE`) form of `A`.
    pub sized: A::ULE,
    /// The variable-size component; being unsized, it must be the last field.
    pub variable: V,
}
78
79// # Safety
80//
81// ## Representation
82//
83// The type `VarTupleULE` is align(1) because it is repr(C) and its fields
84// are all align(1), since they are themselves ULE and VarULE, which have
85// this same safety constraint. Further, there is no padding, because repr(C)
86// does not add padding when all fields are align(1).
87//
88// <https://doc.rust-lang.org/reference/type-layout.html#the-c-representation>
89//
90// Pointers to `VarTupleULE` are fat pointers with metadata equal to the
91// metadata of the inner DST field V.
92//
93// <https://doc.rust-lang.org/stable/std/ptr/trait.Pointee.html>
94//
95// ## Checklist
96//
97// Safety checklist for `VarULE`:
98//
99// 1. align(1): see "Representation" above.
100// 2. No padding: see "Representation" above.
101// 3. `validate_bytes` checks length and defers to the inner ULEs.
102// 4. `validate_bytes` checks length and defers to the inner ULEs.
103// 5. `from_bytes_unchecked` returns a fat pointer to the bytes.
104// 6. All other methods are left at their default impl.
105// 7. The two ULEs have byte equality, so this composition has byte equality.
106unsafe impl<A, V> VarULE for VarTupleULE<A, V>
107where
108    A: AsULE + 'static,
109    V: VarULE + ?Sized,
110{
111    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
112        let (sized_chunk, variable_chunk) = bytes
113            .split_at_checked(size_of::<A::ULE>())
114            .ok_or(UleError::length::<Self>(bytes.len()))?;
115        A::ULE::validate_bytes(sized_chunk)?;
116        V::validate_bytes(variable_chunk)?;
117        Ok(())
118    }
119
120    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
121        let (_sized_chunk, variable_chunk) = bytes.split_at_unchecked(size_of::<A::ULE>());
122        // Safety: variable_chunk is a valid V because of this function's precondition: bytes is a valid Self,
123        // and a valid Self contains a valid V after the space needed for A::ULE.
124        let variable_ref = V::from_bytes_unchecked(variable_chunk);
125        let variable_ptr: *const V = variable_ref;
126
127        // Safety: The DST of VarTupleULE is a pointer to the `sized` element and has a metadata
128        // equal to the metadata of the `variable` field (see "Representation" comments on the impl).
129
130        // We should use the pointer metadata APIs here when they are stable: https://github.com/rust-lang/rust/issues/81513
131        // For now we rely on all DST metadata being a usize.
132
133        // Extract metadata from V's DST
134        // Rust doesn't know that `&V` is a fat pointer so we have to use transmute_copy
135        assert_eq!(size_of::<*const V>(), size_of::<(*const u8, usize)>());
136        // Safety: We have asserted that the transmute Src and Dst are the same size. Furthermore,
137        // DST pointers are a pointer and usize length metadata
138        let (_v_ptr, metadata) = transmute_copy::<*const V, (*const u8, usize)>(&variable_ptr);
139
140        // Construct a new DST with the same metadata as V
141        assert_eq!(size_of::<*const Self>(), size_of::<(*const u8, usize)>());
142        // Safety: Same as above but in the other direction.
143        let composed_ptr =
144            transmute_copy::<(*const u8, usize), *const Self>(&(bytes.as_ptr(), metadata));
145        &*(composed_ptr)
146    }
147}
148
149// # Safety
150//
151// encode_var_ule_len: returns the length of the two ULEs together.
152//
153// encode_var_ule_write: writes bytes by deferring to the inner ULE impls.
154unsafe impl<A, B, V> EncodeAsVarULE<VarTupleULE<A, V>> for VarTuple<A, B>
155where
156    A: AsULE + 'static,
157    B: EncodeAsVarULE<V>,
158    V: VarULE + ?Sized,
159{
160    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
161        // unnecessary if the other two are implemented
162        unreachable!()
163    }
164
165    #[inline]
166    fn encode_var_ule_len(&self) -> usize {
167        size_of::<A::ULE>() + self.variable.encode_var_ule_len()
168    }
169
170    #[inline]
171    fn encode_var_ule_write(&self, dst: &mut [u8]) {
172        // TODO: use split_first_chunk_mut in 1.77
173        let (sized_chunk, variable_chunk) = dst.split_at_mut(size_of::<A::ULE>());
174        sized_chunk.clone_from_slice([self.sized.to_unaligned()].as_bytes());
175        self.variable.encode_var_ule_write(variable_chunk);
176    }
177}
178
#[cfg(feature = "alloc")]
impl<A: AsULE + 'static, V: VarULE + ?Sized> alloc::borrow::ToOwned for VarTupleULE<A, V> {
    type Owned = alloc::boxed::Box<Self>;

    /// Produces a boxed copy of this value by delegating to `encode_varule_to_box`.
    fn to_owned(&self) -> alloc::boxed::Box<Self> {
        crate::ule::encode_varule_to_box(self)
    }
}
190
191impl<'a, A, B, V> ZeroFrom<'a, VarTupleULE<A, V>> for VarTuple<A, B>
192where
193    A: AsULE + 'static,
194    V: VarULE + ?Sized,
195    B: ZeroFrom<'a, V>,
196{
197    fn zero_from(other: &'a VarTupleULE<A, V>) -> Self {
198        VarTuple {
199            sized: AsULE::from_unaligned(other.sized),
200            variable: B::zero_from(&other.variable),
201        }
202    }
203}
204
#[cfg(feature = "serde")]
impl<A, V> serde::Serialize for VarTupleULE<A, V>
where
    A: AsULE + serde::Serialize + 'static,
    V: VarULE + serde::Serialize + ?Sized,
{
    /// Serializes as the raw encoded bytes for binary formats, and as a
    /// `VarTuple` of the aligned sized value plus a reference to the variable
    /// part for human-readable formats.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // Binary formats get the bytes exactly as stored.
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(self.as_bytes());
        }

        // Human-readable formats go through the derived `VarTuple` impl.
        let expanded = VarTuple {
            sized: A::from_unaligned(self.sized),
            variable: &self.variable,
        };
        expanded.serialize(serializer)
    }
}
228
#[cfg(feature = "serde")]
impl<'a, 'de: 'a, A, V> serde::Deserialize<'de> for &'a VarTupleULE<A, V>
where
    A: AsULE + 'static,
    V: VarULE + ?Sized,
    A: serde::Deserialize<'de>,
{
    /// Borrows a `VarTupleULE` out of a byte slice provided by the deserializer.
    ///
    /// Human-readable deserializers are rejected with a custom error, and
    /// bytes that fail `parse_bytes` are reported as a custom error as well.
    fn deserialize<Des>(deserializer: Des) -> Result<Self, Des::Error>
    where
        Des: serde::Deserializer<'de>,
    {
        // A reference can only be produced by borrowing from the input bytes.
        if deserializer.is_human_readable() {
            return Err(serde::de::Error::custom(
                "&VarTupleULE can only deserialize in zero-copy ways",
            ));
        }

        let raw = <&[u8]>::deserialize(deserializer)?;
        let parsed = VarTupleULE::<A, V>::parse_bytes(raw);
        parsed.map_err(serde::de::Error::custom)
    }
}
250
#[cfg(feature = "serde")]
impl<'de, A, V> serde::Deserialize<'de> for alloc::boxed::Box<VarTupleULE<A, V>>
where
    A: AsULE + 'static,
    V: VarULE + ?Sized,
    A: serde::Deserialize<'de>,
    alloc::boxed::Box<V>: serde::Deserialize<'de>,
{
    /// Deserializes an owned, boxed `VarTupleULE`.
    ///
    /// Human-readable formats are read as a `VarTuple<A, Box<V>>` and then
    /// encoded into a box; binary formats are read through the borrowed
    /// `&VarTupleULE` impl and then copied into a box.
    fn deserialize<Des>(deserializer: Des) -> Result<Self, Des::Error>
    where
        Des: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // This branch should usually not be hit, since Cow-like use cases will hit the
            // Deserialize impl for &'a VarTupleULE instead.
            let borrowed = <&VarTupleULE<A, V>>::deserialize(deserializer)?;
            return Ok(borrowed.to_boxed());
        }

        let owned = VarTuple::<A, alloc::boxed::Box<V>>::deserialize(deserializer)?;
        Ok(crate::ule::encode_varule_to_box(&owned))
    }
}
274
/// Encodes a `u16` + `&str` pair and checks both fields read back unchanged.
#[test]
fn test_simple() {
    let input = VarTuple {
        sized: 1500u16,
        variable: "hello",
    };
    let boxed = super::encode_varule_to_box(&input);
    assert_eq!(boxed.sized.as_unsigned_int(), 1500);
    assert_eq!(&boxed.variable, "hello");

    // Can't use inference due to https://github.com/rust-lang/rust/issues/130180
    #[cfg(feature = "serde")]
    crate::ule::test_utils::assert_serde_roundtrips::<VarTupleULE<u16, str>>(&boxed);
}
289
/// Encodes a `VarTuple` whose variable part is itself a `VarTuple`, and
/// checks every level reads back unchanged.
#[test]
fn test_nested() {
    use crate::{ZeroSlice, ZeroVec};
    let nested = VarTuple {
        sized: 2000u16,
        variable: VarTuple {
            sized: '🦙',
            variable: ZeroVec::alloc_from_slice(b"ICU"),
        },
    };
    let boxed = super::encode_varule_to_box(&nested);
    assert_eq!(boxed.sized.as_unsigned_int(), 2000u16);
    assert_eq!(boxed.variable.sized.to_char(), '🦙');
    assert_eq!(
        &boxed.variable.variable,
        ZeroSlice::from_ule_slice(b"ICU")
    );
    // Can't use inference due to https://github.com/rust-lang/rust/issues/130180
    #[cfg(feature = "serde")]
    crate::ule::test_utils::assert_serde_roundtrips::<
        VarTupleULE<u16, VarTupleULE<char, ZeroSlice<_>>>,
    >(&boxed);
}