// regex_automata/util/int.rs

/*!
This module provides several integer-oriented traits for converting between
both fixed size integers and integers whose size varies based on the target
(like `usize`).

The driving design principle of this module is to attempt to centralize as many
`as` casts as possible here. And in particular, we separate casts into two
buckets:

* Casts that we use for their truncating behavior. In this case, we use more
descriptive names, like `low_u32` and `high_u32`.
* Casts that we use for converting back-and-forth between `usize`. These
conversions are generally necessary because we often store indices in different
formats to save on memory, which requires converting to and from `usize`. In
this case, we very specifically do not want to overflow, and so, in debug mode,
the methods defined here will panic if the `as` cast would be lossy. (A normal
`as` cast will never panic!)

For `as` casts between raw pointers, we use `cast`, so `as` isn't needed there.

For regex engines, floating point is just never used, so we don't have to worry
about `as` casts for those.

Otherwise, this module pretty much covers all of our `as` needs except for one
thing: const contexts. There are a select few places in this crate where we
still need to use `as` because const functions on traits aren't stable yet.
If we wind up significantly expanding our const footprint in this crate, it
might be worth defining free functions to handle those cases. But at the time
of writing, that just seemed like too much ceremony. Instead, I comment each
such use of `as` in a const context with a "fixme" notice.

NOTE: for simplicity, we don't take target pointer width into account here for
`usize` conversions. Since we currently only panic in debug mode, skipping the
check when it can be proven it isn't needed at compile time doesn't really
matter. Now, if we wind up wanting to do as many checks as possible in release
mode, then we would want to skip those when we know the conversions are always
non-lossy.

NOTE: this module isn't an exhaustive API. For example, we still use things
like `u64::from` where possible, or even `usize::try_from()` for when we do
explicitly want to panic or when we want to return an error for overflow.
*/
43
// We define a little more than what we need, but I'd rather just have
// everything via a consistent and uniform API than have holes.
#![allow(dead_code)]
47
/// Conversions for `u8` values.
pub(crate) trait U8 {
    /// Widens this value to a `usize`.
    ///
    /// The implementation for `u8` goes through `usize::from`, so it is
    /// always lossless and never panics.
    fn as_usize(self) -> usize;
}

impl U8 for u8 {
    fn as_usize(self) -> usize {
        usize::from(self)
    }
}
57
/// Conversions and byte extraction for `u16` values.
pub(crate) trait U16 {
    /// Widens this value to a `usize`.
    ///
    /// The implementation for `u16` goes through `usize::from`, so it is
    /// always lossless and never panics.
    fn as_usize(self) -> usize;

    /// Returns the least significant 8 bits, discarding the upper byte.
    fn low_u8(self) -> u8;

    /// Returns the most significant 8 bits.
    fn high_u8(self) -> u8;
}

impl U16 for u16 {
    fn as_usize(self) -> usize {
        usize::from(self)
    }

    fn low_u8(self) -> u8 {
        // Intentional truncation: only the low byte is wanted.
        self as u8
    }

    fn high_u8(self) -> u8 {
        // After the shift the value fits in 8 bits, so the cast is lossless.
        (self >> 8) as u8
    }
}
77
/// Conversions and sub-word extraction for `u32` values.
pub(crate) trait U32 {
    /// Converts this value to a `usize`.
    ///
    /// # Panics
    ///
    /// When `debug_assertions` are enabled, the `u32` implementation panics
    /// with "u32 overflowed usize" if the value does not fit in a `usize`.
    /// Otherwise a plain `as` cast is used, which never panics but silently
    /// truncates a value that does not fit.
    fn as_usize(self) -> usize;

    /// Returns the least significant 8 bits, discarding the rest.
    fn low_u8(self) -> u8;

    /// Returns the least significant 16 bits, discarding the rest.
    fn low_u16(self) -> u16;

    /// Returns the most significant 16 bits.
    fn high_u16(self) -> u16;
}

impl U32 for u32 {
    fn as_usize(self) -> usize {
        #[cfg(debug_assertions)]
        {
            usize::try_from(self).expect("u32 overflowed usize")
        }
        #[cfg(not(debug_assertions))]
        {
            self as usize
        }
    }

    fn low_u8(self) -> u8 {
        // Intentional truncation: only the low byte is wanted.
        self as u8
    }

    fn low_u16(self) -> u16 {
        // Intentional truncation: only the low half is wanted.
        self as u16
    }

    fn high_u16(self) -> u16 {
        // After the shift the value fits in 16 bits, so the cast is lossless.
        (self >> 16) as u16
    }
}
109
/// Conversions and sub-word extraction for `u64` values.
pub(crate) trait U64 {
    /// Converts this value to a `usize`.
    ///
    /// # Panics
    ///
    /// When `debug_assertions` are enabled, the `u64` implementation panics
    /// with "u64 overflowed usize" if the value does not fit in a `usize`.
    /// Otherwise a plain `as` cast is used, which never panics but silently
    /// truncates a value that does not fit.
    fn as_usize(self) -> usize;

    /// Returns the least significant 8 bits, discarding the rest.
    fn low_u8(self) -> u8;

    /// Returns the least significant 16 bits, discarding the rest.
    fn low_u16(self) -> u16;

    /// Returns the least significant 32 bits, discarding the rest.
    fn low_u32(self) -> u32;

    /// Returns the most significant 32 bits.
    fn high_u32(self) -> u32;
}

impl U64 for u64 {
    fn as_usize(self) -> usize {
        #[cfg(debug_assertions)]
        {
            usize::try_from(self).expect("u64 overflowed usize")
        }
        #[cfg(not(debug_assertions))]
        {
            self as usize
        }
    }

    fn low_u8(self) -> u8 {
        // Intentional truncation: only the low byte is wanted.
        self as u8
    }

    fn low_u16(self) -> u16 {
        // Intentional truncation: only the low 16 bits are wanted.
        self as u16
    }

    fn low_u32(self) -> u32 {
        // Intentional truncation: only the low half is wanted.
        self as u32
    }

    fn high_u32(self) -> u32 {
        // After the shift the value fits in 32 bits, so the cast is lossless.
        (self >> 32) as u32
    }
}
146
/// Conversions for `i32` values.
pub(crate) trait I32 {
    /// Converts this value to a `usize`.
    ///
    /// # Panics
    ///
    /// When `debug_assertions` are enabled, the `i32` implementation panics
    /// with "i32 overflowed usize" if the value cannot be represented as a
    /// `usize` (for example, any negative value). Otherwise a plain `as` cast
    /// is used, which never panics but yields a different value for inputs
    /// that are not representable.
    fn as_usize(self) -> usize;

    /// Returns this value cast to `u32` with `as`, i.e. the same 32 bits
    /// read as an unsigned integer.
    fn to_bits(self) -> u32;

    /// Returns `n` cast to `i32` with `as`, i.e. the same 32 bits read as a
    /// signed integer. This is the inverse of `to_bits`.
    fn from_bits(n: u32) -> i32;
}

impl I32 for i32 {
    fn as_usize(self) -> usize {
        #[cfg(debug_assertions)]
        {
            usize::try_from(self).expect("i32 overflowed usize")
        }
        #[cfg(not(debug_assertions))]
        {
            self as usize
        }
    }

    fn to_bits(self) -> u32 {
        self as u32
    }

    fn from_bits(n: u32) -> i32 {
        n as i32
    }
}
173
/// Conversions from `usize` to fixed size unsigned integers.
///
/// # Panics
///
/// For every method of the `usize` implementation: when `debug_assertions`
/// are enabled, the method panics with a "usize overflowed ..." message if
/// the value does not fit in the target type. Otherwise a plain `as` cast is
/// used, which never panics but silently truncates a value that does not fit.
pub(crate) trait Usize {
    /// Converts this value to a `u8`.
    fn as_u8(self) -> u8;

    /// Converts this value to a `u16`.
    fn as_u16(self) -> u16;

    /// Converts this value to a `u32`.
    fn as_u32(self) -> u32;

    /// Converts this value to a `u64`.
    fn as_u64(self) -> u64;
}

impl Usize for usize {
    fn as_u8(self) -> u8 {
        #[cfg(debug_assertions)]
        {
            u8::try_from(self).expect("usize overflowed u8")
        }
        #[cfg(not(debug_assertions))]
        {
            self as u8
        }
    }

    fn as_u16(self) -> u16 {
        #[cfg(debug_assertions)]
        {
            u16::try_from(self).expect("usize overflowed u16")
        }
        #[cfg(not(debug_assertions))]
        {
            self as u16
        }
    }

    fn as_u32(self) -> u32 {
        #[cfg(debug_assertions)]
        {
            u32::try_from(self).expect("usize overflowed u32")
        }
        #[cfg(not(debug_assertions))]
        {
            self as u32
        }
    }

    fn as_u64(self) -> u64 {
        #[cfg(debug_assertions)]
        {
            u64::try_from(self).expect("usize overflowed u64")
        }
        #[cfg(not(debug_assertions))]
        {
            self as u64
        }
    }
}
226
// Pointers aren't integers, but we convert pointers to integers to perform
// offset arithmetic in some places. (And no, we don't convert the integers
// back to pointers.) So add 'as_usize' conversions here too for completeness.
//
// These 'as' casts are actually okay because they're always non-lossy. But the
// idea here is to just try and remove as much 'as' as possible, particularly
// in this crate where we are being really paranoid about offsets and making
// sure we don't panic on inputs that might be untrusted. This way, the 'as'
// casts become easier to audit if they're all in one place, even when some of
// them are actually okay 100% of the time.

/// Conversion from a raw pointer to its integer address.
pub(crate) trait Pointer {
    /// Returns this pointer cast to a `usize` with `as`.
    ///
    /// The implementation for `*const T` never dereferences the pointer and
    /// never panics.
    fn as_usize(self) -> usize;
}

impl<T> Pointer for *const T {
    fn as_usize(self) -> usize {
        self as usize
    }
}