Skip to content

Commit de9fe50

Browse files
committed
Add some safety docs
1 parent 2ca373d commit de9fe50

File tree

5 files changed

+35
-0
lines changed

5 files changed

+35
-0
lines changed

crates/toml_edit/src/parser/datetime.rs

+1
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ pub(crate) fn unsigned_digits<'i, const MIN: usize, const MAX: usize>(
255255
input: &mut Input<'i>,
256256
) -> PResult<&'i str> {
257257
take_while(MIN..=MAX, DIGIT)
258+
// Safety: `DIGIT` only matches ASCII digits, so the taken bytes are valid UTF-8
258259
.map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") })
259260
.parse_next(input)
260261
}

crates/toml_edit/src/parser/key.rs

+2
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ pub(crate) fn simple_key(input: &mut Input<'_>) -> PResult<(RawString, InternalS
9090
fn unquoted_key<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
9191
trace(
9292
"unquoted-key",
93+
// Safety: UNQUOTED_CHAR is only ASCII ranges
9394
take_while(1.., UNQUOTED_CHAR)
9495
.map(|b| unsafe { from_utf8_unchecked(b, "`is_unquoted_char` filters out on-ASCII") }),
9596
)
@@ -101,6 +102,7 @@ pub(crate) fn is_unquoted_char(c: u8) -> bool {
101102
UNQUOTED_CHAR.contains_token(c)
102103
}
103104

105+
// Safety-usable invariant: UNQUOTED_CHAR is only ASCII ranges
104106
const UNQUOTED_CHAR: (
105107
RangeInclusive<u8>,
106108
RangeInclusive<u8>,

crates/toml_edit/src/parser/numbers.rs

+20
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,15 @@ pub(crate) fn dec_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
7979
)),
8080
)
8181
.recognize()
82+
// Safety: DIGIT1_9, digit(), and `_` only cover ASCII ranges
8283
.map(|b: &[u8]| unsafe {
8384
from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII")
8485
})
8586
.context(StrContext::Label("integer")),
8687
)
8788
.parse_next(input)
8889
}
90+
/// Safety-usable invariant: DIGIT1_9 is only ASCII ranges
8991
const DIGIT1_9: RangeInclusive<u8> = b'1'..=b'9';
9092

9193
// hex-prefix = %x30.78 ; 0x
@@ -114,11 +116,13 @@ pub(crate) fn hex_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
114116
))
115117
.recognize(),
116118
)
119+
// Safety: HEX_PREFIX, hexdig(), and `_` only cover ASCII ranges
117120
.map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII") })
118121
.context(StrContext::Label("hexadecimal integer")),
119122
)
120123
.parse_next(input)
121124
}
125+
/// Safety-usable invariant: HEX_PREFIX is ASCII only
122126
const HEX_PREFIX: &[u8] = b"0x";
123127

124128
// oct-prefix = %x30.6F ; 0o
@@ -147,12 +151,15 @@ pub(crate) fn oct_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
147151
))
148152
.recognize(),
149153
)
154+
// Safety: DIGIT0_7, OCT_PREFIX, and `_` only cover ASCII ranges
150155
.map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII") })
151156
.context(StrContext::Label("octal integer")),
152157
)
153158
.parse_next(input)
154159
}
160+
/// Safety-usable invariant: OCT_PREFIX is ASCII only
155161
const OCT_PREFIX: &[u8] = b"0o";
162+
/// Safety-usable invariant: DIGIT0_7 is ASCII only
156163
const DIGIT0_7: RangeInclusive<u8> = b'0'..=b'7';
157164

158165
// bin-prefix = %x30.62 ; 0b
@@ -181,12 +188,15 @@ pub(crate) fn bin_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
181188
))
182189
.recognize(),
183190
)
191+
// Safety: DIGIT0_1, BIN_PREFIX, and `_` only cover ASCII ranges
184192
.map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII") })
185193
.context(StrContext::Label("binary integer")),
186194
)
187195
.parse_next(input)
188196
}
197+
/// Safety-usable invariant: BIN_PREFIX is ASCII only
189198
const BIN_PREFIX: &[u8] = b"0b";
199+
/// Safety-usable invariant: DIGIT0_1 is ASCII only
190200
const DIGIT0_1: RangeInclusive<u8> = b'0'..=b'1';
191201

192202
// ;; Float
@@ -234,6 +244,7 @@ pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
234244
)
235245
.recognize()
236246
.map(|b: &[u8]| unsafe {
247+
// Safety: `.` and `zero_prefixable_int` only handle ASCII
237248
from_utf8_unchecked(
238249
b,
239250
"`.` and `parse_zero_prefixable_int` filter out non-ASCII",
@@ -243,6 +254,7 @@ pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
243254
}
244255

245256
// zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT )
257+
/// Safety-usable invariant: only produces ASCII
246258
pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
247259
(
248260
digit,
@@ -261,8 +273,10 @@ pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str>
261273
.map(|()| ()),
262274
)
263275
.recognize()
276+
// Safety: `digit()` and `_` are all ASCII
264277
.map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") })
265278
.parse_next(input)
279+
// Safety-usable invariant upheld by only using `digit` and `_` in the parser
266280
}
267281

268282
// exp = "e" float-exp-part
@@ -275,6 +289,7 @@ pub(crate) fn exp<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
275289
)
276290
.recognize()
277291
.map(|b: &[u8]| unsafe {
292+
// Safety: `e`, `E`, `+`, `-`, and `zero_prefixable_int` are all ASCII
278293
from_utf8_unchecked(
279294
b,
280295
"`one_of` and `parse_zero_prefixable_int` filter out non-ASCII",
@@ -305,15 +320,20 @@ pub(crate) fn nan(input: &mut Input<'_>) -> PResult<f64> {
305320
const NAN: &[u8] = b"nan";
306321

307322
// DIGIT = %x30-39 ; 0-9
323+
/// Safety-usable invariant: only parses ASCII
308324
pub(crate) fn digit(input: &mut Input<'_>) -> PResult<u8> {
325+
// Safety: DIGIT is all ASCII
309326
one_of(DIGIT).parse_next(input)
310327
}
311328
const DIGIT: RangeInclusive<u8> = b'0'..=b'9';
312329

313330
// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
331+
/// Safety-usable invariant: only parses ASCII
314332
pub(crate) fn hexdig(input: &mut Input<'_>) -> PResult<u8> {
333+
// Safety: HEXDIG is all ASCII
315334
one_of(HEXDIG).parse_next(input)
316335
}
336+
/// Safety-usable invariant: only ASCII ranges
317337
pub(crate) const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) =
318338
(DIGIT, b'A'..=b'F', b'a'..=b'f');
319339

crates/toml_edit/src/parser/strings.rs

+5
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ fn escape_seq_char(input: &mut Input<'_>) -> PResult<char> {
138138
pub(crate) fn hexescape<const N: usize>(input: &mut Input<'_>) -> PResult<char> {
139139
take_while(0..=N, HEXDIG)
140140
.verify(|b: &[u8]| b.len() == N)
141+
// Safety: HEXDIG is ASCII-only
141142
.map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") })
142143
.verify_map(|s| u32::from_str_radix(s, 16).ok())
143144
.try_map(|h| char::from_u32(h).ok_or(CustomError::OutOfRange))
@@ -217,13 +218,15 @@ fn mlb_quotes<'i>(
217218
move |input: &mut Input<'i>| {
218219
let start = input.checkpoint();
219220
let res = terminated(b"\"\"", peek(term.by_ref()))
221+
// Safety: the mapped bytes are the matched literal `""`, which is ASCII-only
220222
.map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
221223
.parse_next(input);
222224

223225
match res {
224226
Err(winnow::error::ErrMode::Backtrack(_)) => {
225227
input.reset(&start);
226228
terminated(b"\"", peek(term.by_ref()))
229+
// Safety: the mapped bytes are the matched literal `"`, which is ASCII-only
227230
.map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
228231
.parse_next(input)
229232
}
@@ -346,13 +349,15 @@ fn mll_quotes<'i>(
346349
move |input: &mut Input<'i>| {
347350
let start = input.checkpoint();
348351
let res = terminated(b"''", peek(term.by_ref()))
352+
// Safety: the mapped bytes are the matched literal `''`, which is ASCII-only
349353
.map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
350354
.parse_next(input);
351355

352356
match res {
353357
Err(winnow::error::ErrMode::Backtrack(_)) => {
354358
input.reset(&start);
355359
terminated(b"'", peek(term.by_ref()))
360+
// Safety: the mapped bytes are the matched literal `'`, which is ASCII-only
356361
.map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
357362
.parse_next(input)
358363
}

crates/toml_edit/src/parser/trivia.rs

+7
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use winnow::token::take_while;
1111

1212
use crate::parser::prelude::*;
1313

14+
/// Safety invariant: must be called with valid UTF-8 in `bytes`
1415
pub(crate) unsafe fn from_utf8_unchecked<'b>(
1516
bytes: &'b [u8],
1617
safety_justification: &'static str,
@@ -27,10 +28,12 @@ pub(crate) unsafe fn from_utf8_unchecked<'b>(
2728

2829
// wschar = ( %x20 / ; Space
2930
// %x09 ) ; Horizontal tab
31+
/// Safety-usable invariant: WSCHAR is only ASCII values
3032
pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t');
3133

3234
// ws = *wschar
3335
pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
36+
// Safety: WSCHAR only contains ASCII
3437
take_while(0.., WSCHAR)
3538
.map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
3639
.parse_next(input)
@@ -58,8 +61,10 @@ pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
5861

5962
// newline = ( %x0A / ; LF
6063
// %x0D.0A ) ; CRLF
64+
/// Safety-usable invariant: Only returns ASCII bytes
6165
pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> {
6266
alt((
67+
// Safety: CR and LF are ASCII
6368
one_of(LF).value(b'\n'),
6469
(one_of(CR), one_of(LF)).value(b'\n'),
6570
))
@@ -76,6 +81,7 @@ pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
7681
)
7782
.map(|()| ())
7883
.recognize()
84+
// Safety: `newline` and `WSCHAR` are all ASCII
7985
.map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") })
8086
.parse_next(input)
8187
}
@@ -85,6 +91,7 @@ pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
8591
(newline, ws_newline)
8692
.recognize()
8793
.map(|b| unsafe {
94+
// Safety: `newline` and `WSCHAR` are all ASCII
8895
from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
8996
})
9097
.parse_next(input)

0 commit comments

Comments
 (0)