Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ serde_path_to_error = ["dep:serde_path_to_error"]
serde_with-3 = ["dep:serde_with", "dep:serde"]
serde = ["dep:serde"]
serde_json-1 = ["dep:serde_json"]
# Provides additional functionality for BSON validation. For internal use only.
sfp-internal = []

[lib]
name = "bson"
Expand Down
13 changes: 13 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,19 @@ pub enum ErrorKind {
n: u64,
},

/// A cstring exceeded the maximum parsing length.
#[cfg(feature = "sfp-internal")]
#[error("cstring exceeded the maximum parsing length ({max_parse_len} bytes)")]
#[non_exhaustive]
#[doc(hidden)]
TooLongCStr {
/// The configured maximum parsing length.
max_parse_len: usize,

/// The bytes parsed before the maximum parsing length was reached.
bytes: Vec<u8>,
},

/// Invalid UTF-8 bytes were encountered.
#[error("Invalid UTF-8")]
#[non_exhaustive]
Expand Down
2 changes: 1 addition & 1 deletion src/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ pub use self::{
cstr::{assert_valid_cstr, cstr, validate_cstr, CStr, CString, IsValidCStr},
document::RawDocument,
document_buf::{BindRawBsonRef, BindValue, RawDocumentBuf},
iter::{RawElement, RawIter},
iter::{Iter, RawElement, RawIter},
};

pub(crate) const MIN_BSON_STRING_SIZE: i32 = 4 + 1; // 4 bytes for length, one byte for null terminator
Expand Down
26 changes: 26 additions & 0 deletions src/raw/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,21 @@ impl RawArray {
self.into_iter().nth(index).transpose()
}

/// Gets a reference to the value at the given index. Returns an error if a cstring is
/// encountered that exceeds the provided `len`.
#[cfg(feature = "sfp-internal")]
#[doc(hidden)]
pub fn get_with_max_cstr_parse_len(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(not needed for this PR since this is an internal API we can change if needed)

If we end up wanting to add any more control over parsing, we might want to change this to `get_with_options` and make a proper options struct.

&self,
index: usize,
len: usize,
) -> RawResult<Option<RawBsonRef<'_>>> {
self.into_iter()
.max_cstr_parse_len(len)
.nth(index)
.transpose()
}

fn get_with<'a, T>(
&'a self,
index: usize,
Expand Down Expand Up @@ -269,6 +284,17 @@ pub struct RawArrayIter<'a> {
inner: RawIter<'a>,
}

impl<'a> RawArrayIter<'a> {
    /// Sets the upper bound on how many bytes the iterator will scan while looking for the
    /// null terminator of a cstring. The setting is forwarded to the wrapped [`RawIter`].
    #[cfg(feature = "sfp-internal")]
    #[doc(hidden)]
    pub fn max_cstr_parse_len(self, len: impl Into<Option<usize>>) -> Self {
        let Self { inner } = self;
        Self {
            inner: inner.max_cstr_parse_len(len),
        }
    }
}

impl<'a> Iterator for RawArrayIter<'a> {
type Item = RawResult<RawBsonRef<'a>>;

Expand Down
84 changes: 59 additions & 25 deletions src/raw/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ use super::{
RawDocumentBuf,
RawIter,
RawRegexRef,
Result as RawResult,
MIN_BSON_DOCUMENT_SIZE,
};
use crate::{oid::ObjectId, spec::ElementType, Document};
Expand Down Expand Up @@ -94,7 +93,7 @@ impl RawDocument {
/// let doc = RawDocument::from_bytes(b"\x05\0\0\0\0")?;
/// # Ok::<(), bson::error::Error>(())
/// ```
pub fn from_bytes<D: AsRef<[u8]> + ?Sized>(data: &D) -> RawResult<&RawDocument> {
pub fn from_bytes<D: AsRef<[u8]> + ?Sized>(data: &D) -> Result<&RawDocument> {
let data = data.as_ref();

if data.len() < 5 {
Expand Down Expand Up @@ -145,7 +144,7 @@ impl RawDocument {
/// assert!(doc.get("unknown")?.is_none());
/// # Ok::<(), Error>(())
/// ```
pub fn get(&self, key: impl AsRef<str>) -> RawResult<Option<RawBsonRef<'_>>> {
pub fn get(&self, key: impl AsRef<str>) -> Result<Option<RawBsonRef<'_>>> {
for elem in RawIter::new(self) {
let elem = elem?;
if key.as_ref() == elem.key().as_str() {
Expand All @@ -155,6 +154,24 @@ impl RawDocument {
Ok(None)
}

/// Looks up the value stored under `key` by walking the document's elements in order.
///
/// The underlying iterator is configured with `len` as its maximum cstring parse length, so
/// an error is returned if a cstring longer than `len` is encountered before the key is
/// found. Returns `Ok(None)` when no element has the requested key.
#[cfg(feature = "sfp-internal")]
#[doc(hidden)]
pub fn get_with_max_cstr_parse_len(
    &self,
    key: impl AsRef<str>,
    len: usize,
) -> Result<Option<RawBsonRef<'_>>> {
    let wanted = key.as_ref();
    for entry in RawIter::new(self).max_cstr_parse_len(len) {
        let element = entry?;
        if element.key().as_str() != wanted {
            continue;
        }
        return Ok(Some(element.try_into()?));
    }
    Ok(None)
}

/// Gets an iterator over the elements in the [`RawDocument`] that yields
/// `Result<(&str, RawBson<'_>)>`.
pub fn iter(&self) -> Iter<'_> {
Expand Down Expand Up @@ -479,24 +496,41 @@ impl RawDocument {
self.as_bytes().len() == MIN_BSON_DOCUMENT_SIZE as usize
}

pub(crate) fn cstring_bytes_at(&self, start_at: usize) -> RawResult<&[u8]> {
let buf = &self.as_bytes()[start_at..];

let mut splits = buf.splitn(2, |x| *x == 0);
let value = splits
.next()
.ok_or_else(|| RawError::malformed_bytes("no value"))?;
if splits.next().is_some() {
Ok(value)
} else {
Err(RawError::malformed_bytes("expected null terminator"))
}
}

pub(crate) fn read_cstring_at(&self, start_at: usize) -> RawResult<&CStr> {
let bytes = self.cstring_bytes_at(start_at)?;
let s = try_to_str(bytes)?;
s.try_into()
pub(crate) fn cstring_bytes_at(
&self,
start_at: usize,
max_parse_len: Option<usize>,
) -> Result<&[u8]> {
let data = &self.data;
let end = max_parse_len
.map(|len| std::cmp::min(start_at + len + 1, data.len()))
.unwrap_or(data.len());
let buf = &data[start_at..end];

let Some(index) = buf.iter().position(|b| *b == 0) else {
#[cfg(feature = "sfp-internal")]
if let Some(max_parse_len) = max_parse_len {
return Err(crate::error::ErrorKind::TooLongCStr {
max_parse_len,
bytes: buf.to_vec(),
}
.into());
}
// Note: This error should never be encountered in practice because the document
// constructors validate that the last byte is 0.
return Err(Error::malformed_bytes("missing null terminator"));
};
Ok(&buf[..index])
}

/// Reads the cstring starting at `start_at` and returns it as a `&CStr`.
///
/// The raw bytes come from `cstring_bytes_at` (which honors `max_parse_len`), are passed
/// through `try_to_str`, and the resulting string is then converted with `try_into`. Any
/// error from those steps is propagated.
pub(crate) fn read_cstring_at(
    &self,
    start_at: usize,
    max_parse_len: Option<usize>,
) -> Result<&CStr> {
    let raw = self.cstring_bytes_at(start_at, max_parse_len)?;
    try_to_str(raw)?.try_into()
}
}

Expand Down Expand Up @@ -580,7 +614,7 @@ impl<'a> From<&'a RawDocument> for Cow<'a, RawDocument> {
impl TryFrom<&RawDocument> for Document {
type Error = RawError;

fn try_from(rawdoc: &RawDocument) -> RawResult<Document> {
fn try_from(rawdoc: &RawDocument) -> Result<Document> {
rawdoc
.into_iter()
.map(|res| res.and_then(|(k, v)| Ok((k.as_str().to_owned(), v.try_into()?))))
Expand All @@ -591,7 +625,7 @@ impl TryFrom<&RawDocument> for Document {
impl TryFrom<&RawDocument> for Utf8Lossy<Document> {
type Error = RawError;

fn try_from(rawdoc: &RawDocument) -> RawResult<Utf8Lossy<Document>> {
fn try_from(rawdoc: &RawDocument) -> Result<Utf8Lossy<Document>> {
let mut out = Document::new();
for elem in rawdoc.iter_elements() {
let elem = elem?;
Expand All @@ -602,7 +636,7 @@ impl TryFrom<&RawDocument> for Utf8Lossy<Document> {
}
}

fn deep_utf8_lossy(src: RawBson) -> RawResult<Bson> {
fn deep_utf8_lossy(src: RawBson) -> Result<Bson> {
match src {
RawBson::Array(arr) => {
let mut tmp = vec![];
Expand Down Expand Up @@ -674,7 +708,7 @@ impl TryFrom<&RawDocumentBuf> for Utf8Lossy<Document> {

impl<'a> IntoIterator for &'a RawDocument {
type IntoIter = Iter<'a>;
type Item = RawResult<(&'a CStr, RawBsonRef<'a>)>;
type Item = Result<(&'a CStr, RawBsonRef<'a>)>;

fn into_iter(self) -> Iter<'a> {
self.iter()
Expand Down
46 changes: 36 additions & 10 deletions src/raw/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ use super::{
RawDocument,
};

/// An iterator over the document's entries.
/// An iterator over the key-value pairs in a document. Construct by calling [`RawDocument::iter`]
/// or [`RawDocumentBuf::iter`](crate::RawDocumentBuf::iter).
pub struct Iter<'a> {
inner: RawIter<'a>,
}
Expand All @@ -41,6 +42,15 @@ impl<'a> Iter<'a> {
inner: RawIter::new(doc),
}
}

/// Sets the upper bound on how many bytes the iterator will scan while looking for the
/// null terminator of a cstring. The setting is forwarded to the wrapped [`RawIter`].
#[cfg(feature = "sfp-internal")]
#[doc(hidden)]
pub fn max_cstr_parse_len(self, len: impl Into<Option<usize>>) -> Self {
    Self {
        inner: self.inner.max_cstr_parse_len(len),
    }
}
}

impl<'a> Iterator for Iter<'a> {
Expand All @@ -58,11 +68,12 @@ impl<'a> Iterator for Iter<'a> {
}
}

/// An iterator over the document's elements.
/// An iterator over the elements in a document. Construct by calling [`RawDocument::iter_elements`]
/// or [`RawDocumentBuf::iter_elements`](crate::RawDocumentBuf::iter_elements).
pub struct RawIter<'a> {
doc: &'a RawDocument,
offset: usize,

max_cstr_parse_len: Option<usize>,
/// Whether the underlying doc is assumed to be valid or if an error has been encountered.
/// After an error, all subsequent iterations will return None.
valid: bool,
Expand All @@ -73,10 +84,20 @@ impl<'a> RawIter<'a> {
Self {
doc,
offset: 4,
max_cstr_parse_len: None,
valid: true,
}
}

/// Sets the upper bound on how many bytes the iterator will scan while looking for the
/// null terminator of a cstring. Passing `None` removes the limit.
#[cfg(feature = "sfp-internal")]
#[doc(hidden)]
pub fn max_cstr_parse_len(self, len: impl Into<Option<usize>>) -> Self {
    let limit: Option<usize> = len.into();
    let mut configured = self;
    configured.max_cstr_parse_len = limit;
    configured
}

fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> {
let end = checked_add(start, num_bytes)?;
if self.doc.as_bytes().get(start..end).is_none() {
Expand Down Expand Up @@ -212,12 +233,12 @@ impl<'a> RawElement<'a> {
id: self.get_oid_at(self.start_at + (self.size - 12))?,
}),
ElementType::RegularExpression => {
let pattern = self.doc.read_cstring_at(self.start_at)?;
let pattern = self.doc.read_cstring_at(self.start_at, None)?;
RawBsonRef::RegularExpression(RawRegexRef {
pattern,
options: self
.doc
.read_cstring_at(self.start_at + pattern.len() + 1)?,
.read_cstring_at(self.start_at + pattern.len() + 1, None)?,
})
}
ElementType::Timestamp => RawBsonRef::Timestamp({
Expand Down Expand Up @@ -317,12 +338,14 @@ impl<'a> RawElement<'a> {
}),
ElementType::RegularExpression => {
let pattern =
String::from_utf8_lossy(self.doc.cstring_bytes_at(self.start_at)?).into_owned();
String::from_utf8_lossy(self.doc.cstring_bytes_at(self.start_at, None)?)
.into_owned();
let pattern_len = pattern.len();
Utf8LossyBson::RegularExpression(crate::Regex {
pattern: pattern.try_into()?,
options: String::from_utf8_lossy(
self.doc.cstring_bytes_at(self.start_at + pattern_len + 1)?,
self.doc
.cstring_bytes_at(self.start_at + pattern_len + 1, None)?,
)
.into_owned()
.try_into()?,
Expand Down Expand Up @@ -404,8 +427,8 @@ impl RawIter<'_> {
ElementType::Array => self.next_document_len(offset)?,
ElementType::Binary => self.get_next_length_at(offset)? + 4 + 1,
ElementType::RegularExpression => {
let pattern = self.doc.read_cstring_at(offset)?;
let options = self.doc.read_cstring_at(offset + pattern.len() + 1)?;
let pattern = self.doc.read_cstring_at(offset, None)?;
let options = self.doc.read_cstring_at(offset + pattern.len() + 1, None)?;
pattern.len() + 1 + options.len() + 1
}
ElementType::DbPointer => read_len(&self.doc.as_bytes()[offset..])? + 12,
Expand Down Expand Up @@ -440,7 +463,10 @@ impl<'a> Iterator for RawIter<'a> {
return Some(Err(Error::malformed_bytes("iteration overflowed document")));
}

let key = match self.doc.read_cstring_at(self.offset + 1) {
let key = match self
.doc
.read_cstring_at(self.offset + 1, self.max_cstr_parse_len)
{
Ok(k) => k,
Err(e) => {
self.valid = false;
Expand Down
Loading