Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend DocumentDeserialize with a stateful variant DocumentDeserializeSeed #2362

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions src/core/searcher.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use std::collections::BTreeMap;
use std::marker::PhantomData;
use std::sync::Arc;
use std::{fmt, io};

use crate::collector::Collector;
use crate::core::Executor;
use crate::index::{SegmentId, SegmentReader};
use crate::query::{Bm25StatisticsProvider, EnableScoring, Query};
use crate::schema::document::DocumentDeserialize;
use crate::schema::document::{DocumentDeserialize, DocumentDeserializeSeed};
use crate::schema::{Schema, Term};
use crate::space_usage::SearcherSpaceUsage;
use crate::store::{CacheStats, StoreReader};
Expand Down Expand Up @@ -86,8 +87,17 @@ impl Searcher {
/// The searcher uses the segment ordinal to route the
/// request to the right `Segment`.
pub fn doc<D: DocumentDeserialize>(&self, doc_address: DocAddress) -> crate::Result<D> {
    // A `PhantomData<D>` seed is stateless: it forwards to `D`'s own
    // `DocumentDeserialize` impl, so this is the seeded path with no extra state.
    let stateless_seed = PhantomData::<D>;
    self.doc_seed(doc_address, stateless_seed)
}

/// A stateful variant of [`doc`][Self::doc].
///
/// Selects the store reader for `doc_address.segment_ord` and passes `seed` on to it,
/// so the returned value is the seed's `T::Value` rather than a type that
/// implements `DocumentDeserialize` directly.
pub fn doc_seed<T: DocumentDeserializeSeed>(
&self,
doc_address: DocAddress,
seed: T,
) -> crate::Result<T::Value> {
// One store reader per segment; the segment ordinal is the index into that list.
let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
// NOTE(review): the `get` call below has no trailing `;` and is immediately followed
// by the `get_seed` call — it looks like the pre-change line kept by the diff
// rendering. Confirm against the real file before relying on it.
store_reader.get(doc_address.doc_id)
store_reader.get_seed(doc_address.doc_id, seed)
}

/// The cache stats for the underlying store reader.
Expand All @@ -109,9 +119,21 @@ impl Searcher {
&self,
doc_address: DocAddress,
) -> crate::Result<D> {
self.doc_async_seed(doc_address, PhantomData).await
}

#[cfg(feature = "quickwit")]
/// A stateful variant of [`doc_async`][Self::doc_async].
///
/// Only compiled when the `quickwit` feature is enabled. Uses the index's search
/// executor together with the store reader for `doc_address.segment_ord`, and lets
/// `seed` produce the returned `T::Value`.
pub async fn doc_async_seed<T: DocumentDeserializeSeed>(
&self,
doc_address: DocAddress,
seed: T,
) -> crate::Result<T::Value> {
let executor = self.inner.index.search_executor();
// The segment ordinal is the index into the list of store readers.
let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
// NOTE(review): the `get_async` call below is followed directly by the
// `get_async_seed` call with no separator — it looks like the pre-change line kept
// by the diff rendering. Confirm against the real file.
store_reader.get_async(doc_address.doc_id, executor).await
store_reader
.get_async_seed(doc_address.doc_id, executor, seed)
.await
}

/// Access the schema associated with the index of this searcher.
Expand Down
22 changes: 22 additions & 0 deletions src/schema/document/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,28 @@ pub trait DocumentDeserialize: Sized {
where D: DocumentDeserializer<'de>;
}

/// A seeded (stateful) counterpart to [`DocumentDeserialize`].
///
/// Instead of building a value from the deserializer alone, an implementor is
/// itself passed by value into [`deserialize`](Self::deserialize) and may use
/// whatever state it carries to produce the result.
pub trait DocumentDeserializeSeed: Sized {
    /// The type that deserializing with this seed yields.
    type Value;

    /// Consumes the seed and tries to build a `Self::Value` from `deserializer`.
    fn deserialize<'de, D: DocumentDeserializer<'de>>(
        self,
        deserializer: D,
    ) -> Result<Self::Value, DeserializeError>;
}

impl<T: DocumentDeserialize> DocumentDeserializeSeed for PhantomData<T> {
    /// A `PhantomData<T>` seed carries no state, so it yields a plain `T`.
    type Value = T;

    /// Forwards to `T`'s stateless [`DocumentDeserialize`] implementation.
    fn deserialize<'de, D: DocumentDeserializer<'de>>(
        self,
        deserializer: D,
    ) -> Result<Self::Value, DeserializeError> {
        <T as DocumentDeserialize>::deserialize(deserializer)
    }
}

/// A deserializer that can walk through each entry in the document.
pub trait DocumentDeserializer<'de> {
/// An indicator as to how many values are in the document.
Expand Down
4 changes: 2 additions & 2 deletions src/schema/document/default_document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ impl<'a> Iterator for CompactDocObjectIter<'a> {
container: self.container,
value,
};
return Some((key, value));
Some((key, value))
}
}

Expand Down Expand Up @@ -637,7 +637,7 @@ impl<'a> Iterator for CompactDocArrayIter<'a> {
container: self.container,
value,
};
return Some(value);
Some(value)
}
}

Expand Down
5 changes: 3 additions & 2 deletions src/schema/document/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,9 @@ use std::mem;

pub(crate) use self::de::BinaryDocumentDeserializer;
pub use self::de::{
ArrayAccess, DeserializeError, DocumentDeserialize, DocumentDeserializer, ObjectAccess,
ValueDeserialize, ValueDeserializer, ValueType, ValueVisitor,
ArrayAccess, DeserializeError, DocumentDeserialize, DocumentDeserializeSeed,
DocumentDeserializer, ObjectAccess, ValueDeserialize, ValueDeserializer, ValueType,
ValueVisitor,
};
pub use self::default_document::{
CompactDocArrayIter, CompactDocObjectIter, CompactDocValue, DocParsingError, TantivyDocument,
Expand Down
45 changes: 40 additions & 5 deletions src/store/reader.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::io;
use std::iter::Sum;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use std::ops::{AddAssign, Range};
use std::sync::atomic::{AtomicUsize, Ordering};
Expand All @@ -14,7 +15,9 @@ use super::Decompressor;
use crate::directory::FileSlice;
use crate::error::DataCorruption;
use crate::fastfield::AliveBitSet;
use crate::schema::document::{BinaryDocumentDeserializer, DocumentDeserialize};
use crate::schema::document::{
BinaryDocumentDeserializer, DocumentDeserialize, DocumentDeserializeSeed,
};
use crate::space_usage::StoreSpaceUsage;
use crate::store::index::Checkpoint;
use crate::DocId;
Expand Down Expand Up @@ -201,11 +204,21 @@ impl StoreReader {
/// It should not be called to score documents
/// for instance.
pub fn get<D: DocumentDeserialize>(&self, doc_id: DocId) -> crate::Result<D> {
    // The stateless case is the seeded one with a `PhantomData<D>` seed, which
    // simply forwards to `D`'s own `DocumentDeserialize` impl.
    let stateless_seed = PhantomData::<D>;
    self.get_seed(doc_id, stateless_seed)
}

/// A stateful version of [`get`][Self::get].
///
/// Reads the stored bytes for `doc_id`, wraps them in a
/// `BinaryDocumentDeserializer`, and lets `seed` produce the resulting `T::Value`.
/// A failure in any of those steps is returned as `Err`.
pub fn get_seed<T: DocumentDeserializeSeed>(
&self,
doc_id: DocId,
seed: T,
) -> crate::Result<T::Value> {
let mut doc_bytes = self.get_document_bytes(doc_id)?;

// Building the deserializer can fail; its error is converted into a `TantivyError`.
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
.map_err(crate::TantivyError::from)?;
// NOTE(review): `D` is not a generic parameter of this function, and the line below
// is followed directly by `seed.deserialize(...)` — it looks like the pre-change
// line kept by the diff rendering. Confirm against the real file.
D::deserialize(deserializer).map_err(crate::TantivyError::from)
// The seed is taken by value and consumed by this call.
seed.deserialize(deserializer)
.map_err(crate::TantivyError::from)
}

/// Returns raw bytes of a given document.
Expand Down Expand Up @@ -237,16 +250,27 @@ impl StoreReader {
/// Iterator over all Documents in their order as they are stored in the doc store.
/// Use this, if you want to extract all Documents from the doc store.
/// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
///
/// Each item is a `crate::Result<D>`. This is the stateless form of
/// [`iter_seed`][Self::iter_seed], called with a `PhantomData` seed.
// NOTE(review): two `pub fn iter` header lines follow; they differ only in the `+ 'b`
// bound on `D`. This looks like the old and new lines of a diff rendered together —
// confirm which one the real file contains.
pub fn iter<'a: 'b, 'b, D: DocumentDeserialize>(
pub fn iter<'a: 'b, 'b, D: DocumentDeserialize + 'b>(
&'b self,
alive_bitset: Option<&'a AliveBitSet>,
) -> impl Iterator<Item = crate::Result<D>> + 'b {
// `iter_seed` takes the seed by reference, hence the `&`.
self.iter_seed(alive_bitset, &PhantomData)
}

/// A stateful variant of [`iter`][Self::iter].
///
/// Walks the raw stored documents from `iter_raw(alive_bitset)` and deserializes
/// each one with a fresh clone of `seed`, yielding `crate::Result<T::Value>` items.
/// `T: Clone` is required because every document consumes its own copy of the seed.
pub fn iter_seed<'a: 'b, 'b, T: DocumentDeserializeSeed + Clone + 'b>(
&'b self,
alive_bitset: Option<&'a AliveBitSet>,
seed: &'b T,
) -> impl Iterator<Item = crate::Result<T::Value>> + 'b {
self.iter_raw(alive_bitset).map(|doc_bytes_res| {
// An error from the raw iterator is passed through as this item's `Err`.
let mut doc_bytes = doc_bytes_res?;

let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
.map_err(crate::TantivyError::from)?;
// NOTE(review): `D` is not a generic parameter of this function, and the line below
// is followed directly by `seed.clone()...` — it looks like the pre-change line
// kept by the diff rendering. Confirm against the real file.
D::deserialize(deserializer).map_err(crate::TantivyError::from)
// `deserialize` takes the seed by value, so clone the borrowed seed per document.
seed.clone()
.deserialize(deserializer)
.map_err(crate::TantivyError::from)
})
}

Expand Down Expand Up @@ -389,11 +413,22 @@ impl StoreReader {
doc_id: DocId,
executor: &Executor,
) -> crate::Result<D> {
self.get_async_seed(doc_id, executor, PhantomData).await
}

/// A stateful variant of [`get_async`][Self::get_async].
///
/// Awaits the stored bytes for `doc_id` via `get_document_bytes_async`, wraps them in
/// a `BinaryDocumentDeserializer`, and lets `seed` produce the resulting `T::Value`.
/// A failure in any of those steps is returned as `Err`.
pub async fn get_async_seed<T: DocumentDeserializeSeed>(
&self,
doc_id: DocId,
executor: &Executor,
seed: T,
) -> crate::Result<T::Value> {
let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?;

// Building the deserializer can fail; its error is converted into a `TantivyError`.
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
.map_err(crate::TantivyError::from)?;
// NOTE(review): `D` is not a generic parameter of this function, and the line below
// is followed directly by `seed.deserialize(...)` — it looks like the pre-change
// line kept by the diff rendering. Confirm against the real file.
D::deserialize(deserializer).map_err(crate::TantivyError::from)
// The seed is taken by value and consumed by this call.
seed.deserialize(deserializer)
.map_err(crate::TantivyError::from)
}
}

Expand Down
Loading