-
Notifications
You must be signed in to change notification settings - Fork 151
feat: Snapshot::try_new_from() API
#549
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
f8bc074
3d37288
5480711
2686fb3
0daf1e9
692c9d5
f18e380
8d3357b
3bf3d67
453db1b
ce31322
f1578fb
ee61b75
fea0f76
81f61ae
7b0bd1c
691b23a
27c4a95
66e44d2
46ab944
5a582d6
592667b
75b6178
5cdde76
a94ddef
e8e327d
d5bcb67
be88b88
d396953
6e49bc2
032d72b
cb06927
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,7 +53,8 @@ impl Snapshot { | |
| /// | ||
| /// - `table_root`: url pointing at the table root (where `_delta_log` folder is located) | ||
| /// - `engine`: Implementation of [`Engine`] apis. | ||
| /// - `version`: target version of the [`Snapshot`] | ||
| /// - `version`: target version of the [`Snapshot`]. None will create a snapshot at the latest | ||
| /// version of the table. | ||
| pub fn try_new( | ||
| table_root: Url, | ||
| engine: &dyn Engine, | ||
|
|
@@ -71,6 +72,26 @@ impl Snapshot { | |
| Self::try_new_from_log_segment(table_root, log_segment, engine) | ||
| } | ||
|
|
||
| /// Create a new [`Snapshot`] instance from an existing [`Snapshot`]. This is useful when you | ||
| /// already have a [`Snapshot`] lying around and want to do the minimal work to 'update' the | ||
| /// snapshot to a later version. | ||
| /// | ||
| /// # Parameters | ||
| /// | ||
| /// - `existing_snapshot`: reference to an existing [`Snapshot`] | ||
| /// - `engine`: Implementation of [`Engine`] apis. | ||
| /// - `version`: target version of the [`Snapshot`]. None will create a snapshot at the latest | ||
| /// version of the table. | ||
| pub fn new_from( | ||
| existing_snapshot: &Snapshot, | ||
| engine: &dyn Engine, | ||
| version: Option<Version>, | ||
| ) -> DeltaResult<Self> { | ||
|
||
| // TODO(zach): for now we just pass through to the old API. We should instead optimize this | ||
| // to avoid replaying overlapping LogSegments. | ||
| Self::try_new(existing_snapshot.table_root.clone(), engine, version) | ||
| } | ||
|
|
||
| /// Create a new [`Snapshot`] instance. | ||
| pub(crate) fn try_new_from_log_segment( | ||
| location: Url, | ||
|
|
@@ -250,6 +271,25 @@ mod tests { | |
| assert_eq!(snapshot.schema(), &expected); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_snapshot_new_from() { | ||
| let path = | ||
| std::fs::canonicalize(PathBuf::from("./tests/data/table-with-dv-small/")).unwrap(); | ||
| let url = url::Url::from_directory_path(path).unwrap(); | ||
|
|
||
| let engine = SyncEngine::new(); | ||
| let old_snapshot = Snapshot::try_new(url, &engine, Some(0)).unwrap(); | ||
| let snapshot = Snapshot::new_from(&old_snapshot, &engine, Some(0)).unwrap(); | ||
|
|
||
| let expected = | ||
| Protocol::try_new(3, 7, Some(["deletionVectors"]), Some(["deletionVectors"])).unwrap(); | ||
| assert_eq!(snapshot.protocol(), &expected); | ||
|
|
||
| let schema_string = r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#; | ||
| let expected: StructType = serde_json::from_str(schema_string).unwrap(); | ||
| assert_eq!(snapshot.schema(), &expected); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_read_table_with_last_checkpoint() { | ||
| let path = std::fs::canonicalize(PathBuf::from( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,7 +90,7 @@ impl TableChanges { | |
| // supported for every protocol action in the CDF range. | ||
| let start_snapshot = | ||
| Snapshot::try_new(table_root.as_url().clone(), engine, Some(start_version))?; | ||
| let end_snapshot = Snapshot::try_new(table_root.as_url().clone(), engine, end_version)?; | ||
| let end_snapshot = Snapshot::new_from(&start_snapshot, engine, end_version)?; | ||
|
||
|
|
||
| // Verify CDF is enabled at the beginning and end of the interval to fail early. We must | ||
| // still check that CDF is enabled for every metadata action in the CDF range. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to clarify, is this api only for versions later than the existing snapshot?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep for now proposing that we allow old snapshot but just return a new snapshot (no incrementalization) maybe
warn!in that case? or i suppose we could disallow that..?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any valid scenario where a caller could legitimately pass a newer snapshot than the one they're asking for? I guess time travel? But if they know they're time traveling why would they pass a newer snapshot in the first place?
Either way, we should publicly document whether a too-new starting snapshot is an error or merely a useless hint, so callers don't have to wonder.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't think of any optimization that's available (let alone useful) if the caller passes in a new snapshot as the hint.
If that's true, then the question is: do we prohibit this behavior or just let it degenerate to the usual try_new the client should have done anyways?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would vote for returning an error in that case. It's unlikely the engine meant to get into that situation, so let's let them know they are doing something wrong
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated to be an error now! i agree :)