Skip to content

Commit 7c5302d

Browse files
committed
feat: add SnapshotUpdate interface for snapshot-producing operations
Add SnapshotUpdate<Derived> interface that extends PendingUpdateTyped<Snapshot> to provide common methods for all updates that create a new table snapshot. This follows the Java Iceberg API pattern where SnapshotUpdate<ThisT> provides a fluent API for operations like AppendFiles, DeleteFiles, and OverwriteFiles. Methods implemented: - Set(): Set summary properties on the snapshot - StageOnly(): Stage snapshot without updating table's current snapshot - DeleteWith(): Set custom file deletion callback for tracking or custom retention - ToBranch(): Commit snapshot to a specific branch (for WAP workflows) Method deferred: - ScanManifestsWith(): Deferred until executor/thread pool infrastructure is available in the codebase. Documented in class comments for future addition. Key features: - Uses CRTP pattern for type-safe fluent API and method chaining - Extends PendingUpdateTyped<Snapshot> to inherit Apply() and Commit() - Protected members allow derived classes to access state - All methods return Derived& for method chaining Testing approach: - Tests verify behavior through public API only (Apply/Commit) - Avoids exposing internal state (no getter methods for protected members) - Mock implementation returns Snapshot with configured properties - 11 comprehensive test cases covering all methods and error paths This interface will be extended by concrete snapshot operations like: - AppendFiles: Add new data files to the table - DeleteFiles: Remove data files from the table - OverwriteFiles: Replace data files in the table - RewriteFiles: Compact and optimize data files
1 parent 3855012 commit 7c5302d

File tree

4 files changed

+359
-0
lines changed

4 files changed

+359
-0
lines changed

src/iceberg/snapshot_update.h

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/snapshot_update.h
23+
/// API for table updates that produce snapshots
24+
25+
#include <functional>
26+
#include <optional>
27+
#include <string>
28+
#include <string_view>
29+
#include <unordered_map>
30+
31+
#include "iceberg/iceberg_export.h"
32+
#include "iceberg/pending_update.h"
33+
#include "iceberg/type_fwd.h"
34+
35+
namespace iceberg {
36+
37+
/// \brief Interface for updates that produce a new table snapshot
38+
///
39+
/// SnapshotUpdate extends PendingUpdate to provide common methods for all
40+
/// updates that create a new table Snapshot. Implementations include operations
41+
/// like AppendFiles, DeleteFiles, OverwriteFiles, and RewriteFiles.
42+
///
43+
/// This interface uses CRTP (Curiously Recurring Template Pattern) to enable
44+
/// fluent API method chaining in derived classes, matching the Java pattern
45+
/// where SnapshotUpdate<ThisT> allows methods to return the actual derived type.
46+
///
47+
/// Methods included from Java API:
48+
/// - Set(): Set summary properties
49+
/// - StageOnly(): Stage without updating current snapshot
50+
/// - DeleteWith(): Custom file deletion callback
51+
/// - ToBranch(): Commit to a specific branch
52+
///
53+
/// Methods deferred (will be added when infrastructure is available):
54+
/// - ScanManifestsWith(): Custom executor for parallel manifest scanning
55+
/// (requires executor/thread pool infrastructure)
56+
///
57+
/// \tparam Derived The actual implementation class (e.g., AppendFiles)
58+
template <typename Derived>
59+
class ICEBERG_EXPORT SnapshotUpdate : public PendingUpdateTyped<Snapshot> {
60+
public:
61+
~SnapshotUpdate() override = default;
62+
63+
/// \brief Set a summary property on the snapshot
64+
///
65+
/// Summary properties provide metadata about the changes in the snapshot,
66+
/// such as the operation type, number of files added/deleted, etc.
67+
///
68+
/// \param property The property name
69+
/// \param value The property value
70+
/// \return Reference to derived class for method chaining
71+
Derived& Set(std::string_view property, std::string_view value) {
72+
summary_[std::string(property)] = std::string(value);
73+
return static_cast<Derived&>(*this);
74+
}
75+
76+
/// \brief Stage the snapshot without updating the table's current snapshot
77+
///
78+
/// When StageOnly() is called, the snapshot will be committed to table metadata
79+
/// but will not update the current snapshot ID. The snapshot will not be added
80+
/// to the table's snapshot log. This is useful for creating wap branches or
81+
/// validating changes before making them current.
82+
///
83+
/// \return Reference to derived class for method chaining
84+
Derived& StageOnly() {
85+
stage_only_ = true;
86+
return static_cast<Derived&>(*this);
87+
}
88+
89+
/// \brief Set a custom file deletion callback
90+
///
91+
/// By default, files are deleted using the table's FileIO implementation.
92+
/// This method allows providing a custom deletion callback for use cases like:
93+
/// - Tracking deleted files for auditing
94+
/// - Implementing custom retention policies
95+
/// - Delegating deletion to external systems
96+
///
97+
/// \param delete_func Callback function that will be called for each file to delete
98+
/// \return Reference to derived class for method chaining
99+
Derived& DeleteWith(std::function<void(std::string_view)> delete_func) {
100+
delete_func_ = std::move(delete_func);
101+
return static_cast<Derived&>(*this);
102+
}
103+
104+
/// \brief Commit the snapshot to a specific branch
105+
///
106+
/// By default, snapshots are committed to the table's main branch.
107+
/// This method allows committing to a named branch instead, which is useful for:
108+
/// - Write-Audit-Publish (WAP) workflows
109+
/// - Feature branch development
110+
/// - Testing changes before merging to main
111+
///
112+
/// \param branch The name of the branch to commit to
113+
/// \return Reference to derived class for method chaining
114+
Derived& ToBranch(std::string_view branch) {
115+
target_branch_ = std::string(branch);
116+
return static_cast<Derived&>(*this);
117+
}
118+
119+
protected:
120+
SnapshotUpdate() = default;
121+
122+
/// \brief Summary properties to set on the snapshot
123+
std::unordered_map<std::string, std::string> summary_;
124+
125+
/// \brief Whether to stage only without updating current snapshot
126+
bool stage_only_ = false;
127+
128+
/// \brief Custom file deletion callback
129+
std::optional<std::function<void(std::string_view)>> delete_func_;
130+
131+
/// \brief Target branch name for commit (nullopt means main branch)
132+
std::optional<std::string> target_branch_;
133+
};
134+
135+
} // namespace iceberg

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ add_iceberg_test(table_test
8282
json_internal_test.cc
8383
pending_update_test.cc
8484
schema_json_test.cc
85+
snapshot_update_test.cc
8586
table_test.cc
8687
table_metadata_builder_test.cc
8788
table_requirement_test.cc
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/snapshot_update.h"
21+
22+
#include <gtest/gtest.h>
23+
24+
#include "iceberg/result.h"
25+
#include "iceberg/snapshot.h"
26+
#include "iceberg/test/matchers.h"
27+
28+
namespace iceberg {
29+
30+
// Mock implementation of SnapshotUpdate for testing
31+
// This mock tracks which methods were called to verify behavior
32+
class MockSnapshotUpdate : public SnapshotUpdate<MockSnapshotUpdate> {
33+
public:
34+
MockSnapshotUpdate() = default;
35+
36+
Result<Snapshot> Apply() override {
37+
if (should_fail_) {
38+
return ValidationFailed("Mock validation failed");
39+
}
40+
apply_called_ = true;
41+
42+
// Return a Snapshot that reflects the configuration set via builder methods
43+
// This allows us to verify behavior through the public API
44+
return Snapshot{
45+
.snapshot_id = 1,
46+
.parent_snapshot_id = std::nullopt,
47+
.sequence_number = 1,
48+
.timestamp_ms = TimePointMs{std::chrono::milliseconds{1000}},
49+
.manifest_list = "s3://bucket/metadata/snap-1-manifest-list.avro",
50+
.summary = summary_, // Summary is populated by Set() calls
51+
.schema_id = std::nullopt,
52+
};
53+
}
54+
55+
Status Commit() override {
56+
if (should_fail_commit_) {
57+
return CommitFailed("Mock commit failed");
58+
}
59+
commit_called_ = true;
60+
61+
// Simulate file deletion if callback is set
62+
if (delete_func_) {
63+
// In a real implementation, this would delete actual files
64+
// For testing, just call the callback
65+
(*delete_func_)("test-file-to-delete.parquet");
66+
}
67+
68+
return {};
69+
}
70+
71+
void SetShouldFail(bool fail) { should_fail_ = fail; }
72+
void SetShouldFailCommit(bool fail) { should_fail_commit_ = fail; }
73+
bool ApplyCalled() const { return apply_called_; }
74+
bool CommitCalled() const { return commit_called_; }
75+
76+
private:
77+
bool should_fail_ = false;
78+
bool should_fail_commit_ = false;
79+
bool apply_called_ = false;
80+
bool commit_called_ = false;
81+
};
82+
83+
TEST(SnapshotUpdateTest, SetSummaryProperty) {
84+
MockSnapshotUpdate update;
85+
update.Set("operation", "append");
86+
87+
// Verify through public API: the snapshot from Apply() should have the summary
88+
auto result = update.Apply();
89+
ASSERT_THAT(result, IsOk());
90+
91+
const auto& snapshot = result.value();
92+
EXPECT_EQ(snapshot.summary.size(), 1);
93+
EXPECT_EQ(snapshot.summary.at("operation"), "append");
94+
}
95+
96+
TEST(SnapshotUpdateTest, SetMultipleSummaryProperties) {
97+
MockSnapshotUpdate update;
98+
update.Set("operation", "append").Set("added-files-count", "5");
99+
100+
// Verify through public API
101+
auto result = update.Apply();
102+
ASSERT_THAT(result, IsOk());
103+
104+
const auto& snapshot = result.value();
105+
EXPECT_EQ(snapshot.summary.size(), 2);
106+
EXPECT_EQ(snapshot.summary.at("operation"), "append");
107+
EXPECT_EQ(snapshot.summary.at("added-files-count"), "5");
108+
}
109+
110+
TEST(SnapshotUpdateTest, DeleteWith) {
111+
MockSnapshotUpdate update;
112+
std::vector<std::string> deleted_files;
113+
114+
// Set up callback to track deleted files
115+
update.DeleteWith([&deleted_files](std::string_view path) {
116+
deleted_files.push_back(std::string(path));
117+
});
118+
119+
// Verify through public API: calling Commit() should invoke the callback
120+
auto status = update.Commit();
121+
EXPECT_THAT(status, IsOk());
122+
123+
// The mock implementation calls the delete callback with a test file
124+
EXPECT_EQ(deleted_files.size(), 1);
125+
EXPECT_EQ(deleted_files[0], "test-file-to-delete.parquet");
126+
}
127+
128+
TEST(SnapshotUpdateTest, MethodChaining) {
129+
MockSnapshotUpdate update;
130+
131+
// Test that all methods return the derived type for chaining
132+
update.Set("operation", "append")
133+
.Set("added-files-count", "5")
134+
.Set("added-records", "1000")
135+
.StageOnly();
136+
137+
// Verify through public API
138+
auto result = update.Apply();
139+
ASSERT_THAT(result, IsOk());
140+
141+
const auto& snapshot = result.value();
142+
EXPECT_EQ(snapshot.summary.size(), 3);
143+
EXPECT_EQ(snapshot.summary.at("operation"), "append");
144+
EXPECT_EQ(snapshot.summary.at("added-files-count"), "5");
145+
EXPECT_EQ(snapshot.summary.at("added-records"), "1000");
146+
}
147+
148+
TEST(SnapshotUpdateTest, MethodChainingWithAllMethods) {
149+
MockSnapshotUpdate update;
150+
std::vector<std::string> deleted_files;
151+
152+
// Chain all builder methods together
153+
update.Set("operation", "append")
154+
.Set("added-files-count", "5")
155+
.DeleteWith([&deleted_files](std::string_view path) {
156+
deleted_files.push_back(std::string(path));
157+
})
158+
.ToBranch("wap-branch")
159+
.StageOnly();
160+
161+
// Verify through Apply()
162+
auto result = update.Apply();
163+
ASSERT_THAT(result, IsOk());
164+
165+
const auto& snapshot = result.value();
166+
EXPECT_EQ(snapshot.summary.at("operation"), "append");
167+
EXPECT_EQ(snapshot.summary.at("added-files-count"), "5");
168+
169+
// Verify through Commit()
170+
auto status = update.Commit();
171+
EXPECT_THAT(status, IsOk());
172+
EXPECT_EQ(deleted_files.size(), 1);
173+
}
174+
175+
TEST(SnapshotUpdateTest, ApplySuccess) {
176+
MockSnapshotUpdate update;
177+
auto result = update.Apply();
178+
EXPECT_THAT(result, IsOk());
179+
EXPECT_TRUE(update.ApplyCalled());
180+
}
181+
182+
TEST(SnapshotUpdateTest, ApplyValidationFailed) {
183+
MockSnapshotUpdate update;
184+
update.SetShouldFail(true);
185+
auto result = update.Apply();
186+
EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed));
187+
EXPECT_THAT(result, HasErrorMessage("Mock validation failed"));
188+
}
189+
190+
TEST(SnapshotUpdateTest, CommitSuccess) {
191+
MockSnapshotUpdate update;
192+
auto status = update.Commit();
193+
EXPECT_THAT(status, IsOk());
194+
EXPECT_TRUE(update.CommitCalled());
195+
}
196+
197+
TEST(SnapshotUpdateTest, CommitFailed) {
198+
MockSnapshotUpdate update;
199+
update.SetShouldFailCommit(true);
200+
auto status = update.Commit();
201+
EXPECT_THAT(status, IsError(ErrorKind::kCommitFailed));
202+
EXPECT_THAT(status, HasErrorMessage("Mock commit failed"));
203+
}
204+
205+
TEST(SnapshotUpdateTest, InheritanceFromPendingUpdate) {
206+
std::unique_ptr<PendingUpdate> base_ptr = std::make_unique<MockSnapshotUpdate>();
207+
auto status = base_ptr->Commit();
208+
EXPECT_THAT(status, IsOk());
209+
}
210+
211+
TEST(SnapshotUpdateTest, InheritanceFromPendingUpdateTyped) {
212+
std::unique_ptr<PendingUpdateTyped<Snapshot>> typed_ptr =
213+
std::make_unique<MockSnapshotUpdate>();
214+
auto status = typed_ptr->Commit();
215+
EXPECT_THAT(status, IsOk());
216+
217+
auto result = typed_ptr->Apply();
218+
EXPECT_THAT(result, IsOk());
219+
}
220+
221+
} // namespace iceberg

src/iceberg/type_fwd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ class TableUpdateContext;
159159
class PendingUpdate;
160160
template <typename T>
161161
class PendingUpdateTyped;
162+
template <typename Derived>
163+
class SnapshotUpdate;
162164

163165
/// ----------------------------------------------------------------------------
164166
/// TODO: Forward declarations below are not added yet.

0 commit comments

Comments
 (0)