Skip to content

Commit 55cc6c3

Browse files
authored
fix(schema): check if new schema field names conflict with existing partition field names (#1619)
1 parent 7fde148 commit 55cc6c3

File tree

3 files changed

+732
-5
lines changed

3 files changed

+732
-5
lines changed

crates/iceberg/src/catalog/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ impl TableUpdate {
561561
pub fn apply(self, builder: TableMetadataBuilder) -> Result<TableMetadataBuilder> {
562562
match self {
563563
TableUpdate::AssignUuid { uuid } => Ok(builder.assign_uuid(uuid)),
564-
TableUpdate::AddSchema { schema, .. } => Ok(builder.add_schema(schema)),
564+
TableUpdate::AddSchema { schema, .. } => Ok(builder.add_schema(schema)?),
565565
TableUpdate::SetCurrentSchema { schema_id } => builder.set_current_schema(schema_id),
566566
TableUpdate::AddSpec { spec } => builder.add_partition_spec(spec),
567567
TableUpdate::SetDefaultSpec { spec_id } => builder.set_default_partition_spec(spec_id),

crates/iceberg/src/spec/table_metadata.rs

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,22 @@ impl TableMetadata {
224224
TableMetadataBuilder::new_from_metadata(self, current_file_location)
225225
}
226226

227+
/// Check if a partition field name exists in any partition spec.
228+
#[inline]
229+
pub(crate) fn partition_name_exists(&self, name: &str) -> bool {
230+
self.partition_specs
231+
.values()
232+
.any(|spec| spec.fields().iter().any(|pf| pf.name == name))
233+
}
234+
235+
/// Check if a field name exists in any schema.
236+
#[inline]
237+
pub(crate) fn name_exists_in_any_schema(&self, name: &str) -> bool {
238+
self.schemas
239+
.values()
240+
.any(|schema| schema.field_by_name(name).is_some())
241+
}
242+
227243
/// Returns format version of this metadata.
228244
#[inline]
229245
pub fn format_version(&self) -> FormatVersion {
@@ -3133,4 +3149,217 @@ mod tests {
31333149
// Verify it returns an error
31343150
assert!(result.is_err());
31353151
}
3152+
3153+
#[test]
3154+
fn test_partition_name_exists() {
3155+
let schema = Schema::builder()
3156+
.with_fields(vec![
3157+
NestedField::required(1, "data", Type::Primitive(PrimitiveType::String)).into(),
3158+
NestedField::required(2, "partition_col", Type::Primitive(PrimitiveType::Int))
3159+
.into(),
3160+
])
3161+
.build()
3162+
.unwrap();
3163+
3164+
let spec1 = PartitionSpec::builder(schema.clone())
3165+
.with_spec_id(1)
3166+
.add_partition_field("data", "data_partition", Transform::Identity)
3167+
.unwrap()
3168+
.build()
3169+
.unwrap();
3170+
3171+
let spec2 = PartitionSpec::builder(schema.clone())
3172+
.with_spec_id(2)
3173+
.add_partition_field("partition_col", "partition_bucket", Transform::Bucket(16))
3174+
.unwrap()
3175+
.build()
3176+
.unwrap();
3177+
3178+
// Build metadata with these specs
3179+
let metadata = TableMetadataBuilder::new(
3180+
schema,
3181+
spec1.clone().into_unbound(),
3182+
SortOrder::unsorted_order(),
3183+
"s3://test/location".to_string(),
3184+
FormatVersion::V2,
3185+
HashMap::new(),
3186+
)
3187+
.unwrap()
3188+
.add_partition_spec(spec2.into_unbound())
3189+
.unwrap()
3190+
.build()
3191+
.unwrap()
3192+
.metadata;
3193+
3194+
assert!(metadata.partition_name_exists("data_partition"));
3195+
assert!(metadata.partition_name_exists("partition_bucket"));
3196+
3197+
assert!(!metadata.partition_name_exists("nonexistent_field"));
3198+
assert!(!metadata.partition_name_exists("data")); // schema field name, not partition field name
3199+
assert!(!metadata.partition_name_exists(""));
3200+
}
3201+
3202+
#[test]
3203+
fn test_partition_name_exists_empty_specs() {
3204+
// Create metadata with no partition specs (unpartitioned table)
3205+
let schema = Schema::builder()
3206+
.with_fields(vec![
3207+
NestedField::required(1, "data", Type::Primitive(PrimitiveType::String)).into(),
3208+
])
3209+
.build()
3210+
.unwrap();
3211+
3212+
let metadata = TableMetadataBuilder::new(
3213+
schema,
3214+
PartitionSpec::unpartition_spec().into_unbound(),
3215+
SortOrder::unsorted_order(),
3216+
"s3://test/location".to_string(),
3217+
FormatVersion::V2,
3218+
HashMap::new(),
3219+
)
3220+
.unwrap()
3221+
.build()
3222+
.unwrap()
3223+
.metadata;
3224+
3225+
assert!(!metadata.partition_name_exists("any_field"));
3226+
assert!(!metadata.partition_name_exists("data"));
3227+
}
3228+
3229+
#[test]
3230+
fn test_name_exists_in_any_schema() {
3231+
// Create multiple schemas with different fields
3232+
let schema1 = Schema::builder()
3233+
.with_schema_id(1)
3234+
.with_fields(vec![
3235+
NestedField::required(1, "field1", Type::Primitive(PrimitiveType::String)).into(),
3236+
NestedField::required(2, "field2", Type::Primitive(PrimitiveType::Int)).into(),
3237+
])
3238+
.build()
3239+
.unwrap();
3240+
3241+
let schema2 = Schema::builder()
3242+
.with_schema_id(2)
3243+
.with_fields(vec![
3244+
NestedField::required(1, "field1", Type::Primitive(PrimitiveType::String)).into(),
3245+
NestedField::required(3, "field3", Type::Primitive(PrimitiveType::Long)).into(),
3246+
])
3247+
.build()
3248+
.unwrap();
3249+
3250+
let metadata = TableMetadataBuilder::new(
3251+
schema1,
3252+
PartitionSpec::unpartition_spec().into_unbound(),
3253+
SortOrder::unsorted_order(),
3254+
"s3://test/location".to_string(),
3255+
FormatVersion::V2,
3256+
HashMap::new(),
3257+
)
3258+
.unwrap()
3259+
.add_current_schema(schema2)
3260+
.unwrap()
3261+
.build()
3262+
.unwrap()
3263+
.metadata;
3264+
3265+
assert!(metadata.name_exists_in_any_schema("field1")); // exists in both schemas
3266+
assert!(metadata.name_exists_in_any_schema("field2")); // exists only in schema1 (historical)
3267+
assert!(metadata.name_exists_in_any_schema("field3")); // exists only in schema2 (current)
3268+
3269+
assert!(!metadata.name_exists_in_any_schema("nonexistent_field"));
3270+
assert!(!metadata.name_exists_in_any_schema("field4"));
3271+
assert!(!metadata.name_exists_in_any_schema(""));
3272+
}
3273+
3274+
#[test]
3275+
fn test_name_exists_in_any_schema_empty_schemas() {
3276+
let schema = Schema::builder().with_fields(vec![]).build().unwrap();
3277+
3278+
let metadata = TableMetadataBuilder::new(
3279+
schema,
3280+
PartitionSpec::unpartition_spec().into_unbound(),
3281+
SortOrder::unsorted_order(),
3282+
"s3://test/location".to_string(),
3283+
FormatVersion::V2,
3284+
HashMap::new(),
3285+
)
3286+
.unwrap()
3287+
.build()
3288+
.unwrap()
3289+
.metadata;
3290+
3291+
assert!(!metadata.name_exists_in_any_schema("any_field"));
3292+
}
3293+
3294+
#[test]
3295+
fn test_helper_methods_multi_version_scenario() {
3296+
// Test a realistic multi-version scenario
3297+
let initial_schema = Schema::builder()
3298+
.with_fields(vec![
3299+
NestedField::required(1, "id", Type::Primitive(PrimitiveType::Long)).into(),
3300+
NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
3301+
NestedField::required(
3302+
3,
3303+
"deprecated_field",
3304+
Type::Primitive(PrimitiveType::String),
3305+
)
3306+
.into(),
3307+
])
3308+
.build()
3309+
.unwrap();
3310+
3311+
let metadata = TableMetadataBuilder::new(
3312+
initial_schema,
3313+
PartitionSpec::unpartition_spec().into_unbound(),
3314+
SortOrder::unsorted_order(),
3315+
"s3://test/location".to_string(),
3316+
FormatVersion::V2,
3317+
HashMap::new(),
3318+
)
3319+
.unwrap();
3320+
3321+
let evolved_schema = Schema::builder()
3322+
.with_fields(vec![
3323+
NestedField::required(1, "id", Type::Primitive(PrimitiveType::Long)).into(),
3324+
NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
3325+
NestedField::required(
3326+
3,
3327+
"deprecated_field",
3328+
Type::Primitive(PrimitiveType::String),
3329+
)
3330+
.into(),
3331+
NestedField::required(4, "new_field", Type::Primitive(PrimitiveType::Double))
3332+
.into(),
3333+
])
3334+
.build()
3335+
.unwrap();
3336+
3337+
// Then add a third schema that removes the deprecated field
3338+
let _final_schema = Schema::builder()
3339+
.with_fields(vec![
3340+
NestedField::required(1, "id", Type::Primitive(PrimitiveType::Long)).into(),
3341+
NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
3342+
NestedField::required(4, "new_field", Type::Primitive(PrimitiveType::Double))
3343+
.into(),
3344+
NestedField::required(5, "latest_field", Type::Primitive(PrimitiveType::Boolean))
3345+
.into(),
3346+
])
3347+
.build()
3348+
.unwrap();
3349+
3350+
let final_metadata = metadata
3351+
.add_current_schema(evolved_schema)
3352+
.unwrap()
3353+
.build()
3354+
.unwrap()
3355+
.metadata;
3356+
3357+
assert!(!final_metadata.partition_name_exists("nonexistent_partition")); // unpartitioned table
3358+
3359+
assert!(final_metadata.name_exists_in_any_schema("id")); // exists in both schemas
3360+
assert!(final_metadata.name_exists_in_any_schema("name")); // exists in both schemas
3361+
assert!(final_metadata.name_exists_in_any_schema("deprecated_field")); // exists in both schemas
3362+
assert!(final_metadata.name_exists_in_any_schema("new_field")); // only in current schema
3363+
assert!(!final_metadata.name_exists_in_any_schema("never_existed"));
3364+
}
31363365
}

0 commit comments

Comments
 (0)