Skip to content

Commit 6ba7aa8

Browse files
committed
adopt nesteddtype validator
1 parent 9ecb7af commit 6ba7aa8

File tree

1 file changed

+21
-15
lines changed
  • src/nested_pandas/nestedframe

1 file changed

+21
-15
lines changed

src/nested_pandas/nestedframe/io.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -164,19 +164,25 @@ def _cast_struct_cols_to_nested(df, reject_nesting):
164164
"""cast struct columns to nested dtype"""
165165
# Attempt to cast struct columns to NestedDTypes
166166
for col, dtype in df.dtypes.items():
167-
if pa.types.is_struct(dtype.pyarrow_dtype) and col not in reject_nesting:
168-
fields = dtype.pyarrow_dtype.fields
169-
if all([pa.types.is_list(field.type) for field in fields]):
170-
try:
171-
# Attempt to cast Struct to NestedDType
172-
df = df.astype({col: NestedDtype(dtype.pyarrow_dtype)})
173-
except ValueError as err:
174-
# If cast fails, the struct likely does not fit nested-pandas
175-
# criteria for a valid nested column
176-
raise ValueError(
177-
f"Column '{col}' is a Struct, but an attempt to cast it to a NestedDType failed. "
178-
"This is likely due to the struct not meeting the requirements for a nested column "
179-
"(all fields should be equal length). To proceed, you may add the column to the "
180-
"`reject_nesting` argument of the read_parquet function to skip the cast attempt."
181-
) from err
167+
# First validate the dtype
168+
# raises ValueError when the dtype is not a struct whose fields are all lists
169+
valid_dtype = True
170+
try:
171+
NestedDtype._validate_dtype(dtype.pyarrow_dtype)
172+
except ValueError:
173+
valid_dtype = False
174+
175+
if valid_dtype and col not in reject_nesting:
176+
try:
177+
# Attempt to cast Struct to NestedDType
178+
df = df.astype({col: NestedDtype(dtype.pyarrow_dtype)})
179+
except ValueError as err:
180+
# If cast fails, the struct likely does not fit nested-pandas
181+
# criteria for a valid nested column
182+
raise ValueError(
183+
f"Column '{col}' is a Struct, but an attempt to cast it to a NestedDType failed. "
184+
"This is likely due to the struct not meeting the requirements for a nested column "
185+
"(all fields should be equal length). To proceed, you may add the column to the "
186+
"`reject_nesting` argument of the read_parquet function to skip the cast attempt."
187+
) from err
182188
return df

0 commit comments

Comments
 (0)