99from upath import UPath
1010
1111from ..series .dtype import NestedDtype
12+ from ..series .packer import pack_lists
1213from ..series .utils import table_to_struct_array
1314from .core import NestedFrame
1415
@@ -17,6 +18,7 @@ def read_parquet(
1718 data : str | UPath | bytes ,
1819 columns : list [str ] | None = None ,
1920 reject_nesting : list [str ] | str | None = None ,
21+ autocast_list : bool = False ,
2022 ** kwargs ,
2123) -> NestedFrame :
2224 """
@@ -39,6 +41,8 @@ def read_parquet(
3941 is castable to a nested column. However, this assumption is invalid if
4042 the lists within the struct have mismatched lengths for any given item.
4143 Columns specified here will be read using the corresponding pandas.ArrowDtype.
44+ autocast_list: bool, default=False
45+ If True, automatically cast list columns to nested columns with NestedDtype.
4246 kwargs: dict
4347 Keyword arguments passed to `pyarrow.parquet.read_table`
4448
@@ -152,12 +156,13 @@ def read_parquet(
152156 for col , struct in structs .items ():
153157 table = table .append_column (col , struct )
154158
155- return from_pyarrow (table , reject_nesting = reject_nesting )
159+ return from_pyarrow (table , reject_nesting = reject_nesting , autocast_list = autocast_list )
156160
157161
158162def from_pyarrow (
159163 table : pa .Table ,
160164 reject_nesting : list [str ] | str | None = None ,
165+ autocast_list : bool = False ,
161166) -> NestedFrame :
162167 """
163168 Load a pyarrow Table object into a NestedFrame.
@@ -172,6 +177,8 @@ def from_pyarrow(
172177 is castable to a nested column. However, this assumption is invalid if
173178 the lists within the struct have mismatched lengths for any given item.
174179 Columns specified here will be read using the corresponding pandas.ArrowDtype.
180+ autocast_list: bool, default=False
181+ If True, automatically cast list columns to nested columns with NestedDtype.
175182
176183 Returns
177184 -------
@@ -191,6 +198,10 @@ def from_pyarrow(
191198 # Attempt to cast struct columns to NestedDTypes
192199 df = _cast_struct_cols_to_nested (df , reject_nesting )
193200
201+ # If autocast_list is True, cast list columns to NestedDTypes
202+ if autocast_list :
203+ df = _cast_list_cols_to_nested (df )
204+
194205 return df
195206
196207
@@ -221,3 +232,11 @@ def _cast_struct_cols_to_nested(df, reject_nesting):
221232 f" read_parquet(..., reject_nesting=['{ col } '])"
222233 ) from err
223234 return df
235+
236+
237+ def _cast_list_cols_to_nested (df ):
238+ """cast list columns to nested dtype"""
239+ for col , dtype in df .dtypes .items ():
240+ if pa .types .is_list (dtype .pyarrow_dtype ):
241+ df [col ] = pack_lists (df [[col ]])
242+ return df
0 commit comments