|
7 | 7 | import pytest |
8 | 8 | from nested_pandas import read_parquet |
9 | 9 | from nested_pandas.datasets import generate_data |
| 10 | +from nested_pandas.nestedframe.io import from_pyarrow |
10 | 11 | from pandas.testing import assert_frame_equal |
11 | 12 | from upath import UPath |
12 | 13 |
|
@@ -221,6 +222,42 @@ def test_read_parquet_test_mixed_struct(): |
221 | 222 | assert len(nf.nested_columns) == 0 |
222 | 223 |
|
223 | 224 |
|
def test_from_pyarrow_test_mixed_struct():
    """Convert a pyarrow table holding three different kinds of struct columns.

    The table carries one struct whose fields are all list arrays, one whose
    fields are all flat value arrays, and one that mixes both. The assertions
    expect every top-level column to be kept, and only the all-list struct to
    show up in ``nested_columns``.
    """
    # Struct in which every field is a list array
    all_list_struct = pa.StructArray.from_arrays(
        [
            pa.array([[1, 2], [3, 4], [5, 6]]),
            pa.array([["a", "b"], ["b", "c"], ["c", "d"]]),
            pa.array([[True, False], [True, False], [True, False]]),
        ],
        ["list1", "list2", "list3"],
    )

    # Struct in which every field is a flat value array
    # NOTE(review): "va12" looks like a typo for "val2"; kept as-is since no
    # assertion depends on the field name -- confirm before renaming.
    all_value_struct = pa.StructArray.from_arrays(
        [
            pa.array([1, 2, 3]),
            pa.array(["a", "b", "c"]),
            pa.array([True, False, True]),
        ],
        ["val1", "va12", "val3"],
    )

    # Struct combining two flat value fields with one list field
    mixed_struct = pa.StructArray.from_arrays(
        [
            pa.array([1, 2, 3]),
            pa.array(["a", "b", "c"]),
            pa.array([[True, False], [True, False], [True, False]]),
        ],
        ["val1", "va12", "list3"],
    )

    # Assemble the table: a plain id column followed by the three structs
    column_data = {
        "id": pa.array([100, 101, 102]),
        "struct_list": all_list_struct,
        "struct_value": all_value_struct,
        "struct_mix": mixed_struct,
    }
    table = pa.table(column_data)

    # Convert the whole table and inspect the resulting frame
    nf = from_pyarrow(table)

    expected_columns = ["id", "struct_list", "struct_value", "struct_mix"]
    assert nf.columns.tolist() == expected_columns
    assert nf.nested_columns == ["struct_list"]
| 259 | + |
| 260 | + |
224 | 261 | def test_to_parquet(): |
225 | 262 | """Test writing a parquet file with no columns specified""" |
226 | 263 | # Load in the example file |
|
0 commit comments