add test for mixed struct behavior

dougbrn · dougbrn · commit 9279064c1a48 · 2025-04-17T11:06:23.000-07:00
diff --git a/tests/nested_pandas/nestedframe/test_io.py b/tests/nested_pandas/nestedframe/test_io.py
@@ -3,6 +3,7 @@
 
 import pandas as pd
 import pyarrow as pa
+import pyarrow.parquet as pq
 import pytest
 from nested_pandas import read_parquet
 from nested_pandas.datasets import generate_data
@@ -101,6 +102,51 @@ def test_read_parquet_catch_failed_cast():
         read_parquet("tests/test_data/not_nestable.parquet")
 
 
+def test_read_parquet_test_mixed_struct():
+    """Test reading a parquet file with mixed struct types"""
+    # Create the pure-list StructArray
+    field1 = pa.array([[1, 2], [3, 4], [5, 6]])
+    field2 = pa.array([["a", "b"], ["b", "c"], ["c", "d"]])
+    field3 = pa.array([[True, False], [True, False], [True, False]])
+    struct_array_list = pa.StructArray.from_arrays([field1, field2, field3], ["list1", "list2", "list3"])
+
+    # Create the value StructArray
+    field1 = pa.array([1, 2, 3])
+    field2 = pa.array(["a", "b", "c"])
+    field3 = pa.array([True, False, True])
+    struct_array_val = pa.StructArray.from_arrays([field1, field2, field3], ["val1", "va12", "val3"])
+
+    # Create the mixed-list StructArray
+    field1 = pa.array([1, 2, 3])
+    field2 = pa.array(["a", "b", "c"])
+    field3 = pa.array([[True, False], [True, False], [True, False]])
+    struct_array_mix = pa.StructArray.from_arrays([field1, field2, field3], ["val1", "va12", "list3"])
+
+    # Create a PyArrow Table with the StructArray as one of the columns
+    table = pa.table(
+        {
+            "id": pa.array([100, 101, 102]),  # Another column
+            "struct_list": struct_array_list,  # Struct column
+            "struct_value": struct_array_val,
+            "struct_mix": struct_array_mix,
+        }
+    )
+
+    # Write to a temporary file
+    with tempfile.TemporaryDirectory() as tmpdir:
+        pq.write_table(table, os.path.join(tmpdir, "structs.parquet"))
+
+        # Test full read
+        nf = read_parquet(os.path.join(tmpdir, "structs.parquet"))
+        assert nf.columns.tolist() == ["id", "struct_list", "struct_value", "struct_mix"]
+        assert nf.nested_columns == ["struct_list"]
+
+        # Test partial read
+        nf = read_parquet(os.path.join(tmpdir, "structs.parquet"), columns=["id", "struct_mix.list3"])
+        assert nf.columns.tolist() == ["id", "struct_mix"]
+        assert nf.nested_columns == ["struct_mix"]
+
+
 def test_to_parquet():
     """Test writing a parquet file with no columns specified"""
     # Load in the example file