88
99
1010@pytest .mark .parametrize ("columns" , [["a" ], None ])
11- @pytest .mark .parametrize ("pack_columns" , [{"nested1" : ["c" ], "nested2" : ["e" ]}, {"nested1" : ["d" ]}, None ])
12- def test_read_parquet (tmp_path , columns , pack_columns ):
11+ def test_read_parquet (tmp_path , columns ):
1312 """Test nested parquet loading"""
1413 # Setup a temporary directory for files
1514 save_path = os .path .join (tmp_path , "." )
@@ -35,33 +34,22 @@ def test_read_parquet(tmp_path, columns, pack_columns):
3534 # Read from parquet
3635 nf = read_parquet (
3736 data = os .path .join (save_path , "base.parquet" ),
38- to_pack = {
39- "nested1" : os .path .join (save_path , "nested1.parquet" ),
40- "nested2" : os .path .join (save_path , "nested2.parquet" ),
41- },
4237 columns = columns ,
43- pack_columns = pack_columns ,
4438 )
4539
40+ nest1 = read_parquet (os .path .join (save_path , "nested1.parquet" ))
41+ nest2 = read_parquet (os .path .join (save_path , "nested2.parquet" ))
42+
43+ nf = nf .add_nested (nest1 , name = "nested1" ).add_nested (nest2 , name = "nested2" )
44+
4645 # Check Base Columns
4746 if columns is not None :
4847 assert nf .columns .tolist () == columns + ["nested1" , "nested2" ]
4948 else :
5049 assert nf .columns .tolist () == base .columns .tolist () + ["nested1" , "nested2" ]
5150
52- # Check Nested Columns
53- if pack_columns is not None :
54- for nested_col in pack_columns :
55- assert nf [nested_col ].nest .fields == pack_columns [nested_col ]
56- else :
57- for nested_col in nf .nested_columns :
58- if nested_col == "nested1" :
59- assert nf [nested_col ].nest .fields == nested1 .columns .tolist ()
60- elif nested_col == "nested2" :
61- assert nf [nested_col ].nest .fields == nested2 .columns .tolist ()
62-
6351
64- def test_write_packed_parquet ():
52+ def test_write_parquet ():
6553 """Tests writing a nested frame to a single parquet file."""
6654 # Generate some test data
6755 base = pd .DataFrame (data = {"a" : [1 , 2 , 3 ], "b" : [2 , 4 , 6 ]}, index = [0 , 1 , 2 ])
@@ -86,50 +74,3 @@ def test_write_packed_parquet():
8674 # Read from parquet
8775 nf2 = read_parquet (temp .name )
8876 assert_frame_equal (nf , nf2 )
89-
90-
91- def test_write_parquet_by_layer ():
92- """Tests writing a nested frame to multiple parquet files."""
93- base = pd .DataFrame (data = {"a" : [1 , 2 , 3 ], "b" : [2 , 4 , 6 ]}, index = [0 , 1 , 2 ])
94-
95- nested1 = pd .DataFrame (
96- data = {"c" : [0 , 2 , 4 , 1 , 4 , 3 , 1 , 4 , 1 ], "d" : [5 , 4 , 7 , 5 , 3 , 1 , 9 , 3 , 4 ]},
97- index = [0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 , 2 ],
98- )
99-
100- nested2 = pd .DataFrame (
101- data = {"e" : [0 , 2 , 4 , 1 , 4 , 3 , 1 , 4 , 1 ], "f" : [5 , 4 , 7 , 5 , 3 , 1 , 9 , 3 , 4 ]},
102- index = [0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 , 2 ],
103- )
104-
105- # Construct the NestedFrame
106- nf = NestedFrame (base ).add_nested (nested1 , name = "nested1" ).add_nested (nested2 , name = "nested2" )
107-
108- # Asser that a temporary file path must be a directory when by_layer is True
109- with pytest .raises (ValueError ):
110- nf .to_parquet (tempfile .NamedTemporaryFile (suffix = ".parquet" ).name , by_layer = True )
111-
112- # Write to parquet using a named temporary file
113- tmp_dir = tempfile .TemporaryDirectory ()
114- nf .to_parquet (tmp_dir .name , by_layer = True )
115-
116- # Validate the individual layers were correctly saved as their own parquet files
117- read_base_frame = read_parquet (os .path .join (tmp_dir .name , "base.parquet" ), to_pack = None )
118- assert_frame_equal (read_base_frame , nf .drop (columns = ["nested1" , "nested2" ]))
119-
120- read_nested1 = read_parquet (os .path .join (tmp_dir .name , "nested1.parquet" ), to_pack = None )
121- assert_frame_equal (read_nested1 , nf ["nested1" ].nest .to_flat ())
122-
123- read_nested2 = read_parquet (os .path .join (tmp_dir .name , "nested2.parquet" ), to_pack = None )
124- assert_frame_equal (read_nested2 , nf ["nested2" ].nest .to_flat ())
125-
126- # Validate the entire NestedFrame can be read
127- entire_nf = read_parquet (
128- data = os .path .join (tmp_dir .name , "base.parquet" ),
129- to_pack = {
130- "nested1" : os .path .join (tmp_dir .name , "nested1.parquet" ),
131- "nested2" : os .path .join (tmp_dir .name , "nested2.parquet" ),
132- },
133- )
134-
135- assert_frame_equal (nf , entire_nf )
0 commit comments