1212from pandas .api .extensions import no_default
1313from pandas .core .computation .expr import PARSERS , PandasExprVisitor
1414
15- from nested_pandas .series import packer
15+ from nested_pandas .nestedframe . utils import extract_nest_names
1616from nested_pandas .series .dtype import NestedDtype
17-
18- from ..series .packer import pack_sorted_df_into_struct
19- from .utils import extract_nest_names
17+ from nested_pandas .series .packer import pack , pack_lists , pack_sorted_df_into_struct
2018
2119
2220class NestedPandasExprVisitor (PandasExprVisitor ):
@@ -219,10 +217,8 @@ def __setitem__(self, key, value):
219217 "." in key and key .split ("." )[0 ] in self .nested_columns
220218 ):
221219 nested , col = key .split ("." )
222- new_flat = self [nested ].nest .to_flat ()
223- new_flat [col ] = value
224- packed = packer .pack (new_flat )
225- return super ().__setitem__ (nested , packed )
220+ new_nested_series = self [nested ].nest .with_flat_field (col , value )
221+ return super ().__setitem__ (nested , new_nested_series )
226222
227223 # Adding a new nested structure from a column
228224 # Allows statements like ndf["new_nested.t"] = ndf["nested.t"] - 5
@@ -231,8 +227,9 @@ def __setitem__(self, key, value):
231227 if isinstance (value , pd .Series ):
232228 value .name = col
233229 value = value .to_frame ()
234- packed = packer .pack (value )
235- return super ().__setitem__ (new_nested , packed )
230+ new_df = self .add_nested (value , name = new_nested )
231+ self ._update_inplace (new_df )
232+ return None
236233
237234 return super ().__setitem__ (key , value )
238235
@@ -242,6 +239,7 @@ def add_nested(
242239 name : str ,
243240 * ,
244241 how : str = "left" ,
242+ on : None | str | list [str ] = None ,
245243 dtype : NestedDtype | pd .ArrowDtype | pa .DataType | None = None ,
246244 ) -> Self : # type: ignore[name-defined] # noqa: F821
247245 """Packs input object to a nested column and adds it to the NestedFrame
@@ -272,6 +270,8 @@ def add_nested(
272270 index, and sort it lexicographically.
273271 - inner: form intersection of calling frame's index with other
274272 frame's index, preserving the order of the calling index.
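273+ on : str or None, default: None
274+ Column name forwarded as `on` to both `pack` and `join`. Only a single column name is currently supported; any other non-None value raises a ValueError.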
275275 dtype : dtype or None
276276 NestedDtype to use for the nested column; pd.ArrowDtype or
277277 pa.DataType can also be used to specify the nested dtype. If None,
@@ -282,13 +282,16 @@ def add_nested(
282282 NestedFrame
283283 A new NestedFrame with the added nested column.
284284 """
285+ if on is not None and not isinstance (on , str ):
286+ raise ValueError ("Currently we only support a single column for 'on'" )
285287 # Add sources to objects
286- packed = packer . pack (obj , name = name , dtype = dtype )
288+ packed = pack (obj , name = name , on = on , dtype = dtype )
287289 new_df = self .copy ()
288- return new_df .join (packed , how = how )
290+ res = new_df .join (packed , how = how , on = on )
291+ return res
289292
290293 @classmethod
291- def from_flat (cls , df , base_columns , nested_columns = None , index = None , name = "nested" ):
294+ def from_flat (cls , df , base_columns , nested_columns = None , on : str | None = None , name = "nested" ):
292295 """Creates a NestedFrame with base and nested columns from a flat
293296 dataframe.
294297
@@ -304,7 +307,7 @@ def from_flat(cls, df, base_columns, nested_columns=None, index=None, name="nest
304307 in the list will attempt to be packed into a single nested column
305308 with the name provided in `name`. If None, is defined as all
306309 columns not in `base_columns`.
307- index : str, or None
310+ on : str or None
308311 The name of a column to use as the new index. Typically, the index
309312 should have a unique value per row for base columns, and should
310313 repeat for nested columns. For example, a dataframe with two
@@ -330,11 +333,11 @@ def from_flat(cls, df, base_columns, nested_columns=None, index=None, name="nest
330333 """
331334
332335 # Resolve new index
333- if index is not None :
336+ if on is not None :
334337 # if a base column is chosen remove it
335- if index in base_columns :
336- base_columns = [col for col in base_columns if col != index ]
337- df = df .set_index (index )
338+ if on in base_columns :
339+ base_columns = [col for col in base_columns if col != on ]
340+ df = df .set_index (on )
338341
339342 # drop duplicates on index
340343 out_df = df [base_columns ][~ df .index .duplicated (keep = "first" )]
@@ -401,7 +404,7 @@ def from_lists(cls, df, base_columns=None, list_columns=None, name="nested"):
401404 raise ValueError ("No columns were assigned as list columns." )
402405
403406 # Pack list columns into a nested column
404- packed_df = packer . pack_lists (df [list_columns ])
407+ packed_df = pack_lists (df [list_columns ])
405408 packed_df .name = name
406409
407410 # join the nested column to the base_column df
@@ -519,17 +522,33 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> NestedFrame |
519522 # since it operated on the base attributes.
520523 if isinstance (result , _SeriesFromNest ):
521524 nest_name , flat_nest = result .nest_name , result .flat_nest
522- new_flat_nest = flat_nest .loc [result ]
523- result = self .copy ()
524- result [nest_name ] = pack_sorted_df_into_struct (new_flat_nest )
525+ # Reset index to "ordinal" like [0, 0, 0, 1, 1, 2, 2, 2]
526+ list_index = self [nest_name ].array .get_list_index ()
527+ flat_nest = flat_nest .set_index (list_index )
528+ query_result = result .set_axis (list_index )
529+ # Selecting flat values matching the query result
530+ new_flat_nest = flat_nest [query_result ]
531+ new_df = self ._set_filtered_flat_df (nest_name , new_flat_nest )
525532 else :
526- result = self .loc [result ]
533+ new_df = self .loc [result ]
527534
528535 if inplace :
529- self ._update_inplace (result )
536+ self ._update_inplace (new_df )
530537 return None
531538 else :
532- return result
539+ return new_df
540+
def _set_filtered_flat_df(self, nest_name, flat_df):
    """Swap in a filtered flat dataframe as the nested column ``nest_name``.

    ``flat_df`` is assumed to be indexed by "ordinal" row positions of this
    frame rather than by its index labels. For example, ``flat_df.index``
    may be ``[0, 2, 2, 2]`` while ``self.index`` is arbitrary (such as
    ``["a", "b", "a"]``) and ``self[nest_name].array.list_index`` is
    ``[0, 0, 1, 1, 1, 2, 2, 2, 2]``.

    Parameters
    ----------
    nest_name : str
        Name of the nested column to replace.
    flat_df : pd.DataFrame
        Filtered flat representation of the nested column, indexed by
        ordinal row position.

    Returns
    -------
    NestedFrame
        A frame carrying the re-packed nested column and the index of
        ``self``.
    """
    # Pack the surviving flat rows back into a single nested series.
    repacked = pack_sorted_df_into_struct(flat_df, name=nest_name)

    # Work on a positionally indexed copy so the assignment aligns with the
    # ordinal index of the packed series instead of the original labels.
    positional = self.reset_index(drop=True)
    positional[nest_name] = repacked

    # Put the caller's original index labels back.
    return positional.set_index(self.index)
533552
534553 def _resolve_dropna_target (self , on_nested , subset ):
535554 """resolves the target layer for a given set of dropna kwargs"""
@@ -654,34 +673,32 @@ def dropna(
654673 return super ().dropna (
655674 axis = axis , how = how , thresh = thresh , subset = subset , inplace = inplace , ignore_index = ignore_index
656675 )
676+ if ignore_index :
677+ raise ValueError ("ignore_index is not supported for nested columns" )
657678 if subset is not None :
658679 subset = [col .split ("." )[- 1 ] for col in subset ]
680+ target_flat = self [target ].nest .to_flat ()
681+ target_flat = target_flat .set_index (self [target ].array .get_list_index ())
659682 if inplace :
660- target_flat = self [target ].nest .to_flat ()
661683 target_flat .dropna (
662684 axis = axis ,
663685 how = how ,
664686 thresh = thresh ,
665687 subset = subset ,
666- inplace = inplace ,
667- ignore_index = ignore_index ,
688+ inplace = True ,
668689 )
669- self [target ] = packer .pack_flat (target_flat )
670- return self
671- # Or if not inplace
672- new_df = self .copy ()
673- new_df [target ] = packer .pack_flat (
674- new_df [target ]
675- .nest .to_flat ()
676- .dropna (
690+ else :
691+ target_flat = target_flat .dropna (
677692 axis = axis ,
678693 how = how ,
679694 thresh = thresh ,
680695 subset = subset ,
681- inplace = inplace ,
682- ignore_index = ignore_index ,
696+ inplace = False ,
683697 )
684- )
698+ new_df = self ._set_filtered_flat_df (nest_name = target , flat_df = target_flat )
699+ if inplace :
700+ self ._update_inplace (new_df )
701+ return None
685702 return new_df
686703
687704 def reduce (self , func , * args , ** kwargs ) -> NestedFrame : # type: ignore[override]
0 commit comments