reduce output nesting inference

dougbrn · dougbrn · commit 3fd1743942ea · 2025-03-12T15:20:00.000-07:00
diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py
@@ -845,7 +845,7 @@ def sort_values(
                 return None
             return new_df
 
-    def reduce(self, func, *args, **kwargs) -> NestedFrame:  # type: ignore[override]
+    def reduce(self, func, *args, infer_nesting=True, **kwargs) -> NestedFrame:  # type: ignore[override]
         """
         Takes a function and applies it to each top-level row of the NestedFrame.
 
@@ -862,6 +862,12 @@ def reduce(self, func, *args, **kwargs) -> NestedFrame:  # type: ignore[override
         args : positional arguments
             Positional arguments to pass to the function, the first *args should be the names of the
             columns to apply the function to.
+        infer_nesting : bool, default True
+            If True, the function will pack output columns into nested
+            structures based on column names adhering to a nested naming
+            scheme. E.g. "nested.b" and "nested.c" will be packed into a column
+            called "nested" with columns "b" and "c". If False, all outputs
+            will be returned as base columns.
         kwargs : keyword arguments, optional
             Keyword arguments to pass to the function.
 
@@ -915,7 +921,25 @@ def reduce(self, func, *args, **kwargs) -> NestedFrame:  # type: ignore[override
                 iterators.append(self[layer].array.iter_field_lists(col))
 
         results = [func(*cols, *extra_args, **kwargs) for cols in zip(*iterators)]
-        return NestedFrame(results, index=self.index)
+        results_nf = NestedFrame(results, index=self.index)
+
+        if infer_nesting:
+            # find potential nested structures
+            nested_cols = []
+            for column in results_nf.columns:
+                if isinstance(column, str) and "." in column:
+                    layer, col = column.split(".", 1)
+                    nested_cols.append(layer)
+            nested_cols = np.unique(nested_cols)
+
+            # pack results into nested structures
+            for layer in nested_cols:
+                layer_cols = [col for col in results_nf.columns if col.startswith(f"{layer}.")]
+                rename_df = results_nf[layer_cols].rename(columns=lambda x: x.split(".", 1)[1])
+                nested_col = pack_lists(rename_df, name=layer)
+                results_nf = results_nf[[col for col in results_nf.columns if not col.startswith(f"{layer}.")]].join(nested_col)
+
+        return results_nf
 
     def to_parquet(self, path, by_layer=False, **kwargs) -> None:
         """Creates parquet file(s) with the data of a NestedFrame, either
diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py
@@ -1021,6 +1021,57 @@ def cols_allclose(col1, col2):
         result, pd.DataFrame({"allclose": [True, True, True]}, index=pd.Index([0, 1, 2], name="idx"))
     )
 
+
+def test_reduce_infer_nesting():
+    """Test that reduce packs "layer.field" outputs into nested columns (module-level so pytest collects it)."""
+    ndf = generate_data(3, 20, seed=1)
+
+    # Test simple case
+    def complex_output(flux):
+        return {"max_flux": np.max(flux), "lc.flux_quantiles": np.quantile(flux, [0.1, 0.2, 0.3, 0.4, 0.5])}
+
+    result = ndf.reduce(complex_output, "nested.flux")
+    assert list(result.columns) == ["max_flux", "lc"]
+    assert list(result.lc.nest.fields) == ["flux_quantiles"]
+
+    # Test multi-column nested output
+    def complex_output(flux):
+        return {"max_flux": np.max(flux),
+                "lc.flux_quantiles": np.quantile(flux, [0.1, 0.2, 0.3, 0.4, 0.5]),
+                "lc.labels": [0.1, 0.2, 0.3, 0.4, 0.5]}
+
+    result = ndf.reduce(complex_output, "nested.flux")
+    assert list(result.columns) == ["max_flux", "lc"]
+    assert list(result.lc.nest.fields) == ["flux_quantiles", "labels"]
+
+    # Test integer names (tuple outputs get positional labels; nothing to pack)
+    def complex_output(flux):
+        return np.max(flux), np.quantile(flux, [0.1, 0.2, 0.3, 0.4, 0.5]), [0.1, 0.2, 0.3, 0.4, 0.5]
+
+    result = ndf.reduce(complex_output, "nested.flux")
+    assert list(result.columns) == [0, 1, 2]
+
+    # Test multiple nested structure output
+    def complex_output(flux):
+        return {"max_flux": np.max(flux),
+                "lc.flux_quantiles": np.quantile(flux, [0.1, 0.2, 0.3, 0.4, 0.5]),
+                "lc.labels": [0.1, 0.2, 0.3, 0.4, 0.5],
+                "meta.colors": ["green", "red", "blue"]}
+
+    result = ndf.reduce(complex_output, "nested.flux")
+    assert list(result.columns) == ["max_flux", "lc", "meta"]
+    assert list(result.lc.nest.fields) == ["flux_quantiles", "labels"]
+    assert list(result.meta.nest.fields) == ["colors"]  # check the "meta" column itself, not an attribute of "lc"
+
+    # Test only nested structure output
+    def complex_output(flux):
+        return {"lc.flux_quantiles": np.quantile(flux, [0.1, 0.2, 0.3, 0.4, 0.5]),
+                "lc.labels": [0.1, 0.2, 0.3, 0.4, 0.5]}
+
+    result = ndf.reduce(complex_output, "nested.flux")
+    assert list(result.columns) == ["lc"]
+    assert list(result.lc.nest.fields) == ["flux_quantiles", "labels"]
+
 
 def test_scientific_notation():
     """