Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/nested_pandas/series/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ def to_flat(self, fields: list[str] | None = None) -> pd.DataFrame:

return pd.DataFrame(flat_series)

@property
def list_lengths(self) -> list[int]:
    """Lengths of the list arrays, one entry per element of the series.

    The value is read straight from the ``list_lengths`` attribute of the
    array backing the wrapped series.
    """
    backing_array = self._series.array
    return backing_array.list_lengths

@property
def flat_length(self) -> int:
"""Length of the flat arrays"""
Expand Down
18 changes: 15 additions & 3 deletions src/nested_pandas/series/ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,12 +761,24 @@ def field_names(self) -> list[str]:
"""Names of the nested columns"""
return [field.name for field in self._chunked_array.chunk(0).type]

def _iter_list_lengths(self) -> Generator[int, None, None]:
    """Yield the list length of every element, walking the chunks in order.

    Lengths are taken from the list array held in the first struct field of
    each chunk. An element whose length is null is reported as ``0``.
    """
    # NOTE(review): validity is read from the first field's lengths, not from
    # the struct itself; confirm null structs always carry null child lists.
    for struct_chunk in self._chunked_array.iterchunks():
        lengths = struct_chunk.field(0).value_lengths()
        yield from (scalar.as_py() if scalar.is_valid else 0 for scalar in lengths)

@property
def list_lengths(self) -> list[int]:
    """Lengths of the list arrays, materialized as a plain Python list.

    Contains one integer per value produced by ``_iter_list_lengths``.
    """
    return [*self._iter_list_lengths()]

@property
def flat_length(self) -> int:
    """Length of the flat arrays, i.e. the total of all list lengths.

    Summed from ``_iter_list_lengths`` so that this property and
    ``list_lengths`` share a single source for per-element lengths. With no
    elements the result is ``0``.
    """
    # Only the helper-based return is kept: the earlier per-chunk sum sat in
    # front of it and made it unreachable.
    return sum(self._iter_list_lengths())

@property
def num_chunks(self) -> int:
Expand Down
13 changes: 13 additions & 0 deletions tests/nested_pandas/series/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,19 @@ def test_fields():
assert_array_equal(series.nest.fields, ["a", "b"])


def test_list_lengths():
    """Check that `.nest.list_lengths` gives one length per packed element."""
    frames = [
        pd.DataFrame({"a": [1, 2, 3], "b": [1.0, 5.0, 6.0], "c": ["a", "b", "c"]}),
        None,
        pd.DataFrame({"a": [1, 2], "b": [None, 0.0], "c": ["a", "b"]}),
    ]
    packed = pack_seq(frames)

    assert packed.shape == (3,)
    # The missing (None) element is expected to have length 0.
    assert packed.nest.list_lengths == [3, 0, 2]


def test_flat_length():
"""Test that the .nest.flat_length attribute works."""
struct_array = pa.StructArray.from_arrays(
Expand Down
18 changes: 18 additions & 0 deletions tests/nested_pandas/series/test_ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1226,6 +1226,24 @@ def test_field_names():
assert ext_array.field_names == ["a", "b"]


def test_list_lengths():
    """Check `list_lengths` on a multi-chunk array with an empty chunk and a null element."""
    a_lists = pa.array([np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 1.0, 2.0])])
    b_lists = pa.array([-np.array([4.0, 5.0, 6.0]), -np.array([3.0, 4.0, 5.0, 6.0])])
    filled = pa.StructArray.from_arrays(arrays=[a_lists, b_lists], names=["a", "b"])
    no_rows = pa.array([], type=filled.type)
    one_null = pa.array([None], type=filled.type)
    chunks = pa.chunked_array([filled, no_rows, filled, one_null])

    ext_array = NestedExtensionArray(chunks)

    # Each filled chunk gives 3 and 4; the empty chunk adds no entries; the
    # null element is expected to count as 0.
    assert ext_array.list_lengths == [3, 4, 3, 4, 0]


def test_flat_length():
"""Tests that the flat length of the extension array is correct."""
struct_array = pa.StructArray.from_arrays(
Expand Down