Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts #60934

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 21 additions & 62 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
Generator,
Hashable,
Iterable,
Mapping,
Sequence,
List,
Iterator,
)
from functools import wraps
from sys import getsizeof
Expand All @@ -15,6 +18,8 @@
Any,
Literal,
cast,
ArrayLike,
overload,
)
import warnings

Expand All @@ -27,7 +32,7 @@
index as libindex,
lib,
)
from pandas._libs.hashtable import duplicated
from pandas._libs.hashtable import duplicated, duplicated_int64
from pandas._typing import (
AnyAll,
AnyArrayLike,
Expand Down Expand Up @@ -517,7 +522,7 @@ def from_arrays(
)

@classmethod
@names_compat
@doc(doc_create_index)
def from_tuples(
cls,
tuples: Iterable[tuple[Hashable, ...]],
Expand All @@ -526,73 +531,27 @@ def from_tuples(
) -> MultiIndex:
"""
Convert list of tuples to MultiIndex.

Parameters
----------
tuples : list / sequence of tuple-likes
Each tuple is the index of one row/column.
sortorder : int or None
Level of sortedness (must be lexicographically sorted by that
level).
names : list / sequence of str, optional
Names for the levels in the index.

Returns
-------
MultiIndex

See Also
--------
MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
MultiIndex.from_product : Make a MultiIndex from cartesian product
of iterables.
MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

Examples
--------
>>> tuples = [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")]
>>> pd.MultiIndex.from_tuples(tuples, names=("number", "color"))
MultiIndex([(1, 'red'),
(1, 'blue'),
(2, 'red'),
(2, 'blue')],
names=['number', 'color'])
"""
if not is_list_like(tuples):
raise TypeError("Input must be a list / sequence of tuple-likes.")
if is_iterator(tuples):
tuples = list(tuples)
tuples = cast(Collection[tuple[Hashable, ...]], tuples)

# handling the empty tuple cases
if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
codes = [np.zeros(len(tuples))]
levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
return cls(
levels=levels,
codes=codes,
sortorder=sortorder,
names=names,
verify_integrity=False,
)

arrays: list[Sequence[Hashable]]
if len(tuples) == 0:
if isinstance(tuples, (list, tuple)) and len(tuples) == 0:
if names is None:
raise TypeError("Cannot infer number of levels from empty list")
# error: Argument 1 to "len" has incompatible type "Hashable";
# expected "Sized"
arrays = [[]] * len(names) # type: ignore[arg-type]
elif isinstance(tuples, (np.ndarray, Index)):
if isinstance(tuples, Index):
tuples = np.asarray(tuples._values)

arrays = list(lib.tuples_to_object_array(tuples).T)
elif isinstance(tuples, list):
arrays = list(lib.to_object_array_tuples(tuples).T)
names_seq = cast(Sequence[Hashable], names)
arrays: List[ArrayLike] = [[]] * len(names_seq)
return cls.from_arrays(arrays, sortorder=sortorder, names=names)

# Convert to list and normalize
tuples_list = [t if isinstance(t, tuple) else (t,) for t in tuples]
if not tuples_list:
arrays = []
else:
arrs = zip(*tuples)
arrays = cast(list[Sequence[Hashable]], arrs)
max_length = max(len(t) for t in tuples_list)
result_tuples = [
t + (np.nan,) * (max_length - len(t)) for t in tuples_list
]
arrays = list(lib.to_object_array_tuples(result_tuples).T)

return cls.from_arrays(arrays, sortorder=sortorder, names=names)

Expand Down
47 changes: 47 additions & 0 deletions pandas/tests/indexes/multi/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,3 +857,50 @@ def test_dtype_representation(using_infer_string):
dtype=object,
)
tm.assert_series_equal(result, expected)


def test_from_tuples_different_lengths_gh60695():
    """
    Check MultiIndex.from_tuples on tuples of unequal length.

    Each input is compared against the index built from tuples in which
    the missing entries are written out explicitly as ``np.nan``.

    GH#60695
    """
    # Case 1: plain string tuples, one shorter than the other
    ragged = [("l1",), ("l1", "l2")]
    result = MultiIndex.from_tuples(ragged)
    expected = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
    tm.assert_index_equal(result, expected, exact=True)

    # Case 2: Series built from a dict keyed by tuples of different lengths
    ser = pd.Series({("l1",): "v1", ("l1", "l2"): "v2"})
    padded_index = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")])
    expected_ser = pd.Series(["v1", "v2"], index=padded_index)
    tm.assert_series_equal(ser, expected_ser, check_index_type=True)

    # Case 3: an empty list is accepted when level names are supplied
    empty_idx = MultiIndex.from_tuples([], names=["a", "b"])
    assert isinstance(empty_idx, MultiIndex)
    assert empty_idx.names == ["a", "b"]
    assert len(empty_idx) == 0

    # Case 4: an empty list without names must raise
    with pytest.raises(TypeError, match="Cannot infer number of levels"):
        MultiIndex.from_tuples([])

    # Case 5: a None entry compares equal to an np.nan entry
    with_none = [(1, None), (1, 2)]
    result = MultiIndex.from_tuples(with_none)
    expected = MultiIndex.from_tuples([(1, np.nan), (1, 2)])
    tm.assert_index_equal(result, expected, exact=True)

    # Case 6: mixed value types with tuple lengths of 2, 1 and 3
    mixed = [(1, "a"), (1,), (2, "b", "c")]
    result = MultiIndex.from_tuples(mixed)
    padded = [
        (1, "a", np.nan),
        (1, np.nan, np.nan),
        (2, "b", "c"),
    ]
    expected = MultiIndex.from_tuples(padded)
    tm.assert_index_equal(result, expected, exact=True)
Loading