Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,10 +376,15 @@ def is_named_tuple(obj: object) -> bool:
return isinstance(obj, abc.Sequence) and hasattr(obj, "_fields")


def is_hashable(obj: object) -> TypeGuard[Hashable]:
def is_hashable(obj: object, allow_slice: bool | None = None) -> TypeGuard[Hashable]:
"""
Return True if hash(obj) will succeed, False otherwise.

If `allow_slice` is False, the function always returns False for slices and for
tuples containing slices, even if hash(obj) would succeed.
If `allow_slice` is True or None, slices and tuples containing slices are treated as
hashable if hash(obj) does not raise TypeError.

Some types will pass a test against collections.abc.Hashable but fail when
they are actually hashed with hash().

Expand All @@ -390,13 +395,17 @@ def is_hashable(obj: object) -> TypeGuard[Hashable]:
----------
obj : object
The object to check for hashability. Any Python object can be passed here.
allow_slice : bool or None, default None
If True or None, return True if the object is hashable (including slices).
If False, return True only if the object is hashable and is neither a slice
nor a tuple containing a slice.

Returns
-------
bool
True if object can be hashed (i.e., does not raise TypeError when
passed to hash()), and False otherwise (e.g., if object is mutable
like a list or dictionary).
passed to hash()) and passes the slice check according to 'allow_slice'.
False otherwise (e.g., if object is mutable like a list or dictionary
or if allow_slice is False and object is a slice or contains a slice).

See Also
--------
Expand All @@ -422,6 +431,17 @@ def is_hashable(obj: object) -> TypeGuard[Hashable]:
# Reconsider this decision once this numpy bug is fixed:
# https://github.com/numpy/numpy/issues/5562

def _contains_slice(x: object) -> bool:
# Check if object is a slice or a tuple containing a slice
if isinstance(x, tuple):
return any(isinstance(v, slice) for v in x)
elif isinstance(x, slice):
return True
return False

if allow_slice is False and _contains_slice(obj):
return False

try:
hash(obj)
except TypeError:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4050,7 +4050,7 @@ def __getitem__(self, key):
key = lib.item_from_zerodim(key)
key = com.apply_if_callable(key, self)

if is_hashable(key) and not is_iterator(key) and not isinstance(key, slice):
if is_hashable(key, allow_slice=False) and not is_iterator(key):
# is_iterator to exclude generator e.g. test_getitem_listlike
# As of Python 3.12, slice is hashable which breaks MultiIndex (GH#57500)

Expand Down
12 changes: 3 additions & 9 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,8 +793,7 @@ def _get_setitem_indexer(self, key):
if (
isinstance(ax, MultiIndex)
and self.name != "iloc"
and is_hashable(key)
and not isinstance(key, slice)
and is_hashable(key, allow_slice=False)
):
with suppress(KeyError, InvalidIndexError):
# TypeError e.g. passed a bool
Expand Down Expand Up @@ -1147,8 +1146,7 @@ def _contains_slice(x: object) -> bool:
# This should never be reached, but let's be explicit about it
raise ValueError("Too many indices") # pragma: no cover
if all(
(is_hashable(x) and not _contains_slice(x)) or com.is_null_slice(x)
for x in tup
is_hashable(x, allow_slice=False) or com.is_null_slice(x) for x in tup
):
# GH#10521 Series should reduce MultiIndex dimensions instead of
# DataFrame, IndexingError is not raised when slice(None,None,None)
Expand Down Expand Up @@ -1511,12 +1509,8 @@ def _convert_to_indexer(self, key, axis: AxisInt):

# Slices are not valid keys passed in by the user,
# even though they are hashable in Python 3.12
contains_slice = False
if isinstance(key, tuple):
contains_slice = any(isinstance(v, slice) for v in key)

if is_scalar(key) or (
isinstance(labels, MultiIndex) and is_hashable(key) and not contains_slice
isinstance(labels, MultiIndex) and is_hashable(key, allow_slice=False)
):
# Otherwise get_loc will raise InvalidIndexError

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ def __getitem__(self, key):
if is_iterator(key):
key = list(key)

if is_hashable(key) and not isinstance(key, slice):
if is_hashable(key, allow_slice=False):
# Otherwise index.get_value will raise InvalidIndexError
try:
# For labels that don't resolve as scalars like tuples and frozensets
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_xlsxwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def __init__( # pyright: ignore[reportInconsistentConstructor]
)

try:
self._book = Workbook(self._handles.handle, **engine_kwargs) # type: ignore[arg-type]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mypy failed on this comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel I made changes based on your feedback. Please review.

self._book = Workbook(self._handles.handle, **engine_kwargs)
except TypeError:
self._handles.handle.close()
raise
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
missing as libmissing,
ops as libops,
)
from pandas.compat import PY312
from pandas.compat.numpy import np_version_gt2
from pandas.errors import Pandas4Warning

Expand Down Expand Up @@ -452,16 +453,55 @@ class UnhashableClass2:
def __hash__(self):
raise TypeError("Not hashable")

class HashableSlice:
    """
    Test helper that wraps a ``slice`` but is not itself a ``slice`` instance.

    It is hashable on every Python version because it hashes the
    ``(start, stop, step)`` triple instead of the wrapped slice object.
    """

    def __init__(self, start, stop, step=None):
        self.slice = slice(start, stop, step)

    def __eq__(self, other):
        if not isinstance(other, HashableSlice):
            # Let Python try the reflected comparison; ``==`` against a
            # foreign type still ends up False.
            return NotImplemented
        return self.slice == other.slice

    def __hash__(self):
        # Hash the components rather than the slice itself, which is
        # unhashable before Python 3.12.
        return hash((self.slice.start, self.slice.stop, self.slice.step))

    def __repr__(self):
        return (
            f"HashableSlice({self.slice.start}, {self.slice.stop}, "
            f"{self.slice.step})"
        )

hashable = (1, 3.14, np.float64(3.14), "a", (), (1,), HashableClass())
not_hashable = ([], UnhashableClass1())
abc_hashable_not_really_hashable = (([],), UnhashableClass2())
hashable_slice = HashableSlice(1, 2)
tuple_with_slice = (slice(1, 2), 3)

for i in hashable:
assert inference.is_hashable(i)
assert inference.is_hashable(i, allow_slice=True)
assert inference.is_hashable(i, allow_slice=False)
for i in not_hashable:
assert not inference.is_hashable(i)
assert not inference.is_hashable(i, allow_slice=True)
assert not inference.is_hashable(i, allow_slice=False)
for i in abc_hashable_not_really_hashable:
assert not inference.is_hashable(i)
assert not inference.is_hashable(i, allow_slice=True)
assert not inference.is_hashable(i, allow_slice=False)

assert inference.is_hashable(hashable_slice)
assert inference.is_hashable(hashable_slice, allow_slice=True)
assert inference.is_hashable(hashable_slice, allow_slice=False)

if PY312:
for obj in [slice(1, 2), tuple_with_slice]:
assert inference.is_hashable(obj)
assert inference.is_hashable(obj, allow_slice=True)
assert not inference.is_hashable(obj, allow_slice=False)
else:
for obj in [slice(1, 2), tuple_with_slice]:
assert not inference.is_hashable(obj)
assert not inference.is_hashable(obj, allow_slice=True)
assert not inference.is_hashable(obj, allow_slice=False)

# numpy.array is no longer collections.abc.Hashable as of
# https://github.com/numpy/numpy/pull/5326, just test
Expand Down
Loading