Move chained assignment detection to cython for Python 3.14 compat #62070

Draft
wants to merge 7 commits into base: main
44 changes: 44 additions & 0 deletions pandas/_libs/internals.pyx
@@ -1,6 +1,9 @@
from collections import defaultdict
import sys
import warnings

cimport cython
from cpython cimport PY_VERSION_HEX
from cpython.object cimport PyObject
from cpython.pyport cimport PY_SSIZE_T_MAX
from cpython.slice cimport PySlice_GetIndicesEx
@@ -20,6 +23,9 @@ from numpy cimport (
cnp.import_array()

from pandas._libs.algos import ensure_int64
from pandas.errors import ChainedAssignmentError
from pandas.errors.cow import _chained_assignment_msg


from pandas._libs.util cimport (
is_array,
@@ -996,3 +1002,41 @@ cdef class BlockValuesRefs:
return self._has_reference_maybe_locked()
ELSE:
return self._has_reference_maybe_locked()


cdef extern from "Python.h":
"""
#if PY_VERSION_HEX < 0x030E0000
int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref);
#else
#define __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary \
PyUnstable_Object_IsUniqueReferencedTemporary
#endif
"""
int PyUnstable_Object_IsUniqueReferencedTemporary\
"__Pyx_PyUnstable_Object_IsUniqueReferencedTemporary"(object o) except -1


# Python version compatibility for PyUnstable_Object_IsUniqueReferencedTemporary
cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
if PY_VERSION_HEX >= 0x030E0000:
# Python 3.14+ has PyUnstable_Object_IsUniqueReferencedTemporary
return PyUnstable_Object_IsUniqueReferencedTemporary(obj)
else:
# Fallback for older Python versions using sys.getrefcount
return sys.getrefcount(obj) <= 1


cdef class SetitemMixin:
# Mixin used by DataFrame and Series to detect chained assignment

def __setitem__(self, key, value) -> None:
cdef bint is_unique = _is_unique_referenced_temporary(self)
if is_unique:
warnings.warn(
_chained_assignment_msg, ChainedAssignmentError, stacklevel=1
)
self._setitem(key, value)

def __delitem__(self, key) -> None:
self._delitem(key)
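
For context, a minimal sketch of the pattern this mixin is meant to flag (illustration only, not part of the diff): indexing produces a temporary object, and assigning into that temporary can never update the original frame.

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# df[df["a"] > 1] builds a temporary DataFrame; it is uniquely referenced by
# the time its __setitem__ runs, so the mixin warns with ChainedAssignmentError
# and, under copy-on-write, the write never reaches df.
df[df["a"] > 1]["b"] = 10

That temporary is exactly what _is_unique_referenced_temporary identifies, via the new C API on Python 3.14+ and via the refcount fallback on older interpreters.
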
5 changes: 5 additions & 0 deletions pandas/compat/pickle_compat.py
@@ -22,6 +22,7 @@
PeriodArray,
TimedeltaArray,
)
from pandas.core.generic import NDFrame
from pandas.core.internals import BlockManager

if TYPE_CHECKING:
@@ -90,6 +91,10 @@ def load_reduce(self) -> None:
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return
elif args and issubclass(args[0], NDFrame):
cls = args[0]
stack[-1] = cls.__new__(cls)
return
raise

dispatch[pickle.REDUCE[0]] = load_reduce # type: ignore[assignment]
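
The branch above appears to be needed because DataFrame and Series now inherit from a Cython extension type: when the REDUCE opcode reconstructs an NDFrame subclass, the instance is created with cls.__new__(cls) and the subsequent __setstate__ call restores its contents. A quick round-trip sanity check (a sketch; read_pickle only falls back to pandas.compat.pickle_compat for pickles the stock unpickler cannot handle):

import io

import pandas as pd

original = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
buf = io.BytesIO()
original.to_pickle(buf)
buf.seek(0)
restored = pd.read_pickle(buf)
print(restored.equals(original))  # True
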
18 changes: 9 additions & 9 deletions pandas/core/frame.py
@@ -48,6 +48,7 @@
properties,
)
from pandas._libs.hashtable import duplicated
from pandas._libs.internals import SetitemMixin
from pandas._libs.lib import is_range_indexer
from pandas.compat import PYPY
from pandas.compat._constants import REF_COUNT
@@ -60,7 +61,6 @@
)
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_msg,
)
from pandas.util._decorators import (
Appender,
@@ -512,7 +512,7 @@


@set_module("pandas")
class DataFrame(NDFrame, OpsMixin):
class DataFrame(SetitemMixin, NDFrame, OpsMixin):
"""
Two-dimensional, size-mutable, potentially heterogeneous tabular data.

@@ -661,6 +661,11 @@ class DataFrame(NDFrame, OpsMixin):
# and ExtensionArray. Should NOT be overridden by subclasses.
__pandas_priority__ = 4000

# override these to avoid inheriting the __reduce__/__setstate__ that
# cython generates for SetitemMixin by default
__reduce__ = object.__reduce__
__setstate__ = NDFrame.__setstate__

@property
def _constructor(self) -> type[DataFrame]:
return DataFrame
@@ -4213,7 +4218,8 @@ def isetitem(self, loc, value) -> None:
arraylike, refs = self._sanitize_column(value)
self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs)

def __setitem__(self, key, value) -> None:
# def __setitem__() is implemented in SetitemMixin and dispatches to this method
def _setitem(self, key, value) -> None:
"""
Set item(s) in DataFrame by key.

@@ -4297,12 +4303,6 @@ def __setitem__(self, key, value) -> None:
z 3 50
# Values for 'a' and 'b' are completely ignored!
"""
if not PYPY:
if sys.getrefcount(self) <= 3:
warnings.warn(
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
)

key = com.apply_if_callable(key, self)

# see if we can slice the rows
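
For comparison, the deleted lines above implemented the detection purely in Python with a refcount heuristic; a sketch of the removed logic (illustration only):

import sys

def _looks_like_chained_assignment(obj) -> bool:
    # When df[mask]["b"] = value reaches DataFrame.__setitem__, the temporary
    # df[mask] is held by the evaluation stack, by the bound `self` argument,
    # and by getrefcount's own argument, hence the <= 3 threshold.
    return sys.getrefcount(obj) <= 3

That count is an implementation detail of the interpreter, which is what this PR addresses for Python 3.14: PyUnstable_Object_IsUniqueReferencedTemporary asks the same question through a supported API, and the Cython mixin switches to it.
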
3 changes: 2 additions & 1 deletion pandas/core/generic.py
@@ -4258,8 +4258,9 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self:
result = result.__finalize__(self)
return result

# def __delitem__() is implemented in SetitemMixin and dispatches to this method
@final
def __delitem__(self, key) -> None:
def _delitem(self, key) -> None:
"""
Delete item
"""
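
With this change, del df[col] enters the Cython SetitemMixin.__delitem__, which simply forwards to NDFrame._delitem; behaviour is unchanged. A small usage check:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
del df["b"]              # SetitemMixin.__delitem__ -> NDFrame._delitem
print(list(df.columns))  # ['a']
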
18 changes: 9 additions & 9 deletions pandas/core/series.py
@@ -33,6 +33,7 @@
properties,
reshape,
)
from pandas._libs.internals import SetitemMixin
from pandas._libs.lib import is_range_indexer
from pandas.compat import PYPY
from pandas.compat._constants import REF_COUNT
@@ -45,7 +46,6 @@
)
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_msg,
)
from pandas.util._decorators import (
Appender,
@@ -235,7 +235,7 @@
# class "NDFrame")
# definition in base class "NDFrame"
@set_module("pandas")
class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc]
class Series(SetitemMixin, base.IndexOpsMixin, NDFrame): # type: ignore[misc]
"""
One-dimensional ndarray with axis labels (including time series).

@@ -363,6 +363,11 @@ class Series(base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
)
_mgr: SingleBlockManager

# override these to avoid inheriting the __reduce__/__setstate__ that
# cython generates for SetitemMixin by default
__reduce__ = object.__reduce__
__setstate__ = NDFrame.__setstate__

# ----------------------------------------------------------------------
# Constructors

@@ -1059,13 +1064,8 @@ def _get_value(self, label, takeable: bool = False):
else:
return self.iloc[loc]

def __setitem__(self, key, value) -> None:
if not PYPY:
if sys.getrefcount(self) <= 3:
warnings.warn(
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
)

# def __setitem__() is implemented in SetitemMixin and dispatches to this method
def _setitem(self, key, value) -> None:
check_dict_or_set_indexers(key)
key = com.apply_if_callable(key, self)

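
On the Series side, the canonical chained-assignment case is column access followed by element assignment; a minimal sketch (not part of the diff) of what the relocated check flags:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# df["a"] returns a temporary Series; writing into it warns with
# ChainedAssignmentError and, under copy-on-write, leaves df untouched.
df["a"][0] = 100

# The supported spelling writes into df directly:
df.loc[0, "a"] = 100
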
14 changes: 0 additions & 14 deletions pandas/tests/io/test_spss.py
@@ -11,10 +11,6 @@
pyreadstat = pytest.importorskip("pyreadstat")


# TODO(CoW) - detection of chained assignment in cython
# https://github.com/pandas-dev/pandas/issues/51315
@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
@pytest.mark.parametrize("path_klass", [lambda p: p, Path])
def test_spss_labelled_num(path_klass, datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
@@ -31,8 +27,6 @@ def test_spss_labelled_num(path_klass, datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_labelled_num_na(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
# Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
@@ -48,8 +42,6 @@ def test_spss_labelled_num_na(datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_labelled_str(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
# Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
@@ -65,8 +57,6 @@ def test_spss_labelled_str(datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_kwargs(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
# Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
@@ -81,8 +71,6 @@ def test_spss_kwargs(datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_umlauts(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
# Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
@@ -140,8 +128,6 @@ def test_invalid_dtype_backend():
pd.read_spss("test", dtype_backend="numpy")


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_metadata(datapath):
# GH 54264
fname = datapath("io", "data", "spss", "labelled-num.sav")
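
These filters appear to have been needed only to silence false positives that the old refcount heuristic produced inside pd.read_spss (the removed TODO pointed at GH 51315); with detection moved into the Cython mixin they can be dropped. A rough local check, assuming the optional pyreadstat dependency is installed and using the test data file shipped in the repo (the path below is an assumption; adjust it to your checkout):

import warnings

import pandas as pd

fname = "pandas/tests/io/data/spss/labelled-num.sav"  # assumed path
with warnings.catch_warnings():
    warnings.simplefilter("error", pd.errors.ChainedAssignmentError)
    df = pd.read_spss(fname)  # should no longer trip the chained-assignment detection
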