diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 4fb24c9ad1538..70cf20a6e0fd6 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -1,6 +1,9 @@
 from collections import defaultdict
+import sys
+import warnings
 
 cimport cython
+from cpython cimport PY_VERSION_HEX
 from cpython.object cimport PyObject
 from cpython.pyport cimport PY_SSIZE_T_MAX
 from cpython.slice cimport PySlice_GetIndicesEx
@@ -20,6 +23,9 @@ from numpy cimport (
 cnp.import_array()
 
 from pandas._libs.algos import ensure_int64
+from pandas.errors import ChainedAssignmentError
+from pandas.errors.cow import _chained_assignment_msg
+
 
 from pandas._libs.util cimport (
     is_array,
@@ -996,3 +1002,41 @@ cdef class BlockValuesRefs:
             return self._has_reference_maybe_locked()
         ELSE:
             return self._has_reference_maybe_locked()
+
+
+cdef extern from "Python.h":
+    """
+    #if PY_VERSION_HEX < 0x030E0000
+    int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref);
+    #else
+    #define __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary \
+        PyUnstable_Object_IsUniqueReferencedTemporary
+    #endif
+    """
+    int PyUnstable_Object_IsUniqueReferencedTemporary\
+        "__Pyx_PyUnstable_Object_IsUniqueReferencedTemporary"(object o) except -1
+
+
+# Python version compatibility for PyUnstable_Object_IsUniqueReferencedTemporary
+cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
+    if PY_VERSION_HEX >= 0x030E0000:
+        # Python 3.14+ has PyUnstable_Object_IsUniqueReferencedTemporary
+        return PyUnstable_Object_IsUniqueReferencedTemporary(obj)
+    else:
+        # Fallback for older Python versions using sys.getrefcount
+        return sys.getrefcount(obj) <= 1
+
+
+cdef class SetitemMixin:
+    # class used in DataFrame and Series for checking for chained assignment
+
+    def __setitem__(self, key, value) -> None:
+        cdef bint is_unique = _is_unique_referenced_temporary(self)
+        if is_unique:
+            warnings.warn(
+                _chained_assignment_msg, ChainedAssignmentError, stacklevel=1
+            )
+        self._setitem(key, value)
+
+    def __delitem__(self, key) -> None:
+        self._delitem(key)
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index beb4a69232b27..8247356f25f4d 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -22,6 +22,7 @@
     PeriodArray,
     TimedeltaArray,
 )
+from pandas.core.generic import NDFrame
 from pandas.core.internals import BlockManager
 
 if TYPE_CHECKING:
@@ -90,6 +91,10 @@ def load_reduce(self) -> None:
                 cls = args[0]
                 stack[-1] = NDArrayBacked.__new__(*args)
                 return
+            elif args and issubclass(args[0], NDFrame):
+                cls = args[0]
+                stack[-1] = cls.__new__(cls)
+                return
             raise
 
     dispatch[pickle.REDUCE[0]] = load_reduce  # type: ignore[assignment]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ec8c8116e5aee..f2f50039724c6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -48,6 +48,7 @@
     properties,
 )
 from pandas._libs.hashtable import duplicated
+from pandas._libs.internals import SetitemMixin
 from pandas._libs.lib import is_range_indexer
 from pandas.compat import PYPY
 from pandas.compat._constants import REF_COUNT
@@ -60,7 +61,6 @@
 )
 from pandas.errors.cow import (
     _chained_assignment_method_msg,
-    _chained_assignment_msg,
 )
 from pandas.util._decorators import (
     Appender,
@@ -512,7 +512,7 @@
 
 
 @set_module("pandas")
-class DataFrame(NDFrame, OpsMixin):
+class DataFrame(SetitemMixin, NDFrame, OpsMixin):
     """
     Two-dimensional, size-mutable, potentially heterogeneous tabular data.
 
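As a rough illustration of what the new SetitemMixin.__setitem__ check targets (this sketch is not part of the patch and assumes a build of this branch, where chained assignment under Copy-on-Write emits ChainedAssignmentError as a warning): writing into the temporary returned by df["a"] should now be flagged by the Cython-level temporary detection instead of the old sys.getrefcount(self) <= 3 heuristic in Python.

    import warnings

    import pandas as pd
    from pandas.errors import ChainedAssignmentError

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # df["a"] creates a temporary Series; writing into it is chained assignment
        df["a"][0] = 10

    # On a build with this change the temporary should be detected and warned on
    print(any(issubclass(w.category, ChainedAssignmentError) for w in caught))
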
@@ -661,6 +661,11 @@ class DataFrame(NDFrame, OpsMixin):
     # and ExtensionArray. Should NOT be overridden by subclasses.
     __pandas_priority__ = 4000
 
+    # override those to avoid inheriting from SetitemMixin (cython generates
+    # them by default)
+    __reduce__ = object.__reduce__
+    __setstate__ = NDFrame.__setstate__
+
     @property
     def _constructor(self) -> type[DataFrame]:
         return DataFrame
@@ -4213,7 +4218,8 @@ def isetitem(self, loc, value) -> None:
         arraylike, refs = self._sanitize_column(value)
         self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs)
 
-    def __setitem__(self, key, value) -> None:
+    # def __setitem__() is implemented in SetitemMixin and dispatches to this method
+    def _setitem(self, key, value) -> None:
         """
         Set item(s) in DataFrame by key.
 
@@ -4297,12 +4303,6 @@ def __setitem__(self, key, value) -> None:
         z  3  50  # Values for 'a' and 'b' are completely ignored!
         """
-        if not PYPY:
-            if sys.getrefcount(self) <= 3:
-                warnings.warn(
-                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
-                )
-
         key = com.apply_if_callable(key, self)
 
         # see if we can slice the rows
         if isinstance(key, slice):
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 9840af15f1249..0078d7803819e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4258,8 +4258,9 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self:
         result = result.__finalize__(self)
         return result
 
+    # def __delitem__() is implemented in SetitemMixin and dispatches to this method
     @final
-    def __delitem__(self, key) -> None:
+    def _delitem(self, key) -> None:
         """
         Delete item
         """
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 00cff09801f1a..e726a0569ccbc 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -33,6 +33,7 @@
     properties,
     reshape,
 )
+from pandas._libs.internals import SetitemMixin
 from pandas._libs.lib import is_range_indexer
 from pandas.compat import PYPY
 from pandas.compat._constants import REF_COUNT
@@ -45,7 +46,6 @@
 )
 from pandas.errors.cow import (
     _chained_assignment_method_msg,
-    _chained_assignment_msg,
 )
 from pandas.util._decorators import (
     Appender,
@@ -235,7 +235,7 @@
 # class "NDFrame")
 # definition in base class "NDFrame"
 @set_module("pandas")
-class Series(base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
+class Series(SetitemMixin, base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
     """
     One-dimensional ndarray with axis labels (including time series).
 
@@ -363,6 +363,11 @@ class Series(base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
     )
     _mgr: SingleBlockManager
 
+    # override those to avoid inheriting from SetitemMixin (cython generates
+    # them by default)
+    __reduce__ = object.__reduce__
+    __setstate__ = NDFrame.__setstate__
+
     # ----------------------------------------------------------------------
     # Constructors
 
@@ -1059,13 +1064,8 @@ def _get_value(self, label, takeable: bool = False):
         else:
             return self.iloc[loc]
 
-    def __setitem__(self, key, value) -> None:
-        if not PYPY:
-            if sys.getrefcount(self) <= 3:
-                warnings.warn(
-                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
-                )
-
+    # def __setitem__() is implemented in SetitemMixin and dispatches to this method
+    def _setitem(self, key, value) -> None:
         check_dict_or_set_indexers(key)
         key = com.apply_if_callable(key, self)
 
diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py
index 973cb21ac3041..6418bfb1691c6 100644
--- a/pandas/tests/io/test_spss.py
+++ b/pandas/tests/io/test_spss.py
@@ -11,10 +11,6 @@
 pyreadstat = pytest.importorskip("pyreadstat")
 
 
-# TODO(CoW) - detection of chained assignment in cython
-# https://github.com/pandas-dev/pandas/issues/51315
-@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
-@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
 @pytest.mark.parametrize("path_klass", [lambda p: p, Path])
 def test_spss_labelled_num(path_klass, datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
@@ -31,8 +27,6 @@ def test_spss_labelled_num(path_klass, datapath):
     tm.assert_frame_equal(df, expected)
 
 
-@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
-@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
 def test_spss_labelled_num_na(datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
     # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
@@ -48,8 +42,6 @@ def test_spss_labelled_num_na(datapath):
     tm.assert_frame_equal(df, expected)
 
 
-@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
-@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
 def test_spss_labelled_str(datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
     # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
@@ -65,8 +57,6 @@ def test_spss_labelled_str(datapath):
     tm.assert_frame_equal(df, expected)
 
 
-@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
-@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
 def test_spss_kwargs(datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
     # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
@@ -81,8 +71,6 @@ def test_spss_kwargs(datapath):
     tm.assert_frame_equal(df, expected)
 
 
-@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
-@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
 def test_spss_umlauts(datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
     # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
@@ -140,8 +128,6 @@ def test_invalid_dtype_backend():
         pd.read_spss("test", dtype_backend="numpy")
 
 
-@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
-@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
 def test_spss_metadata(datapath):
     # GH 54264
     fname = datapath("io", "data", "spss", "labelled-num.sav")
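Because DataFrame and Series now inherit from a cdef class, Cython would otherwise inject its own __reduce__/__setstate__; the overrides above pin them back to object.__reduce__ and NDFrame.__setstate__, and the new pickle_compat branch handles legacy pickles that reduce to an NDFrame subclass. A quick round-trip such as the following sketch (not part of the patch, just an assumed smoke check) is the behaviour those pieces are meant to preserve:

    import pickle

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]}, index=["x", "y", "z"])
    ser = pd.Series([1.5, 2.5], name="s")

    # Pickling should behave exactly as before the SetitemMixin base was added
    pd.testing.assert_frame_equal(df, pickle.loads(pickle.dumps(df)))
    pd.testing.assert_series_equal(ser, pickle.loads(pickle.dumps(ser)))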