Skip to content

Commit e8a7607

Browse files
KhemkaranKhemkaran
authored and committed
:x
2 parents 606d1c5 + 68644ac commit e8a7607

File tree

8 files changed

+489
-16
lines changed

8 files changed

+489
-16
lines changed

doc/source/development/maintaining.rst

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -388,8 +388,11 @@ Pre-release
388388

389389
3. Make sure the CI is green for the last commit of the branch being released.
390390

391-
4. If not a release candidate, make sure all backporting pull requests to the branch
392-
being released are merged.
391+
4. If not a release candidate, make sure all backporting pull requests to the
392+
branch being released are merged, and no merged pull requests are missing a
393+
backport (check the
394+
`"Still Needs Manual Backport" <https://github.com/pandas-dev/pandas/labels/Still%20Needs%20Manual%20Backport>`_
395+
label for this).
393396

394397
5. Create a new issue and milestone for the version after the one being released.
395398
If the release was a release candidate, we would usually want to create issues and
@@ -435,6 +438,9 @@ which will be triggered when the tag is pushed.
435438

436439
scripts/download_wheels.sh <VERSION>
437440

441+
ATTENTION: this is currently not downloading *all* wheels, and you have to
442+
manually download the remaining wheels and sdist!
443+
438444
4. Create a `new GitHub release <https://github.com/pandas-dev/pandas/releases/new>`_:
439445

440446
- Tag: ``<version>``
@@ -462,15 +468,22 @@ Post-Release
462468
````````````
463469

464470
1. Update symlinks to stable documentation by logging in to our web server, and
465-
editing ``/var/www/html/pandas-docs/stable`` to point to ``version/<latest-version>``
466-
for major and minor releases, or ``version/<minor>`` to ``version/<patch>`` for
471+
editing ``/var/www/html/pandas-docs/stable`` to point to ``version/<X.Y>``
472+
for major and minor releases, or editing ``version/<X.Y>`` to point to ``version/<patch>`` for
467473
patch releases. The exact instructions are (replace the example version numbers by
468474
the appropriate ones for the version you are releasing):
469475

470476
- Log in to the server and use the correct user.
471477
- ``cd /var/www/html/pandas-docs/``
472-
- ``ln -sfn version/2.1 stable`` (for a major or minor release)
473-
- ``ln -sfn version/2.0.3 version/2.0`` (for a patch release)
478+
- For a major or minor release (assuming the ``/version/2.1.0/`` docs have been uploaded to the server):
479+
480+
- Create a new X.Y symlink to X.Y.Z: ``cd version; ln -sfn 2.1.0 2.1``
481+
- Update stable symlink to point to X.Y: ``ln -sfn version/2.1 stable``
482+
483+
- For a patch release (assuming the ``/version/2.1.3/`` docs have been uploaded to the server):
484+
485+
- Update the X.Y symlink to the new X.Y.Z patch version: ``cd version; ln -sfn 2.1.3 2.1``
486+
- (the stable symlink should already be pointing to the correct X.Y version)
474487

475488
2. If releasing a major or minor release, open a PR in our source code to update
476489
``web/pandas/versions.json``, to have the desired versions in the documentation

pandas/_libs/groupby.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def group_sum(
6767
result_mask: np.ndarray | None = ...,
6868
min_count: int = ...,
6969
is_datetimelike: bool = ...,
70+
initial: object = ...,
7071
skipna: bool = ...,
7172
) -> None: ...
7273
def group_prod(

pandas/_libs/groupby.pyx

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,7 @@ def group_sum(
707707
uint8_t[:, ::1] result_mask=None,
708708
Py_ssize_t min_count=0,
709709
bint is_datetimelike=False,
710+
object initial=0,
710711
bint skipna=True,
711712
) -> None:
712713
"""
@@ -725,9 +726,15 @@ def group_sum(
725726
raise ValueError("len(index) != len(labels)")
726727

727728
nobs = np.zeros((<object>out).shape, dtype=np.int64)
728-
# the below is equivalent to `np.zeros_like(out)` but faster
729-
sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
730-
compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
729+
if initial == 0:
730+
# the below is equivalent to `np.zeros_like(out)` but faster
731+
sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
732+
compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
733+
else:
734+
# in practice this path is only taken for strings to use empty string as initial
735+
assert sum_t is object
736+
sumx = np.full((<object>out).shape, initial, dtype=object)
737+
# object code path does not use `compensation`
731738

732739
N, K = (<object>values).shape
733740
if uses_mask:

pandas/core/arrays/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2608,6 +2608,7 @@ def _groupby_op(
26082608
kind = WrappedCythonOp.get_kind_from_how(how)
26092609
op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
26102610

2611+
initial: Any = 0
26112612
# GH#43682
26122613
if isinstance(self.dtype, StringDtype):
26132614
# StringArray
@@ -2632,6 +2633,7 @@ def _groupby_op(
26322633

26332634
arr = self
26342635
if op.how == "sum":
2636+
initial = ""
26352637
# https://github.com/pandas-dev/pandas/issues/60229
26362638
# All NA should result in the empty string.
26372639
assert "skipna" in kwargs
@@ -2649,6 +2651,7 @@ def _groupby_op(
26492651
ngroups=ngroups,
26502652
comp_ids=ids,
26512653
mask=None,
2654+
initial=initial,
26522655
**kwargs,
26532656
)
26542657

pandas/core/groupby/ops.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import functools
1313
from typing import (
1414
TYPE_CHECKING,
15+
Any,
1516
Generic,
1617
final,
1718
)
@@ -319,6 +320,7 @@ def _cython_op_ndim_compat(
319320
comp_ids: np.ndarray,
320321
mask: npt.NDArray[np.bool_] | None = None,
321322
result_mask: npt.NDArray[np.bool_] | None = None,
323+
initial: Any = 0,
322324
**kwargs,
323325
) -> np.ndarray:
324326
if values.ndim == 1:
@@ -335,6 +337,7 @@ def _cython_op_ndim_compat(
335337
comp_ids=comp_ids,
336338
mask=mask,
337339
result_mask=result_mask,
340+
initial=initial,
338341
**kwargs,
339342
)
340343
if res.shape[0] == 1:
@@ -350,6 +353,7 @@ def _cython_op_ndim_compat(
350353
comp_ids=comp_ids,
351354
mask=mask,
352355
result_mask=result_mask,
356+
initial=initial,
353357
**kwargs,
354358
)
355359

@@ -363,6 +367,7 @@ def _call_cython_op(
363367
comp_ids: np.ndarray,
364368
mask: npt.NDArray[np.bool_] | None,
365369
result_mask: npt.NDArray[np.bool_] | None,
370+
initial: Any = 0,
366371
**kwargs,
367372
) -> np.ndarray: # np.ndarray[ndim=2]
368373
orig_values = values
@@ -420,6 +425,10 @@ def _call_cython_op(
420425
"sum",
421426
"median",
422427
]:
428+
if self.how == "sum":
429+
# pass in through kwargs only for sum (other functions don't have
430+
# the keyword)
431+
kwargs["initial"] = initial
423432
func(
424433
out=result,
425434
counts=counts,

pandas/tests/groupby/test_categorical.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ def f(a):
3232
return a
3333

3434
index = MultiIndex.from_product(map(f, args), names=names)
35+
if isinstance(fill_value, dict):
36+
# fill_value is a dict mapping column names to fill values
37+
# -> reindex column by column (reindex itself does not support this)
38+
res = {}
39+
for col in result.columns:
40+
res[col] = result[col].reindex(index, fill_value=fill_value[col])
41+
return DataFrame(res, index=index).sort_index()
42+
3543
return result.reindex(index, fill_value=fill_value).sort_index()
3644

3745

@@ -317,18 +325,14 @@ def test_apply(ordered):
317325
tm.assert_series_equal(result, expected)
318326

319327

320-
def test_observed(request, using_infer_string, observed):
328+
def test_observed(observed, using_infer_string):
321329
# multiple groupers, don't re-expand the output space
322330
# of the grouper
323331
# gh-14942 (implement)
324332
# gh-10132 (back-compat)
325333
# gh-8138 (back-compat)
326334
# gh-8869
327335

328-
if using_infer_string and not observed:
329-
# TODO(infer_string) this fails with filling the string column with 0
330-
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
331-
332336
cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True)
333337
cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True)
334338
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
@@ -356,7 +360,10 @@ def test_observed(request, using_infer_string, observed):
356360
result = gb.sum()
357361
if not observed:
358362
expected = cartesian_product_for_groupers(
359-
expected, [cat1, cat2], list("AB"), fill_value=0
363+
expected,
364+
[cat1, cat2],
365+
list("AB"),
366+
fill_value={"values": 0, "C": ""} if using_infer_string else 0,
360367
)
361368

362369
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_timegrouper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def test_groupby_with_timegrouper(self, using_infer_string):
108108
unit=df.index.unit,
109109
)
110110
expected = DataFrame(
111-
{"Buyer": 0, "Quantity": 0},
111+
{"Buyer": "" if using_infer_string else 0, "Quantity": 0},
112112
index=exp_dti,
113113
)
114114
# Cast to object/str to avoid implicit cast when setting

0 commit comments

Comments
 (0)