Skip to content

Commit 82da2ea

Browse files
committed
Add some comments about possible NaN when not enough samples
1 parent ff49495 commit 82da2ea

File tree

2 files changed

+13
-4
lines changed

2 files changed

+13
-4
lines changed

cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h

+2
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ struct Moments {
8787
double Skew(bool bias = true) const {
8888
double result;
8989
// This may return NaN for m2 == 0 and m3 == 0, which is expected
90+
// or if unbiased and not enough samples (count < 3).
9091
if (bias) {
9192
result = sqrt(count) * m3 / sqrt(m2 * m2 * m2);
9293
} else {
@@ -99,6 +100,7 @@ struct Moments {
99100
double Kurtosis(bool bias = true) const {
100101
double result;
101102
// This may return NaN for m2 == 0 and m4 == 0, which is expected
103+
// or if unbiased and not enough samples (count < 4).
102104
if (bias) {
103105
result = count * m4 / (m2 * m2) - 3;
104106
} else {

python/pyarrow/tests/test_compute.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -3841,12 +3841,19 @@ def test_pivot_wider():
38413841

38423842

38433843
@pytest.mark.pandas
3844-
def test_unbiased_skew_and_kurtosis():
3844+
@pytest.mark.parametrize("input", ([1.0, 2.0, 3.0, 40.0, None], [1, 40]))
3845+
def test_unbiased_skew_and_kurtosis(input):
38453846
# Validate computing unbiased skew and kurtosis matches pandas
3846-
input = [1.0, 2.0, 3.0, 40.0, None]
38473847
arrow_skew = pc.skew(input, skip_nulls=True, bias=False)
38483848
pandas_skew = pd.Series(np.array(input)).skew(skipna=True)
3849-
assert arrow_skew == pa.scalar(pandas_skew)
38503849
arrow_kurtosis = pc.kurtosis(input, skip_nulls=True, bias=False)
38513850
pandas_kurtosis = pd.Series(np.array(input)).kurtosis(skipna=True)
3852-
assert arrow_kurtosis == pa.scalar(pandas_kurtosis)
3851+
3852+
if len(input) > 2:
3853+
assert arrow_skew == pa.scalar(pandas_skew)
3854+
assert arrow_kurtosis == pa.scalar(pandas_kurtosis)
3855+
else:
3856+
# Validate if not enough samples to compute skew and kurtosis
3857+
# then the result is NaN for arrow, matching pandas.
3858+
assert pc.is_nan(arrow_skew) == pc.is_nan(pandas_skew)
3859+
assert pc.is_nan(arrow_kurtosis) == pc.is_nan(pandas_kurtosis)

0 commit comments

Comments
 (0)