Skip to content

Commit 208fdfb

Browse files
committed
Add unbiased computation to kurtosis
1 parent 3d145d2 commit 208fdfb

File tree

3 files changed

+18
-6
lines changed

3 files changed

+18
-6
lines changed

cpp/src/arrow/compute/kernels/aggregate_var_std.cc

+1-1
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ struct StatisticImpl : public ScalarAggregator {
212212
out->value = std::make_shared<DoubleScalar>(state.moments.Skew(bias));
213213
break;
214214
case StatisticType::Kurtosis:
215-
out->value = std::make_shared<DoubleScalar>(state.moments.Kurtosis());
215+
out->value = std::make_shared<DoubleScalar>(state.moments.Kurtosis(bias));
216216
break;
217217
default:
218218
return Status::NotImplemented("Unsupported statistic type ",

cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h

+10-2
Original file line number | Diff line number | Diff line change
@@ -96,9 +96,17 @@ struct Moments {
9696
return result;
9797
}
9898

99-
double Kurtosis() const {
99+
double Kurtosis(bool bias = true) const {
100+
double result;
100101
// This may return NaN for m2 == 0 and m4 == 0, which is expected
101-
return count * m4 / (m2 * m2) - 3;
102+
if (bias) {
103+
result = count * m4 / (m2 * m2) - 3;
104+
} else {
105+
result = 1.0 / (count - 2) / (count - 3) *
106+
((pow(count, 2) - 1.0) * (m4 / count) / pow((m2 / count), 2.0) -
107+
3 * pow((count - 1), 2.0));
108+
}
109+
return result;
102110
}
103111

104112
void MergeFrom(int level, const Moments& other) { *this = Merge(level, *this, other); }

python/pyarrow/tests/test_compute.py

+7-3
Original file line number | Diff line number | Diff line change
@@ -3841,7 +3841,11 @@ def test_pivot_wider():
38413841

38423842

38433843
@pytest.mark.pandas
3844-
def test_biased_skew():
3845-
arrow_skew = pc.skew([1.0, 2.0, 3.0, 40.0, None], skip_nulls=True, bias=False)
3846-
pandas_skew = pd.Series(np.array([1.0, 2.0, 3.0, 40.0, np.nan])).skew(skipna=True)
3844+
def test_biased_skew_and_kurtosis():
3845+
input = [1.0, 2.0, 3.0, 40.0, None]
3846+
arrow_skew = pc.skew(input, skip_nulls=True, bias=False)
3847+
pandas_skew = pd.Series(np.array(input)).skew(skipna=True)
38473848
assert arrow_skew == pa.scalar(pandas_skew)
3849+
arrow_kurtosis = pc.kurtosis(input, skip_nulls=True, bias=False)
3850+
pandas_kurtosis = pd.Series(np.array(input)).kurtosis(skipna=True)
3851+
assert arrow_kurtosis == pa.scalar(pandas_kurtosis)

0 commit comments

Comments
 (0)