Skip to content

Commit 53a47f7

Browse files
committed
Add more documentation and tests
1 parent 360bcd0 commit 53a47f7

File tree

4 files changed

+13
-1
lines changed

4 files changed

+13
-1
lines changed

cpp/src/arrow/compute/api_aggregate.h

+3
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ class ARROW_EXPORT SkewOptions : public FunctionOptions {
124124
/// If true (the default), null values are ignored. Otherwise, if any value is null,
125125
/// emit null.
126126
bool skip_nulls;
127+
/// If true (the default), the calculated value is biased. If false, the calculated
128+
/// value includes a correction factor to reduce bias, making it more accurate for
129+
/// small sample sizes.
127130
bool bias;
128131
/// If less than this many non-null values are observed, emit null.
129132
uint32_t min_count;

cpp/src/arrow/compute/kernels/aggregate_test.cc

+5
Original file line numberDiff line numberDiff line change
@@ -3693,6 +3693,11 @@ TEST_F(TestSkewKurtosis, Options) {
36933693
AssertSkewKurtosisInvalid(type, {"[]", "[]", "[]"}, options);
36943694
AssertSkewKurtosisAre(type, "[0, 1, null, 2]", options, 0.0, -1.5);
36953695
AssertSkewKurtosisAre(type, {"[0, 1]", "[]", "[null, 2]"}, options, 0.0, -1.5);
3696+
options.bias = false;
3697+
AssertSkewKurtosisAre(type, {"[0, 1]", "[]", "[null, 2]"}, options, 0.0, -NAN);
3698+
AssertSkewKurtosisAre(type, {"[1, 2, 3]", "[40, null]"}, options, 1.9889477403978211,
3699+
3.9631931024230695);
3700+
options.bias = true;
36963701
options.min_count = 3;
36973702
AssertSkewKurtosisAre(type, "[0, 1, null, 2]", options, 0.0, -1.5);
36983703
AssertSkewKurtosisAre(type, {"[0, 1]", "[]", "[null, 2]"}, options, 0.0, -1.5);

python/pyarrow/_compute.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -1920,6 +1920,9 @@ class SkewOptions(_SkewOptions):
19201920
Parameters
19211921
----------
19221922
{_skip_nulls_doc()}
1923+
bias : bool, default True
1924+
Whether the calculated value is biased.
1925+
If False, the computed value includes a correction factor to reduce bias.
19231926
{_min_count_doc(default=0)}
19241927
"""
19251928

python/pyarrow/tests/test_compute.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3841,7 +3841,8 @@ def test_pivot_wider():
38413841

38423842

38433843
@pytest.mark.pandas
3844-
def test_biased_skew_and_kurtosis():
3844+
def test_unbiased_skew_and_kurtosis():
3845+
# Validate that the unbiased skew and kurtosis computed by Arrow match pandas
38453846
input = [1.0, 2.0, 3.0, 40.0, None]
38463847
arrow_skew = pc.skew(input, skip_nulls=True, bias=False)
38473848
pandas_skew = pd.Series(np.array(input)).skew(skipna=True)

0 commit comments

Comments
 (0)