Skip to content

Commit 672a4dd

Browse files
authored
Specify reference level when comparing two groups/levels (#666)
Closes #665
1 parent ed102df commit 672a4dd

19 files changed

+308
-91
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: effectsize
33
Title: Indices of Effect Size
4-
Version: 1.0.0
4+
Version: 1.0.0.1
55
Authors@R:
66
c(person(given = "Mattan S.",
77
family = "Ben-Shachar",

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# effectsize 1.0.x
2+
3+
- `cohens_d()`, `p_superiority()`, `rank_biserial()` and their relatives gain a `reference=` argument to control which level of the group variable should be treated as the reference (thanks @profandyfield for the suggestion).
4+
15
# effectsize 1.0.0
26

37
***First stable release of `{effectsize}`!***

R/cohens_d.R

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#' Cohen's *d* and Other Standardized Differences
22
#'
3-
#' Compute effect size indices for standardized differences: Cohen's *d*,
3+
#' Compute effect size indices for standardized mean differences: Cohen's *d*,
44
#' Hedges' *g* and Glass’s *delta* (\eqn{\Delta}). (This function returns the
55
#' **population** estimate.) Pair with any reported [`stats::t.test()`].
66
#' \cr\cr
@@ -9,7 +9,7 @@
99
#' correction for small-sample bias (using the exact method) to Cohen's *d*. For
1010
#' sample sizes > 20, the results for both statistics are roughly equivalent.
1111
#' Glass’s *delta* is appropriate when the standard deviations are significantly
12-
#' different between the populations, as it uses only the *second* group's
12+
#' different between the populations, as it uses only the reference group's
1313
#' standard deviation.
1414
#'
1515
#' @param x,y A numeric vector, or a character name of one in `data`.
@@ -29,6 +29,8 @@
2929
#' @param adjust Should the effect size be adjusted for small-sample bias using
3030
#' Hedges' method? Note that `hedges_g()` is an alias for
3131
#' `cohens_d(adjust = TRUE)`.
32+
#' @param reference (Optional) character value of the "group" used as the
33+
#' reference. By default, the _second_ group is the reference group.
3234
#' @param ... Arguments passed to or from other methods. When `x` is a formula,
3335
#' these can be `subset` and `na.action`.
3436
#' @inheritParams chisq_to_phi
@@ -136,6 +138,7 @@
136138
#' @export
137139
cohens_d <- function(x, y = NULL, data = NULL,
138140
pooled_sd = TRUE, mu = 0, paired = FALSE,
141+
reference = NULL,
139142
adjust = FALSE,
140143
ci = 0.95, alternative = "two.sided",
141144
verbose = TRUE, ...) {
@@ -147,6 +150,7 @@ cohens_d <- function(x, y = NULL, data = NULL,
147150
y = y, data = data,
148151
type = "d", adjust = adjust,
149152
pooled_sd = pooled_sd, mu = mu, paired = paired,
153+
reference = reference,
150154
ci = ci, alternative = alternative,
151155
verbose = verbose,
152156
...
@@ -157,6 +161,7 @@ cohens_d <- function(x, y = NULL, data = NULL,
157161
#' @export
158162
hedges_g <- function(x, y = NULL, data = NULL,
159163
pooled_sd = TRUE, mu = 0, paired = FALSE,
164+
reference = NULL,
160165
ci = 0.95, alternative = "two.sided",
161166
verbose = TRUE, ...) {
162167
cl <- match.call()
@@ -169,13 +174,15 @@ hedges_g <- function(x, y = NULL, data = NULL,
169174
#' @export
170175
glass_delta <- function(x, y = NULL, data = NULL,
171176
mu = 0, adjust = TRUE,
177+
reference = NULL,
172178
ci = 0.95, alternative = "two.sided",
173179
verbose = TRUE, ...) {
174180
.effect_size_difference(
175181
x,
176182
y = y, data = data,
177183
type = "delta",
178184
mu = mu, adjust = adjust,
185+
reference = reference,
179186
ci = ci, alternative = alternative,
180187
verbose = verbose,
181188
pooled_sd = NULL, paired = FALSE,
@@ -189,10 +196,12 @@ glass_delta <- function(x, y = NULL, data = NULL,
189196
.effect_size_difference <- function(x, y = NULL, data = NULL,
190197
type = "d", adjust = FALSE,
191198
mu = 0, pooled_sd = TRUE, paired = FALSE,
199+
reference = NULL,
192200
ci = 0.95, alternative = "two.sided",
193201
verbose = TRUE, ...) {
194202
if (type == "d" && adjust) type <- "g"
195203

204+
# TODO: Check if we can do anything with `reference` for these classes
196205
if (type != "delta") {
197206
if (.is_htest_of_type(x, "t-test")) {
198207
return(effectsize(x, type = type, verbose = verbose, data = data, ...))
@@ -203,7 +212,7 @@ glass_delta <- function(x, y = NULL, data = NULL,
203212

204213

205214
alternative <- .match.alt(alternative)
206-
out <- .get_data_2_samples(x, y, data, paired = paired, verbose = verbose, ...)
215+
out <- .get_data_2_samples(x, y, data, paired = paired, reference = reference, verbose = verbose, ...)
207216
x <- out[["x"]]
208217
y <- out[["y"]]
209218
paired <- out[["paired"]]
@@ -308,7 +317,7 @@ glass_delta <- function(x, y = NULL, data = NULL,
308317
paired, pooled_sd, mu, ci, ci_method, alternative, adjust,
309318
approximate = FALSE
310319
)
311-
return(out)
320+
out
312321
}
313322

314323
#' @keywords internal

R/common_language.R

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
#'
4242
#' Where \eqn{U_1}, \eqn{U_2}, and *Overlap* are agnostic to the direction of
4343
#' the difference between the groups, \eqn{U_3} and probability of superiority
44-
#' are not.
44+
#' are not (this can be controlled with the `reference` argument).
4545
#'
4646
#' The parametric version of these effects assumes normality of both populations
4747
#' and homoscedasticity. If those are not met, the non parametric versions
@@ -111,6 +111,7 @@
111111
#' @aliases cles
112112
p_superiority <- function(x, y = NULL, data = NULL,
113113
mu = 0, paired = FALSE, parametric = TRUE,
114+
reference = NULL,
114115
ci = 0.95, alternative = "two.sided",
115116
verbose = TRUE, ...) {
116117
if (.is_htest_of_type(x, "(t-test|Wilcoxon)", "t-test or a Wilcoxon-test")) {
@@ -120,7 +121,7 @@ p_superiority <- function(x, y = NULL, data = NULL,
120121
}
121122

122123
data <- .get_data_2_samples(x, y, data,
123-
paired = paired,
124+
paired = paired, reference = reference,
124125
allow_ordered = !parametric,
125126
verbose = verbose, ...
126127
)
@@ -244,6 +245,7 @@ cohens_u2 <- function(x, y = NULL, data = NULL,
244245
#' @rdname p_superiority
245246
cohens_u3 <- function(x, y = NULL, data = NULL,
246247
mu = 0, parametric = TRUE,
248+
reference = NULL,
247249
ci = 0.95, alternative = "two.sided", iterations = 200,
248250
verbose = TRUE, ...) {
249251
if (.is_htest_of_type(x, "(t-test|Wilcoxon)", "t-test or a Wilcoxon-test")) {
@@ -254,7 +256,7 @@ cohens_u3 <- function(x, y = NULL, data = NULL,
254256

255257

256258
data <- .get_data_2_samples(x, y, data,
257-
allow_ordered = !parametric,
259+
allow_ordered = !parametric, reference = reference,
258260
verbose = verbose, ...
259261
)
260262
x <- data[["x"]]
@@ -455,13 +457,13 @@ wmw_odds <- function(x, y = NULL, data = NULL,
455457

456458
out$CI <- ci
457459

458-
R <- boot::boot(
460+
res <- boot::boot(
459461
data = d,
460462
statistic = est,
461463
R = iterations
462464
)
463465

464-
bCI <- boot::boot.ci(R, conf = ci, type = "perc")[["percent"]]
466+
bCI <- boot::boot.ci(res, conf = ci, type = "perc")[["percent"]]
465467
bCI <- utils::tail(as.vector(bCI), 2)
466468
out$CI_low <- bCI[1]
467469
out$CI_high <- bCI[2]
@@ -478,5 +480,5 @@ wmw_odds <- function(x, y = NULL, data = NULL,
478480
approximate = TRUE,
479481
table_footer = "Non-parametric CLES"
480482
)
481-
return(out)
483+
out
482484
}

R/means_ratio.R

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
#'
1515
#' @details
1616
#' The Means Ratio ranges from 0 to \eqn{\infty}, with values smaller than 1
17-
#' indicating that the second mean is larger than the first, values larger than
18-
#' 1 indicating that the second mean is smaller than the first, and values of 1
17+
#' indicating that the mean of the reference group is larger, values larger than
18+
#' 1 indicating that the mean of the reference group is smaller, and values of 1
1919
#' indicating that the means are equal.
2020
#'
2121
#' # Confidence (Compatibility) Intervals (CIs)
@@ -63,15 +63,16 @@
6363
#' @export
6464
means_ratio <- function(x, y = NULL, data = NULL,
6565
paired = FALSE, adjust = TRUE, log = FALSE,
66+
reference = NULL,
6667
ci = 0.95, alternative = "two.sided",
6768
verbose = TRUE, ...) {
6869
alternative <- .match.alt(alternative)
6970

7071
## Prep data
7172
out <- .get_data_2_samples(
7273
x = x, y = y, data = data,
74+
paired = paired, reference = reference,
7375
verbose = verbose,
74-
paired = paired,
7576
...
7677
)
7778
x <- out[["x"]]
@@ -104,14 +105,14 @@ means_ratio <- function(x, y = NULL, data = NULL,
104105

105106
# Calc log RR
106107
log_val <- .logrom_calc(
107-
paired = TRUE,
108108
m1 = m1,
109109
sd1 = sd1,
110110
m2 = m2,
111111
sd2 = sd2,
112112
n1 = n,
113113
r = r,
114-
adjust = adjust
114+
adjust = adjust,
115+
paired = TRUE
115116
)
116117
} else {
117118
## ------------------------ 2-sample case -------------------------
@@ -121,14 +122,14 @@ means_ratio <- function(x, y = NULL, data = NULL,
121122

122123
# Calc log RR
123124
log_val <- .logrom_calc(
124-
paired = FALSE,
125125
m1 = m1,
126126
sd1 = sd1,
127127
n1 = n1,
128128
m2 = m2,
129129
sd2 = sd2,
130130
n2 = n2,
131-
adjust = adjust
131+
adjust = adjust,
132+
paired = FALSE
132133
)
133134
}
134135

@@ -175,44 +176,44 @@ means_ratio <- function(x, y = NULL, data = NULL,
175176
mu = 0,
176177
approximate = TRUE
177178
)
178-
return(out)
179+
out
179180
}
180181

181182

182183
#' @keywords internal
183-
.logrom_calc <- function(paired = FALSE,
184-
m1,
184+
.logrom_calc <- function(m1,
185185
sd1,
186186
n1,
187187
m2,
188188
sd2,
189189
n2 = n1,
190190
r = NULL,
191-
adjust = TRUE) {
191+
adjust = TRUE,
192+
paired = FALSE) {
192193
if (isTRUE(paired)) {
193-
yi <- log(m1 / m2)
194-
vi <-
194+
y_i <- log(m1 / m2)
195+
v_i <-
195196
sd1^2 / (n1 * m1^2) +
196197
sd2^2 / (n1 * m2^2) -
197198
2 * r * sd1 * sd2 / (m1 * m2 * n1)
198199
} else {
199-
yi <- log(m1 / m2)
200+
y_i <- log(m1 / m2)
200201
### large sample approximation to the sampling variance (does not assume homoscedasticity)
201-
vi <- sd1^2 / (n1 * m1^2) + sd2^2 / (n2 * m2^2)
202+
v_i <- sd1^2 / (n1 * m1^2) + sd2^2 / (n2 * m2^2)
202203
}
203204

204205

205206
if (isTRUE(adjust)) {
206207
J <- 0.5 * (sd1^2 / (n1 * m1^2) - sd2^2 / (n2 * m2^2))
207-
yi <- yi + J
208+
y_i <- y_i + J
208209

209210
Jvar <- 0.5 * (sd1^4 / (n1^2 * m1^4) - sd2^4 / (n2^2 * m2^4))
210-
vi <- vi + Jvar
211+
v_i <- v_i + Jvar
211212
}
212213

213214

214215
list(
215-
log_rom = yi,
216-
var_rom = vi
216+
log_rom = y_i,
217+
var_rom = v_i
217218
)
218219
}

R/rank_diff.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120
#' @export
121121
rank_biserial <- function(x, y = NULL, data = NULL,
122122
mu = 0, paired = FALSE,
123+
reference = NULL,
123124
ci = 0.95, alternative = "two.sided",
124125
verbose = TRUE, ...) {
125126
alternative <- .match.alt(alternative)
@@ -131,6 +132,7 @@ rank_biserial <- function(x, y = NULL, data = NULL,
131132
## Prep data
132133
out <- .get_data_2_samples(x, y, data,
133134
paired = paired,
135+
reference = reference,
134136
allow_ordered = TRUE,
135137
verbose = verbose, ...
136138
)
@@ -201,19 +203,21 @@ rank_biserial <- function(x, y = NULL, data = NULL,
201203
attr(out, "ci_method") <- ci_method
202204
attr(out, "approximate") <- FALSE
203205
attr(out, "alternative") <- alternative
204-
return(out)
206+
out
205207
}
206208

207209
#' @export
208210
#' @rdname rank_biserial
209211
cliffs_delta <- function(x, y = NULL, data = NULL,
210212
mu = 0,
213+
reference = NULL,
211214
ci = 0.95, alternative = "two.sided",
212215
verbose = TRUE, ...) {
213216
cl <- match.call()
214217
data <- .get_data_2_samples(x, y, data,
215218
verbose = verbose,
216219
allow_ordered = TRUE,
220+
reference = reference,
217221
...
218222
)
219223
x <- data$x
@@ -255,5 +259,5 @@ cliffs_delta <- function(x, y = NULL, data = NULL,
255259

256260
u_ <- U1 / S
257261
f_ <- U2 / S
258-
return(u_ - f_)
262+
u_ - f_
259263
}

R/repeated_measures_d.R

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ repeated_measures_d <- function(x, y,
157157
data = NULL,
158158
mu = 0, method = c("rm", "av", "z", "b", "d", "r"),
159159
adjust = TRUE,
160+
reference = NULL,
160161
ci = 0.95, alternative = "two.sided",
161162
verbose = TRUE, ...) {
162163
method <- match.arg(method)
@@ -165,7 +166,11 @@ repeated_measures_d <- function(x, y,
165166
}
166167

167168
alternative <- .match.alt(alternative)
168-
data <- .get_data_paired(x, y, data = data, method = method, verbose = verbose, ...)
169+
data <- .get_data_paired(x, y,
170+
data = data, method = method,
171+
reference = reference,
172+
verbose = verbose, ...
173+
)
169174

170175
if (method %in% c("d", "r")) {
171176
values <- .replication_d(data, mu = mu, method = method)
@@ -217,7 +222,7 @@ repeated_measures_d <- function(x, y,
217222
mu, ci, ci_method, alternative,
218223
approximate = FALSE
219224
)
220-
return(out)
225+
out
221226
}
222227

223228
#' @rdname repeated_measures_d

0 commit comments

Comments
 (0)