-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathSPRT2.r
138 lines (138 loc) · 5.2 KB
/
SPRT2.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#
# SPRT2.r
# Classical SPRT
# See https://en.wikipedia.org/wiki/Sequential_probability_ratio_test
# and Kendall, Stuart, & Ord Vol. II Chapter 24.
# 9 October 2015, WAH @QD
# Updated 14 October 2015: fixed the position of the decision point in the
# plots and included the threshold lines in them.
setwd("~/Rank Dynamics/Development/R Workspace/SPRT") # Set working directory
#==============================================================================#
SPRT <- function(x, p.0=1/2, p.A=1/2, alpha=0.05, beta=0.05, LR=0) {
#
# `x` is an array of counts: positive counts for successes, negative
# counts for failures.
# `p.0` is the null hypothesis (chance of success).
# `p.A` is the alternative hypothesis.
# `alpha` is an upper bound on the false positive rate.
# `beta` is an upper bound on the false negative rate.
# `LR` is the previous log likelihood ratio.
#
likelihood.log <- function(y, p) y * ifelse(y > 0, log(p), -log(1-p))
lr <- cumsum(likelihood.log(x, p.A) - likelihood.log(x, p.0)) + LR
a <- log(beta/(1-alpha))
b <- log((1-beta)/alpha)
i.stop <- which(lr <= a | lr >= b)
if (length(i.stop) >= 1) {
i.stop <- min(i.stop)
result <- ifelse(lr[i.stop] <= a, "Accept", "Reject")
} else {
i.stop <- Inf
result <- "Continue"
}
return(list(Stop=i.stop, Result=result))
}
#==============================================================================#
#
# Actual data.
#
x.df <- read.csv("Ebay_interleave.csv")
#x.df <- read.csv("shopwiki_interleave.csv")
riffle <- function(x, y) as.vector(rbind(x, y))
x <- riffle(-x.df$Control, x.df$Test) # For each record, deduct all control events before processing test events
truex <- c(read.csv("Ebay_interleave_stream.csv"))
#truex <- c(read.csv("shopwiki_interleave_stream.csv"))
truex <- truex$Stream
#
# Run the test at an odds ratio of 'odds'.
#
odds <- 1.1
alpha <- 0.05
beta <- 0.05
p.0 <- 1/2
(p.A <- 1/ ( 1 + 1/odds ))
a <- log(1-beta) - log(alpha)
b <- log(beta) - log(1-alpha)
s <- log(p.0) - log(p.A) + log(1-p.A) - log(1-p.0)
u <- log(1-p.0) - log(1-p.A)
result <- SPRT(x, p.0=p.0, p.A = p.A, alpha=alpha, beta=beta)
#
# Display the estimate of p.
#
n.C <- sum(x.df$Control)
n.T <- sum(x.df$Test)
cat("Estimated proportion is", format(n.T / (n.C + n.T), digits=3), "\n")
#
# Compute a p-value as if this were a designed experiment.
#
cat("p-value is", format(pbinom(n.T, n.T+n.C, 1/2, lower.tail=FALSE), digits=3), "\n")
#
# Plot everything.
#
par(mfrow=c(1,1))
action <- ifelse(result$Result=="Continue", "Sampling", "The Null Hypothesis")
action <- paste(result$Result, action)
n <- cumsum(abs(x))
y <- cumsum(x)
usr <- par("usr")
pin <- par("pin")
asp <- diff(usr[1:2]) / diff(usr[3:4]) * pin[2] / pin[1] # Aspect ratio
mu <- -2*u/s - 1 # Slope of lines
srt <- atan(mu * asp) * 180 / pi
plot(cumsum(abs(truex)), cumsum(truex), type="l", col="Gray",
xlab="Trials = Clicks (1 dot = 1 day)", ylab="Cumulative Treatment Minus Control Clicks",
ylim=c(-25, 200),
main="SPRT: Significant User Preference for Dynamically Ranked Results")
mtext(bquote(paste(H[0], ": p=", .(p.0), "; ", H[A], ": p=", .(round(p.A, 3)),
" - Clicks beyond top 10 results for queries with 200+ results")))
x.0 <- crossprod(range(n), c(1,2)/3)
text(x.0, 2*a/s + mu*x.0, "Accept", pos=1, col="Blue", srt=srt)
x.0 <- crossprod(range(n), c(2,1)/3)
text(x.0, 2*b/s + mu*x.0, "Reject", pos=3, col="Red", srt=srt)
abline(c(a, -u - s/2) * 2/s, col="Blue", lwd=2, lty=3)
abline(c(b, -u - s/2) * 2/s, col="Red", lwd=2, lty=3)
points(cumsum(x.df$Trials), cumsum(x.df$Test - x.df$Control), pch=16, cex=0.7)
col <- ifelse(result$Result=="Reject", "Red", "Blue")
if (result$Result != "Continue") {
abline(v = n[result$Stop], col=col) #horizontal line at accept/reject decision
i <- cumsum(x.df$Trials) >= n[result$Stop]
points(cumsum(x.df$Trials)[i], cumsum(x.df$Test - x.df$Control)[i], pch=16, cex=0.8, col=col)
}
#==============================================================================#
# #
# # Code for testing SPRT.
# #
# p.0 <- 0.50
# p.A <- 0.55
# alpha <- 0.05
# beta <- 0.05
# a <- log(1-beta) - log(alpha)
# b <- log(beta) - log(1-alpha)
# s <- log(p.0) - log(p.A) + log(1-p.A) - log(1-p.0)
# u <- log(1-p.0) - log(1-p.A)
#
# par(mfcol=c(2,5))
# set.seed(17)
# n <- 500
# for (i in 1:5) {
# #
# # Example 1: generate a series according to the null.
# #
# x <- 2*(runif(n) < 1/2) - 1
# result <- SPRT(x, p.A=p.A, alpha=alpha, beta=beta)
# plot(cumsum(x), cex=1/2, main="Null", sub=result$Result)
# abline(c(a, -u - s/2)* 2/s, col="Blue", lwd=2, lty=3)
# abline(c(b, -u - s/2)* 2/s, col="Red", lwd=2, lty=3)
# col <- ifelse(result$Result=="Reject", "Red", "Blue")
# if (result$Result != "Continue") abline(v = result$Stop, col=col)
# #
# # Example 2: generate a series according to the alternative.
# #
# x <- 2*(runif(n) < p.A) - 1
# result <- SPRT(x, p.A=p.A)
# plot(cumsum(x), cex=1/2, main="Alternate", sub=result$Result)
# abline(c(a, -u - s/2)* 2/s, col="Blue", lwd=2, lty=3)
# abline(c(b, -u - s/2)* 2/s, col="Red", lwd=2, lty=3)
# col <- ifelse(result$Result=="Reject", "Red", "Blue")
# if (result$Result != "Continue") abline(v = result$Stop, col=col)
# }