CASE/R/CASE.R at 9780f5344903dead43d0f68755405acb63458248 · leaffur/CASE · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#' @title CASE
#'
#' @description Perform Multi-trait Fine-mapping
#' @author Chen Lin, Hongyu Zhao
#' @details TBD
#' @references TBD
#' @param Z M * C matrix of z scores.
#' @param R M * M matrix of LD.
#' @param hatB M * C matrix of the estimated effects. Alternative summary data (together with hatS) to be provided instead of Z.
#' @param hatS M * C matrix of standard errors of the estimated effects. Alternative summary data (together with hatB) to be provided instead of Z.
#' @param N either C vector of the sample size, or C * C matrix of the sample size (diagonal) and ovelaps  (off-diagonal). If provided with a vector, CASE assumes that each pair of traits overlaps with their minimal sample size.
#' @param V (optional) C * C covariance (correlation) matrix for the noise between traits. If not provided, the default is an identity matrix representing no correlations of the error.
#' @param cs (optional) logical, whether to get credible sets. Default = TRUE.
#' @param verbose (optional) logical, whether to print logging information. Default = TRUE.
#' @param ... additional arguments.
#' @return A \code{"CASE"} object with the following elements:
#' \item{pi:}{L-vector, the prior probabilities of sharing patterns.}
#' \item{U:}{L-list of C * C matrix, the prior covariances of sharing patterns.}
#' \item{V:}{C * C matrix, the sample-adjusted phenotypical variance.}
#' \item{pip:}{M * C matrix, posterior probability of having eQTL effects per SNP per cell type.}
#' \item{post_mean:}{M * C matrix, average posterior estimates of eQTL effects per SNP per cell type.}
#' @examples
#' ## A single-trait example.
#' set.seed(3)
#' N = 500
#' M = 1000
#' X = matrix(sample(0:2, size = N * M, replace = TRUE), N, M)
#' B = rep(0, M)
#' B[1:4] = runif(4, 1, 2)
#' e = rnorm(N)
#' Y = X %*% B + e
#'
#' X = scale(X)
#' Y = scale(Y)
#' R = cor(X)
#'
#' Z = hatB = hatS = rep(0, M)
#' hatB <- as.vector(t(X) %*% Y / (N - 1))
#' hatS <- sqrt((sum(Y^2) / (N - 1)  - hatB^2 ) / (N - 1))
#' Z <- hatB / hatS
#'
#' fit <- CASE(Z = Z, R = R, N = N)
#' # print(fit$sets)
#'
#'
#' ## A multi-trait example.
#' set.seed(3)
#' N = 500
#' M = 1000
#' C = 3
#' X = matrix(sample(0:2, size = N * M, replace = TRUE), N, M)
#' B = matrix(0, M, C)
#' B[1, ] = runif(C, 1, 2)
#' B[2, 1] = runif(1, 2, 3)
#' e = matrix(rnorm(N * C), N, C)
#' Y = X %*% B + e
#'
#' X = scale(X)
#' Y = scale(Y)
#' R = cor(X)
#'
#' Z = hatB = hatS = matrix(0, M, C)
#' for (c in 1:C){
#'   hatB[, c] <- t(X) %*% Y[, c] / (N - 1)
#'   hatS[, c] <- sqrt((sum(Y[, c]^2) / (N - 1)  - hatB[, c]^2 ) / (N - 1))
#'   Z[, c] <- hatB[, c] / hatS[, c]
#' }
#'
#' fit <- CASE(Z = Z, R = R, N = N)
#' # print(fit$sets)
#' @export
CASE <- function(Z = NULL, R, hatB = NULL, hatS = NULL, N, V = NULL, cs = TRUE, verbose = TRUE, ...){
    t1 = Sys.time()
    if (is.null(Z)){
        Z = hatB / hatS
    }
    Z = as.matrix(Z)

    hatBS = transform_Z(Z, N)
    hatB = hatBS$hatB
    hatS = hatBS$hatS

    # V = estimate_null_correlation_simple(mash_set_data(do.call(rbind, raw.data$hatB), do.call(rbind, raw.data$hatS)))

    m1 <- CASE_train(hatB = hatB, hatS = hatS, V = V, R = R, N = N, Z = NULL, verbose = verbose, ...)

    res <- CASE_test(hatB = hatB, hatS = hatS, R = R, N = N, CASE_training = m1, Z = NULL, verbose = verbose, ...)
    t2 = Sys.time()
    res$time = difftime(t2, t1, units = "secs")
    if (cs){
      res$sets <- get_credible_sets(res$pip, R = R, verbose = verbose)
    }

    return(res)
}