-
Notifications
You must be signed in to change notification settings - Fork 33
Expand file tree
/
Copy pathDE_genes_and_GSVA_function.R
More file actions
117 lines (96 loc) · 3.64 KB
/
Copy pathDE_genes_and_GSVA_function.R
File metadata and controls
117 lines (96 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
## load data
.libPaths("/data2/csj/tools/Rlib")
library(Seurat)
library(dplyr)
library(monocle)
options(stringsAsFactors=FALSE)
library(reticulate)
parse_h5ad <- function(adata){
require(reticulate)
ad <- import("anndata", convert = FALSE)
ada <- ad$read_h5ad(adata)
meta <- py_to_r(ada$obs)
if(class(ada$raw$X)[1] == "scipy.sparse.csr.csr_matrix"){
exp <- t(py_to_r(ada$raw$X$toarray()))
}
else{
exp <- t(py_to_r(ada$raw$X))
}
rownames(exp) <- rownames(py_to_r(ada$raw$var))
colnames(exp) <- rownames(meta)
return(
list(
metadata = meta,
expression = exp
)
)
}
############## different expression
DEGene <- function(h5ad,Cluster1,Cluster2){
## h5ad: the result of function parse_h5ad
## Cluster1 : a vector of Cluster
## Cluster2 : a vector of Cluster
pseudocount.use =1
cell_name_1 <- rownames(h5ad$metadata[h5ad$metadata$MajorCluster %in% Cluster1,])
cell_name_2 <- rownames(h5ad$metadata[h5ad$metadata$MajorCluster %in% Cluster2,])
Expression_1 <- h5ad$expression[,cell_name_1]
Expression_2 <- h5ad$expression[,cell_name_2]
## log FC
mean_c1 <- as.data.frame(rowMeans(as.matrix(Expression_1)))
colnames(mean_c1) <- "mean_c1"
mean_c2 <- as.data.frame(rowMeans(as.matrix(Expression_2)))
colnames(mean_c2) <- "mean_c2"
log2fc <- data.frame(log2fc = log2(mean_c1$mean_c1 + pseudocount.use) - log2(mean_c2$mean_c2 + pseudocount.use))
rownames(log2fc) <- rownames(mean_c1)
log2fc$gene <- rownames(log2fc)
## wilcox test
group.info <- data.frame(row.names = c(cell_name_1, cell_name_2))
group.info[cell_name_1, "group"] <- "Group1"
group.info[cell_name_2, "group"] <- "Group2"
group.info[, "group"] <- factor(x = group.info[, "group"])
data.use <- h5ad$expression[, rownames(x = group.info), drop = FALSE]
p_val <- sapply(
X = 1:nrow(x = data.use),
FUN = function(x) {
return(wilcox.test(data.use[x, ] ~ group.info[, "group"])$p.value)
})
## BH correction
adj_p_val <- p.adjust(p_val, method="BH")
## DE table
result <- data.frame(gene=log2fc$gene, log2FC=log2fc$log2fc, Pvalue=p_val, Adj_pval=adj_p_val)
return(result)
}
##################################################### GSVA
runGSVA <- function(h5ad,Cluster1,Cluster2,kcdf="Gaussian",AdjPvalueCutoff=0.05){
## h5ad: the result of function parse_h5ad
## Cluster1 : Cluster1
## Cluster2 : Cluster2
## kcdf: kcdf="Gaussian" for continuous and 'Poisson for integer counts'
require(GSVA)
require(GSEABase)
require(GSVAdata)
require(clusterProfiler)
data(c2BroadSets)
library(limma)
expression <- h5ad$expression[,rownames(h5ad$metadata[h5ad$metadata$MajorCluster %in% c(Cluster1,Cluster2),])]
## change gene symbol to geneid
gene_entrezid <- bitr(rownames(expression), fromType = "SYMBOL", toType = "ENTREZID", OrgDb = "org.Hs.eg.db")
expression_filt <- expression[gene_entrezid$SYMBOL,]
rownames(expression_filt) <- gene_entrezid$ENTREZID
expression_filt <- as.matrix(expression_filt)
res_gsva <- gsva(expression_filt, c2BroadSets, parallel.sz=10,kcdf=kcdf)
annotation_col = data.frame(CellType = factor(h5ad$metadata[colnames(res_gsva),]$MajorCluster))
rownames(annotation_col) = colnames(res_gsva)
## using limma to conduct DE analysis
f <- factor(annotation_col$CellType)
design <- model.matrix(~0+f)
colnames(design) <- c("C1","C2")
rownames(design) <- colnames(res_gsva)
fit <- lmFit(res_gsva, design)
cont.matrix=makeContrasts('C1-C2',levels = design)
fit2=contrasts.fit(fit,cont.matrix)
fit2=eBayes(fit2)
gs <- topTable(fit2,adjust='BH', number=Inf, p.value=AdjPvalueCutoff)
gs$cluster <- ifelse(gs$t > 0 , "C1", "C2")
return(gs)
}