@@ -97,7 +97,8 @@ get_FANTOM5 = function(){
97
97
mutate(`Cell type formatted` = ifelse(grepl(" Monocyte-derived macrophages 0h" , onto_value , ignore.case = T ), " macrophage" , `Cell type formatted` )) %> %
98
98
mutate(`Cell type formatted` = ifelse(grepl(" Monocyte-derived macrophages repsonse to LPS, 00hr00" , onto_value , ignore.case = T ), " macrophage" , `Cell type formatted` )) %> %
99
99
mutate(`Cell type formatted` = ifelse(grepl(" Natural Killer Cells, donor" , onto_value , ignore.case = T ), " natural_killer" , `Cell type formatted` )) %> %
100
- mutate(`Cell type formatted` = ifelse(grepl(" adipose, donor" , onto_value , ignore.case = T ), " adipose" , `Cell type formatted` )) %> %
100
+ mutate(`Cell type formatted` = ifelse(grepl(" adipose, donor" , onto_value , ignore.case = T ), " adipocyte" , `Cell type formatted` )) %> %
101
+
101
102
102
103
# filter only recognised cell types
103
104
distinct(onto_link , `Cell type` , `Cell type formatted` )
@@ -241,7 +242,7 @@ get_bloodRNA = function(){
241
242
mutate(`Cell type` = ifelse(grepl(" _Monocytes_" , file ), " monocyte" , `Cell type` )) %> %
242
243
mutate(`Cell type` = ifelse(grepl(" _Naive_Bcells_" , file ), " b_naive" , `Cell type` )) %> %
243
244
mutate(`Cell type` = ifelse(grepl(" _Neutrophils_" , file ), " neutrophil" , `Cell type` )) %> %
244
- mutate(`Cell type` = ifelse(grepl(" _Nkcells_" , file ), " nk " , `Cell type` )) %> %
245
+ mutate(`Cell type` = ifelse(grepl(" _Nkcells_" , file ), " natural_killer " , `Cell type` )) %> %
245
246
mutate(`Cell type` = ifelse(grepl(" _CD8_Tcells_" , file ), " t_CD8" , `Cell type` )) %> %
246
247
mutate(`Cell type` = ifelse(grepl(" _Mem_Bcell_" , file ), " b_memory" , `Cell type` )) %> %
247
248
filter(`Cell type` %> % is.na %> % `!` ) %> %
@@ -250,6 +251,8 @@ get_bloodRNA = function(){
250
251
mutate(sample = gsub(" _C1B73ACXX.+" , " " , file )) %> %
251
252
group_by(sample , ensembl_gene_id , `Cell type` ) %> %
252
253
summarise(`read count` = `read count` %> % median(na.rm = T )) %> %
254
+ ungroup() %> %
255
+
253
256
254
257
# Cell type formatted
255
258
mutate(`Cell type formatted` = `Cell type` ) %> %
@@ -321,13 +324,13 @@ get_ENCODE = function(){
321
324
) %> %
322
325
dplyr ::: select(gene_id , expected_count ) %> %
323
326
mutate(sample = f )
324
- } %> %
327
+ }) %> %
328
+
329
+ left_join(
330
+ (. ) %> %
331
+ separate(gene_id , c(" ENSEMBL_ID" , " dummy" ), sep = " \\ ." , remove = F ) %> %
332
+ distinct(ENSEMBL_ID , gene_id ) %> %
325
333
326
- # Add symbol
327
- spread(sample , expected_count ) %> %
328
- dplyr :: rename(raw_geneID = gene_id ) %> %
329
- separate(raw_geneID , c(" ENSEMBL_ID" , " dummy" ), sep = " \\ ." ) %> %
330
- dplyr :: select(- dummy ) %> %
331
334
mutate(
332
335
symbol =
333
336
AnnotationDbi :: mapIds(
@@ -337,12 +340,10 @@ get_ENCODE = function(){
337
340
keytype = " ENSEMBL" ,
338
341
multiVals = " first"
339
342
)
340
- ) %> %
341
- gather(sample , `read count` , - ENSEMBL_ID , - symbol )
343
+ )
342
344
) %> %
343
-
345
+ rename( `read count` = expected_count ) % > %
344
346
dplyr :: select(sample , `Cell type` , `Cell type formatted` , `read count` , symbol , ENSEMBL_ID ) %> %
345
- distinct %> %
346
347
mutate(`Data base` = " ENCODE" )
347
348
348
349
}
@@ -360,7 +361,10 @@ save(FANTOM5, file="big_data/tibble_cellType_files/FANTOM5.RData")
360
361
bloodRNA = get_bloodRNA()
361
362
save(bloodRNA , file = " big_data/tibble_cellType_files/bloodRNA.RData" )
362
363
363
- ENCODE %> % bind_rows(BLUEPRINT ) %> % bind_rows(FANTOM5 ) %> % bind_rows(bloodRNA ) %> %
364
+ ENCODE %> %
365
+ bind_rows(BLUEPRINT ) %> %
366
+ bind_rows(FANTOM5 ) %> %
367
+ bind_rows(bloodRNA ) %> %
364
368
filter(symbol %> % is.na %> % `!` ) %> %
365
369
filter(`Cell type formatted` %> % is.na %> % `!` ) %> %
366
370
mutate(`read count` = `read count` %> % as.integer ) %> %
0 commit comments