Skip to content

Commit 84ed53a

Browse files
authored
Merge pull request #13 from stemangiola/test-NB-how-many-genes-it-fits
Test nb how many genes it fits
2 parents 93e8ccf + c960ac9 commit 84ed53a

17 files changed

+187929
-13
lines changed

dev/prepocess_cellTypes_tibble.R

+17-13
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,8 @@ get_FANTOM5 = function(){
9797
mutate(`Cell type formatted` = ifelse(grepl("Monocyte-derived macrophages 0h", onto_value, ignore.case = T), "macrophage", `Cell type formatted`)) %>%
9898
mutate(`Cell type formatted` = ifelse(grepl("Monocyte-derived macrophages repsonse to LPS, 00hr00", onto_value, ignore.case = T), "macrophage", `Cell type formatted`)) %>%
9999
mutate(`Cell type formatted` = ifelse(grepl("Natural Killer Cells, donor", onto_value, ignore.case = T), "natural_killer", `Cell type formatted`)) %>%
100-
mutate(`Cell type formatted` = ifelse(grepl("adipose, donor", onto_value, ignore.case = T), "adipose", `Cell type formatted`)) %>%
100+
mutate(`Cell type formatted` = ifelse(grepl("adipose, donor", onto_value, ignore.case = T), "adipocyte", `Cell type formatted`)) %>%
101+
101102

102103
# filter only recognised cell types
103104
distinct(onto_link, `Cell type`, `Cell type formatted`)
@@ -241,7 +242,7 @@ get_bloodRNA = function(){
241242
mutate(`Cell type` = ifelse(grepl("_Monocytes_", file), "monocyte", `Cell type`)) %>%
242243
mutate(`Cell type` = ifelse(grepl("_Naive_Bcells_", file), "b_naive", `Cell type`)) %>%
243244
mutate(`Cell type` = ifelse(grepl("_Neutrophils_", file), "neutrophil", `Cell type`)) %>%
244-
mutate(`Cell type` = ifelse(grepl("_Nkcells_", file), "nk", `Cell type`)) %>%
245+
mutate(`Cell type` = ifelse(grepl("_Nkcells_", file), "natural_killer", `Cell type`)) %>%
245246
mutate(`Cell type` = ifelse(grepl("_CD8_Tcells_", file), "t_CD8", `Cell type`)) %>%
246247
mutate(`Cell type` = ifelse(grepl("_Mem_Bcell_", file), "b_memory", `Cell type`)) %>%
247248
filter(`Cell type` %>% is.na %>% `!`) %>%
@@ -250,6 +251,8 @@ get_bloodRNA = function(){
250251
mutate(sample = gsub("_C1B73ACXX.+", "", file)) %>%
251252
group_by(sample, ensembl_gene_id, `Cell type`) %>%
252253
summarise(`read count` = `read count` %>% median(na.rm=T)) %>%
254+
ungroup() %>%
255+
253256

254257
# Cell type formatted
255258
mutate(`Cell type formatted` = `Cell type`) %>%
@@ -321,13 +324,13 @@ get_ENCODE = function(){
321324
) %>%
322325
dplyr:::select(gene_id, expected_count) %>%
323326
mutate(sample = f)
324-
} %>%
327+
}) %>%
328+
329+
left_join(
330+
(.) %>%
331+
separate(gene_id, c("ENSEMBL_ID", "dummy"), sep = "\\." , remove = F) %>%
332+
distinct(ENSEMBL_ID, gene_id) %>%
325333

326-
# Add symbol
327-
spread(sample, expected_count) %>%
328-
dplyr::rename(raw_geneID = gene_id) %>%
329-
separate(raw_geneID, c("ENSEMBL_ID", "dummy"), sep = "\\." ) %>%
330-
dplyr::select(-dummy) %>%
331334
mutate(
332335
symbol =
333336
AnnotationDbi::mapIds(
@@ -337,12 +340,10 @@ get_ENCODE = function(){
337340
keytype="ENSEMBL",
338341
multiVals="first"
339342
)
340-
) %>%
341-
gather(sample, `read count`, -ENSEMBL_ID, -symbol)
343+
)
342344
) %>%
343-
345+
rename(`read count` = expected_count) %>%
344346
dplyr::select(sample, `Cell type`, `Cell type formatted`, `read count`, symbol, ENSEMBL_ID) %>%
345-
distinct %>%
346347
mutate(`Data base` = "ENCODE")
347348

348349
}
@@ -360,7 +361,10 @@ save(FANTOM5, file="big_data/tibble_cellType_files/FANTOM5.RData")
360361
bloodRNA = get_bloodRNA()
361362
save(bloodRNA, file="big_data/tibble_cellType_files/bloodRNA.RData")
362363

363-
ENCODE %>% bind_rows(BLUEPRINT) %>% bind_rows(FANTOM5) %>% bind_rows(bloodRNA) %>%
364+
ENCODE %>%
365+
bind_rows(BLUEPRINT) %>%
366+
bind_rows(FANTOM5) %>%
367+
bind_rows(bloodRNA) %>%
364368
filter(symbol %>% is.na %>% `!`) %>%
365369
filter(`Cell type formatted` %>% is.na %>% `!`) %>%
366370
mutate(`read count` = `read count` %>% as.integer) %>%

0 commit comments

Comments
 (0)