-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2.3_clean_taxonomic_data.R
61 lines (47 loc) · 2.01 KB
/
2.3_clean_taxonomic_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# ##############################################################################
#
## Species feature table pre-processing for all studies
#
# ##############################################################################
# Setting working directory
setwd("~/Desktop/crc_analysis/scripts") #macbook
setwd("C:/Users/Erika Dvarionaite/iCloudDrive/Desktop/crc_analysis/scripts") #windows
# Packages
library("readr")
library("stringr")
library("tidyr")
library("yaml")
library("dplyr")
library(tibble)
# ##############################################################################
# Get data
parameters <- yaml.load_file('../parameters.yaml')
all.studies <- parameters$all.studies
# Feature tables
feat.species <- read.table("../data/species/species_full_relab.tsv",
sep = "\t", stringsAsFactors = F,
header = T, check.names = F,
row.names = 1, quote ="", fill = F)
# Metadata
meta.all <- read_delim("../data/meta/meta.crc.tsv",
delim = "\t",
escape_double = FALSE,
trim_ws = TRUE)
# ##############################################################################
# Filter low abundant features
library(matrixStats)
feat.matrix <- as.matrix(feat)
temp.max.ab <- sapply(unique(meta.all$Group), FUN=function(s){
rowMaxs(feat.matrix[,meta.all %>% filter(Group == s) %>% pull(Sample_ID)])
})
f.idx = rowSums(temp.max.ab >= 1e-03) >= 1
species.filtr <- feat.matrix[f.idx,]
cat('Out of', nrow(feat), 'features,', 'retaining', sum(f.idx), 'species after low-abundance filtering...\n')
# ##############################################################################
# Save filtered species table
filtered.species <- '../data/species/filtered.species.NEW.tsv'
write.table(species.filtr, file=filtered.species, quote=FALSE, sep='\t',
row.names=TRUE, col.names=TRUE)
# #######################
# End of script
# #######################