Skip to content

Commit

Permalink
release 1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
ftwkoopmans committed Jul 21, 2024
1 parent 0ae8038 commit 9239384
Show file tree
Hide file tree
Showing 32 changed files with 928 additions and 775 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Package: msdap
Title: Mass Spectrometry Downstream Analysis Pipeline
Description: Analyze label-free proteomics datasets from various sources (MaxQuant, Spectronaut, etc) using a pipeline that facilitates peptide filtering and many algorithms for normalization and statistical analysis. A comprehensive PDF report can be generated that includes many data visualizations and documentation thereof.
URL: https://github.com/ftwkoopmans/msdap
Version: 1.0.9
Version: 1.1
Authors@R:
person(given = "Frank",
family = "Koopmans",
Expand Down
4 changes: 1 addition & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export(differential_detect)
export(export_peptide_abundance_matrix)
export(export_protein_abundance_matrix)
export(export_statistical_results)
export(export_stats_genesummary)
export(file_check)
export(filename_strip_illegal_characters)
export(filter_dataset)
Expand Down Expand Up @@ -69,8 +70,6 @@ export(plot_sample_pca__sample_in_contrast)
export(plot_variance_explained)
export(plot_volcano)
export(print_dataset_summary)
export(protein2gene_by_symbol)
export(protein2gene_orthologs)
export(protein_eset_from_data)
export(read_textfile_compressed)
export(regex_classification)
Expand All @@ -84,7 +83,6 @@ export(rollup_pep2prot_tmp)
export(sample_color_coding__long_format)
export(sample_metadata_custom)
export(setup_contrasts)
export(summarise_stats)
export(tibble_as_eset)
export(update_protein_mapping)
export(update_protein_mapping_from_maxquant)
Expand Down
4 changes: 2 additions & 2 deletions R/dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ tibble_peptides_reorder = function(tib) {
#' @param peptides peptide tibble in long format
empty_protein_tibble = function(peptides) {
uprot = unique(peptides$protein_id)
return(tibble(protein_id = uprot, fasta_headers = uprot, gene_symbols_or_id = uprot))
return(tibble(protein_id = uprot, fasta_headers = uprot, gene_symbols = uprot, gene_symbols_or_id = uprot))
}


Expand Down Expand Up @@ -314,7 +314,7 @@ diffdetect_summary_prettyprint = function(dataset, use_quant = FALSE, trim_contr
# summary stats per contrast
group_by(contrast) %>%
summarise(`#proteins` = n(),
`#abs(zscore) >= 4` = sum(abs(zscore) >= 4),
`#abs(zscore) >= 6` = sum(abs(zscore) >= 6),
`top10` = tolower(paste(stringr::str_trunc(head(gene_symbols_or_id, 10), width = 10, side = "right"), collapse=", ") )) %>%
ungroup() %>%
# sort contrasts in same order as defined by user
Expand Down
12 changes: 9 additions & 3 deletions R/dea.R
Original file line number Diff line number Diff line change
Expand Up @@ -494,15 +494,21 @@ dea_results_to_wide = function(dataset) {
}

# first, get the number of peptides used in each contrast. next, add the results from each dea algorithm in each contrast
tmp = dataset$de_proteins %>% select(protein_id, dea_algorithm, contrast, foldchange.log2, tidyselect::any_of("effectsize"), pvalue, qvalue, signif)
if("effectsize" %in% colnames(tmp)) {
tmp = tmp %>% pivot_wider(names_from = c(dea_algorithm, contrast), values_from = c(foldchange.log2, effectsize, pvalue, qvalue, signif))
} else {
tmp = tmp %>% pivot_wider(names_from = c(dea_algorithm, contrast), values_from = c(foldchange.log2, pvalue, qvalue, signif))
}


tib = left_join(dataset$de_proteins %>%
select(protein_id, contrast, peptides_used_for_dea) %>%
distinct(protein_id, contrast, .keep_all = T) %>%
pivot_wider(names_from = contrast, values_from = peptides_used_for_dea, names_prefix = "peptides_used_for_dea_") %>%
replace(is.na(.), 0),
#
dataset$de_proteins %>%
select(protein_id, dea_algorithm, contrast, foldchange.log2, pvalue, qvalue, signif) %>%
pivot_wider(names_from = c(dea_algorithm, contrast), values_from = c(foldchange.log2, pvalue, qvalue, signif)),
tmp,
by="protein_id")

# if there are multiple DEA algorithms in the results, add a column that combines their results such that all proteins significant in 2 or more tests/algorithms are flagged
Expand Down
11 changes: 4 additions & 7 deletions R/export_data_tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,10 @@ export_protein_abundance_matrix = function(dataset, rollup_algorithm, output_dir
m = m[ , order(match(colnames(m), dataset$samples$sample_id)), drop=F]

# add protein metadata
tib = dataset$proteins %>% inner_join(as_tibble(m) %>% add_column(protein_id = rownames(m)), by="protein_id") %>% arrange(protein_id)
if("accessions" %in% colnames(tib)) {
tib = tib %>% select(-accessions) # not useful for user, redundant with protein_id column in virtually all datasets
}
if("gene_symbols_or_id" %in% colnames(tib)) {
tib = tib %>% arrange(gene_symbols_or_id!=protein_id, gene_symbols_or_id) # proteins without gene symbol first, then sort by symbol
}
tib = dataset$proteins %>%
select(protein_id, fasta_headers, gene_symbols_or_id) %>%
inner_join(as_tibble(m) %>% add_column(protein_id = rownames(m)), by="protein_id") %>%
arrange(gene_symbols_or_id!=protein_id, gene_symbols_or_id) # proteins without gene symbol first, then sort by symbol

## write to file
# generate filename. if very long (e.g., huge contrast name + long path in output_dir), try to shorten it with an md5 hash
Expand Down
Loading

0 comments on commit 9239384

Please sign in to comment.