Skip to content

Commit fe522fb

Browse files
committed
Start of development version, bug fixes and new gene annotation feature
1 parent 0f4485a commit fe522fb

16 files changed

+531
-19
lines changed

NEWS.html

Lines changed: 428 additions & 0 deletions
Large diffs are not rendered by default.

NEWS.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
# ProActive (development version)
22

3-
# Changes in 0.1.0
3+
# Changes in 0.1.0.9000
44
+ Changed name of `ProActive()` to `ProActiveDetect()`
55
+ Added ability to search read coverage plots for gene annotations of
66
interest with the `geneAnnotationSearch()` function
77
+ Fixed bug with pattern-match start and stop positions on genome/contig chunks
8+
+ Fixed bugs with the contig chunking parameter
9+
10+
# Version 0.1.0 submitted to CRAN!
811

912
# Changes in 0.0.2
1013
+ Changes for CRAN re-submission after review

ProphageActivityTool.Rproj

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
Version: 1.0
2+
3+
RestoreWorkspace: Default
4+
SaveWorkspace: Default
5+
AlwaysSaveHistory: Default
6+
7+
EnableCodeIndexing: Yes
8+
UseSpacesForTab: Yes
9+
NumSpacesForTab: 2
10+
Encoding: UTF-8
11+
12+
RnwWeave: Sweave
13+
LaTeX: pdfLaTeX
14+
15+
AutoAppendNewline: Yes
16+
StripTrailingWhitespace: Yes
17+
18+
BuildType: Package
19+
PackageUseDevtools: Yes
20+
PackageInstallArgs: --no-multiarch --with-keep.source
21+
PackageRoxygenize: rd,collate,namespace

R/ProActiveDetect.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,15 @@ ProActiveDetect <- function(pileup, mode, gffTSV, windowSize = 1000, chunkContig
7575
pileup <- contigChunks(pileup, chunkSize)
7676
}
7777
if(verbose){message("Starting pattern-matching...")}
78-
patternMatchSummary <- patternMatcher(pileup, windowSize, minSize, maxSize, mode, minContigLength, verbose)
78+
patternMatchSummary <- patternMatcher(pileup, windowSize, minSize, maxSize, mode, minContigLength, chunkContigs, verbose)
7979
if (IncludeNoPatterns) {
8080
classifList <- patternMatchSummary[[1]]
8181
} else {
8282
classifList <- removeNoPatterns(patternMatchSummary[[1]])
8383
}
8484
filteredOutContigsDf <- patternMatchSummary[[2]]
8585
if(verbose){message("Summarizing pattern-matching results")}
86-
summaryTable <- classifSumm(pileup, patternMatchSummary[[1]], windowSize, mode, chunkSize)
86+
summaryTable <- classifSumm(pileup, patternMatchSummary[[1]], windowSize, mode, chunkContigs, chunkSize)
8787
if (missing(gffTSV) == FALSE) {
8888
if(verbose){message("Finding gene predictions in elevated or gapped regions of read coverage...")}
8989
elevGapSummList <- removeNoPatterns(patternMatchSummary[[1]])
@@ -116,7 +116,7 @@ ProActiveDetect <- function(pileup, mode, gffTSV, windowSize = 1000, chunkContig
116116
table <- (table(summaryTable[, 2]))
117117
if(verbose){message(paste0(capture.output(table), collapse = "\n"))}
118118
if(mode == "genome" || (mode == "metagenome" & chunkContigs == TRUE)){
119-
linkChunks(classifList, pileup, windowSize, mode, verbose)
119+
linkChunks(classifList, pileup, windowSize, mode, chunkContigs, verbose)
120120
}
121121
if (missing(saveFilesTo) == FALSE) {
122122
ifelse(!dir.exists(paths = paste0(saveFilesTo, "\\ProActiveOutput")),

R/bestMatchListFunctions.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,12 @@
1010
#' Options are 100, 200, 500, 1000 ONLY. Default is 1000.
1111
#' @param chunkSize If `mode`="genome" OR if `mode`="metagenome" and `chunkContigs`=TRUE,
1212
#' chunk the genome or contigs, respectively, into smaller subsets for pattern-matching.
13+
#' @param chunkContigs TRUE or FALSE. If TRUE and `mode`="metagenome", contigs longer
14+
#' than the `chunkSize` will be 'chunked' into smaller subsets and pattern-matching
15+
#' will be performed on each subset. Default is FALSE.
1316
#' @param mode Either "genome" or "metagenome"
1417
#' @keywords internal
15-
classifSumm <- function(pileup, bestMatchList, windowSize, mode, chunkSize) {
18+
classifSumm <- function(pileup, bestMatchList, windowSize, mode, chunkContigs, chunkSize) {
1619
if (length(bestMatchList) == 0) {
1720
stop("No pattern-matches detected")
1821
}
@@ -34,7 +37,7 @@ classifSumm <- function(pileup, bestMatchList, windowSize, mode, chunkSize) {
3437
classification <- vapply(seq_along(bestMatchList), function(i) {bestMatchList[[i]][[7]]}, character(1))
3538
matchSize <- vapply(seq_along(bestMatchList), function(i) {
3639
pileupSubset <- pileup[which(pileup[, 1] == bestMatchList[[i]][[8]]), ]
37-
pileupSubset <- changewindowSize(pileupSubset, windowSize, mode)
40+
pileupSubset <- changewindowSize(pileupSubset, windowSize, chunkContigs, mode)
3841
(length(seq(pileupSubset[bestMatchList[[i]][[4]], 3], pileupSubset[bestMatchList[[i]][[5]], 3], windowSize)) - 1) * windowSize},
3942
numeric(1))
4043
classifSummTable <- cbind.data.frame(refName, classification, elevRatio, startPos, endPos, matchSize)

R/chunkingFunctions.R

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ contigChunks <- function(pileup, chunkSize) {
5858
#' 100 bp windows/bins.
5959
#' @param chunkSize If `mode`="genome" OR if `mode`="metagenome" and `chunkContigs`=TRUE,
6060
#' chunk the genome or contigs, respectively, into smaller subsets for pattern-matching.
61-
#' `chunkSize` determines the size (in bp) of each 'chunk'. Default is 50000.
61+
#' `chunkSize` determines the size (in bp) of each 'chunk'. Default is 100000.
6262
#' @keywords internal
6363
genomeChunks <- function(pileup, chunkSize) {
6464
refName <- rep(NA, nrow(pileup))
@@ -99,10 +99,13 @@ genomeChunks <- function(pileup, chunkSize) {
9999
#' 100 bp windows/bins.
100100
#' @param windowSize The number of basepairs to average read coverage values over.
101101
#' @param mode Either "genome" or "metagenome"
102+
#' @param chunkContigs TRUE or FALSE. If TRUE and `mode`="metagenome", contigs longer
103+
#' than the `chunkSize` will be 'chunked' into smaller subsets and pattern-matching
104+
#' will be performed on each subset. Default is FALSE.
102105
#' @param verbose TRUE or FALSE. Print progress messages to console. Default is TRUE.
103106
#' @importFrom stringr str_extract
104107
#' @keywords internal
105-
linkChunks <- function(bestMatchList, pileup, windowSize, mode, verbose){
108+
linkChunks <- function(bestMatchList, pileup, windowSize, mode, chunkContigs, verbose){
106109
potLink <- rep(NA, length(bestMatchList))
107110
refNames <- vapply(seq_along(bestMatchList), function(i){bestMatchList[[i]][[8]]}, character(1))
108111
classifVector <- vapply(seq_along(bestMatchList), function(i){
@@ -111,7 +114,7 @@ linkChunks <- function(bestMatchList, pileup, windowSize, mode, verbose){
111114
startPos <- bestMatchList[[i]][[4]]
112115
endPos <- bestMatchList[[i]][[5]]
113116
pileupSubset <- pileup[which(pileup[, 1] == refName), ]
114-
pileupSubset <- changewindowSize(pileupSubset, windowSize, mode)
117+
pileupSubset <- changewindowSize(pileupSubset, windowSize, chunkContigs, mode)
115118
if(grepl("chunk", refName) == FALSE) {
116119
"NoChunk"
117120
} else if(classification == "NoPattern") {
@@ -140,7 +143,7 @@ linkChunks <- function(bestMatchList, pileup, windowSize, mode, verbose){
140143
classifDf[rightIdx,3] <- "no"
141144
}
142145
}
143-
if(verbose == TRUE){
146+
if(verbose){
144147
if("link" %in% classifDf[,3]){
145148
linkIdxs <- which(classifDf[,3] == "link")
146149
lapply(seq_along(linkIdxs), function(x){

R/patternMatcher.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,13 @@
1414
#' @param mode Either "genome" or "metagenome".
1515
#' @param minContigLength The minimum contig/chunk size (in bp) to perform pattern-matching
1616
#' on. Default is 25000.
17+
#' @param chunkContigs TRUE or FALSE. If TRUE and `mode`="metagenome", contigs longer
18+
#' than the `chunkSize` will be 'chunked' into smaller subsets and pattern-matching
19+
#' will be performed on each subset. Default is FALSE.
1720
#' @param verbose TRUE or FALSE. Print progress messages to console. Default is TRUE.
1821
#' @importFrom stats na.omit
1922
#' @keywords internal
20-
patternMatcher <- function(pileup, windowSize, minSize, maxSize, mode, minContigLength, verbose) {
23+
patternMatcher <- function(pileup, windowSize, minSize, maxSize, mode, minContigLength, chunkContigs, verbose) {
2124
refNames <- unique(pileup[, 1])
2225
bestMatchList <- vector(mode='list', length=length(refNames))
2326
filteredOutContigs <- rep(NA, length(refNames))
@@ -51,7 +54,7 @@ patternMatcher <- function(pileup, windowSize, minSize, maxSize, mode, minContig
5154
C <- C + 1
5255
next
5356
}
54-
pileupSubset <- changewindowSize(pileupSubset, windowSize, mode)
57+
pileupSubset <- changewindowSize(pileupSubset, windowSize, chunkContigs, mode)
5558
noPatternBestMatch <- noPattern(pileupSubset)
5659
partialElevBestMatch <- partialElevGap(pileupSubset, windowSize, minSize, maxSize)
5760
fullElevBestMatch <- fullElevGap(pileupSubset, windowSize, minSize, maxSize, "Elevation")

R/plotProActiveResults.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ plotProActiveResults <- function(pileup, ProActiveResults, elevFilter, saveFiles
4242
return(NULL)
4343
}
4444
pileupSubset <- pileup[which(pileup[, 1] == refName), ]
45-
pileupSubset <- changewindowSize(pileupSubset, windowSize, mode)
45+
pileupSubset <- changewindowSize(pileupSubset, windowSize, contigChunk, mode)
4646
patternMatch <- patternBuilder(pileupSubset, patternMatches[[i]])
4747
matchLength <- matchInfo[, 6]
4848
plot <- ggplot(data = patternMatch, aes(x = position, y = coverage)) +

R/reformatInputFunctions.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@
66
#' currently being assessed.
77
#' @param windowSize The number of basepairs to average read coverage values over.
88
#' Options are 100, 200, 500, 1000 ONLY. Default is 1000.
9+
#' @param chunkContigs TRUE or FALSE. If TRUE and `mode`="metagenome", contigs longer
10+
#' than the `chunkSize` will be 'chunked' into smaller subsets and pattern-matching
11+
#' will be performed on each subset. Default is FALSE.
912
#' @param mode Either "genome" or "metagenome"
1013
#' @keywords internal
11-
changewindowSize <- function(pileupSubset, windowSize, mode) {
14+
changewindowSize <- function(pileupSubset, windowSize, chunkContigs, mode) {
1215
coverage <- vector()
1316
X <- 0
1417
Y <- windowSize / 100
@@ -18,7 +21,7 @@ changewindowSize <- function(pileupSubset, windowSize, mode) {
1821
Y <- Y + (windowSize / 100)
1922
if (Y > nrow(pileupSubset)) break
2023
}
21-
if (mode == "genome") {
24+
if (mode == "genome" || chunkContigs == TRUE) {
2225
position <- seq(pileupSubset[1, 3], pileupSubset[nrow(pileupSubset), 3], length.out = length(coverage))
2326
} else {
2427
position <- seq(windowSize, length(coverage) * windowSize, windowSize)

cran-comments.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
## Resubmission
2+
This is a resubmission. In this version (0.1.0) I have:
3+
4+
* Changed name of `ProActive()` to `ProActiveDetect()`
5+
6+
* Added `geneAnnotationSearch()` to the main functions available to users
7+
8+
* Fixed bug with pattern-match start and stop positions on genome/contig chunks
9+
10+
## Resubmission
11+
This is a resubmission. In this version (0.0.2) I have:
12+
13+
* Changed description text so it does not start with 'this package does..'
14+
15+
* Removed all use of directed quotation marks in description text
16+
17+
* Fixed a bug where use of the elevFilter parameter in the plotProActiveResults
18+
function produced an error message
19+
20+
## R CMD check results
21+
22+
0 errors | 0 warnings | 1 note
23+
24+
* This is a new release.

0 commit comments

Comments
 (0)