diff --git a/bin/D2-dada copy.R b/bin/D2-dada copy.R new file mode 100755 index 0000000..99ef1be --- /dev/null +++ b/bin/D2-dada copy.R @@ -0,0 +1,354 @@ +#!/usr/bin/env Rscript +# edited after q2-dada plugin + +#################################################### +# DESCRIPTION OF ARGUMENTS # +#################################################### +# NOTE: All numeric arguments should be zero or positive. +# NOTE: All numeric arguments save maxEEF/R are expected to be integers. +# NOTE: Currently the filtered_dirF/R must already exist. +# NOTE: ALL ARGUMENTS ARE POSITIONAL! +# +### FILE SYSTEM ARGUMENTS ### +# +# 1) File path to directory with the FORWARD .fastq.gz files to be processed. +# Ex: path/to/dir/with/FWD_fastqgzs +# +# 2) File path to directory with the REVERSE .fastq.gz files to be processed. +# Ex: path/to/dir/with/REV_fastqgzs +# +# 3) File path to output tsv file. If already exists, will be overwritten. +# Ex: path/to/output_file.tsv +# +# 4) File path to tracking tsv file. If already exists, will be overwritten. +# Ex: path/to/tracking_stats.tsv +# +# 5) File path to directory to write the filtered FORWARD .fastq.gz files. These files are intermediate +# for the full workflow. Currently they remain after the script finishes. Directory must +# already exist. +# Ex: path/to/dir/with/FWD_fastqgzs/filtered +# +# 6) File path to directory to write the filtered REVERSE .fastq.gz files. These files are intermediate +# for the full workflow. Currently they remain after the script finishes. Directory must +# already exist. +# Ex: path/to/dir/with/REV_fastqgzs/filtered +# +### FILTERING ARGUMENTS ### +# +# 7) truncLenF - The position at which to truncate forward reads. Forward reads shorter +# than truncLenF will be discarded. +# Special values: 0 - no truncation or length filtering. +# Ex: 240 +# +# 8) truncLenR - The position at which to truncate reverse reads. Reverse reads shorter +# than truncLenR will be discarded. 
+# Special values: 0 - no truncation or length filtering. +# Ex: 160 +# +# 9) trimLeftF - The number of nucleotides to remove from the start of +# each forward read. Should be less than truncLenF. +# Ex: 0 +# +# 10) trimLeftR - The number of nucleotides to remove from the start of +# each reverse read. Should be less than truncLenR. +# Ex: 0 +# +# 11) maxEEF - Forward reads with expected errors higher than maxEEF are discarded. +# Both forward and reverse reads are independently tested. +# Ex: 2.0 +# +# 12) maxEER - Reverse reads with expected errors higher than maxEER are discarded. +# Both forward and reverse reads are independently tested. +# Ex: 2.0 +# +# 13) truncQ - Reads are truncated at the first instance of quality score truncQ. +# If the read is then shorter than truncLen, it is discarded. +# Ex: 2 +# +### CHIMERA ARGUMENTS ### +# +# 14) chimeraMethod - The method used to remove chimeras. Valid options are: +# none: No chimera removal is performed. +# pooled: All reads are pooled prior to chimera detection. +# consensus: Chimeras are detected in samples individually, and a consensus decision +# is made for each sequence variant. +# Ex: consensus +# +# 15) minParentFold - The minimum abundance of potential "parents" of a sequence being +# tested as chimeric, expressed as a fold-change versus the abundance of the sequence being +# tested. Values should be greater than or equal to 1 (i.e. parents should be more +# abundant than the sequence being tested). +# Ex: 1.0 +# +### SPEED ARGUMENTS ### +# +# 16) nthreads - The number of threads to use. +# Special values: 0 - detect available and use all. +# Ex: 1 +# +# 17) nreads_learn - The minimum number of reads to learn the error model from. +# Special values: 0 - Use all input reads. 
+#     Ex: 1000000
+#
+#
+# 18) Output directory
+#
+# 19) 'do_plots' to save quality plots
+#
+# 20) taxonomy DB -or- 'skip'
+# 21) save rds
+# 22) join paired end
+# 23) join samples
+
+cat(R.version$$version.string, "\\n")
+
+# Print a tagged error message on stderr and terminate R with exit code `status`.
+errQuit <- function(mesg, status=1) { message("DADAIST2-ERROR: ", mesg); q(status=status) }
+# Sum of the unique-sequence abundances of a dada2 object (i.e. its read count).
+getN <- function(x) sum(getUniques(x))
+args <- commandArgs(TRUE)
+
+feature_table_header = '#OTU ID';
+
+# NOTE(review): most settings below are template placeholders, but minParentFold,
+# nthreads and nreads.learn are still read positionally (arguments 15-17).
+# Fail early with a clear message instead of a "subscript out of bounds" error.
+if (length(args) < 17) {
+  errQuit("Expected at least 17 positional arguments (minParentFold, nthreads and nreads_learn are arguments 15-17).")
+}
+
+# Assign each of the arguments, in positional order, to an appropriately named R variable
+inp.dirF <- "$input_dir_1"
+inp.dirR <- "$input_dir_2"
+out.path <- "$output_file"
+out.track <- "$output_track"
+filtered_dirF <- "$filtered_dir_1"
+filtered_dirR <- "$filtered_dir_2"
+truncLenF <- as.integer($trunc_len_1)
+truncLenR <- as.integer($trunc_len_2)
+trimLeftF <- as.integer($trim_left_1)
+trimLeftR <- as.integer($trim_left_2)
+maxEEF <- as.numeric($max_ee_1)
+maxEER <- as.numeric($max_ee_2)
+truncQual <- as.integer($trunc_qual)
+chimeraMethod <- "$chimeraMethod"
+minParentFold <- as.numeric(args[[15]])
+nthreads <- as.integer(args[[16]])
+nreads.learn <- as.integer(args[[17]])
+
+outbasepath <- "$output_base"
+# NOTE(review): the placeholder below is spelled "male_plots_bool" (presumably meant
+# "make_plots_bool"); kept as-is because the code filling the template is not visible here.
+make_plots <- $male_plots_bool
+taxonomy_db <- "$taxonomy_db"
+save_rds <- $save_rds_bool
+paramConcat <- $concat_bool # TRUE or FALSE
+processPool <- $pool_bool # TRUE or FALSE
+
+
+### VALIDATE ARGUMENTS ###
+# Input directory is expected to contain .fastq.gz file(s)
+# that have not yet been filtered and globally trimmed
+# to the same length.
+if(!(dir.exists(inp.dirF) && dir.exists(inp.dirR))) {
+  errQuit("Input directory does not exist.")
+} else {
+  # Sorted so that forward and reverse files are paired in the same order
+  unfiltsF <- sort(list.files(inp.dirF, pattern=".fastq.gz$$", full.names=TRUE))
+  unfiltsR <- sort(list.files(inp.dirR, pattern=".fastq.gz$$", full.names=TRUE))
+  if(length(unfiltsF) == 0) {
+    errQuit("No input forward files with the expected filename format found.")
+  }
+  if(length(unfiltsR) == 0) {
+    errQuit("No input reverse files with the expected filename format found.")
+  }
+  if(length(unfiltsF) != length(unfiltsR)) {
+    errQuit("Different numbers of forward and reverse .fastq.gz files.")
+  }
+  cat("# Received ", length(unfiltsF), " paired-end samples.\\n")
+}
+
+# Output files are to be filenames (not directories) and are to be
+# removed and replaced if already present.
+for(fn in c(out.path, out.track)) {
+  if(dir.exists(fn)) {
+    # errQuit takes one message: build it first (extra arguments would be taken as `status`)
+    errQuit(paste0("Output filename ", fn, " is a directory."))
+  } else if(file.exists(fn)) {
+    invisible(file.remove(fn))
+    cat("# removing: ", fn, "\\n")
+  }
+}
+
+# Convert nthreads to the logical/numeric expected by dada2
+if(nthreads < 0) {
+  errQuit("nthreads must be non-negative.")
+} else if(nthreads == 0) {
+  multithread <- TRUE # detect and use all
+} else if(nthreads == 1) {
+  multithread <- FALSE
+} else {
+  multithread <- nthreads
+}
+cat("# Threads: ", nthreads, "\\n")
+
+### LOAD LIBRARIES ###
+suppressWarnings(library(methods))
+suppressWarnings(library(dada2))
+cat("# DADA2:", as.character(packageVersion("dada2")), "/",
+    "Rcpp:", as.character(packageVersion("Rcpp")), "/",
+    "RcppParallel:", as.character(packageVersion("RcppParallel")), "\\n")
+
+
+cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\\t[1] Filtering reads ")
+filtsF <- file.path(filtered_dirF, basename(unfiltsF))
+filtsR <- file.path(filtered_dirR, basename(unfiltsR))
+cat("\\n")
+
+
+### QUALITY PLOTS
+# DADA2:plotQualityProfile
+# One PDF per read direction: an aggregate profile first, then one page per file.
+if (make_plots == TRUE) {
+  pdf(paste(outbasepath,"/quality_R1.pdf",sep = ""));
+  print(plotQualityProfile(unfiltsF, n = 100000, aggregate=TRUE))
+  for (p in unfiltsF) {
+    print(plotQualityProfile(p, n = 100000))
+  }
+  dev.off();
+
+  pdf(paste(outbasepath,"/quality_R2.pdf",sep = ""));
+  print(plotQualityProfile(unfiltsR, n = 100000, aggregate=TRUE))
+  for (p in unfiltsR) {
+    print(plotQualityProfile(p, n = 100000))
+  }
+  # Close the second device as well, otherwise the PDF stays open
+  dev.off();
+}
+
+# DADA2:filterAndTrim
+out <- suppressWarnings(filterAndTrim(unfiltsF, filtsF, unfiltsR, filtsR,
+                                      truncLen=c(truncLenF, truncLenR), trimLeft=c(trimLeftF, trimLeftR),
+                                      maxEE=c(maxEEF, maxEER), truncQ=truncQual, rm.phix=TRUE,
+                                      multithread=multithread))
+
+cat(" Filter and Trim, finished\\n")
+# One mark per sample: "." if its filtered file exists, "x" otherwise
+cat(ifelse(file.exists(filtsF), ".", "x"), sep="")
+filtsF <- list.files(filtered_dirF, pattern=".fastq.gz$$", full.names=TRUE)
+filtsR <- list.files(filtered_dirR, pattern=".fastq.gz$$", full.names=TRUE)
+cat("\\n")
+
+if(length(filtsF) == 0) { # All reads were filtered out
+  errQuit("No reads passed the filter (were truncLenF/R longer than the read lengths?)", status=2)
+}
+
+### LEARN ERROR RATES ###
+# DADA2:learnErrors
+# Dereplicate enough samples to get nreads.learn total reads
+cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\\t[2] Learning Error Rates\\n")
+errF <- suppressWarnings(learnErrors(filtsF, nreads=nreads.learn, multithread=multithread))
+errR <- suppressWarnings(learnErrors(filtsR, nreads=nreads.learn, multithread=multithread))
+
+### PROCESS ALL SAMPLES ###
+# Loop over rest in streaming fashion with learned error rates
+
+
+cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\\t[3] Denoise remaining samples \\n")
+
+if (processPool == FALSE) {
+  cat(" * Sample by sample")
+  denoisedF <- rep(0, length(filtsF))
+  mergers <- vector("list", length(filtsF))
+
+  for(j in seq_along(filtsF)) {
+    drpF <- derepFastq(filtsF[[j]])
+    ddF <- dada(drpF, err=errF, multithread=multithread, verbose=FALSE)
+    drpR <- derepFastq(filtsR[[j]])
+    ddR <- dada(drpR, err=errR, multithread=multithread, verbose=FALSE)
+    mergers[[j]] <- mergePairs(
+      ddF, drpF,
+      ddR, drpR,
+      justConcatenate=paramConcat,
+      trimOverhang=TRUE)
+    denoisedF[[j]] <- getN(ddF)
+  }
+  # Make sequence table
+  seqtab <- makeSequenceTable(mergers)
+
+} else {
+  cat(" * Dereplicate all samples\\n")
+  derepFs <- derepFastq(filtsF, verbose=TRUE)
+  derepRs <- derepFastq(filtsR, verbose=TRUE)
+
+  # Name the derep-class objects by the sample names
+  #cat(" * Rename samples\\n")
+  #names(derepFs) <- sample.names
+  #names(derepRs) <- sample.names
+
+  # Honour the requested thread count, as the sample-by-sample branch does
+  cat(" * Denoise all samples\\n")
+  dadaFs <- dada(derepFs, err=errF, multithread=multithread)
+  dadaRs <- dada(derepRs, err=errR, multithread=multithread)
+
+  # Same merge settings as the sample-by-sample branch
+  cat(" * Merge all samples\\n")
+  mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs,
+                        justConcatenate=paramConcat,
+                        trimOverhang=TRUE,
+                        verbose=TRUE)
+
+  cat(" * Make feature table\\n")
+  seqtab <- makeSequenceTable(mergers)
+
+  denoisedF <- sapply(dadaFs, getN)
+  #seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
+}
+
+cat("\\n")
+
+
+# Remove chimeras
+cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\\t[4] Remove chimeras (method = ", chimeraMethod, ")\\n", sep="")
+if(chimeraMethod %in% c("pooled", "consensus")) {
+  seqtab.nochim <- removeBimeraDenovo(seqtab, method=chimeraMethod, minFoldParentOverAbundance=minParentFold, multithread=multithread)
+} else { # No chimera removal, copy seqtab to seqtab.nochim
+  seqtab.nochim <- seqtab
+}
+
+### REPORT READ COUNTS AT EACH PROCESSING STEP ###
+# Handle edge cases: Samples lost in filtering; One sample
+track <- cbind(out, matrix(0, nrow=nrow(out), ncol=3))
+colnames(track) <- c("input", "filtered", "denoised", "merged", "non-chimeric")
+# Only samples with surviving reads have denoised/merged/non-chimeric counts
+passed.filtering <- track[,"filtered"] > 0
+track[passed.filtering,"denoised"] <- denoisedF
+track[passed.filtering,"merged"] <- rowSums(seqtab)
+track[passed.filtering,"non-chimeric"] <- rowSums(seqtab.nochim)
+write.table(track, out.track, sep="\\t", row.names=TRUE, col.names=NA,
+            quote=FALSE)
+
+
+# ### TAXONOMY
+# Taxonomy runs only when a database was given and exists; decided once, used twice below
+do_taxonomy <- taxonomy_db != 'skip' && file.exists(taxonomy_db)
+taxonomy_out <- file.path(outbasepath, 'taxonomy.tsv')
+
+if (do_taxonomy) {
+  cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\\t[5.1] Taxonomy\\n");
+  taxa <- assignTaxonomy(seqtab.nochim, file.path(taxonomy_db), multithread=multithread, tryRC=TRUE)
+
+  # NOTE(review): row names (the sequences) are dropped, so taxonomy.tsv rows are
+  # identified by position only - confirm this is what downstream tools expect.
+  taxa.print <- taxa # Removing sequence rownames for display only
+  rownames(taxa.print) <- NULL
+
+} else {
+  cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\\t[5.1] Taxonomy (SKIPPED)\\n");
+}
+
+### WRITE OUTPUT AND QUIT ###
+# Formatting as tsv plain-text sequence table table
+
+cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\\t[6] Write output\\n")
+seqtab.nochim <- t(seqtab.nochim) # QIIME has OTUs as rows
+col.names <- basename(filtsF)
+# The first header cell carries the feature-table label followed by the first sample name
+col.names[[1]] <- paste0(feature_table_header,"\\t", col.names[[1]])
+
+cat("\\t * ", out.path, "\\n");
+write.table(seqtab.nochim, out.path, sep="\\t",
+            row.names=TRUE, col.names=col.names, quote=FALSE)
+
+
+## If taxonomy required with DADA2
+if (do_taxonomy) {
+  cat("\\t * ", taxonomy_out, "\\n");
+  write.table(taxa.print,
+              taxonomy_out,
+              row.names=TRUE,
+              quote=FALSE
+  )
+} else {
+  cat("\\t * ", taxonomy_out, " ", taxonomy_db, " (SKIPPED)\\n");
+}
+
+if (save_rds == TRUE) {
+  # Swap only the file extension; a substring replacement would also rewrite
+  # "tsv" inside directory names and leave extension-less paths unchanged.
+  rds.path <- paste0(tools::file_path_sans_ext(out.path), ".rds")
+  cat("\\t * Saving RDS: ", rds.path, "\\n")
+  saveRDS(seqtab.nochim, rds.path) ### TESTING
+} else {
+  cat("\\t * Not saving RDS\\n")
+}
+
+q(status=0)
diff --git a/bin/D2-dada.R b/bin/D2-dada.R
index 9cd73b6..a4d80ff 100755
--- a/bin/D2-dada.R
+++ b/bin/D2-dada.R
@@ -152,8 +152,8 @@ if (paramPool == 0) {
 if(!(dir.exists(inp.dirF) && dir.exists(inp.dirR))) {
   errQuit("Input directory does not exist.")
 } else {
-  unfiltsF <- list.files(inp.dirF, pattern=".fastq.gz$", full.names=TRUE)
-  unfiltsR <- list.files(inp.dirR, pattern=".fastq.gz$", full.names=TRUE)
+  unfiltsF <- sort(list.files(inp.dirF, pattern=".fastq.gz$", full.names=TRUE))
+  unfiltsR <- sort(list.files(inp.dirR, pattern=".fastq.gz$",
full.names=TRUE)) if(length(unfiltsF) == 0) { errQuit("No input forward files with the expected filename format found.") } diff --git a/bin/dadaist2 b/bin/dadaist2 index 6e88679..54f6a1b 100755 --- a/bin/dadaist2 +++ b/bin/dadaist2 @@ -2,7 +2,7 @@ #ABSTRACT: A program to run DADA2 from the CLI use 5.012; use warnings; -my $VERSION = '1.2.0'; +my $VERSION = '1.2.1'; BEGIN { diff --git a/bin/dadaist2-rundada b/bin/dadaist2-rundada index 71dab10..771bf69 100755 --- a/bin/dadaist2-rundada +++ b/bin/dadaist2-rundada @@ -14,7 +14,7 @@ dada2list = ['forward_reads', 'reverse_reads', 'feature_table_output', 'stats_ou 'filt_forward', 'filt_reverse', 'truncLenF', 'truncLenR', 'trimLeftF', 'trimLeftR', 'maxEEF', 'maxEER', 'truncQ', 'chimeraMethod', 'minFold','threads', 'nreads_learn','baseDir', 'doPlots', 'taxonomyDb', 'saveRDS', 'noMerge', 'processPool'] - + def eprint(*args, **kwargs): """ Print to stderr @@ -206,9 +206,6 @@ if __name__ == "__main__": m = args.add_argument_group("Other parameters") m.add_argument("-t", "--threads", help="Number of threads", type=int, default=1) m.add_argument("--keep-temp", help="Keep temporary files", action="store_true") - m.add_argument("--save-rds", help="Save RDS file with DADA2 output", action="store_true") - m.add_argument("--save-plots", help="Save Quality plots of the input reads (PDF)", action="store_true") - m.add_argument("--log", help="Log file", default=None) m.add_argument("--copy", help="Copy input files instead of symbolic linking", action="store_true") m.add_argument('--skip-checks', help="Do not check installation of dependencies", action="store_true") @@ -237,7 +234,6 @@ if __name__ == "__main__": logger.error("DADA2 script not found: {}".format(dada2script)) raise - ## Make random temporary directory tmpdir = tempfile.mkdtemp(dir=opts.tmp) logger.debug("Temporary directory: {}".format(tmpdir)) @@ -259,7 +255,6 @@ if __name__ == "__main__": logger.error("Specify either input directory or forward/reverse directories.") 
exit(1) - # Prepare input files for DADA2.R if opts.input_dir: forDirectory, revDirectory = initInput(opts.input_dir, tmpdir, opts.fortag, opts.revertag, opts.sample_separator, opts.sample_extension, opts.copy) else: @@ -287,9 +282,6 @@ if __name__ == "__main__": tsvFile = os.path.join(tmpdir, "dada2.tsv") statsFile = os.path.join(tmpdir, "dada2.stats") - # Misc options - saveRDS = 'save' if opts.save_rds else 'do-not-save-rds' - savePlots = 'do_plots' if opts.save_plots else 'do-not-save-plots' joinPairs = 1 if opts.join else 0 poolSamples = 1 if opts.pool else 0 @@ -311,19 +303,14 @@ if __name__ == "__main__": opts.min_parent_fold, # 15 opts.threads, # 16 opts.n_learn, # 17 - tmpdir, # 18 - savePlots, # 19 - 'skip', # 20 taxonomy - saveRDS, # 21save RDS + tmpdir, # 18 + 'do_plots', # 19 + 'skip', # 20 taxonomy + 'save', # 21save RDS joinPairs, poolSamples ] - if len(dada2list) != len(dada2args): - raise Exception("Number of arguments in dada2list does not match number of arguments in dada2args") - for name, value in zip(dada2list, dada2args): - logger.info("Parameter `{}`: {}".format(name, value)) - cmd = ["Rscript", "--vanilla", "--no-save", dada2script] + dada2args # Convert cmd elements to string cmd = [str(x) for x in cmd] @@ -349,13 +336,7 @@ if __name__ == "__main__": logger.info("Output files are in temp dir: " + tmpdir) raise - expectedFiles = ["dada2.stats", "dada2.tsv", "dada2.execution.txt", "dada2.execution.log"] - if opts.save_plots: - expectedFiles += ["quality_R1.png", "quality_R2.png"] - if opts.save_rds: - expectedFiles += ["dada2.rds"] - - for file in expectedFiles: + for file in ["dada2.stats", "dada2.tsv", "quality_R1.pdf", "quality_R2.pdf", "dada2.rds", "dada2.execution.txt", "dada2.execution.log"]: if os.path.exists(os.path.join(tmpdir, file)): logger.debug("Moving file: {}".format(file)) shutil.move(os.path.join(tmpdir, file), os.path.join(opts.output_dir, file)) diff --git a/docs/pages/dadaist2-phyloseqMake.md 
b/docs/pages/dadaist2-phyloseqMake.md index dcdc44f..32048ed 100644 --- a/docs/pages/dadaist2-phyloseqMake.md +++ b/docs/pages/dadaist2-phyloseqMake.md @@ -2,12 +2,15 @@ sort: 12 --- ## dadaist2-phyloseqMake + **dadaist2-phyloseqMake** - Generate PhyloSeq object from the command line ## Author + Andrea Telatin and Rebecca Ansorge ## Parameters + - _-i_, _--input_ DIR Directory containing the `MicrobiomeAnalyst` folder generated by Dadaist2. @@ -17,5 +20,6 @@ Andrea Telatin and Rebecca Ansorge Output filename. If omitted, a 'phyloseq.rds' file will be placed in the input directory. ## Source code and documentation + The program is freely available at [https://quadram-institute-bioscience.github.io/dadaist2](https://quadram-institute-bioscience.github.io/dadaist2) released under the MIT licence. The website contains further DOCUMENTATION. diff --git a/docs/pages/dadaist2-rundada.md b/docs/pages/dadaist2-rundada.md index b24910d..ab93d41 100644 --- a/docs/pages/dadaist2-rundada.md +++ b/docs/pages/dadaist2-rundada.md @@ -16,13 +16,13 @@ The input can be supplied either: The latter is used as a compatibility layer and will be used by _dadaist2_ itself to invoke the wrapper. 
-## Synopsis +## Synopsis ```text usage: dadaist2-rundada [-h] [-i INPUT_DIR] [-f FOR_DIR] [-r REV_DIR] -o OUTPUT_DIR [--tmp TMP] [--fortag FORTAG] [--revertag REVERTAG] [--sample-separator SAMPLE_SEPARATOR] [--sample-extension SAMPLE_EXTENSION] [-q TRUNC_QUAL] [-j] [-p] [--trunc-len-1 TRUNC_LEN_1] [--trunc-len-2 TRUNC_LEN_2] [--trim-left-1 TRIM_LEFT_1] [--trim-left-2 TRIM_LEFT_2] - [--maxee-1 MAXEE_1] [--maxee-2 MAXEE_2] [--chimera {none,pooled,consensus}] [--min-parent-fold MIN_PARENT_FOLD] [--n-learn N_LEARN] [-t THREADS] [--keep-temp] [--log LOG] [--copy] - [--skip-checks] [--verbose] + [--maxee-1 MAXEE_1] [--maxee-2 MAXEE_2] [--chimera {none,pooled,consensus}] [--min-parent-fold MIN_PARENT_FOLD] [--n-learn N_LEARN] [-t THREADS] [--keep-temp] [--save-rds] + [--save-plots] [--log LOG] [--copy] [--skip-checks] [--verbose] Run DADA2 @@ -73,6 +73,8 @@ Other parameters: -t THREADS, --threads THREADS Number of threads --keep-temp Keep temporary files + --save-rds Save RDS file with DADA2 output + --save-plots Save Quality plots of the input reads (PDF) --log LOG Log file --copy Copy input files instead of symbolic linking --skip-checks Do not check installation of dependencies @@ -85,8 +87,6 @@ The output directory will contain: * dada2.stats (table with the statistics of reads loss) * dada2.tsv (main feature table) -* dada2.rds (R object with the table) -* quality_R1.pdf, quality_R2.pdf (quality plots) -* dada2.execution.log, dada2.execution.txt (log files) - - +* dada2.rds (R object with the table. 
if `--save-rds` is specified) +* quality_R1.pdf, quality_R2.pdf (quality plots, if `--save-plots` is specified) +* dada2.execution.log, dada2.execution.txt (wrapper log files) diff --git a/env/dadaist2_1.2.0_Linux.yaml b/env/dadaist2_1.2.0_Linux.yaml new file mode 100644 index 0000000..67a646e --- /dev/null +++ b/env/dadaist2_1.2.0_Linux.yaml @@ -0,0 +1,311 @@ +name: dadaist2_1.2 +channels: + - defaults + - conda-forge + - bioconda +dependencies: + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 + - _r-mutex=1.0.1 + - argtable2=2.13 + - binutils_impl_linux-64=2.36.1 + - binutils_linux-64=2.36 + - bioconductor-biobase=2.50.0 + - bioconductor-biocgenerics=0.36.0 + - bioconductor-biocparallel=1.24.1 + - bioconductor-biomformat=1.18.0 + - bioconductor-biostrings=2.58.0 + - bioconductor-dada2=1.18.0 + - bioconductor-decipher=2.18.1 + - bioconductor-delayedarray=0.16.3 + - bioconductor-genomeinfodb=1.26.4 + - bioconductor-genomeinfodbdata=1.2.4 + - bioconductor-genomicalignments=1.26.0 + - bioconductor-genomicranges=1.42.0 + - bioconductor-iranges=2.24.1 + - bioconductor-matrixgenerics=1.2.1 + - bioconductor-microbiome=1.12.0 + - bioconductor-multtest=2.46.0 + - bioconductor-phyloseq=1.34.0 + - bioconductor-rhdf5=2.34.0 + - bioconductor-rhdf5filters=1.2.0 + - bioconductor-rhdf5lib=1.12.1 + - bioconductor-rhtslib=1.22.0 + - bioconductor-rsamtools=2.6.0 + - bioconductor-s4vectors=0.28.1 + - bioconductor-shortread=1.48.0 + - bioconductor-summarizedexperiment=1.20.0 + - bioconductor-xvector=0.30.0 + - bioconductor-zlibbioc=1.36.0 + - biom-format=2.1.10 + - brotlipy=0.7.0 + - bwidget=1.9.14 + - bzip2=1.0.8 + - c-ares=1.18.1 + - ca-certificates=2021.10.8 + - cached-property=1.5.2 + - cached_property=1.5.2 + - cairo=1.16.0 + - certifi=2021.10.8 + - cffi=1.15.0 + - chardet=4.0.0 + - charset-normalizer=2.0.0 + - click=8.0.3 + - clustalo=1.2.4 + - colorama=0.4.4 + - coloredlogs=15.0.1 + - colormath=3.0.0 + - commonmark=0.9.1 + - cryptography=35.0.0 + - curl=7.79.1 + - cutadapt=3.5 + - 
cycler=0.11.0 + - dadaist2=1.2.0 + - dnaio=0.6.0 + - fastp=0.22.0 + - fasttree=2.1.10 + - font-ttf-dejavu-sans-mono=2.37 + - font-ttf-inconsolata=3.000 + - font-ttf-source-code-pro=2.038 + - font-ttf-ubuntu=0.83 + - fontconfig=2.13.1 + - fonts-conda-ecosystem=1 + - fonts-conda-forge=1 + - freetype=2.10.4 + - fribidi=1.0.10 + - future=0.18.2 + - gcc_impl_linux-64=9.4.0 + - gcc_linux-64=9.4.0 + - gettext=0.19.8.1 + - gfortran_impl_linux-64=9.4.0 + - gfortran_linux-64=9.4.0 + - gmp=6.2.1 + - graphite2=1.3.13 + - gsl=2.7 + - gxx_impl_linux-64=9.4.0 + - gxx_linux-64=9.4.0 + - h5py=3.4.0 + - harfbuzz=3.1.0 + - hdf5=1.12.1 + - humanfriendly=10.0 + - icu=68.2 + - idna=3.1 + - importlib-metadata=4.8.1 + - isa-l=2.30.0 + - jbig=2.1 + - jinja2=3.0.2 + - jpeg=9d + - kernel-headers_linux-64=2.6.32 + - kiwisolver=1.3.2 + - krb5=1.19.2 + - lcms2=2.12 + - ld_impl_linux-64=2.36.1 + - lerc=3.0 + - libblas=3.9.0 + - libcblas=3.9.0 + - libcurl=7.79.1 + - libdeflate=1.8 + - libedit=3.1.20191231 + - libev=4.33 + - libffi=3.4.2 + - libgcc-devel_linux-64=9.4.0 + - libgcc-ng=11.2.0 + - libgfortran-ng=11.2.0 + - libgfortran5=11.2.0 + - libglib=2.70.0 + - libgomp=11.2.0 + - libiconv=1.16 + - liblapack=3.9.0 + - libnghttp2=1.43.0 + - libopenblas=0.3.18 + - libpng=1.6.37 + - libsanitizer=9.4.0 + - libssh2=1.10.0 + - libstdcxx-devel_linux-64=9.4.0 + - libstdcxx-ng=11.2.0 + - libtiff=4.3.0 + - libuuid=2.32.1 + - libwebp-base=1.2.1 + - libxcb=1.13 + - libxml2=2.9.12 + - libzip=1.8.0 + - libzlib=1.2.11 + - lz4-c=1.9.3 + - lzstring=1.0.4 + - make=4.3 + - markdown=3.3.4 + - markupsafe=2.0.1 + - matplotlib-base=3.4.3 + - multiqc=1.11 + - ncurses=6.2 + - networkx=2.6.3 + - numpy=1.21.4 + - olefile=0.46 + - openjpeg=2.4.0 + - openssl=1.1.1l + - pandas=1.3.4 + - pango=1.48.10 + - pbzip2=1.1.13 + - pcre=8.45 + - pcre2=10.37 + - perl=5.26.2 + - perl-capture-tiny=0.48 + - perl-carp=1.38 + - perl-exporter=5.72 + - perl-extutils-makemaker=7.36 + - perl-fastx-reader=1.3.0 + - pigz=2.6 + - pillow=8.3.2 + - 
pip=21.3.1 + - pixman=0.40.0 + - pthread-stubs=0.4 + - pycparser=2.21 + - pygments=2.10.0 + - pyopenssl=21.0.0 + - pyparsing=3.0.5 + - pysocks=1.7.1 + - python=3.9.7 + - python-dateutil=2.8.2 + - python-isal=0.11.1 + - python_abi=3.9 + - pytz=2021.3 + - pyyaml=6.0 + - qax=0.9.5 + - r-ade4=1.7_18 + - r-ape=5.5 + - r-assertthat=0.2.1 + - r-backports=1.3.0 + - r-base=4.0.5 + - r-bh=1.75.0_0 + - r-bit=4.0.4 + - r-bit64=4.0.5 + - r-bitops=1.0_7 + - r-blob=1.2.2 + - r-brio=1.1.2 + - r-cachem=1.0.6 + - r-callr=3.7.0 + - r-cli=3.1.0 + - r-cluster=2.1.2 + - r-codetools=0.2_18 + - r-colorspace=2.0_2 + - r-crayon=1.4.2 + - r-data.table=1.14.2 + - r-dbi=1.1.1 + - r-desc=1.4.0 + - r-diffobj=0.3.5 + - r-digest=0.6.28 + - r-dplyr=1.0.7 + - r-ellipsis=0.3.2 + - r-evaluate=0.14 + - r-fansi=0.5.0 + - r-farver=2.1.0 + - r-fastmap=1.1.0 + - r-foreach=1.5.1 + - r-formatr=1.11 + - r-futile.logger=1.4.3 + - r-futile.options=1.0.1 + - r-generics=0.1.1 + - r-ggplot2=3.3.5 + - r-glue=1.5.0 + - r-gtable=0.3.0 + - r-hms=1.1.1 + - r-hwriter=1.3.2 + - r-igraph=1.2.8 + - r-isoband=0.2.5 + - r-iterators=1.0.13 + - r-jpeg=0.1_9 + - r-jsonlite=1.7.2 + - r-labeling=0.4.2 + - r-lambda.r=1.2.4 + - r-lattice=0.20_45 + - r-latticeextra=0.6_29 + - r-lifecycle=1.0.1 + - r-magrittr=2.0.1 + - r-mass=7.3_54 + - r-matrix=1.3_2 + - r-matrixstats=0.61.0 + - r-memoise=2.0.0 + - r-mgcv=1.8_38 + - r-munsell=0.5.0 + - r-nlme=3.1_153 + - r-permute=0.9_5 + - r-pillar=1.6.4 + - r-pixmap=0.4_12 + - r-pkgconfig=2.0.3 + - r-pkgload=1.2.3 + - r-plogr=0.2.0 + - r-plyr=1.8.6 + - r-png=0.1_7 + - r-praise=1.0.0 + - r-prettyunits=1.1.1 + - r-processx=3.5.2 + - r-progress=1.2.2 + - r-ps=1.6.0 + - r-purrr=0.3.4 + - r-r6=2.5.1 + - r-rcolorbrewer=1.1_2 + - r-rcpp=1.0.7 + - r-rcppparallel=5.1.4 + - r-rcurl=1.98_1.5 + - r-rematch2=2.1.2 + - r-reshape2=1.4.4 + - r-rlang=0.4.12 + - r-rprojroot=2.0.2 + - r-rsqlite=2.2.8 + - r-rstudioapi=0.13 + - r-rtsne=0.15 + - r-scales=1.1.1 + - r-snow=0.4_4 + - r-sp=1.4_5 + - r-stringi=1.7.5 + - 
r-stringr=1.4.0
+  - r-survival=3.2_13
+  - r-testthat=3.1.0
+  - r-tibble=3.1.6
+  - r-tidyr=1.1.4
+  - r-tidyselect=1.1.1
+  - r-utf8=1.2.2
+  - r-vctrs=0.3.8
+  - r-vegan=2.5_7
+  - r-viridislite=0.4.0
+  - r-waldo=0.3.1
+  - r-withr=2.4.2
+  - readline=8.1
+  - requests=2.26.0
+  - rich=10.13.0
+  - scipy=1.7.1
+  - sed=4.8
+  - seqfu=1.8.4
+  - setuptools=58.5.3
+  - simplejson=3.17.5
+  - six=1.16.0
+  - spectra=0.0.11
+  - sqlite=3.36.0
+  - sysroot_linux-64=2.12
+  - tk=8.6.11
+  - tktable=2.10
+  - tornado=6.1
+  - typing_extensions=3.10.0.2
+  - tzdata=2021e
+  - urllib3=1.26.7
+  - wheel=0.37.0
+  - xopen=1.2.1
+  - xorg-kbproto=1.0.7
+  - xorg-libice=1.0.10
+  - xorg-libsm=1.2.3
+  - xorg-libx11=1.7.2
+  - xorg-libxau=1.0.9
+  - xorg-libxdmcp=1.1.3
+  - xorg-libxext=1.3.4
+  - xorg-libxrender=0.9.10
+  - xorg-libxt=1.2.1
+  - xorg-renderproto=0.11.1
+  - xorg-xextproto=7.3.0
+  - xorg-xproto=7.0.31
+  - xz=5.2.5
+  - yaml=0.2.5
+  - zip=3.0
+  - zipp=3.6.0
+  - zlib=1.2.11
+  - zstd=1.5.0
+prefix: /mnt/disk/miniconda3/envs/dadaist2_1.2
diff --git a/lab/templateR.py b/lab/templateR.py
new file mode 100644
index 0000000..f128d3f
--- /dev/null
+++ b/lab/templateR.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import subprocess
+from string import Template
+from select import epoll, EPOLLIN
+
+TEMPLATE="""
+cat("Starting\\n")
+cat("Starting again\\n")
+library(dada2)
+cat("Dadaing\\n")
+library(notechoppe)
+cat("Never\\n")
+"""
+
+
+def read_with_timeout(fd, timeout__s):
+    """Read from fd until there is no new data for at least timeout__s seconds.
+
+    Reading also stops at end-of-file. Returns the collected data as ``str`` or
+    ``bytes``, matching what ``fd.read`` yields (``''`` when nothing was read).
+
+    This only works on linux > 2.5.44.
+    """
+    buf = []
+    # epoll objects hold a file descriptor; the context manager releases it.
+    with epoll() as e:
+        e.register(fd, EPOLLIN)
+        while True:
+            ret = e.poll(timeout__s)
+            # Event masks are bit fields (EPOLLIN may be combined with other
+            # flags), so test the bit rather than object identity.
+            if not ret or not (ret[0][1] & EPOLLIN):
+                break
+            chunk = fd.read(1)
+            if not chunk:
+                # An empty read means EOF: stop instead of polling forever.
+                break
+            buf.append(chunk)
+    if buf and isinstance(buf[0], bytes):
+        return b''.join(buf)
+    return ''.join(buf)
+
+
+def runScript(script):
+    """
+    Given a multiline Rscript (script), feed it to Rscript via STDIN.
+
+    The captured output is echoed (stdout to stdout, stderr to stderr) and
+    the exit status reported by Rscript is returned, so callers can tell
+    whether the script failed.
+    """
+    p = subprocess.Popen(["Rscript", "--vanilla", "-"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    # communicate() closes stdin and waits for the process to exit, so it can
+    # only be called once: send the whole script in a single call instead of
+    # one call per line.
+    out, err = p.communicate(input=script.encode())
+    print(out.decode())
+    print(err.decode(), file=sys.stderr)
+
+    return p.returncode
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='Run a template')
+    parser.add_argument('--template', '-t', type=str, default=TEMPLATE)
+
+    args = parser.parse_args()
+    p = subprocess.Popen(["Rscript", "--vanilla", "-"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, err = p.communicate(input=args.template.encode())
+    print(out.decode())
+    print("===")
+    print(err.decode())