diff --git a/BoolODE_gendatasets.py b/BoolODE_gendatasets.py
new file mode 100644
index 0000000..e16dbbc
--- /dev/null
+++ b/BoolODE_gendatasets.py
@@ -0,0 +1,76 @@
+"""Generate BoolODE datasets for the curated Boolean models.
+
+Every model in MODELS is simulated ``numdatasets`` times with BoolODE; each
+run is written under ``<output_dir>/<model>/<simulation name>/``.
+"""
+
+import os
+import subprocess
+import sys
+
+path_to_boolode = "/home/cbuck016/BoolODE-0.1/"
+
+maxtime = "8"
+numcells = "1000"
+numdatasets = "10"
+
+output_dir = "CuratedData/"
+
+# (model name, pass the <model>_ics.txt initial-conditions file?)
+# VSC is the only model that is simulated without --ics.
+MODELS = [
+    ("mCAD", True),
+    ("VSC", False),
+    ("HSC", True),
+    ("GSD", True),
+]
+
+
+def simulation_names(model_name):
+    """Return the run names of one model, e.g. ``mCAD-sim-01-ts-800-cells-1000``.
+
+    The ``ts`` field is ``maxtime`` * 100 (max-time 8 gives ``ts-800``).
+    """
+    timesteps = int(maxtime) * 100
+    return [
+        "{}-sim-{:02d}-ts-{}-cells-{}".format(model_name, index, timesteps, numcells)
+        for index in range(1, int(numdatasets) + 1)
+    ]
+
+
+def build_command(model_name, sim_name, use_ics):
+    """Return the BoolODE argument list for a single simulation run."""
+    data_prefix = os.path.join(path_to_boolode, "data", model_name)
+    command = [
+        "python3",
+        os.path.join(path_to_boolode, "src", "BoolODE.py"),
+        "--path", data_prefix + ".txt",
+    ]
+    if use_ics:
+        command += ["--ics", data_prefix + "_ics.txt"]
+    command += [
+        "--max-time", maxtime,
+        "--num-cells", numcells,
+        "--do-parallel",
+        # One directory per run, so repeated runs do not overwrite each other.
+        "--outPrefix", output_dir + model_name + "/" + sim_name + "/",
+        "--sample-cells",
+    ]
+    return command
+
+
+def main():
+    """Run every simulation and return the number of runs that failed."""
+    failures = 0
+    for model_name, use_ics in MODELS:
+        for sim_name in simulation_names(model_name):
+            print("Simulating " + model_name)
+            # An argument list (no shell) keeps paths with spaces intact.
+            if subprocess.call(build_command(model_name, sim_name, use_ics)) != 0:
+                failures += 1
+                print("BoolODE failed for " + sim_name, file=sys.stderr)
+    return failures
+
+
+if __name__ == "__main__":
+    sys.exit(1 if main() else 0)
diff --git a/BoolODE_gendatasets_master.py b/BoolODE_gendatasets_master.py
new file mode 100644
index 0000000..18cfe98
--- /dev/null
+++ b/BoolODE_gendatasets_master.py
@@ -0,0 +1,55 @@
+#BoolODE_gendatasets_master.py
+"""Run BoolODE for every (cells, time steps, replicate) combination of one model."""
+
+import argparse
+import os
+import subprocess
+import sys
+
+pathtoBoolODE = '/home/cbuck016/BoolODE-0.1/'
+outputdir = '/home/cbuck016/BoolODE-0.1/GSD-sims/'
+
+num_simulations = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+num_cells = [250, 500, 1000, 2000, 5000]
+num_timesteps = [1, 2, 4, 8, 16]
+model_name = 'GSD'
+
+
+def build_command(cells, ts, i):
+    """Return the BoolODE argument list for one experiment.
+
+    ``ts`` is passed to ``--max-time``; the experiment name carries it with
+    ``00`` appended (``ts=8`` gives ``ts-800``).
+    """
+    experiment_name = (model_name + '-ts-' + str(ts) + '00'
+                       + '-cells-' + str(cells) + '-sim-' + str(i))
+    input_file_prefix = pathtoBoolODE + 'data/' + model_name
+    return [
+        'python3', pathtoBoolODE + 'src/BoolODE.py',
+        '--path', input_file_prefix + '.txt',
+        '--ics', input_file_prefix + '_ics.txt',
+        '--max-time', str(ts),
+        '--num-cells', str(cells),
+        '--do-parallel',
+        '--outPrefix', outputdir + experiment_name + '/sim-' + str(i) + '-',
+        '--sample-cells',
+    ]
+
+
+def main():
+    """Run all experiments and return the number of runs that failed."""
+    failures = 0
+    for cells in num_cells:
+        for ts in num_timesteps:
+            for i in num_simulations:
+                command = build_command(cells, ts, i)
+                print(' '.join(command))
+                # Keep going after a failed run, but count it for the exit status.
+                if subprocess.call(command) != 0:
+                    failures += 1
+                    print('BoolODE failed: ' + ' '.join(command), file=sys.stderr)
+    return failures
+
+
+if __name__ == "__main__":
+    sys.exit(1 if main() else 0)
diff --git a/autonumclusters.py b/autonumclusters.py
new file mode 100644
index 0000000..a5ccf3f
--- /dev/null
+++ b/autonumclusters.py
@@ -0,0 +1,86 @@
+"""Estimate the number of k-means clusters in a BoolODE expression matrix.
+
+Adapted from the scikit-learn silhouette-analysis example: k-means is run for
+k = 2..10 and the k with the highest average silhouette score is reported.
+"""
+
+from sklearn.cluster import KMeans
+from sklearn.metrics import silhouette_samples, silhouette_score
+
+import os
+import sys
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+import numpy as np
+import matplotlib.style as style
+from optparse import OptionParser
+from pandas import DataFrame
+
+
+def parseArgs(args):
+    """Parse an argv-style list and return ``(opts, positional_args)``."""
+    parser = OptionParser()
+    parser.add_option('', '--expressionfile', type='str', help='Path to ExpressionData.csv file')
+    parser.add_option('', '--outPrefix', type='str', default='', help='Prefix for output files')
+
+    (opts, args) = parser.parse_args(args)
+    return opts, args
+
+
+def main(args):
+    """Score k = 2..10 and write ``<outPrefix>silhouettescores.csv``.
+
+    ``args`` is an argv-style list; exits with status 1 when
+    ``--expressionfile`` is missing.
+    """
+    opts, args = parseArgs(args)
+    expressionfile = opts.expressionfile
+    outPrefix = opts.outPrefix
+    if not expressionfile:
+        print("Please specify path to ExpressionData.csv file")
+        # sys.exit must be called; a bare ``sys.exit`` is a no-op expression.
+        sys.exit(1)
+
+    expfileDF = pd.read_csv(expressionfile, index_col=0)
+    # Transpose so the columns of the file become the samples being clustered.
+    data = expfileDF.transpose()
+    print(expfileDF.shape)
+    print(data.shape)
+
+    # Create the directory part of the output prefix when there is one.
+    outDir = os.path.dirname(outPrefix)
+    if outDir and not os.path.exists(outDir):
+        print(outDir, "does not exist, creating it...")
+        os.makedirs(outDir)
+
+    range_n_clusters = [2, 3, 4, 5, 6, 7, 8, 9, 10]
+    silhouette_avg_n_clusters = []
+
+    for n_clusters in range_n_clusters:
+        clusterer = KMeans(n_clusters=n_clusters, random_state=42)
+        cluster_labels = clusterer.fit_predict(data)
+
+        silhouette_avg = silhouette_score(data, cluster_labels)
+        print("For n_clusters =", n_clusters, "The average silhouette_score is :", silhouette_avg)
+
+        silhouette_avg_n_clusters.append(silhouette_avg)
+
+    best_avg_silhouette_value = max(silhouette_avg_n_clusters)
+    # The scores are in the same order as range_n_clusters, so the position of
+    # the best score selects the matching cluster count.
+    index_best_num_cluster = silhouette_avg_n_clusters.index(best_avg_silhouette_value)
+    best_num_cluster = range_n_clusters[index_best_num_cluster]
+
+    print("The best average silhouette score is: ", best_avg_silhouette_value)
+
+    df = pd.DataFrame(
+        {'Average Silhouette Method': [expressionfile, best_num_cluster, best_avg_silhouette_value]},
+        index=['file name', 'predicted number of clusters', 'average silhouette score'],
+    )
+    print(df)
+    df.to_csv(outPrefix + 'silhouettescores.csv')
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/autonumclusters_master.py b/autonumclusters_master.py
new file mode 100644
index 0000000..226c583
--- /dev/null
+++ b/autonumclusters_master.py
@@ -0,0 +1,47 @@
+#Silhouette Method for use with output results from BoolODE datasets
+"""Run autonumclusters.py on every BoolODE simulation of one model."""
+
+import os
+import subprocess
+import sys
+
+pathtoBoolOutFile = '/home/cbuck016/BoolODE-0.1/mCAD-sims/'
+
+num_simulations = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+num_cells = [250, 500, 1000, 2000, 5000]
+num_timesteps = [1, 2, 4, 8, 16]
+model_name = 'mCAD'
+
+# Resolve the helper next to this file so the script works from any directory.
+AUTONUMCLUSTERS = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autonumclusters.py')
+
+
+def build_command(cells, ts, i):
+    """Return the autonumclusters.py argument list for one experiment."""
+    experiment_name = (model_name + '-ts-' + str(ts) + '00'
+                       + '-cells-' + str(cells) + '-sim-' + str(i))
+    experiment_dir = pathtoBoolOutFile + experiment_name + '/'
+    return [
+        'python3', AUTONUMCLUSTERS,
+        '--expressionfile', experiment_dir + 'sim-' + str(i) + '-ExpressionData.csv',
+        '--outPrefix', experiment_dir,
+    ]
+
+
+def main():
+    """Run all analyses and return the number of runs that failed."""
+    failures = 0
+    for cells in num_cells:
+        for ts in num_timesteps:
+            for i in num_simulations:
+                command = build_command(cells, ts, i)
+                print(' '.join(command))
+                # Keep going after a failed run, but count it for the exit status.
+                if subprocess.call(command) != 0:
+                    failures += 1
+                    print('autonumclusters failed: ' + ' '.join(command), file=sys.stderr)
+    return failures
+
+
+if __name__ == "__main__":
+    sys.exit(1 if main() else 0)