Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
f176ba8
Create autonumclusters
cbuck016 Mar 16, 2021
42b54b2
Update autonumclusters
cbuck016 Mar 23, 2021
4ddd1e4
Update autonumclusters
cbuck016 Mar 23, 2021
bd6643e
Update autonumclusters
cbuck016 Mar 23, 2021
b78e9dc
Rename autonumclusters to autonumclusters.py
cbuck016 Mar 23, 2021
2102db3
Update autonumclusters.py
cbuck016 Mar 23, 2021
ab2fb20
Update autonumclusters.py
cbuck016 Mar 23, 2021
b6eb500
Add files via upload
cbuck016 Apr 13, 2021
aa6cb34
Update BoolODE_gendatasets.py
cbuck016 Apr 13, 2021
b957e21
Update BoolODE_gendatasets.py
cbuck016 Apr 13, 2021
390c871
Update BoolODE_gendatasets.py
cbuck016 Apr 13, 2021
fed45d3
Update BoolODE_gendatasets.py
cbuck016 Apr 13, 2021
7d49b84
Update BoolODE_gendatasets.py
cbuck016 Apr 13, 2021
5373b20
Update BoolODE_gendatasets.py
cbuck016 Apr 13, 2021
06c519c
Update BoolODE_gendatasets.py
cbuck016 Apr 13, 2021
2953712
Update BoolODE_gendatasets.py
cbuck016 Apr 13, 2021
428d8f3
Add files via upload
cbuck016 Apr 14, 2021
ebc7ebf
Update BoolODE_gendatasets_master.py
cbuck016 Apr 14, 2021
3de98d0
Update BoolODE_gendatasets_master.py
cbuck016 Apr 14, 2021
908be25
Update BoolODE_gendatasets_master.py
cbuck016 Apr 14, 2021
7545dae
Update BoolODE_gendatasets_master.py
cbuck016 Apr 14, 2021
ab5c9c9
Update BoolODE_gendatasets_master.py
cbuck016 Apr 14, 2021
6e98589
Update BoolODE_gendatasets_master.py
cbuck016 Apr 14, 2021
90551c8
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
212265f
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
8a30945
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
f1d45fd
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
eef335c
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
b08b10a
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
2113d60
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
a965281
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
f3486a1
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
66a30da
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
873d7fc
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
495adcf
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
6bb08a3
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
f4c8d3f
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
eab015f
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
0765bc5
Update BoolODE_gendatasets_master.py
cbuck016 Apr 20, 2021
ae874f3
Update BoolODE_gendatasets_master.py
cbuck016 Apr 27, 2021
cf2e58c
Update BoolODE_gendatasets_master.py
cbuck016 Apr 27, 2021
0255f2d
Update BoolODE_gendatasets_master.py
cbuck016 Apr 27, 2021
fcc3667
Update BoolODE_gendatasets_master.py
cbuck016 Apr 27, 2021
18a478b
Update BoolODE_gendatasets_master.py
cbuck016 Apr 27, 2021
dc9e17d
Create autonumclusters_master.py
cbuck016 Apr 28, 2021
935c818
Update BoolODE_gendatasets_master.py
cbuck016 Apr 28, 2021
44bc2d2
Update BoolODE_gendatasets_master.py
cbuck016 Apr 28, 2021
5e21471
Update BoolODE_gendatasets_master.py
cbuck016 Apr 30, 2021
dd88a63
Update BoolODE_gendatasets_master.py
cbuck016 Apr 30, 2021
3993913
Update BoolODE_gendatasets_master.py
cbuck016 May 1, 2021
cf57f9a
Update autonumclusters_master.py
cbuck016 May 1, 2021
4db7a79
Update autonumclusters.py
cbuck016 May 1, 2021
7833ae1
Update autonumclusters.py
cbuck016 May 2, 2021
bd7a2cc
Update autonumclusters.py
cbuck016 May 2, 2021
ce53cc0
Update autonumclusters_master.py
cbuck016 May 2, 2021
b1ca161
Update autonumclusters.py
cbuck016 May 2, 2021
c1a07b4
Update autonumclusters.py
cbuck016 May 2, 2021
2bcbcda
Update autonumclusters.py
cbuck016 May 2, 2021
c5c0151
Update autonumclusters.py
cbuck016 May 2, 2021
ee7d004
Update autonumclusters.py
cbuck016 May 2, 2021
c1e315b
Update autonumclusters_master.py
cbuck016 May 3, 2021
f7bf748
Update autonumclusters.py
cbuck016 May 3, 2021
d70e618
Update autonumclusters.py
cbuck016 May 3, 2021
c20052a
Update autonumclusters_master.py
cbuck016 May 3, 2021
e6b64a4
Update autonumclusters_master.py
cbuck016 May 3, 2021
77ba293
Update autonumclusters.py
cbuck016 May 3, 2021
e06f829
Update autonumclusters.py
cbuck016 May 3, 2021
c9b3891
Update autonumclusters.py
cbuck016 May 3, 2021
0176738
Update autonumclusters_master.py
cbuck016 May 3, 2021
83d7309
Update autonumclusters.py
cbuck016 May 3, 2021
fec9e91
Update autonumclusters.py
cbuck016 May 3, 2021
654e78a
Update autonumclusters.py
cbuck016 May 3, 2021
09cfe26
Update autonumclusters.py
cbuck016 May 3, 2021
7bbb71b
Update autonumclusters.py
cbuck016 May 3, 2021
be3041b
Update autonumclusters.py
cbuck016 May 3, 2021
0c977c2
Update autonumclusters.py
cbuck016 May 3, 2021
bbe1ef6
Update autonumclusters.py
cbuck016 May 3, 2021
d899a31
Update autonumclusters.py
cbuck016 May 5, 2021
62f63b7
Update autonumclusters_master.py
cbuck016 May 5, 2021
5404d5c
Update autonumclusters_master.py
cbuck016 May 5, 2021
65fd0b6
Update autonumclusters.py
cbuck016 May 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions BoolODE_gendatasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Generate simulated expression datasets for the curated Boolean models by
# running BoolODE once per requested dataset.

import subprocess

path_to_boolode = "/home/cbuck016/BoolODE-0.1/"

# Kept as strings because they are passed straight through as CLI arguments.
maxtime = "8"
numcells = "1000"
numdatasets = "10"

output_dir = "CuratedData/"

# (model name, whether a "<model>_ics.txt" initial-conditions file is passed).
# VSC is the only model that is simulated without an --ics argument.
MODELS = [
    ("mCAD", True),
    ("VSC", False),
    ("HSC", True),
    ("GSD", True),
]


def simulation_names(model_name):
    """Return the dataset labels for one model.

    Labels follow the pattern ``<model>-sim-NN-ts-<maxtime>00-cells-<numcells>``
    (e.g. ``mCAD-sim-01-ts-800-cells-1000``), one per requested dataset.
    """
    return [
        f"{model_name}-sim-{index:02d}-ts-{maxtime}00-cells-{numcells}"
        for index in range(1, int(numdatasets) + 1)
    ]


def build_command(model_name, output_name, use_ics=True):
    """Return the BoolODE invocation for one model as an argument list.

    Args:
        model_name: Base name of the model files under ``<BoolODE>/data/``.
        output_name: Sub-directory of ``output_dir`` used as the output prefix.
        use_ics: When true, pass ``<model>_ics.txt`` through ``--ics``.

    Returns:
        A list suitable for ``subprocess.run`` (no shell involved).
    """
    data_prefix = path_to_boolode + "data/" + model_name
    command = [
        "python3",
        path_to_boolode + "src/BoolODE.py",
        "--path", data_prefix + ".txt",
    ]
    if use_ics:
        command += ["--ics", data_prefix + "_ics.txt"]
    command += [
        "--max-time", maxtime,
        "--num-cells", numcells,
        "--do-parallel",
        "--outPrefix", output_dir + output_name + "/",
        "--sample-cells",
    ]
    return command


def main():
    """Run BoolODE ``numdatasets`` times for every model in ``MODELS``."""
    for model_name, use_ics in MODELS:
        command = build_command(model_name, model_name, use_ics)
        # NOTE(review): every run of a model writes to the same --outPrefix,
        # so later runs may overwrite earlier ones; confirm whether each
        # dataset label was meant to get its own output directory.
        for sim_name in simulation_names(model_name):
            print("Simulating " + model_name)
            result = subprocess.run(command)
            if result.returncode != 0:
                # Keep going so one failed run does not abort the whole batch.
                print("BoolODE exited with status", result.returncode,
                      "for", sim_name)


if __name__ == "__main__":
    main()



27 changes: 27 additions & 0 deletions BoolODE_gendatasets_master.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#BoolODE_gendatasets_master.py

import argparse
import sys
import os

# Location of the BoolODE checkout and the directory that receives every run.
pathtoBoolODE = '/home/cbuck016/BoolODE-0.1/'
outputdir = '/home/cbuck016/BoolODE-0.1/GSD-sims/'

# Parameter grid: every (cells, timesteps, simulation index) combination is
# simulated once for the model below.
num_simulations = list(range(1, 11))
num_cells = [250, 500, 1000, 2000, 5000]
num_timesteps = [1, 2, 4, 8, 16]
model_name = 'GSD'

for cells in num_cells:
    for ts in num_timesteps:
        for i in num_simulations:
            # The experiment label records the max-time value followed by "00".
            experiment_name = f'{model_name}-ts-{ts}00-cells-{cells}-sim-{i}'
            input_file_prefix = f'{pathtoBoolODE}data/{model_name}'
            # The doubled spaces between some arguments are harmless to the
            # shell and are kept so the printed command is unchanged.
            command = (
                f'python3 {pathtoBoolODE}src/BoolODE.py --path {input_file_prefix}.txt '
                f' --ics {input_file_prefix}_ics.txt '
                f' --max-time {ts} --num-cells {cells}'
                ' --do-parallel '
                f' --outPrefix {outputdir}{experiment_name}/sim-{i}-'
                ' --sample-cells'
            )
            print(command)
            os.system(command)
89 changes: 89 additions & 0 deletions autonumclusters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#Silhouette analysis example code below was adapted from scikit-learn and is being edited to run on BoolODE expression data

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score

import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import matplotlib.style as style
from optparse import OptionParser
from pandas import DataFrame

def parseArgs(args):
    """Parse the command-line options of the silhouette script.

    Args:
        args: Argument list to parse. ``main`` forwards the list it was given
            (the ``__main__`` guard passes the full ``sys.argv``), so the
            program name comes back among the positional arguments.

    Returns:
        A ``(opts, args)`` tuple: ``opts.expressionfile`` is the path given
        with ``--expressionfile`` (``None`` when omitted), ``opts.outPrefix``
        is the value of ``--outPrefix`` (``''`` when omitted), and ``args``
        holds the leftover positional arguments.
    """
    parser = OptionParser()
    # Only long option names are defined; there are no short aliases.
    parser.add_option('--expressionfile', type='string',
                      help='Path to ExpressionData.csv file')
    parser.add_option('--outPrefix', type='string', default='',
                      help='Prefix for output files')

    (opts, args) = parser.parse_args(args)
    return opts, args


def main(args):
    """Choose the KMeans cluster count with the best average silhouette score.

    Reads the CSV named by ``--expressionfile``, clusters its transpose with
    KMeans for every cluster count from 2 to 10, and writes the best count and
    its score to ``<outPrefix>silhouettescores.csv``.

    Args:
        args: Command-line argument list, forwarded to ``parseArgs``.

    Raises:
        SystemExit: If no expression file was specified.
    """
    opts, args = parseArgs(args)
    expressionfile = opts.expressionfile
    outPrefix = opts.outPrefix

    if not expressionfile:
        print("Please specify path to ExpressionData.csv file")
        # Nothing below can run without an input file, so stop here with a
        # failing exit status.
        sys.exit(1)

    expfileDF = pd.read_csv(expressionfile, index_col=0)
    # KMeans clusters the rows of ``data``, i.e. the columns of the CSV
    # (presumably one column per cell -- confirm against BoolODE's output).
    data = expfileDF.transpose()
    print(expfileDF.shape)
    print(data.shape)

    if '/' in outPrefix:
        # Everything before the last '/' is the directory part of the prefix.
        outDir = outPrefix.rsplit('/', 1)[0]
        # outDir is empty for a prefix such as "/name"; there is nothing to
        # create then, and os.makedirs('') would raise.
        if outDir and not os.path.exists(outDir):
            print(outDir, "does not exist, creating it...")
            os.makedirs(outDir)

    range_n_clusters = list(range(2, 11))
    silhouette_avg_n_clusters = []

    for n_clusters in range_n_clusters:
        # Fixed seed keeps the clustering reproducible between runs.
        clusterer = KMeans(n_clusters=n_clusters, random_state=42)
        cluster_labels = clusterer.fit_predict(data)

        silhouette_avg = silhouette_score(data, cluster_labels)
        print("For n_clusters =", n_clusters,
              "The average silhouette_score is :", silhouette_avg)

        silhouette_avg_n_clusters.append(silhouette_avg)

    best_avg_silhouette_value = max(silhouette_avg_n_clusters)
    # Scores are appended in the same order as range_n_clusters, so the
    # position of the best score maps straight back to its cluster count.
    # index() returns the first match, so ties go to the smaller count.
    best_index = silhouette_avg_n_clusters.index(best_avg_silhouette_value)
    best_num_cluster = range_n_clusters[best_index]

    print("The best average silhouette score is: ", best_avg_silhouette_value)

    df = pd.DataFrame(
        {'Average Silhouette Method': [expressionfile, best_num_cluster,
                                       best_avg_silhouette_value]},
        index=['file name', 'predicted number of clusters',
               'average silhouette score'])
    print(df)
    df.to_csv(outPrefix + 'silhouettescores.csv')


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    # The full argv is passed on; option parsing ignores the program name.
    main(sys.argv)

19 changes: 19 additions & 0 deletions autonumclusters_master.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#Silhouette Method for use with output results from BoolODE datasets

import os

# Directory holding one sub-directory per BoolODE experiment.
pathtoBoolOutFile = '/home/cbuck016/BoolODE-0.1/mCAD-sims/'

# Parameter grid: autonumclusters.py is run once for every
# (cells, timesteps, simulation index) combination.
num_simulations = list(range(1, 11))
num_cells = [250, 500, 1000, 2000, 5000]
num_timesteps = [1, 2, 4, 8, 16]
model_name = 'mCAD'

for cells in num_cells:
    for ts in num_timesteps:
        for i in num_simulations:
            experiment_name = f'{model_name}-ts-{ts}00-cells-{cells}-sim-{i}'
            # The experiment directory is both where the expression file is
            # read from and the prefix the results are written under.
            experiment_dir = f'{pathtoBoolOutFile}{experiment_name}/'
            command = (
                'python3 autonumclusters.py'
                f' --expressionfile {experiment_dir}sim-{i}-ExpressionData.csv'
                f' --outPrefix {experiment_dir}'
            )
            print(command)
            os.system(command)