|
| 1 | +# Global workflow control |
| 2 | + |
| 3 | +# The length of the hash used to identify a parameter combination |
| 4 | +hash_length: 7 |
| 5 | + |
| 6 | +# Specify the container framework. Currently supported frameworks are 'docker' and |
| 7 | +# 'singularity'. If container_framework is not specified, SPRAS will default to docker. |
| 8 | +container_framework: singularity |
| 9 | + |
| 10 | +# Unpack singularity. See config/config.yaml for details. |
| 11 | +unpack_singularity: true |
| 12 | + |
| 13 | +# Allow the user to configure which container registry containers should be pulled from |
| 14 | +# Note that this assumes container names are consistent across registries, and that the |
| 15 | +# registry being passed doesn't require authentication for pull actions |
| 16 | +container_registry: |
| 17 | + base_url: docker.io |
| 18 | + # The owner or project of the registry |
| 19 | + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs |
| 20 | + owner: reedcompbio |
| 21 | + |
| 22 | +# This list of algorithms should be generated by a script which checks the filesystem for installs. |
| 23 | +# It shouldn't be changed by mere mortals. (Alternatively, we could add a path to the executable for each algorithm |
| 24 | +# in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved) |
| 25 | +# Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change |
| 26 | +# which algorithms are run in a given experiment. |
| 27 | +# |
| 28 | +# Algorithm-specific parameters are embedded in lists so that users can specify multiple values. If multiple |
| 29 | +# parameters are specified then the algorithm will be run as many times as needed to cover all parameter |
| 30 | +# combinations. For instance if we have the following: |
| 31 | +# - name: "myAlg" |
| 32 | +# params: |
| 33 | +# include: true |
| 34 | +# a: [1,2] |
| 35 | +# b: [0.5,0.75] |
| 36 | +# |
| 37 | +# then myAlg will be run on (a=1,b=0.5),(a=1,b=0.75),(a=2,b=0.5), and (a=2,b=0.75). Pretty neat, but be |
| 38 | +# careful: too many parameters might make your runs take a long time. |
| 39 | + |
| 40 | +algorithms: |
| 41 | + - name: "pathlinker" |
| 42 | + params: |
| 43 | + include: false |
| 44 | + run1: |
| 45 | + k: range(100,201,100) |
| 46 | + |
| 47 | + - name: "omicsintegrator1" |
| 48 | + params: |
| 49 | + include: true |
| 50 | + run1: |
| 51 | + r: [5] |
| 52 | + b: [5, 6] |
| 53 | + w: np.linspace(0,5,2) |
| 54 | + g: [3] |
| 55 | + d: [10] |
| 56 | + |
| 57 | + - name: "omicsintegrator2" |
| 58 | + params: |
| 59 | + include: true |
| 60 | + run1: |
| 61 | + b: [4] |
| 62 | + g: [0] |
| 63 | + run2: |
| 64 | + b: [2] |
| 65 | + g: [3] |
| 66 | + |
| 67 | + - name: "meo" |
| 68 | + params: |
| 69 | + include: true |
| 70 | + run1: |
| 71 | + max_path_length: [3] |
| 72 | + local_search: ["Yes"] |
| 73 | + rand_restarts: [10] |
| 74 | + |
| 75 | + - name: "mincostflow" |
| 76 | + params: |
| 77 | + include: true |
| 78 | + run1: |
| 79 | + flow: [1] # The flow must be an int |
| 80 | + capacity: [1] |
| 81 | + |
| 82 | + - name: "allpairs" |
| 83 | + params: |
| 84 | + include: true |
| 85 | + |
| 86 | + - name: "domino" |
| 87 | + params: |
| 88 | + include: true |
| 89 | + run1: |
| 90 | + slice_threshold: [0.3] |
| 91 | + module_threshold: [0.05] |
| 92 | + |
| 93 | + |
| 94 | +# Here we specify which pathways to run and other file location information. |
| 95 | +# DataLoader.py can currently only load a single dataset |
| 96 | +# Assume that if a dataset label does not change, the lists of associated input files do not change |
| 97 | +datasets: |
| 98 | + - |
| 99 | + label: data0 |
| 100 | + node_files: ["node-prizes.txt", "sources.txt", "targets.txt"] |
| 101 | + # DataLoader.py can currently only load a single edge file, which is the primary network |
| 102 | + edge_files: ["network.txt"] |
| 103 | + # Placeholder |
| 104 | + other_files: [] |
| 105 | + # Relative path from the spras directory |
| 106 | + data_dir: "input" |
| 107 | + # - |
| 108 | + # label: data1 |
| 109 | + # # Reuse some of the same node files as 'data0' but with a different network and targets |
| 110 | + # node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"] |
| 111 | + # edge_files: ["alternative-network.txt"] |
| 112 | + # other_files: [] |
| 113 | + # # Relative path from the spras directory |
| 114 | + # data_dir: "input" |
| 115 | + |
| 116 | +# If we want to reconstruct then we should set run to true. |
| 117 | +# TODO: if include is true above but run is false here, algs are not run. |
| 118 | +# is this the behavior we want? |
| 119 | +reconstruction_settings: |
| 120 | + |
| 121 | + # Set where everything is saved |
| 122 | + locations: |
| 123 | + |
| 124 | + # Place the save path here |
| 125 | + # TODO move to global |
| 126 | + reconstruction_dir: "output" |
| 127 | + |
| 128 | + run: true |
| 129 | + |
| 130 | +analysis: |
| 131 | + # Create one summary per pathway file and a single summary table for all pathways for each dataset |
| 132 | + summary: |
| 133 | + include: true |
| 134 | + # Create output files for each pathway that can be visualized with GraphSpace |
| 135 | + graphspace: |
| 136 | + include: true |
| 137 | + # Create Cytoscape session file with all pathway graphs for each dataset |
| 138 | + cytoscape: |
| 139 | + include: false |
| 140 | + # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset |
| 141 | + ml: |
| 142 | + include: true |
| 143 | + # specify how many principal components to calculate |
| 144 | + components: 2 |
| 145 | + # boolean to show the labels on the pca graph |
| 146 | + labels: true |
| 147 | + # 'ward', 'complete', 'average', 'single' |
| 148 | + # if linkage: ward, must use metric: euclidean |
| 149 | + linkage: 'ward' |
| 150 | + # 'euclidean', 'manhattan', 'cosine' |
| 151 | + metric: 'euclidean' |
0 commit comments