Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ container_registry:
algorithms:
- name: "strwr"
params:
include: false
include: true
run1:
alpha: [0.85]

- name: "rwr"
params:
include: true
include: false
run1:
alpha: [0.85]

Expand Down
13 changes: 13 additions & 0 deletions docker-wrappers/ST_RWR/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## Notes
The random walk with restarts algorithm requires a directed input network. However, the algorithm in its current form will accept an undirected input network and interpret it as a directed network. The resulting output from an undirected network does not accurately represent directionality.

## Building the Docker image
To build a new Docker image for ST_RWR, navigate to the `docker-wrappers/ST_RWR` directory and enter:
```
docker build -t ade0brien/strwr -f Dockerfile .
```

## Testing
Test code is located in `test/ST_RWR`.
The `input` subdirectory contains the test files `strwr-network.txt`, `strwr-sources.txt`, and `strwr-targets.txt`.
The Docker wrapper can be tested with `pytest`.
42 changes: 24 additions & 18 deletions docker-wrappers/ST_RWR/ST_RWR.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def parse_arguments():
parser.add_argument("--sources", type=Path, required=True, help="Path to the source nodes file")
parser.add_argument("--targets", type=Path, required=True, help="Path to the target nodes file")
parser.add_argument("--output", type=Path, required=True, help="Path to the output file that will be written")
parser.add_argument("--alpha", type=float, required=False, help="Optional alpha value for the RWR algorithm (defaults to 0.85)")
parser.add_argument("--alpha", type=float, required=False, default=0.85, help="Optional alpha value for the RWR algorithm (defaults to 0.85)")

return parser.parse_args()

Expand All @@ -28,44 +28,57 @@ def RWR(network_file: Path, source_nodes_file: Path,target_nodes_file: Path, alp
raise OSError(f"Nodes file {str(target_nodes_file)} does not exist")
if output_file.exists():
print(f"Output file {str(output_file)} will be overwritten")
if not alpha > 0 or not alpha <=1:
raise ValueError("Alpha value must be between 0 and 1")

# Create the parent directories for the output file if needed
output_file.parent.mkdir(parents=True, exist_ok=True)

# Read in network file
edgelist = []
with open(network_file) as file:
for line in file:
edge = line.split('|')
edge[1] = edge[1].strip('\n')
edgelist.append(edge)


# Read in sources file
sources = []
with open(source_nodes_file) as source_nodes:
for line in source_nodes:
source = line.split('\t')
sources.append(source[0].strip('\n'))

# Read in targets file
targets = []
with open(target_nodes_file) as target_nodes:
for line in target_nodes:
target = line.split('\t')
targets.append(target[0].strip('\n'))

# Create directed graph from input network
source_graph = nx.DiGraph(edgelist)

# Create reversed graph to run pagerank on targets
target_graph = source_graph.reverse(copy= True)

source_scores = nx.pagerank(source_graph,personalization=add_ST(sources),alpha=alpha)
target_scores = nx.pagerank(target_graph,personalization=add_ST(targets),alpha=alpha)
total_scores = merge_scores(source_scores,target_scores)
# Run the PageRank algorithm on the source and target graphs separately
source_scores = nx.pagerank(source_graph,personalization={n:1 for n in sources},alpha=alpha)
target_scores = nx.pagerank(target_graph,personalization={n:1 for n in targets},alpha=alpha)

# Merge scores from source and target pagerank runs
# While merge_scores currently returns the average of the two scores, alternate methods such as taking
# the minimum of the two scores may be used
total_scores = merge_scores(source_scores,target_scores)

#todo: threshold should be adjusted automatically
with output_file.open('w') as output_f:
for node in total_scores.keys():
if total_scores.get(node) > 0.1:
for edge in edgelist:
if node in edge[0] or node in edge[1]:
output_f.write(f"{edge[0]}\t{edge[1]}\n")
output_f.write("Node\tScore\n")
node_scores = list(total_scores.items())
node_scores.sort(reverse=True,key=lambda kv: (kv[1], kv[0]))
for node in node_scores:
#todo: filter scores based on threshold value
output_f.write(f"{node[0]}\t{node[1]}\n")
return

def merge_scores(sources,targets):
output = {}
Expand All @@ -74,13 +87,6 @@ def merge_scores(sources,targets):
output.update({node:((sources.get(node)+targets.get(node))/2)})
return output

def add_ST(nodes):
    """Build a uniform weight mapping for a collection of nodes.

    Each node in ``nodes`` is mapped to the weight ``1``. The result is
    passed as the ``personalization`` argument of ``nx.pagerank`` for the
    source and target runs.

    Args:
        nodes: Iterable of hashable node identifiers. Duplicates collapse
            into a single entry.

    Returns:
        A dict mapping every distinct node to ``1``; empty if ``nodes`` is
        empty.
    """
    # A comprehension replaces the manual loop of single-item dict.update calls.
    return {node: 1 for node in nodes}



def main():
"""
Expand Down
4 changes: 0 additions & 4 deletions docs/prms/localn.rst

This file was deleted.

14 changes: 11 additions & 3 deletions spras/strwr.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def generate_inputs(data, filename_map):
for input_type in ST_RWR.required_inputs:
if input_type not in filename_map:
raise ValueError(f"{input_type} filename is missing")


# Get separate source and target nodes for source and target files
if data.contains_node_columns(["sources","targets"]):
sources = data.request_node_columns(["sources"])
sources.to_csv(filename_map['sources'],sep='\t',index=False,columns=['NODEID'],header=False)
Expand All @@ -26,8 +27,8 @@ def generate_inputs(data, filename_map):
else:
raise ValueError("Invalid node data")

# Get edge data for network file
edges = data.get_interactome()

edges.to_csv(filename_map['network'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False)


Expand All @@ -46,6 +47,7 @@ def run(network=None, sources=None, targets=None, alpha=None, output_file=None,

work_dir = '/spras'

# Each volume is a tuple (src, dest)
volumes = list()

bind_path, source_file = prepare_volume(sources, work_dir)
Expand All @@ -57,7 +59,10 @@ def run(network=None, sources=None, targets=None, alpha=None, output_file=None,
bind_path, network_file = prepare_volume(network, work_dir)
volumes.append(bind_path)

out_dir = Path(output_file).parent
# ST_RWR does not provide an argument to set the output directory
# Use its --output argument to set the output file prefix to specify an absolute path and prefix
out_dir = Path(output_file).parent
# ST_RWR requires that the output directory exist
out_dir.mkdir(parents=True, exist_ok=True)
bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir)
volumes.append(bind_path)
Expand All @@ -69,6 +74,7 @@ def run(network=None, sources=None, targets=None, alpha=None, output_file=None,
'--targets',target_file,
'--output', mapped_out_prefix]

# Add alpha as an optional argument
if alpha is not None:
command.extend(['--alpha', str(alpha)])

Expand All @@ -80,6 +86,8 @@ def run(network=None, sources=None, targets=None, alpha=None, output_file=None,
work_dir)

print(out)
# Rename the primary output file to match the desired output filename
# Currently ST_RWR only writes one output file so we do not need to delete others
output_edges = Path(out_dir,'out')
output_edges.rename(output_file)

Expand Down
5 changes: 3 additions & 2 deletions test/ST_RWR/test_STRWR.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@ class TestSTRWR:
"""
def test_ln(self):
OUT_FILE.unlink(missing_ok=True)
ST_RWR.run(network=Path(TEST_DIR, 'input', 'rwr-network.txt'),
ST_RWR.run(network=Path(TEST_DIR, 'input', 'strwr-network.txt'),
sources=Path(TEST_DIR, 'input', 'strwr-sources.txt'),
targets = Path(TEST_DIR, 'input','strwr-targets.txt'),
alpha = 0.85,
output_file= OUT_FILE)
assert OUT_FILE.exists(), 'Output file was not written'
expected_file = Path(TEST_DIR, 'expected_output', 'strwr-output.txt')
assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file'
# The test below will always fail until thresholding is implemented
# assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file'

"""
Run the ST_RWR algorithm with a missing input file
Expand Down

This file was deleted.

This file was deleted.

Loading