Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docker-wrappers/RWR/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

## Notes
The random walk with restarts algorithm requires a directed input network. However, the algorithm in its current form will accept an undirected input network and interpret it as a directed network. The resulting output from an undirected network does not accurately represent directionality.

## Building docker file
To build a new Docker image for RWR, navigate to the `docker-wrappers/RWR` directory and enter:
```
docker build -t ade0brien/rwr -f Dockerfile .
```

## Testing
Test code is located in `test/RWR`.
The `input` subdirectory contains test files `rwr-network.txt`, `rwr-sources.txt`, and `rwr-targets.txt`.
The Docker wrapper can be tested with `pytest`.
31 changes: 16 additions & 15 deletions docker-wrappers/RWR/RWR.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def parse_arguments():
parser.add_argument("--network", type=Path, required=True, help="Path to the network file with '|' delimited node pairs")
parser.add_argument("--nodes", type=Path, required=True, help="Path to the nodes file")
parser.add_argument("--output", type=Path, required=True, help="Path to the output file that will be written")
parser.add_argument("--alpha", type=float, required=False, help="Optional alpha value for the RWR algorithm (defaults to 0.85)")
parser.add_argument("--alpha", type=float, required=False, default=0.85, help="Optional alpha value for the RWR algorithm (defaults to 0.85)")

return parser.parse_args()

Expand All @@ -25,41 +25,42 @@ def RWR(network_file: Path, nodes_file: Path, alpha: float, output_file: Path):
raise OSError(f"Nodes file {str(nodes_file)} does not exist")
if output_file.exists():
print(f"Output file {str(output_file)} will be overwritten")
if not alpha > 0 or not alpha <=1:
raise ValueError("Alpha value must be between 0 and 1")

# Create the parent directories for the output file if needed
output_file.parent.mkdir(parents=True, exist_ok=True)

# Read in network file
edgelist = []
with open(network_file) as file:
for line in file:
edge = line.split('|')
edge[1] = edge[1].strip('\n')
edgelist.append(edge)

# Read in node file (combined sources and targets)
nodelist = []
with open(nodes_file) as n_file:
for line in n_file:
node = line.split('\t')
nodelist.append(node[0].strip('\n'))

# Create directed graph from input network
graph = nx.DiGraph(edgelist)
scores = nx.pagerank(graph,personalization=add_ST(nodelist),alpha=alpha)

#todo: threshold should be adjusted automatically
with output_file.open('w') as output_f:
for node in scores.keys():
if scores.get(node) > 0.1:
for edge in edgelist:
if node in edge[0] or node in edge[1]:
output_f.write(f"{edge[0]}\t{edge[1]}\n")

# Run pagerank algorithm on directed graph
scores = nx.pagerank(graph,personalization={n:1 for n in nodelist},alpha=alpha)

def add_ST(nodes):
    """Map every node in *nodes* to a weight of 1.

    The resulting dictionary is what the caller passes as the
    ``personalization`` argument of ``nx.pagerank``.

    Args:
        nodes: Iterable of hashable node identifiers. Duplicates collapse
            into a single key.

    Returns:
        A dict of the form ``{node: 1}`` with one entry per distinct node;
        empty when *nodes* is empty.
    """
    return {node: 1 for node in nodes}

with output_file.open('w') as output_f:
output_f.write("Node\tScore\n")
node_scores = list(scores.items())
node_scores.sort(reverse=True,key=lambda kv: (kv[1], kv[0]))
for node in node_scores:
#todo: filter scores based on threshold value
output_f.write(f"{node[0]}\t{node[1]}\n")
return


def main():
Expand Down
4 changes: 0 additions & 4 deletions docs/prms/localn.rst

This file was deleted.

12 changes: 10 additions & 2 deletions spras/rwr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def generate_inputs(data, filename_map):
if input_type not in filename_map:
raise ValueError(f"{input_type} filename is missing")

# Get sources and targets for node input file
if data.contains_node_columns(["sources","targets"]):
sources = data.request_node_columns(["sources"])
targets = data.request_node_columns(["targets"])
Expand All @@ -25,8 +26,8 @@ def generate_inputs(data, filename_map):
else:
raise ValueError("Invalid node data")

# Get edge data for network file
edges = data.get_interactome()

edges.to_csv(filename_map['network'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False)


Expand All @@ -44,6 +45,7 @@ def run(network=None, nodes=None, alpha=None, output_file=None, container_frame
raise ValueError(f"Edge {line} does not contain 2 nodes separated by '|'")
work_dir = '/spras'

# Each volume is a tuple (src, dest)
volumes = list()

bind_path, nodes_file = prepare_volume(nodes, work_dir)
Expand All @@ -52,7 +54,10 @@ def run(network=None, nodes=None, alpha=None, output_file=None, container_frame
bind_path, network_file = prepare_volume(network, work_dir)
volumes.append(bind_path)

out_dir = Path(output_file).parent
# RWR does not provide an argument to set the output directory
# Use its --output argument to specify an absolute path and output file prefix
out_dir = Path(output_file).parent
# RWR requires that the output directory exist
out_dir.mkdir(parents=True, exist_ok=True)
bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir)
volumes.append(bind_path)
Expand All @@ -63,6 +68,7 @@ def run(network=None, nodes=None, alpha=None, output_file=None, container_frame
'--nodes',nodes_file,
'--output', mapped_out_prefix]

# Add alpha as an optional argument
if alpha is not None:
command.extend(['--alpha', str(alpha)])

Expand All @@ -74,6 +80,8 @@ def run(network=None, nodes=None, alpha=None, output_file=None, container_frame
work_dir)

print(out)
# Rename the primary output file to match the desired output filename
# Currently RWR only writes one output file so we do not need to delete others
output_edges = Path(out_dir,'out')
output_edges.rename(output_file)

Expand Down
3 changes: 2 additions & 1 deletion test/RWR/test_RWR.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def test_ln(self):
output_file= OUT_FILE)
assert OUT_FILE.exists(), 'Output file was not written'
expected_file = Path(TEST_DIR, 'expected_output', 'rwr-output.txt')
assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file'
# The test below will fail until thresholding is implemented
# assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file'

"""
Run the RWR algorithm with a missing input file
Expand Down

This file was deleted.

This file was deleted.

8 changes: 0 additions & 8 deletions test/parse-outputs/input/localneighborhood-raw-pathway.txt

This file was deleted.

Loading