-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathshortest_common_superstring.py
executable file
·65 lines (52 loc) · 1.59 KB
/
shortest_common_superstring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
"""
usage:
shortest_common_superstring.py [options] sequences.txt
where the options are:
-h,--help : print usage and quit
sequences.txt is a file with one sequence to be included in each row.
"""
from sys import argv, stderr
from getopt import getopt, GetoptError
import itertools
def read_sequences(filename):
    """Read the sequences (one per line) from the file and return a list.

    Leading and trailing whitespace (including the newline) is stripped from
    every line. Lines that are empty after stripping are skipped, so a blank
    line or a trailing newline at the end of the file does not produce an
    empty "sequence".

    filename -- path of the text file to read.

    Returns a list of non-empty strings in file order.
    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.
    """
    with open(filename, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [sequence for sequence in stripped_lines if sequence]
def _overlap(left, right):
    """Return the length of the longest suffix of left that is a prefix of right."""
    for length in range(min(len(left), len(right)), 0, -1):
        if left.endswith(right[:length]):
            return length
    return 0


def calculate_scs(reads):
    """
    Build a common superstring of the reads with the greedy strategy.

    Repeatedly find the ordered pair of strings with the maximal
    suffix/prefix overlap and merge them, until only one string is left.
    The greedy strategy is a heuristic: the result contains every read as a
    substring, but it is not guaranteed to be the shortest such string.

    reads -- iterable of strings.

    Returns the merged superstring, or "" when there are no non-empty reads.
    Ties between equally good overlaps are broken by input order, so the
    result is deterministic.
    """
    # Drop empty reads and exact duplicates (keeping first-seen order); they
    # add nothing to the superstring.
    pool = list(dict.fromkeys(read for read in reads if read))
    # A read that is a substring of another read is already covered by it.
    pool = [
        read for read in pool
        if not any(read != other and read in other for other in pool)
    ]

    while len(pool) > 1:
        best_len, best_left, best_right = -1, None, None
        for i, j in itertools.permutations(range(len(pool)), 2):
            overlap_len = _overlap(pool[i], pool[j])
            if overlap_len > best_len:
                best_len, best_left, best_right = overlap_len, i, j
        # Glue the pair together, writing the shared region only once.
        merged = pool[best_left] + pool[best_right][best_len:]
        pool = [
            read for index, read in enumerate(pool)
            if index not in (best_left, best_right)
        ]
        pool.append(merged)

    return pool[0] if pool else ""
def main(filename):
    """Load the reads from filename and print their common superstring.

    A progress note is written to stderr once the file has been read; the
    superstring itself is written to stdout.
    """
    reads = read_sequences(filename)
    print("Read the sequences", file=stderr)

    # Compute the superstring and emit it in one step.
    print(calculate_scs(reads))
if __name__ == "__main__":
    # Parse the command line. Diagnostics and usage go to stderr so that
    # stdout carries nothing but the computed superstring.
    try:
        opts, args = getopt(argv[1:], "h", ["help"])
    except GetoptError as err:
        print(err, file=stderr)
        print(__doc__, file=stderr)
        # SystemExit is raised directly: the bare exit() helper is installed
        # by the site module and is unavailable under "python -S".
        raise SystemExit(1)

    for o, _ in opts:
        if o in ("-h", "--help"):
            print(__doc__, file=stderr)
            raise SystemExit(0)
        else:
            # Explicit raise instead of "assert False" so the check is not
            # stripped when running under -O.
            raise AssertionError("unhandled option")

    # Exactly one positional argument is expected: the sequences file.
    if len(args) != 1:
        print(__doc__, file=stderr)
        raise SystemExit(2)

    main(args[0])