|
| 1 | +#!/usr/bin/python3 |
| 2 | + |
| 3 | +import sys |
| 4 | + |
def approximate_pattern_match_positions(pattern, sequence, d):
    """
    Locate every approximate occurrence of `pattern` inside `sequence`.

    A window of len(pattern) characters is slid across `sequence`; a start
    index is kept when edit_distance() between `pattern` and that window
    is at most `d`.

    Returns the matching 0-based start indices, in increasing order, as a
    list of strings.
    """

    window = len(pattern)
    last_start = len(sequence) - window
    return [
        str(start)
        for start in range(last_start + 1)
        if edit_distance(pattern, sequence[start:start + window]) <= d
    ]
| 20 | + |
| 21 | + |
def edit_distance(pattern1, pattern2):
    """
    Count the positions at which two patterns differ.

    Despite the name, this is a position-by-position mismatch count:
    insertions and deletions are not considered.

    Args:
        pattern1: Reference sequence; each of its positions is compared.
        pattern2: Sequence compared against pattern1. It must be at least
            as long as pattern1; elements past len(pattern1) are ignored.

    Returns:
        The number of indices i for which pattern1[i] != pattern2[i].

    Raises:
        IndexError: If pattern2 is shorter than pattern1.
    """

    # Fail up front with a clear message rather than part-way through the
    # scan; IndexError is kept so existing handlers keep working.
    if len(pattern2) < len(pattern1):
        raise IndexError(
            "pattern2 (length %d) is shorter than pattern1 (length %d)"
            % (len(pattern2), len(pattern1))
        )

    # zip() stops at the end of pattern1 here (the shorter or equal input),
    # so any extra tail of pattern2 is not counted.
    return sum(1 for nt1, nt2 in zip(pattern1, pattern2) if nt1 != nt2)
| 33 | + |
# Read the input file named by the first command-line argument.
# Expected layout, one value per line: pattern, sequence, allowed mismatches.
filename = sys.argv[1]
# The with-block closes the file handle even if parsing a line fails.
with open(filename, 'r') as f:
    pattern = f.readline().strip()
    sequence = f.readline().strip()
    allowed_mismatches = int(f.readline().strip())

# Find the approximate match positions and print them space-separated
pattern_positions = approximate_pattern_match_positions(pattern, sequence, allowed_mismatches)
print(' '.join(pattern_positions))
0 commit comments