Skip to content

Commit e7c0701

Browse files
committed
Add wildcard pattern matching algorithm using FFT
1 parent e9e7c96 commit e7c0701

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed
+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import numpy as np
2+
from numpy.fft import fft, ifft
3+
4+
def preprocess_text_and_pattern(text, pattern):
    """Encode text and pattern as integer sequences for wildcard matching.

    Every distinct character is assigned a unique positive integer, and
    every wildcard position in the pattern is encoded as 0. Codes are
    assigned in sorted character order so the encoding is identical on
    every run (iterating a bare ``set`` of strings would make the codes
    depend on hash randomization).

    Args:
        text: The input text string.
        pattern: The input pattern string, potentially containing
            wildcards ('*').

    Returns:
        A tuple containing:
        - A list of integers representing the text characters.
        - A list of integers representing the pattern characters, with 0
          for wildcards.
    """
    wildcard = "*"

    # The wildcard never needs a code of its own unless it also occurs as a
    # literal character of the text, where it is treated like any other symbol.
    alphabet = sorted(set(text) | (set(pattern) - {wildcard}))
    char_to_int = {char: code for code, char in enumerate(alphabet, start=1)}

    pattern_int = [
        0 if char == wildcard else char_to_int[char] for char in pattern
    ]
    text_int = [char_to_int[char] for char in text]

    return text_int, pattern_int
25+
26+
def fft_convolution(a, b):
    """Performs linear convolution using the Fast Fourier Transform (FFT).

    Both inputs are zero-padded to ``len(a) + len(b) - 1`` points so that
    the circular convolution produced by multiplying the transforms equals
    the full linear convolution.

    Args:
        a: The first sequence.
        b: The second sequence.

    Returns:
        A NumPy array of length ``len(a) + len(b) - 1`` holding the real
        part of the convolution, or an empty array if either input is empty.
    """
    # An empty operand would otherwise request an FFT of <= 0 points (an
    # error) or silently produce zeros of the wrong length.
    if len(a) == 0 or len(b) == 0:
        return np.zeros(0)

    n = len(a) + len(b) - 1
    return np.real(ifft(fft(a, n) * fft(b, n)))


def compute_A_fft(text_int, pattern_int):
    """Computes the A array for the pattern matching algorithm.

    For every alignment ``i`` of the pattern against the text,

        A[i] = sum_j p[j] * t[i + j] * (p[j] - t[i + j]) ** 2

    Each term is zero when the characters agree or when ``p[j]`` is 0, so
    ``A[i]`` is zero when the pattern matches at index ``i``. Expanding the
    square gives three correlations (p^3 with t, p^2 with t^2, p with t^3),
    each evaluated with one FFT convolution of the reversed pattern.

    Args:
        text_int: The integer representation of the text.
        pattern_int: The integer representation of the pattern.

    Returns:
        The A array: a float NumPy array with one entry per alignment
        ``0 .. len(text_int) - len(pattern_int)``. It is empty when the
        pattern is longer than the text, and all zeros (length
        ``len(text_int) + 1``) when the pattern is empty.
    """
    n = len(text_int)
    m = len(pattern_int)

    if m > n:
        # No alignment fits; a negative slice bound would return garbage.
        return np.zeros(0)
    if m == 0:
        # The empty sum is zero at each of the n + 1 possible positions.
        return np.zeros(n + 1)

    # Work in float64 so the cubes cannot wrap around in a narrow
    # platform integer type.
    p_rev = np.asarray(pattern_int, dtype=np.float64)[::-1]
    t1 = np.asarray(text_int, dtype=np.float64)

    # Convolving with the reversed pattern yields the cross-correlation.
    sum1 = fft_convolution(p_rev**3, t1)
    sum2 = fft_convolution(p_rev**2, t1**2)
    sum3 = fft_convolution(p_rev, t1**3)

    # Alignment i lands at convolution index i + m - 1, so the fully
    # overlapping alignments occupy indices m - 1 .. n - 1.
    valid = slice(m - 1, n)
    return sum1[valid] - 2 * sum2[valid] + sum3[valid]
74+
75+
# Script entry point: run the module doctests, then a small matching demo
if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Fixed sample input. To read both strings interactively instead, use
    # input("Enter the text: ") and
    # input("Enter the pattern (use '*' for wildcard): ").
    text = "abcabc"
    pattern = "abc*"

    text_int, pattern_int = preprocess_text_and_pattern(text, pattern)
    A = compute_A_fft(text_int, pattern_int)

    # An index is reported as a match when its A value is numerically zero
    matches = [index for index, value in enumerate(A) if np.isclose(value, 0)]
    print("Pattern matches at indices:", matches)
96+

0 commit comments

Comments
 (0)