1
+ import numpy as np
2
+ from numpy .fft import fft , ifft
3
+
4
def preprocess_text_and_pattern(text, pattern):
    """Encode text and pattern as integer sequences for wildcard matching.

    Each distinct character is assigned a positive integer code, and every
    '*' in the pattern is encoded as 0 so it can act as a wildcard. Codes are
    assigned in sorted character order, which makes the encoding identical
    from one interpreter run to the next (iterating a set of strings directly
    is subject to hash randomisation).

    Args:
        text: The input text string. A '*' occurring here is treated as an
            ordinary character and receives its own non-zero code.
        pattern: The input pattern string, potentially containing wildcards
            ('*').

    Returns:
        A tuple containing:
        - A list of integers representing the text characters.
        - A list of integers representing the pattern characters, with 0 for
          wildcards.
    """
    # The pattern's '*' never needs a code of its own; it only enters the
    # alphabet when the text itself contains a literal '*'.
    alphabet = sorted(set(text) | (set(pattern) - {"*"}))
    char_to_int = {char: code for code, char in enumerate(alphabet, start=1)}

    pattern_int = [0 if char == "*" else char_to_int[char] for char in pattern]
    text_int = [char_to_int[char] for char in text]

    return text_int, pattern_int
25
+
26
def fft_convolution(a, b):
    """Return the full linear convolution of two sequences using the FFT.

    Both sequences are zero-padded to length ``len(a) + len(b) - 1`` so that
    the circular convolution computed by the FFT equals the linear one. Only
    the real part of the inverse transform is returned, so the result is
    meaningful for real-valued inputs.

    Args:
        a: The first sequence.
        b: The second sequence.

    Returns:
        A float array of length ``len(a) + len(b) - 1`` holding the
        convolution, or an empty array when either sequence is empty.
    """
    # An empty operand has an empty convolution. Without this guard the
    # padded length below is zero or negative (the FFT rejects it) or the
    # other operand is silently truncated.
    if len(a) == 0 or len(b) == 0:
        return np.zeros(0)

    size = len(a) + len(b) - 1
    spectrum = fft(a, size) * fft(b, size)
    return np.real(ifft(spectrum))
41
+
42
+ def compute_A_fft (text_int , pattern_int ):
43
+ """Computes the A array for the pattern matching algorithm.
44
+
45
+ Args:
46
+ text_int: The integer representation of the text.
47
+ pattern_int: The integer representation of the pattern.
48
+
49
+ Returns:
50
+ The A array.
51
+ """
52
+
53
+ n = len (text_int )
54
+ m = len (pattern_int )
55
+
56
+ # Power transforms of the pattern and text based on the formula
57
+ p1 = np .array (pattern_int )
58
+ p2 = np .array ([p ** 2 for p in pattern_int ])
59
+ p3 = np .array ([p ** 3 for p in pattern_int ])
60
+
61
+ t1 = np .array (text_int )
62
+ t2 = np .array ([t ** 2 for t in text_int ])
63
+ t3 = np .array ([t ** 3 for t in text_int ])
64
+
65
+ # Convolution to calculate the terms for A[i]
66
+ sum1 = fft_convolution (p3 [::- 1 ], t1 )
67
+ sum2 = fft_convolution (p2 [::- 1 ], t2 )
68
+ sum3 = fft_convolution (p1 [::- 1 ], t3 )
69
+
70
+ # Calculate A[i] using the convolution results
71
+ A = sum1 [:n - m + 1 ] - 2 * sum2 [:n - m + 1 ] + sum3 [:n - m + 1 ]
72
+
73
+ return A
74
+
75
# Script entry point: run the doctests, then match a fixed demo pattern.
if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Fixed demo input; '*' in the pattern stands for a wildcard. Swap in
    # input() calls here to read the text and pattern interactively.
    text = "abcabc"
    pattern = "abc*"

    text_int, pattern_int = preprocess_text_and_pattern(text, pattern)
    A = compute_A_fft(text_int, pattern_int)

    # An alignment is reported as a match when its A value is numerically 0.
    matches = [index for index, score in enumerate(A) if np.isclose(score, 0)]
    print("Pattern matches at indices:", matches)
96
+
0 commit comments