-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsre2_test.go
279 lines (230 loc) · 9.59 KB
/
sre2_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
package sre2
import (
"fmt"
"testing"
)
// checkState reports err as a failure on t when state is false. It does
// nothing when state is true, and never stops the running test.
func checkState(t *testing.T, state bool, err string) {
	// Mark this function as a helper so a failure is attributed to the line of
	// the calling test instead of the t.Error line below.
	t.Helper()
	if !state {
		t.Error(err)
	}
}
// checkIntSlice reports a failure on t, prefixed with err, unless expected
// and result are equal. Two slices are equal when they are either both nil or
// both non-nil, have the same length, and hold the same values in order.
func checkIntSlice(t *testing.T, expected []int, result []int, err string) {
	// Attribute failures to the calling test line, not to this helper.
	t.Helper()

	// A nil slice only matches another nil slice; this lets callers assert
	// that a function returned nil rather than an empty slice.
	match := (expected == nil) == (result == nil) && len(expected) == len(result)
	if match {
		for i, want := range expected {
			if result[i] != want {
				match = false
				break
			}
		}
	}
	if !match {
		// %v is the verb that renders []int values. fmt.Sprintf is used here
		// (rather than t.Errorf) so the file's fmt import stays in use.
		t.Error(fmt.Sprintf("%s: got %v, expected %v", err, result, expected))
	}
}
// TestSimpleRe runs a selection of basic regular expressions against this
// package: the empty expression, an anchored alternation with a closure, and
// two expressions whose subexpression indexes are checked through MatchIndex.
func TestSimpleRe(t *testing.T) {
	// The empty expression is expected to have no subexpressions and to match
	// any input.
	r := MustParse("")
	checkState(t, r.NumSubexps() == 0, "blank re should have no alts")
	checkState(t, r.Match(""), "everything should match")
	checkState(t, r.Match("fadsnjkflsdafnas"), "everything should match")

	// One capturing group, anchored at both ends.
	r = MustParse("^(a|b)+c*$")
	checkState(t, r.NumSubexps() == 1, "simple re should have single alt")
	checkState(t, !r.Match("abd"), "not a valid match")
	checkState(t, r.Match("a"), "basic string should match")
	checkState(t, !r.Match(""), "empty string should not match")
	checkState(t, r.Match("abcccc"), "longer string should match")

	// Index pairs are laid out as: whole match, then one pair per group.
	r = MustParse("(\\w*)\\s*(\\w*)")
	res := r.MatchIndex("zing hello there")
	checkIntSlice(t, []int{0, 10, 0, 4, 5, 10}, res, "did not match first two words as expected")

	r = MustParse(".*?(\\w+)$")
	res = r.MatchIndex("zing hello there")
	checkIntSlice(t, []int{0, 16, 11, 16}, res, "did not match last word as expected")

	// A failed match must yield nil. The expected value (nil) goes first so a
	// failure message labels "got" and "expected" correctly.
	res = r.MatchIndex("\n")
	checkIntSlice(t, nil, res, "should return nil on failed match")
}
// TestInvalidRe verifies both failure modes for an invalid expression: Parse
// must return an error together with a nil regexp, and MustParse must panic.
func TestInvalidRe(t *testing.T) {
	re, parseErr := Parse("a**")
	checkState(t, parseErr != nil, "must fail parsing")
	checkState(t, re == nil, "regexp must be nil")

	// Run MustParse inside its own function so the deferred recover can
	// observe the panic without aborting this test.
	panicked := false
	func() {
		defer func() {
			panicked = recover() != nil
		}()
		MustParse("z(((a")
	}()
	checkState(t, panicked, "should panic")
}
// TestCharClass covers character classes written within [...], along with the
// \p and \W classes used on their own. Each pattern is parsed once and then
// checked against a list of inputs that must or must not match.
func TestCharClass(t *testing.T) {
	type matchCase struct {
		input string
		want  bool
		msg   string
	}
	groups := []struct {
		pattern string
		cases   []matchCase
	}{
		{"^[\t[:word:]]+$", []matchCase{ // Match tabs and word characters.
			{"c", true, "non-space should match"},
			{"c t", false, "space should not match"},
			{"c\tt", true, "tab should match"},
		}},
		{"^[:ascii:]*$", []matchCase{
			{"", true, "nothing should match"},
			{"c", true, "ascii should match"},
			{"Π", false, "unicode should not match"},
		}},
		{"^\\pN$", []matchCase{
			{"〩", true, "character from Nl should match"},
			{"¾", true, "character from Nu should match"},
		}},
		{"^\\p{Nl}$", []matchCase{
			{"〩", true, "character from Nl should match"},
			{"¾", false, "character from Nu should not match"},
		}},
		{"^[^. ]$", []matchCase{
			{"\n", true, "not everything should match \\n"},
			{" ", false, "should match only \\n"},
		}},
		{"^[.\n]$", []matchCase{
			{"\n", true, "should match \\n"},
		}},
		{"^\\W$", []matchCase{
			{"a", false, "should not match word char"},
			{"!", true, "should match non-word"},
		}},
		{"^[abc\\W]$", []matchCase{
			{"a", true, "should match 'a'"},
			{"!", true, "should match '!'"},
			{"d", false, "should not match 'd'"},
		}},
		{"^[^abc\\W]$", []matchCase{
			{"a", false, "should not match 'a'"},
			{"%", false, "should not match non-word char"},
			{"d", true, "should match 'd'"},
		}},
		{"^[\\w\\D]$", []matchCase{
			{"a", true, "should match regular char 'a'"},
			{"2", true, "should still match number '2', caught by \\w"},
		}},
		{"^[\\[-\\]]$", []matchCase{
			{"]", true, "should match ']'"},
			{"[", true, "should match '['"},
			{"\\", true, "should match '\\', between [ and ]"},
		}},
		{"^a-z$", []matchCase{
			{"a-z", true, "should match literal a-z"},
			{"a", false, "should not match single start char"},
			{"b", false, "should not match single char (e.g., virtual range)"},
		}},
	}

	for _, g := range groups {
		re := MustParse(g.pattern)
		for _, c := range g.cases {
			checkState(t, re.Match(c.input) == c.want, c.msg)
		}
	}
}
// TestEscapeSequences checks expressions built from escape sequences: an
// escaped dot, an octal escape, hex escapes, and an escape that Parse is
// expected to reject.
func TestEscapeSequences(t *testing.T) {
	// Escaped dot, a raw newline, then \044 (octal for '$').
	re := MustParse("^\\.\n\\044$") // Match '.\n$'
	literalCases := []struct {
		input string
		want  bool
		msg   string
	}{
		{".\n$", true, "should match"},
		{" \n$", false, "space should not match"},
		{"\n\n$", false, ". does not match \n by default"},
		{".\n", false, "# should not be treated as end char"},
	}
	for _, c := range literalCases {
		checkState(t, re.Match(c.input) == c.want, c.msg)
	}

	re = MustParse("^\\x{03a0}\\x25$") // Match 'Π%'.
	checkState(t, re.Match("Π%"), "should match pi+percent")

	// Escaping Π must be refused: Parse has to return an error and no regexp.
	re, err := Parse("^\\Π$")
	rejected := err != nil && re == nil
	checkState(t, rejected, "should have failed on trying to escape Π, not punctuation")
}
// TestStringLiteral checks literal text quoted between \Q and \E.
func TestStringLiteral(t *testing.T) {
	plain := MustParse("^\\Qhello\\E$")
	checkState(t, plain.Match("hello"), "should match hello")

	quoted := MustParse("^\\Q.$\\\\E$") // match ".$\\"
	checkState(t, quoted.Match(".$\\"), "should match")
	checkState(t, !quoted.Match(" $\\"), "should not match")

	// Disabled case, kept for reference: an empty \Q\E followed by a closure.
	// r = MustParse("^a\\Q\\E*b$") // match absolutely nothing between 'ab'
	// checkState(t, r.Match("ab"), "should match")
	// checkState(t, !r.Match("acb"), "should not match")
}
// TestClosureExpansion checks the closure forms ? and {n,m}, {n} and {n,} by
// matching each pattern against inputs with varying repeat counts.
func TestClosureExpansion(t *testing.T) {
	type matchCase struct {
		input string
		want  bool
		msg   string
	}
	groups := []struct {
		pattern string
		cases   []matchCase
	}{
		{"^za?$", []matchCase{
			{"z", true, "should match none"},
			{"za", true, "should match single"},
			{"zaa", false, "should not match more"},
		}},
		{"^a{2,4}$", []matchCase{
			{"", false, "0 should fail"},
			{"a", false, "1 should fail"},
			{"aa", true, "2 should succeed"},
			{"aaa", true, "3 should succeed"},
			{"aaaa", true, "4 should succeed"},
			{"aaaaa", false, "5 should fail"},
		}},
		{"^a{2}$", []matchCase{
			{"", false, "0 should fail"},
			{"a", false, "1 should fail"},
			{"aa", true, "2 should succeed"},
			{"aaa", false, "3 should fail"},
		}},
		{"^a{3,}$", []matchCase{
			{"aa", false, "2 should fail"},
			{"aaa", true, "3 should succeed"},
			{"aaaaaa", true, "more should succeed"},
		}},
	}

	for _, g := range groups {
		re := MustParse(g.pattern)
		for _, c := range g.cases {
			checkState(t, re.Match(c.input) == c.want, c.msg)
		}
	}
}
// TestClosureGreedy checks how the input "aaa" is divided between two groups
// when the first group uses a greedy or a non-greedy closure. Each expected
// slice holds the whole-match pair followed by one pair per group.
func TestClosureGreedy(t *testing.T) {
	cases := []struct {
		pattern string
		want    []int
	}{
		{"^(a{0,2}?)(a*)$", []int{0, 3, 0, 0, 0, 3}},
		{"^(a{0,2})?(a*)$", []int{0, 3, 0, 2, 2, 3}},
		{"^(a{2,}?)(a*)$", []int{0, 3, 0, 2, 2, 3}},
	}
	for _, c := range cases {
		got := MustParse(c.pattern).MatchIndex("aaa")
		checkIntSlice(t, c.want, got, "did not match expected")
	}
}
// TestLeftRight checks the \b assertion placed between two arbitrary
// characters: it must hold when exactly one neighbour is a word character and
// fail when both neighbours are of the same kind.
func TestLeftRight(t *testing.T) {
	r := MustParse("^.\\b.$")
	checkState(t, r.Match("a "), "left char is word")
	checkState(t, r.Match(" a"), "right char is word")
	// Two spaces, not one: the pattern needs two characters, so a one
	// character input would be rejected without ever evaluating \b.
	checkState(t, !r.Match("  "), "not a boundary")
	checkState(t, !r.Match("aa"), "not a boundary")
}
// TestFlags exercises inline flags: case-insensitivity (i) scoped to a group
// and toggled mid-expression, ungreedy matching (U), and the s and m flags.
func TestFlags(t *testing.T) {
	// (?i:...) must apply only inside its group, and the group is expected
	// not to add a subexpression to the match indexes.
	scoped := MustParse("^(?i:AbC)zz$")
	checkState(t, scoped.Match("abczz"), "success")
	checkState(t, !scoped.Match("abcZZ"), "fail, flag should not escape")
	scopedIdx := scoped.MatchIndex("ABCzz")
	checkIntSlice(t, []int{0, 5}, scopedIdx, "should just have a single outer paren")

	// With (?U) the first group is expected to take a single 'a'.
	ungreedy := MustParse("^(?U)(a+)(.+)$")
	ungreedyIdx := ungreedy.MatchIndex("aaaabb")
	checkIntSlice(t, []int{0, 6, 0, 1, 1, 6}, ungreedyIdx, "should be ungreedy")

	// (?i) is switched off again by (?-i), so b* stays case-sensitive.
	toggled := MustParse("^(?i)a*(?-i)b*$")
	checkState(t, toggled.Match("AAaaAAaabbbbb"), "success")
	checkState(t, !toggled.Match("AAaaAAaaBBBa"), "should fail, flag should not escape")

	// The same expression with and without the m flag.
	singleLine := MustParse("(?s)^abc$.^def$")
	checkState(t, !singleLine.Match("abc\ndef"), "multiline mode not on by default")
	multiLine := MustParse("(?ms)^abc$.^def$")
	checkState(t, multiLine.Match("abc\ndef"), "multiline mode works as expected")
}
// TestRuneFilter checks RuneFilter values built from a single rune, a rune
// range, a case-ignoring wrapper, a Unicode class and a negated Unicode class.
func TestRuneFilter(t *testing.T) {
	// Single rune.
	var hash RuneFilter = matchRune('#')
	checkState(t, !hash('B'), "should not match random rune")
	checkState(t, hash('#'), "should match configured rune")

	// Inclusive range of upper-case letters.
	var upper RuneFilter = matchRuneRange('A', 'Z')
	checkState(t, upper('A'), "should match rune 'A' in range")
	checkState(t, upper('B'), "should match rune 'B' in range")
	checkState(t, !upper('a'), "should not match rune 'a', is lowercase")

	// The same range with case ignored.
	var anyCase RuneFilter = upper.ignoreCase()
	checkState(t, anyCase('a'), "should match rune 'a', case ignored")
	checkState(t, anyCase('A'), "should still match rune 'A', case ignored")

	// Named Unicode class.
	var greek RuneFilter = matchUnicodeClass("Greek")
	checkState(t, greek('Ω'), "should match omega")
	checkState(t, !greek('Z'), "should not match regular latin rune")

	// Negated Unicode class.
	var nonCyrillic RuneFilter = matchUnicodeClass("Cyrillic").not()
	checkState(t, nonCyrillic('%'), "should match a random non-Cyrillic rune")
	checkState(t, !nonCyrillic('Ӄ'), "should not match Cyrillic rune")
}
// TestGroup checks that a repeated capturing group reports the indexes of its
// final repetition.
func TestGroup(t *testing.T) {
	want := []int{0, 3, 2, 3}
	got := MustParse("^(a)*$").MatchIndex("aaa")
	checkIntSlice(t, want, got, "a should have matched last char")
}
// TestStringParser walks the reader returned by NewSafeReader over "a{bc}d",
// checking the current character before and after reading, and the literal
// extracted from between the braces.
func TestStringParser(t *testing.T) {
	reader := NewSafeReader("a{bc}d")
	checkState(t, reader.curr() == -1, "should not yet be parsing")

	first := reader.nextCh()
	checkState(t, first == 'a', "first char should be a")
	second := reader.nextCh()
	checkState(t, second == '{', "second char should be {")

	// With the reader resting on '{', pull out the text up to the closing '}'.
	contained := reader.literal("{", "}")
	checkState(t, contained == "bc", "should equal contained value, got: "+contained)
	checkState(t, reader.curr() == 'd', "should now rest on d")

	last := reader.nextCh()
	checkState(t, last == -1, "should be done now")
}