Skip to content

Commit 5216524

Browse files
chore: character class <=> character set
fixes #9
1 parent ac2ac35 commit 5216524

File tree

7 files changed

+112
-112
lines changed

7 files changed

+112
-112
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ The next phase of development will be focussed on more extensive testing and perfor
3333

3434
Based on the classification within the [MDN cheatsheet](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
3535

36-
**Character classes**
36+
**Character sets**
3737

3838
- [x] .
3939
- [x] \d
+36-37
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,54 @@
1-
import { RegExp } from "..";
2-
import { expectMatch, expectNotMatch, exec } from "./utils";
1+
import { expectMatch, expectNotMatch } from "./utils";
32

4-
it("dot", () => {
5-
expectMatch(".", [" ", "B", "|", "9"]);
6-
expectNotMatch(".", ["", "\n"]);
3+
it("throws an error if no closing bracket is found", () => {
4+
// expect(() => new RegExp("[abce")).toThrow();
75
});
86

9-
it("digit", () => {
10-
expectMatch("\\d", ["0", "9"]);
11-
expectNotMatch("\\d", ["", "b"]);
7+
it("matches discrete characters", () => {
8+
expectMatch("[abce]", ["a", "b", "c", "e"]);
9+
expectNotMatch("[abce]", ["", "f", "h"]);
1210
});
1311

14-
it("non-digit", () => {
15-
expectNotMatch("\\D", ["0", "9", ""]);
16-
expectMatch("\\D", ["b", "|"]);
12+
it("matches character ranges", () => {
13+
expectMatch("[a-c]", ["a", "b", "c"]);
14+
expectNotMatch("[a-c]", ["d", "e", ""]);
15+
expectMatch("[K-M]", ["K", "L", "M"]);
16+
expectNotMatch("[K-M]", ["9", "J"]);
17+
expectMatch("[0-9]", ["0", "9"]);
18+
expectNotMatch("[0-9]", ["a", "A"]);
1719
});
1820

19-
it("word", () => {
20-
expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]);
21-
expectNotMatch("\\w", ["", "$"]);
21+
it("matches multiple ranges", () => {
22+
expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]);
23+
expectNotMatch("[a-ce-f]", ["d"]);
2224
});
2325

24-
it("not word", () => {
25-
expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]);
26-
expectMatch("\\W", ["&", "$"]);
26+
it("supports closing brackets", () => {
27+
expectMatch("[]a]", ["]", "a"]);
2728
});
2829

29-
it("whitespace", () => {
30-
expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]);
31-
expectNotMatch("\\s", ["", "a", "0"]);
30+
it("supports negated sets", () => {
31+
expectNotMatch("[^a-c]", ["a", "b", "c"]);
32+
expectMatch("[^a-c]", ["d", "e"]);
33+
expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]);
34+
expectMatch("[^a-ce-f]", ["d"]);
3235
});
3336

34-
it("not whitespace", () => {
35-
expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]);
36-
expectMatch("\\S", ["a", "0"]);
37+
it("treats - as a literal", () => {
38+
expectMatch("[-abc]", ["-", "a", "b", "c"]);
39+
expectMatch("[abc-]", ["-", "a", "b", "c"]);
3740
});
3841

39-
it("tab, cr, lf, vt, ff", () => {
40-
expectMatch("\\t", ["\t"]);
41-
expectMatch("\\r", ["\r"]);
42-
expectMatch("\\n", ["\n"]);
43-
expectMatch("\\v", ["\v"]);
44-
expectMatch("\\f", ["\f"]);
45-
expectNotMatch("\\t", ["a", " ", ""]);
42+
it("treats - as a literal in negated sets", () => {
43+
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
44+
expectMatch("[^-abc]", ["1", "A"]);
4645
});
4746

48-
it("escaped dot", () => {
49-
expectMatch("\\.", ["."]);
50-
expectNotMatch("\\.", ["", "a"]);
51-
});
52-
53-
it("unrecognised character classes are treated as characters", () => {
54-
expectMatch("\\g\\m", ["gm"]);
47+
it("supports case insensitive matching", () => {
48+
// simple ranges
49+
expectMatch("[a-c]", ["A", "C", "a", "c"], "i");
50+
expectNotMatch("[a-c]", ["D", "d"], "i");
51+
// complex
52+
expectMatch("[W-c]", ["W", "w", "C", "c"], "i");
53+
expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i");
5554
});
+37-36
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,55 @@
1-
import { expectMatch, expectNotMatch } from "./utils";
1+
import { RegExp } from "..";
2+
import { expectMatch, expectNotMatch, exec } from "./utils";
23

3-
it("throws an error if no closing bracket is found", () => {
4-
// expect(() => new RegExp("[abce")).toThrow();
4+
it("dot", () => {
5+
expectMatch(".", [" ", "B", "|", "9"]);
6+
expectNotMatch(".", ["", "\n"]);
57
});
68

7-
it("matches discrete characters", () => {
8-
expectMatch("[abce]", ["a", "b", "c", "e"]);
9-
expectNotMatch("[abce]", ["", "f", "h"]);
9+
it("digit", () => {
10+
expectMatch("\\d", ["0", "9"]);
11+
expectNotMatch("\\d", ["", "b"]);
1012
});
1113

12-
it("matches character ranges", () => {
13-
expectMatch("[a-c]", ["a", "b", "c"]);
14-
expectNotMatch("[a-c]", ["d", "e", ""]);
15-
expectMatch("[K-M]", ["K", "L", "M"]);
16-
expectNotMatch("[K-M]", ["9", "J"]);
17-
expectMatch("[0-9]", ["0", "9"]);
18-
expectNotMatch("[0-9]", ["a", "A"]);
14+
it("non-digit", () => {
15+
expectNotMatch("\\D", ["0", "9", ""]);
16+
expectMatch("\\D", ["b", "|"]);
1917
});
2018

21-
it("matches multiple ranges", () => {
22-
expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]);
23-
expectNotMatch("[a-ce-f]", ["d"]);
19+
it("word", () => {
20+
expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]);
21+
expectNotMatch("\\w", ["", "$"]);
2422
});
2523

26-
it("supports closing brackets", () => {
27-
expectMatch("[]a]", ["]", "a"]);
24+
it("not word", () => {
25+
expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]);
26+
expectMatch("\\W", ["&", "$"]);
2827
});
2928

30-
it("supports negated sets", () => {
31-
expectNotMatch("[^a-c]", ["a", "b", "c"]);
32-
expectMatch("[^a-c]", ["d", "e"]);
33-
expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]);
34-
expectMatch("[^a-ce-f]", ["d"]);
29+
it("whitespace", () => {
30+
expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]);
31+
expectNotMatch("\\s", ["", "a", "0"]);
3532
});
3633

37-
it("treats - as a literal", () => {
38-
expectMatch("[-abc]", ["-", "a", "b", "c"]);
39-
expectMatch("[abc-]", ["-", "a", "b", "c"]);
34+
it("not whitespace", () => {
35+
expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]);
36+
expectMatch("\\S", ["a", "0"]);
4037
});
4138

42-
it("treats - as a literal in negated sets", () => {
43-
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
44-
expectMatch("[^-abc]", ["1", "A"]);
39+
it("tab, cr, lf, vt, ff", () => {
40+
expectMatch("\\t", ["\t"]);
41+
expectMatch("\\r", ["\r"]);
42+
expectMatch("\\n", ["\n"]);
43+
expectMatch("\\v", ["\v"]);
44+
expectMatch("\\f", ["\f"]);
45+
expectNotMatch("\\t", ["a", " ", ""]);
4546
});
4647

47-
it("supports case insensitive matching", () => {
48-
// simple ranges
49-
expectMatch("[a-c]", ["A", "C", "a", "c"], "i");
50-
expectNotMatch("[a-c]", ["D", "d"], "i");
51-
// complex
52-
expectMatch("[W-c]", ["W", "w", "C", "c"], "i");
53-
expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i");
48+
it("escaped dot", () => {
49+
expectMatch("\\.", ["."]);
50+
expectNotMatch("\\.", ["", "a"]);
51+
});
52+
53+
it("unrecognised character classes are treated as characters", () => {
54+
expectMatch("\\g\\m", ["gm"]);
5455
});

assembly/nfa/matcher.ts

+18-18
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ import { isDigit, isAlpha, isWhitespace, Char } from "../char";
22

33
import {
44
CharacterNode,
5-
CharacterSetNode,
65
CharacterClassNode,
6+
CharacterSetNode,
77
CharacterRangeNode,
88
NodeType,
99
} from "../parser/node";
@@ -13,8 +13,8 @@ import { Range } from "../util";
1313
const enum MatcherType {
1414
Character,
1515
CharacterRange,
16-
CharacterClass,
1716
CharacterSet,
17+
CharacterClass,
1818
}
1919

2020
let _flags: Flags;
@@ -27,10 +27,10 @@ export class Matcher {
2727
}
2828

2929
static fromCharacterClassNode(
30-
node: CharacterClassNode,
30+
node: CharacterSetNode,
3131
flags: Flags
32-
): CharacterClassMatcher {
33-
return new CharacterClassMatcher(node.charClass, flags.dotAll);
32+
): CharacterSetMatcher {
33+
return new CharacterSetMatcher(node.charClass, flags.dotAll);
3434
}
3535

3636
static fromCharacterRangeNode(
@@ -44,9 +44,9 @@ export class Matcher {
4444
}
4545

4646
static fromCharacterSetNode(
47-
node: CharacterSetNode,
47+
node: CharacterClassNode,
4848
flags: Flags
49-
): CharacterSetMatcher {
49+
): CharacterClassMatcher {
5050
_flags = flags;
5151
const matchers = node.expressions.map<Matcher>((exp) => {
5252
switch (exp.type) {
@@ -57,16 +57,16 @@ export class Matcher {
5757
);
5858
case NodeType.Character:
5959
return Matcher.fromCharacterNode(exp as CharacterNode, _flags);
60-
case NodeType.CharacterClass:
60+
case NodeType.CharacterSet:
6161
return Matcher.fromCharacterClassNode(
62-
exp as CharacterClassNode,
62+
exp as CharacterSetNode,
6363
_flags
6464
);
6565
default:
6666
throw new Error("unsupported node type within character set");
6767
}
6868
});
69-
return new CharacterSetMatcher(matchers, node.negated);
69+
return new CharacterClassMatcher(matchers, node.negated);
7070
}
7171

7272
static fromCharacterNode(
@@ -126,9 +126,9 @@ export class CharacterRangeMatcher extends Matcher {
126126
}
127127
}
128128

129-
export class CharacterClassMatcher extends Matcher {
129+
export class CharacterSetMatcher extends Matcher {
130130
constructor(public charClass: Char, private dotAll: bool) {
131-
super(MatcherType.CharacterClass);
131+
super(MatcherType.CharacterSet);
132132
}
133133

134134
matches(code: u32): bool {
@@ -171,9 +171,9 @@ export class CharacterClassMatcher extends Matcher {
171171
}
172172
}
173173

174-
export class CharacterSetMatcher extends Matcher {
174+
export class CharacterClassMatcher extends Matcher {
175175
constructor(public matchers: Matcher[], public negated: bool) {
176-
super(MatcherType.CharacterSet);
176+
super(MatcherType.CharacterClass);
177177
}
178178

179179
matches(code: u32): bool {
@@ -189,13 +189,13 @@ export class CharacterSetMatcher extends Matcher {
189189
match = (matcher as CharacterRangeMatcher).matches(code);
190190
break;
191191

192-
case MatcherType.CharacterClass:
193-
match = (matcher as CharacterClassMatcher).matches(code);
194-
break;
195-
196192
case MatcherType.CharacterSet:
197193
match = (matcher as CharacterSetMatcher).matches(code);
198194
break;
195+
196+
case MatcherType.CharacterClass:
197+
match = (matcher as CharacterClassMatcher).matches(code);
198+
break;
199199
}
200200
if (match) break;
201201
}

assembly/nfa/nfa.ts

+5-5
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ import {
55
ConcatenationNode,
66
RepetitionNode,
77
AlternationNode,
8-
CharacterSetNode,
98
CharacterClassNode,
9+
CharacterSetNode,
1010
GroupNode,
1111
NodeType,
1212
} from "../parser/node";
@@ -222,17 +222,17 @@ class AutomataFactor {
222222
this.automataForNode(node.right)
223223
);
224224
}
225-
case NodeType.CharacterSet:
225+
case NodeType.CharacterClass:
226226
return Automata.fromMatcher(
227227
Matcher.fromCharacterSetNode(
228-
expression as CharacterSetNode,
228+
expression as CharacterClassNode,
229229
this.flags
230230
)
231231
);
232-
case NodeType.CharacterClass:
232+
case NodeType.CharacterSet:
233233
return Automata.fromMatcher(
234234
Matcher.fromCharacterClassNode(
235-
expression as CharacterClassNode,
235+
expression as CharacterSetNode,
236236
this.flags
237237
)
238238
);

assembly/parser/node.ts

+7-7
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ export const enum NodeType {
77
Alternation,
88
Concatenation,
99
Character,
10-
CharacterSet,
1110
CharacterClass,
11+
CharacterSet,
1212
CharacterRange,
1313
Repetition,
1414
RangeRepetition,
@@ -72,13 +72,13 @@ export class ConcatenationNode extends Node {
7272
}
7373
}
7474

75-
export class CharacterSetNode extends Node {
75+
export class CharacterClassNode extends Node {
7676
constructor(public expressions: Node[], public negated: bool) {
77-
super(NodeType.CharacterSet);
77+
super(NodeType.CharacterClass);
7878
}
7979

8080
clone(): Node {
81-
return new CharacterSetNode(
81+
return new CharacterClassNode(
8282
this.expressions.slice(0).map<Node>((s) => s.clone()),
8383
this.negated
8484
);
@@ -126,13 +126,13 @@ export class AssertionNode extends Node {
126126
}
127127
}
128128

129-
export class CharacterClassNode extends Node {
129+
export class CharacterSetNode extends Node {
130130
constructor(public charClass: Char) {
131-
super(NodeType.CharacterClass);
131+
super(NodeType.CharacterSet);
132132
}
133133

134134
clone(): Node {
135-
return new CharacterClassNode(this.charClass);
135+
return new CharacterSetNode(this.charClass);
136136
}
137137
}
138138

0 commit comments

Comments
 (0)