-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsparser.go
102 lines (89 loc) · 3.02 KB
/
sparser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package sre2
// Describes a string parser type. Notably, allows users to examine the current
// rune, peek at the next rune, consume literals between prefix/suffix, and
// rebase the cursor in an absolute fashion within the underlying string. On
// instantiation, this string parser is focused 'before' the initial string:
// calling curr() will return -1, and peek() will return the first rune.
//
// Within sre2, this is used for both the parser as well as matchers to
// traverse through the input string. The curr()/peek() semantics are most
// useful for identifying conditions between runes, such as '\W', '\w' or '$'
// and '^' in multiline mode.
import (
"strings"
"unicode/utf8"
)
type SafeReader struct {
str string // backing string
ch rune // current ch
opos int // previous (absolute) position in str, before ch
pos int // current (absolute) position in str, after ch
}
func NewSafeReader(str string) SafeReader {
return SafeReader{str, -1, -1, 0}
}
// Absolute position after the current character, inside SafeReader. This will
// be -1 if EOF.
func (r *SafeReader) npos() int {
return r.pos
}
// Returns the current focus rune in SafeReader. This will be -1 if EOF or if
// nextCh() has not yet been called.
func (r *SafeReader) curr() rune {
return r.ch
}
// Peek at the next focus rune in SafeReader.
func (r *SafeReader) peek() rune {
if r.pos < len(r.str) {
r, _ := utf8.DecodeRuneInString(r.str[r.pos:])
return r
}
return -1
}
// Move forward, and return the next rune. This will return -1 if the string is
// at EOF.
func (r *SafeReader) nextCh() rune {
if r.pos < len(r.str) {
rune, size := utf8.DecodeRuneInString(r.str[r.pos:])
r.ch = rune
r.opos = r.pos
r.pos += size
} else {
r.ch = -1
r.opos = r.pos
r.pos = -1
}
return r.ch
}
// Refocus the parser at a given point within the parsed string. When this method
// returns, the current focus rune will be directly after the given index.
func (r *SafeReader) jump(to int) {
r.pos = to
r.nextCh()
}
// Consume a known literal at the given point. If the literal does not exist,
// starting with the current focus rune, then panic.
func (r *SafeReader) consume(str string) {
if r.opos == -1 {
panic("can't consume before reading")
}
if !strings.HasPrefix(r.str[r.opos:], str) {
panic("could not find correct str: " + str + " in available: " + r.str[r.opos:])
}
r.jump(r.opos + len(str))
}
// Consume a literal that *must* exist, between the given prefix and suffix.
// Searches for the prefix starting with the current focus rune, and returns
// this SafeReader focused on the rune directly after the suffix.
// e.g. "abcde\Qhello\Eblah", with literal("\\Q", "\\E"), returns "hello"
// focus --^ and will focus on "b" after "\E".
func (r *SafeReader) literal(prefix string, suffix string) string {
r.consume(prefix)
start := r.opos
idx := strings.Index(r.str[r.opos:], suffix)
if idx == -1 {
panic("could not find correct suffix: " + suffix)
}
r.jump(r.opos + idx + len(suffix))
return r.str[start : start+idx]
}