Skip to content

Commit 7165e29

Browse files
committed
wip
1 parent 63aa1e8 commit 7165e29

File tree

4 files changed

+159
-125
lines changed

4 files changed

+159
-125
lines changed

go/data/logReaderState.go

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package data
2+
3+
import (
4+
"bufio"
5+
"errors"
6+
"os"
7+
"regexp"
8+
"sync"
9+
)
10+
11+
type logReaderState struct {
12+
fd *os.File
13+
scanner *bufio.Scanner
14+
reg *regexp.Regexp
15+
mu sync.Mutex
16+
lineNumber int
17+
closed bool
18+
err error
19+
}
20+
21+
func (s *logReaderState) Close() error {
22+
s.mu.Lock()
23+
defer s.mu.Unlock()
24+
25+
if !s.closed && s.fd != nil {
26+
ferr := s.fd.Close()
27+
if ferr != nil {
28+
s.err = errors.Join(s.err, ferr)
29+
}
30+
s.closed = true
31+
}
32+
33+
return s.err
34+
}

go/data/query_log_parse.go

-36
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,13 @@ package data
1818

1919
import (
2020
"bufio"
21-
"errors"
2221
"os"
2322
"regexp"
24-
"sync"
2523
)
2624

2725
type (
2826
MySQLLogLoader struct{}
2927

30-
logReaderState struct {
31-
fd *os.File
32-
scanner *bufio.Scanner
33-
reg *regexp.Regexp
34-
mu sync.Mutex
35-
lineNumber int
36-
closed bool
37-
err error
38-
}
39-
4028
mysqlLogReaderState struct {
4129
logReaderState
4230
prevQuery string
@@ -127,30 +115,6 @@ func (s *mysqlLogReaderState) Next() (Query, bool) {
127115
return Query{}, false
128116
}
129117

130-
func (s *logReaderState) Close() error {
131-
s.mu.Lock()
132-
defer s.mu.Unlock()
133-
134-
if !s.closed {
135-
ferr := s.fd.Close()
136-
if ferr != nil {
137-
s.err = errors.Join(s.err, ferr)
138-
}
139-
s.closed = true
140-
}
141-
142-
return s.err
143-
}
144-
145-
func (s *logReaderState) NextLine() (string, bool) {
146-
more := s.scanner.Scan()
147-
if !more {
148-
return "", false
149-
}
150-
151-
return s.scanner.Text(), true
152-
}
153-
154118
func (MySQLLogLoader) Loadit(fileName string) IteratorLoader {
155119
reg := regexp.MustCompile(`^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z)\s+(\d+)\s+(\w+)\s+(.*)`)
156120

go/data/query_log_parse_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package data
1818

1919
import (
20+
"github.com/google/go-cmp/cmp"
2021
"testing"
2122

2223
"github.com/stretchr/testify/require"
@@ -70,5 +71,8 @@ LIMIT 1`,
7071
},
7172
}
7273

74+
if diff := cmp.Diff(expected, gotQueries); diff != "" {
75+
t.Errorf("unexpected queries (-want +got):\n%s", diff)
76+
}
7377
require.Equal(t, expected, gotQueries)
7478
}

go/data/sql_script_loader.go

+121-89
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ limitations under the License.
1717
package data
1818

1919
import (
20-
"bytes"
20+
"bufio"
2121
"fmt"
2222
"io"
2323
"net/http"
@@ -27,118 +27,150 @@ import (
2727

2828
type SQLScriptLoader struct{}
2929

30-
func readData(url string) ([]byte, error) {
31-
if strings.HasPrefix(url, "http") {
32-
client := http.Client{}
33-
res, err := client.Get(url)
30+
func (SQLScriptLoader) Load(fileName string) ([]Query, error) {
31+
loader := SQLScriptLoader{}.Loadit(fileName)
32+
return makeSlice(loader)
33+
}
34+
35+
func (SQLScriptLoader) Loadit(fileName string) IteratorLoader {
36+
var fd *os.File
37+
var err error
38+
39+
if strings.HasPrefix(fileName, "http") {
40+
// Read from URL
41+
data, err := readData(fileName)
3442
if err != nil {
35-
return nil, err
43+
return &errLoader{err}
3644
}
37-
if res.StatusCode != http.StatusOK {
38-
return nil, fmt.Errorf("failed to get data from %s, status code %d", url, res.StatusCode)
45+
scanner := bufio.NewScanner(strings.NewReader(string(data)))
46+
return &sqlScriptReaderState{
47+
logReaderState: logReaderState{
48+
scanner: scanner,
49+
},
50+
newStmt: true,
3951
}
40-
defer res.Body.Close()
41-
return io.ReadAll(res.Body)
4252
}
43-
return os.ReadFile(url)
44-
}
4553

46-
func (SQLScriptLoader) Load(url string) ([]Query, error) {
47-
data, err := readData(url)
54+
// Read from file
55+
fd, err = os.OpenFile(fileName, os.O_RDONLY, 0)
4856
if err != nil {
49-
return nil, err
57+
return &errLoader{err}
5058
}
51-
seps := bytes.Split(data, []byte("\n"))
52-
queries := make([]Query, 0, len(seps))
53-
newStmt := true
54-
for i, v := range seps {
55-
v := bytes.TrimSpace(v)
56-
s := string(v)
57-
// Skip comments and empty lines
58-
switch {
59-
case strings.HasPrefix(s, "#"):
60-
newStmt = true
61-
continue
62-
case strings.HasPrefix(s, "--"):
63-
queries = append(queries, Query{Query: s, Line: i + 1})
64-
newStmt = true
65-
continue
66-
case len(s) == 0:
67-
continue
68-
}
6959

70-
if newStmt {
71-
queries = append(queries, Query{Query: s, Line: i + 1})
72-
} else {
73-
lastQuery := queries[len(queries)-1]
74-
lastQuery.Query = fmt.Sprintf("%s\n%s", lastQuery.Query, s)
75-
queries[len(queries)-1] = lastQuery
76-
}
77-
78-
// Treat new line as a new statement if line ends with ';'
79-
newStmt = strings.HasSuffix(s, ";")
60+
scanner := bufio.NewScanner(fd)
61+
return &sqlScriptReaderState{
62+
logReaderState: logReaderState{
63+
fd: fd,
64+
scanner: scanner,
65+
lineNumber: 0,
66+
},
67+
newStmt: true,
8068
}
69+
}
8170

82-
// Process queries directly without calling ParseQueries
83-
finalQueries := make([]Query, 0, len(queries))
84-
for _, rs := range queries {
85-
q, err := parseQuery(rs)
86-
if err != nil {
87-
return nil, err
88-
}
89-
if q != nil {
90-
finalQueries = append(finalQueries, *q)
91-
}
92-
}
93-
return finalQueries, nil
71+
type sqlScriptReaderState struct {
72+
logReaderState
73+
prevQuery string
74+
queryStart int
75+
newStmt bool
9476
}
9577

96-
// Helper function to parse individual queries
97-
func parseQuery(rs Query) (*Query, error) {
98-
realS := rs.Query
99-
s := rs.Query
100-
q := Query{Line: rs.Line, Type: Unknown}
78+
func (s *sqlScriptReaderState) Next() (Query, bool) {
79+
s.mu.Lock()
80+
defer s.mu.Unlock()
10181

102-
if len(s) < 3 {
103-
return nil, nil
82+
if s.closed {
83+
return Query{}, false
10484
}
10585

106-
switch {
107-
case strings.HasPrefix(s, "#"):
108-
q.Type = Comment
109-
return &q, nil
110-
case strings.HasPrefix(s, "--"):
111-
q.Type = CommentWithCommand
112-
if len(s) > 2 && s[2] == ' ' {
113-
s = s[3:]
86+
for s.scanner.Scan() {
87+
line := s.scanner.Text()
88+
line = strings.TrimSpace(line)
89+
s.lineNumber++
90+
91+
// Skip empty lines and comments
92+
if len(line) == 0 {
93+
continue
94+
}
95+
switch {
96+
case strings.HasPrefix(line, "#"):
97+
s.newStmt = true
98+
continue
99+
case strings.HasPrefix(line, "--"):
100+
// Return previous query before processing the comment
101+
if s.prevQuery != "" {
102+
query := Query{
103+
Query: s.prevQuery,
104+
Line: s.queryStart,
105+
Type: QueryT,
106+
}
107+
s.prevQuery = ""
108+
s.queryStart = 0
109+
s.newStmt = true
110+
// Store current comment line as new query
111+
s.prevQuery = line
112+
s.queryStart = s.lineNumber
113+
return query, true
114+
} else {
115+
s.prevQuery = line
116+
s.queryStart = s.lineNumber
117+
s.newStmt = true
118+
continue
119+
}
120+
}
121+
122+
if s.newStmt {
123+
s.prevQuery = line
124+
s.queryStart = s.lineNumber
114125
} else {
115-
s = s[2:]
126+
s.prevQuery += "\n" + line
116127
}
117-
case s[0] == '\n':
118-
q.Type = EmptyLine
119-
return &q, nil
120-
}
121128

122-
i := findFirstWord(s)
123-
if i > 0 {
124-
q.FirstWord = s[:i]
129+
// Check if the line ends with a semicolon
130+
if strings.HasSuffix(line, ";") {
131+
query := Query{
132+
Query: s.prevQuery,
133+
Line: s.queryStart,
134+
Type: QueryT,
135+
}
136+
s.prevQuery = ""
137+
s.queryStart = 0
138+
s.newStmt = true
139+
return query, true
140+
} else {
141+
s.newStmt = false
142+
}
125143
}
126-
q.Query = s[i:]
127144

128-
if q.Type == Unknown || q.Type == CommentWithCommand {
129-
if err := q.getQueryType(realS); err != nil {
130-
return nil, err
145+
s.closed = true
146+
147+
// Return the last query if we have one
148+
if s.prevQuery != "" {
149+
query := Query{
150+
Query: s.prevQuery,
151+
Line: s.queryStart,
152+
Type: QueryT,
131153
}
154+
s.prevQuery = ""
155+
return query, true
132156
}
133157

134-
return &q, nil
158+
s.err = s.scanner.Err()
159+
return Query{}, false
135160
}
136161

137-
// findFirstWord calculates the length of the first word in the string
138-
func findFirstWord(s string) int {
139-
i := 0
140-
for i < len(s) && s[i] != '(' && s[i] != ' ' && s[i] != ';' && s[i] != '\n' {
141-
i++
162+
func readData(url string) ([]byte, error) {
163+
if strings.HasPrefix(url, "http") {
164+
client := http.Client{}
165+
res, err := client.Get(url)
166+
if err != nil {
167+
return nil, err
168+
}
169+
if res.StatusCode != http.StatusOK {
170+
return nil, fmt.Errorf("failed to get data from %s, status code %d", url, res.StatusCode)
171+
}
172+
defer res.Body.Close()
173+
return io.ReadAll(res.Body)
142174
}
143-
return i
175+
return os.ReadFile(url)
144176
}

0 commit comments

Comments
 (0)