Skip to content

Commit 04dafd5

Browse files
committed
Update comments, function name, and file name for clarity
1 parent 8ca0e8e commit 04dafd5

File tree

3 files changed

+35
-27
lines changed

3 files changed

+35
-27
lines changed

cmd/src/snapshot_upload.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -152,30 +152,30 @@ func copyDumpToBucket(ctx context.Context, src io.ReadSeeker, stat fs.FileInfo,
152152
// To assert against actual file size
153153
var totalWritten int64
154154

155-
// Do a partial copy that trims out unwanted statements
155+
// Do a partial copy, that filters out incompatible statements
156156
if trimExtensions {
157-
written, err := pgdump.CommentOutInvalidLines(object, src, progressFn)
157+
written, err := pgdump.FilterInvalidLines(object, src, progressFn)
158158
if err != nil {
159-
return errors.Wrap(err, "trim extensions and upload")
159+
return errors.Wrap(err, "filter out incompatible statements and upload")
160160
}
161161
totalWritten += written
162162
}
163163

164-
// io.Copy is the best way to copy from a reader to writer in Go, and storage.Writer
165-
// has its own chunking mechanisms internally. io.Reader is stateful, so this copy
166-
// will just continue from where we left off if we use copyAndTrimExtensions.
164+
// io.Copy is the best way to copy from a reader to writer in Go,
165+
// storage.Writer has its own chunking mechanisms internally.
166+
// io.Reader is stateful, so this copy will just continue from where FilterInvalidLines left off, if used
167167
written, err := io.Copy(object, src)
168168
if err != nil {
169169
return errors.Wrap(err, "upload")
170170
}
171171
totalWritten += written
172172

173-
// Progress is not called on completion of io.Copy, so we call it manually after to
174-
// update our pretty progress bars.
173+
// Progress is not called on completion of io.Copy,
174+
// so we call it manually after to update our pretty progress bars.
175175
progressFn(written)
176176

177-
// Validate we have sent all data. copyAndTrimExtensions may add some bytes, so the
178-
// check is not a strict equality.
177+
// Validate we have sent all data.
178+
// FilterInvalidLines may add some bytes, so the check is not a strict equality.
179179
size := stat.Size()
180180
if totalWritten < size {
181181
return errors.Newf("expected to write %d bytes, but actually wrote %d bytes (diff: %d bytes)",

internal/pgdump/extensions.go renamed to internal/pgdump/pgdump_sql_filter.go

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,22 @@ import (
88
"github.com/sourcegraph/sourcegraph/lib/errors"
99
)
1010

11-
// CommentOutInvalidLines will comment out lines in the customer's SQL database dump file
12-
// which gcloud sql import errors out on
11+
// FilterInvalidLines copies the initial lines of the pg_dump-created .sql files,
12+
// from src to dst (the GCS bucket),
13+
// until it hits a line prefixed with a filterEndMarker,
14+
// while commenting out the linesToFilter which cause `gcloud sql import` to error out.
15+
// It then resets src to the position of the last contents written to dst.
1316
//
14-
// It performs a partial copy of a SQL database dump from
15-
// src to dst while commenting out the problematic lines.
16-
// When it determines there are no more EXTENSIONs-related statements,
17-
// it will return, resetting src to the position of the last contents written to dst.
17+
// Filtering requires reading entire lines into memory,
18+
// this can be a very expensive operation, so when filtering is complete,
19+
// the more efficient io.Copy is used to perform the remainder of the copy in the calling funciton
1820
//
19-
// This is needed for import to Google Cloud Storage, which does not like many statements which pg_dump may insert
20-
// For more details, see https://cloud.google.com/sql/docs/postgres/import-export/import-export-dmp
21-
//
22-
// Filtering requires reading entire lines into memory - this can be a very expensive
23-
// operation, so when filtering is complete, the more efficient io.Copy should be used
24-
// to perform the remainder of the copy from src to dst.
25-
func CommentOutInvalidLines(dst io.Writer, src io.ReadSeeker, progressFn func(int64)) (int64, error) {
21+
// pg_dump writes these .sql files based on its own version,
22+
// not based on the Postgres version of either the source or destination database;
23+
// so self-hosted customers' diverse database environments
24+
// have inserted a variety of statements into the .sql files which cause the import to fail
25+
// For details, see https://cloud.google.com/sql/docs/postgres/import-export/import-export-dmp
26+
func FilterInvalidLines(dst io.Writer, src io.ReadSeeker, progressFn func(int64)) (int64, error) {
2627
var (
2728
reader = bufio.NewReader(src)
2829

@@ -60,8 +61,15 @@ func CommentOutInvalidLines(dst io.Writer, src io.ReadSeeker, progressFn func(in
6061
"SET transaction_timeout", // pg_dump v17, importing to Postgres 16
6162

6263
"\\connect",
64+
65+
// Cloud instances' databases have been upgraded to Postgres v16.10,
66+
// which should include support for \restrict and \unrestrict
67+
// but leaving in the list in case we need to re-add them
6368
// "\\restrict",
69+
// To handle the \unrestrict command,
70+
// we'd have to add a search from the end of the file
6471
// "\\unrestrict",
72+
// Remove comments after databases are upgraded >= Postgres 17
6573
}
6674
)
6775

@@ -71,8 +79,8 @@ func CommentOutInvalidLines(dst io.Writer, src io.ReadSeeker, progressFn func(in
7179
line, err := reader.ReadBytes('\n')
7280
consumed += int64(len(line))
7381

74-
// If this function has read through the whole file,
75-
// then hand the last line
82+
// If this function has read through the whole file without hitting a filterEndMarker,
83+
// then handle the last line correctly
7684
if err == io.EOF {
7785
noMoreLinesToFilter = true
7886

internal/pgdump/extensions_test.go renamed to internal/pgdump/pgdump_sql_filter_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ func createTestFile(t *testing.T, content string) *os.File {
2222
return src
2323
}
2424

25-
func TestCommentOutInvalidLines(t *testing.T) {
25+
func TestFilterInvalidLines(t *testing.T) {
2626
if runtime.GOOS == "windows" {
2727
t.Skip("Test doesn't work on Windows of weirdness with t.TempDir() handling")
2828
}
@@ -187,7 +187,7 @@ CREATE TABLE "public"."access_tokens" (
187187
src := createTestFile(t, tt.input)
188188
var dst bytes.Buffer
189189

190-
_, err := CommentOutInvalidLines(&dst, src, func(i int64) {})
190+
_, err := FilterInvalidLines(&dst, src, func(i int64) {})
191191
require.NoError(t, err)
192192

193193
// Copy rest of contents

0 commit comments

Comments
 (0)