Skip to content

Commit acb9f1d

Browse files
authored
Merge 1051aac into blathers/backport-release-25.3-160027
2 parents 2c799ea + 1051aac commit acb9f1d

File tree

2 files changed

+58
-42
lines changed

2 files changed

+58
-42
lines changed

pkg/cmd/roachtest/tests/backup_restore_roundtrip.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,7 @@ func testOnlineRestoreRecovery(ctx context.Context, t test.Test, c cluster.Clust
551551
return errors.Wrap(err, "failed to remove pausepoint for online restore")
552552
}
553553

554-
if err := d.deleteUserTableSST(ctx, t.L(), dbConn, collection); err != nil {
554+
if err := d.deleteSSTFromBackupLayers(ctx, t.L(), dbConn, collection); err != nil {
555555
return err
556556
}
557557
if _, err := dbConn.ExecContext(ctx, "RESUME JOB $1", downloadJobID); err != nil {

pkg/cmd/roachtest/tests/mixed_version_backup.go

Lines changed: 57 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2336,50 +2336,57 @@ func (d *BackupRestoreTestDriver) createBackupCollection(
23362336
return d.saveContents(ctx, l, rng, &collection, fingerprintAOST)
23372337
}
23382338

2339-
// deleteUserTableSST deletes an SST that covers a user table from the full
2340-
// backup in a collection. This is used to test online restore recovery. We
2341-
// delete from the full backup specifically to avoid deleting an SST from an
2342-
// incremental that is skipped due to a compacted backup. This ensures that
2343-
// deleting an SST will cause the download job to fail (assuming it hasn't
2344-
// already been downloaded).
2345-
// Note: We delete specifically a user-table SST as opposed to choosing a random
2346-
// SST because the temporary system table SSTs may be GC'd before the download
2347-
// phase attempts to download the SST. This would cause the download job to
2348-
// succeed instead of failing as expected. See
2349-
// https://github.com/cockroachdb/cockroach/issues/148408 for more details.
2350-
// TODO (kev-cao): Investigate if blocking storage level compaction would
2351-
// prevent GC of temporary system tables and allow us to delete a random SST
2352-
// instead. Technically, it is still possible, but unlikely, for storage to
2353-
// compact away a user-table SST before the download job attempts to download
2354-
// it, which would result in false positives in the test.
2355-
func (d *BackupRestoreTestDriver) deleteUserTableSST(
2339+
// deleteSSTFromBackupLayers deletes one SST from each layer of a backup
2340+
// collection. This is used to test online restore recovery. Deleting an SST
2341+
// from each layer ensures that at least one SST required for a restore will be
2342+
// missing.
2343+
func (d *BackupRestoreTestDriver) deleteSSTFromBackupLayers(
23562344
ctx context.Context, l *logger.Logger, db *gosql.DB, collection *backupCollection,
23572345
) error {
2358-
var sstPath string
2359-
var startPretty, endPretty string
2360-
var sizeBytes, numRows int
2361-
if err := db.QueryRowContext(
2346+
type sstInfo struct {
2347+
path, start, end string
2348+
bytes, rows int
2349+
}
2350+
rows, err := db.QueryContext(
23622351
ctx,
2363-
`SELECT path, start_pretty, end_pretty, size_bytes, rows FROM
2364-
(
2365-
SELECT row_number() OVER (), * FROM
2366-
[SHOW BACKUP FILES FROM LATEST IN $1]
2352+
// We delete the last SST in each directory just to ensure that we always
2353+
// delete user data. If a system table data is deleted, it is possible for
2354+
// the temporary system table to be GC'd before the download phase detects
2355+
// the missing file.
2356+
`WITH with_dir AS (
2357+
SELECT *, regexp_replace(path, '/[^/]+\.sst$', '') AS dir
2358+
FROM [SHOW BACKUP FILES FROM LATEST IN $1]
2359+
)
2360+
SELECT path, start_pretty, end_pretty, size_bytes, rows
2361+
FROM (
2362+
SELECT *, row_number() OVER (PARTITION BY dir ORDER BY path DESC) AS rn
2363+
FROM with_dir
23672364
)
2368-
WHERE backup_type = 'full'
2369-
ORDER BY row_number DESC
2370-
LIMIT 1`,
2365+
WHERE rn = 1`,
23712366
collection.uri(),
2372-
).Scan(&sstPath, &startPretty, &endPretty, &sizeBytes, &numRows); err != nil {
2373-
return errors.Wrapf(err, "failed to get SST path from %s", collection.uri())
2367+
)
2368+
if err != nil {
2369+
return errors.Wrapf(err, "failed to query for backup files in %q", collection.uri())
2370+
}
2371+
defer rows.Close()
2372+
var ssts []sstInfo
2373+
for rows.Next() {
2374+
var sst sstInfo
2375+
if err := rows.Scan(&sst.path, &sst.start, &sst.end, &sst.bytes, &sst.rows); err != nil {
2376+
return errors.Wrap(err, "error scanning SHOW BACKUP FILES row")
2377+
}
2378+
ssts = append(ssts, sst)
2379+
}
2380+
if rows.Err() != nil {
2381+
return errors.Wrap(rows.Err(), "error iterating over SHOW BACKUP FILES")
2382+
}
2383+
if len(ssts) == 0 {
2384+
return errors.Newf("unexpectedly found no SST files to delete in %q", collection.uri())
23742385
}
23752386
uri, err := url.Parse(collection.uri())
23762387
if err != nil {
23772388
return errors.Wrapf(err, "failed to parse backup collection URI %q", collection.uri())
23782389
}
2379-
l.Printf(
2380-
"deleting sst %s at %s: covering %d bytes and %d rows in [%s, %s)",
2381-
sstPath, uri, sizeBytes, numRows, startPretty, endPretty,
2382-
)
23832390
storage, err := cloud.ExternalStorageFromURI(
23842391
ctx,
23852392
collection.uri(),
@@ -2395,13 +2402,22 @@ func (d *BackupRestoreTestDriver) deleteUserTableSST(
23952402
return errors.Wrapf(err, "failed to create external storage for %q", collection.uri())
23962403
}
23972404
defer storage.Close()
2398-
return errors.Wrapf(
2399-
storage.Delete(ctx, sstPath),
2400-
"failed to delete sst %s from backup %s at %s",
2401-
sstPath,
2402-
collection.name,
2403-
uri,
2404-
)
2405+
for _, sst := range ssts {
2406+
l.Printf(
2407+
"deleting sst %s at %s: covering %d bytes and %d rows in [%s, %s)",
2408+
sst.path, uri, sst.bytes, sst.rows, sst.start, sst.end,
2409+
)
2410+
if err := storage.Delete(ctx, sst.path); err != nil {
2411+
return errors.Wrapf(
2412+
err,
2413+
"failed to delete sst %s from backup %s at %s",
2414+
sst.path,
2415+
collection.name,
2416+
uri,
2417+
)
2418+
}
2419+
}
2420+
return nil
24052421
}
24062422

24072423
// sentinelFilePath returns the path to the file that prevents job

0 commit comments

Comments
 (0)