@@ -2336,50 +2336,57 @@ func (d *BackupRestoreTestDriver) createBackupCollection(
23362336 return d .saveContents (ctx , l , rng , & collection , fingerprintAOST )
23372337}
23382338
2339- // deleteUserTableSST deletes an SST that covers a user table from the full
2340- // backup in a collection. This is used to test online restore recovery. We
2341- // delete from the full backup specifically to avoid deleting an SST from an
2342- // incremental that is skipped due to a compacted backup. This ensures that
2343- // deleting an SST will cause the download job to fail (assuming it hasn't
2344- // already been downloaded).
2345- // Note: We delete specifically a user-table SST as opposed to choosing a random
2346- // SST because the temporary system table SSTs may be GC'd before the download
2347- // phase attempts to download the SST. This would cause the download job to
2348- // succeed instead of failing as expected. See
2349- // https://github.com/cockroachdb/cockroach/issues/148408 for more details.
2350- // TODO (kev-cao): Investigate if blocking storage level compaction would
2351- // prevent GC of temporary system tables and allow us to delete a random SST
2352- // instead. Technically, it is still possible, but unlikely, for storage to
2353- // compact away a user-table SST before the download job attempts to download
2354- // it, which would result in false positives in the test.
2355- func (d * BackupRestoreTestDriver ) deleteUserTableSST (
2339+ // deleteSSTFromBackupLayers deletes one SST from each layer of a backup
2340+ // collection. This is used to test online restore recovery. Deleting an SST
2341+ // from each layer ensures that at least one SST required for a restore will be
2342+ // missing.
2343+ func (d * BackupRestoreTestDriver ) deleteSSTFromBackupLayers (
23562344 ctx context.Context , l * logger.Logger , db * gosql.DB , collection * backupCollection ,
23572345) error {
2358- var sstPath string
2359- var startPretty , endPretty string
2360- var sizeBytes , numRows int
2361- if err := db .QueryRowContext (
2346+ type sstInfo struct {
2347+ path , start , end string
2348+ bytes , rows int
2349+ }
2350+ rows , err := db .QueryContext (
23622351 ctx ,
2363- `SELECT path, start_pretty, end_pretty, size_bytes, rows FROM
2364- (
2365- SELECT row_number() OVER (), * FROM
2366- [SHOW BACKUP FILES FROM LATEST IN $1]
2352+ // We delete the last SST in each directory just to ensure that we always
2353+ // delete user data. If a system table data is deleted, it is possible for
2354+ // the temporary system table to be GC'd before the download phase detects
2355+ // the missing file.
2356+ `WITH with_dir AS (
2357+ SELECT *, regexp_replace(path, '/[^/]+\.sst$', '') AS dir
2358+ FROM [SHOW BACKUP FILES FROM LATEST IN $1]
2359+ )
2360+ SELECT path, start_pretty, end_pretty, size_bytes, rows
2361+ FROM (
2362+ SELECT *, row_number() OVER (PARTITION BY dir ORDER BY path DESC) AS rn
2363+ FROM with_dir
23672364 )
2368- WHERE backup_type = 'full'
2369- ORDER BY row_number DESC
2370- LIMIT 1` ,
2365+ WHERE rn = 1` ,
23712366 collection .uri (),
2372- ).Scan (& sstPath , & startPretty , & endPretty , & sizeBytes , & numRows ); err != nil {
2373- return errors .Wrapf (err , "failed to get SST path from %s" , collection .uri ())
2367+ )
2368+ if err != nil {
2369+ return errors .Wrapf (err , "failed to query for backup files in %q" , collection .uri ())
2370+ }
2371+ defer rows .Close ()
2372+ var ssts []sstInfo
2373+ for rows .Next () {
2374+ var sst sstInfo
2375+ if err := rows .Scan (& sst .path , & sst .start , & sst .end , & sst .bytes , & sst .rows ); err != nil {
2376+ return errors .Wrap (err , "error scanning SHOW BACKUP FILES row" )
2377+ }
2378+ ssts = append (ssts , sst )
2379+ }
2380+ if rows .Err () != nil {
2381+ return errors .Wrap (rows .Err (), "error iterating over SHOW BACKUP FILES" )
2382+ }
2383+ if len (ssts ) == 0 {
2384+ return errors .Newf ("unexpectedly found no SST files to delete in %q" , collection .uri ())
23742385 }
23752386 uri , err := url .Parse (collection .uri ())
23762387 if err != nil {
23772388 return errors .Wrapf (err , "failed to parse backup collection URI %q" , collection .uri ())
23782389 }
2379- l .Printf (
2380- "deleting sst %s at %s: covering %d bytes and %d rows in [%s, %s)" ,
2381- sstPath , uri , sizeBytes , numRows , startPretty , endPretty ,
2382- )
23832390 storage , err := cloud .ExternalStorageFromURI (
23842391 ctx ,
23852392 collection .uri (),
@@ -2395,13 +2402,22 @@ func (d *BackupRestoreTestDriver) deleteUserTableSST(
23952402 return errors .Wrapf (err , "failed to create external storage for %q" , collection .uri ())
23962403 }
23972404 defer storage .Close ()
2398- return errors .Wrapf (
2399- storage .Delete (ctx , sstPath ),
2400- "failed to delete sst %s from backup %s at %s" ,
2401- sstPath ,
2402- collection .name ,
2403- uri ,
2404- )
2405+ for _ , sst := range ssts {
2406+ l .Printf (
2407+ "deleting sst %s at %s: covering %d bytes and %d rows in [%s, %s)" ,
2408+ sst .path , uri , sst .bytes , sst .rows , sst .start , sst .end ,
2409+ )
2410+ if err := storage .Delete (ctx , sst .path ); err != nil {
2411+ return errors .Wrapf (
2412+ err ,
2413+ "failed to delete sst %s from backup %s at %s" ,
2414+ sst .path ,
2415+ collection .name ,
2416+ uri ,
2417+ )
2418+ }
2419+ }
2420+ return nil
24052421}
24062422
24072423// sentinelFilePath returns the path to the file that prevents job
0 commit comments