From 997e5534bd763335d0157330a45431b611aae1fb Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 20 Dec 2025 17:02:47 -0600 Subject: [PATCH] Fix race condition in TestDirectorRegistration and goroutine leaks - Fix federation discovery race: Force GetFederation() to run immediately after setting config to prevent sync.Once from capturing empty viper state - Fix nil pointer dereferences: Check for nil before calling Value() on cache lookups in 7 test cases - Fix goroutine leaks: Properly capture and wait for cancel/egrp in TestDiscoverOriginCache, TestRedirects, and TestGetHealthTestFile The sporadic test failure occurred when background goroutines called GetFederation() between ResetConfig() and param.Set(), causing the sync.Once to execute with empty config and cache that state permanently. --- client/handle_http_test.go | 2 +- cmd/object_put.go | 3 ++- cmd/plugin_stage.go | 2 +- cmd/plugin_test.go | 8 +++---- director/director_test.go | 48 +++++++++++++++++++++++++------------- xrootd/origin_test.go | 2 +- 6 files changed, 41 insertions(+), 24 deletions(-) diff --git a/client/handle_http_test.go b/client/handle_http_test.go index 80bcd737d..fa9509eb7 100644 --- a/client/handle_http_test.go +++ b/client/handle_http_test.go @@ -1377,7 +1377,7 @@ func TestStatusCodeErrorWrappingUpload(t *testing.T) { var te *TransferErrors if errors.As(err, &te) { // Extract the first error from TransferErrors - if te.errors != nil && len(te.errors) > 0 { + if len(te.errors) > 0 { if tsErr, ok := te.errors[0].(*TimestampedError); ok && tsErr != nil { err = tsErr.err } else { diff --git a/cmd/object_put.go b/cmd/object_put.go index 2318bbceb..84ffdff5d 100644 --- a/cmd/object_put.go +++ b/cmd/object_put.go @@ -254,7 +254,8 @@ func putMain(cmd *cobra.Command, args []string) { for _, src := range source { isRecursive, _ := cmd.Flags().GetBool("recursive") - transferResults, result := client.DoPut(ctx, src, dest, isRecursive, options...) + transferResults, err := client.DoPut(ctx, src, dest, isRecursive, options...) + result = err if result != nil { lastSrc = src break diff --git a/cmd/plugin_stage.go b/cmd/plugin_stage.go index 8bcf0666a..5ef98cc1d 100644 --- a/cmd/plugin_stage.go +++ b/cmd/plugin_stage.go @@ -231,7 +231,7 @@ func processTransferInput(reader io.Reader, mountPrefixStr string, originPrefixP return nil, nil, fmt.Errorf("Failed to parse ClassAd from stdin: %v", err), 1 } inputList := classad.EvaluateAttr("TransferInput") - if err != nil || inputList.IsUndefined() { + if inputList.IsUndefined() { // No TransferInput, no need to transform therefore we exit(0) return nil, nil, fmt.Errorf("No transfer input found in classad, no need to transform."), 0 } diff --git a/cmd/plugin_test.go b/cmd/plugin_test.go index 8f15af699..798970feb 100644 --- a/cmd/plugin_test.go +++ b/cmd/plugin_test.go @@ -553,7 +553,7 @@ func TestPluginMulti(t *testing.T) { for !done { select { case <-fed.Ctx.Done(): - break + done = true case resultAd, ok := <-results: if !ok { done = true @@ -622,7 +622,7 @@ func TestPluginDirectRead(t *testing.T) { for !done { select { case <-fed.Ctx.Done(): - break + done = true case resultAd, ok := <-results: if !ok { done = true @@ -701,7 +701,7 @@ func TestPluginCorrectStartAndEndTime(t *testing.T) { for !done { select { case <-fed.Ctx.Done(): - break + done = true case resultAd, ok := <-results: if !ok { done = true @@ -1124,7 +1124,7 @@ func TestPluginRecursiveDownload(t *testing.T) { for !done { select { case <-fed.Ctx.Done(): - break + done = true case resultAd, ok := <-results: if !ok { done = true diff --git a/director/director_test.go b/director/director_test.go index 5b37671ac..6609bc51a 100644 --- a/director/director_test.go +++ b/director/director_test.go @@ -66,6 +66,7 @@ func NamespaceAdContainsPath(ns []server_structs.NamespaceAdV2, path string) boo func TestGetLinkDepth(t *testing.T) { t.Cleanup(test_utils.SetupTestLogging(t)) + tests := []struct { name string filepath string @@ -76,28 +77,34 @@ func TestGetLinkDepth(t *testing.T) { { name: "empty-file-prefix", err: errors.New("either filepath or prefix is an empty path"), - }, { + }, + { name: "empty-file", err: errors.New("either filepath or prefix is an empty path"), - }, { + }, + { name: "empty-prefix", err: errors.New("either filepath or prefix is an empty path"), - }, { + }, + { name: "no-match", filepath: "/foo/bar/barz.txt", prefix: "/bar", err: errors.New("filepath does not contain the prefix"), - }, { + }, + { name: "depth-1-case", filepath: "/foo/bar/barz.txt", prefix: "/foo/bar", depth: 1, - }, { + }, + { name: "depth-1-w-trailing-slash", filepath: "/foo/bar/barz.txt", prefix: "/foo/bar/", depth: 1, - }, { + }, + { name: "depth-2-case", filepath: "/foo/bar/barz.txt", prefix: "/foo", @@ -187,6 +194,8 @@ func TestDirectorRegistration(t *testing.T) { require.NoError(t, param.Set("Director.CacheSortMethod", "distance")) require.NoError(t, param.Set("Director.StatTimeout", 300*time.Millisecond)) require.NoError(t, param.Set("Director.StatConcurrencyLimit", 1)) + // Force federation discovery to run with the new config to avoid race condition + _, _ = config.GetFederation(ctx) setupContext := func() (*gin.Context, *gin.Engine, *httptest.ResponseRecorder) { // Setup httptest recorder and context for the the unit test @@ -306,8 +315,8 @@ func TestDirectorRegistration(t *testing.T) { assert.Equal(t, 200, w.Result().StatusCode, "Expected status code of 200") get := serverAds.Get("https://or-url.org") - getAd := get.Value() require.NotNil(t, get, "Coudln't find server in the director cache.") + getAd := get.Value() assert.Equal(t, getAd.Name, ad.Name) require.Len(t, getAd.NamespaceAds, 1) assert.Equal(t, getAd.NamespaceAds[0].Path, "/foo/bar") @@ -346,8 +355,8 @@ func TestDirectorRegistration(t *testing.T) { assert.Equal(t, 200, w.Result().StatusCode, "Expected status code of 200") get := serverAds.Get("https://or-url.org") - getAd := get.Value() require.NotNil(t, get, "Coudln't find server in the director cache.") + getAd := get.Value() assert.Equal(t, getAd.Name, ad.Name) require.Len(t, getAd.NamespaceAds, 1) assert.Equal(t, getAd.NamespaceAds[0].Path, "/foo/bar") @@ -990,8 +999,8 @@ func TestDirectorRegistration(t *testing.T) { assert.Equal(t, 200, w.Result().StatusCode, "Expected status code of 200") get := serverAds.Get("https://or-url.org") - getAd := get.Value() require.NotNil(t, get, "Coudln't find server in the director cache.") + getAd := get.Value() assert.Equal(t, ad.Version, getAd.Version) teardown() @@ -1026,8 +1035,8 @@ func TestDirectorRegistration(t *testing.T) { assert.Equal(t, 200, w.Result().StatusCode, "Expected status code of 200") get := serverAds.Get("https://or-url.org") - getAd := get.Value() require.NotNil(t, get, "Coudln't find server in the director cache.") + getAd := get.Value() assert.Equal(t, "7.0.0", getAd.Version) teardown() }) @@ -1065,8 +1074,8 @@ func TestDirectorRegistration(t *testing.T) { assert.Equal(t, 200, w.Result().StatusCode, "Expected status code of 200") get := serverAds.Get("https://or-url.org") - getAd := get.Value() require.NotNil(t, get, "Coudln't find server in the director cache.") + getAd := get.Value() assert.Equal(t, "unknown", getAd.Version) teardown() }) @@ -1100,8 +1109,8 @@ func TestDirectorRegistration(t *testing.T) { assert.Equal(t, 200, w.Result().StatusCode, "Expected status code of 200") get := serverAds.Get("https://or-url.org") - getAd := get.Value() require.NotNil(t, get, "Coudln't find server in the director cache.") + getAd := get.Value() assert.Equal(t, "7.0.0", getAd.Version) teardown() @@ -1141,8 +1150,8 @@ func TestDirectorRegistration(t *testing.T) { assert.Equal(t, 200, w.Result().StatusCode, "Expected status code of 200") get := serverAds.Get("https://or-url.org") - getAd := get.Value() require.NotNil(t, get, "Coudln't find server in the director cache.") + getAd := get.Value() assert.Equal(t, "7.0.0", getAd.Version) teardown() }) @@ -1596,7 +1605,9 @@ func TestDiscoverOriginCache(t *testing.T) { // Set up the mock federation, which must exist for the auth handler to fetch federation keys test_utils.MockFederationRoot(t, nil, &pKeySet) - ctx, _, _ := test_utils.TestContext(context.Background(), t) + ctx, cancel, egrp := test_utils.TestContext(context.Background(), t) + defer func() { require.NoError(t, egrp.Wait()) }() + defer cancel() // Isolate the test so it doesn't use system config require.NoError(t, param.Set("ConfigDir", t.TempDir())) @@ -2010,7 +2021,10 @@ func TestRedirects(t *testing.T) { serverAds.DeleteAll() serverAds.Stop() }) - ctx, _, _ := test_utils.TestContext(context.Background(), t) + + ctx, cancel, egrp := test_utils.TestContext(context.Background(), t) + defer func() { require.NoError(t, egrp.Wait()) }() + defer cancel() t.Cleanup(func() { server_utils.ResetTestState() }) @@ -2300,7 +2314,8 @@ func TestGetHealthTestFile(t *testing.T) { gEngine := gin.Default() router := gEngine.Group("/") ctx := context.Background() - ctx, cancel, _ := test_utils.TestContext(ctx, t) + ctx, cancel, egrp := test_utils.TestContext(ctx, t) + defer func() { require.NoError(t, egrp.Wait()) }() defer cancel() RegisterDirectorAPI(ctx, router) @@ -2437,6 +2452,7 @@ func TestGetHealthTestFile(t *testing.T) { func TestGetRedirectUrl(t *testing.T) { t.Cleanup(test_utils.SetupTestLogging(t)) + adFromTopo := server_structs.ServerAd{ URL: url.URL{ Host: "fake-topology-ad.org:8443", diff --git a/xrootd/origin_test.go b/xrootd/origin_test.go index cb6b586b8..f8d8250d4 100644 --- a/xrootd/origin_test.go +++ b/xrootd/origin_test.go @@ -74,7 +74,7 @@ func ensureXrdS3PluginAvailable(t *testing.T) { appendEnvPaths("LD_LIBRARY_PATH") appendEnvPaths("DYLD_LIBRARY_PATH") - pluginNames := []string{"libXrdS3.so", "libXrdS3-5.so", "libXrdS3.dylib", "libXrdS3-5.dylib"} + pluginNames := []string{"libXrdS3.so", "libXrdS3-5.so", "libXrdS3-6.so", "libXrdS3.dylib", "libXrdS3-5.dylib", "libXrdS3-6.dylib"} for _, dir := range searchPaths { for _, name := range pluginNames { if _, err := os.Stat(filepath.Join(dir, name)); err == nil {