diff --git a/director/cache_ads.go b/director/cache_ads.go index 9fce79543..5f4549005 100644 --- a/director/cache_ads.go +++ b/director/cache_ads.go @@ -384,7 +384,7 @@ func recordAd(ctx context.Context, sAd server_structs.ServerAd, namespaceAds *[] Status: HealthStatusInit, } errgrp.Go(func() error { - LaunchPeriodicDirectorTest(cancelCtx, sAd) + LaunchPeriodicDirectorTest(cancelCtx, ad.URL.String()) return nil }) log.Debugf("New director test suite issued for %s %s. Errgroup was evicted", string(ad.Type), ad.URL.String()) @@ -392,7 +392,7 @@ func recordAd(ctx context.Context, sAd server_structs.ServerAd, namespaceAds *[] // Existing errorgroup still working cancelCtx, cancel := context.WithCancel(existingUtil.ErrGrpContext) started := existingUtil.ErrGrp.TryGo(func() error { - LaunchPeriodicDirectorTest(cancelCtx, sAd) + LaunchPeriodicDirectorTest(cancelCtx, ad.URL.String()) return nil }) if !started { @@ -422,7 +422,7 @@ func recordAd(ctx context.Context, sAd server_structs.ServerAd, namespaceAds *[] Status: HealthStatusInit, } errgrp.Go(func() error { - LaunchPeriodicDirectorTest(cancelCtx, sAd) + LaunchPeriodicDirectorTest(cancelCtx, ad.URL.String()) return nil }) } @@ -546,6 +546,16 @@ func applyServerDowntimes(serverName string, downtimes []server_structs.Downtime } } +// isServerInDowntime checks if a server is in the filteredServers map with an active filter. +// A server is considered in downtime if it exists in filteredServers with any filter type except tempAllowed. +func isServerInDowntime(serverName string) bool { + filteredServersMutex.RLock() + defer filteredServersMutex.RUnlock() + + existingFilterType, isServerFiltered := filteredServers[serverName] + return isServerFiltered && existingFilterType != tempAllowed +} + // applyActiveDowntimeFilter checks federationDowntimes for any active downtime for the given server // and applies the tempFiltered filter immediately if found. This ensures that when a server wakes up // mid-downtime, it is blocked right away without waiting for the next registry poll. diff --git a/director/monitor.go b/director/monitor.go index 6043d8e72..0fb01edf8 100644 --- a/director/monitor.go +++ b/director/monitor.go @@ -28,6 +28,7 @@ import ( "net/url" "time" + "github.com/jellydator/ttlcache/v3" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" @@ -124,9 +125,21 @@ func reportStatusToServer(ctx context.Context, serverWebUrl string, status strin return nil } -// Run a periodic test file transfer against an origin to ensure -// it's talking to the director -func LaunchPeriodicDirectorTest(ctx context.Context, serverAd server_structs.ServerAd) { +// LaunchPeriodicDirectorTest runs periodic test file transfers against an origin or cache to ensure +// it's responding to director test requests. The test fetches the current server ad +// from the TTL cache on each cycle and stops when the ad is no longer present. +func LaunchPeriodicDirectorTest(ctx context.Context, serverUrlStr string) { + // Option to disable touch on hit when fetching from cache to avoid extending TTL + disableTouchOpt := ttlcache.WithDisableTouchOnHit[string, *server_structs.Advertisement]() + + // Fetch the initial server ad to set up metrics + initialAdItem := serverAds.Get(serverUrlStr, disableTouchOpt) + if initialAdItem == nil { + log.Errorf("Failed to start director test suite: server ad not found in cache for URL %s. Test will not be started.", serverUrlStr) + return + } + initialAd := initialAdItem.Value() + serverAd := initialAd.ServerAd serverName := serverAd.Name serverUrl := serverAd.URL.String() serverWebUrl := serverAd.WebURL.String() @@ -158,15 +171,30 @@ func LaunchPeriodicDirectorTest(ctx context.Context, serverAd server_structs.Ser // runDirectorTestCycle executes a single director test cycle and reports the result back to the server. // Extracted as a helper to allow running the first test immediately upon registration, avoiding the // race condition where the origin/cache 30s timeout fires before the first ticker-driven test. - runDirectorTestCycle := func() { - log.Debug(fmt.Sprintf("Starting a director test cycle for %s server %s at %s", serverAd.Type, serverName, serverUrl)) + // Returns true if the test was run, false if it was skipped (e.g., server not in cache or in downtime). + runDirectorTestCycle := func() bool { + // Fetch the current server ad from the TTL cache + adItem := serverAds.Get(serverUrlStr, disableTouchOpt) + if adItem == nil { + log.Infof("The Director doesn't have any advertisements for server with URL %s. Stopping director tests.", serverUrlStr) + return false + } + currentServerAd := adItem.Value().ServerAd + + // Check if the server is in downtime by checking the filteredServers map + if isServerInDowntime(currentServerAd.Name) { + log.Debugf("Skipping director test cycle for %s server %s: server is in downtime", currentServerAd.Type, currentServerAd.Name) + return true // Return true to continue the loop, but don't run the test + } + + log.Debug(fmt.Sprintf("Starting a director test cycle for %s server %s at %s", currentServerAd.Type, currentServerAd.Name, currentServerAd.URL.String())) testSucceeded := true var testErr error - if serverAd.Type == server_structs.OriginType.String() { + if currentServerAd.Type == server_structs.OriginType.String() { fileTests := server_utils.TestFileTransferImpl{} - testSucceeded, testErr = fileTests.RunTests(ctx, serverUrl, serverUrl, "", server_utils.DirectorTest) - } else if serverAd.Type == server_structs.CacheType.String() { - testErr = runCacheTest(ctx, serverAd.URL) + testSucceeded, testErr = fileTests.RunTests(ctx, currentServerAd.URL.String(), currentServerAd.URL.String(), "", server_utils.DirectorTest) + } else if currentServerAd.Type == server_structs.CacheType.String() { + testErr = runCacheTest(ctx, currentServerAd.URL) } // Compose the result of this Director-test to report to the server @@ -176,25 +204,25 @@ func LaunchPeriodicDirectorTest(ctx context.Context, serverAd server_structs.Ser reportStatus = "ok" reportMessage = "Director test cycle succeeded at " + time.Now().Format(time.RFC3339) healthStatus = HealthStatusOK - log.Debugf("Director file transfer test cycle succeeded at %s for %s server with URL at %s", time.Now().Format(time.RFC3339), serverAd.Type, serverUrl) + log.Debugf("Director file transfer test cycle succeeded at %s for %s server with URL at %s", time.Now().Format(time.RFC3339), currentServerAd.Type, currentServerAd.URL.String()) } else { reportStatus = "error" - reportMessage = "Director file transfer test cycle failed for server: " + serverUrl + reportMessage = "Director file transfer test cycle failed for server: " + currentServerAd.URL.String() if testErr != nil { reportMessage += " " + testErr.Error() } healthStatus = HealthStatusError - log.Warningln("Director file transfer test cycle failed for ", serverAd.Type, " server: ", serverUrl, " ", testErr) + log.Warningln("Director file transfer test cycle failed for ", currentServerAd.Type, " server: ", currentServerAd.URL.String(), " ", testErr) } // Update healthTestUtils once per cycle func() { healthTestUtilsMutex.Lock() defer healthTestUtilsMutex.Unlock() - if existingUtil, ok := healthTestUtils[serverAd.URL.String()]; ok { + if existingUtil, ok := healthTestUtils[currentServerAd.URL.String()]; ok { existingUtil.Status = healthStatus } else { - log.Debugln("HealthTestUtil missing for ", serverAd.Type, " server: ", serverUrl, " Failed to update internal status") + log.Debugln("HealthTestUtil missing for ", currentServerAd.Type, " server: ", currentServerAd.URL.String(), " Failed to update internal status") } }() @@ -205,25 +233,27 @@ func LaunchPeriodicDirectorTest(ctx context.Context, serverAd server_structs.Ser } // Report the result of this Director-test back to origin/server (single call) - reportErr := reportStatusToServer(ctx, serverWebUrl, reportStatus, reportMessage, serverAd.Type, false) + reportErr := reportStatusToServer(ctx, currentServerAd.WebURL.String(), reportStatus, reportMessage, currentServerAd.Type, false) // Determine report status metric and log if reporting failed reportStatusMetric := metrics.MetricSucceeded if reportErr != nil { reportStatusMetric = metrics.MetricFailed - log.Warningf("Failed to report director test result to %s server at %s: %v", serverAd.Type, serverAd.WebURL.String(), reportErr) + log.Warningf("Failed to report director test result to %s server at %s: %v", currentServerAd.Type, currentServerAd.WebURL.String(), reportErr) } // Record metrics once per cycle metrics.PelicanDirectorFileTransferTestsRuns.With( prometheus.Labels{ - "server_name": serverName, - "server_web_url": serverWebUrl, - "server_type": string(serverAd.Type), + "server_name": currentServerAd.Name, + "server_web_url": currentServerAd.WebURL.String(), + "server_type": string(currentServerAd.Type), "status": string(testStatusMetric), "report_status": string(reportStatusMetric), }, ).Inc() + + return true // Test was run successfully } // Run the first test immediately to avoid race with origin/cache 30s timeout. @@ -235,7 +265,7 @@ func LaunchPeriodicDirectorTest(ctx context.Context, serverAd server_structs.Ser for { select { case <-ctx.Done(): - log.Debug(fmt.Sprintf("End director test suite for %s server %s at %s", serverAd.Type, serverName, serverUrl)) + log.Debug(fmt.Sprintf("Stopped the Director test suite for %s server %s at %s", serverAd.Type, serverName, serverUrl)) metrics.PelicanDirectorActiveFileTransferTestSuite.With( prometheus.Labels{ diff --git a/web_ui/frontend/package-lock.json b/web_ui/frontend/package-lock.json index 2c3646504..c9bde192e 100644 --- a/web_ui/frontend/package-lock.json +++ b/web_ui/frontend/package-lock.json @@ -111,7 +111,6 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.26.7.tgz", "integrity": "sha512-SRijHmF0PSPgLIBYlWnG0hyeJLwXE2CgpsXaMOrtt2yp9/86ALw6oUlj9KYuZ0JN07T4eBMVIW4li/9S1j2BGA==", "dev": true, - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.26.2", @@ -3164,7 +3163,6 @@ "version": "7.3.2", "resolved": "https://registry.npmjs.org/@mui/material/-/material-7.3.2.tgz", "integrity": "sha512-qXvbnawQhqUVfH1LMgMaiytP+ZpGoYhnGl7yYq2x57GYzcFL/iPzSZ3L30tlbwEjSVKNYcbiKO8tANR1tadjUg==", - "peer": true, "dependencies": { "@babel/runtime": "^7.28.3", "@mui/core-downloads-tracker": "^7.3.2", @@ -3356,6 +3354,7 @@ "version": "6.4.3", "resolved": "https://registry.npmjs.org/@mui/private-theming/-/private-theming-6.4.3.tgz", "integrity": "sha512-7x9HaNwDCeoERc4BoEWLieuzKzXu5ZrhRnEM6AUcRXUScQLvF1NFkTlP59+IJfTbEMgcGg1wWHApyoqcksrBpQ==", + "peer": true, "dependencies": { "@babel/runtime": "^7.26.0", "@mui/utils": "^6.4.3", @@ -3382,6 +3381,7 @@ "version": "6.4.3", "resolved": "https://registry.npmjs.org/@mui/styled-engine/-/styled-engine-6.4.3.tgz", "integrity": "sha512-OC402VfK+ra2+f12Gef8maY7Y9n7B6CZcoQ9u7mIkh/7PKwW/xH81xwX+yW+Ak1zBT3HYcVjh2X82k5cKMFGoQ==", + "peer": true, "dependencies": { "@babel/runtime": "^7.26.0", "@emotion/cache": "^11.13.5", @@ -3455,6 +3455,7 @@ "version": "7.2.21", "resolved": "https://registry.npmjs.org/@mui/types/-/types-7.2.21.tgz", "integrity": "sha512-6HstngiUxNqLU+/DPqlUJDIPbzUBxIVHb1MmXP0eTWDIROiCR2viugXpEif0PPe2mLqqakPzzRClWAnK+8UJww==", + "peer": true, "peerDependencies": { "@types/react": "^17.0.0 || ^18.0.0 || ^19.0.0" }, @@ -3468,6 +3469,7 @@ "version": "6.4.3", "resolved": "https://registry.npmjs.org/@mui/utils/-/utils-6.4.3.tgz", "integrity": "sha512-jxHRHh3BqVXE9ABxDm+Tc3wlBooYz/4XPa0+4AI+iF38rV1/+btJmSUgG4shDtSWVs/I97aDn5jBCt6SF2Uq2A==", + "peer": true, "dependencies": { "@babel/runtime": "^7.26.0", "@mui/types": "^7.2.21", @@ -4500,6 +4502,18 @@ } } }, + "node_modules/@swagger-api/apidom-parser-adapter-yaml-1-2/node_modules/tree-sitter": { + "version": "0.22.4", + "resolved": "https://registry.npmjs.org/tree-sitter/-/tree-sitter-0.22.4.tgz", + "integrity": "sha512-usbHZP9/oxNsUY65MQUsduGRqDHQOou1cagUSwjhoSYAmSahjQDAVsh9s+SlZkn8X8+O1FULRGwHu7AFP3kjzg==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4" + } + }, "node_modules/@swagger-api/apidom-reference": { "version": "1.0.0-beta.48", "resolved": "https://registry.npmjs.org/@swagger-api/apidom-reference/-/apidom-reference-1.0.0-beta.48.tgz", @@ -4846,7 +4860,6 @@ "version": "19.1.13", "resolved": "https://registry.npmjs.org/@types/react/-/react-19.1.13.tgz", "integrity": "sha512-hHkbU/eoO3EG5/MZkuFSKmYqPbSVk5byPFa3e7y/8TybHiLMACgI8seVYlicwk7H5K/rI2px9xrQp/C+AUDTiQ==", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4986,7 +4999,6 @@ "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.44.0.tgz", "integrity": "sha512-VGMpFQGUQWYT9LfnPcX8ouFojyrZ/2w3K5BucvxL/spdNehccKhB4jUyB1yBCXpr2XFm0jkECxgrpXBW2ipoAw==", "dev": true, - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.44.0", "@typescript-eslint/types": "8.44.0", @@ -5226,7 +5238,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -5723,7 +5734,6 @@ "version": "19.1.0-rc.3", "resolved": "https://registry.npmjs.org/babel-plugin-react-compiler/-/babel-plugin-react-compiler-19.1.0-rc.3.tgz", "integrity": "sha512-mjRn69WuTz4adL0bXGx8Rsyk1086zFJeKmes6aK0xPuK3aaXmDJdLHqwKKMrpm6KAI1MCoUK72d2VeqQbu8YIA==", - "peer": true, "dependencies": { "@babel/types": "^7.26.0" } @@ -5844,7 +5854,6 @@ "url": "https://github.com/sponsors/ai" } ], - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001688", "electron-to-chromium": "^1.5.73", @@ -6079,7 +6088,6 @@ "version": "4.4.7", "resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.4.7.tgz", "integrity": "sha512-pwkcKfdzTMAU/+jNosKhNL2bHtJc/sSmYgVbuGTEDhzkrhmyihmP7vUc/5ZK9WopidMDHNe3Wm7jOd/WhuHWuw==", - "peer": true, "dependencies": { "@kurkle/color": "^0.3.0" }, @@ -6959,7 +6967,6 @@ "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.35.0.tgz", "integrity": "sha512-QePbBFMJFjgmlE+cXAlbHZbHpdFVS2E/6vzCy7aKlebddvl1vadiC4JFV5u/wqTkNUwEV8WrQi257jf5f06hrg==", "dev": true, - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -7140,7 +7147,6 @@ "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.31.0.tgz", "integrity": "sha512-ixmkI62Rbc2/w8Vfxyh1jQRTdRTF52VxwRVHl/ykPAmqG+Nb7/kNn+byLP0LxPgI7zWA16Jt82SybJInmMia3A==", "dev": true, - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.8", @@ -8515,7 +8521,6 @@ "version": "3.8.2", "resolved": "https://registry.npmjs.org/immutable/-/immutable-3.8.2.tgz", "integrity": "sha512-15gZoQ38eYjEjxkorfbcgBKBL6R7T459OuK+CpcWt7O3KF4uPCx2tD0uFETlUDIyo+1789crbMhTvQBSR5yBMg==", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -9234,7 +9239,6 @@ "resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz", "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", "dev": true, - "peer": true, "dependencies": { "@jest/core": "^29.7.0", "@jest/types": "^29.6.3", @@ -10100,7 +10104,6 @@ "version": "3.7.2", "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.7.2.tgz", "integrity": "sha512-vtEhXh/gNjI9Yg1u4jX/0YVPMvxzHuGgCm6tC5kZyb08yjGWGnqAjGJvcXbqQR2P3MyMEFnRbpcdFS6PBcLqew==", - "peer": true, "engines": { "node": ">=12" } @@ -10150,7 +10153,6 @@ "version": "4.7.1", "resolved": "https://registry.npmjs.org/maplibre-gl/-/maplibre-gl-4.7.1.tgz", "integrity": "sha512-lgL7XpIwsgICiL82ITplfS7IGwrB1OJIw/pCvprDp2dhmSSEBgmPzYRvwYYYvJGJD7fxUv1Tvpih4nZ6VrLuaA==", - "peer": true, "dependencies": { "@mapbox/geojson-rewind": "^0.5.2", "@mapbox/jsonlint-lines-primitives": "^2.0.2", @@ -11715,7 +11717,6 @@ "version": "0.30.1", "resolved": "https://registry.npmjs.org/ramda/-/ramda-0.30.1.tgz", "integrity": "sha512-tEF5I22zJnuclswcZMc8bDIrwRHRzf+NqVEmqg50ShAZMP7MWeR/RGDthfM/p+BlqvF2fXAzpn8i+SJcYD3alw==", - "peer": true, "funding": { "type": "opencollective", "url": "https://opencollective.com/ramda" @@ -11760,7 +11761,6 @@ "version": "19.1.1", "resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz", "integrity": "sha512-w8nqGImo45dmMIfljjMwOGtbmC/mk4CMYhWIicdSflH91J9TyCyczcPFXJzrZ/ZXcgGRFeP6BU0BEJTw6tZdfQ==", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -11802,7 +11802,6 @@ "version": "19.1.1", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.1.tgz", "integrity": "sha512-Dlq/5LAZgF0Gaz6yiqZCf6VCcZs1ghAJyrsu84Q/GT0gV+mCxbfmKNoGRKBYMJ8IEdGPqu49YWXD02GCknEDkw==", - "peer": true, "dependencies": { "scheduler": "^0.26.0" }, @@ -11949,8 +11948,7 @@ "node_modules/redux": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/redux/-/redux-5.0.1.tgz", - "integrity": "sha512-M9/ELqF6fy8FwmkpnF0S3YKOqMyoWJ4+CS5Efg2ct3oY9daQvd/Pc71FpGZsVsbl3Cpb+IIcjBDUnnyBdQbq4w==", - "peer": true + "integrity": "sha512-M9/ELqF6fy8FwmkpnF0S3YKOqMyoWJ4+CS5Efg2ct3oY9daQvd/Pc71FpGZsVsbl3Cpb+IIcjBDUnnyBdQbq4w==" }, "node_modules/redux-immutable": { "version": "4.0.0", @@ -13530,6 +13528,18 @@ "node": ">=12" } }, + "node_modules/tree-sitter": { + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/tree-sitter/-/tree-sitter-0.21.1.tgz", + "integrity": "sha512-7dxoA6kYvtgWw80265MyqJlkRl4yawIjO7S5MigytjELkX43fV2WsAXzsNfO7sBpPPCF5Gp0+XzHk0DwLCq3xQ==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "dependencies": { + "node-addon-api": "^8.0.0", + "node-gyp-build": "^4.8.0" + } + }, "node_modules/tree-sitter-json": { "version": "0.24.8", "resolved": "https://registry.npmjs.org/tree-sitter-json/-/tree-sitter-json-0.24.8.tgz", @@ -13794,7 +13804,6 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.1.6.tgz", "integrity": "sha512-zaWCozRZ6DLEWAWFrVDz1H6FVXzUSfTy5FUMWsQlU8Ym5JP9eO4xkTIROFCQvhQf61z6O/G6ugw3SgAnvvm+HA==", "dev": true, - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver"