diff --git a/config/config.go b/config/config.go index fd4b97085..674132b04 100644 --- a/config/config.go +++ b/config/config.go @@ -1425,6 +1425,7 @@ func SetServerDefaults(v *viper.Viper) error { v.SetDefault(param.Origin_Multiuser.GetName(), false) } + v.SetDefault(param.Director_EnableFederationMetadataHosting.GetName(), true) v.SetDefault(param.Director_CheckOriginPresence.GetName(), true) v.SetDefault(param.Director_CheckCachePresence.GetName(), true) diff --git a/director/discovery.go b/director/discovery.go index f78afeff0..e887e1e26 100644 --- a/director/discovery.go +++ b/director/discovery.go @@ -1,6 +1,6 @@ /*************************************************************** * - * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * Copyright (C) 2025, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may @@ -20,18 +20,22 @@ package director import ( "encoding/json" + "fmt" "net/http" "net/url" "strings" "github.com/gin-gonic/gin" + "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/metrics" "github.com/pelicanplatform/pelican/param" "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/server_utils" + "github.com/pelicanplatform/pelican/utils" ) const ( @@ -43,6 +47,59 @@ const ( // Director hosts a discovery endpoint at federationDiscoveryPath to provide URLs to various // Pelican central servers in a federation. func federationDiscoveryHandler(ctx *gin.Context) { + // Because of the class of bugs related to federation metadata hosting at the Director, we record + // who's trying to access this endpoint as a prometheus metric + ipAddr := utils.ClientIPAddr(ctx) + network, ok := utils.ApplyIPMask(ipAddr.String()) + if !ok { + log.Warningf("Failed to apply IP mask to address %s", ipAddr.String()) + network = "unknown" + } + + // A hacky way to bootstrap service type ("who's" contacting the Director) from the user agent -- we don't use + // the raw user agent because we must protect against cardinality explosion in prometheus. + var serviceType string + userAgents := ctx.Request.Header.Values("User-Agent") + for _, ua := range userAgents { + uaLower := strings.ToLower(ua) + switch { + case strings.Contains(uaLower, "director"): + serviceType = "director" + case strings.Contains(uaLower, "registry"): + serviceType = "registry" + case strings.Contains(uaLower, "origin"): + serviceType = "origin" + case strings.Contains(uaLower, "cache"): + serviceType = "cache" + } + if serviceType != "" { + break + } + } + if serviceType == "" { + serviceType = "unknown" + } + + labels := prometheus.Labels{ + "network": network, + "service_type": serviceType, + } + metrics.PelicanDirectorFederationMetadataRequestsTotal.With(labels).Inc() + + // If federation metadata hosting is disabled, return an error + if !param.Director_EnableFederationMetadataHosting.GetBool() { + // Use 410 Gone to indicate that the resource is no longer available. + // While it's possible it was _never_ available (an argument to use 404), 410 is a + // louder signal to clients that they should _quit_ trying to access this endpoint. + ctx.JSON(http.StatusGone, + server_structs.SimpleApiResp{ + Status: server_structs.RespFailed, + Msg: fmt.Sprintf("This Director is configured to disallow federation metadata hosting; "+ + "your service is likely misconfigured to use a Director as its %s URL", param.Federation_DiscoveryUrl.GetName()), + }) + return + } + fedInfo, err := config.GetFederation(ctx) if err != nil { log.Errorln("Bad server configuration: Federation discovery could not resolve:", err) @@ -51,6 +108,7 @@ func federationDiscoveryHandler(ctx *gin.Context) { Status: server_structs.RespFailed, Msg: "Bad server configuration: Federation discovery could not resolve", }) + return } discoveryUrlStr := fedInfo.DiscoveryEndpoint @@ -158,6 +216,12 @@ func federationDiscoveryHandler(ctx *gin.Context) { } func RegisterDirectorOIDCAPI(router *gin.RouterGroup) { - router.GET(federationDiscoveryPath, federationDiscoveryHandler) server_utils.RegisterOIDCAPI(router, true) } + +// Register the federation metadata hosting endpoint -- we do this even if the fed metadata hosting is disabled +// because the endpoint handler will still record metrics about the attempted access (which can be used by fed +// operators to detect misconfigurations). +func RegisterFedMetadata(router *gin.RouterGroup) { + router.GET(federationDiscoveryPath, federationDiscoveryHandler) +} diff --git a/director/discovery_test.go b/director/discovery_test.go index 2e1c45053..5656a009c 100644 --- a/director/discovery_test.go +++ b/director/discovery_test.go @@ -135,6 +135,9 @@ func TestFederationDiscoveryHandler(t *testing.T) { param.TLSSkipVerify.GetName(): true, }) + // Enable federation metadata hosting for the test -- must be done _after_ + // the test client initialization because that function blows out any existing params + require.NoError(t, param.Set(param.Director_EnableFederationMetadataHosting.GetName(), true)) w := httptest.NewRecorder() req, _ := http.NewRequest("GET", "/test", nil) router.ServeHTTP(w, req) diff --git a/docs/parameters.yaml b/docs/parameters.yaml index 9e3a8a171..5ad064b23 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -1870,6 +1870,21 @@ components: ["cache"] ############################ # Director-level configs # ############################ +name: Director.EnableFederationMetadataHosting +description: |+ + Controls whether or not the Director should host a copy of the Federation's metadata. + + This feature should be enabled whenever your Director is expected to serve as the federation's + root discovery source through the `Federation.DiscoveryUrl` parameter, or whenever clients + reference your Director's hostname with their Pelican URLs, e.g. `pelican object get pelican:// ...`. + + If your federation uses a "federation hostname" or "discovery URL" that is different from the Director hostname + (for example, the OSDF uses https://osg-htc.org for discovery, whereas the Director is hosted at + https://osdf-director.osg-htc.org), then this feature should be set to `false`. +type: bool +default: true +components: ["director"] +--- name: Director.AdvertiseUrl description: |+ The URL that director advertisements should be sent to. diff --git a/e2e_fed_tests/director_test.go b/e2e_fed_tests/director_test.go index 50b6529a1..31d76d0fe 100644 --- a/e2e_fed_tests/director_test.go +++ b/e2e_fed_tests/director_test.go @@ -41,8 +41,8 @@ import ( "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/fed_test_utils" "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" - "github.com/pelicanplatform/pelican/server_utils" "github.com/pelicanplatform/pelican/test_utils" ) @@ -258,12 +258,12 @@ func TestDirectorFedTokenCacheAPI(t *testing.T) { tok, err := jwt.ParseInsecure([]byte(tokStr)) require.NoError(t, err, "Failed to parse token") - // In this case, the "fed issuer" is the director because we're running as fed-in-a-box. - // However, that need not be true in general wherever the Director has a configured Federation.DiscoveryUrl. + // The fed-test utility uses a separate HTTP server for hosting federation metadata, + // and sets it as the Discovery endpoint -- tokens need to be issued by that endpoint fedInfo, err := config.GetFederation(ctx) require.NoError(t, err, "Failed to get federation info") - directorUrlStr := fedInfo.DirectorEndpoint - assert.Equal(t, directorUrlStr, tok.Issuer()) + discoveryUrlStr := fedInfo.DiscoveryEndpoint + assert.Equal(t, discoveryUrlStr, tok.Issuer()) var scopes []string if rawScopes, exists := tok.Get("scope"); exists { if scopeStr, ok := rawScopes.(string); ok { @@ -274,3 +274,149 @@ func TestDirectorFedTokenCacheAPI(t *testing.T) { }) } } + +// Test that the Director.EnableFederationMetadataHosting knob correctly +// toggles hosting of the federation discovery metadata at the Director. +func TestDirectorMetadataHosting(t *testing.T) { + server_utils.ResetTestState() + t.Cleanup(server_utils.ResetTestState) + + discoveryPath, err := url.JoinPath(".well-known", "pelican-configuration") + require.NoError(t, err) + + newInsecureClient := func() *http.Client { + return &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }, + } + } + + // Helper function that tries to fetch the federation discovery metadata + // from a given URL, and checks for the expected status code. + // + // If the expected status code is http.StatusOK, it also returns the response body + // containing the metadata JSON + fetchDiscovery := func( + t *testing.T, + client *http.Client, + baseURL string, + expectedStatus int, + ) []byte { + t.Helper() + + u, err := url.Parse(baseURL) + require.NoError(t, err, "Failed to parse base URL") + + u.Path = discoveryPath + + t.Log("Fetching discovery URL:", u.String()) + + req, err := http.NewRequest("GET", u.String(), nil) + require.NoError(t, err, "Failed to create request") + + resp, err := client.Do(req) + require.NoError(t, err, "Failed to perform request") + t.Cleanup(func() { resp.Body.Close() }) + + require.Equal(t, expectedStatus, resp.StatusCode) + + if expectedStatus != http.StatusOK { + return nil + } + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err, "Failed to read response body") + return body + } + + tests := []struct { + name string + enableHosting bool + expectedStatus int + }{ + { + name: "director-hosts-metadata", + enableHosting: true, + expectedStatus: http.StatusOK, + }, + { + name: "director-does-not-host-metadata", + enableHosting: false, + expectedStatus: http.StatusGone, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.NoError(t, param.Set(param.Director_EnableFederationMetadataHosting.GetName(), tt.enableHosting)) + _ = fed_test_utils.NewFedTest(t, bothPubNamespaces) + + ctx := context.Background() + ctx, _, _ = test_utils.TestContext(ctx, t) + + fedInfo, err := config.GetFederation(ctx) + require.NoError(t, err) + + client := newInsecureClient() + + // Always test the Director endpoint + directorBody := fetchDiscovery( + t, + client, + fedInfo.DirectorEndpoint, + tt.expectedStatus, + ) + + // If hosting is enabled at the Director, also test the Discovery endpoint + if tt.enableHosting { + // Because the fed tests set up a separate Discovery server, + // the Director and Discovery endpoints should differ + require.NotEqual( + t, + fedInfo.DirectorEndpoint, + fedInfo.DiscoveryEndpoint, + "Director and Discovery endpoints must differ", + ) + + discoveryBody := fetchDiscovery( + t, + client, + fedInfo.DiscoveryEndpoint, + http.StatusOK, + ) + + // Contents must match exactly + require.JSONEq( + t, + string(discoveryBody), + string(directorBody), + "Metadata from Director and Discovery endpoints must match", + ) + + // Optional: still unmarshal once for semantic validation + var fedMetadata pelican_url.FederationDiscovery + require.NoError( + t, + json.Unmarshal(directorBody, &fedMetadata), + ) + + require.Equal( + t, + fedInfo.DiscoveryEndpoint, + fedMetadata.DiscoveryEndpoint, + ) + } else { + // Here we don't actually care about the content because there's + // nothing to compare it against -- we only want to verify that + // discovery resulted in an okay status code. + _ = fetchDiscovery( + t, + client, + fedInfo.DiscoveryEndpoint, + http.StatusOK, + ) + } + }) + } +} diff --git a/fed_test_utils/fed.go b/fed_test_utils/fed.go index d4ae2d4b2..2fceee79a 100644 --- a/fed_test_utils/fed.go +++ b/fed_test_utils/fed.go @@ -44,6 +44,7 @@ import ( "github.com/pelicanplatform/pelican/director" "github.com/pelicanplatform/pelican/launchers" "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/server_utils" "github.com/pelicanplatform/pelican/test_utils" @@ -259,26 +260,40 @@ func NewFedTest(t *testing.T, originConfig string) (ft *FedTest) { ft.Pids = append(ft.Pids, server.GetPids()...) } + var discoveryServer *httptest.Server // Set up discovery for federation metadata hosting. This needs to be done AFTER launching // servers, because they populate the param values we use to set the metadata. handler := func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/.well-known/pelican-configuration" { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - _, err := w.Write([]byte(fmt.Sprintf(`{ - "director_endpoint": "%s", - "namespace_registration_endpoint": "%s", - "broker_endpoint": "%s", - "jwks_uri": "%s" - }`, param.Server_ExternalWebUrl.GetString(), param.Server_ExternalWebUrl.GetString(), param.Server_ExternalWebUrl.GetString(), param.Server_ExternalWebUrl.GetString()))) - assert.NoError(t, err) + + discoveryMetadata := pelican_url.FederationDiscovery{ + DiscoveryEndpoint: discoveryServer.URL, + DirectorEndpoint: param.Server_ExternalWebUrl.GetString(), + RegistryEndpoint: param.Server_ExternalWebUrl.GetString(), + BrokerEndpoint: param.Server_ExternalWebUrl.GetString(), + JwksUri: param.Server_ExternalWebUrl.GetString() + "/.well-known/issuer.jwks", + DirectorAdvertiseEndpoints: param.Server_DirectorUrls.GetStringSlice(), + } + + discoveryJSONBytes, err := json.Marshal(discoveryMetadata) + require.NoError(t, err, "Failed to marshal discovery metadata") + _, err = w.Write(discoveryJSONBytes) + require.NoError(t, err) } else { http.NotFound(w, r) } } - discoveryServer := httptest.NewTLSServer(http.HandlerFunc(handler)) + discoveryServer = httptest.NewTLSServer(http.HandlerFunc(handler)) t.Cleanup(discoveryServer.Close) + + // Set the discovery URL in both viper and the global fed info object require.NoError(t, param.Set(param.Federation_DiscoveryUrl.GetName(), discoveryServer.URL)) + fedInfo, err := config.GetFederation(ctx) + require.NoError(t, err, "error getting federation info") + fedInfo.DiscoveryEndpoint = discoveryServer.URL + config.SetFederation(fedInfo) desiredURL := param.Server_ExternalWebUrl.GetString() + "/api/v1.0/health" err = server_utils.WaitUntilWorking(ctx, "GET", desiredURL, "director", 200, false) diff --git a/launchers/director_serve.go b/launchers/director_serve.go index af1be3057..1a81c5881 100644 --- a/launchers/director_serve.go +++ b/launchers/director_serve.go @@ -88,6 +88,7 @@ func DirectorServe(ctx context.Context, engine *gin.Engine, egrp *errgroup.Group } rootGroup := engine.Group("/", web_ui.ServerHeaderMiddleware) director.RegisterDirectorOIDCAPI(rootGroup) + director.RegisterFedMetadata(rootGroup) director.RegisterDirectorWebAPI(rootGroup) engine.Use(director.ShortcutMiddleware(defaultResponse)) director.RegisterDirectorAPI(ctx, rootGroup) diff --git a/metrics/director.go b/metrics/director.go index 6d9755c1a..17c59806c 100644 --- a/metrics/director.go +++ b/metrics/director.go @@ -145,4 +145,10 @@ var ( Name: "pelican_director_server_statusweight", Help: "The EWMA-smoothed status weight generated by the Director for each server", }, []string{"server_name", "server_url", "server_type"}) + + PelicanDirectorFederationMetadataRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "pelican_director_federationmetadata_requests_total", + Help: "The total number of requests made by a service (cache, origin, etc.) to fetch federation metadata hosted by the Director. " + + "Can be used to detect misconfigured servers, as federations with non-Director discovery URLs should generally not be discovering federation info via the Director", + }, []string{"network", "service_type"}) ) diff --git a/param/parameters.go b/param/parameters.go index 560fbeab8..8abe0d1f1 100644 --- a/param/parameters.go +++ b/param/parameters.go @@ -150,6 +150,7 @@ var runtimeConfigurableMap = map[string]bool{ "Director.DbLocation": false, "Director.DefaultResponse": false, "Director.EnableBroker": false, + "Director.EnableFederationMetadataHosting": false, "Director.EnableOIDC": false, "Director.EnableStat": false, "Director.FedTokenLifetime": false, @@ -946,6 +947,8 @@ func (bP BoolParam) GetBool() bool { return config.Director.CheckOriginPresence case "Director.EnableBroker": return config.Director.EnableBroker + case "Director.EnableFederationMetadataHosting": + return config.Director.EnableFederationMetadataHosting case "Director.EnableOIDC": return config.Director.EnableOIDC case "Director.EnableStat": @@ -1250,6 +1253,7 @@ var allParameterNames = []string{ "Director.DbLocation", "Director.DefaultResponse", "Director.EnableBroker", + "Director.EnableFederationMetadataHosting", "Director.EnableOIDC", "Director.EnableStat", "Director.FedTokenLifetime", @@ -1753,6 +1757,7 @@ var ( Director_CheckCachePresence = BoolParam{"Director.CheckCachePresence"} Director_CheckOriginPresence = BoolParam{"Director.CheckOriginPresence"} Director_EnableBroker = BoolParam{"Director.EnableBroker"} + Director_EnableFederationMetadataHosting = BoolParam{"Director.EnableFederationMetadataHosting"} Director_EnableOIDC = BoolParam{"Director.EnableOIDC"} Director_EnableStat = BoolParam{"Director.EnableStat"} Director_FilterCachesInErrorState = BoolParam{"Director.FilterCachesInErrorState"} diff --git a/param/parameters_struct.go b/param/parameters_struct.go index 3ffd839f9..c0cad25c9 100644 --- a/param/parameters_struct.go +++ b/param/parameters_struct.go @@ -99,6 +99,7 @@ type Config struct { DbLocation string `mapstructure:"dblocation" yaml:"DbLocation"` DefaultResponse string `mapstructure:"defaultresponse" yaml:"DefaultResponse"` EnableBroker bool `mapstructure:"enablebroker" yaml:"EnableBroker"` + EnableFederationMetadataHosting bool `mapstructure:"enablefederationmetadatahosting" yaml:"EnableFederationMetadataHosting"` EnableOIDC bool `mapstructure:"enableoidc" yaml:"EnableOIDC"` EnableStat bool `mapstructure:"enablestat" yaml:"EnableStat"` FedTokenLifetime time.Duration `mapstructure:"fedtokenlifetime" yaml:"FedTokenLifetime"` @@ -481,6 +482,7 @@ type configWithType struct { DbLocation struct { Type string; Value string } DefaultResponse struct { Type string; Value string } EnableBroker struct { Type string; Value bool } + EnableFederationMetadataHosting struct { Type string; Value bool } EnableOIDC struct { Type string; Value bool } EnableStat struct { Type string; Value bool } FedTokenLifetime struct { Type string; Value time.Duration } diff --git a/server_utils/server_utils.go b/server_utils/server_utils.go index b5e615253..7742584a8 100644 --- a/server_utils/server_utils.go +++ b/server_utils/server_utils.go @@ -406,6 +406,9 @@ func GetAdvertisementTok(server server_structs.XRootDServer, directorUrl string) // GetFedTok retrieves a federation token from the Director, which can be passed to other // federation services as proof of federation membership. func CreateFedTok(ctx context.Context, server server_structs.XRootDServer) (tok string, err error) { + // Note: This is iterating over advertisements for _Directors_, not for Origins/Caches at a specific Director + // (the function naming is a bit confusing, but `GetDirectorAds` does not get ads from a Director, it gets all the + // known advertisements _of_ Directors in a HA setup) for _, ad := range GetDirectorAds() { directorUrl := ad.AdvertiseUrl var directorEndpoint string