diff --git a/.gitignore b/.gitignore index b2c02d519..feda60290 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,10 @@ pelican.yaml oidc-client-id oidc-client-secret MaxMindKey + +# Various ways test binaries have almost been committed +# from top-level directories to the code base via `git -A`. e2e_fed_tests.test -client.test *.test +/client.test +/pelican diff --git a/client/handle_http.go b/client/handle_http.go index 9720b9855..10ad999d5 100644 --- a/client/handle_http.go +++ b/client/handle_http.go @@ -270,6 +270,7 @@ type ( requireChecksum bool recursive bool skipAcquire bool + dryRun bool // Enable dry-run mode to display what would be transferred without actually doing it syncLevel SyncLevel // Policy for handling synchronization when the destination exists prefObjServers []*url.URL // holds any client-requested caches/origins dirResp server_structs.DirectorResponse @@ -333,6 +334,7 @@ type ( syncLevel SyncLevel // Policy for the client to synchronize data tokenLocation string // Location of a token file to use for transfers token string // Token that should be used for transfers + dryRun bool // Enable dry-run mode to display what would be transferred without actually doing it work chan *TransferJob closed bool prefObjServers []*url.URL // holds any client-requested caches/origins @@ -356,6 +358,7 @@ type ( identTransferOptionWriter struct{} identTransferOptionReader struct{} identTransferOptionInPlace struct{} + identTransferOptionDryRun struct{} transferDetailsOptions struct { NeedsToken bool @@ -750,6 +753,15 @@ func WithInPlace(inPlace bool) TransferOption { return option.New(identTransferOptionInPlace{}, inPlace) } +// Create an option to enable dry-run mode +// +// When enabled, the transfer will display what would be copied without actually +// modifying the destination. Useful for verifying paths and sources before +// performing actual transfers. +func WithDryRun(enable bool) TransferOption { + return option.New(identTransferOptionDryRun{}, enable) +} + // Create a new client to work with an engine func (te *TransferEngine) NewClient(options ...TransferOption) (client *TransferClient, err error) { log.Debugln("Making new clients") @@ -780,6 +792,8 @@ func (te *TransferEngine) NewClient(options ...TransferOption) (client *Transfer client.token = option.Value().(string) case identTransferOptionSynchronize{}: client.syncLevel = option.Value().(SyncLevel) + case identTransferOptionDryRun{}: + client.dryRun = option.Value().(bool) } } func() { @@ -1136,6 +1150,7 @@ func (tc *TransferClient) NewTransferJob(ctx context.Context, remoteUrl *url.URL remoteURL: ©Url, callback: tc.callback, skipAcquire: tc.skipAcquire, + dryRun: tc.dryRun, syncLevel: tc.syncLevel, xferType: transferTypeDownload, uuid: id, @@ -1179,6 +1194,8 @@ func (tc *TransferClient) NewTransferJob(ctx context.Context, remoteUrl *url.URL tj.reader = option.Value().(io.ReadCloser) case identTransferOptionInPlace{}: tj.inPlace = option.Value().(bool) + case identTransferOptionDryRun{}: + tj.dryRun = option.Value().(bool) } } @@ -1941,6 +1958,7 @@ func downloadObject(transfer *transferFile) (transferResults TransferResults, er log.Debugln("Downloading object from", transfer.remoteURL, "to", transfer.localPath) var downloaded int64 localPath := transfer.localPath + transferResults.job = transfer.job // Create a checksum hash instance for each requested checksum; these will all be // joined together into a single writer interface with the output file @@ -1977,6 +1995,18 @@ func downloadObject(transfer *transferFile) (transferResults TransferResults, er fileCloser = transfer.writer localPath = "" // Don't use localPath when using custom writer } else if transfer.xferType == transferTypeDownload { + // In dry-run mode, skip actual file operations and just report what would happen + if transfer.job != nil && transfer.job.dryRun { + // Determine the final local path + finalLocalPath := localPath + if len(localPath) > 0 && os.IsPathSeparator(localPath[len(localPath)-1]) { + finalLocalPath = path.Join(localPath, path.Base(transfer.remoteURL.Path)) + } + // Print to stdout with structured format for easy parsing + fmt.Printf("DOWNLOAD: %s -> %s\n", transfer.remoteURL.Path, finalLocalPath) + return transferResults, nil + } + var info os.FileInfo if info, err = os.Stat(localPath); err != nil { if os.IsNotExist(err) { @@ -3089,6 +3119,14 @@ func uploadObject(transfer *transferFile) (transferResult TransferResults, err e xferErrors := NewTransferErrors() transferResult.job = transfer.job + // In dry-run mode, log what would be uploaded and return success + if transfer.job != nil && transfer.job.dryRun { + // Print to stdout with structured format for easy parsing + fmt.Printf("UPLOAD: %s -> %s\n", transfer.localPath, transfer.remoteURL.Path) + // Return success for dry-run without performing any file operations + return transferResult, nil + } + // Check if the remote object already exists using statHttp // If the job is recursive, we skip this check as the check is already performed in walkDirUpload // If the job is not recursive, we check if the object exists at the origin diff --git a/cmd/object_dryrun_output_test.go b/cmd/object_dryrun_output_test.go new file mode 100644 index 000000000..1367a1f32 --- /dev/null +++ b/cmd/object_dryrun_output_test.go @@ -0,0 +1,221 @@ +//go:build !windows + +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/fed_test_utils" + "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/server_utils" + "github.com/pelicanplatform/pelican/token" + "github.com/pelicanplatform/pelican/token_scopes" +) + +const dryRunSubprocessEnv = "DRYRUN_SUBPROCESS" + +func TestDryRunOutputFormat(t *testing.T) { + if os.Getenv(dryRunSubprocessEnv) == "1" { + runDryRunSubprocess(t) + return + } + + server_utils.ResetTestState() + + originCfg := ` +Origin: + StorageType: "posix" + EnableDirectReads: true + Exports: + - StoragePrefix: / + FederationPrefix: /test + Capabilities: ["Reads", "Writes", "DirectReads", "Listings"] +` + fed := fed_test_utils.NewFedTest(t, originCfg) + + host := fmt.Sprintf("%s:%d", param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + discoveryURL := param.Federation_DiscoveryUrl.GetString() + require.NotEmpty(t, discoveryURL) + + issuer, err := config.GetServerIssuerURL() + require.NoError(t, err) + + tokenCfg := token.NewWLCGToken() + tokenCfg.Lifetime = time.Minute + tokenCfg.Issuer = issuer + tokenCfg.Subject = "test" + tokenCfg.AddAudienceAny() + tokenCfg.AddResourceScopes( + token_scopes.NewResourceScope(token_scopes.Wlcg_Storage_Read, "/"), + token_scopes.NewResourceScope(token_scopes.Wlcg_Storage_Modify, "/"), + token_scopes.NewResourceScope(token_scopes.Wlcg_Storage_Create, "/"), + ) + tokStr, err := tokenCfg.CreateToken() + require.NoError(t, err) + tokenFile := filepath.Join(t.TempDir(), "token.jwt") + require.NoError(t, os.WriteFile(tokenFile, []byte(tokStr), 0600)) + + emptyCfg := filepath.Join(t.TempDir(), "empty.yaml") + require.NoError(t, os.WriteFile(emptyCfg, []byte(""), 0644)) + + exportPrefix := fed.Exports[0].FederationPrefix + + t.Run("object_get", func(t *testing.T) { + destFile := filepath.Join(t.TempDir(), "downloaded.txt") + srcURL := fmt.Sprintf("pelican://%s%s/hello_world.txt", host, exportPrefix) + + stdout, stderr, err := runDryRunCLI(t, discoveryURL, emptyCfg, []string{ + "object", "get", + "--dry-run", + "--token", tokenFile, + srcURL, + destFile, + }) + require.NoError(t, err, "stderr: %s", stderr) + + lines := nonEmptyLines(stdout) + require.Len(t, lines, 1) + require.Equal(t, fmt.Sprintf("DOWNLOAD: %s -> %s", exportPrefix+"/hello_world.txt", destFile), lines[0]) + _, statErr := os.Stat(destFile) + require.Error(t, statErr) + require.True(t, os.IsNotExist(statErr)) + }) + + t.Run("object_put", func(t *testing.T) { + srcFile := filepath.Join(t.TempDir(), "upload_me.txt") + require.NoError(t, os.WriteFile(srcFile, []byte("hello"), 0644)) + + remotePath := exportPrefix + "/dryrun_upload.txt" + dstURL := fmt.Sprintf("pelican://%s%s", host, remotePath) + + stdout, stderr, err := runDryRunCLI(t, discoveryURL, emptyCfg, []string{ + "object", "put", + "--dry-run", + "--token", tokenFile, + srcFile, + dstURL, + }) + require.NoError(t, err, "stderr: %s", stderr) + + lines := nonEmptyLines(stdout) + require.Len(t, lines, 1) + require.Equal(t, fmt.Sprintf("UPLOAD: %s -> %s", srcFile, remotePath), lines[0]) + + originFile := filepath.Join(fed.Exports[0].StoragePrefix, "dryrun_upload.txt") + _, statErr := os.Stat(originFile) + require.Error(t, statErr) + require.True(t, os.IsNotExist(statErr)) + }) + + t.Run("object_sync", func(t *testing.T) { + localDir := filepath.Join(t.TempDir(), "src") + require.NoError(t, os.MkdirAll(localDir, 0755)) + srcFile := filepath.Join(localDir, "sync_me.txt") + require.NoError(t, os.WriteFile(srcFile, []byte("sync"), 0644)) + + remoteDir := fmt.Sprintf("pelican://%s%s/syncdest/", host, exportPrefix) + remotePath := exportPrefix + "/syncdest/sync_me.txt" + + stdout, stderr, err := runDryRunCLI(t, discoveryURL, emptyCfg, []string{ + "object", "sync", + "--dry-run", + "--token", tokenFile, + localDir, + remoteDir, + }) + require.NoError(t, err, "stderr: %s", stderr) + + lines := nonEmptyLines(stdout) + require.Len(t, lines, 1) + require.Equal(t, fmt.Sprintf("UPLOAD: %s -> %s", srcFile, remotePath), lines[0]) + + originFile := filepath.Join(fed.Exports[0].StoragePrefix, "syncdest", "sync_me.txt") + _, statErr := os.Stat(originFile) + require.Error(t, statErr) + require.True(t, os.IsNotExist(statErr)) + }) +} + +func runDryRunSubprocess(t *testing.T) { + argsJSON := os.Getenv("DRYRUN_ARGS_JSON") + if argsJSON == "" { + t.Fatalf("missing DRYRUN_ARGS_JSON") + } + var args []string + require.NoError(t, json.Unmarshal([]byte(argsJSON), &args)) + + rootCmd.SetArgs(args) + defer rootCmd.SetArgs(nil) + + require.NoError(t, rootCmd.Execute()) +} + +func runDryRunCLI(t *testing.T, discoveryURL, cfgFile string, args []string) (stdout string, stderr string, err error) { + argsFull := []string{"-test.run", "TestDryRunOutputFormat"} + cmd := exec.Command(os.Args[0], argsFull...) + + argsJSON, jerr := json.Marshal(append([]string{"--config", cfgFile}, args...)) + require.NoError(t, jerr) + + cmd.Env = append(os.Environ(), + fmt.Sprintf("%s=1", dryRunSubprocessEnv), + "DRYRUN_ARGS_JSON="+string(argsJSON), + "PELICAN_FEDERATION_DISCOVERYURL="+discoveryURL, + "PELICAN_TLSSKIPVERIFY=true", + "PELICAN_LOGGING_DISABLEPROGRESSBARS=true", + ) + + var outBuf, errBuf bytes.Buffer + cmd.Stdout = &outBuf + cmd.Stderr = &errBuf + err = cmd.Run() + return outBuf.String(), errBuf.String(), err +} + +func nonEmptyLines(s string) []string { + var out []string + for _, line := range strings.Split(s, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + if line == "PASS" { + continue + } + // Skip coverage output lines that appear when running tests with -cover + if strings.HasPrefix(line, "coverage:") { + continue + } + out = append(out, line) + } + return out +} diff --git a/cmd/object_dryrun_test.go b/cmd/object_dryrun_test.go new file mode 100644 index 000000000..29e8aabae --- /dev/null +++ b/cmd/object_dryrun_test.go @@ -0,0 +1,135 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package main + +import ( + "bytes" + "io" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestDryRunFlagExists verifies that the --dry-run flag is available for object commands +func TestDryRunFlagExists(t *testing.T) { + // Check object get + cmd := getCmd + flag := cmd.Flags().Lookup("dry-run") + assert.NotNil(t, flag, "object get should have --dry-run flag") + assert.Equal(t, "bool", flag.Value.Type(), "--dry-run should be a boolean flag") + + // Check object put + cmd = putCmd + flag = cmd.Flags().Lookup("dry-run") + assert.NotNil(t, flag, "object put should have --dry-run flag") + assert.Equal(t, "bool", flag.Value.Type(), "--dry-run should be a boolean flag") + + // Check object sync + cmd = syncCmd + flag = cmd.Flags().Lookup("dry-run") + assert.NotNil(t, flag, "object sync should have --dry-run flag") + assert.Equal(t, "bool", flag.Value.Type(), "--dry-run should be a boolean flag") +} + +// TestDryRunHelpText verifies that the --dry-run flag appears in help text +func TestDryRunHelpText(t *testing.T) { + tests := []struct { + name string + cmd string + helpCmd func(t *testing.T) string + }{ + { + name: "object get", + cmd: "object get", + helpCmd: func(t *testing.T) string { + old := os.Stdout + r, w, err := os.Pipe() + require.NoError(t, err) + os.Stdout = w + defer func() { os.Stdout = old }() + + require.NoError(t, getCmd.Help()) + + require.NoError(t, w.Close()) + var buf bytes.Buffer + _, err = io.Copy(&buf, r) + require.NoError(t, err) + require.NoError(t, r.Close()) + return buf.String() + }, + }, + { + name: "object put", + cmd: "object put", + helpCmd: func(t *testing.T) string { + old := os.Stdout + r, w, err := os.Pipe() + require.NoError(t, err) + os.Stdout = w + defer func() { os.Stdout = old }() + + require.NoError(t, putCmd.Help()) + + require.NoError(t, w.Close()) + var buf bytes.Buffer + _, err = io.Copy(&buf, r) + require.NoError(t, err) + require.NoError(t, r.Close()) + return buf.String() + }, + }, + { + name: "object sync", + cmd: "object sync", + helpCmd: func(t *testing.T) string { + old := os.Stdout + r, w, err := os.Pipe() + require.NoError(t, err) + os.Stdout = w + defer func() { os.Stdout = old }() + + require.NoError(t, syncCmd.Help()) + + require.NoError(t, w.Close()) + var buf bytes.Buffer + _, err = io.Copy(&buf, r) + require.NoError(t, err) + require.NoError(t, r.Close()) + return buf.String() + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helpText := tt.helpCmd(t) + assert.Contains(t, helpText, "--dry-run", "%s help should mention --dry-run flag", tt.cmd) + // Verify the help text describes what dry-run does + lowerHelp := strings.ToLower(helpText) + assert.True(t, + strings.Contains(lowerHelp, "without actually") || + strings.Contains(lowerHelp, "show what") || + strings.Contains(lowerHelp, "preview"), + "%s --dry-run help should describe what it does", tt.cmd) + }) + } +} diff --git a/cmd/object_get.go b/cmd/object_get.go index 9aad990ee..1eb4860e1 100644 --- a/cmd/object_get.go +++ b/cmd/object_get.go @@ -51,6 +51,7 @@ the client should fallback to discovered caches if all preferred caches fail.`) flagSet.StringP("token", "t", "", "Token file to use for transfer") flagSet.BoolP("recursive", "r", false, "Recursively download a collection. Forces methods to only be http to get the freshest collection contents") flagSet.Bool("inplace", false, "Write files directly to destination (default: use temporary files)") + flagSet.Bool("dry-run", false, "Show what would be downloaded without actually downloading") flagSet.StringP("cache-list-name", "n", "xroot", "(Deprecated) Cache list to use, currently either xroot or xroots; may be ignored") flagSet.Lookup("cache-list-name").Hidden = true flagSet.String("caches", "", "A JSON file containing the list of caches") @@ -167,7 +168,15 @@ func getMain(cmd *cobra.Command, args []string) { for _, src := range source { isRecursive, _ := cmd.Flags().GetBool("recursive") - transferResults, err := client.DoGet(ctx, src, dest, isRecursive, client.WithCallback(pb.callback), client.WithTokenLocation(tokenLocation), client.WithCaches(caches...), client.WithInPlace(inPlace)) + dryRun, _ := cmd.Flags().GetBool("dry-run") + options := []client.TransferOption{ + client.WithCallback(pb.callback), + client.WithTokenLocation(tokenLocation), + client.WithCaches(caches...), + client.WithInPlace(inPlace), + client.WithDryRun(dryRun), + } + transferResults, err := client.DoGet(ctx, src, dest, isRecursive, options...) if err != nil { attemptErr = err lastSrc = src diff --git a/cmd/object_put.go b/cmd/object_put.go index cb902818a..e3df561c6 100644 --- a/cmd/object_put.go +++ b/cmd/object_put.go @@ -51,6 +51,7 @@ func init() { flagSet := putCmd.Flags() flagSet.StringP("token", "t", "", "Token file to use for transfer") flagSet.BoolP("recursive", "r", false, "Recursively upload a collection. Forces methods to only be http to get the freshest collection contents") + flagSet.Bool("dry-run", false, "Show what would be uploaded without actually uploading") flagSet.String("checksum-algorithm", "", "Checksum algorithm to use for upload and validation") flagSet.Bool("require-checksum", false, "Require the server to return a checksum for the uploaded file (uses crc32c algorithm if no specific algorithm is specified)") flagSet.String("checksums", "", "Verify files against a checksums manifest. The format is ALGORITHM:FILENAME") @@ -249,7 +250,9 @@ func putMain(cmd *cobra.Command, args []string) { var result error lastSrc := "" - options = append(options, client.WithCallback(pb.callback), client.WithTokenLocation(tokenLocation)) + dryRun, _ := cmd.Flags().GetBool("dry-run") + options = append(options, client.WithCallback(pb.callback), client.WithTokenLocation(tokenLocation), client.WithDryRun(dryRun)) + finalResults := make([][]client.TransferResults, 0) for _, src := range source { diff --git a/cmd/object_sync.go b/cmd/object_sync.go index 33921e0a7..e88ffbea5 100644 --- a/cmd/object_sync.go +++ b/cmd/object_sync.go @@ -50,6 +50,7 @@ func init() { the client should fallback to discovered caches if all preferred caches fail.`) flagSet.StringP("token", "t", "", "Token file to use for transfer") flagSet.Bool("inplace", false, "Write files directly to destination (default: use temporary files)") + flagSet.Bool("dry-run", false, "Show what would be synchronized without actually modifying the destination") objectCmd.AddCommand(syncCmd) } @@ -169,12 +170,19 @@ func syncMain(cmd *cobra.Command, args []string) { lastSrc := "" + dryRun, _ := cmd.Flags().GetBool("dry-run") + if doDownload { for _, src := range sources { - if _, err = client.DoGet(ctx, src, dest, true, - client.WithCallback(pb.callback), client.WithTokenLocation(tokenLocation), - client.WithCaches(caches...), client.WithSynchronize(client.SyncSize), - client.WithInPlace(inPlace)); err != nil { + options := []client.TransferOption{ + client.WithCallback(pb.callback), + client.WithTokenLocation(tokenLocation), + client.WithSynchronize(client.SyncSize), + client.WithCaches(caches...), + client.WithInPlace(inPlace), + client.WithDryRun(dryRun), + } + if _, err = client.DoGet(ctx, src, dest, true, options...); err != nil { lastSrc = src break } @@ -188,9 +196,14 @@ func syncMain(cmd *cobra.Command, args []string) { log.Warningln("Destination: " + dest + " ends with '/', but the source is a file. If the destination does not exist, it will be treated as an object, not a collection.") } - if _, err = client.DoPut(ctx, src, dest, true, - client.WithCallback(pb.callback), client.WithTokenLocation(tokenLocation), - client.WithCaches(caches...), client.WithSynchronize(client.SyncSize)); err != nil { + options := []client.TransferOption{ + client.WithCallback(pb.callback), + client.WithTokenLocation(tokenLocation), + client.WithSynchronize(client.SyncSize), + client.WithCaches(caches...), + client.WithDryRun(dryRun), + } + if _, err = client.DoPut(ctx, src, dest, true, options...); err != nil { lastSrc = src break }