diff --git a/metaflow/metaflow_config.py b/metaflow/metaflow_config.py
index 79a42620431..0dbf9ed7368 100644
--- a/metaflow/metaflow_config.py
+++ b/metaflow/metaflow_config.py
@@ -87,6 +87,7 @@
 ###
 S3_ENDPOINT_URL = from_conf("S3_ENDPOINT_URL")
 S3_VERIFY_CERTIFICATE = from_conf("S3_VERIFY_CERTIFICATE")
+S3_SIGN_REQUEST = from_conf("S3_SIGN_REQUEST", True)
 
 # Set ServerSideEncryption for S3 uploads
 S3_SERVER_SIDE_ENCRYPTION = from_conf("S3_SERVER_SIDE_ENCRYPTION")
@@ -130,6 +131,11 @@
     DATATOOLS_CLIENT_PARAMS["endpoint_url"] = S3_ENDPOINT_URL
 if S3_VERIFY_CERTIFICATE:
     DATATOOLS_CLIENT_PARAMS["verify"] = S3_VERIFY_CERTIFICATE
+if not S3_SIGN_REQUEST:
+    # TODO: possible to achieve this without importing botocore?
+    from botocore import UNSIGNED
+    from botocore.config import Config
+    DATATOOLS_CLIENT_PARAMS["config"] = Config(signature_version=UNSIGNED)
 
 DATATOOLS_SESSION_VARS = from_conf("DATATOOLS_SESSION_VARS", {})
diff --git a/metaflow/metaflow_environment.py b/metaflow/metaflow_environment.py
index dde7be0b9fe..3d8c4fbbd0b 100644
--- a/metaflow/metaflow_environment.py
+++ b/metaflow/metaflow_environment.py
@@ -96,8 +96,13 @@ def _get_download_code_package_cmd(self, code_package_url, datastore_type):
             # Boto3 does not play well with passing None or an empty string to endpoint_url
             return "{python} -c '{script}'".format(
                 python=self._python(),
-                script='import boto3, os; ep=os.getenv(\\"METAFLOW_S3_ENDPOINT_URL\\"); boto3.client(\\"s3\\", **({\\"endpoint_url\\":ep} if ep else {})).download_file(\\"%s\\", \\"%s\\", \\"job.tar\\")'
-                % (bucket, s3_object),
+                script='import boto3, os;'
+                'from botocore import UNSIGNED; from botocore.config import Config;'
+                'ep=os.getenv(\\"METAFLOW_S3_ENDPOINT_URL\\");'
+                'no_sign=os.getenv(\\"METAFLOW_S3_SIGN_REQUEST\\") == \\"False\\";'
+                'boto3.client(\\"s3\\", **({\\"endpoint_url\\":ep} if ep else {}),'
+                '**({\\"config\\": Config(signature_version=UNSIGNED)} if no_sign else {}))'
+                '.download_file(\\"%s\\", \\"%s\\", \\"job.tar\\")' % (bucket, s3_object),
             )
         elif datastore_type == "azure":
             from .plugins.azure.azure_utils import parse_azure_full_path
diff --git a/metaflow/plugins/argo/argo_workflows.py b/metaflow/plugins/argo/argo_workflows.py
index f88d955ffdb..92023b1998b 100644
--- a/metaflow/plugins/argo/argo_workflows.py
+++ b/metaflow/plugins/argo/argo_workflows.py
@@ -43,6 +43,7 @@
     KUBERNETES_SECRETS,
     S3_ENDPOINT_URL,
     S3_SERVER_SIDE_ENCRYPTION,
+    S3_SIGN_REQUEST,
     SERVICE_HEADERS,
     SERVICE_INTERNAL_URL,
     UI_URL,
@@ -1765,6 +1766,9 @@ def _container_templates(self):
             # add METAFLOW_S3_ENDPOINT_URL
             env["METAFLOW_S3_ENDPOINT_URL"] = S3_ENDPOINT_URL
 
+            # support for unsigned s3 requests
+            env["METAFLOW_S3_SIGN_REQUEST"] = S3_SIGN_REQUEST
+
             # support Metaflow sandboxes
             env["METAFLOW_INIT_SCRIPT"] = KUBERNETES_SANDBOX_INIT_SCRIPT
             env["METAFLOW_KUBERNETES_SANDBOX_INIT_SCRIPT"] = (
diff --git a/metaflow/plugins/kubernetes/kubernetes.py b/metaflow/plugins/kubernetes/kubernetes.py
index 6625047395a..5b30dba0bb8 100644
--- a/metaflow/plugins/kubernetes/kubernetes.py
+++ b/metaflow/plugins/kubernetes/kubernetes.py
@@ -36,6 +36,7 @@
     OTEL_ENDPOINT,
     S3_ENDPOINT_URL,
     S3_SERVER_SIDE_ENCRYPTION,
+    S3_SIGN_REQUEST,
     SERVICE_HEADERS,
     KUBERNETES_SECRETS,
     SERVICE_INTERNAL_URL,
@@ -271,6 +272,7 @@ def create_jobset(
                 "METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT",
                 AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
             )
+            .environment_variable("METAFLOW_S3_SIGN_REQUEST", S3_SIGN_REQUEST)
             .environment_variable(
                 "METAFLOW_DATASTORE_SYSROOT_AZURE", DATASTORE_SYSROOT_AZURE
            )
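A minimal standalone sketch (not part of the patch) of the behaviour the diff wires up: when S3_SIGN_REQUEST (exposed to tasks as METAFLOW_S3_SIGN_REQUEST) is False, the S3 client is built with botocore's UNSIGNED signature version, so objects in public buckets can be fetched without credentials. The bucket and key names below are hypothetical placeholders.

    import boto3
    from botocore import UNSIGNED
    from botocore.config import Config

    # Anonymous (unsigned) S3 client, mirroring what the diff adds to
    # DATATOOLS_CLIENT_PARAMS and to the code-package download command.
    client = boto3.client("s3", config=Config(signature_version=UNSIGNED))

    # Placeholder bucket/key, for illustration only.
    client.download_file("example-public-bucket", "path/to/job.tar", "job.tar")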