From d8c20799e0480a862ab0cb1d4c1dc0119c67d442 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Mon, 21 Oct 2024 21:56:39 +0200 Subject: [PATCH 1/7] add s3 compatible storage - first commit --- .gitignore | 4 + build.gradle.kts | 3 + helm/polaris/values.yaml | 273 +++++------------- .../polaris/core/PolarisConfiguration.java | 1 + .../polaris/core/entity/CatalogEntity.java | 50 ++++ .../storage/PolarisCredentialProperty.java | 2 + .../PolarisStorageConfigurationInfo.java | 3 + .../s3/S3CredentialsStorageIntegration.java | 138 +++++++++ .../s3/S3StorageConfigurationInfo.java | 164 +++++++++++ regtests/minio/Readme.md | 42 +++ regtests/minio/certs/CAs/private.key | 5 + regtests/minio/certs/CAs/public.crt | 13 + regtests/minio/certs/private.key | 5 + regtests/minio/certs/public.crt | 13 + regtests/minio/docker-compose.yml | 69 +++++ regtests/minio/queries-for-spark.sql | 42 +++ regtests/run_spark_sql_s3compatible.sh | 220 ++++++++++++++ ...PolarisStorageIntegrationProviderImpl.java | 4 + spec/polaris-management-service.yml | 54 ++++ 19 files changed, 902 insertions(+), 203 deletions(-) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java create mode 100644 regtests/minio/Readme.md create mode 100644 regtests/minio/certs/CAs/private.key create mode 100644 regtests/minio/certs/CAs/public.crt create mode 100644 regtests/minio/certs/private.key create mode 100644 regtests/minio/certs/public.crt create mode 100644 regtests/minio/docker-compose.yml create mode 100644 regtests/minio/queries-for-spark.sql create mode 100755 regtests/run_spark_sql_s3compatible.sh diff --git a/.gitignore b/.gitignore index e220135f6..0092dccc2 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ regtests/output/ # This file, if checked in after running for example regtests, contains unmanaged dependencies that eventually # cause unnecessary "security alerts" like https://github.com/apache/polaris/pull/718. regtests/client/python/poetry.lock +regtests/minio/miniodata/* # Python stuff (see note about poetry.lock above as well!) /poetry.lock @@ -64,6 +65,9 @@ gradle/wrapper/gradle-wrapper-*.sha256 *.ipr *.iws +# VScode +.vscode + # Gradle /.gradle /build-logic/.gradle diff --git a/build.gradle.kts b/build.gradle.kts index 45f20b59e..02c24a4db 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -109,6 +109,9 @@ tasks.named("rat").configure { excludes.add("regtests/metastore_db/**") excludes.add("regtests/client/python/.openapi-generator/**") excludes.add("regtests/output/**") + excludes.add("regtests/minio/miniodata/**") + excludes.add("regtests/minio/**/*.crt") + excludes.add("regtests/minio/**/*.key") excludes.add("**/*.ipynb") excludes.add("**/*.iml") diff --git a/helm/polaris/values.yaml b/helm/polaris/values.yaml index 7713d8ca2..a6d7f3180 100644 --- a/helm/polaris/values.yaml +++ b/helm/polaris/values.yaml @@ -359,209 +359,76 @@ extraEnv: # name: aws-secret # key: secret_access_key -# -- Extra volumes to add to the polaris pod. See https://kubernetes.io/docs/concepts/storage/volumes/. -extraVolumes: [] - # - name: extra-volume - # emptyDir: {} - -# -- Extra volume mounts to add to the polaris container. See https://kubernetes.io/docs/concepts/storage/volumes/. 
-extraVolumeMounts: [] - # - name: extra-volume - # mountPath: /usr/share/extra-volume - -# -- Add additional init containers to the polaris pod(s) See https://kubernetes.io/docs/concepts/workloads/pods/init-containers/. -extraInitContainers: [] - # - name: your-image-name - # image: your-image - # imagePullPolicy: Always - # command: ['sh', '-c', 'echo "hello world"'] - -tracing: - # -- Specifies whether tracing for the polaris server should be enabled. - enabled: false - # -- The collector endpoint URL to connect to (required). - # The endpoint URL must have either the http:// or the https:// scheme. - # The collector must talk the OpenTelemetry protocol (OTLP) and the port must be its gRPC port (by default 4317). - # See https://quarkus.io/guides/opentelemetry for more information. - endpoint: "http://otlp-collector:4317" - # -- Which requests should be sampled. Valid values are: "all", "none", or a ratio between 0.0 and - # "1.0d" (inclusive). E.g. "0.5d" means that 50% of the requests will be sampled. - # Note: avoid entering numbers here, always prefer a string representation of the ratio. - sample: "1.0d" - # -- Resource attributes to identify the polaris service among other tracing sources. - # See https://opentelemetry.io/docs/reference/specification/resource/semantic_conventions/#service. - # If left empty, traces will be attached to a service named "Apache Polaris"; to change this, - # provide a service.name attribute here. - attributes: - {} - # service.name: my-polaris - -metrics: - # -- Specifies whether metrics for the polaris server should be enabled. - enabled: true - # -- Additional tags (dimensional labels) to add to the metrics. - tags: - {} - # service: polaris - # environment: production - -serviceMonitor: - # -- Specifies whether a ServiceMonitor for Prometheus operator should be created. - enabled: true - # -- The scrape interval; leave empty to let Prometheus decide. Must be a valid duration, e.g. 1d, 1h30m, 5m, 10s. - interval: "" - # -- Labels for the created ServiceMonitor so that Prometheus operator can properly pick it up. - labels: - {} - # release: prometheus - # -- Relabeling rules to apply to metrics. Ref https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config. - metricRelabelings: [] - # - source_labels: [ __meta_kubernetes_namespace ] - # separator: ; - # regex: (.*) - # target_label: namespace - # replacement: $1 - # action: replace - -# -- Logging configuration. -logging: - # -- The log level of the root category, which is used as the default log level for all categories. - level: INFO - # -- The header name to use for the request ID. - requestIdHeaderName: Polaris-Request-Id - # -- Configuration for the console appender. - console: - # -- Whether to enable the console appender. - enabled: true - # -- The log level of the console appender. - threshold: ALL - # -- Whether to log in JSON format. - json: false - # -- The log format to use. Ignored if JSON format is enabled. See - # https://quarkus.io/guides/logging#logging-format for details. - format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n" - # -- Configuration for the file appender. - file: - # -- Whether to enable the file appender. - enabled: false - # -- The log level of the file appender. - threshold: ALL - # -- Whether to log in JSON format. - json: false - # -- The log format to use. Ignored if JSON format is enabled. 
See - # https://quarkus.io/guides/logging#logging-format for details. - format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n" - # -- The local directory where log files are stored. The persistent volume claim will be mounted - # here. - logsDir: /deployments/logs - # -- The log file name. - fileName: polaris.log - # -- Log rotation configuration. - rotation: - # -- The maximum size of the log file before it is rotated. Should be expressed as a Kubernetes quantity. - maxFileSize: 100Mi - # -- The maximum number of backup files to keep. - maxBackupIndex: 5 - # -- An optional suffix to append to the rotated log files. If present, the rotated log files - # will be grouped in time buckets, and each bucket will contain at most maxBackupIndex files. - # The suffix must be in a date-time format that is understood by DateTimeFormatter. If the - # suffix ends with .gz or .zip, the rotated files will also be compressed using the - # corresponding algorithm. - fileSuffix: ~ # .yyyy-MM-dd.gz - # -- The log storage configuration. A persistent volume claim will be created using these - # settings. - storage: - # -- The storage class name of the persistent volume claim to create. - className: standard - # -- The size of the persistent volume claim to create. - size: 512Gi - # -- Labels to add to the persistent volume claim spec selector; a persistent volume with - # matching labels must exist. Leave empty if using dynamic provisioning. - selectorLabels: {} - # app.kubernetes.io/name: polaris - # app.kubernetes.io/instance: RELEASE-NAME - # -- Configuration for specific log categories. - categories: - org.apache.polaris: INFO - org.apache.iceberg.rest: INFO - # Useful to debug configuration issues: - # io.smallrye.config: DEBUG - # -- Configuration for MDC (Mapped Diagnostic Context). Values specified here will be added to the - # log context of all incoming requests and can be used in log patterns. - mdc: - # aid=polaris - # sid=polaris-service - {} - -# -- Realm context resolver configuration. -realmContext: - # -- The type of realm context resolver to use. Two built-in types are supported: default and test; - # test is not recommended for production as it does not perform any realm validation. - type: default - # -- List of valid realms, for use with the default realm context resolver. The first realm in - # the list is the default realm. Realms not in this list will be rejected. - realms: - - POLARIS - -# -- Polaris features configuration. -features: - # -- Features to enable or disable globally. If a feature is not present in the map, the default - # built-in value is used. - defaults: {} - # ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false - # SUPPORTED_CATALOG_STORAGE_TYPES: - # - S3 - # - GCS - # - AZURE - # - FILE - # -- Features to enable or disable per realm. This field is a map of maps. The realm name is the key, and the value is a map of - # feature names to values. If a feature is not present in the map, the default value from the 'defaults' field is used. - realmOverrides: {} - # my-realm: - # ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: true - -# -- Polaris persistence configuration. -persistence: - # -- The type of persistence to use. Two built-in types are supported: in-memory and eclipse-link. - type: eclipse-link # in-memory - # -- The configuration for the eclipse-link persistence manager. - eclipseLink: - # -- The secret name to pull persistence.xml from. 
- secret: - # -- The name of the secret to pull persistence.xml from. - # If not provided, the default built-in persistence.xml will be used. This is probably not what you want. - name: ~ - # -- The key in the secret to pull persistence.xml from. - key: persistence.xml - # -- The persistence unit name to use. - persistenceUnit: polaris - -# -- Polaris FileIO configuration. -fileIo: - # -- The type of file IO to use. Two built-in types are supported: default and wasb. The wasb one translates WASB paths to ABFS ones. - type: default - -# -- Storage credentials for the server. If the following properties are unset, default -# credentials will be used, in which case the pod must have the necessary permissions to access the storage. -storage: - # -- The secret to pull storage credentials from. - secret: - # -- The name of the secret to pull storage credentials from. - name: ~ - # -- The key in the secret to pull the AWS access key ID from. Only required when using AWS. - awsAccessKeyId: ~ - # -- The key in the secret to pull the AWS secret access key from. Only required when using AWS. - awsSecretAccessKey: ~ - # -- The key in the secret to pull the GCP token from. Only required when using GCP. - gcpToken: ~ - # -- The key in the secret to pull the GCP token expiration time from. Only required when using GCP. Must be a valid ISO 8601 duration. The default is PT1H (1 hour). - gcpTokenLifespan: ~ - -# -- Polaris authentication configuration. -authentication: - # -- The type of authentication to use. Two built-in types are supported: default and test; - # test is not recommended for production. - authenticator: +# -- Configures whether to enable the bootstrap metastore manager job +bootstrapMetastoreManager: false + +# -- Extra environment variables to add to the bootstrap metastore manager job (see `extraEnv` for an example) +bootstrapExtraEnv: [] + +# -- The secret name to pull persistence.xml from (ensure the key name is 'persistence.xml') +persistenceConfigSecret: ~ + +# -- Configures for polaris-server.yml +polarisServerConfig: + server: + # Maximum number of threads. + maxThreads: 200 + + # Minimum number of thread to keep alive. + minThreads: 10 + applicationConnectors: + # HTTP-specific options. + - type: http + + # The port on which the HTTP server listens for service requests. + port: 8181 + + adminConnectors: + - type: http + port: 8182 + + # The hostname of the interface to which the HTTP server socket wil be found. If omitted, the + # socket will listen on all interfaces. + # bindHost: localhost + + # ssl: + # keyStore: ./example.keystore + # keyStorePassword: example + # + # keyStoreType: JKS # (optional, JKS is default) + + # HTTP request log settings + requestLog: + appenders: + # Settings for logging to stdout. + - type: console + + # # Settings for logging to a file. + # - type: file + + # # The file to which statements will be logged. + # currentLogFilename: ./logs/request.log + + # # When the log file rolls over, the file will be archived to requests-2012-03-15.log.gz, + # # requests.log will be truncated, and new statements written to it. + # archivedLogFilenamePattern: ./logs/requests-%d.log.gz + + # # The maximum number of log files to archive. 
+ # archivedFileCount: 14 + + # # Enable archiving if the request log entries go to the their own file + # archive: true + + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + - S3_COMPATIBLE + - GCS + - AZURE + - FILE + + callContextResolver: type: default # -- The type of token service to use. Two built-in types are supported: default and test; # test is not recommended for production. diff --git a/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java index ca1962e3c..29b5424b8 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java @@ -216,6 +216,7 @@ public static Builder builder() { .defaultValue( List.of( StorageConfigInfo.StorageTypeEnum.S3.name(), + StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE.name(), StorageConfigInfo.StorageTypeEnum.AZURE.name(), StorageConfigInfo.StorageTypeEnum.GCS.name(), StorageConfigInfo.StorageTypeEnum.FILE.name())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index f3bfd6edf..f8a37dd6f 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -38,12 +38,14 @@ import org.apache.polaris.core.admin.model.FileStorageConfigInfo; import org.apache.polaris.core.admin.model.GcpStorageConfigInfo; import org.apache.polaris.core.admin.model.PolarisCatalog; +import org.apache.polaris.core.admin.model.S3StorageConfigInfo; import org.apache.polaris.core.admin.model.StorageConfigInfo; import org.apache.polaris.core.storage.FileStorageConfigurationInfo; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; +import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; /** * Catalog specific subclass of the {@link PolarisEntity} that handles conversion from the {@link @@ -141,6 +143,32 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) .setRegion(awsConfig.getRegion()) .build(); } + if (configInfo instanceof S3StorageConfigurationInfo) { + S3StorageConfigurationInfo s3Config = (S3StorageConfigurationInfo) configInfo; + return S3StorageConfigInfo.builder() + .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) + .setS3Endpoint(s3Config.getS3Endpoint()) + .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) + .setCredsVendingStrategy( + org.apache.polaris.core.admin.model.S3StorageConfigInfo.CredsVendingStrategyEnum + .valueOf( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsVendingStrategyEnum.class, + s3Config.getCredsVendingStrategy().name())) + .setCredsCatalogAndClientStrategy( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsCatalogAndClientStrategyEnum.valueOf( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsCatalogAndClientStrategyEnum.class, + s3Config.getCredsCatalogAndClientStrategy().name())) + .setAllowedLocations(s3Config.getAllowedLocations()) + 
.setS3CredentialsCatalogAccessKeyId(s3Config.getS3CredentialsCatalogAccessKeyId())
+          .setS3CredentialsCatalogSecretAccessKey(
+              s3Config.getS3CredentialsCatalogSecretAccessKey())
+          .setS3CredentialsClientAccessKeyId(s3Config.getS3CredentialsClientAccessKeyId())
+          .setS3CredentialsClientSecretAccessKey(
+              s3Config.getS3CredentialsClientSecretAccessKey())
+          .build();
+    }
     if (configInfo instanceof AzureStorageConfigurationInfo) {
       AzureStorageConfigurationInfo azureConfig = (AzureStorageConfigurationInfo) configInfo;
       return AzureStorageConfigInfo.builder()
@@ -250,6 +278,28 @@ public Builder setStorageConfigurationInfo(
           awsConfig.validateArn(awsConfigModel.getRoleArn());
           config = awsConfig;
           break;
+
+        case S3_COMPATIBLE:
+          S3StorageConfigInfo s3ConfigModel = (S3StorageConfigInfo) storageConfigModel;
+          config =
+              new S3StorageConfigurationInfo(
+                  PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE,
+                  S3StorageConfigInfo.CredsVendingStrategyEnum.valueOf(
+                      org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo
+                          .CredsVendingStrategyEnum.class,
+                      s3ConfigModel.getCredsVendingStrategy().name()),
+                  S3StorageConfigInfo.CredsCatalogAndClientStrategyEnum.valueOf(
+                      org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo
+                          .CredsCatalogAndClientStrategyEnum.class,
+                      s3ConfigModel.getCredsCatalogAndClientStrategy().name()),
+                  s3ConfigModel.getS3Endpoint(),
+                  s3ConfigModel.getS3CredentialsCatalogAccessKeyId(),
+                  s3ConfigModel.getS3CredentialsCatalogSecretAccessKey(),
+                  s3ConfigModel.getS3CredentialsClientAccessKeyId(),
+                  s3ConfigModel.getS3CredentialsClientSecretAccessKey(),
+                  s3ConfigModel.getS3PathStyleAccess(),
+                  new ArrayList<>(allowedLocations));
+          break;
         case AZURE:
           AzureStorageConfigInfo azureConfigModel = (AzureStorageConfigInfo) storageConfigModel;
           AzureStorageConfigurationInfo azureConfigInfo =
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java
index c79aaf595..13838e6af 100644
--- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java
+++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java
@@ -23,6 +23,8 @@ public enum PolarisCredentialProperty {
   AWS_KEY_ID(String.class, "s3.access-key-id", "the aws access key id"),
   AWS_SECRET_KEY(String.class, "s3.secret-access-key", "the aws access key secret"),
   AWS_TOKEN(String.class, "s3.session-token", "the aws scoped access token"),
+  AWS_ENDPOINT(String.class, "s3.endpoint", "the aws s3 endpoint"),
+  AWS_PATH_STYLE_ACCESS(Boolean.class, "s3.path-style-access", "the aws s3 path style access"),
   CLIENT_REGION(
       String.class, "client.region", "region to configure client for making requests to AWS"),
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java
index 6b0638e83..4f290e77b 100644
--- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java
+++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java
@@ -47,6 +47,7 @@
 import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo;
 import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo;
 import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo;
+import 
org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,6 +63,7 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME) @JsonSubTypes({ @JsonSubTypes.Type(value = AwsStorageConfigurationInfo.class), + @JsonSubTypes.Type(value = S3StorageConfigurationInfo.class), @JsonSubTypes.Type(value = AzureStorageConfigurationInfo.class), @JsonSubTypes.Type(value = GcpStorageConfigurationInfo.class), @JsonSubTypes.Type(value = FileStorageConfigurationInfo.class), @@ -241,6 +243,7 @@ public void validateMaxAllowedLocations(int maxAllowedLocations) { /** Polaris' storage type, each has a fixed prefix for its location */ public enum StorageType { S3("s3://"), + S3_COMPATIBLE("s3://"), AZURE(List.of("abfs://", "wasb://", "abfss://", "wasbs://")), GCS("gs://"), FILE("file://"), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java new file mode 100644 index 000000000..5fdbbdf37 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.storage.s3; + +import java.net.URI; +import java.util.EnumMap; +import java.util.Set; +import org.apache.polaris.core.PolarisDiagnostics; +import org.apache.polaris.core.storage.InMemoryStorageIntegration; +import org.apache.polaris.core.storage.PolarisCredentialProperty; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; + +/** Credential vendor that supports generating */ +public class S3CredentialsStorageIntegration + extends InMemoryStorageIntegration { + + private static final Logger LOGGER = + LoggerFactory.getLogger(S3CredentialsStorageIntegration.class); + + private StsClient stsClient; + + // Constructor + public S3CredentialsStorageIntegration() { + super(S3CredentialsStorageIntegration.class.getName()); + } + + public void createStsClient(S3StorageConfigurationInfo s3storageConfig) { + + LOGGER.debug("S3Compatible - createStsClient()"); + + LOGGER.info( + "S3Compatible - AWS STS endpoint is unique and different from the S3 Endpoint. 
AWS SDK need to be overided with dedicated Endpoint from S3Compatible, otherwise the AWS STS url is targeted"); + + StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + + stsBuilder.region( + Region + .US_WEST_1); // default region to avoid bug, because most (all?) S3 compatible softwares + // do not care about regions + stsBuilder.endpointOverride(URI.create(s3storageConfig.getS3Endpoint())); + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create( + AwsBasicCredentials.create( + s3storageConfig.getS3CredentialsCatalogAccessKeyId(), + s3storageConfig.getS3CredentialsCatalogSecretAccessKey()))); + + this.stsClient = stsBuilder.build(); + LOGGER.debug("S3Compatible - stsClient successfully built"); + } + + /** {@inheritDoc} */ + @Override + public EnumMap getSubscopedCreds( + @NotNull PolarisDiagnostics diagnostics, + @NotNull S3StorageConfigurationInfo storageConfig, + boolean allowListOperation, + @NotNull Set allowedReadLocations, + @NotNull Set allowedWriteLocations) { + + LOGGER.debug("S3Compatible - getSubscopedCreds - applying credential strategy"); + + EnumMap propertiesMap = + new EnumMap<>(PolarisCredentialProperty.class); + propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); + propertiesMap.put( + PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, + storageConfig.getS3PathStyleAccess().toString()); + + switch (storageConfig.getCredsVendingStrategy()) { + case KEYS_SAME_AS_CATALOG: + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, + storageConfig.getS3CredentialsCatalogAccessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, + storageConfig.getS3CredentialsCatalogSecretAccessKey()); + break; + + case KEYS_DEDICATED_TO_CLIENT: + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, + storageConfig.getS3CredentialsClientAccessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, + storageConfig.getS3CredentialsClientSecretAccessKey()); + break; + + case TOKEN_WITH_ASSUME_ROLE: + if (this.stsClient == null) { + createStsClient(storageConfig); + } + LOGGER.debug("S3Compatible - assumeRole !"); + AssumeRoleResponse response = + stsClient.assumeRole( + AssumeRoleRequest.builder().roleSessionName("PolarisCredentialsSTS").build()); + + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); + propertiesMap.put( + PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); + break; + + // @TODO implement the MinIO external OpenID Connect - + // https://min.io/docs/minio/linux/developers/security-token-service.html?ref=docs-redirect#id1 + // case TOKEN_WITH_ASSUME_ROLE_WITH_WEB_IDENTITY: + // break; + } + + return propertiesMap; + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java new file mode 100644 index 000000000..c66deeff7 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.storage.s3; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import java.util.List; +import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** Polaris Storage Configuration information for an S3 Compatible solution, MinIO, Dell ECS... */ +public class S3StorageConfigurationInfo extends PolarisStorageConfigurationInfo { + + // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required + // for allowed read and write locations for subscoping creds. + @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; + private @NotNull CredsVendingStrategyEnum credsVendingStrategy; + private @NotNull CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy; + private @NotNull String s3endpoint; + private @NotNull Boolean s3pathStyleAccess; + private @NotNull String s3CredentialsCatalogAccessKeyId; + private @NotNull String s3CredentialsCatalogSecretAccessKey; + private @Nullable String s3CredentialsClientAccessKeyId; + private @Nullable String s3CredentialsClientSecretAccessKey; + + // Define how and what the catalog client will receive as credentials + public static enum CredsVendingStrategyEnum { + KEYS_SAME_AS_CATALOG, + KEYS_DEDICATED_TO_CLIENT, + TOKEN_WITH_ASSUME_ROLE; + }; + + // Define how the access and secret keys will be receive during the catalo creation, if + // ENV_VAR_NAME, the variable must exist in the Polaris running environement - it is more secured, + // but less dynamic + public static enum CredsCatalogAndClientStrategyEnum { + VALUE, + ENV_VAR_NAME; + }; + + // Constructor + @JsonCreator + public S3StorageConfigurationInfo( + @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, + @JsonProperty(value = "credsVendingStrategy", required = true) @NotNull + CredsVendingStrategyEnum credsVendingStrategy, + @JsonProperty(value = "credsCatalogAndClientStrategy", required = true) @NotNull + CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy, + @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @NotNull + String s3CredentialsCatalogAccessKeyId, + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @NotNull + String s3CredentialsCatalogSecretAccessKey, + @JsonProperty(value = "s3CredentialsClientAccessKeyId", required = false) @Nullable + String s3CredentialsClientAccessKeyId, + @JsonProperty(value = "s3CredentialsClientSecretAccessKey", required = false) @Nullable + String s3CredentialsClientSecretAccessKey, + @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull + 
Boolean s3PathStyleAccess,
+      @JsonProperty(value = "allowedLocations", required = true) @NotNull
+          List<String> allowedLocations) {
+
+    // Standard constructor wiring: delegate to super and store the values in the private fields
+    super(storageType, allowedLocations);
+    validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS);
+    this.credsVendingStrategy =
+        CredsVendingStrategyEnum.valueOf(
+            CredsVendingStrategyEnum.class, credsVendingStrategy.name());
+    this.credsCatalogAndClientStrategy =
+        CredsCatalogAndClientStrategyEnum.valueOf(
+            CredsCatalogAndClientStrategyEnum.class, credsCatalogAndClientStrategy.name());
+    this.s3pathStyleAccess = s3PathStyleAccess;
+    this.s3endpoint = s3Endpoint;
+
+    // The constructor is called multiple times during the catalog lifecycle, but the environment
+    // variable substitution must happen only once, so each field is resolved with a null check:
+    // the first time, the value is looked up using the variable name that was passed in; on later
+    // calls, getenv() receives the already-resolved value as a name, returns null, and the value
+    // kept by the Polaris storage cache is used instead.
+    if (CredsCatalogAndClientStrategyEnum.ENV_VAR_NAME.equals(credsCatalogAndClientStrategy)) {
+      String cai = System.getenv(s3CredentialsCatalogAccessKeyId);
+      String cas = System.getenv(s3CredentialsCatalogSecretAccessKey);
+      String cli = System.getenv(s3CredentialsClientAccessKeyId);
+      String cls = System.getenv(s3CredentialsClientSecretAccessKey);
+      this.s3CredentialsCatalogAccessKeyId = (cai != null) ? cai : s3CredentialsCatalogAccessKeyId;
+      this.s3CredentialsCatalogSecretAccessKey =
+          (cas != null) ? cas : s3CredentialsCatalogSecretAccessKey;
+      this.s3CredentialsClientAccessKeyId = (cli != null) ? cli : s3CredentialsClientAccessKeyId;
+      this.s3CredentialsClientSecretAccessKey =
+          (cls != null) ? 
cls : s3CredentialsClientSecretAccessKey; + } else { + this.s3CredentialsCatalogAccessKeyId = s3CredentialsCatalogAccessKeyId; + this.s3CredentialsCatalogSecretAccessKey = s3CredentialsCatalogSecretAccessKey; + this.s3CredentialsClientAccessKeyId = s3CredentialsClientAccessKeyId; + this.s3CredentialsClientSecretAccessKey = s3CredentialsClientSecretAccessKey; + } + } + + public @NotNull CredsVendingStrategyEnum getCredsVendingStrategy() { + return this.credsVendingStrategy; + } + + public @NotNull CredsCatalogAndClientStrategyEnum getCredsCatalogAndClientStrategy() { + return this.credsCatalogAndClientStrategy; + } + + public @NotNull String getS3Endpoint() { + return this.s3endpoint; + } + + public @NotNull Boolean getS3PathStyleAccess() { + return this.s3pathStyleAccess; + } + + public @NotNull String getS3CredentialsCatalogAccessKeyId() { + return this.s3CredentialsCatalogAccessKeyId; + } + + public @NotNull String getS3CredentialsCatalogSecretAccessKey() { + return this.s3CredentialsCatalogSecretAccessKey; + } + + public @Nullable String getS3CredentialsClientAccessKeyId() { + return this.s3CredentialsClientAccessKeyId; + } + + public @Nullable String getS3CredentialsClientSecretAccessKey() { + return this.s3CredentialsClientSecretAccessKey; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("storageType", getStorageType()) + .add("storageType", getStorageType().name()) + .add("allowedLocation", getAllowedLocations()) + .toString(); + } + + @Override + public String getFileIoImplClassName() { + return "org.apache.iceberg.aws.s3.S3FileIO"; + } +} diff --git a/regtests/minio/Readme.md b/regtests/minio/Readme.md new file mode 100644 index 000000000..08089f56f --- /dev/null +++ b/regtests/minio/Readme.md @@ -0,0 +1,42 @@ + + +# MiniIO Secured +## Minio and secured buckets with TLS self-signed / custom AC + +To be able to test Polaris with buckets in TLS under custom AC or self-signed certificate + +## MiniIO generate self-signed certificates designed for docker-compose setup + +- Download minio certificate generator : https://github.com/minio/certgen +- ```./certgen -host "localhost,minio,*"``` +- put them in ./certs and ./certs/CAs +- they will be mounted in default minio container placeholder + +## Test minIO secured TLS buckets from self-signed certificate with AWS CLI +- ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --no-verify-ssl``` +- ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --ca-bundle=./certs/public.crt``` + +## add to java cacerts only the public.crt as an AC +- ```sudo keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias minio -file ./certs/public.crt``` +- ```keytool -list -cacerts -alias minio -storepass changeit``` + +## remove from java cacerts the public.crt +- ```sudo keytool -delete -trustcacerts -cacerts -storepass changeit -noprompt -alias minio``` +- ```keytool -list -cacerts -alias minio -storepass changeit``` diff --git a/regtests/minio/certs/CAs/private.key b/regtests/minio/certs/CAs/private.key new file mode 100644 index 000000000..e2e7ffca0 --- /dev/null +++ b/regtests/minio/certs/CAs/private.key @@ -0,0 +1,5 @@ +-----BEGIN PRIVATE KEY----- +MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgqt8snxuGN+69o5tw +pHvoLV9e7GMIqYfGdA8L0k7+yV+hRANCAAS9oQlQk2nk4UxFreVLDlXvBplQLzvR +cm9fLzYDXQ6SXb7RWusrIJ0mJU6b/u4xQOcW5IB3ADj1SQ4N9SrjOX2m +-----END PRIVATE KEY----- diff --git a/regtests/minio/certs/CAs/public.crt b/regtests/minio/certs/CAs/public.crt new file 
mode 100644 index 000000000..b06cc51e5 --- /dev/null +++ b/regtests/minio/certs/CAs/public.crt @@ -0,0 +1,13 @@ +-----BEGIN CERTIFICATE----- +MIIB4jCCAYegAwIBAgIQElGrcf0kjaLwbaan1e8WZTAKBggqhkjOPQQDAjA2MRww +GgYDVQQKExNDZXJ0Z2VuIERldmVsb3BtZW50MRYwFAYDVQQLDA1maWRAcGVyc29k +ZWxsMB4XDTI0MTAxNTIxNDQxOVoXDTI1MTAxNTIxNDQxOVowNjEcMBoGA1UEChMT +Q2VydGdlbiBEZXZlbG9wbWVudDEWMBQGA1UECwwNZmlkQHBlcnNvZGVsbDBZMBMG +ByqGSM49AgEGCCqGSM49AwEHA0IABL2hCVCTaeThTEWt5UsOVe8GmVAvO9Fyb18v +NgNdDpJdvtFa6ysgnSYlTpv+7jFA5xbkgHcAOPVJDg31KuM5faajdzB1MA4GA1Ud +DwEB/wQEAwICpDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/ +MB0GA1UdDgQWBBTb6lIhkV1RLhfKNPrcdGEkxsvkrjAeBgNVHREEFzAVgglsb2Nh +bGhvc3SCBW1pbmlvggEqMAoGCCqGSM49BAMCA0kAMEYCIQDLm8+CZvB+7gRpCRr6 +BCAJBF8A3e6Pv7G1oCS1uwiUhQIhAI3Z/aBYatMkbb4VmQH1VZC8CvUyNPHS5sTa +saXcmTbe +-----END CERTIFICATE----- diff --git a/regtests/minio/certs/private.key b/regtests/minio/certs/private.key new file mode 100644 index 000000000..e2e7ffca0 --- /dev/null +++ b/regtests/minio/certs/private.key @@ -0,0 +1,5 @@ +-----BEGIN PRIVATE KEY----- +MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgqt8snxuGN+69o5tw +pHvoLV9e7GMIqYfGdA8L0k7+yV+hRANCAAS9oQlQk2nk4UxFreVLDlXvBplQLzvR +cm9fLzYDXQ6SXb7RWusrIJ0mJU6b/u4xQOcW5IB3ADj1SQ4N9SrjOX2m +-----END PRIVATE KEY----- diff --git a/regtests/minio/certs/public.crt b/regtests/minio/certs/public.crt new file mode 100644 index 000000000..b06cc51e5 --- /dev/null +++ b/regtests/minio/certs/public.crt @@ -0,0 +1,13 @@ +-----BEGIN CERTIFICATE----- +MIIB4jCCAYegAwIBAgIQElGrcf0kjaLwbaan1e8WZTAKBggqhkjOPQQDAjA2MRww +GgYDVQQKExNDZXJ0Z2VuIERldmVsb3BtZW50MRYwFAYDVQQLDA1maWRAcGVyc29k +ZWxsMB4XDTI0MTAxNTIxNDQxOVoXDTI1MTAxNTIxNDQxOVowNjEcMBoGA1UEChMT +Q2VydGdlbiBEZXZlbG9wbWVudDEWMBQGA1UECwwNZmlkQHBlcnNvZGVsbDBZMBMG +ByqGSM49AgEGCCqGSM49AwEHA0IABL2hCVCTaeThTEWt5UsOVe8GmVAvO9Fyb18v +NgNdDpJdvtFa6ysgnSYlTpv+7jFA5xbkgHcAOPVJDg31KuM5faajdzB1MA4GA1Ud +DwEB/wQEAwICpDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/ +MB0GA1UdDgQWBBTb6lIhkV1RLhfKNPrcdGEkxsvkrjAeBgNVHREEFzAVgglsb2Nh +bGhvc3SCBW1pbmlvggEqMAoGCCqGSM49BAMCA0kAMEYCIQDLm8+CZvB+7gRpCRr6 +BCAJBF8A3e6Pv7G1oCS1uwiUhQIhAI3Z/aBYatMkbb4VmQH1VZC8CvUyNPHS5sTa +saXcmTbe +-----END CERTIFICATE----- diff --git a/regtests/minio/docker-compose.yml b/regtests/minio/docker-compose.yml new file mode 100644 index 000000000..b61ca6537 --- /dev/null +++ b/regtests/minio/docker-compose.yml @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
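+#
+# Stands up a TLS-enabled MinIO server (self-signed certificates mounted from ./certs) plus an
+# 'mc' side-car container that creates the 'warehouse' and 'warehouse2' buckets and the
+# catalog/client users consumed by regtests/run_spark_sql_s3compatible.sh.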
+# + +services: + polaris-minio: + image: minio/minio:latest + container_name: minio + environment: + - MINIO_ROOT_USER=admin + - MINIO_ROOT_PASSWORD=password + - MINIO_DOMAIN=minio + networks: + minio_net: + aliases: + - warehouse.minio + ports: + - 9001:9001 + - 9000:9000 + volumes: + - ./miniodata:/data + - ./certs:/root/.minio/certs/ + command: ["server", "/data", "--console-address", ":9001"] + minio-configured: + depends_on: + - polaris-minio + image: minio/mc:latest + container_name: mc + networks: + minio_net: + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + volumes: + - ./certs:/root/.mc/certs + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc config host add minio https://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc rm -r --force --quiet minio/warehouse; + /usr/bin/mc mb --ignore-existing minio/warehouse; + /usr/bin/mc policy set readwrite minio/warehouse; + /usr/bin/mc rm -r --force --quiet minio/warehouse2; + /usr/bin/mc mb --ignore-existing minio/warehouse2; + /usr/bin/mc policy set readwrite minio/warehouse2; + /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog; + /usr/bin/mc admin user add minio minio-user-client 12345678-minio-client; + /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog; + /usr/bin/mc admin policy attach minio readwrite --user minio-user-client; + tail -f /dev/null + " +networks: + minio_net: + diff --git a/regtests/minio/queries-for-spark.sql b/regtests/minio/queries-for-spark.sql new file mode 100644 index 000000000..966ea6db6 --- /dev/null +++ b/regtests/minio/queries-for-spark.sql @@ -0,0 +1,42 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at + +-- http://www.apache.org/licenses/LICENSE-2.0 + +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
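+
+-- Smoke-test queries executed by regtests/run_spark_sql_s3compatible.sh against the
+-- 'manual_spark' catalog: create namespaces and a table, insert rows and read them back
+-- through a view, then create a second database in the second allowed bucket (s3://warehouse2).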
+ +CREATE DATABASE IF NOT EXISTS db1; +CREATE DATABASE IF NOT EXISTS db1.ns1; +CREATE DATABASE IF NOT EXISTS db1.ns2; +CREATE OR REPLACE TABLE db1.ns1.table1 ( f1 int, f2 int ); +INSERT INTO db1.ns1.table1 VALUES (10, 20); +INSERT INTO db1.ns1.table1 VALUES (11, 21); +INSERT INTO db1.ns1.table1 VALUES (12, 22); +SELECT * FROM db1.ns1.table1; + +CREATE OR REPLACE VIEW db1.ns2.view1 ( line_count COMMENT 'Count of lines') AS SELECT COUNT(1) as qty FROM db1.ns1.table1; +SELECT * FROM db1.ns2.view1; +INSERT INTO db1.ns1.table1 VALUES (13, 23); +SELECT * FROM db1.ns2.view1; + +CREATE DATABASE IF NOT EXISTS db1; +CREATE OR REPLACE TABLE db1.table1 ( f1 int, f2 int ); +INSERT INTO db1.ns1.table1 VALUES (3, 2); + +-- Test the second bucket allowed in the catalog +CREATE DATABASE IF NOT EXISTS db2 LOCATION 's3://warehouse2/polaris/'; +CREATE OR REPLACE TABLE db2.table1 ( f1 int, f2 int ); +INSERT INTO db2.table1 VALUES (01, 02); +SELECT * FROM db2.table1; + +quit; diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh new file mode 100755 index 000000000..fc16fa542 --- /dev/null +++ b/regtests/run_spark_sql_s3compatible.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# ----------------------------------------------------------------------------- +# Purpose: Launch the Spark SQL shell to interact with Polaris and do NRT. +# ----------------------------------------------------------------------------- +# +# Prequisite: +# This script use a MinIO with TLS. +# Please follow instructions in regtests/minio/Readme.md and update your +# java cacerts with self-signed certificate +# +# Usage: +# ./run_spark_sql_s3compatible.sh [S3-location] +# +# Description: +# - Without arguments: Runs against default minio bucket s3://warehouse/polaris +# - With one arguments: Runs against a catalog backed by minio S3. +# - [S3-location] - The S3 path to use as the default base location for the catalog. 
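+#
+# Environment variables (optional):
+#   - POLARIS_HOST - hostname of the Polaris server, defaults to localhost
+#   - REGTEST_ROOT_BEARER_TOKEN - bearer token used to authenticate against Polaris,
+#     defaults to 'principal:root;realm:default-realm'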
+#
+# Examples:
+#   - Run against the MinIO-backed S3-compatible storage:
+#     ./run_spark_sql_s3compatible.sh s3://warehouse/polaris
+
+
+clear
+if [ $# -ne 0 ] && [ $# -ne 1 ]; then
+  echo "run_spark_sql_s3compatible.sh accepts 0 or 1 argument; the argument is the bucket location, which defaults to s3://warehouse/polaris"
+  echo "Usage: ./run_spark_sql_s3compatible.sh [S3-location]"
+  exit 1
+fi
+
+# Init
+SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}"
+REGTEST_HOME=$(dirname $(realpath $0))
+cd ${REGTEST_HOME}
+
+
+if [ $# -eq 0 ]; then
+  echo "creating a catalog backed by S3, default bucket is s3://warehouse/polaris"
+  S3_LOCATION="s3://warehouse/polaris"
+fi
+
+if [ $# -eq 1 ]; then
+  echo "creating a catalog backed by S3 from the first argument of this script, following the pattern 's3://mybucket/path'"
+  S3_LOCATION=$1
+fi
+# Second location for testing catalog update
+S3_LOCATION_2="s3://warehouse2/polaris/"
+
+
+
+# check if Polaris is running
+polaris_http_code=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs)
+if [ $polaris_http_code -eq 000 ]; then
+  echo "Polaris is not running on ${POLARIS_HOST:-localhost}:8181. End of script"
+  exit 1
+fi
+
+# check if cacerts contain the MinIO certificate
+cert_response=$(keytool -list -cacerts -alias minio -storepass changeit | grep trustedCertEntry)
+echo $cert_response
+if [ -z "$cert_response" ]; then
+  echo "There is no MinIO certificate in your cacerts, please read regtests/minio/Readme.md"
+  echo "End of script :-("
+  exit 1
+fi
+
+# start minio with buckets and users
+echo -e "\n\n-------\n\n"
+echo "Starting MinIO with self-signed TLS, the s3://warehouse buckets and users; please wait a moment..."
+docker-compose --progress tty --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml up -d minio-configured
+
+echo "The MinIO browser is available during this test at https://localhost:9001 (admin/password); please accept the self-signed certificate"
+echo -e "\n\n-------\n\n"
+
+# spark setup
+export SPARK_VERSION=spark-3.5.2
+export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3
+
+echo "Doing spark setup... 
wait a moment" +./setup.sh > /dev/null 2>&1 + +if [ -z "${SPARK_HOME}"]; then + export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) +fi + + + + +# start of tests + +# creation of catalog + + +# if "credsCatalogAndClientStrategy"=="ENV_VAR_NAME" and not "VALUE", then the following environnement variables have to be available to Polaris +# CATALOG_ID=minio-user-catalog +# CATALOG_SECRET=12345678-minio-catalog +# CLIENT_ID=minio-user-client +# CLIENT_SECRET=12345678-minio-client + +echo -e "\n----\nCREATE Catalog\n" +response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d "{ + \"name\": \"manual_spark\", + \"id\": 100, + \"type\": \"INTERNAL\", + \"readOnly\": false, + \"properties\": { + \"default-base-location\": \"${S3_LOCATION}\" + }, + \"storageConfigInfo\": { + \"storageType\": \"S3_COMPATIBLE\", + \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", + \"credsCatalogAndClientStrategy\": \"VALUE\", + \"allowedLocations\": [\"${S3_LOCATION}/\"], + \"s3.path-style-access\": true, + \"s3.endpoint\": \"https://localhost:9000\", + \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", + \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", + \"s3.credentials.client.access-key-id\": \"minio-user-client\", + \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + } + }" +) +echo -e "Catalog creation - response API http code : $response_catalog \n" +if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then + echo "Problem during catalog creation" + exit 1 +fi + + + + +echo -e "Get the catalog created : \n" +curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark + +# Try to update the catalog, - adding a second bucket in the alllowed locations +echo -e "\n----\nUPDATE the catalog, - adding a second bucket in the alllowed locations\n" +curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark \ + -d "{ + \"currentEntityVersion\":1, + \"properties\": { + \"default-base-location\": \"${S3_LOCATION}\" + }, + \"storageConfigInfo\": { + \"storageType\": \"S3_COMPATIBLE\", + \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", + \"credsCatalogAndClientStrategy\": \"VALUE\", + \"allowedLocations\": [\"${S3_LOCATION}/\",\"${S3_LOCATION_2}/\"], + \"s3.path-style-access\": true, + \"s3.endpoint\": \"https://localhost:9000\", + \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", + \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", + \"s3.credentials.client.access-key-id\": \"minio-user-client\", + \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + } + }" + + +echo -e "Get the catalog updated with second allowed location : \n" +curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark + + +echo -e "\n----\nAdd TABLE_WRITE_DATA to the catalog's catalog_admin role since by default 
it can only manage access and metadata\n" +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr + + +echo -e "\n----\nAssign the catalog_admin to the service_admin.\n" +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \ + -d '{"name": "catalog_admin"}' > /dev/stderr + + +echo -e "\n----\nStart Spark-sql to test Polaris catalog with queries\n" +${SPARK_HOME}/bin/spark-sql --verbose \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \ + --conf spark.sql.catalog.polaris.warehouse=manual_spark \ + --conf spark.sql.defaultCatalog=polaris \ + --conf spark.hadoop.hive.cli.print.header=true \ + -f "minio/queries-for-spark.sql" + + +echo -e "\n\n\nEnd of tests, a table and a view data with displayed should be visible in log above" +echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" +echo ":-)" +echo "" +docker-compose --progress quiet --project-name minio --project-directory minio/ -f minio/docker-compose.yml down + diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index f61c67620..80b0729d2 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -39,6 +39,7 @@ import org.apache.polaris.core.storage.aws.AwsCredentialsStorageIntegration; import org.apache.polaris.core.storage.azure.AzureCredentialsStorageIntegration; import org.apache.polaris.core.storage.gcp.GcpCredentialsStorageIntegration; +import org.apache.polaris.core.storage.s3.S3CredentialsStorageIntegration; import software.amazon.awssdk.services.sts.StsClient; @ApplicationScoped @@ -73,6 +74,9 @@ public PolarisStorageIntegrationProviderImpl( (PolarisStorageIntegration) new AwsCredentialsStorageIntegration(stsClientSupplier.get()); break; + case S3_COMPATIBLE: + storageIntegration = (PolarisStorageIntegration) new S3CredentialsStorageIntegration(); + break; case GCS: storageIntegration = (PolarisStorageIntegration) diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index 54c3b9675..d4a1f44fb 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -862,6 +862,7 @@ components: type: string enum: - S3 + - S3_COMPATIBLE - GCS - AZURE - FILE @@ -877,6 +878,7 @@ components: propertyName: storageType mapping: S3: "#/components/schemas/AwsStorageConfigInfo" + S3_COMPATIBLE: "#/components/schemas/S3StorageConfigInfo" AZURE: "#/components/schemas/AzureStorageConfigInfo" GCS: "#/components/schemas/GcpStorageConfigInfo" FILE: "#/components/schemas/FileStorageConfigInfo" @@ -905,6 +907,58 @@ components: required: - roleArn + S3StorageConfigInfo: + type: object + description: S3 compatible 
storage configuration info (MinIO, Dell ECS, NetApp StorageGRID, ...)
+      allOf:
+        - $ref: '#/components/schemas/StorageConfigInfo'
+      properties:
+        credsCatalogAndClientStrategy:
+          type: string
+          enum:
+            - VALUE
+            - ENV_VAR_NAME
+          default: ENV_VAR_NAME
+          example: "ACCESS_KEY"
+          description: With VALUE the keys are sent directly in this request and may appear in logs. With ENV_VAR_NAME (the variable name without the dollar sign), only the reference appears in logs, but the value must be available as an environment variable in the context where Polaris is running
+        credsVendingStrategy:
+          type: string
+          enum:
+            - TOKEN_WITH_ASSUME_ROLE
+            - KEYS_SAME_AS_CATALOG
+            - KEYS_DEDICATED_TO_CLIENT
+          default: TOKEN_WITH_ASSUME_ROLE
+          description: The strategy the catalog uses to vend credentials to clients. The options are the same keys as the catalog, keys dedicated to clients, or tokens obtained through the STS 'AssumeRole' method (MinIO, Dell ECS, NetApp StorageGRID)
+        s3.path-style-access:
+          type: boolean
+          description: If true, use path-style access
+          default: false
+        s3.endpoint:
+          type: string
+          description: The S3 endpoint
+          example: "http[s]://host:port"
+        s3.credentials.catalog.access-key-id:
+          type: string
+          description: The ACCESS_KEY_ID used by the catalog to communicate with S3
+          example: "$AWS_ACCESS_KEY_ID"
+        s3.credentials.catalog.secret-access-key:
+          type: string
+          description: The SECRET_ACCESS_KEY used by the catalog to communicate with S3
+          example: "$AWS_SECRET_ACCESS_KEY"
+        s3.credentials.client.access-key-id:
+          type: string
+          description: Optional - the ACCESS_KEY_ID vended by the catalog to clients when the KEYS_DEDICATED_TO_CLIENT strategy is selected
+          example: "$AWS_ACCESS_KEY_ID"
+        s3.credentials.client.secret-access-key:
+          type: string
+          description: Optional - the SECRET_ACCESS_KEY vended by the catalog to clients when the KEYS_DEDICATED_TO_CLIENT strategy is selected
+          example: "$AWS_SECRET_ACCESS_KEY"
+      required:
+        - credsVendingStrategy
+        - s3.endpoint
+        - s3.credentials.catalog.access-key-id
+        - s3.credentials.catalog.secret-access-key
+
   AzureStorageConfigInfo:
     type: object
     description: azure storage configuration info

From 1ec4f07bd11a0f0a9dbacbaa2e24419677749022 Mon Sep 17 00:00:00 2001
From: lefebsy
Date: Mon, 21 Oct 2024 22:30:38 +0200
Subject: [PATCH 2/7] Create Readme.md for s3-compatible

Better descriptions, typos & comments
Refactoring with skipCredentialSubscopingIndirection -> finally removed
Rebase with AWS updates from main branch
 adding roleArn, camelCase refactoring, typo, cleaning
Add default AWS credentials provider for STS Error
Co-authored-by: Gerrit-K
Rebase from quarkus and keep only sts with some suggestions from code review
helm unit test
---
 helm/polaris/tests/configmap_test.yaml        | 286 ++++++++----------
 .../polaris/core/entity/CatalogEntity.java    |  52 ++--
 .../storage/PolarisCredentialProperty.java    |   3 +-
 .../PolarisStorageConfigurationInfo.java      |   4 +-
 .../s3/S3CredentialsStorageIntegration.java   | 138 ---------
 .../s3/S3StorageConfigurationInfo.java        | 164 ----------
 ...mpatibleCredentialsStorageIntegration.java | 220 ++++++++++++++
 .../S3CompatibleStorageConfigurationInfo.java | 113 +++++++
 .../main/resources/application-it.properties  |   2 +-
 .../src/main/resources/application.properties |   2 +-
 regtests/minio/Readme.md                      |  11 +-
 regtests/minio/docker-compose.yml             |   4 -
 regtests/minio/miniodata/Readme.md            |   1 +
 regtests/run_spark_sql_s3compatible.sh        |  70 +++--
 ...PolarisStorageIntegrationProviderImpl.java |   6 +-
 spec/polaris-management-service.yml           |  57 ++--
 16 files changed, 553 insertions(+), 580 
deletions(-) delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java create mode 100644 regtests/minio/miniodata/Readme.md diff --git a/helm/polaris/tests/configmap_test.yaml b/helm/polaris/tests/configmap_test.yaml index ef725ec4f..e070bf0dc 100644 --- a/helm/polaris/tests/configmap_test.yaml +++ b/helm/polaris/tests/configmap_test.yaml @@ -183,159 +183,141 @@ tests: set: logging: { file: { enabled: true, json: true }, console: { enabled: true, json: true } } asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.file.enable=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.console.enable=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.file.json=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.console.json=true" } - - - it: should include logging categories - set: - logging: - categories: - # compact style - org.acme: DEBUG - # expanded style - org: - acme: - service: INFO - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.category.\"org.acme\".level=DEBUG" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.category.\"org.acme.service\".level=INFO" } - - - it: should include MDC context - set: - logging: - mdc: - # compact style - org.acme: foo - # expanded style - org: - acme: - service: foo - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.log.mdc.\"org.acme\"=foo" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.log.mdc.\"org.acme.service\"=foo" } - - - it: should include telemetry configuration - set: - tracing: { enabled: true, endpoint: http://custom:4317, attributes: { service.name: custom, foo: bar } } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.exporter.otlp.endpoint=http://custom:4317" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.resource.attributes\\[\\d\\]=service.name=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.resource.attributes\\[\\d\\]=foo=bar" } - - - it: should include set sample rate numeric - set: - tracing: { enabled: true, sample: "0.123" } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=parentbased_traceidratio" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler.arg=0.123" } - - - it: should include set sample rate "all" - set: - tracing: { enabled: true, sample: "all" } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=parentbased_always_on" } - - - it: should include set sample rate "none" - set: - tracing: { enabled: true, sample: "none" } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=always_off" } - - - it: should disable tracing by default - asserts: - - matchRegex: { path: 
'data["application.properties"]', pattern: "quarkus.otel.sdk.disabled=true" } - - - it: should disable tracing - set: - tracing: { enabled: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.sdk.disabled=true" } - - - it: should include custom metrics - set: - metrics: { enabled: true, tags: { app: custom, foo: bar } } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.metrics.tags.app=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.metrics.tags.foo=bar" } - - - it: should disable metrics - set: - metrics: { enabled: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.micrometer.enabled=false" } - - - it: should include advanced configuration - set: - advancedConfig: - # compact style - quarkus.compact.custom: true - # expanded style - quarkus: - expanded: - custom: foo - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.compact.custom=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.expanded.custom=foo" } - - - it: should not include CORS configuration by default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors" } - not: true - - - it: should include CORS configuration if defined - set: - cors: { allowedOrigins: [ "http://localhost:3000", "https://localhost:4000" ], allowedMethods: [ "GET", "POST" ], allowedHeaders: [ "X-Custom1", "X-Custom2" ], exposedHeaders: [ "X-Exposed-Custom1", "X-Exposed-Custom2" ], accessControlMaxAge: "PT1H", accessControlAllowCredentials: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.origins=http://localhost:3000,https://localhost:4000" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.methods=GET,POST" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.headers=X-Custom1,X-Custom2" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.exposed-headers=X-Exposed-Custom1,X-Exposed-Custom2" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.access-control-max-age=PT1H" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.access-control-allow-credentials=false" } - - - it: should configure rate-limiter with default values - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=no-op" } - - - it: should configure rate-limiter no-op - set: - rateLimiter.type: no-op - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=no-op" } - - - it: should configure rate-limiter with default token bucket values - set: - rateLimiter.type: default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=default" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.type=default" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.requests-per-second=9999" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.window=PT10S" } - - - it: should configure rate-limiter with custom token bucket values - set: - rateLimiter: - type: custom - tokenBucket: - type: 
custom - requestsPerSecond: 1234 - window: PT5S - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.type=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.requests-per-second=1234" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.window=PT5S" } - - - it: should not include tasks configuration by default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks" } - not: true - - - it: should include tasks configuration if defined + - equal: + path: data + value: + polaris-server.yml: |- + authenticator: + class: org.apache.polaris.service.auth.TestInlineBearerTokenPolarisAuthenticator + callContextResolver: + type: default + cors: + allowed-credentials: true + allowed-headers: + - '*' + allowed-methods: + - PATCH + - POST + - DELETE + - GET + - PUT + allowed-origins: + - http://localhost:8080 + allowed-timing-origins: + - http://localhost:8080 + exposed-headers: + - '*' + preflight-max-age: 600 + defaultRealms: + - default-realm + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + - S3_COMPATIBLE + - GCS + - AZURE + - FILE + io: + factoryType: default + logging: + appenders: + - logFormat: '%-5p [%d{ISO8601} - %-6r] [%t] [%X{aid}%X{sid}%X{tid}%X{wid}%X{oid}%X{srv}%X{job}%X{rid}] + %c{30}: %m %kvp%n%ex' + threshold: ALL + type: console + level: INFO + loggers: + org.apache.iceberg.rest: DEBUG + org.apache.polaris: DEBUG + maxRequestBodyBytes: -1 + metaStoreManager: + type: in-memory + oauth2: + type: test + rateLimiter: + type: no-op + realmContextResolver: + type: default + server: + adminConnectors: + - port: 8182 + type: http + applicationConnectors: + - port: 8181 + type: http + maxThreads: 200 + minThreads: 10 + requestLog: + appenders: + - type: console + - it: should set config map data (auto sorted) set: - tasks: { maxConcurrentTasks: 10, maxQueuedTasks: 20 } + polarisServerConfig: + server: + maxThreads: 200 + minThreads: 10 + applicationConnectors: + - type: http + port: 8181 + adminConnectors: + - type: http + port: 8182 + requestLog: + appenders: + - type: console + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + callContextResolver: + type: default + realmContextResolver: + type: default + defaultRealms: + - default-realm + metaStoreManager: + type: eclipse-link + persistence-unit: polaris + conf-file: /eclipselink-config/conf.jar!/persistence.xml + io: + factoryType: default + oauth2: + type: default + tokenBroker: + type: symmetric-key + secret: polaris + authenticator: + class: org.apache.polaris.service.auth.DefaultPolarisAuthenticator + cors: + allowed-origins: + - http://localhost:8080 + allowed-timing-origins: + - http://localhost:8080 + allowed-methods: + - PATCH + - POST + - DELETE + - GET + - PUT + allowed-headers: + - "*" + exposed-headers: + - "*" + preflight-max-age: 600 + allowed-credentials: true + logging: + level: INFO + loggers: + org.apache.iceberg.rest: INFO + org.apache.polaris: INFO + appenders: + - type: console + threshold: ALL + logFormat: "%-5p [%d{ISO8601} - %-6r] [%t] [%X{aid}%X{sid}%X{tid}%X{wid}%X{oid}%X{srv}%X{job}%X{rid}] %c{30}: %m %kvp%n%ex" + maxRequestBodyBytes: -1 
+ rateLimiter: + type: no-op asserts: - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks.max-concurrent-tasks=10" } - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks.max-queued-tasks=20" } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index f8a37dd6f..ab70b9b49 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -38,14 +38,14 @@ import org.apache.polaris.core.admin.model.FileStorageConfigInfo; import org.apache.polaris.core.admin.model.GcpStorageConfigInfo; import org.apache.polaris.core.admin.model.PolarisCatalog; -import org.apache.polaris.core.admin.model.S3StorageConfigInfo; +import org.apache.polaris.core.admin.model.S3CompatibleStorageConfigInfo; import org.apache.polaris.core.admin.model.StorageConfigInfo; import org.apache.polaris.core.storage.FileStorageConfigurationInfo; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; -import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo; /** * Catalog specific subclass of the {@link PolarisEntity} that handles conversion from the {@link @@ -143,30 +143,19 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) .setRegion(awsConfig.getRegion()) .build(); } - if (configInfo instanceof S3StorageConfigurationInfo) { - S3StorageConfigurationInfo s3Config = (S3StorageConfigurationInfo) configInfo; - return S3StorageConfigInfo.builder() + if (configInfo instanceof S3CompatibleStorageConfigurationInfo) { + S3CompatibleStorageConfigurationInfo s3Config = + (S3CompatibleStorageConfigurationInfo) configInfo; + return S3CompatibleStorageConfigInfo.builder() .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) .setS3Endpoint(s3Config.getS3Endpoint()) .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) - .setCredsVendingStrategy( - org.apache.polaris.core.admin.model.S3StorageConfigInfo.CredsVendingStrategyEnum - .valueOf( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsVendingStrategyEnum.class, - s3Config.getCredsVendingStrategy().name())) - .setCredsCatalogAndClientStrategy( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsCatalogAndClientStrategyEnum.valueOf( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsCatalogAndClientStrategyEnum.class, - s3Config.getCredsCatalogAndClientStrategy().name())) .setAllowedLocations(s3Config.getAllowedLocations()) - .setS3CredentialsCatalogAccessKeyId(s3Config.getS3CredentialsCatalogAccessKeyId()) - .setS3CredentialsCatalogSecretAccessKey( + .setS3CredentialsCatalogAccessKeyEnvVar(s3Config.getS3CredentialsCatalogAccessKeyId()) + .setS3CredentialsCatalogSecretAccessKeyEnvVar( s3Config.getS3CredentialsCatalogSecretAccessKey()) - .setS3CredentialsClientAccessKeyId(s3Config.getS3CredentialsClientSecretAccessKey()) - .setS3CredentialsClientSecretAccessKey(s3Config.getS3CredentialsClientAccessKeyId()) + .setS3Region(s3Config.getS3Region()) + .setS3RoleArn(s3Config.getS3RoleArn()) .build(); } if 
(configInfo instanceof AzureStorageConfigurationInfo) { @@ -280,24 +269,17 @@ public Builder setStorageConfigurationInfo( break; case S3_COMPATIBLE: - S3StorageConfigInfo s3ConfigModel = (S3StorageConfigInfo) storageConfigModel; + S3CompatibleStorageConfigInfo s3ConfigModel = + (S3CompatibleStorageConfigInfo) storageConfigModel; config = - new S3StorageConfigurationInfo( + new S3CompatibleStorageConfigurationInfo( PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, - S3StorageConfigInfo.CredsVendingStrategyEnum.valueOf( - org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo - .CredsVendingStrategyEnum.class, - s3ConfigModel.getCredsVendingStrategy().name()), - S3StorageConfigInfo.CredsCatalogAndClientStrategyEnum.valueOf( - org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo - .CredsCatalogAndClientStrategyEnum.class, - s3ConfigModel.getCredsCatalogAndClientStrategy().name()), s3ConfigModel.getS3Endpoint(), - s3ConfigModel.getS3CredentialsCatalogAccessKeyId(), - s3ConfigModel.getS3CredentialsCatalogSecretAccessKey(), - s3ConfigModel.getS3CredentialsClientAccessKeyId(), - s3ConfigModel.getS3CredentialsClientSecretAccessKey(), + s3ConfigModel.getS3CredentialsCatalogAccessKeyEnvVar(), + s3ConfigModel.getS3CredentialsCatalogSecretAccessKeyEnvVar(), s3ConfigModel.getS3PathStyleAccess(), + s3ConfigModel.getS3Region(), + s3ConfigModel.getS3RoleArn(), new ArrayList<>(allowedLocations)); break; case AZURE: diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java index 13838e6af..b7f1a9808 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java @@ -24,7 +24,8 @@ public enum PolarisCredentialProperty { AWS_SECRET_KEY(String.class, "s3.secret-access-key", "the aws access key secret"), AWS_TOKEN(String.class, "s3.session-token", "the aws scoped access token"), AWS_ENDPOINT(String.class, "s3.endpoint", "the aws s3 endpoint"), - AWS_PATH_STYLE_ACCESS(Boolean.class, "s3.path-style-access", "the aws s3 path style access"), + AWS_PATH_STYLE_ACCESS( + Boolean.class, "s3.path-style-access", "whether or not to use path-style access"), CLIENT_REGION( String.class, "client.region", "region to configure client for making requests to AWS"), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java index 4f290e77b..c6eac4f7e 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java @@ -47,7 +47,7 @@ import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; -import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,7 +63,7 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME) @JsonSubTypes({ @JsonSubTypes.Type(value = AwsStorageConfigurationInfo.class), - @JsonSubTypes.Type(value = 
S3StorageConfigurationInfo.class), + @JsonSubTypes.Type(value = S3CompatibleStorageConfigurationInfo.class), @JsonSubTypes.Type(value = AzureStorageConfigurationInfo.class), @JsonSubTypes.Type(value = GcpStorageConfigurationInfo.class), @JsonSubTypes.Type(value = FileStorageConfigurationInfo.class), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java deleted file mode 100644 index 5fdbbdf37..000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.storage.s3; - -import java.net.URI; -import java.util.EnumMap; -import java.util.Set; -import org.apache.polaris.core.PolarisDiagnostics; -import org.apache.polaris.core.storage.InMemoryStorageIntegration; -import org.apache.polaris.core.storage.PolarisCredentialProperty; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; -import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.sts.StsClient; -import software.amazon.awssdk.services.sts.StsClientBuilder; -import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; -import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; - -/** Credential vendor that supports generating */ -public class S3CredentialsStorageIntegration - extends InMemoryStorageIntegration { - - private static final Logger LOGGER = - LoggerFactory.getLogger(S3CredentialsStorageIntegration.class); - - private StsClient stsClient; - - // Constructor - public S3CredentialsStorageIntegration() { - super(S3CredentialsStorageIntegration.class.getName()); - } - - public void createStsClient(S3StorageConfigurationInfo s3storageConfig) { - - LOGGER.debug("S3Compatible - createStsClient()"); - - LOGGER.info( - "S3Compatible - AWS STS endpoint is unique and different from the S3 Endpoint. AWS SDK need to be overided with dedicated Endpoint from S3Compatible, otherwise the AWS STS url is targeted"); - - StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); - - stsBuilder.region( - Region - .US_WEST_1); // default region to avoid bug, because most (all?) 
S3 compatible softwares - // do not care about regions - stsBuilder.endpointOverride(URI.create(s3storageConfig.getS3Endpoint())); - stsBuilder.credentialsProvider( - StaticCredentialsProvider.create( - AwsBasicCredentials.create( - s3storageConfig.getS3CredentialsCatalogAccessKeyId(), - s3storageConfig.getS3CredentialsCatalogSecretAccessKey()))); - - this.stsClient = stsBuilder.build(); - LOGGER.debug("S3Compatible - stsClient successfully built"); - } - - /** {@inheritDoc} */ - @Override - public EnumMap getSubscopedCreds( - @NotNull PolarisDiagnostics diagnostics, - @NotNull S3StorageConfigurationInfo storageConfig, - boolean allowListOperation, - @NotNull Set allowedReadLocations, - @NotNull Set allowedWriteLocations) { - - LOGGER.debug("S3Compatible - getSubscopedCreds - applying credential strategy"); - - EnumMap propertiesMap = - new EnumMap<>(PolarisCredentialProperty.class); - propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); - propertiesMap.put( - PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, - storageConfig.getS3PathStyleAccess().toString()); - - switch (storageConfig.getCredsVendingStrategy()) { - case KEYS_SAME_AS_CATALOG: - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, - storageConfig.getS3CredentialsCatalogAccessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, - storageConfig.getS3CredentialsCatalogSecretAccessKey()); - break; - - case KEYS_DEDICATED_TO_CLIENT: - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, - storageConfig.getS3CredentialsClientAccessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, - storageConfig.getS3CredentialsClientSecretAccessKey()); - break; - - case TOKEN_WITH_ASSUME_ROLE: - if (this.stsClient == null) { - createStsClient(storageConfig); - } - LOGGER.debug("S3Compatible - assumeRole !"); - AssumeRoleResponse response = - stsClient.assumeRole( - AssumeRoleRequest.builder().roleSessionName("PolarisCredentialsSTS").build()); - - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); - propertiesMap.put( - PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); - break; - - // @TODO implement the MinIO external OpenID Connect - - // https://min.io/docs/minio/linux/developers/security-token-service.html?ref=docs-redirect#id1 - // case TOKEN_WITH_ASSUME_ROLE_WITH_WEB_IDENTITY: - // break; - } - - return propertiesMap; - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java deleted file mode 100644 index c66deeff7..000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.storage.s3; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.MoreObjects; -import java.util.List; -import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -/** Polaris Storage Configuration information for an S3 Compatible solution, MinIO, Dell ECS... */ -public class S3StorageConfigurationInfo extends PolarisStorageConfigurationInfo { - - // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required - // for allowed read and write locations for subscoping creds. - @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; - private @NotNull CredsVendingStrategyEnum credsVendingStrategy; - private @NotNull CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy; - private @NotNull String s3endpoint; - private @NotNull Boolean s3pathStyleAccess; - private @NotNull String s3CredentialsCatalogAccessKeyId; - private @NotNull String s3CredentialsCatalogSecretAccessKey; - private @Nullable String s3CredentialsClientAccessKeyId; - private @Nullable String s3CredentialsClientSecretAccessKey; - - // Define how and what the catalog client will receive as credentials - public static enum CredsVendingStrategyEnum { - KEYS_SAME_AS_CATALOG, - KEYS_DEDICATED_TO_CLIENT, - TOKEN_WITH_ASSUME_ROLE; - }; - - // Define how the access and secret keys will be receive during the catalo creation, if - // ENV_VAR_NAME, the variable must exist in the Polaris running environement - it is more secured, - // but less dynamic - public static enum CredsCatalogAndClientStrategyEnum { - VALUE, - ENV_VAR_NAME; - }; - - // Constructor - @JsonCreator - public S3StorageConfigurationInfo( - @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, - @JsonProperty(value = "credsVendingStrategy", required = true) @NotNull - CredsVendingStrategyEnum credsVendingStrategy, - @JsonProperty(value = "credsCatalogAndClientStrategy", required = true) @NotNull - CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy, - @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, - @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @NotNull - String s3CredentialsCatalogAccessKeyId, - @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @NotNull - String s3CredentialsCatalogSecretAccessKey, - @JsonProperty(value = "s3CredentialsClientAccessKeyId", required = false) @Nullable - String s3CredentialsClientAccessKeyId, - @JsonProperty(value = "s3CredentialsClientSecretAccessKey", required = false) @Nullable - String s3CredentialsClientSecretAccessKey, - @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull - Boolean s3PathStyleAccess, - @JsonProperty(value = "allowedLocations", required = true) @NotNull - List allowedLocations) { - - // Classic super and constructor stuff 
storing data in private internal properties - super(storageType, allowedLocations); - validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); - this.credsVendingStrategy = - CredsVendingStrategyEnum.valueOf( - CredsVendingStrategyEnum.class, credsVendingStrategy.name()); - this.credsCatalogAndClientStrategy = - CredsCatalogAndClientStrategyEnum.valueOf( - CredsCatalogAndClientStrategyEnum.class, credsCatalogAndClientStrategy.name()); - this.s3pathStyleAccess = s3PathStyleAccess; - this.s3endpoint = s3Endpoint; - - // The constructor is called multiple time during catalog life - // to do substitution only once, there is a basic if null test, otherwise affect the data from - // the "Polaris cache storage" - // this way the first time the value is retrived from the name of the variable - // next time the getenv will try to retrive a variable but is using the value as a nome, it will - // be null, we affect the value provided by "Polaris cache storage" - if (CredsCatalogAndClientStrategyEnum.ENV_VAR_NAME.equals(credsCatalogAndClientStrategy)) { - String cai = System.getenv(s3CredentialsCatalogAccessKeyId); - String cas = System.getenv(s3CredentialsCatalogSecretAccessKey); - String cli = System.getenv(s3CredentialsClientAccessKeyId); - String cls = System.getenv(s3CredentialsClientSecretAccessKey); - this.s3CredentialsCatalogAccessKeyId = (cai != null) ? cai : s3CredentialsCatalogAccessKeyId; - this.s3CredentialsCatalogSecretAccessKey = - (cas != null) ? cas : s3CredentialsCatalogSecretAccessKey; - this.s3CredentialsClientAccessKeyId = (cli != null) ? cli : s3CredentialsClientAccessKeyId; - this.s3CredentialsClientSecretAccessKey = - (cls != null) ? cls : s3CredentialsClientSecretAccessKey; - } else { - this.s3CredentialsCatalogAccessKeyId = s3CredentialsCatalogAccessKeyId; - this.s3CredentialsCatalogSecretAccessKey = s3CredentialsCatalogSecretAccessKey; - this.s3CredentialsClientAccessKeyId = s3CredentialsClientAccessKeyId; - this.s3CredentialsClientSecretAccessKey = s3CredentialsClientSecretAccessKey; - } - } - - public @NotNull CredsVendingStrategyEnum getCredsVendingStrategy() { - return this.credsVendingStrategy; - } - - public @NotNull CredsCatalogAndClientStrategyEnum getCredsCatalogAndClientStrategy() { - return this.credsCatalogAndClientStrategy; - } - - public @NotNull String getS3Endpoint() { - return this.s3endpoint; - } - - public @NotNull Boolean getS3PathStyleAccess() { - return this.s3pathStyleAccess; - } - - public @NotNull String getS3CredentialsCatalogAccessKeyId() { - return this.s3CredentialsCatalogAccessKeyId; - } - - public @NotNull String getS3CredentialsCatalogSecretAccessKey() { - return this.s3CredentialsCatalogSecretAccessKey; - } - - public @Nullable String getS3CredentialsClientAccessKeyId() { - return this.s3CredentialsClientAccessKeyId; - } - - public @Nullable String getS3CredentialsClientSecretAccessKey() { - return this.s3CredentialsClientSecretAccessKey; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("storageType", getStorageType()) - .add("storageType", getStorageType().name()) - .add("allowedLocation", getAllowedLocations()) - .toString(); - } - - @Override - public String getFileIoImplClassName() { - return "org.apache.iceberg.aws.s3.S3FileIO"; - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java new file 
mode 100644 index 000000000..3dfb03814 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.storage.s3compatible; + +import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; + +import jakarta.annotation.Nonnull; +import java.net.URI; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import org.apache.polaris.core.PolarisConfigurationStore; +import org.apache.polaris.core.PolarisDiagnostics; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.storage.InMemoryStorageIntegration; +import org.apache.polaris.core.storage.PolarisCredentialProperty; +import org.apache.polaris.core.storage.StorageUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.policybuilder.iam.IamConditionOperator; +import software.amazon.awssdk.policybuilder.iam.IamEffect; +import software.amazon.awssdk.policybuilder.iam.IamPolicy; +import software.amazon.awssdk.policybuilder.iam.IamResource; +import software.amazon.awssdk.policybuilder.iam.IamStatement; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; + +/** S3 compatible implementation of PolarisStorageIntegration */ +public class S3CompatibleCredentialsStorageIntegration + extends InMemoryStorageIntegration<S3CompatibleStorageConfigurationInfo> { + + private static final Logger LOGGER = + LoggerFactory.getLogger(S3CompatibleCredentialsStorageIntegration.class); + private final PolarisConfigurationStore configurationStore; + + public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore configurationStore) { + super(configurationStore, S3CompatibleCredentialsStorageIntegration.class.getName()); + this.configurationStore = configurationStore; + } + + /** {@inheritDoc} */ + @Override + public EnumMap<PolarisCredentialProperty, String> getSubscopedCreds( + @Nonnull RealmContext realmContext, + @Nonnull PolarisDiagnostics diagnostics, + @Nonnull S3CompatibleStorageConfigurationInfo storageConfig, + boolean allowListOperation, + @Nonnull Set<String> allowedReadLocations, + @Nonnull Set<String> allowedWriteLocations) { + + StsClient stsClient; + String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); + String caS =
System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); + + EnumMap<PolarisCredentialProperty, String> propertiesMap = + new EnumMap<>(PolarisCredentialProperty.class); + propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); + propertiesMap.put( + PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, + storageConfig.getS3PathStyleAccess().toString()); + if (storageConfig.getS3Region() != null) { + propertiesMap.put(PolarisCredentialProperty.CLIENT_REGION, storageConfig.getS3Region()); + } + + LOGGER.debug("S3Compatible - createStsClient()"); + try { + StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); + if (caI != null && caS != null) { + // otherwise the default AWS credentials provider chain builds credentials from a profile or the standard AWS environment variables + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); + LOGGER.debug( + "S3Compatible - stsClient using keys from catalog settings - overriding default constructor"); + } + stsClient = stsBuilder.build(); + LOGGER.debug("S3Compatible - stsClient successfully built"); + AssumeRoleResponse response = + stsClient.assumeRole( + AssumeRoleRequest.builder() + .roleSessionName("PolarisCredentialsSTS") + .roleArn( + (storageConfig.getS3RoleArn() == null) ? "" : storageConfig.getS3RoleArn()) + .policy( + policyString(allowListOperation, allowedReadLocations, allowedWriteLocations) + .toJson()) + .durationSeconds( + configurationStore.getConfiguration( + realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) + .build()); + propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); + propertiesMap.put(PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); + LOGGER.debug( + "S3Compatible - assumeRole - Token Expiration at : {}", + response.credentials().expiration().toString()); + + } catch (Exception e) { + LOGGER.error("S3Compatible - stsClient - build failure : {}", e.getMessage()); + } + + return propertiesMap; + } + + /* + * function from AwsCredentialsStorageIntegration but without roleArn parameter + */ + private IamPolicy policyString( + boolean allowList, Set<String> readLocations, Set<String> writeLocations) { + IamPolicy.Builder policyBuilder = IamPolicy.builder(); + IamStatement.Builder allowGetObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetObject") + .addAction("s3:GetObjectVersion"); + Map<String, IamStatement.Builder> bucketListStatementBuilder = new HashMap<>(); + Map<String, IamStatement.Builder> bucketGetLocationStatementBuilder = new HashMap<>(); + + String arnPrefix = "arn:aws:s3:::"; + Stream.concat(readLocations.stream(), writeLocations.stream()) + .distinct() + .forEach( + location -> { + URI uri = URI.create(location); + allowGetObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + final var bucket = arnPrefix + StorageUtil.getBucket(uri); + if (allowList) { + bucketListStatementBuilder + .computeIfAbsent( + bucket, + (String key) -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:ListBucket") + .addResource(key)) + .addCondition( + IamConditionOperator.STRING_LIKE, + "s3:prefix", + StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); + } + bucketGetLocationStatementBuilder.computeIfAbsent( + bucket, + key ->
IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetBucketLocation") + .addResource(key)); + }); + + if (!writeLocations.isEmpty()) { + IamStatement.Builder allowPutObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:PutObject") + .addAction("s3:DeleteObject"); + writeLocations.forEach( + location -> { + URI uri = URI.create(location); + allowPutObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + }); + policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); + } + if (!bucketListStatementBuilder.isEmpty()) { + bucketListStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + } else if (allowList) { + // add list privilege with 0 resources + policyBuilder.addStatement( + IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); + } + + bucketGetLocationStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); + } + + /* function from AwsCredentialsStorageIntegration */ + private static @Nonnull String parseS3Path(URI uri) { + String bucket = StorageUtil.getBucket(uri); + String path = trimLeadingSlash(uri.getPath()); + return String.join("/", bucket, path); + } + + /* function from AwsCredentialsStorageIntegration */ + private static @Nonnull String trimLeadingSlash(String path) { + if (path.startsWith("/")) { + path = path.substring(1); + } + return path; + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java new file mode 100644 index 000000000..776279546 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.storage.s3compatible; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import java.util.List; +import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** + * S3-Compatible Storage Configuration. This class holds the parameters needed to connect to + * S3-compatible storage services such as MinIO, Ceph, Dell ECS, etc. 
+ */ +public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigurationInfo { + + // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required + // for allowed read and write locations for subscoping creds. + @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; + private final @NotNull String s3Endpoint; + private final @Nullable String s3CredentialsCatalogAccessKeyId; + private final @Nullable String s3CredentialsCatalogSecretAccessKey; + private final @NotNull Boolean s3PathStyleAccess; + private final @Nullable String s3Region; + private final @Nullable String s3RoleArn; + + @JsonCreator + public S3CompatibleStorageConfigurationInfo( + @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, + @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @Nullable + String s3CredentialsCatalogAccessKeyId, + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @Nullable + String s3CredentialsCatalogSecretAccessKey, + @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull + Boolean s3PathStyleAccess, + @JsonProperty(value = "s3Region", required = false) @Nullable String s3Region, + @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn, + @JsonProperty(value = "allowedLocations", required = true) @Nullable + List<String> allowedLocations) { + + super(StorageType.S3_COMPATIBLE, allowedLocations); + validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); + this.s3PathStyleAccess = s3PathStyleAccess; + this.s3Endpoint = s3Endpoint; + this.s3CredentialsCatalogAccessKeyId = + (s3CredentialsCatalogAccessKeyId == null) ? "" : s3CredentialsCatalogAccessKeyId; + this.s3CredentialsCatalogSecretAccessKey = + (s3CredentialsCatalogSecretAccessKey == null) ?
"" : s3CredentialsCatalogSecretAccessKey; + this.s3Region = s3Region; + this.s3RoleArn = s3RoleArn; + } + + public @NotNull String getS3Endpoint() { + return this.s3Endpoint; + } + + public @NotNull Boolean getS3PathStyleAccess() { + return this.s3PathStyleAccess; + } + + public @Nullable String getS3CredentialsCatalogAccessKeyId() { + return this.s3CredentialsCatalogAccessKeyId; + } + + public @Nullable String getS3CredentialsCatalogSecretAccessKey() { + return this.s3CredentialsCatalogSecretAccessKey; + } + + public @Nullable String getS3RoleArn() { + return this.s3RoleArn; + } + + public @Nullable String getS3Region() { + return this.s3Region; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("storageType", getStorageType().name()) + .add("allowedLocation", getAllowedLocations()) + .add("s3Region", getS3Region()) + .add("s3RoleArn", getS3RoleArn()) + .add("s3PathStyleAccess", getS3PathStyleAccess()) + .add("s3Endpoint", getS3Endpoint()) + .toString(); + } + + @Override + public String getFileIoImplClassName() { + return "org.apache.iceberg.aws.s3.S3FileIO"; + } +} diff --git a/quarkus/defaults/src/main/resources/application-it.properties b/quarkus/defaults/src/main/resources/application-it.properties index 5f46d203f..e4ad1a6e0 100644 --- a/quarkus/defaults/src/main/resources/application-it.properties +++ b/quarkus/defaults/src/main/resources/application-it.properties @@ -35,7 +35,7 @@ polaris.features.defaults."ALLOW_WILDCARD_LOCATION"=true polaris.features.defaults."ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING"=true polaris.features.defaults."INITIALIZE_DEFAULT_CATALOG_FILEIO_FOR_it"=true polaris.features.defaults."SKIP_CREDENTIAL_SUBSCOPING_INDIRECTION"=true -polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3","GCS","AZURE"] +polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3","S3_COMPATIBLE","GCS","AZURE"] polaris.realm-context.realms=POLARIS,OTHER diff --git a/quarkus/defaults/src/main/resources/application.properties b/quarkus/defaults/src/main/resources/application.properties index d3a205737..9bc6cc03e 100644 --- a/quarkus/defaults/src/main/resources/application.properties +++ b/quarkus/defaults/src/main/resources/application.properties @@ -90,7 +90,7 @@ polaris.realm-context.header-name=Polaris-Realm polaris.realm-context.require-header=false polaris.features.defaults."ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING"=false -polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","GCS","AZURE","FILE"] +polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","S3_COMPATIBLE","GCS","AZURE","FILE"] # realm overrides # polaris.features.realm-overrides."my-realm"."INITIALIZE_DEFAULT_CATALOG_FILEIO_FOR_TEST"=true # polaris.features.realm-overrides."my-realm"."SKIP_CREDENTIAL_SUBSCOPING_INDIRECTION"=true diff --git a/regtests/minio/Readme.md b/regtests/minio/Readme.md index 08089f56f..afa54e0b2 100644 --- a/regtests/minio/Readme.md +++ b/regtests/minio/Readme.md @@ -18,22 +18,21 @@ --> # MiniIO Secured -## Minio and secured buckets with TLS self-signed / custom AC +## Minio and secured buckets with TLS self-signed / custom Certificate Authority -To be able to test Polaris with buckets in TLS under custom AC or self-signed certificate +To be able to test Polaris with buckets in TLS under custom Certificate Authority or self-signed certificate ## MiniIO generate self-signed certificates designed for docker-compose setup - Download minio certificate generator : 
https://github.com/minio/certgen -- ```./certgen -host "localhost,minio,*"``` -- put them in ./certs and ./certs/CAs -- they will be mounted in default minio container placeholder +- Generate certificates: ```./certgen -host "localhost,minio,*"``` +- Put them in ./certs and ./certs/CAs. They will be mounted in the default MinIO container placeholder. ## Test minIO secured TLS buckets from self-signed certificate with AWS CLI - ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --no-verify-ssl``` - ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --ca-bundle=./certs/public.crt``` -## add to java cacerts only the public.crt as an AC +## Add to java cacerts only the public.crt as a Certificate Authority - ```sudo keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias minio -file ./certs/public.crt``` - ```keytool -list -cacerts -alias minio -storepass changeit``` diff --git a/regtests/minio/docker-compose.yml b/regtests/minio/docker-compose.yml index b61ca6537..ff6a5c0a7 100644 --- a/regtests/minio/docker-compose.yml +++ b/regtests/minio/docker-compose.yml @@ -54,14 +54,10 @@ services: until (/usr/bin/mc config host add minio https://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc rm -r --force --quiet minio/warehouse; /usr/bin/mc mb --ignore-existing minio/warehouse; - /usr/bin/mc policy set readwrite minio/warehouse; /usr/bin/mc rm -r --force --quiet minio/warehouse2; /usr/bin/mc mb --ignore-existing minio/warehouse2; - /usr/bin/mc policy set readwrite minio/warehouse2; /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog; - /usr/bin/mc admin user add minio minio-user-client 12345678-minio-client; /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog; - /usr/bin/mc admin policy attach minio readwrite --user minio-user-client; tail -f /dev/null " networks: diff --git a/regtests/minio/miniodata/Readme.md b/regtests/minio/miniodata/Readme.md new file mode 100644 index 000000000..d65c6f472 --- /dev/null +++ b/regtests/minio/miniodata/Readme.md @@ -0,0 +1 @@ +# Folder for MinIO data container volume diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh index fc16fa542..172488b7b 100755 --- a/regtests/run_spark_sql_s3compatible.sh +++ b/regtests/run_spark_sql_s3compatible.sh @@ -47,7 +47,6 @@ if [ $# -ne 0 ] && [ $# -ne 1 ]; then fi # Init -SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}" REGTEST_HOME=$(dirname $(realpath $0)) cd ${REGTEST_HOME} @@ -65,6 +64,20 @@ fi S3_LOCATION_2="s3://warehouse2/polaris/" +# SPARK_BEARER_TOKEN +if !
output=$(curl -s -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=secret" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then + echo "Error: Failed to retrieve bearer token" + exit 1 +fi +SPARK_BEARER_TOKEN=$(echo "$output" | awk -F\" '{print $4}') +if [ "$SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then + echo "Error: Failed to retrieve bearer token" + exit 1 +fi # check if Polaris is running polaris_http_code=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs --output /dev/null) @@ -73,6 +86,7 @@ if [ $polaris_http_code -eq 000 ] && [ $polaris_http_code -ne 200 ]; then exit 1 fi + # check if cacerts contain MinIO certificate cert_response=$(keytool -list -cacerts -alias minio -storepass changeit | grep trustedCertEntry) echo $cert_response @@ -108,14 +122,15 @@ fi # creation of catalog +echo """ +These environment variables have to be available to the Polaris service: +CATALOG_S3_KEY_ID = minio-user-catalog +CATALOG_S3_KEY_SECRET = 12345678-minio-catalog +export CATALOG_S3_KEY_ID=minio-user-catalog +export CATALOG_S3_KEY_SECRET=12345678-minio-catalog +""" -# if "credsCatalogAndClientStrategy"=="ENV_VAR_NAME" and not "VALUE", then the following environnement variables have to be available to Polaris -# CATALOG_ID=minio-user-catalog -# CATALOG_SECRET=12345678-minio-catalog -# CLIENT_ID=minio-user-client -# CLIENT_SECRET=12345678-minio-client - -echo -e "\n----\nCREATE Catalog\n" +echo -e "\n----\nCREATE Catalog with a few parameters\n" response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ @@ -130,18 +145,12 @@ response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H " }, \"storageConfigInfo\": { \"storageType\": \"S3_COMPATIBLE\", - \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", - \"credsCatalogAndClientStrategy\": \"VALUE\", \"allowedLocations\": [\"${S3_LOCATION}/\"], - \"s3.path-style-access\": true, - \"s3.endpoint\": \"https://localhost:9000\", - \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", - \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", - \"s3.credentials.client.access-key-id\": \"minio-user-client\", - \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + \"s3.endpoint\": \"https://localhost:9000\" } }" ) + echo -e "Catalog creation - response API http code : $response_catalog \n" if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then echo "Problem during catalog creation" @@ -149,16 +158,14 @@ if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then fi - - echo -e "Get the catalog created : \n" curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark -# Try to update the catalog, - adding a second bucket in the alllowed locations -echo -e "\n----\nUPDATE the catalog, - adding a second bucket in the alllowed locations\n" +# Update the catalog +echo -e "\n----\nUPDATE the catalog v1, - adding a second bucket in the allowed locations\n" curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept:
application/json' \ -H 'Content-Type: application/json' \ @@ -170,26 +177,17 @@ curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ }, \"storageConfigInfo\": { \"storageType\": \"S3_COMPATIBLE\", - \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", - \"credsCatalogAndClientStrategy\": \"VALUE\", \"allowedLocations\": [\"${S3_LOCATION}/\",\"${S3_LOCATION_2}/\"], - \"s3.path-style-access\": true, \"s3.endpoint\": \"https://localhost:9000\", - \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", - \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", - \"s3.credentials.client.access-key-id\": \"minio-user-client\", - \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + \"s3.region\": \"region-1\", + \"s3.pathStyleAccess\": true, + \"s3.credentials.catalog.accessKeyEnvVar\": \"CATALOG_S3_KEY_ID\", + \"s3.credentials.catalog.secretAccessKeyEnvVar\": \"CATALOG_S3_KEY_SECRET\", + \"s3.roleArn\": \"arn:xxx:xxx:xxx:xxxx\" } }" -echo -e "Get the catalog updated with second allowed location : \n" -curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ - -H 'Accept: application/json' \ - -H 'Content-Type: application/json' \ - http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark - - echo -e "\n----\nAdd TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata\n" curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants \ @@ -212,9 +210,9 @@ ${SPARK_HOME}/bin/spark-sql --verbose \ -f "minio/queries-for-spark.sql" + echo -e "\n\n\nEnd of tests, a table and a view data with displayed should be visible in log above" echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" echo ":-)" echo "" -docker-compose --progress quiet --project-name minio --project-directory minio/ -f minio/docker-compose.yml down - +docker-compose --progress quiet --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index 80b0729d2..9c3aeedb1 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -39,7 +39,7 @@ import org.apache.polaris.core.storage.aws.AwsCredentialsStorageIntegration; import org.apache.polaris.core.storage.azure.AzureCredentialsStorageIntegration; import org.apache.polaris.core.storage.gcp.GcpCredentialsStorageIntegration; -import org.apache.polaris.core.storage.s3.S3CredentialsStorageIntegration; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleCredentialsStorageIntegration; import software.amazon.awssdk.services.sts.StsClient; @ApplicationScoped @@ -75,7 +75,9 @@ public PolarisStorageIntegrationProviderImpl( new AwsCredentialsStorageIntegration(stsClientSupplier.get()); break; case S3_COMPATIBLE: - storageIntegration = (PolarisStorageIntegration) new S3CredentialsStorageIntegration(); + storageIntegration = + (PolarisStorageIntegration) + new 
S3CompatibleCredentialsStorageIntegration(configurationStore); break; case GCS: storageIntegration = diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index d4a1f44fb..370a62dc0 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -878,7 +878,7 @@ components: propertyName: storageType mapping: S3: "#/components/schemas/AwsStorageConfigInfo" - S3_COMPATIBLE: "#/components/schemas/S3StorageConfigInfo" + S3_COMPATIBLE: "#/components/schemas/S3CompatibleStorageConfigInfo" AZURE: "#/components/schemas/AzureStorageConfigInfo" GCS: "#/components/schemas/GcpStorageConfigInfo" FILE: "#/components/schemas/FileStorageConfigInfo" @@ -907,57 +907,38 @@ components: required: - roleArn - S3StorageConfigInfo: + S3CompatibleStorageConfigInfo: type: object - description: S3 compatible storage configuration info (MinIO, Dell ECS, Netapp StorageGRID, ...) + description: s3-compatible storage configuration info (MinIO, Ceph, Dell ECS, Netapp StorageGRID, ...) allOf: - $ref: '#/components/schemas/StorageConfigInfo' properties: - credsCatalogAndClientStrategy: - type: string - enum: - - VALUE - - ENV_VAR_NAME - default: ENV_VAR_NAME - example: "ACCESS_KEY" - description: When you send key VALUE directly via this command, they should apear in logs. By ENV_VAR_NAME without dollar, only a reference will appear in logs, but the value have to be available as environnement variable in the context where Polaris is running - credsVendingStrategy: - type: string - enum: - - TOKEN_WITH_ASSUME_ROLE - - KEYS_SAME_AS_CATALOG - - KEYS_DEDICATED_TO_CLIENT - default: TOKEN_WITH_ASSUME_ROLE - description: The catalog strategy to vend credentials to client. Options possible are same keys than catalog, keys dedicated to clients, or Tokens with STS methods 'assumeRole' for Dell ECS or NetApp StorageGrid solution, 'truc' for MinIo solution) - s3.path-style-access: - type: boolean - description: if true use path style - default: false s3.endpoint: type: string description: the S3 endpoint example: "http[s]://host:port" - s3.credentials.catalog.access-key-id: + s3.credentials.catalog.accessKeyEnvVar: type: string - description: The ACCESS_KEY_ID used y the catalog to communicate with S3 - example: "$AWS_ACCESS_KEY_ID" - s3.credentials.catalog.secret-access-key: + description: Default to AWS credentials, otherwise set the environment variable name for the 'ACCESS_KEY_ID' used by the catalog to communicate with S3 + example: "CATALOG_1_ACCESS_KEY_ENV_VARIABLE_NAME or AWS_ACCESS_KEY_ID" + s3.credentials.catalog.secretAccessKeyEnvVar: type: string - description: The SECRET_ACCESS_KEY used y the catalog to communicate with S3 - example: "$AWS_SECRET_ACCESS_KEY" - s3.credentials.client.access-key-id: + description: Default to AWS credentials, otherwise set the environment variable name for the 'SECRET_ACCESS_KEY' used by the catalog to communicate with S3 + example: "CATALOG_1_SECRET_KEY_ENV_VARIABLE_NAME or AWS_SECRET_ACCESS_KEY" + s3.pathStyleAccess: + type: boolean + description: Whether or not to use path-style access + default: false + s3.region: type: string - description: Optional - ACCESS_KEY_ID vended by catalog to the client in case of this CredentialVendedStrategy is selected - example: "$AWS_ACCESS_KEY_ID" - s3.credentials.client.secret-access-key: + description: Optional - the s3 region where data is stored + example: "rack-1 or us-east-1" + s3.roleArn: type: string - description: Optional - SECRET_ACCESS_KEY vended by catalog to the client in case of 
this CredentialVendedStrategy is selected - example: "$AWS_SECRET_ACCESS_KEY" + description: Optional - a s3 role arn + example: "arn:aws:iam::123456789001:principal/abc1-b-self1234" required: - - credsVendingStrategy - s3.endpoint - - s3.credentials.catalog.access-key-id - - s3.credentials.catalog.secret-access-key AzureStorageConfigInfo: type: object From 0327f1519e3d246fe48cb597a219efc513d0bdd0 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Thu, 27 Feb 2025 19:55:21 +0100 Subject: [PATCH 3/7] add s3 profile --- .../polaris/core/entity/CatalogEntity.java | 2 + .../polaris/core/storage/StorageUtil.java | 137 ++++++++++++++++ ...mpatibleCredentialsStorageIntegration.java | 148 ++++-------------- .../S3CompatibleStorageConfigurationInfo.java | 19 ++- regtests/minio/queries-for-spark.sql | 12 +- regtests/run_spark_sql_s3compatible.sh | 140 +++++++++-------- spec/polaris-management-service.yml | 19 ++- 7 files changed, 271 insertions(+), 206 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index ab70b9b49..d5ad54771 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -149,6 +149,7 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) return S3CompatibleStorageConfigInfo.builder() .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) .setS3Endpoint(s3Config.getS3Endpoint()) + .setS3ProfileName(s3Config.getS3ProfileName()) .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) .setAllowedLocations(s3Config.getAllowedLocations()) .setS3CredentialsCatalogAccessKeyEnvVar(s3Config.getS3CredentialsCatalogAccessKeyId()) @@ -275,6 +276,7 @@ public Builder setStorageConfigurationInfo( new S3CompatibleStorageConfigurationInfo( PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, s3ConfigModel.getS3Endpoint(), + s3ConfigModel.getS3ProfileName(), s3ConfigModel.getS3CredentialsCatalogAccessKeyEnvVar(), s3ConfigModel.getS3CredentialsCatalogSecretAccessKeyEnvVar(), s3ConfigModel.getS3PathStyleAccess(), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java index 02cc2af12..6eb26a94d 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java @@ -20,6 +20,11 @@ import jakarta.annotation.Nonnull; import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import software.amazon.awssdk.policybuilder.iam.*; public class StorageUtil { /** @@ -62,4 +67,136 @@ public class StorageUtil { public static @Nonnull String getBucket(URI uri) { return uri.getAuthority(); } + + /** + * Given a path, return it without leading slash + * + * @param path A path to parse + * @return Same path without leading slash + */ + private static @Nonnull String trimLeadingSlash(String path) { + if (path.startsWith("/")) { + path = path.substring(1); + } + return path; + } + + /** + * Given an uri, and format an S3 path + * + * @param uri A path to parse + * @return A bucket and a path joined by slash + */ + private static @Nonnull String parseS3Path(URI uri) { + String bucket = getBucket(uri); + String path = trimLeadingSlash(uri.getPath()); + return String.join("/", 
bucket, path); + } + + /** + * Given a roleArn, return the prefix + * + * @param roleArn A roleArn to parse + * @return The prefix of the roleArn + */ + private static String getArnPrefixFor(String roleArn) { + if (roleArn.contains("aws-cn")) { + return "arn:aws-cn:s3:::"; + } else if (roleArn.contains("aws-us-gov")) { + return "arn:aws-us-gov:s3:::"; + } else { + return "arn:aws:s3:::"; + } + } + + /** + * generate an IamPolicy from the input readLocations and writeLocations, optionally with list + * support. Credentials will be scoped to exactly the resources provided. If read and write + * locations are empty, a non-empty policy will be generated that grants GetObject and optionally + * ListBucket privileges with no resources. This prevents us from sending an empty policy to AWS + * and just assuming the role with full privileges. + * + * @param roleArn A roleArn + * @param allowList Allow list or not + * @param readLocations A list of input read locations + * @param writeLocations A list of input write locations + * @return A policy limiting scope access + */ + // TODO - add KMS key access + public static IamPolicy policyString( + String roleArn, boolean allowList, Set readLocations, Set writeLocations) { + IamPolicy.Builder policyBuilder = IamPolicy.builder(); + IamStatement.Builder allowGetObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetObject") + .addAction("s3:GetObjectVersion"); + Map bucketListStatementBuilder = new HashMap<>(); + Map bucketGetLocationStatementBuilder = new HashMap<>(); + + String arnPrefix = getArnPrefixFor(roleArn); + Stream.concat(readLocations.stream(), writeLocations.stream()) + .distinct() + .forEach( + location -> { + URI uri = URI.create(location); + allowGetObjectStatementBuilder.addResource( + // TODO add support for CN and GOV + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + final var bucket = arnPrefix + StorageUtil.getBucket(uri); + if (allowList) { + bucketListStatementBuilder + .computeIfAbsent( + bucket, + (String key) -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:ListBucket") + .addResource(key)) + .addCondition( + IamConditionOperator.STRING_LIKE, + "s3:prefix", + StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); + } + bucketGetLocationStatementBuilder.computeIfAbsent( + bucket, + key -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetBucketLocation") + .addResource(key)); + }); + + if (!writeLocations.isEmpty()) { + IamStatement.Builder allowPutObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:PutObject") + .addAction("s3:DeleteObject"); + writeLocations.forEach( + location -> { + URI uri = URI.create(location); + // TODO add support for CN and GOV + allowPutObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + }); + policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); + } + if (!bucketListStatementBuilder.isEmpty()) { + bucketListStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + } else if (allowList) { + // add list privilege with 0 resources + policyBuilder.addStatement( + IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); + } + + bucketGetLocationStatementBuilder + .values() + .forEach(statementBuilder -> 
policyBuilder.addStatement(statementBuilder.build())); + return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); + } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 3dfb03814..b1aebb4af 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -21,12 +21,10 @@ import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; import jakarta.annotation.Nonnull; +import jakarta.ws.rs.NotAuthorizedException; import java.net.URI; import java.util.EnumMap; -import java.util.HashMap; -import java.util.Map; import java.util.Set; -import java.util.stream.Stream; import org.apache.polaris.core.PolarisConfigurationStore; import org.apache.polaris.core.PolarisDiagnostics; import org.apache.polaris.core.context.RealmContext; @@ -36,12 +34,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.policybuilder.iam.IamConditionOperator; -import software.amazon.awssdk.policybuilder.iam.IamEffect; -import software.amazon.awssdk.policybuilder.iam.IamPolicy; -import software.amazon.awssdk.policybuilder.iam.IamResource; -import software.amazon.awssdk.policybuilder.iam.IamStatement; +import software.amazon.awssdk.profiles.ProfileFileSupplier; import software.amazon.awssdk.services.sts.StsClient; import software.amazon.awssdk.services.sts.StsClientBuilder; import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; @@ -60,7 +55,6 @@ public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore confi this.configurationStore = configurationStore; } - /** {@inheritDoc} */ @Override public EnumMap getSubscopedCreds( @Nonnull RealmContext realmContext, @@ -70,7 +64,6 @@ public EnumMap getSubscopedCreds( @Nonnull Set allowedReadLocations, @Nonnull Set allowedWriteLocations) { - StsClient stsClient; String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); @@ -85,136 +78,51 @@ public EnumMap getSubscopedCreds( } LOGGER.debug("S3Compatible - createStsClient()"); - try { - StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); - stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); - if (caI != null && caS != null) { - // else default provider build credentials from profile or standard AWS env var - stsBuilder.credentialsProvider( - StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); - LOGGER.debug( - "S3Compatible - stsClient using keys from catalog settings - overiding default constructor"); - } - stsClient = stsBuilder.build(); + StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); + if (storageConfig.getS3ProfileName() != null) { + stsBuilder.credentialsProvider( + ProfileCredentialsProvider.builder() + 
.profileFile(ProfileFileSupplier.defaultSupplier()) + .profileName(storageConfig.getS3ProfileName()) + .build()); + LOGGER.debug("S3Compatible - stsClient using profile from catalog settings"); + } else if (caI != null && caS != null) { + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); + LOGGER.debug("S3Compatible - stsClient using keys from catalog settings"); + } + try (StsClient stsClient = stsBuilder.build()) { LOGGER.debug("S3Compatible - stsClient successfully built"); AssumeRoleResponse response = stsClient.assumeRole( AssumeRoleRequest.builder() .roleSessionName("PolarisCredentialsSTS") - .roleArn( - (storageConfig.getS3RoleArn() == null) ? "" : storageConfig.getS3RoleArn()) + .roleArn(storageConfig.getS3RoleArn()) .policy( - policyString(allowListOperation, allowedReadLocations, allowedWriteLocations) + StorageUtil.policyString( + storageConfig.getS3RoleArn(), + allowListOperation, + allowedReadLocations, + allowedWriteLocations) .toJson()) .durationSeconds( configurationStore.getConfiguration( realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) .build()); + propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); propertiesMap.put( PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); propertiesMap.put(PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); LOGGER.debug( - "S3Compatible - assumeRole - Token Expiration at : {}", + "S3Compatible - assumeRole - Obtained token expiration : {}", response.credentials().expiration().toString()); - } catch (Exception e) { - System.err.println("S3Compatible - stsClient - build failure : " + e.getMessage()); + throw new NotAuthorizedException( + "Unable to build S3 Security Token Service client - " + e.getMessage()); } return propertiesMap; } - - /* - * function from AwsCredentialsStorageIntegration but without roleArn parameter - */ - private IamPolicy policyString( - boolean allowList, Set readLocations, Set writeLocations) { - IamPolicy.Builder policyBuilder = IamPolicy.builder(); - IamStatement.Builder allowGetObjectStatementBuilder = - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:GetObject") - .addAction("s3:GetObjectVersion"); - Map bucketListStatementBuilder = new HashMap<>(); - Map bucketGetLocationStatementBuilder = new HashMap<>(); - - String arnPrefix = "arn:aws:s3:::"; - Stream.concat(readLocations.stream(), writeLocations.stream()) - .distinct() - .forEach( - location -> { - URI uri = URI.create(location); - allowGetObjectStatementBuilder.addResource( - IamResource.create( - arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); - final var bucket = arnPrefix + StorageUtil.getBucket(uri); - if (allowList) { - bucketListStatementBuilder - .computeIfAbsent( - bucket, - (String key) -> - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:ListBucket") - .addResource(key)) - .addCondition( - IamConditionOperator.STRING_LIKE, - "s3:prefix", - StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); - } - bucketGetLocationStatementBuilder.computeIfAbsent( - bucket, - key -> - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:GetBucketLocation") - .addResource(key)); - }); - - if (!writeLocations.isEmpty()) { - IamStatement.Builder allowPutObjectStatementBuilder = - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:PutObject") - .addAction("s3:DeleteObject"); - 
writeLocations.forEach( - location -> { - URI uri = URI.create(location); - allowPutObjectStatementBuilder.addResource( - IamResource.create( - arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); - }); - policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); - } - if (!bucketListStatementBuilder.isEmpty()) { - bucketListStatementBuilder - .values() - .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); - } else if (allowList) { - // add list privilege with 0 resources - policyBuilder.addStatement( - IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); - } - - bucketGetLocationStatementBuilder - .values() - .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); - return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); - } - - /* function from AwsCredentialsStorageIntegration */ - private static @Nonnull String parseS3Path(URI uri) { - String bucket = StorageUtil.getBucket(uri); - String path = trimLeadingSlash(uri.getPath()); - return String.join("/", bucket, path); - } - - /* function from AwsCredentialsStorageIntegration */ - private static @Nonnull String trimLeadingSlash(String path) { - if (path.startsWith("/")) { - path = path.substring(1); - } - return path; - } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java index 776279546..76fe11008 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.MoreObjects; +import jakarta.annotation.Nonnull; import java.util.List; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.jetbrains.annotations.NotNull; @@ -34,9 +35,10 @@ public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigurationInfo { // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required - // for allowed read and write locations for subscoping creds. + // for allowed read and write locations for sub-scoping credentials. 
@JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; private final @NotNull String s3Endpoint; + private final @Nullable String s3ProfileName; private final @Nullable String s3CredentialsCatalogAccessKeyId; private final @Nullable String s3CredentialsCatalogSecretAccessKey; private final @NotNull Boolean s3PathStyleAccess; @@ -47,33 +49,39 @@ public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigur public S3CompatibleStorageConfigurationInfo( @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, - @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @Nullable + @JsonProperty(value = "s3ProfileName", required = false) @Nullable String s3ProfileName, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = false) @Nullable String s3CredentialsCatalogAccessKeyId, - @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @Nullable + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = false) @Nullable String s3CredentialsCatalogSecretAccessKey, @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull Boolean s3PathStyleAccess, @JsonProperty(value = "s3Region", required = false) @Nullable String s3Region, @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn, - @JsonProperty(value = "allowedLocations", required = true) @Nullable + @JsonProperty(value = "allowedLocations", required = true) @Nonnull List allowedLocations) { super(StorageType.S3_COMPATIBLE, allowedLocations); validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); this.s3PathStyleAccess = s3PathStyleAccess; this.s3Endpoint = s3Endpoint; + this.s3ProfileName = s3ProfileName; this.s3CredentialsCatalogAccessKeyId = (s3CredentialsCatalogAccessKeyId == null) ? "" : s3CredentialsCatalogAccessKeyId; this.s3CredentialsCatalogSecretAccessKey = (s3CredentialsCatalogSecretAccessKey == null) ? "" : s3CredentialsCatalogSecretAccessKey; this.s3Region = s3Region; - this.s3RoleArn = s3RoleArn; + this.s3RoleArn = (s3RoleArn == null) ? 
"" : s3RoleArn; } public @NotNull String getS3Endpoint() { return this.s3Endpoint; } + public @Nullable String getS3ProfileName() { + return this.s3ProfileName; + } + public @NotNull Boolean getS3PathStyleAccess() { return this.s3PathStyleAccess; } @@ -103,6 +111,7 @@ public String toString() { .add("s3RoleArn", getS3RoleArn()) .add("s3PathStyleAccess", getS3PathStyleAccess()) .add("s3Endpoint", getS3Endpoint()) + .add("s3ProfileName", getS3ProfileName()) .toString(); } diff --git a/regtests/minio/queries-for-spark.sql b/regtests/minio/queries-for-spark.sql index 966ea6db6..0932af1ee 100644 --- a/regtests/minio/queries-for-spark.sql +++ b/regtests/minio/queries-for-spark.sql @@ -29,14 +29,10 @@ SELECT * FROM db1.ns2.view1; INSERT INTO db1.ns1.table1 VALUES (13, 23); SELECT * FROM db1.ns2.view1; -CREATE DATABASE IF NOT EXISTS db1; -CREATE OR REPLACE TABLE db1.table1 ( f1 int, f2 int ); -INSERT INTO db1.ns1.table1 VALUES (3, 2); - -- Test the second bucket allowed in the catalog -CREATE DATABASE IF NOT EXISTS db2 LOCATION 's3://warehouse2/polaris/'; -CREATE OR REPLACE TABLE db2.table1 ( f1 int, f2 int ); -INSERT INTO db2.table1 VALUES (01, 02); -SELECT * FROM db2.table1; +CREATE DATABASE IF NOT EXISTS wh2 LOCATION 's3://warehouse2/polaris'; +CREATE OR REPLACE TABLE wh2.table1 ( f1 int, f2 int ); +INSERT INTO wh2.table1 VALUES (01, 02); +SELECT * FROM wh2.table1; quit; diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh index 172488b7b..ebd490b58 100755 --- a/regtests/run_spark_sql_s3compatible.sh +++ b/regtests/run_spark_sql_s3compatible.sh @@ -21,7 +21,7 @@ # Purpose: Launch the Spark SQL shell to interact with Polaris and do NRT. # ----------------------------------------------------------------------------- # -# Prequisite: +# Requisite: # This script use a MinIO with TLS. # Please follow instructions in regtests/minio/Readme.md and update your # java cacerts with self-signed certificate @@ -40,6 +40,7 @@ clear + if [ $# -ne 0 ] && [ $# -ne 1 ]; then echo "run_spark_sql_s3compatible.sh only accepts 1 or 0 argument, argument is the the bucket, by default it will be s3://warehouse/polaris" echo "Usage: ./run_spark_sql.sh [S3-location]" @@ -63,18 +64,20 @@ fi # Second location for testing catalog update S3_LOCATION_2="s3://warehouse2/polaris/" +# If Polaris run inMemory classic mode, principal credentials are : root:secret +# If Polaris run inMemory DEBUG mode, principal credentials are to retrieve from service log within this pattern: 522f251cc2b9c121:6eff0915385979684d575fa1d3f18e2b # SPARK_BEARER_TOKEN if ! output=$(curl -s -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ - -d "grant_type=client_credentials" \ - -d "client_id=root" \ - -d "client_secret=secret" \ - -d "scope=PRINCIPAL_ROLE:ALL"); then + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=secret" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then echo "Error: Failed to retrieve bearer token" exit 1 fi SPARK_BEARER_TOKEN=$(echo "$output" | awk -F\" '{print $4}') -if [ "SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then +if [ "$SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then echo "Error: Failed to retrieve bearer token" exit 1 fi @@ -101,54 +104,54 @@ echo -e "\n\n-------\n\n" echo "Start a minio with secured self-signed buckets s3://warehouse and users, wait a moment please..." 
docker-compose --progress tty --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml up -d minio-configured -echo "minio brower is availaible during this test in https://localhost:9001 admin/password (please accept the self signed certificate)" +echo "minio browser is available during this test in https://localhost:9001 admin/password (please accept the self signed certificate)" echo -e "\n\n-------\n\n" # spark setup -export SPARK_VERSION=spark-3.5.2 -export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 - echo "Doing spark setup... wait a moment" +export SPARK_VERSION=spark-3.5.4 +export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 +export SPARK_LOCAL_HOSTNAME=localhost # avoid VPN messing up driver local IP address binding ./setup.sh > /dev/null 2>&1 - -if [ -z "${SPARK_HOME}"]; then +if [ -z "${SPARK_HOME}" ]; then export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) fi - - -# start of tests - -# creation of catalog - echo """ -These environnement variables have to be available to Polaris service : -CATALOG_S3_KEY_ID = minio-user-catalog -CATALOG_S3_KEY_SECRET = 12345678-minio-catalog +These environment variables have to be available to Polaris service or as keys in the aws profile, and the name of this profile provided to the catalog as parameter : export CATALOG_S3_KEY_ID=minio-user-catalog export CATALOG_S3_KEY_SECRET=12345678-minio-catalog """ - -echo -e "\n----\nCREATE Catalog with few parameters \n" +echo Add minio-catalog-1 section in aws profile +cat >>~/.aws/credentials < /dev/stderr -echo -e "\n----\nAssign the catalog_admin to the service_admin.\n" +echo -e "\n\n---- Assign the catalog_admin to the service_admin.\n" curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \ -d '{"name": "catalog_admin"}' > /dev/stderr -echo -e "\n----\nStart Spark-sql to test Polaris catalog with queries\n" +echo -e "\n\n---- Start Spark-sql to test Polaris catalog with queries\n" ${SPARK_HOME}/bin/spark-sql --verbose \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \ @@ -210,9 +214,13 @@ ${SPARK_HOME}/bin/spark-sql --verbose \ -f "minio/queries-for-spark.sql" +echo Remove minio-catalog-1 section from aws profile +sed -i '/\[minio-catalog-1\]/,${/\[minio-catalog-1\]/d; d}' ~/.aws/credentials +echo Done. 
+ +echo +echo End of tests, a table and a view data with displayed should be visible in log above +echo Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata' +echo ':-)' -echo -e "\n\n\nEnd of tests, a table and a view data with displayed should be visible in log above" -echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" -echo ":-)" -echo "" -docker-compose --progress quiet --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down +docker-compose --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down \ No newline at end of file diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index 370a62dc0..2819b802c 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -915,28 +915,33 @@ components: properties: s3.endpoint: type: string - description: the S3 endpoint + description: S3 endpoint example: "http[s]://host:port" + s3.profileName: + type: string + description: optional - S3 profile name (credentials) used by this catalog to communicate with S3 + example: "default or minio-storage-catalog-1 or ceph-storage-catalog-2" s3.credentials.catalog.accessKeyEnvVar: type: string - description: Default to AWS credentials, otherwise set the environment variable name for the 'ACCESS_KEY_ID' used by the catalog to communicate with S3 + description: optional - environment variable name for the 'ACCESS_KEY_ID' used by this catalog to communicate with S3 example: "CATALOG_1_ACCESS_KEY_ENV_VARIABLE_NAME or AWS_ACCESS_KEY_ID" s3.credentials.catalog.secretAccessKeyEnvVar: type: string - description: Default to AWS credentials, otherwise set the environment variable name for the 'SECRET_ACCESS_KEY' used by the catalog to communicate with S3 + description: optional - environment variable name for the 'SECRET_ACCESS_KEY' used by this catalog to communicate with S3 example: "CATALOG_1_SECRET_KEY_ENV_VARIABLE_NAME or AWS_SECRET_ACCESS_KEY" s3.pathStyleAccess: type: boolean - description: Whether or not to use path-style access + description: optional - whether or not to use path-style access default: false s3.region: type: string - description: Optional - the s3 region where data is stored + description: optional - s3 region where data is stored example: "rack-1 or us-east-1" s3.roleArn: type: string - description: Optional - a s3 role arn - example: "arn:aws:iam::123456789001:principal/abc1-b-self1234" + description: optional - s3 role arn, used with assumeRole to obtain a Security Token Service + pattern: '^([u|a]rn:\S*:\S*:\S*:\S*:\S*).*$' + example: "arn:aws:iam::123456789001:principal/abc1-b-self1234 or urn:ecs:iam::namespace:user/role" required: - s3.endpoint From 1fa3a97f3fe5870b5c81bffa98a86636dbe3f3b3 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Tue, 4 Mar 2025 22:35:37 +0100 Subject: [PATCH 4/7] rebase --- ...mpatibleCredentialsStorageIntegration.java | 24 +++++++------------ ...PolarisStorageIntegrationProviderImpl.java | 2 +- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index b1aebb4af..02b694311 100644 --- 
a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -19,18 +19,17 @@ package org.apache.polaris.core.storage.s3compatible; import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; +import static org.apache.polaris.core.PolarisConfiguration.loadConfig; -import jakarta.annotation.Nonnull; import jakarta.ws.rs.NotAuthorizedException; import java.net.URI; import java.util.EnumMap; import java.util.Set; -import org.apache.polaris.core.PolarisConfigurationStore; import org.apache.polaris.core.PolarisDiagnostics; -import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.core.storage.InMemoryStorageIntegration; import org.apache.polaris.core.storage.PolarisCredentialProperty; import org.apache.polaris.core.storage.StorageUtil; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; @@ -48,21 +47,18 @@ public class S3CompatibleCredentialsStorageIntegration private static final Logger LOGGER = LoggerFactory.getLogger(S3CompatibleCredentialsStorageIntegration.class); - private final PolarisConfigurationStore configurationStore; - public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore configurationStore) { - super(configurationStore, S3CompatibleCredentialsStorageIntegration.class.getName()); - this.configurationStore = configurationStore; + public S3CompatibleCredentialsStorageIntegration() { + super(S3CompatibleCredentialsStorageIntegration.class.getName()); } @Override public EnumMap getSubscopedCreds( - @Nonnull RealmContext realmContext, - @Nonnull PolarisDiagnostics diagnostics, - @Nonnull S3CompatibleStorageConfigurationInfo storageConfig, + @NotNull PolarisDiagnostics diagnostics, + @NotNull S3CompatibleStorageConfigurationInfo storageConfig, boolean allowListOperation, - @Nonnull Set allowedReadLocations, - @Nonnull Set allowedWriteLocations) { + @NotNull Set allowedReadLocations, + @NotNull Set allowedWriteLocations) { String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); @@ -106,9 +102,7 @@ public EnumMap getSubscopedCreds( allowedReadLocations, allowedWriteLocations) .toJson()) - .durationSeconds( - configurationStore.getConfiguration( - realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) + .durationSeconds(loadConfig(STORAGE_CREDENTIAL_DURATION_SECONDS)) .build()); propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index 9c3aeedb1..dff903786 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -77,7 +77,7 @@ public PolarisStorageIntegrationProviderImpl( case S3_COMPATIBLE: storageIntegration = (PolarisStorageIntegration) - new S3CompatibleCredentialsStorageIntegration(configurationStore); + new S3CompatibleCredentialsStorageIntegration(); break; case 
GCS: storageIntegration = From bd903043b6b79a4f519d99c0240a768087093e37 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 5 Mar 2025 00:05:03 +0100 Subject: [PATCH 5/7] format --- .../service/storage/PolarisStorageIntegrationProviderImpl.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index dff903786..3f7247b48 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -76,8 +76,7 @@ public PolarisStorageIntegrationProviderImpl( break; case S3_COMPATIBLE: storageIntegration = - (PolarisStorageIntegration) - new S3CompatibleCredentialsStorageIntegration(); + (PolarisStorageIntegration) new S3CompatibleCredentialsStorageIntegration(); break; case GCS: storageIntegration = From eedee829fbaf9736cc01c4505c1f6f010853a8bf Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 5 Mar 2025 18:12:39 +0100 Subject: [PATCH 6/7] Update polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java Co-authored-by: Yufei Gu --- .../S3CompatibleCredentialsStorageIntegration.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 02b694311..61c5fbd78 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -105,10 +105,9 @@ public EnumMap getSubscopedCreds( .durationSeconds(loadConfig(STORAGE_CREDENTIAL_DURATION_SECONDS)) .build()); - propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); - propertiesMap.put(PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); + propertiesMap.put(AWS_KEY_ID, response.credentials().accessKeyId()); + propertiesMap.put(AWS_SECRET_KEY, response.credentials().secretAccessKey()); + propertiesMap.put(AWS_TOKEN, response.credentials().sessionToken()); LOGGER.debug( "S3Compatible - assumeRole - Obtained token expiration : {}", response.credentials().expiration().toString()); From ac951e3660852a9a2d6d93be012e91049c91a631 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 5 Mar 2025 18:13:26 +0100 Subject: [PATCH 7/7] Update polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java Co-authored-by: Yufei Gu --- .../s3compatible/S3CompatibleCredentialsStorageIntegration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 61c5fbd78..53bd7e1a8 100644 --- 
a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -113,7 +113,7 @@ public EnumMap getSubscopedCreds( response.credentials().expiration().toString()); } catch (Exception e) { throw new NotAuthorizedException( - "Unable to build S3 Security Token Service client - " + e.getMessage()); + "Unable to build S3 Security Token Service client", e); } return propertiesMap;
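For readers following the credential-vending flow this patch series introduces, the sketch below condenses what `S3CompatibleCredentialsStorageIntegration` and `StorageUtil.policyString` do end to end: resolve the catalog-level credentials (from a named AWS profile, or from the environment variables whose *names* are stored in the catalog configuration), point an STS client at the S3-compatible endpoint, and call `AssumeRole` with an inline session policy scoped to the allowed locations. This is a minimal, self-contained approximation rather than the Polaris code itself: the class name, endpoint, profile name, role ARN, bucket path and the 3600-second duration are illustrative placeholders (the real integration reads `STORAGE_CREDENTIAL_DURATION_SECONDS` from configuration), and the `ListBucket`/`GetBucketLocation`/`GetObjectVersion` statements plus the aws-cn/aws-us-gov ARN prefixes handled by `StorageUtil.policyString` are trimmed for brevity.

```java
import java.net.URI;
import java.util.Set;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.policybuilder.iam.IamEffect;
import software.amazon.awssdk.policybuilder.iam.IamPolicy;
import software.amazon.awssdk.policybuilder.iam.IamResource;
import software.amazon.awssdk.policybuilder.iam.IamStatement;
import software.amazon.awssdk.services.sts.StsClient;
import software.amazon.awssdk.services.sts.model.AssumeRoleRequest;
import software.amazon.awssdk.services.sts.model.AssumeRoleResponse;

public class S3CompatibleStsSketch {

  public static void main(String[] args) {
    // Placeholder catalog settings; these mirror the fields of
    // S3CompatibleStorageConfigurationInfo but the values are illustrative only.
    String endpoint = "https://localhost:9000";              // s3.endpoint
    String profileName = "minio-catalog-1";                  // s3.profileName (optional)
    String roleArn = "arn:aws:iam::000000000000:role/any";   // s3.roleArn (placeholder; some stores ignore it)
    Set<String> readWriteLocations = Set.of("s3://warehouse/polaris/");

    // 1. Resolve catalog credentials: prefer a named AWS profile, otherwise fall back
    //    to static keys read from environment variables. The catalog only stores the
    //    names of those variables, never the secret values themselves.
    StsClient sts =
        StsClient.builder()
            .endpointOverride(URI.create(endpoint))
            .credentialsProvider(
                profileName != null
                    ? ProfileCredentialsProvider.builder().profileName(profileName).build()
                    : StaticCredentialsProvider.create(
                        AwsBasicCredentials.create(
                            System.getenv("CATALOG_S3_KEY_ID"),
                            System.getenv("CATALOG_S3_KEY_SECRET"))))
            .build();

    // 2. Build an inline session policy restricted to the allowed locations,
    //    similar in spirit to StorageUtil.policyString (heavily simplified here).
    IamStatement.Builder readWrite =
        IamStatement.builder()
            .effect(IamEffect.ALLOW)
            .addAction("s3:GetObject")
            .addAction("s3:PutObject")
            .addAction("s3:DeleteObject");
    readWriteLocations.forEach(
        loc -> {
          URI uri = URI.create(loc);
          String path = uri.getPath().startsWith("/") ? uri.getPath().substring(1) : uri.getPath();
          readWrite.addResource(
              IamResource.create("arn:aws:s3:::" + uri.getAuthority() + "/" + path + "*"));
        });
    IamPolicy sessionPolicy = IamPolicy.builder().addStatement(readWrite.build()).build();

    // 3. Exchange the catalog credentials for short-lived, sub-scoped credentials.
    AssumeRoleResponse response =
        sts.assumeRole(
            AssumeRoleRequest.builder()
                .roleSessionName("PolarisCredentialsSTS")
                .roleArn(roleArn)
                .policy(sessionPolicy.toJson())
                .durationSeconds(3600)
                .build());

    // These three values are what the integration ultimately vends to the engine,
    // surfaced as the AWS_KEY_ID / AWS_SECRET_KEY / AWS_TOKEN credential properties.
    System.out.println("accessKeyId  = " + response.credentials().accessKeyId());
    System.out.println("sessionToken = " + response.credentials().sessionToken());
    System.out.println("expiration   = " + response.credentials().expiration());
    sts.close();
  }
}
```

The design point worth keeping in mind is that the inline session policy can only narrow whatever the catalog-level keys already permit, so the credentials configured for Polaris may stay broad while each engine session receives keys limited to its table locations. As the spec description in the patch suggests, any S3-compatible store that implements STS `AssumeRole` with inline session policies (MinIO, Ceph, Dell ECS, NetApp StorageGRID, ...) can be targeted through the same path.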