diff --git a/.example.env b/.example.env index a1394ba0..24b182f4 100644 --- a/.example.env +++ b/.example.env @@ -46,3 +46,5 @@ VEDA_SHARED_WEB_ACL_ID=[OPTIONAL ID ARN for WEB ACL] VEDA_DISABLE_DEFAULT_APIGW_ENDPOINT=[OPTIONAL BOOL TO DISABLE DEFAULT API GATEWAY ENDPOINTS] VEDA_STAC_ENABLE_STAC_AUTH_PROXY= VEDA_TENANT_FILTER_FIELD=[OPTIONAL STRING, DEFAULTS TO "eic:tenant"] +VEDA_KEYCLOAK_UMA_RESOURCE_SERVER_CLIENT_SECRET_NAME=[OPTIONAL] +VEDA_KEYCLOAK_SECRET_KMS_KEY_ARN=[OPTIONAL, REQUIRED TO READ KEYCLOAK SECRETS] diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 823840b7..d615daf5 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -70,7 +70,9 @@ jobs: run: python -m pytest .github/workflows/tests/ -vv -s - name: Install reqs for ingest api - run: python -m pip install -r ingest_api/runtime/requirements_dev.txt + run: | + python -m pip install -r ingest_api/runtime/requirements_dev.txt + python -m pip install common/auth/ - name: Install reqs for stac api run: python -m pip install stac_api/runtime/ diff --git a/common/auth/README.md b/common/auth/README.md new file mode 100644 index 00000000..6deccc3e --- /dev/null +++ b/common/auth/README.md @@ -0,0 +1,161 @@ +# VEDA Auth + +Authentication and authorization utilities for veda-backend + +## KeycloakPDPClient + +A client for interacting with Keycloak's Authorization Services using the UMA (User-Managed Access) protocol to make authorization decisions. + +### Overview + +The `KeycloakPDPClient` enables applications to: + +- Request a RPT (Requesting Party Token) from Keycloak +- Check user permissions for specific resources and scopes +- Extract tenant information from user tokens +- Get lists of tenants where users have create/update access + +### Installation + +The client is part of the `veda_auth` package. 
You can install it with: + +```bash +pip install common/auth/ +``` + +### Basic Usage + +```python +from veda_auth.keycloak_client import KeycloakPDPClient + +# Initialize the client +pdp_client = KeycloakPDPClient( + keycloak_url="https://keycloak.example.com", + realm="my-realm", + client_id="my-resource-server-client", + client_secret="my-client-secret", # Optional; required for confidential clients (e.g. loaded from AWS Secrets Manager) + timeout=10.0 +) + +try: + # Get tenants with create/update access + tenants = pdp_client.get_tenants_with_create_update_access( + access_token=user_access_token, + resource_type="collection" + ) + + # Check specific permission via scope (scopes represent an action you can take on a resource) + has_permission = pdp_client.check_permission( + access_token=user_access_token, + resource_id="stac:collection:tenant-name", + scope="create" + ) +finally: + pdp_client.close() +``` + +### Configuration + +#### Required Parameters + +- `keycloak_url`: Base URL of your Keycloak instance (e.g., `https://keycloak.example.com`) +- `realm`: Keycloak realm name +- `client_id`: Client ID for the resource server in Keycloak + +#### Optional Parameters + +- `client_secret`: Client secret (required for confidential clients) +- `timeout`: Request timeout in seconds (default: 5.0) + +### Key Methods + +#### `get_tenants_with_create_update_access(access_token, tenant_list=None, resource_type="collection")` + +Returns a list of tenant names where the user has both `create` and `update` scopes for the specified resource type.
+ +**Parameters:** + +- `access_token` (str): User's OAuth2 access token +- `resource_type` (str, optional): Type of resource (`"collection"` or `"item"`); defaults to `"collection"` +- `tenant_list` (List[str], optional): Pre-filtered list of tenants to check + +**Returns:** + +- `List[str]`: Sorted list of tenant names with create/update access + +**Example:** + +```python +tenants = pdp_client.get_tenants_with_create_update_access( + access_token=token, + resource_type="collection" +) +# Returns: ["public", "tenant1", "tenant2"] +``` + +#### `check_permission(access_token, resource_id, scope)` + +Checks if a user has a specific permission for a resource. + +**Parameters:** + +- `access_token` (str): User's OAuth2 access token +- `resource_id` (str): Resource identifier (e.g., `"stac:collection:tenant-name"`) +- `scope` (str): Permission scope to check (e.g., `"create"`, `"update"`, `"read"`) + +The scope is defined by the resource server. To see the definition for VEDA, check out the [veda-keycloak config](https://github.com/NASA-IMPACT/veda-keycloak/blob/main/keycloak-config-cli/config/dev/veda.yaml#L340-L344). + +**Returns:** + +- `bool`: `True` if the permission is granted, `False` otherwise + +**Example:** + +```python +can_create = pdp_client.check_permission( + access_token=token, + resource_id="stac:collection:my-tenant", + scope="create" +) +``` + +#### `get_rpt(access_token, resources)` + +Requests an RPT (Requesting Party Token) from Keycloak containing the user's permissions. + +**Parameters:** + +- `access_token` (str): User's OAuth2 access token +- `resources` (List[Dict]): List of resources to request permissions for.
Each resource should have: + - `resource_id`: Resource identifier + - `resource_scopes`: List of scopes to check + +**Returns:** + +- `Dict`: RPT response containing permissions or an access token JWT + +### Resource Identifier Format + +Resources in Keycloak should follow this naming convention: +``` +stac:{resource_type}:{tenant_name} +``` + +They are defined by the resource server configuration settings. For more examples, see [veda-keycloak config](https://github.com/NASA-IMPACT/veda-keycloak/blob/main/keycloak-config-cli/config/dev/veda.yaml#L331) + +Examples: + +- `stac:collection:tenant1` +- `stac:item:tenant2` +- `stac:collection:public` + +### Token Claims + +The client extracts tenant information from JWT token claims. It looks for tenants in: + +- `group_membership.tenants` array +- `groups` array + +### See Also + +- [Keycloak Authorization Services Documentation](https://www.keycloak.org/docs/latest/authorization_services/) +- [Keycloak Resource Server Settings](https://www.keycloak.org/docs/latest/authorization_services/#resource_server_settings) diff --git a/common/auth/__init__.py b/common/auth/__init__.py new file mode 100644 index 00000000..7f5f552d --- /dev/null +++ b/common/auth/__init__.py @@ -0,0 +1 @@ +""" Common auth """ diff --git a/common/auth/setup.py b/common/auth/setup.py index bccc80ba..cb91c1bb 100644 --- a/common/auth/setup.py +++ b/common/auth/setup.py @@ -3,7 +3,13 @@ from setuptools import find_packages, setup -inst_reqs = ["cryptography>=42.0.5", "pyjwt>=2.8.0", "fastapi", "pydantic"] +inst_reqs = [ + "cryptography>=42.0.5", + "pyjwt>=2.8.0", + "fastapi", + "pydantic", + "httpx>=0.24.0", +] setup( name="veda_auth", diff --git a/common/auth/veda_auth/keycloak_client.py b/common/auth/veda_auth/keycloak_client.py new file mode 100644 index 00000000..e7b301b1 --- /dev/null +++ b/common/auth/veda_auth/keycloak_client.py @@ -0,0 +1,427 @@ +"""Keycloak Policy Decision Point (PDP) Client + +This provides a client for interacting with 
class KeycloakPDPClient:
    """Client for Keycloak's Policy Decision Point (Authorization Services).

    Requests UMA (User-Managed Access) tickets from the realm's token endpoint
    and reads the granted permissions out of the response.

    NOTE: JWT payloads are base64-decoded here without verifying the
    signature. RPTs are read straight from Keycloak's HTTP response; user
    access tokens are presumably validated by the caller before they reach
    this client -- TODO confirm at each call site.
    """

    _UMA_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:uma-ticket"
    # A permission entry may identify its resource by UUID (`rsid`), by the
    # introspection-style `resource_id`, or by human-readable name (`rsname`).
    _RESOURCE_ID_KEYS = ("rsid", "resource_id", "rsname")
    _TENANT_GROUP_MARKER = "/Tenants/"
    _ALWAYS_CANDIDATE_TENANT = "public"

    def __init__(
        self,
        keycloak_url: str,
        realm: str,
        client_id: str,
        client_secret: Optional[str] = None,
        timeout: float = 5.0,
    ):
        """
        Args:
            keycloak_url: Base URL of Keycloak (a trailing slash is stripped)
            realm: Realm name
            client_id: Client ID for the resource server
            client_secret: Optional client secret for confidential clients
            timeout: Request timeout in seconds
        """
        self.keycloak_url = keycloak_url.rstrip("/")
        self.realm = realm
        self.client_id = client_id
        self.client_secret = client_secret
        self.timeout = timeout

        self.token_endpoint = (
            f"{self.keycloak_url}/realms/{realm}/protocol/openid-connect/token"
        )
        self.uma2_config_endpoint = (
            f"{self.keycloak_url}/realms/{realm}/.well-known/uma2-configuration"
        )

        # Kept alongside the public `timeout` attribute for compatibility.
        self._timeout = timeout

    def get_rpt(
        self,
        access_token: str,
        resources: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Request the RPT (Requesting Party Token) from Keycloak.

        Args:
            access_token: User's access token, sent as the Bearer credential
            resources: Dicts with `resource_id` and `resource_scopes`. Entries
                missing either value are skipped; an empty list sends no
                `permission` field at all.

        Returns:
            The decoded JSON body of the token endpoint response.

        Raises:
            httpx.HTTPStatusError: Keycloak answered with a 4xx/5xx status.
            Exception: any other transport or decoding error, re-raised
                after logging.
        """
        # https://www.keycloak.org/docs/latest/authorization_services/#_service_authorization_api
        form_data = [
            ("grant_type", self._UMA_GRANT_TYPE),
            ("audience", str(self.client_id)),
        ]
        for resource in resources:
            resource_id = resource.get("resource_id")
            scopes = resource.get("resource_scopes", [])
            if resource_id and scopes:
                form_data.append(("permission", f"{resource_id}#{','.join(scopes)}"))
        if self.client_secret:
            form_data.append(("client_id", str(self.client_id)))
            form_data.append(("client_secret", str(self.client_secret)))

        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/x-www-form-urlencoded",
        }

        try:
            # `permission` may repeat, so the body is encoded from a list of
            # pairs rather than a dict.
            form_data_encoded = urlencode(form_data, doseq=True)
            with httpx.Client(timeout=self._timeout) as client:
                response = client.post(
                    self.token_endpoint,
                    content=form_data_encoded.encode("utf-8"),
                    headers=headers,
                )
                response.raise_for_status()
                return response.json()
        except httpx.HTTPStatusError as e:
            logger.error(
                f"Failed to get RPT from Keycloak: {e.response.status_code} {e.response.text}"
            )
            raise
        except Exception as e:
            logger.error(f"error getting RPT: {e}")
            raise

    def _permissions_from_response(
        self, rpt_response: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Return permissions from an RPT response, falling back to its JWT."""
        permissions = rpt_response.get("permissions")
        if permissions:
            return permissions
        rpt_jwt = rpt_response.get("access_token")
        if not rpt_jwt:
            return []
        return self._extract_permissions_from_jwt(rpt_jwt)

    def _extract_permissions_from_jwt(self, jwt_token: str) -> List[Dict[str, Any]]:
        """Extract permissions from an RPT JWT's `authorization` claim.

        Best effort: any malformed token yields an empty list and a warning.
        """
        try:
            part_count = len(jwt_token.split("."))
            if part_count != 3:
                logger.warning(
                    f"Invalid JWT format: expected 3 parts, got {part_count}"
                )
                return []

            claims = self._decode_jwt_payload(jwt_token)
            authorization = claims.get("authorization", {})
            permissions = authorization.get("permissions", [])
            logger.info(
                f"Extracted {len(permissions)} permissions from JWT authorization claim"
            )
            if permissions:
                logger.debug(f"Sample permission: {permissions[0]}")
            return permissions

        except Exception as e:
            logger.warning(f"Failed to extract permissions from JWT: {e}")
            return []

    def check_permission(
        self,
        access_token: str,
        resource_id: str,
        scope: str,
    ) -> bool:
        """Check if user has permission for a resource and scope

        Args:
            access_token: User's access token
            resource_id: Resource identifier (UUID or resource name)
            scope: Scope/permission to check

        Returns:
            True if permission granted, False otherwise. A 401/403 from
            Keycloak is treated as "not granted".

        Raises:
            httpx.HTTPStatusError: Keycloak failed with another status.
        """
        try:
            rpt_response = self.get_rpt(
                access_token=access_token,
                resources=[
                    {
                        "resource_id": resource_id,
                        "resource_scopes": [scope],
                    }
                ],
            )

            # https://www.keycloak.org/docs/latest/authorization_services/#_service_rpt_overview
            for permission in self._permissions_from_response(rpt_response):
                # Compare against every identifier the entry carries: taking
                # only the first truthy one meant a UUID `rsid` hid `rsname`.
                identifiers = {
                    permission.get(key) for key in self._RESOURCE_ID_KEYS
                }
                if resource_id in identifiers and scope in (
                    permission.get("scopes") or []
                ):
                    return True

            return False
        except httpx.HTTPStatusError as e:
            if e.response.status_code in (401, 403):
                return False
            logger.error(
                f"Permission check failed: {e.response.status_code} {e.response.text}"
            )
            raise
        except Exception as e:
            logger.error(f"Unexpected error checking permission: {e}")
            raise

    def _decode_jwt_payload(self, token: str) -> Dict[str, Any]:
        """Decode JWT payload to extract the claims (signature NOT verified).

        Raises:
            ValueError: the token has fewer than two dot-separated parts.
        """
        parts = token.split(".")
        if len(parts) < 2:
            raise ValueError("Invalid JWT token")

        payload = _add_base64_padding(parts[1])
        return json.loads(base64.urlsafe_b64decode(payload))

    @classmethod
    def _tenant_from_group(cls, group: Any) -> Optional[str]:
        """Return the path segment following `/Tenants/`, or None."""
        if not isinstance(group, str):
            return None
        _, marker, remainder = group.partition(cls._TENANT_GROUP_MARKER)
        if not marker:
            return None
        return remainder.split("/", 1)[0] or None

    def _extract_tenants_from_token(self, access_token: str) -> List[str]:
        """Extract tenant names from user token claims.

        Reads the `group_membership.tenants` and `groups` claims. Best
        effort: an undecodable token yields an empty list and a warning.

        Returns:
            Sorted, de-duplicated tenant names.
        """
        try:
            claims = self._decode_jwt_payload(access_token)
            candidate_groups: List[Any] = []

            group_membership = claims.get("group_membership", {})
            if isinstance(group_membership, dict):
                candidate_groups.extend(group_membership.get("tenants") or [])
            candidate_groups.extend(claims.get("groups") or [])

            tenants = {self._tenant_from_group(group) for group in candidate_groups}
            tenants.discard(None)
            return sorted(tenants)
        except Exception as e:
            logger.warning(f"Could not extract tenants from token: {e}")
            return []

    def get_tenants_with_create_update_access(
        self,
        access_token: str,
        tenant_list: Optional[List[str]] = None,
        resource_type: str = "collection",
    ) -> List[str]:
        """Get list of tenants the user has create and update access to.

        Args:
            access_token: User's access token
            tenant_list: Optional list of tenants to restrict the result to.
                "public" is always considered as well. The list is not
                modified. When omitted, every tenant granted by Keycloak is
                returned.
            resource_type: Resource category to inspect ("collection" or "item")

        Returns:
            Sorted tenant names holding both `create` and `update` scopes.
            Empty when Keycloak answers 401/403 or grants nothing.

        Raises:
            httpx.HTTPStatusError: Keycloak failed with another status.
        """
        # Previously this argument was computed but never consulted.
        allowed_tenants = None
        if tenant_list is not None:
            allowed_tenants = set(tenant_list) | {self._ALWAYS_CANDIDATE_TENANT}

        try:
            permissions = self._get_permissions_from_rpt(access_token)

            if not permissions:
                logger.warning("No permissions found in RPT response")
                return []

            tenant_scopes = self._process_permissions_for_tenants(
                permissions, resource_type
            )
            writable = self._filter_tenants_with_create_update(tenant_scopes)
            if allowed_tenants is None:
                return writable
            return [tenant for tenant in writable if tenant in allowed_tenants]

        except httpx.HTTPStatusError as e:
            logger.error(
                f"Failed to get tenant access from Keycloak: {e.response.status_code} {e.response.text}"
            )
            if e.response.status_code in (401, 403):
                return []
            raise
        except Exception as e:
            logger.error(f"Error getting tenant access: {e}")
            raise

    def _get_permissions_from_rpt(self, access_token: str) -> List[Dict[str, Any]]:
        """Get permissions from RPT response (either from JSON or JWT)"""
        rpt_response = self.get_rpt(
            access_token=access_token,
            resources=[],  # empty resources so it gets all permissions
        )

        permissions = self._permissions_from_response(rpt_response)
        logger.info(f"Got {len(permissions)} permissions from RPT response")
        if not permissions and not rpt_response.get("access_token"):
            logger.warning("No permissions in RPT response and no access_token JWT")

        return permissions

    def _process_permissions_for_tenants(
        self, permissions: List[Dict[str, Any]], resource_type: str
    ) -> Dict[str, set]:
        """Process permissions and extract tenant scopes by resource type"""
        logger.info(
            f"Processing {len(permissions)} permissions for resource_type={resource_type}"
        )

        tenant_scopes: Dict[str, set] = {}

        for permission in permissions:
            resource_identifier = self._extract_resource_identifier(permission)
            if not resource_identifier:
                continue

            tenant, scopes = self._parse_resource_permission(
                resource_identifier, permission, resource_type
            )

            if tenant and scopes:
                tenant_scopes.setdefault(tenant, set()).update(scopes)
                logger.info(
                    f"Found tenant {tenant} with scopes {scopes} for {resource_type}"
                )

        return tenant_scopes

    def _extract_resource_identifier(self, permission: Dict[str, Any]) -> Optional[str]:
        """Extract resource identifier from permission, skipping UUIDs"""
        resource_identifier = permission.get("rsname") or permission.get("resource_id")

        if not resource_identifier:
            logger.info(f"Permission missing resource identifier: {permission}")
            return None

        logger.info(
            f"Processing permission: resource_identifier={resource_identifier}, scopes={permission.get('scopes', [])}"
        )

        # Resource names are colon-delimited; a value without ":" is taken
        # to be a bare UUID and ignored.
        if ":" not in resource_identifier:
            logger.info(
                f"Skipping UUID resource identifier (not a resource name): {resource_identifier}"
            )
            return None

        return resource_identifier

    def _parse_resource_permission(
        self,
        resource_identifier: str,
        permission: Dict[str, Any],
        resource_type: str,
    ) -> Tuple[Optional[str], Optional[set]]:
        """Return (tenant, scopes) when the identifier matches resource_type.

        The identifier is split on ":"; the second part is compared with
        `resource_type` and the third is the tenant. Returns (None, None)
        otherwise.
        """
        parts = resource_identifier.split(":")
        logger.info(f"Split resource_identifier into parts: {parts}")

        if len(parts) < 3:
            logger.info(f"Resource identifier doesn't have enough parts: {parts}")
            return None, None

        resource_category = parts[1]  # "collection" or "item"
        tenant = parts[2]
        scopes = set(permission.get("scopes") or [])

        logger.info(
            f"Resource: category={resource_category}, tenant={tenant}, scopes={scopes}, looking for {resource_type}"
        )

        if resource_category != resource_type:
            logger.info(f"Skipping {resource_category} (not {resource_type})")
            return None, None
        return tenant, scopes

    def _filter_tenants_with_create_update(
        self, tenant_scopes: Dict[str, set]
    ) -> List[str]:
        """Filter tenants that have both create and update scopes"""
        result = []
        logger.info(
            f"Checking {len(tenant_scopes)} tenants for create and update access: {list(tenant_scopes.keys())}"
        )

        for tenant, scopes in tenant_scopes.items():
            logger.info(f"Tenant {tenant} has scopes: {scopes}")
            if "create" in scopes and "update" in scopes:
                result.append(tenant)
            else:
                logger.info(
                    f"Tenant {tenant} missing create or update: actual scopes are {scopes}"
                )

        logger.info(
            f"Returning {len(result)} tenants with create/update access: {result}"
        )
        return sorted(result)

    def close(self):
        """No-op kept for API compatibility.

        Each request opens and closes its own `httpx.Client`, so there is
        nothing to release here.
        """
        pass
b/ingest_api/infrastructure/config.py index b01a6e6e..df20cddb 100644 --- a/ingest_api/infrastructure/config.py +++ b/ingest_api/infrastructure/config.py @@ -90,6 +90,16 @@ class IngestorConfig(BaseSettings): keycloak_ingest_api_client_id: str = Field(description="Auth client ID") openid_configuration_url: AnyHttpUrl = Field(description="OpenID config url") + + keycloak_uma_resource_server_client_secret_name: Optional[str] = Field( + None, + description="Name of secret containing Keycloak UMA resource server client_id and client_secret", + ) + + keycloak_secret_kms_key_arn: Optional[str] = Field( + None, + description="ARN of KMS key used to encrypt the Keycloak secret", + ) model_config = SettingsConfigDict( case_sensitive=False, env_file=".env", env_prefix="VEDA_", extra="ignore" ) diff --git a/ingest_api/infrastructure/construct.py b/ingest_api/infrastructure/construct.py index 5756a552..22a57f56 100644 --- a/ingest_api/infrastructure/construct.py +++ b/ingest_api/infrastructure/construct.py @@ -7,6 +7,7 @@ from aws_cdk import aws_dynamodb as dynamodb from aws_cdk import aws_ec2 as ec2 from aws_cdk import aws_iam as iam +from aws_cdk import aws_kms as kms from aws_cdk import aws_lambda from aws_cdk import aws_lambda_event_sources as events from aws_cdk import aws_secretsmanager as secretsmanager @@ -47,11 +48,22 @@ def __init__( "GIT_SHA": config.git_sha, } + if config.keycloak_uma_resource_server_client_secret_name: + lambda_env[ + "KEYCLOAK_UMA_RESOURCE_SERVER_CLIENT_SECRET_NAME" + ] = config.keycloak_uma_resource_server_client_secret_name + + keycloak_secret = get_keycloak_secret( + self, config.keycloak_uma_resource_server_client_secret_name + ) + build_api_lambda_params = { "table": self.table, "db_secret": db_secret, "db_vpc": db_vpc, "db_security_group": db_security_group, + "keycloak_secret": keycloak_secret, + "config": config, "pgstac_version": config.db_pgstac_version, } @@ -109,6 +121,8 @@ def build_api_lambda( db_secret: secretsmanager.ISecret, 
db_vpc: ec2.IVpc, db_security_group: ec2.ISecurityGroup, + keycloak_secret: Optional[secretsmanager.ISecret] = None, + config: "IngestorConfig", data_access_role: Union[iam.IRole, None] = None, pgstac_version: str, code_dir: str = "./", @@ -157,6 +171,15 @@ def build_api_lambda( # Allow handler to read DB secret db_secret.grant_read(handler) + # Allow handler to read Keycloak secret if provided + if keycloak_secret: + keycloak_secret.grant_read(handler) + if config.keycloak_secret_kms_key_arn: + kms_key = kms.Key.from_key_arn( + self, "keycloak-secret-kms-key", config.keycloak_secret_kms_key_arn + ) + kms_key.grant(handler, "kms:Decrypt", "kms:GenerateDataKey") + # Allow handler to connect to DB db_security_group.add_ingress_rule( peer=handler.connections.security_groups[0], @@ -240,9 +263,18 @@ def __init__( "GIT_SHA": config.git_sha, } + if config.keycloak_uma_resource_server_client_secret_name: + lambda_env[ + "KEYCLOAK_UMA_RESOURCE_SERVER_CLIENT_SECRET_NAME" + ] = config.keycloak_uma_resource_server_client_secret_name + if config.raster_data_access_role_arn: lambda_env["DATA_ACCESS_ROLE_ARN"] = config.raster_data_access_role_arn + keycloak_secret = get_keycloak_secret( + self, config.keycloak_uma_resource_server_client_secret_name + ) + db_security_group = ec2.SecurityGroup.from_security_group_id( self, "db-security-group", @@ -255,6 +287,7 @@ def __init__( db_secret=db_secret, db_vpc=db_vpc, db_security_group=db_security_group, + keycloak_secret=keycloak_secret, pgstac_version=config.db_pgstac_version, ) @@ -266,6 +299,7 @@ def build_ingestor( db_secret: secretsmanager.ISecret, db_vpc: ec2.IVpc, db_security_group: ec2.ISecurityGroup, + keycloak_secret: Optional[secretsmanager.ISecret] = None, pgstac_version: str, code_dir: str = "./", ) -> aws_lambda.Function: @@ -289,6 +323,10 @@ def build_ingestor( # Allow handler to read DB secret db_secret.grant_read(handler) + # Allow handler to read Keycloak secret if provided + if keycloak_secret: + 
def get_keycloak_secret(
    ctx: Construct,
    secret_name: Optional[str],
    stage: Optional[str] = None,
) -> Optional[secretsmanager.ISecret]:
    """Look up the Keycloak UMA resource server client secret by name.

    Args:
        ctx: Construct scope the imported secret is attached to.
        secret_name: Secrets Manager secret name; falsy means "not configured".
        stage: Optional deployment stage folded into the construct id.

    Returns:
        The imported secret, or None when no secret name is configured.
    """
    if not secret_name:
        return None

    # The original id was a plain string, so the "{stage}" placeholder was
    # emitted literally. Interpolate it when a stage is given.
    if stage:
        construct_id = f"veda-keycloak-{stage}-client-uma-resource-server"
    else:
        construct_id = "veda-keycloak-client-uma-resource-server"
    return secretsmanager.Secret.from_secret_name_v2(ctx, construct_id, secret_name)
def _parse_keycloak_config() -> tuple[str, str]:
    """Extract Keycloak URL and realm from OIDC configuration URL.

    Splits `auth_settings.openid_configuration_url` on the first "/realms/":
    the text before it is the Keycloak base URL and the path segment after
    it is the realm.

    Returns:
        (keycloak_url, realm)

    Raises:
        HTTPException: 503 when the URL is unset, contains no "/realms/"
            segment, or has an empty realm name.
    """
    configured_url = auth_settings.openid_configuration_url
    if not configured_url:
        raise HTTPException(
            status_code=503,
            detail="Missing OPENID_CONFIGURATION_URL",
        )

    keycloak_url, separator, remainder = str(configured_url).partition("/realms/")
    if not separator:
        raise HTTPException(
            status_code=503,
            detail="Invalid OpenID configuration URL format",
        )

    # A URL ending in "/realms/" used to return an empty realm, because the
    # old length check could never fire once "/realms/" was known to be present.
    realm = remainder.split("/", 1)[0]
    if not realm:
        raise HTTPException(
            status_code=503,
            detail="Could not extract realm from OpenID configuration URL",
        )
    return keycloak_url, realm
@app.get(
    "/auth/tenants/writable", response_model=schemas.TenantAccessResponse, tags=["Auth"]
)
async def get_writable_tenant_access(
    request: Request,
    claims=Depends(oidc_auth.valid_token_dependency),
):
    """
    Returns the list of tenants the user has create and update access to.

    The result is the sorted union of tenants writable for collections and
    for items.

    Raises:
        HTTPException: 401 without a Bearer token, 503 when Keycloak/UMA is
            not configured, 502 when the Keycloak lookup fails.
    """
    user_access_token = _extract_access_token(request)
    keycloak_url, realm = _parse_keycloak_config()
    client_id, client_secret = _get_keycloak_credentials()

    # NOTE(review): KeycloakPDPClient performs blocking HTTP calls inside this
    # `async def` handler -- confirm whether that is acceptable for the
    # deployment, or declare the handler with plain `def`.
    pdp_client = None
    try:
        pdp_client = KeycloakPDPClient(
            keycloak_url=keycloak_url,
            realm=realm,
            client_id=client_id,
            client_secret=client_secret,
            timeout=10.0,
        )

        writable_tenants = set()
        for resource_type in ("collection", "item"):
            writable_tenants.update(
                pdp_client.get_tenants_with_create_update_access(
                    access_token=user_access_token,
                    resource_type=resource_type,
                )
            )

        return schemas.TenantAccessResponse(tenants=sorted(writable_tenants))

    except HTTPException:
        raise
    except Exception as e:
        # Full details go to the log only. Echoing str(e) to the client
        # exposed internal Keycloak URLs and error text.
        logger.error(f"Error getting tenant access: {e}", exc_info=True)
        raise HTTPException(
            status_code=502,
            detail="Failed to retrieve tenant access",
        ) from e
    finally:
        if pdp_client:
            pdp_client.close()
+COPY common/auth /tmp/common_auth +RUN pip install /tmp/common_auth -t /asset --no-binary pydantic +RUN rm -rf /tmp/common_auth + # TODO this is temporary until we use a real packaging system like setup.py or poetry COPY ingest_api/runtime/src /asset/src