diff --git a/apps/beeai-cli/src/beeai_cli/commands/model.py b/apps/beeai-cli/src/beeai_cli/commands/model.py index 9666c25d4..a3877b026 100644 --- a/apps/beeai-cli/src/beeai_cli/commands/model.py +++ b/apps/beeai-cli/src/beeai_cli/commands/model.py @@ -44,6 +44,7 @@ def _ollama_exe() -> str: RECOMMENDED_LLM_MODELS = [ f"{ModelProviderType.WATSONX}:ibm/granite-3-3-8b-instruct", + f"{ModelProviderType.AWS_BEDROCK}:anthropic.claude-3-sonnet-20240229-v1:0", f"{ModelProviderType.OPENAI}:gpt-4o", f"{ModelProviderType.ANTHROPIC}:claude-sonnet-4-20250514", f"{ModelProviderType.CEREBRAS}:llama-3.3-70b", @@ -72,9 +73,17 @@ def _ollama_exe() -> str: ] LLM_PROVIDERS = [ + Choice( + name="Amazon Bedrock".ljust(20), + value=(ModelProviderType.AWS_BEDROCK, "Amazon Bedrock", None), + ), Choice( name="Anthropic Claude".ljust(20), - value=(ModelProviderType.ANTHROPIC, "Anthropic Claude", "https://api.anthropic.com/v1"), + value=( + ModelProviderType.ANTHROPIC, + "Anthropic Claude", + "https://api.anthropic.com/v1", + ), ), Choice( name="Cerebras".ljust(20) + "🆓 has a free tier", @@ -86,23 +95,44 @@ def _ollama_exe() -> str: ), Choice( name="Cohere".ljust(20) + "🆓 has a free tier", - value=(ModelProviderType.COHERE, "Cohere", "https://api.cohere.ai/compatibility/v1"), + value=( + ModelProviderType.COHERE, + "Cohere", + "https://api.cohere.ai/compatibility/v1", + ), + ), + Choice( + name="DeepSeek", + value=(ModelProviderType.DEEPSEEK, "DeepSeek", "https://api.deepseek.com/v1"), ), - Choice(name="DeepSeek", value=(ModelProviderType.DEEPSEEK, "DeepSeek", "https://api.deepseek.com/v1")), Choice( name="Google Gemini".ljust(20) + "🆓 has a free tier", - value=(ModelProviderType.GEMINI, "Google Gemini", "https://generativelanguage.googleapis.com/v1beta/openai"), + value=( + ModelProviderType.GEMINI, + "Google Gemini", + "https://generativelanguage.googleapis.com/v1beta/openai", + ), ), Choice( name="GitHub Models".ljust(20) + "🆓 has a free tier", - value=(ModelProviderType.GITHUB, "GitHub 
Models", "https://models.github.ai/inference"), + value=( + ModelProviderType.GITHUB, + "GitHub Models", + "https://models.github.ai/inference", + ), ), Choice( name="Groq".ljust(20) + "🆓 has a free tier", value=(ModelProviderType.GROQ, "Groq", "https://api.groq.com/openai/v1"), ), - Choice(name="IBM watsonx".ljust(20), value=(ModelProviderType.WATSONX, "IBM watsonx", None)), - Choice(name="Jan".ljust(20) + "💻 local", value=(ModelProviderType.JAN, "Jan", "http://localhost:1337/v1")), + Choice( + name="IBM watsonx".ljust(20), + value=(ModelProviderType.WATSONX, "IBM watsonx", None), + ), + Choice( + name="Jan".ljust(20) + "💻 local", + value=(ModelProviderType.JAN, "Jan", "http://localhost:1337/v1"), + ), Choice( name="Mistral".ljust(20) + "🆓 has a free tier", value=(ModelProviderType.MISTRAL, "Mistral", "https://api.mistral.ai/v1"), @@ -113,7 +143,11 @@ def _ollama_exe() -> str: ), Choice( name="NVIDIA NIM".ljust(20), - value=(ModelProviderType.NVIDIA, "NVIDIA NIM", "https://integrate.api.nvidia.com/v1"), + value=( + ModelProviderType.NVIDIA, + "NVIDIA NIM", + "https://integrate.api.nvidia.com/v1", + ), ), Choice( name="Ollama".ljust(20) + "💻 local", @@ -125,7 +159,11 @@ def _ollama_exe() -> str: ), Choice( name="OpenRouter".ljust(20) + "🆓 has some free models", - value=(ModelProviderType.OPENROUTER, "OpenRouter", "https://openrouter.ai/api/v1"), + value=( + ModelProviderType.OPENROUTER, + "OpenRouter", + "https://openrouter.ai/api/v1", + ), ), Choice( name="Perplexity".ljust(20), @@ -133,7 +171,11 @@ def _ollama_exe() -> str: ), Choice( name="Together.ai".ljust(20) + "🆓 has a free tier", - value=(ModelProviderType.TOGETHER, "together.ai", "https://api.together.xyz/v1"), + value=( + ModelProviderType.TOGETHER, + "together.ai", + "https://api.together.xyz/v1", + ), ), Choice( name="🛠️ Other (RITS, Amazon Bedrock, vLLM, ..., any OpenAI-compatible API)", @@ -182,6 +224,7 @@ async def _add_provider(capability: ModelCapability, use_true_localhost: bool = provider_name: 
str base_url: str watsonx_project_id, watsonx_space_id = None, None + aws_region, aws_access_key_id = None, None choices = LLM_PROVIDERS if capability == ModelCapability.LLM else EMBEDDING_PROVIDERS provider_type, provider_name, base_url = await inquirer.fuzzy( # type: ignore message=f"Select {capability} provider (type to search):", choices=choices @@ -231,14 +274,46 @@ async def _add_provider(capability: ModelCapability, use_true_localhost: bool = watsonx_project_id = watsonx_project_or_space_id if watsonx_project_or_space == "project" else None watsonx_space_id = watsonx_project_or_space_id if watsonx_project_or_space == "space" else None - if (api_key := os.environ.get(f"{provider_type.upper()}_API_KEY")) is None or not await inquirer.confirm( # type: ignore + if provider_type == ModelProviderType.AWS_BEDROCK: + aws_region = await inquirer.select( # type: ignore + message="Select AWS region:", + choices=[ + "us-east-1", + "us-west-2", + "eu-central-1", + "ap-northeast-1", + "ap-southeast-2", + ], + default="us-east-1", + ).execute_async() + base_url = f"https://bedrock-runtime.{aws_region}.amazonaws.com/openai/v1" + if ( + os.environ.get("AWS_ACCESS_KEY_ID") + and os.environ.get("AWS_SECRET_ACCESS_KEY") + and await inquirer.confirm( # type: ignore + message="Use AWS credentials from environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)?", + default=True, + ).execute_async() + ): + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID") + api_key = os.environ.get("AWS_SECRET_ACCESS_KEY") + else: + aws_access_key_id = await inquirer.text( # type: ignore + message="Enter AWS Access Key ID:", validate=EmptyInputValidator() + ).execute_async() + api_key = await inquirer.secret( # type: ignore + message="Enter AWS Secret Access Key:", validate=EmptyInputValidator() + ).execute_async() + elif (api_key := os.environ.get(f"{provider_type.upper()}_API_KEY")) is None or not await inquirer.confirm( # type: ignore message=f"Use the API key from environment 
variable '{provider_type.upper()}_API_KEY'?", default=True, ).execute_async(): - api_key: str = ( + api_key = ( "dummy" if provider_type in {ModelProviderType.OLLAMA, ModelProviderType.JAN} - else await inquirer.secret(message="Enter API key:", validate=EmptyInputValidator()).execute_async() # type: ignore + else await inquirer.secret( # type: ignore + message="Enter API key:", validate=EmptyInputValidator() + ).execute_async() ) try: @@ -286,9 +361,10 @@ async def _add_provider(capability: ModelCapability, use_true_localhost: bool = name=provider_name, type=ModelProviderType(provider_type), base_url=base_url, - api_key=api_key, + api_key=api_key or "", watsonx_space_id=watsonx_space_id, watsonx_project_id=watsonx_project_id, + aws_access_key_id=aws_access_key_id, ) except httpx.HTTPError as e: diff --git a/apps/beeai-sdk/src/beeai_sdk/platform/model_provider.py b/apps/beeai-sdk/src/beeai_sdk/platform/model_provider.py index e0df473ed..362cdc96e 100644 --- a/apps/beeai-sdk/src/beeai_sdk/platform/model_provider.py +++ b/apps/beeai-sdk/src/beeai_sdk/platform/model_provider.py @@ -12,6 +12,7 @@ class ModelProviderType(StrEnum): ANTHROPIC = "anthropic" + AWS_BEDROCK = "aws_bedrock" CEREBRAS = "cerebras" CHUTES = "chutes" COHERE = "cohere" @@ -52,6 +53,7 @@ class ModelProvider(pydantic.BaseModel): base_url: pydantic.HttpUrl watsonx_project_id: str | None = None watsonx_space_id: str | None = None + aws_access_key_id: str | None = None created_at: pydantic.AwareDatetime capabilities: set[ModelCapability] @@ -64,6 +66,7 @@ async def create( base_url: str | pydantic.HttpUrl, watsonx_project_id: str | None = None, watsonx_space_id: str | None = None, + aws_access_key_id: str | None = None, api_key: str, client: PlatformClient | None = None, ) -> ModelProvider: @@ -79,6 +82,7 @@ async def create( "base_url": str(base_url), "watsonx_project_id": watsonx_project_id, "watsonx_space_id": watsonx_space_id, + "aws_access_key_id": aws_access_key_id, "api_key": api_key, }, ) diff 
--git a/apps/beeai-server/pyproject.toml b/apps/beeai-server/pyproject.toml index aee2c17a6..fc4b03eb1 100644 --- a/apps/beeai-server/pyproject.toml +++ b/apps/beeai-server/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "openai>=1.97.0", "authlib>=1.6.4", "async-lru>=2.0.5", + "aws-bedrock-token-generator>=1.1.0", ] [dependency-groups] diff --git a/apps/beeai-server/src/beeai_server/api/routes/model_providers.py b/apps/beeai-server/src/beeai_server/api/routes/model_providers.py index 96c01bec4..5a1db3d44 100644 --- a/apps/beeai-server/src/beeai_server/api/routes/model_providers.py +++ b/apps/beeai-server/src/beeai_server/api/routes/model_providers.py @@ -33,6 +33,7 @@ async def create_model_provider( base_url=request.base_url, watsonx_project_id=request.watsonx_project_id, watsonx_space_id=request.watsonx_space_id, + aws_access_key_id=request.aws_access_key_id, api_key=request.api_key.get_secret_value(), ) return EntityModel(model_provider) diff --git a/apps/beeai-server/src/beeai_server/api/routes/openai.py b/apps/beeai-server/src/beeai_server/api/routes/openai.py index a6cfe64ff..6634c0b88 100644 --- a/apps/beeai-server/src/beeai_server/api/routes/openai.py +++ b/apps/beeai-server/src/beeai_server/api/routes/openai.py @@ -12,7 +12,6 @@ import ibm_watsonx_ai import ibm_watsonx_ai.foundation_models.embeddings import openai -import openai.pagination import openai.types.chat from fastapi import Depends, HTTPException from fastapi.concurrency import run_in_threadpool @@ -44,6 +43,22 @@ async def create_chat_completion( api_key = await model_provider_service.get_provider_api_key(model_provider_id=provider.id) + if provider.type == ModelProviderType.AWS_BEDROCK: + import aws_bedrock_token_generator + import botocore.credentials + + # exchange aws_secret_access_key for short-lived Bedrock API key + api_key = await run_in_threadpool( + aws_bedrock_token_generator.provide_token, + region=provider.aws_region, + aws_credentials_provider=botocore.credentials.EnvProvider( + 
{ + "AWS_ACCESS_KEY_ID": provider.aws_access_key_id, + "AWS_SECRET_ACCESS_KEY": api_key, + } + ), + ) + if provider.type == ModelProviderType.WATSONX: model = ibm_watsonx_ai.foundation_models.ModelInference( model_id=model_id, diff --git a/apps/beeai-server/src/beeai_server/api/schema/model_provider.py b/apps/beeai-server/src/beeai_server/api/schema/model_provider.py index eab292fd1..eb4850bcc 100644 --- a/apps/beeai-server/src/beeai_server/api/schema/model_provider.py +++ b/apps/beeai-server/src/beeai_server/api/schema/model_provider.py @@ -13,6 +13,7 @@ class CreateModelProviderRequest(BaseModel): base_url: HttpUrl watsonx_project_id: str | None = None watsonx_space_id: str | None = None + aws_access_key_id: str | None = None api_key: Secret[str] diff --git a/apps/beeai-server/src/beeai_server/domain/models/model_provider.py b/apps/beeai-server/src/beeai_server/domain/models/model_provider.py index 1a5b34e6e..e7f726e4f 100644 --- a/apps/beeai-server/src/beeai_server/domain/models/model_provider.py +++ b/apps/beeai-server/src/beeai_server/domain/models/model_provider.py @@ -1,6 +1,7 @@ # Copyright 2025 © BeeAI a Series of LF Projects, LLC # SPDX-License-Identifier: Apache-2.0 +import re from datetime import datetime from enum import StrEnum from typing import Any, Literal @@ -15,6 +16,7 @@ class ModelProviderType(StrEnum): ANTHROPIC = "anthropic" + AWS_BEDROCK = "aws_bedrock" CEREBRAS = "cerebras" CHUTES = "chutes" COHERE = "cohere" @@ -75,13 +77,27 @@ class ModelProvider(BaseModel): exclude=True, ) + # AWS Bedrock specific fields + aws_access_key_id: str | None = Field(None, description="AWS access key ID for Bedrock", exclude=True) + @model_validator(mode="after") - def validate_watsonx_config(self): + def validate_provider_config(self): """Validate that watsonx providers have either project_id or space_id.""" if self.type == ModelProviderType.WATSONX and not (bool(self.watsonx_project_id) ^ bool(self.watsonx_space_id)): raise ValueError("WatsonX providers must 
have either watsonx_project_id or watsonx_space_id") + if self.type == ModelProviderType.AWS_BEDROCK and not self.aws_access_key_id: + raise ValueError("AWS Bedrock providers must have aws_access_key_id") return self + @computed_field + @property + def aws_region(self) -> str | None: + if self.type == ModelProviderType.AWS_BEDROCK: + match = re.search(r"bedrock-runtime\.([^.]+)\.amazonaws\.com", str(self.base_url)) + if match: + return match.group(1) + return None + @computed_field @property def capabilities(self) -> set[ModelCapability]: @@ -99,6 +115,26 @@ def _parse_openai_compatible_model(self, model: dict[str, Any]) -> Model: async def load_models(self, api_key: str) -> list[Model]: async with AsyncClient() as client: match self.type: + case ModelProviderType.AWS_BEDROCK: + import boto3 + + response = boto3.client( + "bedrock", + region_name=self.aws_region, + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=api_key, + ).list_foundation_models(byInferenceType="ON_DEMAND") + return [ + Model( + id=f"{self.type}:{model['modelId']}", + created=int(datetime.now().timestamp()), + object="model", + owned_by=model["providerName"], + provider=self._model_provider_info, + ) + for model in response["modelSummaries"] + if "TEXT" in model["outputModalities"] + ] case ModelProviderType.WATSONX: response = await client.get(f"{self.base_url}/ml/v1/foundation_model_specs?version=2025-08-27") response_models = response.raise_for_status().json()["resources"] @@ -189,6 +225,7 @@ class ModelWithScore(BaseModel): _PROVIDER_CAPABILITIES: dict[ModelProviderType, set[ModelCapability]] = { ModelProviderType.ANTHROPIC: {ModelCapability.LLM}, + ModelProviderType.AWS_BEDROCK: {ModelCapability.LLM, ModelCapability.EMBEDDING}, ModelProviderType.CEREBRAS: {ModelCapability.LLM}, ModelProviderType.CHUTES: {ModelCapability.LLM}, ModelProviderType.COHERE: {ModelCapability.LLM, ModelCapability.EMBEDDING}, diff --git 
a/apps/beeai-server/src/beeai_server/infrastructure/persistence/migrations/alembic/versions/198d161f5b5c_.py b/apps/beeai-server/src/beeai_server/infrastructure/persistence/migrations/alembic/versions/198d161f5b5c_.py new file mode 100644 index 000000000..4a06b0eb4 --- /dev/null +++ b/apps/beeai-server/src/beeai_server/infrastructure/persistence/migrations/alembic/versions/198d161f5b5c_.py @@ -0,0 +1,35 @@ +# Copyright 2025 © BeeAI a Series of LF Projects, LLC +# SPDX-License-Identifier: Apache-2.0 + +"""Add aws_access_key_id column to model_providers (AWS Bedrock support) + +Revision ID: 198d161f5b5c +Revises: 73e2d8596ada +Create Date: 2025-10-02 15:55:24.226680 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "198d161f5b5c" +down_revision: str | None = "73e2d8596ada" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: +    """Upgrade schema.""" +    # ### commands auto generated by Alembic - please adjust! ### +    op.add_column("model_providers", sa.Column("aws_access_key_id", sa.String(length=256), nullable=True)) +    # ### end Alembic commands ### + + +def downgrade() -> None: +    """Downgrade schema.""" +    # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("model_providers", "aws_access_key_id") + # ### end Alembic commands ### diff --git a/apps/beeai-server/src/beeai_server/infrastructure/persistence/repositories/model_provider.py b/apps/beeai-server/src/beeai_server/infrastructure/persistence/repositories/model_provider.py index 2b3cc60ad..34ee39d23 100644 --- a/apps/beeai-server/src/beeai_server/infrastructure/persistence/repositories/model_provider.py +++ b/apps/beeai-server/src/beeai_server/infrastructure/persistence/repositories/model_provider.py @@ -25,6 +25,7 @@ Column("created_at", DateTime(timezone=True), nullable=False), Column("watsonx_project_id", String(256), nullable=True), Column("watsonx_space_id", String(256), nullable=True), + Column("aws_access_key_id", String(256), nullable=True), Column("description", Text, nullable=True), ) @@ -43,6 +44,7 @@ async def create(self, *, model_provider: ModelProvider) -> None: "created_at": model_provider.created_at, "watsonx_project_id": model_provider.watsonx_project_id, "watsonx_space_id": model_provider.watsonx_space_id, + "aws_access_key_id": model_provider.aws_access_key_id, "description": model_provider.description, } ) @@ -84,5 +86,6 @@ def _row_to_model_provider(self, row: Row) -> ModelProvider: created_at=row.created_at, watsonx_project_id=row.watsonx_project_id, watsonx_space_id=row.watsonx_space_id, + aws_access_key_id=row.aws_access_key_id, description=row.description, ) diff --git a/apps/beeai-server/src/beeai_server/service_layer/services/model_providers.py b/apps/beeai-server/src/beeai_server/service_layer/services/model_providers.py index 8f8974f2f..c5bfb829c 100644 --- a/apps/beeai-server/src/beeai_server/service_layer/services/model_providers.py +++ b/apps/beeai-server/src/beeai_server/service_layer/services/model_providers.py @@ -43,6 +43,7 @@ async def create_provider( base_url: HttpUrl, watsonx_project_id: str | None = None, watsonx_space_id: str | None = None, + aws_access_key_id: str | None = None, api_key: str, ) -> 
ModelProvider: model_provider = ModelProvider( @@ -52,6 +53,7 @@ async def create_provider( base_url=base_url, watsonx_project_id=watsonx_project_id, watsonx_space_id=watsonx_space_id, + aws_access_key_id=aws_access_key_id, ) # Check if models are available await self._get_provider_models(provider=model_provider, api_key=api_key, raise_error=True) diff --git a/apps/beeai-server/uv.lock b/apps/beeai-server/uv.lock index b88c9fd6e..903f3bb57 100644 --- a/apps/beeai-server/uv.lock +++ b/apps/beeai-server/uv.lock @@ -254,6 +254,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/aa/91355b5f539caf1b94f0e66ff1e4ee39373b757fce08204981f7829ede51/authlib-1.6.4-py2.py3-none-any.whl", hash = "sha256:39313d2a2caac3ecf6d8f95fbebdfd30ae6ea6ae6a6db794d976405fdd9aa796", size = 243076, upload-time = "2025-09-17T09:59:22.259Z" }, ] +[[package]] +name = "aws-bedrock-token-generator" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/39/cf1c2e12bc5a84af0f96a546481213f81fa6e7927d2bbabd81758c6558ca/aws_bedrock_token_generator-1.1.0.tar.gz", hash = "sha256:95ccb07f63a91ac486561f6df05cc4e04784c8ff5086dc687ed9c5fd3ab1b5ba", size = 19123, upload-time = "2025-07-29T19:53:19.511Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/fd/745ece98870c3824d294bcdce5dc5e15381188a41bc80832c246b205e40e/aws_bedrock_token_generator-1.1.0-py3-none-any.whl", hash = "sha256:bd12854f7c7e52dde5d980d369379f12d0cc5f0855099d87f38688b0f9de5cd4", size = 10291, upload-time = "2025-07-29T19:53:18.704Z" }, +] + [[package]] name = "beeai-sdk" version = "0.3.5" @@ -322,6 +334,7 @@ dependencies = [ { name = "async-lru" }, { name = "asyncpg" }, { name = "authlib" }, + { name = "aws-bedrock-token-generator" }, { name = "cachetools" }, { name = "fastapi", extra = ["standard"] }, { name = "httpx" }, @@ -371,6 +384,7 @@ requires-dist = [ { name = "async-lru", 
specifier = ">=2.0.5" }, { name = "asyncpg", specifier = ">=0.30.0" }, { name = "authlib", specifier = ">=1.6.4" }, + { name = "aws-bedrock-token-generator", specifier = ">=1.1.0" }, { name = "cachetools", specifier = ">=5.5.2" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.115.7" }, { name = "httpx", specifier = ">=0.28.1" },