diff --git a/README.md b/README.md index 425cf01..474133a 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ Large Language Models (LLMs): - Llama2 and 3 from several different providers, including - Anyscale - Azure + - CentML - Cerebras - Cloudflare - Groq diff --git a/llm_benchmark_suite.py b/llm_benchmark_suite.py index b60b3f2..2c8c2c0 100644 --- a/llm_benchmark_suite.py +++ b/llm_benchmark_suite.py @@ -27,6 +27,7 @@ GPT_35_TURBO_1106 = "gpt-3.5-turbo-1106" GEMINI_1_5_PRO = "gemini-1.5-pro" GEMINI_1_5_FLASH = "gemini-1.5-flash" +LLAMA_33_70B_INSTRUCT = "Llama-3.3-70B-Instruct" LLAMA_31_405B_CHAT = "llama-3.1-405b-chat" LLAMA_31_405B_CHAT_FP8 = "llama-3.1-405b-chat-fp8" LLAMA_31_70B_CHAT = "llama-3.1-70b-chat" @@ -125,6 +126,18 @@ async def run(self, pass_argv: List[str], spread: float) -> asyncio.Task: return await llm_benchmark.run(full_argv) +class _CentmlLlm(_Llm): + """See https://docs.centml.ai/resources/pricing""" + + def __init__(self, model: str, display_model: Optional[str] = None): + super().__init__( + model, + "centml.ai/" + (display_model or model), + api_key=os.getenv("CENTML_API_KEY"), + base_url="https://api.centml.com/openai/v1", + ) + + class _CerebrasLlm(_Llm): """See https://docs.cerebras.ai/en/latest/wsc/Model-zoo/MZ-overview.html#list-of-models""" @@ -368,6 +381,8 @@ def _text_models(): "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", LLAMA_31_405B_CHAT_FP8 ), # _OvhLlm("llama-3-1-405b-instruct", LLAMA_31_405B_CHAT), + # Llama 3.3 70b + _CentmlLlm("meta-llama/Llama-3.3-70B-Instruct", LLAMA_33_70B_INSTRUCT), # Llama 3.1 70b _CerebrasLlm("llama3.1-70b", LLAMA_31_70B_CHAT), _CloudflareLlm("@cf/meta/llama-3.1-70b-preview", LLAMA_31_70B_CHAT), @@ -459,6 +474,7 @@ def _tools_models(): # _FireworksLlm( # "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8 # ), returns "FUNCTION" and the call as text + _CentmlLlm("meta-llama/Llama-3.3-70B-Instruct", LLAMA_33_70B_INSTRUCT), _GroqLlm("llama-3.1-405b-reasoning", LLAMA_31_405B_CHAT_FP8), _GroqLlm("llama-3.1-70b-versatile", LLAMA_31_70B_CHAT_FP8), _GroqLlm("llama-3.1-8b-instant", LLAMA_31_8B_CHAT_FP8),